From 25c107a642c49792b02863391302d5b732c6b556 Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Fri, 26 Apr 2024 08:37:58 -0700 Subject: [PATCH 01/11] Make SytemString and SystemChar public --- Sources/System/FilePath/FilePath.swift | 2 +- Sources/System/FilePath/FilePathWindows.swift | 2 +- Sources/System/SystemString.swift | 229 +++++++++++------- Tests/SystemTests/SystemCharTest.swift | 8 +- Tests/SystemTests/SystemStringTests.swift | 1 - 5 files changed, 142 insertions(+), 100 deletions(-) diff --git a/Sources/System/FilePath/FilePath.swift b/Sources/System/FilePath/FilePath.swift index 13064b5a..47498650 100644 --- a/Sources/System/FilePath/FilePath.swift +++ b/Sources/System/FilePath/FilePath.swift @@ -62,7 +62,7 @@ public struct FilePath: Sendable { @available(/*System 0.0.1: macOS 11.0, iOS 14.0, watchOS 7.0, tvOS 14.0*/iOS 8, *) extension FilePath { /// The length of the file path, excluding the null terminator. - public var length: Int { _storage.length } + public var length: Int { _storage.count } } @available(/*System 0.0.1: macOS 11.0, iOS 14.0, watchOS 7.0, tvOS 14.0*/iOS 8, *) diff --git a/Sources/System/FilePath/FilePathWindows.swift b/Sources/System/FilePath/FilePathWindows.swift index b725dd17..11b94d80 100644 --- a/Sources/System/FilePath/FilePathWindows.swift +++ b/Sources/System/FilePath/FilePathWindows.swift @@ -88,7 +88,7 @@ struct _Lexer { // Try to consume a drive letter and subsequent `:`. mutating func eatDrive() -> SystemChar? 
{ let copy = slice - if let d = slice._eat(if: { $0.isLetter }), slice._eat(.colon) != nil { + if let d = slice._eat(if: { $0.isASCIILetter }), slice._eat(.colon) != nil { return d } // Restore slice diff --git a/Sources/System/SystemString.swift b/Sources/System/SystemString.swift index 8ca643b0..a0bf99fc 100644 --- a/Sources/System/SystemString.swift +++ b/Sources/System/SystemString.swift @@ -7,162 +7,207 @@ See https://swift.org/LICENSE.txt for license information */ -// A platform-native character representation, currently used for file paths -internal struct SystemChar: +/// A platform-native character representation. +/// +/// A SystemChar is a `CChar` on Linux and Darwin, and a `UInt16` on Windows. +/// +/// Note that no particular encoding is assumed. +@frozen +public struct SystemChar: RawRepresentable, Sendable, Comparable, Hashable, Codable { - internal typealias RawValue = CInterop.PlatformChar + public typealias RawValue = CInterop.PlatformChar - internal var rawValue: RawValue + public var rawValue: RawValue - internal init(rawValue: RawValue) { self.rawValue = rawValue } + @inlinable + public init(rawValue: RawValue) { self.rawValue = rawValue } - internal init(_ rawValue: RawValue) { self.init(rawValue: rawValue) } + @_alwaysEmitIntoClient + public init(_ rawValue: RawValue) { + self.init(rawValue: rawValue) + } - static func < (lhs: SystemChar, rhs: SystemChar) -> Bool { + @inlinable + public static func < (lhs: SystemChar, rhs: SystemChar) -> Bool { lhs.rawValue < rhs.rawValue } } extension SystemChar { - internal init(ascii: Unicode.Scalar) { - self.init(rawValue: numericCast(UInt8(ascii: ascii))) + /// Create a SystemChar from an ASCII scalar. 
+ @_alwaysEmitIntoClient + public init(ascii: Unicode.Scalar) { + precondition(ascii.isASCII) + self.init(rawValue: numericCast(ascii.value)) } - internal init(codeUnit: CInterop.PlatformUnicodeEncoding.CodeUnit) { - self.init(rawValue: codeUnit._platformChar) + + /// Cast `x` to a `SystemChar` + @_alwaysEmitIntoClient + public init(_ x: some FixedWidthInteger) { + self.init(numericCast(x)) } - internal static var null: SystemChar { SystemChar(0x0) } - internal static var slash: SystemChar { SystemChar(ascii: "/") } - internal static var backslash: SystemChar { SystemChar(ascii: #"\"#) } - internal static var dot: SystemChar { SystemChar(ascii: ".") } - internal static var colon: SystemChar { SystemChar(ascii: ":") } - internal static var question: SystemChar { SystemChar(ascii: "?") } + /// The NULL character `\0` + @_alwaysEmitIntoClient + public static var null: SystemChar { SystemChar(0x0) } - internal var codeUnit: CInterop.PlatformUnicodeEncoding.CodeUnit { - rawValue._platformCodeUnit - } + /// The slash character `/` + @_alwaysEmitIntoClient + public static var slash: SystemChar { SystemChar(ascii: "/") } + + /// The backslash character `\` + @_alwaysEmitIntoClient + public static var backslash: SystemChar { SystemChar(ascii: #"\"#) } + + /// The dot character `.` + @_alwaysEmitIntoClient + public static var dot: SystemChar { SystemChar(ascii: ".") } + + /// The colon character `:` + @_alwaysEmitIntoClient + public static var colon: SystemChar { SystemChar(ascii: ":") } - internal var asciiScalar: Unicode.Scalar? { + /// The question mark character `?` + @_alwaysEmitIntoClient + public static var question: SystemChar { SystemChar(ascii: "?") } + + /// Returns `self` as a `Unicode.Scalar` if ASCII, else `nil` + @_alwaysEmitIntoClient + public var asciiScalar: Unicode.Scalar? 
{ guard isASCII else { return nil } return Unicode.Scalar(UInt8(truncatingIfNeeded: rawValue)) } - internal var isASCII: Bool { + /// Whether `self` is ASCII + @_alwaysEmitIntoClient + public var isASCII: Bool { (0...0x7F).contains(rawValue) } - internal var isLetter: Bool { + /// Whether `self` is an ASCII letter, i.e. in `[a-zA-Z]` + @_alwaysEmitIntoClient + public var isASCIILetter: Bool { guard isASCII else { return false } let asciiRaw: UInt8 = numericCast(rawValue) - return (UInt8(ascii: "a") ... UInt8(ascii: "z")).contains(asciiRaw) || - (UInt8(ascii: "A") ... UInt8(ascii: "Z")).contains(asciiRaw) + switch asciiRaw { + case UInt8(ascii: "a")...UInt8(ascii: "z"): return true + case UInt8(ascii: "A")...UInt8(ascii: "Z"): return true + default: return false + } } } -// A platform-native string representation, currently for file paths -// -// Always null-terminated. -internal struct SystemString: Sendable { - internal typealias Storage = [SystemChar] - internal var nullTerminatedStorage: Storage -} -extension SystemString { - internal init() { - self.nullTerminatedStorage = [.null] +/// A platform-native string representation. A `SystemString` is a collection +/// of non-NULL `SystemChar`s followed by a NULL terminator. +/// +/// TODO: example use or two, showing that innards are not NULL, but there's +/// always a null at the end, NULL is not part of the count +@frozen +public struct SystemString: Sendable { + public typealias Storage = [SystemChar] + + @usableFromInline + internal var _nullTerminatedStorage: Storage + + /// Access the back storage, including the null terminator. Note that + /// `nullTerminatedStorage.count == self.count + 1`, due + /// to the null terminator. 
+ @_alwaysEmitIntoClient + public var nullTerminatedStorage: Storage { _nullTerminatedStorage } + + /// Create a SystemString from pre-existing null-terminated storage + @usableFromInline + internal init(_nullTerminatedStorage storage: [SystemChar]) { + self._nullTerminatedStorage = storage _invariantCheck() } +} - internal var length: Int { - let len = nullTerminatedStorage.count - 1 - assert(len == self.count) - return len - } - - // Common funnel point. Ensure all non-empty inits go here. - internal init(nullTerminated storage: Storage) { - self.nullTerminatedStorage = storage - _invariantCheck() +extension SystemString { + /// Create an empty `SystemString` + public init() { + self.init(_nullTerminatedStorage: [.null]) } - // Ensures that result is null-terminated - internal init(_ chars: C) where C.Element == SystemChar { - var rawChars = Storage(chars) + /// Create a `SystemString` from a collection of `SystemChar`s. + /// A NULL terminator will be added if `chars` lacks one. `chars` must not + /// include any interior NULLs. + @_alwaysEmitIntoClient + public init(_ chars: C) where C.Element == SystemChar { + var rawChars = Array(chars) if rawChars.last != .null { rawChars.append(.null) } - self.init(nullTerminated: rawChars) + precondition( + rawChars.dropLast(1).allSatisfy { $0 != .null }, + "Embedded NULL detected") + self.init(_nullTerminatedStorage: rawChars) } } extension SystemString { - fileprivate func _invariantCheck() { + @_alwaysEmitIntoClient + internal func _invariantCheck() { #if DEBUG - precondition(nullTerminatedStorage.last! == .null) - precondition(nullTerminatedStorage.firstIndex(of: .null) == length) + precondition(_nullTerminatedStorage.last! 
== .null) + precondition(_nullTerminatedStorage.firstIndex(of: .null) == endIndex) #endif // DEBUG } } extension SystemString: RandomAccessCollection, MutableCollection { - internal typealias Element = SystemChar - internal typealias Index = Storage.Index - internal typealias Indices = Range + public typealias Element = SystemChar + public typealias Index = Int + public typealias Indices = Range - internal var startIndex: Index { - nullTerminatedStorage.startIndex + @inlinable + public var startIndex: Index { + _nullTerminatedStorage.startIndex } - internal var endIndex: Index { - nullTerminatedStorage.index(before: nullTerminatedStorage.endIndex) + @inlinable + public var endIndex: Index { + _nullTerminatedStorage.index(before: _nullTerminatedStorage.endIndex) } - internal subscript(position: Index) -> SystemChar { + @inlinable + public subscript(position: Index) -> SystemChar { _read { precondition(position >= startIndex && position <= endIndex) - yield nullTerminatedStorage[position] + yield _nullTerminatedStorage[position] } set(newValue) { precondition(position >= startIndex && position <= endIndex) - nullTerminatedStorage[position] = newValue + _nullTerminatedStorage[position] = newValue _invariantCheck() } } } extension SystemString: RangeReplaceableCollection { - internal mutating func replaceSubrange( + @inlinable + public mutating func replaceSubrange( _ subrange: Range, with newElements: C ) where C.Element == SystemChar { defer { _invariantCheck() } - nullTerminatedStorage.replaceSubrange(subrange, with: newElements) + _nullTerminatedStorage.replaceSubrange(subrange, with: newElements) } - internal mutating func reserveCapacity(_ n: Int) { + @inlinable + public mutating func reserveCapacity(_ n: Int) { defer { _invariantCheck() } - nullTerminatedStorage.reserveCapacity(1 + n) + _nullTerminatedStorage.reserveCapacity(1 + n) } - internal func withContiguousStorageIfAvailable( + @inlinable + public func withContiguousStorageIfAvailable( _ body: 
(UnsafeBufferPointer) throws -> R ) rethrows -> R? { // Do not include the null terminator, it is outside the Collection - try nullTerminatedStorage.withContiguousStorageIfAvailable { + try _nullTerminatedStorage.withContiguousStorageIfAvailable { try body(.init(start: $0.baseAddress, count: $0.count-1)) } } - - internal mutating func withContiguousMutableStorageIfAvailable( - _ body: (inout UnsafeMutableBufferPointer) throws -> R - ) rethrows -> R? { - defer { _invariantCheck() } - // Do not include the null terminator, it is outside the Collection - return try nullTerminatedStorage.withContiguousMutableStorageIfAvailable { - var buffer = UnsafeMutableBufferPointer( - start: $0.baseAddress, count: $0.count-1 - ) - return try body(&buffer) - } - } } extension SystemString: Hashable, Codable {} @@ -173,7 +218,7 @@ extension SystemString { internal func withSystemChars( _ f: (UnsafeBufferPointer) throws -> T ) rethrows -> T { - try nullTerminatedStorage.withContiguousStorageIfAvailable(f)! + try _nullTerminatedStorage.withContiguousStorageIfAvailable(f)! 
} internal func withCodeUnits( @@ -231,11 +276,11 @@ extension String { } extension SystemString: ExpressibleByStringLiteral { - internal init(stringLiteral: String) { + public init(stringLiteral: String) { self.init(stringLiteral) } - internal init(_ string: String) { + public init(_ string: String) { // TODO: can avoid extra strlen self = string.withPlatformString { SystemString(platformString: $0) @@ -246,26 +291,24 @@ extension SystemString: ExpressibleByStringLiteral { extension SystemString: CustomStringConvertible, CustomDebugStringConvertible { internal var string: String { String(decoding: self) } - internal var description: String { string } - internal var debugDescription: String { description.debugDescription } + public var description: String { string } + public var debugDescription: String { description.debugDescription } } extension SystemString { - /// Creates a system string by copying bytes from a null-terminated platform string. + /// Creates a `SystemString` by copying bytes from a null-terminated platform string. /// /// - Parameter platformString: A pointer to a null-terminated platform string. - internal init(platformString: UnsafePointer) { + public init(platformString: UnsafePointer) { let count = 1 + system_platform_strlen(platformString) // TODO: Is this the right way? let chars: Array = platformString.withMemoryRebound( to: SystemChar.self, capacity: count ) { - let bufPtr = UnsafeBufferPointer(start: $0, count: count) - return Array(bufPtr) + Array(UnsafeBufferPointer(start: $0, count: count)) } - - self.init(nullTerminated: chars) + self.init(_nullTerminatedStorage: chars) } /// Calls the given closure with a pointer to the contents of the sytem string, @@ -280,7 +323,7 @@ extension SystemString { /// The pointer passed as an argument to `body` is valid /// only during the execution of this method. /// Don't try to store the pointer for later use. 
- internal func withPlatformString( + public func withPlatformString( _ f: (UnsafePointer) throws -> T ) rethrows -> T { try withSystemChars { chars in diff --git a/Tests/SystemTests/SystemCharTest.swift b/Tests/SystemTests/SystemCharTest.swift index 0ec5411e..e910e453 100644 --- a/Tests/SystemTests/SystemCharTest.swift +++ b/Tests/SystemTests/SystemCharTest.swift @@ -20,20 +20,20 @@ final class SystemCharTest: XCTestCase { let valid = SystemString( "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") for char in valid { - XCTAssertTrue(char.isLetter) + XCTAssertTrue(char.isASCIILetter) } // non printable for value in 0..<(UInt8(ascii: " ")) { - XCTAssertFalse(SystemChar(codeUnit: CInterop.PlatformUnicodeEncoding.CodeUnit(value)).isLetter) + XCTAssertFalse(SystemChar(CInterop.PlatformUnicodeEncoding.CodeUnit(value)).isASCIILetter) } - XCTAssertFalse(SystemChar(codeUnit: 0x7F).isLetter) // DEL + XCTAssertFalse(SystemChar(0x7F).isASCIILetter) // DEL // misc other let invalid = SystemString( ##" !"#$%&'()*+,-./0123456789:;<=>?@[\]^_`{|}~"##) for char in invalid { - XCTAssertFalse(char.isLetter) + XCTAssertFalse(char.isASCIILetter) } } } diff --git a/Tests/SystemTests/SystemStringTests.swift b/Tests/SystemTests/SystemStringTests.swift index a1891714..9e24aaba 100644 --- a/Tests/SystemTests/SystemStringTests.swift +++ b/Tests/SystemTests/SystemStringTests.swift @@ -248,7 +248,6 @@ final class SystemStringTest: XCTestCase { str.append(SystemChar(ascii: "d")) XCTAssert(str == "abcd") XCTAssert(str.count == 4) - XCTAssert(str.count == str.length) str.reserveCapacity(100) XCTAssert(str == "abcd") From cd5fe4effc7109cd55cae84342c711a280c28846 Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Fri, 26 Apr 2024 08:55:15 -0700 Subject: [PATCH 02/11] cleanup --- .../System/FilePath/FilePathComponents.swift | 7 +--- Sources/System/SystemString.swift | 38 +++++++++++++------ 2 files changed, 27 insertions(+), 18 deletions(-) diff --git 
a/Sources/System/FilePath/FilePathComponents.swift b/Sources/System/FilePath/FilePathComponents.swift index 6b8a9408..d50af35b 100644 --- a/Sources/System/FilePath/FilePathComponents.swift +++ b/Sources/System/FilePath/FilePathComponents.swift @@ -150,11 +150,6 @@ extension _StrSlice { try f(UnsafeBufferPointer(rebasing: $0[_range])) } } - internal func _withCodeUnits( - _ f: (UnsafeBufferPointer) throws -> T - ) rethrows -> T { - try _slice.withCodeUnits(f) - } internal init?(_platformString s: UnsafePointer) { self.init(SystemString(platformString: s)) @@ -163,7 +158,7 @@ extension _StrSlice { internal func _withPlatformString( _ body: (UnsafePointer) throws -> Result ) rethrows -> Result { - try _slice.withPlatformString(body) + try _slice._withPlatformString(body) } internal var _systemString: SystemString { SystemString(_slice) } diff --git a/Sources/System/SystemString.swift b/Sources/System/SystemString.swift index a0bf99fc..17669a9b 100644 --- a/Sources/System/SystemString.swift +++ b/Sources/System/SystemString.swift @@ -238,19 +238,14 @@ extension SystemString { } extension Slice where Base == SystemString { - internal func withCodeUnits( - _ f: (UnsafeBufferPointer) throws -> T - ) rethrows -> T { - try base.withCodeUnits { - try f(UnsafeBufferPointer(rebasing: $0[indices])) - } - } - internal var string: String { - withCodeUnits { String(decoding: $0, as: CInterop.PlatformUnicodeEncoding.self) } + base.withCodeUnits { + String(decoding: $0[indices], + as: CInterop.PlatformUnicodeEncoding.self) + } } - internal func withPlatformString( + internal func _withPlatformString( _ f: (UnsafePointer) throws -> T ) rethrows -> T { // FIXME: avoid allocation if we're at the end @@ -260,13 +255,32 @@ extension Slice where Base == SystemString { } extension String { - internal init(decoding str: SystemString) { + /// Creates a string by interpreting `str`'s content as UTF-8 on Unix + /// and UTF-16 on Windows. 
+ /// + /// - Parameter str: The system string to be interpreted as + /// `CInterop.PlatformUnicodeEncoding`. + /// + /// If the content of the system string isn't a well-formed Unicode string, + /// this initializer replaces invalid bytes with U+FFFD. + /// This means that conversion to a string and back to a system string + /// might result in a value that's different from the original system string. + public init(decoding str: SystemString) { // TODO: Can avoid extra strlen self = str.withPlatformString { String(platformString: $0) } } - internal init?(validating str: SystemString) { + + /// Creates a string from a system string, validating its contents as UTF-8 on + /// Unix and UTF-16 on Windows. + /// + /// - Parameter str: The system string to be interpreted as + /// `CInterop.PlatformUnicodeEncoding`. + /// + /// If the contents of the system string isn't well-formed Unicode, + /// this initializer returns `nil`. + public init?(validating str: SystemString) { // TODO: Can avoid extra strlen guard let str = str.withPlatformString(String.init(validatingPlatformString:)) else { return nil } From 780f8c3c4388f062e01ec5825dda5fef01fab593 Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Fri, 26 Apr 2024 09:00:12 -0700 Subject: [PATCH 03/11] cleanup --- Sources/System/FilePath/FilePathSyntax.swift | 4 ++-- Sources/System/FilePath/FilePathWindows.swift | 4 ++-- Sources/System/SystemString.swift | 15 +++++---------- Tests/SystemTests/SystemStringTests.swift | 2 +- 4 files changed, 10 insertions(+), 15 deletions(-) diff --git a/Sources/System/FilePath/FilePathSyntax.swift b/Sources/System/FilePath/FilePathSyntax.swift index 1c3fc097..f94f4453 100644 --- a/Sources/System/FilePath/FilePathSyntax.swift +++ b/Sources/System/FilePath/FilePathSyntax.swift @@ -267,7 +267,7 @@ extension FilePath.Component { /// * `.. => nil` public var `extension`: String? 
{ guard let range = _extensionRange() else { return nil } - return _slice[range].string + return String(decoding: .init(_slice[range])) } /// The non-extension portion of this file or directory component. @@ -279,7 +279,7 @@ extension FilePath.Component { /// * `.hidden => .hidden` /// * `.. => ..` public var stem: String { - _slice[_stemRange()].string + String(decoding: .init(_slice[_stemRange()])) } } diff --git a/Sources/System/FilePath/FilePathWindows.swift b/Sources/System/FilePath/FilePathWindows.swift index 11b94d80..0967d4b0 100644 --- a/Sources/System/FilePath/FilePathWindows.swift +++ b/Sources/System/FilePath/FilePathWindows.swift @@ -218,7 +218,7 @@ extension _ParsedWindowsRoot { // TODO: check for GUID // TODO: check for drive - return .volume(root[vol].string) + return .volume(String(decoding: .init(root[vol]))) } } @@ -227,7 +227,7 @@ extension WindowsRootInfo { self.volume = parsed.volumeInfo(root) if let host = parsed.host { - self.host = root[host].string + self.host = String(decoding: .init(root[host])) } else { self.host = nil } diff --git a/Sources/System/SystemString.swift b/Sources/System/SystemString.swift index 17669a9b..3e6a36f2 100644 --- a/Sources/System/SystemString.swift +++ b/Sources/System/SystemString.swift @@ -238,13 +238,6 @@ extension SystemString { } extension Slice where Base == SystemString { - internal var string: String { - base.withCodeUnits { - String(decoding: $0[indices], - as: CInterop.PlatformUnicodeEncoding.self) - } - } - internal func _withPlatformString( _ f: (UnsafePointer) throws -> T ) rethrows -> T { @@ -303,10 +296,12 @@ extension SystemString: ExpressibleByStringLiteral { } extension SystemString: CustomStringConvertible, CustomDebugStringConvertible { - internal var string: String { String(decoding: self) } - public var description: String { string } - public var debugDescription: String { description.debugDescription } + public var description: String { String(decoding: self) } + + public var 
debugDescription: String { + description.debugDescription + } } extension SystemString { diff --git a/Tests/SystemTests/SystemStringTests.swift b/Tests/SystemTests/SystemStringTests.swift index 9e24aaba..612b9836 100644 --- a/Tests/SystemTests/SystemStringTests.swift +++ b/Tests/SystemTests/SystemStringTests.swift @@ -75,7 +75,7 @@ struct StringTest: TestCase { // Test String, SystemString, FilePath construction let sysStr = SystemString(string) - expectEqualSequence(string.unicodeScalars, sysStr.string.unicodeScalars) + expectEqualSequence(string.unicodeScalars, String(decoding: sysStr).unicodeScalars) expectEqual(string, String(decoding: sysStr)) expectEqual(string, String(validating: sysStr)) From 9513e51350760974bca557fcb6d97fe639fec2ba Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Fri, 26 Apr 2024 09:39:46 -0700 Subject: [PATCH 04/11] wip --- Sources/System/FilePath/FilePathComponentView.swift | 10 +++++++--- Sources/System/FilePath/FilePathComponents.swift | 8 ++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/Sources/System/FilePath/FilePathComponentView.swift b/Sources/System/FilePath/FilePathComponentView.swift index 39381b4b..644dbbc3 100644 --- a/Sources/System/FilePath/FilePathComponentView.swift +++ b/Sources/System/FilePath/FilePathComponentView.swift @@ -33,6 +33,10 @@ extension FilePath { internal var _path: FilePath internal var _start: SystemString.Index + internal var _slice: Slice { + _path._storage[_start...] + } + internal init(_ path: FilePath) { self._path = path self._start = path._relativeStart @@ -50,7 +54,7 @@ extension FilePath { // TODO(perf): Small-form root (especially on Unix). Have Root // always copy out (not worth ref counting). Make sure that we're // not needlessly sliding values around or triggering a COW - let rootStr = self.root?._systemString ?? SystemString() + let rootStr = SystemString(root?._slice ?? 
SystemString()[...]) var comp = ComponentView(self) self = FilePath() defer { @@ -156,7 +160,7 @@ extension FilePath { public init( root: Root?, _ components: C ) where C.Element == Component { - var str = root?._systemString ?? SystemString() + var str = SystemString(root?._slice ?? SystemString()[...]) str.appendComponents(components: components) self.init(str) } @@ -169,7 +173,7 @@ extension FilePath { /// Create a file path from an optional root and a slice of another path's /// components. public init(root: Root?, _ components: ComponentView.SubSequence) { - var str = root?._systemString ?? SystemString() + var str = SystemString(root?._slice ?? SystemString()[...]) let (start, end) = (components.startIndex._storage, components.endIndex._storage) str.append(contentsOf: components.base._slice[start.. { + _path._storage[..<_rootEnd] + } + internal init(_ path: FilePath, rootEnd: SystemString.Index) { self._path = path self._rootEnd = rootEnd @@ -60,6 +64,10 @@ extension FilePath { internal var _path: FilePath internal var _range: Range + internal var _slice: Slice { + _path._storage[_range] + } + // TODO: Make a small-component form to save on ARC overhead when // extracted from a path, and especially to save on allocation overhead // when constructing one from a String literal. 
From 47966f4866a44dbb289d63f5e0e65f03d065f0d3 Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Fri, 26 Apr 2024 09:41:01 -0700 Subject: [PATCH 05/11] wip --- .../System/FilePath/FilePathComponents.swift | 2 +- Sources/System/FilePath/FilePathString.swift | 24 +++++++++---------- Sources/System/SystemString.swift | 1 - 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/Sources/System/FilePath/FilePathComponents.swift b/Sources/System/FilePath/FilePathComponents.swift index 108efd25..6c44515d 100644 --- a/Sources/System/FilePath/FilePathComponents.swift +++ b/Sources/System/FilePath/FilePathComponents.swift @@ -130,7 +130,7 @@ extension SystemString { for idx in components.indices { let component = components[idx] - component._withSystemChars { self.append(contentsOf: $0) } + self.append(contentsOf: component._slice) self.append(platformSeparator) } } diff --git a/Sources/System/FilePath/FilePathString.swift b/Sources/System/FilePath/FilePathString.swift index 4fe9c1fd..0e0a46f6 100644 --- a/Sources/System/FilePath/FilePathString.swift +++ b/Sources/System/FilePath/FilePathString.swift @@ -17,7 +17,7 @@ extension FilePath { /// - Parameter platformString: A pointer to a null-terminated platform /// string. public init(platformString: UnsafePointer) { - self.init(_platformString: platformString) + self.init(SystemString(platformString: platformString)) } /// Creates a file path by copying bytes from a null-terminated platform @@ -104,7 +104,7 @@ extension FilePath { public func withPlatformString( _ body: (UnsafePointer) throws -> Result ) rethrows -> Result { - return try _withPlatformString(body) + return try _storage._withPlatformString(body) } #endif } @@ -120,7 +120,7 @@ extension FilePath.Component { /// - Parameter platformString: A pointer to a null-terminated platform /// string. 
public init?(platformString: UnsafePointer) { - self.init(_platformString: platformString) + self.init(SystemString(platformString: platformString)) } /// Creates a file path component by copying bytes from a null-terminated @@ -194,7 +194,7 @@ extension FilePath.Component { public func withPlatformString( _ body: (UnsafePointer) throws -> Result ) rethrows -> Result { - try _withPlatformString(body) + try _slice._withPlatformString(body) } } @@ -208,7 +208,7 @@ extension FilePath.Root { /// - Parameter platformString: A pointer to a null-terminated platform /// string. public init?(platformString: UnsafePointer) { - self.init(_platformString: platformString) + self.init(SystemString(platformString: platformString)) } /// Creates a file path root by copying bytes from a null-terminated platform @@ -281,7 +281,7 @@ extension FilePath.Root { public func withPlatformString( _ body: (UnsafePointer) throws -> Result ) rethrows -> Result { - try _withPlatformString(body) + try _slice._withPlatformString(body) } } @@ -459,7 +459,7 @@ extension String { /// conversion to a string and back to a path /// might result in a value that's different from the original path. public init(decoding path: FilePath) { - self.init(_decoding: path) + self.init(decoding: path._storage) } /// Creates a string from a file path, validating its contents as UTF-8 on @@ -471,7 +471,7 @@ extension String { /// If the contents of the file path isn't a well-formed Unicode string, /// this initializer returns `nil`. public init?(validating path: FilePath) { - self.init(_validating: path) + self.init(validating: path._storage) } } @@ -489,7 +489,7 @@ extension String { /// conversion to a string and back to a path component /// might result in a value that's different from the original path component. 
public init(decoding component: FilePath.Component) { - self.init(_decoding: component) + self.init(decoding: SystemString(component._slice)) } /// Creates a string from a path component, validating its contents as UTF-8 @@ -501,7 +501,7 @@ extension String { /// If the contents of the path component isn't a well-formed Unicode string, /// this initializer returns `nil`. public init?(validating component: FilePath.Component) { - self.init(_validating: component) + self.init(validating: SystemString(component._slice)) } } @@ -521,7 +521,7 @@ extension String { /// conversion to a string and back to a path root /// might result in a value that's different from the original path root. public init(decoding root: FilePath.Root) { - self.init(_decoding: root) + self.init(decoding: SystemString(root._slice)) } /// On Unix, creates the string `"/"` @@ -597,7 +597,7 @@ extension FilePath { public func withCString( _ body: (UnsafePointer) throws -> Result ) rethrows -> Result { - return try _withPlatformString(body) + return try _storage.withPlatformString(body) } } #endif diff --git a/Sources/System/SystemString.swift b/Sources/System/SystemString.swift index 3e6a36f2..52adf8eb 100644 --- a/Sources/System/SystemString.swift +++ b/Sources/System/SystemString.swift @@ -244,7 +244,6 @@ extension Slice where Base == SystemString { // FIXME: avoid allocation if we're at the end return try SystemString(self).withPlatformString(f) } - } extension String { From 4fe4d476e96edd7aa7a2e8f1c0cf38ce9a9adb42 Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Fri, 26 Apr 2024 20:14:38 -0700 Subject: [PATCH 06/11] cleanup --- .../System/FilePath/FilePathComponents.swift | 12 ------------ Sources/System/FilePath/FilePathString.swift | 19 +------------------ Sources/System/Internals/Exports.swift | 7 +------ Sources/System/PlatformString.swift | 7 ++++++- 4 files changed, 8 insertions(+), 37 deletions(-) diff --git a/Sources/System/FilePath/FilePathComponents.swift 
b/Sources/System/FilePath/FilePathComponents.swift index 6c44515d..b98a4dd2 100644 --- a/Sources/System/FilePath/FilePathComponents.swift +++ b/Sources/System/FilePath/FilePathComponents.swift @@ -199,18 +199,6 @@ extension FilePath.Root: _PathSlice { } } -@available(/*System 0.0.1: macOS 11.0, iOS 14.0, watchOS 7.0, tvOS 14.0*/iOS 8, *) -extension FilePath: _PlatformStringable { - func _withPlatformString(_ body: (UnsafePointer) throws -> Result) rethrows -> Result { - try _storage.withPlatformString(body) - } - - init(_platformString: UnsafePointer) { - self.init(SystemString(platformString: _platformString)) - } - -} - @available(/*System 0.0.2: macOS 12.0, iOS 15.0, watchOS 8.0, tvOS 15.0*/iOS 8, *) extension FilePath.Component { // The index of the `.` denoting an extension diff --git a/Sources/System/FilePath/FilePathString.swift b/Sources/System/FilePath/FilePathString.swift index 0e0a46f6..404e438f 100644 --- a/Sources/System/FilePath/FilePathString.swift +++ b/Sources/System/FilePath/FilePathString.swift @@ -535,24 +535,7 @@ extension String { /// On Windows, if the contents of the path root isn't a well-formed Unicode /// string, this initializer returns `nil`. 
public init?(validating root: FilePath.Root) { - self.init(_validating: root) - } -} - -// MARK: - Internal helpers - -extension String { - fileprivate init(_decoding ps: PS) { - self = ps._withPlatformString { String(platformString: $0) } - } - - fileprivate init?(_validating ps: PS) { - guard let str = ps._withPlatformString( - String.init(validatingPlatformString:) - ) else { - return nil - } - self = str + self.init(validating: SystemString(root._slice)) } } diff --git a/Sources/System/Internals/Exports.swift b/Sources/System/Internals/Exports.swift index e20454ee..d1283197 100644 --- a/Sources/System/Internals/Exports.swift +++ b/Sources/System/Internals/Exports.swift @@ -104,12 +104,7 @@ extension String { internal func _withPlatformString( _ body: (UnsafePointer) throws -> Result ) rethrows -> Result { - // Need to #if because CChar may be signed - #if os(Windows) - return try withCString(encodedAs: CInterop.PlatformUnicodeEncoding.self, body) - #else - return try withCString(body) - #endif + fatalError() } internal init?(_platformString platformString: UnsafePointer) { diff --git a/Sources/System/PlatformString.swift b/Sources/System/PlatformString.swift index 4e2e7ddf..e2a9e05b 100644 --- a/Sources/System/PlatformString.swift +++ b/Sources/System/PlatformString.swift @@ -159,7 +159,12 @@ extension String { public func withPlatformString( _ body: (UnsafePointer) throws -> Result ) rethrows -> Result { - try _withPlatformString(body) + // Need to #if because CChar may be signed + #if os(Windows) + return try withCString(encodedAs: CInterop.PlatformUnicodeEncoding.self, body) + #else + return try withCString(body) + #endif } } From 8bafa5d619d25b7184be65ca5dda89dce4ecb70c Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Fri, 26 Apr 2024 20:20:32 -0700 Subject: [PATCH 07/11] cleanup --- .../FilePath/FilePathComponentView.swift | 13 ---- .../System/FilePath/FilePathComponents.swift | 59 +++---------------- Sources/System/PlatformString.swift | 9 --- 3 files 
changed, 9 insertions(+), 72 deletions(-) diff --git a/Sources/System/FilePath/FilePathComponentView.swift b/Sources/System/FilePath/FilePathComponentView.swift index 644dbbc3..b8ed69fa 100644 --- a/Sources/System/FilePath/FilePathComponentView.swift +++ b/Sources/System/FilePath/FilePathComponentView.swift @@ -181,19 +181,6 @@ extension FilePath { } } -// MARK: - Internals - -@available(/*System 0.0.2: macOS 12.0, iOS 15.0, watchOS 8.0, tvOS 15.0*/iOS 8, *) -extension FilePath.ComponentView: _PathSlice { - internal var _range: Range { - _start ..< _path._storage.endIndex - } - - internal init(_ str: SystemString) { - fatalError("TODO: consider dropping proto req") - } -} - // MARK: - Invariants @available(/*System 0.0.2: macOS 12.0, iOS 15.0, watchOS 8.0, tvOS 15.0*/iOS 8, *) diff --git a/Sources/System/FilePath/FilePathComponents.swift b/Sources/System/FilePath/FilePathComponents.swift index b98a4dd2..46b98358 100644 --- a/Sources/System/FilePath/FilePathComponents.swift +++ b/Sources/System/FilePath/FilePathComponents.swift @@ -136,42 +136,12 @@ extension SystemString { } } -// Unifying protocol for common functionality between roots, components, -// and views onto SystemString and FilePath. 
-internal protocol _StrSlice: _PlatformStringable, Hashable, Codable { - var _storage: SystemString { get } - var _range: Range { get } - - init?(_ str: SystemString) - - func _invariantCheck() +// Protocol for types which hash and compare as their underlying +// SystemString slices +internal protocol _SystemStringBacked: Hashable, Codable { + var _slice: Slice { get } } -extension _StrSlice { - internal var _slice: Slice { - Slice(base: _storage, bounds: _range) - } - - internal func _withSystemChars( - _ f: (UnsafeBufferPointer) throws -> T - ) rethrows -> T { - try _storage.withSystemChars { - try f(UnsafeBufferPointer(rebasing: $0[_range])) - } - } - - internal init?(_platformString s: UnsafePointer) { - self.init(SystemString(platformString: s)) - } - - internal func _withPlatformString( - _ body: (UnsafePointer) throws -> Result - ) rethrows -> Result { - try _slice._withPlatformString(body) - } - - internal var _systemString: SystemString { SystemString(_slice) } -} -extension _StrSlice { +extension _SystemStringBacked { public static func == (lhs: Self, rhs: Self) -> Bool { lhs._slice.elementsEqual(rhs._slice) } @@ -182,22 +152,11 @@ extension _StrSlice { } } } -internal protocol _PathSlice: _StrSlice { - var _path: FilePath { get } -} -extension _PathSlice { - internal var _storage: SystemString { _path._storage } -} - -@available(/*System 0.0.2: macOS 12.0, iOS 15.0, watchOS 8.0, tvOS 15.0*/iOS 8, *) -extension FilePath.Component: _PathSlice { -} -@available(/*System 0.0.2: macOS 12.0, iOS 15.0, watchOS 8.0, tvOS 15.0*/iOS 8, *) -extension FilePath.Root: _PathSlice { - internal var _range: Range { - (..<_rootEnd).relative(to: _path._storage) - } +extension FilePath: _SystemStringBacked { + var _slice: Slice { _storage[...] 
} } +extension FilePath.Component: _SystemStringBacked {} +extension FilePath.Root: _SystemStringBacked {} @available(/*System 0.0.2: macOS 12.0, iOS 15.0, watchOS 8.0, tvOS 15.0*/iOS 8, *) extension FilePath.Component { diff --git a/Sources/System/PlatformString.swift b/Sources/System/PlatformString.swift index e2a9e05b..4561cd4b 100644 --- a/Sources/System/PlatformString.swift +++ b/Sources/System/PlatformString.swift @@ -190,12 +190,3 @@ extension CInterop.PlatformUnicodeEncoding.CodeUnit { #endif } } - -internal protocol _PlatformStringable { - func _withPlatformString( - _ body: (UnsafePointer) throws -> Result - ) rethrows -> Result - - init?(_platformString: UnsafePointer) -} -extension String: _PlatformStringable {} From e80ea343432e471c407bde2684d87060385fa54b Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Sun, 28 Apr 2024 18:27:08 -0700 Subject: [PATCH 08/11] wip: cleanup --- Sources/System/FilePath/FilePath.swift | 4 +- .../FilePath/FilePathComponentView.swift | 14 +++---- .../System/FilePath/FilePathComponents.swift | 12 +++--- Sources/System/FilePath/FilePathParsing.swift | 41 ++++++++----------- Sources/System/FilePath/FilePathSyntax.swift | 2 +- Sources/System/FilePath/FilePathWindows.swift | 8 ++-- Sources/System/SystemString.swift | 27 +----------- .../FilePathTests/FilePathExtras.swift | 2 +- Tests/SystemTests/SystemStringTests.swift | 2 +- 9 files changed, 43 insertions(+), 69 deletions(-) diff --git a/Sources/System/FilePath/FilePath.swift b/Sources/System/FilePath/FilePath.swift index 47498650..7c2764db 100644 --- a/Sources/System/FilePath/FilePath.swift +++ b/Sources/System/FilePath/FilePath.swift @@ -52,7 +52,9 @@ public struct FilePath: Sendable { // In addition to the empty init, this init will properly normalize // separators. All other initializers should be implemented by // ultimately deferring to a normalizing init. 
- internal init(_ str: SystemString) { + + /// TODO: docs + public init(_ str: SystemString) { self._storage = str self._normalizeSeparators() _invariantCheck() diff --git a/Sources/System/FilePath/FilePathComponentView.swift b/Sources/System/FilePath/FilePathComponentView.swift index b8ed69fa..446e7c0f 100644 --- a/Sources/System/FilePath/FilePathComponentView.swift +++ b/Sources/System/FilePath/FilePathComponentView.swift @@ -74,15 +74,15 @@ extension FilePath.ComponentView: BidirectionalCollection { @available(/*System 0.0.2: macOS 12.0, iOS 15.0, watchOS 8.0, tvOS 15.0*/iOS 8, *) public struct Index: Sendable, Comparable, Hashable { - internal typealias Storage = SystemString.Index + internal typealias _Storage = SystemString.Index - internal var _storage: Storage + internal var _storage: _Storage public static func < (lhs: Self, rhs: Self) -> Bool { lhs._storage < rhs._storage } - fileprivate init(_ idx: Storage) { + fileprivate init(_ idx: _Storage) { self._storage = idx } } @@ -144,11 +144,11 @@ extension FilePath.ComponentView: RangeReplaceableCollection { var str = SystemString() let atEnd = subrange.lowerBound == endIndex if atEnd { - str.append(platformSeparator) + str.append(_platformSeparator) } - str.appendComponents(components: newElements) + str._appendComponents(components: newElements) if !atEnd { - str.append(platformSeparator) + str.append(_platformSeparator) } _path._storage.replaceSubrange(range, with: str) } @@ -161,7 +161,7 @@ extension FilePath { root: Root?, _ components: C ) where C.Element == Component { var str = SystemString(root?._slice ?? 
SystemString()[...]) - str.appendComponents(components: components) + str._appendComponents(components: components) self.init(str) } diff --git a/Sources/System/FilePath/FilePathComponents.swift b/Sources/System/FilePath/FilePathComponents.swift index 46b98358..22f043e2 100644 --- a/Sources/System/FilePath/FilePathComponents.swift +++ b/Sources/System/FilePath/FilePathComponents.swift @@ -118,7 +118,7 @@ extension FilePath.Root { extension SystemString { // TODO: take insertLeadingSlash: Bool // TODO: turn into an insert operation with slide - internal mutating func appendComponents( + internal mutating func _appendComponents( components: C ) where C.Element == FilePath.Component { // TODO(perf): Consider pre-pass to count capacity, slide @@ -131,7 +131,7 @@ extension SystemString { for idx in components.indices { let component = components[idx] self.append(contentsOf: component._slice) - self.append(platformSeparator) + self.append(_platformSeparator) } } } @@ -189,7 +189,8 @@ internal func _makeExtension(_ ext: String) -> SystemString { @available(/*System 0.0.2: macOS 12.0, iOS 15.0, watchOS 8.0, tvOS 15.0*/iOS 8, *) extension FilePath.Component { - internal init?(_ str: SystemString) { + /// TODO: docs + public init?(_ str: SystemString) { // FIXME: explicit null root? Or something else? let path = FilePath(str) guard path.root == nil, path.components.count == 1 else { @@ -202,7 +203,8 @@ extension FilePath.Component { @available(/*System 0.0.2: macOS 12.0, iOS 15.0, watchOS 8.0, tvOS 15.0*/iOS 8, *) extension FilePath.Root { - internal init?(_ str: SystemString) { + /// TODO: docs + public init?(_ str: SystemString) { // FIXME: explicit null root? Or something else? 
let path = FilePath(str) guard path.root != nil, path.components.isEmpty else { @@ -222,7 +224,7 @@ extension FilePath.Component { #if DEBUG precondition(!_slice.isEmpty) precondition(_slice.last != .null) - precondition(_slice.allSatisfy { !isSeparator($0) } ) + precondition(_slice.allSatisfy { !_isSeparator($0) } ) precondition(_path._relativeStart <= _slice.startIndex) #endif // DEBUG } diff --git a/Sources/System/FilePath/FilePathParsing.swift b/Sources/System/FilePath/FilePathParsing.swift index 6d014774..5a9ab8dc 100644 --- a/Sources/System/FilePath/FilePathParsing.swift +++ b/Sources/System/FilePath/FilePathParsing.swift @@ -11,24 +11,19 @@ // manages (and hides) the null terminator // The separator we use internally -private var genericSeparator: SystemChar { .slash } +private var _genericSeparator: SystemChar { .slash } // The platform preferred separator // // TODO: Make private -internal var platformSeparator: SystemChar { - _windowsPaths ? .backslash : genericSeparator +internal var _platformSeparator: SystemChar { + _windowsPaths ? .backslash : _genericSeparator } // Whether the character is the canonical separator // TODO: Make private -internal func isSeparator(_ c: SystemChar) -> Bool { - c == platformSeparator -} - -// Whether the character is a pre-normalized separator -internal func isPrenormalSeparator(_ c: SystemChar) -> Bool { - c == genericSeparator || c == platformSeparator +internal func _isSeparator(_ c: SystemChar) -> Bool { + c == _platformSeparator } // Separator normalization, checking, and root parsing is internally hosted @@ -42,7 +37,7 @@ extension SystemString { guard _relativePathStart != endIndex else { return false } assert(!isEmpty) - return isSeparator(self.last!) + return _isSeparator(self.last!) } // Enforce invariants by removing a trailing separator. @@ -82,7 +77,7 @@ extension SystemString { // parsing and (potentially) fixing up semi-formed roots. 
This // normalization reduces the complexity of the task by allowing us to // use a read-only lexer. - self._replaceAll(genericSeparator, with: platformSeparator) + self._replaceAll(_genericSeparator, with: _platformSeparator) // Windows roots can have meaningful repeated backslashes or may // need backslashes inserted for partially-formed roots. Delegate that to @@ -90,19 +85,19 @@ extension SystemString { readIdx = _prenormalizeWindowsRoots() writeIdx = readIdx } else { - assert(genericSeparator == platformSeparator) + assert(_genericSeparator == _platformSeparator) } while readIdx < endIndex { assert(writeIdx <= readIdx) // Swap and advance our indices. - let wasSeparator = isSeparator(self[readIdx]) + let wasSeparator = _isSeparator(self[readIdx]) self.swapAt(writeIdx, readIdx) self.formIndex(after: &writeIdx) self.formIndex(after: &readIdx) - while wasSeparator, readIdx < endIndex, isSeparator(self[readIdx]) { + while wasSeparator, readIdx < endIndex, _isSeparator(self[readIdx]) { self.formIndex(after: &readIdx) } } @@ -223,8 +218,8 @@ extension FilePath { } } - assert(!isSeparator(_storage[i])) - guard let nextSep = _storage[i...].firstIndex(where: isSeparator) else { + assert(!_isSeparator(_storage[i])) + guard let nextSep = _storage[i...].firstIndex(where: _isSeparator) else { return (_storage.endIndex, _storage.endIndex) } return (nextSep, _storage.index(after: nextSep)) @@ -244,11 +239,11 @@ extension FilePath { var slice = _storage[.. 
WindowsRootInfo.Volume { + fileprivate func volumeInfo(_ root: SystemString) -> _WindowsRootInfo.Volume { if let d = self.drive { return .drive(Character(d.asciiScalar!)) } @@ -222,7 +222,7 @@ extension _ParsedWindowsRoot { } } -extension WindowsRootInfo { +extension _WindowsRootInfo { internal init(_ root: SystemString, _ parsed: _ParsedWindowsRoot) { self.volume = parsed.volumeInfo(root) @@ -243,7 +243,7 @@ extension WindowsRootInfo { } } -extension WindowsRootInfo { +extension _WindowsRootInfo { /// NOT `\foo\bar` nor `C:foo\bar` internal var isFullyQualified: Bool { return form != .traditional(fullyQualified: false) diff --git a/Sources/System/SystemString.swift b/Sources/System/SystemString.swift index 52adf8eb..6014a68e 100644 --- a/Sources/System/SystemString.swift +++ b/Sources/System/SystemString.swift @@ -212,31 +212,6 @@ extension SystemString: RangeReplaceableCollection { extension SystemString: Hashable, Codable {} -extension SystemString { - - // withSystemChars includes the null terminator - internal func withSystemChars( - _ f: (UnsafeBufferPointer) throws -> T - ) rethrows -> T { - try _nullTerminatedStorage.withContiguousStorageIfAvailable(f)! 
- } - - internal func withCodeUnits( - _ f: (UnsafeBufferPointer) throws -> T - ) rethrows -> T { - try withSystemChars { chars in - let length = chars.count * MemoryLayout.stride - let count = length / MemoryLayout.stride - return try chars.baseAddress!.withMemoryRebound( - to: CInterop.PlatformUnicodeEncoding.CodeUnit.self, - capacity: count - ) { pointer in - try f(UnsafeBufferPointer(start: pointer, count: count)) - } - } - } -} - extension Slice where Base == SystemString { internal func _withPlatformString( _ f: (UnsafePointer) throws -> T @@ -334,7 +309,7 @@ extension SystemString { public func withPlatformString( _ f: (UnsafePointer) throws -> T ) rethrows -> T { - try withSystemChars { chars in + try _nullTerminatedStorage.withUnsafeBufferPointer { chars in let length = chars.count * MemoryLayout.stride return try chars.baseAddress!.withMemoryRebound( to: CInterop.PlatformChar.self, diff --git a/Tests/SystemTests/FilePathTests/FilePathExtras.swift b/Tests/SystemTests/FilePathTests/FilePathExtras.swift index 82f11373..647d9d70 100644 --- a/Tests/SystemTests/FilePathTests/FilePathExtras.swift +++ b/Tests/SystemTests/FilePathTests/FilePathExtras.swift @@ -43,7 +43,7 @@ extension FilePath { for _ in 0.. Date: Mon, 29 Apr 2024 13:29:20 -0700 Subject: [PATCH 09/11] WIP: proposal --- Documentation/system-string.md | 53 ++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 Documentation/system-string.md diff --git a/Documentation/system-string.md b/Documentation/system-string.md new file mode 100644 index 00000000..b1cdaa99 --- /dev/null +++ b/Documentation/system-string.md @@ -0,0 +1,53 @@ +# SystemString for OS-native string representations + +* Authors: [Michael Ilseman](https://github.com/milseman) +* Implementation: PR (**TODO**) + +## Introduction + +We introduce `SystemString`, which supports OS-native string operations. `SystemString` is a bag-of-bytes type without a prescribed encoding. 
It is a collection of `SystemChar`s, which is `CChar` on Unix platforms and `UInt16` on Windows platforms. + + +## Motivation + +`SystemString` is the backing storage representation for `FilePath`. `FilePath` normalizes its contents (e.g. `a//b -> a/b`), and so it is insufficient as an OS-preferred bag-of-bytes string representation. + +**TODO**: It would be nice to ship with a few syscalls that make use of it. + +**TODO**: A little more motivation on `SystemChar`. Also, let's make sure we have clarity on layout equivalence and demonstrate how to get from a null-`SystemChar`-terminated `UBP` to null-terminated `UBP`. + +## Proposed solution + +**TODO**: Brief highlights + + +## Detailed design + + +## Source compatibility + +This proposal is additive and source-compatible with existing code. + +## ABI compatibility + +This proposal is additive and ABI-compatible with existing code. + + +## Alternatives considered + +**TODO**: Consider not having `SystemChar` + +**TODO**: Consider separate `SystemByteString` and `SystemBytePairString` types. + +**TODO**: Why we don't want to have a single-byte ASCII representation on Windows and have syscall wrapper adjust/dispatch appropriately. 
+ + +## Future directions + +**TODO**: Map out some future syscalls that this would (partially) unblock + +## Acknowledgments + +**TODO** + + From 570e143ecc2fd22172abc75368be3a896f43b179 Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Mon, 29 Apr 2024 13:53:05 -0700 Subject: [PATCH 10/11] Flesh out proposal and docs --- Documentation/system-string.md | 231 ++++++++++++++++++ Sources/System/FilePath/FilePath.swift | 2 +- .../System/FilePath/FilePathComponents.swift | 8 +- Sources/System/Internals/Exports.swift | 6 - 4 files changed, 238 insertions(+), 9 deletions(-) diff --git a/Documentation/system-string.md b/Documentation/system-string.md index b1cdaa99..a5638482 100644 --- a/Documentation/system-string.md +++ b/Documentation/system-string.md @@ -23,6 +23,237 @@ We introduce `SystemString`, which supports OS-native string operations. `System ## Detailed design +`SystemChar` is a raw wrapper around `CChar` on Linux and Darwin, and a `UInt16` on Windows. It is layout compatible with those types and exposes convenience interfaces for getting common values. + +```swift +/// A platform-native character representation. +/// +/// A SystemChar is a `CChar` on Linux and Darwin, and a `UInt16` on Windows. +/// +/// Note that no particular encoding is assumed. +@frozen +public struct SystemChar: + RawRepresentable, Sendable, Comparable, Hashable, Codable { + public typealias RawValue = CInterop.PlatformChar + + public var rawValue: RawValue + + @inlinable + public init(rawValue: RawValue) + + @_alwaysEmitIntoClient + public init(_ rawValue: RawValue) + + @inlinable + public static func < (lhs: SystemChar, rhs: SystemChar) -> Bool +} + +extension SystemChar { + /// Create a SystemChar from an ASCII scalar. 
+ @_alwaysEmitIntoClient + public init(ascii: Unicode.Scalar) + + /// Cast `x` to a `SystemChar` + @_alwaysEmitIntoClient + public init(_ x: some FixedWidthInteger) + + /// The NULL character `\0` + @_alwaysEmitIntoClient + public static var null: SystemChar { get } + + /// The slash character `/` + @_alwaysEmitIntoClient + public static var slash: SystemChar { get } + + /// The backslash character `\` + @_alwaysEmitIntoClient + public static var backslash: SystemChar { get } + + /// The dot character `.` + @_alwaysEmitIntoClient + public static var dot: SystemChar { get } + + /// The colon character `:` + @_alwaysEmitIntoClient + public static var colon: SystemChar { get } + + /// The question mark character `?` + @_alwaysEmitIntoClient + public static var question: SystemChar { get } + + /// Returns `self` as a `Unicode.Scalar` if ASCII, else `nil` + @_alwaysEmitIntoClient + public var asciiScalar: Unicode.Scalar? { get } + + /// Whether `self` is ASCII + @_alwaysEmitIntoClient + public var isASCII: Bool { get } + + /// Whether `self` is an ASCII letter, i.e. in `[a-zA-Z]` + @_alwaysEmitIntoClient + public var isASCIILetter: Bool { get } +} +``` + +`SystemString` is a `RangeReplaceableCollection` of `SystemChar`s and ensures that it is always `NULL`-terminated. The `NULL` is not considered part of the count and the string must not contain `NULL`s inside of it. + +```swift + +/// A platform-native string representation. A `SystemString` is a collection +/// of non-NULL `SystemChar`s followed by a NULL terminator. +/// +/// TODO: example use or two, showing that innards are not NULL, but there's +/// always a null at the end, NULL is not part of the count +@frozen +public struct SystemString: Sendable { + public typealias Storage = [SystemChar] + + @usableFromInline + internal var _nullTerminatedStorage: Storage + + /// Access the backing storage, including the null terminator. 
Note that + /// `nullTerminatedStorage.count == self.count + 1`, due + /// to the null terminator. + @_alwaysEmitIntoClient + public var nullTerminatedStorage: Storage { get } +} + +extension SystemString { + /// Create an empty `SystemString` + public init() + + /// Create a `SystemString` from a collection of `SystemChar`s. + /// A NULL terminator will be added if `chars` lacks one. `chars` must not + /// include any interior NULLs. + @_alwaysEmitIntoClient + public init(_ chars: C) where C.Element == SystemChar +} + +extension SystemString: RandomAccessCollection, MutableCollection { + public typealias Element = SystemChar + public typealias Index = Int + public typealias Indices = Range + + @inlinable + public var startIndex: Index { get } + + @inlinable + public var endIndex: Index { get } + + @inlinable + public subscript(position: Index) -> SystemChar { + _read, set + } +} +extension SystemString: RangeReplaceableCollection { + @inlinable + public mutating func replaceSubrange( + _ subrange: Range, with newElements: C + ) where C.Element == SystemChar + + @inlinable + public mutating func reserveCapacity(_ n: Int) + + @inlinable + public func withContiguousStorageIfAvailable( + _ body: (UnsafeBufferPointer) throws -> R + ) rethrows -> R? +} + +extension SystemString: Hashable, Codable {} + +extension SystemString: ExpressibleByStringLiteral { + public init(stringLiteral: String) + + public init(_ string: String) +} + +extension SystemString: CustomStringConvertible, CustomDebugStringConvertible { + + public var description: String { get } + + public var debugDescription: String { get } +} + +extension SystemString { + /// Creates a `SystemString` by copying bytes from a null-terminated platform string. + /// + /// - Parameter platformString: A pointer to a null-terminated platform string. 
+ public init(platformString: UnsafePointer) + + /// Calls the given closure with a pointer to the contents of the system string, + /// represented as a null-terminated platform string. + /// + /// - Parameter body: A closure with a pointer parameter + /// that points to a null-terminated platform string. + /// If `body` has a return value, + /// that value is also used as the return value for this method. + /// - Returns: The return value, if any, of the `body` closure parameter. + /// + /// The pointer passed as an argument to `body` is valid + /// only during the execution of this method. + /// Don't try to store the pointer for later use. + public func withPlatformString( + _ f: (UnsafePointer) throws -> T + ) rethrows -> T +} + +``` + +You can create `String`s from `SystemString`, either decoding them (i.e. performing Unicode error correction on the contents) or validating them (i.e. returning `nil` if the contents are not validly-encoded Unicode). + +```swift + +extension String { + /// Creates a string by interpreting `str`'s content as UTF-8 on Unix + /// and UTF-16 on Windows. + /// + /// - Parameter str: The system string to be interpreted as + /// `CInterop.PlatformUnicodeEncoding`. + /// + /// If the content of the system string isn't a well-formed Unicode string, + /// this initializer replaces invalid bytes with U+FFFD. + /// This means that conversion to a string and back to a system string + /// might result in a value that's different from the original system string. + public init(decoding str: SystemString) + + /// Creates a string from a system string, validating its contents as UTF-8 on + /// Unix and UTF-16 on Windows. + /// + /// - Parameter str: The system string to be interpreted as + /// `CInterop.PlatformUnicodeEncoding`. + /// + /// If the contents of the system string isn't well-formed Unicode, + /// this initializer returns `nil`. 
+ public init?(validating str: SystemString) +} + + +``` + +You can create a `FilePath`, `FilePath.Root`, and `FilePath.Component` from a `SystemString`. + +```swift +extension FilePath { + /// Create a `FilePath` with the contents of `str`, normalizing separators. + public init(_ str: SystemString) +} + +extension FilePath.Component { + /// Create a `FilePath.Component` with the contents of `str`. + /// + /// Returns `nil` if `str` is empty or contains the directory separator. + public init?(_ str: SystemString) +} + +extension FilePath.Root { + /// Create a `FilePath.Root` with the contents of `str`. + /// + /// Returns `nil` if `str` is empty or is not a root + public init?(_ str: SystemString) +} + +``` ## Source compatibility diff --git a/Sources/System/FilePath/FilePath.swift b/Sources/System/FilePath/FilePath.swift index 7c2764db..f3a9cfa7 100644 --- a/Sources/System/FilePath/FilePath.swift +++ b/Sources/System/FilePath/FilePath.swift @@ -53,7 +53,7 @@ public struct FilePath: Sendable { // separators. All other initializers should be implemented by // ultimately deferring to a normalizing init. - /// TODO: docs + /// Create a `FilePath` with the contents of `str`, normalizing separators. public init(_ str: SystemString) { self._storage = str self._normalizeSeparators() diff --git a/Sources/System/FilePath/FilePathComponents.swift b/Sources/System/FilePath/FilePathComponents.swift index 22f043e2..4ffb822a 100644 --- a/Sources/System/FilePath/FilePathComponents.swift +++ b/Sources/System/FilePath/FilePathComponents.swift @@ -189,7 +189,9 @@ internal func _makeExtension(_ ext: String) -> SystemString { @available(/*System 0.0.2: macOS 12.0, iOS 15.0, watchOS 8.0, tvOS 15.0*/iOS 8, *) extension FilePath.Component { - /// TODO: docs + /// Create a `FilePath.Component` with the contents of `str`. + /// + /// Returns `nil` if `str` is empty or contains the directory separator. public init?(_ str: SystemString) { // FIXME: explicit null root? Or something else? 
let path = FilePath(str) @@ -203,7 +205,9 @@ extension FilePath.Component { @available(/*System 0.0.2: macOS 12.0, iOS 15.0, watchOS 8.0, tvOS 15.0*/iOS 8, *) extension FilePath.Root { - /// TODO: docs + /// Create a `FilePath.Root` with the contents of `str`. + /// + /// Returns `nil` if `str` is empty or is not a root public init?(_ str: SystemString) { // FIXME: explicit null root? Or something else? let path = FilePath(str) diff --git a/Sources/System/Internals/Exports.swift b/Sources/System/Internals/Exports.swift index d1283197..cd91ea21 100644 --- a/Sources/System/Internals/Exports.swift +++ b/Sources/System/Internals/Exports.swift @@ -101,12 +101,6 @@ internal func system_memset( // Interop between String and platfrom string extension String { - internal func _withPlatformString( - _ body: (UnsafePointer) throws -> Result - ) rethrows -> Result { - fatalError() - } - internal init?(_platformString platformString: UnsafePointer) { // Need to #if because CChar may be signed #if os(Windows) From 1faa91bdae9122540da3ce1e0e711287df545ac3 Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Mon, 29 Apr 2024 14:06:31 -0700 Subject: [PATCH 11/11] Explain platform string vs system string --- Documentation/system-string.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Documentation/system-string.md b/Documentation/system-string.md index a5638482..6478d750 100644 --- a/Documentation/system-string.md +++ b/Documentation/system-string.md @@ -266,11 +266,15 @@ This proposal is additive and ABI-compatible with existing code. ## Alternatives considered -**TODO**: Consider not having `SystemChar` +### The role of `SystemString` and `PlatformString` -**TODO**: Consider separate `SystemByteString` and `SystemBytePairString` types. +`PlatformString` is a typealias for the corresponding `UnsafePointer` you'd use with a raw syscall. 
`SystemString` is an owning type which provides memory safety, COW-semantics, and allows mutations and range-replaceable operations (such as appending). You can always get a `PlatformString` directly from a `SystemString`, and you can construct (by copying the memory) a `SystemString` from a `PlatformString`. -**TODO**: Why we don't want to have a single-byte ASCII representation on Windows and have syscall wrapper adjust/dispatch appropriately. +### **TODO** + +- Consider not having `SystemChar` +- Consider separate `SystemByteString` and `SystemBytePairString` types. +- Why we don't want to have a single-byte ASCII representation on Windows and have syscall wrapper adjust/dispatch appropriately. ## Future directions