Skip to content

Commit

Permalink
Merge pull request swiftlang#73585 from kubamracek/embedded-string-un…
Browse files Browse the repository at this point in the history
…icode-tables

[embedded] Provide Unicode data tables for embedded as a static library
  • Loading branch information
kubamracek authored May 28, 2024
2 parents 5b67c2f + f63f132 commit 67e9df0
Show file tree
Hide file tree
Showing 11 changed files with 234 additions and 32 deletions.
60 changes: 60 additions & 0 deletions stdlib/public/stubs/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,63 @@ if("${SWIFT_PRIMARY_VARIANT_SDK}" IN_LIST SWIFT_DARWIN_PLATFORMS)
APPEND_STRING PROPERTY COMPILE_FLAGS
"-fobjc-arc")
endif()

# Embedded Swift Unicode library
#
# Builds the Unicode data table sources under Unicode/ as a static library
# named swiftUnicodeDataTables, once per entry in
# EMBEDDED_STDLIB_TARGET_TRIPLES, and registers install rules for each
# resulting archive. The umbrella target `embedded-unicode` depends on every
# per-module library target created here.
if(SWIFT_SHOULD_BUILD_EMBEDDED_STDLIB)
  add_custom_target(embedded-unicode ALL)

  foreach(entry ${EMBEDDED_STDLIB_TARGET_TRIPLES})
    # Each entry is a whitespace-separated record: "<arch> <module> <triple>".
    string(REGEX REPLACE "[ \t]+" ";" list "${entry}")
    list(GET list 0 arch)
    list(GET list 1 mod)
    list(GET list 2 triple)

    # Entries whose module name ends in -windows-msvc are skipped entirely.
    if("${mod}" MATCHES "-windows-msvc$")
      continue()
    endif()

    # Start from an empty flag list so that hosts other than linux/macosx do
    # not inherit a stale `extra_c_compile_flags` value from an enclosing
    # scope.
    set(extra_c_compile_flags "")
    if("${SWIFT_HOST_VARIANT}" STREQUAL "linux")
      set(extra_c_compile_flags -ffreestanding)
    elseif("${SWIFT_HOST_VARIANT}" STREQUAL "macosx")
      set(extra_c_compile_flags -D__MACH__ -D__APPLE__ -ffreestanding)
    endif()

    # Describe this module as an architecture of the synthetic "embedded" SDK;
    # the SDK and ARCHITECTURE arguments below refer to these variables by
    # name.
    set(SWIFT_SDK_embedded_ARCH_${mod}_MODULE "${mod}")
    set(SWIFT_SDK_embedded_LIB_SUBDIR "embedded")
    set(SWIFT_SDK_embedded_ARCH_${mod}_TRIPLE "${triple}")

    add_swift_target_library_single(
      embedded-unicode-${mod}
      swiftUnicodeDataTables
      STATIC
      IS_FRAGILE

      Unicode/UnicodeData.cpp
      Unicode/UnicodeGrapheme.cpp
      Unicode/UnicodeNormalization.cpp
      Unicode/UnicodeScalarProps.cpp
      Unicode/UnicodeWord.cpp

      C_COMPILE_FLAGS ${extra_c_compile_flags}
      MODULE_DIR "${CMAKE_BINARY_DIR}/lib/swift/embedded"
      SDK "embedded"
      ARCHITECTURE "${mod}"
      DEPENDS embedded-stdlib-${mod}
      INSTALL_IN_COMPONENT stdlib
      )
    swift_install_in_component(
      TARGETS embedded-unicode-${mod}
      DESTINATION "lib/swift/embedded/${mod}"
      COMPONENT "stdlib"
      )
    # NOTE(review): this installs the archive by file path into the same
    # destination as the TARGETS rule above -- confirm both rules are needed.
    swift_install_in_component(
      FILES "${SWIFTLIB_DIR}/embedded/${mod}/libswiftUnicodeDataTables.a"
      DESTINATION "lib/swift/embedded/${mod}/"
      COMPONENT "stdlib"
      PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE
      )
    set_property(TARGET embedded-unicode-${mod} PROPERTY OSX_ARCHITECTURES "${arch}")

    add_dependencies(embedded-unicode embedded-unicode-${mod})
  endforeach()
endif()
8 changes: 4 additions & 4 deletions stdlib/public/stubs/Unicode/UnicodeData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
//===----------------------------------------------------------------------===//

#include "swift/shims/UnicodeData.h"
#include <limits>
#include <stdint.h>

// Every 4 byte chunks of data that we need to hash (in this case only ever
// scalars and levels who are all uint32), we need to calculate K. At the end
Expand Down Expand Up @@ -162,7 +162,7 @@ __swift_intptr_t _swift_stdlib_getScalarBitArrayIdx(__swift_uint32_t scalar,
// If our chunk index is larger than the quick look indices, then it means
// our scalar appears in chunks that are all 0 and trailing.
if ((__swift_uint64_t) idx > quickLookSize - 1) {
return std::numeric_limits<__swift_intptr_t>::max();
return INTPTR_MAX;
}

// Our scalar actually exists in a quick look bit array that was implemented.
Expand All @@ -172,7 +172,7 @@ __swift_intptr_t _swift_stdlib_getScalarBitArrayIdx(__swift_uint32_t scalar,
// (chunkSize) of the scalars being represented have no property and ours is
// one of them.
if ((quickLook & ((__swift_uint64_t) 1 << chunkBit)) == 0) {
return std::numeric_limits<__swift_intptr_t>::max();
return INTPTR_MAX;
}

// Ok, our scalar failed the quick look check. Go lookup our scalar in the
Expand Down Expand Up @@ -223,7 +223,7 @@ __swift_intptr_t _swift_stdlib_getScalarBitArrayIdx(__swift_uint32_t scalar,
// If our scalar specifically is not turned on within our chunk's bit array,
// then we know for sure that our scalar does not exhibit this property.
if ((chunkWord & ((__swift_uint64_t) 1 << scalarSpecificBit)) == 0) {
return std::numeric_limits<__swift_intptr_t>::max();
return INTPTR_MAX;
}

// Otherwise, this scalar does have whatever property this scalar array is
Expand Down
4 changes: 2 additions & 2 deletions stdlib/public/stubs/Unicode/UnicodeGrapheme.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#include "swift/Runtime/Debug.h"
#endif
#include "swift/shims/UnicodeData.h"
#include <limits>
#include <stdint.h>


SWIFT_RUNTIME_STDLIB_INTERNAL
Expand Down Expand Up @@ -68,7 +68,7 @@ __swift_bool _swift_stdlib_isLinkingConsonant(__swift_uint32_t scalar) {
_swift_stdlib_linkingConsonant,
_swift_stdlib_linkingConsonant_ranks);

if (idx == std::numeric_limits<__swift_intptr_t>::max()) {
if (idx == INTPTR_MAX) {
return false;
}

Expand Down
8 changes: 4 additions & 4 deletions stdlib/public/stubs/Unicode/UnicodeNormalization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
#endif

#include "swift/shims/UnicodeData.h"
#include <limits>
#include <stdint.h>

SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_uint16_t _swift_stdlib_getNormData(__swift_uint32_t scalar) {
Expand All @@ -42,7 +42,7 @@ __swift_uint16_t _swift_stdlib_getNormData(__swift_uint32_t scalar) {

// If we don't have an index into the data indices, then this scalar has no
// normalization information.
if (dataIdx == std::numeric_limits<__swift_intptr_t>::max()) {
if (dataIdx == INTPTR_MAX) {
return 0;
}

Expand Down Expand Up @@ -91,7 +91,7 @@ __swift_uint32_t _swift_stdlib_getComposition(__swift_uint32_t x,
auto realY = (array[0] << 11) >> 11;

if (y != realY) {
return std::numeric_limits<__swift_uint32_t>::max();
return UINT32_MAX;
}

auto count = array[0] >> 21;
Expand Down Expand Up @@ -134,6 +134,6 @@ __swift_uint32_t _swift_stdlib_getComposition(__swift_uint32_t x,
// If we made it out here, then our scalar was not found in the composition
// array.
// Return the max here to indicate that we couldn't find one.
return std::numeric_limits<__swift_uint32_t>::max();
return UINT32_MAX;
#endif
}
20 changes: 10 additions & 10 deletions stdlib/public/stubs/Unicode/UnicodeScalarProps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
#endif

#include "swift/shims/UnicodeData.h"
#include <limits>
#include <stdint.h>

SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_uint64_t _swift_stdlib_getBinaryProperties(__swift_uint32_t scalar) {
Expand Down Expand Up @@ -124,7 +124,7 @@ __swift_uint8_t _swift_stdlib_getNumericType(__swift_uint32_t scalar) {
// If we made it out here, then our scalar was not found in the composition
// array.
// Return the max here to indicate that we couldn't find one.
return std::numeric_limits<__swift_uint8_t>::max();
return UINT8_MAX;
#endif
}

Expand Down Expand Up @@ -153,7 +153,7 @@ const char *_swift_stdlib_getNameAlias(__swift_uint32_t scalar) {
_swift_stdlib_nameAlias,
_swift_stdlib_nameAlias_ranks);

if (dataIdx == std::numeric_limits<__swift_intptr_t>::max()) {
if (dataIdx == INTPTR_MAX) {
return nullptr;
}

Expand All @@ -171,7 +171,7 @@ __swift_int32_t _swift_stdlib_getMapping(__swift_uint32_t scalar,
_swift_stdlib_mappings,
_swift_stdlib_mappings_ranks);

if (dataIdx == std::numeric_limits<__swift_intptr_t>::max()) {
if (dataIdx == INTPTR_MAX) {
return 0;
}

Expand Down Expand Up @@ -219,7 +219,7 @@ const __swift_uint8_t *_swift_stdlib_getSpecialMapping(__swift_uint32_t scalar,
_swift_stdlib_special_mappings,
_swift_stdlib_special_mappings_ranks);

if (dataIdx == std::numeric_limits<__swift_intptr_t>::max()) {
if (dataIdx == INTPTR_MAX) {
return nullptr;
}

Expand Down Expand Up @@ -261,7 +261,7 @@ __swift_intptr_t _swift_stdlib_getScalarName(__swift_uint32_t scalar,
#else
auto setOffset = _swift_stdlib_names_scalar_sets[scalar >> 7];

if (setOffset == std::numeric_limits<__swift_uint16_t>::max()) {
if (setOffset == UINT16_MAX) {
return 0;
}

Expand Down Expand Up @@ -385,7 +385,7 @@ __swift_uint16_t _swift_stdlib_getAge(__swift_uint32_t scalar) {
// If we made it out here, then our scalar was not found in the composition
// array.
// Return the max here to indicate that we couldn't find one.
return std::numeric_limits<__swift_uint16_t>::max();
return UINT16_MAX;
#endif
}

Expand Down Expand Up @@ -427,7 +427,7 @@ __swift_uint8_t _swift_stdlib_getGeneralCategory(__swift_uint32_t scalar) {
// If we made it out here, then our scalar was not found in the composition
// array.
// Return the max here to indicate that we couldn't find one.
return std::numeric_limits<__swift_uint8_t>::max();
return UINT8_MAX;
#endif
}

Expand Down Expand Up @@ -485,7 +485,7 @@ __swift_uint8_t _swift_stdlib_getScript(__swift_uint32_t scalar) {
// all in the array. This should never happen because the array represents all
// scalars from 0x0 to 0x10FFFF, but if somehow this branch gets reached,
// return 255 to indicate a failure.
return std::numeric_limits<__swift_uint8_t>::max();
return UINT8_MAX;
#endif
}

Expand All @@ -501,7 +501,7 @@ const __swift_uint8_t *_swift_stdlib_getScriptExtensions(__swift_uint32_t scalar

// If we don't have an index into the data indices, then this scalar has no
// script extensions
if (dataIdx == std::numeric_limits<__swift_intptr_t>::max()) {
if (dataIdx == INTPTR_MAX) {
return 0;
}

Expand Down
4 changes: 2 additions & 2 deletions stdlib/public/stubs/Unicode/UnicodeWord.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#include "swift/Runtime/Debug.h"
#endif
#include "swift/shims/UnicodeData.h"
#include <limits>
#include <stdint.h>

SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_uint8_t _swift_stdlib_getWordBreakProperty(__swift_uint32_t scalar) {
Expand Down Expand Up @@ -46,6 +46,6 @@ __swift_uint8_t _swift_stdlib_getWordBreakProperty(__swift_uint32_t scalar) {
// If we made it out here, then our scalar was not found in the word
// array (this occurs when a scalar doesn't map to any word break
// property). Return the max value here to indicate .any.
return std::numeric_limits<__swift_uint8_t>::max();
return UINT8_MAX;
#endif
}
23 changes: 13 additions & 10 deletions test/embedded/stdlib-strings-datatables.swift
Original file line number Diff line number Diff line change
@@ -1,25 +1,28 @@
// Test String operations that require unicode data tables. This is not an executable test yet, because the data tables
// are not available for linking yet.

// RUN: %target-swift-frontend -emit-ir %s -enable-experimental-feature Embedded
// RUN: %target-run-simple-swift( -enable-experimental-feature Embedded -runtime-compatibility-version none -wmo -Xlinker %swift_obj_root/lib/swift/embedded/%target-cpu-apple-macos/libswiftUnicodeDataTables.a) | %FileCheck %s
// RUN: %target-run-simple-swift(-Osize -Xlinker -dead_strip -enable-experimental-feature Embedded -runtime-compatibility-version none -wmo -Xlinker %swift_obj_root/lib/swift/embedded/%target-cpu-apple-macos/libswiftUnicodeDataTables.a) | %FileCheck %s

// REQUIRES: swift_in_compiler
// REQUIRES: executable_test
// REQUIRES: optimized_stdlib
// REQUIRES: OS=macosx || OS=linux-gnu
// REQUIRES: OS=macosx

public func test1() {
let string = "string"
let other = "other"
let appended = string + other
_ = appended
print(appended) // CHECK: stringother

let _ = "aa" == "bb"
let dict: [String:Int] = [:]
_ = dict
var dict: [String:Int] = [:]
dict["aa"] = 42
print(dict["aa"]!) // CHECK: 42

let _ = "aaa".uppercased()
let u = "aaa".uppercased()
print(u) // CHECK: AAA

let space: Character = " "
let split = appended.split(separator: space)
_ = split
print(split[0]) // CHECK: stringother
}

test1()
39 changes: 39 additions & 0 deletions test/embedded/stdlib-strings-unicode.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// RUN: %target-run-simple-swift( -enable-experimental-feature Embedded -parse-as-library -runtime-compatibility-version none -wmo -Xlinker %swift_obj_root/lib/swift/embedded/%target-cpu-apple-macos/libswiftUnicodeDataTables.a) | %FileCheck %s
// RUN: %target-run-simple-swift(-Osize -Xlinker -dead_strip -enable-experimental-feature Embedded -parse-as-library -runtime-compatibility-version none -wmo -Xlinker %swift_obj_root/lib/swift/embedded/%target-cpu-apple-macos/libswiftUnicodeDataTables.a) | %FileCheck %s

// REQUIRES: swift_in_compiler
// REQUIRES: executable_test
// REQUIRES: optimized_stdlib
// REQUIRES: OS=macosx

// Entry point of this executable test. Every print call below is paired with
// a FileCheck directive on the same line that states the expected output, so
// the order of the statements matters.
@main
struct Main {
static func main() {
// A string ending in a single emoji; dropping the last Character is
// expected to leave the five Characters of "Hello".
let str = "Hello😊"
print(str) // CHECK: Hello😊
print(str.dropLast()) // CHECK: Hello
print(str.dropLast().count) // CHECK: 5

// Five spellings of the same word used as dictionary keys: c, d and f end in
// "e" (U+0065) followed by U+0301 COMBINING ACUTE ACCENT, g uses the
// precomposed U+00E9, and e is written as a literal in this file.
var dict: [String:String] = [:]
let c = "Cafe\u{301}"
let d = "Cafe\u{301}"
let e = "Café"
let f = "Caf\u{65}\u{301}"
let g = "Caf\u{e9}"
dict[c] = str
dict[d] = str
dict[e] = str
dict[f] = str
dict[g] = str
// The expected count of 1 means all five keys must hash and compare as
// equal, so each assignment overwrites the same entry.
print(dict.count) // CHECK: 1
print(dict[f]!) // CHECK: Hello😊

// Build an emoji sequence one scalar at a time; the four scalars are
// expected to be counted as a single Character.
var emoji = ""
// VAMPIRE, ZERO-WIDTH JOINER, FEMALE SIGN, VARIATION SELECTOR-16
emoji += "\u{1f9db}"
emoji += "\u{200d}"
emoji += "\u{2640}"
emoji += "\u{fe0f}"
print(emoji.count) // CHECK: 1
}
}
40 changes: 40 additions & 0 deletions test/embedded/unicode-dead-strip1.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// RUN: %target-swift-frontend -Osize -parse-as-library -enable-experimental-feature Embedded %s -c -o %t/a.o
// RUN: %target-clang %t/a.o -o %t/a.out -dead_strip %swift_obj_root/lib/swift/embedded/%target-cpu-apple-macos/libswiftUnicodeDataTables.a
// RUN: %llvm-nm --defined-only --format=just-symbols --demangle %t/a.out | grep swift_stdlib_ | sort | %FileCheck %s --check-prefix=INCLUDES
// RUN: %llvm-nm --defined-only --format=just-symbols --demangle %t/a.out | grep swift_stdlib_ | sort | %FileCheck %s --check-prefix=EXCLUDES

// REQUIRES: swift_in_compiler
// REQUIRES: optimized_stdlib
// REQUIRES: OS=macosx

// Entry point of the program whose linked binary is inspected by the RUN
// lines at the top of this file. It only inserts into, counts, and reads from
// a String-keyed dictionary; the symbol expectations listed after this struct
// depend on that, so do not add other String operations here.
@main
struct Main {
static func main() {
// Five spellings of the same word used as dictionary keys: c, d and f end in
// "e" (U+0065) followed by U+0301 COMBINING ACUTE ACCENT, g uses the
// precomposed U+00E9, and e is written as a literal in this file.
var dict: [String:String] = [:]
let c = "Cafe\u{301}"
let d = "Cafe\u{301}"
let e = "Café"
let f = "Caf\u{65}\u{301}"
let g = "Caf\u{e9}"
dict[c] = "x"
dict[d] = "x"
dict[e] = "x"
dict[f] = "x"
dict[g] = "x"
// The printed values are not verified by this test; only the symbols that
// remain in the linked binary are.
print(dict.count)
print(dict[f]!)
}
}

// The code uses String equality and hashing, so it should need the normalization, NFC, and NFD tables, but none of the others.
// EXCLUDES-NOT: swift_stdlib_case
// EXCLUDES-NOT: swift_stdlib_graphemeBreakProperties
// EXCLUDES-NOT: swift_stdlib_linkingConsonant
// EXCLUDES-NOT: swift_stdlib_mappings
// EXCLUDES-NOT: swift_stdlib_names
// INCLUDES: swift_stdlib_nfc
// INCLUDES: swift_stdlib_nfd
// INCLUDES: swift_stdlib_normData
// EXCLUDES-NOT: swift_stdlib_scripts
// EXCLUDES-NOT: swift_stdlib_special_mappings
// EXCLUDES-NOT: swift_stdlib_words
Loading

0 comments on commit 67e9df0

Please sign in to comment.