diff --git a/icu4c/source/test/intltest/ucdtest.cpp b/icu4c/source/test/intltest/ucdtest.cpp
index a63b86e21642..54e1c8751bbd 100644
--- a/icu4c/source/test/intltest/ucdtest.cpp
+++ b/icu4c/source/test/intltest/ucdtest.cpp
@@ -12,9 +12,11 @@
 #include "unicode/putil.h"
 #include "unicode/uscript.h"
 #include "unicode/uset.h"
+#include "charstr.h"
 #include "cstring.h"
 #include "hash.h"
 #include "patternprops.h"
+#include "ppucd.h"
 #include "normalizer2impl.h"
 #include "testutil.h"
 #include "uparse.h"
@@ -80,6 +82,7 @@ void UnicodeTest::runIndexedTest( int32_t index, UBool exec, const char* &name,
     TESTCASE_AUTO(TestPropertyNames);
     TESTCASE_AUTO(TestIDSUnaryOperator);
     TESTCASE_AUTO(TestIDCompatMath);
+    TESTCASE_AUTO(TestBinaryPropertyUsingPpucd);
     TESTCASE_AUTO_END;
 }
 
@@ -1024,3 +1027,106 @@ void UnicodeTest::TestIDCompatMath() {
     assertTrue("idcmStart.contains(U+1D7C3)", idcmStart.contains(0x1D7C3));
     assertFalse("idcmStart.contains(U+1D7C4)", idcmStart.contains(0x1D7C4));
 }
+
+
+U_NAMESPACE_BEGIN
+
+/**
+ * PropertyNames implementation that forwards every lookup to the ICU
+ * library's own u_getPropertyEnum() and u_getPropertyValueEnum(), so the
+ * test below can hand it to PreparsedUCD::setPropertyNames().
+ */
+class BuiltInPropertyNames : public PropertyNames {
+public:
+    ~BuiltInPropertyNames() override {}
+
+    int32_t getPropertyEnum(const char *name) const override {
+        return u_getPropertyEnum(name);
+    }
+
+    int32_t getPropertyValueEnum(int32_t property, const char *name) const override {
+        return u_getPropertyValueEnum(static_cast<UProperty>(property), name);
+    }
+};
+
+U_NAMESPACE_END
+
+/**
+ * For each property in propsUnderTest, builds a UnicodeSet from the lines of
+ * ppucd.txt and asserts that it equals the set that applyIntPropertyValue()
+ * produces for the same property.
+ */
+void UnicodeTest::TestBinaryPropertyUsingPpucd() {
+    IcuTestErrorCode errorCode(*this, "TestBinaryPropertyUsingPpucd()");
+
+    // Initialize PPUCD parsing object using file in repo and using
+    // property names present in built-in data in ICU
+    char buffer[500];
+    // get path to `source/data/unidata/` including trailing `/`
+    char *unidataPath = getUnidataPath(buffer);
+    if(unidataPath == nullptr) {
+        errln("exiting early because unable to open ppucd.txt from ICU source tree");
+        return;
+    }
+    CharString ppucdPath(unidataPath, errorCode);
+    ppucdPath.appendPathPart("ppucd.txt", errorCode);
+    PreparsedUCD ppucd(ppucdPath.data(), errorCode);
+    if(errorCode.isFailure()) {
+        errln("unable to open %s - %s\n",
+            ppucdPath.data(), errorCode.errorName());
+        return;
+    }
+    BuiltInPropertyNames builtInPropNames;
+    ppucd.setPropertyNames(&builtInPropNames);
+
+    // Define which binary properties we want to compare
+    constexpr UProperty propsUnderTest[] = {
+        UCHAR_IDS_UNARY_OPERATOR,
+        UCHAR_ID_COMPAT_MATH_START,
+        UCHAR_ID_COMPAT_MATH_CONTINUE,
+    };
+
+    // Allocate & initialize UnicodeSets per binary property from PPUCD data
+    UnicodeSet ppucdPropSets[std::size(propsUnderTest)];
+
+    // Iterate through PPUCD file, accumulating each line's data into each UnicodeSet per property
+    UnicodeSet newValues;
+    while(ppucd.readLine(errorCode)!=PreparsedUCD::NO_LINE && errorCode.isSuccess()) {
+        if(ppucd.lineHasPropertyValues()) {
+            const UniProps *lineProps=ppucd.getProps(newValues, errorCode);
+            if(lineProps == nullptr || errorCode.isFailure()) {
+                // Guard the dereferences below: stop if no props came back or an error was set.
+                errln("exiting early due to parsing error");
+                return;
+            }
+
+            for(uint32_t i = 0; i < std::size(propsUnderTest); i++) {
+                UProperty prop = propsUnderTest[i];
+                if (!newValues.contains(prop)) {
+                    continue;
+                }
+                if (lineProps->binProps[prop]) {
+                    ppucdPropSets[i].add(lineProps->start, lineProps->end);
+                } else {
+                    ppucdPropSets[i].remove(lineProps->start, lineProps->end);
+                }
+            }
+        }
+    }
+
+    if(errorCode.isFailure()) {
+        errln("exiting early due to parsing error");
+        return;
+    }
+
+    // Assert that the PPUCD data and the ICU data are equivalent for all properties
+    for(uint32_t i = 0; i < std::size(propsUnderTest); i++) {
+        UnicodeSet icuPropSet;
+        UProperty prop = propsUnderTest[i];
+        icuPropSet.applyIntPropertyValue(prop, 1, errorCode);
+        std::string msg =
+                std::string()
+                + "ICU & PPUCD versions of property "
+                + u_getPropertyName(prop, U_LONG_PROPERTY_NAME);
+        assertTrue(msg.c_str(), ppucdPropSets[i] == icuPropSet);
+    }
+}
diff --git a/icu4c/source/test/intltest/ucdtest.h b/icu4c/source/test/intltest/ucdtest.h
index 6c83744c0582..7571da46cf95 100644
--- a/icu4c/source/test/intltest/ucdtest.h
+++ b/icu4c/source/test/intltest/ucdtest.h
@@ -52,6 +52,7 @@ class UnicodeTest: public IntlTest {
     void TestPropertyNames();
     void TestIDSUnaryOperator();
     void TestIDCompatMath();
+    void TestBinaryPropertyUsingPpucd();
 
 private:
 
diff --git a/icu4c/source/tools/toolutil/ppucd.cpp b/icu4c/source/tools/toolutil/ppucd.cpp
index 0d59b28ce48a..688c54e4549d 100644
--- a/icu4c/source/tools/toolutil/ppucd.cpp
+++ b/icu4c/source/tools/toolutil/ppucd.cpp
@@ -29,24 +29,6 @@ U_NAMESPACE_BEGIN
 
 PropertyNames::~PropertyNames() {}
 
-// TODO: Create a concrete subclass for the default PropertyNames implementation
-// using the ICU library built-in property names API & data.
-// Currently only the genprops tool uses PreparsedUCD, and provides its own
-// PropertyNames implementation using its just-build property names data and its own code.
-// At some point, we should use PreparsedUCD in tests, and then we will need the
-// default implementation somewhere.
-#if 0
-int32_t
-PropertyNames::getPropertyEnum(const char *name) const {
-    return u_getPropertyEnum(name);
-}
-
-int32_t
-PropertyNames::getPropertyValueEnum(int32_t property, const char *name) const {
-    return u_getPropertyValueEnum((UProperty)property, name);
-}
-#endif
-
 UniProps::UniProps()
         : start(U_SENTINEL), end(U_SENTINEL),
           bmg(U_SENTINEL), bpb(U_SENTINEL),