Skip to content

Commit

Permalink
ICU-22689 Add PPUCD-based data driven test for binary props
Browse files Browse the repository at this point in the history
  • Loading branch information
echeran committed Mar 20, 2024
1 parent 7a3dfe8 commit 1be8612
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 18 deletions.
93 changes: 93 additions & 0 deletions icu4c/source/test/intltest/ucdtest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@
#include "unicode/putil.h"
#include "unicode/uscript.h"
#include "unicode/uset.h"
#include "charstr.h"
#include "cstring.h"
#include "hash.h"
#include "patternprops.h"
#include "ppucd.h"
#include "normalizer2impl.h"
#include "testutil.h"
#include "uparse.h"
Expand Down Expand Up @@ -80,6 +82,7 @@ void UnicodeTest::runIndexedTest( int32_t index, UBool exec, const char* &name,
TESTCASE_AUTO(TestPropertyNames);
TESTCASE_AUTO(TestIDSUnaryOperator);
TESTCASE_AUTO(TestIDCompatMath);
TESTCASE_AUTO(TestBinaryPropertyUsingPpucd);
TESTCASE_AUTO_END;
}

Expand Down Expand Up @@ -1024,3 +1027,93 @@ void UnicodeTest::TestIDCompatMath() {
assertTrue("idcmStart.contains(U+1D7C3)", idcmStart.contains(0x1D7C3));
assertFalse("idcmStart.contains(U+1D7C4)", idcmStart.contains(0x1D7C4));
}


U_NAMESPACE_BEGIN

class BuiltInPropertyNames : public PropertyNames {
public:
~BuiltInPropertyNames() override {}

int32_t getPropertyEnum(const char *name) const override {
return u_getPropertyEnum(name);
}

int32_t getPropertyValueEnum(int32_t property, const char *name) const override {
return u_getPropertyValueEnum((UProperty) property, name);
}
};

U_NAMESPACE_END

/**
 * Data-driven test: verifies that the binary property data built into ICU
 * matches the data in ppucd.txt for every property listed in propsUnderTest.
 *
 * Steps:
 *  1. Open ppucd.txt from the unidata directory with a PreparsedUCD parser that
 *     resolves names via BuiltInPropertyNames.
 *  2. Read the file line by line, building one UnicodeSet per tested property.
 *  3. Compare each accumulated set with the set produced by
 *     UnicodeSet::applyIntPropertyValue(prop, 1).
 */
void UnicodeTest::TestBinaryPropertyUsingPpucd() {
    IcuTestErrorCode errorCode(*this, "TestBinaryPropertyUsingPpucd()");

    // Initialize PPUCD parsing object using file in repo and using
    // property names present in built-in data in ICU
    char buffer[500];
    // get path to `source/data/unidata/` including trailing `/`
    char *unidataPath = getUnidataPath(buffer);
    if (unidataPath == nullptr) {
        errln("exiting early because unable to open ppucd.txt from ICU source tree");
        return;
    }
    CharString ppucdPath(unidataPath, errorCode);
    ppucdPath.appendPathPart("ppucd.txt", errorCode);
    PreparsedUCD ppucd(ppucdPath.data(), errorCode);
    if (errorCode.isFailure()) {
        errln("unable to open %s - %s\n",
              ppucdPath.data(), errorCode.errorName());
        return;
    }
    BuiltInPropertyNames builtInPropNames;
    ppucd.setPropertyNames(&builtInPropNames);

    // Define which binary properties we want to compare
    constexpr UProperty propsUnderTest[] = {
        UCHAR_IDS_UNARY_OPERATOR,
        UCHAR_ID_COMPAT_MATH_START,
        UCHAR_ID_COMPAT_MATH_CONTINUE,
    };

    // One (initially empty) UnicodeSet per binary property, filled from PPUCD data.
    // ppucdPropSets[i] corresponds to propsUnderTest[i].
    UnicodeSet ppucdPropSets[std::size(propsUnderTest)];

    // Iterate through PPUCD file, accumulating each line's data into each UnicodeSet per property
    PreparsedUCD::LineType lineType;
    UnicodeSet newValues;
    bool parseFailed = false;
    while ((lineType = ppucd.readLine(errorCode)) != PreparsedUCD::NO_LINE && errorCode.isSuccess()) {
        if (!ppucd.lineHasPropertyValues()) {
            continue;
        }
        const UniProps *lineProps = ppucd.getProps(newValues, errorCode);
        if (lineProps == nullptr || errorCode.isFailure()) {
            // Never dereference the result of a failed getProps() call;
            // fall through to the common error report below.
            parseFailed = true;
            break;
        }

        for (size_t i = 0; i < std::size(propsUnderTest); i++) {
            const UProperty prop = propsUnderTest[i];
            // Only properties flagged in newValues carry a value on this line.
            if (!newValues.contains(prop)) {
                continue;
            }
            // A later line may turn a property on or off for a sub-range,
            // so both additions and removals have to be applied.
            if (lineProps->binProps[prop]) {
                ppucdPropSets[i].add(lineProps->start, lineProps->end);
            } else {
                ppucdPropSets[i].remove(lineProps->start, lineProps->end);
            }
        }
    }

    if (parseFailed || errorCode.isFailure()) {
        errln("exiting early due to parsing error");
        return;
    }

    // Assert that the PPUCD data and the ICU data are equivalent for all properties
    for (size_t i = 0; i < std::size(propsUnderTest); i++) {
        const UProperty prop = propsUnderTest[i];
        UnicodeSet icuPropSet;
        icuPropSet.applyIntPropertyValue(prop, 1, errorCode);
        std::string msg =
            std::string()
            + "ICU & PPUCD versions of property "
            + u_getPropertyName(prop, U_LONG_PROPERTY_NAME);
        assertTrue(msg.c_str(), ppucdPropSets[i] == icuPropSet);
    }
}
1 change: 1 addition & 0 deletions icu4c/source/test/intltest/ucdtest.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ class UnicodeTest: public IntlTest {
void TestPropertyNames();
void TestIDSUnaryOperator();
void TestIDCompatMath();
void TestBinaryPropertyUsingPpucd();

private:

Expand Down
18 changes: 0 additions & 18 deletions icu4c/source/tools/toolutil/ppucd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,24 +29,6 @@ U_NAMESPACE_BEGIN

// Out-of-line definition of the PropertyNames destructor; it has nothing to clean up.
PropertyNames::~PropertyNames() = default;

// TODO: Create a concrete subclass for the default PropertyNames implementation
// using the ICU library built-in property names API & data.
// Currently only the genprops tool uses PreparsedUCD, and provides its own
// PropertyNames implementation using its just-built property names data and its own code.
// At some point, we should use PreparsedUCD in tests, and then we will need the
// default implementation somewhere.
#if 0
int32_t
PropertyNames::getPropertyEnum(const char *name) const {
return u_getPropertyEnum(name);
}

int32_t
PropertyNames::getPropertyValueEnum(int32_t property, const char *name) const {
return u_getPropertyValueEnum((UProperty)property, name);
}
#endif

UniProps::UniProps()
: start(U_SENTINEL), end(U_SENTINEL),
bmg(U_SENTINEL), bpb(U_SENTINEL),
Expand Down

0 comments on commit 1be8612

Please sign in to comment.