Skip to content

Commit

Permalink
Make use of new JSON Toolkit regex optimisations (#225)
Browse files Browse the repository at this point in the history
Signed-off-by: Juan Cruz Viotti <[email protected]>
  • Loading branch information
jviotti authored Nov 22, 2024
1 parent 6b684b3 commit d2738f4
Show file tree
Hide file tree
Showing 7 changed files with 93 additions and 18 deletions.
2 changes: 1 addition & 1 deletion DEPENDENCIES
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
vendorpull https://github.com/sourcemeta/vendorpull dea311b5bfb53b6926a4140267959ae334d3ecf4
noa https://github.com/sourcemeta/noa caad2e1ceedf9fd1a18686a6a6d1e2b9757ead75
- jsontoolkit https://github.com/sourcemeta/jsontoolkit c8c1f6bb8530b0fe0aa83bdfccb8ccb8db5e54ea
+ jsontoolkit https://github.com/sourcemeta/jsontoolkit 796cda46a7c43312e36920f523e15f15de4ba2f4
googletest https://github.com/google/googletest a7f443b80b105f940225332ed3c31f2790092f47
googlebenchmark https://github.com/google/benchmark 378fe693a1ef51500db21b11ff05a8018c5f0e55
jsonschema-test-suite https://github.com/json-schema-org/JSON-Schema-Test-Suite c2badb1298a8698f86dadf1aea7b44b3a894e5ac
1 change: 1 addition & 0 deletions src/compiler/compile_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ inline auto walk_subschemas(const Context &context,
context.resolver, entry.dialect};
}

+ // TODO: Get rid of this given the new JSON Toolkit regex optimisations
inline auto pattern_as_prefix(const std::string &pattern)
-> std::optional<std::string> {
static const std::regex starts_with_regex{R"(^\^([a-zA-Z0-9-_/]+)$)"};
Expand Down
2 changes: 1 addition & 1 deletion src/compiler/default_compiler_draft4.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ static auto parse_regex(const std::string &pattern,
const sourcemeta::jsontoolkit::URI &base,
const sourcemeta::jsontoolkit::Pointer &schema_location)
-> sourcemeta::jsontoolkit::Regex {
- const auto result{sourcemeta::jsontoolkit::compile(pattern)};
+ const auto result{sourcemeta::jsontoolkit::to_regex(pattern)};
if (!result.has_value()) {
std::ostringstream message;
message << "Invalid regular expression: " << pattern;
Expand Down
10 changes: 5 additions & 5 deletions src/evaluator/dispatch.inc.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ switch (static_cast<InstructionIndex>(instruction.index())) {

case IS_INSTRUCTION(AssertionRegex): {
EVALUATE_BEGIN_IF_STRING(assertion, AssertionRegex);
- result = validate(assertion.value.first, target);
+ result = matches(assertion.value.first, target);
EVALUATE_END(assertion, AssertionRegex);
}

Expand Down Expand Up @@ -846,7 +846,7 @@ switch (static_cast<InstructionIndex>(instruction.index())) {
if (std::any_of(std::get<2>(loop.value).cbegin(),
std::get<2>(loop.value).cend(),
[&entry](const auto &pattern) {
- return validate(pattern.first, entry.first);
+ return matches(pattern.first, entry.first);
})) {
continue;
}
Expand Down Expand Up @@ -985,7 +985,7 @@ switch (static_cast<InstructionIndex>(instruction.index())) {
assert(!loop.children.empty());
result = true;
for (const auto &entry : target.as_object()) {
- if (!validate(loop.value.first, entry.first)) {
+ if (!matches(loop.value.first, entry.first)) {
continue;
}

Expand Down Expand Up @@ -1016,7 +1016,7 @@ switch (static_cast<InstructionIndex>(instruction.index())) {
target.is_object());
result = true;
for (const auto &entry : target.as_object()) {
- if (!validate(loop.value.first, entry.first)) {
+ if (!matches(loop.value.first, entry.first)) {
result = false;
break;
}
Expand Down Expand Up @@ -1103,7 +1103,7 @@ switch (static_cast<InstructionIndex>(instruction.index())) {
if (std::any_of(std::get<2>(loop.value).cbegin(),
std::get<2>(loop.value).cend(),
[&entry](const auto &pattern) {
- return validate(pattern.first, entry.first);
+ return matches(pattern.first, entry.first);
})) {
continue;
}
Expand Down
2 changes: 1 addition & 1 deletion test/compiler/compiler_json_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,7 @@ TEST(Compiler_json, regex_basic) {

const Instructions steps{AssertionRegex{
Pointer{}, Pointer{}, "#", 0,
- ValueRegex{sourcemeta::jsontoolkit::compile("^a").value(), "^a"}}};
+ ValueRegex{sourcemeta::jsontoolkit::to_regex("^a").value(), "^a"}}};

const JSON result{to_json({steps, {}})};
const JSON expected{parse(R"EOF([
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

53 changes: 49 additions & 4 deletions vendor/jsontoolkit/src/regex/regex.cc

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 comments on commit d2738f4

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark (macos/llvm)

Benchmark suite Current: d2738f4 Previous: 6b684b3 Ratio
Compiler_Draft6_AdaptiveCard 33160325082.999973 ns/iter 35381240959.00002 ns/iter 0.94
Compiler_2019_09_OMC_JSON_V2 6782340624.999961 ns/iter 7198528749.999923 ns/iter 0.94
Evaluator_Draft4_Meta_1_No_Callback 246.44357711463394 ns/iter 246.75124630030646 ns/iter 1.00
Evaluator_Draft4_Required_Properties 329.3661035507257 ns/iter 344.1483570845308 ns/iter 0.96
Evaluator_Draft4_Many_Optional_Properties_Minimal_Match 18.66226327365421 ns/iter 19.42382963056453 ns/iter 0.96
Evaluator_Draft4_Few_Optional_Properties_Minimal_Match 10.59049966307909 ns/iter 10.960070987083578 ns/iter 0.97
Evaluator_Draft4_Items_Schema 349.7771951197807 ns/iter 352.78889374675384 ns/iter 0.99
Evaluator_Draft4_Nested_Object 3.0326815494947033 ns/iter 3.1005003716316244 ns/iter 0.98
Evaluator_Draft4_Properties_Triad_Optional 998.387113737105 ns/iter 1002.6160991447772 ns/iter 1.00
Evaluator_Draft4_Properties_Triad_Closed 707.4182607591252 ns/iter 691.5742045767005 ns/iter 1.02
Evaluator_Draft4_Properties_Triad_Required 1001.2594901572492 ns/iter 988.6785086636701 ns/iter 1.01
Evaluator_Draft4_Properties_Closed 78.19103514609189 ns/iter 78.73299536102398 ns/iter 0.99
Evaluator_Draft4_Non_Recursive_Ref 10.428808419482893 ns/iter 10.561079226336998 ns/iter 0.99
Evaluator_Draft4_Pattern_Properties_True 1126.2062393413764 ns/iter 1136.380173398421 ns/iter 0.99
Evaluator_Draft4_Ref_To_Single_Property 11.14333214876923 ns/iter 11.19132826917999 ns/iter 1.00
Evaluator_Draft4_Additional_Properties_Type 21.12452267276355 ns/iter 22.39897902344242 ns/iter 0.94
Evaluator_Draft4_Nested_Oneof 71.96239759460946 ns/iter 67.61276110250745 ns/iter 1.06
Evaluator_Draft4_Long_Enum 28.52951591138334 ns/iter 28.361593144528573 ns/iter 1.01
Evaluator_Draft4_Type_Object 6.451300715137861 ns/iter 6.011614225132212 ns/iter 1.07
Evaluator_Draft6_Property_Names 140.2460577179822 ns/iter 135.74220057178928 ns/iter 1.03
Evaluator_Draft7_If_Then_Else 27.941771721355092 ns/iter 27.527687689178443 ns/iter 1.02
Evaluator_Draft7_Vercel_1 74422.35879552728 ns/iter 75946.87962374916 ns/iter 0.98
Evaluator_2019_09_Unevaluated_Properties 147.66188359601364 ns/iter 150.42777006406675 ns/iter 0.98
Evaluator_2019_09_OMC_JSON_V2_1 2582.1967228806216 ns/iter 2545.499373080304 ns/iter 1.01
Evaluator_2020_12_Dynamic_Ref 514.2164770763923 ns/iter 508.6388040241405 ns/iter 1.01

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark (linux/llvm)

Benchmark suite Current: d2738f4 Previous: 6b684b3 Ratio
Compiler_Draft6_AdaptiveCard 58519052232.99997 ns/iter 57393789611.99998 ns/iter 1.02
Compiler_2019_09_OMC_JSON_V2 10787366524.999926 ns/iter 10719743804.000017 ns/iter 1.01
Evaluator_Draft4_Meta_1_No_Callback 352.3723348167392 ns/iter 352.3160850525179 ns/iter 1.00
Evaluator_Draft4_Required_Properties 748.7803283598264 ns/iter 749.2461784151523 ns/iter 1.00
Evaluator_Draft4_Many_Optional_Properties_Minimal_Match 37.126997007773895 ns/iter 37.77729234440494 ns/iter 0.98
Evaluator_Draft4_Few_Optional_Properties_Minimal_Match 23.302891981415005 ns/iter 22.888513841559767 ns/iter 1.02
Evaluator_Draft4_Items_Schema 729.8552898429745 ns/iter 690.1144045458353 ns/iter 1.06
Evaluator_Draft4_Nested_Object 4.921720404893366 ns/iter 4.940532509553367 ns/iter 1.00
Evaluator_Draft4_Properties_Triad_Optional 1263.8317867651072 ns/iter 1232.2827198896218 ns/iter 1.03
Evaluator_Draft4_Properties_Triad_Closed 984.0026992861106 ns/iter 961.2648669748231 ns/iter 1.02
Evaluator_Draft4_Properties_Triad_Required 1296.3168666808554 ns/iter 1262.4773525388164 ns/iter 1.03
Evaluator_Draft4_Properties_Closed 129.40819264242387 ns/iter 133.28387337001357 ns/iter 0.97
Evaluator_Draft4_Non_Recursive_Ref 28.453764257184435 ns/iter 28.82012037194244 ns/iter 0.99
Evaluator_Draft4_Pattern_Properties_True 1857.3304882159389 ns/iter 1865.7023407891738 ns/iter 1.00
Evaluator_Draft4_Ref_To_Single_Property 22.90613786872226 ns/iter 24.029693444246597 ns/iter 0.95
Evaluator_Draft4_Additional_Properties_Type 52.584793865761185 ns/iter 52.58139479809776 ns/iter 1.00
Evaluator_Draft4_Nested_Oneof 126.02577323944641 ns/iter 121.8676117951128 ns/iter 1.03
Evaluator_Draft4_Long_Enum 20.457722647314775 ns/iter 21.35618289959004 ns/iter 0.96
Evaluator_Draft4_Type_Object 10.236060232234854 ns/iter 10.852164123494276 ns/iter 0.94
Evaluator_Draft6_Property_Names 247.88079311264403 ns/iter 258.4597013839144 ns/iter 0.96
Evaluator_Draft7_If_Then_Else 55.283482782416655 ns/iter 53.987014034949055 ns/iter 1.02
Evaluator_Draft7_Vercel_1 97529.38913341537 ns/iter 103180.8073580543 ns/iter 0.95
Evaluator_2019_09_Unevaluated_Properties 191.96394335679437 ns/iter 190.88110018454827 ns/iter 1.01
Evaluator_2019_09_OMC_JSON_V2_1 3478.7005025617773 ns/iter 3428.9712775680096 ns/iter 1.01
Evaluator_2020_12_Dynamic_Ref 819.6610621707804 ns/iter 821.3687498607283 ns/iter 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark (linux/gcc)

Benchmark suite Current: d2738f4 Previous: 6b684b3 Ratio
Evaluator_2020_12_Dynamic_Ref 868.6660816951841 ns/iter 870.9549837858063 ns/iter 1.00
Evaluator_2019_09_Unevaluated_Properties 260.88088229715186 ns/iter 238.17301915004535 ns/iter 1.10
Evaluator_2019_09_OMC_JSON_V2_1 4894.71261489925 ns/iter 5168.227035848232 ns/iter 0.95
Evaluator_Draft7_If_Then_Else 52.253626862980425 ns/iter 57.5316108800236 ns/iter 0.91
Evaluator_Draft7_Vercel_1 111819.85517129404 ns/iter 119139.40225498432 ns/iter 0.94
Evaluator_Draft6_Property_Names 495.2725195427019 ns/iter 498.95400291450375 ns/iter 0.99
Evaluator_Draft4_Meta_1_No_Callback 428.9855359689289 ns/iter 427.72823093736673 ns/iter 1.00
Evaluator_Draft4_Required_Properties 1617.202595104663 ns/iter 1699.5309723736382 ns/iter 0.95
Evaluator_Draft4_Many_Optional_Properties_Minimal_Match 39.368065880860506 ns/iter 39.66359600635807 ns/iter 0.99
Evaluator_Draft4_Few_Optional_Properties_Minimal_Match 32.07917969210791 ns/iter 33.284962747903016 ns/iter 0.96
Evaluator_Draft4_Items_Schema 571.3025491117764 ns/iter 581.0389169258684 ns/iter 0.98
Evaluator_Draft4_Nested_Object 3.5221915830119483 ns/iter 3.519733941329023 ns/iter 1.00
Evaluator_Draft4_Properties_Triad_Optional 1270.654076622335 ns/iter 1302.86712432547 ns/iter 0.98
Evaluator_Draft4_Properties_Triad_Closed 1012.0971305829383 ns/iter 1038.617698576032 ns/iter 0.97
Evaluator_Draft4_Properties_Triad_Required 1313.7879525385138 ns/iter 1348.1010077257286 ns/iter 0.97
Evaluator_Draft4_Properties_Closed 183.97633374201038 ns/iter 188.17150324323498 ns/iter 0.98
Evaluator_Draft4_Non_Recursive_Ref 51.03824700018451 ns/iter 49.43212707477112 ns/iter 1.03
Evaluator_Draft4_Pattern_Properties_True 1738.7256589660428 ns/iter 1726.5697142203064 ns/iter 1.01
Evaluator_Draft4_Ref_To_Single_Property 35.79552584370891 ns/iter 36.893467864069095 ns/iter 0.97
Evaluator_Draft4_Additional_Properties_Type 105.43652862173496 ns/iter 103.44481891813895 ns/iter 1.02
Evaluator_Draft4_Nested_Oneof 112.46497070800692 ns/iter 112.2552542531808 ns/iter 1.00
Evaluator_Draft4_Long_Enum 19.611846464234556 ns/iter 19.812486549336278 ns/iter 0.99
Evaluator_Draft4_Type_Object 10.282879686652556 ns/iter 10.282668416143459 ns/iter 1.00
Compiler_2019_09_OMC_JSON_V2 11189064893.000023 ns/iter 11205384157.9999 ns/iter 1.00
Compiler_Draft6_AdaptiveCard 66932444561.00001 ns/iter 65456100031.99999 ns/iter 1.02

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark (macos/gcc)

Benchmark suite Current: d2738f4 Previous: 6b684b3 Ratio
Compiler_Draft6_AdaptiveCard 52043697834.01489 ns/iter 46130548000.33569 ns/iter 1.13
Compiler_2019_09_OMC_JSON_V2 8484287977.218628 ns/iter 8284448146.820068 ns/iter 1.02
Evaluator_Draft4_Meta_1_No_Callback 268.9444807219371 ns/iter 257.4664446454487 ns/iter 1.04
Evaluator_Draft4_Required_Properties 478.95743856679593 ns/iter 456.0304805873986 ns/iter 1.05
Evaluator_Draft4_Many_Optional_Properties_Minimal_Match 26.357152471230233 ns/iter 23.73762384707476 ns/iter 1.11
Evaluator_Draft4_Few_Optional_Properties_Minimal_Match 14.87535862491909 ns/iter 14.446873000803954 ns/iter 1.03
Evaluator_Draft4_Items_Schema 511.46049241243463 ns/iter 482.6414133689619 ns/iter 1.06
Evaluator_Draft4_Nested_Object 2.1288392264135285 ns/iter 2.048100512600004 ns/iter 1.04
Evaluator_Draft4_Properties_Triad_Optional 1176.0923619019204 ns/iter 1080.693832436273 ns/iter 1.09
Evaluator_Draft4_Properties_Triad_Closed 910.7908951284687 ns/iter 858.8724333914523 ns/iter 1.06
Evaluator_Draft4_Properties_Triad_Required 1166.2079726056409 ns/iter 1069.0418678914764 ns/iter 1.09
Evaluator_Draft4_Properties_Closed 101.17134174121277 ns/iter 99.7011146680335 ns/iter 1.01
Evaluator_Draft4_Non_Recursive_Ref 19.85474348576836 ns/iter 21.000697251774366 ns/iter 0.95
Evaluator_Draft4_Pattern_Properties_True 1655.338123050912 ns/iter 1544.0751768844693 ns/iter 1.07
Evaluator_Draft4_Ref_To_Single_Property 14.46248673623708 ns/iter 13.517137933358912 ns/iter 1.07
Evaluator_Draft4_Additional_Properties_Type 39.428480917558566 ns/iter 38.76861662413583 ns/iter 1.02
Evaluator_Draft4_Nested_Oneof 82.93001517119886 ns/iter 80.50963851841409 ns/iter 1.03
Evaluator_Draft4_Long_Enum 12.183947521395764 ns/iter 11.857306245192886 ns/iter 1.03
Evaluator_Draft4_Type_Object 6.773252958544478 ns/iter 6.573463341020236 ns/iter 1.03
Evaluator_Draft6_Property_Names 179.6218156083644 ns/iter 173.9303027607643 ns/iter 1.03
Evaluator_Draft7_If_Then_Else 34.65359045752073 ns/iter 32.30972890551073 ns/iter 1.07
Evaluator_Draft7_Vercel_1 85675.74547558295 ns/iter 82867.76914761934 ns/iter 1.03
Evaluator_2019_09_Unevaluated_Properties 163.21047687201207 ns/iter 148.4203716013303 ns/iter 1.10
Evaluator_2019_09_OMC_JSON_V2_1 2640.0087275349397 ns/iter 2668.583102058009 ns/iter 0.99
Evaluator_2020_12_Dynamic_Ref 606.4864724517467 ns/iter 625.6638850336269 ns/iter 0.97

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark (windows/msvc)

Benchmark suite Current: d2738f4 Previous: 6b684b3 Ratio
Compiler_Draft6_AdaptiveCard 124306105800.00006 ns/iter 121418590099.99995 ns/iter 1.02
Compiler_2019_09_OMC_JSON_V2 25485530200.000084 ns/iter 25135184699.99997 ns/iter 1.01
Evaluator_Draft4_Meta_1_No_Callback 453.2251486896378 ns/iter 438.780628005168 ns/iter 1.03
Evaluator_Draft4_Required_Properties 711.7933035713367 ns/iter 736.5248883929152 ns/iter 0.97
Evaluator_Draft4_Many_Optional_Properties_Minimal_Match 34.984072756797985 ns/iter 36.15237007040563 ns/iter 0.97
Evaluator_Draft4_Few_Optional_Properties_Minimal_Match 20.930905637465905 ns/iter 22.221549999997592 ns/iter 0.94
Evaluator_Draft4_Items_Schema 606.5797321430263 ns/iter 667.5066071428505 ns/iter 0.91
Evaluator_Draft4_Nested_Object 4.026366560702179 ns/iter 3.8141523437503926 ns/iter 1.06
Evaluator_Draft4_Properties_Triad_Optional 4368.663750000223 ns/iter 4380.438124999841 ns/iter 1.00
Evaluator_Draft4_Properties_Triad_Closed 3480.191616414417 ns/iter 3522.895221862609 ns/iter 0.99
Evaluator_Draft4_Properties_Triad_Required 4377.169999999353 ns/iter 4414.462500000127 ns/iter 0.99
Evaluator_Draft4_Properties_Closed 139.55923707321443 ns/iter 143.09884852237337 ns/iter 0.98
Evaluator_Draft4_Non_Recursive_Ref 16.778948660715848 ns/iter 14.898959821429969 ns/iter 1.13
Evaluator_Draft4_Pattern_Properties_True 7013.43571428684 ns/iter 7052.571428571851 ns/iter 0.99
Evaluator_Draft4_Ref_To_Single_Property 22.047756249996553 ns/iter 22.813074999998406 ns/iter 0.97
Evaluator_Draft4_Additional_Properties_Type 30.570056349463233 ns/iter 33.39598244019311 ns/iter 0.92
Evaluator_Draft4_Nested_Oneof 141.38045127766725 ns/iter 153.6311160714224 ns/iter 0.92
Evaluator_Draft4_Long_Enum 21.7182875000006 ns/iter 22.780285460044183 ns/iter 0.95
Evaluator_Draft4_Type_Object 9.326862011933839 ns/iter 9.88748031300233 ns/iter 0.94
Evaluator_Draft6_Property_Names 413.4203750000153 ns/iter 415.37064217092626 ns/iter 1.00
Evaluator_Draft7_If_Then_Else 51.717550000012125 ns/iter 53.62727678572249 ns/iter 0.96
Evaluator_Draft7_Vercel_1 110682.57812500093 ns/iter 113537.00000000799 ns/iter 0.97
Evaluator_2019_09_Unevaluated_Properties 421.4930881797151 ns/iter 427.2051875000215 ns/iter 0.99
Evaluator_2019_09_OMC_JSON_V2_1 4127.961862515886 ns/iter 4118.359565429822 ns/iter 1.00
Evaluator_2020_12_Dynamic_Ref 1333.9514803786956 ns/iter 1351.0005357143687 ns/iter 0.99

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.