Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: GlobMatcher now uses the reflex::Matcher regex engine #4528

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 0 additions & 73 deletions .github/workflows/scorecard.yml

This file was deleted.

3 changes: 1 addition & 2 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ add_third_party(

add_third_party(
reflex
URL https://github.com/Genivia/RE-flex/archive/refs/tags/v5.1.0.tar.gz
URL https://github.com/Genivia/RE-flex/archive/refs/tags/v5.2.1.tar.gz
PATCH_COMMAND autoreconf -fi
CONFIGURE_COMMAND <SOURCE_DIR>/configure --disable-avx2 --prefix=${THIRD_PARTY_LIB_DIR}/reflex
CXX=${CMAKE_CXX_COMPILER} CC=${CMAKE_C_COMPILER}
Expand Down Expand Up @@ -125,7 +125,6 @@ add_third_party(
-DFLATBUFFERS_BUILD_FLATC=OFF"
)


add_library(TRDP::jsoncons INTERFACE IMPORTED)
add_dependencies(TRDP::jsoncons jsoncons_project)
set_target_properties(TRDP::jsoncons PROPERTIES
Expand Down
28 changes: 27 additions & 1 deletion src/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,25 @@ cxx_link(dfly_core base absl::flat_hash_map absl::str_format redis_lib TRDP::lua
add_executable(dash_bench dash_bench.cc)
cxx_link(dash_bench dfly_core redis_test_lib)

cxx_test(dfly_core_test dfly_core TRDP::fast_float LABELS DFLY)
# Optional dependency: look for the PCRE2 library built for 8-bit code units.
# PCRE2_LIB holds the library path when found and an empty string otherwise,
# so it can be passed to the test's link list unconditionally.
find_library(LIB_PCRE2 NAMES pcre2-8)
if(LIB_PCRE2)
set(PCRE2_LIB ${LIB_PCRE2})
else()
message(STATUS "pcre2-8 not found. Building without PCRE2 support.")
set(PCRE2_LIB "")
endif()


# Optional dependency: look for the RE2 library.
# RE2_LIB holds the library path when found and an empty string otherwise,
# so it can be passed to the test's link list unconditionally.
find_library(LIB_RE2 NAMES re2)
if(LIB_RE2)
set(RE2_LIB ${LIB_RE2})
else()
message(STATUS "re2 not found. Building without RE2 support.")
set(RE2_LIB "")
endif()


cxx_test(dfly_core_test dfly_core TRDP::fast_float ${PCRE2_LIB} ${RE2_LIB} LABELS DFLY)
cxx_test(compact_object_test dfly_core LABELS DFLY)
cxx_test(extent_tree_test dfly_core LABELS DFLY)
cxx_test(dash_test dfly_core file redis_test_lib DATA testdata/ids.txt.zst LABELS DFLY)
Expand All @@ -30,3 +48,11 @@ cxx_test(flatbuffers_test dfly_core TRDP::flatbuffers LABELS DFLY)
cxx_test(bloom_test dfly_core LABELS DFLY)
cxx_test(allocation_tracker_test dfly_core absl::random_random LABELS DFLY)
cxx_test(qlist_test dfly_core DATA testdata/list.txt.zst LABELS DFLY)

# Define USE_PCRE2 for dfly_core_test only when the PCRE2 library was found;
# the test source guards its PCRE2 include and benchmark with this macro.
if(LIB_PCRE2)
target_compile_definitions(dfly_core_test PRIVATE USE_PCRE2)
endif()

# Define USE_RE2 for dfly_core_test only when the RE2 library was found;
# the test source guards its RE2 include and benchmark with this macro.
if(LIB_RE2)
target_compile_definitions(dfly_core_test PRIVATE USE_RE2)
endif()
205 changes: 204 additions & 1 deletion src/core/dfly_core_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,20 @@
#include <absl/strings/charconv.h>
#include <absl/strings/numbers.h>
#include <fast_float/fast_float.h>

#ifdef USE_PCRE2
#define PCRE2_CODE_UNIT_WIDTH 8
#include <pcre2.h>
#endif

#ifdef USE_RE2
#include <re2/re2.h>
#endif

#include <reflex/matcher.h>

#include <random>
#include <regex>

#include "base/gtest.h"
#include "base/logging.h"
Expand Down Expand Up @@ -41,6 +52,124 @@ static string GetRandomHex(size_t len) {
return res;
}

/* Glob-style pattern matching taken from Redis.
 *
 * Matches the first `stringLen` bytes of `string` against the first
 * `patternLen` bytes of `pattern`. Supported syntax:
 *   *        any sequence of characters (handled by recursion on the rest)
 *   ?        any single character
 *   [abc]    one character from the set; `[^abc]` negates, `a-z` is a range,
 *            and `\x` inside the set matches `x` literally
 *   \x       matches `x` literally
 *
 * `nocase` != 0 compares characters case-insensitively via tolower().
 * Returns 1 on match and 0 otherwise.
 *
 * Neither buffer has to be NUL-terminated: every read of `pattern` is guarded
 * by `patternLen`, so the result depends only on the bytes inside the given
 * lengths. */
static int stringmatchlen(const char* pattern, int patternLen, const char* string, int stringLen,
                          int nocase) {
  while (patternLen && stringLen) {
    switch (pattern[0]) {
      case '*':
        /* Collapse a run of '*'. pattern[1] is only inspected while it is
         * still inside the pattern. */
        while (patternLen > 1 && pattern[1] == '*') {
          pattern++;
          patternLen--;
        }
        if (patternLen == 1)
          return 1; /* trailing '*' matches whatever is left */
        /* Try to match the rest of the pattern at every remaining offset. */
        while (stringLen) {
          if (stringmatchlen(pattern + 1, patternLen - 1, string, stringLen, nocase))
            return 1; /* match */
          string++;
          stringLen--;
        }
        return 0; /* no match */
      case '?':
        string++;
        stringLen--;
        break;
      case '[': {
        pattern++;
        patternLen--;
        /* Only probe for '^' if the pattern did not end right after '['. */
        const int neg = patternLen > 0 && pattern[0] == '^';
        if (neg) {
          pattern++;
          patternLen--;
        }
        int match = 0;
        while (1) {
          if (patternLen == 0) {
            /* Unterminated class: step back so that the shared advance after
             * the switch lands exactly on the end of the pattern. This check
             * comes first so pattern[0] is never read past the end. */
            pattern--;
            patternLen++;
            break;
          } else if (pattern[0] == '\\' && patternLen >= 2) {
            pattern++;
            patternLen--;
            if (pattern[0] == string[0])
              match = 1;
          } else if (pattern[0] == ']') {
            break;
          } else if (patternLen >= 3 && pattern[1] == '-') {
            int start = pattern[0];
            int end = pattern[2];
            int c = string[0];
            if (start > end) {
              /* Accept reversed ranges such as [z-a]. */
              int t = start;
              start = end;
              end = t;
            }
            if (nocase) {
              start = tolower(start);
              end = tolower(end);
              c = tolower(c);
            }
            pattern += 2;
            patternLen -= 2;
            if (c >= start && c <= end)
              match = 1;
          } else {
            if (!nocase) {
              if (pattern[0] == string[0])
                match = 1;
            } else {
              if (tolower(static_cast<int>(pattern[0])) == tolower(static_cast<int>(string[0])))
                match = 1;
            }
          }
          pattern++;
          patternLen--;
        }
        if (neg)
          match = !match;
        if (!match)
          return 0; /* no match */
        string++;
        stringLen--;
        break;
      }
      case '\\':
        /* Skip the backslash (if anything follows it) and compare the next
         * character literally. */
        if (patternLen >= 2) {
          pattern++;
          patternLen--;
        }
        /* fall through */
      default:
        if (!nocase) {
          if (pattern[0] != string[0])
            return 0; /* no match */
        } else {
          if (tolower(static_cast<int>(pattern[0])) != tolower(static_cast<int>(string[0])))
            return 0; /* no match */
        }
        string++;
        stringLen--;
        break;
    }
    pattern++;
    patternLen--;
    if (stringLen == 0) {
      /* The string is exhausted: remaining '*' in the pattern match the empty
       * suffix. Bounded by patternLen so we never scan past the pattern. */
      while (patternLen && *pattern == '*') {
        pattern++;
        patternLen--;
      }
      break;
    }
  }
  if (patternLen == 0 && stringLen == 0)
    return 1;
  return 0;
}

class TxQueueTest : public ::testing::Test {
protected:
TxQueueTest() {
Expand Down Expand Up @@ -107,6 +236,18 @@ class StringMatchTest : public ::testing::Test {
}
};

// Checks GlobMatcher::Glob2Regex against a table of (glob, expected regex) pairs.
TEST_F(StringMatchTest, Glob2Regex) {
  struct Expectation {
    const char* glob;
    const char* regex;
  };

  const Expectation kExpectations[] = {
      {"", ""},
      {"*", ".*"},
      {"\\?", "\\?"},
      {"[abc]", "[abc]"},
      {"[^abc]", "[^abc]"},
      {"h\\[^|", "h\\[\\^\\|"},
      {"[$?^]a", "[$?^]a"},
      {"[^]a", ".a"},
      {"[]a", "[]a"},
  };

  for (const Expectation& expectation : kExpectations) {
    EXPECT_EQ(GlobMatcher::Glob2Regex(expectation.glob), expectation.regex);
  }
}

TEST_F(StringMatchTest, Basic) {
EXPECT_EQ(MatchLen("", "", 0), 1);

Expand Down Expand Up @@ -134,15 +275,18 @@ TEST_F(StringMatchTest, Basic) {
EXPECT_EQ(MatchLen("h[a-z]llo", "hello", 0), 1);
EXPECT_EQ(MatchLen("h[A-Z]llo", "HeLLO", 1), 1);
EXPECT_EQ(MatchLen("[[]", "[", 0), 1);
EXPECT_EQ(MatchLen("[^]a", "xa", 0), 1);

// ?
EXPECT_EQ(MatchLen("h?llo", "hello", 0), 1);
EXPECT_EQ(MatchLen("h??llo", "ha llo", 0), 1);
EXPECT_EQ(MatchLen("h??llo", "hallo", 0), 0);
EXPECT_EQ(MatchLen("h\\?llo", "hallo", 0), 0);
EXPECT_EQ(MatchLen("h\\?llo", "h?llo", 0), 1);
EXPECT_EQ(MatchLen("abc?", "abc\n", 0), 1);
}

// special regex chars
TEST_F(StringMatchTest, Special) {
EXPECT_EQ(MatchLen("h\\[^|", "h[^|", 0), 1);
EXPECT_EQ(MatchLen("[^", "[^", 0), 0);
EXPECT_EQ(MatchLen("[$?^]a", "?a", 0), 1);
Expand Down Expand Up @@ -222,4 +366,63 @@ static void BM_MatchReflexFindStar(benchmark::State& state) {
}
BENCHMARK(BM_MatchReflexFindStar)->Arg(1000)->Arg(10000);

// Benchmarks std::regex_match of ".*foobar" against a string produced by
// GetRandomHex whose length is the benchmark argument.
static void BM_MatchStd(benchmark::State& state) {
  const string subject = GetRandomHex(state.range(0));
  const std::regex compiled(".*foobar");
  std::smatch captures;
  while (state.KeepRunning()) {
    std::regex_match(subject, captures, compiled);
  }
}
BENCHMARK(BM_MatchStd)->Arg(1000)->Arg(10000);


// Benchmarks the Redis reference glob matcher (stringmatchlen) with the
// pattern "*foobar*" against a string produced by GetRandomHex whose length is
// the benchmark argument.
static void BM_MatchRedisGlob(benchmark::State& state) {
  string random_val = GetRandomHex(state.range(0));
  const char* pattern = "*foobar*";
  // Both lengths are loop-invariant: compute them once so the timed loop
  // measures only the matcher, and make the size_t -> int narrowing explicit.
  const int pattern_len = static_cast<int>(strlen(pattern));
  const int value_len = static_cast<int>(random_val.size());
  while (state.KeepRunning()) {
    DoNotOptimize(stringmatchlen(pattern, pattern_len, random_val.c_str(), value_len, 0));
  }
}
BENCHMARK(BM_MatchRedisGlob)->Arg(1000)->Arg(10000);

#ifdef USE_RE2
// Benchmarks RE2::FullMatch of ".*foobar.*" (Latin1 options) against a string
// produced by GetRandomHex whose length is the benchmark argument.
static void BM_MatchRe2(benchmark::State& state) {
  const string subject = GetRandomHex(state.range(0));
  re2::RE2 compiled(".*foobar.*", re2::RE2::Latin1);
  CHECK(compiled.ok());

  while (state.KeepRunning()) {
    DoNotOptimize(re2::RE2::FullMatch(subject, compiled));
  }
}
BENCHMARK(BM_MatchRe2)->Arg(1000)->Arg(10000);
#endif

#ifdef USE_PCRE2
// Benchmarks the PCRE2 JIT fast path on a fully anchored match of ".*foobar"
// against a string produced by GetRandomHex whose length is the benchmark
// argument.
//
// The anchoring options are given to pcre2_compile() rather than to
// pcre2_jit_match(): the JIT fast path ignores option bits it does not
// support, PCRE2_ANCHORED and PCRE2_ENDANCHORED among them, so passing them at
// match time would silently benchmark an unanchored search.
static void BM_MatchPcre2Jit(benchmark::State& state) {
  string random_val = GetRandomHex(state.range(0));
  int errnum;
  PCRE2_SIZE erroffset;
  pcre2_code* re =
      pcre2_compile(reinterpret_cast<PCRE2_SPTR>(".*foobar"), PCRE2_ZERO_TERMINATED,
                    PCRE2_ANCHORED | PCRE2_ENDANCHORED, &errnum, &erroffset, nullptr);
  CHECK(re);
  CHECK_EQ(0, pcre2_jit_compile(re, PCRE2_JIT_COMPLETE));
  pcre2_match_data* match_data = pcre2_match_data_create_from_pattern(re, nullptr);
  CHECK(match_data);

  // Sanity check: a subject that ends with "foobar" must match before timing.
  const char sample[] = "aaaaaaaaaaaaafoobar";
  int rc = pcre2_jit_match(re, reinterpret_cast<PCRE2_SPTR>(sample), strlen(sample), 0, 0,
                           match_data, nullptr);
  CHECK_EQ(1, rc);

  while (state.KeepRunning()) {
    rc = pcre2_jit_match(re, reinterpret_cast<PCRE2_SPTR>(random_val.c_str()), random_val.size(),
                         0, 0, match_data, nullptr);
    // The benchmark expects the generated subject never to match.
    CHECK_EQ(PCRE2_ERROR_NOMATCH, rc);
  }
  pcre2_match_data_free(match_data);
  pcre2_code_free(re);
}
BENCHMARK(BM_MatchPcre2Jit)->Arg(1000)->Arg(10000);
#endif

} // namespace dfly
Loading
Loading