Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: refactoring around glob matching #4540

Merged
merged 1 commit into from
Jan 31, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ cxx_link(dfly_core base absl::flat_hash_map absl::str_format redis_lib TRDP::lua
add_executable(dash_bench dash_bench.cc)
cxx_link(dash_bench dfly_core redis_test_lib)

cxx_test(dfly_core_test dfly_core LABELS DFLY)
cxx_test(dfly_core_test dfly_core TRDP::fast_float LABELS DFLY)
cxx_test(compact_object_test dfly_core LABELS DFLY)
cxx_test(extent_tree_test dfly_core LABELS DFLY)
cxx_test(dash_test dfly_core file redis_test_lib DATA testdata/ids.txt LABELS DFLY)
Expand Down
125 changes: 120 additions & 5 deletions src/core/dfly_core_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,15 @@
// See LICENSE for licensing terms.
//

#include <absl/strings/charconv.h>
#include <absl/strings/numbers.h>
#include <fast_float/fast_float.h>
#include <reflex/matcher.h>

#include <random>

#include "base/gtest.h"
#include "base/logging.h"
#include "core/glob_matcher.h"
#include "core/intent_lock.h"
#include "core/tx_queue.h"
Expand All @@ -11,6 +19,28 @@ namespace dfly {

using namespace std;

std::random_device rd;

static string GetRandomHex(size_t len) {
std::string res(len, '\0');
size_t indx = 0;

for (; indx < len; indx += 16) { // 2 chars per byte
absl::numbers_internal::FastHexToBufferZeroPad16(rd(), res.data() + indx);
}

if (indx < len) {
char buf[24];
absl::numbers_internal::FastHexToBufferZeroPad16(rd(), buf);

for (unsigned j = 0; indx < len; indx++, j++) {
res[indx] = buf[j];
}
}

return res;
}

class TxQueueTest : public ::testing::Test {
protected:
TxQueueTest() {
Expand Down Expand Up @@ -78,33 +108,118 @@ class StringMatchTest : public ::testing::Test {
};

TEST_F(StringMatchTest, Basic) {
EXPECT_EQ(MatchLen("", "", 0), 1);

EXPECT_EQ(MatchLen("*", "", 0), 0);
EXPECT_EQ(MatchLen("*", "", 1), 0);
EXPECT_EQ(MatchLen("\\\\", "\\", 0), 1);
EXPECT_EQ(MatchLen("h\\\\llo", "h\\llo", 0), 1);

// ExactMatch
EXPECT_EQ(MatchLen("hello", "hello", 0), 1);
EXPECT_EQ(MatchLen("hello", "world", 0), 0);

// Wildcards
EXPECT_EQ(MatchLen("*", "hello", 0), 1);
EXPECT_EQ(MatchLen("h*", "hello", 0), 1);
EXPECT_EQ(MatchLen("h*", "abc", 0), 0);
EXPECT_EQ(MatchLen("h*o", "hello", 0), 1);
EXPECT_EQ(MatchLen("hel*o*", "hello*", 0), 1);
EXPECT_EQ(MatchLen("h\\*llo", "h*llo", 0), 1);

// Single character wildcard
EXPECT_EQ(MatchLen("h[aeiou]llo", "hello", 0), 1);
EXPECT_EQ(MatchLen("h[aeiou]llo", "hallo", 0), 1);
EXPECT_EQ(MatchLen("h[^aeiou]llo", "hallo", 0), 0);
EXPECT_EQ(MatchLen("h[a-z]llo", "hello", 0), 1);

EXPECT_EQ(MatchLen("h[A-Z]llo", "Hello", 1), 1);

EXPECT_EQ(MatchLen("h\\*llo", "h*llo", 0), 1);
EXPECT_EQ(MatchLen("h\\\\llo", "h\\llo", 0), 1);
EXPECT_EQ(MatchLen("h[A-Z]llo", "HeLLO", 1), 1);
EXPECT_EQ(MatchLen("[[]", "[", 0), 1);

// ?
EXPECT_EQ(MatchLen("h?llo", "hello", 0), 1);
EXPECT_EQ(MatchLen("h??llo", "ha llo", 0), 1);
EXPECT_EQ(MatchLen("h??llo", "hallo", 0), 0);
EXPECT_EQ(MatchLen("h\\?llo", "hallo", 0), 0);
EXPECT_EQ(MatchLen("h\\?llo", "h?llo", 0), 1);

// special regex chars
EXPECT_EQ(MatchLen("h\\[^|", "h[^|", 0), 1);
EXPECT_EQ(MatchLen("[^", "[^", 0), 0);
EXPECT_EQ(MatchLen("[$?^]a", "?a", 0), 1);
}

using benchmark::DoNotOptimize;
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

moved from dragonfly_test.


// Parse Double benchmarks
static void BM_ParseFastFloat(benchmark::State& state) {
std::vector<std::string> args(100);
std::random_device rd;

for (auto& arg : args) {
arg = std::to_string(std::uniform_real_distribution<double>(0, 1e5)(rd));
}
double res;
while (state.KeepRunning()) {
for (const auto& arg : args) {
fast_float::from_chars(arg.data(), arg.data() + arg.size(), res);
}
}
}
BENCHMARK(BM_ParseFastFloat);

static void BM_ParseDoubleAbsl(benchmark::State& state) {
std::vector<std::string> args(100);

for (auto& arg : args) {
arg = std::to_string(std::uniform_real_distribution<double>(0, 1e5)(rd));
}

double res;
while (state.KeepRunning()) {
for (const auto& arg : args) {
absl::from_chars(arg.data(), arg.data() + arg.size(), res);
}
}
}
BENCHMARK(BM_ParseDoubleAbsl);

static void BM_MatchGlob(benchmark::State& state) {
string random_val = GetRandomHex(state.range(0));
GlobMatcher matcher("*foobar*", true);
while (state.KeepRunning()) {
DoNotOptimize(matcher.Matches(random_val));
}
}
BENCHMARK(BM_MatchGlob)->Arg(1000)->Arg(10000);

static void BM_MatchFindSubstr(benchmark::State& state) {
string random_val = GetRandomHex(state.range(0));

while (state.KeepRunning()) {
DoNotOptimize(random_val.find("foobar"));
}
}
BENCHMARK(BM_MatchFindSubstr)->Arg(1000)->Arg(10000);

static void BM_MatchReflexFind(benchmark::State& state) {
string random_val = GetRandomHex(state.range(0));
reflex::Matcher matcher("foobar");
while (state.KeepRunning()) {
matcher.input(random_val);
DoNotOptimize(matcher.find());
}
}
BENCHMARK(BM_MatchReflexFind)->Arg(1000)->Arg(10000);

static void BM_MatchReflexFindStar(benchmark::State& state) {
string random_val = GetRandomHex(state.range(0));
reflex::Matcher matcher(".*foobar");

while (state.KeepRunning()) {
matcher.input(random_val);
DoNotOptimize(matcher.find());
}
}
BENCHMARK(BM_MatchReflexFindStar)->Arg(1000)->Arg(10000);

} // namespace dfly
3 changes: 3 additions & 0 deletions src/core/glob_matcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
namespace dfly {

class GlobMatcher {
GlobMatcher(const GlobMatcher&) = delete;
GlobMatcher& operator=(const GlobMatcher&) = delete;

public:
explicit GlobMatcher(std::string_view pattern, bool case_sensitive);

Expand Down
2 changes: 1 addition & 1 deletion src/server/common.cc
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ OpResult<ScanOpts> ScanOpts::TryFrom(CmdArgList args) {
} else if (opt == "MATCH") {
string_view pattern = ArgS(args, i + 1);
if (pattern != "*")
scan_opts.matcher.emplace(pattern, true);
scan_opts.matcher.reset(new GlobMatcher{pattern, true});
} else if (opt == "TYPE") {
auto obj_type = ObjTypeFromString(ArgS(args, i + 1));
if (!obj_type) {
Expand Down
2 changes: 1 addition & 1 deletion src/server/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ class Context : protected Cancellation {
};

struct ScanOpts {
std::optional<GlobMatcher> matcher;
std::unique_ptr<GlobMatcher> matcher;
size_t limit = 10;
std::optional<CompactObjType> type_filter;
unsigned bucket_id = UINT_MAX;
Expand Down
91 changes: 0 additions & 91 deletions src/server/dragonfly_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,8 @@ extern "C" {
}

#include <absl/strings/ascii.h>
#include <absl/strings/charconv.h>
#include <absl/strings/str_join.h>
#include <absl/strings/strip.h>
#include <fast_float/fast_float.h>
#include <gmock/gmock.h>
#include <reflex/matcher.h>

Expand Down Expand Up @@ -832,97 +830,8 @@ TEST_F(DflyEngineTest, ReplicaofRejectOnLoad) {
ASSERT_THAT(res, ErrArg("LOADING Dragonfly is loading the dataset in memory"));
}

using benchmark::DoNotOptimize;

// TODO: to test transactions with a single shard since then all transactions become local.
// To consider having a parameter in dragonfly engine controlling number of shards
// unconditionally from number of cpus. TO TEST BLPOP under multi for single/multi argument case.

// Parse Double benchmarks
static void BM_ParseFastFloat(benchmark::State& state) {
std::vector<std::string> args(100);
std::random_device rd;

for (auto& arg : args) {
arg = std::to_string(std::uniform_real_distribution<double>(0, 1e5)(rd));
}
double res;
while (state.KeepRunning()) {
for (const auto& arg : args) {
fast_float::from_chars(arg.data(), arg.data() + arg.size(), res);
}
}
}
BENCHMARK(BM_ParseFastFloat);

static void BM_ParseDoubleAbsl(benchmark::State& state) {
std::vector<std::string> args(100);
std::random_device rd;
for (auto& arg : args) {
arg = std::to_string(std::uniform_real_distribution<double>(0, 1e5)(rd));
}

double res;
while (state.KeepRunning()) {
for (const auto& arg : args) {
absl::from_chars(arg.data(), arg.data() + arg.size(), res);
}
}
}
BENCHMARK(BM_ParseDoubleAbsl);

static void BM_MatchPattern(benchmark::State& state) {
absl::InsecureBitGen eng;
string random_val = GetRandomHex(eng, state.range(0));
ScanOpts scan_opts;
scan_opts.matcher.emplace("*foobar*", true);
while (state.KeepRunning()) {
DoNotOptimize(scan_opts.Matches(random_val));
}
}
BENCHMARK(BM_MatchPattern)->Arg(1000)->Arg(10000);

static void BM_MatchFindSubstr(benchmark::State& state) {
absl::InsecureBitGen eng;
string random_val = GetRandomHex(eng, state.range(0));

while (state.KeepRunning()) {
DoNotOptimize(random_val.find("foobar"));
}
}
BENCHMARK(BM_MatchFindSubstr)->Arg(1000)->Arg(10000);

static void BM_MatchReflexFind(benchmark::State& state) {
absl::InsecureBitGen eng;
string random_val = GetRandomHex(eng, state.range(0));
reflex::Matcher matcher("foobar");
matcher.input("xxxxxxfoobaryyyyyyyy");
CHECK_GT(matcher.find(), 0u);
matcher.input("xxxxxxfoobayyyyyyyy");
CHECK_EQ(0u, matcher.find());

while (state.KeepRunning()) {
matcher.input(random_val);
DoNotOptimize(matcher.find());
}
}
BENCHMARK(BM_MatchReflexFind)->Arg(1000)->Arg(10000);

static void BM_MatchReflexMatch(benchmark::State& state) {
absl::InsecureBitGen eng;
string random_val = GetRandomHex(eng, state.range(0));
reflex::Matcher matcher(".*foobar.*");
matcher.input("xxxxxxfoobaryyyyyyyy");
CHECK_GT(matcher.matches(), 0u);
matcher.input("xxxxxxfoobayyyyyyyy");
CHECK_EQ(0u, matcher.matches());

matcher.input(random_val);
while (state.KeepRunning()) {
matcher.input(random_val);
DoNotOptimize(matcher.matches());
}
}
BENCHMARK(BM_MatchReflexMatch)->Arg(1000)->Arg(10000);

} // namespace dfly
4 changes: 2 additions & 2 deletions src/server/generic_family.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1179,7 +1179,7 @@ void GenericFamily::Keys(CmdArgList args, const CommandContext& cmd_cntx) {

ScanOpts scan_opts;
if (pattern != "*") {
scan_opts.matcher.emplace(pattern, true);
scan_opts.matcher.reset(new GlobMatcher{pattern, true});
}

scan_opts.limit = 512;
Expand Down Expand Up @@ -1745,7 +1745,7 @@ void GenericFamily::Scan(CmdArgList args, const CommandContext& cmd_cntx) {
return builder->SendError(ops.status());
}

ScanOpts scan_op = ops.value();
const ScanOpts& scan_op = ops.value();

StringVec keys;
cursor = ScanGeneric(cursor, scan_op, &keys, cmd_cntx.conn_cntx);
Expand Down
2 changes: 1 addition & 1 deletion src/server/hset_family.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1027,7 +1027,7 @@ void HSetFamily::HScan(CmdArgList args, const CommandContext& cmd_cntx) {
return cmd_cntx.rb->SendError(ops.status());
}

ScanOpts scan_op = ops.value();
const ScanOpts& scan_op = ops.value();

auto cb = [&](Transaction* t, EngineShard* shard) {
return OpScan(t->GetOpArgs(shard), key, &cursor, scan_op);
Expand Down
4 changes: 2 additions & 2 deletions src/server/set_family.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
extern "C" {
#include "redis/intset.h"
#include "redis/redis_aux.h"
#include "redis/util.h"
#include "redis/util.h" // for string2ll
}

#include "base/flags.h"
Expand Down Expand Up @@ -1413,7 +1413,7 @@ void SScan(CmdArgList args, const CommandContext& cmd_cntx) {
return cmd_cntx.rb->SendError(ops.status());
}

ScanOpts scan_op = ops.value();
const ScanOpts& scan_op = ops.value();

auto cb = [&](Transaction* t, EngineShard* shard) {
return OpScan(t->GetOpArgs(shard), key, &cursor, scan_op);
Expand Down
2 changes: 1 addition & 1 deletion src/server/zset_family.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2602,7 +2602,7 @@ void ZSetFamily::ZScan(CmdArgList args, const CommandContext& cmd_cntx) {
DVLOG(1) << "Scan invalid args - return " << ops << " to the user";
return rb->SendError(ops.status());
}
ScanOpts scan_op = ops.value();
const ScanOpts& scan_op = ops.value();

auto cb = [&](Transaction* t, EngineShard* shard) {
return OpScan(t->GetOpArgs(shard), key, &cursor, scan_op);
Expand Down
Loading