Skip to content

Commit

Permalink
Add gen_fbin_file.cpp in benchmark (#203)
Browse files Browse the repository at this point in the history
Signed-off-by: Yudong Cai <[email protected]>
  • Loading branch information
cydrain authored Nov 20, 2023
1 parent 66b1656 commit 8eead9b
Show file tree
Hide file tree
Showing 13 changed files with 224 additions and 47 deletions.
1 change: 1 addition & 0 deletions benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,4 @@ benchmark_test(benchmark_float_range hdf5/benchmark_float_range.cpp)
benchmark_test(benchmark_float_range_bitset hdf5/benchmark_float_range_bitset.cpp)

benchmark_test(gen_hdf5_file hdf5/gen_hdf5_file.cpp)
benchmark_test(gen_fbin_file hdf5/gen_fbin_file.cpp)
1 change: 1 addition & 0 deletions benchmark/benchmark_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ class Benchmark_base {

protected:
double T0_;
std::string metric_type_;
int32_t dim_;
void* xb_ = nullptr;
void* xq_ = nullptr;
Expand Down
2 changes: 0 additions & 2 deletions benchmark/hdf5/benchmark_binary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,6 @@ class Benchmark_binary : public Benchmark_knowhere, public ::testing::Test {
parse_ann_test_name();
load_hdf5_data<true>();

assert(metric_str_ == METRIC_HAM_STR || metric_str_ == METRIC_JAC_STR);
metric_type_ = (metric_str_ == METRIC_HAM_STR) ? knowhere::metric::HAMMING : knowhere::metric::JACCARD;
cfg_[knowhere::meta::METRIC_TYPE] = metric_type_;
knowhere::KnowhereConfig::SetSimdType(knowhere::KnowhereConfig::SimdType::AVX2);
printf("faiss::distance_compute_blas_threshold: %ld\n", knowhere::KnowhereConfig::GetBlasThreshold());
Expand Down
2 changes: 0 additions & 2 deletions benchmark/hdf5/benchmark_binary_range.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,6 @@ class Benchmark_binary_range : public Benchmark_knowhere, public ::testing::Test
load_hdf5_data_range<true>();
#endif

assert(metric_str_ == METRIC_HAM_STR || metric_str_ == METRIC_JAC_STR);
metric_type_ = (metric_str_ == METRIC_HAM_STR) ? knowhere::metric::HAMMING : knowhere::metric::JACCARD;
cfg_[knowhere::meta::METRIC_TYPE] = metric_type_;
cfg_[knowhere::meta::RADIUS] = *gt_radius_;
knowhere::KnowhereConfig::SetSimdType(knowhere::KnowhereConfig::SimdType::AVX2);
Expand Down
2 changes: 0 additions & 2 deletions benchmark/hdf5/benchmark_float.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,6 @@ class Benchmark_float : public Benchmark_knowhere, public ::testing::Test {
parse_ann_test_name();
load_hdf5_data<false>();

assert(metric_str_ == METRIC_IP_STR || metric_str_ == METRIC_L2_STR);
metric_type_ = (metric_str_ == METRIC_IP_STR) ? "IP" : "L2";
cfg_[knowhere::meta::METRIC_TYPE] = metric_type_;
knowhere::KnowhereConfig::SetSimdType(knowhere::KnowhereConfig::SimdType::AVX2);
printf("faiss::distance_compute_blas_threshold: %ld\n", knowhere::KnowhereConfig::GetBlasThreshold());
Expand Down
2 changes: 0 additions & 2 deletions benchmark/hdf5/benchmark_float_bitset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,6 @@ class Benchmark_float_bitset : public Benchmark_knowhere, public ::testing::Test
parse_ann_test_name();
load_hdf5_data<false>();

assert(metric_str_ == METRIC_IP_STR || metric_str_ == METRIC_L2_STR);
metric_type_ = (metric_str_ == METRIC_IP_STR) ? "IP" : "L2";
cfg_[knowhere::meta::METRIC_TYPE] = metric_type_;
knowhere::KnowhereConfig::SetSimdType(knowhere::KnowhereConfig::SimdType::AVX2);
printf("faiss::distance_compute_blas_threshold: %ld\n", knowhere::KnowhereConfig::GetBlasThreshold());
Expand Down
2 changes: 0 additions & 2 deletions benchmark/hdf5/benchmark_float_qps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -214,8 +214,6 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test {
parse_ann_test_name();
load_hdf5_data<false>();

assert(metric_str_ == METRIC_IP_STR || metric_str_ == METRIC_L2_STR);
metric_type_ = (metric_str_ == METRIC_IP_STR) ? knowhere::metric::IP : knowhere::metric::L2;
cfg_[knowhere::meta::METRIC_TYPE] = metric_type_;
knowhere::KnowhereConfig::SetSimdType(knowhere::KnowhereConfig::SimdType::AUTO);
#ifdef KNOWHERE_WITH_GPU
Expand Down
2 changes: 0 additions & 2 deletions benchmark/hdf5/benchmark_float_range.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,6 @@ class Benchmark_float_range : public Benchmark_knowhere, public ::testing::Test
load_hdf5_data_range<false>();
#endif

assert(metric_str_ == METRIC_IP_STR || metric_str_ == METRIC_L2_STR);
metric_type_ = (metric_str_ == METRIC_IP_STR) ? knowhere::metric::IP : knowhere::metric::L2;
cfg_[knowhere::meta::METRIC_TYPE] = metric_type_;
cfg_[knowhere::meta::RADIUS] = *gt_radius_;
knowhere::KnowhereConfig::SetSimdType(knowhere::KnowhereConfig::SimdType::AVX2);
Expand Down
2 changes: 0 additions & 2 deletions benchmark/hdf5/benchmark_float_range_bitset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,6 @@ class Benchmark_float_range_bitset : public Benchmark_knowhere, public ::testing
parse_ann_test_name_with_range();
load_hdf5_data_range<false>();

assert(metric_str_ == METRIC_IP_STR || metric_str_ == METRIC_L2_STR);
metric_type_ = (metric_str_ == METRIC_IP_STR) ? "IP" : "L2";
cfg_[knowhere::meta::METRIC_TYPE] = metric_type_;
cfg_[knowhere::meta::RADIUS] = *gt_radius_;
knowhere::KnowhereConfig::SetSimdType(knowhere::KnowhereConfig::SimdType::AVX2);
Expand Down
77 changes: 47 additions & 30 deletions benchmark/hdf5/benchmark_hdf5.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ static const char* HDF5_DATASET_RADIUS = "radius";

static const char* METRIC_IP_STR = "angular";
static const char* METRIC_L2_STR = "euclidean";
static const char* METRIC_COS_STR = "cosine";
static const char* METRIC_HAM_STR = "hamming";
static const char* METRIC_JAC_STR = "jaccard";

Expand Down Expand Up @@ -61,43 +62,58 @@ class Benchmark_hdf5 : public Benchmark_base {
// Parses the leading "<dataset>-<dim>-" portion of ann_test_name_.
//
// Side effects:
//   dataset_name_  <- the text before the first '-'
//   dim_           <- the text between the first and second '-', converted with std::stoi
//
// Returns the index of the character immediately after the second '-', i.e.
// where the caller should continue parsing.
//
// Preconditions are checked with assert only: ann_test_name_ must be non-empty
// and must contain at least two '-' characters.
int32_t
parse_name_and_dim() {
    assert(!ann_test_name_.empty() || !"ann_test_name not set");

    // Everything in front of the first dash is the dataset name.
    const size_t first_dash = ann_test_name_.find('-');
    assert(first_dash != std::string::npos);
    dataset_name_ = ann_test_name_.substr(0, first_dash);

    // The field between the first and second dash holds the dimension.
    const size_t dim_begin = first_dash + 1;
    const size_t second_dash = ann_test_name_.find('-', dim_begin);
    assert(second_dash != std::string::npos);
    dim_ = std::stoi(ann_test_name_.substr(dim_begin, second_dash - dim_begin));

    // The offset is narrowed to the declared return type, as before.
    return static_cast<int32_t>(second_dash + 1);
}

// Translates a metric name as it appears in a dataset name into the metric
// identifier stored in metric_type_.
//
// Accepted spellings are the METRIC_*_STR constants plus the short forms
// "l2" and "ip". Any other string trips assert(false) and leaves
// metric_type_ untouched.
void
set_metric_type(const std::string& str) {
    struct MetricAlias {
        const char* name;    // spelling accepted in the dataset name
        const char* metric;  // value assigned to metric_type_
    };
    const MetricAlias aliases[] = {
        {METRIC_L2_STR, "L2"},       {"l2", "L2"},
        {METRIC_IP_STR, "IP"},       {"ip", "IP"},
        {METRIC_COS_STR, "COSINE"},  {METRIC_HAM_STR, "HAMMING"},
        {METRIC_JAC_STR, "JACCARD"},
    };

    for (const MetricAlias& alias : aliases) {
        if (str == alias.name) {
            metric_type_ = alias.metric;
            return;
        }
    }

    // No known spelling matched.
    assert(false);
}
// Parses an ann_test_name_ of the form "<dataset>-<dim>-<metric>".
//
// parse_name_and_dim() consumes the first two fields; whatever follows the
// second '-' is stored in metric_str_ and then translated by
// set_metric_type().
void
parse_ann_test_name() {
    const auto metric_begin = parse_name_and_dim();

    // The metric name is the whole remainder of the test name.
    metric_str_ = ann_test_name_.substr(metric_begin);
    set_metric_type(metric_str_);
}

// Parses an ann_test_name_ of the form "<dataset>-<dim>-<metric>-range".
//
// parse_name_and_dim() consumes the first two fields. The text up to the next
// '-' is stored in metric_str_ and translated by set_metric_type(). The
// remainder is asserted to be exactly "range".
void
parse_ann_test_name_with_range() {
    const auto metric_begin = parse_name_and_dim();

    // The metric field ends at the third dash of the test name.
    const auto metric_end = ann_test_name_.find('-', metric_begin);
    assert(metric_end != std::string::npos);

    metric_str_ = ann_test_name_.substr(metric_begin, metric_end - metric_begin);
    set_metric_type(metric_str_);

    // Whatever follows the metric must be the literal suffix.
    assert(ann_test_name_.substr(metric_end + 1) == "range");
}

// Parses an ann_test_name_ of the form "<dataset>-<dim>-<metric>-range-multi".
//
// parse_name_and_dim() consumes the first two fields. The text up to the next
// '-' is stored in metric_str_ and translated by set_metric_type(). The
// remainder is asserted to be exactly "range-multi".
void
parse_ann_test_name_with_range_multi() {
    const auto metric_begin = parse_name_and_dim();

    // The metric field ends at the third dash of the test name.
    const auto metric_end = ann_test_name_.find('-', metric_begin);
    assert(metric_end != std::string::npos);

    metric_str_ = ann_test_name_.substr(metric_begin, metric_end - metric_begin);
    set_metric_type(metric_str_);

    // Whatever follows the metric must be the literal suffix.
    assert(ann_test_name_.substr(metric_end + 1) == "range-multi");
}

Expand All @@ -119,10 +135,10 @@ class Benchmark_hdf5 : public Benchmark_base {
assert(dim * 32 == dim_ || !"train dataset has incorrect dimension");
}

if (metric_str_ == METRIC_IP_STR) {
printf("[%.3f s] Normalizing train dataset \n", get_time_diff());
normalize((float*)xb_, nb_, dim_);
}
// if (metric_str_ == METRIC_IP_STR) {
// printf("[%.3f s] Normalizing train dataset \n", get_time_diff());
// normalize((float*)xb_, nb_, dim_);
// }

/* load test data */
printf("[%.3f s] Loading test data\n", get_time_diff());
Expand All @@ -134,10 +150,10 @@ class Benchmark_hdf5 : public Benchmark_base {
assert(dim * 32 == dim_ || !"test dataset has incorrect dimension");
}

if (metric_str_ == METRIC_IP_STR) {
printf("[%.3f s] Normalizing test dataset \n", get_time_diff());
normalize((float*)xq_, nq_, dim_);
}
// if (metric_str_ == METRIC_IP_STR) {
// printf("[%.3f s] Normalizing test dataset \n", get_time_diff());
// normalize((float*)xq_, nq_, dim_);
// }

/* load ground-truth data */
int32_t gt_nq;
Expand Down Expand Up @@ -167,10 +183,10 @@ class Benchmark_hdf5 : public Benchmark_base {
assert(dim * 32 == dim_ || !"train dataset has incorrect dimension");
}

if (metric_str_ == METRIC_IP_STR) {
printf("[%.3f s] Normalizing train dataset \n", get_time_diff());
normalize((float*)xb_, nb_, dim_);
}
// if (metric_str_ == METRIC_IP_STR) {
// printf("[%.3f s] Normalizing train dataset \n", get_time_diff());
// normalize((float*)xb_, nb_, dim_);
// }

/* load test data */
printf("[%.3f s] Loading test data\n", get_time_diff());
Expand All @@ -182,10 +198,10 @@ class Benchmark_hdf5 : public Benchmark_base {
assert(dim * 32 == dim_ || !"test dataset has incorrect dimension");
}

if (metric_str_ == METRIC_IP_STR) {
printf("[%.3f s] Normalizing test dataset \n", get_time_diff());
normalize((float*)xq_, nq_, dim_);
}
// if (metric_str_ == METRIC_IP_STR) {
// printf("[%.3f s] Normalizing test dataset \n", get_time_diff());
// normalize((float*)xq_, nq_, dim_);
// }

/* load ground-truth data */
int32_t cols, rows;
Expand Down Expand Up @@ -221,10 +237,10 @@ class Benchmark_hdf5 : public Benchmark_base {
assert(dim * 32 == dim_ || !"train dataset has incorrect dimension");
}

if (metric_str_ == METRIC_IP_STR) {
printf("[%.3f s] Normalizing train dataset \n", get_time_diff());
normalize((float*)xb_, nb_, dim_);
}
// if (metric_str_ == METRIC_IP_STR) {
// printf("[%.3f s] Normalizing train dataset \n", get_time_diff());
// normalize((float*)xb_, nb_, dim_);
// }

/* load test data */
printf("[%.3f s] Loading test data\n", get_time_diff());
Expand All @@ -236,10 +252,10 @@ class Benchmark_hdf5 : public Benchmark_base {
assert(dim * 32 == dim_ || !"test dataset has incorrect dimension");
}

if (metric_str_ == METRIC_IP_STR) {
printf("[%.3f s] Normalizing test dataset \n", get_time_diff());
normalize((float*)xq_, nq_, dim_);
}
// if (metric_str_ == METRIC_IP_STR) {
// printf("[%.3f s] Normalizing test dataset \n", get_time_diff());
// normalize((float*)xq_, nq_, dim_);
// }

/* load ground-truth data */
int32_t cols, rows;
Expand Down Expand Up @@ -421,5 +437,6 @@ class Benchmark_hdf5 : public Benchmark_base {

protected:
std::string ann_test_name_ = "";
std::string dataset_name_;
std::string metric_str_;
};
2 changes: 0 additions & 2 deletions benchmark/hdf5/benchmark_knowhere.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,6 @@ class Benchmark_knowhere : public Benchmark_hdf5 {
}

protected:
std::string metric_type_;

std::string index_type_;
knowhere::Json cfg_;
knowhere::Index<knowhere::IndexNode> index_;
Expand Down
Loading

0 comments on commit 8eead9b

Please sign in to comment.