Skip to content

Commit

Permalink
sparse: make bm25 avgdl support value smaller than 1 (#1077)
Browse files Browse the repository at this point in the history
Signed-off-by: Shawn Wang <[email protected]>
  • Loading branch information
sparknack authored Feb 15, 2025
1 parent 8106b1c commit 385964a
Show file tree
Hide file tree
Showing 4 changed files with 6 additions and 1 deletion.
2 changes: 1 addition & 1 deletion include/knowhere/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -752,7 +752,7 @@ class BaseConfig : public Config {
// type is WAND.
KNOWHERE_CONFIG_DECLARE_FIELD(bm25_avgdl)
.allow_empty_without_default()
- .set_range(1, std::numeric_limits<CFG_FLOAT::value_type>::max())
+ .set_range(0, std::numeric_limits<CFG_FLOAT::value_type>::max())
.description("average document length")
.for_train_and_search()
.for_iterator()
Expand Down
1 change: 1 addition & 0 deletions src/common/comp/brute_force.cc
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ GetDocValueComputer(const BruteForceConfig& cfg) {
auto k1 = cfg.bm25_k1.value();
auto b = cfg.bm25_b.value();
auto avgdl = cfg.bm25_avgdl.value();
avgdl = std::max(avgdl, 1.0f);
return sparse::GetDocValueBM25Computer<T>(k1, b, avgdl);
} else {
return expected<sparse::DocValueComputer<T>>::Err(Status::invalid_metric_type,
Expand Down
3 changes: 3 additions & 0 deletions src/index/sparse/sparse_index_node.cc
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,9 @@ class SparseInvertedIndexNode : public IndexNode {
auto k1 = cfg.bm25_k1.value();
auto b = cfg.bm25_b.value();
auto avgdl = cfg.bm25_avgdl.value();
// avgdl is used as a denominator in BM25 score computation. The config
// accepts values below 1.0 (including 0), so clamp it to at least 1.0 here
// to avoid division by zero; any smaller value is treated as 1.0.
avgdl = std::max(avgdl, 1.0f);

if (use_wand || cfg.inverted_index_algo.value() == "DAAT_WAND") {
auto index = new sparse::InvertedIndex<T, uint16_t, sparse::InvertedIndexAlgo::DAAT_WAND, mmapped>(
Expand Down
1 change: 1 addition & 0 deletions src/index/sparse/sparse_inverted_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ class InvertedIndex : public BaseInvertedIndex<DType> {
"avgdl must be supplied during searching");
}
auto avgdl = cfg.bm25_avgdl.value();
avgdl = std::max(avgdl, 1.0f);
if constexpr (algo == InvertedIndexAlgo::DAAT_WAND || algo == InvertedIndexAlgo::DAAT_MAXSCORE) {
// daat_wand and daat_maxscore: search time k1/b must equal load time config.
if ((cfg.bm25_k1.has_value() && cfg.bm25_k1.value() != bm25_params_->k1) ||
Expand Down

0 comments on commit 385964a

Please sign in to comment.