Skip to content

Commit

Permalink
add DAAT MaxScore support for sparse vector (#1015)
Browse files Browse the repository at this point in the history
* sparse: add daat maxscore algorithm support

Signed-off-by: Shawn Wang <[email protected]>

* sparse: encapsulate cursor operation so that more algorithms can be introduced

Signed-off-by: Shawn Wang <[email protected]>

---------

Signed-off-by: Shawn Wang <[email protected]>
  • Loading branch information
sparknack authored Jan 13, 2025
1 parent 3572ac1 commit 59bfdc2
Show file tree
Hide file tree
Showing 5 changed files with 263 additions and 93 deletions.
3 changes: 2 additions & 1 deletion include/knowhere/comp/index_param.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,8 @@ constexpr const char* HNSW_REFINE_TYPE = "refine_type";
constexpr const char* SQ_TYPE = "sq_type"; // for IVF_SQ and HNSW_SQ
constexpr const char* PRQ_NUM = "nrq"; // for PRQ, number of residual quantizers

// Sparse Params
// Sparse Inverted Index Params
constexpr const char* INVERTED_INDEX_ALGO = "inverted_index_algo";
constexpr const char* DROP_RATIO_BUILD = "drop_ratio_build";
constexpr const char* DROP_RATIO_SEARCH = "drop_ratio_search";
} // namespace indexparam
Expand Down
40 changes: 34 additions & 6 deletions src/index/sparse/sparse_index_node.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@

namespace knowhere {

// Inverted Index impl for sparse vectors. May optionally use WAND algorithm to speed up search.
// Inverted Index impl for sparse vectors.
//
// Not overriding RangeSearch, will use the default implementation in IndexNode.
//
Expand Down Expand Up @@ -351,8 +351,6 @@ class SparseInvertedIndexNode : public IndexNode {
expected<sparse::BaseInvertedIndex<T>*>
CreateIndex(const SparseInvertedIndexConfig& cfg) const {
if (IsMetricType(cfg.metric_type.value(), metric::BM25)) {
// quantize float to uint16_t when BM25 metric type is used.
auto idx = new sparse::InvertedIndex<T, uint16_t, use_wand, true, mmapped>();
if (!cfg.bm25_k1.has_value() || !cfg.bm25_b.has_value() || !cfg.bm25_avgdl.has_value()) {
return expected<sparse::BaseInvertedIndex<T>*>::Err(
Status::invalid_args, "BM25 parameters k1, b, and avgdl must be set when building/loading");
Expand All @@ -361,10 +359,40 @@ class SparseInvertedIndexNode : public IndexNode {
auto b = cfg.bm25_b.value();
auto avgdl = cfg.bm25_avgdl.value();
auto max_score_ratio = cfg.wand_bm25_max_score_ratio.value();
idx->SetBM25Params(k1, b, avgdl, max_score_ratio);
return idx;
if (use_wand || cfg.inverted_index_algo.value() == "DAAT_WAND") {
auto index =
new sparse::InvertedIndex<T, uint16_t, sparse::InvertedIndexAlgo::DAAT_WAND, true, mmapped>();
index->SetBM25Params(k1, b, avgdl, max_score_ratio);
return index;
} else if (cfg.inverted_index_algo.value() == "DAAT_MAXSCORE") {
auto index =
new sparse::InvertedIndex<T, uint16_t, sparse::InvertedIndexAlgo::DAAT_MAXSCORE, true, mmapped>();
index->SetBM25Params(k1, b, avgdl, max_score_ratio);
return index;
} else if (cfg.inverted_index_algo.value() == "TAAT_NAIVE") {
auto index =
new sparse::InvertedIndex<T, uint16_t, sparse::InvertedIndexAlgo::TAAT_NAIVE, true, mmapped>();
index->SetBM25Params(k1, b, avgdl, max_score_ratio);
return index;
} else {
return expected<sparse::BaseInvertedIndex<T>*>::Err(Status::invalid_args,
"Invalid search algorithm for SparseInvertedIndex");
}
} else {
return new sparse::InvertedIndex<T, T, use_wand, false, mmapped>();
if (use_wand || cfg.inverted_index_algo.value() == "DAAT_WAND") {
auto index = new sparse::InvertedIndex<T, T, sparse::InvertedIndexAlgo::DAAT_WAND, false, mmapped>();
return index;
} else if (cfg.inverted_index_algo.value() == "DAAT_MAXSCORE") {
auto index =
new sparse::InvertedIndex<T, T, sparse::InvertedIndexAlgo::DAAT_MAXSCORE, false, mmapped>();
return index;
} else if (cfg.inverted_index_algo.value() == "TAAT_NAIVE") {
auto index = new sparse::InvertedIndex<T, T, sparse::InvertedIndexAlgo::TAAT_NAIVE, false, mmapped>();
return index;
} else {
return expected<sparse::BaseInvertedIndex<T>*>::Err(Status::invalid_args,
"Invalid search algorithm for SparseInvertedIndex");
}
}
}

Expand Down
Loading

0 comments on commit 59bfdc2

Please sign in to comment.