Skip to content

Commit

Permalink
add a new filter interface that provides hints like selection ratio
Browse files Browse the repository at this point in the history
Signed-off-by: Xiangyu Wang <[email protected]>
  • Loading branch information
wxyucs committed Jan 23, 2025
1 parent 775cf91 commit 07caca2
Show file tree
Hide file tree
Showing 3 changed files with 138 additions and 0 deletions.
30 changes: 30 additions & 0 deletions examples/cpp/301_feature_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,21 @@ main(int argc, char** argv) {
/******************* Prepare Filter Function *****************/
std::function<bool(int64_t)> filter_func = [](int64_t id) { return id % 2 == 0; };

/******************* Prepare Filter Object *****************/
// Example filter object: accepts every id that is not a multiple of two and
// reports a fixed valid-ratio hint.
class MyFilter : public vsag::Filter {
public:
    // An id is valid exactly when its remainder modulo 2 is non-zero.
    bool
    CheckValid(int64_t id) const override {
        const bool is_odd = (id % 2) != 0;
        return is_odd;
    }

    // Constant hint returned by this example filter.
    float
    ValidRatio() const override {
        constexpr float ratio_hint = 0.618f;
        return ratio_hint;
    }
};
auto filter_object = std::make_shared<MyFilter>();

/******************* HNSW Filter Search With Bitset *****************/
auto hnsw_search_parameters = R"(
{
Expand Down Expand Up @@ -121,4 +136,19 @@ main(int argc, char** argv) {
for (int64_t i = 0; i < result->GetDim(); ++i) {
std::cout << result->GetIds()[i] << ": " << result->GetDistances()[i] << std::endl;
}

/******************* HNSW Filter Search With Filter Object *****************/
search_result = index->KnnSearch(query, topk, hnsw_search_parameters, filter_object);
if (not search_result.has_value()) {
    // keep a separator between the fixed text and the error detail so the
    // two do not run together in the output
    std::cerr << "Failed to search index with filter: " << search_result.error().message
              << std::endl;
    exit(-1);
}
result = search_result.value();

// print the results of the filter-object search; MyFilter::CheckValid accepts
// only odd ids, so no even id is expected in this list.
std::cout << "object filter results: " << std::endl;
for (int64_t i = 0; i < result->GetDim(); ++i) {
    std::cout << result->GetIds()[i] << ": " << result->GetDistances()[i] << std::endl;
}
}
65 changes: 65 additions & 0 deletions include/vsag/filter.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@

// Copyright 2024-present the vsag project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstdint>
#include <memory>

namespace vsag {

/**
 * @brief Interface of a pre-filter object. Implementations decide, id by id,
 * whether a vector may appear in a search result, and may additionally expose
 * hints (valid ratio, distribution) about the filter.
 */
class Filter {
public:
    /**
     * @brief Hint about the distribution of the filter
     */
    enum class Distribution {
        NONE = 0,  // returned by the default FilterDistribution(): no hint provided
        RELATED_TO_VECTOR,
    };

    /**
     * @brief Virtual destructor, so that an implementation can be destroyed
     * safely through a pointer to Filter.
     */
    virtual ~Filter() = default;

    /**
     * @brief Check whether a vector passes the pre-filter. True means the
     * vector has not been filtered out, false means it has been filtered out.
     * The result of KnnSearch/RangeSearch will only contain vectors that have
     * not been filtered out.
     *
     * @param id of the vector
     * @return true if the vector is valid, otherwise false
     */
    [[nodiscard]] virtual bool
    CheckValid(int64_t id) const = 0;

    /**
     * @brief Get the valid ratio of the pre-filter. 1.0 means all the vectors
     * are valid, i.e. none of them has been filtered out.
     *
     * @return the valid ratio
     */
    [[nodiscard]] virtual float
    ValidRatio() const {
        return 1.0f;  // (default) all vectors are valid
    }

    /**
     * @brief Get the distribution of the pre-filter
     *
     * @return distribution type of this filter
     */
    [[nodiscard]] virtual Distribution
    FilterDistribution() const {
        return Distribution::NONE;  // (default) no distribution hint is provided
    }
};

using FilterPtr = std::shared_ptr<Filter>;

}; // namespace vsag
43 changes: 43 additions & 0 deletions include/vsag/index.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "vsag/dataset.h"
#include "vsag/errors.h"
#include "vsag/expected.hpp"
#include "vsag/filter.h"
#include "vsag/index_features.h"
#include "vsag/readerset.h"

Expand Down Expand Up @@ -145,6 +146,24 @@ class Index {
const std::string& parameters,
const std::function<bool(int64_t)>& filter) const = 0;

/**
 * @brief Single KNN search on the index, restricted by a filter object
 *
 * @param query should contain dim, num_elements and vectors
 * @param k the result size of every query
 * @param parameters search parameters, passed as a string
 * @param filter represents whether an element is filtered out by pre-filter
 * @return result contains
 *                - num_elements: 1
 *                - ids, distances: length is (num_elements * k)
 * @throws std::runtime_error always, in this default implementation
 */
virtual tl::expected<DatasetPtr, Error>
KnnSearch(const DatasetPtr& query,
          int64_t k,
          const std::string& parameters,
          const FilterPtr& filter) const {
    // Default body of the FilterPtr overload: reject the call by throwing.
    throw std::runtime_error("Index doesn't support new filter");
}

/**
* @brief Performing single range search on index
*
Expand Down Expand Up @@ -209,6 +228,30 @@ class Index {
const std::function<bool(int64_t)>& filter,
int64_t limited_size = -1) const = 0;

/**
 * @brief Single range search on the index, restricted by a filter object
 *
 * @param query should contain dim, num_elements and vectors
 * @param radius of search, determines which results will be returned
 * @param parameters search parameters, passed as a string
 * @param filter represents whether an element is filtered out by pre-filter
 * @param limited_size of search result size.
 *                - limited_size < 0 : no limit
 *                - limited_size == 0 : error
 *                - limited_size >= 1 : limit result size to limited_size
 * @return result contains
 *                - num_elements: 1
 *                - dim: the size of results
 *                - ids, distances: length is dim
 * @throws std::runtime_error always, in this default implementation
 */
virtual tl::expected<DatasetPtr, Error>
RangeSearch(const DatasetPtr& query,
            float radius,
            const std::string& parameters,
            const FilterPtr& filter,
            int64_t limited_size = -1) const {
    // Default body of the FilterPtr overload: reject the call by throwing.
    throw std::runtime_error("Index doesn't support new filter");
}

/**
* @brief Pretraining the conjugate graph involves searching with generated queries and providing feedback.
*
Expand Down

0 comments on commit 07caca2

Please sign in to comment.