From 07caca2f0ff005016dce7f28e75592c7a7534eb7 Mon Sep 17 00:00:00 2001
From: Xiangyu Wang
Date: Thu, 23 Jan 2025 17:11:54 +0800
Subject: [PATCH] add a new filter interface that provides hints like selection ratio

Signed-off-by: Xiangyu Wang
---
 examples/cpp/301_feature_filter.cpp | 30 +++++++++++
 include/vsag/filter.h               | 77 +++++++++++++++++++++++++++++
 include/vsag/index.h                | 47 ++++++++++++++++++
 3 files changed, 154 insertions(+)
 create mode 100644 include/vsag/filter.h

diff --git a/examples/cpp/301_feature_filter.cpp b/examples/cpp/301_feature_filter.cpp
index 03636053..57180e3d 100644
--- a/examples/cpp/301_feature_filter.cpp
+++ b/examples/cpp/301_feature_filter.cpp
@@ -84,6 +84,21 @@ main(int argc, char** argv) {
     /******************* Prepare Filter Function *****************/
     std::function<bool(int64_t)> filter_func = [](int64_t id) { return id % 2 == 0; };

+    /******************* Prepare Filter Object *****************/
+    class MyFilter : public vsag::Filter {
+    public:
+        bool
+        CheckValid(int64_t id) const override {
+            return id % 2 != 0;  // keep odd ids only
+        }
+
+        float
+        ValidRatio() const override {
+            return 0.618f;  // hint only: estimated ratio of valid vectors
+        }
+    };
+    auto filter_object = std::make_shared<MyFilter>();
+
     /******************* HNSW Filter Search With Bitset *****************/
     auto hnsw_search_parameters = R"(
     {
@@ -121,4 +136,19 @@ main(int argc, char** argv) {
     for (int64_t i = 0; i < result->GetDim(); ++i) {
         std::cout << result->GetIds()[i] << ": " << result->GetDistances()[i] << std::endl;
     }
+
+    /******************* HNSW Filter Search With Filter Object *****************/
+    search_result = index->KnnSearch(query, topk, hnsw_search_parameters, filter_object);
+    if (not search_result.has_value()) {
+        std::cerr << "Failed to search index with filter" << search_result.error().message
+                  << std::endl;
+        exit(-1);
+    }
+    result = search_result.value();
+
+    // print the result of the filter object search, the returned ids are odd, not even.
+    std::cout << "object filter results: " << std::endl;
+    for (int64_t i = 0; i < result->GetDim(); ++i) {
+        std::cout << result->GetIds()[i] << ": " << result->GetDistances()[i] << std::endl;
+    }
 }
diff --git a/include/vsag/filter.h b/include/vsag/filter.h
new file mode 100644
index 00000000..cf46b8fd
--- /dev/null
+++ b/include/vsag/filter.h
@@ -0,0 +1,77 @@
+
+// Copyright 2024-present the vsag project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+
+namespace vsag {
+
+/**
+ * @brief Pre-filter interface accepted by Index::KnnSearch/RangeSearch, it can
+ * also expose hints (valid ratio, distribution) about the filter
+ */
+class Filter {
+public:
+    /** @brief Distribution hint returned by FilterDistribution() */
+    enum class Distribution {
+        NONE = 0,           // no distribution hint (default of FilterDistribution)
+        RELATED_TO_VECTOR,  // NOTE(review): meaning is not documented here -- confirm
+    };
+
+public:
+    /**
+     * @brief Virtual destructor, so that a filter held as FilterPtr can be
+     * destroyed safely through a pointer to this base class
+     */
+    virtual ~Filter() = default;
+
+    /**
+     * @brief Check if a vector is filtered out by pre-filter, true means it
+     * has not been filtered out, false means it has been filtered out, the result
+     * of KnnSearch/RangeSearch will only contain non-filtered-out vectors
+     *
+     * @param id of the vector
+     * @return true if vector is valid, otherwise false
+     */
+    [[nodiscard]] virtual bool
+    CheckValid(int64_t id) const = 0;
+
+    /**
+     * @brief Get valid ratio of pre-filter, 1.0 means all the vectors are valid,
+     * none of them has been filtered out.
+     *
+     * @return the valid ratio
+     */
+    [[nodiscard]] virtual float
+    ValidRatio() const {
+        return 1.0f;  // (default) all vectors are valid
+    }
+
+    /**
+     * @brief Get the distribution of pre-filter
+     *
+     * @return distribution type of this filter
+     */
+    [[nodiscard]] virtual Distribution
+    FilterDistribution() const {
+        return Distribution::NONE;  // (default) no distribution hint is provided
+    }
+};
+
+using FilterPtr = std::shared_ptr<Filter>;
+
+}  // namespace vsag
diff --git a/include/vsag/index.h b/include/vsag/index.h
index 128eab54..856e3839 100644
--- a/include/vsag/index.h
+++ b/include/vsag/index.h
@@ -28,6 +28,7 @@
 #include "vsag/dataset.h"
 #include "vsag/errors.h"
 #include "vsag/expected.hpp"
+#include "vsag/filter.h"
 #include "vsag/index_features.h"
 #include "vsag/readerset.h"

@@ -145,6 +146,26 @@ class Index {
               const std::string& parameters,
               const std::function<bool(int64_t)>& filter) const = 0;

+    /**
+     * @brief Performing single KNN search on index with a filter object
+     *
+     * @param query should contain dim, num_elements and vectors
+     * @param k the result size of every query
+     * @param parameters search parameters passed as a string (the example passes JSON)
+     * @param filter represents whether an element is filtered out by pre-filter
+     * @return result contains
+     *                - num_elements: 1
+     *                - ids, distances: length is (num_elements * k)
+     * @throws std::runtime_error from this default implementation (overload not overridden)
+     */
+    virtual tl::expected<DatasetPtr, Error>
+    KnnSearch(const DatasetPtr& query,
+              int64_t k,
+              const std::string& parameters,
+              const FilterPtr& filter) const {
+        throw std::runtime_error("Index doesn't support new filter");
+    }
+
     /**
      * @brief Performing single range search on index
      *
@@ -209,6 +230,32 @@ class Index {
                 const std::function<bool(int64_t)>& filter,
                 int64_t limited_size = -1) const = 0;

+    /**
+     * @brief Performing single range search on index with a filter object
+     *
+     * @param query should contain dim, num_elements and vectors
+     * @param radius of search, determines which results will be returned
+     * @param parameters search parameters passed as a string (the example passes JSON)
+     * @param filter represents whether an element is filtered out by pre-filter
+     * @param limited_size of search result size.
+     *                     - limited_size < 0 : no limit
+     *                     - limited_size == 0 : error
+     *                     - limited_size >= 1 : limit result size to limited_size
+     * @return result contains
+     *                - num_elements: 1
+     *                - dim: the size of results
+     *                - ids, distances: length is dim
+     * @throws std::runtime_error from this default implementation (overload not overridden)
+     */
+    virtual tl::expected<DatasetPtr, Error>
+    RangeSearch(const DatasetPtr& query,
+                float radius,
+                const std::string& parameters,
+                const FilterPtr& filter,
+                int64_t limited_size = -1) const {
+        throw std::runtime_error("Index doesn't support new filter");
+    }
+
     /**
      * @brief Pretraining the conjugate graph involves searching with generated queries and providing feedback.
      *