Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Throw custom RecallError/RecallException when the number of requested neighbors cannot be returned #88

Merged
merged 2 commits into from
Oct 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 17 additions & 8 deletions cpp/src/TypedIndex.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@
#include "hnswlib.h"
#include "std_utils.h"

/**
 * Exception thrown when a nearest-neighbor query retrieves a different number
 * of results than the number of neighbors that was requested.
 *
 * Derives from std::runtime_error, so handlers that catch std::runtime_error
 * or std::exception keep catching it.
 */
class RecallError : public std::runtime_error {
public:
  /**
   * @param what Human-readable description of the failure; it is forwarded to
   *             std::runtime_error and is what what() reports.
   *
   * Declared explicit so that a std::string is never silently converted into
   * an exception object.
   */
  explicit RecallError(const std::string &what) : std::runtime_error(what) {}
};

template <typename T> inline const StorageDataType storageDataType();
template <typename T> inline const std::string storageDataTypeName();

Expand Down Expand Up @@ -569,10 +574,11 @@ class TypedIndex : public Index {
nullptr, queryEf);

if (result.size() != (unsigned long)k) {
throw std::runtime_error(
throw RecallError(
"Fewer than expected results were retrieved; only found " +
std::to_string(result.size()) + " of " + std::to_string(k) +
" requested neighbors.");
" requested neighbors. Reconstruct the index with a higher M "
"value to increase recall.");
}

for (int i = k - 1; i >= 0; i--) {
Expand Down Expand Up @@ -606,10 +612,11 @@ class TypedIndex : public Index {
queryEf);

if (result.size() != (unsigned long)k) {
throw std::runtime_error(
throw RecallError(
"Fewer than expected results were retrieved; only found " +
std::to_string(result.size()) + " of " + std::to_string(k) +
" requested neighbors.");
" requested neighbors. Reconstruct the index with a higher M "
"value to increase recall.");
}

for (int i = k - 1; i >= 0; i--) {
Expand Down Expand Up @@ -662,10 +669,11 @@ class TypedIndex : public Index {
algorithmImpl->searchKnn(queryVector.data(), k, nullptr, queryEf);

if (result.size() != (unsigned long)k) {
throw std::runtime_error(
throw RecallError(
"Fewer than expected results were retrieved; only found " +
std::to_string(result.size()) + " of " + std::to_string(k) +
" requested neighbors.");
" requested neighbors. Reconstruct the index with a higher M value "
"to increase recall.");
}

for (int i = k - 1; i >= 0; i--) {
Expand All @@ -683,10 +691,11 @@ class TypedIndex : public Index {
algorithmImpl->searchKnn(norm_array.data(), k, nullptr, queryEf);

if (result.size() != (unsigned long)k) {
throw std::runtime_error(
throw RecallError(
"Fewer than expected results were retrieved; only found " +
std::to_string(result.size()) + " of " + std::to_string(k) +
" requested neighbors.");
" requested neighbors. Reconstruct the index with a higher M value "
"to increase recall.");
}

for (int i = k - 1; i >= 0; i--) {
Expand Down
106 changes: 96 additions & 10 deletions cpp/test/test_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ template <typename dist_t, typename data_t = dist_t,
void testQuery(TypedIndex<dist_t, data_t, scalefactor> &index, int numVectors,
int numDimensions, SpaceType spaceType,
StorageDataType storageType, bool testSingleVectorMethod,
float precisionTolerance) {
float precisionTolerance, int k) {
/**
* Create test data and ids. If we are using Float8 or E4M3 storage, quantize
* the vector values, if we are using Float32 storage, keep the float values
Expand All @@ -53,7 +53,6 @@ void testQuery(TypedIndex<dist_t, data_t, scalefactor> &index, int numVectors,
index.addItems(inputData, ids, -1);
}

int k = 1;
float lowerBound = 0.0f - precisionTolerance;
float upperBound = 0.0f + precisionTolerance;

Expand Down Expand Up @@ -120,8 +119,94 @@ void testQuery(TypedIndex<dist_t, data_t, scalefactor> &index, int numVectors,
}
}

/**
 * Populate `index` with `numVectors` random vectors of `numDimensions`
 * dimensions, then ask for as many neighbors as there are items in the index
 * and require that the query throws RecallError.
 *
 * @param index                  index under test; it is filled by this helper.
 * @param numVectors             number of vectors to insert, also used as k.
 * @param numDimensions          dimensionality of the generated vectors.
 * @param testSingleVectorMethod when true, insert one vector at a time with
 *                               addItem(); otherwise insert everything with a
 *                               single addItems() call.
 */
template <typename dist_t, typename data_t = dist_t,
          typename scalefactor = std::ratio<1, 1>>
void testQueryAllNearestNeighbors(
    TypedIndex<dist_t, data_t, scalefactor> &index, int numVectors,
    int numDimensions, bool testSingleVectorMethod) {
  std::vector<std::vector<float>> inputData =
      randomVectors(numVectors, numDimensions);

  // Each vector is labelled with its own position in inputData.
  std::vector<hnswlib::labeltype> ids(numVectors);
  for (int position = 0; position < numVectors; position++) {
    ids[position] = position;
  }

  if (!testSingleVectorMethod) {
    // Bulk path: hand all vectors and labels over in one call.
    index.addItems(inputData, ids, -1);
  } else {
    // Single-item path: insert the vectors one by one, in label order.
    for (int position = 0; position < numVectors; position++) {
      index.addItem(inputData[position], ids[position]);
    }
  }

  // Every vector must have been stored before the query is issued.
  REQUIRE(index.getNumElements() == numVectors);

  // Query with the first inserted vector, requesting every item in the index.
  std::vector<float> targetVector = inputData[0];
  REQUIRE_THROWS_AS(index.query(targetVector, numVectors, -1), RecallError);
}

/**
 * This test reproduces https://github.com/spotify/voyager/issues/38, an issue
 * where we cannot achieve 100% recall. testQueryAllNearestNeighbors() asserts
 * that a custom RecallError is thrown.
 *
 * The case iterates over every combination of space type and storage data
 * type listed below, builds a fresh index for each combination, and delegates
 * the insert-and-query work to testQueryAllNearestNeighbors().
 */
TEST_CASE(
"Test querying for kNN when k equals the number of items in the index") {
// Parameter grid. The last three sets currently hold a single value each.
std::vector<SpaceType> spaceTypesSet = {
SpaceType::Euclidean, SpaceType::InnerProduct, SpaceType::Cosine};
std::vector<StorageDataType> storageTypesSet = {
StorageDataType::Float8, StorageDataType::Float32, StorageDataType::E4M3};
std::vector<int> numDimensionsSet = {32};
std::vector<int> numVectorsSet = {30000};
std::vector<bool> testSingleVectorMethods = {true};

// Use a small M value to exacerbate the issue where a graph becomes
// disconnected. This helps to reproduce this nondeterministic issue.
size_t M_ = 4;

for (auto spaceType : spaceTypesSet) {
for (auto storageType : storageTypesSet) {
for (auto numDimensions : numDimensionsSet) {
for (auto numVectors : numVectorsSet) {
for (auto testSingleVectorMethod : testSingleVectorMethods) {

// NOTE(review): every loop iteration reuses the same SUBCASE name; confirm
// that the test framework enters the subcase for each parameter combination
// rather than only for the first one.
SUBCASE("Test instantiation ") {
// Record the current parameters so they are reported with any failure.
CAPTURE(spaceType);
CAPTURE(numDimensions);
CAPTURE(numVectors);
CAPTURE(storageType);
CAPTURE(std::to_string(testSingleVectorMethod));

// The storage data type is a template argument of TypedIndex, so each
// storage type needs its own instantiation; all branches otherwise run the
// same helper with the same arguments.
if (storageType == StorageDataType::Float8) {
auto index = TypedIndex<float, int8_t, std::ratio<1, 127>>(
spaceType, numDimensions, M_);
testQueryAllNearestNeighbors(index, numVectors, numDimensions,
testSingleVectorMethod);
} else if (storageType == StorageDataType::Float32) {
auto index = TypedIndex<float>(spaceType, numDimensions, M_);
testQueryAllNearestNeighbors(index, numVectors, numDimensions,
testSingleVectorMethod);
} else if (storageType == StorageDataType::E4M3) {
auto index =
TypedIndex<float, E4M3>(spaceType, numDimensions, M_);
testQueryAllNearestNeighbors(index, numVectors, numDimensions,
testSingleVectorMethod);
}
}
}
}
}
}
}
}

TEST_CASE("Test combinations of different instantiations. Test that each "
"vector's NN is itself and distance is approximately zero.") {
"vector's ANN is itself and distance is approximately zero.") {
std::unordered_map<StorageDataType, float> PRECISION_TOLERANCE_PER_DATA_TYPE =
{{StorageDataType::Float32, 0.00001f},
{StorageDataType::Float8, 0.10f},
Expand All @@ -133,6 +218,7 @@ TEST_CASE("Test combinations of different instantiations. Test that each "
std::vector<StorageDataType> storageTypesSet = {
StorageDataType::Float8, StorageDataType::Float32, StorageDataType::E4M3};
std::vector<bool> testSingleVectorMethods = {true, false};
int k = 1;

for (auto spaceType : spaceTypesSet) {
for (auto storageType : storageTypesSet) {
Expand All @@ -154,21 +240,21 @@ TEST_CASE("Test combinations of different instantiations. Test that each "
storageType);
testQuery(index, numVectors, numDimensions, spaceType,
storageType, testSingleVectorMethod,
PRECISION_TOLERANCE_PER_DATA_TYPE[storageType]);
PRECISION_TOLERANCE_PER_DATA_TYPE[storageType], k);
} else if (storageType == StorageDataType::Float32) {
auto index = TypedIndex<float>(spaceType, numDimensions);
testIndexProperties(index, spaceType, numDimensions,
storageType);
testQuery(index, numVectors, numDimensions, spaceType,
storageType, testSingleVectorMethod,
PRECISION_TOLERANCE_PER_DATA_TYPE[storageType]);
PRECISION_TOLERANCE_PER_DATA_TYPE[storageType], k);
} else if (storageType == StorageDataType::E4M3) {
auto index = TypedIndex<float, E4M3>(spaceType, numDimensions);
testIndexProperties(index, spaceType, numDimensions,
storageType);
testQuery(index, numVectors, numDimensions, spaceType,
storageType, testSingleVectorMethod,
PRECISION_TOLERANCE_PER_DATA_TYPE[storageType]);
PRECISION_TOLERANCE_PER_DATA_TYPE[storageType], k);
}
}
}
Expand All @@ -178,8 +264,8 @@ TEST_CASE("Test combinations of different instantiations. Test that each "
}
}

TEST_CASE("Test vectorsToNDArray converts 2D vector of float to NDArray<float, "
"2>") {
TEST_CASE(
"Test vectorsToNDArray converts 2D vector of float to NDArray<float,2>") {
std::vector<std::vector<float>> vectors = {{1.0f, 2.0f, 3.0f, 4.0f},
{5.0f, 6.0f, 7.0f, 8.0f},
{9.0f, 10.0f, 11.0f, 12.0f}};
Expand All @@ -205,8 +291,8 @@ TEST_CASE("Test vectorsToNDArray converts 2D vector of float to NDArray<float, "
REQUIRE(*ndArray[2] == 9.0f);
}

TEST_CASE("Test vectorsToNDArray throws error if vectors are not of the same "
"size") {
TEST_CASE(
"Test vectorsToNDArray throws error if vectors are not of the same size") {
std::vector<std::vector<float>> vectors1 = {{1.0f, 2.0f, 3.0f, 4.0f},
{5.0f, 6.0f, 7.0f},
{9.0f, 10.0f, 11.0f, 12.0f}};
Expand Down
108 changes: 108 additions & 0 deletions docs/java/apidocs/allclasses-index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
<!DOCTYPE HTML>
<html lang="en">
<head>
<!-- Generated by javadoc (21) on Thu Sep 26 00:30:36 EDT 2024 -->
<title>All Classes and Interfaces (voyager 2.0.9 API)</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta name="dc.created" content="2024-09-26">
<meta name="description" content="class index">
<meta name="generator" content="javadoc/AllClassesIndexWriter">
<link rel="stylesheet" type="text/css" href="stylesheet.css" title="Style">
<link rel="stylesheet" type="text/css" href="script-dir/jquery-ui.min.css" title="Style">
<script type="text/javascript" src="script.js"></script>
<script type="text/javascript" src="script-dir/jquery-3.6.1.min.js"></script>
<script type="text/javascript" src="script-dir/jquery-ui.min.js"></script>
</head>
<body class="all-classes-index-page">
<script type="text/javascript">var pathtoroot = "./";
loadScripts(document, 'script');</script>
<noscript>
<div>JavaScript is disabled on your browser.</div>
</noscript>
<div class="flex-box">
<header role="banner" class="flex-header">
<nav role="navigation">
<!-- ========= START OF TOP NAVBAR ======= -->
<div class="top-nav" id="navbar-top"><button id="navbar-toggle-button" aria-controls="navbar-top" aria-expanded="false" aria-label="Toggle navigation links"><span class="nav-bar-toggle-icon">&nbsp;</span><span class="nav-bar-toggle-icon">&nbsp;</span><span class="nav-bar-toggle-icon">&nbsp;</span></button>
<div class="skip-nav"><a href="#skip-navbar-top" title="Skip navigation links">Skip navigation links</a></div>
<ul id="navbar-top-firstrow" class="nav-list" title="Navigation">
<li><a href="index.html">Overview</a></li>
<li>Package</li>
<li>Class</li>
<li>Use</li>
<li><a href="overview-tree.html">Tree</a></li>
<li><a href="index-all.html">Index</a></li>
<li><a href="help-doc.html#all-classes">Help</a></li>
</ul>
</div>
<div class="sub-nav">
<div id="navbar-sub-list"></div>
<div class="nav-list-search"><a href="search.html">SEARCH</a>
<input type="text" id="search-input" disabled placeholder="Search">
<input type="reset" id="reset-button" disabled value="reset">
</div>
</div>
<!-- ========= END OF TOP NAVBAR ========= -->
<span class="skip-nav" id="skip-navbar-top"></span></nav>
</header>
<div class="flex-content">
<main role="main">
<div class="header">
<h1 title="All Classes and Interfaces" class="title">All Classes and Interfaces</h1>
</div>
<div id="all-classes-table">
<div class="table-tabs" role="tablist" aria-orientation="horizontal"><button id="all-classes-table-tab0" role="tab" aria-selected="true" aria-controls="all-classes-table.tabpanel" tabindex="0" onkeydown="switchTab(event)" onclick="show('all-classes-table', 'all-classes-table', 2)" class="active-table-tab">All Classes and Interfaces</button><button id="all-classes-table-tab2" role="tab" aria-selected="false" aria-controls="all-classes-table.tabpanel" tabindex="-1" onkeydown="switchTab(event)" onclick="show('all-classes-table', 'all-classes-table-tab2', 2)" class="table-tab">Classes</button><button id="all-classes-table-tab3" role="tab" aria-selected="false" aria-controls="all-classes-table.tabpanel" tabindex="-1" onkeydown="switchTab(event)" onclick="show('all-classes-table', 'all-classes-table-tab3', 2)" class="table-tab">Enums</button><button id="all-classes-table-tab5" role="tab" aria-selected="false" aria-controls="all-classes-table.tabpanel" tabindex="-1" onkeydown="switchTab(event)" onclick="show('all-classes-table', 'all-classes-table-tab5', 2)" class="table-tab">Exception Classes</button></div>
<div id="all-classes-table.tabpanel" role="tabpanel">
<div class="summary-table two-column-summary" aria-labelledby="all-classes-table-tab0">
<div class="table-header col-first">Class</div>
<div class="table-header col-last">Description</div>
<div class="col-first even-row-color all-classes-table all-classes-table-tab2"><a href="com/spotify/voyager/jni/Index.html" title="class in com.spotify.voyager.jni">Index</a></div>
<div class="col-last even-row-color all-classes-table all-classes-table-tab2">
<div class="block">A Voyager index, providing storage of floating-point vectors and the ability to efficiently
search among those vectors.</div>
</div>
<div class="col-first odd-row-color all-classes-table all-classes-table-tab2"><a href="com/spotify/voyager/jni/Index.QueryResults.html" title="class in com.spotify.voyager.jni">Index.QueryResults</a></div>
<div class="col-last odd-row-color all-classes-table all-classes-table-tab2">
<div class="block">A container for query results, returned by Index.</div>
</div>
<div class="col-first even-row-color all-classes-table all-classes-table-tab3"><a href="com/spotify/voyager/jni/Index.SpaceType.html" title="enum in com.spotify.voyager.jni">Index.SpaceType</a></div>
<div class="col-last even-row-color all-classes-table all-classes-table-tab3">
<div class="block">The space, also known as distance metric, to use when searching.</div>
</div>
<div class="col-first odd-row-color all-classes-table all-classes-table-tab3"><a href="com/spotify/voyager/jni/Index.StorageDataType.html" title="enum in com.spotify.voyager.jni">Index.StorageDataType</a></div>
<div class="col-last odd-row-color all-classes-table all-classes-table-tab3">
<div class="block">The data type to use when storing vectors on disk.</div>
</div>
<div class="col-first even-row-color all-classes-table all-classes-table-tab2"><a href="com/spotify/voyager/jni/utils/JniLibExtractor.html" title="class in com.spotify.voyager.jni.utils">JniLibExtractor</a></div>
<div class="col-last even-row-color all-classes-table all-classes-table-tab2">&nbsp;</div>
<div class="col-first odd-row-color all-classes-table all-classes-table-tab5"><a href="com/spotify/voyager/jni/exception/RecallException.html" title="class in com.spotify.voyager.jni.exception">RecallException</a></div>
<div class="col-last odd-row-color all-classes-table all-classes-table-tab5">
<div class="block">An exception that indicates an error about the recall performance of the index.</div>
</div>
<div class="col-first even-row-color all-classes-table all-classes-table-tab2"><a href="com/spotify/voyager/jni/StringIndex.html" title="class in com.spotify.voyager.jni">StringIndex</a></div>
<div class="col-last even-row-color all-classes-table all-classes-table-tab2">
<div class="block">Wrapper around com.spotify.voyager.jni.Index with a simplified interface which maps the index ID
to a provided String.</div>
</div>
<div class="col-first odd-row-color all-classes-table all-classes-table-tab2"><a href="com/spotify/voyager/jni/StringIndex.QueryResults.html" title="class in com.spotify.voyager.jni">StringIndex.QueryResults</a></div>
<div class="col-last odd-row-color all-classes-table all-classes-table-tab2">
<div class="block">A wrapper class for nearest neighbor query results.</div>
</div>
<div class="col-first even-row-color all-classes-table all-classes-table-tab2"><a href="com/spotify/voyager/jni/utils/TinyJson.html" title="class in com.spotify.voyager.jni.utils">TinyJson</a></div>
<div class="col-last even-row-color all-classes-table all-classes-table-tab2">
<div class="block">A dependency-free, super tiny JSON serde class that only supports reading and writing lists of
strings.</div>
</div>
</div>
</div>
</div>
</main>
<footer role="contentinfo">
<hr>
<p class="legal-copy"><small>Copyright &#169; 2024. All rights reserved.</small></p>
</footer>
</div>
</div>
</body>
</html>
Loading
Loading