Skip to content

Commit

Permalink
Throw custom RecallError/RecallException when the number of requested…
Browse files Browse the repository at this point in the history
… neighbors cannot be returned (#88)

* Throw a custom RecallError when number of requested neighbors cannot be returned. Add tests to reproduce error

* Propagate C++ RecallError to Python and Java bindings. Regenerate Java docs
  • Loading branch information
stephen29xie authored Oct 2, 2024
1 parent 88cfc46 commit 4dc7b6d
Show file tree
Hide file tree
Showing 67 changed files with 9,571 additions and 22 deletions.
25 changes: 17 additions & 8 deletions cpp/src/TypedIndex.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@
#include "hnswlib.h"
#include "std_utils.h"

/**
 * Exception thrown when a query cannot return the requested number of
 * neighbors (i.e. the search produced a result set whose size differs from
 * k).
 *
 * Derives from std::runtime_error, so handlers that already catch
 * std::runtime_error or std::exception keep working, while callers that want
 * to react to recall problems specifically can catch RecallError.
 */
class RecallError : public std::runtime_error {
public:
  /**
   * @param what Human-readable description of the failure; returned verbatim
   *             by what().
   *
   * Declared explicit so that a std::string is never silently converted into
   * an exception object.
   */
  explicit RecallError(const std::string &what) : std::runtime_error(what) {}
};

template <typename T> inline const StorageDataType storageDataType();
template <typename T> inline const std::string storageDataTypeName();

Expand Down Expand Up @@ -569,10 +574,11 @@ class TypedIndex : public Index {
nullptr, queryEf);

if (result.size() != (unsigned long)k) {
throw std::runtime_error(
throw RecallError(
"Fewer than expected results were retrieved; only found " +
std::to_string(result.size()) + " of " + std::to_string(k) +
" requested neighbors.");
" requested neighbors. Reconstruct the index with a higher M "
"value to increase recall.");
}

for (int i = k - 1; i >= 0; i--) {
Expand Down Expand Up @@ -606,10 +612,11 @@ class TypedIndex : public Index {
queryEf);

if (result.size() != (unsigned long)k) {
throw std::runtime_error(
throw RecallError(
"Fewer than expected results were retrieved; only found " +
std::to_string(result.size()) + " of " + std::to_string(k) +
" requested neighbors.");
" requested neighbors. Reconstruct the index with a higher M "
"value to increase recall.");
}

for (int i = k - 1; i >= 0; i--) {
Expand Down Expand Up @@ -662,10 +669,11 @@ class TypedIndex : public Index {
algorithmImpl->searchKnn(queryVector.data(), k, nullptr, queryEf);

if (result.size() != (unsigned long)k) {
throw std::runtime_error(
throw RecallError(
"Fewer than expected results were retrieved; only found " +
std::to_string(result.size()) + " of " + std::to_string(k) +
" requested neighbors.");
" requested neighbors. Reconstruct the index with a higher M value "
"to increase recall.");
}

for (int i = k - 1; i >= 0; i--) {
Expand All @@ -683,10 +691,11 @@ class TypedIndex : public Index {
algorithmImpl->searchKnn(norm_array.data(), k, nullptr, queryEf);

if (result.size() != (unsigned long)k) {
throw std::runtime_error(
throw RecallError(
"Fewer than expected results were retrieved; only found " +
std::to_string(result.size()) + " of " + std::to_string(k) +
" requested neighbors.");
" requested neighbors. Reconstruct the index with a higher M value "
"to increase recall.");
}

for (int i = k - 1; i >= 0; i--) {
Expand Down
106 changes: 96 additions & 10 deletions cpp/test/test_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ template <typename dist_t, typename data_t = dist_t,
void testQuery(TypedIndex<dist_t, data_t, scalefactor> &index, int numVectors,
int numDimensions, SpaceType spaceType,
StorageDataType storageType, bool testSingleVectorMethod,
float precisionTolerance) {
float precisionTolerance, int k) {
/**
* Create test data and ids. If we are using Float8 or E4M3 storage, quantize
* the vector values, if we are using Float32 storage, keep the float values
Expand All @@ -53,7 +53,6 @@ void testQuery(TypedIndex<dist_t, data_t, scalefactor> &index, int numVectors,
index.addItems(inputData, ids, -1);
}

int k = 1;
float lowerBound = 0.0f - precisionTolerance;
float upperBound = 0.0f + precisionTolerance;

Expand Down Expand Up @@ -120,8 +119,94 @@ void testQuery(TypedIndex<dist_t, data_t, scalefactor> &index, int numVectors,
}
}

/**
 * Fill the index with numVectors random vectors, then ask for as many
 * neighbors as there are items in the index and assert that the query throws
 * RecallError.
 *
 * Items are inserted one at a time via addItem() when testSingleVectorMethod
 * is set, otherwise in a single addItems() call.
 */
template <typename dist_t, typename data_t = dist_t,
          typename scalefactor = std::ratio<1, 1>>
void testQueryAllNearestNeighbors(
    TypedIndex<dist_t, data_t, scalefactor> &index, int numVectors,
    int numDimensions, bool testSingleVectorMethod) {
  std::vector<std::vector<float>> vectors =
      randomVectors(numVectors, numDimensions);

  // Labels are 0..numVectors-1, so each label is also the row of its vector.
  std::vector<hnswlib::labeltype> labels(numVectors);
  for (int row = 0; row < numVectors; ++row) {
    labels[row] = row;
  }

  // Populate the index through the requested insertion path.
  if (!testSingleVectorMethod) {
    index.addItems(vectors, labels, -1);
  } else {
    for (auto label : labels) {
      index.addItem(vectors[label], label);
    }
  }
  REQUIRE(index.getNumElements() == numVectors);

  // Query with the first inserted vector, requesting every item in the index.
  std::vector<float> queryVector = vectors[0];
  REQUIRE_THROWS_AS(index.query(queryVector, numVectors, -1), RecallError);
}

/**
 * This test reproduces https://github.com/spotify/voyager/issues/38, an issue
 * where we cannot achieve 100% recall. testQueryAllNearestNeighbors() asserts
 * that a custom RecallError is thrown.
 *
 * Every combination of space type and storage data type listed below is run
 * with a single dimensionality (32) and a single index size (30000 vectors).
 */
TEST_CASE(
"Test querying for kNN when k equals the number of items in the index") {
// Parameter grid; the nested loops below visit the full cross product.
std::vector<SpaceType> spaceTypesSet = {
SpaceType::Euclidean, SpaceType::InnerProduct, SpaceType::Cosine};
std::vector<StorageDataType> storageTypesSet = {
StorageDataType::Float8, StorageDataType::Float32, StorageDataType::E4M3};
std::vector<int> numDimensionsSet = {32};
std::vector<int> numVectorsSet = {30000};
// Only `true` is listed, so items are always inserted one by one through
// addItem(); the bulk addItems() path is not exercised by this test case.
std::vector<bool> testSingleVectorMethods = {true};

// Use a small M value to exacerbate the issue where a graph becomes
// disconnected. This helps to reproduce this nondeterministic issue.
size_t M_ = 4;

for (auto spaceType : spaceTypesSet) {
for (auto storageType : storageTypesSet) {
for (auto numDimensions : numDimensionsSet) {
for (auto numVectors : numVectorsSet) {
for (auto testSingleVectorMethod : testSingleVectorMethods) {

SUBCASE("Test instantiation ") {
// Record the current parameters so a failure report identifies the
// combination that failed.
CAPTURE(spaceType);
CAPTURE(numDimensions);
CAPTURE(numVectors);
CAPTURE(storageType);
CAPTURE(std::to_string(testSingleVectorMethod));

// The storage data type is a template argument of TypedIndex, so each
// runtime value has to be mapped to its own instantiation.
if (storageType == StorageDataType::Float8) {
auto index = TypedIndex<float, int8_t, std::ratio<1, 127>>(
spaceType, numDimensions, M_);
testQueryAllNearestNeighbors(index, numVectors, numDimensions,
testSingleVectorMethod);
} else if (storageType == StorageDataType::Float32) {
auto index = TypedIndex<float>(spaceType, numDimensions, M_);
testQueryAllNearestNeighbors(index, numVectors, numDimensions,
testSingleVectorMethod);
} else if (storageType == StorageDataType::E4M3) {
auto index =
TypedIndex<float, E4M3>(spaceType, numDimensions, M_);
testQueryAllNearestNeighbors(index, numVectors, numDimensions,
testSingleVectorMethod);
}
}
}
}
}
}
}
}

TEST_CASE("Test combinations of different instantiations. Test that each "
"vector's NN is itself and distance is approximately zero.") {
"vector's ANN is itself and distance is approximately zero.") {
std::unordered_map<StorageDataType, float> PRECISION_TOLERANCE_PER_DATA_TYPE =
{{StorageDataType::Float32, 0.00001f},
{StorageDataType::Float8, 0.10f},
Expand All @@ -133,6 +218,7 @@ TEST_CASE("Test combinations of different instantiations. Test that each "
std::vector<StorageDataType> storageTypesSet = {
StorageDataType::Float8, StorageDataType::Float32, StorageDataType::E4M3};
std::vector<bool> testSingleVectorMethods = {true, false};
int k = 1;

for (auto spaceType : spaceTypesSet) {
for (auto storageType : storageTypesSet) {
Expand All @@ -154,21 +240,21 @@ TEST_CASE("Test combinations of different instantiations. Test that each "
storageType);
testQuery(index, numVectors, numDimensions, spaceType,
storageType, testSingleVectorMethod,
PRECISION_TOLERANCE_PER_DATA_TYPE[storageType]);
PRECISION_TOLERANCE_PER_DATA_TYPE[storageType], k);
} else if (storageType == StorageDataType::Float32) {
auto index = TypedIndex<float>(spaceType, numDimensions);
testIndexProperties(index, spaceType, numDimensions,
storageType);
testQuery(index, numVectors, numDimensions, spaceType,
storageType, testSingleVectorMethod,
PRECISION_TOLERANCE_PER_DATA_TYPE[storageType]);
PRECISION_TOLERANCE_PER_DATA_TYPE[storageType], k);
} else if (storageType == StorageDataType::E4M3) {
auto index = TypedIndex<float, E4M3>(spaceType, numDimensions);
testIndexProperties(index, spaceType, numDimensions,
storageType);
testQuery(index, numVectors, numDimensions, spaceType,
storageType, testSingleVectorMethod,
PRECISION_TOLERANCE_PER_DATA_TYPE[storageType]);
PRECISION_TOLERANCE_PER_DATA_TYPE[storageType], k);
}
}
}
Expand All @@ -178,8 +264,8 @@ TEST_CASE("Test combinations of different instantiations. Test that each "
}
}

TEST_CASE("Test vectorsToNDArray converts 2D vector of float to NDArray<float, "
"2>") {
TEST_CASE(
"Test vectorsToNDArray converts 2D vector of float to NDArray<float,2>") {
std::vector<std::vector<float>> vectors = {{1.0f, 2.0f, 3.0f, 4.0f},
{5.0f, 6.0f, 7.0f, 8.0f},
{9.0f, 10.0f, 11.0f, 12.0f}};
Expand All @@ -205,8 +291,8 @@ TEST_CASE("Test vectorsToNDArray converts 2D vector of float to NDArray<float, "
REQUIRE(*ndArray[2] == 9.0f);
}

TEST_CASE("Test vectorsToNDArray throws error if vectors are not of the same "
"size") {
TEST_CASE(
"Test vectorsToNDArray throws error if vectors are not of the same size") {
std::vector<std::vector<float>> vectors1 = {{1.0f, 2.0f, 3.0f, 4.0f},
{5.0f, 6.0f, 7.0f},
{9.0f, 10.0f, 11.0f, 12.0f}};
Expand Down
108 changes: 108 additions & 0 deletions docs/java/apidocs/allclasses-index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
<!DOCTYPE HTML>
<html lang>
<head>
<!-- Generated by javadoc (21) on Thu Sep 26 00:30:36 EDT 2024 -->
<title>All Classes and Interfaces (voyager 2.0.9 API)</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta name="dc.created" content="2024-09-26">
<meta name="description" content="class index">
<meta name="generator" content="javadoc/AllClassesIndexWriter">
<link rel="stylesheet" type="text/css" href="stylesheet.css" title="Style">
<link rel="stylesheet" type="text/css" href="script-dir/jquery-ui.min.css" title="Style">
<script type="text/javascript" src="script.js"></script>
<script type="text/javascript" src="script-dir/jquery-3.6.1.min.js"></script>
<script type="text/javascript" src="script-dir/jquery-ui.min.js"></script>
</head>
<body class="all-classes-index-page">
<script type="text/javascript">var pathtoroot = "./";
loadScripts(document, 'script');</script>
<noscript>
<div>JavaScript is disabled on your browser.</div>
</noscript>
<div class="flex-box">
<header role="banner" class="flex-header">
<nav role="navigation">
<!-- ========= START OF TOP NAVBAR ======= -->
<div class="top-nav" id="navbar-top"><button id="navbar-toggle-button" aria-controls="navbar-top" aria-expanded="false" aria-label="Toggle navigation links"><span class="nav-bar-toggle-icon">&nbsp;</span><span class="nav-bar-toggle-icon">&nbsp;</span><span class="nav-bar-toggle-icon">&nbsp;</span></button>
<div class="skip-nav"><a href="#skip-navbar-top" title="Skip navigation links">Skip navigation links</a></div>
<ul id="navbar-top-firstrow" class="nav-list" title="Navigation">
<li><a href="index.html">Overview</a></li>
<li>Package</li>
<li>Class</li>
<li>Use</li>
<li><a href="overview-tree.html">Tree</a></li>
<li><a href="index-all.html">Index</a></li>
<li><a href="help-doc.html#all-classes">Help</a></li>
</ul>
</div>
<div class="sub-nav">
<div id="navbar-sub-list"></div>
<div class="nav-list-search"><a href="search.html">SEARCH</a>
<input type="text" id="search-input" disabled placeholder="Search">
<input type="reset" id="reset-button" disabled value="reset">
</div>
</div>
<!-- ========= END OF TOP NAVBAR ========= -->
<span class="skip-nav" id="skip-navbar-top"></span></nav>
</header>
<div class="flex-content">
<main role="main">
<div class="header">
<h1 title="All Classes and Interfaces" class="title">All Classes and Interfaces</h1>
</div>
<div id="all-classes-table">
<div class="table-tabs" role="tablist" aria-orientation="horizontal"><button id="all-classes-table-tab0" role="tab" aria-selected="true" aria-controls="all-classes-table.tabpanel" tabindex="0" onkeydown="switchTab(event)" onclick="show('all-classes-table', 'all-classes-table', 2)" class="active-table-tab">All Classes and Interfaces</button><button id="all-classes-table-tab2" role="tab" aria-selected="false" aria-controls="all-classes-table.tabpanel" tabindex="-1" onkeydown="switchTab(event)" onclick="show('all-classes-table', 'all-classes-table-tab2', 2)" class="table-tab">Classes</button><button id="all-classes-table-tab3" role="tab" aria-selected="false" aria-controls="all-classes-table.tabpanel" tabindex="-1" onkeydown="switchTab(event)" onclick="show('all-classes-table', 'all-classes-table-tab3', 2)" class="table-tab">Enums</button><button id="all-classes-table-tab5" role="tab" aria-selected="false" aria-controls="all-classes-table.tabpanel" tabindex="-1" onkeydown="switchTab(event)" onclick="show('all-classes-table', 'all-classes-table-tab5', 2)" class="table-tab">Exception Classes</button></div>
<div id="all-classes-table.tabpanel" role="tabpanel">
<div class="summary-table two-column-summary" aria-labelledby="all-classes-table-tab0">
<div class="table-header col-first">Class</div>
<div class="table-header col-last">Description</div>
<div class="col-first even-row-color all-classes-table all-classes-table-tab2"><a href="com/spotify/voyager/jni/Index.html" title="class in com.spotify.voyager.jni">Index</a></div>
<div class="col-last even-row-color all-classes-table all-classes-table-tab2">
<div class="block">A Voyager index, providing storage of floating-point vectors and the ability to efficiently
search among those vectors.</div>
</div>
<div class="col-first odd-row-color all-classes-table all-classes-table-tab2"><a href="com/spotify/voyager/jni/Index.QueryResults.html" title="class in com.spotify.voyager.jni">Index.QueryResults</a></div>
<div class="col-last odd-row-color all-classes-table all-classes-table-tab2">
<div class="block">A container for query results, returned by Index.</div>
</div>
<div class="col-first even-row-color all-classes-table all-classes-table-tab3"><a href="com/spotify/voyager/jni/Index.SpaceType.html" title="enum in com.spotify.voyager.jni">Index.SpaceType</a></div>
<div class="col-last even-row-color all-classes-table all-classes-table-tab3">
<div class="block">The space, also known as distance metric, to use when searching.</div>
</div>
<div class="col-first odd-row-color all-classes-table all-classes-table-tab3"><a href="com/spotify/voyager/jni/Index.StorageDataType.html" title="enum in com.spotify.voyager.jni">Index.StorageDataType</a></div>
<div class="col-last odd-row-color all-classes-table all-classes-table-tab3">
<div class="block">The datatype to use when storing vectors on disk.</div>
</div>
<div class="col-first even-row-color all-classes-table all-classes-table-tab2"><a href="com/spotify/voyager/jni/utils/JniLibExtractor.html" title="class in com.spotify.voyager.jni.utils">JniLibExtractor</a></div>
<div class="col-last even-row-color all-classes-table all-classes-table-tab2">&nbsp;</div>
<div class="col-first odd-row-color all-classes-table all-classes-table-tab5"><a href="com/spotify/voyager/jni/exception/RecallException.html" title="class in com.spotify.voyager.jni.exception">RecallException</a></div>
<div class="col-last odd-row-color all-classes-table all-classes-table-tab5">
<div class="block">An exception that indicates an error about the recall performance of the index.</div>
</div>
<div class="col-first even-row-color all-classes-table all-classes-table-tab2"><a href="com/spotify/voyager/jni/StringIndex.html" title="class in com.spotify.voyager.jni">StringIndex</a></div>
<div class="col-last even-row-color all-classes-table all-classes-table-tab2">
<div class="block">Wrapper around com.spotify.voyager.jni.Index with a simplified interface which maps the index ID
to a provided String.</div>
</div>
<div class="col-first odd-row-color all-classes-table all-classes-table-tab2"><a href="com/spotify/voyager/jni/StringIndex.QueryResults.html" title="class in com.spotify.voyager.jni">StringIndex.QueryResults</a></div>
<div class="col-last odd-row-color all-classes-table all-classes-table-tab2">
<div class="block">A wrapper class for nearest neighbor query results.</div>
</div>
<div class="col-first even-row-color all-classes-table all-classes-table-tab2"><a href="com/spotify/voyager/jni/utils/TinyJson.html" title="class in com.spotify.voyager.jni.utils">TinyJson</a></div>
<div class="col-last even-row-color all-classes-table all-classes-table-tab2">
<div class="block">A dependency-free, super tiny JSON serde class that only supports reading and writing lists of
strings.</div>
</div>
</div>
</div>
</div>
</main>
<footer role="contentinfo">
<hr>
<p class="legal-copy"><small>Copyright &#169; 2024. All rights reserved.</small></p>
</footer>
</div>
</div>
</body>
</html>
Loading

0 comments on commit 4dc7b6d

Please sign in to comment.