-
Notifications
You must be signed in to change notification settings - Fork 79
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add CAGRA support with latest RAFT #175
Changes from all commits
5483b82
b9a1b34
c869f27
075c4e2
e7a9e58
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,19 +12,28 @@ | |
# License for the specific language governing permissions and limitations under | ||
# the License | ||
|
||
cmake_minimum_required(VERSION 3.23.0 FATAL_ERROR) | ||
project(knowhere CXX C) | ||
cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) | ||
|
||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON) | ||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/") | ||
include(GNUInstallDirs) | ||
include(ExternalProject) | ||
include(cmake/utils/utils.cmake) | ||
|
||
knowhere_option(WITH_RAFT "Build with RAFT indexes" OFF) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Moved this up because CMAKE_CUDA_ARCHITECTURES needs to be filled in before initializing the project. |
||
# Choose the project languages based on WITH_RAFT: CUDA is added only for RAFT builds. | ||
# The architecture default is set before project() so it is in place when the | ||
# project (and its CUDA language) is initialized. | ||
if (WITH_RAFT) | ||
# Only supply a default when the caller has not already chosen architectures. | ||
if("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "") | ||
# RAPIDS is a symbolic value; per the review note below it enables all | ||
# RAPIDS-supported architectures. NOTE(review): presumably resolved by the | ||
# RAPIDS CMake helpers pulled in via librapids.cmake -- confirm. | ||
set(CMAKE_CUDA_ARCHITECTURES RAPIDS) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This enables all RAPIDS-supported architectures. |
||
endif() | ||
# Included before project() in the RAFT branch only. | ||
include(cmake/libs/librapids.cmake) | ||
project(knowhere CXX C CUDA) | ||
else() | ||
# Non-RAFT builds do not enable the CUDA language here. | ||
project(knowhere CXX C) | ||
endif() | ||
|
||
knowhere_option(WITH_UT "Build with UT test" OFF) | ||
knowhere_option(WITH_ASAN "Build with ASAN" OFF) | ||
knowhere_option(WITH_DISKANN "Build with diskann index" OFF) | ||
knowhere_option(WITH_RAFT "Build with RAFT indexes" OFF) | ||
knowhere_option(WITH_BENCHMARK "Build with benchmark" OFF) | ||
knowhere_option(WITH_COVERAGE "Build with coverage" OFF) | ||
knowhere_option(WITH_CCACHE "Build with ccache" ON) | ||
|
@@ -64,18 +73,6 @@ endif() | |
|
||
list( APPEND CMAKE_MODULE_PATH ${CMAKE_BINARY_DIR}/) | ||
|
||
if(WITH_RAFT) | ||
if("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "") | ||
set(CMAKE_CUDA_ARCHITECTURES 86;80;75;70;61) | ||
endif() | ||
enable_language(CUDA) | ||
find_package(CUDAToolkit REQUIRED) | ||
if(${CUDAToolkit_VERSION_MAJOR} GREATER 10) | ||
# cuda11 support --threads for compile some large .cu more efficient | ||
add_compile_options($<$<COMPILE_LANGUAGE:CUDA>:--threads=4>) | ||
endif() | ||
endif() | ||
|
||
add_definitions(-DNOT_COMPILE_FOR_SWIG) | ||
|
||
include(cmake/utils/compile_flags.cmake) | ||
|
@@ -113,8 +110,7 @@ if(WITH_COVERAGE) | |
endif() | ||
|
||
knowhere_file_glob(GLOB_RECURSE KNOWHERE_SRCS src/common/*.cc src/index/*.cc | ||
src/io/*.cc src/index/*.cu src/common/raft/*.cu | ||
src/common/raft/*.cc) | ||
src/io/*.cc src/common/*.cu src/index/*.cu src/io/*.cu) | ||
|
||
set(KNOWHERE_LINKER_LIBS "") | ||
|
||
|
@@ -127,13 +123,13 @@ else() | |
endif() | ||
|
||
knowhere_file_glob(GLOB_RECURSE KNOWHERE_GPU_SRCS src/index/gpu/flat_gpu/*.cc | ||
src/index/gpu/ivf_gpu/*.cc src/index/cagra/*.cu) | ||
src/index/gpu/ivf_gpu/*.cc) | ||
list(REMOVE_ITEM KNOWHERE_SRCS ${KNOWHERE_GPU_SRCS}) | ||
|
||
if(NOT WITH_RAFT) | ||
knowhere_file_glob(GLOB_RECURSE KNOWHERE_RAFT_SRCS src/index/ivf_raft/*.cc | ||
src/index/ivf_raft/*.cu src/index/cagra/*.cu | ||
src/common/raft/*.cu src/common/raft/*.cc) | ||
knowhere_file_glob(GLOB_RECURSE KNOWHERE_RAFT_SRCS | ||
src/common/raft/*.cu src/common/raft/*.cc | ||
src/index/gpu_raft/*.cc) | ||
list(REMOVE_ITEM KNOWHERE_SRCS ${KNOWHERE_RAFT_SRCS}) | ||
endif() | ||
|
||
|
@@ -150,7 +146,7 @@ list(APPEND KNOWHERE_LINKER_LIBS ${FOLLY_LIBRARIES}) | |
add_library(knowhere SHARED ${KNOWHERE_SRCS}) | ||
add_dependencies(knowhere ${KNOWHERE_LINKER_LIBS}) | ||
if(WITH_RAFT) | ||
list(APPEND KNOWHERE_LINKER_LIBS raft::raft) | ||
list(APPEND KNOWHERE_LINKER_LIBS raft::raft CUDA::cublas CUDA::cusparse CUDA::cusolver) | ||
endif() | ||
target_link_libraries(knowhere PUBLIC ${KNOWHERE_LINKER_LIBS}) | ||
target_include_directories( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -73,6 +73,60 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test { | |
} | ||
} | ||
|
||
// Benchmarks CAGRA search throughput for each target recall in EXPECTED_RECALLs_. | ||
// For every target, MAX_ITERATIONS is tuned by a binary search, then `task` is | ||
// timed once per entry of THREAD_NUMs_ and the timings are printed to stdout. | ||
// `cfg` is copied into a local config, so the caller's Json is left untouched. | ||
void | ||
test_cagra(const knowhere::Json& cfg) { | ||
auto conf = cfg; | ||
|
||
// Binary-searches MAX_ITERATIONS in [32, 256] for the given target recall. | ||
// The lambda captures `conf` by reference, so every probe overwrites | ||
// conf[MAX_ITERATIONS] as a side effect. | ||
// Returns the probed value as soon as recall is within 1e-4 of the target; | ||
// otherwise returns `left` once the interval is exhausted. | ||
// NOTE(review): assumes recall does not decrease as max_iterations grows -- confirm. | ||
// NOTE(review): if no probe reaches the target, `left` ends at 257, one past the | ||
// searched range -- confirm this is acceptable to the index. | ||
auto find_smallest_max_iters = [&](float expected_recall) -> int32_t { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Finding the best |
||
auto ds_ptr = knowhere::GenDataSet(nq_, dim_, xq_); | ||
auto left = 32; | ||
auto right = 256; | ||
auto max_iterations = left; | ||
|
||
// Assigned on every pass before it is read; the loop body runs at least once | ||
// because left (32) <= right (256) on entry. | ||
float recall; | ||
while (left <= right) { | ||
// Midpoint written as left + (right - left) / 2. | ||
max_iterations = left + (right - left) / 2; | ||
conf[knowhere::indexparam::MAX_ITERATIONS] = max_iterations; | ||
|
||
// NOTE(review): the search result is consumed through value() with no visible | ||
// status check -- confirm Search cannot fail here. | ||
auto result = index_.Search(*ds_ptr, conf, nullptr); | ||
recall = CalcRecall(result.value()->GetIds(), nq_, topk_); | ||
printf( | ||
"[%0.3f s] iterate CAGRA param for recall %.4f: max_iterations=%d, k=%d, " | ||
"R@=%.4f\n", | ||
get_time_diff(), expected_recall, max_iterations, topk_, recall); | ||
std::fflush(stdout); | ||
// Close enough to the target: stop early with the current probe. | ||
if (std::abs(recall - expected_recall) <= 0.0001) { | ||
return max_iterations; | ||
} | ||
// Recall too low -> search the upper half; otherwise search the lower half. | ||
if (recall < expected_recall) { | ||
left = max_iterations + 1; | ||
} else { | ||
right = max_iterations - 1; | ||
} | ||
} | ||
return left; | ||
}; | ||
|
||
for (auto expected_recall : EXPECTED_RECALLs_) { | ||
// ITOPK_SIZE is topk_ rounded up to the next multiple of 32. | ||
conf[knowhere::indexparam::ITOPK_SIZE] = ((int{topk_} + 32 - 1) / 32) * 32; | ||
conf[knowhere::meta::TOPK] = topk_; | ||
// ITOPK_SIZE and TOPK are set first so the tuning searches run with them. | ||
conf[knowhere::indexparam::MAX_ITERATIONS] = find_smallest_max_iters(expected_recall); | ||
|
||
printf( | ||
"\n[%0.3f s] %s | %s | k=%d, " | ||
"R@=%.4f\n", | ||
get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(), topk_, expected_recall); | ||
printf("================================================================================\n"); | ||
for (auto thread_num : THREAD_NUMs_) { | ||
// NOTE(review): t_diff is presumably defined by the CALC_TIME_SPAN macro -- confirm. | ||
CALC_TIME_SPAN(task(conf, thread_num, nq_)); | ||
// VPS is reported as nq_ / t_diff. | ||
printf(" thread_num = %2d, elapse = %6.3fs, VPS = %.3f\n", thread_num, t_diff, nq_ / t_diff); | ||
std::fflush(stdout); | ||
} | ||
printf("================================================================================\n"); | ||
printf("[%.3f s] Test '%s/%s' done\n\n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str()); | ||
} | ||
} | ||
|
||
void | ||
test_hnsw(const knowhere::Json& cfg) { | ||
auto conf = cfg; | ||
|
@@ -221,6 +275,9 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test { | |
#ifdef KNOWHERE_WITH_GPU | ||
knowhere::KnowhereConfig::InitGPUResource(GPU_DEVICE_ID, 2); | ||
cfg_[knowhere::meta::DEVICE_ID] = GPU_DEVICE_ID; | ||
#endif | ||
#ifdef KNOWHERE_WITH_RAFT | ||
knowhere::KnowhereConfig::SetRaftMemPool(); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Initialize RAFT resources with sensible defaults for benchmarking. |
||
#endif | ||
} | ||
|
||
|
@@ -251,6 +308,9 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test { | |
// SCANN index params | ||
const std::vector<int32_t> SCANN_REORDER_K = {256, 512, 768, 1024}; | ||
const std::vector<bool> SCANN_WITH_RAW_DATA = {true}; | ||
|
||
// CAGRA index params | ||
const std::vector<int32_t> GRAPH_DEGREE_ = {32, 64}; | ||
}; | ||
|
||
TEST_F(Benchmark_float_qps, TEST_IVF_FLAT) { | ||
|
@@ -344,3 +404,15 @@ TEST_F(Benchmark_float_qps, TEST_SCANN) { | |
} | ||
} | ||
} | ||
// QPS benchmark for the RAFT CAGRA index: for every graph degree in GRAPH_DEGREE_, | ||
// create the index with that configuration and run test_cagra on it. | ||
TEST_F(Benchmark_float_qps, TEST_CAGRA) { | ||
index_type_ = knowhere::IndexEnum::INDEX_RAFT_CAGRA; | ||
// Start from the fixture-wide config; per-degree parameters are layered on top. | ||
knowhere::Json conf = cfg_; | ||
for (auto gd : GRAPH_DEGREE_) { | ||
// The intermediate graph degree is set to the same value as the final degree. | ||
conf[knowhere::indexparam::GRAPH_DEGREE] = gd; | ||
conf[knowhere::indexparam::INTERMEDIATE_GRAPH_DEGREE] = gd; | ||
// Initial value handed to create_index and test_cagra; test_cagra overwrites | ||
// MAX_ITERATIONS in its own copy of the config while tuning for recall. | ||
conf[knowhere::indexparam::MAX_ITERATIONS] = 64; | ||
// The index file name is derived from the graph degree. | ||
std::string index_file_name = get_index_name({gd}); | ||
create_index(index_file_name, conf); | ||
test_cagra(conf); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -107,6 +107,12 @@ class KnowhereConfig { | |
*/ | ||
static void | ||
SetRaftMemPool(size_t init_size, size_t max_size); | ||
|
||
/** | ||
* Initialize RAFT with defaults | ||
*/ | ||
static void | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Introduce method for initializing RAFT with sensible defaults. |
||
SetRaftMemPool(); | ||
}; | ||
|
||
} // namespace knowhere | ||
|
This file was deleted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Required for RAPIDS CMake used in RAFT 23.12.