Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add CAGRA support with latest RAFT #175

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 18 additions & 22 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,28 @@
# License for the specific language governing permissions and limitations under
# the License

cmake_minimum_required(VERSION 3.23.0 FATAL_ERROR)
project(knowhere CXX C)
cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Required for RAPIDS CMake used in RAFT 23.12.


set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/")
include(GNUInstallDirs)
include(ExternalProject)
include(cmake/utils/utils.cmake)

knowhere_option(WITH_RAFT "Build with RAFT indexes" OFF)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved this up because CMAKE_CUDA_ARCHITECTURES needs to be filled in before initializing the project.

if (WITH_RAFT)
if("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "")
set(CMAKE_CUDA_ARCHITECTURES RAPIDS)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This enables all RAPIDS-supported architectures.

endif()
include(cmake/libs/librapids.cmake)
project(knowhere CXX C CUDA)
else()
project(knowhere CXX C)
endif()

knowhere_option(WITH_UT "Build with UT test" OFF)
knowhere_option(WITH_ASAN "Build with ASAN" OFF)
knowhere_option(WITH_DISKANN "Build with diskann index" OFF)
knowhere_option(WITH_RAFT "Build with RAFT indexes" OFF)
knowhere_option(WITH_BENCHMARK "Build with benchmark" OFF)
knowhere_option(WITH_COVERAGE "Build with coverage" OFF)
knowhere_option(WITH_CCACHE "Build with ccache" ON)
Expand All @@ -49,18 +58,6 @@ endif()

list( APPEND CMAKE_MODULE_PATH ${CMAKE_BINARY_DIR}/)

if(WITH_RAFT)
if("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "")
set(CMAKE_CUDA_ARCHITECTURES 86;80;75;70;61)
endif()
enable_language(CUDA)
find_package(CUDAToolkit REQUIRED)
if(${CUDAToolkit_VERSION_MAJOR} GREATER 10)
# CUDA 11+ supports --threads, which compiles large .cu files more efficiently
add_compile_options($<$<COMPILE_LANGUAGE:CUDA>:--threads=4>)
endif()
endif()

add_definitions(-DNOT_COMPILE_FOR_SWIG)

include(cmake/utils/compile_flags.cmake)
Expand Down Expand Up @@ -99,8 +96,7 @@ if(WITH_COVERAGE)
endif()

knowhere_file_glob(GLOB_RECURSE KNOWHERE_SRCS src/common/*.cc src/index/*.cc
src/io/*.cc src/index/*.cu src/common/raft/*.cu
src/common/raft/*.cc)
src/io/*.cc src/common/*.cu src/index/*.cu src/io/*.cu)

set(KNOWHERE_LINKER_LIBS "")

Expand All @@ -113,13 +109,13 @@ else()
endif()

knowhere_file_glob(GLOB_RECURSE KNOWHERE_GPU_SRCS src/index/gpu/flat_gpu/*.cc
src/index/gpu/ivf_gpu/*.cc src/index/cagra/*.cu)
src/index/gpu/ivf_gpu/*.cc)
list(REMOVE_ITEM KNOWHERE_SRCS ${KNOWHERE_GPU_SRCS})

if(NOT WITH_RAFT)
knowhere_file_glob(GLOB_RECURSE KNOWHERE_RAFT_SRCS src/index/ivf_raft/*.cc
src/index/ivf_raft/*.cu src/index/cagra/*.cu
src/common/raft/*.cu src/common/raft/*.cc)
knowhere_file_glob(GLOB_RECURSE KNOWHERE_RAFT_SRCS
src/common/raft/*.cu src/common/raft/*.cc
src/index/gpu_raft/*.cc)
list(REMOVE_ITEM KNOWHERE_SRCS ${KNOWHERE_RAFT_SRCS})
endif()

Expand All @@ -135,7 +131,7 @@ list(APPEND KNOWHERE_LINKER_LIBS ${FOLLY_LIBRARIES})
add_library(knowhere SHARED ${KNOWHERE_SRCS})
add_dependencies(knowhere ${KNOWHERE_LINKER_LIBS})
if(WITH_RAFT)
list(APPEND KNOWHERE_LINKER_LIBS raft::raft)
list(APPEND KNOWHERE_LINKER_LIBS raft::raft CUDA::cublas CUDA::cusparse CUDA::cusolver)
endif()
target_link_libraries(knowhere PUBLIC ${KNOWHERE_LINKER_LIBS})
target_include_directories(
Expand Down
81 changes: 79 additions & 2 deletions benchmark/hdf5/benchmark_float_qps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

#include <thread>
#include <vector>
#include <nvtx3/nvtx3.hpp>
wphicks marked this conversation as resolved.
Show resolved Hide resolved

#include "benchmark_knowhere.h"
#include "knowhere/comp/index_param.h"
Expand Down Expand Up @@ -73,6 +74,61 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test {
}
}

// Benchmark a CAGRA index: for every target recall, tune max_iterations by
// bisection and then time the search with each configured thread count.
//
// cfg: base configuration; it is copied, so the caller's object is not modified.
void
test_cagra(const knowhere::Json& cfg) {
    auto conf = cfg;

    // Review note from the change author: finding the best max_iterations has a
    // higher impact than searching over itopk, so only max_iterations is tuned.
    //
    // Bisects the range [32, 256]. Every probe writes the candidate value into
    // `conf` (captured by reference) and measures recall over all nq_ queries.
    // Returns the probed value as soon as recall is within 0.0001 of the
    // target; otherwise returns the lower bound left after the range collapses.
    auto find_smallest_max_iters = [&](float expected_recall) -> int32_t {
        auto ds_ptr = knowhere::GenDataSet(nq_, dim_, xq_);
        auto lo = 32;
        auto hi = 256;

        while (lo <= hi) {
            const auto candidate = lo + (hi - lo) / 2;
            conf[knowhere::indexparam::MAX_ITERATIONS] = candidate;

            auto result = index_.Search(*ds_ptr, conf, nullptr);
            const float recall = CalcRecall(result.value()->GetIds(), nq_, topk_);
            printf(
                "[%0.3f s] iterate CAGRA param for recall %.4f: max_iterations=%d, k=%d, "
                "R@=%.4f\n",
                get_time_diff(), expected_recall, candidate, topk_, recall);
            std::fflush(stdout);

            // Close enough to the target: stop probing.
            if (std::abs(recall - expected_recall) <= 0.0001) {
                return candidate;
            }

            // Recall too low -> search the upper half; otherwise the lower half.
            if (recall < expected_recall) {
                lo = candidate + 1;
            } else {
                hi = candidate - 1;
            }
        }
        return lo;
    };

    for (auto target_recall : EXPECTED_RECALLs_) {
        // itopk_size is topk_ rounded up to the next multiple of 32.
        const int itopk_size = ((int{topk_} + 32 - 1) / 32) * 32;
        conf[knowhere::indexparam::ITOPK_SIZE] = itopk_size;
        conf[knowhere::meta::TOPK] = topk_;

        const int32_t tuned_iterations = find_smallest_max_iters(target_recall);
        conf[knowhere::indexparam::MAX_ITERATIONS] = tuned_iterations;

        printf(
            "\n[%0.3f s] %s | %s | k=%d, "
            "R@=%.4f\n",
            get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(), topk_,
            target_recall);
        printf("================================================================================\n");

        // CALC_TIME_SPAN provides t_diff, the elapsed time of the wrapped call.
        for (auto worker_count : THREAD_NUMs_) {
            CALC_TIME_SPAN(task(conf, worker_count, nq_));
            printf(" thread_num = %2d, elapse = %6.3fs, VPS = %.3f\n", worker_count, t_diff, nq_ / t_diff);
            std::fflush(stdout);
        }

        printf("================================================================================\n");
        printf("[%.3f s] Test '%s/%s' done\n\n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str());
    }
}

void
test_hnsw(const knowhere::Json& cfg) {
auto conf = cfg;
Expand Down Expand Up @@ -183,10 +239,12 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test {
private:
void
task(const knowhere::Json& conf, int32_t worker_num, int32_t nq_total) {
NVTX3_FUNC_RANGE();
wphicks marked this conversation as resolved.
Show resolved Hide resolved
auto worker = [&](int32_t idx_start, int32_t num) {
num = std::min(num, nq_total - idx_start);
for (int32_t i = 0; i < num; i++) {
knowhere::DataSetPtr ds_ptr = knowhere::GenDataSet(1, dim_, (const float*)xq_ + (idx_start + i) * dim_);
auto loop_range = nvtx3::scoped_range{"loop range"};
wphicks marked this conversation as resolved.
Show resolved Hide resolved
index_.Search(*ds_ptr, conf, nullptr);
}
};
Expand Down Expand Up @@ -221,6 +279,10 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test {
#ifdef KNOWHERE_WITH_GPU
knowhere::KnowhereConfig::InitGPUResource(GPU_DEVICE_ID, 2);
cfg_[knowhere::meta::DEVICE_ID] = GPU_DEVICE_ID;
#endif
#ifdef KNOWHERE_WITH_RAFT
// knowhere::KnowhereConfig::SetRaftMemPool(24576, 36864);
wphicks marked this conversation as resolved.
Show resolved Hide resolved
knowhere::KnowhereConfig::SetRaftMemPool();
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Initialize RAFT resources with sensible defaults for benchmarking

#endif
}

Expand Down Expand Up @@ -251,6 +313,9 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test {
// SCANN index params
const std::vector<int32_t> SCANN_REORDER_K = {256, 512, 768, 1024};
const std::vector<bool> SCANN_WITH_RAW_DATA = {true};

// CAGRA index params
const std::vector<int32_t> GRAPH_DEGREE_ = {32, 64};
};

TEST_F(Benchmark_float_qps, TEST_IVF_FLAT) {
Expand All @@ -271,7 +336,7 @@ TEST_F(Benchmark_float_qps, TEST_IVF_FLAT) {
}
}

TEST_F(Benchmark_float_qps, TEST_IVF_SQ8) {
/* TEST_F(Benchmark_float_qps, TEST_IVF_SQ8) {
wphicks marked this conversation as resolved.
Show resolved Hide resolved
#ifdef KNOWHERE_WITH_GPU
index_type_ = knowhere::IndexEnum::INDEX_FAISS_GPU_IVFSQ8;
#else
Expand All @@ -285,7 +350,7 @@ TEST_F(Benchmark_float_qps, TEST_IVF_SQ8) {
create_index(index_file_name, conf);
test_ivf(conf);
}
}
} */

TEST_F(Benchmark_float_qps, TEST_IVF_PQ) {
#ifdef KNOWHERE_WITH_GPU
Expand Down Expand Up @@ -344,3 +409,15 @@ TEST_F(Benchmark_float_qps, TEST_SCANN) {
}
}
}
// QPS benchmark for the RAFT CAGRA index: for each configured graph degree,
// prepare the index through create_index() and run test_cagra() on it.
TEST_F(Benchmark_float_qps, TEST_CAGRA) {
    index_type_ = knowhere::IndexEnum::INDEX_RAFT_CAGRA;

    knowhere::Json conf = cfg_;
    for (auto graph_degree : GRAPH_DEGREE_) {
        // The intermediate graph degree is set to the same value as the final one.
        conf[knowhere::indexparam::GRAPH_DEGREE] = graph_degree;
        conf[knowhere::indexparam::INTERMEDIATE_GRAPH_DEGREE] = graph_degree;
        // Starting value only; test_cagra() re-tunes max_iterations per recall target.
        conf[knowhere::indexparam::MAX_ITERATIONS] = 64;

        std::string index_path = get_index_name({graph_degree});
        create_index(index_path, conf);
        test_cagra(conf);
    }
}
3 changes: 1 addition & 2 deletions benchmark/hdf5/benchmark_knowhere.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,10 @@ class Benchmark_knowhere : public Benchmark_hdf5 {
// IVFFLAT_NM should load raw data
if (index_type_ == knowhere::IndexEnum::INDEX_FAISS_IVFFLAT && binary_set.GetByName("RAW_DATA") == nullptr) {
knowhere::BinaryPtr bin = std::make_shared<knowhere::Binary>();
bin->data = std::shared_ptr<uint8_t[]>((uint8_t*)xb_);
bin->data = std::shared_ptr<uint8_t[]>((uint8_t*)xb_, [](const uint8_t[]) {});
wphicks marked this conversation as resolved.
Show resolved Hide resolved
bin->size = dim_ * nb_ * sizeof(float);
binary_set.Append("RAW_DATA", bin);
}

index.Deserialize(binary_set, conf);
}

Expand Down
21 changes: 7 additions & 14 deletions cmake/libs/libraft.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,15 @@
# the License.

add_definitions(-DKNOWHERE_WITH_RAFT)
include(cmake/utils/fetch_rapids.cmake)
include(rapids-cmake)
include(rapids-cpm)
include(rapids-cuda)
include(rapids-export)
include(rapids-find)

rapids_cpm_init()
set(RAFT_VERSION "${RAPIDS_VERSION}")
set(RAFT_FORK "wphicks")
set(RAFT_PINNED_TAG "bug-ivf_flat_filter")
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO: Revert after merge of rapidsai/raft#1831


set(CMAKE_CUDA_FLAGS
"${CMAKE_CUDA_FLAGS} --expt-extended-lambda --expt-relaxed-constexpr")

set(RAPIDS_VERSION 23.04)
set(RAFT_VERSION "${RAPIDS_VERSION}")
set(RAFT_FORK "rapidsai")
set(RAFT_PINNED_TAG "branch-${RAPIDS_VERSION}")
rapids_find_package(CUDAToolkit REQUIRED
BUILD_EXPORT_SET knowhere-exports
INSTALL_EXPORT_SET knowhere-exports
)

function(find_and_configure_raft)
set(oneValueArgs VERSION FORK PINNED_TAG)
Expand Down
14 changes: 13 additions & 1 deletion cmake/utils/fetch_rapids.cmake → cmake/libs/librapids.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# License for the specific language governing permissions and limitations under
# the License.

set(RAPIDS_VERSION "23.04")
set(RAPIDS_VERSION 23.12)

if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/RAPIDS.cmake)
file(
Expand All @@ -22,3 +22,15 @@ if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/RAPIDS.cmake)
${CMAKE_CURRENT_BINARY_DIR}/RAPIDS.cmake)
endif()
include(${CMAKE_CURRENT_BINARY_DIR}/RAPIDS.cmake)

include(rapids-cpm) # Dependency tracking
include(rapids-find) # Wrappers for finding packages
include(rapids-cuda) # Common CMake CUDA logic

rapids_cuda_init_architectures(knowhere)
message(STATUS "INIT: ${CMAKE_CUDA_ARCHITECTURES}")

rapids_cpm_init()

set(CMAKE_CUDA_FLAGS
"${CMAKE_CUDA_FLAGS} --expt-extended-lambda --expt-relaxed-constexpr")
29 changes: 29 additions & 0 deletions include/knowhere/comp/index_param.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,35 @@ constexpr const char* M = "m"; // PQ param for IVFPQ
constexpr const char* SSIZE = "ssize";
constexpr const char* REORDER_K = "reorder_k";
constexpr const char* WITH_RAW_DATA = "with_raw_data";
// RAFT Params
// String keys for the parameters of RAFT-backed indexes. They are used to index
// a knowhere::Json config, e.g. conf[knowhere::indexparam::GRAPH_DEGREE] = 32.
// NOTE(review): the meaning and valid range of each value are defined by RAFT,
// not by this header — confirm against the RAFT version the build pins.
constexpr const char* REFINE_RATIO = "refine_ratio";
// RAFT-specific IVF Params
// The trailing comment on each key names the index family it applies to.
constexpr const char* KMEANS_N_ITERS = "kmeans_n_iters";
constexpr const char* KMEANS_TRAINSET_FRACTION = "kmeans_trainset_fraction";
constexpr const char* ADAPTIVE_CENTERS = "adaptive_centers"; // IVF FLAT
constexpr const char* CODEBOOK_KIND = "codebook_kind"; // IVF PQ
constexpr const char* FORCE_RANDOM_ROTATION = "force_random_rotation"; // IVF PQ
constexpr const char* CONSERVATIVE_MEMORY_ALLOCATION = "conservative_memory_allocation"; // IVF PQ
constexpr const char* LUT_DTYPE = "lut_dtype"; // IVF PQ
constexpr const char* INTERNAL_DISTANCE_DTYPE = "internal_distance_dtype"; // IVF PQ
constexpr const char* PREFERRED_SHMEM_CARVEOUT = "preferred_shmem_carveout"; // IVF PQ

// CAGRA Params
// NOTE(review): presumably these mirror the field names of RAFT's CAGRA
// index/search parameter structs — verify before relying on a key.
constexpr const char* INTERMEDIATE_GRAPH_DEGREE = "intermediate_graph_degree";
constexpr const char* GRAPH_DEGREE = "graph_degree";
constexpr const char* ITOPK_SIZE = "itopk_size";
constexpr const char* MAX_QUERIES = "max_queries";
constexpr const char* BUILD_ALGO = "build_algo";
constexpr const char* SEARCH_ALGO = "search_algo";
constexpr const char* TEAM_SIZE = "team_size";
constexpr const char* SEARCH_WIDTH = "search_width";
constexpr const char* MIN_ITERATIONS = "min_iterations";
constexpr const char* MAX_ITERATIONS = "max_iterations";
constexpr const char* THREAD_BLOCK_SIZE = "thread_block_size";
constexpr const char* HASHMAP_MODE = "hashmap_mode";
constexpr const char* HASHMAP_MIN_BITLEN = "hashmap_min_bitlen";
constexpr const char* HASHMAP_MAX_FILL_RATE = "hashmap_max_fill_rate";
constexpr const char* NN_DESCENT_NITER = "nn_descent_niter";

// HNSW Params
constexpr const char* EFCONSTRUCTION = "efConstruction";
Expand Down
6 changes: 6 additions & 0 deletions include/knowhere/comp/knowhere_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,12 @@ class KnowhereConfig {
*/
static void
SetRaftMemPool(size_t init_size, size_t max_size);

/**
* Initialize RAFT with defaults
*/
static void
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Introduce method for initializing RAFT with sensible defaults.

SetRaftMemPool();
};

} // namespace knowhere
Expand Down
Loading