Merge branch 'branch-24.04' into verify-copyright
KyleFromNVIDIA committed Mar 18, 2024
2 parents 44f1cec + 7335267 commit 4e9e4da
Showing 38 changed files with 440 additions and 347 deletions.
6 changes: 6 additions & 0 deletions README.md
@@ -1,8 +1,12 @@
# <div align="left"><img src="https://rapids.ai/assets/images/rapids_logo.png" width="90px"/>&nbsp;RAFT: Reusable Accelerated Functions and Tools for Vector Search and More</div>

+> [!IMPORTANT]
+> The vector search and clustering algorithms in RAFT are being migrated to a new library dedicated to vector search called [cuVS](https://github.com/rapidsai/cuvs). We will continue to support the vector search algorithms in RAFT during this move, but will no longer update them after the RAPIDS 24.06 (June) release. We plan to complete the migration by the RAPIDS 24.08 (August) release.
![RAFT tech stack](img/raft-tech-stack-vss.png)



## Contents
<hr>

@@ -77,6 +81,8 @@ Projects that use the RAFT ANNS algorithms for accelerating vector search include

Please see the example [Jupyter notebook](https://github.com/rapidsai/raft/blob/HEAD/notebooks/VectorSearch_QuestionRetrieval.ipynb) to get started with RAFT for vector search in Python.



### Information Retrieval

RAFT contains a catalog of reusable primitives for composing algorithms that require fast neighborhood computations, such as
8 changes: 5 additions & 3 deletions build.sh
@@ -1,6 +1,6 @@
#!/bin/bash

-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.

# raft build scripts

@@ -305,7 +305,7 @@ if hasArg --allgpuarch; then
BUILD_ALL_GPU_ARCH=1
fi

-if hasArg --compile-lib || (( ${NUMARGS} == 0 )); then
+if hasArg --compile-lib || hasArg pylibraft || (( ${NUMARGS} == 0 )); then
COMPILE_LIBRARY=ON
CMAKE_TARGET="${CMAKE_TARGET};raft_lib"
fi
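
For context on the hunk above: `hasArg` tests whether a given word was passed to build.sh, so listing `pylibraft` as a target now also switches on the C++ library compile. A minimal sketch of the usual RAPIDS-style helper (the real definition lives earlier in build.sh and is not shown in this diff, so treat the details as an assumption):

```bash
# Sketch of a build.sh-style argument test; NUMARGS and ARGS are captured at script start.
NUMARGS=$#
ARGS=$*
function hasArg {
    # True when the literal word $1 appears among the script's arguments.
    (( NUMARGS != 0 )) && (echo " ${ARGS} " | grep -q " $1 ")
}

hasArg pylibraft && echo "pylibraft requested, so raft_lib will be compiled too"
```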
@@ -405,7 +405,7 @@ fi

################################################################################
# Configure for building all C++ targets
-if (( ${NUMARGS} == 0 )) || hasArg libraft || hasArg docs || hasArg tests || hasArg bench-prims || hasArg bench-ann; then
+if (( ${NUMARGS} == 0 )) || hasArg libraft || hasArg docs || hasArg tests || hasArg bench-prims || hasArg bench-ann || ((${COMPILE_LIBRARY} == ON )); then
if (( ${BUILD_ALL_GPU_ARCH} == 0 )); then
RAFT_CMAKE_CUDA_ARCHITECTURES="NATIVE"
echo "Building for the architecture of the GPU in the system..."
@@ -512,6 +512,8 @@ fi

if hasArg docs; then
set -x
+export RAPIDS_VERSION="$(sed -E -e 's/^([0-9]{2})\.([0-9]{2})\.([0-9]{2}).*$/\1.\2.\3/' "${REPODIR}/VERSION")"
+export RAPIDS_VERSION_MAJOR_MINOR="$(sed -E -e 's/^([0-9]{2})\.([0-9]{2})\.([0-9]{2}).*$/\1.\2/' "${REPODIR}/VERSION")"
cd ${DOXYGEN_BUILD_DIR}
doxygen Doxyfile
cd ${SPHINX_BUILD_DIR}
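
The two exports above derive the docs version strings from the repository's VERSION file at build time. A worked example of the sed expressions, assuming a typical RAPIDS-style version string:

```bash
echo "24.04.00a1000" | sed -E -e 's/^([0-9]{2})\.([0-9]{2})\.([0-9]{2}).*$/\1.\2.\3/'
# -> 24.04.00  (RAPIDS_VERSION: the three two-digit fields, any suffix dropped)
echo "24.04.00a1000" | sed -E -e 's/^([0-9]{2})\.([0-9]{2})\.([0-9]{2}).*$/\1.\2/'
# -> 24.04     (RAPIDS_VERSION_MAJOR_MINOR: major.minor only)
```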
6 changes: 4 additions & 2 deletions ci/build_docs.sh
@@ -1,5 +1,5 @@
#!/bin/bash
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.

set -euo pipefail

@@ -28,7 +28,9 @@ rapids-mamba-retry install \
pylibraft \
raft-dask

export RAPIDS_VERSION_NUMBER="24.04"
export RAPIDS_VERSION="$(rapids-version)"
export RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
export RAPIDS_VERSION_NUMBER="$RAPIDS_VERSION_MAJOR_MINOR"
export RAPIDS_DOCS_DIR="$(mktemp -d)"

rapids-logger "Build CPP docs"
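
`rapids-version` and `rapids-version-major-minor` are RAPIDS gha-tools helpers; assuming the standard behavior, both read the repository's VERSION file, so the previously hard-coded "24.04" no longer needs a per-release edit:

```bash
rapids-version              # full version from the VERSION file, e.g. 24.04.00
rapids-version-major-minor  # just major.minor, e.g. 24.04
```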
17 changes: 1 addition & 16 deletions ci/release/update-version.sh
@@ -1,5 +1,5 @@
#!/bin/bash
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
########################
# RAFT Version Updater #
########################
@@ -36,23 +36,11 @@ function sed_runner() {
sed -i.bak ''"$1"'' $2 && rm -f ${2}.bak
}
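
For reference, `sed_runner` wraps in-place sed so the same invocation works with both GNU sed and BSD/macOS sed (the latter requires a backup suffix after `-i`, hence the `.bak` that is immediately deleted). A hypothetical usage:

```bash
# Hypothetical example: bump a branch reference in a docs file.
sed_runner 's/branch-24\.02/branch-24.04/g' docs/source/build.md
```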

sed_runner "s/set(RAPIDS_VERSION .*)/set(RAPIDS_VERSION \"${NEXT_SHORT_TAG}\")/g" cpp/CMakeLists.txt
sed_runner "s/set(RAPIDS_VERSION .*)/set(RAPIDS_VERSION \"${NEXT_SHORT_TAG}\")/g" cpp/template/cmake/thirdparty/fetch_rapids.cmake
sed_runner "s/set(RAFT_VERSION .*)/set(RAFT_VERSION \"${NEXT_FULL_TAG}\")/g" cpp/CMakeLists.txt
sed_runner 's/'"pylibraft_version .*)"'/'"pylibraft_version ${NEXT_FULL_TAG})"'/g' python/pylibraft/CMakeLists.txt
sed_runner 's/'"raft_dask_version .*)"'/'"raft_dask_version ${NEXT_FULL_TAG})"'/g' python/raft-dask/CMakeLists.txt
sed_runner 's/'"branch-.*\/RAPIDS.cmake"'/'"branch-${NEXT_SHORT_TAG}\/RAPIDS.cmake"'/g' fetch_rapids.cmake

# Centralized version file update
echo "${NEXT_FULL_TAG}" > VERSION

-# Wheel testing script
-sed_runner "s/branch-.*/branch-${NEXT_SHORT_TAG}/g" ci/test_wheel_raft_dask.sh

-# Docs update
-sed_runner 's/version = .*/version = '"'${NEXT_SHORT_TAG}'"'/g' docs/source/conf.py
-sed_runner 's/release = .*/release = '"'${NEXT_FULL_TAG}'"'/g' docs/source/conf.py

DEPENDENCIES=(
dask-cuda
pylibraft
@@ -84,9 +72,6 @@ sed_runner "/^ucx_py_version:$/ {n;s/.*/ - \"${NEXT_UCX_PY_VERSION}\"/}" conda/
for FILE in .github/workflows/*.yaml; do
sed_runner "/shared-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}"
done
sed_runner "s/RAPIDS_VERSION_NUMBER=\".*/RAPIDS_VERSION_NUMBER=\"${NEXT_SHORT_TAG}\"/g" ci/build_docs.sh

sed_runner "/^PROJECT_NUMBER/ s|\".*\"|\"${NEXT_SHORT_TAG}\"|g" cpp/doxygen/Doxyfile

sed_runner "/^set(RAFT_VERSION/ s|\".*\"|\"${NEXT_SHORT_TAG}\"|g" docs/source/build.md
sed_runner "s|branch-[0-9][0-9].[0-9][0-9]|branch-${NEXT_SHORT_TAG}|g" docs/source/build.md
4 changes: 2 additions & 2 deletions conda/environments/all_cuda-118_arch-aarch64.yaml
@@ -39,7 +39,7 @@ dependencies:
- nccl>=2.9.9
- ninja
- numba>=0.57
-- numpy>=1.23
+- numpy>=1.23,<2.0a0
- numpydoc
- nvcc_linux-aarch64=11.8
- pre-commit
@@ -57,5 +57,5 @@ dependencies:
- sysroot_linux-aarch64==2.17
- ucx-proc=*=gpu
- ucx-py==0.37.*
-- ucx>=1.13.0
+- ucx>=1.15.0,<1.16.0
name: all_cuda-118_arch-aarch64
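
A note on the `numpy>=1.23,<2.0a0` pin, which recurs in all four environment files in this commit: under PEP 440/conda version ordering, pre-releases sort below the final release, so capping at `2.0a0` excludes NumPy 2.0 alphas as well as 2.0 itself. A quick check (assumes the `packaging` package is available):

```bash
python -c "from packaging.version import Version as V; print(V('2.0a0') < V('2.0.0'))"
# True, so '<2.0a0' shuts out every 2.0 pre-release and release
```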
4 changes: 2 additions & 2 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -39,7 +39,7 @@ dependencies:
- nccl>=2.9.9
- ninja
- numba>=0.57
-- numpy>=1.23
+- numpy>=1.23,<2.0a0
- numpydoc
- nvcc_linux-64=11.8
- pre-commit
@@ -57,5 +57,5 @@ dependencies:
- sysroot_linux-64==2.17
- ucx-proc=*=gpu
- ucx-py==0.37.*
-- ucx>=1.13.0
+- ucx>=1.15.0,<1.16.0
name: all_cuda-118_arch-x86_64
4 changes: 2 additions & 2 deletions conda/environments/all_cuda-122_arch-aarch64.yaml
@@ -36,7 +36,7 @@ dependencies:
- nccl>=2.9.9
- ninja
- numba>=0.57
-- numpy>=1.23
+- numpy>=1.23,<2.0a0
- numpydoc
- pre-commit
- pydata-sphinx-theme
@@ -53,5 +53,5 @@ dependencies:
- sysroot_linux-aarch64==2.17
- ucx-proc=*=gpu
- ucx-py==0.37.*
-- ucx>=1.13.0
+- ucx>=1.15.0,<1.16.0
name: all_cuda-122_arch-aarch64
4 changes: 2 additions & 2 deletions conda/environments/all_cuda-122_arch-x86_64.yaml
@@ -36,7 +36,7 @@ dependencies:
- nccl>=2.9.9
- ninja
- numba>=0.57
-- numpy>=1.23
+- numpy>=1.23,<2.0a0
- numpydoc
- pre-commit
- pydata-sphinx-theme
@@ -53,5 +53,5 @@ dependencies:
- sysroot_linux-64==2.17
- ucx-proc=*=gpu
- ucx-py==0.37.*
-- ucx>=1.13.0
+- ucx>=1.15.0,<1.16.0
name: all_cuda-122_arch-x86_64
4 changes: 2 additions & 2 deletions conda/recipes/libraft/build_libraft_template.sh
@@ -1,5 +1,5 @@
#!/usr/bin/env bash
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.

# Just building template so we verify it uses libraft.so and fail if it doesn't build
-./build.sh template
+./build.sh template --no-nvtx
2 changes: 1 addition & 1 deletion conda/recipes/pylibraft/meta.yaml
@@ -65,7 +65,7 @@ requirements:
{% endif %}
- libraft {{ version }}
- libraft-headers {{ version }}
-- numpy >=1.23
+- numpy >=1.23,<2.0a0
- python x.x
- rmm ={{ minor_version }}

2 changes: 1 addition & 1 deletion conda/recipes/raft-dask/conda_build_config.yaml
@@ -14,7 +14,7 @@ sysroot_version:
- "2.17"

ucx_version:
- ">=1.14.1,<1.16.0"
- ">=1.15.0,<1.16.0"

ucx_py_version:
- "0.37.*"
7 changes: 2 additions & 5 deletions cpp/CMakeLists.txt
@@ -10,11 +10,8 @@
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under
# the License.
-set(RAPIDS_VERSION "24.04")
-set(RAFT_VERSION "24.04.00")

cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR)
-include(../fetch_rapids.cmake)
+include(../rapids_config.cmake)
include(rapids-cmake)
include(rapids-cpm)
include(rapids-export)
@@ -34,7 +31,7 @@ endif()

project(
RAFT
-  VERSION ${RAFT_VERSION}
+  VERSION "${RAPIDS_VERSION}"
LANGUAGES ${lang_list}
)

2 changes: 1 addition & 1 deletion cpp/doxygen/Doxyfile
@@ -38,7 +38,7 @@ PROJECT_NAME = "RAFT C++ API"
# could be handy for archiving the generated documentation or if some version
# control system is used.

-PROJECT_NUMBER = "24.04"
+PROJECT_NUMBER = "$(RAPIDS_VERSION_MAJOR_MINOR)"

# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer a
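
Doxygen expands `$(VAR)` references in a Doxyfile from the environment, which is what lets `PROJECT_NUMBER` track the release without a hard-coded string. A sketch of how the docs build now feeds it (the exports come from build.sh and ci/build_docs.sh above):

```bash
export RAPIDS_VERSION_MAJOR_MINOR="24.04"
doxygen Doxyfile   # PROJECT_NUMBER renders as "24.04"
```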
119 changes: 66 additions & 53 deletions cpp/include/raft/distance/detail/pairwise_distance_cutlass_base.cuh
@@ -91,18 +91,11 @@ std::enable_if_t<ops::has_cutlass_op<OpT>::value> cutlassDistanceKernel(const Da

typename EpilogueOutputOp::Params epilog_op_param(dist_op, fin_op);

-  const DataT *a, *b;
-
-  IdxT gemm_lda, gemm_ldb;
-
// Number of pipelines you want to use
constexpr int NumStages = 3;
// Alignment
constexpr int Alignment = VecLen;

-  // default initialize problem size with row major inputs
-  auto problem_size = cutlass::gemm::GemmCoord(n, m, k);

using cutlassDistKernel =
typename cutlass::gemm::kernel::PairwiseDistanceGemm<DataT,
Alignment,
@@ -116,53 +109,73 @@

using cutlassDist = cutlass::gemm::device::GemmUniversalAdapter<cutlassDistKernel>;

-  if constexpr (isRowMajor) {
-    a        = y;
-    b        = x;
-    gemm_lda = ldb;
-    gemm_ldb = lda;
-  } else {
-    problem_size = cutlass::gemm::GemmCoord(m, n, k);
-    a            = x;
-    b            = y;
-    gemm_lda     = lda;
-    gemm_ldb     = ldb;
-  }
+  constexpr uint32_t gridYZMax      = ((1 << (sizeof(uint16_t) * 8)) - 1);
+  constexpr uint32_t max_batch_size = gridYZMax * cutlassDistKernel::ThreadblockShape::kN;
+  IdxT numNbatches = (n - 1 + max_batch_size) / max_batch_size;
+
+  for (IdxT i = 0; i < numNbatches; i++) {
+    const DataT *a, *b;
+    IdxT gemm_lda, gemm_ldb;
+    size_t offsetN = i * max_batch_size;
+
+    if constexpr (isRowMajor) {
+      gemm_lda = ldb;
+      gemm_ldb = lda;
+      a        = y + offsetN * gemm_lda;
+      b        = x;
+    } else {
+      gemm_lda = lda;
+      gemm_ldb = ldb;
+      a        = x;
+      b        = y + offsetN;
+    }
+    IdxT chunkN   = (i + 1) * max_batch_size;
+    IdxT currentN = (chunkN < n) ? max_batch_size : (n - offsetN);
+
+    // default initialize problem size with row major inputs
+    auto problem_size = isRowMajor ? cutlass::gemm::GemmCoord(currentN, m, k)
+                                   : cutlass::gemm::GemmCoord(m, currentN, k);
+
+    typename cutlassDist::Arguments arguments{
+      mode,
+      problem_size,
+      batch_count,
+      epilog_op_param,
+      a,
+      b,
+      xn,                    // C matrix eq vector param, which here is A norm
+      nullptr,               // tensor_Z,
+      (DataT*)yn + offsetN,  // this is broadcast vec, which is required to be non-const param
+      dOutput + offsetN,     // Output distance matrix
+      (int64_t)0,            // batch stride A
+      (int64_t)0,            // batch stride B
+      (int64_t)0,            // batch stride Norm A
+      (int64_t)0,
+      (int64_t)0,            // batch stride Norm B
+      (int64_t)0,            // batch stride Output
+      gemm_lda,              // stride A
+      gemm_ldb,              // stride B
+      1,                     // stride A norm
+      0,                     // this is no-op for Z
+      0,                     // This must be zero
+      ldd                    // stride Output matrix
+    };
+
+    // Using the arguments, query for extra workspace required for matrix multiplication computation
+    size_t workspace_size = cutlassDist::get_workspace_size(arguments);
+    // Allocate workspace memory
+    rmm::device_uvector<uint8_t> workspace(workspace_size, stream);
+    // Instantiate CUTLASS kernel depending on templates
+    cutlassDist cutlassDist_op;
+    // Check the problem size is supported or not
+    RAFT_CUTLASS_TRY(cutlassDist_op.can_implement(arguments));
+
+    // Initialize CUTLASS kernel with arguments and workspace pointer
+    RAFT_CUTLASS_TRY(cutlassDist_op.initialize(arguments, workspace.data(), stream));
+
+    // Launch initialized CUTLASS kernel
+    RAFT_CUTLASS_TRY(cutlassDist_op(stream));
+  }

-  typename cutlassDist::Arguments arguments{
-    mode, problem_size, batch_count, epilog_op_param, a, b,
-    xn,          // C matrix eq vector param, which here is A norm
-    nullptr,     // tensor_Z,
-    (DataT*)yn,  // this is broadcast vec, which is required to be non-const param
-    dOutput,     // Output distance matrix
-    (int64_t)0,  // batch stride A
-    (int64_t)0,  // batch stride B
-    (int64_t)0,  // batch stride Norm A
-    (int64_t)0,
-    (int64_t)0,  // batch stride Norm B
-    (int64_t)0,  // batch stride Output
-    gemm_lda,    // stride A
-    gemm_ldb,    // stride B
-    1,           // stride A norm
-    0,           // this is no-op for Z
-    0,           // This must be zero
-    ldd          // stride Output matrix
-  };
-
-  // Using the arguments, query for extra workspace required for matrix multiplication computation
-  size_t workspace_size = cutlassDist::get_workspace_size(arguments);
-  // Allocate workspace memory
-  rmm::device_uvector<uint8_t> workspace(workspace_size, stream);
-  // Instantiate CUTLASS kernel depending on templates
-  cutlassDist cutlassDist_op;
-  // Check the problem size is supported or not
-  RAFT_CUTLASS_TRY(cutlassDist_op.can_implement(arguments));
-
-  // Initialize CUTLASS kernel with arguments and workspace pointer
-  RAFT_CUTLASS_TRY(cutlassDist_op.initialize(arguments, workspace.data(), stream));
-
-  // Launch initialized CUTLASS kernel
-  RAFT_CUTLASS_TRY(cutlassDist_op(stream));
}

}; // namespace detail
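
Why the kernel launch gained a loop: the pairwise-distance kernel maps the `n` dimension onto a CUDA grid y/z dimension, which is capped at 2^16 - 1 = 65535 blocks, so `n` is now processed in slices of at most `gridYZMax * ThreadblockShape::kN` columns. The batch count is a standard integer ceiling division:

$$\text{numNbatches} = \left\lceil \frac{n}{\text{max\_batch\_size}} \right\rceil = \left\lfloor \frac{n - 1 + \text{max\_batch\_size}}{\text{max\_batch\_size}} \right\rfloor, \qquad \text{max\_batch\_size} = (2^{16} - 1) \cdot kN.$$

For an illustrative (assumed, not taken from this diff) kN = 128, max_batch_size = 65535 * 128 = 8388480; a hypothetical n = 10^7 then yields two batches, the second covering the remaining currentN = 10^7 - 8388480 = 1611520 columns, with `a`, `yn`, and `dOutput` advanced by `offsetN` for each slice.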
4 changes: 2 additions & 2 deletions cpp/include/raft/stats/detail/mean.cuh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -43,7 +43,7 @@ RAFT_KERNEL meanKernelRowMajor(Type* mu, const Type* data, IdxType D, IdxType N)
__syncthreads();
raft::myAtomicAdd(smu + thisColId, thread_data);
__syncthreads();
-  if (threadIdx.x < ColsPerBlk) raft::myAtomicAdd(mu + colId, smu[thisColId]);
+  if (threadIdx.x < ColsPerBlk && colId < D) raft::myAtomicAdd(mu + colId, smu[thisColId]);
}

template <typename Type, typename IdxType, int TPB>
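
About the mean.cuh change above: the added `colId < D` guard matters when D is not a multiple of ColsPerBlk, because the grid rounds the number of column-blocks up and threads in the last block can compute a colId past the end of `mu`; without the check their atomic adds would write out of bounds. Assuming the usual mapping `colId = blockIdx.y * ColsPerBlk + thisColId`, a worked example:

$$\left\lceil \frac{D}{\text{ColsPerBlk}} \right\rceil \cdot \text{ColsPerBlk} - 1 = \left\lceil \frac{10}{4} \right\rceil \cdot 4 - 1 = 11 \ge D = 10,$$

so with D = 10 and ColsPerBlk = 4, the threads holding colId 10 and 11 are exactly the ones the new condition masks out.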
