SVS v0.0.3 (#19)

intel · Feb 7, 2024 · 6533a60 · 6533a60
1 parent b7e5488
commit 6533a60
Show file tree

Hide file tree

Showing 264 changed files with 30,867 additions and 4,989 deletions.
diff --git a/.github/scripts/setup_apt_repo_linux.sh b/.github/scripts/setup_apt_repo_linux.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+# SPDX-FileCopyrightText: 2020 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+#
+# Taken from: https://github.com/oneapi-src/oneapi-ci
+
+wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+echo "deb https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
+sudo apt-get update -o Dir::Etc::sourcelist="sources.list.d/oneAPI.list" -o APT::Get::List-Cleanup="0"
diff --git a/.github/workflows/build-docs.yml b/.github/workflows/build-docs.yml
@@ -28,7 +28,11 @@ jobs:
 
       - name: Install Dependencies
         run: |
-          sudo apt install -y doxygen
+          .github/scripts/setup_apt_repo_linux.sh
+          sudo apt install -y doxygen intel-oneapi-mkl intel-oneapi-mkl-devel
+          # See notes in `build-linux.yml` about persisting MKL environment variables.
+          source /opt/intel/oneapi/setvars.sh
+          printenv >> $GITHUB_ENV
           pip install \
             archspec \
             scikit-build \

diff --git a/.github/workflows/build-linux.yml b/.github/workflows/build-linux.yml
@@ -20,7 +20,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        build_type: [Release]
+        build_type: [RelWithDebInfo]  # CMake's built-in config is spelled "RelWithDebInfo"
         cxx: [g++-11, g++-12, clang++-15]
         include:
           - cxx: g++-11
@@ -32,6 +32,16 @@ jobs:
 
     steps:
     - uses: actions/checkout@v4
+    - name: Install MKL
+      timeout-minutes: 5
+      run: |
+       .github/scripts/setup_apt_repo_linux.sh
+       sudo apt install -y intel-oneapi-mkl intel-oneapi-mkl-devel
+       # Load the oneAPI environment variables needed for building against MKL, then
+       # persist them through GITHUB_ENV so that subsequent steps see them as well.
+       source /opt/intel/oneapi/setvars.sh
+       printenv >> "$GITHUB_ENV"
+
     - name: Configure build
       working-directory: ${{ runner.temp }}
       env:
@@ -44,6 +54,7 @@ jobs:
               -DSVS_BUILD_BINARIES=YES \
               -DSVS_BUILD_TESTS=YES \
               -DSVS_BUILD_EXAMPLES=YES \
+              -DSVS_EXPERIMENTAL_LEANVEC=YES \
               -DSVS_NO_AVX512=NO
 
     - name: Build Tests and Utilities

diff --git a/.github/workflows/cibuildwheel.yml b/.github/workflows/cibuildwheel.yml
@@ -14,6 +14,11 @@ jobs:
     steps:
     - uses: actions/checkout@v4
 
+    - name: Build Container
+      run: |
+        cd ${GITHUB_WORKSPACE}/docker/x86_64/manylinux2014
+        ./build.sh
+
     - name: Install cibuildwheel
       run: python -m pip install cibuildwheel
 

diff --git a/.github/workflows/deploy-docs.yml b/.github/workflows/deploy-docs.yml
@@ -34,7 +34,8 @@ jobs:
 
       - name: Install Dependencies
         run: |
-          sudo apt install -y doxygen
+          .github/scripts/setup_apt_repo_linux.sh
+          sudo apt install -y doxygen intel-oneapi-mkl intel-oneapi-mkl-devel
           pip install \
             archspec \
             scikit-build \

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.18)
+cmake_minimum_required(VERSION 3.21)
 
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_LIST_DIR}/cmake")
 
@@ -9,7 +9,7 @@ project(svs
     # - /bindings/python/tests/test_common.py
     # Manually keep in-sync with:
     # - /bindings/python/setup.py
-    VERSION 0.0.1
+    VERSION 0.0.3
 )
 
 set(SVS_LIB svs_devel)
@@ -57,6 +57,14 @@ include("cmake/robin-map.cmake")
 include("cmake/fmt.cmake")
 include("cmake/toml.cmake")
 
+# LeanVec requires MKL; publish SVS_HAVE_MKL (1 or 0) to every consumer of ${SVS_LIB}.
+if(SVS_EXPERIMENTAL_LEANVEC)
+    include("cmake/mkl.cmake")
+    target_compile_definitions(${SVS_LIB} INTERFACE SVS_HAVE_MKL=1)
+else()
+    target_compile_definitions(${SVS_LIB} INTERFACE SVS_HAVE_MKL=0)
+endif()
+
 #####
 ##### Build Objects
 #####
@@ -82,7 +90,7 @@ endif()
 #
 # If only the unit tests are enabled, then the benchmark will be built as a minimal
 # component to avoid excessive compilation time.
-if(SVS_BUILD_BENCHMARK OR SVS_BUILD_TESTS)
+if(SVS_BUILD_BENCHMARK OR SVS_BUILD_TESTS OR SVS_BUILD_BENCHMARK_TEST_GENERATORS)
     add_subdirectory(benchmark)
 endif()
 

diff --git a/HISTORY.md b/HISTORY.md
@@ -0,0 +1,77 @@
+# SVS 0.0.2 Release Notes
+
+## `pysvs` (Python)
+
+* Deprecated `num_threads` keyword argument from `pysvs.VamanaBuildParameters` and added
+  `num_threads` keyword to `pysvs.Vamana.build`.
+* Exposed the `prune_to` parameter for `pysvs.VamanaBuildParameters` (see description below
+  for an explanation of this change).
+* Added preliminary support for building `pysvs.Flat` and `pysvs.Vamana` directly from
+  `np.float16` arrays.
+
+## `libsvs` (C++)
+
+### Breaking Changes
+
+* Removed `nthreads` member of `VamanaBuildParameters` and added the number of threads as
+  an argument to `svs::Vamana::build`.
+* Added a `prune_to` argument to `VamanaBuildParameters`. This can be set to a value less
+  than `graph_max_degree` (heuristically, setting this to be 4 less is a good trade-off
+  between accuracy and speed). When pruning is performed, this parameter is used to
+  determine the number of candidates to generate after pruning. Setting this less than
+  `graph_max_degree` greatly reduces the time spent when managing backedges.
+* Improved pruning rules for Euclidean and InnerProduct. Vamana index construction should
+  be faster and yield slightly improved indexes.
+* Added an experimental external-threading interface to `svs::index::VamanaIndex`.
+* Overhauled extension mechanisms using a `tag_invoke` style approach. This decouples the
+  `svs::index::VamanaIndex` implementation from extensions like LVQ, reducing header
+  dependence and improving precision of algorithm customization.
+
+### Save/Load API
+* Enabled context-free saving and loading of simple data structures. This allows simple
+  data structures to be saved and reloaded from TOML files without requiring access to the
+  saving/loading directory. Classes implementing this saving and loading allow for more
+  flexible storage.
+* Overhauled the implementation of saving and loading to enable a more scalable implementation.
+* `svs::data::SimpleData` family of data structures are now directly saveable and loadable
+  and no longer require proxy-classes.
+
+**Breaking Serialization Changes**
+
+* Changed LVQ-style datasets from `v0.0.1` to `v0.0.2`: Removed centroids from being stored
+  with the ScaledBiasedCompressedDataset.  Centroids are now stored in the higher level LVQ
+  dataset.
+
+### Back-end Changes
+
+Changes to library internals that do not necessarily affect the top level API but could
+affect performance or users relying on internal APIs.
+
+* Improved the performance of the LVQ inner-product implementation.
+* Moved dynamic dispatcher from the Python bindings into `libsvs`.
+* Data structure loading has been augmented with the `svs::lib::Lazy` class, allowing for
+  arbitrary deferred work to be executed when loading data structures.
+* Removed the old "access mode" style API for multi-level datasets, instead using
+  `tag_invoke` for customization.
+* Reduced binary footprint by removing `std::function` use for general multi-threaded
+  functions.
+* Updated `ANNException` to use `fmtlib` style message directly rather than `std::ostream`
+  style overloading. The new syntax turns
+  ```c++
+  ANNEXCEPTION("Expected ", a, ", got ", b, "!");
+  ```
+  to
+  ```c++
+  ANNEXCEPTION("Expected {}, got {}!", a, b);
+  ```
+
+## Binaries and Utilities
+
+* Added a benchmarking framework in `/benchmark` to automatically run and aggregate index
+  construction and search for large scale benchmarks. Documentation is currently sparse
+  but planned.
+
+## Third Party
+
+* Bump [fmtlib](https://github.com/fmtlib/fmt) from 9.1.0 to 10.1.1.
+