From 2698dcb47ccfb7675ce0821447a7882ff095ca2c Mon Sep 17 00:00:00 2001 From: LHT129 Date: Tue, 24 Dec 2024 14:45:24 +0800 Subject: [PATCH] fix arm compile error (#240) - add arm test on ci Signed-off-by: LHT129 --- .circleci/config.yml | 45 +++++++++++++++++++++++++++++ scripts/deps/install_deps_ubuntu.sh | 12 +++++++- src/simd/CMakeLists.txt | 1 + src/simd/avx.cpp | 5 ++++ src/simd/avx512.cpp | 20 ++++++++----- src/simd/fp32_simd_test.cpp | 12 -------- src/simd/normalize_test.cpp | 12 -------- src/simd/sq4_simd_test.cpp | 13 --------- src/simd/sq4_uniform_simd_test.cpp | 12 -------- src/simd/sq8_simd.h | 2 -- src/simd/sq8_simd_test.cpp | 14 +-------- src/simd/sq8_uniform_simd_test.cpp | 12 -------- src/simd/sse.cpp | 19 +++++++----- tests/CMakeLists.txt | 6 ++-- 14 files changed, 90 insertions(+), 95 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index a4c6c56b..3968d42b 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -8,6 +8,11 @@ workflows: branches: ignore: - main + - pull-request-check-aarch64: + filters: + branches: + ignore: + - main main-branch-workflow: jobs: - main-branch-check: @@ -15,6 +20,11 @@ workflows: branches: only: - main + - main-branch-check-aarch64: + filters: + branches: + only: + - main jobs: pull-request-check: @@ -33,12 +43,46 @@ jobs: - ./build - run: make test_parallel + pull-request-check-aarch64: + docker: + - image: ubuntu:22.04 + resource_class: arm.medium + steps: + - checkout + - run: bash scripts/deps/install_deps_ubuntu.sh + - restore_cache: + keys: + - fork-cache-arm-{{ checksum "CMakeLists.txt" }}-{{ checksum ".circleci/fresh_ci_cache.commit" }} + - run: make debug + - save_cache: + key: fork-cache-arm-{{ checksum "CMakeLists.txt" }}-{{ checksum ".circleci/fresh_ci_cache.commit" }} + paths: + - ./build + - run: make test_parallel + main-branch-check: docker: - image: vsaglib/vsag:ubuntu resource_class: medium+ steps: - checkout + - restore_cache: + keys: + - main-ccache-arm-{{ checksum "CMakeLists.txt" }}-{{ checksum ".circleci/fresh_ci_cache.commit" }} + - run: make debug + - save_cache: + key: main-ccache-arm-{{ checksum "CMakeLists.txt" }}-{{ checksum ".circleci/fresh_ci_cache.commit" }} + paths: + - ./build + - run: make test_parallel + + main-branch-check-aarch64: + docker: + - image: ubuntu:22.04 + resource_class: arm.medium + steps: + - checkout + - run: bash scripts/deps/install_deps_ubuntu.sh - restore_cache: keys: - main-ccache-{{ checksum "CMakeLists.txt" }}-{{ checksum ".circleci/fresh_ci_cache.commit" }} @@ -48,3 +92,4 @@ jobs: paths: - ./build - run: make test_parallel + diff --git a/scripts/deps/install_deps_ubuntu.sh b/scripts/deps/install_deps_ubuntu.sh index d452085a..859a59fb 100644 --- a/scripts/deps/install_deps_ubuntu.sh +++ b/scripts/deps/install_deps_ubuntu.sh @@ -1 +1,11 @@ -sudo apt install -y gfortran python3-dev libomp-15-dev lcov intel-mkl +arch=$(uname -m) + +if [[ "$arch" == "x86_64" ]]; then + echo "Executing apt install for x86_64" + apt update && apt install -y gfortran python3-dev libomp-15-dev lcov intel-mkl gcc make cmake g++ +elif [[ "$arch" == "aarch64" ]]; then + echo "Executing apt install for aarch64" + apt update && apt install -y gfortran python3-dev libomp-15-dev gcc make cmake g++ lcov +else + echo "Unknown architecture: $arch" +fi diff --git a/src/simd/CMakeLists.txt b/src/simd/CMakeLists.txt index 68501f06..4322c1fc 100644 --- a/src/simd/CMakeLists.txt +++ b/src/simd/CMakeLists.txt @@ -9,6 +9,7 @@ set (SIMD_SRCS sse.cpp avx.cpp avx512.cpp + normalize.cpp ) if (DIST_CONTAINS_SSE) set (SIMD_SRCS ${SIMD_SRCS} sse.cpp) diff --git a/src/simd/avx.cpp b/src/simd/avx.cpp index 486d0e1a..1ad908b8 100644 --- a/src/simd/avx.cpp +++ b/src/simd/avx.cpp @@ -13,7 +13,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +#if defined(ENABLE_AVX) #include +#endif #include #include @@ -30,6 +32,7 @@ namespace vsag { #define PORTABLE_ALIGN32 __attribute__((aligned(32))) #define PORTABLE_ALIGN64 __attribute__((aligned(64))) +#if defined(ENABLE_AVX) float L2SqrSIMD16ExtAVX(const void* pVect1v, const void* pVect2v, const void* qty_ptr) { float* pVect1 = (float*)pVect1v; @@ -163,6 +166,8 @@ PQDistanceAVXFloat256(const void* single_dim_centers, float single_dim_val, void } } +#endif + namespace avx2 { #if defined(ENABLE_AVX2) diff --git a/src/simd/avx512.cpp b/src/simd/avx512.cpp index c1edce33..517e5ee8 100644 --- a/src/simd/avx512.cpp +++ b/src/simd/avx512.cpp @@ -12,8 +12,9 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. - +#if defined(ENABLE_AVX512) #include +#endif #include @@ -30,6 +31,7 @@ namespace vsag { #define PORTABLE_ALIGN32 __attribute__((aligned(32))) #define PORTABLE_ALIGN64 __attribute__((aligned(64))) +#if defined(ENABLE_AVX512) float L2SqrSIMD16ExtAVX512(const void* pVect1v, const void* pVect2v, const void* qty_ptr) { float* pVect1 = (float*)pVect1v; @@ -202,6 +204,8 @@ INT8InnerProduct512ResidualsAVX512Distance(const void* pVect1v, return -INT8InnerProduct512ResidualsAVX512(pVect1v, pVect2v, qty_ptr); } +#endif + namespace avx512 { float FP32ComputeIP(const float* query, const float* codes, uint64_t dim) { @@ -221,7 +225,7 @@ FP32ComputeIP(const float* query, const float* codes, uint64_t dim) { ip += avx2::FP32ComputeIP(query + n * 16, codes + n * 16, dim - n * 16); return ip; #else - return vsag::Generic::FP32ComputeIP(query, codes, dim); + return vsag::generic::FP32ComputeIP(query, codes, dim); #endif } @@ -244,7 +248,7 @@ FP32ComputeL2Sqr(const float* query, const float* codes, uint64_t dim) { l2 += avx2::FP32ComputeL2Sqr(query + n * 16, codes + n * 16, dim - n * 16); return l2; #else - return vsag::Generic::FP32ComputeL2Sqr(query, codes, dim); + return vsag::generic::FP32ComputeL2Sqr(query, codes, dim); #endif } @@ -282,7 +286,7 @@ SQ8ComputeIP(const float* query, finalResult += avx2::SQ8ComputeIP(query + i, codes + i, lowerBound + i, diff + i, dim - i); return finalResult; #else - return Generic::SQ8ComputeIP(query, codes, lowerBound, diff, dim); + return generic::SQ8ComputeIP(query, codes, lowerBound, diff, dim); #endif } @@ -320,7 +324,7 @@ SQ8ComputeL2Sqr(const float* query, result += avx2::SQ8ComputeL2Sqr(query + i, codes + i, lowerBound + i, diff + i, dim - i); return result; #else - return Generic::SQ8ComputeL2Sqr(query, codes, lowerBound, diff, dim); + return generic::SQ8ComputeL2Sqr(query, codes, lowerBound, diff, dim); #endif } @@ -357,7 +361,7 @@ SQ8ComputeCodesIP(const uint8_t* codes1, result += avx2::SQ8ComputeCodesIP(codes1 + i, codes2 + i, lowerBound + i, diff + i, dim - i); return result; #else - return Generic::SQ8ComputeCodesIP(codes1, codes2, lowerBound, diff, dim); + return generic::SQ8ComputeCodesIP(codes1, codes2, lowerBound, diff, dim); #endif } @@ -390,7 +394,7 @@ SQ8ComputeCodesL2Sqr(const uint8_t* codes1, result += avx2::SQ8ComputeCodesL2Sqr(codes1 + i, codes2 + i, lowerBound + i, diff + i, dim - i); return result; #else - return Generic::SQ8ComputeL2Sqr(query, codes, lowerBound, diff, dim); + return generic::SQ8ComputeCodesL2Sqr(codes1, codes2, lowerBound, diff, dim); #endif } @@ -494,7 +498,7 @@ SQ8UniformComputeCodesIP(const uint8_t* codes1, const uint8_t* codes2, uint64_t result += static_cast(avx2::SQ8UniformComputeCodesIP(codes1 + d, codes2 + d, dim - d)); return static_cast(result); #else - return avx2::S8UniformComputeCodesIP(codes1, codes2, dim); + return avx2::SQ8UniformComputeCodesIP(codes1, codes2, dim); #endif } diff --git a/src/simd/fp32_simd_test.cpp b/src/simd/fp32_simd_test.cpp index 36a41da4..50a1d21f 100644 --- a/src/simd/fp32_simd_test.cpp +++ b/src/simd/fp32_simd_test.cpp @@ -22,18 +22,6 @@ using namespace vsag; -#ifndef ENABLE_SSE -namespace sse = generic; -#endif - -#ifndef ENABLE_AVX2 -namespace avx2 = sse; -#endif - -#ifndef ENABLE_AVX512 -namespace avx512 = avx2; -#endif - #define TEST_ACCURACY(Func) \ { \ float gt, sse, avx2, avx512; \ diff --git a/src/simd/normalize_test.cpp b/src/simd/normalize_test.cpp index a84ec2a2..04f39f3b 100644 --- a/src/simd/normalize_test.cpp +++ b/src/simd/normalize_test.cpp @@ -22,18 +22,6 @@ using namespace vsag; -#ifndef ENABLE_SSE -namespace sse = generic; -#endif - -#ifndef ENABLE_AVX2 -namespace avx2 = sse; -#endif - -#ifndef ENABLE_AVX512 -namespace avx512 = avx2; -#endif - TEST_CASE("Normalize SIMD Compute", "[simd]") { auto dims = fixtures::get_common_used_dims(); int64_t count = 100; diff --git a/src/simd/sq4_simd_test.cpp b/src/simd/sq4_simd_test.cpp index 75bdfe46..322ec40a 100644 --- a/src/simd/sq4_simd_test.cpp +++ b/src/simd/sq4_simd_test.cpp @@ -17,24 +17,11 @@ #include -#include "../logger.h" #include "catch2/benchmark/catch_benchmark.hpp" #include "fixtures.h" using namespace vsag; -#ifndef ENABLE_SSE -namespace sse = generic; -#endif - -#ifndef ENABLE_AVX2 -namespace avx2 = sse; -#endif - -#ifndef ENABLE_AVX512 -namespace avx512 = avx2; -#endif - #define TEST_ACCURACY(Func) \ { \ auto gt = generic::Func(codes1.data() + i * code_size, \ diff --git a/src/simd/sq4_uniform_simd_test.cpp b/src/simd/sq4_uniform_simd_test.cpp index 73e37660..8ea7f30f 100644 --- a/src/simd/sq4_uniform_simd_test.cpp +++ b/src/simd/sq4_uniform_simd_test.cpp @@ -23,18 +23,6 @@ using namespace vsag; -#ifndef ENABLE_SSE -namespace sse = generic; -#endif - -#ifndef ENABLE_AVX2 -namespace avx2 = sse; -#endif - -#ifndef ENABLE_AVX512 -namespace avx512 = avx2; -#endif - #define TEST_ACCURACY(Func) \ { \ auto gt = \ diff --git a/src/simd/sq8_simd.h b/src/simd/sq8_simd.h index e6c58858..885665f1 100644 --- a/src/simd/sq8_simd.h +++ b/src/simd/sq8_simd.h @@ -45,7 +45,6 @@ SQ8ComputeCodesL2Sqr(const uint8_t* codes1, uint64_t dim); } // namespace generic -#if defined(ENABLE_SSE) namespace sse { float SQ8ComputeIP(const float* query, @@ -72,7 +71,6 @@ SQ8ComputeCodesL2Sqr(const uint8_t* codes1, const float* diff, uint64_t dim); } // namespace sse -#endif namespace avx2 { float diff --git a/src/simd/sq8_simd_test.cpp b/src/simd/sq8_simd_test.cpp index 82aa9baa..06a68b95 100644 --- a/src/simd/sq8_simd_test.cpp +++ b/src/simd/sq8_simd_test.cpp @@ -13,7 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "simd/sq8_simd.h" +#include "sq8_simd.h" #include "catch2/benchmark/catch_benchmark.hpp" #include "catch2/catch_test_macros.hpp" @@ -22,18 +22,6 @@ using namespace vsag; -#ifndef ENABLE_SSE -namespace sse = generic; -#endif - -#ifndef ENABLE_AVX2 -namespace avx2 = sse; -#endif - -#ifndef ENABLE_AVX512 -namespace avx512 = avx2; -#endif - #define TEST_ACCURACY(Func) \ { \ auto gt = generic::Func( \ diff --git a/src/simd/sq8_uniform_simd_test.cpp b/src/simd/sq8_uniform_simd_test.cpp index 1ccd41c0..b64b8c25 100644 --- a/src/simd/sq8_uniform_simd_test.cpp +++ b/src/simd/sq8_uniform_simd_test.cpp @@ -23,18 +23,6 @@ using namespace vsag; -#ifndef ENABLE_SSE -namespace sse = generic; -#endif - -#ifndef ENABLE_AVX2 -namespace avx2 = sse; -#endif - -#ifndef ENABLE_AVX512 -namespace avx512 = avx2; -#endif - #define TEST_ACCURACY(Func) \ { \ auto gt = \ diff --git a/src/simd/sse.cpp b/src/simd/sse.cpp index 2c57ee12..078f19a5 100644 --- a/src/simd/sse.cpp +++ b/src/simd/sse.cpp @@ -13,7 +13,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +#if defined(ENABLE_SSE) #include +#endif #include @@ -35,6 +37,7 @@ L2Sqr(const void* pVect1v, const void* pVect2v, const void* qty_ptr); extern float InnerProduct(const void* pVect1, const void* pVect2, const void* qty_ptr); +#if defined(ENABLE_SSE) /* L2 Distance */ float L2SqrSIMD4ExtSSE(const void* pVect1v, const void* pVect2v, const void* qty_ptr) { @@ -302,6 +305,8 @@ PQDistanceSSEFloat256(const void* single_dim_centers, float single_dim_val, void } } +#endif + namespace sse { #if defined(ENABLE_SSE) @@ -333,7 +338,7 @@ FP32ComputeIP(const float* query, const float* codes, uint64_t dim) { ip += generic::FP32ComputeIP(query + n * 4, codes + n * 4, dim - n * 4); return ip; #else - return vsag::Generic::FP32ComputeIP(query, codes, dim); + return vsag::generic::FP32ComputeIP(query, codes, dim); #endif } @@ -357,7 +362,7 @@ FP32ComputeL2Sqr(const float* query, const float* codes, uint64_t dim) { l2 += generic::FP32ComputeL2Sqr(query + n * 4, codes + n * 4, dim - n * 4); return l2; #else - return vsag::Generic::FP32ComputeL2Sqr(query, codes, dim); + return vsag::generic::FP32ComputeL2Sqr(query, codes, dim); #endif } @@ -399,7 +404,7 @@ SQ8ComputeIP(const float* query, return result[0] + generic::SQ8ComputeIP(query + i, codes + i, lowerBound + i, diff + i, dim - i); #else - return Generic::SQ8ComputeIP(query, codes, lowerBound, diff, dim); + return generic::SQ8ComputeIP(query, codes, lowerBound, diff, dim); #endif } @@ -441,7 +446,7 @@ SQ8ComputeL2Sqr(const float* query, return result; #else - return Generic::SQ8ComputeL2Sqr(query, codes, lowerBound, diff, dim); + return generic::SQ8ComputeL2Sqr(query, codes, lowerBound, diff, dim); #endif } @@ -481,7 +486,7 @@ SQ8ComputeCodesIP(const uint8_t* codes1, result += generic::SQ8ComputeCodesIP(codes1 + i, codes2 + i, lowerBound + i, diff + i, dim - i); return result; #else - return Generic::SQ8ComputeCodesIP(codes1, codes2, lowerBound, diff, dim); + return generic::SQ8ComputeCodesIP(codes1, codes2, lowerBound, diff, dim); #endif } @@ -523,7 +528,7 @@ SQ8ComputeCodesL2Sqr(const uint8_t* codes1, generic::SQ8ComputeCodesL2Sqr(codes1 + i, codes2 + i, lowerBound + i, diff + i, dim - i); return result; #else - return Generic::SQ8ComputeCodesIP(codes1, codes2, lowerBound, diff, dim); + return generic::SQ8ComputeCodesIP(codes1, codes2, lowerBound, diff, dim); #endif } @@ -627,7 +632,7 @@ SQ8UniformComputeCodesIP(const uint8_t* codes1, const uint8_t* codes2, uint64_t static_cast(generic::SQ8UniformComputeCodesIP(codes1 + d, codes2 + d, dim - d)); return static_cast(result); #else - return generic::S8UniformComputeCodesIP(codes1, codes2, dim); + return generic::SQ8UniformComputeCodesIP(codes1, codes2, dim); #endif } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 72212c38..2f399289 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -10,9 +10,9 @@ endif () if (DIST_CONTAINS_AVX) target_compile_definitions (unittests PRIVATE ENABLE_AVX=1) endif () -if (DIST_CONTAINS_AVX2) - target_compile_definitions (unittests PRIVATE ENABLE_AVX2=1) -endif () +#if (DIST_CONTAINS_AVX2) +# target_compile_definitions (unittests PRIVATE ENABLE_AVX2=1) +#endif () if (DIST_CONTAINS_AVX512) target_compile_definitions (unittests PRIVATE ENABLE_AVX512=1) endif ()