From 8cb0f6945a02dd7ef25917abce94f03d25314957 Mon Sep 17 00:00:00 2001 From: Max Bachmann Date: Tue, 31 Oct 2023 20:54:20 +0100 Subject: [PATCH] fix compilation failure on macOS --- CHANGELOG.md | 4 + CMakeLists.txt | 2 +- extras/rapidfuzz_amalgamated.hpp | 36 +++++--- rapidfuzz/details/common.hpp | 19 ++++ rapidfuzz/distance/Jaro.hpp | 5 +- rapidfuzz/distance/Jaro_impl.hpp | 146 +++++++++++++++---------------- 6 files changed, 120 insertions(+), 92 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5b4cb478..bf82b452 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ ## Changelog +## [2.2.2] - 2023-10-31 +### Fixed +- fix compilation failure on macOS + ## [2.2.1] - 2023-10-31 ### Fixed - fix wraparound issue in simd implementation of Jaro and Jaro Winkler diff --git a/CMakeLists.txt b/CMakeLists.txt index c0576954..4f4e699e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,7 +32,7 @@ if (CMAKE_BINARY_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) message(FATAL_ERROR "Building in-source is not supported! Create a build dir and remove ${CMAKE_SOURCE_DIR}/CMakeCache.txt") endif() -project(rapidfuzz LANGUAGES CXX VERSION 2.2.1) +project(rapidfuzz LANGUAGES CXX VERSION 2.2.2) list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake") include(GNUInstallDirs) diff --git a/extras/rapidfuzz_amalgamated.hpp b/extras/rapidfuzz_amalgamated.hpp index 44198ab8..547668e3 100644 --- a/extras/rapidfuzz_amalgamated.hpp +++ b/extras/rapidfuzz_amalgamated.hpp @@ -1,7 +1,7 @@ // Licensed under the MIT License . // SPDX-License-Identifier: MIT // RapidFuzz v1.0.2 -// Generated: 2023-10-31 11:48:55.108653 +// Generated: 2023-11-01 00:20:18.570286 // ---------------------------------------------------------- // This file is an amalgamation of multiple different files. // You probably shouldn't edit it directly. @@ -1629,6 +1629,24 @@ size_t remove_common_suffix(Range& s1, Range& s2); template > SplittedSentenceView sorted_split(InputIt first, InputIt last); +static inline void* rf_aligned_alloc(size_t alignment, size_t size) +{ +#if defined(_WIN32) + return _aligned_malloc(size, alignment); +#else + return aligned_alloc(alignment, size); +#endif +} + +static inline void rf_aligned_free(void* ptr) +{ +#if defined(_WIN32) + _aligned_free(ptr); +#else + free(ptr); +#endif +} + /**@}*/ } // namespace rapidfuzz::detail @@ -5823,15 +5841,12 @@ jaro_similarity_simd_long_s2(Range scores, const detail::BlockPatternMa assert(static_cast(s2.size()) > sizeof(VecType) * 8); struct AlignedAlloc { - AlignedAlloc(size_t size) - { - // work around compilation failure in msvc - memory = operator new[](size, std::align_val_t(native_simd::alignment)); - } + AlignedAlloc(size_t size) : memory(rf_aligned_alloc(native_simd::alignment, size)) + {} ~AlignedAlloc() { - ::operator delete[](memory, std::align_val_t(native_simd::alignment)); + rf_aligned_free(memory); } void* memory = nullptr; @@ -6199,15 +6214,14 @@ struct MultiJaro : public detail::MultiSimilarityBase, double, /* align for avx2 so we can directly load into avx2 registers */ str_lens_size = result_count(); - // work around compilation failure in msvc - str_lens = static_cast(operator new[](sizeof(VecType) * str_lens_size, - std::align_val_t(get_vec_alignment()))); + str_lens = static_cast( + detail::rf_aligned_alloc(get_vec_alignment(), sizeof(VecType) * str_lens_size)); std::fill(str_lens, str_lens + str_lens_size, VecType(0)); } ~MultiJaro() { - ::operator delete[](str_lens, std::align_val_t(get_vec_alignment())); + detail::rf_aligned_free(str_lens); } /** diff --git a/rapidfuzz/details/common.hpp b/rapidfuzz/details/common.hpp index 534221ad..0e0ad6bc 100644 --- a/rapidfuzz/details/common.hpp +++ b/rapidfuzz/details/common.hpp @@ -75,6 +75,25 @@ size_t remove_common_suffix(Range& s1, Range& s2); template > SplittedSentenceView sorted_split(InputIt first, InputIt last); +static inline void* rf_aligned_alloc(size_t alignment, size_t size) +{ +#if defined(_WIN32) + return _aligned_malloc(size, alignment); +#else + return aligned_alloc(alignment, size); +#endif +} + +static inline void rf_aligned_free(void* ptr) +{ +#if defined(_WIN32) + _aligned_free(ptr); +#else + free(ptr); +#endif +} + + /**@}*/ } // namespace rapidfuzz::detail diff --git a/rapidfuzz/distance/Jaro.hpp b/rapidfuzz/distance/Jaro.hpp index 785d42fa..706a0202 100644 --- a/rapidfuzz/distance/Jaro.hpp +++ b/rapidfuzz/distance/Jaro.hpp @@ -118,16 +118,15 @@ struct MultiJaro : public detail::MultiSimilarityBase, double, /* align for avx2 so we can directly load into avx2 registers */ str_lens_size = result_count(); - // work around compilation failure in msvc str_lens = static_cast( - operator new[](sizeof(VecType) * str_lens_size, std::align_val_t(get_vec_alignment())) + detail::rf_aligned_alloc(get_vec_alignment(), sizeof(VecType) * str_lens_size) ); std::fill(str_lens, str_lens + str_lens_size, VecType(0)); } ~MultiJaro() { - ::operator delete[] (str_lens, std::align_val_t(get_vec_alignment())); + detail::rf_aligned_free(str_lens); } /** diff --git a/rapidfuzz/distance/Jaro_impl.hpp b/rapidfuzz/distance/Jaro_impl.hpp index 2071c897..a26791ec 100644 --- a/rapidfuzz/distance/Jaro_impl.hpp +++ b/rapidfuzz/distance/Jaro_impl.hpp @@ -100,8 +100,9 @@ static inline size_t count_common_chars(const FlaggedCharsMultiword& flagged) } template -static inline FlaggedCharsWord flag_similar_characters_word(const PM_Vec& PM, [[maybe_unused]] Range P, - Range T, int Bound) +static inline FlaggedCharsWord flag_similar_characters_word(const PM_Vec& PM, + [[maybe_unused]] Range P, + Range T, int Bound) { assert(P.size() <= 64); assert(T.size() <= 64); @@ -113,7 +114,7 @@ static inline FlaggedCharsWord flag_similar_characters_word(const PM_Vec& PM, [[ int64_t j = 0; auto T_iter = T.begin(); - for (; j < std::min(static_cast(Bound), static_cast(T.size())); ++j,++T_iter) { + for (; j < std::min(static_cast(Bound), static_cast(T.size())); ++j, ++T_iter) { uint64_t PM_j = PM.get(0, *T_iter) & BoundMask & (~flagged.P_flag); flagged.P_flag |= blsi(PM_j); @@ -122,7 +123,7 @@ static inline FlaggedCharsWord flag_similar_characters_word(const PM_Vec& PM, [[ BoundMask = (BoundMask << 1) | 1; } - for (; j < T.size(); ++j,++T_iter) { + for (; j < T.size(); ++j, ++T_iter) { uint64_t PM_j = PM.get(0, *T_iter) & BoundMask & (~flagged.P_flag); flagged.P_flag |= blsi(PM_j); @@ -136,7 +137,8 @@ static inline FlaggedCharsWord flag_similar_characters_word(const PM_Vec& PM, [[ template static inline void flag_similar_characters_step(const BlockPatternMatchVector& PM, CharT T_j, - FlaggedCharsMultiword& flagged, size_t j, SearchBoundMask BoundMask) + FlaggedCharsMultiword& flagged, size_t j, + SearchBoundMask BoundMask) { size_t j_word = j / 64; size_t j_pos = j % 64; @@ -233,7 +235,7 @@ static inline FlaggedCharsMultiword flag_similar_characters_block(const BlockPat BoundMask.first_mask = ~UINT64_C(0); auto T_iter = T.begin(); - for (int64_t j = 0; j < T.size(); ++j,++T_iter) { + for (int64_t j = 0; j < T.size(); ++j, ++T_iter) { flag_similar_characters_step(PM, *T_iter, flagged, static_cast(j), BoundMask); if (j + Bound + 1 < P.size()) { @@ -326,8 +328,7 @@ static inline int64_t jaro_bounds(int64_t P_len, int64_t T_len) */ int64_t Bound = (T_len > P_len) ? T_len : P_len; Bound /= 2; - if(Bound > 0) - Bound--; + if (Bound > 0) Bound--; return Bound; } @@ -410,7 +411,7 @@ static inline double jaro_similarity(Range P, Range T, doubl template static inline double jaro_similarity(const BlockPatternMatchVector& PM, Range P, Range T, - double score_cutoff) + double score_cutoff) { int64_t P_len = P.size(); int64_t T_len = T.size(); @@ -458,8 +459,7 @@ static inline double jaro_similarity(const BlockPatternMatchVector& PM, Range -struct JaroSimilaritySimdBounds -{ +struct JaroSimilaritySimdBounds { ptrdiff_t maxBound = 0; VecType boundMaskSize; VecType boundMask; @@ -476,18 +476,17 @@ static inline auto jaro_similarity_prepare_bound_short_s2(const VecType* s1_leng [[maybe_unused]] static constexpr size_t alignment = native_simd::alignment; static constexpr size_t vec_width = native_simd::size; - assert(static_cast(s2.size()) <= sizeof(VecType)*8); + assert(static_cast(s2.size()) <= sizeof(VecType) * 8); JaroSimilaritySimdBounds> bounds; VecType maxLen = 0; // todo permutate + max to find maxLen // side-note: we know only the first 8 bit are actually used - for(size_t i = 0; i < vec_width; ++i) - if(s1_lengths[i] > maxLen) - maxLen = s1_lengths[i]; + for (size_t i = 0; i < vec_width; ++i) + if (s1_lengths[i] > maxLen) maxLen = s1_lengths[i]; -#ifdef RAPIDFUZZ_AVX2 +# ifdef RAPIDFUZZ_AVX2 native_simd zero(VecType(0)); native_simd one(1); @@ -500,23 +499,22 @@ static inline auto jaro_similarity_prepare_bound_short_s2(const VecType* s1_leng // todo there could be faster options since comparisions can be relatively expensive for some vector sizes boundSizes -= (boundSizes > zero) & one; - // this can never overflow even when using larger vectors for shifting here, since in the worst case of 8bit vectors - // this shifts by (8/2-1)*2=6 bits - // todo << 1 performs unneeded masking here - // sllv is pretty expensive for 8 / 16 bit since it has to be emulated maybe there is a better solution + // this can never overflow even when using larger vectors for shifting here, since in the worst case of + // 8bit vectors this shifts by (8/2-1)*2=6 bits todo << 1 performs unneeded masking here sllv is pretty + // expensive for 8 / 16 bit since it has to be emulated maybe there is a better solution bounds.boundMaskSize = sllv(one, boundSizes << 1) - one; bounds.boundMask = sllv(one, boundSizes + one) - one; - bounds.maxBound = (s2.size() > static_cast(maxLen)) ? s2.size() : static_cast(maxLen); + bounds.maxBound = + (s2.size() > static_cast(maxLen)) ? s2.size() : static_cast(maxLen); bounds.maxBound /= 2; - if(bounds.maxBound > 0) - bounds.maxBound--; -#else + if (bounds.maxBound > 0) bounds.maxBound--; +# else alignas(alignment) std::array boundMaskSize_; alignas(alignment) std::array boundMask_; // todo try to find a simd implementation for sse2 - for(size_t i = 0; i < vec_width; ++i) { + for (size_t i = 0; i < vec_width; ++i) { int64_t s1_len = static_cast(s1_lengths[i]); int64_t Bound = jaro_bounds(s1_len, s2.size()); @@ -528,7 +526,7 @@ static inline auto jaro_similarity_prepare_bound_short_s2(const VecType* s1_leng bounds.boundMaskSize = native_simd(reinterpret_cast(boundMaskSize_.data())); bounds.boundMask = native_simd(reinterpret_cast(boundMask_.data())); -#endif +# endif int64_t lastRelevantChar = static_cast(maxLen) + bounds.maxBound; if (s2.size() > lastRelevantChar) s2.remove_suffix(s2.size() - lastRelevantChar); @@ -546,16 +544,15 @@ static inline auto jaro_similarity_prepare_bound_long_s2(const VecType* s1_lengt # endif static constexpr size_t vec_width = native_simd::size; - assert(static_cast(s2.size()) > sizeof(VecType)*8); + assert(static_cast(s2.size()) > sizeof(VecType) * 8); JaroSimilaritySimdBounds> bounds; VecType maxLen = 0; // todo permutate + max to find maxLen // side-note: we know only the first 8 bit are actually used - for(size_t i = 0; i < vec_width; ++i) - if(s1_lengths[i] > maxLen) - maxLen = s1_lengths[i]; + for (size_t i = 0; i < vec_width; ++i) + if (s1_lengths[i] > maxLen) maxLen = s1_lengths[i]; bounds.maxBound = s2.size() / 2 - 1; bounds.boundMaskSize = native_simd(bit_mask_lsb(static_cast(2 * bounds.maxBound))); @@ -568,10 +565,9 @@ static inline auto jaro_similarity_prepare_bound_long_s2(const VecType* s1_lengt } template -static inline void jaro_similarity_simd_long_s2(Range scores, const detail::BlockPatternMatchVector& block, - VecType* s1_lengths, - Range s2, - double score_cutoff) noexcept +static inline void +jaro_similarity_simd_long_s2(Range scores, const detail::BlockPatternMatchVector& block, + VecType* s1_lengths, Range s2, double score_cutoff) noexcept { # ifdef RAPIDFUZZ_AVX2 using namespace simd_avx2; @@ -583,19 +579,15 @@ static inline void jaro_similarity_simd_long_s2(Range scores, const det static constexpr size_t vec_width = native_simd::size; static constexpr size_t vecs = static_cast(native_simd::size); assert(block.size() % vecs == 0); - assert(static_cast(s2.size()) > sizeof(VecType)*8); + assert(static_cast(s2.size()) > sizeof(VecType) * 8); - struct AlignedAlloc - { - AlignedAlloc(size_t size) - { - // work around compilation failure in msvc - memory = operator new[](size, std::align_val_t(native_simd::alignment)); - } + struct AlignedAlloc { + AlignedAlloc(size_t size) : memory(rf_aligned_alloc(native_simd::alignment, size)) + {} ~AlignedAlloc() { - ::operator delete[] (memory, std::align_val_t(native_simd::alignment)); + rf_aligned_free(memory); } void* memory = nullptr; @@ -605,7 +597,7 @@ static inline void jaro_similarity_simd_long_s2(Range scores, const det native_simd one(1); size_t result_index = 0; - size_t s2_block_count = static_cast(detail::ceil_div(s2.size(), sizeof(VecType)*8)); + size_t s2_block_count = static_cast(detail::ceil_div(s2.size(), sizeof(VecType) * 8)); AlignedAlloc memory(2 * s2_block_count * sizeof(native_simd)); native_simd* T_flag = static_cast*>(memory.memory); @@ -620,8 +612,10 @@ static inline void jaro_similarity_simd_long_s2(Range scores, const det native_simd P_flag(VecType(0)); - std::fill(T_flag, T_flag + detail::ceil_div(s2_cur.size(), sizeof(VecType)*8), native_simd(VecType(0))); - std::fill(counter, counter + detail::ceil_div(s2_cur.size(), sizeof(VecType)*8), native_simd(VecType(1))); + std::fill(T_flag, T_flag + detail::ceil_div(s2_cur.size(), sizeof(VecType) * 8), + native_simd(VecType(0))); + std::fill(counter, counter + detail::ceil_div(s2_cur.size(), sizeof(VecType) * 8), + native_simd(VecType(1))); // In case s2 is longer than all of the elements in s1_lengths boundMaskSize // might have all bits set and therefor the condition ((boundMask <= boundMaskSize) & one) @@ -629,30 +623,28 @@ static inline void jaro_similarity_simd_long_s2(Range scores, const det // this is solved by splitting the loop into two parts where after this boundary is reached // the first bit inside boundMask is no longer set int64_t j = 0; - for(; j < std::min(bounds.maxBound, s2_cur.size()); ++j) - { + for (; j < std::min(bounds.maxBound, s2_cur.size()); ++j) { alignas(alignment) std::array stored; unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); native_simd PM_j = andnot(X & bounds.boundMask, P_flag); P_flag |= blsi(PM_j); - size_t T_word_index = static_cast(j) / (sizeof(VecType)*8); + size_t T_word_index = static_cast(j) / (sizeof(VecType) * 8); T_flag[T_word_index] |= andnot(counter[T_word_index], (PM_j == zero)); counter[T_word_index] = counter[T_word_index] << 1; bounds.boundMask = (bounds.boundMask << 1) | ((bounds.boundMask <= bounds.boundMaskSize) & one); } - for(; j < s2_cur.size(); ++j) - { + for (; j < s2_cur.size(); ++j) { alignas(alignment) std::array stored; unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); native_simd PM_j = andnot(X & bounds.boundMask, P_flag); P_flag |= blsi(PM_j); - size_t T_word_index = static_cast(j) / (sizeof(VecType)*8); + size_t T_word_index = static_cast(j) / (sizeof(VecType) * 8); T_flag[T_word_index] |= andnot(counter[T_word_index], (PM_j == zero)); counter[T_word_index] = counter[T_word_index] << 1; @@ -663,12 +655,14 @@ static inline void jaro_similarity_simd_long_s2(Range scores, const det alignas(alignment) std::array P_flags; P_flag.store(P_flags.data()); - for(size_t i = 0; i < static_cast(detail::ceil_div(s2_cur.size(), sizeof(VecType)*8)); ++i) + for (size_t i = 0; i < static_cast(detail::ceil_div(s2_cur.size(), sizeof(VecType) * 8)); ++i) T_flag[i].store(T_flags + i * vec_width); for (size_t i = 0; i < vec_width; ++i) { VecType CommonChars = counts[i]; - if (!jaro_common_char_filter(static_cast(s1_lengths[result_index]), s2.size(), CommonChars, score_cutoff)) { + if (!jaro_common_char_filter(static_cast(s1_lengths[result_index]), s2.size(), + CommonChars, score_cutoff)) + { scores[static_cast(result_index)] = 0.0; result_index++; continue; @@ -684,17 +678,17 @@ static inline void jaro_similarity_simd_long_s2(Range scores, const det { size_t T_word_index = 0; VecType T_flag_cur = T_flags[T_word_index * vec_width + i]; - while(P_flag_cur) - { - while(!T_flag_cur) - { + while (P_flag_cur) { + while (!T_flag_cur) { ++T_word_index; T_flag_cur = T_flags[T_word_index * vec_width + i]; } VecType PatternFlagMask = blsi(P_flag_cur); - uint64_t PM_j = block.get(cur_block, s2[countr_zero(T_flag_cur) + static_cast(T_word_index * sizeof(VecType) * 8)]); + uint64_t PM_j = + block.get(cur_block, s2[countr_zero(T_flag_cur) + + static_cast(T_word_index * sizeof(VecType) * 8)]); Transpositions += !(PM_j & (static_cast(PatternFlagMask) << offset)); T_flag_cur = blsr(T_flag_cur); @@ -702,8 +696,8 @@ static inline void jaro_similarity_simd_long_s2(Range scores, const det } } - double Sim = - jaro_calculate_similarity(static_cast(s1_lengths[result_index]), s2.size(), CommonChars, Transpositions); + double Sim = jaro_calculate_similarity(static_cast(s1_lengths[result_index]), s2.size(), + CommonChars, Transpositions); scores[static_cast(result_index)] = (Sim >= score_cutoff) ? Sim : 0; result_index++; @@ -712,10 +706,9 @@ static inline void jaro_similarity_simd_long_s2(Range scores, const det } template -static inline void jaro_similarity_simd_short_s2(Range scores, const detail::BlockPatternMatchVector& block, - VecType* s1_lengths, - Range s2, - double score_cutoff) noexcept +static inline void +jaro_similarity_simd_short_s2(Range scores, const detail::BlockPatternMatchVector& block, + VecType* s1_lengths, Range s2, double score_cutoff) noexcept { # ifdef RAPIDFUZZ_AVX2 using namespace simd_avx2; @@ -727,7 +720,7 @@ static inline void jaro_similarity_simd_short_s2(Range scores, const de static constexpr size_t vec_width = native_simd::size; static constexpr size_t vecs = static_cast(native_simd::size); assert(block.size() % vecs == 0); - assert(static_cast(s2.size()) <= sizeof(VecType)*8); + assert(static_cast(s2.size()) <= sizeof(VecType) * 8); native_simd zero(VecType(0)); native_simd one(1); @@ -747,8 +740,7 @@ static inline void jaro_similarity_simd_short_s2(Range scores, const de // this is solved by splitting the loop into two parts where after this boundary is reached // the first bit inside boundMask is no longer set int64_t j = 0; - for(; j < std::min(bounds.maxBound, s2_cur.size()); ++j) - { + for (; j < std::min(bounds.maxBound, s2_cur.size()); ++j) { alignas(alignment) std::array stored; unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); @@ -761,8 +753,7 @@ static inline void jaro_similarity_simd_short_s2(Range scores, const de bounds.boundMask = (bounds.boundMask << 1) | ((bounds.boundMask <= bounds.boundMaskSize) & one); } - for(; j < s2_cur.size(); ++j) - { + for (; j < s2_cur.size(); ++j) { alignas(alignment) std::array stored; unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); @@ -782,7 +773,9 @@ static inline void jaro_similarity_simd_short_s2(Range scores, const de T_flag.store(T_flags.data()); for (size_t i = 0; i < vec_width; ++i) { VecType CommonChars = counts[i]; - if (!jaro_common_char_filter(static_cast(s1_lengths[result_index]), s2.size(), CommonChars, score_cutoff)) { + if (!jaro_common_char_filter(static_cast(s1_lengths[result_index]), s2.size(), + CommonChars, score_cutoff)) + { scores[static_cast(result_index)] = 0.0; result_index++; continue; @@ -805,8 +798,8 @@ static inline void jaro_similarity_simd_short_s2(Range scores, const de P_flag_cur ^= PatternFlagMask; } - double Sim = - jaro_calculate_similarity(static_cast(s1_lengths[result_index]), s2.size(), CommonChars, Transpositions); + double Sim = jaro_calculate_similarity(static_cast(s1_lengths[result_index]), s2.size(), + CommonChars, Transpositions); scores[static_cast(result_index)] = (Sim >= score_cutoff) ? Sim : 0; result_index++; @@ -816,9 +809,8 @@ static inline void jaro_similarity_simd_short_s2(Range scores, const de template static inline void jaro_similarity_simd(Range scores, const detail::BlockPatternMatchVector& block, - VecType* s1_lengths, size_t s1_lengths_size, - Range s2, - double score_cutoff) noexcept + VecType* s1_lengths, size_t s1_lengths_size, Range s2, + double score_cutoff) noexcept { if (score_cutoff > 1.0) { for (int64_t i = 0; i < static_cast(s1_lengths_size); i++) @@ -834,7 +826,7 @@ static inline void jaro_similarity_simd(Range scores, const detail::Blo return; } - if (static_cast(s2.size()) > sizeof(VecType)*8) + if (static_cast(s2.size()) > sizeof(VecType) * 8) return jaro_similarity_simd_long_s2(scores, block, s1_lengths, s2, score_cutoff); else return jaro_similarity_simd_short_s2(scores, block, s1_lengths, s2, score_cutoff);