From d202dda35bb9a8d10a2ec0d2bbe395b60aac4946 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Sun, 6 Feb 2022 23:13:44 +0100 Subject: [PATCH 01/20] Add shl_c --- test/benchmarks/benchmarks.cpp | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/test/benchmarks/benchmarks.cpp b/test/benchmarks/benchmarks.cpp index 8b73eb38..0bda038e 100644 --- a/test/benchmarks/benchmarks.cpp +++ b/test/benchmarks/benchmarks.cpp @@ -246,6 +246,39 @@ template return 0; } + +inline auto shld(uint64_t x1, uint64_t x2, uint64_t c) +{ + return (x2 << c) | (x1 >> (64 - c)); +} + + +[[gnu::noinline]] static intx::uint256 shl_c( + const intx::uint256& x, const uint64_t& shift) noexcept +{ + uint512 extended; + __builtin_memcpy(&extended[4], &x, sizeof(x)); + + const auto sw = shift / 64; + const auto sb = shift % 64; + + if (sw >= 4) + return 0; + + uint256 r; + __builtin_memcpy(&r, &extended[sw], sizeof(r)); + + if (sb == 0) + return r; + + uint256 z; + z[0] = r[0] << sb; + z[1] = shld(r[0], r[1], sb); + z[2] = shld(r[1], r[2], sb); + z[3] = shld(r[2], r[3], sb); + return z; +} + [[gnu::noinline]] static intx::uint256 shl_halves( const intx::uint256& x, const uint256& big_shift) noexcept { @@ -340,6 +373,7 @@ BENCHMARK_TEMPLATE(shift, uint256, uint256, shl_public)->DenseRange(-1, 3); BENCHMARK_TEMPLATE(shift, uint256, uint256, shl_halves)->DenseRange(-1, 3); BENCHMARK_TEMPLATE(shift, uint256, uint64_t, shl_public)->DenseRange(-1, 3); BENCHMARK_TEMPLATE(shift, uint256, uint64_t, shl_halves)->DenseRange(-1, 3); +BENCHMARK_TEMPLATE(shift, uint256, uint64_t, shl_c)->DenseRange(-1, 3); #if INTX_HAS_EXTINT BENCHMARK_TEMPLATE(shift, uint256, uint64_t, shl_llvm)->DenseRange(-1, 3); #endif From 5fe1d3cfed7859f02f3d52e742b69e0062b194e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Sun, 6 Feb 2022 23:20:37 +0100 Subject: [PATCH 02/20] optimize sw --- test/benchmarks/benchmarks.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/test/benchmarks/benchmarks.cpp b/test/benchmarks/benchmarks.cpp index 0bda038e..a7aa01d2 100644 --- a/test/benchmarks/benchmarks.cpp +++ b/test/benchmarks/benchmarks.cpp @@ -253,21 +253,17 @@ inline auto shld(uint64_t x1, uint64_t x2, uint64_t c) } -[[gnu::noinline]] static intx::uint256 shl_c( - const intx::uint256& x, const uint64_t& shift) noexcept +[[gnu::noinline]] static intx::uint256 shl_c(const intx::uint256& x, const uint64_t& shift) noexcept { uint512 extended; __builtin_memcpy(&extended[4], &x, sizeof(x)); - const auto sw = shift / 64; - const auto sb = shift % 64; - - if (sw >= 4) - return 0; + const auto sw = shift >= 256 ? 4 : shift / 64; uint256 r; __builtin_memcpy(&r, &extended[sw], sizeof(r)); + const auto sb = shift % 64; if (sb == 0) return r; From 6548dc546805b139883cfe58f946cb948d44b7b3 Mon Sep 17 00:00:00 2001 From: rodiazet Date: Thu, 17 Feb 2022 15:36:50 +0100 Subject: [PATCH 03/20] [WIP] experimental shl tests added. exp shl implemented for all types. --- test/benchmarks/benchmarks.cpp | 32 +---------- test/experimental/CMakeLists.txt | 2 +- test/experimental/shl.hpp | 60 +++++++++++++++++++++ test/unittests/test_bitwise.cpp | 91 ++++++++++++++++++++++++++++++++ 4 files changed, 154 insertions(+), 31 deletions(-) create mode 100644 test/experimental/shl.hpp diff --git a/test/benchmarks/benchmarks.cpp b/test/benchmarks/benchmarks.cpp index a7aa01d2..c73ac35a 100644 --- a/test/benchmarks/benchmarks.cpp +++ b/test/benchmarks/benchmarks.cpp @@ -3,6 +3,7 @@ // Licensed under the Apache License, Version 2.0. #include "../experimental/addmod.hpp" +#include "../experimental/shl.hpp" #include #include #include @@ -246,35 +247,6 @@ template return 0; } - -inline auto shld(uint64_t x1, uint64_t x2, uint64_t c) -{ - return (x2 << c) | (x1 >> (64 - c)); -} - - -[[gnu::noinline]] static intx::uint256 shl_c(const intx::uint256& x, const uint64_t& shift) noexcept -{ - uint512 extended; - __builtin_memcpy(&extended[4], &x, sizeof(x)); - - const auto sw = shift >= 256 ? 4 : shift / 64; - - uint256 r; - __builtin_memcpy(&r, &extended[sw], sizeof(r)); - - const auto sb = shift % 64; - if (sb == 0) - return r; - - uint256 z; - z[0] = r[0] << sb; - z[1] = shld(r[0], r[1], sb); - z[2] = shld(r[1], r[2], sb); - z[3] = shld(r[2], r[3], sb); - return z; -} - [[gnu::noinline]] static intx::uint256 shl_halves( const intx::uint256& x, const uint256& big_shift) noexcept { @@ -369,7 +341,7 @@ BENCHMARK_TEMPLATE(shift, uint256, uint256, shl_public)->DenseRange(-1, 3); BENCHMARK_TEMPLATE(shift, uint256, uint256, shl_halves)->DenseRange(-1, 3); BENCHMARK_TEMPLATE(shift, uint256, uint64_t, shl_public)->DenseRange(-1, 3); BENCHMARK_TEMPLATE(shift, uint256, uint64_t, shl_halves)->DenseRange(-1, 3); -BENCHMARK_TEMPLATE(shift, uint256, uint64_t, shl_c)->DenseRange(-1, 3); +BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_c)->DenseRange(-1, 3); #if INTX_HAS_EXTINT BENCHMARK_TEMPLATE(shift, uint256, uint64_t, shl_llvm)->DenseRange(-1, 3); #endif diff --git a/test/experimental/CMakeLists.txt b/test/experimental/CMakeLists.txt index 483c2ca0..766b20f5 100644 --- a/test/experimental/CMakeLists.txt +++ b/test/experimental/CMakeLists.txt @@ -2,7 +2,7 @@ # Copyright 2019-2020 Pawel Bylica. # Licensed under the Apache License, Version 2.0. -add_library(experimental STATIC add.cpp add.hpp) +add_library(experimental STATIC add.cpp add.hpp shl.hpp) target_compile_definitions(experimental PRIVATE INTX_EXPERIMENTAL) target_include_directories(experimental PUBLIC ${PROJECT_SOURCE_DIR}/test) target_link_libraries(experimental PUBLIC intx::intx) diff --git a/test/experimental/shl.hpp b/test/experimental/shl.hpp new file mode 100644 index 00000000..8732c915 --- /dev/null +++ b/test/experimental/shl.hpp @@ -0,0 +1,60 @@ +// intx: extended precision integer library. +// Copyright 2019-2020 Pawel Bylica. +// Licensed under the Apache License, Version 2.0. +#pragma once + +#include + +namespace intx +{ +namespace experimental +{ +template +inline constexpr auto shld( + typename uint::word_type x1, typename uint::word_type x2, uint64_t c) +{ + return (x2 << c) | (x1 >> (uint::word_num_bits - c)); +} + +template +inline constexpr uint shl_c(const uint& x, const uint64_t& shift) noexcept +{ + uint<2 * N> extended; + for (unsigned i = 0; i < uint::num_words; ++i) + extended[i + uint::num_words] = x[i]; + + const auto sw = + shift >= uint::num_bits ? uint::num_words : shift / uint::word_num_bits; + + uint r; + for (unsigned i = 0; i < uint::num_words; ++i) + r[i] = extended[size_t(uint::num_words - sw + i)]; + + const auto sb = shift % uint::word_num_bits; + if (sb == 0) + return r; + + uint z; + + z[0] = r[0] << sb; + for (unsigned i = 1; i < uint::num_words; ++i) + z[i] = shld(r[i - 1], r[i], sb); + + return z; +} + +template +inline constexpr uint shl_c(const uint& x, const uint& shift) noexcept +{ + uint64_t high_words_fold = 0; + for (size_t i = 1; i < uint::num_words; ++i) + high_words_fold |= shift[i]; + + if (INTX_UNLIKELY(high_words_fold != 0)) + return 0; + + return shl_c(x, shift[0]); +} + +} // namespace experimental +} // namespace intx \ No newline at end of file diff --git a/test/unittests/test_bitwise.cpp b/test/unittests/test_bitwise.cpp index a6e23252..4a87124f 100644 --- a/test/unittests/test_bitwise.cpp +++ b/test/unittests/test_bitwise.cpp @@ -2,6 +2,7 @@ // Copyright 2019 Pawel Bylica. // Licensed under the Apache License, Version 2.0. +#include "test/experimental/shl.hpp" #include "test_suite.hpp" using namespace intx; @@ -192,6 +193,96 @@ TYPED_TEST(uint_test, shift_by_int) EXPECT_EQ(x << int{TypeParam::num_bits}, 0); } +TYPED_TEST(uint_test, shift_one_bit_exp) +{ + for (unsigned shift = 0; shift < sizeof(TypeParam) * 8; ++shift) + { + SCOPED_TRACE(shift); + constexpr auto x = TypeParam{1}; + const auto a = experimental::shl_c(x, shift); + EXPECT_EQ(x, a >> shift); + } +} + +TYPED_TEST(uint_test, shift_left_overflow_exp) +{ + const auto x = ~TypeParam{}; + + for (unsigned n = 0; n <= sizeof(TypeParam) * 7; ++n) + { + const auto sh = x >> n; + EXPECT_EQ(experimental::shl_c(x, sh), 0) << "n=" << n; + } + + for (unsigned n = 0; n <= sizeof(TypeParam) * 7; ++n) + { + const auto sh = experimental::shl_c(TypeParam{sizeof(TypeParam) * 8}, n); + EXPECT_EQ(experimental::shl_c(x, sh), 0) << "n=" << n; + } +} + +TYPED_TEST(uint_test, shift_right_overflow_exp) +{ + const auto x = ~TypeParam{}; + + for (unsigned n = 0; n <= sizeof(TypeParam) * 7; ++n) + { + const auto sh = x >> n; + EXPECT_EQ(x >> sh, 0) << "n=" << n; + } + + for (unsigned n = 0; n <= sizeof(TypeParam) * 7; ++n) + { + const auto sh = experimental::shl_c(TypeParam{sizeof(TypeParam) * 8}, n); + EXPECT_EQ(x >> sh, 0) << "n=" << n; + } +} + +TYPED_TEST(uint_test, shift_left_overflow_uint64_exp) +{ + const auto x = ~TypeParam{}; + + for (unsigned n = 0; n <= 100; ++n) + { + const uint64_t sh = sizeof(TypeParam) * 8 + n; + EXPECT_EQ(experimental::shl_c(x, sh), 0) << "n=" << n; + } +} + +TYPED_TEST(uint_test, shift_right_overflow_uint64_exp) +{ + const auto x = ~TypeParam{}; + + for (unsigned n = 0; n <= 100; ++n) + { + const uint64_t sh = sizeof(TypeParam) * 8 + n; + EXPECT_EQ(x >> sh, 0) << "n=" << n; + } +} + +TYPED_TEST(uint_test, shift_overflow_exp) +{ + const uint64_t sh = sizeof(TypeParam) * 8; + const auto value = ~TypeParam{}; + EXPECT_EQ(value >> sh, 0); + EXPECT_EQ(value >> TypeParam{sh}, 0); + EXPECT_EQ(experimental::shl_c(value, sh), 0); + EXPECT_EQ(experimental::shl_c(value, TypeParam{sh}), 0); +} + +TYPED_TEST(uint_test, shift_by_int_exp) +{ + const auto x = experimental::shl_c(TypeParam{1}, (sizeof(TypeParam) * 8 - 1)) | TypeParam{1}; + EXPECT_EQ(x >> 0, x); + EXPECT_EQ(experimental::shl_c(x, 0), x); + EXPECT_EQ(x >> 1, experimental::shl_c(TypeParam{1}, uint64_t{TypeParam::num_bits - 2})); + EXPECT_EQ(experimental::shl_c(x, 1), TypeParam{2}); + EXPECT_EQ(x >> int{TypeParam::num_bits - 1}, TypeParam{1}); + EXPECT_EQ(experimental::shl_c(x, int{TypeParam::num_bits - 1}), + experimental::shl_c(TypeParam{1}, uint64_t{TypeParam::num_bits - 1})); + EXPECT_EQ(x >> int{TypeParam::num_bits}, 0); +} + TYPED_TEST(uint_test, not_of_zero) { auto ones = ~TypeParam{}; From 75a6ed3a6a0bba8f39f145f2b58f92119d97e886 Mon Sep 17 00:00:00 2001 From: rodiazet Date: Thu, 17 Feb 2022 16:43:27 +0100 Subject: [PATCH 04/20] [WIP] experimental shr test added. exp shr implemented for all types. --- test/experimental/CMakeLists.txt | 2 +- test/experimental/shr.hpp | 61 ++++++++++++++++++++++++++++++++ test/unittests/test_bitwise.cpp | 26 +++++++------- 3 files changed, 76 insertions(+), 13 deletions(-) create mode 100644 test/experimental/shr.hpp diff --git a/test/experimental/CMakeLists.txt b/test/experimental/CMakeLists.txt index 766b20f5..e8aa7153 100644 --- a/test/experimental/CMakeLists.txt +++ b/test/experimental/CMakeLists.txt @@ -2,7 +2,7 @@ # Copyright 2019-2020 Pawel Bylica. # Licensed under the Apache License, Version 2.0. -add_library(experimental STATIC add.cpp add.hpp shl.hpp) +add_library(experimental STATIC add.cpp add.hpp shl.hpp shr.hpp) target_compile_definitions(experimental PRIVATE INTX_EXPERIMENTAL) target_include_directories(experimental PUBLIC ${PROJECT_SOURCE_DIR}/test) target_link_libraries(experimental PUBLIC intx::intx) diff --git a/test/experimental/shr.hpp b/test/experimental/shr.hpp new file mode 100644 index 00000000..1805c7be --- /dev/null +++ b/test/experimental/shr.hpp @@ -0,0 +1,61 @@ +// intx: extended precision integer library. +// Copyright 2019-2020 Pawel Bylica. +// Licensed under the Apache License, Version 2.0. +#pragma once + +#include + +namespace intx +{ +namespace experimental +{ +template +inline auto shrd(typename uint::word_type x1, typename uint::word_type x2, uint64_t c) +{ + return (x2 >> c) | (x1 << (uint::word_num_bits - c)); +} + +template +uint shr_c(const uint& x, const uint64_t& shift) noexcept +{ + uint<2 * N> extended; + for (unsigned i = 0; i < uint::num_words; ++i) + extended[i] = x[i]; + + const auto sw = + shift >= uint::num_bits ? uint::num_words : shift / uint::word_num_bits; + + uint r; + for (unsigned i = 0; i < uint::num_words; ++i) + r[i] = extended[size_t(sw + i)]; + + const auto sb = shift % uint::word_num_bits; + if (sb == 0) + return r; + + constexpr auto nw = uint::num_words; + + uint z; + z[nw - 1] = r[nw - 1] >> sb; + + for (unsigned i = 0; i < nw - 1; ++i) + z[nw - i - 2] = shrd(r[nw - i - 1], r[nw - i - 2], sb); + + return z; +} + +template +inline constexpr uint shr_c(const uint& x, const uint& shift) noexcept +{ + uint64_t high_words_fold = 0; + for (size_t i = 1; i < uint::num_words; ++i) + high_words_fold |= shift[i]; + + if (INTX_UNLIKELY(high_words_fold != 0)) + return 0; + + return shr_c(x, shift[0]); +} + +} // namespace experimental +} // namespace intx \ No newline at end of file diff --git a/test/unittests/test_bitwise.cpp b/test/unittests/test_bitwise.cpp index 4a87124f..2b726851 100644 --- a/test/unittests/test_bitwise.cpp +++ b/test/unittests/test_bitwise.cpp @@ -3,6 +3,7 @@ // Licensed under the Apache License, Version 2.0. #include "test/experimental/shl.hpp" +#include "test/experimental/shr.hpp" #include "test_suite.hpp" using namespace intx; @@ -200,7 +201,7 @@ TYPED_TEST(uint_test, shift_one_bit_exp) SCOPED_TRACE(shift); constexpr auto x = TypeParam{1}; const auto a = experimental::shl_c(x, shift); - EXPECT_EQ(x, a >> shift); + EXPECT_EQ(x, experimental::shr_c(a, shift)); } } @@ -210,7 +211,7 @@ TYPED_TEST(uint_test, shift_left_overflow_exp) for (unsigned n = 0; n <= sizeof(TypeParam) * 7; ++n) { - const auto sh = x >> n; + const auto sh = experimental::shr_c(x, n); EXPECT_EQ(experimental::shl_c(x, sh), 0) << "n=" << n; } @@ -227,14 +228,14 @@ TYPED_TEST(uint_test, shift_right_overflow_exp) for (unsigned n = 0; n <= sizeof(TypeParam) * 7; ++n) { - const auto sh = x >> n; - EXPECT_EQ(x >> sh, 0) << "n=" << n; + const auto sh = experimental::shr_c(x, n); + EXPECT_EQ(experimental::shr_c(x, sh), 0) << "n=" << n; } for (unsigned n = 0; n <= sizeof(TypeParam) * 7; ++n) { const auto sh = experimental::shl_c(TypeParam{sizeof(TypeParam) * 8}, n); - EXPECT_EQ(x >> sh, 0) << "n=" << n; + EXPECT_EQ(experimental::shr_c(x, sh), 0) << "n=" << n; } } @@ -256,7 +257,7 @@ TYPED_TEST(uint_test, shift_right_overflow_uint64_exp) for (unsigned n = 0; n <= 100; ++n) { const uint64_t sh = sizeof(TypeParam) * 8 + n; - EXPECT_EQ(x >> sh, 0) << "n=" << n; + EXPECT_EQ(experimental::shr_c(x, sh), 0) << "n=" << n; } } @@ -264,8 +265,8 @@ TYPED_TEST(uint_test, shift_overflow_exp) { const uint64_t sh = sizeof(TypeParam) * 8; const auto value = ~TypeParam{}; - EXPECT_EQ(value >> sh, 0); - EXPECT_EQ(value >> TypeParam{sh}, 0); + EXPECT_EQ(experimental::shr_c(value, sh), 0); + EXPECT_EQ(experimental::shr_c(value, TypeParam{sh}), 0); EXPECT_EQ(experimental::shl_c(value, sh), 0); EXPECT_EQ(experimental::shl_c(value, TypeParam{sh}), 0); } @@ -273,14 +274,15 @@ TYPED_TEST(uint_test, shift_overflow_exp) TYPED_TEST(uint_test, shift_by_int_exp) { const auto x = experimental::shl_c(TypeParam{1}, (sizeof(TypeParam) * 8 - 1)) | TypeParam{1}; - EXPECT_EQ(x >> 0, x); + EXPECT_EQ(experimental::shr_c(x, 0), x); EXPECT_EQ(experimental::shl_c(x, 0), x); - EXPECT_EQ(x >> 1, experimental::shl_c(TypeParam{1}, uint64_t{TypeParam::num_bits - 2})); + EXPECT_EQ(experimental::shr_c(x, 1), + experimental::shl_c(TypeParam{1}, uint64_t{TypeParam::num_bits - 2})); EXPECT_EQ(experimental::shl_c(x, 1), TypeParam{2}); - EXPECT_EQ(x >> int{TypeParam::num_bits - 1}, TypeParam{1}); + EXPECT_EQ(experimental::shr_c(x, int{TypeParam::num_bits - 1}), TypeParam{1}); EXPECT_EQ(experimental::shl_c(x, int{TypeParam::num_bits - 1}), experimental::shl_c(TypeParam{1}, uint64_t{TypeParam::num_bits - 1})); - EXPECT_EQ(x >> int{TypeParam::num_bits}, 0); + EXPECT_EQ(experimental::shr_c(x, int{TypeParam::num_bits}), 0); } TYPED_TEST(uint_test, not_of_zero) From 8de0d871fdc03b4825a94635da37636d6cf9d862 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Thu, 17 Feb 2022 18:52:42 +0100 Subject: [PATCH 05/20] Merge into shift.hpp --- test/benchmarks/benchmarks.cpp | 2 +- test/experimental/CMakeLists.txt | 4 +- test/experimental/{shl.hpp => shift.hpp} | 58 +++++++++++++++++++--- test/experimental/shr.hpp | 61 ------------------------ test/unittests/test_bitwise.cpp | 3 +- 5 files changed, 56 insertions(+), 72 deletions(-) rename test/experimental/{shl.hpp => shift.hpp} (50%) delete mode 100644 test/experimental/shr.hpp diff --git a/test/benchmarks/benchmarks.cpp b/test/benchmarks/benchmarks.cpp index c73ac35a..c44a745b 100644 --- a/test/benchmarks/benchmarks.cpp +++ b/test/benchmarks/benchmarks.cpp @@ -3,7 +3,7 @@ // Licensed under the Apache License, Version 2.0. #include "../experimental/addmod.hpp" -#include "../experimental/shl.hpp" +#include "../experimental/shift.hpp" #include #include #include diff --git a/test/experimental/CMakeLists.txt b/test/experimental/CMakeLists.txt index e8aa7153..e6ebe549 100644 --- a/test/experimental/CMakeLists.txt +++ b/test/experimental/CMakeLists.txt @@ -1,8 +1,8 @@ # intx: extended precision integer library. -# Copyright 2019-2020 Pawel Bylica. +# Copyright 2019 Pawel Bylica. # Licensed under the Apache License, Version 2.0. -add_library(experimental STATIC add.cpp add.hpp shl.hpp shr.hpp) +add_library(experimental STATIC add.cpp add.hpp shift.hpp) target_compile_definitions(experimental PRIVATE INTX_EXPERIMENTAL) target_include_directories(experimental PUBLIC ${PROJECT_SOURCE_DIR}/test) target_link_libraries(experimental PUBLIC intx::intx) diff --git a/test/experimental/shl.hpp b/test/experimental/shift.hpp similarity index 50% rename from test/experimental/shl.hpp rename to test/experimental/shift.hpp index 8732c915..4bbe096d 100644 --- a/test/experimental/shl.hpp +++ b/test/experimental/shift.hpp @@ -1,13 +1,11 @@ // intx: extended precision integer library. -// Copyright 2019-2020 Pawel Bylica. +// Copyright 2022 Pawel Bylica. // Licensed under the Apache License, Version 2.0. #pragma once #include -namespace intx -{ -namespace experimental +namespace intx::experimental { template inline constexpr auto shld( @@ -56,5 +54,53 @@ inline constexpr uint shl_c(const uint& x, const uint& shift) noexcept return shl_c(x, shift[0]); } -} // namespace experimental -} // namespace intx \ No newline at end of file + +template +inline auto shrd(typename uint::word_type x1, typename uint::word_type x2, uint64_t c) +{ + return (x2 >> c) | (x1 << (uint::word_num_bits - c)); +} + +template +uint shr_c(const uint& x, const uint64_t& shift) noexcept +{ + uint<2 * N> extended; + for (unsigned i = 0; i < uint::num_words; ++i) + extended[i] = x[i]; + + const auto sw = + shift >= uint::num_bits ? uint::num_words : shift / uint::word_num_bits; + + uint r; + for (unsigned i = 0; i < uint::num_words; ++i) + r[i] = extended[size_t(sw + i)]; + + const auto sb = shift % uint::word_num_bits; + if (sb == 0) + return r; + + constexpr auto nw = uint::num_words; + + uint z; + z[nw - 1] = r[nw - 1] >> sb; + + for (unsigned i = 0; i < nw - 1; ++i) + z[nw - i - 2] = shrd(r[nw - i - 1], r[nw - i - 2], sb); + + return z; +} + +template +inline constexpr uint shr_c(const uint& x, const uint& shift) noexcept +{ + uint64_t high_words_fold = 0; + for (size_t i = 1; i < uint::num_words; ++i) + high_words_fold |= shift[i]; + + if (INTX_UNLIKELY(high_words_fold != 0)) + return 0; + + return shr_c(x, shift[0]); +} + +} // namespace intx::experimental diff --git a/test/experimental/shr.hpp b/test/experimental/shr.hpp deleted file mode 100644 index 1805c7be..00000000 --- a/test/experimental/shr.hpp +++ /dev/null @@ -1,61 +0,0 @@ -// intx: extended precision integer library. -// Copyright 2019-2020 Pawel Bylica. -// Licensed under the Apache License, Version 2.0. -#pragma once - -#include - -namespace intx -{ -namespace experimental -{ -template -inline auto shrd(typename uint::word_type x1, typename uint::word_type x2, uint64_t c) -{ - return (x2 >> c) | (x1 << (uint::word_num_bits - c)); -} - -template -uint shr_c(const uint& x, const uint64_t& shift) noexcept -{ - uint<2 * N> extended; - for (unsigned i = 0; i < uint::num_words; ++i) - extended[i] = x[i]; - - const auto sw = - shift >= uint::num_bits ? uint::num_words : shift / uint::word_num_bits; - - uint r; - for (unsigned i = 0; i < uint::num_words; ++i) - r[i] = extended[size_t(sw + i)]; - - const auto sb = shift % uint::word_num_bits; - if (sb == 0) - return r; - - constexpr auto nw = uint::num_words; - - uint z; - z[nw - 1] = r[nw - 1] >> sb; - - for (unsigned i = 0; i < nw - 1; ++i) - z[nw - i - 2] = shrd(r[nw - i - 1], r[nw - i - 2], sb); - - return z; -} - -template -inline constexpr uint shr_c(const uint& x, const uint& shift) noexcept -{ - uint64_t high_words_fold = 0; - for (size_t i = 1; i < uint::num_words; ++i) - high_words_fold |= shift[i]; - - if (INTX_UNLIKELY(high_words_fold != 0)) - return 0; - - return shr_c(x, shift[0]); -} - -} // namespace experimental -} // namespace intx \ No newline at end of file diff --git a/test/unittests/test_bitwise.cpp b/test/unittests/test_bitwise.cpp index 2b726851..6cd507cc 100644 --- a/test/unittests/test_bitwise.cpp +++ b/test/unittests/test_bitwise.cpp @@ -2,8 +2,7 @@ // Copyright 2019 Pawel Bylica. // Licensed under the Apache License, Version 2.0. -#include "test/experimental/shl.hpp" -#include "test/experimental/shr.hpp" +#include "test/experimental/shift.hpp" #include "test_suite.hpp" using namespace intx; From 1596e745ae9f7f839a6c6978e306830cc3ede3ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Thu, 17 Feb 2022 19:01:56 +0100 Subject: [PATCH 06/20] De-template --- test/experimental/shift.hpp | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/test/experimental/shift.hpp b/test/experimental/shift.hpp index 4bbe096d..54562748 100644 --- a/test/experimental/shift.hpp +++ b/test/experimental/shift.hpp @@ -7,11 +7,9 @@ namespace intx::experimental { -template -inline constexpr auto shld( - typename uint::word_type x1, typename uint::word_type x2, uint64_t c) +inline constexpr uint64_t shld(uint64_t x1, uint64_t x2, uint64_t c) { - return (x2 << c) | (x1 >> (uint::word_num_bits - c)); + return (x2 << c) | (x1 >> (64 - c)); } template @@ -36,7 +34,7 @@ inline constexpr uint shl_c(const uint& x, const uint64_t& shift) noexcept z[0] = r[0] << sb; for (unsigned i = 1; i < uint::num_words; ++i) - z[i] = shld(r[i - 1], r[i], sb); + z[i] = shld(r[i - 1], r[i], sb); return z; } @@ -55,10 +53,9 @@ inline constexpr uint shl_c(const uint& x, const uint& shift) noexcept } -template -inline auto shrd(typename uint::word_type x1, typename uint::word_type x2, uint64_t c) +inline uint64_t shrd(uint64_t x1, uint64_t x2, uint64_t c) { - return (x2 >> c) | (x1 << (uint::word_num_bits - c)); + return (x2 >> c) | (x1 << (64 - c)); } template @@ -85,7 +82,7 @@ uint shr_c(const uint& x, const uint64_t& shift) noexcept z[nw - 1] = r[nw - 1] >> sb; for (unsigned i = 0; i < nw - 1; ++i) - z[nw - i - 2] = shrd(r[nw - i - 1], r[nw - i - 2], sb); + z[nw - i - 2] = shrd(r[nw - i - 1], r[nw - i - 2], sb); return z; } From 494b42a066278074a2e70a84d67eb1fb4feb9acd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Thu, 17 Feb 2022 18:57:37 +0100 Subject: [PATCH 07/20] Extend benchmarks --- test/benchmarks/benchmarks.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/test/benchmarks/benchmarks.cpp b/test/benchmarks/benchmarks.cpp index c44a745b..af172b52 100644 --- a/test/benchmarks/benchmarks.cpp +++ b/test/benchmarks/benchmarks.cpp @@ -212,6 +212,20 @@ template return x << y; } +template +[[gnu::noinline]] static intx::uint shl_generic( + const intx::uint& x, const uint64_t& y) noexcept +{ + return intx::operator<<(x, y); +} + +template +[[gnu::noinline]] static intx::uint shl_generic( + const intx::uint& x, const intx::uint& y) noexcept +{ + return intx::operator<<(x, y); +} + [[gnu::noinline]] static intx::uint256 shl_halves( const intx::uint256& x, const uint64_t& shift) noexcept { @@ -338,8 +352,10 @@ static void shift(benchmark::State& state) } } BENCHMARK_TEMPLATE(shift, uint256, uint256, shl_public)->DenseRange(-1, 3); +BENCHMARK_TEMPLATE(shift, uint256, uint256, shl_generic)->DenseRange(-1, 3); BENCHMARK_TEMPLATE(shift, uint256, uint256, shl_halves)->DenseRange(-1, 3); BENCHMARK_TEMPLATE(shift, uint256, uint64_t, shl_public)->DenseRange(-1, 3); +BENCHMARK_TEMPLATE(shift, uint256, uint64_t, shl_generic)->DenseRange(-1, 3); BENCHMARK_TEMPLATE(shift, uint256, uint64_t, shl_halves)->DenseRange(-1, 3); BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_c)->DenseRange(-1, 3); #if INTX_HAS_EXTINT From 10425855088ef1d8bcd14e4743149c189616da15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Thu, 17 Feb 2022 19:24:15 +0100 Subject: [PATCH 08/20] Add shl_e --- test/benchmarks/benchmarks.cpp | 1 + test/experimental/shift.hpp | 36 +++++++++++++++++++++++++++++++++ test/unittests/test_bitwise.cpp | 14 +++++++++++++ 3 files changed, 51 insertions(+) diff --git a/test/benchmarks/benchmarks.cpp b/test/benchmarks/benchmarks.cpp index af172b52..8c41b38e 100644 --- a/test/benchmarks/benchmarks.cpp +++ b/test/benchmarks/benchmarks.cpp @@ -358,6 +358,7 @@ BENCHMARK_TEMPLATE(shift, uint256, uint64_t, shl_public)->DenseRange(-1, 3); BENCHMARK_TEMPLATE(shift, uint256, uint64_t, shl_generic)->DenseRange(-1, 3); BENCHMARK_TEMPLATE(shift, uint256, uint64_t, shl_halves)->DenseRange(-1, 3); BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_c)->DenseRange(-1, 3); +BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_e)->DenseRange(-1, 3); #if INTX_HAS_EXTINT BENCHMARK_TEMPLATE(shift, uint256, uint64_t, shl_llvm)->DenseRange(-1, 3); #endif diff --git a/test/experimental/shift.hpp b/test/experimental/shift.hpp index 54562748..0adfd531 100644 --- a/test/experimental/shift.hpp +++ b/test/experimental/shift.hpp @@ -52,6 +52,42 @@ inline constexpr uint shl_c(const uint& x, const uint& shift) noexcept return shl_c(x, shift[0]); } +template +inline constexpr uint shl_e(const uint& x, const uint64_t& shift) noexcept +{ + uint r; + + const auto w = shift / 64; + + size_t j = 0; + for (size_t i = w; i < uint::num_words; ++i, ++j) + r[i] = x[j]; + + const auto sb = shift % uint::word_num_bits; + if (sb == 0) + return r; + + uint z; + z[0] = r[0] << sb; + for (unsigned i = 1; i < uint::num_words; ++i) + z[i] = shld(r[i - 1], r[i], sb); + + return z; +} + +template +inline constexpr uint shl_e(const uint& x, const uint& shift) noexcept +{ + uint64_t high_words_fold = 0; + for (size_t i = 1; i < uint::num_words; ++i) + high_words_fold |= shift[i]; + + if (INTX_UNLIKELY(high_words_fold != 0)) + return 0; + + return shl_e(x, shift[0]); +} + inline uint64_t shrd(uint64_t x1, uint64_t x2, uint64_t c) { diff --git a/test/unittests/test_bitwise.cpp b/test/unittests/test_bitwise.cpp index 6cd507cc..8c4da34f 100644 --- a/test/unittests/test_bitwise.cpp +++ b/test/unittests/test_bitwise.cpp @@ -201,6 +201,9 @@ TYPED_TEST(uint_test, shift_one_bit_exp) constexpr auto x = TypeParam{1}; const auto a = experimental::shl_c(x, shift); EXPECT_EQ(x, experimental::shr_c(a, shift)); + + const auto b = experimental::shl_e(x, shift); + EXPECT_EQ(x, experimental::shr_c(b, shift)); } } @@ -212,12 +215,16 @@ TYPED_TEST(uint_test, shift_left_overflow_exp) { const auto sh = experimental::shr_c(x, n); EXPECT_EQ(experimental::shl_c(x, sh), 0) << "n=" << n; + EXPECT_EQ(experimental::shl_e(x, sh), 0) << "n=" << n; } for (unsigned n = 0; n <= sizeof(TypeParam) * 7; ++n) { const auto sh = experimental::shl_c(TypeParam{sizeof(TypeParam) * 8}, n); + const auto sh2 = experimental::shl_e(TypeParam{sizeof(TypeParam) * 8}, n); + EXPECT_EQ(sh, sh2); EXPECT_EQ(experimental::shl_c(x, sh), 0) << "n=" << n; + EXPECT_EQ(experimental::shl_e(x, sh), 0) << "n=" << n; } } @@ -234,6 +241,8 @@ TYPED_TEST(uint_test, shift_right_overflow_exp) for (unsigned n = 0; n <= sizeof(TypeParam) * 7; ++n) { const auto sh = experimental::shl_c(TypeParam{sizeof(TypeParam) * 8}, n); + const auto sh2 = experimental::shl_e(TypeParam{sizeof(TypeParam) * 8}, n); + EXPECT_EQ(sh, sh2); EXPECT_EQ(experimental::shr_c(x, sh), 0) << "n=" << n; } } @@ -246,6 +255,7 @@ TYPED_TEST(uint_test, shift_left_overflow_uint64_exp) { const uint64_t sh = sizeof(TypeParam) * 8 + n; EXPECT_EQ(experimental::shl_c(x, sh), 0) << "n=" << n; + EXPECT_EQ(experimental::shl_e(x, sh), 0) << "n=" << n; } } @@ -275,12 +285,16 @@ TYPED_TEST(uint_test, shift_by_int_exp) const auto x = experimental::shl_c(TypeParam{1}, (sizeof(TypeParam) * 8 - 1)) | TypeParam{1}; EXPECT_EQ(experimental::shr_c(x, 0), x); EXPECT_EQ(experimental::shl_c(x, 0), x); + EXPECT_EQ(experimental::shl_e(x, 0), x); EXPECT_EQ(experimental::shr_c(x, 1), experimental::shl_c(TypeParam{1}, uint64_t{TypeParam::num_bits - 2})); EXPECT_EQ(experimental::shl_c(x, 1), TypeParam{2}); + EXPECT_EQ(experimental::shl_e(x, 1), TypeParam{2}); EXPECT_EQ(experimental::shr_c(x, int{TypeParam::num_bits - 1}), TypeParam{1}); EXPECT_EQ(experimental::shl_c(x, int{TypeParam::num_bits - 1}), experimental::shl_c(TypeParam{1}, uint64_t{TypeParam::num_bits - 1})); + EXPECT_EQ(experimental::shl_e(x, int{TypeParam::num_bits - 1}), + experimental::shl_e(TypeParam{1}, uint64_t{TypeParam::num_bits - 1})); EXPECT_EQ(experimental::shr_c(x, int{TypeParam::num_bits}), 0); } From 7c4f1fed9b8c4045f29bbc8e8de282efadfedd7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Thu, 17 Feb 2022 19:26:30 +0100 Subject: [PATCH 09/20] noinline --- test/experimental/shift.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/experimental/shift.hpp b/test/experimental/shift.hpp index 0adfd531..e72b63aa 100644 --- a/test/experimental/shift.hpp +++ b/test/experimental/shift.hpp @@ -13,7 +13,7 @@ inline constexpr uint64_t shld(uint64_t x1, uint64_t x2, uint64_t c) } template -inline constexpr uint shl_c(const uint& x, const uint64_t& shift) noexcept +[[gnu::noinline]] inline constexpr uint shl_c(const uint& x, const uint64_t& shift) noexcept { uint<2 * N> extended; for (unsigned i = 0; i < uint::num_words; ++i) @@ -53,7 +53,7 @@ inline constexpr uint shl_c(const uint& x, const uint& shift) noexcept } template -inline constexpr uint shl_e(const uint& x, const uint64_t& shift) noexcept +[[gnu::noinline]] inline constexpr uint shl_e(const uint& x, const uint64_t& shift) noexcept { uint r; From e130635a39e075cc6e6715c1cbeb034d6ee73317 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Thu, 17 Feb 2022 19:49:59 +0100 Subject: [PATCH 10/20] Add shl_w() --- test/benchmarks/benchmarks.cpp | 1 + test/experimental/shift.hpp | 35 +++++++++++++++++++++++++++++++++ test/unittests/test_bitwise.cpp | 13 ++++++++++++ 3 files changed, 49 insertions(+) diff --git a/test/benchmarks/benchmarks.cpp b/test/benchmarks/benchmarks.cpp index 8c41b38e..0d6125a7 100644 --- a/test/benchmarks/benchmarks.cpp +++ b/test/benchmarks/benchmarks.cpp @@ -359,6 +359,7 @@ BENCHMARK_TEMPLATE(shift, uint256, uint64_t, shl_generic)->DenseRange(-1, 3); BENCHMARK_TEMPLATE(shift, uint256, uint64_t, shl_halves)->DenseRange(-1, 3); BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_c)->DenseRange(-1, 3); BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_e)->DenseRange(-1, 3); +BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_w)->DenseRange(-1, 3); #if INTX_HAS_EXTINT BENCHMARK_TEMPLATE(shift, uint256, uint64_t, shl_llvm)->DenseRange(-1, 3); #endif diff --git a/test/experimental/shift.hpp b/test/experimental/shift.hpp index e72b63aa..779da132 100644 --- a/test/experimental/shift.hpp +++ b/test/experimental/shift.hpp @@ -89,6 +89,41 @@ inline constexpr uint shl_e(const uint& x, const uint& shift) noexcept } +template +[[gnu::noinline]] inline constexpr uint shl_w(const uint& x, const uint64_t& shift) noexcept +{ + const auto w = shift / 64; + const auto s = shift % 64; + + uint r; + for (size_t i = 0; i < uint::num_words; ++i) + r[i] = i >= w ? x[i - w] : 0; + + if (s == 0) + return r; + + uint z; + z[0] = r[0] << s; + for (unsigned i = 1; i < uint::num_words; ++i) + z[i] = shld(r[i - 1], r[i], s); + + return z; +} + +template +inline constexpr uint shl_w(const uint& x, const uint& shift) noexcept +{ + uint64_t high_words_fold = 0; + for (size_t i = 1; i < uint::num_words; ++i) + high_words_fold |= shift[i]; + + if (INTX_UNLIKELY(high_words_fold != 0)) + return 0; + + return shl_w(x, shift[0]); +} + + inline uint64_t shrd(uint64_t x1, uint64_t x2, uint64_t c) { return (x2 >> c) | (x1 << (64 - c)); diff --git a/test/unittests/test_bitwise.cpp b/test/unittests/test_bitwise.cpp index 8c4da34f..e9fc7aba 100644 --- a/test/unittests/test_bitwise.cpp +++ b/test/unittests/test_bitwise.cpp @@ -203,6 +203,8 @@ TYPED_TEST(uint_test, shift_one_bit_exp) EXPECT_EQ(x, experimental::shr_c(a, shift)); const auto b = experimental::shl_e(x, shift); + const auto c = experimental::shl_w(x, shift); + EXPECT_EQ(b, c); EXPECT_EQ(x, experimental::shr_c(b, shift)); } } @@ -216,15 +218,19 @@ TYPED_TEST(uint_test, shift_left_overflow_exp) const auto sh = experimental::shr_c(x, n); EXPECT_EQ(experimental::shl_c(x, sh), 0) << "n=" << n; EXPECT_EQ(experimental::shl_e(x, sh), 0) << "n=" << n; + EXPECT_EQ(experimental::shl_e(x, sh), 0) << "n=" << n; } for (unsigned n = 0; n <= sizeof(TypeParam) * 7; ++n) { const auto sh = experimental::shl_c(TypeParam{sizeof(TypeParam) * 8}, n); const auto sh2 = experimental::shl_e(TypeParam{sizeof(TypeParam) * 8}, n); + const auto sh3 = experimental::shl_e(TypeParam{sizeof(TypeParam) * 8}, n); EXPECT_EQ(sh, sh2); + EXPECT_EQ(sh, sh3); EXPECT_EQ(experimental::shl_c(x, sh), 0) << "n=" << n; EXPECT_EQ(experimental::shl_e(x, sh), 0) << "n=" << n; + EXPECT_EQ(experimental::shl_w(x, sh), 0) << "n=" << n; } } @@ -242,7 +248,9 @@ TYPED_TEST(uint_test, shift_right_overflow_exp) { const auto sh = experimental::shl_c(TypeParam{sizeof(TypeParam) * 8}, n); const auto sh2 = experimental::shl_e(TypeParam{sizeof(TypeParam) * 8}, n); + const auto sh3 = experimental::shl_w(TypeParam{sizeof(TypeParam) * 8}, n); EXPECT_EQ(sh, sh2); + EXPECT_EQ(sh, sh3); EXPECT_EQ(experimental::shr_c(x, sh), 0) << "n=" << n; } } @@ -256,6 +264,7 @@ TYPED_TEST(uint_test, shift_left_overflow_uint64_exp) const uint64_t sh = sizeof(TypeParam) * 8 + n; EXPECT_EQ(experimental::shl_c(x, sh), 0) << "n=" << n; EXPECT_EQ(experimental::shl_e(x, sh), 0) << "n=" << n; + EXPECT_EQ(experimental::shl_w(x, sh), 0) << "n=" << n; } } @@ -286,15 +295,19 @@ TYPED_TEST(uint_test, shift_by_int_exp) EXPECT_EQ(experimental::shr_c(x, 0), x); EXPECT_EQ(experimental::shl_c(x, 0), x); EXPECT_EQ(experimental::shl_e(x, 0), x); + EXPECT_EQ(experimental::shl_w(x, 0), x); EXPECT_EQ(experimental::shr_c(x, 1), experimental::shl_c(TypeParam{1}, uint64_t{TypeParam::num_bits - 2})); EXPECT_EQ(experimental::shl_c(x, 1), TypeParam{2}); EXPECT_EQ(experimental::shl_e(x, 1), TypeParam{2}); + EXPECT_EQ(experimental::shl_w(x, 1), TypeParam{2}); EXPECT_EQ(experimental::shr_c(x, int{TypeParam::num_bits - 1}), TypeParam{1}); EXPECT_EQ(experimental::shl_c(x, int{TypeParam::num_bits - 1}), experimental::shl_c(TypeParam{1}, uint64_t{TypeParam::num_bits - 1})); EXPECT_EQ(experimental::shl_e(x, int{TypeParam::num_bits - 1}), experimental::shl_e(TypeParam{1}, uint64_t{TypeParam::num_bits - 1})); + EXPECT_EQ(experimental::shl_w(x, int{TypeParam::num_bits - 1}), + experimental::shl_w(TypeParam{1}, uint64_t{TypeParam::num_bits - 1})); EXPECT_EQ(experimental::shr_c(x, int{TypeParam::num_bits}), 0); } From 8ceecf11518ec39cfca90472d57715179386dbe3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Thu, 17 Feb 2022 20:22:32 +0100 Subject: [PATCH 11/20] fake --- test/experimental/shift.hpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/experimental/shift.hpp b/test/experimental/shift.hpp index 779da132..5a159336 100644 --- a/test/experimental/shift.hpp +++ b/test/experimental/shift.hpp @@ -75,6 +75,11 @@ template return z; } +[[gnu::noinline]] inline constexpr uint256 shl_e(const uint256& x, const uint64_t& /*shift*/) noexcept +{ + return x; +} + template inline constexpr uint shl_e(const uint& x, const uint& shift) noexcept { From 38a80c2ce7796f79877c46c7e066f12a16218c01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Thu, 17 Feb 2022 20:33:41 +0100 Subject: [PATCH 12/20] New impl --- test/experimental/shift.hpp | 43 +++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/test/experimental/shift.hpp b/test/experimental/shift.hpp index 5a159336..fcb4d6bf 100644 --- a/test/experimental/shift.hpp +++ b/test/experimental/shift.hpp @@ -9,6 +9,8 @@ namespace intx::experimental { inline constexpr uint64_t shld(uint64_t x1, uint64_t x2, uint64_t c) { + if (c == 0) + return x2; return (x2 << c) | (x1 >> (64 - c)); } @@ -75,9 +77,46 @@ template return z; } -[[gnu::noinline]] inline constexpr uint256 shl_e(const uint256& x, const uint64_t& /*shift*/) noexcept +[[gnu::noinline]] inline constexpr uint256 shl_e(const uint256& x, const uint64_t& shift) noexcept { - return x; + const auto w = shift / 64; + const auto s = shift % 64; + + switch (w) + { + case 0: + { + uint256 r; + r[0] = x[0] << s; + r[1] = shld(x[0], x[1], s); + r[2] = shld(x[1], x[2], s); + r[3] = shld(x[2], x[3], s); + return r; + } + case 1: + { + uint256 r; + r[1] = x[0] << s; + r[2] = shld(x[0], x[1], s); + r[3] = shld(x[1], x[2], s); + return r; + } + case 2: + { + uint256 r; + r[2] = x[0] << s; + r[3] = shld(x[0], x[1], s); + return r; + } + case 3: + { + uint256 r; + r[3] = x[0] << s; + return r; + } + default: + return {}; + } } template From 66c5702f3e19460111d85b2e64b17c05e116fb87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Thu, 17 Feb 2022 20:39:28 +0100 Subject: [PATCH 13/20] Should be better --- test/experimental/shift.hpp | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/test/experimental/shift.hpp b/test/experimental/shift.hpp index fcb4d6bf..54a75f8b 100644 --- a/test/experimental/shift.hpp +++ b/test/experimental/shift.hpp @@ -82,41 +82,39 @@ template const auto w = shift / 64; const auto s = shift % 64; + uint256 r; switch (w) { case 0: { - uint256 r; r[0] = x[0] << s; r[1] = shld(x[0], x[1], s); r[2] = shld(x[1], x[2], s); r[3] = shld(x[2], x[3], s); - return r; + break; } case 1: { - uint256 r; r[1] = x[0] << s; r[2] = shld(x[0], x[1], s); r[3] = shld(x[1], x[2], s); - return r; + break; } case 2: { - uint256 r; r[2] = x[0] << s; r[3] = shld(x[0], x[1], s); - return r; + break; } case 3: { - uint256 r; r[3] = x[0] << s; - return r; + break; } default: - return {}; + break; } + return r; } template From 517095f602e44a2f8d343eeffe0afeec212df649 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Thu, 17 Feb 2022 22:01:25 +0100 Subject: [PATCH 14/20] Patch shl_c --- test/experimental/shift.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/experimental/shift.hpp b/test/experimental/shift.hpp index 54a75f8b..66ae400f 100644 --- a/test/experimental/shift.hpp +++ b/test/experimental/shift.hpp @@ -21,8 +21,8 @@ template for (unsigned i = 0; i < uint::num_words; ++i) extended[i + uint::num_words] = x[i]; - const auto sw = - shift >= uint::num_bits ? uint::num_words : shift / uint::word_num_bits; + const auto s = shift / uint::word_num_bits; + const auto sw = s < uint::num_words ? s : uint::num_words; uint r; for (unsigned i = 0; i < uint::num_words; ++i) From 9c7a472d2f6596a32ed1e7e1e1b0a529fe3abaee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Thu, 17 Feb 2022 22:03:05 +0100 Subject: [PATCH 15/20] new shl_w --- test/experimental/shift.hpp | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/test/experimental/shift.hpp b/test/experimental/shift.hpp index 66ae400f..bd37b728 100644 --- a/test/experimental/shift.hpp +++ b/test/experimental/shift.hpp @@ -136,20 +136,21 @@ template { const auto w = shift / 64; const auto s = shift % 64; + const auto t = s == 0 ? 0 : 64 - s; + const auto m = s == 0 ? 0 : ~uint64_t{0}; uint r; + uint64_t carry = 0; for (size_t i = 0; i < uint::num_words; ++i) - r[i] = i >= w ? x[i - w] : 0; - - if (s == 0) - return r; - - uint z; - z[0] = r[0] << s; - for (unsigned i = 1; i < uint::num_words; ++i) - z[i] = shld(r[i - 1], r[i], s); - - return z; + { + auto a = i >= w ? x[i - w] : 0; + auto b = a << s; + auto c = b | carry; + carry = a >> t; + carry &= m; + r[i] = c; + } + return r; } template From 7b9cacd8a454bbb7811760730f380344a1716a74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Thu, 17 Feb 2022 22:12:44 +0100 Subject: [PATCH 16/20] Expand more --- test/experimental/shift.hpp | 53 ++++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 10 deletions(-) diff --git a/test/experimental/shift.hpp b/test/experimental/shift.hpp index bd37b728..8e98e1f6 100644 --- a/test/experimental/shift.hpp +++ b/test/experimental/shift.hpp @@ -87,33 +87,66 @@ template { case 0: { - r[0] = x[0] << s; - r[1] = shld(x[0], x[1], s); - r[2] = shld(x[1], x[2], s); - r[3] = shld(x[2], x[3], s); + r = x; break; } case 1: { - r[1] = x[0] << s; - r[2] = shld(x[0], x[1], s); - r[3] = shld(x[1], x[2], s); + r[1] = x[0]; + r[2] = x[1]; + r[3] = x[2]; break; } case 2: { - r[2] = x[0] << s; - r[3] = shld(x[0], x[1], s); + r[2] = x[0]; + r[3] = x[1]; break; } case 3: { - r[3] = x[0] << s; + r[3] = x[0]; break; } default: break; } + + if (s == 0) + return r; + + switch (w) + { + case 0: + { + r[3] = shld(r[2], r[3], s); + r[2] = shld(r[1], r[2], s); + r[1] = shld(r[0], r[1], s); + r[0] = r[0] << s; + break; + } + case 1: + { + r[3] = shld(r[2], r[3], s); + r[2] = shld(r[1], r[2], s); + r[1] = r[1] << s; + break; + } + case 2: + { + r[3] = shld(r[2], r[3], s); + r[2] = r[2] << s; + break; + } + case 3: + { + r[3] = r[3] << s; + break; + } + default: + break; + } + return r; } From 95c0cae9b54a3ef34cdb06ef3e52dc00e16c5ea7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Fri, 18 Feb 2022 19:43:00 +0100 Subject: [PATCH 17/20] pass shift by value --- test/benchmarks/benchmarks.cpp | 23 ++++++++++------------- test/experimental/shift.hpp | 8 ++++---- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/test/benchmarks/benchmarks.cpp b/test/benchmarks/benchmarks.cpp index 0d6125a7..df25cd77 100644 --- a/test/benchmarks/benchmarks.cpp +++ b/test/benchmarks/benchmarks.cpp @@ -199,8 +199,7 @@ BENCHMARK_TEMPLATE(binop, uint512, uint512, public_mul); BENCHMARK_TEMPLATE(binop, uint512, uint512, gmp::mul); template -[[gnu::noinline]] static intx::uint shl_public( - const intx::uint& x, const uint64_t& y) noexcept +[[gnu::noinline]] static intx::uint shl_public(const intx::uint& x, uint64_t y) noexcept { return x << y; } @@ -213,8 +212,7 @@ template } template -[[gnu::noinline]] static intx::uint shl_generic( - const intx::uint& x, const uint64_t& y) noexcept +[[gnu::noinline]] static intx::uint shl_generic(const intx::uint& x, uint64_t y) noexcept { return intx::operator<<(x, y); } @@ -226,8 +224,7 @@ template return intx::operator<<(x, y); } -[[gnu::noinline]] static intx::uint256 shl_halves( - const intx::uint256& x, const uint64_t& shift) noexcept +[[gnu::noinline]] static intx::uint256 shl_halves(const intx::uint256& x, uint64_t shift) noexcept { constexpr auto num_bits = 256; constexpr auto half_bits = num_bits / 2; @@ -303,7 +300,7 @@ template } #if INTX_HAS_EXTINT -[[gnu::noinline]] static intx::uint256 shl_llvm(const intx::uint256& x, const uint64_t& y) noexcept +[[gnu::noinline]] static intx::uint256 shl_llvm(const intx::uint256& x, uint64_t y) noexcept { unsigned _ExtInt(256) a; std::memcpy(&a, &x, sizeof(a)); @@ -315,7 +312,7 @@ template #endif -template +template static void shift(benchmark::State& state) { const auto& shift_samples_id = [&state]() noexcept { @@ -339,7 +336,7 @@ static void shift(benchmark::State& state) const auto& xs = test::get_samples(sizeof(ArgT) == sizeof(uint256) ? x_256 : x_512); const auto& raw_shifts = test::get_samples(shift_samples_id); - std::array shifts; + std::array, test::num_samples> shifts; std::copy(std::cbegin(raw_shifts), std::cend(raw_shifts), std::begin(shifts)); while (state.KeepRunningBatch(xs.size())) @@ -351,9 +348,9 @@ static void shift(benchmark::State& state) } } } -BENCHMARK_TEMPLATE(shift, uint256, uint256, shl_public)->DenseRange(-1, 3); -BENCHMARK_TEMPLATE(shift, uint256, uint256, shl_generic)->DenseRange(-1, 3); -BENCHMARK_TEMPLATE(shift, uint256, uint256, shl_halves)->DenseRange(-1, 3); +BENCHMARK_TEMPLATE(shift, uint256, const uint256&, shl_public)->DenseRange(-1, 3); +BENCHMARK_TEMPLATE(shift, uint256, const uint256&, shl_generic)->DenseRange(-1, 3); +BENCHMARK_TEMPLATE(shift, uint256, const uint256&, shl_halves)->DenseRange(-1, 3); BENCHMARK_TEMPLATE(shift, uint256, uint64_t, shl_public)->DenseRange(-1, 3); BENCHMARK_TEMPLATE(shift, uint256, uint64_t, shl_generic)->DenseRange(-1, 3); BENCHMARK_TEMPLATE(shift, uint256, uint64_t, shl_halves)->DenseRange(-1, 3); @@ -363,7 +360,7 @@ BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_w)->DenseRange(-1 #if INTX_HAS_EXTINT BENCHMARK_TEMPLATE(shift, uint256, uint64_t, shl_llvm)->DenseRange(-1, 3); #endif -BENCHMARK_TEMPLATE(shift, uint512, uint512, shl_public)->DenseRange(-1, 3); +BENCHMARK_TEMPLATE(shift, uint512, const uint512&, shl_public)->DenseRange(-1, 3); BENCHMARK_TEMPLATE(shift, uint512, uint64_t, shl_public)->DenseRange(-1, 3); [[gnu::noinline]] static bool lt_public(const uint256& x, const uint256& y) noexcept diff --git a/test/experimental/shift.hpp b/test/experimental/shift.hpp index 8e98e1f6..77ff5ff0 100644 --- a/test/experimental/shift.hpp +++ b/test/experimental/shift.hpp @@ -15,7 +15,7 @@ inline constexpr uint64_t shld(uint64_t x1, uint64_t x2, uint64_t c) } template -[[gnu::noinline]] inline constexpr uint shl_c(const uint& x, const uint64_t& shift) noexcept +[[gnu::noinline]] inline constexpr uint shl_c(const uint& x, uint64_t shift) noexcept { uint<2 * N> extended; for (unsigned i = 0; i < uint::num_words; ++i) @@ -55,7 +55,7 @@ inline constexpr uint shl_c(const uint& x, const uint& shift) noexcept } template -[[gnu::noinline]] inline constexpr uint shl_e(const uint& x, const uint64_t& shift) noexcept +[[gnu::noinline]] inline constexpr uint shl_e(const uint& x, uint64_t shift) noexcept { uint r; @@ -77,7 +77,7 @@ template return z; } -[[gnu::noinline]] inline constexpr uint256 shl_e(const uint256& x, const uint64_t& shift) noexcept +[[gnu::noinline]] inline constexpr uint256 shl_e(const uint256& x, uint64_t shift) noexcept { const auto w = shift / 64; const auto s = shift % 64; @@ -165,7 +165,7 @@ inline constexpr uint shl_e(const uint& x, const uint& shift) noexcept template -[[gnu::noinline]] inline constexpr uint shl_w(const uint& x, const uint64_t& shift) noexcept +[[gnu::noinline]] inline constexpr uint shl_w(const uint& x, uint64_t shift) noexcept { const auto w = shift / 64; const auto s = shift % 64; From 757e1d45c0dc6d02541f516589080148bbdbe7f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Fri, 18 Feb 2022 20:06:49 +0100 Subject: [PATCH 18/20] shl_bits --- test/benchmarks/benchmarks.cpp | 4 +++ test/experimental/shift.hpp | 65 ++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) diff --git a/test/benchmarks/benchmarks.cpp b/test/benchmarks/benchmarks.cpp index df25cd77..f8cefd0d 100644 --- a/test/benchmarks/benchmarks.cpp +++ b/test/benchmarks/benchmarks.cpp @@ -357,6 +357,10 @@ BENCHMARK_TEMPLATE(shift, uint256, uint64_t, shl_halves)->DenseRange(-1, 3); BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_c)->DenseRange(-1, 3); BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_e)->DenseRange(-1, 3); BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_w)->DenseRange(-1, 3); +BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_bits_1)->DenseRange(-1, 3); +BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_bits_2)->DenseRange(-1, 3); +BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_bits_3)->DenseRange(-1, 3); +BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_bits_4)->DenseRange(-1, 3); #if INTX_HAS_EXTINT BENCHMARK_TEMPLATE(shift, uint256, uint64_t, shl_llvm)->DenseRange(-1, 3); #endif diff --git a/test/experimental/shift.hpp b/test/experimental/shift.hpp index 77ff5ff0..3195bf26 100644 --- a/test/experimental/shift.hpp +++ b/test/experimental/shift.hpp @@ -41,6 +41,71 @@ template return z; } +[[gnu::noinline]] inline uint256 shl_bits_1(const uint256& x, uint64_t sb) noexcept +{ + if (sb == 0) + __builtin_unreachable(); + uint256 z; + z[0] = x[0] << sb; + for (unsigned i = 1; i < uint256::num_words; ++i) + z[i] = shld(x[i - 1], x[i], sb); + return z; +} + +[[gnu::noinline]] inline uint256 shl_bits_2(const uint256& x, uint64_t sb) noexcept +{ + if (sb == 0) + __builtin_unreachable(); + + const auto t = 64 - sb; + + uint256 r; + uint64_t carry = 0; + for (size_t i = 0; i < uint256::num_words; ++i) + { + auto a = x[i]; + auto b = a << sb; + auto c = b | carry; + carry = a >> t; + r[i] = c; + } + return r; +} + +[[gnu::noinline]] inline uint256 shl_bits_3(const uint256& x, uint64_t sb) noexcept +{ + if (sb == 0) + __builtin_unreachable(); + + static constexpr size_t num_words = 4; + size_t skip = 0; + uint256 r; + uint64_t carry = 0; + for (size_t i = 0; i < (num_words - skip); ++i) + { + r[num_words - 1 - i - skip] = (x[num_words - 1 - i] >> sb) | carry; + carry = (x[num_words - 1 - i] << (64 - sb - 1)) << 1; + } + return r; +} + +[[gnu::noinline]] inline uint256 shl_bits_4(const uint256& x, uint64_t sb) noexcept +{ + if (sb == 0) + __builtin_unreachable(); + + static constexpr size_t num_words = 4; + size_t skip = 0; + uint256 r; + uint64_t carry = 0; + for (size_t i = 0; i < (num_words - skip); ++i) + { + r[num_words - 1 - i - skip] = (x[num_words - 1 - i] >> sb) | carry; + carry = (x[num_words - 1 - i] << (64 - sb)); + } + return r; +} + template inline constexpr uint shl_c(const uint& x, const uint& shift) noexcept { From 7ce00d6b764ec3225389beac4daf953623460546 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Fri, 18 Feb 2022 21:43:09 +0100 Subject: [PATCH 19/20] Just funkin multiply --- test/benchmarks/benchmarks.cpp | 15 ++++++++++----- test/experimental/shift.hpp | 14 ++++++++------ 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/test/benchmarks/benchmarks.cpp b/test/benchmarks/benchmarks.cpp index f8cefd0d..a14c9a2a 100644 --- a/test/benchmarks/benchmarks.cpp +++ b/test/benchmarks/benchmarks.cpp @@ -229,13 +229,16 @@ template constexpr auto num_bits = 256; constexpr auto half_bits = num_bits / 2; - const auto xlo = uint128{x[0], x[1]}; + const auto xlo_ = uint128{x[0], x[1]}; + const auto xlo = static_cast(xlo_); if (shift < half_bits) { - const auto lo = xlo << shift; + const auto lo_ = xlo << shift; + const auto lo = uint128{lo_}; - const auto xhi = uint128{x[2], x[3]}; + const auto xhi_ = uint128{x[2], x[3]}; + const auto xhi = static_cast(xhi_); // Find the part moved from lo to hi. // The shift right here can be invalid: @@ -243,7 +246,8 @@ template // Split it into 2 valid shifts by (rshift - 1) and 1. const auto rshift = half_bits - shift; const auto lo_overflow = (xlo >> (rshift - 1)) >> 1; - const auto hi = (xhi << shift) | lo_overflow; + const auto hi_ = (xhi << shift) | lo_overflow; + const auto hi = uint128{hi_}; return {lo[0], lo[1], hi[0], hi[1]}; } @@ -251,7 +255,8 @@ template // larger than size of the Int. if (shift < num_bits) { - const auto hi = xlo << (shift - half_bits); + const auto hi_ = xlo << (shift - half_bits); + const auto hi = uint128{hi_}; return {0, 0, hi[0], hi[1]}; } diff --git a/test/experimental/shift.hpp b/test/experimental/shift.hpp index 3195bf26..686df977 100644 --- a/test/experimental/shift.hpp +++ b/test/experimental/shift.hpp @@ -29,14 +29,16 @@ template r[i] = extended[size_t(uint::num_words - sw + i)]; const auto sb = shift % uint::word_num_bits; - if (sb == 0) - return r; + const auto m = uint64_t{1} << sb; uint z; - - z[0] = r[0] << sb; - for (unsigned i = 1; i < uint::num_words; ++i) - z[i] = shld(r[i - 1], r[i], sb); + uint64_t k = 0; + for (unsigned i = 0; i < uint::num_words; ++i) + { + const auto p = umul(r[i], m); + z[i] = p[0] + k; + k = p[1]; + } return z; } From 8550d749f971596d8b3b80bedb0b4b81d1f0c3c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Mon, 21 Feb 2022 11:08:36 +0100 Subject: [PATCH 20/20] Add AVX implementation --- test/benchmarks/benchmarks.cpp | 1 + test/experimental/shift.hpp | 58 +++++++++++++++++++++++++++++++++ test/unittests/test_bitwise.cpp | 42 ++++++++++++++++++++++++ 3 files changed, 101 insertions(+) diff --git a/test/benchmarks/benchmarks.cpp b/test/benchmarks/benchmarks.cpp index a14c9a2a..5950495d 100644 --- a/test/benchmarks/benchmarks.cpp +++ b/test/benchmarks/benchmarks.cpp @@ -362,6 +362,7 @@ BENCHMARK_TEMPLATE(shift, uint256, uint64_t, shl_halves)->DenseRange(-1, 3); BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_c)->DenseRange(-1, 3); BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_e)->DenseRange(-1, 3); BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_w)->DenseRange(-1, 3); +BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_avx)->DenseRange(-1, 3); BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_bits_1)->DenseRange(-1, 3); BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_bits_2)->DenseRange(-1, 3); BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_bits_3)->DenseRange(-1, 3); diff --git a/test/experimental/shift.hpp b/test/experimental/shift.hpp index 686df977..1b77f50b 100644 --- a/test/experimental/shift.hpp +++ b/test/experimental/shift.hpp @@ -3,10 +3,68 @@ // Licensed under the Apache License, Version 2.0. #pragma once +#include #include namespace intx::experimental { +inline uint256 shl_words_avx(const uint256& x, uint64_t sw) noexcept +{ + sw = (sw < 4) ? sw : 4; + int idxs[][8] = { + {0, 1, 2, 3, 4, 5, 6, 7}, + {-1, -1, 0, 1, 2, 3, 4, 5}, + {-1, -1, -1, -1, 0, 1, 2, 3}, + {-1, -1, -1, -1, -1, -1, 0, 1}, + {-1, -1, -1, -1, -1, -1, -1, -1}, + }; + + auto idx = _mm256_load_si256((__m256i*)idxs[sw]); + auto a = _mm256_load_si256((__m256i*)&x); + + auto p = _mm256_permutevar8x32_epi32(a, idx); + + auto zero = __m256{}; + auto bf = _mm256_blendv_ps(*(__m256*)&p, zero, *(__m256*)&idx); + auto b = *(__m256i*)&bf; + + uint256 res; + _mm256_store_si256((__m256i*)&res, b); + + return res; +} + +inline uint256 shl_bits_avx(const uint256& x, uint64_t sb) noexcept +{ + auto a = _mm256_loadu_si256((__m256i*)&x); + auto zero = __m256i{}; + + auto p = _mm256_permute4x64_epi64(a, 0b10010000); + + auto b = _mm256_blend_epi32(p, zero, 0b11); + + __m128i rcount{int64_t(64 - sb), 0}; + auto c = _mm256_srl_epi64(b, rcount); + + __m128i count{int64_t(sb), 0}; + auto d = _mm256_sll_epi64(a, count); + + auto e = _mm256_or_si256(c, d); + + uint256 res; + _mm256_storeu_si256((__m256i*)&res, e); + + return res; +} + +[[gnu::noinline]] inline uint256 shl_avx(const uint256& x, uint64_t shift) noexcept +{ + auto sw = shift / 64; + auto sb = shift % 64; + auto a = shl_words_avx(x, sw); + return shl_bits_avx(a, sb); +} + inline constexpr uint64_t shld(uint64_t x1, uint64_t x2, uint64_t c) { if (c == 0) diff --git a/test/unittests/test_bitwise.cpp b/test/unittests/test_bitwise.cpp index e9fc7aba..e988c23f 100644 --- a/test/unittests/test_bitwise.cpp +++ b/test/unittests/test_bitwise.cpp @@ -347,3 +347,45 @@ TYPED_TEST(uint_test, shift_against_mul) auto y = a * s; EXPECT_EQ(x, y); } + +TEST(avx, shl_words) +{ + const auto x = 0x18191a1b1c1d1e1f28292a2b2c2d2e2f38393a3b3c3d3e3f48494a4b4c4d4e4f_u256; + EXPECT_EQ(experimental::shl_words_avx(x, 0), x); + EXPECT_EQ(experimental::shl_words_avx(x, 1), x << 64); + EXPECT_EQ(experimental::shl_words_avx(x, 2), x << 128); + EXPECT_EQ(experimental::shl_words_avx(x, 3), x << 192); + EXPECT_EQ(experimental::shl_words_avx(x, 4), 0); + EXPECT_EQ(experimental::shl_words_avx(x, 5), 0); + EXPECT_EQ(experimental::shl_words_avx(x, 123131231), 0); +} + +TEST(avx, shl_bits) +{ + const auto x = 0x18191a1b1c1d1e1f28292a2b2c2d2e2f38393a3b3c3d3e3f48494a4b4c4d4e4f_u256; + EXPECT_EQ(experimental::shl_bits_avx(x, 0), x); + EXPECT_EQ(experimental::shl_bits_avx(x, 1), x << 1); + EXPECT_EQ(experimental::shl_bits_avx(x, 2), x << 2); + EXPECT_EQ(experimental::shl_bits_avx(x, 3), x << 3); + EXPECT_EQ(experimental::shl_bits_avx(x, 31), x << 31); + EXPECT_EQ(experimental::shl_bits_avx(x, 32), x << 32); + EXPECT_EQ(experimental::shl_bits_avx(x, 33), x << 33); + EXPECT_EQ(experimental::shl_bits_avx(x, 63), x << 63); + EXPECT_EQ(experimental::shl_bits_avx(x, 64), x << 64); +} + +TEST(avx, shl_avx) +{ + const auto x = 0x18191a1b1c1d1e1f28292a2b2c2d2e2f38393a3b3c3d3e3f48494a4b4c4d4e4f_u256; + EXPECT_EQ(experimental::shl_avx(x, 0), x); + EXPECT_EQ(experimental::shl_avx(x, 1), x << 1); + EXPECT_EQ(experimental::shl_avx(x, 2), x << 2); + EXPECT_EQ(experimental::shl_avx(x, 3), x << 3); + EXPECT_EQ(experimental::shl_avx(x, 31), x << 31); + EXPECT_EQ(experimental::shl_avx(x, 32), x << 32); + EXPECT_EQ(experimental::shl_avx(x, 33), x << 33); + EXPECT_EQ(experimental::shl_avx(x, 63), x << 63); + EXPECT_EQ(experimental::shl_avx(x, 64), x << 64); + EXPECT_EQ(experimental::shl_avx(x, 65), x << 65); + EXPECT_EQ(experimental::shl_avx(x, 255), x << 255); +}