diff --git a/test/benchmarks/benchmarks.cpp b/test/benchmarks/benchmarks.cpp index 8b73eb38..5950495d 100644 --- a/test/benchmarks/benchmarks.cpp +++ b/test/benchmarks/benchmarks.cpp @@ -3,6 +3,7 @@ // Licensed under the Apache License, Version 2.0. #include "../experimental/addmod.hpp" +#include "../experimental/shift.hpp" #include #include #include @@ -198,8 +199,7 @@ BENCHMARK_TEMPLATE(binop, uint512, uint512, public_mul); BENCHMARK_TEMPLATE(binop, uint512, uint512, gmp::mul); template -[[gnu::noinline]] static intx::uint shl_public( - const intx::uint& x, const uint64_t& y) noexcept +[[gnu::noinline]] static intx::uint shl_public(const intx::uint& x, uint64_t y) noexcept { return x << y; } @@ -211,19 +211,34 @@ template return x << y; } -[[gnu::noinline]] static intx::uint256 shl_halves( - const intx::uint256& x, const uint64_t& shift) noexcept +template +[[gnu::noinline]] static intx::uint shl_generic(const intx::uint& x, uint64_t y) noexcept +{ + return intx::operator<<(x, y); +} + +template +[[gnu::noinline]] static intx::uint shl_generic( + const intx::uint& x, const intx::uint& y) noexcept +{ + return intx::operator<<(x, y); +} + +[[gnu::noinline]] static intx::uint256 shl_halves(const intx::uint256& x, uint64_t shift) noexcept { constexpr auto num_bits = 256; constexpr auto half_bits = num_bits / 2; - const auto xlo = uint128{x[0], x[1]}; + const auto xlo_ = uint128{x[0], x[1]}; + const auto xlo = static_cast(xlo_); if (shift < half_bits) { - const auto lo = xlo << shift; + const auto lo_ = xlo << shift; + const auto lo = uint128{lo_}; - const auto xhi = uint128{x[2], x[3]}; + const auto xhi_ = uint128{x[2], x[3]}; + const auto xhi = static_cast(xhi_); // Find the part moved from lo to hi. // The shift right here can be invalid: @@ -231,7 +246,8 @@ template // Split it into 2 valid shifts by (rshift - 1) and 1. 
const auto rshift = half_bits - shift; const auto lo_overflow = (xlo >> (rshift - 1)) >> 1; - const auto hi = (xhi << shift) | lo_overflow; + const auto hi_ = (xhi << shift) | lo_overflow; + const auto hi = uint128{hi_}; return {lo[0], lo[1], hi[0], hi[1]}; } @@ -239,7 +255,8 @@ template // larger than size of the Int. if (shift < num_bits) { - const auto hi = xlo << (shift - half_bits); + const auto hi_ = xlo << (shift - half_bits); + const auto hi = uint128{hi_}; return {0, 0, hi[0], hi[1]}; } @@ -288,7 +305,7 @@ template } #if INTX_HAS_EXTINT -[[gnu::noinline]] static intx::uint256 shl_llvm(const intx::uint256& x, const uint64_t& y) noexcept +[[gnu::noinline]] static intx::uint256 shl_llvm(const intx::uint256& x, uint64_t y) noexcept { unsigned _ExtInt(256) a; std::memcpy(&a, &x, sizeof(a)); @@ -300,7 +317,7 @@ template #endif -template +template static void shift(benchmark::State& state) { const auto& shift_samples_id = [&state]() noexcept { @@ -324,7 +341,7 @@ static void shift(benchmark::State& state) const auto& xs = test::get_samples(sizeof(ArgT) == sizeof(uint256) ? 
x_256 : x_512); const auto& raw_shifts = test::get_samples(shift_samples_id); - std::array shifts; + std::array, test::num_samples> shifts; std::copy(std::cbegin(raw_shifts), std::cend(raw_shifts), std::begin(shifts)); while (state.KeepRunningBatch(xs.size())) @@ -336,14 +353,24 @@ static void shift(benchmark::State& state) } } } -BENCHMARK_TEMPLATE(shift, uint256, uint256, shl_public)->DenseRange(-1, 3); -BENCHMARK_TEMPLATE(shift, uint256, uint256, shl_halves)->DenseRange(-1, 3); +BENCHMARK_TEMPLATE(shift, uint256, const uint256&, shl_public)->DenseRange(-1, 3); +BENCHMARK_TEMPLATE(shift, uint256, const uint256&, shl_generic)->DenseRange(-1, 3); +BENCHMARK_TEMPLATE(shift, uint256, const uint256&, shl_halves)->DenseRange(-1, 3); BENCHMARK_TEMPLATE(shift, uint256, uint64_t, shl_public)->DenseRange(-1, 3); +BENCHMARK_TEMPLATE(shift, uint256, uint64_t, shl_generic)->DenseRange(-1, 3); BENCHMARK_TEMPLATE(shift, uint256, uint64_t, shl_halves)->DenseRange(-1, 3); +BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_c)->DenseRange(-1, 3); +BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_e)->DenseRange(-1, 3); +BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_w)->DenseRange(-1, 3); +BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_avx)->DenseRange(-1, 3); +BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_bits_1)->DenseRange(-1, 3); +BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_bits_2)->DenseRange(-1, 3); +BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_bits_3)->DenseRange(-1, 3); +BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_bits_4)->DenseRange(-1, 3); #if INTX_HAS_EXTINT BENCHMARK_TEMPLATE(shift, uint256, uint64_t, shl_llvm)->DenseRange(-1, 3); #endif -BENCHMARK_TEMPLATE(shift, uint512, uint512, shl_public)->DenseRange(-1, 3); +BENCHMARK_TEMPLATE(shift, uint512, const uint512&, shl_public)->DenseRange(-1, 3); BENCHMARK_TEMPLATE(shift, uint512, uint64_t, 
shl_public)->DenseRange(-1, 3); [[gnu::noinline]] static bool lt_public(const uint256& x, const uint256& y) noexcept diff --git a/test/experimental/CMakeLists.txt b/test/experimental/CMakeLists.txt index 483c2ca0..e6ebe549 100644 --- a/test/experimental/CMakeLists.txt +++ b/test/experimental/CMakeLists.txt @@ -1,8 +1,8 @@ # intx: extended precision integer library. -# Copyright 2019-2020 Pawel Bylica. +# Copyright 2019 Pawel Bylica. # Licensed under the Apache License, Version 2.0. -add_library(experimental STATIC add.cpp add.hpp) +add_library(experimental STATIC add.cpp add.hpp shift.hpp) target_compile_definitions(experimental PRIVATE INTX_EXPERIMENTAL) target_include_directories(experimental PUBLIC ${PROJECT_SOURCE_DIR}/test) target_link_libraries(experimental PUBLIC intx::intx) diff --git a/test/experimental/shift.hpp b/test/experimental/shift.hpp new file mode 100644 index 00000000..1b77f50b --- /dev/null +++ b/test/experimental/shift.hpp @@ -0,0 +1,375 @@ +// intx: extended precision integer library. +// Copyright 2022 Pawel Bylica. +// Licensed under the Apache License, Version 2.0. +#pragma once + +#include +#include + +namespace intx::experimental +{ +inline uint256 shl_words_avx(const uint256& x, uint64_t sw) noexcept +{ + sw = (sw < 4) ? 
sw : 4; + int idxs[][8] = { + {0, 1, 2, 3, 4, 5, 6, 7}, + {-1, -1, 0, 1, 2, 3, 4, 5}, + {-1, -1, -1, -1, 0, 1, 2, 3}, + {-1, -1, -1, -1, -1, -1, 0, 1}, + {-1, -1, -1, -1, -1, -1, -1, -1}, + }; /* one row per word shift: source lane index for each 32-bit lane, -1 marks a lane that must become zero */ + + auto idx = _mm256_loadu_si256((__m256i*)idxs[sw]); /* unaligned load: an int array is not guaranteed to be 32-byte aligned */ + auto a = _mm256_loadu_si256((__m256i*)&x); /* x is not known to be 32-byte aligned; shl_bits_avx uses the unaligned form too */ + + auto p = _mm256_permutevar8x32_epi32(a, idx); + + auto zero = __m256{}; + auto bf = _mm256_blendv_ps(*(__m256*)&p, zero, *(__m256*)&idx); /* lanes whose index has the sign bit set (-1) are replaced with zero */ + auto b = *(__m256i*)&bf; + + uint256 res; + _mm256_storeu_si256((__m256i*)&res, b); /* unaligned store, same reason as the loads above */ + + return res; +} + +inline uint256 shl_bits_avx(const uint256& x, uint64_t sb) noexcept +{ + auto a = _mm256_loadu_si256((__m256i*)&x); + auto zero = __m256i{}; + + auto p = _mm256_permute4x64_epi64(a, 0b10010000); + + auto b = _mm256_blend_epi32(p, zero, 0b11); + + __m128i rcount{int64_t(64 - sb), 0}; + auto c = _mm256_srl_epi64(b, rcount); + + __m128i count{int64_t(sb), 0}; + auto d = _mm256_sll_epi64(a, count); + + auto e = _mm256_or_si256(c, d); + + uint256 res; + _mm256_storeu_si256((__m256i*)&res, e); + + return res; +} + +[[gnu::noinline]] inline uint256 shl_avx(const uint256& x, uint64_t shift) noexcept +{ + auto sw = shift / 64; + auto sb = shift % 64; + auto a = shl_words_avx(x, sw); + return shl_bits_avx(a, sb); +} + +inline constexpr uint64_t shld(uint64_t x1, uint64_t x2, uint64_t c) +{ + if (c == 0) + return x2; + return (x2 << c) | (x1 >> (64 - c)); +} + +template +[[gnu::noinline]] inline constexpr uint shl_c(const uint& x, uint64_t shift) noexcept +{ + uint<2 * N> extended; + for (unsigned i = 0; i < uint::num_words; ++i) + extended[i + uint::num_words] = x[i]; + + const auto s = shift / uint::word_num_bits; + const auto sw = s < uint::num_words ? 
s : uint::num_words; + + uint r; + for (unsigned i = 0; i < uint::num_words; ++i) + r[i] = extended[size_t(uint::num_words - sw + i)]; + + const auto sb = shift % uint::word_num_bits; + const auto m = uint64_t{1} << sb; /* m = 2^sb, so multiplying a word by m shifts it left by sb bits */ + + uint z; + uint64_t k = 0; + for (unsigned i = 0; i < uint::num_words; ++i) + { + const auto p = umul(r[i], m); + z[i] = p[0] + k; + k = p[1]; /* p[1] is carried into the next word; presumably the high half of the product - confirm against umul */ + } + + return z; +} + +[[gnu::noinline]] inline uint256 shl_bits_1(const uint256& x, uint64_t sb) noexcept +{ + if (sb == 0) + __builtin_unreachable(); /* sb == 0 is declared unreachable: callers must pass a non-zero bit count */ + uint256 z; + z[0] = x[0] << sb; + for (unsigned i = 1; i < uint256::num_words; ++i) + z[i] = shld(x[i - 1], x[i], sb); + return z; +} + +[[gnu::noinline]] inline uint256 shl_bits_2(const uint256& x, uint64_t sb) noexcept +{ + if (sb == 0) + __builtin_unreachable(); /* same precondition as shl_bits_1: sb must be non-zero */ + + const auto t = 64 - sb; + + uint256 r; + uint64_t carry = 0; + for (size_t i = 0; i < uint256::num_words; ++i) + { + auto a = x[i]; + auto b = a << sb; + auto c = b | carry; + carry = a >> t; /* bits shifted out of this word feed the next higher word */ + r[i] = c; + } + return r; +} + +[[gnu::noinline]] inline uint256 shl_bits_3(const uint256& x, uint64_t sb) noexcept +{ + if (sb == 0) + __builtin_unreachable(); + + static constexpr size_t num_words = 4; + size_t skip = 0; /* never changed from 0 in this function */ + uint256 r; + uint64_t carry = 0; /* NOTE(review): the loop below walks from the top word down and uses >> sb, so this computes a right shift despite the shl_ name - confirm intent */ + for (size_t i = 0; i < (num_words - skip); ++i) + { + r[num_words - 1 - i - skip] = (x[num_words - 1 - i] >> sb) | carry; + carry = (x[num_words - 1 - i] << (64 - sb - 1)) << 1; /* shift by (64 - sb) split into two steps: (64 - sb - 1), then 1 */ + } + return r; +} + +[[gnu::noinline]] inline uint256 shl_bits_4(const uint256& x, uint64_t sb) noexcept +{ + if (sb == 0) + __builtin_unreachable(); + + static constexpr size_t num_words = 4; + size_t skip = 0; /* never changed from 0 in this function */ + uint256 r; + uint64_t carry = 0; /* NOTE(review): same top-down >> sb loop as shl_bits_3, i.e. a right shift despite the shl_ name - confirm intent */ + for (size_t i = 0; i < (num_words - skip); ++i) + { + r[num_words - 1 - i - skip] = (x[num_words - 1 - i] >> sb) | carry; + carry = (x[num_words - 1 - i] << (64 - sb)); /* single-step variant of the carry computed in shl_bits_3 */ + } + return r; +} + +template +inline constexpr uint shl_c(const uint& x, const uint& shift) noexcept +{ + uint64_t high_words_fold = 0; + for (size_t i = 
1; i < uint::num_words; ++i) + high_words_fold |= shift[i]; /* OR together every word of the shift amount above word 0 */ + + if (INTX_UNLIKELY(high_words_fold != 0)) + return 0; /* a non-zero upper word means the shift amount is at least 2^64 */ + + return shl_c(x, shift[0]); +} + +template +[[gnu::noinline]] inline constexpr uint shl_e(const uint& x, uint64_t shift) noexcept +{ + uint r; + + const auto w = shift / 64; + + size_t j = 0; + for (size_t i = w; i < uint::num_words; ++i, ++j) + r[i] = x[j]; /* word step: x[j] lands in r[w + j]; words below w are never written */ + + const auto sb = shift % uint::word_num_bits; + if (sb == 0) + return r; /* shld is only reached with a non-zero bit count */ + + uint z; + z[0] = r[0] << sb; + for (unsigned i = 1; i < uint::num_words; ++i) + z[i] = shld(r[i - 1], r[i], sb); /* bit step: own bits shifted left, plus the top sb bits of the word below */ + + return z; +} + +[[gnu::noinline]] inline constexpr uint256 shl_e(const uint256& x, uint64_t shift) noexcept +{ + const auto w = shift / 64; + const auto s = shift % 64; + + uint256 r; + switch (w) /* word step, unrolled per word offset; w >= 4 leaves r as default-constructed */ + { + case 0: + { + r = x; + break; + } + case 1: + { + r[1] = x[0]; + r[2] = x[1]; + r[3] = x[2]; + break; + } + case 2: + { + r[2] = x[0]; + r[3] = x[1]; + break; + } + case 3: + { + r[3] = x[0]; + break; + } + default: + break; + } + + if (s == 0) + return r; + + switch (w) /* bit step, unrolled: words are updated from the top down so each shld reads a lower word that has not been shifted yet */ + { + case 0: + { + r[3] = shld(r[2], r[3], s); + r[2] = shld(r[1], r[2], s); + r[1] = shld(r[0], r[1], s); + r[0] = r[0] << s; + break; + } + case 1: + { + r[3] = shld(r[2], r[3], s); + r[2] = shld(r[1], r[2], s); + r[1] = r[1] << s; + break; + } + case 2: + { + r[3] = shld(r[2], r[3], s); + r[2] = r[2] << s; + break; + } + case 3: + { + r[3] = r[3] << s; + break; + } + default: + break; + } + + return r; +} + +template +inline constexpr uint shl_e(const uint& x, const uint& shift) noexcept +{ + uint64_t high_words_fold = 0; + for (size_t i = 1; i < uint::num_words; ++i) + high_words_fold |= shift[i]; + + if (INTX_UNLIKELY(high_words_fold != 0)) + return 0; /* a non-zero upper word means the shift amount is at least 2^64 */ + + return shl_e(x, shift[0]); +} + + +template +[[gnu::noinline]] inline constexpr uint shl_w(const uint& x, uint64_t shift) noexcept +{ + const auto w = shift / 64; + const auto s = shift % 64; + const auto t = s == 0 ? 0 : 64 - s; /* keeps the right-shift count below 64 when s == 0 */ + const auto m = s == 0 ? 
0 : ~uint64_t{0}; + + uint r; + uint64_t carry = 0; + for (size_t i = 0; i < uint::num_words; ++i) + { + auto a = i >= w ? x[i - w] : 0; + auto b = a << s; + auto c = b | carry; + carry = a >> t; + carry &= m; + r[i] = c; + } + return r; +} + +template +inline constexpr uint shl_w(const uint& x, const uint& shift) noexcept +{ + uint64_t high_words_fold = 0; + for (size_t i = 1; i < uint::num_words; ++i) + high_words_fold |= shift[i]; + + if (INTX_UNLIKELY(high_words_fold != 0)) + return 0; + + return shl_w(x, shift[0]); +} + + +inline uint64_t shrd(uint64_t x1, uint64_t x2, uint64_t c) +{ + return (x2 >> c) | (x1 << (64 - c)); +} + +template +uint shr_c(const uint& x, const uint64_t& shift) noexcept +{ + uint<2 * N> extended; + for (unsigned i = 0; i < uint::num_words; ++i) + extended[i] = x[i]; + + const auto sw = + shift >= uint::num_bits ? uint::num_words : shift / uint::word_num_bits; + + uint r; + for (unsigned i = 0; i < uint::num_words; ++i) + r[i] = extended[size_t(sw + i)]; + + const auto sb = shift % uint::word_num_bits; + if (sb == 0) + return r; + + constexpr auto nw = uint::num_words; + + uint z; + z[nw - 1] = r[nw - 1] >> sb; + + for (unsigned i = 0; i < nw - 1; ++i) + z[nw - i - 2] = shrd(r[nw - i - 1], r[nw - i - 2], sb); + + return z; +} + +template +inline constexpr uint shr_c(const uint& x, const uint& shift) noexcept +{ + uint64_t high_words_fold = 0; + for (size_t i = 1; i < uint::num_words; ++i) + high_words_fold |= shift[i]; + + if (INTX_UNLIKELY(high_words_fold != 0)) + return 0; + + return shr_c(x, shift[0]); +} + +} // namespace intx::experimental diff --git a/test/unittests/test_bitwise.cpp b/test/unittests/test_bitwise.cpp index a6e23252..e988c23f 100644 --- a/test/unittests/test_bitwise.cpp +++ b/test/unittests/test_bitwise.cpp @@ -2,6 +2,7 @@ // Copyright 2019 Pawel Bylica. // Licensed under the Apache License, Version 2.0. 
+#include "test/experimental/shift.hpp" #include "test_suite.hpp" using namespace intx; @@ -192,6 +193,124 @@ TYPED_TEST(uint_test, shift_by_int) EXPECT_EQ(x << int{TypeParam::num_bits}, 0); } +TYPED_TEST(uint_test, shift_one_bit_exp) +{ + for (unsigned shift = 0; shift < sizeof(TypeParam) * 8; ++shift) + { + SCOPED_TRACE(shift); + constexpr auto x = TypeParam{1}; + const auto a = experimental::shl_c(x, shift); + EXPECT_EQ(x, experimental::shr_c(a, shift)); + + const auto b = experimental::shl_e(x, shift); + const auto c = experimental::shl_w(x, shift); + EXPECT_EQ(b, c); + EXPECT_EQ(x, experimental::shr_c(b, shift)); + } +} + +TYPED_TEST(uint_test, shift_left_overflow_exp) +{ + const auto x = ~TypeParam{}; + + for (unsigned n = 0; n <= sizeof(TypeParam) * 7; ++n) + { + const auto sh = experimental::shr_c(x, n); /* each experimental shl variant is expected to return zero for this shift amount */ + EXPECT_EQ(experimental::shl_c(x, sh), 0) << "n=" << n; + EXPECT_EQ(experimental::shl_e(x, sh), 0) << "n=" << n; + EXPECT_EQ(experimental::shl_w(x, sh), 0) << "n=" << n; + } + + for (unsigned n = 0; n <= sizeof(TypeParam) * 7; ++n) + { + const auto sh = experimental::shl_c(TypeParam{sizeof(TypeParam) * 8}, n); + const auto sh2 = experimental::shl_e(TypeParam{sizeof(TypeParam) * 8}, n); + const auto sh3 = experimental::shl_w(TypeParam{sizeof(TypeParam) * 8}, n); /* sh, sh2 and sh3 come from three different implementations and must agree */ + EXPECT_EQ(sh, sh2); + EXPECT_EQ(sh, sh3); + EXPECT_EQ(experimental::shl_c(x, sh), 0) << "n=" << n; + EXPECT_EQ(experimental::shl_e(x, sh), 0) << "n=" << n; + EXPECT_EQ(experimental::shl_w(x, sh), 0) << "n=" << n; + } +} + +TYPED_TEST(uint_test, shift_right_overflow_exp) +{ + const auto x = ~TypeParam{}; + + for (unsigned n = 0; n <= sizeof(TypeParam) * 7; ++n) + { + const auto sh = experimental::shr_c(x, n); + EXPECT_EQ(experimental::shr_c(x, sh), 0) << "n=" << n; + } + + for (unsigned n = 0; n <= sizeof(TypeParam) * 7; ++n) + { + const auto sh = experimental::shl_c(TypeParam{sizeof(TypeParam) * 8}, n); + const auto sh2 = experimental::shl_e(TypeParam{sizeof(TypeParam) * 8}, n); + const auto sh3 = 
experimental::shl_w(TypeParam{sizeof(TypeParam) * 8}, n); + EXPECT_EQ(sh, sh2); + EXPECT_EQ(sh, sh3); + EXPECT_EQ(experimental::shr_c(x, sh), 0) << "n=" << n; + } +} + +TYPED_TEST(uint_test, shift_left_overflow_uint64_exp) +{ + const auto x = ~TypeParam{}; + + for (unsigned n = 0; n <= 100; ++n) + { + const uint64_t sh = sizeof(TypeParam) * 8 + n; + EXPECT_EQ(experimental::shl_c(x, sh), 0) << "n=" << n; + EXPECT_EQ(experimental::shl_e(x, sh), 0) << "n=" << n; + EXPECT_EQ(experimental::shl_w(x, sh), 0) << "n=" << n; + } +} + +TYPED_TEST(uint_test, shift_right_overflow_uint64_exp) +{ + const auto x = ~TypeParam{}; + + for (unsigned n = 0; n <= 100; ++n) + { + const uint64_t sh = sizeof(TypeParam) * 8 + n; + EXPECT_EQ(experimental::shr_c(x, sh), 0) << "n=" << n; + } +} + +TYPED_TEST(uint_test, shift_overflow_exp) +{ + const uint64_t sh = sizeof(TypeParam) * 8; + const auto value = ~TypeParam{}; + EXPECT_EQ(experimental::shr_c(value, sh), 0); + EXPECT_EQ(experimental::shr_c(value, TypeParam{sh}), 0); + EXPECT_EQ(experimental::shl_c(value, sh), 0); + EXPECT_EQ(experimental::shl_c(value, TypeParam{sh}), 0); +} + +TYPED_TEST(uint_test, shift_by_int_exp) +{ + const auto x = experimental::shl_c(TypeParam{1}, (sizeof(TypeParam) * 8 - 1)) | TypeParam{1}; + EXPECT_EQ(experimental::shr_c(x, 0), x); + EXPECT_EQ(experimental::shl_c(x, 0), x); + EXPECT_EQ(experimental::shl_e(x, 0), x); + EXPECT_EQ(experimental::shl_w(x, 0), x); + EXPECT_EQ(experimental::shr_c(x, 1), + experimental::shl_c(TypeParam{1}, uint64_t{TypeParam::num_bits - 2})); + EXPECT_EQ(experimental::shl_c(x, 1), TypeParam{2}); + EXPECT_EQ(experimental::shl_e(x, 1), TypeParam{2}); + EXPECT_EQ(experimental::shl_w(x, 1), TypeParam{2}); + EXPECT_EQ(experimental::shr_c(x, int{TypeParam::num_bits - 1}), TypeParam{1}); + EXPECT_EQ(experimental::shl_c(x, int{TypeParam::num_bits - 1}), + experimental::shl_c(TypeParam{1}, uint64_t{TypeParam::num_bits - 1})); + EXPECT_EQ(experimental::shl_e(x, int{TypeParam::num_bits - 1}), + 
experimental::shl_e(TypeParam{1}, uint64_t{TypeParam::num_bits - 1})); + EXPECT_EQ(experimental::shl_w(x, int{TypeParam::num_bits - 1}), + experimental::shl_w(TypeParam{1}, uint64_t{TypeParam::num_bits - 1})); + EXPECT_EQ(experimental::shr_c(x, int{TypeParam::num_bits}), 0); +} + TYPED_TEST(uint_test, not_of_zero) { auto ones = ~TypeParam{}; @@ -228,3 +347,45 @@ TYPED_TEST(uint_test, shift_against_mul) auto y = a * s; EXPECT_EQ(x, y); } + +TEST(avx, shl_words) +{ + const auto x = 0x18191a1b1c1d1e1f28292a2b2c2d2e2f38393a3b3c3d3e3f48494a4b4c4d4e4f_u256; + EXPECT_EQ(experimental::shl_words_avx(x, 0), x); + EXPECT_EQ(experimental::shl_words_avx(x, 1), x << 64); + EXPECT_EQ(experimental::shl_words_avx(x, 2), x << 128); + EXPECT_EQ(experimental::shl_words_avx(x, 3), x << 192); + EXPECT_EQ(experimental::shl_words_avx(x, 4), 0); + EXPECT_EQ(experimental::shl_words_avx(x, 5), 0); + EXPECT_EQ(experimental::shl_words_avx(x, 123131231), 0); +} + +TEST(avx, shl_bits) +{ + const auto x = 0x18191a1b1c1d1e1f28292a2b2c2d2e2f38393a3b3c3d3e3f48494a4b4c4d4e4f_u256; + EXPECT_EQ(experimental::shl_bits_avx(x, 0), x); + EXPECT_EQ(experimental::shl_bits_avx(x, 1), x << 1); + EXPECT_EQ(experimental::shl_bits_avx(x, 2), x << 2); + EXPECT_EQ(experimental::shl_bits_avx(x, 3), x << 3); + EXPECT_EQ(experimental::shl_bits_avx(x, 31), x << 31); + EXPECT_EQ(experimental::shl_bits_avx(x, 32), x << 32); + EXPECT_EQ(experimental::shl_bits_avx(x, 33), x << 33); + EXPECT_EQ(experimental::shl_bits_avx(x, 63), x << 63); + EXPECT_EQ(experimental::shl_bits_avx(x, 64), x << 64); +} + +TEST(avx, shl_avx) +{ + const auto x = 0x18191a1b1c1d1e1f28292a2b2c2d2e2f38393a3b3c3d3e3f48494a4b4c4d4e4f_u256; + EXPECT_EQ(experimental::shl_avx(x, 0), x); + EXPECT_EQ(experimental::shl_avx(x, 1), x << 1); + EXPECT_EQ(experimental::shl_avx(x, 2), x << 2); + EXPECT_EQ(experimental::shl_avx(x, 3), x << 3); + EXPECT_EQ(experimental::shl_avx(x, 31), x << 31); + EXPECT_EQ(experimental::shl_avx(x, 32), x << 32); + 
EXPECT_EQ(experimental::shl_avx(x, 33), x << 33); + EXPECT_EQ(experimental::shl_avx(x, 63), x << 63); + EXPECT_EQ(experimental::shl_avx(x, 64), x << 64); + EXPECT_EQ(experimental::shl_avx(x, 65), x << 65); + EXPECT_EQ(experimental::shl_avx(x, 255), x << 255); +}