Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimised shift left and shift right experimental implementations. #248

Open
wants to merge 20 commits into
base: master
Choose a base branch
from
Open
57 changes: 42 additions & 15 deletions test/benchmarks/benchmarks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// Licensed under the Apache License, Version 2.0.

#include "../experimental/addmod.hpp"
#include "../experimental/shift.hpp"
#include <benchmark/benchmark.h>
#include <intx/intx.hpp>
#include <test/utils/gmp.hpp>
Expand Down Expand Up @@ -198,8 +199,7 @@ BENCHMARK_TEMPLATE(binop, uint512, uint512, public_mul);
BENCHMARK_TEMPLATE(binop, uint512, uint512, gmp::mul);

template <unsigned N>
[[gnu::noinline]] static intx::uint<N> shl_public(
const intx::uint<N>& x, const uint64_t& y) noexcept
[[gnu::noinline]] static intx::uint<N> shl_public(const intx::uint<N>& x, uint64_t y) noexcept
{
return x << y;
}
Expand All @@ -211,35 +211,52 @@ template <unsigned N>
return x << y;
}

[[gnu::noinline]] static intx::uint256 shl_halves(
const intx::uint256& x, const uint64_t& shift) noexcept
template <unsigned N>
[[gnu::noinline]] static intx::uint<N> shl_generic(const intx::uint<N>& x, uint64_t y) noexcept
{
return intx::operator<<<N>(x, y);
}

template <unsigned N>
[[gnu::noinline]] static intx::uint<N> shl_generic(
const intx::uint<N>& x, const intx::uint<N>& y) noexcept
{
return intx::operator<<<N>(x, y);
}

[[gnu::noinline]] static intx::uint256 shl_halves(const intx::uint256& x, uint64_t shift) noexcept
{
constexpr auto num_bits = 256;
constexpr auto half_bits = num_bits / 2;

const auto xlo = uint128{x[0], x[1]};
const auto xlo_ = uint128{x[0], x[1]};
const auto xlo = static_cast<builtin_uint128>(xlo_);

if (shift < half_bits)
{
const auto lo = xlo << shift;
const auto lo_ = xlo << shift;
const auto lo = uint128{lo_};

const auto xhi = uint128{x[2], x[3]};
const auto xhi_ = uint128{x[2], x[3]};
const auto xhi = static_cast<builtin_uint128>(xhi_);

// Find the part moved from lo to hi.
// The shift right here can be invalid:
// for shift == 0 => lshift == half_bits.
// Split it into 2 valid shifts by (rshift - 1) and 1.
const auto rshift = half_bits - shift;
const auto lo_overflow = (xlo >> (rshift - 1)) >> 1;
const auto hi = (xhi << shift) | lo_overflow;
const auto hi_ = (xhi << shift) | lo_overflow;
const auto hi = uint128{hi_};
return {lo[0], lo[1], hi[0], hi[1]};
}

// This check is only needed if we want "defined" behavior for shifts
// larger than size of the Int.
if (shift < num_bits)
{
const auto hi = xlo << (shift - half_bits);
const auto hi_ = xlo << (shift - half_bits);
const auto hi = uint128{hi_};
return {0, 0, hi[0], hi[1]};
}

Expand Down Expand Up @@ -288,7 +305,7 @@ template <unsigned N>
}

#if INTX_HAS_EXTINT
[[gnu::noinline]] static intx::uint256 shl_llvm(const intx::uint256& x, const uint64_t& y) noexcept
[[gnu::noinline]] static intx::uint256 shl_llvm(const intx::uint256& x, uint64_t y) noexcept
{
unsigned _ExtInt(256) a;
std::memcpy(&a, &x, sizeof(a));
Expand All @@ -300,7 +317,7 @@ template <unsigned N>
#endif


template <typename ArgT, typename ShiftT, ArgT ShiftFn(const ArgT&, const ShiftT&)>
template <typename ArgT, typename ShiftT, ArgT ShiftFn(const ArgT&, ShiftT)>
static void shift(benchmark::State& state)
{
const auto& shift_samples_id = [&state]() noexcept {
Expand All @@ -324,7 +341,7 @@ static void shift(benchmark::State& state)

const auto& xs = test::get_samples<ArgT>(sizeof(ArgT) == sizeof(uint256) ? x_256 : x_512);
const auto& raw_shifts = test::get_samples<uint64_t>(shift_samples_id);
std::array<ShiftT, test::num_samples> shifts;
std::array<std::decay_t<ShiftT>, test::num_samples> shifts;
std::copy(std::cbegin(raw_shifts), std::cend(raw_shifts), std::begin(shifts));

while (state.KeepRunningBatch(xs.size()))
Expand All @@ -336,14 +353,24 @@ static void shift(benchmark::State& state)
}
}
}
BENCHMARK_TEMPLATE(shift, uint256, uint256, shl_public)->DenseRange(-1, 3);
BENCHMARK_TEMPLATE(shift, uint256, uint256, shl_halves)->DenseRange(-1, 3);
BENCHMARK_TEMPLATE(shift, uint256, const uint256&, shl_public)->DenseRange(-1, 3);
BENCHMARK_TEMPLATE(shift, uint256, const uint256&, shl_generic)->DenseRange(-1, 3);
BENCHMARK_TEMPLATE(shift, uint256, const uint256&, shl_halves)->DenseRange(-1, 3);
BENCHMARK_TEMPLATE(shift, uint256, uint64_t, shl_public)->DenseRange(-1, 3);
BENCHMARK_TEMPLATE(shift, uint256, uint64_t, shl_generic)->DenseRange(-1, 3);
BENCHMARK_TEMPLATE(shift, uint256, uint64_t, shl_halves)->DenseRange(-1, 3);
BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_c)->DenseRange(-1, 3);
BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_e)->DenseRange(-1, 3);
BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_w)->DenseRange(-1, 3);
BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_avx)->DenseRange(-1, 3);
BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_bits_1)->DenseRange(-1, 3);
BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_bits_2)->DenseRange(-1, 3);
BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_bits_3)->DenseRange(-1, 3);
BENCHMARK_TEMPLATE(shift, uint256, uint64_t, experimental::shl_bits_4)->DenseRange(-1, 3);
#if INTX_HAS_EXTINT
BENCHMARK_TEMPLATE(shift, uint256, uint64_t, shl_llvm)->DenseRange(-1, 3);
#endif
BENCHMARK_TEMPLATE(shift, uint512, uint512, shl_public)->DenseRange(-1, 3);
BENCHMARK_TEMPLATE(shift, uint512, const uint512&, shl_public)->DenseRange(-1, 3);
BENCHMARK_TEMPLATE(shift, uint512, uint64_t, shl_public)->DenseRange(-1, 3);

[[gnu::noinline]] static bool lt_public(const uint256& x, const uint256& y) noexcept
Expand Down
4 changes: 2 additions & 2 deletions test/experimental/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# intx: extended precision integer library.
# Copyright 2019-2020 Pawel Bylica.
# Copyright 2019 Pawel Bylica.
# Licensed under the Apache License, Version 2.0.

add_library(experimental STATIC add.cpp add.hpp)
add_library(experimental STATIC add.cpp add.hpp shift.hpp)
target_compile_definitions(experimental PRIVATE INTX_EXPERIMENTAL)
target_include_directories(experimental PUBLIC ${PROJECT_SOURCE_DIR}/test)
target_link_libraries(experimental PUBLIC intx::intx)
Expand Down
Loading