-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathintutil.hpp
149 lines (138 loc) · 3.66 KB
/
intutil.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#ifndef INTUTIL_HPP
#define INTUTIL_HPP
/* Copyright (c) 2019 by NEC Corporation
* This file is part of ve-jit */
#include "intutil.h"
#include "throw.hpp"
#include <cstdint>
#include <typeinfo> // for an error
//#include <type_traits> // for an error msg
/// \group integer arithmetic helpers
//@{ int arithmetic helpers
/// \group integer multiplicate inverse
/// For what M is A*M always 1? This exists if A and 2^32 (or 2^64) are relatively prime.
//@{
template<typename T> inline T mod_inverse( T const a ){
THROW("mod_inverse<T> not implemented for "<<typeid(T).name());
return T(0);
}
template<>
inline uint32_t mod_inverse<uint32_t>(uint32_t const a)
{
uint32_t u = 2-a;
uint32_t i = a-1;
i *= i; u *= i+1;
i *= i; u *= i+1;
i *= i; u *= i+1;
i *= i; u *= i+1;
return u;
}
template<> inline int32_t mod_inverse<int32_t>(int32_t const a)
{
return mod_inverse((uint32_t)a);
}
template<>
inline uint64_t mod_inverse<uint64_t>(uint64_t const a)
{
uint64_t u = 2-a;
uint64_t i = a-1;
i *= i; u *= i+1;
i *= i; u *= i+1;
i *= i; u *= i+1;
i *= i; u *= i+1;
i *= i; u *= i+1; // one extra?
return u;
}
template<> inline int64_t mod_inverse<int64_t>(int64_t const a)
{
return mod_inverse((uint64_t)a);
}
//@}
/// \group basic integer ops
/// Do we even want C versions? Maybe C frontends should
/// just call into the C++ code that can use these.
//@{
/** is \c v some 2^N for N>0? */
inline bool constexpr positivePow2(uint64_t v) {
return ((v & (v-1)) == 0);
}
/** A portable count-ones routine */
template<typename T>
inline int popcount(T const n)
{
uint8_t *ptr = (uint8_t *)&n;
int count = 0;
for (unsigned i=0; i<sizeof(T); ++i) {
count += bitcount[ptr[i]];
}
return count;
}
/** greatest common denominator, for a,b > 0 */
template<typename T>
inline T gcd(T a, T b)
{
for (;;)
{
if (a == 0) return b;
b %= a;
if (b == 0) return a;
a %= b;
}
}
/** lowest common multiple, a,b > 0 */
template<typename T>
inline T lcm(T a, T b)
{
int temp = gcd(a, b);
return temp ? (a / temp * b) : 0;
}
/** For +ve inputs a, b solve k*a + j*b = g for k and j and g=gcd(a,b) */
template<typename T>
inline void extendedEuclid( T& k, T a, T& j, T b, T& g)
{
T x = 1, y = 0;
T xLast = 0, yLast = 1;
T q, r, m, n;
while (a != 0)
{
q = b / a;
r = b % a;
m = xLast - q * x;
n = yLast - q * y;
xLast = x;
yLast = y;
x = m;
y = n;
b = a;
a = r;
}
g = b;
k = xLast;
j = yLast;
}
//@}
//@} int arithmetic helpers
/** fast integer division for \em unsigned 21-bit A/D as A*magic(D)>>42.
* This uses \b only 64-bit arithmetic.
* The 21-bit limit comes from needing the hi bits of a 21 x 42-bit multiply */
struct FastDiv21{
static int const bits = 21;
static int const rshift = 42;
static int const safemax = uint64_t{1}<<21;
static uint32_t const mask21 = (1<<bits)-1;
static inline uint64_t constexpr magic(uint32_t d){
return mask21 / d + 1;
}
static inline uint32_t constexpr fastdiv(uint32_t const a, uint64_t const magic){
return a*magic>>rshift;
}
static inline uint32_t constexpr fastmod(uint32_t const a, uint64_t const magic, uint32_t const d){
return a - fastdiv(a,magic) * d;
}
constexpr FastDiv21(uint32_t const d);
uint64_t const _m;
uint32_t const _d;
};
inline constexpr FastDiv21::FastDiv21(uint32_t const d) : _m(magic(d)), _d(d) {}
// vim: ts=4 sw=4 et cindent cino=^=l0,\:.5s,=-.5s,N-s,g.5s,b1 cinkeys=0{,0},0),\:,0#,!^F,o,O,e,0=break
#endif // INTUTIL_HPP