forked from wbhart/mpir
-
Notifications
You must be signed in to change notification settings - Fork 36
/
longlong_post.h
321 lines (284 loc) · 10.3 KB
/
longlong_post.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
Copyright 1991, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003,
2004, 2005 Free Software Foundation, Inc.
Copyright 2013 William Hart
This file is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this file; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
/* You have to define the following before including this file:
UWtype -- An unsigned type, default type for operations (typically a "word")
UHWtype -- An unsigned type, at least half the size of UWtype.
UDWtype -- An unsigned type, at least twice as large a UWtype
W_TYPE_SIZE -- size in bits of UWtype
SItype, USItype -- Signed and unsigned 32 bit types.
DItype, UDItype -- Signed and unsigned 64 bit types.
On a 32 bit machine UWtype should typically be USItype;
on a 64 bit machine, UWtype should typically be UDItype.
*/
/* Use mpn_umul_ppmm or mpn_udiv_qrnnd functions, if they exist. The "_r"
forms have "reversed" arguments, meaning the pointer is last, which
sometimes allows better parameter passing, in particular on 64-bit
hppa. */
#define mpn_umul_ppmm __MPN(umul_ppmm)
extern UWtype mpn_umul_ppmm(UWtype *, UWtype, UWtype);
#if ! defined (umul_ppmm) && HAVE_NATIVE_mpn_umul_ppmm \
&& ! defined (LONGLONG_STANDALONE)
#define umul_ppmm(wh, wl, u, v) \
do { \
UWtype __umul_ppmm__p0; \
(wh) = mpn_umul_ppmm (&__umul_ppmm__p0, (UWtype) (u), (UWtype) (v)); \
(wl) = __umul_ppmm__p0; \
} while (0)
#endif
#define mpn_umul_ppmm_r __MPN(umul_ppmm_r)
extern UWtype mpn_umul_ppmm_r(UWtype, UWtype, UWtype *);
#if ! defined (umul_ppmm) && HAVE_NATIVE_mpn_umul_ppmm_r \
&& ! defined (LONGLONG_STANDALONE)
#define umul_ppmm(wh, wl, u, v) \
do { \
UWtype __umul_ppmm__p0; \
(wh) = mpn_umul_ppmm_r ((UWtype) (u), (UWtype) (v), &__umul_ppmm__p0); \
(wl) = __umul_ppmm__p0; \
} while (0)
#endif
#define mpn_udiv_qrnnd __MPN(udiv_qrnnd)
extern UWtype mpn_udiv_qrnnd(UWtype *, UWtype, UWtype, UWtype);
#if ! defined (udiv_qrnnd) && HAVE_NATIVE_mpn_udiv_qrnnd \
&& ! defined (LONGLONG_STANDALONE)
#define udiv_qrnnd(q, r, n1, n0, d) \
do { \
UWtype __udiv_qrnnd__r; \
(q) = mpn_udiv_qrnnd (&__udiv_qrnnd__r, \
(UWtype) (n1), (UWtype) (n0), (UWtype) d); \
(r) = __udiv_qrnnd__r; \
} while (0)
#endif
#define mpn_udiv_qrnnd_r __MPN(udiv_qrnnd_r)
extern UWtype mpn_udiv_qrnnd_r(UWtype, UWtype, UWtype, UWtype *);
#if ! defined (udiv_qrnnd) && HAVE_NATIVE_mpn_udiv_qrnnd_r \
&& ! defined (LONGLONG_STANDALONE)
#define udiv_qrnnd(q, r, n1, n0, d) \
do { \
UWtype __udiv_qrnnd__r; \
(q) = mpn_udiv_qrnnd_r ((UWtype) (n1), (UWtype) (n0), (UWtype) d, \
&__udiv_qrnnd__r); \
(r) = __udiv_qrnnd__r; \
} while (0)
#endif
/* If this machine has no inline assembler, use C macros. */
#if !defined (add_ssaaaa)
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
do { \
UWtype __x; \
__x = (al) + (bl); \
(sh) = (ah) + (bh) + (__x < (al)); \
(sl) = __x; \
} while (0)
#endif
#if !defined (sub_ddmmss)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
do { \
UWtype __x; \
__x = (al) - (bl); \
(sh) = (ah) - (bh) - ((al) < (bl)); \
(sl) = __x; \
} while (0)
#endif
#if !defined (add_333)
#define add_333(sh, sm, sl, ah, am, al, bh, bm, bl) \
do { \
UWtype __cy1, __cy2; \
__cy1 = ((al) + (bl) < (al)); \
(sl) = (al) + (bl); \
__cy2 = ((am) + (bm) < (am)); \
(sm) = (am) + (bm); \
__cy2 += ((sm) + __cy1 < (sm)); \
(sm) = (sm) + __cy1; \
(sh) = (ah) + (bh) + __cy2; \
} while (0)
#endif
#if !defined(sub_333)
#define sub_333(sh, sm, sl, ah, am, al, bh, bm, bl) \
do { \
UWtype __cy1, __cy2; \
__cy1 = ((al) < (bl)); \
(sl) = (al) - (bl); \
__cy2 = ((am) < (bm)); \
(sm) = (am) - (bm); \
__cy2 += ((sm) < __cy1); \
(sm) = (sm) - __cy1; \
(sh) = (ah) - (bh) - __cy2; \
} while (0)
#endif
/* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of
smul_ppmm. */
#if !defined (umul_ppmm) && defined (smul_ppmm)
#define umul_ppmm(w1, w0, u, v) \
do { \
UWtype __w1; \
UWtype __xm0 = (u), __xm1 = (v); \
smul_ppmm (__w1, w0, __xm0, __xm1); \
(w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1) \
+ (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0); \
} while (0)
#endif
/* If we still don't have umul_ppmm, define it using plain C.
For reference, when this code is used for squaring (ie. u and v identical
expressions), gcc recognises __x1 and __x2 are the same and generates 3
multiplies, not 4. The subsequent additions could be optimized a bit,
but the only place GMP currently uses such a square is mpn_sqr_basecase,
and chips obliged to use this generic C umul will have plenty of worse
performance problems than a couple of extra instructions on the diagonal
of sqr_basecase. */
#if !defined (umul_ppmm)
#define umul_ppmm(w1, w0, u, v) \
do { \
UWtype __x0, __x1, __x2, __x3; \
UHWtype __ul, __vl, __uh, __vh; \
UWtype __u = (u), __v = (v); \
\
__ul = __ll_lowpart (__u); \
__uh = __ll_highpart (__u); \
__vl = __ll_lowpart (__v); \
__vh = __ll_highpart (__v); \
\
__x0 = (UWtype) __ul * __vl; \
__x1 = (UWtype) __ul * __vh; \
__x2 = (UWtype) __uh * __vl; \
__x3 = (UWtype) __uh * __vh; \
\
__x1 += __ll_highpart (__x0);/* this can't give carry */ \
__x1 += __x2; /* but this indeed can */ \
if (__x1 < __x2) /* did we get it? */ \
__x3 += __ll_B; /* yes, add it in the proper pos. */ \
\
(w1) = __x3 + __ll_highpart (__x1); \
(w0) = (__x1 << W_TYPE_SIZE/2) + __ll_lowpart (__x0); \
} while (0)
#endif
/* If we don't have smul_ppmm, define it using umul_ppmm (which surely will
exist in one form or another. */
#if !defined (smul_ppmm)
#define smul_ppmm(w1, w0, u, v) \
do { \
UWtype __w1; \
UWtype __xm0 = (u), __xm1 = (v); \
umul_ppmm (__w1, w0, __xm0, __xm1); \
(w1) = __w1 - (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1) \
- (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0); \
} while (0)
#endif
/* Define this unconditionally, so it can be used for debugging. */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
do { \
UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \
\
ASSERT ((d) != 0); \
ASSERT ((n1) < (d)); \
\
__d1 = __ll_highpart (d); \
__d0 = __ll_lowpart (d); \
\
__q1 = (n1) / __d1; \
__r1 = (n1) - __q1 * __d1; \
__m = __q1 * __d0; \
__r1 = __r1 * __ll_B | __ll_highpart (n0); \
if (__r1 < __m) \
{ \
__q1--, __r1 += (d); \
if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
if (__r1 < __m) \
__q1--, __r1 += (d); \
} \
__r1 -= __m; \
\
__q0 = __r1 / __d1; \
__r0 = __r1 - __q0 * __d1; \
__m = __q0 * __d0; \
__r0 = __r0 * __ll_B | __ll_lowpart (n0); \
if (__r0 < __m) \
{ \
__q0--, __r0 += (d); \
if (__r0 >= (d)) \
if (__r0 < __m) \
__q0--, __r0 += (d); \
} \
__r0 -= __m; \
\
(q) = __q1 * __ll_B | __q0; \
(r) = __r0; \
} while (0)
/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
__udiv_w_sdiv (defined in libgcc or elsewhere). */
#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
#define udiv_qrnnd(q, r, nh, nl, d) \
do { \
UWtype __r; \
(q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d); \
(r) = __r; \
} while (0)
#endif
/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c. */
#if !defined (udiv_qrnnd)
#define UDIV_NEEDS_NORMALIZATION 1
#define udiv_qrnnd __udiv_qrnnd_c
#endif
#if !defined (count_leading_zeros)
#define count_leading_zeros(count, x) \
do { \
UWtype __xr = (x); \
UWtype __a; \
\
if (W_TYPE_SIZE == 32) \
{ \
__a = __xr < ((UWtype) 1 << 2*__BITS4) \
? (__xr < ((UWtype) 1 << __BITS4) ? 1 : __BITS4 + 1) \
: (__xr < ((UWtype) 1 << 3*__BITS4) ? 2*__BITS4 + 1 \
: 3*__BITS4 + 1); \
} \
else \
{ \
for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \
if (((__xr >> __a) & 0xff) != 0) \
break; \
++__a; \
} \
\
(count) = W_TYPE_SIZE + 1 - __a - __clz_tab[__xr >> __a]; \
} while (0)
/* This version gives a well-defined value for zero. */
#define COUNT_LEADING_ZEROS_0 (W_TYPE_SIZE - 1)
#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
#endif
#ifdef COUNT_LEADING_ZEROS_NEED_CLZ_TAB
extern const unsigned char __GMP_DECLSPEC __clz_tab[129];
#endif
#if !defined (count_trailing_zeros)
/* Define count_trailing_zeros using count_leading_zeros. The latter might be
defined in asm, but if it is not, the C version above is good enough. */
#define count_trailing_zeros(count, x) \
do { \
UWtype __ctz_x = (x); \
UWtype __ctz_c; \
ASSERT (__ctz_x != 0); \
count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x); \
(count) = W_TYPE_SIZE - 1 - __ctz_c; \
} while (0)
#endif
#ifndef UDIV_NEEDS_NORMALIZATION
#define UDIV_NEEDS_NORMALIZATION 0
#endif
/* Whether udiv_qrnnd is actually implemented with udiv_qrnnd_preinv, and
that hence the latter should always be used. */
#ifndef UDIV_PREINV_ALWAYS
#define UDIV_PREINV_ALWAYS 0
#endif