diff --git a/AUTHORS b/AUTHORS
index 6ef3b793e2..297c0f9f44 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -455,7 +455,7 @@ Mathieu Gouttenoire
Primality testing for Gaussian integers.
- github math-gout
+ github math-gout
Michael Abshoff
diff --git a/doc/source/fmpz_vec.rst b/doc/source/fmpz_vec.rst
index 23957dcc3f..1c7f3d8c78 100644
--- a/doc/source/fmpz_vec.rst
+++ b/doc/source/fmpz_vec.rst
@@ -408,13 +408,22 @@ Gaussian content
Dot product
--------------------------------------------------------------------------------
+.. function:: void _fmpz_vec_dot_general_naive(fmpz_t res, const fmpz_t initial, int subtract, const fmpz * a, const fmpz * b, int reverse, slong len)
+ void _fmpz_vec_dot_general(fmpz_t res, const fmpz_t initial, int subtract, const fmpz * a, const fmpz * b, int reverse, slong len)
+
+ Computes the dot product of the vectors *a* and *b*, setting
+ *res* to `s + (-1)^{subtract} \sum_{i=0}^{len-1} a_i b_i`.
+ The initial term *s* is optional and can be
+ omitted by passing *NULL* (equivalently, `s = 0`).
+ The parameter *subtract* must be 0 or 1.
+ If the *reverse* flag is 1, the second vector is reversed.
+
+ Aliasing is allowed between ``res`` and ``initial`` but not
+ between ``res`` and the entries of ``a`` and ``b``.
+
+ The *naive* version is used for testing purposes.
.. function:: void _fmpz_vec_dot(fmpz_t res, const fmpz * vec1, const fmpz * vec2, slong len2)
Sets ``res`` to the dot product of ``(vec1, len2)`` and
``(vec2, len2)``.
-
-.. function:: void _fmpz_vec_dot_ptr(fmpz_t res, const fmpz * vec1, fmpz ** const vec2, slong offset, slong len)
-
- Sets ``res`` to the dot product of ``len`` values at ``vec1`` and the
- ``len`` values ``vec2[i] + offset`` for `0 \leq i < len`.
diff --git a/src/arith/stirling1.c b/src/arith/stirling1.c
index c389724fda..4d9058bbca 100644
--- a/src/arith/stirling1.c
+++ b/src/arith/stirling1.c
@@ -16,18 +16,15 @@
#include "arith.h"
/* compute single coefficient in polynomial product */
-static void
+FLINT_FORCE_INLINE void
_fmpz_poly_mulmid_single(fmpz_t res, const fmpz * poly1, slong len1, const fmpz * poly2, slong len2, slong i)
{
- slong j, top1, top2;
+ slong top1, top2;
top1 = FLINT_MIN(len1 - 1, i);
top2 = FLINT_MIN(len2 - 1, i);
- fmpz_mul(res, poly1 + i - top2, poly2 + top2);
-
- for (j = 1; j < top1 + top2 - i + 1; j++)
- fmpz_addmul(res, poly1 + i - top2 + j, poly2 + top2 - j);
+ _fmpz_vec_dot_general(res, NULL, 0, poly1 + i - top2, poly2 + i - top1, 1, top1 + top2 - i + 1);
}
#define MAX_BASECASE 16
diff --git a/src/fmpq_poly/exp_series.c b/src/fmpq_poly/exp_series.c
index 88f8b3d142..6d4b62a85a 100644
--- a/src/fmpq_poly/exp_series.c
+++ b/src/fmpq_poly/exp_series.c
@@ -40,7 +40,7 @@ _fmpq_poly_exp_series_basecase_deriv(fmpz * B, fmpz_t Bden,
const fmpz * Aprime, const fmpz_t Aden, slong Alen, slong n)
{
fmpz_t t, u;
- slong j, k;
+ slong k;
Alen = FLINT_MIN(Alen, n);
@@ -55,11 +55,8 @@ _fmpq_poly_exp_series_basecase_deriv(fmpz * B, fmpz_t Bden,
for (k = 1; k < n; k++)
{
- fmpz_mul(t, Aprime, B + k - 1);
-
- for (j = 2; j < FLINT_MIN(Alen, k + 1); j++)
- fmpz_addmul(t, Aprime + j - 1, B + k - j);
-
+ slong l = FLINT_MIN(Alen - 1, k);
+ _fmpz_vec_dot_general(t, NULL, 0, Aprime, B + k - l, 1, l);
fmpz_mul_ui(u, Aden, k);
fmpz_divexact(B + k, t, u);
}
diff --git a/src/fmpq_poly/power_sums.c b/src/fmpq_poly/power_sums.c
index 2eac351e65..d808656b76 100644
--- a/src/fmpq_poly/power_sums.c
+++ b/src/fmpq_poly/power_sums.c
@@ -9,6 +9,7 @@
(at your option) any later version. See .
*/
+#include "fmpz_vec.h"
#include "fmpz_poly.h"
#include "fmpq.h"
#include "fmpq_poly.h"
@@ -61,29 +62,20 @@ _fmpq_poly_power_sums(fmpz * res, fmpz_t rden, const fmpz * poly, slong len,
for (k = 1; k < FLINT_MIN(n, len); k++)
{
- fmpz_mul_ui(res + k, poly + len - 1 - k, k);
+ fmpz_mul_si(res + k, poly + len - 1 - k, -k);
fmpz_mul(res + k, res + k, rden);
-
- for (i = 1; i < k - 1; i++)
- fmpz_mul(res + i, res + i, poly + len - 1);
- for (i = 1; i < k; i++)
- fmpz_addmul(res + k, poly + len - 1 - k + i, res + i);
- fmpz_neg(res + k, res + k);
+ _fmpz_vec_scalar_mul_fmpz(res + 1, res + 1, k - 2, poly + len - 1);
+ _fmpz_vec_dot_general(res + k, res + k, 1, poly + len - 1 - k + 1, res + 1, 0, k - 1);
fmpz_mul(rden, rden, poly + len - 1);
}
for (k = len; k < n; k++)
{
- fmpz_zero(res + k);
- for (i = k - len + 1; i < k - 1; i++)
- fmpz_mul(res + i, res + i, poly + len - 1);
- for (i = k - len + 1; i < k; i++)
- fmpz_addmul(res + k, poly + len - 1 - k + i, res + i);
- fmpz_neg(res + k, res + k);
+ _fmpz_vec_scalar_mul_fmpz(res + k - len + 1, res + k - len + 1, len - 2, poly + len - 1);
+ _fmpz_vec_dot_general(res + k, NULL, 1, poly, res + k - len + 1, 0, len - 1);
}
- for (i = n - len + 1; i < n - 1; i++)
- fmpz_mul(res + i, res + i, poly + len - 1);
+ _fmpz_vec_scalar_mul_fmpz(res + n - len + 1, res + n - len + 1, len - 2, poly + len - 1);
fmpz_one(rden);
for (i = n - len; i > 0; i--)
diff --git a/src/fmpq_poly/power_sums_to_poly.c b/src/fmpq_poly/power_sums_to_poly.c
index dbea185cd9..b9f82a0dda 100644
--- a/src/fmpq_poly/power_sums_to_poly.c
+++ b/src/fmpq_poly/power_sums_to_poly.c
@@ -11,6 +11,7 @@
#include "ulong_extras.h"
#include "fmpz.h"
+#include "fmpz_vec.h"
#include "fmpz_poly.h"
#include "fmpq_poly.h"
@@ -30,12 +31,16 @@ _fmpq_poly_power_sums_to_poly(fmpz * res, const fmpz * poly, const fmpz_t den,
fmpz_one(f);
for (k = 1; k <= d; k++)
{
- if(k < len)
+ if (k < len)
+ {
fmpz_mul(res + d - k, poly + k, f);
+ _fmpz_vec_dot_general(res + d - k, res + d - k, 0, res + d - k + 1, poly + 1, 0, k - 1);
+
+ }
else
- fmpz_zero(res + d - k);
- for (i = 1; i < FLINT_MIN(k, len); i++)
- fmpz_addmul(res + d - k, res + d - k + i, poly + i);
+ {
+ _fmpz_vec_dot_general(res + d - k, NULL, 0, res + d - k + 1, poly + 1, 0, len - 1);
+ }
a = n_gcd(FLINT_ABS(fmpz_fdiv_ui(res + d - k, k)), k);
fmpz_divexact_ui(res + d - k, res + d - k, a);
diff --git a/src/fmpq_poly/revert_series_lagrange_fast.c b/src/fmpq_poly/revert_series_lagrange_fast.c
index 5117f1cef7..d78af92bea 100644
--- a/src/fmpq_poly/revert_series_lagrange_fast.c
+++ b/src/fmpq_poly/revert_series_lagrange_fast.c
@@ -44,9 +44,9 @@ void
_fmpq_poly_revert_series_lagrange_fast(fmpz * Qinv, fmpz_t den,
const fmpz * Q, const fmpz_t Qden, slong Qlen, slong n)
{
- slong i, j, k, m;
+ slong i, j, m;
fmpz *R, *Rden, *S, *T, *dens, *tmp;
- fmpz_t Sden, Tden, t;
+ fmpz_t Sden, Tden;
if (Qlen <= 2)
{
@@ -65,7 +65,6 @@ _fmpq_poly_revert_series_lagrange_fast(fmpz * Qinv, fmpz_t den,
m = n_sqrt(n);
- fmpz_init(t);
dens = _fmpz_vec_init(n);
R = _fmpz_vec_init((n - 1) * m);
S = _fmpz_vec_init(n - 1);
@@ -103,12 +102,7 @@ _fmpq_poly_revert_series_lagrange_fast(fmpz * Qinv, fmpz_t den,
for (j = 1; j < m && i + j < n; j++)
{
- fmpz_mul(t, S + 0, Ri(j) + i + j - 1);
-
- for (k = 1; k <= i + j - 1; k++)
- fmpz_addmul(t, S + k, Ri(j) + i + j - 1 - k);
-
- fmpz_set(Qinv + i + j, t);
+ _fmpz_vec_dot_general(Qinv + i + j, NULL, 0, S, Ri(j), 1, i + j);
fmpz_mul(dens + i + j, Sden, Rdeni(j));
fmpz_mul_ui(dens + i + j, dens + i + j, i + j);
}
@@ -126,7 +120,6 @@ _fmpq_poly_revert_series_lagrange_fast(fmpz * Qinv, fmpz_t den,
_set_vec(Qinv, den, Qinv, dens, n);
_fmpq_poly_canonicalise(Qinv, den, n);
- fmpz_clear(t);
_fmpz_vec_clear(dens, n);
_fmpz_vec_clear(R, (n - 1) * m);
_fmpz_vec_clear(S, n - 1);
diff --git a/src/fmpq_poly/sin_cos_series.c b/src/fmpq_poly/sin_cos_series.c
index ceace1fec0..796783eee2 100644
--- a/src/fmpq_poly/sin_cos_series.c
+++ b/src/fmpq_poly/sin_cos_series.c
@@ -61,6 +61,7 @@ _fmpq_poly_sin_cos_series_basecase_can(fmpz * S, fmpz_t Sden,
fmpz_zero(t);
fmpz_zero(u);
+ /* todo: precompute A[j] * j, use dot products */
for (j = 1; j < FLINT_MIN(Alen, k + 1); j++)
{
fmpz_mul_ui(v, A + j, j);
diff --git a/src/fmpz_mat.h b/src/fmpz_mat.h
index e379e74c55..f7009d2c92 100644
--- a/src/fmpz_mat.h
+++ b/src/fmpz_mat.h
@@ -174,8 +174,7 @@ void fmpz_mat_mul_classical(fmpz_mat_t C, const fmpz_mat_t A,
void fmpz_mat_mul_strassen(fmpz_mat_t C, const fmpz_mat_t A, const fmpz_mat_t B);
-void fmpz_mat_mul_classical_inline(fmpz_mat_t C, const fmpz_mat_t A,
- const fmpz_mat_t B);
+#define fmpz_mat_mul_classical_inline _Pragma("GCC error \"'fmpz_mat_mul_classical_inline' is deprecated. Use 'fmpz_mat_mul_classical' instead.\"")
void _fmpz_mat_mul_fft(fmpz_mat_t C,
const fmpz_mat_t A, slong abits,
diff --git a/src/fmpz_mat/charpoly.c b/src/fmpz_mat/charpoly.c
index 66c3664b4e..c43d5763bc 100644
--- a/src/fmpz_mat/charpoly.c
+++ b/src/fmpz_mat/charpoly.c
@@ -17,6 +17,8 @@
#include "fmpz_vec.h"
#include "fmpz_mat.h"
#include "fmpz_poly.h"
+#include "gr.h"
+#include "gr_mat.h"
/*
Assumes that \code{mat} is an $n \times n$ matrix and sets \code{(cp,n+1)}
@@ -27,78 +29,9 @@
void _fmpz_mat_charpoly_berkowitz(fmpz *cp, const fmpz_mat_t mat)
{
- const slong n = mat->r;
-
- if (n == 0)
- {
- fmpz_one(cp);
- }
- else if (n == 1)
- {
- fmpz_neg(cp + 0, fmpz_mat_entry(mat, 0, 0));
- fmpz_one(cp + 1);
- }
- else
- {
- slong i, j, k, t;
- fmpz *a, *A, *s;
-
- a = _fmpz_vec_init(n * n);
- A = a + (n - 1) * n;
-
- _fmpz_vec_zero(cp, n + 1);
- fmpz_neg(cp + 0, fmpz_mat_entry(mat, 0, 0));
-
- for (t = 1; t < n; t++)
- {
- for (i = 0; i <= t; i++)
- {
- fmpz_set(a + 0 * n + i, fmpz_mat_entry(mat, i, t));
- }
-
- fmpz_set(A + 0, fmpz_mat_entry(mat, t, t));
-
- for (k = 1; k < t; k++)
- {
- for (i = 0; i <= t; i++)
- {
- s = a + k * n + i;
- fmpz_zero(s);
- for (j = 0; j <= t; j++)
- {
- fmpz_addmul(s, fmpz_mat_entry(mat, i, j), a + (k - 1) * n + j);
- }
- }
- fmpz_set(A + k, a + k * n + t);
- }
-
- fmpz_zero(A + t);
- for (j = 0; j <= t; j++)
- {
- fmpz_addmul(A + t, fmpz_mat_entry(mat, t, j), a + (t - 1) * n + j);
- }
-
- for (k = 0; k <= t; k++)
- {
- for (j = 0; j < k; j++)
- {
- fmpz_submul(cp + k, A + j, cp + (k - j - 1));
- }
- fmpz_sub(cp + k, cp + k, A + k);
- }
- }
-
- /* Shift all coefficients up by one */
- for (i = n; i > 0; i--)
- {
- fmpz_swap(cp + i, cp + (i - 1));
- }
- fmpz_one(cp + 0);
-
- _fmpz_poly_reverse(cp, cp, n + 1, n + 1);
-
- _fmpz_vec_clear(a, n * n);
- }
+ gr_ctx_t ctx;
+ gr_ctx_init_fmpz(ctx);
+ GR_MUST_SUCCEED(_gr_mat_charpoly_berkowitz(cp, (const gr_mat_struct *) mat, ctx));
}
void fmpz_mat_charpoly_berkowitz(fmpz_poly_t cp, const fmpz_mat_t mat)
diff --git a/src/fmpz_mat/mul.c b/src/fmpz_mat/mul.c
index a2cf467ac7..bbdfa2afb5 100644
--- a/src/fmpz_mat/mul.c
+++ b/src/fmpz_mat/mul.c
@@ -279,6 +279,6 @@ fmpz_mat_mul(fmpz_mat_t C, const fmpz_mat_t A, const fmpz_mat_t B)
else if (abits >= 500 && bbits >= 500 && dim >= 8) /* tuning param */
fmpz_mat_mul_strassen(C, A, B);
else
- fmpz_mat_mul_classical_inline(C, A, B);
+ fmpz_mat_mul_classical(C, A, B);
}
}
diff --git a/src/fmpz_mat/mul_classical.c b/src/fmpz_mat/mul_classical.c
index 299ac130d3..af6572091a 100644
--- a/src/fmpz_mat/mul_classical.c
+++ b/src/fmpz_mat/mul_classical.c
@@ -1,5 +1,5 @@
/*
- Copyright (C) 2010,2011 Fredrik Johansson
+ Copyright (C) 2010, 2011, 2024 Fredrik Johansson
This file is part of FLINT.
@@ -10,38 +10,71 @@
*/
#include "fmpz.h"
+#include "fmpz_vec.h"
#include "fmpz_mat.h"
void
fmpz_mat_mul_classical(fmpz_mat_t C, const fmpz_mat_t A, const fmpz_mat_t B)
{
- slong ar, bc, br;
- slong i, j, k;
+ slong ar, br, bc, i, j;
- ar = A->r;
- br = B->r;
- bc = B->c;
+ ar = fmpz_mat_nrows(A);
+ br = fmpz_mat_nrows(B);
+ bc = fmpz_mat_ncols(B);
- if (br == 0)
+ if (ar == 0 || br == 0 || bc == 0)
{
fmpz_mat_zero(C);
return;
}
- for (i = 0; i < ar; i++)
+ if (br == 1)
{
- for (j = 0; j < bc; j++)
+ for (i = 0; i < ar; i++)
{
- fmpz_mul(fmpz_mat_entry(C, i, j),
- fmpz_mat_entry(A, i, 0),
- fmpz_mat_entry(B, 0, j));
+ for (j = 0; j < bc; j++)
+ {
+ fmpz_mul(fmpz_mat_entry(C, i, j),
+ fmpz_mat_entry(A, i, 0),
+ fmpz_mat_entry(B, 0, j));
+ }
+ }
+ }
+ else if (br == 2)
+ {
+ for (i = 0; i < ar; i++)
+ {
+ for (j = 0; j < bc; j++)
+ {
+ fmpz_fmma(fmpz_mat_entry(C, i, j),
+ fmpz_mat_entry(A, i, 0),
+ fmpz_mat_entry(B, 0, j),
+ fmpz_mat_entry(A, i, 1),
+ fmpz_mat_entry(B, 1, j));
+ }
+ }
+ }
+ else
+ {
+ fmpz * tmp;
+ TMP_INIT;
+
+ TMP_START;
+ tmp = TMP_ALLOC(sizeof(fmpz) * br * bc);
- for (k = 1; k < br; k++)
+ for (i = 0; i < br; i++)
+ for (j = 0; j < bc; j++)
+ tmp[j * br + i] = *fmpz_mat_entry(B, i, j);
+
+ for (i = 0; i < ar; i++)
+ {
+ for (j = 0; j < bc; j++)
{
- fmpz_addmul(fmpz_mat_entry(C, i, j),
- fmpz_mat_entry(A, i, k),
- fmpz_mat_entry(B, k, j));
+ _fmpz_vec_dot_general(fmpz_mat_entry(C, i, j),
+ NULL, 0, fmpz_mat_entry(A, i, 0), tmp + j * br, 0, br);
}
}
+
+ TMP_END;
}
}
diff --git a/src/fmpz_mat/mul_classical_inline.c b/src/fmpz_mat/mul_classical_inline.c
deleted file mode 100644
index 1626b54d8a..0000000000
--- a/src/fmpz_mat/mul_classical_inline.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- Copyright (C) 2011 Fredrik Johansson
-
- This file is part of FLINT.
-
- FLINT is free software: you can redistribute it and/or modify it under
- the terms of the GNU Lesser General Public License (LGPL) as published
- by the Free Software Foundation; either version 2.1 of the License, or
- (at your option) any later version. See .
-*/
-
-#include "gmpcompat.h"
-#include "fmpz.h"
-#include "fmpz_mat.h"
-
-void
-fmpz_mat_mul_classical_inline(fmpz_mat_t C, const fmpz_mat_t A,
- const fmpz_mat_t B)
-{
- slong ar, bc, br;
- slong i, j, k;
-
- fmpz a, b;
- mpz_t t;
-
- mp_limb_t au, bu;
- mp_limb_t pos[3];
- mp_limb_t neg[3];
-
- ar = A->r;
- br = B->r;
- bc = B->c;
-
- mpz_init(t);
-
- for (i = 0; i < ar; i++)
- {
- for (j = 0; j < bc; j++)
- {
- flint_mpz_set_ui(t, UWORD(0));
-
- pos[2] = pos[1] = pos[0] = neg[2] = neg[1] = neg[0] = UWORD(0);
-
- for (k = 0; k < br; k++)
- {
- a = A->rows[i][k];
- b = B->rows[k][j];
-
- if (a == 0 || b == 0)
- continue;
-
- if (!COEFF_IS_MPZ(a)) /* a is small */
- {
- if (!COEFF_IS_MPZ(b)) /* both are small */
- {
- au = FLINT_ABS(a);
- bu = FLINT_ABS(b);
-
- umul_ppmm(au, bu, au, bu);
-
- if ((a ^ b) >= WORD(0))
- add_sssaaaaaa(pos[2], pos[1], pos[0],
- pos[2], pos[1], pos[0], 0, au, bu);
- else
- add_sssaaaaaa(neg[2], neg[1], neg[0],
- neg[2], neg[1], neg[0], 0, au, bu);
- }
- else
- {
- if (a >= 0)
- flint_mpz_addmul_ui(t, COEFF_TO_PTR(b), a);
- else
- flint_mpz_submul_ui(t, COEFF_TO_PTR(b), -a);
- }
- }
- else if (!COEFF_IS_MPZ(b)) /* b is small */
- {
- if (b >= 0)
- flint_mpz_addmul_ui(t, COEFF_TO_PTR(a), b);
- else
- flint_mpz_submul_ui(t, COEFF_TO_PTR(a), -b);
- }
- else
- {
- mpz_addmul(t, COEFF_TO_PTR(a), COEFF_TO_PTR(b));
- }
- }
-
- if (mpz_sgn(t) != 0 || pos[2] || neg[2] || pos[1] || neg[1])
- {
- __mpz_struct r;
-
- r._mp_size = pos[2] ? 3 : (pos[1] ? 2 : pos[0] != 0);
- r._mp_alloc = r._mp_size;
- r._mp_d = pos;
-
- mpz_add(t, t, &r);
-
- r._mp_size = neg[2] ? 3 : (neg[1] ? 2 : neg[0] != 0);
- r._mp_alloc = r._mp_size;
- r._mp_d = neg;
-
- mpz_sub(t, t, &r);
-
- fmpz_set_mpz(fmpz_mat_entry(C, i, j), t);
- }
- else
- {
- if (neg[0] > pos[0])
- fmpz_neg_ui(fmpz_mat_entry(C, i, j), neg[0] - pos[0]);
- else
- fmpz_set_ui(fmpz_mat_entry(C, i, j), pos[0] - neg[0]);
- }
- }
- }
-
- mpz_clear(t);
-}
diff --git a/src/fmpz_mat/profile/p-mul.c b/src/fmpz_mat/profile/p-mul.c
index 3c1240e302..e999601a37 100644
--- a/src/fmpz_mat/profile/p-mul.c
+++ b/src/fmpz_mat/profile/p-mul.c
@@ -50,9 +50,6 @@ void sample(void * arg, ulong count)
else if (algorithm == 1)
for (i = 0; i < count; i++)
fmpz_mat_mul_classical(C, A, B);
- else if (algorithm == 2)
- for (i = 0; i < count; i++)
- fmpz_mat_mul_classical_inline(C, A, B);
else if (algorithm == 3)
for (i = 0; i < count; i++)
fmpz_mat_mul_multi_mod(C, A, B);
@@ -71,7 +68,7 @@ void sample(void * arg, ulong count)
int main(void)
{
- double min_default, min_classical, min_inline, min_multi_mod, min_strassen, max;
+ double min_default, min_classical, min_multi_mod, min_strassen, max;
mat_mul_t params;
slong bits, dim;
@@ -93,28 +90,22 @@ int main(void)
params.algorithm = 1;
prof_repeat(&min_classical, &max, sample, ¶ms);
- params.algorithm = 2;
- prof_repeat(&min_inline, &max, sample, ¶ms);
-
params.algorithm = 3;
prof_repeat(&min_multi_mod, &max, sample, ¶ms);
params.algorithm = 4;
prof_repeat(&min_strassen, &max, sample, ¶ms);
- flint_printf("dim = %wd default/classical/inline/multi_mod/strassen %.2f %.2f %.2f %.2f %.2f (us)\n",
- dim, min_default, min_classical, min_inline, min_multi_mod, min_strassen);
+ flint_printf("dim = %wd default/classical/multi_mod/strassen %.2f %.2f %.2f %.2f (us)\n",
+ dim, min_default, min_classical, min_multi_mod, min_strassen);
if (min_multi_mod < 0.6*min_default)
flint_printf("BAD!\n");
- if (min_inline < 0.6*min_default)
- flint_printf("BAD!\n");
-
if (min_strassen < 0.7*min_default)
flint_printf("BAD!\n");
- if (min_multi_mod < 0.7*min_inline)
+ if (min_multi_mod < 0.7*min_classical)
break;
}
}
diff --git a/src/fmpz_mat/profile/p-mul_double_word.c b/src/fmpz_mat/profile/p-mul_double_word.c
index 2631b056af..38c11f354e 100644
--- a/src/fmpz_mat/profile/p-mul_double_word.c
+++ b/src/fmpz_mat/profile/p-mul_double_word.c
@@ -71,7 +71,7 @@ int main(void)
if (dim < 150)
{
- fmpz_mat_mul_classical_inline(D, A, B);
+ fmpz_mat_mul_classical(D, A, B);
if (!fmpz_mat_equal(D, E))
{
diff --git a/src/fmpz_mat/profile/p-mul_multi_mod.c b/src/fmpz_mat/profile/p-mul_multi_mod.c
index b6cf3167c9..135a4d1e4a 100644
--- a/src/fmpz_mat/profile/p-mul_multi_mod.c
+++ b/src/fmpz_mat/profile/p-mul_multi_mod.c
@@ -61,7 +61,7 @@ int main(void)
if (dim < 150)
{
- fmpz_mat_mul_classical_inline(D, A, B);
+ fmpz_mat_mul_classical(D, A, B);
if (!fmpz_mat_equal(D, E))
{
diff --git a/src/fmpz_mat/test/t-mul.c b/src/fmpz_mat/test/t-mul.c
index ba71570382..2456038115 100644
--- a/src/fmpz_mat/test/t-mul.c
+++ b/src/fmpz_mat/test/t-mul.c
@@ -55,7 +55,7 @@ TEST_FUNCTION_START(fmpz_mat_mul, state)
fmpz_mat_randtest(C, state, n_randint(state, 200) + 1);
fmpz_mat_mul(C, A, B);
- fmpz_mat_mul_classical_inline(D, A, B);
+ fmpz_mat_mul_classical(D, A, B);
if (!fmpz_mat_equal(C, D))
{
@@ -169,8 +169,8 @@ TEST_FUNCTION_START(fmpz_mat_mul, state)
if (!fmpz_mat_equal(A, B))
{
flint_printf("FAIL: window aliasing failed\n");
- fmpz_mat_print(A); flint_printf("\n\n");
- fmpz_mat_print(B); flint_printf("\n\n");
+ fmpz_mat_print(A); flint_printf("\n\n");
+ fmpz_mat_print(B); flint_printf("\n\n");
fflush(stdout);
flint_abort();
}
diff --git a/src/fmpz_mat/test/t-mul_blas.c b/src/fmpz_mat/test/t-mul_blas.c
index cb856966c6..d06b2a5c26 100644
--- a/src/fmpz_mat/test/t-mul_blas.c
+++ b/src/fmpz_mat/test/t-mul_blas.c
@@ -44,7 +44,7 @@ TEST_FUNCTION_START(fmpz_mat_mul_blas, state)
/* Make sure noise in the output is ok */
fmpz_mat_randtest(C, state, n_randint(state, 200) + 1);
- fmpz_mat_mul_classical_inline(C, A, B);
+ fmpz_mat_mul_classical(C, A, B);
if (fmpz_mat_mul_blas(D, A, B))
{
if (!fmpz_mat_equal(C, D))
@@ -92,7 +92,7 @@ TEST_FUNCTION_START(fmpz_mat_mul_blas, state)
fmpz_mat_randtest(C, state, n_randint(state, 200) + 1);
fmpz_mat_randtest(D, state, n_randint(state, 200) + 1);
- fmpz_mat_mul_classical_inline(C, A, B);
+ fmpz_mat_mul_classical(C, A, B);
if (fmpz_mat_mul_blas(D, A, B))
{
if (!fmpz_mat_equal(C, D))
diff --git a/src/fmpz_mat/test/t-mul_classical.c b/src/fmpz_mat/test/t-mul_classical.c
index 679ce9494e..e28bbab32c 100644
--- a/src/fmpz_mat/test/t-mul_classical.c
+++ b/src/fmpz_mat/test/t-mul_classical.c
@@ -19,6 +19,7 @@ TEST_FUNCTION_START(fmpz_mat_mul_classical, state)
for (i = 0; i < 100 * flint_test_multiplier(); i++)
{
slong m, n, k;
+ slong i, j, h;
m = n_randint(state, 50);
n = n_randint(state, 50);
@@ -36,7 +37,13 @@ TEST_FUNCTION_START(fmpz_mat_mul_classical, state)
fmpz_mat_randtest(C, state, n_randint(state, 200) + 1);
fmpz_mat_mul_classical(C, A, B);
- fmpz_mat_mul_classical_inline(D, A, B);
+
+ for (i = 0; i < D->r; i++)
+ for (j = 0; j < D->c; j++)
+ for (h = 0; h < B->r; h++)
+ fmpz_addmul(fmpz_mat_entry(D, i, j),
+ fmpz_mat_entry(A, i, h),
+ fmpz_mat_entry(B, h, j));
if (!fmpz_mat_equal(C, D))
{
diff --git a/src/fmpz_mat/test/t-mul_double_word.c b/src/fmpz_mat/test/t-mul_double_word.c
index 0d63157a11..93d3f9b355 100644
--- a/src/fmpz_mat/test/t-mul_double_word.c
+++ b/src/fmpz_mat/test/t-mul_double_word.c
@@ -87,7 +87,7 @@ TEST_FUNCTION_START(fmpz_mat_mul_double_word, state)
fmpz_mat_randtest(C, state, n_randint(state, 200) + 1);
_fmpz_mat_mul_double_word(C, A, B);
- fmpz_mat_mul_classical_inline(D, A, B);
+ fmpz_mat_mul_classical(D, A, B);
if (!fmpz_mat_equal(C, D))
{
diff --git a/src/fmpz_mat/test/t-mul_fft.c b/src/fmpz_mat/test/t-mul_fft.c
index dc60e0ac89..56fc9fabdf 100644
--- a/src/fmpz_mat/test/t-mul_fft.c
+++ b/src/fmpz_mat/test/t-mul_fft.c
@@ -38,7 +38,7 @@ TEST_FUNCTION_START(fmpz_mat_mul_fft, state)
fmpz_mat_randtest(C, state, n_randint(state, 2000) + 1);
fmpz_mat_mul_fft(C, A, B);
- fmpz_mat_mul_classical_inline(D, A, B);
+ fmpz_mat_mul_classical(D, A, B);
if (!fmpz_mat_equal(C, D))
{
diff --git a/src/fmpz_mat/test/t-mul_multi_mod.c b/src/fmpz_mat/test/t-mul_multi_mod.c
index 07df123128..24ecf6098d 100644
--- a/src/fmpz_mat/test/t-mul_multi_mod.c
+++ b/src/fmpz_mat/test/t-mul_multi_mod.c
@@ -36,7 +36,7 @@ TEST_FUNCTION_START(fmpz_mat_mul_multi_mod, state)
/* Make sure noise in the output is ok */
fmpz_mat_randtest(C, state, n_randint(state, 200) + 1);
- fmpz_mat_mul_classical_inline(C, A, B);
+ fmpz_mat_mul_classical(C, A, B);
fmpz_mat_mul_multi_mod(D, A, B);
if (!fmpz_mat_equal(C, D))
@@ -71,7 +71,7 @@ TEST_FUNCTION_START(fmpz_mat_mul_multi_mod, state)
/* Make sure noise in the output is ok */
fmpz_mat_randtest(C, state, n_randint(state, 200) + 1);
- fmpz_mat_mul_classical_inline(C, A, B);
+ fmpz_mat_mul_classical(C, A, B);
fmpz_mat_mul_multi_mod(D, A, B);
if (!fmpz_mat_equal(C, D))
diff --git a/src/fmpz_mat/test/t-mul_small.c b/src/fmpz_mat/test/t-mul_small.c
index 1db309bc00..2bda60b825 100644
--- a/src/fmpz_mat/test/t-mul_small.c
+++ b/src/fmpz_mat/test/t-mul_small.c
@@ -47,7 +47,7 @@ TEST_FUNCTION_START(fmpz_mat_mul_small, state)
fmpz_mat_randtest(D, state, n_randint(state, 200) + 1);
_fmpz_mat_mul_small(C, A, B);
- fmpz_mat_mul_classical_inline(D, A, B);
+ fmpz_mat_mul_classical(D, A, B);
if (!fmpz_mat_equal(C, D))
{
diff --git a/src/fmpz_mod_mat/mul_classical_threaded.c b/src/fmpz_mod_mat/mul_classical_threaded.c
index dcab679382..c2679b9036 100644
--- a/src/fmpz_mod_mat/mul_classical_threaded.c
+++ b/src/fmpz_mod_mat/mul_classical_threaded.c
@@ -24,6 +24,17 @@ with op = 1, computes D = C + A*B
with op = -1, computes D = C - A*B
*/
+static inline void
+_fmpz_vec_dot_ptr(fmpz_t c, const fmpz * vec1, fmpz ** const vec2,
+ slong offset, slong len)
+{
+ slong i;
+
+ fmpz_zero(c);
+ for (i = 0; i < len; i++)
+ fmpz_addmul(c, vec1 + i, vec2[i] + offset);
+}
+
static inline void
_fmpz_mod_mat_addmul_basic_op(fmpz ** D, fmpz ** const C, fmpz ** const A,
fmpz ** const B, slong m, slong k, slong n, int op, fmpz_t p)
diff --git a/src/fmpz_poly/2norm.c b/src/fmpz_poly/2norm.c
index 2ec047c296..e0fc9dabe7 100644
--- a/src/fmpz_poly/2norm.c
+++ b/src/fmpz_poly/2norm.c
@@ -10,15 +10,13 @@
*/
#include "fmpz.h"
+#include "fmpz_vec.h"
#include "fmpz_poly.h"
void
_fmpz_poly_2norm(fmpz_t res, const fmpz * poly, slong len)
{
- slong i;
- fmpz_zero(res);
- for (i = 0; i < len; i++)
- fmpz_addmul(res, poly + i, poly + i);
+ _fmpz_vec_dot(res, poly, poly, len);
fmpz_sqrt(res, res);
}
diff --git a/src/fmpz_poly/div_series_basecase.c b/src/fmpz_poly/div_series_basecase.c
index b412f0ba88..479852d459 100644
--- a/src/fmpz_poly/div_series_basecase.c
+++ b/src/fmpz_poly/div_series_basecase.c
@@ -180,10 +180,9 @@ _fmpz_poly_div_series_basecase(fmpz * Q, const fmpz * A, slong Alen,
}
else
{
- fmpz_mul(Q + i, B + 1, Q + i - 1);
-
- for (j = 2; j < FLINT_MIN(i + 1, Blen); j++)
- fmpz_addmul(Q + i, B + j, Q + i - j);
+ slong l = FLINT_MIN(i, Blen - 1);
+ /* todo: merge final subtraction */
+ _fmpz_vec_dot_general(Q + i, NULL, 0, B + 1, Q + i - l, 1, l);
}
if (i < Alen)
diff --git a/src/fmpz_poly/gcd_modular.c b/src/fmpz_poly/gcd_modular.c
index fc6390e33f..c6dfb47d95 100644
--- a/src/fmpz_poly/gcd_modular.c
+++ b/src/fmpz_poly/gcd_modular.c
@@ -67,15 +67,12 @@ void _fmpz_poly_gcd_modular(fmpz * res, const fmpz * poly1, slong len1,
if (len1 < 64 && len2 < 64) /* compute the squares of the 2-norms */
{
- fmpz_set_ui(l, 0);
- for (i = 0; i < len1; i++)
- fmpz_addmul(l, A + i, A + i);
+ _fmpz_vec_dot(l, A, A, len1);
nb1 = fmpz_bits(l);
- fmpz_set_ui(l, 0);
- for (i = 0; i < len2; i++)
- fmpz_addmul(l, B + i, B + i);
+ _fmpz_vec_dot(l, B, B, len2);
nb2 = fmpz_bits(l);
- } else /* approximate to save time */
+ }
+ else /* approximate to save time */
{
nb1 = 2*bits1 + FLINT_BIT_COUNT(len1);
nb2 = 2*bits2 + FLINT_BIT_COUNT(len2);
diff --git a/src/fmpz_poly/inv_series.c b/src/fmpz_poly/inv_series.c
index e9affd9771..eae17bf875 100644
--- a/src/fmpz_poly/inv_series.c
+++ b/src/fmpz_poly/inv_series.c
@@ -220,13 +220,8 @@ _fmpz_poly_inv_series_basecase(fmpz * Qinv, const fmpz * Q, slong Qlen, slong n)
}
else
{
- fmpz_mul(Qinv + i, Q + 1, Qinv + i - 1);
-
- for (j = 2; j < FLINT_MIN(i + 1, Qlen); j++)
- fmpz_addmul(Qinv + i, Q + j, Qinv + i - j);
-
- if (neg)
- fmpz_neg(Qinv + i, Qinv + i);
+ slong l = FLINT_MIN(i, Qlen - 1);
+ _fmpz_vec_dot_general(Qinv + i, NULL, neg, Q + 1, Qinv + i - l, 1, l);
}
}
diff --git a/src/fmpz_poly/mul_classical.c b/src/fmpz_poly/mul_classical.c
index 844ddc135d..71d5e36fc1 100644
--- a/src/fmpz_poly/mul_classical.c
+++ b/src/fmpz_poly/mul_classical.c
@@ -1,5 +1,6 @@
/*
Copyright (C) 2008, 2009 William Hart
+ Copyright (C) 2024 Fredrik Johansson
This file is part of FLINT.
@@ -18,26 +19,37 @@ void
_fmpz_poly_mul_classical(fmpz * res, const fmpz * poly1,
slong len1, const fmpz * poly2, slong len2)
{
- if (len1 == 1 && len2 == 1) /* Special case if the length of both inputs is 1 */
+ slong i, top1, top2;
+
+ if (len1 == 1 && len2 == 1)
{
fmpz_mul(res, poly1, poly2);
+ return;
}
- else /* Ordinary case */
+
+ if (len1 == 1)
{
- slong i;
+ _fmpz_vec_scalar_mul_fmpz(res, poly2, len2, poly1);
+ return;
+ }
- /* Set res[i] = poly1[i]*poly2[0] */
+ if (len2 == 1)
+ {
_fmpz_vec_scalar_mul_fmpz(res, poly1, len1, poly2);
+ return;
+ }
- /* Set res[i+len1-1] = in1[len1-1]*in2[i] */
- _fmpz_vec_scalar_mul_fmpz(res + len1, poly2 + 1, len2 - 1,
- poly1 + len1 - 1);
+ fmpz_mul(res, poly1, poly2);
- /* out[i+j] += in1[i]*in2[j] */
- for (i = 0; i < len1 - 1; i++)
- _fmpz_vec_scalar_addmul_fmpz(res + i + 1, poly2 + 1, len2 - 1,
- poly1 + i);
+ for (i = 1; i < len1 + len2 - 2; i++)
+ {
+ top1 = FLINT_MIN(len1 - 1, i);
+ top2 = FLINT_MIN(len2 - 1, i);
+
+ _fmpz_vec_dot_general(res + i, NULL, 0, poly1 + i - top2, poly2 + i - top1, 1, top1 + top2 - i + 1);
}
+
+ fmpz_mul(res + len1 + len2 - 2, poly1 + len1 - 1, poly2 + len2 - 1);
}
void
diff --git a/src/fmpz_poly/mullow_classical.c b/src/fmpz_poly/mullow_classical.c
index 94553b1bc1..bcfa94e3e3 100644
--- a/src/fmpz_poly/mullow_classical.c
+++ b/src/fmpz_poly/mullow_classical.c
@@ -1,5 +1,6 @@
/*
Copyright (C) 2008, 2009 William Hart
+ Copyright (C) 2024 Fredrik Johansson
This file is part of FLINT.
@@ -20,27 +21,37 @@ void
_fmpz_poly_mullow_classical(fmpz * res, const fmpz * poly1, slong len1,
const fmpz * poly2, slong len2, slong n)
{
- if ((len1 == 1 && len2 == 1) || n == 1) /* Special case if the length of output is 1 */
+ slong i, top1, top2;
+
+ len1 = FLINT_MIN(len1, n);
+ len2 = FLINT_MIN(len2, n);
+
+ if (n == 1)
{
fmpz_mul(res, poly1, poly2);
+ return;
}
- else /* Ordinary case */
+
+ if (len1 == 1)
{
- slong i;
+ _fmpz_vec_scalar_mul_fmpz(res, poly2, n, poly1);
+ return;
+ }
- /* Set res[i] = poly1[i]*poly2[0] */
- _fmpz_vec_scalar_mul_fmpz(res, poly1, FLINT_MIN(len1, n), poly2);
+ if (len2 == 1)
+ {
+ _fmpz_vec_scalar_mul_fmpz(res, poly1, n, poly2);
+ return;
+ }
- /* Set res[i+len1-1] = in1[len1-1]*in2[i] */
- if (n > len1)
- _fmpz_vec_scalar_mul_fmpz(res + len1, poly2 + 1, n - len1,
- poly1 + len1 - 1);
+ fmpz_mul(res, poly1, poly2);
+
+ for (i = 1; i < n; i++)
+ {
+ top1 = FLINT_MIN(len1 - 1, i);
+ top2 = FLINT_MIN(len2 - 1, i);
- /* out[i+j] += in1[i]*in2[j] */
- for (i = 0; i < FLINT_MIN(len1, n) - 1; i++)
- _fmpz_vec_scalar_addmul_fmpz(res + i + 1, poly2 + 1,
- FLINT_MIN(len2, n - i) - 1,
- poly1 + i);
+ _fmpz_vec_dot_general(res + i, NULL, 0, poly1 + i - top2, poly2 + i - top1, 1, top1 + top2 - i + 1);
}
}
diff --git a/src/fmpz_poly/power_sums_naive.c b/src/fmpz_poly/power_sums_naive.c
index 16a6c5ccaa..a1fee2d1e1 100644
--- a/src/fmpz_poly/power_sums_naive.c
+++ b/src/fmpz_poly/power_sums_naive.c
@@ -10,28 +10,24 @@
*/
#include "fmpz.h"
+#include "fmpz_vec.h"
#include "fmpz_poly.h"
void
_fmpz_poly_power_sums_naive(fmpz * res, const fmpz * poly, slong len, slong n)
{
- slong i, k;
+ slong k;
fmpz_set_ui(res, len - 1);
+
for (k = 1; k < FLINT_MIN(n, len); k++)
{
- fmpz_mul_ui(res + k, poly + len - 1 - k, k);
- for (i = 1; i < k; i++)
- fmpz_addmul(res + k, poly + len - 1 - k + i, res + i);
- fmpz_neg(res + k, res + k);
+ fmpz_mul_si(res + k, poly + len - 1 - k, -k);
+ _fmpz_vec_dot_general(res + k, res + k, 1, poly + len - 1 - k + 1, res + 1, 0, k - 1);
}
+
for (k = len; k < n; k++)
- {
- fmpz_zero(res + k);
- for (i = k - len + 1; i < k; i++)
- fmpz_addmul(res + k, poly + len - 1 - k + i, res + i);
- fmpz_neg(res + k, res + k);
- }
+ _fmpz_vec_dot_general(res + k, NULL, 1, poly, res + k - len + 1, 0, len - 1);
}
void
diff --git a/src/fmpz_poly/power_sums_to_poly.c b/src/fmpz_poly/power_sums_to_poly.c
index df00b67ea4..b1cfeb17a0 100644
--- a/src/fmpz_poly/power_sums_to_poly.c
+++ b/src/fmpz_poly/power_sums_to_poly.c
@@ -10,30 +10,25 @@
*/
#include "fmpz.h"
+#include "fmpz_vec.h"
#include "fmpz_poly.h"
void
_fmpz_poly_power_sums_to_poly(fmpz * res, const fmpz * poly, slong len)
{
- slong i, k;
+ slong k;
slong d = fmpz_get_ui(poly);
fmpz_one(res + d);
for (k = 1; k < FLINT_MIN(d + 1, len); k++)
{
- fmpz_set(res + d - k, poly + k);
- for (i = 1; i < k; i++)
- fmpz_addmul(res + d - k, res + d - k + i, poly + i);
- fmpz_divexact_si(res + d - k, res + d - k, k);
- fmpz_neg(res + d - k, res + d - k);
+ _fmpz_vec_dot_general(res + d - k, poly + k, 0, res + d - k + 1, poly + 1, 0, k - 1);
+ fmpz_divexact_si(res + d - k, res + d - k, -k);
}
for (k = len; k <= d; k++)
{
- fmpz_zero(res + d - k);
- for (i = 1; i < len; i++)
- fmpz_addmul(res + d - k, res + d - k + i, poly + i);
- fmpz_divexact_si(res + d - k, res + d - k, k);
- fmpz_neg(res + d - k, res + d - k);
+ _fmpz_vec_dot_general(res + d - k, NULL, 0, res + d - k + 1, poly + 1, 0, len - 1);
+ fmpz_divexact_si(res + d - k, res + d - k, -k);
}
}
diff --git a/src/fmpz_poly/resultant_modular.c b/src/fmpz_poly/resultant_modular.c
index 3ed5bf4f6c..8ea6c4c6e9 100644
--- a/src/fmpz_poly/resultant_modular.c
+++ b/src/fmpz_poly/resultant_modular.c
@@ -68,10 +68,8 @@ void _fmpz_poly_resultant_modular(fmpz_t res, const fmpz * poly1, slong len1,
fmpz_init(b1);
fmpz_init(b2);
- for (i = 0; i < len1; i++)
- fmpz_addmul(b1, A + i, A + i);
- for (i = 0; i < len2; i++)
- fmpz_addmul(b2, B + i, B + i);
+ _fmpz_vec_dot(b1, A, A, len1);
+ _fmpz_vec_dot(b2, B, B, len2);
fmpz_pow_ui(b1, b1, len2 - 1);
fmpz_pow_ui(b2, b2, len1 - 1);
diff --git a/src/fmpz_poly/sqr_classical.c b/src/fmpz_poly/sqr_classical.c
index d9b112b6ae..2352dfd719 100644
--- a/src/fmpz_poly/sqr_classical.c
+++ b/src/fmpz_poly/sqr_classical.c
@@ -1,6 +1,7 @@
/*
Copyright (C) 2008, 2009 William Hart
Copyright (C) 2011 Sebastian Pancratz
+ Copyright (C) 2024 Fredrik Johansson
This file is part of FLINT.
@@ -15,29 +16,39 @@
#include "fmpz_poly.h"
/* Assumes len > 0. */
-void _fmpz_poly_sqr_classical(fmpz *rop, const fmpz *op, slong len)
+void _fmpz_poly_sqr_classical(fmpz *res, const fmpz *op, slong len)
{
- if (len == 1) /* Special case */
+ slong i, start, stop;
+
+ if (len == 1)
{
- fmpz_mul(rop, op, op);
+ fmpz_mul(res, op, op);
+ return;
}
- else /* Ordinary case */
- {
- slong i;
- _fmpz_vec_scalar_mul_fmpz(rop, op, len, op);
+ fmpz_mul(res, op, op);
+ fmpz_mul(res + 1, op, op + 1);
+ fmpz_mul_2exp(res + 1, res + 1, 1);
- _fmpz_vec_scalar_mul_fmpz(rop + len, op + 1, len - 1, op + len - 1);
+ for (i = 2; i < 2 * len - 3; i++)
+ {
+ start = FLINT_MAX(0, i - len + 1);
+ stop = FLINT_MIN(len - 1, (i + 1) / 2 - 1);
- for (i = 1; i < len - 1; i++)
- _fmpz_vec_scalar_addmul_fmpz(rop + i + 1, op + 1, i - 1, op + i);
+ _fmpz_vec_dot_general(res + i, NULL, 0, op + start, op + i - stop, 1, stop - start + 1);
+ fmpz_mul_2exp(res + i, res + i, 1);
- for (i = 1; i < 2 * len - 2; i++)
- fmpz_mul_ui(rop + i, rop + i, 2);
+ if (i % 2 == 0 && i / 2 < len)
+ fmpz_addmul(res + i, op + i / 2, op + i / 2);
+ }
- for (i = 1; i < len - 1; i++)
- fmpz_addmul(rop + 2 * i, op + i, op + i);
+ if (len > 2)
+ {
+ fmpz_mul(res + 2 * len - 3, op + len - 1, op + len - 2);
+ fmpz_mul_2exp(res + 2 * len - 3, res + 2 * len - 3, 1);
}
+
+ fmpz_mul(res + 2 * len - 2, op + len - 1, op + len - 1);
}
void fmpz_poly_sqr_classical(fmpz_poly_t rop, const fmpz_poly_t op)
diff --git a/src/fmpz_poly/sqrlow_classical.c b/src/fmpz_poly/sqrlow_classical.c
index adf9f1afef..c55390fa02 100644
--- a/src/fmpz_poly/sqrlow_classical.c
+++ b/src/fmpz_poly/sqrlow_classical.c
@@ -1,5 +1,6 @@
/*
Copyright (C) 2011 Sebastian Pancratz
+ Copyright (C) 2024 Fredrik Johansson
This file is part of FLINT.
@@ -16,30 +17,42 @@
/*
Assumes len > 0 and 0 < n <= 2 * len - 1.
*/
-void _fmpz_poly_sqrlow_classical(fmpz *rop, const fmpz *op, slong len, slong n)
+void _fmpz_poly_sqrlow_classical(fmpz *res, const fmpz *op, slong len, slong n)
{
- if (len == 1 || n == 1) /* Special case */
+ slong i, start, stop;
+
+ len = FLINT_MIN(len, n);
+
+ if (n == 1)
{
- fmpz_mul(rop, op, op);
+ fmpz_mul(res, op, op);
+ return;
}
- else /* Ordinary case */
- {
- slong i;
- _fmpz_vec_scalar_mul_fmpz(rop, op, FLINT_MIN(len, n), op);
+ fmpz_mul(res, op, op);
+ fmpz_mul(res + 1, op, op + 1);
+ fmpz_mul_2exp(res + 1, res + 1, 1);
- _fmpz_vec_scalar_mul_fmpz(rop + len, op + 1, n - len, op + len - 1);
+ for (i = 2; i < FLINT_MIN(n, 2 * len - 3); i++)
+ {
+ start = FLINT_MAX(0, i - len + 1);
+ stop = FLINT_MIN(len - 1, (i + 1) / 2 - 1);
- for (i = 1; i < len - 1; i++)
- _fmpz_vec_scalar_addmul_fmpz(rop + i + 1,
- op + 1, FLINT_MIN(i - 1, n - (i + 1)), op + i);
+ _fmpz_vec_dot_general(res + i, NULL, 0, op + start, op + i - stop, 1, stop - start + 1);
+ fmpz_mul_2exp(res + i, res + i, 1);
- for (i = 1; i < FLINT_MIN(2 * len - 2, n); i++)
- fmpz_mul_ui(rop + i, rop + i, 2);
+ if (i % 2 == 0 && i / 2 < len)
+ fmpz_addmul(res + i, op + i / 2, op + i / 2);
+ }
- for (i = 1; i < FLINT_MIN(len - 1, (n + 1) / 2); i++)
- fmpz_addmul(rop + 2 * i, op + i, op + i);
+ if (len > 2 && n >= 2 * len - 2)
+ {
+ fmpz_mul(res + 2 * len - 3, op + len - 1, op + len - 2);
+ fmpz_mul_2exp(res + 2 * len - 3, res + 2 * len - 3, 1);
}
+
+ if (n >= 2 * len - 1)
+ fmpz_mul(res + 2 * len - 2, op + len - 1, op + len - 1);
}
void
diff --git a/src/fmpz_vec.h b/src/fmpz_vec.h
index 8250567429..4606e43c60 100644
--- a/src/fmpz_vec.h
+++ b/src/fmpz_vec.h
@@ -256,10 +256,16 @@ void _fmpz_vec_lcm(fmpz_t res, const fmpz * vec, slong len);
/* Dot product *************************************************************/
-void _fmpz_vec_dot(fmpz_t res, const fmpz * vec1, const fmpz * vec2, slong len2);
+void _fmpz_vec_dot_general_naive(fmpz_t res, const fmpz_t initial, int subtract,
+ const fmpz * a, const fmpz * b, int reverse, slong len);
+void _fmpz_vec_dot_general(fmpz_t res, const fmpz_t initial, int subtract,
+ const fmpz * a, const fmpz * b, int reverse, slong len);
-void _fmpz_vec_dot_ptr(fmpz_t c, const fmpz * vec1,
- fmpz ** const vec2, slong offset, slong len);
+FMPZ_VEC_INLINE
+void _fmpz_vec_dot(fmpz_t res, const fmpz * vec1, const fmpz * vec2, slong len2)
+{
+ _fmpz_vec_dot_general(res, NULL, 0, vec1, vec2, 0, len2);
+}
#ifdef __cplusplus
}
diff --git a/src/fmpz_vec/dot.c b/src/fmpz_vec/dot.c
index 5c0e1ca75a..548c71bf2a 100644
--- a/src/fmpz_vec/dot.c
+++ b/src/fmpz_vec/dot.c
@@ -1,7 +1,5 @@
/*
- Copyright (C) 2010, 2020 William Hart
- Copyright (C) 2010, 2011 Fredrik Johansson
- Copyright (C) 2014 Abhinav Baid
+ Copyright (C) 2024 Fredrik Johansson
This file is part of FLINT.
@@ -11,26 +9,394 @@
(at your option) any later version. See .
*/
+#include "mpn_extras.h"
#include "fmpz.h"
+#include "fmpz_extras.h"
#include "fmpz_vec.h"
void
-_fmpz_vec_dot(fmpz_t res, const fmpz * vec1, const fmpz * vec2, slong len2)
+_fmpz_vec_dot_general_naive(fmpz_t res, const fmpz_t initial,
+ int subtract, const fmpz * a, const fmpz * b, int reverse, slong len)
{
slong i;
- fmpz_zero(res);
- for (i = 0; i < len2; i++)
- fmpz_addmul(res, vec1 + i, vec2 + i);
+ if (initial == NULL)
+ {
+ if (len == 0)
+ {
+ fmpz_zero(res);
+ return;
+ }
+
+ fmpz_mul(res, a, reverse ? b + len - 1 : b);
+
+ if (subtract)
+ {
+ fmpz_neg(res, res);
+ for (i = 1; i < len; i++)
+ fmpz_submul(res, a + i, reverse ? b + len - 1 - i : b + i);
+ }
+ else
+ {
+ for (i = 1; i < len; i++)
+ fmpz_addmul(res, a + i, reverse ? b + len - 1 - i : b + i);
+ }
+ }
+ else
+ {
+ if (res != initial)
+ fmpz_set(res, initial);
+
+ if (subtract)
+ for(i = 0; i < len; i++)
+ fmpz_submul(res, a + i, reverse ? b + len - 1 - i : b + i);
+ else
+ for(i = 0; i < len; i++)
+ fmpz_addmul(res, a + i, reverse ? b + len - 1 - i : b + i);
+ }
+}
+
+#define INITIAL_ALLOC 64
+
+#define FMPZ_GET_MPN(xptr, xn, xneg, xtmplimb, x) \
+ if (!COEFF_IS_MPZ(x)) \
+ { \
+ (xtmplimb) = FLINT_ABS(x); \
+ (xptr) = &(xtmplimb); \
+ (xn) = 1; \
+ (xneg) = ((x) < 0); \
+ } \
+ else \
+ { \
+ __mpz_struct * __z1 = COEFF_TO_PTR(x); \
+ (xptr) = __z1->_mp_d; \
+ (xn) = FLINT_ABS(__z1->_mp_size); \
+ (xneg) = (__z1->_mp_size < 0); \
+ }
+
+/* (s,sn) = (a,an) + (b,bn). Allows an == 0 but not bn == 0. */
+#define MPN_ADD(s, sn, a, an, b, bn) \
+ do { \
+ if ((an) == 0) \
+ { \
+ FLINT_SWAP(mp_ptr, s, b); \
+ (sn) = (bn); \
+ } \
+ else if ((an) >= (bn)) \
+ { \
+ mp_limb_t __cy; \
+ (s)[(an)] = __cy = mpn_add((s), (a), (an), (b), (bn)); \
+ (sn) = (an) + (__cy != 0); \
+ } \
+ else \
+ { \
+ mp_limb_t __cy; \
+ (s)[(bn)] = __cy = mpn_add((s), (b), (bn), (a), (an)); \
+ (sn) = (bn) + (__cy != 0); \
+ } \
+ } while (0)
+
+/* (s,sn) = (s,sn) + (a,an) * b. Allows sn == 0 but not an == 0. */
+#define MPN_ADDMUL_1(s, sn, a, an, b) \
+ do { \
+ mp_limb_t __cy; \
+ if ((sn) >= (an)) \
+ { \
+ FLINT_ASSERT((an) != 0); \
+ __cy = mpn_addmul_1((s), (a), (an), (b)); \
+ if ((sn) > (an)) \
+ __cy = mpn_add_1((s) + (an), (s) + (an), (sn) - (an), __cy); \
+ (s)[(sn)] = __cy; \
+ (sn) += (__cy != 0); \
+ } \
+ else \
+ { \
+ (s)[(an)] = mpn_mul_1((s) + (sn), (a) + (sn), (an) - (sn), (b)); \
+ if ((sn) != 0) \
+ { \
+ __cy = mpn_addmul_1((s), (a), (sn), (b)); \
+ (s)[(an)] += mpn_add_1((s) + (sn), (s) + (sn), (an) - (sn), __cy); \
+ } \
+ (sn) = (an) + ((s)[(an)] != 0); \
+ } \
+ } while (0) \
+
+
+FLINT_STATIC_NOINLINE
+void _fmpz_set_mpn(fmpz_t res, mp_srcptr x, mp_size_t xn, int neg)
+{
+ if (xn <= 1 && x[0] <= COEFF_MAX)
+ {
+ _fmpz_demote(res);
+ *res = neg ? -(slong) x[0] : x[0];
+ }
+ else
+ {
+ fmpz_set_mpn_large(res, x, xn, neg);
+ }
}
void
-_fmpz_vec_dot_ptr(fmpz_t c, const fmpz * vec1, fmpz ** const vec2,
- slong offset, slong len)
+_fmpz_vec_dot_general(fmpz_t res, const fmpz_t initial, int subtract,
+ const fmpz * a, const fmpz * b, int reverse, slong len)
{
+ mp_limb_t tmp1[INITIAL_ALLOC + 2];
+ mp_limb_t tmp2[INITIAL_ALLOC + 2];
+ mp_limb_t tmp3[INITIAL_ALLOC + 2];
+ mp_size_t alloc = INITIAL_ALLOC;
+ mp_size_t new_alloc;
+
+ /* We maintain separate sums for small terms, large positive terms,
+ and large negative terms, the idea being to have fewer
+ adjustments in the main loop in exchange for some added
+ complexity combining things in the end. Should profile
+ alternative strategies. */
+ mp_limb_t s0 = 0, s1 = 0, s2 = 0;
+ mp_ptr neg = tmp1;
+ mp_ptr pos = tmp2;
+ mp_size_t posn = 0, negn = 0;
+
+ /* Temporary space for products. */
+ mp_ptr t = tmp3;
+ mp_size_t tn;
+
+ mp_ptr tmp_heap = NULL;
+
slong i;
- fmpz_zero(c);
+ if (len <= 1)
+ {
+ if (initial == NULL)
+ {
+ if (len == 1)
+ {
+ fmpz_mul(res, a, b);
+ if (subtract)
+ fmpz_neg(res, res);
+ }
+ else
+ fmpz_zero(res);
+ }
+ else
+ {
+ if (res != initial)
+ fmpz_set(res, initial);
+
+ if (subtract)
+ {
+ if (len == 1)
+ fmpz_submul(res, a, b);
+ }
+ else
+ {
+ if (len == 1)
+ fmpz_addmul(res, a, b);
+ }
+ }
+ return;
+ }
+
+ if (initial != NULL)
+ {
+ fmpz ca;
+ mp_limb_t atmp;
+ mp_srcptr ap;
+ mp_size_t an;
+ int aneg;
+
+ ca = *initial;
+ FMPZ_GET_MPN(ap, an, aneg, atmp, ca);
+
+ if (an <= 2)
+ {
+ s0 = ap[0];
+ if (an == 2)
+ s1 = ap[1];
+
+ if (aneg ^ subtract)
+ sub_dddmmmsss(s2, s1, s0, 0, 0, 0, 0, s1, s0);
+ }
+ else
+ {
+ if (an > INITIAL_ALLOC)
+ {
+ new_alloc = an + 4;
+
+ tmp_heap = flint_malloc(3 * (new_alloc + 2) * sizeof(mp_limb_t));
+
+ t = tmp_heap;
+ pos = t + (new_alloc + 2);
+ neg = pos + (new_alloc + 2);
+
+ alloc = new_alloc;
+ }
+
+ if (aneg ^ subtract)
+ {
+ flint_mpn_copyi(neg, ap, an);
+ negn = an;
+ }
+ else
+ {
+ flint_mpn_copyi(pos, ap, an);
+ posn = an;
+ }
+ }
+ }
+
for (i = 0; i < len; i++)
- fmpz_addmul(c, vec1 + i, vec2[i] + offset);
+ {
+ fmpz ca, cb;
+ mp_limb_t atmp, btmp;
+ mp_srcptr ap, bp;
+ mp_size_t an, bn;
+ mp_limb_t cy;
+ int aneg, bneg;
+
+ ca = a[i];
+ if (ca == 0)
+ continue;
+
+ cb = reverse ? b[len - 1 - i] : b[i];
+ if (cb == 0)
+ continue;
+
+ if (!COEFF_IS_MPZ(ca) && !COEFF_IS_MPZ(cb))
+ {
+ mp_limb_t hi, lo;
+ smul_ppmm(hi, lo, ca, cb);
+ add_sssaaaaaa(s2, s1, s0, s2, s1, s0, FLINT_SIGN_EXT(hi), hi, lo);
+ continue;
+ }
+
+ FMPZ_GET_MPN(ap, an, aneg, atmp, ca);
+ FMPZ_GET_MPN(bp, bn, bneg, btmp, cb);
+ tn = an + bn;
+
+ if (tn > alloc)
+ {
+ mp_ptr p1, p2, p3;
+
+ new_alloc = FLINT_MAX(3 * alloc / 2, tn + 4);
+
+ p1 = flint_malloc(3 * (new_alloc + 2) * sizeof(mp_limb_t));
+ p2 = p1 + (new_alloc + 2);
+ p3 = p2 + (new_alloc + 2);
+
+ flint_mpn_copyi(p2, pos, posn);
+ flint_mpn_copyi(p3, neg, negn);
+ t = p1;
+ pos = p2;
+ neg = p3;
+
+ FLINT_SWAP(mp_ptr, tmp_heap, p1);
+
+ if (p1 != NULL)
+ flint_free(p1);
+
+ alloc = new_alloc;
+ }
+
+ if (an < bn)
+ {
+ FLINT_SWAP(mp_srcptr, ap, bp);
+ FLINT_SWAP(mp_size_t, an, bn);
+ }
+
+ if (bn == 1)
+ {
+ mp_limb_t b0 = bp[0];
+
+ if (aneg ^ bneg)
+ MPN_ADDMUL_1(neg, negn, ap, an, b0);
+ else
+ MPN_ADDMUL_1(pos, posn, ap, an, b0);
+ continue;
+ }
+
+ if (ap == bp && an == bn)
+ {
+ flint_mpn_sqr(t, ap, an);
+ cy = t[tn - 1];
+ }
+ else
+ {
+ cy = flint_mpn_mul(t, ap, an, bp, bn);
+ }
+
+ tn -= (cy == 0);
+
+ if (aneg ^ bneg)
+ MPN_ADD(neg, negn, neg, negn, t, tn);
+ else
+ MPN_ADD(pos, posn, pos, posn, t, tn);
+ }
+
+ /* There are only small terms. */
+ if (posn == 0 && negn == 0)
+ {
+ if (subtract)
+ sub_dddmmmsss(s2, s1, s0, 0, 0, 0, s2, s1, s0);
+
+ fmpz_set_signed_uiuiui(res, s2, s1, s0);
+ return;
+ }
+
+ /* Add small terms to large ones. */
+ if (((slong) s2 >= WORD(0)))
+ {
+ t[2] = s2;
+ t[1] = s1;
+ t[0] = s0;
+ MPN_ADD(pos, posn, pos, posn, t, 3);
+ }
+ else
+ {
+ sub_dddmmmsss(t[2], t[1], t[0], 0, 0, 0, s2, s1, s0);
+ MPN_ADD(neg, negn, neg, negn, t, 3);
+ }
+
+ MPN_NORM(pos, posn);
+ MPN_NORM(neg, negn);
+
+ if (negn == 0)
+ {
+ _fmpz_set_mpn(res, pos, posn, 0 ^ subtract);
+ }
+ else if (posn == 0)
+ {
+ _fmpz_set_mpn(res, neg, negn, 1 ^ subtract);
+ }
+ else
+ {
+ /* Do subtraction */
+ int tneg = 0;
+
+ if (posn > negn)
+ {
+ tn = posn;
+ }
+ else if (negn > posn)
+ {
+ tn = negn;
+ tneg = 1;
+ }
+ else if (posn != 0)
+ {
+ tn = posn;
+ if (mpn_cmp(pos, neg, tn) < 0)
+ tneg = 1;
+ }
+
+ if (tneg)
+ mpn_sub(t, neg, negn, pos, posn);
+ else
+ mpn_sub(t, pos, posn, neg, negn);
+
+ MPN_NORM(t, tn);
+ _fmpz_set_mpn(res, t, tn, tneg ^ subtract);
+ }
+
+ if (tmp_heap != NULL)
+ flint_free(tmp_heap);
}
diff --git a/src/fmpz_vec/test/main.c b/src/fmpz_vec/test/main.c
index 9a4681abc3..3fb5d04c70 100644
--- a/src/fmpz_vec/test/main.c
+++ b/src/fmpz_vec/test/main.c
@@ -17,6 +17,7 @@
#include "t-add.c"
#include "t-content.c"
#include "t-dot.c"
+#include "t-dot_general.c"
#include "t-get_d_vec_2exp.c"
#include "t-get_set_fft.c"
#include "t-get_set_nmod_vec.c"
@@ -61,6 +62,7 @@ test_struct tests[] =
TEST_FUNCTION(fmpz_vec_add),
TEST_FUNCTION(fmpz_vec_content),
TEST_FUNCTION(fmpz_vec_dot),
+ TEST_FUNCTION(fmpz_vec_dot_general),
TEST_FUNCTION(fmpz_vec_get_d_vec_2exp),
TEST_FUNCTION(fmpz_vec_get_set_fft),
TEST_FUNCTION(fmpz_vec_get_set_nmod_vec),
diff --git a/src/fmpz_vec/test/t-dot_general.c b/src/fmpz_vec/test/t-dot_general.c
new file mode 100644
index 0000000000..d78baef966
--- /dev/null
+++ b/src/fmpz_vec/test/t-dot_general.c
@@ -0,0 +1,82 @@
+/*
+ Copyright (C) 2024 Fredrik Johansson
+
+ This file is part of FLINT.
+
+ FLINT is free software: you can redistribute it and/or modify it under
+ the terms of the GNU Lesser General Public License (LGPL) as published
+ by the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version. See .
+*/
+
+#include "test_helpers.h"
+#include "fmpz.h"
+#include "fmpz_vec.h"
+
+TEST_FUNCTION_START(fmpz_vec_dot_general, state)
+{
+ int iter;
+
+ for (iter = 0; iter < 100000 * flint_test_multiplier(); iter++)
+ {
+ fmpz * a, * b;
+ fmpz_t s, t, c;
+ slong n, bits1, bits2;
+ int alias, negate, initial, reverse;
+
+ initial = n_randint(state, 2);
+ alias = n_randint(state, 2);
+ negate = n_randint(state, 2);
+ reverse = n_randint(state, 2);
+ n = n_randint(state, 8);
+
+ if (n_randint(state, 30) == 0)
+ bits1 = 2 + n_randint(state, 20000);
+ else
+ bits1 = 2 + n_randint(state, 1000);
+
+ bits2 = 2 + n_randint(state, 1000);
+
+ a = _fmpz_vec_init(n);
+ b = _fmpz_vec_init(n);
+ fmpz_init(s);
+ fmpz_init(t);
+ fmpz_init(c);
+
+ fmpz_randtest(c, state, bits1);
+ _fmpz_vec_randtest(a, state, n, bits1);
+ _fmpz_vec_randtest(b, state, n, bits2);
+
+ if (initial && alias)
+ {
+ fmpz_set(s, c);
+ fmpz_set(t, c);
+ _fmpz_vec_dot_general(s, s, negate, a, b, reverse, n);
+ _fmpz_vec_dot_general_naive(t, t, negate, a, b, reverse, n);
+ }
+ else
+ {
+ _fmpz_vec_dot_general(s, initial ? c : NULL, negate, a, b, reverse, n);
+ _fmpz_vec_dot_general_naive(t, initial ? c : NULL, negate, a, b, reverse, n);
+ }
+
+ if (!fmpz_equal(s, t) || !_fmpz_is_canonical(s))
+ {
+ flint_printf("negate = %d, initial = %d, reverse = %d, alias = %d\n", negate, initial, reverse, alias);
+ flint_printf("c = %{fmpz}\n\n", c);
+ flint_printf("a = %{fmpz*}\n\n", a, n);
+ flint_printf("b = %{fmpz*}\n\n", b, n);
+ flint_printf("s = %{fmpz}\n\n", s);
+ flint_printf("t = %{fmpz}\n\n", t);
+ flint_abort();
+ }
+
+ fmpz_clear(s);
+ fmpz_clear(t);
+ fmpz_clear(c);
+ _fmpz_vec_clear(a, n);
+ _fmpz_vec_clear(b, n);
+ }
+
+ TEST_FUNCTION_END(state);
+}
diff --git a/src/gr/fmpz.c b/src/gr/fmpz.c
index 80ec5dbb1c..d24743ce57 100644
--- a/src/gr/fmpz.c
+++ b/src/gr/fmpz.c
@@ -15,6 +15,7 @@
#include "fmpz_factor.h"
#include "fmpz_poly.h"
#include "fmpz_poly_factor.h"
+#include "fmpz_vec.h"
#include "fmpz_mat.h"
#include "fmpq.h"
#include "gr.h"
@@ -917,74 +918,14 @@ _gr_fmpz_vec_sum(fmpz_t res, const fmpz * vec, slong len, gr_ctx_t ctx)
int
_gr_fmpz_vec_dot(fmpz_t res, const fmpz_t initial, int subtract, const fmpz * vec1, const fmpz * vec2, slong len, gr_ctx_t ctx)
{
- slong i;
-
- if (len <= 0)
- {
- if (initial == NULL)
- fmpz_zero(res);
- else
- fmpz_set(res, initial);
- return GR_SUCCESS;
- }
-
- if (initial == NULL)
- {
- fmpz_mul(res, vec1, vec2);
- }
- else
- {
- if (subtract)
- fmpz_neg(res, initial);
- else
- fmpz_set(res, initial);
-
- fmpz_addmul(res, vec1, vec2);
- }
-
- for (i = 1; i < len; i++)
- fmpz_addmul(res, vec1 + i, vec2 + i);
-
- if (subtract)
- fmpz_neg(res, res);
-
+ _fmpz_vec_dot_general(res, initial, subtract, vec1, vec2, 0, len);
return GR_SUCCESS;
}
int
_gr_fmpz_vec_dot_rev(fmpz_t res, const fmpz_t initial, int subtract, const fmpz * vec1, const fmpz * vec2, slong len, gr_ctx_t ctx)
{
- slong i;
-
- if (len <= 0)
- {
- if (initial == NULL)
- fmpz_zero(res);
- else
- fmpz_set(res, initial);
- return GR_SUCCESS;
- }
-
- if (initial == NULL)
- {
- fmpz_mul(res, vec1, vec2 + len - 1);
- }
- else
- {
- if (subtract)
- fmpz_neg(res, initial);
- else
- fmpz_set(res, initial);
-
- fmpz_addmul(res, vec1, vec2 + len - 1);
- }
-
- for (i = 1; i < len; i++)
- fmpz_addmul(res, vec1 + i, vec2 + len - 1 - i);
-
- if (subtract)
- fmpz_neg(res, res);
-
+ _fmpz_vec_dot_general(res, initial, subtract, vec1, vec2, 1, len);
return GR_SUCCESS;
}
diff --git a/src/gr/fmpz_mod.c b/src/gr/fmpz_mod.c
index 99101bc6d6..58e3b97e3a 100644
--- a/src/gr/fmpz_mod.c
+++ b/src/gr/fmpz_mod.c
@@ -11,6 +11,7 @@
#include "fmpz.h"
#include "fmpz_factor.h"
+#include "fmpz_vec.h"
#include "fmpz_mod.h"
#include "fmpz_mod_mat.h"
#include "fmpz_mod_poly.h"
@@ -403,8 +404,6 @@ _gr_fmpz_mod_pow_fmpz(fmpz_t res, const fmpz_t x, const fmpz_t exp, const gr_ctx
int
_gr_fmpz_mod_vec_dot(fmpz_t res, const fmpz_t initial, int subtract, const fmpz * vec1, const fmpz * vec2, slong len, gr_ctx_t ctx)
{
- slong i;
-
if (len <= 0)
{
if (initial == NULL)
@@ -414,28 +413,8 @@ _gr_fmpz_mod_vec_dot(fmpz_t res, const fmpz_t initial, int subtract, const fmpz
return GR_SUCCESS;
}
- if (initial == NULL)
- {
- fmpz_mul(res, vec1, vec2);
- }
- else
- {
- if (subtract)
- fmpz_neg(res, initial);
- else
- fmpz_set(res, initial);
-
- fmpz_addmul(res, vec1, vec2);
- }
-
- for (i = 1; i < len; i++)
- fmpz_addmul(res, vec1 + i, vec2 + i);
-
- if (subtract)
- fmpz_neg(res, res);
-
+ _fmpz_vec_dot_general(res, initial, subtract, vec1, vec2, 0, len);
fmpz_mod_set_fmpz(res, res, FMPZ_MOD_CTX(ctx));
-
return GR_SUCCESS;
}
@@ -443,8 +422,6 @@ _gr_fmpz_mod_vec_dot(fmpz_t res, const fmpz_t initial, int subtract, const fmpz
int
_gr_fmpz_mod_vec_dot_rev(fmpz_t res, const fmpz_t initial, int subtract, const fmpz * vec1, const fmpz * vec2, slong len, gr_ctx_t ctx)
{
- slong i;
-
if (len <= 0)
{
if (initial == NULL)
@@ -454,28 +431,8 @@ _gr_fmpz_mod_vec_dot_rev(fmpz_t res, const fmpz_t initial, int subtract, const f
return GR_SUCCESS;
}
- if (initial == NULL)
- {
- fmpz_mul(res, vec1, vec2 + len - 1);
- }
- else
- {
- if (subtract)
- fmpz_neg(res, initial);
- else
- fmpz_set(res, initial);
-
- fmpz_addmul(res, vec1, vec2 + len - 1);
- }
-
- for (i = 1; i < len; i++)
- fmpz_addmul(res, vec1 + i, vec2 + len - 1 - i);
-
- if (subtract)
- fmpz_neg(res, res);
-
+ _fmpz_vec_dot_general(res, initial, subtract, vec1, vec2, 1, len);
fmpz_mod_set_fmpz(res, res, FMPZ_MOD_CTX(ctx));
-
return GR_SUCCESS;
}
diff --git a/src/nmod_poly/power_sums.c b/src/nmod_poly/power_sums.c
index af187d0e2d..78625f504d 100644
--- a/src/nmod_poly/power_sums.c
+++ b/src/nmod_poly/power_sums.c
@@ -78,6 +78,7 @@ nmod_poly_power_sums(nmod_poly_t res, const nmod_poly_t poly, slong n)
}
}
+/* todo: should use dot products */
void
_nmod_poly_power_sums_naive(mp_ptr res, mp_srcptr poly, slong len, slong n,
nmod_t mod)
diff --git a/src/nmod_poly/power_sums_to_poly.c b/src/nmod_poly/power_sums_to_poly.c
index ab351b89b3..1281b55dec 100644
--- a/src/nmod_poly/power_sums_to_poly.c
+++ b/src/nmod_poly/power_sums_to_poly.c
@@ -56,6 +56,7 @@ nmod_poly_power_sums_to_poly(nmod_poly_t res, const nmod_poly_t Q)
}
}
+/* todo: should use dot products */
void
_nmod_poly_power_sums_to_poly_naive(mp_ptr res, mp_srcptr poly, slong len,
nmod_t mod)