Split the single fft_small threshold into separate multiplication and squaring
thresholds (FLINT_FFT_SMALL_MUL_THRESHOLD / FLINT_FFT_SMALL_SQR_THRESHOLD) in
the CMake config template, configure.ac and src/mpn_extras.h.  Every arm of the
configure.ac case statement must set both fft_small_mul_threshold and
fft_small_sqr_threshold, since both are expanded by AC_DEFINE_UNQUOTED below it.

diff --git a/CMake/cmake_config.h.in b/CMake/cmake_config.h.in
index faa5d2591a..b146014dd6 100644
--- a/CMake/cmake_config.h.in
+++ b/CMake/cmake_config.h.in
@@ -16,12 +16,15 @@
 
 #cmakedefine01 FLINT_USES_FENV
 
-#cmakedefine FLINT_HAVE_FFT_SMALL
+#cmakedefine01 FLINT_HAVE_FFT_SMALL
 
 #cmakedefine01 FLINT_KNOW_STRONG_ORDER
 
+#if FLINT_HAVE_FFT_SMALL
 /* Just set to some reasonable threshold */
-#define FLINT_FFT_SMALL_THRESHOLD 600
+# define FLINT_FFT_SMALL_MUL_THRESHOLD 800
+# define FLINT_FFT_SMALL_SQR_THRESHOLD 1400
+#endif
 
 #ifdef _MSC_VER
 # if defined(FLINT_BUILD_DLL)
diff --git a/configure.ac b/configure.ac
index b7bc78e2f9..254864d880 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1003,18 +1003,31 @@ FLINT_CHECK_FFT_SMALL(
     [AC_SUBST(FFT_SMALL, [fft_small\ \ \ ])
      AC_DEFINE(FLINT_HAVE_FFT_SMALL, 1, [Define to use the fft_small module])
 
+     dnl FIXME: Push different configuration files instead of hardcoding them here.
      case $flint_cv_arch in
          FAST_VROUNDPD_PATTERN)
-             fft_small_threshold="400"
+             dnl Zen 3
+             fft_small_mul_threshold="400"
+             fft_small_sqr_threshold="800"
              ;;
          SLOW_VROUNDPD_PATTERN)
-             fft_small_threshold="1540"
+             dnl Skylake
+             fft_small_mul_threshold="1540"
+             fft_small_sqr_threshold="3080"
+             ;;
+         ARM64_PATTERN)
+             dnl Apple M1
+             fft_small_mul_threshold="810"
+             fft_small_sqr_threshold="865"
              ;;
          *)
-             fft_small_threshold="500"
+             dnl Be conservative here
+             fft_small_mul_threshold="800"
+             fft_small_sqr_threshold="1400"
              ;;
      esac
-     AC_DEFINE_UNQUOTED(FLINT_FFT_SMALL_THRESHOLD,[$fft_small_threshold],[Define to set threshold for when to use fft_small module])],
+     AC_DEFINE_UNQUOTED(FLINT_FFT_SMALL_MUL_THRESHOLD,[$fft_small_mul_threshold],[Define to set threshold for when to use multiplication with fft_small module])
+     AC_DEFINE_UNQUOTED(FLINT_FFT_SMALL_SQR_THRESHOLD,[$fft_small_sqr_threshold],[Define to set threshold for when to use squaring with fft_small module])],
     [AC_SUBST(FFT_SMALL, [\ \ \ \ \ \ \ \ \ \ \ \ ])])
 
 ################################################################################
diff --git a/src/mpn_extras.h b/src/mpn_extras.h
index 3d89405c33..3cf2fbf40c 100644
--- a/src/mpn_extras.h
+++ b/src/mpn_extras.h
@@ -130,9 +130,9 @@ mp_limb_t flint_mpn_2add_n_inplace(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
 
 /* General multiplication ****************************************************/
 
-#ifdef FLINT_HAVE_FFT_SMALL
-# define FLINT_FFT_MUL_THRESHOLD FLINT_FFT_SMALL_THRESHOLD
-# define FLINT_FFT_SQR_THRESHOLD (2 * FLINT_FFT_SMALL_THRESHOLD)
+#if FLINT_HAVE_FFT_SMALL
+# define FLINT_FFT_MUL_THRESHOLD FLINT_FFT_SMALL_MUL_THRESHOLD
+# define FLINT_FFT_SQR_THRESHOLD FLINT_FFT_SMALL_SQR_THRESHOLD
 #else
 /* FLINT's FFT can beat GMP below this threshold but apparently
    not consistently. Something needs retuning? */