diff --git a/include/finufft/defs.h b/include/finufft/defs.h index c8ef5a11..42e5e7ff 100644 --- a/include/finufft/defs.h +++ b/include/finufft/defs.h @@ -18,6 +18,7 @@ // public header gives access to f_opts, f_spread_opts, f_plan... // (and clobbers FINUFFT* macros; watch out!) #include +#include // --------------- Private data types for compilation in either prec --------- // Devnote: must match those in relevant prec of public finufft.h interface! @@ -89,7 +90,7 @@ inline constexpr BIGINT MAX_NF = BIGINT(1e12); inline constexpr BIGINT MAX_NU_PTS = BIGINT(1e14); // -------------- Math consts (not in math.h) and useful math macros ---------- -#include +#include // either-precision unit imaginary number... #define IMA (CPX(0.0, 1.0)) @@ -118,11 +119,11 @@ inline constexpr BIGINT MAX_NU_PTS = BIGINT(1e14); // These macros should probably be replaced by modern C++ std lib or random123. // (RAND_MAX is in stdlib.h) #include -static inline FLT rand01() { return FLT(rand()) / FLT(RAND_MAX); } +static inline FLT rand01 [[maybe_unused]] () { return FLT(rand()) / FLT(RAND_MAX); } // unif[-1,1]: -static inline FLT randm11() { return 2 * rand01() - FLT(1); } +static inline FLT randm11 [[maybe_unused]] () { return 2 * rand01() - FLT(1); } // complex unif[-1,1] for Re and Im: -static inline CPX crandm11() { return randm11() + IMA * randm11(); } +static inline CPX crandm11 [[maybe_unused]] () { return randm11() + IMA * randm11(); } // Thread-safe seed-carrying versions of above (x is ptr to seed)... // MR: we have to leave those as macros for now, as "rand_r" is deprecated @@ -134,11 +135,17 @@ static inline CPX crandm11() { return randm11() + IMA * randm11(); } // complex unif[-1,1] for Re and Im: #define crandm11r(x) (randm11r(x) + IMA * randm11r(x)) #else -static inline FLT rand01r(unsigned int *x) { return FLT(rand_r(x)) / FLT(RAND_MAX); } +static inline FLT rand01r [[maybe_unused]] (unsigned int *x) { + return FLT(rand_r(x)) / FLT(RAND_MAX); +} // unif[-1,1]: -static inline FLT randm11r(unsigned int *x) { return 2 * rand01r(x) - FLT(1); } +static inline FLT randm11r [[maybe_unused]] (unsigned int *x) { + return 2 * rand01r(x) - FLT(1); +} // complex unif[-1,1] for Re and Im: -static inline CPX crandm11r(unsigned int *x) { return randm11r(x) + IMA * randm11r(x); } +static inline CPX crandm11r [[maybe_unused]] (unsigned int *x) { + return randm11r(x) + IMA * randm11r(x); +} #endif // ----- OpenMP macros which also work when omp not present ----- @@ -147,16 +154,24 @@ static inline CPX crandm11r(unsigned int *x) { return randm11r(x) + IMA * randm1 #ifdef _OPENMP #include // point to actual omp utils -static inline int MY_OMP_GET_NUM_THREADS() { return omp_get_num_threads(); } -static inline int MY_OMP_GET_MAX_THREADS() { return omp_get_max_threads(); } -static inline int MY_OMP_GET_THREAD_NUM() { return omp_get_thread_num(); } -static inline void MY_OMP_SET_NUM_THREADS(int x) { omp_set_num_threads(x); } +static inline int MY_OMP_GET_NUM_THREADS [[maybe_unused]] () { + return omp_get_num_threads(); +} +static inline int MY_OMP_GET_MAX_THREADS [[maybe_unused]] () { + return omp_get_max_threads(); +} +static inline int MY_OMP_GET_THREAD_NUM [[maybe_unused]] () { + return omp_get_thread_num(); +} +static inline void MY_OMP_SET_NUM_THREADS [[maybe_unused]] (int x) { + omp_set_num_threads(x); +} #else // non-omp safe dummy versions of omp utils... -static inline int MY_OMP_GET_NUM_THREADS() { return 1; } -static inline int MY_OMP_GET_MAX_THREADS() { return 1; } -static inline int MY_OMP_GET_THREAD_NUM() { return 0; } -static inline void MY_OMP_SET_NUM_THREADS(int) {} +static inline int MY_OMP_GET_NUM_THREADS [[maybe_unused]] () { return 1; } +static inline int MY_OMP_GET_MAX_THREADS [[maybe_unused]] () { return 1; } +static inline int MY_OMP_GET_THREAD_NUM [[maybe_unused]] () { return 0; } +static inline void MY_OMP_SET_NUM_THREADS [[maybe_unused]] (int) {} #endif // Prec-switching name macros (respond to SINGLE), used in lib & test sources @@ -212,10 +227,17 @@ template struct type3params { T X3, C3, D3, h3, gam3; // z }; -typedef struct FINUFFT_PLAN_S { // the main plan object, fully C++ - - int type; // transform type (Rokhlin naming): 1,2 or 3 - int dim; // overall dimension: 1,2 or 3 +struct FINUFFT_PLAN_S { // the main plan object, fully C++ + // These default and delete specifications just state the obvious, + // but are here to silence compiler warnings. + FINUFFT_PLAN_S() = default; + // Copy construction and assignent are already deleted implicitly + // because of the unique_ptr member. + FINUFFT_PLAN_S(const FINUFFT_PLAN_S &) = delete; + FINUFFT_PLAN_S &operator=(const FINUFFT_PLAN_S &) = delete; + + int type; // transform type (Rokhlin naming): 1,2 or 3 + int dim; // overall dimension: 1,2 or 3 int ntrans; // how many transforms to do at once (vector or "many" mode) BIGINT nj; // num of NU pts in type 1,2 (for type 3, num input x pts) BIGINT nk; // number of NU freq pts (type 3 only) @@ -258,12 +280,9 @@ typedef struct FINUFFT_PLAN_S { // the main plan object, fully C++ FINUFFT_PLAN innerT2plan; // ptr used for type 2 in step 2 of type 3 // other internal structs; each is C-compatible of course -#ifndef FINUFFT_USE_DUCC0 - FFTW_PLAN fftwPlan; -#endif + std::unique_ptr> fftPlan; finufft_opts opts; // this and spopts could be made ptrs finufft_spread_opts spopts; - -} FINUFFT_PLAN_S; +}; #endif // DEFS_H diff --git a/include/finufft/fft.h b/include/finufft/fft.h index 906f6244..bab43966 100644 --- a/include/finufft/fft.h +++ b/include/finufft/fft.h @@ -1,18 +1,189 @@ #ifndef FINUFFT_INCLUDE_FINUFFT_FFT_H #define FINUFFT_INCLUDE_FINUFFT_FFT_H +#include + #ifdef FINUFFT_USE_DUCC0 -#include "ducc0/fft/fftnd_impl.h" -// temporary hacks to allow compilation of tests that assume FFTW is used -static inline void FFTW_FORGET_WISDOM() {} -static inline void FFTW_CLEANUP() {} -static inline void FFTW_CLEANUP_THREADS() {} +#include + +template class Finufft_FFT_plan { +public: + [[maybe_unused]] Finufft_FFT_plan(void (*)(void *) = nullptr, + void (*)(void *) = nullptr, void * = nullptr) {} + [[maybe_unused]] void plan(const std::vector & /*dims*/, size_t /*batchSize*/, + std::complex * /*ptr*/, int /*sign*/, int /*options*/, + int /*nthreads*/) {} + [[maybe_unused]] static std::complex *alloc_complex(size_t N) { + return new std::complex[N]; + } + [[maybe_unused]] static void free(std::complex *ptr) { delete[] ptr; } + + [[maybe_unused]] static void forget_wisdom() {} + [[maybe_unused]] static void cleanup() {} + [[maybe_unused]] static void cleanup_threads() {} +}; + #else -#include "fftw_defs.h" + +//clang-format off +#include +#include // (after complex) needed so can typedef FFTW_CPX +//clang-format on +#include + +template class Finufft_FFT_plan {}; + +template<> struct Finufft_FFT_plan { +private: + static std::mutex &mut() { + static std::mutex mut_; + return mut_; + } + fftwf_plan plan_; + + void (*fftw_lock_fun)(void *); // Function ptr that locks the FFTW planner + void (*fftw_unlock_fun)(void *); // Function ptr that unlocks the FFTW planner + void *lock_data; + void lock() { fftw_lock_fun ? fftw_lock_fun(lock_data) : mut().lock(); } + void unlock() { fftw_lock_fun ? fftw_unlock_fun(lock_data) : mut().unlock(); } + +public: + [[maybe_unused]] Finufft_FFT_plan(void (*fftw_lock_fun_)(void *) = nullptr, + void (*fftw_unlock_fun_)(void *) = nullptr, + void *lock_data_ = nullptr) + : plan_(nullptr), fftw_lock_fun(fftw_lock_fun_), fftw_unlock_fun(fftw_unlock_fun_), + lock_data(lock_data_) { + lock(); +#ifdef _OPENMP + static bool initialized = false; + if (!initialized) { + fftwf_init_threads(); + initialized = true; + } +#endif + unlock(); + } + [[maybe_unused]] ~Finufft_FFT_plan() { + lock(); + fftwf_destroy_plan(plan_); + unlock(); + } + + void plan + [[maybe_unused]] (const std::vector &dims, size_t batchSize, + std::complex *ptr, int sign, int options, int nthreads) { + uint64_t nf = 1; + for (auto i : dims) nf *= i; + lock(); +#ifdef _OPENMP + fftwf_plan_with_nthreads(nthreads); +#endif + plan_ = fftwf_plan_many_dft(int(dims.size()), dims.data(), int(batchSize), + reinterpret_cast(ptr), nullptr, 1, + int(nf), reinterpret_cast(ptr), nullptr, + 1, int(nf), sign, unsigned(options)); + unlock(); + } + static std::complex *alloc_complex [[maybe_unused]] (size_t N) { + return reinterpret_cast *>(fftwf_alloc_complex(N)); + } + static void free [[maybe_unused]] (std::complex *ptr) { + if (ptr) fftwf_free(reinterpret_cast(ptr)); + } + void execute [[maybe_unused]] () { fftwf_execute(plan_); } + + static void forget_wisdom [[maybe_unused]] () { fftwf_forget_wisdom(); } + static void cleanup [[maybe_unused]] () { fftwf_cleanup(); } + static void cleanup_threads [[maybe_unused]] () { +#ifdef _OPENMP + fftwf_cleanup_threads(); #endif + } +}; + +template<> struct Finufft_FFT_plan { +private: + static std::mutex &mut() { + static std::mutex mut_; + return mut_; + } + fftw_plan plan_; + + void (*fftw_lock_fun)(void *); // Function ptr that locks the FFTW planner + void (*fftw_unlock_fun)(void *); // Function ptr that unlocks the FFTW planner + void *lock_data; + void lock() { fftw_lock_fun ? fftw_lock_fun(lock_data) : mut().lock(); } + void unlock() { fftw_lock_fun ? fftw_unlock_fun(lock_data) : mut().unlock(); } + +public: + [[maybe_unused]] Finufft_FFT_plan(void (*fftw_lock_fun_)(void *) = nullptr, + void (*fftw_unlock_fun_)(void *) = nullptr, + void *lock_data_ = nullptr) + : plan_(nullptr), fftw_lock_fun(fftw_lock_fun_), fftw_unlock_fun(fftw_unlock_fun_), + lock_data(lock_data_) { + lock(); +#ifdef _OPENMP + static bool initialized = false; + if (!initialized) { + fftw_init_threads(); + initialized = true; + } +#endif + unlock(); + } + [[maybe_unused]] ~Finufft_FFT_plan() { + lock(); + fftw_destroy_plan(plan_); + unlock(); + } + + void plan + [[maybe_unused]] (const std::vector &dims, size_t batchSize, + std::complex *ptr, int sign, int options, int nthreads) { + uint64_t nf = 1; + for (auto i : dims) nf *= i; + lock(); +#ifdef _OPENMP + fftw_plan_with_nthreads(nthreads); +#endif + plan_ = fftw_plan_many_dft(int(dims.size()), dims.data(), int(batchSize), + reinterpret_cast(ptr), nullptr, 1, int(nf), + reinterpret_cast(ptr), nullptr, 1, int(nf), + sign, unsigned(options)); + unlock(); + } + static std::complex *alloc_complex [[maybe_unused]] (size_t N) { + return reinterpret_cast *>(fftw_alloc_complex(N)); + } + static void free [[maybe_unused]] (std::complex *ptr) { + fftw_free(reinterpret_cast(ptr)); + } + void execute [[maybe_unused]] () { fftw_execute(plan_); } + + static void forget_wisdom [[maybe_unused]] () { fftw_forget_wisdom(); } + static void cleanup [[maybe_unused]] () { fftw_cleanup(); } + static void cleanup_threads [[maybe_unused]] () { +#ifdef _OPENMP + fftw_cleanup_threads(); +#endif + } +}; + +#endif + #include -int *gridsize_for_fft(FINUFFT_PLAN p); +static inline void finufft_fft_forget_wisdom [[maybe_unused]] () { + Finufft_FFT_plan::forget_wisdom(); +} +static inline void finufft_fft_cleanup [[maybe_unused]] () { + Finufft_FFT_plan::cleanup(); +} +static inline void finufft_fft_cleanup_threads [[maybe_unused]] () { + Finufft_FFT_plan::cleanup_threads(); +} + +std::vector gridsize_for_fft(FINUFFT_PLAN p); void do_fft(FINUFFT_PLAN p); #endif // FINUFFT_INCLUDE_FINUFFT_FFT_H diff --git a/include/finufft/fftw_defs.h b/include/finufft/fftw_defs.h deleted file mode 100644 index 39f8d8c7..00000000 --- a/include/finufft/fftw_defs.h +++ /dev/null @@ -1,44 +0,0 @@ -// all FFTW-related private FINUFFT headers - -#ifndef FFTW_DEFS_H -#define FFTW_DEFS_H - -// Here we define typedefs and MACROS to switch between single and double -// precision library compilation, which need different FFTW command symbols. -// Barnett simplified via FFTWIFY, 6/7/22. - -#include // (after complex.h) needed so can typedef FFTW_CPX - -// precision-switching names for interfaces to FFTW... -#ifdef SINGLE -// macro to prepend fftw_ (for double) or fftwf_ (for single) to a string -// without a space. The 2nd level of indirection is needed for safety, see: -// https://isocpp.org/wiki/faq/misc-technical-issues#macros-with-token-pasting -#define FFTWIFY_UNSAFE(x) fftwf_##x -#else -#define FFTWIFY_UNSAFE(x) fftw_##x -#endif -#define FFTWIFY(x) FFTWIFY_UNSAFE(x) -// now use this tool (note we replaced typedefs v<=2.0.4, in favor of macros): -#define FFTW_CPX FFTWIFY(complex) -#define FFTW_PLAN FFTWIFY(plan) -#define FFTW_ALLOC_CPX FFTWIFY(alloc_complex) -#define FFTW_PLAN_MANY_DFT FFTWIFY(plan_many_dft) -#define FFTW_EX FFTWIFY(execute) -#define FFTW_DE FFTWIFY(destroy_plan) -#define FFTW_FR FFTWIFY(free) -#define FFTW_FORGET_WISDOM FFTWIFY(forget_wisdom) -#define FFTW_CLEANUP FFTWIFY(cleanup) -// the following OMP switch could be done in the src code instead... -#ifdef _OPENMP -#define FFTW_INIT FFTWIFY(init_threads) -#define FFTW_PLAN_TH FFTWIFY(plan_with_nthreads) -#define FFTW_CLEANUP_THREADS FFTWIFY(cleanup_threads) -#else -// no OMP (no fftw{f}_threads or _omp), need dummy fftw threads calls... -#define FFTW_INIT() -#define FFTW_PLAN_TH(x) -#define FFTW_CLEANUP_THREADS() -#endif - -#endif // FFTW_DEFS_H diff --git a/perftest/guru_timing_test.cpp b/perftest/guru_timing_test.cpp index 90055a36..b5030a85 100644 --- a/perftest/guru_timing_test.cpp +++ b/perftest/guru_timing_test.cpp @@ -145,9 +145,9 @@ int main(int argc, char *argv[]) } // Andrea found the following are needed to get reliable independent timings: - FFTW_CLEANUP(); - FFTW_CLEANUP_THREADS(); - FFTW_FORGET_WISDOM(); + finufft_fft_cleanup(); + finufft_fft_cleanup_threads(); + finufft_fft_forget_wisdom(); // std::this_thread::sleep_for(std::chrono::seconds(1)); sleep(tsleep); diff --git a/src/fft.cpp b/src/fft.cpp index 1b801a8b..bb7e3244 100644 --- a/src/fft.cpp +++ b/src/fft.cpp @@ -3,31 +3,23 @@ using namespace std; -int *gridsize_for_fft(FINUFFT_PLAN p) { +#ifdef FINUFFT_USE_DUCC0 +#include "ducc0/fft/fftnd_impl.h" +#endif + +std::vector gridsize_for_fft(FINUFFT_PLAN p) { // local helper func returns a new int array of length dim, extracted from // the finufft plan, that fftw_plan_many_dft needs as its 2nd argument. - int *nf; - if (p->dim == 1) { - nf = new int[1]; - nf[0] = (int)p->nf1; - } else if (p->dim == 2) { - nf = new int[2]; - nf[0] = (int)p->nf2; - nf[1] = (int)p->nf1; - } // fftw enforced row major ordering, ie dims are backwards ordered - else { - nf = new int[3]; - nf[0] = (int)p->nf3; - nf[1] = (int)p->nf2; - nf[2] = (int)p->nf1; - } - return nf; + if (p->dim == 1) return {(int)p->nf1}; + if (p->dim == 2) return {(int)p->nf2, (int)p->nf1}; + // if (p->dim == 3) + return {(int)p->nf3, (int)p->nf2, (int)p->nf1}; } void do_fft(FINUFFT_PLAN p) { #ifdef FINUFFT_USE_DUCC0 size_t nthreads = min(MY_OMP_GET_MAX_THREADS(), p->opts.nthreads); - int *ns = gridsize_for_fft(p); + const auto ns = gridsize_for_fft(p); vector arrdims, axes; arrdims.push_back(size_t(p->batchSize)); arrdims.push_back(size_t(ns[0])); @@ -60,8 +52,10 @@ void do_fft(FINUFFT_PLAN p) { else { size_t y_lo = size_t((p->ms + 1) / 2); size_t y_hi = size_t(ns[1] - p->ms / 2); - auto sub1 = ducc0::subarray(data, {{}, {}, {0, y_lo}}); - auto sub2 = ducc0::subarray(data, {{}, {}, {y_hi, ducc0::MAXIDX}}); + // the next line is analogous to the Python statement "sub1 = data[:, :, :y_lo]" + auto sub1 = ducc0::subarray(data, {{}, {}, {0, y_lo}}); + // the next line is analogous to the Python statement "sub2 = data[:, :, y_hi:]" + auto sub2 = ducc0::subarray(data, {{}, {}, {y_hi, ducc0::MAXIDX}}); if (p->type == 1) // spreading, not all parts of the output array are needed // do axis 2 in full ducc0::c2c(data, data, {2}, p->fftSign < 0, FLT(1), nthreads); @@ -108,8 +102,7 @@ void do_fft(FINUFFT_PLAN p) { } } #endif - delete[] ns; #else - FFTW_EX(p->fftwPlan); // if thisBatchSizefftPlan->execute(); // if thisBatchSize #include "../contrib/legendre_rule_fast.h" +#include +#include +#include #include #include -#include -#include -#include -#include +#include #include using namespace std; @@ -85,42 +85,13 @@ Design notes for guru interface implementation: namespace finufft { namespace common { -#ifndef FINUFFT_USE_DUCC0 -// Technically global state... -// Needs to be static to avoid name collision with SINGLE/DOUBLE -static std::mutex fftw_lock; - -class FFTWLockGuard { -public: - FFTWLockGuard(void (*lock_fun)(void *), void (*unlock_fun)(void *), void *lock_data) - : unlock_fun_(unlock_fun), lock_data_(lock_data), fftw_lock_(fftw_lock) { - if (lock_fun) - lock_fun(lock_data_); - else - fftw_lock_.lock(); - } - ~FFTWLockGuard() { - if (unlock_fun_) - unlock_fun_(lock_data_); - else - fftw_lock_.unlock(); - } - -private: - void (*unlock_fun_)(void *); - void *lock_data_; - std::mutex &fftw_lock_; -}; - -#endif - static int set_nf_type12(BIGINT ms, finufft_opts opts, finufft_spread_opts spopts, BIGINT *nf) // Type 1 & 2 recipe for how to set 1d size of upsampled array, nf, given opts // and requested number of Fourier modes ms. Returns 0 if success, else an // error code if nf was unreasonably big (& tell the world). { - *nf = (BIGINT)(opts.upsampfac * ms); // manner of rounding not crucial + *nf = BIGINT(opts.upsampfac * double(ms)); // manner of rounding not crucial if (*nf < 2 * spopts.nspread) *nf = 2 * spopts.nspread; // otherwise spread fails if (*nf < MAX_NF) { *nf = next235even(*nf); // expensive at huge nf @@ -185,7 +156,7 @@ void set_nhg_type3(FLT S, FLT X, finufft_opts opts, finufft_spread_opts spopts, else Ssafe = max(Ssafe, 1 / X); // use the safe X and S... - FLT nfd = 2.0 * opts.upsampfac * Ssafe * Xsafe / PI + nss; + auto nfd = FLT(2.0 * opts.upsampfac * Ssafe * Xsafe / PI + nss); if (!isfinite(nfd)) nfd = 0.0; // use FLT to catch inf *nf = (BIGINT)nfd; // printf("initial nf=%lld, ns=%d\n",*nf,spopts.nspread); @@ -193,8 +164,8 @@ void set_nhg_type3(FLT S, FLT X, finufft_opts opts, finufft_spread_opts spopts, if (*nf < 2 * spopts.nspread) *nf = 2 * spopts.nspread; if (*nf < MAX_NF) // otherwise will fail anyway *nf = next235even(*nf); // expensive at huge nf - *h = 2 * PI / *nf; // upsampled grid spacing - *gam = (FLT)*nf / (2.0 * opts.upsampfac * Ssafe); // x scale fac to x' + *h = FLT(2.0 * PI / *nf); // upsampled grid spacing + *gam = FLT(*nf / (2.0 * opts.upsampfac * Ssafe)); // x scale fac to x' } void onedim_fseries_kernel(BIGINT nf, FLT *fwkerhalf, finufft_spread_opts opts) @@ -578,6 +549,9 @@ int FINUFFT_MAKEPLAN(int type, int dim, BIGINT *n_modes, int iflag, int ntrans, printf("[%s] new plan: FINUFFT version " FINUFFT_VER " .................\n", __func__); + p->fftPlan = std::make_unique>( + p->opts.fftw_lock_fun, p->opts.fftw_unlock_fun, p->opts.fftw_lock_data); + if ((type != 1) && (type != 2) && (type != 3)) { fprintf(stderr, "[%s] Invalid type (%d), should be 1, 2 or 3.\n", __func__, type); return FINUFFT_ERR_TYPE_NOTVALID; @@ -682,21 +656,8 @@ int FINUFFT_MAKEPLAN(int type, int dim, BIGINT *n_modes, int iflag, int ntrans, // ------------------------ types 1,2: planning needed --------------------- if (type == 1 || type == 2) { -#ifndef FINUFFT_USE_DUCC0 int nthr_fft = nthr; // give FFTW all threads (or use o.spread_thread?) // Note: batchSize not used since might be only 1. - // Now place FFTW initialization in a lock, courtesy of OMP. Makes FINUFFT - // thread-safe (can be called inside OMP) - { - static bool did_fftw_init = false; // the only global state of FINUFFT - FFTWLockGuard lock(p->opts.fftw_lock_fun, p->opts.fftw_unlock_fun, - p->opts.fftw_lock_data); - if (!did_fftw_init) { - FFTW_INIT(); // setup FFTW global state; should only do once - did_fftw_init = true; // ensure other FINUFFT threads don't clash - } - } -#endif p->spopts.spread_direction = type; @@ -760,11 +721,7 @@ int FINUFFT_MAKEPLAN(int type, int dim, BIGINT *n_modes, int iflag, int ntrans, } timer.restart(); -#ifdef FINUFFT_USE_DUCC0 - p->fwBatch = (CPX *)malloc(p->nf * p->batchSize * sizeof(CPX)); // the big workspace -#else - p->fwBatch = (CPX *)FFTW_ALLOC_CPX(p->nf * p->batchSize); // the big workspace -#endif + p->fwBatch = p->fftPlan->alloc_complex(p->nf * p->batchSize); // the big workspace if (p->opts.debug) printf("[%s] fwBatch %.2fGB alloc: \t%.3g s\n", __func__, (double)1E-09 * sizeof(CPX) * p->nf * p->batchSize, timer.elapsedsec()); @@ -777,30 +734,12 @@ int FINUFFT_MAKEPLAN(int type, int dim, BIGINT *n_modes, int iflag, int ntrans, return FINUFFT_ERR_ALLOC; } -#ifndef FINUFFT_USE_DUCC0 timer.restart(); // plan the FFTW - int *ns = gridsize_for_fft(p); - // fftw_plan_many_dft args: rank, gridsize/dim, howmany, in, inembed, istride, - // idist, ot, onembed, ostride, odist, sign, flags - { - FFTWLockGuard lock(p->opts.fftw_lock_fun, p->opts.fftw_unlock_fun, - p->opts.fftw_lock_data); - // FFTW_PLAN_TH sets all future fftw_plan calls to use nthr_fft threads. - // FIXME: Since this might override what the user wants for fftw, we'd like to - // set it just for our one plan and then revert to the user value. - // Unfortunately fftw_planner_nthreads wasn't introduced until fftw 3.3.9, and - // there isn't a convenient mechanism to probe the version - // there is fftw_version which returns a string, but that's not compile time - FFTW_PLAN_TH(nthr_fft); - p->fftwPlan = FFTW_PLAN_MANY_DFT(dim, ns, p->batchSize, (FFTW_CPX *)p->fwBatch, - NULL, 1, p->nf, (FFTW_CPX *)p->fwBatch, NULL, 1, - p->nf, p->fftSign, p->opts.fftw); - } + const auto ns = gridsize_for_fft(p); + p->fftPlan->plan(ns, p->batchSize, p->fwBatch, p->fftSign, p->opts.fftw, nthr_fft); if (p->opts.debug) - printf("[%s] FFTW plan (mode %d, nthr=%d):\t%.3g s\n", __func__, p->opts.fftw, + printf("[%s] FFT plan (mode %d, nthr=%d):\t%.3g s\n", __func__, p->opts.fftw, nthr_fft, timer.elapsedsec()); - delete[] ns; -#endif } else { // -------------------------- type 3 (no planning) ------------ @@ -924,13 +863,8 @@ int FINUFFT_SETPTS(FINUFFT_PLAN p, BIGINT nj, FLT *xj, FLT *yj, FLT *zj, BIGINT __func__); return FINUFFT_ERR_MAXNALLOC; } -#ifdef FINUFFT_USE_DUCC0 - free(p->fwBatch); - p->fwBatch = (CPX *)malloc(p->nf * p->batchSize * sizeof(CPX)); // maybe big workspace -#else - if (p->fwBatch) FFTW_FR(p->fwBatch); - p->fwBatch = (CPX *)FFTW_ALLOC_CPX(p->nf * p->batchSize); // maybe big workspace -#endif + p->fftPlan->free(p->fwBatch); + p->fwBatch = p->fftPlan->alloc_complex(p->nf * p->batchSize); // maybe big workspace // (note FFTW_ALLOC is not needed over malloc, but matches its type) if (p->CpBatch) free(p->CpBatch); @@ -1245,20 +1179,9 @@ int FINUFFT_DESTROY(FINUFFT_PLAN p) if (!p) // NULL ptr, so not a ptr to a plan, report error return 1; -#ifdef FINUFFT_USE_DUCC0 - free(p->fwBatch); // free the big FFTW (or t3 spread) working array -#else - FFTW_FR(p->fwBatch); // free the big FFTW (or t3 spread) working array -#endif + p->fftPlan->free(p->fwBatch); // free the big FFTW (or t3 spread) working array free(p->sortIndices); if (p->type == 1 || p->type == 2) { -#ifndef FINUFFT_USE_DUCC0 - { - FFTWLockGuard lock(p->opts.fftw_lock_fun, p->opts.fftw_unlock_fun, - p->opts.fftw_lock_data); - FFTW_DE(p->fftwPlan); - } -#endif free(p->phiHat1); free(p->phiHat2); free(p->phiHat3); diff --git a/test/finufft1dmany_test.cpp b/test/finufft1dmany_test.cpp index e17cbb65..1219670a 100644 --- a/test/finufft1dmany_test.cpp +++ b/test/finufft1dmany_test.cpp @@ -87,7 +87,7 @@ int main(int argc, char *argv[]) { printf("\tone mode: rel err in F[%lld] of trans#%d is %.3g\n", (long long)nt1, i, err); // compare the result with FINUFFT1D1 - FFTW_FORGET_WISDOM(); + finufft_fft_forget_wisdom(); CPX *F_1d1 = (CPX *)malloc(sizeof(CPX) * N * ntransf); CPX *Fstart; CPX *cstart; @@ -118,7 +118,7 @@ int main(int argc, char *argv[]) { free(F_1d1); printf("test 1d2 many vs repeated single: ------------------------------------\n"); - FFTW_FORGET_WISDOM(); + finufft_fft_forget_wisdom(); #pragma omp parallel { @@ -148,7 +148,7 @@ int main(int argc, char *argv[]) { printf("\tone targ: rel err in c[%lld] of trans#%d is %.3g\n", (long long)jt, i, err); // check against single calls to FINUFFT1D2... - FFTW_FORGET_WISDOM(); + finufft_fft_forget_wisdom(); CPX *c_1d2 = (CPX *)malloc(sizeof(CPX) * M * ntransf); timer.restart(); for (BIGINT j = 0; j < ntransf; j++) { @@ -173,7 +173,7 @@ int main(int argc, char *argv[]) { free(c_1d2); printf("test 1d3 many vs repeated single: ------------------------------------\n"); - FFTW_FORGET_WISDOM(); + finufft_fft_forget_wisdom(); #pragma omp parallel { @@ -214,7 +214,7 @@ int main(int argc, char *argv[]) { printf("\tone targ: rel err in F[%lld] of trans#%d is %.3g\n", (long long)kt, i, err); // compare the result with single calls to FINUFFT1D3... - FFTW_FORGET_WISDOM(); + finufft_fft_forget_wisdom(); CPX *f_1d3 = (CPX *)malloc(sizeof(CPX) * N * ntransf); timer.restart(); for (int k = 0; k < ntransf; k++) { diff --git a/test/finufft2dmany_test.cpp b/test/finufft2dmany_test.cpp index fd26f919..155c71ae 100644 --- a/test/finufft2dmany_test.cpp +++ b/test/finufft2dmany_test.cpp @@ -94,7 +94,7 @@ int main(int argc, char *argv[]) { (long long)nt2, i, err); // compare the result with FINUFFT2D1 - FFTW_FORGET_WISDOM(); + finufft_fft_forget_wisdom(); finufft_opts simpleopts = opts; simpleopts.debug = 0; // don't output timing for calls of FINUFFT2D1 simpleopts.spread_debug = 0; @@ -134,7 +134,7 @@ int main(int argc, char *argv[]) { for (BIGINT m = 0; m < N * ntransf; ++m) F[m] = crandm11r(&se); } - FFTW_FORGET_WISDOM(); + finufft_fft_forget_wisdom(); timer.restart(); ier = FINUFFT2D2MANY(ntransf, M, x, y, c, isign, tol, N1, N2, F, &opts); ti = timer.elapsedsec(); @@ -145,7 +145,7 @@ int main(int argc, char *argv[]) { printf("ntr=%d: (%lld,%lld) modes to %lld NU pts in %.3g s \t%.3g NU pts/s\n", ntransf, (long long)N1, (long long)N2, (long long)M, ti, ntransf * M / ti); - FFTW_FORGET_WISDOM(); + finufft_fft_forget_wisdom(); i = ntransf - 1; // choose a data to check BIGINT jt = M / 2; // check arbitrary choice of one targ pt CPX ct = CPX(0, 0); @@ -182,7 +182,7 @@ int main(int argc, char *argv[]) { free(c_2d2); printf("test 2d3 many vs repeated single: ------------------------------------\n"); - FFTW_FORGET_WISDOM(); + finufft_fft_forget_wisdom(); // reuse the strengths c, interpret N as number of targs: #pragma omp parallel @@ -231,7 +231,7 @@ int main(int argc, char *argv[]) { printf("\tone targ: rel err in F[%lld] of trans#%d is %.3g\n", (long long)kt, i, err); // compare the result with FINUFFT2D3... - FFTW_FORGET_WISDOM(); + finufft_fft_forget_wisdom(); CPX *f_2d3 = (CPX *)malloc(sizeof(CPX) * N * ntransf); timer.restart(); for (int k = 0; k < ntransf; ++k) { diff --git a/test/finufft3dmany_test.cpp b/test/finufft3dmany_test.cpp index 485f90b0..ad7c954c 100644 --- a/test/finufft3dmany_test.cpp +++ b/test/finufft3dmany_test.cpp @@ -103,7 +103,7 @@ int main(int argc, char *argv[]) { (long long)nt2, (long long)nt3, i, err); // compare the result with FINUFFT3D1 - FFTW_FORGET_WISDOM(); + finufft_fft_forget_wisdom(); finufft_opts simpleopts = opts; simpleopts.debug = 0; // don't output timing for calls of FINUFFT3D1 simpleopts.spread_debug = 0; @@ -142,7 +142,7 @@ int main(int argc, char *argv[]) { #pragma omp for schedule(static, TEST_RANDCHUNK) for (BIGINT m = 0; m < N * ntransf; ++m) F[m] = crandm11r(&se); } - FFTW_FORGET_WISDOM(); + finufft_fft_forget_wisdom(); timer.restart(); ier = FINUFFT3D2MANY(ntransf, M, x, y, z, c, isign, tol, N1, N2, N3, F, &opts); ti = timer.elapsedsec(); @@ -171,7 +171,7 @@ int main(int argc, char *argv[]) { errmax = max(err, errmax); printf("\tone targ: rel err in c[%lld] of trans#%d is %.3g\n", (long long)jt, i, err); - FFTW_FORGET_WISDOM(); + finufft_fft_forget_wisdom(); // compare the result with FINUFFT3D2... CPX *c_3d2 = (CPX *)malloc(sizeof(CPX) * M * ntransf); timer.restart(); @@ -198,7 +198,7 @@ int main(int argc, char *argv[]) { free(c_3d2); printf("test 3d3 many vs repeated single: ------------------------------------\n"); - FFTW_FORGET_WISDOM(); + finufft_fft_forget_wisdom(); // reuse the strengths c, interpret N as number of targs: #pragma omp parallel { @@ -250,7 +250,7 @@ int main(int argc, char *argv[]) { errmax = max(err, errmax); printf("\t one targ: rel err in F[%lld] of trans#%d is %.3g\n", (long long)kt, i, err); - FFTW_FORGET_WISDOM(); + finufft_fft_forget_wisdom(); // compare the result with FINUFFT3D3... CPX *f_3d3 = (CPX *)malloc(sizeof(CPX) * N * ntransf); timer.restart();