diff --git a/.gitignore b/.gitignore index cb4660d03..fed3e92c2 100644 --- a/.gitignore +++ b/.gitignore @@ -26,7 +26,7 @@ fortran/examples/nufft2dmany_demof fortran/examples/nufft3d_demof test/dumbinputs test/finufft1d_basicpassfail -test/testutils +test/testlib __pycache__* docs/_build diff --git a/CMakeLists.txt b/CMakeLists.txt index 4c66e1b58..9188648fa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -248,27 +248,19 @@ function(set_finufft_options target) endfunction() if(FINUFFT_USE_CPU) - # Main finufft libraries - if(NOT FINUFFT_STATIC_LINKING) - add_library( - finufft SHARED + set(FINUFFT_SOURCES src/spreadinterp.cpp - src/utils.cpp contrib/legendre_rule_fast.cpp src/fft.cpp src/finufft_core.cpp src/c_interface.cpp + src/finufft_utils.cpp fortran/finufftfort.cpp) + # Main finufft libraries + if(NOT FINUFFT_STATIC_LINKING) + add_library(finufft SHARED ${FINUFFT_SOURCES}) else() - add_library( - finufft STATIC - src/spreadinterp.cpp - src/utils.cpp - contrib/legendre_rule_fast.cpp - src/fft.cpp - src/finufft_core.cpp - src/c_interface.cpp - fortran/finufftfort.cpp) + add_library(finufft STATIC ${FINUFFT_SOURCES}) endif() set_finufft_options(finufft) diff --git a/devel/foldrescale.cpp b/devel/foldrescale.cpp index d05ac986a..a84e3f4e4 100644 --- a/devel/foldrescale.cpp +++ b/devel/foldrescale.cpp @@ -1,7 +1,6 @@ -#include "finufft/defs.h" +#include "finufft/test_defs.h" #include #include -#include #include #include // no vectorize @@ -17,22 +16,22 @@ This should be done in C++ not as a macro, someday. */ #define FOLDRESCALE(x, N, p) \ - (p ? (x + (x >= -PI ? (x < PI ? PI : -PI) : 3 * PI)) * ((FLT)M_1_2PI * N) \ + (p ? (x + (x >= -PI ? (x < PI ? PI : -PI) : 3 * PI)) * ((FLT)INV_2PI * N) \ : (x >= 0.0 ? (x < (FLT)N ? x : x - (FLT)N) : x + (FLT)N)) #define FOLDRESCALE04(x, N, p) \ - (p ? ((x * FLT(M_1_2PI) + FLT(0.5)) - floor(x * FLT(M_1_2PI) + FLT(0.5))) * FLT(N) \ + (p ? ((x * FLT(INV_2PI) + FLT(0.5)) - floor(x * FLT(INV_2PI) + FLT(0.5))) * FLT(N) \ : ((x / FLT(N)) - floor(x / FLT(N))) * FLT(N)) #define FOLDRESCALE05(x, N, p) \ - FLT(N) * (p ? ((x * FLT(M_1_2PI) + FLT(0.5)) - floor(x * FLT(M_1_2PI) + FLT(0.5))) \ + FLT(N) * (p ? ((x * FLT(INV_2PI) + FLT(0.5)) - floor(x * FLT(INV_2PI) + FLT(0.5))) \ : ((x / FLT(N)) - floor(x / FLT(N)))) inline __attribute__((always_inline)) FLT foldRescale00(FLT x, BIGINT N, bool p) { FLT result; FLT fN = FLT(N); if (p) { - static constexpr FLT x2pi = FLT(M_1_2PI); + static constexpr FLT x2pi = FLT(INV_2PI); result = x * x2pi + FLT(0.5); result -= floor(result); } else { @@ -44,14 +43,14 @@ inline __attribute__((always_inline)) FLT foldRescale00(FLT x, BIGINT N, bool p) } inline __attribute__((always_inline)) FLT foldRescale01(FLT x, BIGINT N, bool p) { - return p ? (x + (x >= -PI ? (x < PI ? PI : -PI) : 3 * PI)) * ((FLT)M_1_2PI * N) + return p ? (x + (x >= -PI ? (x < PI ? PI : -PI) : 3 * PI)) * ((FLT)INV_2PI * N) : (x >= 0.0 ? (x < (FLT)N ? x : x - (FLT)N) : x + (FLT)N); } template inline __attribute__((always_inline)) FLT foldRescale02(FLT x, BIGINT N) { if constexpr (p) { - return (x + (x >= -PI ? (x < PI ? PI : -PI) : 3 * PI)) * ((FLT)M_1_2PI * N); + return (x + (x >= -PI ? (x < PI ? PI : -PI) : 3 * PI)) * ((FLT)INV_2PI * N); } else { return (x >= 0.0 ? (x < (FLT)N ? x : x - (FLT)N) : x + (FLT)N); } @@ -62,7 +61,7 @@ inline __attribute__((always_inline)) FLT foldRescale03(FLT x, BIGINT N) { FLT result; FLT fN = FLT(N); if constexpr (p) { - static constexpr FLT x2pi = FLT(M_1_2PI); + static constexpr FLT x2pi = FLT(INV_2PI); result = std::fma(x, x2pi, FLT(0.5)); result -= floor(result); } else { @@ -73,7 +72,6 @@ inline __attribute__((always_inline)) FLT foldRescale03(FLT x, BIGINT N) { return result * fN; } - static std::mt19937_64 gen; static std::uniform_real_distribution<> dis(-10, 10); static const auto N = std::uniform_int_distribution<>{0, 1000}(gen); @@ -185,7 +183,6 @@ static void BM_FoldRescale05N(benchmark::State &state) { } } - BENCHMARK(BM_BASELINE)->Iterations(10000000); BENCHMARK(BM_FoldRescaleMacro)->Iterations(1000000); BENCHMARK(BM_FoldRescale00)->Iterations(1000000); diff --git a/include/cufinufft/utils.h b/include/cufinufft/utils.h index 26eb313ab..de01a9ea3 100644 --- a/include/cufinufft/utils.h +++ b/include/cufinufft/utils.h @@ -7,15 +7,17 @@ #include #include -#include - -#include - #include +#include #include #include +#ifndef _USE_MATH_DEFINES +#define _USE_MATH_DEFINES +#endif +#include + #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 || defined(__clang__) #else __inline__ __device__ double atomicAdd(double *address, double val) { @@ -87,17 +89,6 @@ class WithCudaDevice { } }; -// jfm timer class -class CNTime { -public: - void start(); - double restart(); - double elapsedsec(); - -private: - struct timeval initial; -}; - // ahb math helpers CUFINUFFT_BIGINT next235beven(CUFINUFFT_BIGINT n, CUFINUFFT_BIGINT b); @@ -118,8 +109,8 @@ template T infnorm(int n, std::complex *a) { */ template -static __forceinline__ __device__ void atomicAddComplexShared(cuda_complex *address, - cuda_complex res) { +static __forceinline__ __device__ void atomicAddComplexShared( + cuda_complex *address, cuda_complex res) { const auto raw_address = reinterpret_cast(address); atomicAdd(raw_address, res.x); atomicAdd(raw_address + 1, res.y); @@ -131,8 +122,8 @@ static __forceinline__ __device__ void atomicAddComplexShared(cuda_complex *a * on shared memory are supported so we leverage them */ template -static __forceinline__ __device__ void atomicAddComplexGlobal(cuda_complex *address, - cuda_complex res) { +static __forceinline__ __device__ void atomicAddComplexGlobal( + cuda_complex *address, cuda_complex res) { if constexpr ( std::is_same_v, float2> && COMPUTE_CAPABILITY_90_OR_HIGHER) { atomicAdd(address, res); diff --git a/include/finufft/dirft.h b/include/finufft/dirft.h deleted file mode 100644 index 2449d864e..000000000 --- a/include/finufft/dirft.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef DIRFT_H -#define DIRFT_H - -#include - -template -void dirft1d1(BIGINT nj, T *x, std::complex *c, int isign, BIGINT ms, - std::complex *f); -template -void dirft1d2(BIGINT nj, T *x, std::complex *c, int iflag, BIGINT ms, - std::complex *f); -template -void dirft1d3(BIGINT nj, T *x, std::complex *c, int iflag, BIGINT nk, T *s, - std::complex *f); - -template -void dirft2d1(BIGINT nj, T *x, T *y, std::complex *c, int iflag, BIGINT ms, BIGINT mt, - std::complex *f); -template -void dirft2d2(BIGINT nj, T *x, T *y, std::complex *c, int iflag, BIGINT ms, BIGINT mt, - std::complex *f); -template -void dirft2d3(BIGINT nj, T *x, T *y, std::complex *c, int iflag, BIGINT nk, T *s, T *t, - std::complex *f); - -template -void dirft3d1(BIGINT nj, T *x, T *y, T *z, std::complex *c, int iflag, BIGINT ms, - BIGINT mt, BIGINT mu, std::complex *f); -template -void dirft3d2(BIGINT nj, T *x, T *y, T *z, std::complex *c, int iflag, BIGINT ms, - BIGINT mt, BIGINT mu, std::complex *f); -template -void dirft3d3(BIGINT nj, T *x, T *y, T *z, std::complex *c, int iflag, BIGINT nk, T *s, - T *t, T *u, std::complex *f); - -#endif diff --git a/include/finufft/finufft_utils.hpp b/include/finufft/finufft_utils.hpp new file mode 100644 index 000000000..7577a57a1 --- /dev/null +++ b/include/finufft/finufft_utils.hpp @@ -0,0 +1,75 @@ +// Header for utils.cpp, a little library of low-level array stuff. +// These are just the functions which depend on single/double precision (FLT) + +#pragma once + +#include +#include + +#include "finufft_core.h" + +// for CNTime... +// using chrono since the interface is portable between linux and windows + +namespace finufft::utils { + +template +FINUFFT_EXPORT FINUFFT_ALWAYS_INLINE void FINUFFT_CDECL arrayrange(BIGINT n, const T *a, + T *lo, T *hi) +// With a a length-n array, writes out min(a) to lo and max(a) to hi, +// so that all a values lie in [lo,hi]. +// If n==0, lo and hi are not finite. +{ + *lo = INFINITY; + *hi = -INFINITY; + for (BIGINT m = 0; m < n; ++m) { + if (a[m] < *lo) *lo = a[m]; + if (a[m] > *hi) *hi = a[m]; + } +} +template +FINUFFT_EXPORT FINUFFT_ALWAYS_INLINE void FINUFFT_CDECL arraywidcen(BIGINT n, const T *a, + T *w, T *c) +// Writes out w = half-width and c = center of an interval enclosing all a[n]'s +// Only chooses a nonzero center if this increases w by less than fraction +// ARRAYWIDCEN_GROWFRAC defined in finufft_core.h. +// This prevents rephasings which don't grow nf by much. 6/8/17 +// If n==0, w and c are not finite. +{ + T lo, hi; + arrayrange(n, a, &lo, &hi); + *w = (hi - lo) / 2; + *c = (hi + lo) / 2; + if (std::abs(*c) < ARRAYWIDCEN_GROWFRAC * (*w)) { + *w += std::abs(*c); + *c = 0.0; + } +} + +FINUFFT_EXPORT BIGINT next235even(BIGINT n); + +// jfm's timer class +class FINUFFT_EXPORT CNTime { +public: + FINUFFT_NEVER_INLINE void start(); + FINUFFT_NEVER_INLINE double restart(); + FINUFFT_NEVER_INLINE double elapsedsec() const; + +private: + double initial; +}; + +// openmp helpers +int get_num_threads_parallel_block(); + +} // namespace finufft::utils + +// thread-safe rand number generator for Windows platform +#ifdef _WIN32 +#include +namespace finufft { +namespace utils { +FINUFFT_EXPORT int FINUFFT_CDECL rand_r(unsigned int *seedp); +} // namespace utils +} // namespace finufft +#endif diff --git a/include/finufft/test_defs.h b/include/finufft/test_defs.h index 556315242..8a3f76fca 100644 --- a/include/finufft/test_defs.h +++ b/include/finufft/test_defs.h @@ -16,7 +16,6 @@ // convenient private finufft internals #include -#include #include // --------------- Private data types for compilation in either prec --------- diff --git a/include/finufft/utils.h b/include/finufft/utils.h deleted file mode 100644 index 0b875fdfe..000000000 --- a/include/finufft/utils.h +++ /dev/null @@ -1,114 +0,0 @@ -// Header for utils.cpp, a little library of low-level array stuff. -// These are just the functions which depend on single/double precision (FLT) - -#ifndef UTILS_H -#define UTILS_H - -#include "finufft/finufft_core.h" -// for CNTime... -// using chrono since the interface is portable between linux and windows -#include - -namespace finufft { -namespace utils { - -// ahb's low-level array helpers -template -FINUFFT_EXPORT T FINUFFT_CDECL relerrtwonorm(BIGINT n, const std::complex *a, - const std::complex *b) -// ||a-b||_2 / ||a||_2 -{ - T err = 0.0, nrm = 0.0; - for (BIGINT m = 0; m < n; ++m) { - // note std::norm here & below is |a|^2 ("field norm") not usual |a| ... - nrm += std::norm(a[m]); - err += std::norm(a[m] - b[m]); - } - return sqrt(err / nrm); -} -template -FINUFFT_EXPORT T FINUFFT_CDECL errtwonorm(BIGINT n, const std::complex *a, - const std::complex *b) -// ||a-b||_2 -{ - T err = 0.0; // compute error 2-norm - for (BIGINT m = 0; m < n; ++m) err += std::norm(a[m] - b[m]); - return sqrt(err); -} -template -FINUFFT_EXPORT T FINUFFT_CDECL twonorm(BIGINT n, const std::complex *a) -// ||a||_2 -{ - T nrm = 0.0; - for (BIGINT m = 0; m < n; ++m) nrm += std::norm(a[m]); - return sqrt(nrm); -} -template -FINUFFT_EXPORT T FINUFFT_CDECL infnorm(BIGINT n, const std::complex *a) -// ||a||_infty -{ - T nrm = 0.0; - for (BIGINT m = 0; m < n; ++m) nrm = std::max(nrm, std::norm(a[m])); - return sqrt(nrm); -} -template -FINUFFT_EXPORT void FINUFFT_CDECL arrayrange(BIGINT n, const T *a, T *lo, T *hi) -// With a a length-n array, writes out min(a) to lo and max(a) to hi, -// so that all a values lie in [lo,hi]. -// If n==0, lo and hi are not finite. -{ - *lo = INFINITY; - *hi = -INFINITY; - for (BIGINT m = 0; m < n; ++m) { - if (a[m] < *lo) *lo = a[m]; - if (a[m] > *hi) *hi = a[m]; - } -} -template -FINUFFT_EXPORT void FINUFFT_CDECL arraywidcen(BIGINT n, const T *a, T *w, T *c) -// Writes out w = half-width and c = center of an interval enclosing all a[n]'s -// Only chooses a nonzero center if this increases w by less than fraction -// ARRAYWIDCEN_GROWFRAC defined in finufft_core.h. -// This prevents rephasings which don't grow nf by much. 6/8/17 -// If n==0, w and c are not finite. -{ - T lo, hi; - arrayrange(n, a, &lo, &hi); - *w = (hi - lo) / 2; - *c = (hi + lo) / 2; - if (std::abs(*c) < ARRAYWIDCEN_GROWFRAC * (*w)) { - *w += std::abs(*c); - *c = 0.0; - } -} - -FINUFFT_EXPORT BIGINT FINUFFT_CDECL next235even(BIGINT n); - -// jfm's timer class -class FINUFFT_EXPORT CNTime { -public: - void start(); - double restart(); - double elapsedsec(); - -private: - double initial; -}; - -// openmp helpers -int get_num_threads_parallel_block(); - -} // namespace utils -} // namespace finufft - -// thread-safe rand number generator for Windows platform -#ifdef _WIN32 -#include -namespace finufft { -namespace utils { -FINUFFT_EXPORT int FINUFFT_CDECL rand_r(unsigned int *seedp); -} // namespace utils -} // namespace finufft -#endif - -#endif // UTILS_H diff --git a/makefile b/makefile index 928bbf209..23a5bab61 100644 --- a/makefile +++ b/makefile @@ -136,7 +136,7 @@ STATICLIB = lib-static/$(LIBNAME).a ABSDYNLIB = $(FINUFFT)$(DYNLIB) # spreader objs -SOBJS = src/utils.o src/spreadinterp.o +SOBJS = src/finufft_utils.o src/spreadinterp.o # all lib dual-precision objs (note DUCC_OBJS empty if unused) OBJS = $(SOBJS) contrib/legendre_rule_fast.o src/fft.o src/finufft_core.o src/c_interface.o fortran/finufftfort.o $(DUCC_OBJS) @@ -262,10 +262,10 @@ test/%: test/%.cpp $(DYNLIB) test/%f: test/%.cpp $(DYNLIB) $(CXX) $(CXXFLAGS) ${LDFLAGS} -DSINGLE $< $(ABSDYNLIB) $(LIBSFFT) -o $@ # low-level tests that are cleaner if depend on only specific objects... -test/testutils: test/testutils.cpp src/utils.o - $(CXX) $(CXXFLAGS) ${LDFLAGS} test/testutils.cpp src/utils.o $(LIBS) -o test/testutils -test/testutilsf: test/testutils.cpp src/utils.o - $(CXX) $(CXXFLAGS) ${LDFLAGS} -DSINGLE test/testutils.cpp src/utils.o $(LIBS) -o test/testutilsf +test/testutils: test/testutils.cpp src/finufft_utils.o + $(CXX) $(CXXFLAGS) ${LDFLAGS} test/testutils.cpp src/finufft_utils.o $(LIBS) -o test/testutils +test/testutilsf: test/testutils.cpp src/finufft_utils.o + $(CXX) $(CXXFLAGS) ${LDFLAGS} -DSINGLE test/testutils.cpp src/finufft_utils.o $(LIBS) -o test/testutilsf # make sure all double-prec test executables ready for testing TESTS := $(basename $(wildcard test/*.cpp)) diff --git a/perftest/guru_timing_test.cpp b/perftest/guru_timing_test.cpp index a291a269b..72145fcc0 100644 --- a/perftest/guru_timing_test.cpp +++ b/perftest/guru_timing_test.cpp @@ -1,4 +1,6 @@ +#include "finufft/finufft_utils.hpp" #include + // for sleep call #if defined(WIN32) || defined(_WIN32) || defined(__WIN32) && !defined(__CYGWIN__) #include @@ -154,7 +156,7 @@ int main(int argc, char *argv[]) printf("FINUFFT %dd%d use guru interface to do %d calls together:-------------------\n", ndim, type, ntransf); FINUFFT_PLAN plan; // instantiate a finufft_plan - finufft::utils::CNTime timer; + CNTime timer; timer.start(); // Guru Step 1 BIGINT n_modes[3] = {N1, N2, N3}; // #modes per dimension (ignored for t3) int ier = FINUFFT_MAKEPLAN(type, ndim, n_modes, isign, ntransf, tol, &plan, &opts); @@ -258,7 +260,7 @@ double finufftFunnel(CPX *cStart, CPX *fStart, FLT *x, FLT *y, FLT *z, FINUFFT_P Malleo 2019; xyz passed in by Barnett 5/26/20 to prevent X_orig fields. */ { - finufft::utils::CNTime timer; + CNTime timer; timer.start(); int ier = 0; double t = 0; diff --git a/perftest/manysmallprobs.cpp b/perftest/manysmallprobs.cpp index f0e4c29ae..d669eb770 100644 --- a/perftest/manysmallprobs.cpp +++ b/perftest/manysmallprobs.cpp @@ -1,16 +1,17 @@ + +#include + // public header #include "finufft.h" #include "finufft/test_defs.h" // private access to timer -#include "finufft/utils.h" -using namespace finufft::utils; +#include "finufft/finufft_utils.hpp" -#include -#include -#include using namespace std; +using namespace finufft::utils; + int main(int argc, char *argv[]) /* What is small-problem cost of FINUFFT library from C++, using plain arrays of C++ complex numbers? Barnett 10/31/17. @@ -48,7 +49,7 @@ int main(int argc, char *argv[]) complex *F = (complex *)malloc(sizeof(complex) * N); printf("repeatedly calling the simple interface: --------------------- \n"); - finufft::utils::CNTime timer; + CNTime timer; timer.start(); for (int r = 0; r < reps; ++r) { // call the NUFFT (with iflag=+1): // printf("rep %d\n",r); diff --git a/perftest/spreadtestnd.cpp b/perftest/spreadtestnd.cpp index 5aab26fb3..6f942ee43 100644 --- a/perftest/spreadtestnd.cpp +++ b/perftest/spreadtestnd.cpp @@ -1,14 +1,16 @@ +#include "finufft/finufft_utils.hpp" #include #include -#include #include #include #include #include +#include "finufft/finufft_utils.hpp" using namespace finufft::spreadinterp; -using namespace finufft::utils; // for timer +using namespace std; +using namespace finufft::utils; void usage() { printf("usage: spreadtestnd dims [M N [tol [sort [flags [debug [kerpad [kerevalmeth " diff --git a/perftest/spreadtestndall.cpp b/perftest/spreadtestndall.cpp index 950c3526e..2ff12ba78 100644 --- a/perftest/spreadtestndall.cpp +++ b/perftest/spreadtestndall.cpp @@ -1,6 +1,6 @@ +#include "finufft/finufft_utils.hpp" #include #include -#include #include #include diff --git a/src/cuda/CMakeLists.txt b/src/cuda/CMakeLists.txt index 9f8d1344c..812f90f02 100644 --- a/src/cuda/CMakeLists.txt +++ b/src/cuda/CMakeLists.txt @@ -19,11 +19,9 @@ set(PRECISION_DEPENDENT_SRC common.cu) set(CUFINUFFT_INCLUDE_DIRS - ${PROJECT_SOURCE_DIR}/include - ${PROJECT_SOURCE_DIR}/contrib + ${PROJECT_SOURCE_DIR}/include ${PROJECT_SOURCE_DIR}/contrib $ - $ - $) + $) set(CUFINUFFT_INCLUDE_DIRS ${CUFINUFFT_INCLUDE_DIRS} @@ -43,41 +41,17 @@ set(FINUFFT_CUDA_FLAGS > >) -add_library(cufinufft_common_objects OBJECT ${PRECISION_INDEPENDENT_SRC}) -target_include_directories(cufinufft_common_objects - PUBLIC ${CUFINUFFT_INCLUDE_DIRS}) -set_target_properties( - cufinufft_common_objects - PROPERTIES POSITION_INDEPENDENT_CODE ${FINUFFT_POSITION_INDEPENDENT_CODE} - CUDA_ARCHITECTURES "${FINUFFT_CUDA_ARCHITECTURES}" - CUDA_SEPARABLE_COMPILATION ON - CUDA_STANDARD 17 - CUDA_STANDARD_REQUIRED ON) -target_compile_features(cufinufft_common_objects PRIVATE cxx_std_17) -target_compile_options(cufinufft_common_objects PRIVATE ${FINUFFT_CUDA_FLAGS}) - -add_library(cufinufft_objects OBJECT ${PRECISION_DEPENDENT_SRC}) -target_include_directories(cufinufft_objects PUBLIC ${CUFINUFFT_INCLUDE_DIRS}) -set_target_properties( - cufinufft_objects - PROPERTIES POSITION_INDEPENDENT_CODE ${FINUFFT_POSITION_INDEPENDENT_CODE} - CUDA_ARCHITECTURES "${FINUFFT_CUDA_ARCHITECTURES}" - CUDA_SEPARABLE_COMPILATION ON - CUDA_STANDARD 17 - CUDA_STANDARD_REQUIRED ON) -target_compile_features(cufinufft_objects PRIVATE cxx_std_17) -target_compile_options(cufinufft_objects PRIVATE ${FINUFFT_CUDA_FLAGS}) - if(FINUFFT_SHARED_LINKING) - add_library(cufinufft SHARED $ - $) + add_library(cufinufft SHARED ${PRECISION_INDEPENDENT_SRC} + ${PRECISION_DEPENDENT_SRC}) else() - add_library(cufinufft STATIC $ - $) + add_library(cufinufft STATIC ${PRECISION_INDEPENDENT_SRC} + ${PRECISION_DEPENDENT_SRC}) set_target_properties( cufinufft PROPERTIES POSITION_INDEPENDENT_CODE ${FINUFFT_POSITION_INDEPENDENT_CODE}) endif() +target_include_directories(cufinufft PUBLIC ${CUFINUFFT_INCLUDE_DIRS}) set_target_properties( cufinufft @@ -85,15 +59,14 @@ set_target_properties( CUDA_SEPARABLE_COMPILATION ON CUDA_STANDARD 17 CUDA_STANDARD_REQUIRED ON + WINDOWS_EXPORT_ALL_SYMBOLS ON ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}") target_compile_features(cufinufft PRIVATE cxx_std_17) target_compile_options(cufinufft PRIVATE ${FINUFFT_CUDA_FLAGS}) -if(WIN32) - target_link_libraries(cufinufft PUBLIC CUDA::cudart CUDA::cufft - CUDA::nvToolsExt) +if(WIN32 OR (BUILD_TESTING AND FINUFFT_BUILD_TESTS)) + target_link_libraries(cufinufft PUBLIC CUDA::cudart CUDA::cufft) else() - target_link_libraries(cufinufft PUBLIC CUDA::cudart_static CUDA::cufft_static - CUDA::nvToolsExt) + target_link_libraries(cufinufft PUBLIC CUDA::cudart_static CUDA::cufft_static) endif() file(GLOB CUFINUFFT_PUBLIC_HEADERS "${CMAKE_SOURCE_DIR}/include/cufinufft*.h") diff --git a/src/cuda/utils.cpp b/src/cuda/utils.cpp index 9c3003cb8..406ee7e13 100644 --- a/src/cuda/utils.cpp +++ b/src/cuda/utils.cpp @@ -23,27 +23,5 @@ CUFINUFFT_BIGINT next235beven(CUFINUFFT_BIGINT n, CUFINUFFT_BIGINT b) return nplus; } -// ----------------------- helpers for timing (always stay double prec)... - -void CNTime::start() { gettimeofday(&initial, 0); } - -double CNTime::restart() -// Barnett changed to returning in sec -{ - double delta = this->elapsedsec(); - this->start(); - return delta; -} - -double CNTime::elapsedsec() -// returns answers as double, in seconds, to microsec accuracy. Barnett 5/22/18 -{ - struct timeval now; - gettimeofday(&now, 0); - double nowsec = (double)now.tv_sec + 1e-6 * now.tv_usec; - double initialsec = (double)initial.tv_sec + 1e-6 * initial.tv_usec; - return nowsec - initialsec; -} - } // namespace utils } // namespace cufinufft diff --git a/src/finufft_core.cpp b/src/finufft_core.cpp index 1adfabedb..b0d0fd33d 100644 --- a/src/finufft_core.cpp +++ b/src/finufft_core.cpp @@ -1,14 +1,12 @@ #include #include +#include #include -#include #include "../contrib/legendre_rule_fast.h" #include #include -#include #include -#include #include #include @@ -75,7 +73,7 @@ Design notes for guru interface implementation: // ---------- local math routines (were in common.cpp; no need now): -------- namespace finufft { -namespace common { +namespace utils { static int set_nf_type12(BIGINT ms, const finufft_opts &opts, const finufft_spread_opts &spopts, BIGINT *nf) @@ -364,11 +362,11 @@ static void deconvolveshuffle2d(int dir, T prefac, const std::vector &ker1, fw[j] = 0.0; for (BIGINT k2 = 0; k2 <= k2max; ++k2, pp += 2 * ms) // non-neg y-freqs // point fk and fw to the start of this y value's row (2* is for complex): - common::deconvolveshuffle1d(dir, prefac / ker2[k2], ker1, ms, fk + pp, nf1, - &fw[nf1 * k2], modeord); + utils::deconvolveshuffle1d(dir, prefac / ker2[k2], ker1, ms, fk + pp, nf1, + &fw[nf1 * k2], modeord); for (BIGINT k2 = k2min; k2 < 0; ++k2, pn += 2 * ms) // neg y-freqs - common::deconvolveshuffle1d(dir, prefac / ker2[-k2], ker1, ms, fk + pn, nf1, - &fw[nf1 * (nf2 + k2)], modeord); + utils::deconvolveshuffle1d(dir, prefac / ker2[-k2], ker1, ms, fk + pn, nf1, + &fw[nf1 * (nf2 + k2)], modeord); } template @@ -409,11 +407,11 @@ static void deconvolveshuffle3d(int dir, T prefac, std::vector &ker1, fw[j] = 0.0; for (BIGINT k3 = 0; k3 <= k3max; ++k3, pp += 2 * ms * mt) // non-neg z-freqs // point fk and fw to the start of this z value's plane (2* is for complex): - common::deconvolveshuffle2d(dir, prefac / ker3[k3], ker1, ker2, ms, mt, fk + pp, nf1, - nf2, &fw[np * k3], modeord); + utils::deconvolveshuffle2d(dir, prefac / ker3[k3], ker1, ker2, ms, mt, fk + pp, nf1, + nf2, &fw[np * k3], modeord); for (BIGINT k3 = k3min; k3 < 0; ++k3, pn += 2 * ms * mt) // neg z-freqs - common::deconvolveshuffle2d(dir, prefac / ker3[-k3], ker1, ker2, ms, mt, fk + pn, nf1, - nf2, &fw[np * (nf3 + k3)], modeord); + utils::deconvolveshuffle2d(dir, prefac / ker3[-k3], ker1, ker2, ms, mt, fk + pn, nf1, + nf2, &fw[np * (nf3 + k3)], modeord); } // --------- batch helper functions for t1,2 exec: --------------------------- @@ -488,12 +486,12 @@ static int deconvolveBatch(int batchSize, FINUFFT_PLAN_T *p, std::complex return 0; } -} // namespace common +} // namespace utils } // namespace finufft // --------------- rest is the 5 user guru (plan) interface drivers: --------- // (not namespaced since have safe names finufft{f}_* ) -using namespace finufft::common; // accesses routines defined above +using namespace finufft::utils; // accesses routines defined above // Marco Barbone: 5.8.2024 // These are user-facing. diff --git a/src/utils.cpp b/src/finufft_utils.cpp similarity index 95% rename from src/utils.cpp rename to src/finufft_utils.cpp index 2627a179a..8bcf8ddab 100644 --- a/src/utils.cpp +++ b/src/finufft_utils.cpp @@ -5,11 +5,11 @@ #include -#include "finufft/utils.h" +#include + using namespace std; -namespace finufft { -namespace utils { +namespace finufft::utils { BIGINT next235even(BIGINT n) // finds even integer not less than n, with prime factors no larger than 5 @@ -47,7 +47,7 @@ double CNTime::restart() return delta; } -double CNTime::elapsedsec() +double CNTime::elapsedsec() const // returns answers as double, in seconds, to microsec accuracy. Barnett 5/22/18 { std::uint64_t now = std::chrono::duration_cast( @@ -85,5 +85,4 @@ int rand_r(unsigned int * /*seedp*/) } #endif -} // namespace utils -} // namespace finufft +} // namespace finufft::utils diff --git a/src/spreadinterp.cpp b/src/spreadinterp.cpp index f6cf925e0..497e2396d 100644 --- a/src/spreadinterp.cpp +++ b/src/spreadinterp.cpp @@ -1,7 +1,7 @@ // Spreading/interpolating module within FINUFFT. +#include #include -#include #include "ker_horner_allw_loop_constexpr.h" #include "ker_lowupsampfac_horner_allw_loop_constexpr.h" diff --git a/test/dumbinputs.cpp b/test/dumbinputs.cpp index e182cfe5f..866458c86 100644 --- a/test/dumbinputs.cpp +++ b/test/dumbinputs.cpp @@ -34,12 +34,12 @@ */ // This switches FLT macro from double to float if SINGLE is defined, etc... -#include "directft/dirft1d.cpp" -#include "directft/dirft2d.cpp" -#include "directft/dirft3d.cpp" +#include "utils/dirft1d.hpp" +#include "utils/dirft2d.hpp" +#include "utils/norms.hpp" #include + using namespace std; -using namespace finufft::utils; // for twonorm, etc int main(int argc, char *argv[]) { int M = 100; // number of nonuniform points diff --git a/test/finufft1d_test.cpp b/test/finufft1d_test.cpp index d8a66ed0d..00475c2da 100644 --- a/test/finufft1d_test.cpp +++ b/test/finufft1d_test.cpp @@ -1,6 +1,8 @@ #include // this enforces recompilation, responding to SINGLE... -#include "directft/dirft1d.cpp" +#include "finufft/finufft_utils.hpp" +#include "utils/dirft1d.hpp" +#include "utils/norms.hpp" using namespace std; using namespace finufft::utils; diff --git a/test/finufft1dmany_test.cpp b/test/finufft1dmany_test.cpp index f2b13534d..0491a4b27 100644 --- a/test/finufft1dmany_test.cpp +++ b/test/finufft1dmany_test.cpp @@ -1,6 +1,8 @@ #include // this enforces recompilation, responding to SINGLE... -#include "directft/dirft1d.cpp" +#include "finufft/finufft_utils.hpp" +#include "utils/dirft1d.hpp" +#include "utils/norms.hpp" using namespace std; using namespace finufft::utils; diff --git a/test/finufft2d_test.cpp b/test/finufft2d_test.cpp index eb1fec761..b79f5967b 100644 --- a/test/finufft2d_test.cpp +++ b/test/finufft2d_test.cpp @@ -1,6 +1,9 @@ #include // this enforces recompilation, responding to SINGLE... -#include "directft/dirft2d.cpp" +#include "finufft/finufft_utils.hpp" +#include "utils/dirft2d.hpp" +#include "utils/norms.hpp" + using namespace std; using namespace finufft::utils; diff --git a/test/finufft2dmany_test.cpp b/test/finufft2dmany_test.cpp index 0efbaded9..547c37b9b 100644 --- a/test/finufft2dmany_test.cpp +++ b/test/finufft2dmany_test.cpp @@ -1,6 +1,8 @@ #include // this enforces recompilation, responding to SINGLE... -#include "directft/dirft2d.cpp" +#include "finufft/finufft_utils.hpp" +#include "utils/dirft2d.hpp" +#include "utils/norms.hpp" using namespace std; using namespace finufft::utils; diff --git a/test/finufft3d_test.cpp b/test/finufft3d_test.cpp index 1e89d471c..9cbd5c042 100644 --- a/test/finufft3d_test.cpp +++ b/test/finufft3d_test.cpp @@ -1,6 +1,8 @@ #include // this enforces recompilation, responding to SINGLE... -#include "directft/dirft3d.cpp" +#include "finufft/finufft_utils.hpp" +#include "utils/dirft3d.hpp" +#include "utils/norms.hpp" using namespace std; using namespace finufft::utils; diff --git a/test/finufft3dkernel_test.cpp b/test/finufft3dkernel_test.cpp index 9bc6d1955..87276261b 100644 --- a/test/finufft3dkernel_test.cpp +++ b/test/finufft3dkernel_test.cpp @@ -1,6 +1,9 @@ #include // this enforces recompilation, responding to SINGLE... -#include "directft/dirft3d.cpp" +#include "finufft/finufft_utils.hpp" +#include "utils/dirft3d.hpp" +#include "utils/norms.hpp" + using namespace std; using namespace finufft::utils; diff --git a/test/finufft3dmany_test.cpp b/test/finufft3dmany_test.cpp index b1d315719..9cae4e41d 100644 --- a/test/finufft3dmany_test.cpp +++ b/test/finufft3dmany_test.cpp @@ -1,6 +1,9 @@ #include // this enforces recompilation, responding to SINGLE... -#include "directft/dirft3d.cpp" +#include "finufft/finufft_utils.hpp" +#include "utils/dirft3d.hpp" +#include "utils/norms.hpp" + using namespace std; using namespace finufft::utils; diff --git a/test/testutils.cpp b/test/testutils.cpp index 6facb72cd..100a46531 100644 --- a/test/testutils.cpp +++ b/test/testutils.cpp @@ -15,7 +15,11 @@ */ // This switches FLT macro from double to float if SINGLE is defined, etc... + +#include "finufft/finufft_utils.hpp" +#include "utils/norms.hpp" #include + using namespace finufft::utils; int main(int argc, char *argv[]) { diff --git a/test/directft/dirft1d.cpp b/test/utils/dirft1d.hpp similarity index 98% rename from test/directft/dirft1d.cpp rename to test/utils/dirft1d.hpp index c80299b47..22863edb3 100644 --- a/test/directft/dirft1d.cpp +++ b/test/utils/dirft1d.hpp @@ -1,5 +1,4 @@ -#include -#include +#include "finufft/finufft_core.h" #include // This is basically a port of dirft1d.f from CMCL package, except with diff --git a/test/directft/dirft2d.cpp b/test/utils/dirft2d.hpp similarity index 98% rename from test/directft/dirft2d.cpp rename to test/utils/dirft2d.hpp index 62f126c15..26b813e3c 100644 --- a/test/directft/dirft2d.cpp +++ b/test/utils/dirft2d.hpp @@ -1,5 +1,4 @@ -#include -#include +#include "finufft/finufft_core.h" #include // This is basically a port of dirft2d.f from CMCL package, except with diff --git a/test/directft/dirft3d.cpp b/test/utils/dirft3d.hpp similarity index 98% rename from test/directft/dirft3d.cpp rename to test/utils/dirft3d.hpp index b77111257..795505d7a 100644 --- a/test/directft/dirft3d.cpp +++ b/test/utils/dirft3d.hpp @@ -1,5 +1,6 @@ -#include -#include +#pragma once + +#include "finufft/finufft_core.h" #include // This is basically a port of dirft2d.f from CMCL package, except with diff --git a/test/utils/norms.hpp b/test/utils/norms.hpp new file mode 100644 index 000000000..c7d42a74b --- /dev/null +++ b/test/utils/norms.hpp @@ -0,0 +1,43 @@ +#pragma once + +#include + +// ahb's low-level array helpers +template +FINUFFT_EXPORT T FINUFFT_CDECL relerrtwonorm(BIGINT n, const std::complex *a, + const std::complex *b) +// ||a-b||_2 / ||a||_2 +{ + T err = 0.0, nrm = 0.0; + for (BIGINT m = 0; m < n; ++m) { + // note std::norm here & below is |a|^2 ("field norm") not usual |a| ... + nrm += std::norm(a[m]); + err += std::norm(a[m] - b[m]); + } + return sqrt(err / nrm); +} +template +FINUFFT_EXPORT T FINUFFT_CDECL errtwonorm(BIGINT n, const std::complex *a, + const std::complex *b) +// ||a-b||_2 +{ + T err = 0.0; // compute error 2-norm + for (BIGINT m = 0; m < n; ++m) err += std::norm(a[m] - b[m]); + return sqrt(err); +} +template +FINUFFT_EXPORT T FINUFFT_CDECL twonorm(BIGINT n, const std::complex *a) +// ||a||_2 +{ + T nrm = 0.0; + for (BIGINT m = 0; m < n; ++m) nrm += std::norm(a[m]); + return sqrt(nrm); +} +template +FINUFFT_EXPORT T FINUFFT_CDECL infnorm(BIGINT n, const std::complex *a) +// ||a||_infty +{ + T nrm = 0.0; + for (BIGINT m = 0; m < n; ++m) nrm = std::max(nrm, std::norm(a[m])); + return sqrt(nrm); +}