From 2830104d1ea35cdeb88428122bcd8288b550e8e4 Mon Sep 17 00:00:00 2001 From: Tchoubar Date: Sat, 23 Mar 2024 20:53:03 -0400 Subject: [PATCH] Minor fixes of GPU support and auto-cpmp. --- Makefile | 6 + README.md | 8 +- README.txt | 23 ++ cpp/gcc/Makefile | 11 +- cpp/src/core/srercode.h | 1 + cpp/vc/SRWLClientPython.vcxproj | 218 +----------------- cpp/vc/SRWLClientPython.vcxproj.user | 23 +- cpp/vc/SRWLIB.vcxproj | 4 +- .../srwpy/SRWLIB_ExampleViewDataFile.py | 6 +- env/python/srwpy/examples/SRWLIB_Example17.py | 8 +- 10 files changed, 63 insertions(+), 245 deletions(-) diff --git a/Makefile b/Makefile index 40f324b5..160c1a22 100644 --- a/Makefile +++ b/Makefile @@ -23,6 +23,12 @@ examples_dir = env/python/srwpy/examples #example10_data_dir = $(examples_dir)/data_example_10 export MODE ?= 0 +#HG20042024 +ifeq ($(MODE), cuda) +export CUDA_PATH ?= /usr/local/cuda +export CUDA_MATHLIBS_PATH ?= /usr/local/cuda +endif + nofftw: core pylib all: clean fftw core pylib diff --git a/README.md b/README.md index 00a005d6..4fbcb4ea 100644 --- a/README.md +++ b/README.md @@ -253,16 +253,16 @@ pip install -e . ## VI. GPU Acceleration of SRW -SRW has basic support for GPU acceleration of some routines through CUDA. Compilation of SRW with GPU acceleration requires the CUDA HPC SDK to be installed and, on Linux can be performed with: +SRW has basic support for GPU acceleration of some routines through CUDA. Compilation of SRW with GPU acceleration requires the CUDA HPC SDK or CUDA Toolkit to be installed. +To compile on Linux, run the following from the SRW base directory: ```bash MODE=cuda make ``` -To compile on Windows, open the SRW solution in Visual Studio, set the target to the `_cuda` variants and update the library and include paths for the SRWLIB project. You may also have to copy the following DLLs from the HPC SDK install into the env/python/srwpy directory: +To compile on Windows, ensure that CUDA Toolkit v12.4 is installed, open the SRW solution in Visual Studio, set the target for the SRWLIB project to the `_cuda` variant and update the library and include paths. You may also have to copy the following DLLs from the HPC SDK or CUDA Toolkit install into the env/python/srwpy directory to resolve the `DLL load failed while importing srwlpy` error: -- cudart64_110.dll -- cufft64_10.dll +- cufft64_11.dll ## Authors and Contributors to SRW project diff --git a/README.txt b/README.txt index b2007318..a41d69df 100644 --- a/README.txt +++ b/README.txt @@ -144,6 +144,29 @@ IV. Compiling and testing SRW Library and its Python binding on Mac OSX. sudo port install gcc47 Modify the SRW_Dev/cpp/gcc/Makefile so that CC=/gcc and CXX=/g++, and proceed to the compilation as described in III.1.2.2. +V. Compiling and testing SRW Library and its Python binding on Windows and Linux (via CMake/Pip) +------------------------------------------------------------------ + + Run the following in a Visual Studio Developer Command Line/Linux Terminal: + + cmake -B build + cmake --build build -j + + The pip installable version of the package can be obtained by running the following in a Visual Studio Developer Command Line/Linux Terminal: + + cd env/python + pip install -e . + +VI. GPU Acceleration of SRW +------------------------------------------------------------------ + + SRW has basic support for GPU acceleration of some routines through CUDA. Compilation of SRW with GPU acceleration requires the CUDA HPC SDK or CUDA Toolkit to be installed. + To compile on Linux, run the following from the SRW base directory: + + MODE=cuda make + + To compile on Windows, ensure that CUDA Toolkit v12.4 is installed, open the SRW solution in Visual Studio, set the target for the SRWLIB project to the `_cuda` variant and update the library and include paths. You may also have to copy the following DLLs from the HPC SDK or CUDA Toolkit install into the env/python/srwpy directory to resolve the `DLL load failed while importing srwlpy` error: + - cufft64_11.dll Authors and Contributors to SRW project: ---------------------------------------- diff --git a/cpp/gcc/Makefile b/cpp/gcc/Makefile index a9f9fb6f..482aa32f 100644 --- a/cpp/gcc/Makefile +++ b/cpp/gcc/Makefile @@ -4,6 +4,8 @@ SRW_SRC_GEN_DIR=$(SRW_SRC_DIR)/core SRW_SRC_LIB_DIR=$(SRW_SRC_DIR)/lib SRW_SRC_GENESIS_DIR=$(SRW_SRC_DIR)/ext/genesis/genesis_july08 SH_SRC_PARSE_DIR=$(SRW_SRC_DIR)/ext/auxparse +#HG20032024 +SH_SRC_AUXGPU_DIR=$(SRW_SRC_DIR)/ext/auxgpu SH_SRC_GEN_MATH_DIR=$(SRW_SRC_DIR)/ext/genmath LIB_DIR=$(SOFT_DEV_DIR)/../ext_lib @@ -47,8 +49,9 @@ CUDA_INCLUDES = -I$(CUDA_PATH)/include -I$(CUDA_MATHLIBS_PATH)/include CUDA_LIBS = -L$(CUDA_PATH)/lib64 -L$(CUDA_MATHLIBS_PATH)/lib64 SRW_SRC_DEF += -D_OFFLOAD_GPU -DUSE_CUDA -D_FFTW3 -SRW_INCLUDES += $(CUDA_INCLUDES) -#SRW_CFLAGS += -std=c++17 #HG01012024 +#HG20032024 Add SH_SRC_AUXGPU_DIR to SRW_INCLUDES +SRW_INCLUDES += $(CUDA_INCLUDES) -I$(SH_SRC_AUXGPU_DIR) +#SRW_CFLAGS += -std=c++17 #HG01012023 LDFLAGS += $(CUDA_LIBS) -lcudart_static -lcudadevrt -lcufft -lrt NVCFLAGS = -O3 -arch=sm_80 -dlto -rdc=true CUDA_OBJ=gmfft_gpu.o srradstr_gpu.o sroptelm_gpu.o sroptdrf_gpu.o sroptgtr_gpu.o srradmnp_gpu.o @@ -128,6 +131,10 @@ lib: $(CUDA_OBJ) $(OBJ) %.o: $(SRW_SRC_GEN_DIR)/%.cu $(NVCC) -dc $(NVCFLAGS) $(SRW_INCLUDES) $(SRW_SRC_DEF) -Xcompiler="$(CFLAGS)" -c $< +#HG20032024 +%.o: $(SH_SRC_AUXGPU_DIR)/%.cpp + $(CXX) $(CFLAGS) -c $< + else lib: $(OBJ) ar -cvq $(PRG) *.o diff --git a/cpp/src/core/srercode.h b/cpp/src/core/srercode.h index 6f0ee7a9..996f3e59 100644 --- a/cpp/src/core/srercode.h +++ b/cpp/src/core/srercode.h @@ -282,6 +282,7 @@ #define WARN_ELEC_BEAM_IS_NOT_ULTRARELATIVISTIC 23 + SRW_WARNINGS_OFFSET #define GENESIS_RAD_HARM_CALC_NEEDS_ELEC_DISTRIB 24 + SRW_WARNINGS_OFFSET #define ZERO_WFR_RAD_CURV_PH_TERM_NOT_TREATED 25 + SRW_WARNINGS_OFFSET +#define GPU_COMPUTATION_FAILED 26 + SRW_WARNINGS_OFFSET /*HG21032024*/ //------------------------------------------------------------------------- diff --git a/cpp/vc/SRWLClientPython.vcxproj b/cpp/vc/SRWLClientPython.vcxproj index 00d46875..2b39fa5a 100644 --- a/cpp/vc/SRWLClientPython.vcxproj +++ b/cpp/vc/SRWLClientPython.vcxproj @@ -9,14 +9,6 @@ Debug_Py2x x64 - - Debug_Py3_11_cuda - Win32 - - - Debug_Py3_11_cuda - x64 - Debug_Py3_11 Win32 @@ -81,14 +73,6 @@ Release_Py2x x64 - - Release_Py3_11_cuda - Win32 - - - Release_Py3_11_cuda - x64 - Release_Py3_11 Win32 @@ -207,12 +191,6 @@ Unicode true - - DynamicLibrary - v143 - Unicode - true - DynamicLibrary v143 @@ -243,11 +221,6 @@ v143 Unicode - - DynamicLibrary - v143 - Unicode - DynamicLibrary v143 @@ -311,13 +284,6 @@ true false - - DynamicLibrary - v143 - Unicode - true - false - DynamicLibrary v143 @@ -352,12 +318,6 @@ Unicode false - - DynamicLibrary - v143 - Unicode - false - DynamicLibrary v143 @@ -365,9 +325,7 @@ false - - - + @@ -395,9 +353,6 @@ - - - @@ -416,9 +371,6 @@ - - - @@ -449,9 +401,6 @@ - - - @@ -470,9 +419,6 @@ - - - @@ -510,11 +456,6 @@ $(Platform)\$(Configuration)\ true - - $(SolutionDir)$(Platform)\$(Configuration)\ - $(Platform)\$(Configuration)\ - true - $(SolutionDir)$(Platform)\$(Configuration)\ $(Platform)\$(Configuration)\ @@ -562,13 +503,6 @@ srwlpy .pyd - - $(ProjectDir) - $(Platform)\$(Configuration)\ - true - srwlpy - .pyd - $(ProjectDir) $(Platform)\$(Configuration)\ @@ -625,13 +559,6 @@ srwlpy .pyd - - $(ProjectDir) - $(Platform)\$(Configuration)\ - false - srwlpy - .pyd - $(ProjectDir) $(Platform)\$(Configuration)\ @@ -681,13 +608,6 @@ srwlpy .pyd - - $(ProjectDir) - $(Platform)\$(Configuration)\ - false - srwlpy - .pyd - $(SolutionDir)$(Platform)\$(Configuration)\ $(Platform)\$(Configuration)\ @@ -874,33 +794,6 @@ - - - Disabled - ..\src\lib;..\..\..\Python33\include;%(AdditionalIncludeDirectories) - WIN32;_DEBUG;_WINDOWS;_USRDLL;SRWLIB_CLIENT;SRWLCLIENTPYTHON_EXPORTS;_CRT_SECURE_NO_WARNINGS;NON_UNIX_STDIO;%(PreprocessorDefinitions) - true - EnableFastChecks - MultiThreadedDebug - - - Level3 - EditAndContinue - - - ..\..\..\Python33\libs\python33.lib;srw_win32.lib;%(AdditionalDependencies) - $(SolutionDir)..\..\env\work\srw_python\srwlpy.pyd - LIBC;%(IgnoreSpecificDefaultLibraries) - true - Windows - MachineX86 - false - - - - - - Disabled @@ -1094,44 +987,14 @@ ..\..\..\Python311_x64\libs\python311.lib;srw_x64.lib;%(AdditionalDependencies) - $(SolutionDir)..\..\env_old\work\srw_python\srwlpy.pyd - LIBCMT;%(IgnoreSpecificDefaultLibraries) - true - Windows - MachineX64 - - - - - - - - - X64 - - - Disabled - ..\src\lib;..\src\ext\auxgpu;..\..\..\Python311_x64\include;%(AdditionalIncludeDirectories) - WIN32;_DEBUG;_WINDOWS;_USRDLL;SRWLIB_CLIENT;SRWLCLIENTPYTHON_EXPORTS;_CRT_SECURE_NO_WARNINGS;NON_UNIX_STDIO;_OFFLOAD_GPU;%(PreprocessorDefinitions) - true - EnableFastChecks - MultiThreadedDebug - NotUsing - Level3 - ProgramDatabase - true - - - ..\..\..\Python311_x64\libs\python311.lib;$(CUDA_PATH)\lib\x64\cudart_static.lib;$(CUDA_PATH)\lib\x64\cudadevrt.lib;srw_x64.lib;%(AdditionalDependencies) - $(SolutionDir)..\..\env_old\work\srw_python\srwlpy.pyd + srwlpy.pyd LIBCMT;%(IgnoreSpecificDefaultLibraries) true Windows MachineX64 - - + copy $(SolutionDir)srwlpy.pyd "$(SolutionDir)..\..\env\python\srwpy\" @@ -1378,37 +1241,6 @@ copy $(SolutionDir)srwlpy.pyd "$(SolutionDir)..\..\env\work\srw_python\" - - - MaxSpeed - false - ..\src\lib;..\..\..\Python36\include;%(AdditionalIncludeDirectories) - WIN32;NDEBUG;_WINDOWS;_USRDLL;SRWLCLIENTPYTHON_EXPORTS;SRWLIB_CLIENT;_CRT_SECURE_NO_WARNINGS;NON_UNIX_STDIO;%(PreprocessorDefinitions) - MultiThreaded - true - - - Level3 - ProgramDatabase - false - Default - true - - - ..\..\..\Python36\libs\python36.lib;srw_win32.lib;%(AdditionalDependencies) - srwlpy.pyd - LIBC;%(IgnoreSpecificDefaultLibraries) - true - Windows - true - true - MachineX86 - false - - - copy $(SolutionDir)srwlpy.pyd "$(SolutionDir)..\..\env\work\srw_python\" - - X64 @@ -1671,45 +1503,7 @@ srwlpy.pgd - copy $(SolutionDir)srwlpy.pyd "$(SolutionDir)..\..\env_old\work\srw_python\" - - - - - X64 - - - MaxSpeed - false - ..\src\lib;..\src\ext\auxgpu;..\..\..\Python311_x64\include;%(AdditionalIncludeDirectories) - WIN32;NDEBUG;_WINDOWS;_USRDLL;SRWLCLIENTPYTHON_EXPORTS;SRWLIB_CLIENT;_CRT_SECURE_NO_WARNINGS;NON_UNIX_STDIO;_OFFLOAD_GPU;%(PreprocessorDefinitions) - MultiThreaded - false - - - Level3 - None - Speed - OnlyExplicitInline - true - false - true - Precise - - - ..\..\..\Python311_x64\libs\python311.lib;$(CUDA_PATH)\lib\x64\cudart_static.lib;$(CUDA_PATH)\lib\x64\cudadevrt.lib;srw_x64.lib;%(AdditionalDependencies) - srwlpy.pyd - LIBC;%(IgnoreSpecificDefaultLibraries) - true - Windows - true - true - MachineX64 - Default - srwlpy.pgd - - - copy $(SolutionDir)srwlpy.pyd "$(SolutionDir)..\..\env_old\work\srw_python\" + copy $(SolutionDir)srwlpy.pyd "$(SolutionDir)..\..\env\python\srwpy\" @@ -1832,7 +1626,5 @@ - - - + \ No newline at end of file diff --git a/cpp/vc/SRWLClientPython.vcxproj.user b/cpp/vc/SRWLClientPython.vcxproj.user index 08158168..35133fc9 100644 --- a/cpp/vc/SRWLClientPython.vcxproj.user +++ b/cpp/vc/SRWLClientPython.vcxproj.user @@ -31,14 +31,6 @@ WindowsLocalDebugger - C:\SoftwareDevelopments\Python311_x64\python.exe - example_3_source_2_vh_then_60mm_oc.py - ..\..\env_old\work\srw_python - WindowsLocalDebugger - NativeOnly - false - - C:\SoftwareDevelopments\Python311_x64\python.exe SRWLIB_Example17_test_GPU.py ..\..\env_old\work\srw_python @@ -91,13 +83,8 @@ C:\SoftwareDevelopments\Python311_x64\python.exe WindowsLocalDebugger - split-delay-test-vcc.py - ..\..\env_old\work\srw_python - - - C:\SoftwareDevelopments\Python311_x64\python.exe - WindowsLocalDebugger - split_delay_channel_cut_cryst_02d.py + + ..\..\env_old\work\srw_python @@ -130,12 +117,6 @@ ..\..\env\work\srw_python WindowsLocalDebugger - - python - test_hdf5_convert.py - ..\..\env\work\srw_python - WindowsLocalDebugger - python smf-preliminary-03-an-2d-test-01.py diff --git a/cpp/vc/SRWLIB.vcxproj b/cpp/vc/SRWLIB.vcxproj index d773c0bd..fdab7c92 100644 --- a/cpp/vc/SRWLIB.vcxproj +++ b/cpp/vc/SRWLIB.vcxproj @@ -196,7 +196,7 @@ - + @@ -1343,6 +1343,6 @@ - + \ No newline at end of file diff --git a/env/python/srwpy/SRWLIB_ExampleViewDataFile.py b/env/python/srwpy/SRWLIB_ExampleViewDataFile.py index ea9379a1..f92fa8aa 100644 --- a/env/python/srwpy/SRWLIB_ExampleViewDataFile.py +++ b/env/python/srwpy/SRWLIB_ExampleViewDataFile.py @@ -15,7 +15,8 @@ import optparse import os -if __name__=='__main__': +def main(): #HG22032024 +#if __name__=='__main__': p = optparse.OptionParser() p.add_option('-f', '--infile', dest='infile', metavar='FILE', default='', help='input file name') p.add_option('-e', '--e', dest='e', metavar='NUMBER', type='float', default=0, help='photon energy') @@ -58,3 +59,6 @@ opt.scale, opt.width_pixels) uti_plot_show() + +if __name__=='__main__': #HG22032024 + main() \ No newline at end of file diff --git a/env/python/srwpy/examples/SRWLIB_Example17.py b/env/python/srwpy/examples/SRWLIB_Example17.py index ce76abba..bf72c4de 100644 --- a/env/python/srwpy/examples/SRWLIB_Example17.py +++ b/env/python/srwpy/examples/SRWLIB_Example17.py @@ -190,15 +190,19 @@ #***********Wavefront Propagation print(' Propagating wavefront ... ', end='') +tryUsingGPU = 1 #0 #Set to 1 if GPU should be used, 0 otherwise #OC21032024 +if(tryUsingGPU): print('trying to use GPU ... ', end='') t = time.time() -srwl.PropagElecField(wfr, opBL) +srwl.PropagElecField(wfr, opBL, None, tryUsingGPU) +#srwl.PropagElecField(wfr, opBL) print('done in', round(time.time() - t), 's') print(' Extracting, projecting propagated wavefront intensity on detector and saving it to file ... ', end='') t = time.time() mesh1 = deepcopy(wfr.mesh) arI1 = array('f', [0]*mesh1.nx*mesh1.ny) #"flat" array to take 2D intensity data -srwl.CalcIntFromElecField(arI1, wfr, 6, 0, 3, mesh1.eStart, 0, 0) #extracts intensity +srwl.CalcIntFromElecField(arI1, wfr, 6, 0, 3, mesh1.eStart, 0, 0, None, None, tryUsingGPU) #extracts intensity (eventually using GPU) +#srwl.CalcIntFromElecField(arI1, wfr, 6, 0, 3, mesh1.eStart, 0, 0) #extracts intensity stkDet = det.treat_int(arI1, _mesh = mesh1) #"Projecting" intensity on detector (by interpolation) mesh1 = stkDet.mesh; arI1 = stkDet.arS