From e908b4f098755a6d5c3f9593bfe7c7ddfd8259e1 Mon Sep 17 00:00:00 2001 From: Avirup Sircar Date: Fri, 5 Jan 2024 10:36:13 -0500 Subject: [PATCH] DFT-EFE compiled with dealii-9.5.1; matrix inversion changed to use slate lapackpp instead of mkl; modified installation instructions --- installation_instructions | 24 +----- src/basis/CMakeLists.txt | 9 +-- .../EFEOverlapInverseOperatorContext.t.cpp | 75 ++++--------------- src/basis/FEBasisManagerDealii.t.cpp | 1 + src/linearAlgebra/BlasLapack.h | 27 ++++--- src/linearAlgebra/BlasLapack.t.cpp | 40 +++++----- src/quadrature/CMakeLists.txt | 9 +-- ...issonProblemOrthoEnrichmentAdaptiveQuad.py | 12 +-- test/physics/src/CMakeLists.txt | 4 +- 9 files changed, 68 insertions(+), 133 deletions(-) diff --git a/installation_instructions b/installation_instructions index 78def30fe..cdec20d40 100644 --- a/installation_instructions +++ b/installation_instructions @@ -1,36 +1,20 @@ module list -ouput should have the following modules +Output should have the following modules - 1) gcc/8.2.0 3) openmpi/4.0.6 5) cuda/11.4.0 - 2) mkl/2021.3.0 4) cmake/3.17.3 6) python3.6-anaconda/5.2.0 + 1) gcc/10.3.0 3) openmpi/4.1.6 + 2) mkl/2022.0.2 4) cmake/3.22.2 6) python3.10-anaconda/2023.03 7) boost/1.78.0 -install sphinx, sphinx-rtd-theme and breathe using pip (optional) - mkdir build For greatlakes: - -For Compilation with CUDA - - For a CPU only compilation - CPU: -python configure.py --CMAKE_BUILD_TYPE=Debug --CMAKE_C_COMPILER=gcc --CMAKE_CXX_COMPILER=g++ --CMAKE_C_FLAGS="-std=gnu++17 -march=native -fopenmp" --CMAKE_CXX_FLAGS="-std=gnu++17 -march=native -fopenmp" --MPI_C_COMPILER=mpicc --MPI_CXX_COMPILER=mpic++ --BOOST_DIR="/sw/pkgs/arc/stacks/gcc/10.3.0/boost/1.78.0" --SLATE_DIR="/home/vikramg/DFT-softwares-gcc/slate/2022.07.inhouse/gcc10.3.0_openmpi4.1.4" --DEALII_PATH="/home/vikramg/DFT-softwares-gcc/dealii/installMinimal" --ENABLE_MPI=ON --ENABLE_CUDA=OFF --LIBXML_PATH="/usr/include/libxml2/" --LIBXML_LIBRARIES="/usr/lib64/libxml2.so" - 
-GPU (not available for now, don't use): -python configure.py --CMAKE_BUILD_TYPE=Debug --CMAKE_C_COMPILER=gcc --CMAKE_CXX_COMPILER=g++ --CMAKE_C_FLAGS="-std=gnu++17 -march=native -fopenmp" --CMAKE_CXX_FLAGS="-std=gnu++17 -march=native -fopenmp" --MPI_C_COMPILER=mpicc --MPI_CXX_COMPILER=mpic++ --SLATE_DIR="/home/vikramg/DFT-FE-softwares/slate/gpu/2022.7.15/gcc8.2.0_openmpi4.0.6_nvcc11.4.0" --DEALII_PATH="/home/vikramg/DFT-FE-softwares/dealiiDevCustomized/install_gcc8.2.0_openmpi4.0.6_minimal" --ENABLE_MPI=ON --ENABLE_CUDA=ON --CMAKE_CUDA_FLAGS="-arch=sm_70 -fopenmp" +python configure.py --CMAKE_BUILD_TYPE=Debug --CMAKE_C_COMPILER=gcc --CMAKE_CXX_COMPILER=g++ --CMAKE_C_FLAGS="-std=gnu++17 -march=native -fopenmp" --CMAKE_CXX_FLAGS="-std=gnu++17 -march=native -fopenmp" --MPI_C_COMPILER=mpicc --MPI_CXX_COMPILER=mpic++ --BOOST_DIR="/sw/pkgs/arc/stacks/gcc/10.3.0/boost/1.78.0" --SLATE_DIR="/home/vikramg/DFTEFE-softwares/slate/install" --DEALII_PATH="/scratch/vikramg_root/vikramg/shared_data/avirup/dealii/installmoduleboostkokkos" --ENABLE_MPI=ON --ENABLE_CUDA=OFF --LIBXML_PATH="/usr/include/libxml2/" --LIBXML_LIBRARIES="/usr/lib64/libxml2.so" python install.py -For perlmutter: - -python configure.py --CMAKE_BUILD_TYPE=Debug --CMAKE_C_COMPILER=/opt/cray/pe/craype/2.7.15/bin/cc --CMAKE_CXX_COMPILER=CC --CMAKE_CXX_FLAGS="-std=gnu++17 -march=znver3 -fPIC -target-accel=nvidia80" --CMAKE_C_FLAGS="-std=gnu++17 -march=znver3 -fPIC -target-accel=nvidia80" --MPI_C_COMPILER=/opt/cray/pe/craype/2.7.15/bin/cc --MPI_CXX_COMPILER=CC --SLATE_DIR="/global/common/software/m2360/softwareDFTEFE/perlmutter/slate/gpu/2022.7.inhouse.gcc11.2.0.nvcc11.7.64.mpich.8.1.17" --DEALII_PATH="/global/common/software/m2360/softwareDFTEFE/perlmutter/dealii/install" --ENABLE_MPI=ON --ENABLE_CUDA=ON --CMAKE_CUDA_FLAGS="-I$MPICH_DIR/include -L$MPICH_DIR/lib -lmpich -arch=sm_80" - make - -see generated sphinx doc at "build/docs/sphinx/index.html" (optional) diff --git a/src/basis/CMakeLists.txt 
b/src/basis/CMakeLists.txt index 8c5fe3a2a..587b419b9 100644 --- a/src/basis/CMakeLists.txt +++ b/src/basis/CMakeLists.txt @@ -40,12 +40,9 @@ add_library(dft-efe-basis SHARED ${DFT-EFE-BASIS-SOURCES}) include_directories(../) find_package(deal.II 9.3.0 REQUIRED HINTS ${DEALII_PATH}) -target_include_directories(dft-efe-basis PUBLIC ${DEAL_II_INCLUDE_DIRS}) -IF("${CMAKE_BUILD_TYPE}" STREQUAL "Release") - target_link_libraries (dft-efe-basis PUBLIC ${DEAL_II_LIBRARIES_RELEASE}) -ELSE() - target_link_libraries (dft-efe-basis PUBLIC ${DEAL_II_LIBRARIES_DEBUG}) -ENDIF() +target_include_directories(dft-efe-basis PUBLIC ${DEALII_PATH}/${DEAL_II_INCLUDE_RELDIR}) +target_link_libraries (dft-efe-basis PUBLIC dealii::dealii_release) +#target_link_libraries (dft-efe-basis PUBLIC ${DEALII_PATH}/lib) find_path(MKLROOT include/mkl_blas.h) if(MKLROOT AND EXISTS ${MKLROOT}/include/mkl_blas.h) diff --git a/src/basis/EFEOverlapInverseOperatorContext.t.cpp b/src/basis/EFEOverlapInverseOperatorContext.t.cpp index f3dbc9bb6..5a3bab87b 100644 --- a/src/basis/EFEOverlapInverseOperatorContext.t.cpp +++ b/src/basis/EFEOverlapInverseOperatorContext.t.cpp @@ -28,15 +28,14 @@ #include #include #include -#include +// #include namespace dftefe { namespace basis { - namespace EFEBlockInverse + /*namespace EFEBlockInverse { - // extern "C" - // { + // Functions from intel MKL library // // LU decomoposition of a general matrix // void dgetrf(int* M, int *N, double* A, int* lda, int* IPIV, int* // INFO); @@ -44,7 +43,6 @@ namespace dftefe // // generate inverse of a matrix given its LU decomposition // void dgetri(int* N, double* A, int* lda, int* IPIV, double* WORK, // int* lwork, int* INFO); - // } void inverse(double *A, int N) @@ -60,7 +58,7 @@ namespace dftefe delete[] IPIV; delete[] WORK; } - } // namespace EFEBlockInverse + } // namespace EFEBlockInverse*/ // Write M^-1 apply on a matrix for GLL with spectral finite element // M^-1 does not have a cell structure. 
@@ -219,39 +217,19 @@ namespace dftefe utils::throwException(mpiIsSuccessAndMsg.first, "MPI Error:" + mpiIsSuccessAndMsg.second); - // do inversion + // do inversion of enrichment block using slate lapackpp + utils::MemoryStorage + ipiv(d_nglobalEnrichmentIds); - // utils::MemoryStorage - // ipiv(d_nglobalEnrichmentIds); - - // linearAlgebra::blasLapack::inverse( - // d_nglobalEnrichmentIds, - // d_basisOverlapEnrichmentBlock.data(), - // d_nglobalEnrichmentIds, - // ipiv.data()); - - // std::cout << "Enrichment Block : \n"; - // int cc = 0; - // for (auto i : basisOverlapEnrichmentBlockSTL) - // { - // if(cc % d_nglobalEnrichmentIds == 0) - // std::cout << "\n"; - // std::cout << i << " "; - // cc+=1; - // } + linearAlgebra::blasLapack::inverse( + d_nglobalEnrichmentIds, + basisOverlapEnrichmentBlockSTL.data(), + d_nglobalEnrichmentIds, + ipiv.data()); + /* do inversion of enrichment block using intel mkl EFEBlockInverse::inverse(basisOverlapEnrichmentBlockSTL.data(), - d_nglobalEnrichmentIds); - - // std::cout << "Enrichment Inverse Block : \n"; - // cc = 0; - // for (auto i : basisOverlapEnrichmentBlockSTL) - // { - // if(cc % d_nglobalEnrichmentIds == 0) - // std::cout << "\n"; - // std::cout << i << " "; - // cc+=1; - // } + d_nglobalEnrichmentIds);*/ d_basisOverlapEnrichmentBlock ->template copyFrom( @@ -299,18 +277,6 @@ namespace dftefe d_nglobalEnrichmentIds * numComponents), XenrichedGlobalVecTmp(d_nglobalEnrichmentIds * numComponents), YenrichedGlobalVec(d_nglobalEnrichmentIds * numComponents); - // std::vector - // XenrichedGlobalVecSTL(d_nglobalEnrichmentIds*numComponents,0), - // XenrichedGlobalVecSTLTmp(d_nglobalEnrichmentIds*numComponents,0); - - // for ( size_type i = 0 ; i < X.locallyOwnedSize() ; i++) - // { - // std::pair pair = - // X.getMPIPatternP2P()->localToGlobalAndRangeId(i); if(pair.second == - // 1) for ( size_type j = 0 ; j < numComponents ; j++ ) - // *(XenrichedGlobalVecSTLTmp.data() + pair.first * numComponents + j) - // = *(X.data() 
+ i*numComponents + j); - // } XenrichedGlobalVecTmp.template copyFrom( X.begin(), @@ -355,19 +321,6 @@ namespace dftefe numComponents, *d_linAlgOpContext); - // utils::MemoryStorage - // YenrichedLocallyOwnedVec(nlocallyOwnedEnrichmentIds*numComponents); - - // for ( size_type i = 0 ; i < Y.locallyOwnedSize() ; i++) - // { - // std::pair pair = - // Y.getMPIPatternP2P()->localToGlobalAndRangeId(i); if(pair.second == - // 1) for ( size_type j = 0 ; j < numComponents ; j++ ) - // *(YenrichedLocallyOwnedVec.data() + i*numComponents + j) = - // *(YenrichedGlobalVec.data() + pair.first * numComponents + j); - // } - - YenrichedGlobalVec.template copyTo( Y.begin(), nlocallyOwnedEnrichmentIds * numComponents, diff --git a/src/basis/FEBasisManagerDealii.t.cpp b/src/basis/FEBasisManagerDealii.t.cpp index e9a8d1f6b..2b1d986f3 100644 --- a/src/basis/FEBasisManagerDealii.t.cpp +++ b/src/basis/FEBasisManagerDealii.t.cpp @@ -27,6 +27,7 @@ #include "TriangulationDealiiParallel.h" #include "TriangulationDealiiSerial.h" #include "FECellDealii.h" +#include namespace dftefe diff --git a/src/linearAlgebra/BlasLapack.h b/src/linearAlgebra/BlasLapack.h index 125e34757..191704f5d 100644 --- a/src/linearAlgebra/BlasLapack.h +++ b/src/linearAlgebra/BlasLapack.h @@ -29,6 +29,9 @@ #include #include #include +#define LAPACK_COMPLEX_CPP +#define HAVE_LAPACK_CONFIG_H +#include namespace dftefe { namespace linearAlgebra @@ -421,18 +424,18 @@ namespace dftefe const size_type * lddc, LinAlgOpContext & context); - // /** - // * @brief Matrix inversion - // * - // * @note: Assumes the matrix to be square (no pseudoinverse) - // */ - // template - // void - // inverse( const size_type n, - // ValueType * A, - // const size_type lda, - // ValueType * ipiv); + /** + * @brief Matrix inversion + * + * @note: Assumes the matrix to be square (no pseudoinverse) + */ + template + void + inverse( const size_type n, + ValueType * A, + const size_type lda, + ValueType * ipiv); } // namespace blasLapack } // 
namespace linearAlgebra diff --git a/src/linearAlgebra/BlasLapack.t.cpp b/src/linearAlgebra/BlasLapack.t.cpp index ecf42a4f2..438db7f58 100644 --- a/src/linearAlgebra/BlasLapack.t.cpp +++ b/src/linearAlgebra/BlasLapack.t.cpp @@ -471,26 +471,26 @@ namespace dftefe } } - // template - // void - // inverse( const size_type n, - // ValueType * A, - // const size_type lda, - // ValueType * ipiv) - // { - // lapack::getrf(n, - // n, - // A, - // lda, - // ipiv); - - // lapack::getri(n, - // A, - // lda, - // ipiv); - - // } + template + void + inverse( const size_type n, + ValueType * A, + const size_type lda, + ValueType * ipiv) + { + lapack::getrf(n, + n, + A, + lda, + ipiv); + + lapack::getri(n, + A, + lda, + ipiv); + + } } // namespace blasLapack } // namespace linearAlgebra diff --git a/src/quadrature/CMakeLists.txt b/src/quadrature/CMakeLists.txt index d4d7d4283..d05fffbcc 100644 --- a/src/quadrature/CMakeLists.txt +++ b/src/quadrature/CMakeLists.txt @@ -31,12 +31,9 @@ set(DFT-EFE-QUADRATURE-SOURCES add_library(dft-efe-quadrature SHARED ${DFT-EFE-QUADRATURE-SOURCES}) find_package(deal.II 9.3.0 REQUIRED HINTS ${DEALII_PATH}) -target_include_directories(dft-efe-quadrature PUBLIC ${DEAL_II_INCLUDE_DIRS}) -IF("${CMAKE_BUILD_TYPE}" STREQUAL "Release") - target_link_libraries (dft-efe-quadrature PUBLIC ${DEAL_II_LIBRARIES_RELEASE}) -ELSE() - target_link_libraries (dft-efe-quadrature PUBLIC ${DEAL_II_LIBRARIES_DEBUG}) -ENDIF() +target_include_directories(dft-efe-quadrature PUBLIC ${DEALII_PATH}/${DEAL_II_INCLUDE_RELDIR}) +target_link_libraries(dft-efe-quadrature PUBLIC dealii::dealii_release) +#target_link_libraries (dft-efe-quadrature PUBLIC ${DEALII_PATH}/${DEAL_II_LIBRARY_RELDIR}) if (NOT TARGET dft-efe-utils) add_subdirectory(../utils ${CMAKE_CURRENT_BINARY_DIR}/buildUtils) diff --git a/test/physics/TestPoissonProblemOrthoEnrichmentAdaptiveQuad.py b/test/physics/TestPoissonProblemOrthoEnrichmentAdaptiveQuad.py index dc41a0352..0b3aaab45 100644 --- 
a/test/physics/TestPoissonProblemOrthoEnrichmentAdaptiveQuad.py +++ b/test/physics/TestPoissonProblemOrthoEnrichmentAdaptiveQuad.py @@ -98,7 +98,7 @@ class BuildAndRunTestPoissonProblemOrthoEnrichmentAdaptiveQuad(rfm.RegressionTes executable = "./"+target_name tagsDict = {'compileOrRun': 'compile', 'unitOrAggregate': 'unit','slowOrFast': 'fast', 'arch': 'cpu', - 'serialOrParallel': 'serial'} + 'serialOrParallel': 'parallel'} tags = {x.lower() for x in tagsDict.values()} valid_systems = ss.getValidSystems(tagsDict['arch']) valid_prog_environs = ['*'] @@ -117,13 +117,13 @@ def set_launcher_and_resources(self): self.job.launcher = getlauncher('local')() if "parallel" in self.tags: - self.job.launcher.options = ['-n 2'] + self.job.launcher.options = ['-n 5'] self.extra_resources = ss.setResources(self.tagsDict['arch'], - time_limit = "00:05:00", + time_limit = "00:30:00", num_nodes = 1, - num_tasks_per_node = 2, - ntasks = 2, - mem_per_cpu = '2gb') + num_tasks_per_node = 5, + ntasks = 5, + mem_per_cpu = '5gb') @sanity_function diff --git a/test/physics/src/CMakeLists.txt b/test/physics/src/CMakeLists.txt index e141f8464..fc9c91024 100644 --- a/test/physics/src/CMakeLists.txt +++ b/test/physics/src/CMakeLists.txt @@ -25,6 +25,6 @@ add_subdirectory(${MAIN_PROJECT_DIR}/src/physics ${MAIN_PROJECT_DIR}/test/physic if(ENABLE_MPI) add_compile_definitions(DFTEFE_WITH_MPI) -add_executable(TestTwoBodyInteraction TestTwoBodyInteraction.cpp ) -target_link_libraries(TestTwoBodyInteraction PUBLIC dft-efe-basis dft-efe-physics dft-efe-utils dft-efe-quadrature dft-efe-linalg dft-efe-atoms) +add_executable(TestPoissonProblemOrthoEnrichmentAdaptiveQuad TestPoissonProblemOrthoEnrichmentAdaptiveQuad.cpp ) +target_link_libraries(TestPoissonProblemOrthoEnrichmentAdaptiveQuad PUBLIC dft-efe-basis dft-efe-physics dft-efe-utils dft-efe-quadrature dft-efe-linalg dft-efe-atoms) endif()