Skip to content

OpenACC and Hackathons Summit Performance Data Documentation

Matt Norman edited this page Jul 13, 2022 · 2 revisions

GPU

Thatchroof

RTX 5000, single precision only, using CUDA release 11.7 (V11.7.64) with driver 515.43.04.

Fortran + GNU

# Configure the Fortran build with the GNU toolchain (through the mpif90
# wrapper). OPENACC_FLAGS and OPENMP45_FLAGS both pass -foffload=nvptx-none.
# NOTE: the continuation backslash must be the very last character on its
# line; any space after it ends the command there and the remaining options
# are run as separate (failing) commands.
module load gcc-12.1.0-gcc-11.1.0-g2ai6t2
cmake -DCMAKE_Fortran_COMPILER=mpif90 \
      -DOPENMP_FLAGS="-fopenmp" \
      -DOPENACC_FLAGS="-fopenacc -ffast-math -foffload=nvptx-none=\"-lm -O3 -ffast-math -DSINGLE_PREC -march=sm_80 -moptimize\" -fopenacc-dim=16384:1:128 -DSINGLE_PREC -fopt-info-omp" \
      -DOPENMP45_FLAGS="-fopenmp -ffast-math -foffload=nvptx-none=\"-lm -O3 -latomic -ffast-math -DSINGLE_PREC -march=sm_80 -moptimize\" -DSINGLE_PREC -fopt-info-omp" \
      -DFFLAGS="-O3 -march=native -mtune=native -ffree-line-length-none -DNO_INFORM -DSINGLE_PREC -I/usr/lib/x86_64-linux-gnu/fortran/gfortran-mod-15" \
      -DLDFLAGS="-L/usr/lib/x86_64-linux-gnu -lpnetcdf" \
      -DNX=2048 \
      -DNZ=1024 \
      -DSIM_TIME=10 \
      -DOUT_FREQ=20 \
      ..

Fortran + NVHPC

# Configure the Fortran build with nvfortran behind the mpif90 wrapper
# (OMPI_FC selects the compiler the wrapper invokes).
# NOTE: the continuation backslash must be the very last character on its
# line; any space after it ends the command there and drops the options
# that follow, including the ".." source directory.
export OMPI_FC=nvfortran
# Version 22.5.0
cmake -DCMAKE_Fortran_COMPILER=mpif90 \
      -DFFLAGS="-O3 -march=native -mtune=native -Mextend -DNO_INFORM -DSINGLE_PREC -I/opt/parallel-netcdf-1.12.0_nvhpc/include" \
      -DLDFLAGS="-L/opt/parallel-netcdf-1.12.0_nvhpc/lib -lpnetcdf" \
      -DOPENMP_FLAGS="-mp -Minfo=mp" \
      -DOPENACC_FLAGS:STRING="-acc -gpu=cc86,fastmath,loadcache:L1,pinned,unroll,fma,ptxinfo -Minfo=accel" \
      -DOPENMP45_FLAGS:STRING="-Minfo=mp -mp=gpu -gpu=cc86,fastmath,loadcache:L1,pinned,unroll,fma,ptxinfo" \
      -DDO_CONCURRENT_FLAGS:STRING="-stdpar=gpu -Minfo=stdpar -gpu=cc86,fastmath,loadcache:L2,unroll,fma,ptxinfo" \
      -DNX=2048 \
      -DNZ=1024 \
      -DSIM_TIME=10 \
      -DOUT_FREQ=20 \
      ..

C++ YAKL GNU+CUDA

# Select the GNU 11 compilers behind the MPI wrapper commands.
export OMPI_CC=gcc-11 OMPI_CXX=g++-11
export OMPI_FC=gfortran-11 OMPI_F90=gfortran-11

# Run the project's clean-up script before configuring
# (presumably removes a previous CMake configuration; script not shown here).
./cmake_clean.sh

# Configure the C++ YAKL build for the CUDA back end.
cmake -DYAKL_ARCH="CUDA" \
      -DCMAKE_C_COMPILER=mpicc \
      -DCMAKE_CXX_COMPILER=mpic++ \
      -DCMAKE_Fortran_COMPILER=mpif90 \
      -DYAKL_CUDA_FLAGS="-O3 -DHAVE_MPI -DNO_INFORM --use_fast_math -arch sm_86 -ccbin mpic++ -DSINGLE_PREC -I/usr/lib/x86_64-linux-gnu/fortran/gfortran-mod-15" \
      -DLDFLAGS="-L/usr/lib/x86_64-linux-gnu -lpnetcdf" \
      -DNX=2048 \
      -DNZ=1024 \
      -DSIM_TIME=10 \
      -DOUT_FREQ=-1 \
      ..

CPU

Thatchroof

Fortran + GNU

# CPU configuration: Fortran build with the GNU toolchain via the mpif90 wrapper.
module load gcc-12.1.0-gcc-11.1.0-g2ai6t2
cmake -DNX=256 \
      -DNZ=128 \
      -DSIM_TIME=250 \
      -DOUT_FREQ=2000 \
      -DCMAKE_Fortran_COMPILER=mpif90 \
      -DFFLAGS="-O3 -march=native -mtune=native -ffree-line-length-none -DNO_INFORM -DSINGLE_PREC -I/usr/lib/x86_64-linux-gnu/fortran/gfortran-mod-15" \
      -DLDFLAGS="-L/usr/lib/x86_64-linux-gnu -lpnetcdf" \
      ..

Fortran + Intel

# CPU configuration: Fortran build with Intel compiler flags via the mpif90
# wrapper, linking against the Intel build of parallel-netcdf.
# NOTE: the continuation backslash must be the very last character on its
# line; any space after it ends the command there, so the grid size,
# SIM_TIME, OUT_FREQ and the ".." source directory would never reach cmake.
cmake -DCMAKE_Fortran_COMPILER=mpif90 \
      -DFFLAGS="-O3 -xHost -DNO_INFORM -DSINGLE_PREC -fp-model=fast=2 -fast-transcendentals -I/opt/parallel-netcdf-1.12.0_intel/include" \
      -DLDFLAGS="-L/opt/parallel-netcdf-1.12.0_intel/lib -lpnetcdf" \
      -DOPENMP_FLAGS="-qopenmp" \
      -DNX=256 \
      -DNZ=128 \
      -DSIM_TIME=250 \
      -DOUT_FREQ=500 \
      ..

Fortran + NVHPC

# CPU configuration: Fortran build with NVHPC flags via the mpif90 wrapper,
# linking against the NVHPC build of parallel-netcdf.
# NOTE(review): unlike the other configurations on this page, FFLAGS here
# does not define SINGLE_PREC -- confirm whether that is intentional.
cmake -DNX=256 \
      -DNZ=128 \
      -DSIM_TIME=250 \
      -DOUT_FREQ=500 \
      -DCMAKE_Fortran_COMPILER=mpif90 \
      -DFFLAGS="-O3 -Mfprelaxed -march=native -mtune=native -Mextend -DNO_INFORM -I/opt/parallel-netcdf-1.12.0_nvhpc/include" \
      -DLDFLAGS="-L/opt/parallel-netcdf-1.12.0_nvhpc/lib -lpnetcdf" \
      -DOPENMP_FLAGS="-mp -Minfo=mp" \
      -DOPENACC_FLAGS:STRING="-acc -gpu=cc86,fastmath,loadcache:L1,pinned,unroll,fma,ptxinfo -Minfo=accel" \
      -DOPENMP45_FLAGS:STRING="-Minfo=mp -mp=gpu -gpu=cc86,fastmath,loadcache:L1,pinned,unroll,fma,ptxinfo" \
      -DDO_CONCURRENT_FLAGS:STRING="-stdpar=gpu -Minfo=stdpar -gpu=cc86,fastmath,loadcache:L2,unroll,fma,ptxinfo" \
      ..