Skip to content

Add CI for GPU tests on Ruche #74

Add CI for GPU tests on Ruche

Add CI for GPU tests on Ruche #74

Workflow file for this run

# Workflow triggered by pull requests that target the main branch.
name: Build and test

on:
  pull_request:
    branches: [main]

env:
  # CMake build type forwarded as CMAKE_BUILD_TYPE (Release, Debug, RelWithDebInfo, etc.)
  BUILD_TYPE: RelWithDebInfo
  # Force the use of BuildKit for Docker
  DOCKER_BUILDKIT: 1

jobs:
lint:
  # Run the clang-format lint action on the C++ sources of the listed directories.
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
    - uses: DoozyX/clang-format-lint-action@v0.13
      with:
        # clang-format release used for the check
        clangFormatVersion: 12
        # space-separated list of directories to scan
        source: 'common/ fft/ examples/'
        # nothing is excluded from the directories above
        exclude: ''
        # only files with these extensions are checked
        extensions: 'hpp,cpp'
check_build_base_needed:
  # Detect whether a base-image Dockerfile was modified, so that the build_base
  # job can be skipped when the published images are still up to date.
  runs-on: ubuntu-latest
  outputs:
    # This has to be a ${{ }} expression: a plain scalar is exported verbatim as
    # text, which never equals 'true' in the condition of the build_base job.
    changed_docker_files: ${{ steps.changed_docker_files.outputs.any_changed == 'true' }}
  steps:
    - uses: actions/checkout@v4
      with:
        # presumably the extra commit gives the next step something to diff against
        fetch-depth: 2
    - name: Get changed Docker files
      id: changed_docker_files
      # NOTE(review): consider pinning this third-party action to a full commit SHA
      uses: tj-actions/changed-files@v42
      with:
        files: docker/base/**/Dockerfile
build_base:
  # Rebuild the base image of every enabled backend and publish it on ghcr.io,
  # both as a Docker image and as a Singularity image converted from it.
  # Only runs when check_build_base_needed reports a modified Dockerfile.
  runs-on: ubuntu-latest
  needs: check_build_base_needed
  if: needs.check_build_base_needed.outputs.changed_docker_files == 'true'
  strategy:
    matrix:
      backend:
        # - openmp
        - cuda
        # - hip
  steps:
    - name: Free Disk Space (Ubuntu)
      uses: jlumbroso/free-disk-space@v1.2.0
      with:
        tool-cache: true
        large-packages: false
    - name: Checkout repository
      uses: actions/checkout@v4
    # TODO check if current build has a different Dockerfile
    - name: Get Singularity
      env:
        SINGULARITY_VERSION: 3.11.2
      run: |
        wget https://github.com/sylabs/singularity/releases/download/v${{ env.SINGULARITY_VERSION }}/singularity-ce_${{ env.SINGULARITY_VERSION }}-jammy_amd64.deb
        sudo apt-get install ./singularity-ce_${{ env.SINGULARITY_VERSION }}-jammy_amd64.deb
    - name: Login in GitHub Containers Repository with Docker
      run: echo ${{ secrets.GITHUB_TOKEN }} | docker login ghcr.io -u ${{ github.actor }} --password-stdin
    - name: Login in GitHub Containers Repository with Singularity
      run: echo ${{ secrets.GITHUB_TOKEN }} | singularity remote login -u ${{ github.actor }} --password-stdin oras://ghcr.io
    - name: Build Docker image
      # BUILDKIT_INLINE_CACHE=1 embeds the cache metadata in the pushed image;
      # without it the --cache-from below has nothing to reuse on the next run.
      run: |
        docker build \
          -t ghcr.io/cexa-project/kokkos-fft/base_${{ matrix.backend }} \
          --cache-from ghcr.io/cexa-project/kokkos-fft/base_${{ matrix.backend }} \
          --build-arg BUILDKIT_INLINE_CACHE=1 \
          --progress=plain \
          docker/base/${{ matrix.backend }}
    - name: Push Docker image
      run: docker push ghcr.io/cexa-project/kokkos-fft/base_${{ matrix.backend }}
    - name: Convert Docker image to Singularity
      run: singularity build base_${{ matrix.backend }}.sif docker://ghcr.io/cexa-project/kokkos-fft/base_${{ matrix.backend }}
    - name: Push Singularity image
      run: singularity push base_${{ matrix.backend }}.sif oras://ghcr.io/cexa-project/kokkos-fft/base_${{ matrix.backend }}.sif
build:
  # Configure, build and install the project inside the base image of each
  # backend, then build the install test against the installed package.
  # The build tree of the "native" target is archived and uploaded as an artifact.
  runs-on: ubuntu-latest
  needs: build_base
  # Run this job even if build_base did not run (skipped), but not when it
  # failed or when the workflow was cancelled: a failed base build would make
  # this job use a stale or missing image.
  if: ${{ !cancelled() && needs.build_base.result != 'failure' }}
  strategy:
    matrix:
      backend:
        # - name: openmp
        #   cmake_flags: -DKokkos_ENABLE_OPENMP=ON
        - name: cuda
          cmake_flags: -DKokkos_ENABLE_CUDA=ON -DKokkos_ARCH_AMPERE80=ON
        # - name: hip
        #   cmake_flags: -DKokkos_ENABLE_HIP=ON -DKokkos_ARCH_VEGA90A=ON
      target:
        - name: native
          cmake_flags: ""
        - name: host_device
          cmake_flags: -DKokkosFFT_ENABLE_HOST_AND_DEVICE=ON
      # only takes effect once the openmp backend above is enabled
      exclude:
        - backend:
            name: openmp
          target:
            name: host_device
  steps:
    - name: Free Disk Space (Ubuntu)
      uses: jlumbroso/free-disk-space@v1.2.0
      with:
        tool-cache: true
        large-packages: false
    - name: Checkout built branch
      uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Login in GitHub Containers Repository
      run: echo ${{ secrets.GITHUB_TOKEN }} | docker login ghcr.io -u ${{ github.actor }} --password-stdin
    - name: Pull image
      run: docker pull ghcr.io/cexa-project/kokkos-fft/base_${{ matrix.backend.name }}
    - name: Configure
      # the workspace is mounted on /work in the container
      run: |
        docker run -v ${{ github.workspace }}:/work ghcr.io/cexa-project/kokkos-fft/base_${{ matrix.backend.name }} \
          cmake -B build \
          -DCMAKE_INSTALL_PREFIX=/work/install \
          -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} \
          -DCMAKE_CXX_STANDARD=17 \
          -DBUILD_TESTING=ON \
          -DKokkosFFT_INTERNAL_Kokkos=ON \
          ${{ matrix.backend.cmake_flags }} \
          ${{ matrix.target.cmake_flags }}
    - name: Build
      run: |
        docker run -v ${{ github.workspace }}:/work ghcr.io/cexa-project/kokkos-fft/base_${{ matrix.backend.name }} \
          cmake --build build -j $(( $(nproc) * 2 + 1 ))
    - name: Prepare artifacts
      if: matrix.target.name == 'native'
      # this is mandatory to preserve execution rights
      run: tar --exclude CMakeFiles -cvf tests_${{ matrix.backend.name }}.tar build/
    - name: Save artifacts
      if: matrix.target.name == 'native'
      uses: actions/upload-artifact@v3
      with:
        name: tests_${{ matrix.backend.name }}
        path: tests_${{ matrix.backend.name }}.tar
    - name: Install
      run: |
        docker run -v ${{ github.workspace }}:/work ghcr.io/cexa-project/kokkos-fft/base_${{ matrix.backend.name }} \
          cmake --install build
    - name: Configure and build test code
      # consumes the package installed in /work/install by the previous step
      run: |
        docker run -v ${{ github.workspace }}:/work ghcr.io/cexa-project/kokkos-fft/base_${{ matrix.backend.name }} \
          cmake -B build_test \
          -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} \
          -DCMAKE_CXX_STANDARD=17 \
          -DCMAKE_PREFIX_PATH=/work/install \
          install_test/src
        docker run -v ${{ github.workspace }}:/work ghcr.io/cexa-project/kokkos-fft/base_${{ matrix.backend.name }} \
          cmake --build build_test -j $(( $(nproc) * 2 + 1 ))
test_cuda:
  # run CUDA tests on Ruche supercomputer
  runs-on:
    - self-hosted
    - cuda
  needs: build
  # run this job even if build_base did not run
  if: always() && needs.build.result == 'success'
  env:
    SINGULARITY_VERSION: 3.8.3/gcc-11.2.0
  steps:
    - name: Get artifacts
      uses: actions/download-artifact@v3
      with:
        name: tests_cuda
    - name: Deploy artifacts
      # the archive is created from build/ by the build job, so it unpacks into ./build
      run: tar -xvf tests_cuda.tar
    - name: Pull Singularity image
      # Explicit output name so that the next step finds base_cuda.sif, and
      # --force so that a file left over from a previous run is overwritten
      # (NOTE(review): assumes the self-hosted workspace is not cleaned between runs).
      run: |
        module load singularity/${{ env.SINGULARITY_VERSION }}
        singularity pull --force base_cuda.sif oras://ghcr.io/cexa-project/kokkos-fft/base_cuda.sif
    - name: Run test within Slurm job
      # The unpacked build tree is bound to /work/build, presumably the path it
      # had when it was configured in the Docker container (workspace on /work).
      # --pwd makes ctest start in that directory; -W/--workdir only selects a
      # scratch directory and does not change the initial working directory.
      run: |
        srun --nodes=1 --time=01:00:00 -p gpua100 --gres=gpu:1 bash -c " \
          module load singularity/${{ env.SINGULARITY_VERSION }}; \
          singularity run --nv --bind $PWD/build:/work/build --pwd /work/build base_cuda.sif ctest \
        "