From 63fd238aa985ac6b195cae75b49a84dbd876ad9c Mon Sep 17 00:00:00 2001 From: GlassOfWhiskey Date: Wed, 20 Nov 2024 22:04:48 +0100 Subject: [PATCH] Update Docker base image to Ubuntu Noble This commit updates the Docker base image used to build Slurm containers to the new Ubuntu Noble (24.04 LTS) version. Consequently, the Slurm version shipped with apt is updated to `v23.11.4`. --- .github/dependabot.yml | 6 ++++++ .github/workflows/ci-tests.yaml | 6 +++--- .github/workflows/release.yml | 4 ++-- slurm/README.md | 16 ++++++++-------- slurm/docker-compose.yml | 5 ++--- slurm/slurmctld/Dockerfile | 6 +++--- slurm/slurmctld/config/cgroups.conf | 2 -- slurm/slurmctld/config/slurm.conf | 2 +- slurm/slurmd/Dockerfile | 6 +++--- slurm/slurmd/config/cgroups.conf | 2 -- 10 files changed, 28 insertions(+), 27 deletions(-) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..9096371 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,6 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "daily" \ No newline at end of file diff --git a/.github/workflows/ci-tests.yaml b/.github/workflows/ci-tests.yaml index 1dffe53..d786e08 100644 --- a/.github/workflows/ci-tests.yaml +++ b/.github/workflows/ci-tests.yaml @@ -27,7 +27,7 @@ jobs: CREATION_DATE=${{ env.NOW }} context: slurm/slurmctld load: true - tags: alphaunito/slurmctld:21.08.5 + tags: alphaunito/slurmctld:23.11.4 - name: "Build slurmd image" uses: docker/build-push-action@v5 with: @@ -36,7 +36,7 @@ jobs: CREATION_DATE=${{ env.NOW }} context: slurm/slurmd load: true - tags: alphaunito/slurmd:21.08.5 + tags: alphaunito/slurmd:23.11.4 - name: "Start Docker Compose" id: start-compose run: | @@ -54,7 +54,7 @@ jobs: --project-name slurm \ exec \ --user hpcuser \ - slurmctld srun hostname + slurmctld srun --deadline 00:01:00 hostname - name: "Show slurmctld logs on failure" if: ${{ always() && (steps.start-compose.outcome == 'failure' || steps.run-tests.outcome == 'failure') }} run: | diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index a810542..50da827 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -30,7 +30,7 @@ jobs: CREATION_DATE=${{ env.NOW }} context: slurm/slurmctld push: true - tags: alphaunito/slurmctld:21.08.5 + tags: alphaunito/slurmctld:23.11.4 - name: "Build slurmd image" uses: docker/build-push-action@v5 with: @@ -39,4 +39,4 @@ jobs: CREATION_DATE=${{ env.NOW }} context: slurm/slurmd push: true - tags: alphaunito/slurmd:21.08.5 \ No newline at end of file + tags: alphaunito/slurmd:23.11.4 \ No newline at end of file diff --git a/slurm/README.md b/slurm/README.md index 8d8c74b..a178503 100644 --- a/slurm/README.md +++ b/slurm/README.md @@ -1,11 +1,11 @@ # Slurm on Docker -This folder contains a fully Dockerized version of the [Slurm](https://slurm.schedmd.com/) queue manager. The version of SLURM shipped is the one downloaded from the apt repository, which is currently `21.08.5` +This folder contains a fully Dockerized version of the [Slurm](https://slurm.schedmd.com/) queue manager. The version of SLURM shipped is the one downloaded from the apt repository, which is currently `23.11.4` This repository contains the source code of different container images: -- `alphaunito/slurmctld:21.08.5`, which runs the Slurm control plane -- `alphaunito/slurmd:21:08.5`, which runs a Slurm compute node +- `alphaunito/slurmctld:23.11.4`, which runs the Slurm control plane +- `alphaunito/slurmd:23.11.4`, which runs a Slurm compute node Plus, it also contains a [docker-compose.yml](./docker-compose.yml) file that can deplyo an entire Slurm cluster with a single controller and a set of compute nodes. All these components are detailed below @@ -14,8 +14,8 @@ Plus, it also contains a [docker-compose.yml](./docker-compose.yml) file that ca The `slurmctld` process is the central management daemon of Slurm. It constitutes the control plane of the Slurm queue manager. The `slurmctld` Docker image can be build and published on DockerHub using the following commands ```bash -docker build -t alphaunito/slurmctld:21.08.5 slurmctld -docker push alphaunito/slurmctld:21.08.5 +docker build -t alphaunito/slurmctld:23.11.4 slurmctld +docker push alphaunito/slurmctld:23.11.4 ``` To correctly populate the `slurm.conf` file, a `slurmctld` container needs 3 environment variables: @@ -31,8 +31,8 @@ Note that all the compute nodes in the simulated HPC cluster should have a reach The `slurmd` process is the compute node daemon for Slurm. It monitors all tasks running on the compute node , accepts work (tasks), launches tasks, and kills running tasks upon request. The `slurmd` Docker image can be build and published using the following commands ```bash -docker build -t alphaunito/slurmd:21.08.5 slurmd -docker push alphaunito/slurmd:21.08.5 +docker build -t alphaunito/slurmd:23.11.4 slurmd +docker push alphaunito/slurmd:23.11.4 ``` To correctly connect to a `slurmctld` node, a `slurmd` container needs a `SLURMCTLD_HOSTNAME` variable that should contain the hostname of the target `slurmctld` container. If this variable is not set, the container displays an error message and terminates @@ -51,4 +51,4 @@ To allow for unprivileged workloads, an `hpcuser` has been configured inside the docker exec -it --user hpcuser slurmctld bash ``` -In order to simulate an HPC facility, where the home folder is commonly mounted on a shared parallel file system, users may want to mount the `/home/hpcuser` folder as a shared volume among all the containers in the Slurm cluster. \ No newline at end of file +In order to simulate an HPC facility, where the home folder is commonly mounted on a shared parallel file system, users may want to mount the `/home/hpcuser` folder as a shared volume among all the containers in the Slurm cluster. diff --git a/slurm/docker-compose.yml b/slurm/docker-compose.yml index a8ce86e..7619f09 100644 --- a/slurm/docker-compose.yml +++ b/slurm/docker-compose.yml @@ -1,7 +1,6 @@ -version: "3.8" services: slurmctld: - image: alphaunito/slurmctld:21.08.5 + image: alphaunito/slurmctld:23.11.4 environment: SLURMD_HOSTNAME_PREFIX: ${COMPOSE_PROJECT_NAME}-slurmd SLURMD_NODES: 2 @@ -13,7 +12,7 @@ services: - munge:/etc/munge - mysql:/var/lib/mysql slurmd: - image: alphaunito/slurmd:21.08.5 + image: alphaunito/slurmd:23.11.4 deploy: mode: replicated replicas: 2 diff --git a/slurm/slurmctld/Dockerfile b/slurm/slurmctld/Dockerfile index 65cc211..abf0095 100644 --- a/slurm/slurmctld/Dockerfile +++ b/slurm/slurmctld/Dockerfile @@ -1,17 +1,17 @@ -FROM ubuntu:jammy +FROM ubuntu:noble ARG COMMIT="none" ARG CREATION_DATE="none" LABEL org.opencontainers.image.authors="Iacopo Colonnelli " -LABEL org.opencontainers.image.base.name="docker.io/ubuntu:jammy" +LABEL org.opencontainers.image.base.name="docker.io/ubuntu:noble" LABEL org.opencontainers.image.created="${CREATION_DATE}" LABEL org.opencontainers.image.licenses="OpenSSL" LABEL org.opencontainers.image.revision="${COMMIT}" LABEL org.opencontainers.image.ref.name="alphaunito/slurmctld" LABEL org.opencontainers.image.source="https://github.com/alpha-unito/docker-for-hpc" LABEL org.opencontainers.image.title="Slurm management daemon" -LABEL org.opencontainers.image.version="21.08.5" +LABEL org.opencontainers.image.version="23.11.4" RUN apt update \ && apt install -y --no-install-recommends \ diff --git a/slurm/slurmctld/config/cgroups.conf b/slurm/slurmctld/config/cgroups.conf index 30f9fd2..2b46597 100644 --- a/slurm/slurmctld/config/cgroups.conf +++ b/slurm/slurmctld/config/cgroups.conf @@ -1,5 +1,3 @@ -CgroupAutomount=yes -CgroupReleaseAgentDir="/etc/slurm/cgroup" ConstrainCores=yes ConstrainDevices=no ConstrainRAMSpace=yes diff --git a/slurm/slurmctld/config/slurm.conf b/slurm/slurmctld/config/slurm.conf index baac5a6..16171dd 100644 --- a/slurm/slurmctld/config/slurm.conf +++ b/slurm/slurmctld/config/slurm.conf @@ -12,7 +12,7 @@ PartitionName=docker Nodes=ALL Default=YES MaxTime=INFINITE State=UP ProctrackType=proctrack/linuxproc ReturnToService=1 SchedulerType=sched/backfill -SelectType=select/cons_res +SelectType=select/cons_tres SelectTypeParameters=CR_Core SlurmctldDebug=info SlurmctldHost=__SLURMCTLD_HOST__ diff --git a/slurm/slurmd/Dockerfile b/slurm/slurmd/Dockerfile index cfb54ca..c059950 100644 --- a/slurm/slurmd/Dockerfile +++ b/slurm/slurmd/Dockerfile @@ -1,17 +1,17 @@ -FROM ubuntu:jammy +FROM ubuntu:noble ARG COMMIT="none" ARG CREATION_DATE="none" LABEL org.opencontainers.image.authors="Iacopo Colonnelli " -LABEL org.opencontainers.image.base.name="docker.io/ubuntu:jammy" +LABEL org.opencontainers.image.base.name="docker.io/ubuntu:noble" LABEL org.opencontainers.image.created="${CREATION_DATE}" LABEL org.opencontainers.image.licenses="OpenSSL" LABEL org.opencontainers.image.revision="${COMMIT}" LABEL org.opencontainers.image.ref.name="alphaunito/slurmd" LABEL org.opencontainers.image.source="https://github.com/alpha-unito/docker-for-hpc" LABEL org.opencontainers.image.title="Slurm compute node" -LABEL org.opencontainers.image.version="21.08.5" +LABEL org.opencontainers.image.version="23.11.4" RUN apt update \ && apt install -y --no-install-recommends \ diff --git a/slurm/slurmd/config/cgroups.conf b/slurm/slurmd/config/cgroups.conf index 30f9fd2..2b46597 100644 --- a/slurm/slurmd/config/cgroups.conf +++ b/slurm/slurmd/config/cgroups.conf @@ -1,5 +1,3 @@ -CgroupAutomount=yes -CgroupReleaseAgentDir="/etc/slurm/cgroup" ConstrainCores=yes ConstrainDevices=no ConstrainRAMSpace=yes