diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..9096371 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,6 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "daily" \ No newline at end of file diff --git a/.github/workflows/ci-tests.yaml b/.github/workflows/ci-tests.yaml index 1dffe53..d786e08 100644 --- a/.github/workflows/ci-tests.yaml +++ b/.github/workflows/ci-tests.yaml @@ -27,7 +27,7 @@ jobs: CREATION_DATE=${{ env.NOW }} context: slurm/slurmctld load: true - tags: alphaunito/slurmctld:21.08.5 + tags: alphaunito/slurmctld:23.11.4 - name: "Build slurmd image" uses: docker/build-push-action@v5 with: @@ -36,7 +36,7 @@ jobs: CREATION_DATE=${{ env.NOW }} context: slurm/slurmd load: true - tags: alphaunito/slurmd:21.08.5 + tags: alphaunito/slurmd:23.11.4 - name: "Start Docker Compose" id: start-compose run: | @@ -54,7 +54,7 @@ jobs: --project-name slurm \ exec \ --user hpcuser \ - slurmctld srun hostname + slurmctld srun --deadline 00:01:00 hostname - name: "Show slurmctld logs on failure" if: ${{ always() && (steps.start-compose.outcome == 'failure' || steps.run-tests.outcome == 'failure') }} run: | diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index a810542..50da827 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -30,7 +30,7 @@ jobs: CREATION_DATE=${{ env.NOW }} context: slurm/slurmctld push: true - tags: alphaunito/slurmctld:21.08.5 + tags: alphaunito/slurmctld:23.11.4 - name: "Build slurmd image" uses: docker/build-push-action@v5 with: @@ -39,4 +39,4 @@ jobs: CREATION_DATE=${{ env.NOW }} context: slurm/slurmd push: true - tags: alphaunito/slurmd:21.08.5 \ No newline at end of file + tags: alphaunito/slurmd:23.11.4 \ No newline at end of file diff --git a/slurm/README.md b/slurm/README.md index 8d8c74b..a178503 100644 --- a/slurm/README.md +++ b/slurm/README.md @@ -1,11 +1,11 @@ # Slurm on 
Docker -This folder contains a fully Dockerized version of the [Slurm](https://slurm.schedmd.com/) queue manager. The version of SLURM shipped is the one downloaded from the apt repository, which is currently `21.08.5` +This folder contains a fully Dockerized version of the [Slurm](https://slurm.schedmd.com/) queue manager. The version of SLURM shipped is the one downloaded from the apt repository, which is currently `23.11.4` This repository contains the source code of different container images: -- `alphaunito/slurmctld:21.08.5`, which runs the Slurm control plane -- `alphaunito/slurmd:21:08.5`, which runs a Slurm compute node +- `alphaunito/slurmctld:23.11.4`, which runs the Slurm control plane +- `alphaunito/slurmd:23.11.4`, which runs a Slurm compute node Plus, it also contains a [docker-compose.yml](./docker-compose.yml) file that can deploy an entire Slurm cluster with a single controller and a set of compute nodes. All these components are detailed below @@ -14,8 +14,8 @@ Plus, it also contains a [docker-compose.yml](./docker-compose.yml) file that ca The `slurmctld` process is the central management daemon of Slurm. It constitutes the control plane of the Slurm queue manager. The `slurmctld` Docker image can be built and published on DockerHub using the following commands ```bash -docker build -t alphaunito/slurmctld:21.08.5 slurmctld -docker push alphaunito/slurmctld:21.08.5 +docker build -t alphaunito/slurmctld:23.11.4 slurmctld +docker push alphaunito/slurmctld:23.11.4 ``` To correctly populate the `slurm.conf` file, a `slurmctld` container needs 3 environment variables: @@ -31,8 +31,8 @@ Note that all the compute nodes in the simulated HPC cluster should have a reach The `slurmd` process is the compute node daemon for Slurm. It monitors all tasks running on the compute node, accepts work (tasks), launches tasks, and kills running tasks upon request. 
The `slurmd` Docker image can be built and published using the following commands ```bash -docker build -t alphaunito/slurmd:21.08.5 slurmd -docker push alphaunito/slurmd:21.08.5 +docker build -t alphaunito/slurmd:23.11.4 slurmd +docker push alphaunito/slurmd:23.11.4 ``` To correctly connect to a `slurmctld` node, a `slurmd` container needs a `SLURMCTLD_HOSTNAME` variable that should contain the hostname of the target `slurmctld` container. If this variable is not set, the container displays an error message and terminates @@ -51,4 +51,4 @@ To allow for unprivileged workloads, an `hpcuser` has been configured inside the docker exec -it --user hpcuser slurmctld bash ``` -In order to simulate an HPC facility, where the home folder is commonly mounted on a shared parallel file system, users may want to mount the `/home/hpcuser` folder as a shared volume among all the containers in the Slurm cluster. \ No newline at end of file +In order to simulate an HPC facility, where the home folder is commonly mounted on a shared parallel file system, users may want to mount the `/home/hpcuser` folder as a shared volume among all the containers in the Slurm cluster. 
diff --git a/slurm/docker-compose.yml b/slurm/docker-compose.yml index a8ce86e..7619f09 100644 --- a/slurm/docker-compose.yml +++ b/slurm/docker-compose.yml @@ -1,7 +1,6 @@ -version: "3.8" services: slurmctld: - image: alphaunito/slurmctld:21.08.5 + image: alphaunito/slurmctld:23.11.4 environment: SLURMD_HOSTNAME_PREFIX: ${COMPOSE_PROJECT_NAME}-slurmd SLURMD_NODES: 2 @@ -13,7 +12,7 @@ services: - munge:/etc/munge - mysql:/var/lib/mysql slurmd: - image: alphaunito/slurmd:21.08.5 + image: alphaunito/slurmd:23.11.4 deploy: mode: replicated replicas: 2 diff --git a/slurm/slurmctld/Dockerfile b/slurm/slurmctld/Dockerfile index 65cc211..1111b8e 100644 --- a/slurm/slurmctld/Dockerfile +++ b/slurm/slurmctld/Dockerfile @@ -1,17 +1,17 @@ -FROM ubuntu:jammy +FROM ubuntu:noble ARG COMMIT="none" ARG CREATION_DATE="none" LABEL org.opencontainers.image.authors="Iacopo Colonnelli " -LABEL org.opencontainers.image.base.name="docker.io/ubuntu:jammy" +LABEL org.opencontainers.image.base.name="docker.io/ubuntu:noble" LABEL org.opencontainers.image.created="${CREATION_DATE}" LABEL org.opencontainers.image.licenses="OpenSSL" LABEL org.opencontainers.image.revision="${COMMIT}" LABEL org.opencontainers.image.ref.name="alphaunito/slurmctld" LABEL org.opencontainers.image.source="https://github.com/alpha-unito/docker-for-hpc" LABEL org.opencontainers.image.title="Slurm management daemon" -LABEL org.opencontainers.image.version="21.08.5" +LABEL org.opencontainers.image.version="23.11.4" RUN apt update \ && apt install -y --no-install-recommends \ @@ -48,8 +48,7 @@ RUN apt update --gecos "" \ hpcuser -COPY config/cgroups.conf \ - config/slurm.conf \ +COPY config/slurm.conf \ /etc/slurm/ COPY config/supervisord.conf \ @@ -65,4 +64,4 @@ EXPOSE 22 6817 WORKDIR /home/hpcuser -ENTRYPOINT supervisord \ No newline at end of file +ENTRYPOINT ["supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"] \ No newline at end of file diff --git a/slurm/slurmctld/config/slurm.conf 
b/slurm/slurmctld/config/slurm.conf index baac5a6..16171dd 100644 --- a/slurm/slurmctld/config/slurm.conf +++ b/slurm/slurmctld/config/slurm.conf @@ -12,7 +12,7 @@ PartitionName=docker Nodes=ALL Default=YES MaxTime=INFINITE State=UP ProctrackType=proctrack/linuxproc ReturnToService=1 SchedulerType=sched/backfill -SelectType=select/cons_res +SelectType=select/cons_tres SelectTypeParameters=CR_Core SlurmctldDebug=info SlurmctldHost=__SLURMCTLD_HOST__ diff --git a/slurm/slurmctld/config/supervisord.conf b/slurm/slurmctld/config/supervisord.conf index 901f751..c03bd45 100644 --- a/slurm/slurmctld/config/supervisord.conf +++ b/slurm/slurmctld/config/supervisord.conf @@ -2,6 +2,7 @@ nodaemon=true logfile=/dev/null logfile_maxbytes=0 +user=root [program:munge] command=gosu munge /usr/sbin/munged --foreground diff --git a/slurm/slurmd/Dockerfile b/slurm/slurmd/Dockerfile index cfb54ca..5fc00fd 100644 --- a/slurm/slurmd/Dockerfile +++ b/slurm/slurmd/Dockerfile @@ -1,17 +1,17 @@ -FROM ubuntu:jammy +FROM ubuntu:noble ARG COMMIT="none" ARG CREATION_DATE="none" LABEL org.opencontainers.image.authors="Iacopo Colonnelli " -LABEL org.opencontainers.image.base.name="docker.io/ubuntu:jammy" +LABEL org.opencontainers.image.base.name="docker.io/ubuntu:noble" LABEL org.opencontainers.image.created="${CREATION_DATE}" LABEL org.opencontainers.image.licenses="OpenSSL" LABEL org.opencontainers.image.revision="${COMMIT}" LABEL org.opencontainers.image.ref.name="alphaunito/slurmd" LABEL org.opencontainers.image.source="https://github.com/alpha-unito/docker-for-hpc" LABEL org.opencontainers.image.title="Slurm compute node" -LABEL org.opencontainers.image.version="21.08.5" +LABEL org.opencontainers.image.version="23.11.4" RUN apt update \ && apt install -y --no-install-recommends \ @@ -50,7 +50,7 @@ RUN apt update --gecos "" \ hpcuser -COPY config/cgroups.conf \ +COPY config/cgroup.conf \ /etc/slurm/ COPY config/supervisord.conf \ @@ -66,4 +66,4 @@ EXPOSE 22 WORKDIR /home/hpcuser -ENTRYPOINT 
supervisord \ No newline at end of file +ENTRYPOINT ["supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"] \ No newline at end of file diff --git a/slurm/slurmctld/config/cgroups.conf b/slurm/slurmd/config/cgroup.conf similarity index 62% rename from slurm/slurmctld/config/cgroups.conf rename to slurm/slurmd/config/cgroup.conf index 30f9fd2..2b46597 100644 --- a/slurm/slurmctld/config/cgroups.conf +++ b/slurm/slurmd/config/cgroup.conf @@ -1,5 +1,3 @@ -CgroupAutomount=yes -CgroupReleaseAgentDir="/etc/slurm/cgroup" ConstrainCores=yes ConstrainDevices=no ConstrainRAMSpace=yes diff --git a/slurm/slurmd/config/cgroups.conf b/slurm/slurmd/config/cgroups.conf deleted file mode 100644 index 30f9fd2..0000000 --- a/slurm/slurmd/config/cgroups.conf +++ /dev/null @@ -1,7 +0,0 @@ -CgroupAutomount=yes -CgroupReleaseAgentDir="/etc/slurm/cgroup" -ConstrainCores=yes -ConstrainDevices=no -ConstrainRAMSpace=yes -ConstrainSwapSpace=yes -IgnoreSystemd=yes diff --git a/slurm/slurmd/config/supervisord.conf b/slurm/slurmd/config/supervisord.conf index 5d2dcc7..f780d2d 100644 --- a/slurm/slurmd/config/supervisord.conf +++ b/slurm/slurmd/config/supervisord.conf @@ -2,6 +2,7 @@ nodaemon=true logfile=/dev/null logfile_maxbytes=0 +user=root [program:munge] command=gosu munge /usr/sbin/munged --foreground