Skip to content

Commit

Permalink
Merge pull request #1 from AIPowerGrid/urek
Browse files Browse the repository at this point in the history
Upgrading
  • Loading branch information
websines authored Dec 6, 2024
2 parents 541441a + 858601a commit 682cbcc
Show file tree
Hide file tree
Showing 41 changed files with 1,145 additions and 323 deletions.
13 changes: 13 additions & 0 deletions .github/workflows/maintests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,19 @@ jobs:
with:
extra_args: --all-files

# Lints the CUDA and ROCm Dockerfiles with hadolint.
# Actions are pinned to release tags rather than a moving branch so a
# change upstream cannot silently alter or break this job.
dockerfile-lint:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
    - name: Lint CUDA Dockerfile
      uses: hadolint/hadolint-action@v3.1.0
      with:
        dockerfile: "Dockerfiles/Dockerfile.cuda"
    - name: Lint RoCM Dockerfile
      uses: hadolint/hadolint-action@v3.1.0
      with:
        dockerfile: "Dockerfiles/Dockerfile.rocm"

unit-tests:
runs-on: ubuntu-latest
strategy:
Expand Down
13 changes: 13 additions & 0 deletions .github/workflows/prtests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,19 @@ jobs:
with:
extra_args: --all-files

# Lints the CUDA and ROCm Dockerfiles with hadolint.
# Actions are pinned to release tags rather than a moving branch so a
# change upstream cannot silently alter or break this job.
dockerfile-lint:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
    - name: Lint CUDA Dockerfile
      uses: hadolint/hadolint-action@v3.1.0
      with:
        dockerfile: "Dockerfiles/Dockerfile.cuda"
    - name: Lint RoCM Dockerfile
      uses: hadolint/hadolint-action@v3.1.0
      with:
        dockerfile: "Dockerfiles/Dockerfile.rocm"

unit-tests:
runs-on: ubuntu-latest
strategy:
Expand Down
5 changes: 5 additions & 0 deletions .hadolint.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# hadolint settings: the severity threshold plus the rules that are
# deliberately not enforced for the Dockerfiles in this repository.
failure-threshold: warning
ignored:
  # Last USER should not be root
  - DL3002
  # Pin versions in apt get install
  - DL3008
  # Avoid cache directory with `pip install`
  - DL3042
16 changes: 8 additions & 8 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
rev: v5.0.0
hooks:
- id: check-yaml
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/psf/black
rev: 24.4.2
rev: 24.10.0
hooks:
- id: black
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.5.4
rev: v0.7.3
hooks:
- id: ruff
- repo: https://github.com/pre-commit/mirrors-mypy
rev: 'v1.11.0'
rev: 'v1.13.0'
hooks:
- id: mypy
args: []
Expand All @@ -38,9 +38,9 @@ repos:
- python-dotenv
- aiohttp
- horde_safety==0.2.3
- torch==2.3.1
- torch==2.5.0
- ruamel.yaml
- horde_engine==2.15.3
- horde_sdk==0.14.11
- horde_model_reference==0.9.0
- horde_engine==2.18.1
- horde_sdk==0.16.4
- horde_model_reference==0.9.1
- semver
30 changes: 0 additions & 30 deletions Dockerfiles/Dockerfile.12.1.1-22.04

This file was deleted.

30 changes: 0 additions & 30 deletions Dockerfiles/Dockerfile.12.2.2-22.04

This file was deleted.

30 changes: 0 additions & 30 deletions Dockerfiles/Dockerfile.12.3.2-22.04

This file was deleted.

71 changes: 71 additions & 0 deletions Dockerfiles/Dockerfile.cuda
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Stage 1 (base): CUDA runtime image plus Python, pip, venv, libgl1 and git
ARG CUDA_VERSION=12.4.1
FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu22.04 AS base

# Every RUN below executes under bash with pipefail enabled
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

ARG DEBIAN_FRONTEND=noninteractive
ARG PYTHON_VERSION=3.11
# Persist the Python version and install location for later stages
ENV APP_HOME=/horde-worker-reGen \
    PYTHON_VERSION=${PYTHON_VERSION}

# The requested Python version comes from the deadsnakes PPA; the apt
# lists are removed at the end of the same layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends software-properties-common \
    && add-apt-repository ppa:deadsnakes/ppa \
    && apt-get install -y --no-install-recommends \
        git \
        libgl1 \
        python${PYTHON_VERSION} \
        python${PYTHON_VERSION}-venv \
        python3-pip \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Derive the PyTorch wheel suffix from the CUDA version (major.minor with
# the dot removed, e.g. 12.4.1 -> 124) and store it in /env_vars so later
# RUN steps can source it.
ARG CUDA_VERSION
RUN CUDA_VERSION_SHORT="$(echo "${CUDA_VERSION}" | cut -d. -f1-2 | tr -d '.')" \
    && echo "${CUDA_VERSION_SHORT}" \
    && echo "export CUDA_VERSION_SHORT=${CUDA_VERSION_SHORT}" >> /env_vars

# Stage 2 (builder): clone the worker repository and install its Python
# dependencies into a virtual environment under APP_HOME.
FROM base AS builder

# Repository owner and branch to clone (defaults below)
ARG GIT_BRANCH=main
ARG GIT_OWNER=Haidra-Org

# Record the selected branch and owner in /env_vars
RUN echo "export GIT_BRANCH=${GIT_BRANCH}" >> /env_vars && \
    echo "export GIT_OWNER=${GIT_OWNER}" >> /env_vars

WORKDIR "${APP_HOME}"

# Clone the repository into the working directory and switch to the branch
RUN git clone "https://github.com/${GIT_OWNER}/horde-worker-reGen.git" . && \
    git switch "${GIT_BRANCH}"

# Create the virtual environment and put its bin directory first on PATH so
# the `pip` calls below resolve to the venv
RUN python"${PYTHON_VERSION}" -m venv "${APP_HOME}/venv"
ENV PATH="${APP_HOME}/venv/bin:$PATH"

# Install dependencies
ARG PIP_CACHE_DIR=/pip-cache
ARG USE_PIP_CACHE=true

# /env_vars is created during the build, so the linter cannot follow the
# `.` (source) below; silence that finding for this instruction only.
# hadolint ignore=SC1091
RUN --mount=type=cache,target="${PIP_CACHE_DIR}",sharing=locked,id=pip-cache \
    . /env_vars && \
    if [ "${USE_PIP_CACHE}" = "true" ]; then \
        pip install --cache-dir="${PIP_CACHE_DIR}" opencv-python-headless -r requirements.txt -U --extra-index-url "https://download.pytorch.org/whl/cu${CUDA_VERSION_SHORT}"; \
    else \
        pip install opencv-python-headless -r requirements.txt -U --extra-index-url "https://download.pytorch.org/whl/cu${CUDA_VERSION_SHORT}"; \
    fi

# Stage 3 (final): add the entrypoint and setup scripts on top of the builder
FROM builder AS final

WORKDIR "${APP_HOME}"
COPY entrypoint.sh /entrypoint.sh
# A wildcard source may match several files, so the destination must be
# written as a directory (trailing slash).
COPY setup_*.sh "${APP_HOME}/"
RUN chmod +x /entrypoint.sh

# The container is stopped with SIGINT
STOPSIGNAL SIGINT

ENTRYPOINT ["/entrypoint.sh"]
75 changes: 75 additions & 0 deletions Dockerfiles/Dockerfile.rocm
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Stage 1 (base): ROCm terminal image plus Python, build tooling and git
ARG ROCM_VERSION=6.1.2
FROM rocm/rocm-terminal:${ROCM_VERSION} AS base

# Switch to root at the filesystem root for the package installation below
USER root
WORKDIR /
# Every RUN below executes under bash with pipefail enabled
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

ARG DEBIAN_FRONTEND=noninteractive
ARG PYTHON_VERSION=3.11
# Persist the Python version and install location for later stages
ENV APP_HOME=/horde-worker-reGen \
    PYTHON_VERSION=${PYTHON_VERSION}

# The requested Python version comes from the deadsnakes PPA; the apt
# lists are removed at the end of the same layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends software-properties-common \
    && add-apt-repository ppa:deadsnakes/ppa \
    && apt-get install -y --no-install-recommends \
        git \
        ninja-build \
        python${PYTHON_VERSION} \
        python${PYTHON_VERSION}-dev \
        python${PYTHON_VERSION}-distutils \
        python${PYTHON_VERSION}-venv \
        python3-pip \
        rocm \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Derive the PyTorch wheel suffix from the ROCm version (major.minor,
# e.g. 6.1.2 -> 6.1) and store it in /env_vars so later RUN steps can
# source it.
ARG ROCM_VERSION
RUN ROCM_VERSION_SHORT="$(echo "${ROCM_VERSION}" | cut -d. -f1-2)" \
    && echo "export ROCM_VERSION_SHORT=${ROCM_VERSION_SHORT}" >> /env_vars

# Stage 2 (builder): clone the worker repository and install its Python
# dependencies into a virtual environment under APP_HOME.
FROM base AS builder

# Repository owner and branch to clone (defaults below)
ARG GIT_BRANCH=main
ARG GIT_OWNER=Haidra-Org

# Record the selected branch and owner in /env_vars
RUN echo "export GIT_BRANCH=${GIT_BRANCH}" >> /env_vars && \
    echo "export GIT_OWNER=${GIT_OWNER}" >> /env_vars

WORKDIR "${APP_HOME}"

# Clone the repository into the working directory and switch to the branch
RUN git clone "https://github.com/${GIT_OWNER}/horde-worker-reGen.git" . && \
    git switch "${GIT_BRANCH}"

# Create the virtual environment and put its bin directory first on PATH so
# the `pip` calls below resolve to the venv
RUN python"${PYTHON_VERSION}" -m venv "${APP_HOME}/venv"
ENV PATH="${APP_HOME}/venv/bin:$PATH"

# Install dependencies
ARG PIP_CACHE_DIR=/pip-cache
ARG USE_PIP_CACHE=true

# /env_vars is created during the build, so the linter cannot follow the
# `.` (source) below; silence that finding for this instruction only.
# After installing, the NVIDIA management-library bindings are uninstalled.
# hadolint ignore=SC1091
RUN --mount=type=cache,target="${PIP_CACHE_DIR}",sharing=locked,id=pip-cache \
    . /env_vars && \
    if [ "${USE_PIP_CACHE}" = "true" ]; then \
        pip install --cache-dir="${PIP_CACHE_DIR}" opencv-python-headless -r requirements.rocm.txt -U --extra-index-url "https://download.pytorch.org/whl/rocm${ROCM_VERSION_SHORT}"; \
    else \
        pip install opencv-python-headless -r requirements.rocm.txt -U --extra-index-url "https://download.pytorch.org/whl/rocm${ROCM_VERSION_SHORT}"; \
    fi && \
    pip uninstall -y pynvml nvidia-ml-py
# Stage 3 (final): add the entrypoint and setup scripts on top of the builder
FROM builder AS final

WORKDIR "${APP_HOME}"
COPY entrypoint.sh /entrypoint.sh
# A wildcard source may match several files, so the destination must be
# written as a directory (trailing slash).
COPY setup_*.sh "${APP_HOME}/"
RUN chmod +x /entrypoint.sh

# The container is stopped with SIGINT
STOPSIGNAL SIGINT

ENTRYPOINT ["/entrypoint.sh"]
Loading

0 comments on commit 682cbcc

Please sign in to comment.