Skip to content

Commit

Permalink
updated github runner template
Browse files Browse the repository at this point in the history
  • Loading branch information
PicoCreator authored Feb 3, 2024
1 parent 021424f commit 24cd174
Show file tree
Hide file tree
Showing 3 changed files with 204 additions and 90 deletions.
180 changes: 90 additions & 90 deletions .github/workflows/docker-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -115,98 +115,98 @@ jobs:
cache-from: type=gha,src=docker/env-cuda-12-1
cache-to: type=gha,mode=max

# Builds and publishes the self-hosted GitHub Actions runner image from
# docker/github-worker-cuda-12-1. Runs after build_env; the runner Dockerfile
# is based on the env-cuda-12-1 image (presumably the one build_env publishes).
build_runner:
  name: Docker Env Image (github-worker-12-1)

  needs: build_env
  runs-on: ubuntu-latest
  permissions:
    contents: read
    packages: write
    # This is used to complete the identity challenge
    # with sigstore/fulcio when running outside of PRs.
    id-token: write

  steps:
    # Get and log the free space
    - name: Get system free space (Before reclaim)
      run: |
        echo "Free space:"
        df -h

    # Due to the docker image being > available space on the runner
    # we need to do some optimization, to create more space.
    # https://github.com/marketplace/actions/disk-space-reclaimer
    # https://stackoverflow.com/questions/76294509/github-actions-docker-service-container-25gb-cannot-be-loaded
    - name: Maximize build space
      uses: insightsengineering/disk-space-reclaimer@v1
      with:
        # this might remove tools that are actually needed,
        # if set to "true" but frees about 6 GB
        tools-cache: true

        # all of these default to true, but feel free to set to
        # "false" if necessary for your workflow
        android: true
        dotnet: true
        haskell: true
        large-packages: true
        swap-storage: true
        docker-images: true

    # Get and log the free space
    - name: Get system free space (After reclaim)
      run: |
        echo "Free space:"
        df -h

    - name: Checkout repository
      uses: actions/checkout@v3

    # Install the cosign tool except on PR
    # https://github.com/sigstore/cosign-installer
    # NOTE(review): the action ref was mangled by e-mail obfuscation in the
    # captured page; it is restored here as v3.3.0 on the assumption that it
    # matched the cosign-release value below - confirm against the repository.
    # NOTE(review): cosign-release selects the cosign binary version, not the
    # installer version - confirm that a cosign release tagged v3.3.0 exists.
    - name: Install cosign
      if: github.event_name != 'pull_request'
      uses: sigstore/cosign-installer@v3.3.0
      with:
        cosign-release: 'v3.3.0'

    # Workaround: https://github.com/docker/build-push-action/issues/461
    - name: Setup Docker buildx
      uses: docker/setup-buildx-action@v2

    # Login against a Docker registry except on PR
    # https://github.com/docker/login-action
    - name: Log into registry ${{ env.REGISTRY }}
      if: github.event_name != 'pull_request'
      uses: docker/login-action@28218f9b04b4f3f62068d7b6ce6ca5b26e35336c
      with:
        registry: ${{ env.REGISTRY }}
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}

    # Extract metadata (tags, labels) for Docker
    # https://github.com/docker/metadata-action
    - name: Extract Docker metadata
      id: meta
      uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38
      with:
        images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}

    # Image references must be lowercase; export a lowercased copy of
    # IMAGE_NAME for the tag used by the build step below.
    - name: downcase IMAGE_NAME
      run: |
        echo "IMAGE_NAME_LC=${IMAGE_NAME,,}" >>${GITHUB_ENV}

    # Build and push Docker image with Buildx (don't push on PR)
    # https://github.com/docker/build-push-action
    - name: Build and push Docker image (github-worker-cuda-12-1)
      id: build-and-push
      uses: docker/build-push-action@v4
      with:
        context: "{{defaultContext}}:docker/github-worker-cuda-12-1"
        push: ${{ github.event_name != 'pull_request' }}  # Don't push on PR
        tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME_LC }}:github-worker-cuda-12-1
        # tags: ${{ steps.meta.outputs.tags }}
        labels: ${{ steps.meta.outputs.labels }}
        # The gha cache backend is keyed by "scope" (it has no "src" option).
        # Without an explicit scope every image built by this workflow shares
        # the default scope and each build overwrites the previous cache.
        cache-from: type=gha,scope=github-worker-cuda-12-1
        cache-to: type=gha,mode=max,scope=github-worker-cuda-12-1
41 changes: 41 additions & 0 deletions docker/github-worker-cuda-12-1/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Image for a self-hosted GitHub Actions runner on top of the CUDA 12.1 env
# image. Two copies of the runner are installed (/actions-runner and
# /actions-runner-lane2) so that entrypoint.sh can optionally start a second
# "lane" on the same machine.

# Temporary, until the rwkv package is public
FROM ghcr.io/rwkv/rwkv-lm-lora:env-cuda-12-1
# FROM ghcr.io/rwkv/rwkv-infctx-trainer:env-cuda-12-1

# GitHub Actions runner release to install. Override at build time with
# `--build-arg RUNNER_VERSION=x.y.z`; the default keeps the pinned release.
ARG RUNNER_VERSION=2.312.0

# Install the github runner
# `curl -f` turns an HTTP error (e.g. a 404 for a mistyped version) into a
# failed build step instead of saving the error page and failing later in tar.
RUN cd / && mkdir actions-runner && cd actions-runner && \
    curl -f -L \
        -o "actions-runner-linux-x64-${RUNNER_VERSION}.tar.gz" \
        "https://github.com/actions/runner/releases/download/v${RUNNER_VERSION}/actions-runner-linux-x64-${RUNNER_VERSION}.tar.gz" && \
    tar xzf "./actions-runner-linux-x64-${RUNNER_VERSION}.tar.gz" && \
    rm "./actions-runner-linux-x64-${RUNNER_VERSION}.tar.gz"

# Clone the runner, for lane2 track
RUN cd / && cp -r /actions-runner /actions-runner-lane2

# Install dependencies
RUN cd /actions-runner && ./bin/installdependencies.sh && \
    cd /actions-runner-lane2 && ./bin/installdependencies.sh

# Copy the entrypoint script, and set it up
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]

# Configure default env variables
# These are read by entrypoint.sh at container start. RUNNER_TOKEN must be
# supplied at run time; it is left empty here so no secret is baked into the
# image. An empty RUNNER_NAME makes the entrypoint fall back to the hostname.
ENV RUNNER_LABELS=""
ENV RUNNER_NAME=""
ENV RUNNER_TOKEN=""
ENV RUNNER_REPO_URL="https://github.com/RWKV"

# Runner with lane2 track
# ---
# this helps setup dual runs on the same machine
# to help ensure better utilization of GPUs.
#
# In general DS2/3_offload should be used.
#
# Tags should be adjusted to be half their original spec
# to account for the fact that we are running two runners
#
# This is only useful for high GPU, and high ram count machines
ENV RUNNER_LANE2="false"
73 changes: 73 additions & 0 deletions docker/github-worker-cuda-12-1/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#!/bin/bash
#
# Container entrypoint for the GitHub Actions runner image.
#
# Reads from the environment:
#   RUNNER_TOKEN     registration token; when empty, runner setup is skipped
#   RUNNER_REPO_URL  URL passed to config.sh --url
#   RUNNER_NAME      runner name; defaults to the container hostname
#   RUNNER_LABELS    extra comma-separated labels (optional)
#   RUNNER_LANE2     "true" starts two runners (lane1 + lane2) instead of one
#
# Any arguments given to the container are exec'ed after the runners have been
# started in the background; with no arguments the script waits on them.

# The runner scripts refuse to run as root unless this is set.
export RUNNER_ALLOW_RUNASROOT="1"

# CUDA version for label
CUDA_VER="cuda-12-1"

# Configure one runner installation and start it in the background.
#   $1  runner installation directory
#   $2  runner name to register
#   $3  lane label ("nolane", "lane1" or "lane2")
# Returns non-zero, without starting run.sh, if the directory cannot be
# entered or config.sh fails. Leaves the working directory at $1.
start_runner() {
    local runner_dir="$1"
    local runner_name="$2"
    local lane_label="$3"

    # Only append RUNNER_LABELS when it is non-empty, so the label list
    # does not end in a dangling comma.
    local labels="${lane_label},${CUDA_VER}${RUNNER_LABELS:+,${RUNNER_LABELS}}"

    if ! cd "${runner_dir}"; then
        echo "# [ERROR] cannot enter ${runner_dir}, runner '${runner_name}' not started"
        return 1
    fi

    # Configure unattended
    if ! ./config.sh \
        --unattended \
        --url "${RUNNER_REPO_URL}" \
        --token "${RUNNER_TOKEN}" \
        --name "${runner_name}" \
        --replace \
        --labels "${labels}"; then
        echo "# [ERROR] config.sh failed, runner '${runner_name}' not started"
        return 1
    fi

    # Run it in the background; the final `wait` below collects it
    ./run.sh &
}

# Check the URL, token, and name of the runner from the container ENV vars
# and if they are not set, provide default values
if [[ -z "${RUNNER_NAME}" ]]; then
    # Assign and export separately so a hostname failure is not masked
    RUNNER_NAME="$(hostname)"
    export RUNNER_NAME
fi

if [[ -z "${RUNNER_TOKEN}" ]]; then
    echo "# [WARNING] RUNNER_TOKEN is missing, skipping github runner setup"
else
    echo "# [INFO] lane1 starting up ... "

    # The second runner is only started when RUNNER_LANE2=true
    if [ "$RUNNER_LANE2" != true ]; then
        # Single runner, labelled "nolane"
        start_runner /actions-runner "${RUNNER_NAME}" "nolane"

        echo "# [INFO] lane2 runner is disabled"
    else
        # Two runners sharing the machine, labelled "lane1" and "lane2"
        start_runner /actions-runner "${RUNNER_NAME}-lane1" "lane1"

        echo "# [INFO] lane2 starting up ... "

        start_runner /actions-runner-lane2 "${RUNNER_NAME}-lane2" "lane2"
    fi
fi

# Follow up on any forwarded command args
# (exec replaces this shell, so the `wait` below is not reached in that case)
if [[ $# -gt 0 ]]; then
    cd /root
    exec "$@"
fi

# Wait for everything to exit
wait

0 comments on commit 24cd174

Please sign in to comment.