Skip to content

Commit

Permalink
test
Browse files Browse the repository at this point in the history
  • Loading branch information
IlyasMoutawwakil committed Aug 29, 2024
1 parent 08a8659 commit ca841a7
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 79 deletions.
57 changes: 30 additions & 27 deletions .github/workflows/test_api_rocm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,43 +25,46 @@ concurrency:
cancel-in-progress: true
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}

env:
IMAGE: ghcr.io/huggingface/optimum-benchmark:latest-rocm

jobs:
build_image_and_run_api_rocm_tests:
runs-on: [self-hosted, amd-gpu, single-gpu, mi250]

setup_rocm_docker_devices:
# scripts/rocm_docker_devices.sh builds the ROCM_DOCKER_DEVICES environment variable,
# a string of the form "--device /dev/kfd --device /dev/dri/renderD128 --device /dev/dri/renderD129 ...",
# which the next job passes in its container options to mount those devices in the docker container
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4

- name: Set target devices
- name: Set up ROCM_DOCKER_DEVICES env var
run: |
echo "DEVICE:$DEVICE"
echo "DEVICE=$DEVICE" >> $GITHUB_ENV
chmod +x scripts/rocm_docker_devices.sh
scripts/rocm_docker_devices.sh
shell: bash

# Publishes the device string as the step output `rocm_docker_devices`.
# Uses the $GITHUB_OUTPUT file; the `::set-output` workflow command is deprecated.
# NOTE(review): ROCM_DOCKER_DEVICES must already be in this step's environment
# (e.g. written to $GITHUB_ENV by an earlier step) - an `export` inside a
# previous step's script does not carry over. The job also needs an `outputs:`
# mapping (outputs.rocm_docker_devices -> steps.rocm_docker_devices.outputs.rocm_docker_devices)
# for `needs.<job>.outputs.rocm_docker_devices` to resolve - confirm both.
- name: Set outputs
  id: rocm_docker_devices
  run: echo "rocm_docker_devices=$ROCM_DOCKER_DEVICES" >> "$GITHUB_OUTPUT"

run_api_rocm_tests:
needs: setup_rocm_docker_devices

runs-on: [self-hosted, amd-gpu, single-gpu, mi250]

- name: Unroot docker image
container:
image: ghcr.io/huggingface/optimum-benchmark:latest-rocm
options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ ${{ needs.setup_rocm_docker_devices.outputs.rocm_docker_devices }}

steps:
- name: Checkout
uses: actions/checkout@v4

- name: Install dependencies
run: |
docker build --build-arg IMAGE=${{ env.IMAGE }} --build-arg USER_ID=$(id -u) --build-arg GROUP_ID=$(id -g) -t ${{ env.IMAGE }}-unroot docker/unroot
pip install -e .[testing,timm,diffusers,codecarbon]
- name: Run tests
uses: addnab/docker-run-action@v3
env:
DEVICE: ${{ env.DEVICE }}
HF_TOKEN: ${{ secrets.HF_TOKEN }}
PUSH_REPO_ID: optimum-benchmark/rocm
with:
image: ${{ env.IMAGE }}-unroot
options: |
--rm
--shm-size 64G
--env HF_TOKEN
--env PUSH_REPO_ID
--device /dev/kfd
--device /dev/dri/${{ env.DEVICE }}
--volume ${{ github.workspace }}:/workspace
--workdir /workspace
run: |
pip install -e .[testing,timm,diffusers,codecarbon]
pytest -s -x -k "api and cuda"
run: |
pytest -s -x -k "api and cuda"
38 changes: 10 additions & 28 deletions .github/workflows/test_cli_rocm_pytorch_multi_gpu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,43 +25,25 @@ concurrency:
cancel-in-progress: true
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}

env:
IMAGE: ghcr.io/huggingface/optimum-benchmark:latest-rocm

jobs:
run_cli_rocm_pytorch_multi_gpu_tests:
runs-on: [self-hosted, amd-gpu, multi-gpu, mi250]

container:
image: ghcr.io/huggingface/optimum-benchmark:latest-rocm
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/

steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Set target devices
- name: Install dependencies
run: |
echo "DEVICE0:$DEVICE0"
echo "DEVICE1:$DEVICE1"
echo "DEVICE0=$DEVICE0" >> $GITHUB_ENV
echo "DEVICE1=$DEVICE1" >> $GITHUB_ENV
- name: Unroot docker image
run: |
docker build --build-arg IMAGE=${{ env.IMAGE }} --build-arg USER_ID=$(id -u) --build-arg GROUP_ID=$(id -g) -t ${{ env.IMAGE }}-unroot docker/unroot
pip install -e .[testing,diffusers,timm,deepspeed,peft,autoawq,auto-gptq]
- name: Run tests
uses: addnab/docker-run-action@v3
env:
DEVICE0: ${{ env.DEVICE0 }}
DEVICE1: ${{ env.DEVICE1 }}
with:
image: ${{ env.IMAGE }}-unroot
options: |
--rm
--shm-size 64G
--device /dev/kfd
--device /dev/dri/${{ env.DEVICE0 }}
--device /dev/dri/${{ env.DEVICE1 }}
--volume ${{ github.workspace }}:/workspace
--workdir /workspace
run: |
pip install -e .[testing,diffusers,timm,deepspeed,peft,autoawq,auto-gptq]
pytest -x -s -k "cli and cuda and pytorch and (dp or ddp or device_map or deepspeed) and not (bnb or awq)"
HF_TOKEN: ${{ secrets.HF_TOKEN }}
PUSH_REPO_ID: optimum-benchmark/rocm
run: |
pytest -x -s -k "cli and cuda and pytorch and (dp or ddp or device_map or deepspeed) and not (bnb or awq)"
34 changes: 10 additions & 24 deletions .github/workflows/test_cli_rocm_pytorch_single_gpu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,39 +25,25 @@ concurrency:
cancel-in-progress: true
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}

env:
IMAGE: ghcr.io/huggingface/optimum-benchmark:latest-rocm

jobs:
run_cli_rocm_pytorch_single_gpu_tests:
runs-on: [self-hosted, amd-gpu, single-gpu, mi250]

container:
image: ghcr.io/huggingface/optimum-benchmark:latest-rocm
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/

steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Set target devices
- name: Install dependencies
run: |
echo "DEVICE:$DEVICE"
echo "DEVICE=$DEVICE" >> $GITHUB_ENV
- name: Unroot docker image
run: |
docker build --build-arg IMAGE=${{ env.IMAGE }} --build-arg USER_ID=$(id -u) --build-arg GROUP_ID=$(id -g) -t ${{ env.IMAGE }}-unroot docker/unroot
pip install -e .[testing,diffusers,timm,peft,autoawq,auto-gptq]
- name: Run tests
uses: addnab/docker-run-action@v3
env:
DEVICE: ${{ env.DEVICE }}
with:
image: ${{ env.IMAGE }}-unroot
options: |
--rm
--shm-size 64G
--device /dev/kfd
--device /dev/dri/${{ env.DEVICE }}
--volume ${{ github.workspace }}:/workspace
--workdir /workspace
run: |
pip install -e .[testing,diffusers,timm,peft,autoawq,auto-gptq]
pytest -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not (bnb or awq)"
HF_TOKEN: ${{ secrets.HF_TOKEN }}
PUSH_REPO_ID: optimum-benchmark/rocm
run: |
pytest -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not (bnb or awq)"
27 changes: 27 additions & 0 deletions scripts/rocm_docker_devices.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/bin/bash
#
# Builds the `--device ...` option string that exposes the AMD GPUs selected by
# ROCR_VISIBLE_DEVICES to a Docker container.
#
# Input:
#   ROCR_VISIBLE_DEVICES  comma-separated, zero-based indices (e.g. "0,1") into
#                         the list of /dev/dri/renderD* nodes.
# Output:
#   ROCM_DOCKER_DEVICES   e.g. "--device /dev/kfd --device /dev/dri/renderD128".
#                         It is exported, and also appended to the file named by
#                         $GITHUB_ENV when that variable is set, because an
#                         `export` in an executed script never reaches the
#                         caller or later workflow steps.
# Exit status:
#   1 if ROCR_VISIBLE_DEVICES is unset/empty, no render node exists, or an
#   index is not a non-negative integer within range.

# Check if the variable is set
if [ -z "$ROCR_VISIBLE_DEVICES" ]; then
    echo "Environment variable ROCR_VISIBLE_DEVICES is not set" >&2
    exit 1
fi

# Get the list of renderDxxx devices in /dev/dri.
# A glob is used instead of parsing `ls`; nullglob makes "no match" an empty
# array rather than the literal pattern.
shopt -s nullglob
ROCM_RENDER_DEVICES=(/dev/dri/renderD*)
shopt -u nullglob

if [ "${#ROCM_RENDER_DEVICES[@]}" -eq 0 ]; then
    echo "No render devices found in /dev/dri" >&2
    exit 1
fi

# Split the ROCR_VISIBLE_DEVICES variable by commas to get individual device indices
IFS=',' read -r -a DEVICE_INDICES <<<"$ROCR_VISIBLE_DEVICES"

# Construct the --device options for Docker
ROCM_DOCKER_DEVICES="--device /dev/kfd"
for INDEX in "${DEVICE_INDICES[@]}"; do
    # Tolerate whitespace around entries, e.g. "0, 1".
    INDEX="${INDEX//[[:space:]]/}"

    # Reject anything that is not a plain non-negative integer: a negative
    # value would otherwise index the array from its end, and a non-numeric
    # value would be misreported as "out of range".
    if ! [[ "$INDEX" =~ ^[0-9]+$ ]]; then
        echo "Index '$INDEX' is not a non-negative integer" >&2
        exit 1
    fi

    # Force base 10 so a leading zero ("08") is not parsed as octal.
    INDEX=$((10#$INDEX))

    if [ "$INDEX" -lt "${#ROCM_RENDER_DEVICES[@]}" ]; then
        ROCM_DOCKER_DEVICES+=" --device ${ROCM_RENDER_DEVICES[$INDEX]}"
    else
        echo "Index $INDEX is out of range for available render devices" >&2
        exit 1
    fi
done

# export the ROCM_DOCKER_DEVICES variable (visible to child processes, or to
# the caller when this file is sourced)
export ROCM_DOCKER_DEVICES

# Under GitHub Actions, persist the value for subsequent steps of the job.
if [ -n "${GITHUB_ENV:-}" ]; then
    echo "ROCM_DOCKER_DEVICES=$ROCM_DOCKER_DEVICES" >>"$GITHUB_ENV"
fi

0 comments on commit ca841a7

Please sign in to comment.