Commit

Merge remote-tracking branch 'upstream/main' into rhoai-2.14
dchourasia committed Sep 10, 2024
2 parents 999d6d4 + 0dd12c5 commit f33b171
Showing 9 changed files with 1,940 additions and 17 deletions.
483 changes: 483 additions & 0 deletions .tekton/ray-cuda-pull-request.yaml

Large diffs are not rendered by default.

480 changes: 480 additions & 0 deletions .tekton/ray-cuda-push.yaml

Large diffs are not rendered by default.

483 changes: 483 additions & 0 deletions .tekton/ray-rocm-pull-request.yaml

Large diffs are not rendered by default.

480 changes: 480 additions & 0 deletions .tekton/ray-rocm-push.yaml

Large diffs are not rendered by default.

@@ -67,12 +67,12 @@
 " worker_memory_requests=128,\n",
 " worker_memory_limits=256,\n",
 " head_memory=128,\n",
-" # Use the following parameters with NVIDIA GPUs \n",
-" image=\"quay.io/rhoai/ray:2.23.0-py39-cu121\",\n",
+" # Use the following parameters with NVIDIA GPUs\n",
+" image=\"quay.io/rhoai/ray:2.35.0-py39-cu121-torch24-fa26\",\n",
 " head_extended_resource_requests={'nvidia.com/gpu':1},\n",
 " worker_extended_resource_requests={'nvidia.com/gpu':1},\n",
 " # Or replace them with these parameters for AMD GPUs\n",
-" # image=\"quay.io/rhoai/ray:2.35.0-py39-rocm61-torch24\",\n",
+" # image=\"quay.io/rhoai/ray:2.35.0-py39-rocm61-torch24-fa26\",\n",
 " # head_extended_resource_requests={'amd.com/gpu':1},\n",
 " # worker_extended_resource_requests={'amd.com/gpu':1},\n",
 "))"
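
For orientation, the hunk above sits inside a codeflare_sdk ClusterConfiguration cell. A minimal sketch of how the updated parameters might be used follows; only the image, memory, and GPU fields come from this diff, while the name, namespace, num_workers, and the up()/wait_ready() calls are illustrative assumptions about the surrounding notebook.

# Hedged sketch: only the image/memory/GPU fields below are taken from the diff;
# the remaining names and values are assumptions for illustration.
from codeflare_sdk import Cluster, ClusterConfiguration

cluster = Cluster(ClusterConfiguration(
    name="ray-finetune",                     # assumed cluster name
    namespace="my-project",                  # assumed namespace
    num_workers=2,                           # assumed worker count
    worker_memory_requests=128,
    worker_memory_limits=256,
    head_memory=128,
    # NVIDIA GPUs: CUDA image updated by this commit
    image="quay.io/rhoai/ray:2.35.0-py39-cu121-torch24-fa26",
    head_extended_resource_requests={'nvidia.com/gpu': 1},
    worker_extended_resource_requests={'nvidia.com/gpu': 1},
    # AMD GPUs: use the ROCm image and 'amd.com/gpu' resources instead
    # image="quay.io/rhoai/ray:2.35.0-py39-rocm61-torch24-fa26",
))

cluster.up()          # request the Ray cluster
cluster.wait_ready()  # block until the cluster is usable
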
6 changes: 0 additions & 6 deletions examples/ray-finetune-llm-deepspeed/requirements.txt
@@ -1,11 +1,5 @@
accelerate==0.31.0
awscliv2==2.3.0
datasets==2.19.2
deepspeed==0.14.4
# Flash Attention 2 requires PyTorch to be installed first
# See https://github.com/Dao-AILab/flash-attention/issues/453
https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.3cxx11abiFALSE-cp39-cp39-linux_x86_64.whl
peft==0.11.1
ray[train]==2.23.0
torch==2.3.1
transformers==4.44.0
5 changes: 0 additions & 5 deletions examples/ray-finetune-llm-deepspeed/requirements_rocm.txt

This file was deleted.

12 changes: 10 additions & 2 deletions images/runtime/examples/ray-torch-cuda/Dockerfile
@@ -1,3 +1,11 @@
-FROM quay.io/rhoai/ray:2.23.0-py39-cu121
+FROM quay.io/rhoai/ray:2.35.0-py39-cu121
 
-RUN python3 -m pip install torch==2.3.1
+# Install PyTorch
+RUN python3 -m pip install torch==2.4.0
+
+# Install Flash Attention
+RUN python3 -m pip install wheel ninja
+RUN python3 -m pip install flash-attn==2.6.3 --no-build-isolation
+
+# Install DeepSpeed
+RUN DS_ACCELERATOR=cuda DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed==0.15.0 --global-option="build_ext" --global-option="-j8"
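
Outside the scope of this commit, a quick way to sanity-check the resulting ray-torch-cuda image is to import the pinned packages inside the container; the expected versions below simply mirror the Dockerfile pins above, and the check itself is an assumed convenience, not part of the repository.

# Assumed smoke test, run inside the built ray-torch-cuda image.
import torch
import flash_attn
import deepspeed
import ray

print("torch:", torch.__version__)            # 2.4.0 pinned above
print("flash-attn:", flash_attn.__version__)  # 2.6.3 pinned above
print("deepspeed:", deepspeed.__version__)    # 0.15.0 pinned above
print("ray:", ray.__version__)                # 2.35.0 from the base image
print("CUDA available:", torch.cuda.is_available())
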
2 changes: 1 addition & 1 deletion images/runtime/examples/ray-torch-rocm/Dockerfile
@@ -8,7 +8,7 @@ RUN python3 -m pip install torch==2.4.0 --index-url https://download.pytorch.org
 # Install Flash Attention
 RUN python3 -m pip install wheel ninja
 
-run export TMP_DIR=$(mktemp -d) \
+RUN export TMP_DIR=$(mktemp -d) \
 && cd $TMP_DIR \
 && git clone --depth 1 --branch v2.6.3 https://github.com/Dao-AILab/flash-attention.git \
 && cd flash-attention \
