Commit

Merge remote-tracking branch 'upstream/main' into rhoai-2.14
dchourasia committed Sep 10, 2024
2 parents 999d6d4 + 0dd12c5 commit f33b171
Showing 9 changed files with 1,940 additions and 17 deletions.
483 changes: 483 additions & 0 deletions .tekton/ray-cuda-pull-request.yaml

Large diffs are not rendered by default.

480 changes: 480 additions & 0 deletions .tekton/ray-cuda-push.yaml

Large diffs are not rendered by default.

483 changes: 483 additions & 0 deletions .tekton/ray-rocm-pull-request.yaml

Large diffs are not rendered by default.

480 changes: 480 additions & 0 deletions .tekton/ray-rocm-push.yaml

Large diffs are not rendered by default.

@@ -67,12 +67,12 @@
 " worker_memory_requests=128,\n",
 " worker_memory_limits=256,\n",
 " head_memory=128,\n",
-" # Use the following parameters with NVIDIA GPUs \n",
-" image=\"quay.io/rhoai/ray:2.23.0-py39-cu121\",\n",
+" # Use the following parameters with NVIDIA GPUs\n",
+" image=\"quay.io/rhoai/ray:2.35.0-py39-cu121-torch24-fa26\",\n",
 " head_extended_resource_requests={'nvidia.com/gpu':1},\n",
 " worker_extended_resource_requests={'nvidia.com/gpu':1},\n",
 " # Or replace them with these parameters for AMD GPUs\n",
-" # image=\"quay.io/rhoai/ray:2.35.0-py39-rocm61-torch24\",\n",
+" # image=\"quay.io/rhoai/ray:2.35.0-py39-rocm61-torch24-fa26\",\n",
 " # head_extended_resource_requests={'amd.com/gpu':1},\n",
 " # worker_extended_resource_requests={'amd.com/gpu':1},\n",
 "))"
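
For orientation, the hunk above sits inside a codeflare_sdk ClusterConfiguration cell. A minimal sketch of how the updated parameters might be used follows; only the image, memory, and GPU fields come from this diff, while the name, namespace, num_workers, and the up()/wait_ready() calls are illustrative assumptions about the surrounding notebook.

# Hedged sketch: only the image/memory/GPU fields below are taken from the diff;
# the remaining names and values are assumptions for illustration.
from codeflare_sdk import Cluster, ClusterConfiguration

cluster = Cluster(ClusterConfiguration(
    name="ray-finetune",                     # assumed cluster name
    namespace="my-project",                  # assumed namespace
    num_workers=2,                           # assumed worker count
    worker_memory_requests=128,
    worker_memory_limits=256,
    head_memory=128,
    # NVIDIA GPUs: CUDA image updated by this commit
    image="quay.io/rhoai/ray:2.35.0-py39-cu121-torch24-fa26",
    head_extended_resource_requests={'nvidia.com/gpu': 1},
    worker_extended_resource_requests={'nvidia.com/gpu': 1},
    # AMD GPUs: use the ROCm image and 'amd.com/gpu' resources instead
    # image="quay.io/rhoai/ray:2.35.0-py39-rocm61-torch24-fa26",
))

cluster.up()          # request the Ray cluster
cluster.wait_ready()  # block until the cluster is usable
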
6 changes: 0 additions & 6 deletions examples/ray-finetune-llm-deepspeed/requirements.txt
@@ -1,11 +1,5 @@
accelerate==0.31.0
awscliv2==2.3.0
datasets==2.19.2
deepspeed==0.14.4
# Flash Attention 2 requires PyTorch to be installed first
# See https://github.com/Dao-AILab/flash-attention/issues/453
https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.3cxx11abiFALSE-cp39-cp39-linux_x86_64.whl
peft==0.11.1
ray[train]==2.23.0
torch==2.3.1
transformers==4.44.0
5 changes: 0 additions & 5 deletions examples/ray-finetune-llm-deepspeed/requirements_rocm.txt

This file was deleted.

12 changes: 10 additions & 2 deletions images/runtime/examples/ray-torch-cuda/Dockerfile
@@ -1,3 +1,11 @@
-FROM quay.io/rhoai/ray:2.23.0-py39-cu121
+FROM quay.io/rhoai/ray:2.35.0-py39-cu121
 
-RUN python3 -m pip install torch==2.3.1
+# Install PyTorch
+RUN python3 -m pip install torch==2.4.0
+
+# Install Flash Attention
+RUN python3 -m pip install wheel ninja
+RUN python3 -m pip install flash-attn==2.6.3 --no-build-isolation
+
+# Install DeepSpeed
+RUN DS_ACCELERATOR=cuda DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed==0.15.0 --global-option="build_ext" --global-option="-j8"
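
Outside the scope of this commit, a quick way to sanity-check the resulting ray-torch-cuda image is to import the pinned packages inside the container; the expected versions below simply mirror the Dockerfile pins above, and the check itself is an assumed convenience, not part of the repository.

# Assumed smoke test, run inside the built ray-torch-cuda image.
import torch
import flash_attn
import deepspeed
import ray

print("torch:", torch.__version__)            # 2.4.0 pinned above
print("flash-attn:", flash_attn.__version__)  # 2.6.3 pinned above
print("deepspeed:", deepspeed.__version__)    # 0.15.0 pinned above
print("ray:", ray.__version__)                # 2.35.0 from the base image
print("CUDA available:", torch.cuda.is_available())
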
2 changes: 1 addition & 1 deletion images/runtime/examples/ray-torch-rocm/Dockerfile
@@ -8,7 +8,7 @@ RUN python3 -m pip install torch==2.4.0 --index-url https://download.pytorch.org
 # Install Flash Attention
 RUN python3 -m pip install wheel ninja
 
-run export TMP_DIR=$(mktemp -d) \
+RUN export TMP_DIR=$(mktemp -d) \
 && cd $TMP_DIR \
 && git clone --depth 1 --branch v2.6.3 https://github.com/Dao-AILab/flash-attention.git \
 && cd flash-attention \
