diff --git a/deploy/llms/Dockerfile.fastchat-worker b/deploy/llms/Dockerfile.fastchat-worker index f1e92526f..e38a95cc8 100644 --- a/deploy/llms/Dockerfile.fastchat-worker +++ b/deploy/llms/Dockerfile.fastchat-worker @@ -39,5 +39,16 @@ RUN python3.9 -m pip install einops scipy transformers_stream_generator==0.0.4 d # Install requirements for Qutantization with auto-gptq RUN python3.9 -m pip install auto-gptq optimum -i ${PYTHON_INDEX_URL} +# Install requirements for vllm worker +# Ray v2.9.3 and vllm v0.3.3 +RUN python3.9 -m pip install vllm==0.3.3 +RUN python3.9 -m pip install -U "ray[default]==2.9.3" -i ${PYTHON_INDEX_URL} + +# Allow environment variables to set the ray & python versions so that the version check passes +# for now, ray: 2.9.3, python: 3.9.x +# this utils.py file is from the ray 2.9.0 ray-ml image +# search for 'KubeAGI' in utils.py to see what was changed +COPY deploy/llms/utils.py /usr/local/lib/python3.9/dist-packages/ray/_private/utils.py + COPY deploy/llms/start-worker.sh / ENTRYPOINT ["/start-worker.sh"] diff --git a/deploy/llms/Dockerfile.fastchat-worker-vllm b/deploy/llms/Dockerfile.fastchat-worker-vllm deleted file mode 100644 index 49e031aea..000000000 --- a/deploy/llms/Dockerfile.fastchat-worker-vllm +++ /dev/null @@ -1,14 +0,0 @@ -ARG BASE_IMAGE_VERSION="v0.2.36" -FROM kubeagi/arcadia-fastchat-worker:${BASE_IMAGE_VERSION} - -# Official: https://pypi.org/simple -ARG PYTHON_INDEX_URL="https://pypi.mirrors.ustc.edu.cn/simple/" -# Install requirements for vllm worker -# Ray v2.9.3 and vllm v0.3.3 -RUN python3.9 -m pip install vllm -i ${PYTHON_INDEX_URL} - -# Allow to use environment variable to set ray & python version to pass the version check -# for now, ray: 2.9.0, python: 3.9.x -# this utils.py file is from ray 2.9.0 ray-ml image -# search 'KubeAGI' in utils.py for what's changed -COPY utils.py /usr/local/lib/python3.9/dist-packages/ray/_private/utils.py \ No newline at end of file diff --git a/pkg/worker/runner.go b/pkg/worker/runner.go index 
585bfa0c8..fe287fd81 100644 --- a/pkg/worker/runner.go +++ b/pkg/worker/runner.go @@ -33,8 +33,9 @@ import ( const ( // tag is the same version as fastchat - defaultFastChatImage = "kubeagi/arcadia-fastchat-worker:v0.2.36" - defaultFastchatVLLMImage = "kubeagi/arcadia-fastchat-worker:vllm-v0.2.36" + defaultFastChatImage = "kubeagi/arcadia-fastchat-worker:v0.2.36" + // For ease of maintenance and stability, the vLLM module is now included in the standard image by default. + defaultFastchatVLLMImage = "kubeagi/arcadia-fastchat-worker:v0.2.36" // defaultKubeAGIImage for RunnerKubeAGI defaultKubeAGIImage = "kubeagi/core-library-cli:v0.0.1" )