Skip to content

Commit

Permalink
Merge pull request #781 from 0xff-dev/main
Browse files Browse the repository at this point in the history
fix: upgrade fastchat to v0.2.36
  • Loading branch information
bjwswang authored Mar 4, 2024
2 parents 2321d4d + 218974e commit a7df03c
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 37 deletions.
6 changes: 6 additions & 0 deletions .github/workflows/worker_image_build_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@ jobs:
test_image_build:
runs-on: ubuntu-latest
steps:
- name: Maximize build space
uses: easimon/maximize-build-space@master
with:
root-reserve-mb: 512
swap-size-mb: 1024
remove-dotnet: 'true'
- uses: actions/checkout@v4
with:
fetch-depth: 0
Expand Down
77 changes: 43 additions & 34 deletions deploy/llms/Dockerfile.fastchat-worker
Original file line number Diff line number Diff line change
@@ -1,34 +1,43 @@
FROM nvidia/cuda:12.2.0-devel-ubuntu20.04

# Define a build argument with a default value
ARG PACKAGE_REGISTRY="mirrors.tuna.tsinghua.edu.cn"

# Update the package registry based on the build argument
RUN sed -i "s/archive.ubuntu.com/$PACKAGE_REGISTRY/g" /etc/apt/sources.list \
&& sed -i "s/security.ubuntu.com/$PACKAGE_REGISTRY/g" /etc/apt/sources.list

# Configure the default Timezone
ENV TZ=Asia/Shanghai
RUN export DEBIAN_FRONTEND=noninteractive \
&& apt-get update \
&& apt-get install -y --no-install-recommends tzdata \
&& ln -fs /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \
&& dpkg-reconfigure --frontend noninteractive tzdata \
&& rm -rf /var/lib/apt/lists/*

# Official: https://pypi.org/simple
ARG PYTHON_INDEX_URL="https://pypi.mirrors.ustc.edu.cn/simple/"

# Install the Python 3.9 toolchain needed by fastchat.
# NOTE: `apt-get update` must run in the SAME layer as `apt-get install`
# (hadolint DL3009) — otherwise a cached, stale package index can be reused
# and installs fail or pull outdated packages. Lists are removed in the same
# layer so they never persist in the image.
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
    curl \
    python3-dev \
    python3-pip \
    python3.9 \
    python3.9-distutils \
&& rm -rf /var/lib/apt/lists/*

# Install fastchat along with its dependencies
# (--no-cache-dir keeps the pip download cache out of the layer, DL3042)
RUN python3.9 -m pip install --no-cache-dir tomli setuptools_scm wavedrom -i ${PYTHON_INDEX_URL}
RUN python3.9 -m pip install --no-cache-dir fschat fschat[model_worker] -i ${PYTHON_INDEX_URL}

# Install requirements for QWen(https://huggingface.co/Qwen/Qwen-72B-Chat)
RUN python3.9 -m pip install --no-cache-dir einops scipy transformers_stream_generator==0.0.4 deepspeed -i ${PYTHON_INDEX_URL}

# Install requirements for quantization with auto-gptq
RUN python3.9 -m pip install --no-cache-dir auto-gptq optimum -i ${PYTHON_INDEX_URL}

COPY deploy/llms/start-worker.sh /
ENTRYPOINT ["/start-worker.sh"]
FROM nvidia/cuda:12.2.0-devel-ubuntu20.04

# Define a build argument with a default value
ARG PACKAGE_REGISTRY="mirrors.tuna.tsinghua.edu.cn"

# Update the package registry based on the build argument
RUN sed -i "s/archive.ubuntu.com/$PACKAGE_REGISTRY/g" /etc/apt/sources.list \
&& sed -i "s/security.ubuntu.com/$PACKAGE_REGISTRY/g" /etc/apt/sources.list

# Configure the default Timezone
ENV TZ=Asia/Shanghai
RUN export DEBIAN_FRONTEND=noninteractive \
&& apt-get update \
&& apt-get install -y --no-install-recommends tzdata git \
&& ln -fs /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \
&& dpkg-reconfigure --frontend noninteractive tzdata \
&& rm -rf /var/lib/apt/lists/*

# Official: https://pypi.org/simple
ARG PYTHON_INDEX_URL="https://pypi.mirrors.ustc.edu.cn/simple/"

# The FastChat release this image ships; pinning makes the build reproducible
# (an unpinned `git clone` would silently track upstream main).
ARG FASTCHAT_VERSION="v0.2.36"

# Install the Python 3.9 toolchain needed by fastchat.
# NOTE: `apt-get update` must run in the SAME layer as `apt-get install`
# (hadolint DL3009) — otherwise a cached, stale package index can be reused
# and installs fail or pull outdated packages. Lists are removed in the same
# layer so they never persist in the image.
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
    curl \
    python3-dev \
    python3-pip \
    python3.9 \
    python3.9-distutils \
&& rm -rf /var/lib/apt/lists/*

# Build prerequisites + transformers pinned to a version known to work with
# this FastChat release (--no-cache-dir keeps pip's cache out of the layer).
RUN python3.9 -m pip install --no-cache-dir tomli setuptools_scm wavedrom transformers==4.37.0 -i ${PYTHON_INDEX_URL}
RUN python3.9 -m pip install --no-cache-dir --upgrade pip -i ${PYTHON_INDEX_URL}
# Install FastChat from source at the pinned release tag; the checked-out
# commit hash is recorded in $HOME/.fastchat so the running image can report
# exactly which revision it was built from. The clone must stay on disk
# because `pip install -e` (editable) links the install to the source tree.
RUN git clone --depth 1 --branch ${FASTCHAT_VERSION} https://github.com/lm-sys/FastChat.git \
&& cd FastChat \
&& python3.9 -m pip install --no-cache-dir -e ".[model_worker]" -i ${PYTHON_INDEX_URL} \
&& git rev-parse HEAD > $HOME/.fastchat \
&& cd ..

# Configure the following environment variables to allow fastchat to pull model files from modelscope
# export VLLM_USE_MODELSCOPE=True ,export FASTCHAT_USE_MODELSCOPE=True
RUN python3.9 -m pip install --no-cache-dir modelscope pydantic==1.10.14 -i ${PYTHON_INDEX_URL}

# Install requirements for QWen(https://huggingface.co/Qwen/Qwen-72B-Chat)
RUN python3.9 -m pip install --no-cache-dir einops scipy transformers_stream_generator==0.0.4 deepspeed -i ${PYTHON_INDEX_URL}

# Install requirements for quantization with auto-gptq
RUN python3.9 -m pip install --no-cache-dir auto-gptq optimum -i ${PYTHON_INDEX_URL}

COPY deploy/llms/start-worker.sh /
ENTRYPOINT ["/start-worker.sh"]
4 changes: 2 additions & 2 deletions deploy/llms/Dockerfile.fastchat-worker-vllm
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
ARG BASE_IMAGE_VERSION="v0.2.0"
ARG BASE_IMAGE_VERSION="v0.2.36"
FROM kubeagi/arcadia-fastchat-worker:${BASE_IMAGE_VERSION}

# Official: https://pypi.org/simple
Expand All @@ -10,4 +10,4 @@ RUN python3.9 -m pip install vllm -i ${PYTHON_INDEX_URL}
# for now, ray: 2.9.0, python: 3.9.x
# this utils.py file is from ray 2.9.0 ray-ml image
# search 'KubeAGI' in utils.py for what's changed
COPY utils.py /usr/local/lib/python3.9/dist-packages/ray/_private/utils.py
COPY utils.py /usr/local/lib/python3.9/dist-packages/ray/_private/utils.py
2 changes: 1 addition & 1 deletion deploy/llms/start-worker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ fi

echo "Run model worker..."
python3.9 -m $FASTCHAT_WORKER_NAME --model-names $FASTCHAT_REGISTRATION_MODEL_NAME \
--model-path /data/models/$FASTCHAT_MODEL_NAME --worker-address $FASTCHAT_WORKER_ADDRESS \
--model-path $FASTCHAT_MODEL_NAME_PATH --worker-address $FASTCHAT_WORKER_ADDRESS \
--controller-address $FASTCHAT_CONTROLLER_ADDRESS \
--num-gpus $NUMBER_GPUS \
--host 0.0.0.0 --port 21002 $EXTRA_ARGS

0 comments on commit a7df03c

Please sign in to comment.