forked from oobabooga/text-generation-webui
Commit: creating a layer with Docker/docker-compose (oobabooga#633)
Showing 6 changed files with 230 additions and 1 deletion.
.dockerignore (new file):

```
.env
Dockerfile
/characters
/extensions
/loras
/models
/presets
/prompts
/softprompts
/training
```
.env.example (new file):

```
# by default the Dockerfile specifies these versions: 3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX
# however, in my case I had to specify the exact compute capability for my card (RTX 2060): 7.5
# you can find the value for your card at https://developer.nvidia.com/cuda-gpus
TORCH_CUDA_ARCH_LIST=7.5

# these arguments worked for me with roughly 4.5GB of VRAM
CLI_ARGS=--model llama-7b-4bit --wbits 4 --listen --auto-devices

# the following examples have been tested with the files linked in docs/README_docker.md:
# example running 13b with 4bit/128 groupsize:        CLI_ARGS=--model llama-13b-4bit-128g --wbits 4 --listen --groupsize 128 --pre_layer 25
# example loading the api extension and public share: CLI_ARGS=--model llama-7b-4bit --wbits 4 --listen --auto-devices --no-stream --extensions api --share
# example running 7b with 8bit:                       CLI_ARGS=--model llama-7b --load-in-8bit --listen --auto-devices

# the port the webui binds to on the host
HOST_PORT=7860
# the port the webui binds to inside the container
CONTAINER_PORT=7860

# the port the api binds to on the host
HOST_API_PORT=5000
# the port the api binds to inside the container
CONTAINER_API_PORT=5000

# the git revision that text-generation-webui is installed from
WEBUI_VERSION=HEAD
```
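If you are unsure which compute capability your GPU has, a reasonably recent NVIDIA driver can report it directly (older nvidia-smi builds may not support the compute_cap query field):

```bash
# prints e.g. "NVIDIA GeForce RTX 2060, 7.5"; the second value goes into TORCH_CUDA_ARCH_LIST
nvidia-smi --query-gpu=name,compute_cap --format=csv,noheader
```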
Dockerfile (new file):

```dockerfile
# build stage: compile the GPTQ-for-LLaMa CUDA kernel into a wheel
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 AS builder

RUN apt-get update && \
    apt-get install --no-install-recommends -y git vim build-essential python3-dev python3-venv && \
    rm -rf /var/lib/apt/lists/*

RUN git clone https://github.com/oobabooga/GPTQ-for-LLaMa /build

WORKDIR /build

RUN python3 -m venv /build/venv
RUN . /build/venv/bin/activate && \
    pip3 install --upgrade pip setuptools && \
    pip3 install torch torchvision torchaudio && \
    pip3 install -r requirements.txt

# https://developer.nvidia.com/cuda-gpus
# for an RTX 2060: ARG TORCH_CUDA_ARCH_LIST="7.5"
ARG TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
RUN . /build/venv/bin/activate && \
    python3 setup_cuda.py bdist_wheel -d .

# runtime stage: a slimmer image that only needs the CUDA runtime
FROM nvidia/cuda:11.8.0-runtime-ubuntu22.04

LABEL maintainer="Your Name <[email protected]>"
LABEL description="Docker image for GPTQ-for-LLaMa and Text Generation WebUI"

RUN apt-get update && \
    apt-get install --no-install-recommends -y git python3 python3-pip && \
    rm -rf /var/lib/apt/lists/*

RUN --mount=type=cache,target=/root/.cache/pip pip3 install virtualenv

COPY . /app/

WORKDIR /app

# optionally pin the webui source to a specific git revision
ARG WEBUI_VERSION
RUN test -n "${WEBUI_VERSION}" && git reset --hard ${WEBUI_VERSION} || echo "Using provided webui source"

RUN virtualenv /app/venv
RUN . /app/venv/bin/activate && \
    pip3 install --upgrade pip setuptools && \
    pip3 install torch torchvision torchaudio && \
    pip3 install -r requirements.txt

# install the GPTQ wheel built in the first stage
COPY --from=builder /build /app/repositories/GPTQ-for-LLaMa
RUN . /app/venv/bin/activate && \
    pip3 install /app/repositories/GPTQ-for-LLaMa/*.whl

ENV CLI_ARGS=""

# install the per-extension requirements
RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/api && pip3 install -r requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/elevenlabs_tts && pip3 install -r requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/google_translate && pip3 install -r requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/silero_tts && pip3 install -r requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/whisper_stt && pip3 install -r requirements.txt

# work around bitsandbytes falling back to the CPU library when CUDA detection fails
RUN cp /app/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda118.so /app/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so

CMD . /app/venv/bin/activate && python3 server.py ${CLI_ARGS}
```
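Note that the RUN --mount=type=cache lines require BuildKit. When building the image by hand rather than through docker-compose, an invocation along these lines should work (the image tag and arch list below are placeholders):

```bash
# enable BuildKit for the cache mounts and pass the build args that compose would normally supply
DOCKER_BUILDKIT=1 docker build \
  --build-arg TORCH_CUDA_ARCH_LIST="7.5" \
  --build-arg WEBUI_VERSION=HEAD \
  -t text-generation-webui .
```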
docker-compose.yml (new file):

```yaml
version: "3.3"
services:
  text-generation-webui:
    build:
      context: .
      args:
        # specify which CUDA compute capability your card supports: https://developer.nvidia.com/cuda-gpus
        TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST}
        GPTQ_VERSION: ${GPTQ_VERSION}
        WEBUI_VERSION: ${WEBUI_VERSION}
    env_file: .env
    ports:
      - "${HOST_PORT}:${CONTAINER_PORT}"
      - "${HOST_API_PORT}:${CONTAINER_API_PORT}"
    stdin_open: true
    tty: true
    volumes:
      - ./characters:/app/characters
      - ./extensions:/app/extensions
      - ./loras:/app/loras
      - ./models:/app/models
      - ./presets:/app/presets
      - ./prompts:/app/prompts
      - ./softprompts:/app/softprompts
      - ./training:/app/training
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['0']
              capabilities: [gpu]
```
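The deploy.resources.reservations.devices GPU syntax requires docker-compose 1.28 or newer. To check that the ${...} variables from .env resolve as expected before starting, the rendered configuration can be printed:

```bash
# print the final compose file with environment substitutions applied
docker-compose config
```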
docs/README_docker.md (new file):
- [Linux](#linux)
  - [Ubuntu 22.04](#ubuntu-2204)
    - [update the drivers](#update-the-drivers)
    - [reboot](#reboot)
    - [docker \& container toolkit](#docker--container-toolkit)
  - [Manjaro](#manjaro)
    - [update the drivers](#update-the-drivers-1)
    - [reboot](#reboot-1)
    - [docker \& container toolkit](#docker--container-toolkit-1)
  - [prepare environment \& startup](#prepare-environment--startup)
    - [place models in models folder](#place-models-in-models-folder)
    - [prepare .env file](#prepare-env-file)
    - [startup docker container](#startup-docker-container)
- [Windows](#windows)

# Linux

## Ubuntu 22.04

### update the drivers
in the "Software Updater", update the drivers to the latest version of the proprietary driver.

### reboot
to switch to the new driver.

### docker & container toolkit
first install docker itself:
```bash
sudo apt update
sudo apt-get install curl

sudo mkdir -m 0755 -p /etc/apt/keyrings
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg

echo \
  "deb [arch="$(dpkg --print-architecture)" signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \
  "$(. /etc/os-release && echo "$VERSION_CODENAME")" stable" | \
  sudo tee /etc/apt/sources.list.d/docker.list > /dev/null

sudo apt update
sudo apt-get install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin docker-compose -y

sudo usermod -aG docker $USER
newgrp docker
```
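as a quick sanity check that docker works without sudo:
```bash
docker run --rm hello-world
```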
then install the NVIDIA container toolkit:
```bash
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg

echo "deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://nvidia.github.io/libnvidia-container/stable/ubuntu22.04/amd64 /" | \
  sudo tee /etc/apt/sources.list.d/nvidia.list > /dev/null

sudo apt update

sudo apt install nvidia-docker2 nvidia-container-runtime -y
sudo systemctl restart docker
```
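to verify that containers can see the GPU (the CUDA image tag below is just an example):
```bash
docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi
```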
## Manjaro

### update the drivers
```bash
sudo mhwd -a pci nonfree 0300
```
### reboot
```bash
reboot
```
### docker & container toolkit
```bash
yay -S docker docker-compose buildkit gcc nvidia-docker
sudo usermod -aG docker $USER
newgrp docker
sudo systemctl restart docker # required by nvidia-container-runtime
```
## prepare environment & startup

### place models in models folder
download the models and place them inside the models folder. tested with:

4bit:
https://github.com/oobabooga/text-generation-webui/pull/530#issuecomment-1483891617
https://github.com/oobabooga/text-generation-webui/pull/530#issuecomment-1483941105

8bit:
https://github.com/oobabooga/text-generation-webui/pull/530#issuecomment-1484235789
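the folder (or file) name under models has to match the --model flag in CLI_ARGS; with the default .env the layout would look roughly like this (hypothetical listing):
```bash
ls models/
# llama-7b-4bit/   <- matches CLI_ARGS=--model llama-7b-4bit
```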
### prepare .env file
copy the example file and edit the values to your needs:
```bash
cp .env.example .env
nano .env
```
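as a sketch, a minimal .env for a single RTX 2060 could look like this (values taken from .env.example; adjust for your card and model):
```bash
TORCH_CUDA_ARCH_LIST=7.5
CLI_ARGS=--model llama-7b-4bit --wbits 4 --listen --auto-devices
HOST_PORT=7860
CONTAINER_PORT=7860
HOST_API_PORT=5000
CONTAINER_API_PORT=5000
WEBUI_VERSION=HEAD
```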
### startup docker container
```bash
docker-compose up --build
```
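once everything builds, the webui should be reachable at http://localhost:7860 (or whatever HOST_PORT is set to). to run in the background instead:
```bash
docker-compose up --build -d
docker-compose logs -f text-generation-webui
```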
# Windows
coming soon