Skip to content

Commit

Permalink
Merge pull request #2 from mobiusml/docker_config
Browse files Browse the repository at this point in the history
Add docker config
  • Loading branch information
HRashidi authored Aug 19, 2024
2 parents bbe52d2 + f95bfd1 commit f658b25
Show file tree
Hide file tree
Showing 5 changed files with 8,134 additions and 3 deletions.
44 changes: 44 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Use NVIDIA CUDA as base image
FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04

# Build args
ARG INSTALL_FLASH_ATTENTION=false

# Set working directory
WORKDIR /app

# Suppress interactive apt/debconf prompts.
# NOTE: the correct value is "noninteractive" (no hyphen); "non-interactive"
# is not recognized by debconf, so prompts could still appear.
ENV DEBIAN_FRONTEND=noninteractive

# Install required libraries, tools, and Python3.
# Clean the apt lists in the same layer to keep the image small.
RUN apt-get update \
    && apt-get install -y ffmpeg curl git python3.10 python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Install poetry
RUN curl -sSL https://install.python-poetry.org | python3 -

# Make poetry available on PATH.
# ENV applies to every subsequent RUN/CMD layer; a .bashrc edit would only
# affect interactive shells and is unnecessary here.
ENV PATH="/root/.local/bin:$PATH"

# Copy project files into the container
COPY . /app

# Install the package with poetry
RUN poetry install

# Install CUDA 12.1 build of torch, then optionally flash attention.
# Use POSIX `[ ... ]`: the default Docker shell is /bin/sh (dash on Ubuntu),
# where the bash-only `[[ ... ]]` construct fails.
RUN poetry run pip install torch --index-url https://download.pytorch.org/whl/cu121
RUN if [ "$INSTALL_FLASH_ATTENTION" = "true" ] ; then \
        poetry run pip install flash-attn --no-build-isolation; \
    else \
        echo "Skipping flash-attn installation" ; \
    fi

# Disable buffering for stdout and stderr to get the logs in real time
ENV PYTHONUNBUFFERED=1

# Expose the desired port
EXPOSE 8000

# Run the app
CMD ["poetry", "run", "aana", "deploy", "aana_chat_with_video.app:aana_app", "--host", "0.0.0.0"]
46 changes: 44 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ Once the application is running, you will see the message `Deployed successfully

> **⚠️ Warning**
>
> The applications require 1 largs GPUs to run. GPU should have at least 48GB of memory.
> The applications require 1 large GPU to run. The GPU should have at least 48GB of memory.
>
> The applications will detect the available GPU automatically but you need to make sure that `CUDA_VISIBLE_DEVICES` is set correctly.
>
Expand All @@ -54,4 +54,46 @@ Once the application is running, you will see the message `Deployed successfully
2. Send a POST request to the app.

See [Chat with Video Demo notebook](notebooks/chat_with_video_demo.ipynb) for more information.
See [Chat with Video Demo notebook](notebooks/chat_with_video_demo.ipynb) for more information.

## Running with Docker

We provide a docker-compose configuration to run the application in a Docker container.

Requirements:

- Docker Engine >= 26.1.0
- Docker Compose >= 1.29.2
- NVIDIA Driver >= 525.60.13

To run the application, simply run the following command:

```bash
docker-compose up
```

The application will be accessible at `http://localhost:8000` on the host server.


> **⚠️ Warning**
>
> The applications require 1 GPU to run.
>
> The applications will detect the available GPU automatically but you need to make sure that `CUDA_VISIBLE_DEVICES` is set correctly.
>
> Sometimes `CUDA_VISIBLE_DEVICES` is set to an empty string and the application will not be able to detect the GPU. Use `unset CUDA_VISIBLE_DEVICES` to unset the variable.
>
> You can also set the `CUDA_VISIBLE_DEVICES` environment variable to the GPU index you want to use: `CUDA_VISIBLE_DEVICES=0 docker-compose up`.

> **💡Tip**
>
> Some models use Flash Attention for better performance. You can set the build argument `INSTALL_FLASH_ATTENTION` to `true` to install Flash Attention.
>
> ```bash
> INSTALL_FLASH_ATTENTION=true docker-compose build
> ```
>
> After building the image, you can use `docker-compose up` command to run the application.
>
> You can also set the `INSTALL_FLASH_ATTENTION` environment variable to `true` in the `docker-compose.yaml` file.
68 changes: 68 additions & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
version: '3.8'

services:
  postgres:
    restart: always
    container_name: aana_chat_with_video_db
    # NOTE(review): unpinned tag — consider pinning a major version
    # (e.g. postgres:16) so a registry update cannot change behavior.
    image: postgres
    command: postgres -c 'max_connections=1000'
    healthcheck:
      test: /usr/bin/pg_isready
      timeout: 45s
      interval: 10s
      retries: 10
    ports:
      # Port mappings are quoted: unquoted "HOST:CONTAINER" values can be
      # misread by YAML 1.1 parsers (sexagesimal numbers).
      - '15430:15430'
    expose:
      - '15430'
    environment:
      # WARNING: these defaults are for local development only — set
      # POSTGRES_PASSWORD / POSTGRES_USER / POSTGRES_DB in the environment
      # for any real deployment.
      PGPASSWORD: '${POSTGRES_PASSWORD:-Yf?5nX39}'
      PGUSER: '${POSTGRES_USER:-aana_db_user}'
      PGDATABASE: '${POSTGRES_DB:-aana_db}'
      POSTGRES_PASSWORD: '${POSTGRES_PASSWORD:-Yf?5nX39}'
      POSTGRES_USER: '${POSTGRES_USER:-aana_db_user}'
      POSTGRES_DB: '${POSTGRES_DB:-aana_db}'
      PGPORT: '15430'
      PGDATA: '/pgdata'
    volumes:
      - pg_data:/pgdata

  aana_chat_with_video_app:
    restart: always
    container_name: aana_chat_with_video_app
    depends_on:
      postgres:
        condition: service_healthy
    ports:
      - '8000:8000'  # request server
    expose:
      - '8000'
    build:
      context: .
      dockerfile: Dockerfile
      args:
        INSTALL_FLASH_ATTENTION: '${INSTALL_FLASH_ATTENTION:-false}'
    deploy:
      resources:
        reservations:
          devices:
            - capabilities: ["gpu"]
    environment:
      # Bare key: Compose passes the variable through from the host
      # environment (unset on the host means unset in the container).
      CUDA_VISIBLE_DEVICES:
      HF_HUB_ENABLE_HF_TRANSFER: '${HF_HUB_ENABLE_HF_TRANSFER:-1}'
      HF_TOKEN: '${HF_TOKEN}'
      HF_DATASETS_CACHE: /root/.cache/huggingface
      NUM_WORKERS: '${NUM_WORKERS:-2}'
      TMP_DATA_DIR: /tmp/aana_data
      DB_CONFIG: '{"datastore_type":"postgresql","datastore_config":{"host":"postgres","port":"15430","user":"${POSTGRES_USER:-aana_db_user}","password":"${POSTGRES_PASSWORD:-Yf?5nX39}","database":"${POSTGRES_DB:-aana_db}"}}'
    volumes:
      - app_data:/tmp/aana_data
      - hf_datasets_cache:/root/.cache/huggingface

volumes:
  pg_data:
    name: aana_chat_with_video_postgres_data
  app_data:
    name: aana_chat_with_video_app_data
  hf_datasets_cache:
    name: hf_datasets_cache
Loading

0 comments on commit f658b25

Please sign in to comment.