Skip to content

Commit

Permalink
Merge pull request #2 from mobiusml/docker_config
Browse files Browse the repository at this point in the history
Add docker config
  • Loading branch information
HRashidi authored Aug 19, 2024
2 parents bbe52d2 + f95bfd1 commit f658b25
Show file tree
Hide file tree
Showing 5 changed files with 8,134 additions and 3 deletions.
44 changes: 44 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Use NVIDIA CUDA as base image
FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04

# Build args
ARG INSTALL_FLASH_ATTENTION=false

# Set working directory
WORKDIR /app

# Suppress interactive apt/debconf prompts.
# NOTE: the correct value is "noninteractive" (no hyphen); "non-interactive"
# is not recognized by debconf, so prompts could still appear.
ENV DEBIAN_FRONTEND=noninteractive

# Install required libraries, tools, and Python3.
# Clean the apt lists in the same layer to keep the image small.
RUN apt-get update \
    && apt-get install -y ffmpeg curl git python3.10 python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Install poetry
RUN curl -sSL https://install.python-poetry.org | python3 -

# Make poetry available on PATH.
# ENV applies to every subsequent RUN/CMD layer; a .bashrc edit would only
# affect interactive shells and is unnecessary here.
ENV PATH="/root/.local/bin:$PATH"

# Copy project files into the container
COPY . /app

# Install the package with poetry
RUN poetry install

# Install CUDA 12.1 build of torch, then optionally flash attention.
# Use POSIX `[ ... ]`: the default Docker shell is /bin/sh (dash on Ubuntu),
# where the bash-only `[[ ... ]]` construct fails.
RUN poetry run pip install torch --index-url https://download.pytorch.org/whl/cu121
RUN if [ "$INSTALL_FLASH_ATTENTION" = "true" ] ; then \
        poetry run pip install flash-attn --no-build-isolation; \
    else \
        echo "Skipping flash-attn installation" ; \
    fi

# Disable buffering for stdout and stderr to get the logs in real time
ENV PYTHONUNBUFFERED=1

# Expose the desired port
EXPOSE 8000

# Run the app
CMD ["poetry", "run", "aana", "deploy", "aana_chat_with_video.app:aana_app", "--host", "0.0.0.0"]
46 changes: 44 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ Once the application is running, you will see the message `Deployed successfully

> **⚠️ Warning**
>
> The applications require 1 largs GPUs to run. GPU should have at least 48GB of memory.
> The applications require 1 large GPU to run. The GPU should have at least 48GB of memory.
>
> The applications will detect the available GPU automatically but you need to make sure that `CUDA_VISIBLE_DEVICES` is set correctly.
>
Expand All @@ -54,4 +54,46 @@ Once the application is running, you will see the message `Deployed successfully
2. Send a POST request to the app.

See [Chat with Video Demo notebook](notebooks/chat_with_video_demo.ipynb) for more information.
See [Chat with Video Demo notebook](notebooks/chat_with_video_demo.ipynb) for more information.

## Running with Docker

We provide a docker-compose configuration to run the application in a Docker container.

Requirements:

- Docker Engine >= 26.1.0
- Docker Compose >= 1.29.2
- NVIDIA Driver >= 525.60.13

To run the application, simply run the following command:

```bash
docker-compose up
```

The application will be accessible at `http://localhost:8000` on the host server.


> **⚠️ Warning**
>
> The applications require 1 GPU to run.
>
> The applications will detect the available GPU automatically but you need to make sure that `CUDA_VISIBLE_DEVICES` is set correctly.
>
> Sometimes `CUDA_VISIBLE_DEVICES` is set to an empty string and the application will not be able to detect the GPU. Use `unset CUDA_VISIBLE_DEVICES` to unset the variable.
>
> You can also set the `CUDA_VISIBLE_DEVICES` environment variable to the GPU index you want to use: `CUDA_VISIBLE_DEVICES=0 docker-compose up`.

> **💡Tip**
>
> Some models use Flash Attention for better performance. You can set the build argument `INSTALL_FLASH_ATTENTION` to `true` to install Flash Attention.
>
> ```bash
> INSTALL_FLASH_ATTENTION=true docker-compose build
> ```
>
> After building the image, you can use `docker-compose up` command to run the application.
>
> You can also set the `INSTALL_FLASH_ATTENTION` environment variable to `true` in the `docker-compose.yaml` file.
68 changes: 68 additions & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
version: '3.8'

services:
  postgres:
    restart: always
    container_name: aana_chat_with_video_db
    # NOTE(review): unpinned tag — consider pinning a major version
    # (e.g. postgres:16) so a registry update cannot change behavior.
    image: postgres
    command: postgres -c 'max_connections=1000'
    healthcheck:
      test: /usr/bin/pg_isready
      timeout: 45s
      interval: 10s
      retries: 10
    ports:
      # Port mappings are quoted: unquoted "HOST:CONTAINER" values can be
      # misread by YAML 1.1 parsers (sexagesimal numbers).
      - '15430:15430'
    expose:
      - '15430'
    environment:
      # WARNING: these defaults are for local development only — set
      # POSTGRES_PASSWORD / POSTGRES_USER / POSTGRES_DB in the environment
      # for any real deployment.
      PGPASSWORD: '${POSTGRES_PASSWORD:-Yf?5nX39}'
      PGUSER: '${POSTGRES_USER:-aana_db_user}'
      PGDATABASE: '${POSTGRES_DB:-aana_db}'
      POSTGRES_PASSWORD: '${POSTGRES_PASSWORD:-Yf?5nX39}'
      POSTGRES_USER: '${POSTGRES_USER:-aana_db_user}'
      POSTGRES_DB: '${POSTGRES_DB:-aana_db}'
      PGPORT: '15430'
      PGDATA: '/pgdata'
    volumes:
      - pg_data:/pgdata

  aana_chat_with_video_app:
    restart: always
    container_name: aana_chat_with_video_app
    depends_on:
      postgres:
        condition: service_healthy
    ports:
      - '8000:8000'  # request server
    expose:
      - '8000'
    build:
      context: .
      dockerfile: Dockerfile
      args:
        INSTALL_FLASH_ATTENTION: '${INSTALL_FLASH_ATTENTION:-false}'
    deploy:
      resources:
        reservations:
          devices:
            - capabilities: ["gpu"]
    environment:
      # Bare key: Compose passes the variable through from the host
      # environment (unset on the host means unset in the container).
      CUDA_VISIBLE_DEVICES:
      HF_HUB_ENABLE_HF_TRANSFER: '${HF_HUB_ENABLE_HF_TRANSFER:-1}'
      HF_TOKEN: '${HF_TOKEN}'
      HF_DATASETS_CACHE: /root/.cache/huggingface
      NUM_WORKERS: '${NUM_WORKERS:-2}'
      TMP_DATA_DIR: /tmp/aana_data
      DB_CONFIG: '{"datastore_type":"postgresql","datastore_config":{"host":"postgres","port":"15430","user":"${POSTGRES_USER:-aana_db_user}","password":"${POSTGRES_PASSWORD:-Yf?5nX39}","database":"${POSTGRES_DB:-aana_db}"}}'
    volumes:
      - app_data:/tmp/aana_data
      - hf_datasets_cache:/root/.cache/huggingface

volumes:
  pg_data:
    name: aana_chat_with_video_postgres_data
  app_data:
    name: aana_chat_with_video_app_data
  hf_datasets_cache:
    name: hf_datasets_cache
Loading

0 comments on commit f658b25

Please sign in to comment.