diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 979e8466bc..374f2d0e28 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,7 +29,7 @@ exclude: build|stubs|^bot/templates/$|openassistant/templates|docs/docs/api/openapi.json|scripts/postprocessing/regex_pii_detector.py default_language_version: - python: python3 + python: python3.10 ci: autofix_prs: true diff --git a/.vscode/launch.json b/.vscode/launch.json index aae38fbe91..1a9997def9 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -106,6 +106,38 @@ "CUDA_VISIBLE_DEVICES": "1,2,3,4,5", "OMP_NUM_THREADS": "1" } + }, + { + "name": "Debug: Inference Server", + "type": "python", + "request": "attach", + "connect": { + "host": "localhost", + "port": 5678 + }, + "pathMappings": [ + { + "localRoot": "${workspaceFolder}/inference/server", + "remoteRoot": "/opt/inference/server" + } + ], + "justMyCode": false + }, + { + "name": "Debug: Worker", + "type": "python", + "request": "attach", + "connect": { + "host": "localhost", + "port": 5679 + }, + "pathMappings": [ + { + "localRoot": "${workspaceFolder}/inference/worker", + "remoteRoot": "/opt/inference/worker" + } + ], + "justMyCode": false } ] } diff --git a/docker-compose.yaml b/docker-compose.yaml index 6497af6a1b..af7709ff3f 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -231,12 +231,14 @@ services: TRUSTED_CLIENT_KEYS: "6969" ALLOW_DEBUG_AUTH: "True" API_ROOT: "http://localhost:8000" + DEBUG: "True" volumes: - "./oasst-shared:/opt/inference/lib/oasst-shared" - "./inference/server:/opt/inference/server" restart: unless-stopped ports: - "8000:8000" + - "5678:5678" # Port to attach debugger depends_on: inference-redis: condition: service_healthy @@ -254,9 +256,12 @@ services: MODEL_CONFIG_NAME: ${MODEL_CONFIG_NAME:-distilgpt2} BACKEND_URL: "ws://inference-server:8000" PARALLELISM: 2 + DEBUG: "True" volumes: - "./oasst-shared:/opt/inference/lib/oasst-shared" - "./inference/worker:/opt/inference/worker" 
+ ports: + - "5679:5679" # Port to attach debugger deploy: replicas: 1 profiles: ["inference"] diff --git a/docker/inference/Dockerfile.server b/docker/inference/Dockerfile.server index 17d69173d3..a1efbe4dad 100644 --- a/docker/inference/Dockerfile.server +++ b/docker/inference/Dockerfile.server @@ -78,8 +78,8 @@ USER ${APP_USER} VOLUME [ "${APP_BASE}/lib/oasst-shared" ] VOLUME [ "${APP_BASE}/lib/oasst-data" ] - -CMD uvicorn main:app --reload --host 0.0.0.0 --port "${PORT}" +# In the dev image, we start uvicorn from Python so that we can attach the debugger +CMD python main.py diff --git a/inference/README.md b/inference/README.md index 3f9e339ed2..9604b309e0 100644 --- a/inference/README.md +++ b/inference/README.md @@ -60,6 +60,26 @@ python __main__.py # You'll soon see a `User:` prompt, where you can type your prompts. ``` +## Debugging + +The inference server and the worker allow attaching a Python debugger. To do +this from VS Code, start the inference server & worker using docker compose as +described above (e.g. with `docker compose --profile inference up --build`), +then simply pick one of the following launch profiles, depending on what you +would like to debug: + +- Debug: Inference Server +- Debug: Worker + +### Waiting for Debugger on Startup + +It can be helpful to wait for the debugger before starting the application. 
This +can be achieved by uncommenting `debugpy.wait_for_client()` in the appropriate +location: + +- `inference/server/main.py` for the inference server +- `inference/worker/__main__.py` for the worker + ## Distributed Testing We run distributed load tests using the diff --git a/inference/server/main.py b/inference/server/main.py index 711d75ee9c..4b39758e37 100644 --- a/inference/server/main.py +++ b/inference/server/main.py @@ -148,3 +148,21 @@ async def maybe_add_debug_api_keys(): async def welcome_message(): logger.warning("Inference server started") logger.warning("To stop the server, press Ctrl+C") + + +if __name__ == "__main__": + import os + + import uvicorn + + port = int(os.getenv("PORT", "8000")) + is_debug = os.getenv("DEBUG", "False").lower() in ("true", "1") + + if is_debug: + import debugpy + + debugpy.listen(("0.0.0.0", 5678)) + # Uncomment to wait here until a debugger is attached + # debugpy.wait_for_client() + + uvicorn.run("main:app", host="0.0.0.0", port=port, reload=is_debug) diff --git a/inference/server/requirements.txt b/inference/server/requirements.txt index db5045481b..d97a2c260f 100644 --- a/inference/server/requirements.txt +++ b/inference/server/requirements.txt @@ -4,6 +4,7 @@ asyncpg authlib beautifulsoup4 # web_retriever plugin cryptography==39.0.0 +debugpy fastapi-limiter fastapi[all]==0.88.0 google-api-python-client diff --git a/inference/worker/__main__.py b/inference/worker/__main__.py index 569e340276..baa7f7d2cf 100644 --- a/inference/worker/__main__.py +++ b/inference/worker/__main__.py @@ -1,4 +1,5 @@ import concurrent.futures +import os import signal import sys import time @@ -130,4 +131,13 @@ def main(): if __name__ == "__main__": + is_debug = os.getenv("DEBUG", "False").lower() in ("true", "1") + + if is_debug: + import debugpy + + debugpy.listen(("0.0.0.0", 5679)) + # Uncomment to wait here until a debugger is attached + # debugpy.wait_for_client() + + main() diff --git a/inference/worker/requirements.txt b/inference/worker/requirements.txt index
cbb25fe322..94a7dc18e1 100644 --- a/inference/worker/requirements.txt +++ b/inference/worker/requirements.txt @@ -1,4 +1,5 @@ aiohttp +debugpy hf_transfer huggingface_hub langchain==0.0.142