diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 979e8466bc..374f2d0e28 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,7 +29,7 @@ exclude: build|stubs|^bot/templates/$|openassistant/templates|docs/docs/api/openapi.json|scripts/postprocessing/regex_pii_detector.py default_language_version: - python: python3 + python: python3.10 ci: autofix_prs: true diff --git a/.vscode/launch.json b/.vscode/launch.json index aae38fbe91..1a9997def9 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -106,6 +106,38 @@ "CUDA_VISIBLE_DEVICES": "1,2,3,4,5", "OMP_NUM_THREADS": "1" } + }, + { + "name": "Debug: Inference Server", + "type": "python", + "request": "attach", + "connect": { + "host": "localhost", + "port": 5678 + }, + "pathMappings": [ + { + "localRoot": "${workspaceFolder}/inference/server", + "remoteRoot": "/opt/inference/server" + } + ], + "justMyCode": false + }, + { + "name": "Debug: Worker", + "type": "python", + "request": "attach", + "connect": { + "host": "localhost", + "port": 5679 + }, + "pathMappings": [ + { + "localRoot": "${workspaceFolder}/inference/worker", + "remoteRoot": "/opt/inference/worker" + } + ], + "justMyCode": false } ] } diff --git a/docker-compose.yaml b/docker-compose.yaml index 6497af6a1b..af7709ff3f 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -231,12 +231,14 @@ services: TRUSTED_CLIENT_KEYS: "6969" ALLOW_DEBUG_AUTH: "True" API_ROOT: "http://localhost:8000" + DEBUG: "True" volumes: - "./oasst-shared:/opt/inference/lib/oasst-shared" - "./inference/server:/opt/inference/server" restart: unless-stopped ports: - "8000:8000" + - "5678:5678" # Port to attach debugger depends_on: inference-redis: condition: service_healthy @@ -254,9 +256,12 @@ services: MODEL_CONFIG_NAME: ${MODEL_CONFIG_NAME:-distilgpt2} BACKEND_URL: "ws://inference-server:8000" PARALLELISM: 2 + DEBUG: "True" volumes: - "./oasst-shared:/opt/inference/lib/oasst-shared" - "./inference/worker:/opt/inference/worker" 
+ ports: + - "5679:5679" # Port to attach debugger deploy: replicas: 1 profiles: ["inference"] diff --git a/docker/inference/Dockerfile.server b/docker/inference/Dockerfile.server index 17d69173d3..a1efbe4dad 100644 --- a/docker/inference/Dockerfile.server +++ b/docker/inference/Dockerfile.server @@ -78,8 +78,8 @@ USER ${APP_USER} VOLUME [ "${APP_BASE}/lib/oasst-shared" ] VOLUME [ "${APP_BASE}/lib/oasst-data" ] - -CMD uvicorn main:app --reload --host 0.0.0.0 --port "${PORT}" +# In the dev image, we start uvicorn from Python so that we can attach the debugger +CMD python main.py diff --git a/inference/README.md b/inference/README.md index 3f9e339ed2..9604b309e0 100644 --- a/inference/README.md +++ b/inference/README.md @@ -60,6 +60,26 @@ python __main__.py # You'll soon see a `User:` prompt, where you can type your prompts. ``` +## Debugging + +The inference server and the worker allow attaching a Python debugger. To do +this from VS Code, start the inference server & worker using docker compose as +described above (e.g. with `docker compose --profile inference up --build`), +then simply pick one of the following launch profiles, depending on what you +would like to debug: + +- Debug: Inference Server +- Debug: Worker + +### Waiting for Debugger on Startup + +It can be helpful to wait for the debugger before starting the application. 
This +can be achieved by uncommenting `debugpy.wait_for_client()` in the appropriate +location: + +- `inference/server/main.py` for the inference server +- `inference/worker/__main__.py` for the worker + ## Distributed Testing We run distributed load tests using the diff --git a/inference/server/main.py b/inference/server/main.py index 711d75ee9c..4b39758e37 100644 --- a/inference/server/main.py +++ b/inference/server/main.py @@ -148,3 +148,21 @@ async def maybe_add_debug_api_keys(): async def welcome_message(): logger.warning("Inference server started") logger.warning("To stop the server, press Ctrl+C") + + +if __name__ == "__main__": + import os + + import uvicorn + + port = int(os.getenv("PORT", "8000")) + is_debug = os.getenv("DEBUG", "False").lower() in ("true", "1") + + if is_debug: + import debugpy + + debugpy.listen(("0.0.0.0", 5678)) + # Uncomment to wait here until a debugger is attached + # debugpy.wait_for_client() + + uvicorn.run("main:app", host="0.0.0.0", port=port, reload=is_debug) diff --git a/inference/server/requirements.txt b/inference/server/requirements.txt index db5045481b..d97a2c260f 100644 --- a/inference/server/requirements.txt +++ b/inference/server/requirements.txt @@ -4,6 +4,7 @@ asyncpg authlib beautifulsoup4 # web_retriever plugin cryptography==39.0.0 +debugpy fastapi-limiter fastapi[all]==0.88.0 google-api-python-client diff --git a/inference/worker/__main__.py b/inference/worker/__main__.py index 569e340276..baa7f7d2cf 100644 --- a/inference/worker/__main__.py +++ b/inference/worker/__main__.py @@ -1,4 +1,5 @@ import concurrent.futures +import os import signal import sys import time @@ -130,4 +131,13 @@ def main(): if __name__ == "__main__": + is_debug = os.getenv("DEBUG", "False").lower() in ("true", "1") + + if is_debug: + import debugpy + + debugpy.listen(("0.0.0.0", 5679)) + # Uncomment to wait here until a debugger is attached + # debugpy.wait_for_client() + + main() diff --git a/inference/worker/requirements.txt b/inference/worker/requirements.txt index
cbb25fe322..94a7dc18e1 100644 --- a/inference/worker/requirements.txt +++ b/inference/worker/requirements.txt @@ -1,4 +1,5 @@ aiohttp +debugpy hf_transfer huggingface_hub langchain==0.0.142