From a352aaacfe7f9ee0efe5449351f939c917f8e0ed Mon Sep 17 00:00:00 2001 From: bgoelTT Date: Sun, 19 Jan 2025 19:47:42 -0500 Subject: [PATCH 01/15] Add initial docker setup --- tt-metal-stable-diffusion-1.4/.env.default | 6 + tt-metal-stable-diffusion-1.4/README.md | 51 ++++++ .../docker-compose.yaml | 27 +++ .../requirements-test.txt | 3 + .../requirements.txt | 5 + .../server/flaskserver.py | 158 ++++++++++++++++++ .../server/gunicorn.conf.py | 15 ++ .../stable-diffusion-1.4.src.Dockerfile | 97 +++++++++++ .../tests/locust_config.conf | 6 + .../tests/locustfile.py | 17 ++ .../tests/test_inference_api.py | 47 ++++++ tt-metal-stable-diffusion-1.4/tests/utils.py | 34 ++++ 12 files changed, 466 insertions(+) create mode 100644 tt-metal-stable-diffusion-1.4/.env.default create mode 100644 tt-metal-stable-diffusion-1.4/README.md create mode 100644 tt-metal-stable-diffusion-1.4/docker-compose.yaml create mode 100644 tt-metal-stable-diffusion-1.4/requirements-test.txt create mode 100644 tt-metal-stable-diffusion-1.4/requirements.txt create mode 100644 tt-metal-stable-diffusion-1.4/server/flaskserver.py create mode 100644 tt-metal-stable-diffusion-1.4/server/gunicorn.conf.py create mode 100644 tt-metal-stable-diffusion-1.4/stable-diffusion-1.4.src.Dockerfile create mode 100644 tt-metal-stable-diffusion-1.4/tests/locust_config.conf create mode 100644 tt-metal-stable-diffusion-1.4/tests/locustfile.py create mode 100644 tt-metal-stable-diffusion-1.4/tests/test_inference_api.py create mode 100644 tt-metal-stable-diffusion-1.4/tests/utils.py diff --git a/tt-metal-stable-diffusion-1.4/.env.default b/tt-metal-stable-diffusion-1.4/.env.default new file mode 100644 index 0000000..8d9ab88 --- /dev/null +++ b/tt-metal-stable-diffusion-1.4/.env.default @@ -0,0 +1,6 @@ +TT_METAL_DOCKERFILE_VERSION=v0.53.0-rc51 +TT_METAL_COMMIT_SHA_OR_TAG=4da4a5e79a13ece7ff5096c30cef79cb0c504f0e +TT_METAL_COMMIT_DOCKER_TAG=4da4a5e79a13 # 12-character version of TT_METAL_COMMIT_SHA_OR_TAG +IMAGE_VERSION=v0.0.1 +# These are secrets and must be stored securely for production environments +JWT_SECRET=testing diff --git a/tt-metal-stable-diffusion-1.4/README.md b/tt-metal-stable-diffusion-1.4/README.md new file mode 100644 index 0000000..4ea7045 --- /dev/null +++ b/tt-metal-stable-diffusion-1.4/README.md @@ -0,0 +1,51 @@ +# TT Metalium YOLOv4 Inference API + +This implementation supports YOLOv4 execution on Grayskull and Worhmole. + + +## Table of Contents +- [Run server](#run-server) +- [JWT_TOKEN Authorization](#jwt_token-authorization) +- [Development](#development) +- [Tests](#tests) + + +## Run server +To run the YOLOv4 inference server, run the following command from the project root at `tt-inference-server`: +```bash +cd tt-inference-server +docker compose --env-file tt-metal-yolov4/.env.default -f tt-metal-yolov4/docker-compose.yaml up --build +``` + +This will start the default Docker container with the entrypoint command set to `server/run_uvicorn.sh`. The next section describes how to override the container's default command with an interractive shell via `bash`. + + +### JWT_TOKEN Authorization + +To authenticate requests use the header `Authorization`. The JWT token can be computed using the script `jwt_util.py`. This is an example: +```bash +export JWT_SECRET= +export AUTHORIZATION="Bearer $(python scripts/jwt_util.py --secret ${JWT_SECRET?ERROR env var JWT_SECRET must be set} encode '{"team_id": "tenstorrent", "token_id":"debug-test"}')" +``` + + +## Development +Inside the container you can then start the server with: +```bash +docker compose --env-file tt-metal-yolov4/.env.default -f tt-metal-yolov4/docker-compose.yaml run --rm --build inference_server /bin/bash +``` + +Inside the container, run `cd ~/app/server` to navigate to the server implementation. + + +## Tests +Tests can be found in `tests/`. The tests have their own dependencies found in `requirements-test.txt`. + +To load test the server, we use `locust` to simulate a single client sending an infinite-FPS video stream to the server for 1 minute. +This yields a server performance ceiling of ~25FPS. First, ensure the server is running (see [how to run the server](#run-server)). Then in a different shell with the base dev `venv` activated: +```bash +cd tt-metal-yolov4 +pip install -r requirements-test.txt +cd tests/ +locust --config locust_config.conf +``` diff --git a/tt-metal-stable-diffusion-1.4/docker-compose.yaml b/tt-metal-stable-diffusion-1.4/docker-compose.yaml new file mode 100644 index 0000000..dd5aaae --- /dev/null +++ b/tt-metal-stable-diffusion-1.4/docker-compose.yaml @@ -0,0 +1,27 @@ +services: + inference_server: + image: ghcr.io/tenstorrent/tt-inference-server/tt-metal-stable-diffusion-1.4-src-base:${IMAGE_VERSION}-tt-metal-${TT_METAL_COMMIT_DOCKER_TAG} + build: + context: . + dockerfile: stable-diffusion-1.4.src.Dockerfile + args: + TT_METAL_DOCKERFILE_VERSION: ${TT_METAL_DOCKERFILE_VERSION} + TT_METAL_COMMIT_SHA_OR_TAG: ${TT_METAL_COMMIT_SHA_OR_TAG} + container_name: inference_server + ports: + - "7000:7000" + devices: + - "/dev/tenstorrent:/dev/tenstorrent" + volumes: + - "/dev/hugepages-1G/:/dev/hugepages-1G:rw" + shm_size: "32G" + cap_add: + - ALL + stdin_open: true + tty: true + # this is redundant as docker compose automatically uses the .env file as its in the same directory + # but this explicitly demonstrates its usage + env_file: + - .env.default + restart: no + command: /bin/bash diff --git a/tt-metal-stable-diffusion-1.4/requirements-test.txt b/tt-metal-stable-diffusion-1.4/requirements-test.txt new file mode 100644 index 0000000..4db54de --- /dev/null +++ b/tt-metal-stable-diffusion-1.4/requirements-test.txt @@ -0,0 +1,3 @@ +pillow==10.3.0 +locust==2.25.0 +pytest==7.2.2 diff --git a/tt-metal-stable-diffusion-1.4/requirements.txt b/tt-metal-stable-diffusion-1.4/requirements.txt new file mode 100644 index 0000000..804ed9f --- /dev/null +++ b/tt-metal-stable-diffusion-1.4/requirements.txt @@ -0,0 +1,5 @@ +# inference server requirements +flask==3.0.2 +gunicorn==21.2.0 +requests==2.31.0 +pyjwt==2.7.0 diff --git a/tt-metal-stable-diffusion-1.4/server/flaskserver.py b/tt-metal-stable-diffusion-1.4/server/flaskserver.py new file mode 100644 index 0000000..3684d2f --- /dev/null +++ b/tt-metal-stable-diffusion-1.4/server/flaskserver.py @@ -0,0 +1,158 @@ +# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc. + +# SPDX-License-Identifier: Apache-2.0 + +from flask import ( + Flask, + request, + jsonify, + send_from_directory, +) +import json +import os +import atexit + +app = Flask(__name__) + + +@app.route("/") +def hello_world(): + return "Hello, World!" + + +@app.route("/submit", methods=["POST"]) +def submit(): + data = request.get_json() + prompt = data.get("prompt") + print(prompt) + + json_file_path = ( + "models/demos/wormhole/stable_diffusion/demo/web_demo/input_prompts.json" + ) + + if not os.path.isfile(json_file_path): + with open(json_file_path, "w") as f: + json.dump({"prompts": []}, f) + + with open(json_file_path, "r") as f: + prompts_data = json.load(f) + + prompts_data["prompts"].append({"prompt": prompt, "status": "not generated"}) + + with open(json_file_path, "w") as f: + json.dump(prompts_data, f, indent=4) + + return jsonify({"message": "Prompt received and added to queue."}) + + +@app.route("/update_status", methods=["POST"]) +def update_status(): + data = request.get_json() + prompt = data.get("prompt") + + json_file_path = ( + "models/demos/wormhole/stable_diffusion/demo/web_demo/input_prompts.json" + ) + + with open(json_file_path, "r") as f: + prompts_data = json.load(f) + + for p in prompts_data["prompts"]: + if p["prompt"] == prompt: + p["status"] = "generated" + break + + with open(json_file_path, "w") as f: + json.dump(prompts_data, f, indent=4) + + return jsonify({"message": "Prompt status updated to generated."}) + + +@app.route("/get_image", methods=["GET"]) +def get_image(): + image_name = "interactive_512x512_ttnn.png" + directory = os.getcwd() # Get the current working directory + return send_from_directory(directory, image_name) + + +@app.route("/image_exists", methods=["GET"]) +def image_exists(): + image_path = "interactive_512x512_ttnn.png" + if os.path.isfile(image_path): + return jsonify({"exists": True}), 200 + else: + return jsonify({"exists": False}), 200 + + +@app.route("/clean_up", methods=["POST"]) +def clean_up(): + json_file_path = ( + "models/demos/wormhole/stable_diffusion/demo/web_demo/input_prompts.json" + ) + + with open(json_file_path, "r") as f: + prompts_data = json.load(f) + + prompts_data["prompts"] = [ + p for p in prompts_data["prompts"] if p["status"] != "done" + ] + + with open(json_file_path, "w") as f: + json.dump(prompts_data, f, indent=4) + + return jsonify({"message": "Cleaned up done prompts."}) + + +@app.route("/get_latest_time", methods=["GET"]) +def get_latest_time(): + json_file_path = ( + "models/demos/wormhole/stable_diffusion/demo/web_demo/input_prompts.json" + ) + + if not os.path.isfile(json_file_path): + return jsonify({"message": "No prompts found"}), 404 + + with open(json_file_path, "r") as f: + prompts_data = json.load(f) + + # Filter prompts that have a total_acc time available + completed_prompts = [p for p in prompts_data["prompts"] if "total_acc" in p] + + if not completed_prompts: + return jsonify({"message": "No completed prompts with time available"}), 404 + + # Get the latest prompt with total_acc + latest_prompt = completed_prompts[-1] # Assuming prompts are in chronological order + + return ( + jsonify( + { + "prompt": latest_prompt["prompt"], + "total_acc": latest_prompt["total_acc"], + "batch_size": latest_prompt["batch_size"], + "steps": latest_prompt["steps"], + } + ), + 200, + ) + + +def cleanup(): + if os.path.isfile( + "models/demos/wormhole/stable_diffusion/demo/web_demo/input_prompts.json" + ): + os.remove( + "models/demos/wormhole/stable_diffusion/demo/web_demo/input_prompts.json" + ) + print("Deleted json") + + if os.path.isfile("interactive_512x512_ttnn.png"): + os.remove("interactive_512x512_ttnn.png") + print("Deleted image") + + +atexit.register(cleanup) + + +def create_server(): + return app diff --git a/tt-metal-stable-diffusion-1.4/server/gunicorn.conf.py b/tt-metal-stable-diffusion-1.4/server/gunicorn.conf.py new file mode 100644 index 0000000..1e904fc --- /dev/null +++ b/tt-metal-stable-diffusion-1.4/server/gunicorn.conf.py @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC + + +workers = 1 +# use 0.0.0.0 for externally accessible +bind = f"0.0.0.0:{7000}" +reload = False +worker_class = "gthread" +threads = 16 +timeout = 160 + +# server factory +wsgi_app = "flaskserver:create_server()" diff --git a/tt-metal-stable-diffusion-1.4/stable-diffusion-1.4.src.Dockerfile b/tt-metal-stable-diffusion-1.4/stable-diffusion-1.4.src.Dockerfile new file mode 100644 index 0000000..dc93435 --- /dev/null +++ b/tt-metal-stable-diffusion-1.4/stable-diffusion-1.4.src.Dockerfile @@ -0,0 +1,97 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC + +# default base image, override with --build-arg TT_METAL_DOCKERFILE_VERSION= +ARG TT_METAL_DOCKERFILE_VERSION + +FROM ghcr.io/tenstorrent/tt-metal/tt-metalium/ubuntu-20.04-amd64:$TT_METAL_DOCKERFILE_VERSION-dev + +# Build stage +LABEL maintainer="Ben Goel " +# connect Github repo with package +LABEL org.opencontainers.image.source=https://github.com/tenstorrent/tt-inference-server + +ARG DEBIAN_FRONTEND=noninteractive +# default commit sha, override with --build-arg TT_METAL_COMMIT_SHA_OR_TAG= +ARG TT_METAL_COMMIT_SHA_OR_TAG + +# make build commit SHA available in the image for reference and debugging +ENV TT_METAL_COMMIT_SHA_OR_TAG=${TT_METAL_COMMIT_SHA_OR_TAG} +ENV SHELL=/bin/bash +ENV TZ=America/Los_Angeles +# tt-metal build vars +ENV ARCH_NAME=wormhole_b0 +ENV TT_METAL_HOME=/tt-metal +ENV CONFIG=Release +ENV TT_METAL_ENV=dev +ENV LOGURU_LEVEL=INFO +# derived vars +ENV PYTHONPATH=${TT_METAL_HOME} +# note: PYTHON_ENV_DIR is used by create_venv.sh +ENV PYTHON_ENV_DIR=${TT_METAL_HOME}/python_env +ENV LD_LIBRARY_PATH=${TT_METAL_HOME}/build/lib + +# extra system deps +RUN apt-get update && apt-get install -y \ + libsndfile1 \ + wget \ + nano \ + acl \ + jq \ + vim \ + # user deps + htop \ + screen \ + tmux \ + unzip \ + zip \ + curl \ + iputils-ping \ + rsync \ + && rm -rf /var/lib/apt/lists/* + +# build tt-metal +RUN git clone https://github.com/tenstorrent-metal/tt-metal.git ${TT_METAL_HOME} \ + && cd ${TT_METAL_HOME} \ + && git checkout ${TT_METAL_COMMIT_SHA_OR_TAG} \ + && git submodule update --init --recursive \ + && git submodule foreach 'git lfs fetch --all && git lfs pull' \ + && bash ./build_metal.sh \ + && bash ./create_venv.sh + +# user setup +ARG HOME_DIR=/home/user +RUN useradd -u 1000 -s /bin/bash -d ${HOME_DIR} user \ + && mkdir -p ${HOME_DIR} \ + && chown -R user:user ${HOME_DIR} \ + && chown -R user:user ${TT_METAL_HOME} + +USER user + +# tt-metal python env default +RUN echo "source ${PYTHON_ENV_DIR}/bin/activate" >> ${HOME_DIR}/.bashrc + +# install tt-smi +RUN /bin/bash -c "source ${PYTHON_ENV_DIR}/bin/activate \ + && pip3 install --upgrade pip \ + && pip3 install git+https://github.com/tenstorrent/tt-smi" + +# runtime required for tt-metal on WH +ENV WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml + +WORKDIR ${HOME_DIR} + +# install inference server requirements +ARG APP_DIR="${HOME_DIR}/app" +ENV APP_DIR=${APP_DIR} +WORKDIR ${APP_DIR} +ENV PYTHONPATH=${PYTHONPATH}:${APP_DIR} +COPY --chown=user:user "/server" "${APP_DIR}/server" +COPY --chown=user:user "/requirements.txt" "${APP_DIR}/requirements.txt" +RUN /bin/bash -c "source ${PYTHON_ENV_DIR}/bin/activate \ + && pip install --default-timeout=240 --no-cache-dir -r requirements.txt" + +# spinup inference server +WORKDIR "${TT_METAL_HOME}" +CMD ["/bin/bash", "-c", "source ${PYTHON_ENV_DIR}/bin/activate && source ${APP_DIR}/server/run_uvicorn.sh"] diff --git a/tt-metal-stable-diffusion-1.4/tests/locust_config.conf b/tt-metal-stable-diffusion-1.4/tests/locust_config.conf new file mode 100644 index 0000000..d72876a --- /dev/null +++ b/tt-metal-stable-diffusion-1.4/tests/locust_config.conf @@ -0,0 +1,6 @@ +locustfile = locustfile.py +headless = true +host = http://localhost:7000 +users = 1 +spawn-rate = 1 +run-time = 1m diff --git a/tt-metal-stable-diffusion-1.4/tests/locustfile.py b/tt-metal-stable-diffusion-1.4/tests/locustfile.py new file mode 100644 index 0000000..2b05872 --- /dev/null +++ b/tt-metal-stable-diffusion-1.4/tests/locustfile.py @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC + +from locust import HttpUser, task +from utils import get_auth_header, sample_file + + +# load sample file in memory +file = sample_file() + + +class HelloWorldUser(HttpUser): + @task + def hello_world(self): + headers = get_auth_header() + self.client.post("/objdetection_v2", files=file, headers=headers) diff --git a/tt-metal-stable-diffusion-1.4/tests/test_inference_api.py b/tt-metal-stable-diffusion-1.4/tests/test_inference_api.py new file mode 100644 index 0000000..d38602f --- /dev/null +++ b/tt-metal-stable-diffusion-1.4/tests/test_inference_api.py @@ -0,0 +1,47 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC + +from http import HTTPStatus +import os +import pytest +import requests +from utils import get_auth_header, sample_file + + +DEPLOY_URL = "http://127.0.0.1" +SERVICE_PORT = int(os.getenv("SERVICE_PORT", 7000)) +API_BASE_URL = f"{DEPLOY_URL}:{SERVICE_PORT}" +API_URL = f"{API_BASE_URL}/objdetection_v2" +HEALTH_URL = f"{API_BASE_URL}/health" + + +def test_valid_api_call(): + # get sample image file + file = sample_file() + # make request with auth headers + headers = get_auth_header() + response = requests.post(API_URL, files=file, headers=headers) + # perform status and value checking + assert response.status_code == HTTPStatus.OK + assert isinstance(response.json(), list) + + +def test_invalid_api_call(): + # get sample image file + file = sample_file() + # make request with INVALID auth header + headers = get_auth_header() + headers.update(Authorization="INVALID API KEY") + response = requests.post(API_URL, files=file, headers=headers) + # assert request was unauthorized + assert response.status_code == HTTPStatus.UNAUTHORIZED + + +@pytest.mark.skip( + reason="Not implemented, see https://github.com/tenstorrent/tt-inference-server/issues/63" +) +def test_get_health(): + headers = {} + response = requests.get(HEALTH_URL, headers=headers, timeout=35) + assert response.status_code == 200 diff --git a/tt-metal-stable-diffusion-1.4/tests/utils.py b/tt-metal-stable-diffusion-1.4/tests/utils.py new file mode 100644 index 0000000..e884c07 --- /dev/null +++ b/tt-metal-stable-diffusion-1.4/tests/utils.py @@ -0,0 +1,34 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC + +import io +import os +from PIL import Image +import requests + + +def get_auth_header(): + if authorization_header := os.getenv("AUTHORIZATION", None): + headers = {"Authorization": authorization_header} + return headers + else: + raise RuntimeError("AUTHORIZATION environment variable is undefined.") + + +# save image as JPEG in-memory +def sample_file(): + # load sample image + url = "http://images.cocodataset.org/val2017/000000039769.jpg" + pil_image = Image.open(requests.get(url, stream=True).raw) + pil_image = pil_image.resize((320, 320)) # Resize to target dimensions + # convert to bytes + buf = io.BytesIO() + # format as JPEG + pil_image.save( + buf, + format="JPEG", + ) + byte_im = buf.getvalue() + file = {"file": byte_im} + return file From 900c08b981edf418fa896248ca38f41f993f65e1 Mon Sep 17 00:00:00 2001 From: bgoelTT Date: Sun, 19 Jan 2025 21:18:14 -0500 Subject: [PATCH 02/15] Add sd server as background process in flask server --- tt-metal-stable-diffusion-1.4/.env.default | 2 +- .../docker-compose.yaml | 3 +- .../server/flaskserver.py | 44 +++++++++++++++++++ .../stable-diffusion-1.4.src.Dockerfile | 4 +- 4 files changed, 48 insertions(+), 5 deletions(-) diff --git a/tt-metal-stable-diffusion-1.4/.env.default b/tt-metal-stable-diffusion-1.4/.env.default index 8d9ab88..4a3f359 100644 --- a/tt-metal-stable-diffusion-1.4/.env.default +++ b/tt-metal-stable-diffusion-1.4/.env.default @@ -1,4 +1,4 @@ -TT_METAL_DOCKERFILE_VERSION=v0.53.0-rc51 +TT_METAL_DOCKERFILE_VERSION=v0.53.0-rc34 TT_METAL_COMMIT_SHA_OR_TAG=4da4a5e79a13ece7ff5096c30cef79cb0c504f0e TT_METAL_COMMIT_DOCKER_TAG=4da4a5e79a13 # 12-character version of TT_METAL_COMMIT_SHA_OR_TAG IMAGE_VERSION=v0.0.1 diff --git a/tt-metal-stable-diffusion-1.4/docker-compose.yaml b/tt-metal-stable-diffusion-1.4/docker-compose.yaml index dd5aaae..743133d 100644 --- a/tt-metal-stable-diffusion-1.4/docker-compose.yaml +++ b/tt-metal-stable-diffusion-1.4/docker-compose.yaml @@ -7,7 +7,7 @@ services: args: TT_METAL_DOCKERFILE_VERSION: ${TT_METAL_DOCKERFILE_VERSION} TT_METAL_COMMIT_SHA_OR_TAG: ${TT_METAL_COMMIT_SHA_OR_TAG} - container_name: inference_server + container_name: sd_inference_server ports: - "7000:7000" devices: @@ -24,4 +24,3 @@ services: env_file: - .env.default restart: no - command: /bin/bash diff --git a/tt-metal-stable-diffusion-1.4/server/flaskserver.py b/tt-metal-stable-diffusion-1.4/server/flaskserver.py index 3684d2f..a7a756c 100644 --- a/tt-metal-stable-diffusion-1.4/server/flaskserver.py +++ b/tt-metal-stable-diffusion-1.4/server/flaskserver.py @@ -12,6 +12,44 @@ import os import atexit +import subprocess +import signal +import sys + +# script to run in background +script = "pytest models/demos/wormhole/stable_diffusion/demo/web_demo/sdserver.py" + +# Start script using subprocess +process1 = subprocess.Popen(script, shell=True) + + +# Function to kill process using port 5000 +def kill_port_5000(): + try: + result = subprocess.check_output( + "lsof -i :5000 | grep LISTEN | awk '{print $2}'", shell=True + ) + pid = int(result.strip()) + print(f"Killing process {pid} using port 5000") + os.kill(pid, signal.SIGTERM) + except subprocess.CalledProcessError: + print("No process found using port 5000") + except Exception as e: + print(f"Error occurred: {e}") + + +# Function to terminate both processes and kill port 5000 +def signal_handler(sig, frame): + print("Terminating processes...") + process1.terminate() + kill_port_5000() + print("Processes terminated and port 5000 cleared.") + sys.exit(0) + + +signal.signal(signal.SIGINT, signal_handler) +signal.signal(signal.SIGTERM, signal_handler) + app = Flask(__name__) @@ -150,6 +188,12 @@ def cleanup(): os.remove("interactive_512x512_ttnn.png") print("Deleted image") + print("Running. Press Ctrl+C to stop.") + try: + process1.wait() + except KeyboardInterrupt: + signal_handler(None, None) + atexit.register(cleanup) diff --git a/tt-metal-stable-diffusion-1.4/stable-diffusion-1.4.src.Dockerfile b/tt-metal-stable-diffusion-1.4/stable-diffusion-1.4.src.Dockerfile index dc93435..64fcdf0 100644 --- a/tt-metal-stable-diffusion-1.4/stable-diffusion-1.4.src.Dockerfile +++ b/tt-metal-stable-diffusion-1.4/stable-diffusion-1.4.src.Dockerfile @@ -86,7 +86,7 @@ WORKDIR ${HOME_DIR} ARG APP_DIR="${HOME_DIR}/app" ENV APP_DIR=${APP_DIR} WORKDIR ${APP_DIR} -ENV PYTHONPATH=${PYTHONPATH}:${APP_DIR} +ENV PYTHONPATH=${PYTHONPATH}:${APP_DIR}/server COPY --chown=user:user "/server" "${APP_DIR}/server" COPY --chown=user:user "/requirements.txt" "${APP_DIR}/requirements.txt" RUN /bin/bash -c "source ${PYTHON_ENV_DIR}/bin/activate \ @@ -94,4 +94,4 @@ RUN /bin/bash -c "source ${PYTHON_ENV_DIR}/bin/activate \ # spinup inference server WORKDIR "${TT_METAL_HOME}" -CMD ["/bin/bash", "-c", "source ${PYTHON_ENV_DIR}/bin/activate && source ${APP_DIR}/server/run_uvicorn.sh"] +CMD ["/bin/bash", "-c", "source ${PYTHON_ENV_DIR}/bin/activate && gunicorn --config ${APP_DIR}/server/gunicorn.conf.py"] From 302f29369d981f46d8edaed008b30cfee7cdb47a Mon Sep 17 00:00:00 2001 From: bgoelTT Date: Sun, 19 Jan 2025 21:35:14 -0500 Subject: [PATCH 03/15] Update README and testing --- tt-metal-stable-diffusion-1.4/README.md | 18 ++++++++-------- .../tests/locustfile.py | 5 ++--- .../tests/test_inference_api.py | 20 ++++++++++-------- tt-metal-stable-diffusion-1.4/tests/utils.py | 21 ++----------------- 4 files changed, 24 insertions(+), 40 deletions(-) diff --git a/tt-metal-stable-diffusion-1.4/README.md b/tt-metal-stable-diffusion-1.4/README.md index 4ea7045..61b7cf2 100644 --- a/tt-metal-stable-diffusion-1.4/README.md +++ b/tt-metal-stable-diffusion-1.4/README.md @@ -1,6 +1,6 @@ -# TT Metalium YOLOv4 Inference API +# TT Metalium Stable Diffusion 1.4 Inference API -This implementation supports YOLOv4 execution on Grayskull and Worhmole. +This implementation supports Stable Diffusion 1.4 execution on Worhmole n150 (n300 currently broken). ## Table of Contents @@ -11,19 +11,20 @@ This implementation supports YOLOv4 execution on Grayskull and Worhmole. ## Run server -To run the YOLOv4 inference server, run the following command from the project root at `tt-inference-server`: +To run the SD1.4 inference server, run the following command from the project root at `tt-inference-server`: ```bash cd tt-inference-server -docker compose --env-file tt-metal-yolov4/.env.default -f tt-metal-yolov4/docker-compose.yaml up --build +docker compose --env-file tt-metal-stable-diffusion-1.4/.env.default -f tt-metal-stable-diffusion-1.4/docker-compose.yaml up --build ``` -This will start the default Docker container with the entrypoint command set to `server/run_uvicorn.sh`. The next section describes how to override the container's default command with an interractive shell via `bash`. +This will start the default Docker container with the entrypoint command set to run the gunicorn server. The next section describes how to override the container's default command with an interractive shell via `bash`. ### JWT_TOKEN Authorization To authenticate requests use the header `Authorization`. The JWT token can be computed using the script `jwt_util.py`. This is an example: ```bash +cd tt-inference-server/tt-metal-yolov4/server export JWT_SECRET= export AUTHORIZATION="Bearer $(python scripts/jwt_util.py --secret ${JWT_SECRET?ERROR env var JWT_SECRET must be set} encode '{"team_id": "tenstorrent", "token_id":"debug-test"}')" ``` @@ -32,7 +33,7 @@ export AUTHORIZATION="Bearer $(python scripts/jwt_util.py --secret ${JWT_SECRET? ## Development Inside the container you can then start the server with: ```bash -docker compose --env-file tt-metal-yolov4/.env.default -f tt-metal-yolov4/docker-compose.yaml run --rm --build inference_server /bin/bash +docker compose --env-file tt-metal-stable-diffusion-1.4/.env.default -f tt-metal-stable-diffusion-1.4/docker-compose.yaml run --rm --build inference_server /bin/bash ``` Inside the container, run `cd ~/app/server` to navigate to the server implementation. @@ -41,10 +42,9 @@ Inside the container, run `cd ~/app/server` to navigate to the server implementa ## Tests Tests can be found in `tests/`. The tests have their own dependencies found in `requirements-test.txt`. -To load test the server, we use `locust` to simulate a single client sending an infinite-FPS video stream to the server for 1 minute. -This yields a server performance ceiling of ~25FPS. First, ensure the server is running (see [how to run the server](#run-server)). Then in a different shell with the base dev `venv` activated: +First, ensure the server is running (see [how to run the server](#run-server)). Then in a different shell with the base dev `venv` activated: ```bash -cd tt-metal-yolov4 +cd tt-metal-stable-diffusion-1.4 pip install -r requirements-test.txt cd tests/ locust --config locust_config.conf diff --git a/tt-metal-stable-diffusion-1.4/tests/locustfile.py b/tt-metal-stable-diffusion-1.4/tests/locustfile.py index 2b05872..4ba00b4 100644 --- a/tt-metal-stable-diffusion-1.4/tests/locustfile.py +++ b/tt-metal-stable-diffusion-1.4/tests/locustfile.py @@ -3,7 +3,7 @@ # SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC from locust import HttpUser, task -from utils import get_auth_header, sample_file +from utils import sample_file # load sample file in memory @@ -13,5 +13,4 @@ class HelloWorldUser(HttpUser): @task def hello_world(self): - headers = get_auth_header() - self.client.post("/objdetection_v2", files=file, headers=headers) + raise NotImplementedError diff --git a/tt-metal-stable-diffusion-1.4/tests/test_inference_api.py b/tt-metal-stable-diffusion-1.4/tests/test_inference_api.py index d38602f..021f09e 100644 --- a/tt-metal-stable-diffusion-1.4/tests/test_inference_api.py +++ b/tt-metal-stable-diffusion-1.4/tests/test_inference_api.py @@ -6,34 +6,36 @@ import os import pytest import requests -from utils import get_auth_header, sample_file +from utils import get_auth_header, get_sample_prompt DEPLOY_URL = "http://127.0.0.1" SERVICE_PORT = int(os.getenv("SERVICE_PORT", 7000)) API_BASE_URL = f"{DEPLOY_URL}:{SERVICE_PORT}" -API_URL = f"{API_BASE_URL}/objdetection_v2" +API_URL = f"{API_BASE_URL}/submit" HEALTH_URL = f"{API_BASE_URL}/health" def test_valid_api_call(): - # get sample image file - file = sample_file() + # get sample prompt + sample_prompt = get_sample_prompt() + body = {"prompt": sample_prompt} # make request with auth headers headers = get_auth_header() - response = requests.post(API_URL, files=file, headers=headers) + response = requests.post(API_URL, json=body, headers=headers) # perform status and value checking assert response.status_code == HTTPStatus.OK - assert isinstance(response.json(), list) + assert isinstance(response.json(), dict) def test_invalid_api_call(): - # get sample image file - file = sample_file() + # get sample prompt + sample_prompt = get_sample_prompt() + body = {"prompt": sample_prompt} # make request with INVALID auth header headers = get_auth_header() headers.update(Authorization="INVALID API KEY") - response = requests.post(API_URL, files=file, headers=headers) + response = requests.post(API_URL, json=body, headers=headers) # assert request was unauthorized assert response.status_code == HTTPStatus.UNAUTHORIZED diff --git a/tt-metal-stable-diffusion-1.4/tests/utils.py b/tt-metal-stable-diffusion-1.4/tests/utils.py index e884c07..2cdf981 100644 --- a/tt-metal-stable-diffusion-1.4/tests/utils.py +++ b/tt-metal-stable-diffusion-1.4/tests/utils.py @@ -2,10 +2,7 @@ # # SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC -import io import os -from PIL import Image -import requests def get_auth_header(): @@ -16,19 +13,5 @@ def get_auth_header(): raise RuntimeError("AUTHORIZATION environment variable is undefined.") -# save image as JPEG in-memory -def sample_file(): - # load sample image - url = "http://images.cocodataset.org/val2017/000000039769.jpg" - pil_image = Image.open(requests.get(url, stream=True).raw) - pil_image = pil_image.resize((320, 320)) # Resize to target dimensions - # convert to bytes - buf = io.BytesIO() - # format as JPEG - pil_image.save( - buf, - format="JPEG", - ) - byte_im = buf.getvalue() - file = {"file": byte_im} - return file +def get_sample_prompt(): + return "Red dog" From 136841a7905ebb57a47bd0bc977e2e2a3b2b8e00 Mon Sep 17 00:00:00 2001 From: bgoelTT Date: Mon, 20 Jan 2025 11:20:49 -0500 Subject: [PATCH 04/15] Add ready check mechanism, not finished --- .../server/flaskserver.py | 62 +++++++++++-------- 1 file changed, 36 insertions(+), 26 deletions(-) diff --git a/tt-metal-stable-diffusion-1.4/server/flaskserver.py b/tt-metal-stable-diffusion-1.4/server/flaskserver.py index a7a756c..ecf1f91 100644 --- a/tt-metal-stable-diffusion-1.4/server/flaskserver.py +++ b/tt-metal-stable-diffusion-1.4/server/flaskserver.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 from flask import ( + abort, Flask, request, jsonify, @@ -11,6 +12,8 @@ import json import os import atexit +import time +from http import HTTPStatus import subprocess import signal @@ -52,34 +55,44 @@ def signal_handler(sig, frame): app = Flask(__name__) +# var to indicate ready state +ready = False + +# internal json prompt file +json_file_path = ( + "models/demos/wormhole/stable_diffusion/demo/web_demo/input_prompts.json" +) + @app.route("/") def hello_world(): return "Hello, World!" -@app.route("/submit", methods=["POST"]) -def submit(): - data = request.get_json() - prompt = data.get("prompt") - print(prompt) - - json_file_path = ( - "models/demos/wormhole/stable_diffusion/demo/web_demo/input_prompts.json" - ) - - if not os.path.isfile(json_file_path): - with open(json_file_path, "w") as f: +def submit_prompt(prompt_file, prompt): + if not os.path.isfile(prompt_file): + with open(prompt_file, "w") as f: json.dump({"prompts": []}, f) - with open(json_file_path, "r") as f: + with open(prompt_file, "r") as f: prompts_data = json.load(f) prompts_data["prompts"].append({"prompt": prompt, "status": "not generated"}) - with open(json_file_path, "w") as f: + with open(prompt_file, "w") as f: json.dump(prompts_data, f, indent=4) + +@app.route("/submit", methods=["POST"]) +def submit(): + if not ready: + abort(HTTPStatus.SERVICE_UNAVAILABLE, description="Server is not ready yet") + data = request.get_json() + prompt = data.get("prompt") + print(prompt) + + submit_prompt(json_file_path, prompt) + return jsonify({"message": "Prompt received and added to queue."}) @@ -88,10 +101,6 @@ def update_status(): data = request.get_json() prompt = data.get("prompt") - json_file_path = ( - "models/demos/wormhole/stable_diffusion/demo/web_demo/input_prompts.json" - ) - with open(json_file_path, "r") as f: prompts_data = json.load(f) @@ -124,10 +133,6 @@ def image_exists(): @app.route("/clean_up", methods=["POST"]) def clean_up(): - json_file_path = ( - "models/demos/wormhole/stable_diffusion/demo/web_demo/input_prompts.json" - ) - with open(json_file_path, "r") as f: prompts_data = json.load(f) @@ -143,10 +148,6 @@ def clean_up(): @app.route("/get_latest_time", methods=["GET"]) def get_latest_time(): - json_file_path = ( - "models/demos/wormhole/stable_diffusion/demo/web_demo/input_prompts.json" - ) - if not os.path.isfile(json_file_path): return jsonify({"message": "No prompts found"}), 404 @@ -199,4 +200,13 @@ def cleanup(): def create_server(): + sample_prompt = "Unicorn on a banana" + submit_prompt(json_file_path, sample_prompt) + while not ready: + with open(json_file_path, "r") as f: + prompts_data = json.load(f) + for p in prompts_data["prompts"]: + if p["prompt"] == sample_prompt: + print(p["status"]) + time.sleep(2) return app From 572cdb352e73ad4f9248c8e6c07204f16ee47540 Mon Sep 17 00:00:00 2001 From: bgoelTT Date: Tue, 21 Jan 2025 16:56:05 -0500 Subject: [PATCH 05/15] Finish readycheck warmup thread --- .../server/flaskserver.py | 58 +++++++++---------- .../server/gunicorn.conf.py | 2 +- 2 files changed, 28 insertions(+), 32 deletions(-) diff --git a/tt-metal-stable-diffusion-1.4/server/flaskserver.py b/tt-metal-stable-diffusion-1.4/server/flaskserver.py index ecf1f91..0a46b5f 100644 --- a/tt-metal-stable-diffusion-1.4/server/flaskserver.py +++ b/tt-metal-stable-diffusion-1.4/server/flaskserver.py @@ -13,6 +13,7 @@ import os import atexit import time +import threading from http import HTTPStatus import subprocess @@ -26,27 +27,10 @@ process1 = subprocess.Popen(script, shell=True) -# Function to kill process using port 5000 -def kill_port_5000(): - try: - result = subprocess.check_output( - "lsof -i :5000 | grep LISTEN | awk '{print $2}'", shell=True - ) - pid = int(result.strip()) - print(f"Killing process {pid} using port 5000") - os.kill(pid, signal.SIGTERM) - except subprocess.CalledProcessError: - print("No process found using port 5000") - except Exception as e: - print(f"Error occurred: {e}") - - # Function to terminate both processes and kill port 5000 def signal_handler(sig, frame): print("Terminating processes...") process1.terminate() - kill_port_5000() - print("Processes terminated and port 5000 cleared.") sys.exit(0) @@ -83,8 +67,33 @@ def submit_prompt(prompt_file, prompt): json.dump(prompts_data, f, indent=4) +def warmup(): + sample_prompt = "Unicorn on a banana" + # submit sample prompt to perform tracing and server warmup + submit_prompt(json_file_path, sample_prompt) + global ready + while not ready: + with open(json_file_path, "r") as f: + prompts_data = json.load(f) + # sample prompt should be first prompt + sample_prompt_data = prompts_data["prompts"][0] + if sample_prompt_data["prompt"] == sample_prompt: + # TODO: remove this and replace with status check == "done" + # to flip ready flag + if sample_prompt_data["status"] == "done": + ready = True + print(sample_prompt_data["status"]) + time.sleep(3) + + +# start warmup routine in background while server starts +warmup_thread = threading.Thread(target=warmup, name="warmup") +warmup_thread.start() + + @app.route("/submit", methods=["POST"]) def submit(): + global ready if not ready: abort(HTTPStatus.SERVICE_UNAVAILABLE, description="Server is not ready yet") data = request.get_json() @@ -189,24 +198,11 @@ def cleanup(): os.remove("interactive_512x512_ttnn.png") print("Deleted image") - print("Running. Press Ctrl+C to stop.") - try: - process1.wait() - except KeyboardInterrupt: - signal_handler(None, None) + signal_handler(None, None) atexit.register(cleanup) def create_server(): - sample_prompt = "Unicorn on a banana" - submit_prompt(json_file_path, sample_prompt) - while not ready: - with open(json_file_path, "r") as f: - prompts_data = json.load(f) - for p in prompts_data["prompts"]: - if p["prompt"] == sample_prompt: - print(p["status"]) - time.sleep(2) return app diff --git a/tt-metal-stable-diffusion-1.4/server/gunicorn.conf.py b/tt-metal-stable-diffusion-1.4/server/gunicorn.conf.py index 1e904fc..991c63c 100644 --- a/tt-metal-stable-diffusion-1.4/server/gunicorn.conf.py +++ b/tt-metal-stable-diffusion-1.4/server/gunicorn.conf.py @@ -9,7 +9,7 @@ reload = False worker_class = "gthread" threads = 16 -timeout = 160 +timeout = 0 # server factory wsgi_app = "flaskserver:create_server()" From cd869eb2cf28af9324201a8923a4fb1427f0a360 Mon Sep 17 00:00:00 2001 From: bgoelTT Date: Tue, 21 Jan 2025 17:17:09 -0500 Subject: [PATCH 06/15] Add health check endpoint --- tt-metal-stable-diffusion-1.4/server/flaskserver.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tt-metal-stable-diffusion-1.4/server/flaskserver.py b/tt-metal-stable-diffusion-1.4/server/flaskserver.py index 0a46b5f..9a424da 100644 --- a/tt-metal-stable-diffusion-1.4/server/flaskserver.py +++ b/tt-metal-stable-diffusion-1.4/server/flaskserver.py @@ -91,6 +91,13 @@ def warmup(): warmup_thread.start() +@app.route("/health") +def health_check(): + if not ready: + abort(HTTPStatus.SERVICE_UNAVAILABLE, description="Server is not ready yet") + return "OK\n", 200 + + @app.route("/submit", methods=["POST"]) def submit(): global ready From f7774d76216f4cda6dbd311dc2374ead92b7bbf7 Mon Sep 17 00:00:00 2001 From: bgoelTT Date: Wed, 22 Jan 2025 16:54:00 -0500 Subject: [PATCH 07/15] Add common API token utility and use --- .../docker-compose.yaml | 4 +- .../server/flaskserver.py | 2 + .../server/gunicorn.conf.py | 2 +- .../stable-diffusion-1.4.src.Dockerfile | 7 +-- utils/authentication.py | 52 +++++++++++++++++++ 5 files changed, 61 insertions(+), 6 deletions(-) create mode 100644 utils/authentication.py diff --git a/tt-metal-stable-diffusion-1.4/docker-compose.yaml b/tt-metal-stable-diffusion-1.4/docker-compose.yaml index 743133d..c01e2f0 100644 --- a/tt-metal-stable-diffusion-1.4/docker-compose.yaml +++ b/tt-metal-stable-diffusion-1.4/docker-compose.yaml @@ -2,8 +2,8 @@ services: inference_server: image: ghcr.io/tenstorrent/tt-inference-server/tt-metal-stable-diffusion-1.4-src-base:${IMAGE_VERSION}-tt-metal-${TT_METAL_COMMIT_DOCKER_TAG} build: - context: . - dockerfile: stable-diffusion-1.4.src.Dockerfile + context: ../ + dockerfile: tt-metal-stable-diffusion-1.4/stable-diffusion-1.4.src.Dockerfile args: TT_METAL_DOCKERFILE_VERSION: ${TT_METAL_DOCKERFILE_VERSION} TT_METAL_COMMIT_SHA_OR_TAG: ${TT_METAL_COMMIT_SHA_OR_TAG} diff --git a/tt-metal-stable-diffusion-1.4/server/flaskserver.py b/tt-metal-stable-diffusion-1.4/server/flaskserver.py index 9a424da..d31fbaf 100644 --- a/tt-metal-stable-diffusion-1.4/server/flaskserver.py +++ b/tt-metal-stable-diffusion-1.4/server/flaskserver.py @@ -15,6 +15,7 @@ import time import threading from http import HTTPStatus +from utils.authentication import api_key_required import subprocess import signal @@ -99,6 +100,7 @@ def health_check(): @app.route("/submit", methods=["POST"]) +@api_key_required def submit(): global ready if not ready: diff --git a/tt-metal-stable-diffusion-1.4/server/gunicorn.conf.py b/tt-metal-stable-diffusion-1.4/server/gunicorn.conf.py index 991c63c..0a473c7 100644 --- a/tt-metal-stable-diffusion-1.4/server/gunicorn.conf.py +++ b/tt-metal-stable-diffusion-1.4/server/gunicorn.conf.py @@ -12,4 +12,4 @@ timeout = 0 # server factory -wsgi_app = "flaskserver:create_server()" +wsgi_app = "server.flaskserver:create_server()" diff --git a/tt-metal-stable-diffusion-1.4/stable-diffusion-1.4.src.Dockerfile b/tt-metal-stable-diffusion-1.4/stable-diffusion-1.4.src.Dockerfile index 64fcdf0..93a96a4 100644 --- a/tt-metal-stable-diffusion-1.4/stable-diffusion-1.4.src.Dockerfile +++ b/tt-metal-stable-diffusion-1.4/stable-diffusion-1.4.src.Dockerfile @@ -86,9 +86,10 @@ WORKDIR ${HOME_DIR} ARG APP_DIR="${HOME_DIR}/app" ENV APP_DIR=${APP_DIR} WORKDIR ${APP_DIR} -ENV PYTHONPATH=${PYTHONPATH}:${APP_DIR}/server -COPY --chown=user:user "/server" "${APP_DIR}/server" -COPY --chown=user:user "/requirements.txt" "${APP_DIR}/requirements.txt" +ENV PYTHONPATH=${PYTHONPATH}:${APP_DIR} +COPY --chown=user:user "tt-metal-stable-diffusion-1.4/server" "${APP_DIR}/server" +COPY --chown=user:user "utils" "${APP_DIR}/utils" +COPY --chown=user:user "tt-metal-stable-diffusion-1.4/requirements.txt" "${APP_DIR}/requirements.txt" RUN /bin/bash -c "source ${PYTHON_ENV_DIR}/bin/activate \ && pip install --default-timeout=240 --no-cache-dir -r requirements.txt" diff --git a/utils/authentication.py b/utils/authentication.py new file mode 100644 index 0000000..53c665a --- /dev/null +++ b/utils/authentication.py @@ -0,0 +1,52 @@ +# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc. + +# SPDX-License-Identifier: Apache-2.0 +from flask import abort, request +from functools import wraps +from http import HTTPStatus +import jwt +import os +from typing import Optional + + +def normalize_token(token) -> [str, str]: + """ + Note that scheme is case insensitive for the authorization header. + See: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Authorization#directives + """ # noqa: E501 + one_space = " " + words = token.split(one_space) + scheme = words[0].lower() + return [scheme, " ".join(words[1:])] + + +def read_authorization( + headers, +) -> Optional[dict]: + authorization = headers.get("authorization") + if not authorization: + abort(HTTPStatus.UNAUTHORIZED, description="Must provide Authorization header.") + [scheme, parameters] = normalize_token(authorization) + if scheme != "bearer": + user_error_msg = f"Authorization scheme was '{scheme}' instead of bearer" + abort(HTTPStatus.UNAUTHORIZED, description=user_error_msg) + try: + payload = jwt.decode(parameters, os.getenv("JWT_SECRET"), algorithms=["HS256"]) + if not payload: + abort(HTTPStatus.UNAUTHORIZED) + return payload + except jwt.InvalidTokenError as exc: + user_error_msg = f"JWT payload decode error: {exc}" + abort(HTTPStatus.BAD_REQUEST, description=user_error_msg) + + +def api_key_required(f): + """Decorates an endpoint to require API key validation""" + + @wraps(f) + def wrapper(*args, **kwargs): + _ = read_authorization(request.headers) + + return f(*args, **kwargs) + + return wrapper From a92e81c5f6e16e40897a4dbec32727c43c48f800 Mon Sep 17 00:00:00 2001 From: bgoelTT Date: Thu, 23 Jan 2025 11:40:31 -0500 Subject: [PATCH 08/15] Return JSON object --- tt-metal-stable-diffusion-1.4/server/flaskserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tt-metal-stable-diffusion-1.4/server/flaskserver.py b/tt-metal-stable-diffusion-1.4/server/flaskserver.py index d31fbaf..63d947c 100644 --- a/tt-metal-stable-diffusion-1.4/server/flaskserver.py +++ b/tt-metal-stable-diffusion-1.4/server/flaskserver.py @@ -96,7 +96,7 @@ def warmup(): def health_check(): if not ready: abort(HTTPStatus.SERVICE_UNAVAILABLE, description="Server is not ready yet") - return "OK\n", 200 + return jsonify({"message": "OK\n"}), 200 @app.route("/submit", methods=["POST"]) From 4582863842ea70ee47405a866d94acbf9caf686e Mon Sep 17 00:00:00 2001 From: bgoelTT Date: Sat, 1 Feb 2025 21:24:53 -0500 Subject: [PATCH 09/15] Use tt-metal release v0.55.0 --- tt-metal-stable-diffusion-1.4/.env.default | 6 +++--- .../stable-diffusion-1.4.src.Dockerfile | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tt-metal-stable-diffusion-1.4/.env.default b/tt-metal-stable-diffusion-1.4/.env.default index 4a3f359..ee3654a 100644 --- a/tt-metal-stable-diffusion-1.4/.env.default +++ b/tt-metal-stable-diffusion-1.4/.env.default @@ -1,6 +1,6 @@ -TT_METAL_DOCKERFILE_VERSION=v0.53.0-rc34 -TT_METAL_COMMIT_SHA_OR_TAG=4da4a5e79a13ece7ff5096c30cef79cb0c504f0e -TT_METAL_COMMIT_DOCKER_TAG=4da4a5e79a13 # 12-character version of TT_METAL_COMMIT_SHA_OR_TAG +TT_METAL_DOCKERFILE_VERSION=v0.55.0 +TT_METAL_COMMIT_SHA_OR_TAG=v0.55.0 +TT_METAL_COMMIT_DOCKER_TAG=v0.55.0 # technically redundant but this var is used to name the image IMAGE_VERSION=v0.0.1 # These are secrets and must be stored securely for production environments JWT_SECRET=testing diff --git a/tt-metal-stable-diffusion-1.4/stable-diffusion-1.4.src.Dockerfile b/tt-metal-stable-diffusion-1.4/stable-diffusion-1.4.src.Dockerfile index 93a96a4..273e3cb 100644 --- a/tt-metal-stable-diffusion-1.4/stable-diffusion-1.4.src.Dockerfile +++ b/tt-metal-stable-diffusion-1.4/stable-diffusion-1.4.src.Dockerfile @@ -5,7 +5,7 @@ # default base image, override with --build-arg TT_METAL_DOCKERFILE_VERSION= ARG TT_METAL_DOCKERFILE_VERSION -FROM ghcr.io/tenstorrent/tt-metal/tt-metalium/ubuntu-20.04-amd64:$TT_METAL_DOCKERFILE_VERSION-dev +FROM ghcr.io/tenstorrent/tt-metal/tt-metalium-ubuntu-20.04-amd64-release:$TT_METAL_DOCKERFILE_VERSION # Build stage LABEL maintainer="Ben Goel " @@ -40,6 +40,7 @@ RUN apt-get update && apt-get install -y \ acl \ jq \ vim \ + git-lfs \ # user deps htop \ screen \ From f5650c5ee81ff896ef7d4752d47299551e5294ea Mon Sep 17 00:00:00 2001 From: bgoelTT Date: Sat, 1 Feb 2025 21:53:48 -0500 Subject: [PATCH 10/15] Use logging instead of print --- .../server/flaskserver.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/tt-metal-stable-diffusion-1.4/server/flaskserver.py b/tt-metal-stable-diffusion-1.4/server/flaskserver.py index 63d947c..bf5e1a9 100644 --- a/tt-metal-stable-diffusion-1.4/server/flaskserver.py +++ b/tt-metal-stable-diffusion-1.4/server/flaskserver.py @@ -10,6 +10,7 @@ send_from_directory, ) import json +import logging import os import atexit import time @@ -21,6 +22,11 @@ import signal import sys + +# initialize logger +logger = logging.getLogger(__name__) + + # script to run in background script = "pytest models/demos/wormhole/stable_diffusion/demo/web_demo/sdserver.py" @@ -30,7 +36,7 @@ # Function to terminate both processes and kill port 5000 def signal_handler(sig, frame): - print("Terminating processes...") + logger.info("Terminating processes...") process1.terminate() sys.exit(0) @@ -83,7 +89,7 @@ def warmup(): # to flip ready flag if sample_prompt_data["status"] == "done": ready = True - print(sample_prompt_data["status"]) + logger.info("Warmup complete") time.sleep(3) @@ -107,7 +113,7 @@ def submit(): abort(HTTPStatus.SERVICE_UNAVAILABLE, description="Server is not ready yet") data = request.get_json() prompt = data.get("prompt") - print(prompt) + logger.info(f"Prompt: {prompt}") submit_prompt(json_file_path, prompt) @@ -201,13 +207,14 @@ def cleanup(): os.remove( "models/demos/wormhole/stable_diffusion/demo/web_demo/input_prompts.json" ) - print("Deleted json") + logger.info("Deleted json") if os.path.isfile("interactive_512x512_ttnn.png"): os.remove("interactive_512x512_ttnn.png") - print("Deleted image") + logger.info("Deleted image") signal_handler(None, None) + logger.info("Cleanup complete") atexit.register(cleanup) From ada9877436eadf4e0684096e69095f837f32f2f2 Mon Sep 17 00:00:00 2001 From: bgoelTT Date: Sat, 1 Feb 2025 21:55:00 -0500 Subject: [PATCH 11/15] Update header year to 2025 --- tt-metal-stable-diffusion-1.4/server/flaskserver.py | 4 ++-- tt-metal-stable-diffusion-1.4/server/gunicorn.conf.py | 2 +- .../stable-diffusion-1.4.src.Dockerfile | 2 +- tt-metal-stable-diffusion-1.4/tests/test_inference_api.py | 2 +- tt-metal-stable-diffusion-1.4/tests/utils.py | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tt-metal-stable-diffusion-1.4/server/flaskserver.py b/tt-metal-stable-diffusion-1.4/server/flaskserver.py index bf5e1a9..7f56ebd 100644 --- a/tt-metal-stable-diffusion-1.4/server/flaskserver.py +++ b/tt-metal-stable-diffusion-1.4/server/flaskserver.py @@ -1,5 +1,5 @@ -# SPDX-FileCopyrightText: © 2024 Tenstorrent Inc. - +# SPDX-FileCopyrightText: © 2025 Tenstorrent Inc. +# # SPDX-License-Identifier: Apache-2.0 from flask import ( diff --git a/tt-metal-stable-diffusion-1.4/server/gunicorn.conf.py b/tt-metal-stable-diffusion-1.4/server/gunicorn.conf.py index 0a473c7..45a7f5e 100644 --- a/tt-metal-stable-diffusion-1.4/server/gunicorn.conf.py +++ b/tt-metal-stable-diffusion-1.4/server/gunicorn.conf.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # -# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC +# SPDX-FileCopyrightText: © 2025 Tenstorrent AI ULC workers = 1 diff --git a/tt-metal-stable-diffusion-1.4/stable-diffusion-1.4.src.Dockerfile b/tt-metal-stable-diffusion-1.4/stable-diffusion-1.4.src.Dockerfile index 273e3cb..189c47a 100644 --- a/tt-metal-stable-diffusion-1.4/stable-diffusion-1.4.src.Dockerfile +++ b/tt-metal-stable-diffusion-1.4/stable-diffusion-1.4.src.Dockerfile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # -# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC +# SPDX-FileCopyrightText: © 2025 Tenstorrent AI ULC # default base image, override with --build-arg TT_METAL_DOCKERFILE_VERSION= ARG TT_METAL_DOCKERFILE_VERSION diff --git a/tt-metal-stable-diffusion-1.4/tests/test_inference_api.py b/tt-metal-stable-diffusion-1.4/tests/test_inference_api.py index 021f09e..ea50034 100644 --- a/tt-metal-stable-diffusion-1.4/tests/test_inference_api.py +++ b/tt-metal-stable-diffusion-1.4/tests/test_inference_api.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # -# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC +# SPDX-FileCopyrightText: © 2025 Tenstorrent AI ULC from http import HTTPStatus import os diff --git a/tt-metal-stable-diffusion-1.4/tests/utils.py b/tt-metal-stable-diffusion-1.4/tests/utils.py index 2cdf981..9df47d3 100644 --- a/tt-metal-stable-diffusion-1.4/tests/utils.py +++ b/tt-metal-stable-diffusion-1.4/tests/utils.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # -# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC +# SPDX-FileCopyrightText: © 2025 Tenstorrent AI ULC import os From a75d0e60591eef2e453f62a25b2843a1eacb2d6c Mon Sep 17 00:00:00 2001 From: bgoelTT Date: Sat, 1 Feb 2025 22:26:35 -0500 Subject: [PATCH 12/15] Refactor repeated functionality --- .../server/flaskserver.py | 40 ++++++++++--------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/tt-metal-stable-diffusion-1.4/server/flaskserver.py b/tt-metal-stable-diffusion-1.4/server/flaskserver.py index 7f56ebd..f78a7ab 100644 --- a/tt-metal-stable-diffusion-1.4/server/flaskserver.py +++ b/tt-metal-stable-diffusion-1.4/server/flaskserver.py @@ -60,18 +60,26 @@ def hello_world(): return "Hello, World!" +def read_json_file(file_path): + if not os.path.isfile(file_path): + raise FileNotFoundError(f"{file_path} is not a file") + with open(file_path, "r") as f: + return json.load(f) + + +def write_json_file(file_path, data): + with open(file_path, "w") as f: + json.dump(data, f, indent=4) + + def submit_prompt(prompt_file, prompt): if not os.path.isfile(prompt_file): - with open(prompt_file, "w") as f: - json.dump({"prompts": []}, f) - - with open(prompt_file, "r") as f: - prompts_data = json.load(f) + write_json_file(prompt_file, {"prompts": []}) + prompts_data = read_json_file(prompt_file) prompts_data["prompts"].append({"prompt": prompt, "status": "not generated"}) - with open(prompt_file, "w") as f: - json.dump(prompts_data, f, indent=4) + write_json_file(prompt_file, prompts_data) def warmup(): @@ -80,8 +88,7 @@ def warmup(): submit_prompt(json_file_path, sample_prompt) global ready while not ready: - with open(json_file_path, "r") as f: - prompts_data = json.load(f) + prompts_data = read_json_file(json_file_path) # sample prompt should be first prompt sample_prompt_data = prompts_data["prompts"][0] if sample_prompt_data["prompt"] == sample_prompt: @@ -125,16 +132,14 @@ def update_status(): data = request.get_json() prompt = data.get("prompt") - with open(json_file_path, "r") as f: - prompts_data = json.load(f) + prompts_data = read_json_file(json_file_path) for p in prompts_data["prompts"]: if p["prompt"] == prompt: p["status"] = "generated" break - with open(json_file_path, "w") as f: - json.dump(prompts_data, f, indent=4) + write_json_file(json_file_path, prompts_data) return jsonify({"message": "Prompt status updated to generated."}) @@ -157,15 +162,13 @@ def image_exists(): @app.route("/clean_up", methods=["POST"]) def clean_up(): - with open(json_file_path, "r") as f: - prompts_data = json.load(f) + prompts_data = read_json_file(json_file_path) prompts_data["prompts"] = [ p for p in prompts_data["prompts"] if p["status"] != "done" ] - with open(json_file_path, "w") as f: - json.dump(prompts_data, f, indent=4) + write_json_file(json_file_path, prompts_data) return jsonify({"message": "Cleaned up done prompts."}) @@ -175,8 +178,7 @@ def get_latest_time(): if not os.path.isfile(json_file_path): return jsonify({"message": "No prompts found"}), 404 - with open(json_file_path, "r") as f: - prompts_data = json.load(f) + prompts_data = read_json_file(json_file_path) # Filter prompts that have a total_acc time available completed_prompts = [p for p in prompts_data["prompts"] if "total_acc" in p] From 835fc911c47df4d3bb029b91891ee7c71fa17899 Mon Sep 17 00:00:00 2001 From: bgoelTT Date: Sat, 1 Feb 2025 22:50:58 -0500 Subject: [PATCH 13/15] Add mutual exclusion to ready variable --- .../server/flaskserver.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/tt-metal-stable-diffusion-1.4/server/flaskserver.py b/tt-metal-stable-diffusion-1.4/server/flaskserver.py index f78a7ab..0899ffe 100644 --- a/tt-metal-stable-diffusion-1.4/server/flaskserver.py +++ b/tt-metal-stable-diffusion-1.4/server/flaskserver.py @@ -48,6 +48,8 @@ def signal_handler(sig, frame): # var to indicate ready state ready = False +# lock for guaranteeing mutual exclusion +ready_lock = threading.Lock() # internal json prompt file json_file_path = ( @@ -87,7 +89,9 @@ def warmup(): # submit sample prompt to perform tracing and server warmup submit_prompt(json_file_path, sample_prompt) global ready - while not ready: + with ready_lock: + is_ready = ready + while not is_ready: prompts_data = read_json_file(json_file_path) # sample prompt should be first prompt sample_prompt_data = prompts_data["prompts"][0] @@ -95,7 +99,8 @@ def warmup(): # TODO: remove this and replace with status check == "done" # to flip ready flag if sample_prompt_data["status"] == "done": - ready = True + with ready_lock: + ready = True logger.info("Warmup complete") time.sleep(3) @@ -107,8 +112,9 @@ def warmup(): @app.route("/health") def health_check(): - if not ready: - abort(HTTPStatus.SERVICE_UNAVAILABLE, description="Server is not ready yet") + with ready_lock: + if not ready: + abort(HTTPStatus.SERVICE_UNAVAILABLE, description="Server is not ready yet") return jsonify({"message": "OK\n"}), 200 @@ -116,8 +122,9 @@ def health_check(): @api_key_required def submit(): global ready - if not ready: - abort(HTTPStatus.SERVICE_UNAVAILABLE, description="Server is not ready yet") + with ready_lock: + if not ready: + abort(HTTPStatus.SERVICE_UNAVAILABLE, description="Server is not ready yet") data = request.get_json() prompt = data.get("prompt") logger.info(f"Prompt: {prompt}") From 016af35a2f6692da38bfa5282f2d30a98e48331e Mon Sep 17 00:00:00 2001 From: bgoelTT Date: Sat, 1 Feb 2025 23:25:50 -0500 Subject: [PATCH 14/15] Remove locust tests and add healthcheck test --- .../tests/locust_config.conf | 6 ------ .../tests/locustfile.py | 16 ---------------- .../tests/test_inference_api.py | 4 ---- 3 files changed, 26 deletions(-) delete mode 100644 tt-metal-stable-diffusion-1.4/tests/locust_config.conf delete mode 100644 tt-metal-stable-diffusion-1.4/tests/locustfile.py diff --git a/tt-metal-stable-diffusion-1.4/tests/locust_config.conf b/tt-metal-stable-diffusion-1.4/tests/locust_config.conf deleted file mode 100644 index d72876a..0000000 --- a/tt-metal-stable-diffusion-1.4/tests/locust_config.conf +++ /dev/null @@ -1,6 +0,0 @@ -locustfile = locustfile.py -headless = true -host = http://localhost:7000 -users = 1 -spawn-rate = 1 -run-time = 1m diff --git a/tt-metal-stable-diffusion-1.4/tests/locustfile.py b/tt-metal-stable-diffusion-1.4/tests/locustfile.py deleted file mode 100644 index 4ba00b4..0000000 --- a/tt-metal-stable-diffusion-1.4/tests/locustfile.py +++ /dev/null @@ -1,16 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# -# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC - -from locust import HttpUser, task -from utils import sample_file - - -# load sample file in memory -file = sample_file() - - -class HelloWorldUser(HttpUser): - @task - def hello_world(self): - raise NotImplementedError diff --git a/tt-metal-stable-diffusion-1.4/tests/test_inference_api.py b/tt-metal-stable-diffusion-1.4/tests/test_inference_api.py index ea50034..fa7fb13 100644 --- a/tt-metal-stable-diffusion-1.4/tests/test_inference_api.py +++ b/tt-metal-stable-diffusion-1.4/tests/test_inference_api.py @@ -4,7 +4,6 @@ from http import HTTPStatus import os -import pytest import requests from utils import get_auth_header, get_sample_prompt @@ -40,9 +39,6 @@ def test_invalid_api_call(): assert response.status_code == HTTPStatus.UNAUTHORIZED -@pytest.mark.skip( - reason="Not implemented, see https://github.com/tenstorrent/tt-inference-server/issues/63" -) def test_get_health(): headers = {} response = requests.get(HEALTH_URL, headers=headers, timeout=35) From 63d973c60e50c8f39a5ee2eaf567d3b00d7452fb Mon Sep 17 00:00:00 2001 From: bgoelTT Date: Sat, 1 Feb 2025 23:58:05 -0500 Subject: [PATCH 15/15] Test more API endpoints and fix bug in locking mechanism --- .../server/flaskserver.py | 7 ++++--- .../tests/test_inference_api.py | 16 +++++++++++++++- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/tt-metal-stable-diffusion-1.4/server/flaskserver.py b/tt-metal-stable-diffusion-1.4/server/flaskserver.py index 0899ffe..4bc9aa5 100644 --- a/tt-metal-stable-diffusion-1.4/server/flaskserver.py +++ b/tt-metal-stable-diffusion-1.4/server/flaskserver.py @@ -89,9 +89,10 @@ def warmup(): # submit sample prompt to perform tracing and server warmup submit_prompt(json_file_path, sample_prompt) global ready - with ready_lock: - is_ready = ready - while not is_ready: + while True: + with ready_lock: + if ready: + break prompts_data = read_json_file(json_file_path) # sample prompt should be first prompt sample_prompt_data = prompts_data["prompts"][0] diff --git a/tt-metal-stable-diffusion-1.4/tests/test_inference_api.py b/tt-metal-stable-diffusion-1.4/tests/test_inference_api.py index fa7fb13..729b755 100644 --- a/tt-metal-stable-diffusion-1.4/tests/test_inference_api.py +++ b/tt-metal-stable-diffusion-1.4/tests/test_inference_api.py @@ -5,6 +5,7 @@ from http import HTTPStatus import os import requests +import time from utils import get_auth_header, get_sample_prompt @@ -12,6 +13,8 @@ SERVICE_PORT = int(os.getenv("SERVICE_PORT", 7000)) API_BASE_URL = f"{DEPLOY_URL}:{SERVICE_PORT}" API_URL = f"{API_BASE_URL}/submit" +API_GET_IMAGE_URL = f"{API_BASE_URL}/get_image" +API_GET_LATEST_PROMPT_URL = f"{API_BASE_URL}/get_latest_time" HEALTH_URL = f"{API_BASE_URL}/health" @@ -26,6 +29,17 @@ def test_valid_api_call(): assert response.status_code == HTTPStatus.OK assert isinstance(response.json(), dict) + # wait generous amount of time for image to be generated + time.sleep(15) + # request image + response = requests.get(API_GET_IMAGE_URL) + assert response.status_code == HTTPStatus.OK + + # check that prompt was correctly generated + response = requests.get(API_GET_LATEST_PROMPT_URL) + assert response.status_code == HTTPStatus.OK + assert response.json()["prompt"] == sample_prompt + def test_invalid_api_call(): # get sample prompt @@ -41,5 +55,5 @@ def test_invalid_api_call(): def test_get_health(): headers = {} - response = requests.get(HEALTH_URL, headers=headers, timeout=35) + response = requests.get(HEALTH_URL, headers=headers, timeout=5) assert response.status_code == 200