From a494f5cbb79980175da2bab668f6cb1cfc04563d Mon Sep 17 00:00:00 2001 From: Ceyda Cinarel Date: Tue, 16 Feb 2021 17:46:19 +0900 Subject: [PATCH] add error message,make logpath configurable, remove model_name requirement --- README.md | 33 ++++++++++++++-- setup.py | 4 +- torchserve_dashboard/api.py | 16 +++++++- torchserve_dashboard/dash.py | 73 +++++++++++++++++++++++------------- 4 files changed, 92 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index 51dd9a3..9241c82 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Torchserve Dashboard -Torchserve Dashboard using streamlit +Torchserve Dashboard using Streamlit Related blog [post](https://cceyda.github.io/blog/torchserve/streamlit/dashboard/2020/10/15/torchserve.html) @@ -13,9 +13,12 @@ Simply run: ```bash pip3 install torchserve-dashboard --user -# torchserve-dashboard [streamlit_options] -- [config_path] [model_store(optional)] +# torchserve-dashboard [streamlit_options] -- [config_path] [model_store(optional)] [log_location(optional)] [metrics_location(optional)] torchserve-dashboard --server.port 8105 -- --config_path ./torchserve.properties --model_store ./model_store ``` + +:exclamation: Keep in mind that if you change any of the `--config_path`,`--model_store`,`--metrics_location`,`--log_location` options while there is a torchserve instance already running before starting torch-dashboard, they won't come into effect until you stop & start torchserve. 
+ OR ```bash git clone https://github.com/cceyda/torchserve-dashboard.git @@ -31,10 +34,34 @@ number_of_gpu=0 batch_size=1 model_store=/mnt/pretrained/model_store ``` + # Updates [15-oct-2020] add [scale workers](https://pytorch.org/serve/management_api.html#scale-workers) tab -# Help +[16-feb-2021] (functionality) make logpath configurable, (functionality) remove model_name requirement, (UI) add cosmetic error messages + +# FAQs +- **Does torchserve keep running in the background?** + + The torchserve process is spawned using `Popen` and keeps running in the background even if you stop the dashboard. + +- **What about environment variables?** + + These environment variables are passed to the torchserve command: + + `ENVIRON_WHITELIST=["LD_LIBRARY_PATH","LC_CTYPE","LC_ALL","PATH","JAVA_HOME","PYTHONPATH","TS_CONFIG_FILE","LOG_LOCATION","METRICS_LOCATION"]` + +- **How to change the logging format of torchserve?** + + You can set the location of your custom log4j config in your configuration file as in [here](https://pytorch.org/serve/logging.html#provide-with-config-properties) + + `vmargs=-Dlog4j.configuration=file:///path/to/custom/log4j.properties` + +- **What is the meaning behind the weird versioning?** + + The minor version follows the compatible torchserve version; the patch version reflects the dashboard versioning + +# Help & Questions & Feedback Open an issue diff --git a/setup.py b/setup.py index d1f17ae..255ec51 100644 --- a/setup.py +++ b/setup.py @@ -3,11 +3,11 @@ """ from setuptools import find_packages, setup -dependencies = ["streamlit>=0.68", "click>=7.1.2", "httpx>=0.16.0"] +dependencies = ["streamlit>=0.76", "click>=7.1.2", "httpx>=0.16.0"] setup( name="torchserve_dashboard", - version="v0.2.3", + version="v0.2.4", url="https://github.com/cceyda/torchserve-dashboard", license="Apache Software License 2.0", author="Ceyda Cinarel", diff --git a/torchserve_dashboard/api.py b/torchserve_dashboard/api.py index 2bca0e7..ebf6f55 100644 --- a/torchserve_dashboard/api.py +++ 
b/torchserve_dashboard/api.py @@ -5,6 +5,7 @@ import streamlit as st +ENVIRON_WHITELIST=["LD_LIBRARY_PATH","LC_CTYPE","LC_ALL","PATH","JAVA_HOME","PYTHONPATH","TS_CONFIG_FILE","LOG_LOCATION","METRICS_LOCATION"] def raise_on_not200(response): if response.status_code != 200: @@ -15,12 +16,23 @@ def raise_on_not200(response): client = httpx.Client(timeout=1000, event_hooks={"response": [raise_on_not200]}) -def start_torchserve(model_store, config_path): +def start_torchserve(model_store, config_path, log_location=None, metrics_location=None): + new_env={} + env=os.environ + for x in ENVIRON_WHITELIST: + if x in env: + new_env[x]=env[x] + + if log_location: + new_env["LOG_LOCATION"]=log_location + if metrics_location: + new_env["METRICS_LOCATION"]=metrics_location if os.path.exists(model_store) and os.path.exists(config_path): torchserve_cmd = f"torchserve --start --ncs --model-store {model_store} --ts-config {config_path}" subprocess.Popen( torchserve_cmd.split(" "), - stdout=open("/dev/null", "w"), + env=new_env, + stdout=open("/dev/null", "w"), stderr=open("/dev/null", "w"), preexec_fn=os.setpgrp, ) diff --git a/torchserve_dashboard/dash.py b/torchserve_dashboard/dash.py index 107cbda..2063391 100644 --- a/torchserve_dashboard/dash.py +++ b/torchserve_dashboard/dash.py @@ -6,6 +6,7 @@ from streamlit.script_runner import RerunException import api as tsa +from pathlib import Path st.set_page_config( page_title="Torchserve Management Dashboard", @@ -13,11 +14,13 @@ layout="centered", initial_sidebar_state="expanded", ) - + parser = argparse.ArgumentParser(description="Torchserve dashboard") parser.add_argument("--model_store", default=None, help="Directory where your models are stored") parser.add_argument("--config_path", default="./default.torchserve.properties", help="Torchserve config path") +parser.add_argument("--log_location", default="./", help="Passed as environment variable LOG_LOCATION to Torchserve") 
+parser.add_argument("--metrics_location", default="./", help="Passed as environment variable METRICS_LOCATION to Torchserve") try: args = parser.parse_args() except SystemExit as e: @@ -28,6 +31,12 @@ M_API = "http://127.0.0.1:8081" model_store = args.model_store config_path = args.config_path +log_location = args.log_location +if log_location: + log_location = str(Path(log_location).resolve()) +metrics_location = args.metrics_location +if metrics_location: + metrics_location = str(Path(metrics_location).resolve()) config = None default_key = "None" @@ -56,16 +65,18 @@ def last_res(): def get_model_store(): return os.listdir(model_store) - +# As a design choice I'm leaving config_path,log_location,metrics_location non-editable from the UI as a semi-security measure (maybe?:/) ##########Sidebar########## st.sidebar.markdown(f"## Help") -st.sidebar.markdown(f"### Management API: \n {M_API}") -st.sidebar.markdown(f"### Model Store Path: \n {model_store}") -st.sidebar.markdown(f"### Config Path: \n {config_path}") +with st.sidebar.beta_expander(label="Show Paths:", expanded=False): + st.markdown(f"### Model Store Path: \n {model_store}") + st.markdown(f"### Config Path: \n {config_path}") + st.markdown(f"### Log Location: \n {log_location}") + st.markdown(f"### Metrics Location: \n {metrics_location}") start = st.sidebar.button("Start Torchserve") if start: - last_res()[0]= tsa.start_torchserve(model_store, config_path) + last_res()[0]= tsa.start_torchserve(model_store, config_path, log_location, metrics_location) rerun() stop = st.sidebar.button("Stop Torchserve") @@ -104,7 +115,7 @@ def get_model_store(): p = st.checkbox("or use another path") if p: mar_path = placeholder.text_input("Input mar file path*") - model_name = st.text_input(label="Model name *") + model_name = st.text_input(label="Model name (overrides predefined)") col1, col2 = st.beta_columns(2) batch_size = col1.number_input(label="batch_size", value=0, min_value=0, step=1) max_batch_delay = 
col2.number_input(label="max_batch_delay", value=0, min_value=0, step=100) @@ -114,21 +125,26 @@ runtime = col2.text_input(label="runtime") proceed = st.button("Register") - if proceed and model_name and mar_path != default_key: - st.write(f"Registering Model...{mar_path} as {model_name}") - res = tsa.register_model( - M_API, - mar_path, - model_name, - handler=handler, - runtime=runtime, - batch_size=batch_size, - max_batch_delay=max_batch_delay, - initial_workers=initial_workers, - response_timeout=response_timeout, - ) - last_res()[0] = res - rerun() + if proceed: + if mar_path != default_key: + st.write(f"Registering Model...{mar_path}") + res = tsa.register_model( + M_API, + mar_path, + model_name, + handler=handler, + runtime=runtime, + batch_size=batch_size, + max_batch_delay=max_batch_delay, + initial_workers=initial_workers, + response_timeout=response_timeout, + ) + last_res()[0] = res + rerun() + else: + st.write(":octagonal_sign: Fill the required fields!") + + with st.beta_expander(label="Remove a model", expanded=False): @@ -141,11 +157,14 @@ versions = [m["modelVersion"] for m in versions] version = st.selectbox("Choose version to remove", [default_key] + versions, index=0) proceed = st.button("Remove") - if proceed and model_name != default_key and version != default_key: - res = tsa.delete_model(M_API, model_name, version) - last_res()[0] = res - rerun() - + if proceed: + if model_name != default_key and version != default_key: + res = tsa.delete_model(M_API, model_name, version) + last_res()[0] = res + rerun() + else: + st.write(":octagonal_sign: Pick a model & version!") + with st.beta_expander(label="Get model details", expanded=False): st.header("Get model details")