From c007322718f1ef9a901e839573f14b0082bf3d6a Mon Sep 17 00:00:00 2001
From: anandhu-eng
Date: Fri, 7 Jun 2024 01:15:58 +0530
Subject: [PATCH] Added execution environment tabs

---
 docs/benchmarks/medical_imaging/3d-unet.md |   3 +
 main.py                                    | 102 +++++++++++++--------
 2 files changed, 66 insertions(+), 39 deletions(-)

diff --git a/docs/benchmarks/medical_imaging/3d-unet.md b/docs/benchmarks/medical_imaging/3d-unet.md
index dbe233993..252ba3d07 100644
--- a/docs/benchmarks/medical_imaging/3d-unet.md
+++ b/docs/benchmarks/medical_imaging/3d-unet.md
@@ -5,6 +5,9 @@
 === "MLCommons-Python"
     ### MLPerf Reference Implementation in Python

+3d-unet-99
+{{ mlperf_inference_implementation_readme (4, "3d-unet-99", "reference") }}
+
 3d-unet-99.9
 {{ mlperf_inference_implementation_readme (4, "3d-unet-99.9", "reference") }}

diff --git a/main.py b/main.py
index 785bc7e80..6fe136880 100644
--- a/main.py
+++ b/main.py
@@ -11,6 +11,7 @@ def mlperf_inference_implementation_readme(spaces, model, implementation):
     content=""

     scenarios = []
+    execution_envs = ["Docker","Native"]

     if implementation == "reference":
         devices = [ "CPU", "CUDA", "ROCm" ]
@@ -24,7 +25,7 @@ def mlperf_inference_implementation_readme(spaces, model, implementation):
             frameworks = [ "Pytorch" ]

     elif implementation == "nvidia":
-        if model in [ "sdxl", "llama2-99", "llama2-99.9" ]:
+        if model in [ "sdxl", "llama2-70b-99", "llama2-70b-99.9" ]:
             return pre_space+" WIP"
         devices = [ "CUDA" ]
         frameworks = [ "TensorRT" ]
@@ -64,10 +65,12 @@ def mlperf_inference_implementation_readme(spaces, model, implementation):
     for category in categories:
         if category == "Edge" and not scenarios:
             scenarios = [ "Offline", "SingleStream" ]
-            if model.lower() in [ "resnet50", "retinanet" ]:
+            if model.lower() in [ "resnet50", "retinanet" ] and "MultiStream" not in scenarios:  # MultiStream was getting duplicated
                 scenarios.append("MultiStream")
         elif category == "Datacenter" and not scenarios:
             scenarios = [ "Offline", "Server" ]
+        if "99.9" in model:  # refer directly to the commands of the -99 variant
+            return f"\n{pre_space}Follow the same procedure described in {model.split('.')[0]}. Change the `--model` tag value to `{model.lower()}`."

         content += f"{pre_space}=== \"{category.lower()}\"\n\n"

@@ -87,47 +90,60 @@ def mlperf_inference_implementation_readme(spaces, model, implementation):
                     if device.lower() != "cpu":
                         continue
                 cur_space2 = cur_space1 + "    "
+                cur_space3 = cur_space2 + "    "
+                cur_space4 = cur_space3 + "    "
+
                 content += f"{cur_space1}=== \"{device}\"\n"
                 content += f"{cur_space2}###### {device} device\n\n"

                 if "99.9" not in model: #not showing docker command as it is already done for the 99% variant
-                    content += f"{cur_space2}###### Docker Setup Command\n\n"
-
-                    docker_info = get_docker_info(spaces+12, model, implementation, device)
-                    content += docker_info
-
-                    test_query_count=get_test_query_count(model, implementation, device)
-
-                    content += mlperf_inference_run_command(spaces+12, model, implementation, framework.lower(), category.lower(), "Offline", device.lower(), "test", test_query_count, True)
-                    content += f"{cur_space2}The above command should get you to an interactive shell inside the docker container and do a quick test run for the Offline scenario. Once inside the docker container please do the below commands to do the accuracy + performance runs for each scenario.\n\n"
-                    content += f"{cur_space2}<details>\n"
-                    content += f"{cur_space2}<summary> Please click here to see more options for the docker launch </summary>\n\n"
-                    content += f"{cur_space2}* `--docker_cm_repo `: to use a custom fork of cm4mlops repository inside the docker image\n\n"
-                    content += f"{cur_space2}* `--docker_cache=no`: to not use docker cache during the image build\n"
-                else:
-                    content += f"{cur_space2}Use the same docker container as for the {model.replace('99.9', '99')} model.\n\n"
-
-                if device.lower() not in [ "cuda" ]:
-                    content += f"{cur_space2}* `--docker_os=ubuntu`: ubuntu and rhel are supported. \n"
-                    content += f"{cur_space2}* `--docker_os_version=20.04`: [20.04, 22.04] are supported for Ubuntu and [8, 9] for RHEL\n"
-
-                content += f"{cur_space2}</details>\n"
-                run_suffix = ""
-                run_suffix += f"\n{cur_space2} ###### Run Options\n\n"
-                run_suffix += f"{cur_space2} * Use `--division=closed` to do a closed division submission which includes compliance runs\n\n"
-                run_suffix += f"{cur_space2} * Use `--rerun` to do a rerun even when a valid run exists\n\n"
-
-                for scenario in scenarios:
-                    cur_space3 = cur_space2 + "    "
-                    content += f"{cur_space2}=== \"{scenario}\"\n{cur_space3}####### {scenario}\n"
-                    run_cmd = mlperf_inference_run_command(spaces+16, model, implementation, framework.lower(), category.lower(), scenario, device.lower(), "valid")
-                    content += run_cmd
-                    content += run_suffix
-
-                content += f"{cur_space2}=== \"All Scenarios\"\n{cur_space3}####### All Scenarios\n"
-                run_cmd = mlperf_inference_run_command(spaces+16, model, implementation, framework.lower(), category.lower(), "All Scenarios", device.lower(), "valid")
-                content += run_cmd
-                content += run_suffix
+                    # select the execution environment (currently Docker or Native)
+                    for execution_env in execution_envs:
+                        if (device == "ROCm" or implementation == "qualcomm") and execution_env == "Docker":
+                            continue  # Docker is not currently supported for the Qualcomm implementation or the ROCm device
+                        if implementation == "nvidia" and execution_env == "Native":
+                            continue  # the Nvidia implementation only supports execution through Docker
+                        content += f"{cur_space2}=== \"{execution_env}\"\n"
+                        content += f"{cur_space3}###### {execution_env} Environment\n\n"
+                        test_query_count=get_test_query_count(model, implementation, device)
+                        if execution_env == "Native":  # Native runs go through a Python virtual environment
+                            content += f"{cur_space3}##### Set up a virtual environment for Python\n"
+                            content += get_venv_command(spaces+16)
+                            content += f"{cur_space3}##### Execute the CM command\n"
+                            content += mlperf_inference_run_command(spaces+16, model, implementation, framework.lower(), category.lower(), "Offline", device.lower(), "test", test_query_count, True).replace("--docker ","")
+                            content += f"{cur_space3}The above command should do a test run of the Offline scenario and record the estimated offline_target_qps.\n\n"
+                        else:  # Docker execution steps
+                            docker_info = get_docker_info(spaces+16, model, implementation, device)
+                            content += docker_info
+                            content += mlperf_inference_run_command(spaces+16, model, implementation, framework.lower(), category.lower(), "Offline", device.lower(), "test", test_query_count, True)
+                            content += f"{cur_space3}The above command should get you to an interactive shell inside the docker container and do a quick test run for the Offline scenario. Once inside the docker container, please run the below commands to do the accuracy + performance runs for each scenario.\n\n"
+                            content += f"{cur_space3}<details>\n"
+                            content += f"{cur_space3}<summary> Please click here to see more options for the docker launch </summary>\n\n"
+                            content += f"{cur_space3}* `--docker_cm_repo `: to use a custom fork of cm4mlops repository inside the docker image\n\n"
+                            content += f"{cur_space3}* `--docker_cache=no`: to not use docker cache during the image build\n"
+
+                            if device.lower() not in [ "cuda" ]:
+                                content += f"{cur_space3}* `--docker_os=ubuntu`: ubuntu and rhel are supported. \n"
+                                content += f"{cur_space3}* `--docker_os_version=20.04`: [20.04, 22.04] are supported for Ubuntu and [8, 9] for RHEL\n"
+
+                            content += f"{cur_space3}</details>\n"
+                        run_suffix = ""
+                        run_suffix += f"\n{cur_space3} ###### Run Options\n\n"
+                        run_suffix += f"{cur_space3} * Use `--division=closed` to do a closed division submission which includes compliance runs\n\n"
+                        run_suffix += f"{cur_space3} * Use `--rerun` to do a rerun even when a valid run exists\n\n"
+
+                        for scenario in scenarios:
+                            content += f"{cur_space3}=== \"{scenario}\"\n{cur_space4}####### {scenario}\n\n"
+                            run_cmd = mlperf_inference_run_command(spaces+20, model, implementation, framework.lower(), category.lower(), scenario, device.lower(), "valid")
+                            content += run_cmd
+                            content += run_suffix
+
+                        content += f"{cur_space3}=== \"All Scenarios\"\n{cur_space4}####### All Scenarios\n\n"
+                        run_cmd = mlperf_inference_run_command(spaces+20, model, implementation, framework.lower(), category.lower(), "All Scenarios", device.lower(), "valid")
+                        content += run_cmd
+                        content += run_suffix
+
+

     readme_prefix = get_readme_prefix(spaces, model, implementation)
@@ -158,6 +174,14 @@ def get_readme_prefix(spaces, model, implementation):
     #pre_space += "    "
     return readme_prefix

+
+def get_venv_command(spaces):
+    pre_space = " "*spaces
+    return f"""\n
+{pre_space}```bash
+{pre_space}cm run script --tags=\"install python-venv\" --name=mlperf
+{pre_space}export CM_SCRIPT_EXTRA_CMD=\"--adr.python.name=mlperf\"
+{pre_space}```\n"""

 def get_docker_info(spaces, model, implementation, device):
     info = ""
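
For reference, the new `get_venv_command` helper renders to roughly the following snippet in the generated docs (the commands are taken verbatim from the patch, shown here without the `{pre_space}` indentation; it is assumed the CM CLI `cm` is already installed on the host):

```bash
# create a Python virtual environment named "mlperf" via CM
cm run script --tags="install python-venv" --name=mlperf
# make subsequent CM scripts use that environment's Python
export CM_SCRIPT_EXTRA_CMD="--adr.python.name=mlperf"
```

This is the setup shown under the new "Native" environment tab; the "Docker" tab keeps the existing container-based flow.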