From 42e25e4b226789c8424626e41e3c9c03815e2686 Mon Sep 17 00:00:00 2001 From: derekk-nm Date: Wed, 3 Jul 2024 19:19:53 +0000 Subject: [PATCH 01/18] Code coverage JSON and summary collects code coverage report as JSON to upload to build artifacts, then parses the file to generate a markdown table for presentation in the GitHub summary. Removes Temporarily limits test cases and platforms in nm-remote-push, and skips benchmark, etc in nm-build-test to make testing go faster. --- .../actions/nm-code-coverage-md/action.yml | 14 ++ .github/actions/nm-summary-test/action.yml | 6 + .github/scripts/coverage_report_breakdown.py | 158 ++++++++++++++++++ .github/scripts/run-tests | 2 +- .github/workflows/nm-build-test.yml | 98 +++++------ .github/workflows/nm-remote-push.yml | 9 +- .github/workflows/nm-test.yml | 16 ++ .../tests/test_skip_env_vars/smoke.txt | 16 +- 8 files changed, 257 insertions(+), 62 deletions(-) create mode 100644 .github/actions/nm-code-coverage-md/action.yml create mode 100644 .github/scripts/coverage_report_breakdown.py diff --git a/.github/actions/nm-code-coverage-md/action.yml b/.github/actions/nm-code-coverage-md/action.yml new file mode 100644 index 0000000000000..92bdab67ed904 --- /dev/null +++ b/.github/actions/nm-code-coverage-md/action.yml @@ -0,0 +1,14 @@ +name: run coverage_report_breakdown +description: 'run coverage_report_breakdown to generate markdown showing test coverage breakdown' +inputs: + coverage_json: + description: 'file containing coverage report in JSON format' + required: true +runs: + using: composite + steps: + - id: coverage_report_breakdown + run: | + COVERAGE_MD = `python3 ./.github/scripts/coverage_report_breakdown.py --coverage_json_file` + echo "markdown=${COVERAGE_MD}" >> $GITHUB_OUTPUT + shell: bash diff --git a/.github/actions/nm-summary-test/action.yml b/.github/actions/nm-summary-test/action.yml index 9da08953dfc38..5b3e8e071c413 100644 --- a/.github/actions/nm-summary-test/action.yml +++ b/.github/actions/nm-summary-test/action.yml @@ -22,6 +22,9 @@ inputs: test_status: description: 'status from test step' required: true + test_coverage_md: + description: 'test coverage summary markdown for this particular run' + required: false runs: using: composite steps: @@ -42,4 +45,7 @@ runs: echo "| whl: | ${{ inputs.whl }} |" >> $GITHUB_STEP_SUMMARY echo "| magic_wand: | ${{ inputs.magic_wand }} |" >> $GITHUB_STEP_SUMMARY echo "| test: | ${TEST_EMOJI} |" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "Test Coverage" >> $GITHUB_STEP_SUMMARY + echo "${{ inputs.test_coverage_md }}" >> $GITHUB_STEP_SUMMARY shell: bash diff --git a/.github/scripts/coverage_report_breakdown.py b/.github/scripts/coverage_report_breakdown.py new file mode 100644 index 0000000000000..2b2e4d584f947 --- /dev/null +++ b/.github/scripts/coverage_report_breakdown.py @@ -0,0 +1,158 @@ +# this script parses the provided coverage JSON file to report +# the results broken down into categories of interest. 
+import argparse +from collections import UserDict +from pathlib import Path +from typing import Optional + +import pandas as pd + + +class CoverageMetrics(UserDict): + """ + encapsulates code coverage metrics + """ + def __init__(self, user_dict): + super().__init__(user_dict) + if "percent_covered_display" in self.data: + del self.data["percent_covered_display"] + + +class CodeCoverage: + """ + reads and reports on code coverage data as generated by the coverage tool + """ + def __init__(self, file_path: Path): + self.format: [int, None] = None + self.version: [str, None] = None + self.timestamp: [str, None] = None + self.show_contexts: [bool, None] = None + self.branch_coverage: [bool, None] = None + self.overall_metrics: [CoverageMetrics, None] = None + self.tests: [pd.Series, None] = None + self.source: [pd.Series, None] = None + + if file_path.suffix == ".json": + if not file_path.exists(): + raise ValueError(f"{file_path} not found") + self._from_json(file_path) + else: + raise ValueError("only coverage json reports are supported") + + def _from_json(self, json_file_path: Path): + """ + loads the code coverage data from a JSON report generated with + `coverage json` + :param json_file_path: path to the file to load + """ + coverage_df = pd.read_json(json_file_path, orient="records") + self.format = coverage_df["meta"]["format"] + self.version = coverage_df["meta"]["version"] + self.timestamp = coverage_df["meta"]["timestamp"] + self.show_contexts = coverage_df["meta"]["show_contexts"] + self.branch_coverage = coverage_df["meta"]["branch_coverage"] + self.overall_metrics = CoverageMetrics(coverage_df["totals"].dropna().to_dict()) + + # segment the list of files by test cases and source code + files_df = coverage_df.loc[:, ['files']].dropna() + self.tests = files_df.iloc[files_df.index.str.startswith("tests/")] + self.source = files_df[~files_df.index.isin(self.tests.index)] + + # add a column to the list of source files to facilitate grouping + # metrics by top level directories under vllm + def get_sub_dir(file_path): + file_parts = Path(file_path).parts + subdir = file_parts[file_parts.index("vllm") + 1] + if subdir == Path(file_path).name: + # we're at the root of the vllm dir, so leave subdir empty + subdir = "" + return subdir + + # temporarily move the index to a "filepath" column + self.source.reset_index(names="filepath", inplace=True) + # extract the subdirectory under vllm from filepath to the sub_dir column + self.source.loc[:, "sub_dir"] = self.source.loc[:, "filepath"].apply(get_sub_dir) + # make the filepath column the index again + self.source.set_index("filepath", inplace=True) + + @staticmethod + def _calculate_metrics(coverage_data: pd.Series) -> CoverageMetrics: + """ + common method to calculate metrics + """ + metrics_dict = {} + for metric in ["covered_lines", "num_statements", "missing_lines", "excluded_lines"]: + metrics_dict[metric] = sum(d[0]["summary"][metric] for d in coverage_data) + metrics_dict["percent_covered"] = metrics_dict["covered_lines"] / metrics_dict["num_statements"] * 100 + return CoverageMetrics(metrics_dict) + + def tests_metrics(self) -> CoverageMetrics: + """ + creates summary metrics for all tests + """ + return self._calculate_metrics(self.tests.values) + + def source_metrics(self, sub_dir: Optional[str] = None) -> CoverageMetrics: + """ + creates summary metrics for the requested vllm subdirectory, + or for the reported vllm source if a subdirectory is not specified. 
+ sub_dir = "" will report for files directly under vllm + """ + data = self.source + if sub_dir is not None: + data = self.source[self.source["sub_dir"] == sub_dir] + + return self._calculate_metrics(data.values) + + def to_github_markdown(self) -> str: + """ + returns a string in the form of github compatible markdown with top + level and drill down metrics. + """ + # make a dataframe with top level metric summary info + overall_metrics = self.overall_metrics + overall_metrics["Collection"] = "Overall" + test_metrics = self.tests_metrics() + test_metrics["Collection"] = "Test Code" + source_metrics = self.source_metrics() + source_metrics["Collection"] = "Source Code" + summary_df = pd.DataFrame( + [overall_metrics, test_metrics, source_metrics] + ) + # make the percent_covered value compatible with the string "%" formatting + summary_df["percent_covered"] = summary_df["percent_covered"] / 100 + + # compose a set of the subdirectory breakdown summary info + breakdown_list = [] + for sub_dir in sorted(cc.source["sub_dir"].unique()): + sub_dir_metrics = cc.source_metrics(sub_dir) + if sub_dir == "": + label = "vllm 'root'" + else: + label = sub_dir + sub_dir_metrics["Collection"] = label + breakdown_list.append(sub_dir_metrics) + breakdown_df = pd.DataFrame(breakdown_list) + # make the percent_covered value compatible with the string "%" formatting + breakdown_df["percent_covered"] = breakdown_df["percent_covered"] / 100 + + # join the top level and breakdown data with separator rows between them + # add a separator row and subtitle row + empty_row_df = pd.Series(pd.NA, index=summary_df.columns).to_frame().transpose() + header_row_df = empty_row_df.copy() + header_row_df["Collection"] = "vllm Subdirs" + summary_df = pd.concat([summary_df, empty_row_df, header_row_df, breakdown_df], ignore_index=True) + # clean up the `nan` values for display purposes + summary_df = summary_df.astype(str) + summary_df.replace({"nan": None}, inplace=True) + + return summary_df.to_markdown(index=False, tablefmt="github", missingval="", floatfmt=(".0f", ".0f", ".0f", ".0f", ".0f", ".1%")) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("coverage_json_file", type=str, help="file path to coverage JSON output") + args = parser.parse_args() + cc = CodeCoverage(Path(args.coverage_json_file)) + + print(cc.to_github_markdown()) diff --git a/.github/scripts/run-tests b/.github/scripts/run-tests index e2e12772d0816..2e0742e0350a1 100755 --- a/.github/scripts/run-tests +++ b/.github/scripts/run-tests @@ -69,7 +69,7 @@ done # run selected tests SUCCESS=0 -CC_PYTEST_FLAGS="--cov=${SRC_DIR} --cov=${TEST_DIR} --cov-report=html:cc-vllm-html --cov-append" +CC_PYTEST_FLAGS="--cov=${SRC_DIR} --cov=${TEST_DIR} --cov-report=html:cc-vllm-html --cov-report=json:cc-vllm.json --cov-append" for TEST in "${TESTS_FOUND[@]}" do LOCAL_SUCCESS=0 diff --git a/.github/workflows/nm-build-test.yml b/.github/workflows/nm-build-test.yml index 9a5043308a067..ae290d4566a37 100644 --- a/.github/workflows/nm-build-test.yml +++ b/.github/workflows/nm-build-test.yml @@ -133,52 +133,52 @@ jobs: test_skip_env_vars: ${{ matrix.test_config.test }} secrets: inherit - BENCHMARK: - needs: [BUILD] - if: success() - uses: ./.github/workflows/nm-benchmark.yml - with: - label: ${{ inputs.benchmark_label }} - benchmark_config_list_file: ${{ inputs.benchmark_config_list_file }} - timeout: ${{ inputs.benchmark_timeout }} - gitref: ${{ github.ref }} - python: ${{ inputs.python }} - whl: ${{ needs.BUILD.outputs.whl }} - # 
Always push if it is a scheduled job - push_benchmark_results_to_gh_pages: "${{ github.event_name == 'schedule' || inputs.push_benchmark_results_to_gh_pages }}" - secrets: inherit - - LM-EVAL: - needs: [BUILD] - uses: ./.github/workflows/nm-lm-eval.yml - with: - label: ${{ inputs.lm_eval_label }} - timeout: ${{ inputs.lm_eval_timeout }} - gitref: ${{ inputs.gitref }} - python: ${{ inputs.python }} - whl: ${{ needs.BUILD.outputs.whl }} - lm_eval_configuration: ${{ inputs.lm_eval_configuration }} - secrets: inherit - - # uploading is only available when using GCP autoscaling group - UPLOAD: - needs: [TEST, BENCHMARK, LM-EVAL] - if: ${{ inputs.push_to_pypi }} - uses: ./.github/workflows/nm-upload-assets-to-gcp.yml - with: - label: gcp-k8s-util - timeout: ${{ inputs.build_timeout }} - gitref: ${{ github.ref }} - secrets: inherit - - # update docker - DOCKER: - needs: [BUILD, UPLOAD] - if: ${{ inputs.push_to_pypi }} - uses: ./.github/workflows/publish-docker.yml - with: - push_to_repository: ${{ inputs.push_to_pypi }} - gitref: ${{ inputs.gitref }} - wf_category: ${{ inputs.wf_category }} - wheel: ${{ needs.BUILD.outputs.whl }} - secrets: inherit +# BENCHMARK: +# needs: [BUILD] +# if: success() +# uses: ./.github/workflows/nm-benchmark.yml +# with: +# label: ${{ inputs.benchmark_label }} +# benchmark_config_list_file: ${{ inputs.benchmark_config_list_file }} +# timeout: ${{ inputs.benchmark_timeout }} +# gitref: ${{ github.ref }} +# python: ${{ inputs.python }} +# whl: ${{ needs.BUILD.outputs.whl }} +# # Always push if it is a scheduled job +# push_benchmark_results_to_gh_pages: "${{ github.event_name == 'schedule' || inputs.push_benchmark_results_to_gh_pages }}" +# secrets: inherit +# +# LM-EVAL: +# needs: [BUILD] +# uses: ./.github/workflows/nm-lm-eval.yml +# with: +# label: ${{ inputs.lm_eval_label }} +# timeout: ${{ inputs.lm_eval_timeout }} +# gitref: ${{ inputs.gitref }} +# python: ${{ inputs.python }} +# whl: ${{ needs.BUILD.outputs.whl }} +# lm_eval_configuration: ${{ inputs.lm_eval_configuration }} +# secrets: inherit +# +# # uploading is only available when using GCP autoscaling group +# UPLOAD: +# needs: [TEST, BENCHMARK, LM-EVAL] +# if: ${{ inputs.push_to_pypi }} +# uses: ./.github/workflows/nm-upload-assets-to-gcp.yml +# with: +# label: gcp-k8s-util +# timeout: ${{ inputs.build_timeout }} +# gitref: ${{ github.ref }} +# secrets: inherit +# +# # update docker +# DOCKER: +# needs: [BUILD, UPLOAD] +# if: ${{ inputs.push_to_pypi }} +# uses: ./.github/workflows/publish-docker.yml +# with: +# push_to_repository: ${{ inputs.push_to_pypi }} +# gitref: ${{ inputs.gitref }} +# wf_category: ${{ inputs.wf_category }} +# wheel: ${{ needs.BUILD.outputs.whl }} +# secrets: inherit diff --git a/.github/workflows/nm-remote-push.yml b/.github/workflows/nm-remote-push.yml index a44274d9e8a11..33c65f42bcf52 100644 --- a/.github/workflows/nm-remote-push.yml +++ b/.github/workflows/nm-remote-push.yml @@ -19,10 +19,11 @@ jobs: gitref: ${{ github.ref }} push_to_pypi: false - test_configs: '[{"python":"3.8.17","label":"gcp-k8s-l4-solo","test":"neuralmagic/tests/test_skip_env_vars/smoke.txt"}, - {"python":"3.9.17","label":"gcp-k8s-l4-solo","test":"neuralmagic/tests/test_skip_env_vars/smoke.txt"}, - {"python":"3.10.12","label":"gcp-k8s-l4-solo","test":"neuralmagic/tests/test_skip_env_vars/smoke.txt"}, - {"python":"3.11.4","label":"gcp-k8s-l4-solo","test":"neuralmagic/tests/test_skip_env_vars/smoke.txt"}]' + test_configs: 
'[{"python":"3.8.17","label":"gcp-k8s-l4-solo","test":"neuralmagic/tests/test_skip_env_vars/smoke.txt"},]' +# '[{"python":"3.8.17","label":"gcp-k8s-l4-solo","test":"neuralmagic/tests/test_skip_env_vars/smoke.txt"}, +# {"python":"3.9.17","label":"gcp-k8s-l4-solo","test":"neuralmagic/tests/test_skip_env_vars/smoke.txt"}, +# {"python":"3.10.12","label":"gcp-k8s-l4-solo","test":"neuralmagic/tests/test_skip_env_vars/smoke.txt"}, +# {"python":"3.11.4","label":"gcp-k8s-l4-solo","test":"neuralmagic/tests/test_skip_env_vars/smoke.txt"}]' test_timeout: 480 benchmark_label: gcp-k8s-l4-solo diff --git a/.github/workflows/nm-test.yml b/.github/workflows/nm-test.yml index 01d6fa96730d3..a21f3fa1949b3 100644 --- a/.github/workflows/nm-test.yml +++ b/.github/workflows/nm-test.yml @@ -151,6 +151,14 @@ jobs: path: cc-vllm-html retention-days: 15 + - name: upload code coverage json + uses: actions/upload-artifact@v4 + if: success() || failure() + with: + name: cc-vllm-json-${{ inputs.test_label }}-${{ inputs.python }} + path: cc-vllm.json + retention-days: 5 + - name: report test results id: report_test uses: ./.github/actions/nm-testmo-run-submit-thread/ @@ -162,6 +170,13 @@ jobs: results: test-results step_status: ${{ steps.test.outputs.status }} + - name: collect test coverage breakdown + id: coverage_breakdown + uses: ./.github/actions/nm-code-coverage-md/ + if: success () | failure() + with: + coverage_json: cc-vllm.json + - name: summary uses: ./.github/actions/nm-summary-test/ if: success() || failure() @@ -173,6 +188,7 @@ jobs: whl: ${{ steps.test.outputs.whl }} magic_wand: ${{ steps.test.outputs.magic_wand }} test_status: ${{ steps.test.outputs.status }} + test_coverage_md: ${{ steps.coverage_breakdown.markdown }} - name: complete testmo run uses: ./.github/actions/nm-testmo-run-complete/ diff --git a/neuralmagic/tests/test_skip_env_vars/smoke.txt b/neuralmagic/tests/test_skip_env_vars/smoke.txt index e901455dfd5be..2a30fa64f463b 100644 --- a/neuralmagic/tests/test_skip_env_vars/smoke.txt +++ b/neuralmagic/tests/test_skip_env_vars/smoke.txt @@ -1,5 +1,5 @@ TEST_ACCURACY=DISABLE -TEST_ASYNC_ENGINE=ENABLE +TEST_ASYNC_ENGINE=DISABLE TEST_BASIC_CORRECTNESS=DISABLE TEST_CORE=ENABLE TEST_DISTRIBUTED=DISABLE @@ -7,14 +7,14 @@ TEST_ENGINE=ENABLE TEST_ENTRYPOINTS=DISABLE TEST_KERNELS=DISABLE TEST_LORA=DISABLE -TEST_METRICS=ENABLE +TEST_METRICS=DISABLE TEST_MODELS=DISABLE -TEST_MODELS_CORE=ENABLE -TEST_PREFIX_CACHING=ENABLE -TEST_QUANTIZATION=ENABLE +TEST_MODELS_CORE=DISABLE +TEST_PREFIX_CACHING=DISABLE +TEST_QUANTIZATION=DISABLE TEST_SAMPLERS=DISABLE TEST_SPEC_DECODE=DISABLE TEST_TENSORIZER_LOADER=DISABLE -TEST_TOKENIZATION=ENABLE -TEST_TRACING=ENABLE -TEST_WORKER=ENABLE +TEST_TOKENIZATION=DISABLE +TEST_TRACING=DISABLE +TEST_WORKER=DISABLE From 6a3e374855906530ecd4291b9763a7d144fd3e40 Mon Sep 17 00:00:00 2001 From: derekk-nm Date: Wed, 3 Jul 2024 19:24:37 +0000 Subject: [PATCH 02/18] fix syntax --- .github/workflows/nm-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/nm-test.yml b/.github/workflows/nm-test.yml index a21f3fa1949b3..dfa32368d2ad9 100644 --- a/.github/workflows/nm-test.yml +++ b/.github/workflows/nm-test.yml @@ -173,7 +173,7 @@ jobs: - name: collect test coverage breakdown id: coverage_breakdown uses: ./.github/actions/nm-code-coverage-md/ - if: success () | failure() + if: success () || failure() with: coverage_json: cc-vllm.json From 10a86fa4d75df570fce9fe32ade75786c3837deb Mon Sep 17 00:00:00 2001 From: derekk-nm Date: Fri, 5 Jul 2024 12:09:16 
+0000 Subject: [PATCH 03/18] pass in json file, add some omit entries, unskip some tests --- .github/actions/nm-code-coverage-md/action.yml | 2 +- pyproject.toml | 5 +++++ tests/models/test_mistral.py | 2 +- tests/models/test_models.py | 2 +- 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/.github/actions/nm-code-coverage-md/action.yml b/.github/actions/nm-code-coverage-md/action.yml index 92bdab67ed904..7ce67c916c902 100644 --- a/.github/actions/nm-code-coverage-md/action.yml +++ b/.github/actions/nm-code-coverage-md/action.yml @@ -9,6 +9,6 @@ runs: steps: - id: coverage_report_breakdown run: | - COVERAGE_MD = `python3 ./.github/scripts/coverage_report_breakdown.py --coverage_json_file` + COVERAGE_MD = `python3 ./.github/scripts/coverage_report_breakdown.py --coverage_json_file ${{ coverage_json }}` echo "markdown=${COVERAGE_MD}" >> $GITHUB_OUTPUT shell: bash diff --git a/pyproject.toml b/pyproject.toml index d1bcf81d3f763..cb26ec8d2f983 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -101,6 +101,7 @@ exclude_also = [ "if __name__ == .__main__.:", "if TYPE_CHECKING:", "@(abc\\.)?abstractmethod", + "@overload", # coverage for these devices is to be ignored until we plan to deploy them 'engine_config.device_config.device_type == "cpu"', 'engine_config.device_config.device_type == "neuron"', @@ -114,6 +115,10 @@ exclude_also = [ "def _shared_pointers", "def np_cache_weights_iterator", "def convert_pyslice_to_tensor", + "def _shared_pointers(", + "def convert_bin_to_safetensor_file(", + "def np_cache_weights_iterator(", + "def convert_pyslice_to_tensor(", ] [tool.coverage.html] diff --git a/tests/models/test_mistral.py b/tests/models/test_mistral.py index 24a0de8d464dd..12a1807030c76 100644 --- a/tests/models/test_mistral.py +++ b/tests/models/test_mistral.py @@ -19,7 +19,7 @@ # UPSTREAM SYNC: we run OOM on the A10g instances. -@pytest.mark.skip("Not enough memory in automation testing.") +# @pytest.mark.skip("Not enough memory in automation testing.") @pytest.mark.parametrize("model", MODELS) @pytest.mark.parametrize("dtype", ["bfloat16"]) @pytest.mark.parametrize("max_tokens", [64]) diff --git a/tests/models/test_models.py b/tests/models/test_models.py index d856caec8abdd..df8086024e74b 100644 --- a/tests/models/test_models.py +++ b/tests/models/test_models.py @@ -29,7 +29,7 @@ # UPSTREAM SYNC: we run OOM on the A10g instances. 
-@pytest.mark.skip("Not enough memory in automation testing.") +# @pytest.mark.skip("Not enough memory in automation testing.") @pytest.mark.parametrize("model", MODELS) @pytest.mark.parametrize("dtype", ["float"]) @pytest.mark.parametrize("max_tokens", [96]) From 51cc2c0bc49c4b1d36077f1d2d4e6cd10271bd9f Mon Sep 17 00:00:00 2001 From: derekk-nm Date: Fri, 5 Jul 2024 13:01:58 +0000 Subject: [PATCH 04/18] fixes * proper reference to input variable * install required tabulate package * fix regex for exclude_also in coverage report --- .github/actions/nm-code-coverage-md/action.yml | 4 +++- pyproject.toml | 14 +++++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/.github/actions/nm-code-coverage-md/action.yml b/.github/actions/nm-code-coverage-md/action.yml index 7ce67c916c902..cc0b63292a4ce 100644 --- a/.github/actions/nm-code-coverage-md/action.yml +++ b/.github/actions/nm-code-coverage-md/action.yml @@ -9,6 +9,8 @@ runs: steps: - id: coverage_report_breakdown run: | - COVERAGE_MD = `python3 ./.github/scripts/coverage_report_breakdown.py --coverage_json_file ${{ coverage_json }}` + # the pandas.DataFrame.to_markdown() method requires the `tabulate` package + pip3 install tabulate + COVERAGE_MD = `python3 ./.github/scripts/coverage_report_breakdown.py --coverage_json_file ${{ inputs.coverage_json }}` echo "markdown=${COVERAGE_MD}" >> $GITHUB_OUTPUT shell: bash diff --git a/pyproject.toml b/pyproject.toml index cb26ec8d2f983..0911b742acfa7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -105,20 +105,20 @@ exclude_also = [ # coverage for these devices is to be ignored until we plan to deploy them 'engine_config.device_config.device_type == "cpu"', 'engine_config.device_config.device_type == "neuron"', - "elif is_neuron():", - "elif is_cpu():", + "elif is_neuron\(\):", + "elif is_cpu\(\):", # customop.py has some placeholder code not yet executed "return self.forward_cuda", # ignore some legacy code that we won't support in nm-vllm - "enable_hf_transfer()", + "enable_hf_transfer\(\)", "def convert_bin_to_safetensor_file", "def _shared_pointers", "def np_cache_weights_iterator", "def convert_pyslice_to_tensor", - "def _shared_pointers(", - "def convert_bin_to_safetensor_file(", - "def np_cache_weights_iterator(", - "def convert_pyslice_to_tensor(", + "def _shared_pointers\(", + "def convert_bin_to_safetensor_file\(", + "def np_cache_weights_iterator\(", + "def convert_pyslice_to_tensor\(", ] [tool.coverage.html] From 4f7cc0309aa60dd5ea21922d65d34f9e550a8a2d Mon Sep 17 00:00:00 2001 From: derekk-nm Date: Fri, 5 Jul 2024 13:21:35 +0000 Subject: [PATCH 05/18] try paren escape again --- pyproject.toml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 0911b742acfa7..b9f28b03b2616 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -105,20 +105,20 @@ exclude_also = [ # coverage for these devices is to be ignored until we plan to deploy them 'engine_config.device_config.device_type == "cpu"', 'engine_config.device_config.device_type == "neuron"', - "elif is_neuron\(\):", - "elif is_cpu\(\):", + "elif is_neuron():", + "elif is_cpu():", # customop.py has some placeholder code not yet executed "return self.forward_cuda", # ignore some legacy code that we won't support in nm-vllm - "enable_hf_transfer\(\)", + "enable_hf_transfer()", "def convert_bin_to_safetensor_file", "def _shared_pointers", "def np_cache_weights_iterator", "def convert_pyslice_to_tensor", - "def _shared_pointers\(", - "def 
convert_bin_to_safetensor_file\(", - "def np_cache_weights_iterator\(", - "def convert_pyslice_to_tensor\(", + "def _shared_pointers", + "def convert_bin_to_safetensor_file", + "def np_cache_weights_iterator", + "def convert_pyslice_to_tensor", ] [tool.coverage.html] From 3704e8f7b4019bd93bf5f5c3a40c792b5b9aa4c4 Mon Sep 17 00:00:00 2001 From: derekk-nm Date: Fri, 5 Jul 2024 13:36:40 +0000 Subject: [PATCH 06/18] use $() instead of backticks --- .github/actions/nm-code-coverage-md/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/nm-code-coverage-md/action.yml b/.github/actions/nm-code-coverage-md/action.yml index cc0b63292a4ce..7a822a0f3ecb3 100644 --- a/.github/actions/nm-code-coverage-md/action.yml +++ b/.github/actions/nm-code-coverage-md/action.yml @@ -11,6 +11,6 @@ runs: run: | # the pandas.DataFrame.to_markdown() method requires the `tabulate` package pip3 install tabulate - COVERAGE_MD = `python3 ./.github/scripts/coverage_report_breakdown.py --coverage_json_file ${{ inputs.coverage_json }}` + COVERAGE_MD = $(python3 ./.github/scripts/coverage_report_breakdown.py --coverage_json_file ${{ inputs.coverage_json }}) echo "markdown=${COVERAGE_MD}" >> $GITHUB_OUTPUT shell: bash From 6cd7ae46fd083aea6bb06c35456d3f9ace58bb46 Mon Sep 17 00:00:00 2001 From: derekk-nm Date: Fri, 5 Jul 2024 15:45:04 +0000 Subject: [PATCH 07/18] fix coverage_report_breakdown usage. reduce test cases more. --- .github/actions/nm-code-coverage-md/action.yml | 2 +- neuralmagic/tests/test_skip_env_vars/smoke.txt | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/actions/nm-code-coverage-md/action.yml b/.github/actions/nm-code-coverage-md/action.yml index 7a822a0f3ecb3..286e0f3a2454f 100644 --- a/.github/actions/nm-code-coverage-md/action.yml +++ b/.github/actions/nm-code-coverage-md/action.yml @@ -11,6 +11,6 @@ runs: run: | # the pandas.DataFrame.to_markdown() method requires the `tabulate` package pip3 install tabulate - COVERAGE_MD = $(python3 ./.github/scripts/coverage_report_breakdown.py --coverage_json_file ${{ inputs.coverage_json }}) + COVERAGE_MD = $(python3 ./.github/scripts/coverage_report_breakdown.py ${{ inputs.coverage_json }}) echo "markdown=${COVERAGE_MD}" >> $GITHUB_OUTPUT shell: bash diff --git a/neuralmagic/tests/test_skip_env_vars/smoke.txt b/neuralmagic/tests/test_skip_env_vars/smoke.txt index 2a30fa64f463b..d1252b8c88ea9 100644 --- a/neuralmagic/tests/test_skip_env_vars/smoke.txt +++ b/neuralmagic/tests/test_skip_env_vars/smoke.txt @@ -1,13 +1,13 @@ TEST_ACCURACY=DISABLE TEST_ASYNC_ENGINE=DISABLE TEST_BASIC_CORRECTNESS=DISABLE -TEST_CORE=ENABLE +TEST_CORE=DISABLE TEST_DISTRIBUTED=DISABLE -TEST_ENGINE=ENABLE +TEST_ENGINE=DISABLE TEST_ENTRYPOINTS=DISABLE TEST_KERNELS=DISABLE TEST_LORA=DISABLE -TEST_METRICS=DISABLE +TEST_METRICS=ENABLE TEST_MODELS=DISABLE TEST_MODELS_CORE=DISABLE TEST_PREFIX_CACHING=DISABLE From 81558fd39f338072c31da42536ed1175d7b822c7 Mon Sep 17 00:00:00 2001 From: derekk-nm Date: Fri, 5 Jul 2024 15:47:57 +0000 Subject: [PATCH 08/18] debugging print out --- .github/actions/nm-code-coverage-md/action.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/actions/nm-code-coverage-md/action.yml b/.github/actions/nm-code-coverage-md/action.yml index 286e0f3a2454f..b1f291a258311 100644 --- a/.github/actions/nm-code-coverage-md/action.yml +++ b/.github/actions/nm-code-coverage-md/action.yml @@ -12,5 +12,6 @@ runs: # the pandas.DataFrame.to_markdown() method requires the `tabulate` package pip3 install 
tabulate COVERAGE_MD = $(python3 ./.github/scripts/coverage_report_breakdown.py ${{ inputs.coverage_json }}) + echo "markdown: ${COVERAGE_MD}" echo "markdown=${COVERAGE_MD}" >> $GITHUB_OUTPUT shell: bash From 12cb478572a377f62b0fb009981b096ace48acde Mon Sep 17 00:00:00 2001 From: derekk-nm Date: Fri, 5 Jul 2024 17:43:05 +0000 Subject: [PATCH 09/18] fix bash syntax. change tests run. --- .github/actions/nm-code-coverage-md/action.yml | 2 +- neuralmagic/tests/test_skip_env_vars/smoke.txt | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/actions/nm-code-coverage-md/action.yml b/.github/actions/nm-code-coverage-md/action.yml index b1f291a258311..36755c4472084 100644 --- a/.github/actions/nm-code-coverage-md/action.yml +++ b/.github/actions/nm-code-coverage-md/action.yml @@ -11,7 +11,7 @@ runs: run: | # the pandas.DataFrame.to_markdown() method requires the `tabulate` package pip3 install tabulate - COVERAGE_MD = $(python3 ./.github/scripts/coverage_report_breakdown.py ${{ inputs.coverage_json }}) + COVERAGE_MD=$(python3 ./.github/scripts/coverage_report_breakdown.py ${{ inputs.coverage_json }}) echo "markdown: ${COVERAGE_MD}" echo "markdown=${COVERAGE_MD}" >> $GITHUB_OUTPUT shell: bash diff --git a/neuralmagic/tests/test_skip_env_vars/smoke.txt b/neuralmagic/tests/test_skip_env_vars/smoke.txt index d1252b8c88ea9..6da4a3538ad88 100644 --- a/neuralmagic/tests/test_skip_env_vars/smoke.txt +++ b/neuralmagic/tests/test_skip_env_vars/smoke.txt @@ -7,7 +7,7 @@ TEST_ENGINE=DISABLE TEST_ENTRYPOINTS=DISABLE TEST_KERNELS=DISABLE TEST_LORA=DISABLE -TEST_METRICS=ENABLE +TEST_METRICS=DISABLE TEST_MODELS=DISABLE TEST_MODELS_CORE=DISABLE TEST_PREFIX_CACHING=DISABLE @@ -17,4 +17,4 @@ TEST_SPEC_DECODE=DISABLE TEST_TENSORIZER_LOADER=DISABLE TEST_TOKENIZATION=DISABLE TEST_TRACING=DISABLE -TEST_WORKER=DISABLE +TEST_WORKER=ENABLE From 5e7022d0f611ead8ad3af269367df1e937aa730b Mon Sep 17 00:00:00 2001 From: derekk-nm Date: Sun, 7 Jul 2024 17:39:56 +0000 Subject: [PATCH 10/18] properly pass along coverage markdown --- .github/actions/nm-code-coverage-md/action.yml | 1 - .github/workflows/nm-test.yml | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/actions/nm-code-coverage-md/action.yml b/.github/actions/nm-code-coverage-md/action.yml index 36755c4472084..f99806d146670 100644 --- a/.github/actions/nm-code-coverage-md/action.yml +++ b/.github/actions/nm-code-coverage-md/action.yml @@ -12,6 +12,5 @@ runs: # the pandas.DataFrame.to_markdown() method requires the `tabulate` package pip3 install tabulate COVERAGE_MD=$(python3 ./.github/scripts/coverage_report_breakdown.py ${{ inputs.coverage_json }}) - echo "markdown: ${COVERAGE_MD}" echo "markdown=${COVERAGE_MD}" >> $GITHUB_OUTPUT shell: bash diff --git a/.github/workflows/nm-test.yml b/.github/workflows/nm-test.yml index dfa32368d2ad9..0080ea16aebd0 100644 --- a/.github/workflows/nm-test.yml +++ b/.github/workflows/nm-test.yml @@ -188,7 +188,7 @@ jobs: whl: ${{ steps.test.outputs.whl }} magic_wand: ${{ steps.test.outputs.magic_wand }} test_status: ${{ steps.test.outputs.status }} - test_coverage_md: ${{ steps.coverage_breakdown.markdown }} + test_coverage_md: ${{ steps.coverage_breakdown.outputs.markdown }} - name: complete testmo run uses: ./.github/actions/nm-testmo-run-complete/ From a27b1ee1e46607a9b142ce36d9c52e4f47eb1894 Mon Sep 17 00:00:00 2001 From: derekk-nm Date: Sun, 7 Jul 2024 20:00:07 +0000 Subject: [PATCH 11/18] attempt to handle multi-line github output content --- 
.github/actions/nm-code-coverage-md/action.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/actions/nm-code-coverage-md/action.yml b/.github/actions/nm-code-coverage-md/action.yml index f99806d146670..f7f9a3ee67e63 100644 --- a/.github/actions/nm-code-coverage-md/action.yml +++ b/.github/actions/nm-code-coverage-md/action.yml @@ -11,6 +11,11 @@ runs: run: | # the pandas.DataFrame.to_markdown() method requires the `tabulate` package pip3 install tabulate - COVERAGE_MD=$(python3 ./.github/scripts/coverage_report_breakdown.py ${{ inputs.coverage_json }}) + # prepare a multiline response to pass to github_output + { + echo 'COVERAGE_MD<> $GITHUB_OUTPUT shell: bash From e6563eed6f53d4477f61100c2bceb03b6cec822a Mon Sep 17 00:00:00 2001 From: derekk-nm Date: Mon, 8 Jul 2024 10:50:03 +0000 Subject: [PATCH 12/18] attempt to handle multi-line github output content temporarily restrict tests to one file for debugging --- .github/actions/nm-code-coverage-md/action.yml | 8 +++----- .github/scripts/run-tests | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/.github/actions/nm-code-coverage-md/action.yml b/.github/actions/nm-code-coverage-md/action.yml index f7f9a3ee67e63..54f54e0007afa 100644 --- a/.github/actions/nm-code-coverage-md/action.yml +++ b/.github/actions/nm-code-coverage-md/action.yml @@ -12,10 +12,8 @@ runs: # the pandas.DataFrame.to_markdown() method requires the `tabulate` package pip3 install tabulate # prepare a multiline response to pass to github_output - { - echo 'COVERAGE_MD<> $GITHUB_OUTPUT shell: bash diff --git a/.github/scripts/run-tests b/.github/scripts/run-tests index 2e0742e0350a1..50b68bdc0f8ea 100755 --- a/.github/scripts/run-tests +++ b/.github/scripts/run-tests @@ -59,7 +59,7 @@ if [ ! -d "${TEST_DIR}" ]; then fi # find tests -TESTS_DOT_PY=$(find ${TEST_DIR} -type f -name "test*.py") +TESTS_DOT_PY=$(find ${TEST_DIR} -type f -name "test_regression.py") TESTS_FOUND=(${TESTS_DOT_PY}) echo "found:" From 9a9d68159551cb116437e46a182d294a84d21cc7 Mon Sep 17 00:00:00 2001 From: derekk-nm Date: Mon, 8 Jul 2024 11:48:45 +0000 Subject: [PATCH 13/18] move coverage summary to summary may be easier than passing the multi-line table from one action to another. 
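For reference, a multi-line value can in principle be passed between steps with the
heredoc form of $GITHUB_OUTPUT; a rough sketch only (assuming the output name
`markdown` and the report at `cc-vllm.json`):

    # multi-line step output needs the heredoc delimiter form of GITHUB_OUTPUT
    {
      echo "markdown<<EOF"
      python3 ./.github/scripts/coverage_report_breakdown.py cc-vllm.json
      echo "EOF"
    } >> "$GITHUB_OUTPUT"

Writing the table straight into the step summary avoids that quoting altogether.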
--- .github/actions/nm-summary-test/action.yml | 15 +++++++++++---- .github/workflows/nm-test.yml | 9 +-------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/actions/nm-summary-test/action.yml b/.github/actions/nm-summary-test/action.yml index 5b3e8e071c413..cfff601d8c140 100644 --- a/.github/actions/nm-summary-test/action.yml +++ b/.github/actions/nm-summary-test/action.yml @@ -22,9 +22,9 @@ inputs: test_status: description: 'status from test step' required: true - test_coverage_md: - description: 'test coverage summary markdown for this particular run' - required: false + coverage_json: + description: 'file containing coverage report in JSON format' + required: true runs: using: composite steps: @@ -47,5 +47,12 @@ runs: echo "| test: | ${TEST_EMOJI} |" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY echo "Test Coverage" >> $GITHUB_STEP_SUMMARY - echo "${{ inputs.test_coverage_md }}" >> $GITHUB_STEP_SUMMARY + # the pandas.DataFrame.to_markdown() method requires the `tabulate` package + pip3 install tabulate + # prepare a multiline response to pass to github_output + { + echo 'COVERAGE_MD<> $GITHUB_STEP_SUMMARY shell: bash diff --git a/.github/workflows/nm-test.yml b/.github/workflows/nm-test.yml index 0080ea16aebd0..bfa41c80651e0 100644 --- a/.github/workflows/nm-test.yml +++ b/.github/workflows/nm-test.yml @@ -170,13 +170,6 @@ jobs: results: test-results step_status: ${{ steps.test.outputs.status }} - - name: collect test coverage breakdown - id: coverage_breakdown - uses: ./.github/actions/nm-code-coverage-md/ - if: success () || failure() - with: - coverage_json: cc-vllm.json - - name: summary uses: ./.github/actions/nm-summary-test/ if: success() || failure() @@ -188,7 +181,7 @@ jobs: whl: ${{ steps.test.outputs.whl }} magic_wand: ${{ steps.test.outputs.magic_wand }} test_status: ${{ steps.test.outputs.status }} - test_coverage_md: ${{ steps.coverage_breakdown.outputs.markdown }} + coverage_json: cc-vllm.json - name: complete testmo run uses: ./.github/actions/nm-testmo-run-complete/ From b27c1cf5df5a9fd2220c32b748fc538894a418f4 Mon Sep 17 00:00:00 2001 From: derekk-nm Date: Mon, 8 Jul 2024 13:00:04 +0000 Subject: [PATCH 14/18] use nm-code-coverage-md action again this time with correction. 
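Note for reference: a composite action only exposes a step's output to its caller
when it is also mapped through a top-level `outputs:` block; a minimal sketch of
that mapping (assuming the step id `coverage_report_breakdown` used in this action):

    outputs:
      markdown:
        description: 'coverage breakdown as a github markdown table'
        value: ${{ steps.coverage_report_breakdown.outputs.markdown }}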
--- .github/actions/nm-code-coverage-md/action.yml | 2 +- .github/actions/nm-summary-test/action.yml | 15 ++++----------- .github/workflows/nm-test.yml | 9 ++++++++- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/actions/nm-code-coverage-md/action.yml b/.github/actions/nm-code-coverage-md/action.yml index 54f54e0007afa..5d771dacd577c 100644 --- a/.github/actions/nm-code-coverage-md/action.yml +++ b/.github/actions/nm-code-coverage-md/action.yml @@ -13,7 +13,7 @@ runs: pip3 install tabulate # prepare a multiline response to pass to github_output echo 'COVERAGE_MD<> $GITHUB_OUTPUT shell: bash diff --git a/.github/actions/nm-summary-test/action.yml b/.github/actions/nm-summary-test/action.yml index cfff601d8c140..5b3e8e071c413 100644 --- a/.github/actions/nm-summary-test/action.yml +++ b/.github/actions/nm-summary-test/action.yml @@ -22,9 +22,9 @@ inputs: test_status: description: 'status from test step' required: true - coverage_json: - description: 'file containing coverage report in JSON format' - required: true + test_coverage_md: + description: 'test coverage summary markdown for this particular run' + required: false runs: using: composite steps: @@ -47,12 +47,5 @@ runs: echo "| test: | ${TEST_EMOJI} |" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY echo "Test Coverage" >> $GITHUB_STEP_SUMMARY - # the pandas.DataFrame.to_markdown() method requires the `tabulate` package - pip3 install tabulate - # prepare a multiline response to pass to github_output - { - echo 'COVERAGE_MD<> $GITHUB_STEP_SUMMARY + echo "${{ inputs.test_coverage_md }}" >> $GITHUB_STEP_SUMMARY shell: bash diff --git a/.github/workflows/nm-test.yml b/.github/workflows/nm-test.yml index bfa41c80651e0..0080ea16aebd0 100644 --- a/.github/workflows/nm-test.yml +++ b/.github/workflows/nm-test.yml @@ -170,6 +170,13 @@ jobs: results: test-results step_status: ${{ steps.test.outputs.status }} + - name: collect test coverage breakdown + id: coverage_breakdown + uses: ./.github/actions/nm-code-coverage-md/ + if: success () || failure() + with: + coverage_json: cc-vllm.json + - name: summary uses: ./.github/actions/nm-summary-test/ if: success() || failure() @@ -181,7 +188,7 @@ jobs: whl: ${{ steps.test.outputs.whl }} magic_wand: ${{ steps.test.outputs.magic_wand }} test_status: ${{ steps.test.outputs.status }} - coverage_json: cc-vllm.json + test_coverage_md: ${{ steps.coverage_breakdown.outputs.markdown }} - name: complete testmo run uses: ./.github/actions/nm-testmo-run-complete/ From 844715bcd5e0c632b1dc752daaca33205739a5cb Mon Sep 17 00:00:00 2001 From: derekk-nm Date: Mon, 8 Jul 2024 14:00:03 +0000 Subject: [PATCH 15/18] push it back to summary again. this time with correction. 
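For reference, unlike $GITHUB_OUTPUT the step summary is just a file, so the
rendered table can be appended to it directly with no heredoc delimiter; roughly
(a sketch, assuming the JSON report is `cc-vllm.json`):

    # the step summary is a plain file, so multi-line markdown can be appended as-is
    pip3 install tabulate
    python3 ./.github/scripts/coverage_report_breakdown.py cc-vllm.json >> "$GITHUB_STEP_SUMMARY"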
--- .github/actions/nm-summary-test/action.yml | 16 ++++++++++++---- .github/workflows/nm-test.yml | 14 +++++++------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/.github/actions/nm-summary-test/action.yml b/.github/actions/nm-summary-test/action.yml index 5b3e8e071c413..fced5237f9f4c 100644 --- a/.github/actions/nm-summary-test/action.yml +++ b/.github/actions/nm-summary-test/action.yml @@ -22,9 +22,9 @@ inputs: test_status: description: 'status from test step' required: true - test_coverage_md: - description: 'test coverage summary markdown for this particular run' - required: false + coverage_json: + description: 'file containing coverage report in JSON format' + required: true runs: using: composite steps: @@ -47,5 +47,13 @@ runs: echo "| test: | ${TEST_EMOJI} |" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY echo "Test Coverage" >> $GITHUB_STEP_SUMMARY - echo "${{ inputs.test_coverage_md }}" >> $GITHUB_STEP_SUMMARY + # the pandas.DataFrame.to_markdown() method requires the `tabulate` package + pip3 install tabulate + # prepare a multiline response to pass to github_output + { + echo 'COVERAGE_MD<> $GITHUB_STEP_SUMMARY +# echo "${{ inputs.test_coverage_md }}" >> $GITHUB_STEP_SUMMARY shell: bash diff --git a/.github/workflows/nm-test.yml b/.github/workflows/nm-test.yml index 0080ea16aebd0..885ad3fe58415 100644 --- a/.github/workflows/nm-test.yml +++ b/.github/workflows/nm-test.yml @@ -170,12 +170,12 @@ jobs: results: test-results step_status: ${{ steps.test.outputs.status }} - - name: collect test coverage breakdown - id: coverage_breakdown - uses: ./.github/actions/nm-code-coverage-md/ - if: success () || failure() - with: - coverage_json: cc-vllm.json +# - name: collect test coverage breakdown +# id: coverage_breakdown +# uses: ./.github/actions/nm-code-coverage-md/ +# if: success () || failure() +# with: +# coverage_json: cc-vllm.json - name: summary uses: ./.github/actions/nm-summary-test/ @@ -188,7 +188,7 @@ jobs: whl: ${{ steps.test.outputs.whl }} magic_wand: ${{ steps.test.outputs.magic_wand }} test_status: ${{ steps.test.outputs.status }} - test_coverage_md: ${{ steps.coverage_breakdown.outputs.markdown }} + coverage_json: cc-vllm.json - name: complete testmo run uses: ./.github/actions/nm-testmo-run-complete/ From 179d2c2dd20c97d0c1462ad521e9c5d2e7ccf3fc Mon Sep 17 00:00:00 2001 From: derekk-nm Date: Mon, 8 Jul 2024 15:47:37 +0000 Subject: [PATCH 16/18] try using file content and aligning columns --- .github/actions/nm-summary-test/action.yml | 16 +++++++--------- .github/scripts/coverage_report_breakdown.py | 2 +- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/.github/actions/nm-summary-test/action.yml b/.github/actions/nm-summary-test/action.yml index fced5237f9f4c..71b9e3f03a6c1 100644 --- a/.github/actions/nm-summary-test/action.yml +++ b/.github/actions/nm-summary-test/action.yml @@ -32,7 +32,8 @@ runs: TESTMO_URL=${{ inputs.testmo_run_url }} TEST_STATUS=${{ inputs.test_status }} TEST_EMOJI=$(./.github/scripts/step-status ${TEST_STATUS}) - echo "testmo URL: ${TESTMO_URL}" >> $GITHUB_STEP_SUMMARY + echo "## TestMo URL: " >> $GITHUB_STEP_SUMMARY + echo "${TESTMO_URL}" >> $GITHUB_STEP_SUMMARY echo "" echo "| Parameter | |" >> $GITHUB_STEP_SUMMARY echo "|---|---|" >> $GITHUB_STEP_SUMMARY @@ -46,14 +47,11 @@ runs: echo "| magic_wand: | ${{ inputs.magic_wand }} |" >> $GITHUB_STEP_SUMMARY echo "| test: | ${TEST_EMOJI} |" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY - echo "Test Coverage" >> $GITHUB_STEP_SUMMARY + echo "## 
Test Coverage" >> $GITHUB_STEP_SUMMARY # the pandas.DataFrame.to_markdown() method requires the `tabulate` package pip3 install tabulate - # prepare a multiline response to pass to github_output - { - echo 'COVERAGE_MD<> $GITHUB_STEP_SUMMARY -# echo "${{ inputs.test_coverage_md }}" >> $GITHUB_STEP_SUMMARY + # As a multiline response we cannot pass the table directly to github + # so redirect it to a file, then cat the file to the output + python3 ./.github/scripts/coverage_report_breakdown.py ${{ inputs.coverage_json }} > COVERAGE_MD + cat COVERAGE_MD >> $GITHUB_STEP_SUMMARY shell: bash diff --git a/.github/scripts/coverage_report_breakdown.py b/.github/scripts/coverage_report_breakdown.py index 2b2e4d584f947..0cc401922cbb4 100644 --- a/.github/scripts/coverage_report_breakdown.py +++ b/.github/scripts/coverage_report_breakdown.py @@ -146,7 +146,7 @@ def to_github_markdown(self) -> str: summary_df = summary_df.astype(str) summary_df.replace({"nan": None}, inplace=True) - return summary_df.to_markdown(index=False, tablefmt="github", missingval="", floatfmt=(".0f", ".0f", ".0f", ".0f", ".0f", ".1%")) + return summary_df.to_markdown(index=False, tablefmt="github", missingval="", floatfmt=(".0f", ".0f", ".0f", ".0f", ".0f", ".1%"), colalign=("left", "right", "right", "right", "right", "decimal")) if __name__ == "__main__": From 21268c439054fcf423df473ff2261e2bbd34277c Mon Sep 17 00:00:00 2001 From: derekk-nm Date: Mon, 8 Jul 2024 17:42:30 +0000 Subject: [PATCH 17/18] tweak summary and revert changes used for testing purposes. --- .../actions/nm-code-coverage-md/action.yml | 19 ---- .github/actions/nm-summary-test/action.yml | 7 +- .github/scripts/coverage_report_breakdown.py | 52 ++++++---- .github/scripts/run-tests | 2 +- .github/workflows/nm-build-test.yml | 98 +++++++++---------- .github/workflows/nm-remote-push.yml | 9 +- .github/workflows/nm-test.yml | 8 -- .../tests/test_skip_env_vars/smoke.txt | 18 ++-- tests/models/test_mistral.py | 2 +- tests/models/test_models.py | 2 +- 10 files changed, 102 insertions(+), 115 deletions(-) delete mode 100644 .github/actions/nm-code-coverage-md/action.yml diff --git a/.github/actions/nm-code-coverage-md/action.yml b/.github/actions/nm-code-coverage-md/action.yml deleted file mode 100644 index 5d771dacd577c..0000000000000 --- a/.github/actions/nm-code-coverage-md/action.yml +++ /dev/null @@ -1,19 +0,0 @@ -name: run coverage_report_breakdown -description: 'run coverage_report_breakdown to generate markdown showing test coverage breakdown' -inputs: - coverage_json: - description: 'file containing coverage report in JSON format' - required: true -runs: - using: composite - steps: - - id: coverage_report_breakdown - run: | - # the pandas.DataFrame.to_markdown() method requires the `tabulate` package - pip3 install tabulate - # prepare a multiline response to pass to github_output - echo 'COVERAGE_MD<> $GITHUB_OUTPUT - shell: bash diff --git a/.github/actions/nm-summary-test/action.yml b/.github/actions/nm-summary-test/action.yml index 71b9e3f03a6c1..8e036f3d16ffe 100644 --- a/.github/actions/nm-summary-test/action.yml +++ b/.github/actions/nm-summary-test/action.yml @@ -32,9 +32,6 @@ runs: TESTMO_URL=${{ inputs.testmo_run_url }} TEST_STATUS=${{ inputs.test_status }} TEST_EMOJI=$(./.github/scripts/step-status ${TEST_STATUS}) - echo "## TestMo URL: " >> $GITHUB_STEP_SUMMARY - echo "${TESTMO_URL}" >> $GITHUB_STEP_SUMMARY - echo "" echo "| Parameter | |" >> $GITHUB_STEP_SUMMARY echo "|---|---|" >> $GITHUB_STEP_SUMMARY echo "| label: | \`${{ inputs.test_label 
}}\` |" >> $GITHUB_STEP_SUMMARY @@ -47,8 +44,10 @@ runs: echo "| magic_wand: | ${{ inputs.magic_wand }} |" >> $GITHUB_STEP_SUMMARY echo "| test: | ${TEST_EMOJI} |" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY + echo "#### TestMo URL: ${TESTMO_URL}" >> $GITHUB_STEP_SUMMARY echo "## Test Coverage" >> $GITHUB_STEP_SUMMARY - # the pandas.DataFrame.to_markdown() method requires the `tabulate` package + # coverage_report_breakdown.py requires the `tabulate` package + # to generate the markdown for the summary. pip3 install tabulate # As a multiline response we cannot pass the table directly to github # so redirect it to a file, then cat the file to the output diff --git a/.github/scripts/coverage_report_breakdown.py b/.github/scripts/coverage_report_breakdown.py index 0cc401922cbb4..3d920f4849595 100644 --- a/.github/scripts/coverage_report_breakdown.py +++ b/.github/scripts/coverage_report_breakdown.py @@ -12,6 +12,7 @@ class CoverageMetrics(UserDict): """ encapsulates code coverage metrics """ + def __init__(self, user_dict): super().__init__(user_dict) if "percent_covered_display" in self.data: @@ -22,6 +23,7 @@ class CodeCoverage: """ reads and reports on code coverage data as generated by the coverage tool """ + def __init__(self, file_path: Path): self.format: [int, None] = None self.version: [str, None] = None @@ -51,7 +53,8 @@ def _from_json(self, json_file_path: Path): self.timestamp = coverage_df["meta"]["timestamp"] self.show_contexts = coverage_df["meta"]["show_contexts"] self.branch_coverage = coverage_df["meta"]["branch_coverage"] - self.overall_metrics = CoverageMetrics(coverage_df["totals"].dropna().to_dict()) + self.overall_metrics = CoverageMetrics( + coverage_df["totals"].dropna().to_dict()) # segment the list of files by test cases and source code files_df = coverage_df.loc[:, ['files']].dropna() @@ -70,8 +73,9 @@ def get_sub_dir(file_path): # temporarily move the index to a "filepath" column self.source.reset_index(names="filepath", inplace=True) - # extract the subdirectory under vllm from filepath to the sub_dir column - self.source.loc[:, "sub_dir"] = self.source.loc[:, "filepath"].apply(get_sub_dir) + # extract subdirectories under vllm, put into the sub_dir column + self.source.loc[:, "sub_dir"] = self.source.loc[:, "filepath"].apply( + get_sub_dir) # make the filepath column the index again self.source.set_index("filepath", inplace=True) @@ -81,9 +85,14 @@ def _calculate_metrics(coverage_data: pd.Series) -> CoverageMetrics: common method to calculate metrics """ metrics_dict = {} - for metric in ["covered_lines", "num_statements", "missing_lines", "excluded_lines"]: - metrics_dict[metric] = sum(d[0]["summary"][metric] for d in coverage_data) - metrics_dict["percent_covered"] = metrics_dict["covered_lines"] / metrics_dict["num_statements"] * 100 + for metric in [ + "covered_lines", "num_statements", "missing_lines", + "excluded_lines" + ]: + metrics_dict[metric] = sum(d[0]["summary"][metric] + for d in coverage_data) + metrics_dict["percent_covered"] = metrics_dict[ + "covered_lines"] / metrics_dict["num_statements"] * 100 return CoverageMetrics(metrics_dict) def tests_metrics(self) -> CoverageMetrics: @@ -117,41 +126,48 @@ def to_github_markdown(self) -> str: source_metrics = self.source_metrics() source_metrics["Collection"] = "Source Code" summary_df = pd.DataFrame( - [overall_metrics, test_metrics, source_metrics] - ) - # make the percent_covered value compatible with the string "%" formatting + [overall_metrics, test_metrics, source_metrics]) + # make 
percent_covered value compatible with the string "%" formatting summary_df["percent_covered"] = summary_df["percent_covered"] / 100 # compose a set of the subdirectory breakdown summary info breakdown_list = [] for sub_dir in sorted(cc.source["sub_dir"].unique()): sub_dir_metrics = cc.source_metrics(sub_dir) - if sub_dir == "": - label = "vllm 'root'" - else: - label = sub_dir + label = "vllm 'root'" if sub_dir == "" else sub_dir sub_dir_metrics["Collection"] = label breakdown_list.append(sub_dir_metrics) breakdown_df = pd.DataFrame(breakdown_list) - # make the percent_covered value compatible with the string "%" formatting + # make percent_covered value compatible with the string "%" formatting breakdown_df["percent_covered"] = breakdown_df["percent_covered"] / 100 # join the top level and breakdown data with separator rows between them # add a separator row and subtitle row - empty_row_df = pd.Series(pd.NA, index=summary_df.columns).to_frame().transpose() + empty_row_df = pd.Series( + pd.NA, index=summary_df.columns).to_frame().transpose() header_row_df = empty_row_df.copy() header_row_df["Collection"] = "vllm Subdirs" - summary_df = pd.concat([summary_df, empty_row_df, header_row_df, breakdown_df], ignore_index=True) + summary_df = pd.concat( + [summary_df, empty_row_df, header_row_df, breakdown_df], + ignore_index=True) # clean up the `nan` values for display purposes summary_df = summary_df.astype(str) summary_df.replace({"nan": None}, inplace=True) - return summary_df.to_markdown(index=False, tablefmt="github", missingval="", floatfmt=(".0f", ".0f", ".0f", ".0f", ".0f", ".1%"), colalign=("left", "right", "right", "right", "right", "decimal")) + return summary_df.to_markdown(index=False, + tablefmt="github", + missingval="", + floatfmt=(".0f", ".0f", ".0f", ".0f", + ".0f", ".1%"), + colalign=("left", "right", "right", + "right", "right", "decimal")) if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("coverage_json_file", type=str, help="file path to coverage JSON output") + parser.add_argument("coverage_json_file", + type=str, + help="file path to coverage JSON output") args = parser.parse_args() cc = CodeCoverage(Path(args.coverage_json_file)) diff --git a/.github/scripts/run-tests b/.github/scripts/run-tests index 50b68bdc0f8ea..2e0742e0350a1 100755 --- a/.github/scripts/run-tests +++ b/.github/scripts/run-tests @@ -59,7 +59,7 @@ if [ ! 
-d "${TEST_DIR}" ]; then fi # find tests -TESTS_DOT_PY=$(find ${TEST_DIR} -type f -name "test_regression.py") +TESTS_DOT_PY=$(find ${TEST_DIR} -type f -name "test*.py") TESTS_FOUND=(${TESTS_DOT_PY}) echo "found:" diff --git a/.github/workflows/nm-build-test.yml b/.github/workflows/nm-build-test.yml index ae290d4566a37..9a5043308a067 100644 --- a/.github/workflows/nm-build-test.yml +++ b/.github/workflows/nm-build-test.yml @@ -133,52 +133,52 @@ jobs: test_skip_env_vars: ${{ matrix.test_config.test }} secrets: inherit -# BENCHMARK: -# needs: [BUILD] -# if: success() -# uses: ./.github/workflows/nm-benchmark.yml -# with: -# label: ${{ inputs.benchmark_label }} -# benchmark_config_list_file: ${{ inputs.benchmark_config_list_file }} -# timeout: ${{ inputs.benchmark_timeout }} -# gitref: ${{ github.ref }} -# python: ${{ inputs.python }} -# whl: ${{ needs.BUILD.outputs.whl }} -# # Always push if it is a scheduled job -# push_benchmark_results_to_gh_pages: "${{ github.event_name == 'schedule' || inputs.push_benchmark_results_to_gh_pages }}" -# secrets: inherit -# -# LM-EVAL: -# needs: [BUILD] -# uses: ./.github/workflows/nm-lm-eval.yml -# with: -# label: ${{ inputs.lm_eval_label }} -# timeout: ${{ inputs.lm_eval_timeout }} -# gitref: ${{ inputs.gitref }} -# python: ${{ inputs.python }} -# whl: ${{ needs.BUILD.outputs.whl }} -# lm_eval_configuration: ${{ inputs.lm_eval_configuration }} -# secrets: inherit -# -# # uploading is only available when using GCP autoscaling group -# UPLOAD: -# needs: [TEST, BENCHMARK, LM-EVAL] -# if: ${{ inputs.push_to_pypi }} -# uses: ./.github/workflows/nm-upload-assets-to-gcp.yml -# with: -# label: gcp-k8s-util -# timeout: ${{ inputs.build_timeout }} -# gitref: ${{ github.ref }} -# secrets: inherit -# -# # update docker -# DOCKER: -# needs: [BUILD, UPLOAD] -# if: ${{ inputs.push_to_pypi }} -# uses: ./.github/workflows/publish-docker.yml -# with: -# push_to_repository: ${{ inputs.push_to_pypi }} -# gitref: ${{ inputs.gitref }} -# wf_category: ${{ inputs.wf_category }} -# wheel: ${{ needs.BUILD.outputs.whl }} -# secrets: inherit + BENCHMARK: + needs: [BUILD] + if: success() + uses: ./.github/workflows/nm-benchmark.yml + with: + label: ${{ inputs.benchmark_label }} + benchmark_config_list_file: ${{ inputs.benchmark_config_list_file }} + timeout: ${{ inputs.benchmark_timeout }} + gitref: ${{ github.ref }} + python: ${{ inputs.python }} + whl: ${{ needs.BUILD.outputs.whl }} + # Always push if it is a scheduled job + push_benchmark_results_to_gh_pages: "${{ github.event_name == 'schedule' || inputs.push_benchmark_results_to_gh_pages }}" + secrets: inherit + + LM-EVAL: + needs: [BUILD] + uses: ./.github/workflows/nm-lm-eval.yml + with: + label: ${{ inputs.lm_eval_label }} + timeout: ${{ inputs.lm_eval_timeout }} + gitref: ${{ inputs.gitref }} + python: ${{ inputs.python }} + whl: ${{ needs.BUILD.outputs.whl }} + lm_eval_configuration: ${{ inputs.lm_eval_configuration }} + secrets: inherit + + # uploading is only available when using GCP autoscaling group + UPLOAD: + needs: [TEST, BENCHMARK, LM-EVAL] + if: ${{ inputs.push_to_pypi }} + uses: ./.github/workflows/nm-upload-assets-to-gcp.yml + with: + label: gcp-k8s-util + timeout: ${{ inputs.build_timeout }} + gitref: ${{ github.ref }} + secrets: inherit + + # update docker + DOCKER: + needs: [BUILD, UPLOAD] + if: ${{ inputs.push_to_pypi }} + uses: ./.github/workflows/publish-docker.yml + with: + push_to_repository: ${{ inputs.push_to_pypi }} + gitref: ${{ inputs.gitref }} + wf_category: ${{ inputs.wf_category }} + wheel: ${{ 
needs.BUILD.outputs.whl }} + secrets: inherit diff --git a/.github/workflows/nm-remote-push.yml b/.github/workflows/nm-remote-push.yml index 33c65f42bcf52..a44274d9e8a11 100644 --- a/.github/workflows/nm-remote-push.yml +++ b/.github/workflows/nm-remote-push.yml @@ -19,11 +19,10 @@ jobs: gitref: ${{ github.ref }} push_to_pypi: false - test_configs: '[{"python":"3.8.17","label":"gcp-k8s-l4-solo","test":"neuralmagic/tests/test_skip_env_vars/smoke.txt"},]' -# '[{"python":"3.8.17","label":"gcp-k8s-l4-solo","test":"neuralmagic/tests/test_skip_env_vars/smoke.txt"}, -# {"python":"3.9.17","label":"gcp-k8s-l4-solo","test":"neuralmagic/tests/test_skip_env_vars/smoke.txt"}, -# {"python":"3.10.12","label":"gcp-k8s-l4-solo","test":"neuralmagic/tests/test_skip_env_vars/smoke.txt"}, -# {"python":"3.11.4","label":"gcp-k8s-l4-solo","test":"neuralmagic/tests/test_skip_env_vars/smoke.txt"}]' + test_configs: '[{"python":"3.8.17","label":"gcp-k8s-l4-solo","test":"neuralmagic/tests/test_skip_env_vars/smoke.txt"}, + {"python":"3.9.17","label":"gcp-k8s-l4-solo","test":"neuralmagic/tests/test_skip_env_vars/smoke.txt"}, + {"python":"3.10.12","label":"gcp-k8s-l4-solo","test":"neuralmagic/tests/test_skip_env_vars/smoke.txt"}, + {"python":"3.11.4","label":"gcp-k8s-l4-solo","test":"neuralmagic/tests/test_skip_env_vars/smoke.txt"}]' test_timeout: 480 benchmark_label: gcp-k8s-l4-solo diff --git a/.github/workflows/nm-test.yml b/.github/workflows/nm-test.yml index 885ad3fe58415..887b236190b46 100644 --- a/.github/workflows/nm-test.yml +++ b/.github/workflows/nm-test.yml @@ -124,7 +124,6 @@ jobs: uses: ./.github/actions/nm-install-whl/ with: python: ${{ inputs.python }} - venv: - name: run buildkite script run: | @@ -170,13 +169,6 @@ jobs: results: test-results step_status: ${{ steps.test.outputs.status }} -# - name: collect test coverage breakdown -# id: coverage_breakdown -# uses: ./.github/actions/nm-code-coverage-md/ -# if: success () || failure() -# with: -# coverage_json: cc-vllm.json - - name: summary uses: ./.github/actions/nm-summary-test/ if: success() || failure() diff --git a/neuralmagic/tests/test_skip_env_vars/smoke.txt b/neuralmagic/tests/test_skip_env_vars/smoke.txt index 6da4a3538ad88..e901455dfd5be 100644 --- a/neuralmagic/tests/test_skip_env_vars/smoke.txt +++ b/neuralmagic/tests/test_skip_env_vars/smoke.txt @@ -1,20 +1,20 @@ TEST_ACCURACY=DISABLE -TEST_ASYNC_ENGINE=DISABLE +TEST_ASYNC_ENGINE=ENABLE TEST_BASIC_CORRECTNESS=DISABLE -TEST_CORE=DISABLE +TEST_CORE=ENABLE TEST_DISTRIBUTED=DISABLE -TEST_ENGINE=DISABLE +TEST_ENGINE=ENABLE TEST_ENTRYPOINTS=DISABLE TEST_KERNELS=DISABLE TEST_LORA=DISABLE -TEST_METRICS=DISABLE +TEST_METRICS=ENABLE TEST_MODELS=DISABLE -TEST_MODELS_CORE=DISABLE -TEST_PREFIX_CACHING=DISABLE -TEST_QUANTIZATION=DISABLE +TEST_MODELS_CORE=ENABLE +TEST_PREFIX_CACHING=ENABLE +TEST_QUANTIZATION=ENABLE TEST_SAMPLERS=DISABLE TEST_SPEC_DECODE=DISABLE TEST_TENSORIZER_LOADER=DISABLE -TEST_TOKENIZATION=DISABLE -TEST_TRACING=DISABLE +TEST_TOKENIZATION=ENABLE +TEST_TRACING=ENABLE TEST_WORKER=ENABLE diff --git a/tests/models/test_mistral.py b/tests/models/test_mistral.py index 12a1807030c76..24a0de8d464dd 100644 --- a/tests/models/test_mistral.py +++ b/tests/models/test_mistral.py @@ -19,7 +19,7 @@ # UPSTREAM SYNC: we run OOM on the A10g instances. 
-# @pytest.mark.skip("Not enough memory in automation testing.") +@pytest.mark.skip("Not enough memory in automation testing.") @pytest.mark.parametrize("model", MODELS) @pytest.mark.parametrize("dtype", ["bfloat16"]) @pytest.mark.parametrize("max_tokens", [64]) diff --git a/tests/models/test_models.py b/tests/models/test_models.py index df8086024e74b..d856caec8abdd 100644 --- a/tests/models/test_models.py +++ b/tests/models/test_models.py @@ -29,7 +29,7 @@ # UPSTREAM SYNC: we run OOM on the A10g instances. -# @pytest.mark.skip("Not enough memory in automation testing.") +@pytest.mark.skip("Not enough memory in automation testing.") @pytest.mark.parametrize("model", MODELS) @pytest.mark.parametrize("dtype", ["float"]) @pytest.mark.parametrize("max_tokens", [96]) From 884ffe8a402dd91de42c60734ba6a6dba485d39c Mon Sep 17 00:00:00 2001 From: derekk-nm Date: Fri, 12 Jul 2024 17:40:49 +0000 Subject: [PATCH 18/18] exclude coverage when is_usage_stats_enabled our test runs won't ever enable it --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index b9f28b03b2616..5b7666d2830cf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -119,6 +119,7 @@ exclude_also = [ "def convert_bin_to_safetensor_file", "def np_cache_weights_iterator", "def convert_pyslice_to_tensor", + "if is_usage_stats_enabled" ] [tool.coverage.html]
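
Note: the breakdown table can also be rendered locally from the JSON report that
`.github/scripts/run-tests` writes via `--cov-report=json:cc-vllm.json`; a rough
sketch (the script imports `pandas`, and its `to_markdown()` call needs `tabulate`):

    # render the coverage breakdown locally from a coverage JSON report
    pip3 install pandas tabulate
    python3 ./.github/scripts/coverage_report_breakdown.py cc-vllm.json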