Remote push refactor (#297)

SUMMARY:
* updated the model test structure to focus on core models
* refactored tests to be gated by environment variables (currently at the "test group" level, so each test folder has its own env variable). All tests are off by default and must be explicitly enabled
* refactored the build-test workflow to take a file listing these env variables rather than a list of tests to skip

WHY:
* this lets us be more deliberate about what is and is not enabled, as opposed to maintaining a long list of files
* this enables us to actually track what is run and what is not run (via testmo, which tracks skipped tests)
* this gives us more fine-grained control over what is run vs not run (we can add more env vars at the sub-group level to turn off more tests)

--------- Signed-off-by: kerthcet <[email protected]> Signed-off-by: Muralidhar Andoorveedu <[email protected]> Signed-off-by: pandyamarut <[email protected]> Co-authored-by: Alexander Matveev <[email protected]> Co-authored-by: Woosuk Kwon <[email protected]> Co-authored-by: Cyrus Leung <[email protected]> Co-authored-by: Wenwei Zhang <[email protected]> Co-authored-by: Alexei-V-Ivanov-AMD <[email protected]> Co-authored-by: Alexey Kondratiev <[email protected]> Co-authored-by: Mor Zusman <[email protected]> Co-authored-by: Mor Zusman <[email protected]> Co-authored-by: Aurick Qiao <[email protected]> Co-authored-by: Kuntai Du <[email protected]> Co-authored-by: Antoni Baum <[email protected]> Co-authored-by: HUANG Fei <[email protected]> Co-authored-by: Isotr0py <[email protected]> Co-authored-by: Simon Mo <[email protected]> Co-authored-by: Michael Goin <[email protected]> Co-authored-by: Kante Yin <[email protected]> Co-authored-by: sasha0552 <[email protected]> Co-authored-by: SangBin Cho <[email protected]> Co-authored-by: Tyler Michael Smith <[email protected]> Co-authored-by: Cody Yu <[email protected]> Co-authored-by: raywanb <[email protected]> Co-authored-by: Nick Hill <[email protected]> Co-authored-by: Philipp Moritz <[email protected]> Co-authored-by: 
Letian Li <[email protected]> Co-authored-by: Murali Andoorveedu <[email protected]> Co-authored-by: Dipika Sikka <[email protected]> Co-authored-by: Varun Sundar Rabindranath <[email protected]> Co-authored-by: Varun Sundar Rabindranath <[email protected]> Co-authored-by: Elisei Smirnov <[email protected]> Co-authored-by: Elisei Smirnov <[email protected]> Co-authored-by: youkaichao <[email protected]> Co-authored-by: leiwen83 <[email protected]> Co-authored-by: Lei Wen <[email protected]> Co-authored-by: Eric Xihui Lin <[email protected]> Co-authored-by: beagleski <[email protected]> Co-authored-by: bapatra <[email protected]> Co-authored-by: Barun Patra <[email protected]> Co-authored-by: Lily Liu <[email protected]> Co-authored-by: Roger Wang <[email protected]> Co-authored-by: Zhuohan Li <[email protected]> Co-authored-by: Isotr0py <[email protected]> Co-authored-by: Michał Moskal <[email protected]> Co-authored-by: Ruth Evans <[email protected]> Co-authored-by: Divakar Verma <[email protected]> Co-authored-by: Roger Wang <[email protected]> Co-authored-by: Junichi Sato <[email protected]> Co-authored-by: Marut Pandya <[email protected]> Co-authored-by: afeldman-nm <[email protected]> Co-authored-by: Ronen Schaffer <[email protected]> Co-authored-by: Itay Etelis <[email protected]> Co-authored-by: omkar kakarparthi <[email protected]> Co-authored-by: Alexei V. Ivanov <[email protected]> Co-authored-by: Breno Faria <[email protected]> Co-authored-by: Breno Faria <[email protected]> Co-authored-by: Hyunsung Lee <[email protected]> Co-authored-by: Chansung Park <[email protected]> Co-authored-by: SnowDist <[email protected]> Co-authored-by: functionxu123 <[email protected]> Co-authored-by: xuhao <[email protected]> Co-authored-by: Domenic Barbuzzi <[email protected]>
neuralmagic · Jun 24, 2024 · ce8a19b · ce8a19b
1 parent fb14a05
commit ce8a19b
Show file tree

Hide file tree

Showing 141 changed files with 1,208 additions and 365 deletions.
diff --git a/.github/actions/nm-set-env-test-skip/action.yml b/.github/actions/nm-set-env-test-skip/action.yml
@@ -0,0 +1,15 @@
+name: set test skip env vars
+description: 'sets env variables for test skipping. See tests/nm_utils/utils_skip.py'
+inputs:
+  test_skip_env_vars:
+    description: 'file with list of env vars controlling which tests to run.'
+    required: true
+
+runs:
+  using: composite
+  steps:
+  - run: |
+      cat "${ENV_VAR_FILE}" >> $GITHUB_ENV
+    env:
+        ENV_VAR_FILE: ${{ inputs.test_skip_env_vars }}
+    shell: bash
diff --git a/.github/workflows/nm-build-test.yml b/.github/workflows/nm-build-test.yml
@@ -45,8 +45,8 @@ on:
         description: "git commit hash or branch name"
         type: string
         required: true
-      test_skip_list:
-        description: 'file containing tests to skip'
+      test_skip_env_vars:
+        description: 'file with list of env vars controlling which tests to run'
         type: string
         required: true
       # benchmark related parameters
@@ -91,22 +91,22 @@ jobs:
             gitref: ${{ github.ref }}
             python: ${{ inputs.python }}
             whl: ${{ needs.BUILD.outputs.whl }}
-            test_skip_list: ${{ inputs.test_skip_list }}
+            test_skip_env_vars: ${{ inputs.test_skip_env_vars }}
         secrets: inherit
 
     # TODO: re-enable
-    TEST-MULTI:
-        needs: [BUILD]
-        if: success()  # && contains(fromJSON('["NIGHTLY", "WEEKLY", "RELEASE"]'), inputs.wf_category)
-        uses: ./.github/workflows/nm-test.yml
-        with:
-            test_label: ${{ inputs.test_label_multi }}
-            timeout: ${{ inputs.test_timeout }}
-            gitref: ${{ github.ref }}
-            python: ${{ inputs.python }}
-            whl: ${{ needs.BUILD.outputs.whl }}
-            test_skip_list: ${{ inputs.test_skip_list }}
-        secrets: inherit
+    # TEST-MULTI:
+    #     needs: [BUILD]
+    #     if: success() && contains(fromJSON('["NIGHTLY", "WEEKLY", "RELEASE"]'), inputs.wf_category)
+    #     uses: ./.github/workflows/nm-test.yml
+    #     with:
+    #         test_label: ${{ inputs.test_label_multi }}
+    #         timeout: ${{ inputs.test_timeout }}
+    #         gitref: ${{ github.ref }}
+    #         python: ${{ inputs.python }}
+    #         whl: ${{ needs.BUILD.outputs.whl }}
+    #         test_skip_env_vars: ${{ inputs.test_skip_env_vars }}
+    #     secrets: inherit
 
     UPLOAD:
         needs: [TEST-SOLO]

diff --git a/.github/workflows/nm-nightly.yml b/.github/workflows/nm-nightly.yml
@@ -1,4 +1,4 @@
-name: nm Nightly
+name: nm nightly
 run-name: ${{ github.actor }} triggered nightly on ${{ github.ref }}
 on:
     schedule:
@@ -45,7 +45,7 @@ jobs:
             test_label_solo: gcp-k8s-l4-solo
             test_label_multi: ignore
             test_timeout: 480
-            test_skip_list: neuralmagic/tests/skip-for-nightly.txt
+            test_skip_env_vars: neuralmagic/tests/test_skip_env_vars/full.txt
 
             benchmark_label: gcp-k8s-l4-solo
             benchmark_config_list_file: ./.github/data/nm_benchmark_remote_push_configs_list.txt
@@ -63,7 +63,7 @@ jobs:
             test_label_solo: aws-avx2-32G-a10g-24G
             test_label_multi: ignore
             test_timeout: 480
-            test_skip_list: neuralmagic/tests/skip-for-nightly.txt
+            test_skip_env_vars: neuralmagic/tests/test_skip_env_vars/full.txt
 
             benchmark_label: gcp-k8s-l4-solo
             benchmark_config_list_file: ./.github/data/nm_benchmark_remote_push_configs_list.txt
@@ -81,7 +81,8 @@ jobs:
             test_label_solo: gcp-k8s-l4-solo
             test_label_multi: ignore
             test_timeout: 480
-            test_skip_list: neuralmagic/tests/skip-for-nightly.txt
+            test_skip_env_vars: neuralmagic/tests/test_skip_env_vars/full.txt
+
 
             benchmark_label: gcp-k8s-l4-solo
             benchmark_config_list_file: ./.github/data/nm_benchmark_remote_push_configs_list.txt

diff --git a/.github/workflows/nm-release.yml b/.github/workflows/nm-release.yml
@@ -23,7 +23,7 @@ jobs:
       test_label_solo: gcp-k8s-l4-solo
       test_label_multi: ignore
       test_timeout: 720
-      test_skip_list: neuralmagic/tests/skip-for-release.txt
+      test_skip_env_vars: neuralmagic/tests/test_skip_env_vars/full.txt
 
       benchmark_label: gcp-k8s-l4-solo
       benchmark_config_list_file: ./.github/data/nm_benchmark_nightly_configs_list.txt
@@ -41,7 +41,7 @@ jobs:
       test_label_solo: gcp-k8s-l4-solo
       test_label_multi: ignore
       test_timeout: 720
-      test_skip_list: neuralmagic/tests/skip-for-release.txt
+      test_skip_env_vars: neuralmagic/tests/test_skip_env_vars/full.txt
 
       benchmark_label: gcp-k8s-l4-solo
       benchmark_config_list_file: ./.github/data/nm_benchmark_nightly_configs_list.txt
@@ -59,7 +59,7 @@ jobs:
       test_label_solo: gcp-k8s-l4-solo
       test_label_multi: ignore
       test_timeout: 720
-      test_skip_list: neuralmagic/tests/skip-for-release.txt
+      test_skip_env_vars: neuralmagic/tests/test_skip_env_vars/full.txt
 
       benchmark_label: gcp-k8s-l4-solo
       benchmark_config_list_file: ./.github/data/nm_benchmark_nightly_configs_list.txt
@@ -77,7 +77,7 @@ jobs:
       test_label_solo: gcp-k8s-l4-solo
       test_label_multi: ignore
       test_timeout: 720
-      test_skip_list: neuralmagic/tests/skip-for-release.txt
+      test_skip_env_vars: neuralmagic/tests/test_skip_env_vars/full.txt
 
       benchmark_label: gcp-k8s-l4-solo
       benchmark_config_list_file: ./.github/data/nm_benchmark_nightly_configs_list.txt

diff --git a/.github/workflows/nm-remote-push.yml b/.github/workflows/nm-remote-push.yml
@@ -12,37 +12,37 @@ concurrency:
 
 jobs:
 
-#    BUILD-TEST-3-8:
-#        uses: ./.github/workflows/nm-build-test.yml
-#        with:
-#            python: 3.8.17
-#            gitref: ${{ github.ref }}
-#
-#            test_label_solo: gcp-k8s-l4-solo
-#            test_label_multi: ignore
-#            test_timeout: 480
-#            test_skip_list: neuralmagic/tests/skip-for-remote-push-tmp.txt
-#
-#            benchmark_label: gcp-k8s-l4-solo
-#            benchmark_config_list_file: ./.github/data/nm_benchmark_remote_push_configs_list.txt
-#            benchmark_timeout: 480
-#        secrets: inherit
-#
-#    BUILD-TEST-3-9:
-#        uses: ./.github/workflows/nm-build-test.yml
-#        with:
-#            python: 3.9.17
-#            gitref: ${{ github.ref }}
-#
-#            test_label_solo: gcp-k8s-l4-solo
-#            test_label_multi: ignore
-#            test_timeout: 480
-#            test_skip_list: neuralmagic/tests/skip-for-remote-push-tmp.txt
-#
-#            benchmark_label: gcp-k8s-l4-solo
-#            benchmark_config_list_file: ./.github/data/nm_benchmark_remote_push_configs_list.txt
-#            benchmark_timeout: 480
-#        secrets: inherit
+    BUILD-TEST-3-8:
+        uses: ./.github/workflows/nm-build-test.yml
+        with:
+            python: 3.8.17
+            gitref: ${{ github.ref }}
+
+            test_label_solo: gcp-k8s-l4-solo
+            test_label_multi: ignore
+            test_timeout: 480
+            test_skip_env_vars: neuralmagic/tests/test_skip_env_vars/smoke.txt
+
+            benchmark_label: gcp-k8s-l4-solo
+            benchmark_config_list_file: ./.github/data/nm_benchmark_remote_push_configs_list.txt
+            benchmark_timeout: 480
+        secrets: inherit
+
+    BUILD-TEST-3-9:
+        uses: ./.github/workflows/nm-build-test.yml
+        with:
+            python: 3.9.17
+            gitref: ${{ github.ref }}
+
+            test_label_solo: gcp-k8s-l4-solo
+            test_label_multi: ignore
+            test_timeout: 480
+            test_skip_env_vars: neuralmagic/tests/test_skip_env_vars/smoke.txt
+
+            benchmark_label: gcp-k8s-l4-solo
+            benchmark_config_list_file: ./.github/data/nm_benchmark_remote_push_configs_list.txt
+            benchmark_timeout: 480
+        secrets: inherit
 
     BUILD-TEST-3-10:
         uses: ./.github/workflows/nm-build-test.yml
@@ -51,27 +51,27 @@ jobs:
             gitref: ${{ github.ref }}
 
             test_label_solo: gcp-k8s-l4-solo
-            test_label_multi: gcp-k8s-l4-duo
-            test_timeout: 1440
-            test_skip_list: neuralmagic/tests/skip-for-remote-push-tmp.txt
+            test_label_multi: ignore
+            test_timeout: 480
+            test_skip_env_vars: neuralmagic/tests/test_skip_env_vars/smoke.txt
 
             benchmark_label: gcp-k8s-l4-solo
             benchmark_config_list_file: ./.github/data/nm_benchmark_remote_push_configs_list.txt
             benchmark_timeout: 480
         secrets: inherit
 
-#    BUILD-TEST-3-11:
-#        uses: ./.github/workflows/nm-build-test.yml
-#        with:
-#            python: 3.11.4
-#            gitref: ${{ github.ref }}
-#
-#            test_label_solo: gcp-k8s-l4-solo
-#            test_label_multi: ignore
-#            test_timeout: 480
-#            test_skip_list: neuralmagic/tests/skip-for-remote-push-tmp.txt
-#
-#            benchmark_label: gcp-k8s-l4-solo
-#            benchmark_config_list_file: ./.github/data/nm_benchmark_remote_push_configs_list.txt
-#            benchmark_timeout: 480
-#        secrets: inherit
+    BUILD-TEST-3-11:
+        uses: ./.github/workflows/nm-build-test.yml
+        with:
+            python: 3.11.4
+            gitref: ${{ github.ref }}
+
+            test_label_solo: gcp-k8s-l4-solo
+            test_label_multi: ignore
+            test_timeout: 480
+            test_skip_env_vars: neuralmagic/tests/test_skip_env_vars/smoke.txt
+
+            benchmark_label: gcp-k8s-l4-solo
+            benchmark_config_list_file: ./.github/data/nm_benchmark_remote_push_configs_list.txt
+            benchmark_timeout: 480
+        secrets: inherit
diff --git a/.github/workflows/nm-test.yml b/.github/workflows/nm-test.yml
@@ -23,8 +23,8 @@ on:
         description: "whl to test (variable appears late binding so unusable outside 'download artifact')"
         type: string
         required: true
-      test_skip_list:
-        description: 'file containing tests to skip'
+      test_skip_env_vars:
+        description: 'file with list of env vars controlling which tests to run'
         type: string
         required: true
 
@@ -51,8 +51,8 @@ on:
         description: "whl to test (variable appears late binding so unusable outside 'download artifact')"
         type: string
         required: true
-      test_skip_list:
-        description: 'file containing tests to skip'
+      test_skip_env_vars:
+        description: 'file with list of env vars controlling which tests to run'
         type: string
         required: true
 
@@ -131,12 +131,17 @@ jobs:
             - name: run buildkite script
               run: |
                 cd tests && sudo bash ../.buildkite/download-images.sh
+            
+            - name: setenv test skip
+              id: setenv_test_skip
+              uses: ./.github/actions/nm-set-env-test-skip
+              with:
+                test_skip_env_vars: ${{ inputs.test_skip_env_vars }}
 
             - name: run tests
               id: test
               uses: ./.github/actions/nm-test-whl/
               with:
-                test_skip_list: ${{ inputs.test_skip_list }}
                 test_directory: tests
                 test_results: test-results
 

diff --git a/.github/workflows/nm-weekly.yml b/.github/workflows/nm-weekly.yml
@@ -27,7 +27,7 @@ jobs:
       test_label_solo: aws-avx2-32G-a10g-24G
       test_label_multi: aws-avx2-192G-4-a10g-96G
       test_timeout: 480
-      test_skip_list: neuralmagic/tests/skip-for-weekly.txt
+      test_skip_env_vars: neuralmagic/tests/test_skip_env_vars/full.txt
 
       benchmark_label: aws-avx2-32G-a10g-24G
       benchmark_config_list_file: ./.github/data/nm_benchmark_weekly_configs_list.txt

diff --git a/neuralmagic/tests/test_skip_env_vars/full.txt b/neuralmagic/tests/test_skip_env_vars/full.txt
@@ -0,0 +1,19 @@
+TEST_ACCURACY=DISABLE
+TEST_ASYNC_ENGINE=ENABLE
+TEST_BASIC_CORRECTNESS=ENABLE
+TEST_CORE=ENABLE
+TEST_DISTRIBUTED=DISABLE
+TEST_ENGINE=ENABLE
+TEST_ENTRYPOINTS=ENABLE
+TEST_KERNELS=ENABLE
+TEST_LORA=ENABLE
+TEST_METRICS=ENABLE
+TEST_MODELS=ENABLE
+TEST_MODELS_CORE=ENABLE
+TEST_PREFIX_CACHING=ENABLE
+TEST_QUANTIZATION=ENABLE
+TEST_SAMPLERS=ENABLE
+TEST_SPEC_DECODE=DISABLE
+TEST_TENSORIZER_LOADER=ENABLE
+TEST_TOKENIZATION=ENABLE
+TEST_WORKER=ENABLE
diff --git a/neuralmagic/tests/test_skip_env_vars/smoke.txt b/neuralmagic/tests/test_skip_env_vars/smoke.txt
@@ -0,0 +1,19 @@
+TEST_ACCURACY=DISABLE
+TEST_ASYNC_ENGINE=ENABLE
+TEST_BASIC_CORRECTNESS=DISABLE
+TEST_CORE=ENABLE
+TEST_DISTRIBUTED=DISABLE
+TEST_ENGINE=ENABLE
+TEST_ENTRYPOINTS=DISABLE
+TEST_KERNELS=DISABLE
+TEST_LORA=DISABLE
+TEST_METRICS=ENABLE
+TEST_MODELS=DISABLE
+TEST_MODELS_CORE=ENABLE
+TEST_PREFIX_CACHING=ENABLE
+TEST_QUANTIZATION=ENABLE
+TEST_SAMPLERS=DISABLE
+TEST_SPEC_DECODE=DISABLE
+TEST_TENSORIZER_LOADER=DISABLE
+TEST_TOKENIZATION=ENABLE
+TEST_WORKER=ENABLE
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -31,6 +31,8 @@ peft
 requests==2.31
 ray
 sentence-transformers # required for embedding
+optimum     # required for hf gptq baselines
+auto-gptq   # required for hf gptq baselines
 
 # Benchmarking
 aiohttp

diff --git a/tests/accuracy/test_lm_eval_correctness.py b/tests/accuracy/test_lm_eval_correctness.py
@@ -8,6 +8,11 @@
 import yaml
 
 from tests.nm_utils.server import ServerContext
+from tests.nm_utils.utils_skip import should_skip_test_group
+
+if should_skip_test_group(group_name="TEST_ACCURACY"):
+    pytest.skip("TEST_ACCURACY=DISABLE, skipping accuracy test group",
+                allow_module_level=True)
 
 if TYPE_CHECKING:
     import lm_eval as lm_eval_t

diff --git a/tests/async_engine/test_api_server.py b/tests/async_engine/test_api_server.py
@@ -7,6 +7,12 @@
 import pytest
 import requests
 
+from tests.nm_utils.utils_skip import should_skip_test_group
+
+if should_skip_test_group(group_name="TEST_ASYNC_ENGINE"):
+    pytest.skip("TEST_ASYNC_ENGINE=DISABLE, skipping async engine test group",
+                allow_module_level=True)
+
 
 def _query_server(prompt: str, max_tokens: int = 5) -> dict:
     response = requests.post("http://localhost:8000/generate",

diff --git a/tests/async_engine/test_async_llm_engine.py b/tests/async_engine/test_async_llm_engine.py
@@ -3,8 +3,13 @@
 
 import pytest
 
+from tests.nm_utils.utils_skip import should_skip_test_group
 from vllm.engine.async_llm_engine import AsyncLLMEngine
 
+if should_skip_test_group(group_name="TEST_ASYNC_ENGINE"):
+    pytest.skip("TEST_ASYNC_ENGINE=DISABLE, skipping async engine test group",
+                allow_module_level=True)
+
 
 @dataclass
 class RequestOutput: