neuralmagic · dbarbuzzi · Jun 14, 2024 · May 19, 2024 · May 20, 2024 · May 20, 2024
diff --git a/.github/actions/nm-set-env-test-skip/action.yml b/.github/actions/nm-set-env-test-skip/action.yml
@@ -0,0 +1,20 @@
+name: set test skip env vars
+description: 'sets env variables for test skipping. See tests/nm_utils/utils_skip.py'
+inputs:
+  test_skip_env_vars:
+    description: 'file with list of env vars controlling which tests to run.'
+    required: true
+
+runs:
+  using: composite
+  steps:
+  # Append every line of the given file to $GITHUB_ENV so the variables are
+  # visible to the later steps of the calling job.
+  - run: |
+      # Quote the target: the runner-provided path must not be word-split.
+      cat "${ENV_VAR_FILE}" >> "${GITHUB_ENV}"
+    env:
+      # Pass the input through the environment rather than interpolating it
+      # into the script, so the path is never parsed as shell code.
+      ENV_VAR_FILE: ${{ inputs.test_skip_env_vars }}
+    shell: bash
diff --git a/.github/workflows/nm-build-test.yml b/.github/workflows/nm-build-test.yml
@@ -45,8 +45,8 @@ on:
         description: "git commit hash or branch name"
         type: string
         required: true
-      test_skip_list:
-        description: 'file containing tests to skip'
+      test_skip_env_vars:
+        description: 'file with list of env vars controlling which tests to run'
         type: string
         required: true
       # benchmark related parameters
@@ -91,7 +91,7 @@ jobs:
             gitref: ${{ github.ref }}
             python: ${{ inputs.python }}
             whl: ${{ needs.BUILD.outputs.whl }}
-            test_skip_list: ${{ inputs.test_skip_list }}
+            test_skip_env_vars: ${{ inputs.test_skip_env_vars }}
         secrets: inherit
 
     # TODO: re-enable
@@ -105,7 +105,7 @@ jobs:
     #         gitref: ${{ github.ref }}
     #         python: ${{ inputs.python }}
     #         whl: ${{ needs.BUILD.outputs.whl }}
-    #         test_skip_list: ${{ inputs.test_skip_list }}
+    #         test_skip_env_vars: ${{ inputs.test_skip_env_vars }}
     #     secrets: inherit
 
     UPLOAD:

diff --git a/.github/workflows/nm-nightly.yml b/.github/workflows/nm-nightly.yml
@@ -1,4 +1,4 @@
-name: nm Nightly
+name: nm nightly
 run-name: ${{ github.actor }} triggered nightly on ${{ github.ref }}
 on:
     schedule:
@@ -45,7 +45,7 @@ jobs:
             test_label_solo: gcp-k8s-l4-solo
             test_label_multi: ignore
             test_timeout: 480
-            test_skip_list: neuralmagic/tests/skip-for-nightly.txt
+            test_skip_env_vars: neuralmagic/tests/test_skip_env_vars/full.txt
 
             benchmark_label: gcp-k8s-l4-solo
             benchmark_config_list_file: ./.github/data/nm_benchmark_remote_push_configs_list.txt
@@ -63,7 +63,7 @@ jobs:
             test_label_solo: aws-avx2-32G-a10g-24G
             test_label_multi: ignore
             test_timeout: 480
-            test_skip_list: neuralmagic/tests/skip-for-nightly.txt
+            test_skip_env_vars: neuralmagic/tests/test_skip_env_vars/full.txt
 
             benchmark_label: gcp-k8s-l4-solo
             benchmark_config_list_file: ./.github/data/nm_benchmark_remote_push_configs_list.txt
@@ -81,7 +81,7 @@ jobs:
             test_label_solo: gcp-k8s-l4-solo
             test_label_multi: ignore
             test_timeout: 480
-            test_skip_list: neuralmagic/tests/skip-for-nightly.txt
+            test_skip_env_vars: neuralmagic/tests/test_skip_env_vars/full.txt
 
             benchmark_label: gcp-k8s-l4-solo
             benchmark_config_list_file: ./.github/data/nm_benchmark_remote_push_configs_list.txt

diff --git a/.github/workflows/nm-release.yml b/.github/workflows/nm-release.yml
@@ -23,7 +23,7 @@ jobs:
       test_label_solo: gcp-k8s-l4-solo
       test_label_multi: ignore
       test_timeout: 720
-      test_skip_list: neuralmagic/tests/skip-for-release.txt
+      test_skip_env_vars: neuralmagic/tests/test_skip_env_vars/full.txt
 
       benchmark_label: gcp-k8s-l4-solo
       benchmark_config_list_file: ./.github/data/nm_benchmark_nightly_configs_list.txt
@@ -41,7 +41,7 @@ jobs:
       test_label_solo: gcp-k8s-l4-solo
       test_label_multi: ignore
       test_timeout: 720
-      test_skip_list: neuralmagic/tests/skip-for-release.txt
+      test_skip_env_vars: neuralmagic/tests/test_skip_env_vars/full.txt
 
       benchmark_label: gcp-k8s-l4-solo
       benchmark_config_list_file: ./.github/data/nm_benchmark_nightly_configs_list.txt
@@ -59,7 +59,7 @@ jobs:
       test_label_solo: gcp-k8s-l4-solo
       test_label_multi: ignore
       test_timeout: 720
-      test_skip_list: neuralmagic/tests/skip-for-release.txt
+      test_skip_env_vars: neuralmagic/tests/test_skip_env_vars/full.txt
 
       benchmark_label: gcp-k8s-l4-solo
       benchmark_config_list_file: ./.github/data/nm_benchmark_nightly_configs_list.txt
@@ -77,7 +77,7 @@ jobs:
       test_label_solo: gcp-k8s-l4-solo
       test_label_multi: ignore
       test_timeout: 720
-      test_skip_list: neuralmagic/tests/skip-for-release.txt
+      test_skip_env_vars: neuralmagic/tests/test_skip_env_vars/full.txt
 
       benchmark_label: gcp-k8s-l4-solo
       benchmark_config_list_file: ./.github/data/nm_benchmark_nightly_configs_list.txt

diff --git a/.github/workflows/nm-remote-push.yml b/.github/workflows/nm-remote-push.yml
@@ -21,7 +21,7 @@ jobs:
             test_label_solo: gcp-k8s-l4-solo
             test_label_multi: ignore
             test_timeout: 480
-            test_skip_list: neuralmagic/tests/skip-for-remote-push-tmp.txt
+            test_skip_env_vars: neuralmagic/tests/test_skip_env_vars/smoke.txt
 
             benchmark_label: gcp-k8s-l4-solo
             benchmark_config_list_file: ./.github/data/nm_benchmark_remote_push_configs_list.txt
@@ -37,7 +37,7 @@ jobs:
             test_label_solo: gcp-k8s-l4-solo
             test_label_multi: ignore
             test_timeout: 480
-            test_skip_list: neuralmagic/tests/skip-for-remote-push-tmp.txt
+            test_skip_env_vars: neuralmagic/tests/test_skip_env_vars/smoke.txt
 
             benchmark_label: gcp-k8s-l4-solo
             benchmark_config_list_file: ./.github/data/nm_benchmark_remote_push_configs_list.txt
@@ -53,7 +53,7 @@ jobs:
             test_label_solo: gcp-k8s-l4-solo
             test_label_multi: ignore
             test_timeout: 480
-            test_skip_list: neuralmagic/tests/skip-for-remote-push-tmp.txt
+            test_skip_env_vars: neuralmagic/tests/test_skip_env_vars/smoke.txt
 
             benchmark_label: gcp-k8s-l4-solo
             benchmark_config_list_file: ./.github/data/nm_benchmark_remote_push_configs_list.txt
@@ -69,7 +69,7 @@ jobs:
             test_label_solo: gcp-k8s-l4-solo
             test_label_multi: ignore
             test_timeout: 480
-            test_skip_list: neuralmagic/tests/skip-for-remote-push-tmp.txt
+            test_skip_env_vars: neuralmagic/tests/test_skip_env_vars/smoke.txt
 
             benchmark_label: gcp-k8s-l4-solo
             benchmark_config_list_file: ./.github/data/nm_benchmark_remote_push_configs_list.txt

diff --git a/.github/workflows/nm-test.yml b/.github/workflows/nm-test.yml
@@ -23,8 +23,8 @@ on:
         description: "whl to test (variable appears late binding so unusable outside 'download artifact')"
         type: string
         required: true
-      test_skip_list:
-        description: 'file containing tests to skip'
+      test_skip_env_vars:
+        description: 'file with list of env vars controlling which tests to run'
         type: string
         required: true
 
@@ -51,8 +51,8 @@ on:
         description: "whl to test (variable appears late binding so unusable outside 'download artifact')"
         type: string
         required: true
-      test_skip_list:
-        description: 'file containing tests to skip'
+      test_skip_env_vars:
+        description: 'file with list of env vars controlling which tests to run'
         type: string
         required: true
 
@@ -131,12 +131,17 @@ jobs:
             - name: run buildkite script
               run: |
                 cd tests && sudo bash ../.buildkite/download-images.sh
+
+            - name: setenv test skip
+              id: setenv_test_skip
+              uses: ./.github/actions/nm-set-env-test-skip
+              with:
+                test_skip_env_vars: ${{ inputs.test_skip_env_vars }}
 
             - name: run tests
               id: test
               uses: ./.github/actions/nm-test-whl/
               with:
-                test_skip_list: ${{ inputs.test_skip_list }}
                 test_directory: tests
                 test_results: test-results
 

diff --git a/.github/workflows/nm-weekly.yml b/.github/workflows/nm-weekly.yml
@@ -27,7 +27,7 @@ jobs:
       test_label_solo: aws-avx2-32G-a10g-24G
       test_label_multi: aws-avx2-192G-4-a10g-96G
       test_timeout: 480
-      test_skip_list: neuralmagic/tests/skip-for-weekly.txt
+      test_skip_env_vars: neuralmagic/tests/test_skip_env_vars/full.txt
 
       benchmark_label: aws-avx2-32G-a10g-24G
       benchmark_config_list_file: ./.github/data/nm_benchmark_weekly_configs_list.txt

diff --git a/neuralmagic/tests/test_skip_env_vars/full.txt b/neuralmagic/tests/test_skip_env_vars/full.txt
@@ -0,0 +1,19 @@
+TEST_ACCURACY=DISABLE
+TEST_ASYNC_ENGINE=ENABLE
+TEST_BASIC_CORRECTNESS=ENABLE
+TEST_CORE=ENABLE
+TEST_DISTRIBUTED=DISABLE
+TEST_ENGINE=ENABLE
+TEST_ENTRYPOINTS=ENABLE
+TEST_KERNELS=ENABLE
+TEST_LORA=ENABLE
+TEST_METRICS=ENABLE
+TEST_MODELS=ENABLE
+TEST_MODELS_CORE=ENABLE
+TEST_PREFIX_CACHING=ENABLE
+TEST_QUANTIZATION=ENABLE
+TEST_SAMPLERS=ENABLE
+TEST_SPEC_DECODE=DISABLE
+TEST_TENSORIZER_LOADER=ENABLE
+TEST_TOKENIZATION=ENABLE
+TEST_WORKER=ENABLE
diff --git a/neuralmagic/tests/test_skip_env_vars/smoke.txt b/neuralmagic/tests/test_skip_env_vars/smoke.txt
@@ -0,0 +1,19 @@
+TEST_ACCURACY=DISABLE
+TEST_ASYNC_ENGINE=ENABLE
+TEST_BASIC_CORRECTNESS=DISABLE
+TEST_CORE=ENABLE
+TEST_DISTRIBUTED=DISABLE
+TEST_ENGINE=ENABLE
+TEST_ENTRYPOINTS=DISABLE
+TEST_KERNELS=DISABLE
+TEST_LORA=DISABLE
+TEST_METRICS=ENABLE
+TEST_MODELS=DISABLE
+TEST_MODELS_CORE=ENABLE
+TEST_PREFIX_CACHING=ENABLE
+TEST_QUANTIZATION=ENABLE
+TEST_SAMPLERS=DISABLE
+TEST_SPEC_DECODE=DISABLE
+TEST_TENSORIZER_LOADER=DISABLE
+TEST_TOKENIZATION=ENABLE
+TEST_WORKER=ENABLE
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -31,6 +31,8 @@ peft
 requests==2.31
 ray
 sentence-transformers # required for embedding
+optimum     # required for hf gptq baselines
+auto-gptq   # required for hf gptq baselines
 
 # Benchmarking
 aiohttp

diff --git a/tests/accuracy/test_lm_eval_correctness.py b/tests/accuracy/test_lm_eval_correctness.py
@@ -8,6 +8,11 @@
 import yaml
 
 from tests.nm_utils.server import ServerContext
+from tests.nm_utils.utils_skip import should_skip_test_group
+
+if should_skip_test_group(group_name="TEST_ACCURACY"):
+    pytest.skip("TEST_ACCURACY=DISABLE, skipping accuracy test group",
+                allow_module_level=True)
 
 if TYPE_CHECKING:
     import lm_eval as lm_eval_t

diff --git a/tests/async_engine/test_api_server.py b/tests/async_engine/test_api_server.py
@@ -7,6 +7,12 @@
 import pytest
 import requests
 
+from tests.nm_utils.utils_skip import should_skip_test_group
+
+if should_skip_test_group(group_name="TEST_ASYNC_ENGINE"):
+    pytest.skip("TEST_ASYNC_ENGINE=DISABLE, skipping async engine test group",
+                allow_module_level=True)
+
 
 def _query_server(prompt: str, max_tokens: int = 5) -> dict:
     response = requests.post("http://localhost:8000/generate",

diff --git a/tests/async_engine/test_async_llm_engine.py b/tests/async_engine/test_async_llm_engine.py
@@ -3,8 +3,13 @@
 
 import pytest
 
+from tests.nm_utils.utils_skip import should_skip_test_group
 from vllm.engine.async_llm_engine import AsyncLLMEngine
 
+if should_skip_test_group(group_name="TEST_ASYNC_ENGINE"):
+    pytest.skip("TEST_ASYNC_ENGINE=DISABLE, skipping async engine test group",
+                allow_module_level=True)
+
 
 @dataclass
 class RequestOutput:

diff --git a/tests/async_engine/test_chat_template.py b/tests/async_engine/test_chat_template.py
@@ -4,10 +4,15 @@
 
 import pytest
 
+from tests.nm_utils.utils_skip import should_skip_test_group
 from vllm.entrypoints.openai.protocol import ChatCompletionRequest
 from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
 from vllm.transformers_utils.tokenizer import get_tokenizer
 
+if should_skip_test_group(group_name="TEST_ASYNC_ENGINE"):
+    pytest.skip("TEST_ASYNC_ENGINE=DISABLE, skipping async engine test group",
+                allow_module_level=True)
+
 chatml_jinja_path = pathlib.Path(os.path.dirname(os.path.abspath(
     __file__))).parent.parent / "examples/template_chatml.jinja"
 assert chatml_jinja_path.exists()

diff --git a/tests/async_engine/test_openapi_server_ray.py b/tests/async_engine/test_openapi_server_ray.py
@@ -4,8 +4,13 @@
 # and debugging.
 import ray
 
+from tests.nm_utils.utils_skip import should_skip_test_group
 from tests.utils import ServerRunner
 
+if should_skip_test_group(group_name="TEST_ASYNC_ENGINE"):
+    pytest.skip("TEST_ASYNC_ENGINE=DISABLE, skipping async engine test group",
+                allow_module_level=True)
+
 # any model with a chat template should work here
 MODEL_NAME = "facebook/opt-125m"
 

diff --git a/tests/async_engine/test_request_tracker.py b/tests/async_engine/test_request_tracker.py
@@ -1,8 +1,13 @@
 import pytest
 
+from tests.nm_utils.utils_skip import should_skip_test_group
 from vllm.engine.async_llm_engine import RequestTracker
 from vllm.outputs import RequestOutput
 
+if should_skip_test_group(group_name="TEST_ASYNC_ENGINE"):
+    pytest.skip("TEST_ASYNC_ENGINE=DISABLE, skipping async engine test group",
+                allow_module_level=True)
+
 
 @pytest.mark.asyncio
 async def test_request_tracker():

diff --git a/tests/basic_correctness/test_basic_correctness.py b/tests/basic_correctness/test_basic_correctness.py
@@ -7,8 +7,14 @@
 
 import pytest
 
+from tests.nm_utils.utils_skip import should_skip_test_group
 from vllm import LLM
 
+if should_skip_test_group(group_name="TEST_BASIC_CORRECTNESS"):
+    pytest.skip(
+        "TEST_BASIC_CORRECTNESS=DISABLE, skipping basic correctness test group",
+        allow_module_level=True)
+
 MODELS = [
     "facebook/opt-125m",
     "meta-llama/Llama-2-7b-hf",