Small fixes from conflict resolution

neuralmagic · Jun 13, 2024 · ef38251 · ef38251 · github-actions · Jun 13, 2024
1 parent 4b691b9
commit ef38251
Show file tree

Hide file tree

Showing 3 changed files with 1 addition and 38 deletions.
diff --git a/tests/kernels/test_int8_quant.py b/tests/kernels/test_int8_quant.py
@@ -1,9 +1,9 @@
 import pytest
 import torch
 
-from tests.nm_utils.utils_skip import should_skip_test_group
 # ruff: noqa: F401
 import vllm._C
+from tests.nm_utils.utils_skip import should_skip_test_group
 
 if should_skip_test_group(group_name="TEST_KERNELS"):
     pytest.skip("TEST_KERNELS=DISABLE, skipping kernels test group",

diff --git a/tests/models/test_llava.py b/tests/models/test_llava.py
@@ -42,42 +42,6 @@ def iter_llava_configs(model_name: str):
                                         image_processor_revision=None))
 
 
-from ..conftest import IMAGE_FILES
-
-pytestmark = pytest.mark.llava
-
-# The image token is placed before "user" on purpose so that the test can pass
-HF_IMAGE_PROMPTS = [
-    "<image>\nUSER: What's the content of the image?\nASSISTANT:",
-    "<image>\nUSER: What is the season?\nASSISTANT:",
-]
-
-assert len(HF_IMAGE_PROMPTS) == len(IMAGE_FILES)
-
-
-def iter_llava_configs(model_name: str):
-    image_hw_to_feature_size = {
-        (336, 336): 576,
-    }
-
-    for (h, w), f in image_hw_to_feature_size.items():
-        for input_type, input_shape in [
-            (VisionLanguageConfig.ImageInputType.PIXEL_VALUES, (1, 3, h, w)),
-            (VisionLanguageConfig.ImageInputType.IMAGE_FEATURES, (1, f, 1024)),
-        ]:
-            yield (model_name,
-                   VisionLanguageConfig(image_input_type=input_type,
-                                        image_feature_size=f,
-                                        image_token_id=32000,
-                                        image_input_shape=input_shape,
-                                        image_processor=model_name,
-                                        image_processor_revision=None))
-
-
-if should_skip_test_group(group_name="TEST_MODELS"):
-    pytest.skip("TEST_MODELS=DISABLE, skipping model test group",
-                allow_module_level=True)
-
 model_and_vl_config = [
     *iter_llava_configs("llava-hf/llava-1.5-7b-hf"),
 ]

diff --git a/tests/quantization/test_compressed_tensors.py b/tests/quantization/test_compressed_tensors.py
@@ -12,7 +12,6 @@
     CompressedTensorsLinearMethod, CompressedTensorsW8A8DynamicToken,
     CompressedTensorsW8A8StaticTensor)
 
-
 if should_skip_test_group(group_name="TEST_QUANTIZATION"):
     pytest.skip("TEST_QUANTIZATION=DISABLE, skipping quantization test group",
                 allow_module_level=True)
Benchmark suite	Current: `ef38251`	Previous: `5aaec10`	Ratio
`{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`2.460970676040177` prompts/s
`{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`945.0127395994278` tokens/s