This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

Commit

Small fixes from conflict resolution
dbarbuzzi committed Jun 13, 2024
1 parent 4b691b9 commit ef38251
Showing 3 changed files with 1 addition and 38 deletions.
tests/kernels/test_int8_quant.py (2 changes: 1 addition & 1 deletion)
@@ -1,9 +1,9 @@
 import pytest
 import torch
 
-from tests.nm_utils.utils_skip import should_skip_test_group
 # ruff: noqa: F401
 import vllm._C
+from tests.nm_utils.utils_skip import should_skip_test_group
 
 if should_skip_test_group(group_name="TEST_KERNELS"):
     pytest.skip("TEST_KERNELS=DISABLE, skipping kernels test group",
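For context, the changed test modules gate entire test groups at import time with the repository's should_skip_test_group helper. Below is a minimal sketch of that pattern, assuming the helper simply checks whether an environment variable such as TEST_KERNELS is set to DISABLE; the real implementation lives in tests/nm_utils/utils_skip.py and may differ.

import os

import pytest


def should_skip_test_group(group_name: str) -> bool:
    # Hypothetical stand-in for tests/nm_utils/utils_skip.py: treat
    # e.g. TEST_KERNELS=DISABLE in the environment as "skip this group".
    return os.environ.get(group_name, "").upper() == "DISABLE"


# Module-level gate, mirroring the pattern used in the changed test files.
if should_skip_test_group(group_name="TEST_KERNELS"):
    pytest.skip("TEST_KERNELS=DISABLE, skipping kernels test group",
                allow_module_level=True)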
tests/models/test_llava.py (36 changes: 0 additions & 36 deletions)
@@ -42,42 +42,6 @@ def iter_llava_configs(model_name: str):
                                         image_processor_revision=None))
 
 
-from ..conftest import IMAGE_FILES
-
-pytestmark = pytest.mark.llava
-
-# The image token is placed before "user" on purpose so that the test can pass
-HF_IMAGE_PROMPTS = [
-    "<image>\nUSER: What's the content of the image?\nASSISTANT:",
-    "<image>\nUSER: What is the season?\nASSISTANT:",
-]
-
-assert len(HF_IMAGE_PROMPTS) == len(IMAGE_FILES)
-
-
-def iter_llava_configs(model_name: str):
-    image_hw_to_feature_size = {
-        (336, 336): 576,
-    }
-
-    for (h, w), f in image_hw_to_feature_size.items():
-        for input_type, input_shape in [
-            (VisionLanguageConfig.ImageInputType.PIXEL_VALUES, (1, 3, h, w)),
-            (VisionLanguageConfig.ImageInputType.IMAGE_FEATURES, (1, f, 1024)),
-        ]:
-            yield (model_name,
-                   VisionLanguageConfig(image_input_type=input_type,
-                                        image_feature_size=f,
-                                        image_token_id=32000,
-                                        image_input_shape=input_shape,
-                                        image_processor=model_name,
-                                        image_processor_revision=None))
-
-
-if should_skip_test_group(group_name="TEST_MODELS"):
-    pytest.skip("TEST_MODELS=DISABLE, skipping model test group",
-                allow_module_level=True)
-
 model_and_vl_config = [
     *iter_llava_configs("llava-hf/llava-1.5-7b-hf"),
 ]
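The surviving iter_llava_configs yields (model_name, VisionLanguageConfig) pairs, so model_and_vl_config is a list of such tuples. A hypothetical sketch of how a test could be parametrized over it follows; the attribute names assume the config keeps its constructor keyword arguments, and the actual tests in this file may unpack the pairs differently.

import pytest

# Hypothetical parametrization over the (model_name, VisionLanguageConfig)
# pairs produced by iter_llava_configs("llava-hf/llava-1.5-7b-hf").
@pytest.mark.parametrize("model_and_config", model_and_vl_config)
def test_llava_config_is_consistent(model_and_config):
    model_name, vlm_config = model_and_config
    assert isinstance(model_name, str)
    # Assumes VisionLanguageConfig exposes the image_token_id it was built with.
    assert vlm_config.image_token_id == 32000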
tests/quantization/test_compressed_tensors.py (1 change: 0 additions & 1 deletion)
Expand Up @@ -12,7 +12,6 @@
CompressedTensorsLinearMethod, CompressedTensorsW8A8DynamicToken,
CompressedTensorsW8A8StaticTensor)


if should_skip_test_group(group_name="TEST_QUANTIZATION"):
pytest.skip("TEST_QUANTIZATION=DISABLE, skipping quantization test group",
allow_module_level=True)

1 comment on commit ef38251

@github-actions


bigger_is_better

Benchmark suite: Current ef38251 vs. Previous 5aaec10

Both entries share the same configuration: VLLM Engine throughput - synthetic; model NousResearch/Llama-2-7b-chat-hf; max_model_len 4096; benchmark_throughput args: use-all-available-gpus, input-len 256, output-len 128, num-prompts 1000; GPU: NVIDIA L4 x 1; vllm_version 0.5.0; python_version 3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]; torch_version 2.3.0+cu121.

request_throughput (Current: ef38251): 2.460970676040177 prompts/s
token_throughput (Current: ef38251): 945.0127395994278 tokens/s

This comment was automatically generated by a workflow using github-action-benchmark.
