This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

Merge branch 'main' into rel-eng/dial-in-accuracy-tests
robertgshaw2-neuralmagic authored Jun 20, 2024 · 2 parents ce0fcdb + abc0ceb · commit 0dba00a
Showing 133 changed files with 4,486 additions and 1,558 deletions.
1 change: 1 addition & 0 deletions .buildkite/test-pipeline.yaml
@@ -48,6 +48,7 @@ steps:
- TEST_DIST_MODEL=meta-llama/Llama-2-7b-hf DISTRIBUTED_EXECUTOR_BACKEND=mp pytest -v -s distributed/test_chunked_prefill_distributed.py
- pytest -v -s spec_decode/e2e/test_integration_dist.py
- CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py
- CUDA_VISIBLE_DEVICES=0,1 pytest -v -s distributed/test_utils.py

- label: Distributed Tests (Multiple Groups)
#mirror_hardwares: [amd]
32 changes: 30 additions & 2 deletions .buildkite/test-template-aws.j2
@@ -7,7 +7,7 @@ steps:
queue: cpu_queue
commands:
- "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
- "docker build --build-arg max_jobs=16 --tag {{ docker_image }} --target test --progress plain ."
- "docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --tag {{ docker_image }} --target test --progress plain ."
- "docker push {{ docker_image }}"
env:
DOCKER_BUILDKIT: "1"
@@ -19,6 +19,34 @@ steps:
limit: 5
- wait

- group: "AMD Tests"
depends_on: ~
steps:
{% for step in steps %}
{% if step.mirror_hardwares and "amd" in step.mirror_hardwares %}
- label: "AMD: {{ step.label }}"
agents:
queue: amd
command: bash .buildkite/run-amd-test.sh "cd {{ (step.working_dir or default_working_dir) | safe }} ; {{ step.command or (step.commands | join(" ; ")) | safe }}"
env:
DOCKER_BUILDKIT: "1"
soft_fail: true
{% endif %}
{% endfor %}

- label: "Neuron Test"
depends_on: ~
agents:
queue: neuron
command: bash .buildkite/run-neuron-test.sh
soft_fail: false

- label: "Intel Test"
depends_on: ~
agents:
queue: intel
command: bash .buildkite/run-cpu-test.sh

{% for step in steps %}
- label: "{{ step.label }}"
agents:
@@ -31,7 +59,7 @@ steps:
{% else %}
queue: gpu_1_queue
{% endif %}
soft_fail: true
soft_fail: {{ step.soft_fail or false }}
{% if step.parallelism %}
parallelism: {{ step.parallelism }}
{% endif %}
17 changes: 0 additions & 17 deletions .github/actions/nm-install-testmo/action.yml

This file was deleted.

3 changes: 3 additions & 0 deletions .github/actions/nm-set-env/action.yml
@@ -39,6 +39,9 @@ runs:
# testmo
echo "XDG_CONFIG_HOME=/usr/local/apps" >> $GITHUB_ENV
echo "PROJECT_ID=12" >> $GITHUB_ENV
# disable usage stats (writes to protected /usr/local/apps)
echo "VLLM_NO_USAGE_STATS=1" >> $GITHUB_ENV
echo "DO_NOT_TRACK=1" >> $GITHUB_ENV
env:
HF_TOKEN_SECRET: ${{ inputs.hf_token }}
shell: bash
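The two variables exported above opt vLLM out of usage-stats reporting, which otherwise writes under XDG_CONFIG_HOME. A minimal Python sketch of the opt-out check an application might perform (illustrative only; the exact values vLLM accepts are an assumption here, and "1" matches what the action exports):

import os

# Hedged sketch: treat either variable being set to "1" as an opt-out.
def usage_stats_disabled() -> bool:
    return (os.environ.get("VLLM_NO_USAGE_STATS") == "1"
            or os.environ.get("DO_NOT_TRACK") == "1")

print(usage_stats_disabled())  # True inside this CI job, since both are exported as "1"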
27 changes: 0 additions & 27 deletions .github/actions/nm-set-python/action.yml

This file was deleted.

17 changes: 16 additions & 1 deletion .github/scripts/run-tests
@@ -109,6 +109,10 @@ do
LOCAL_SUCCESS=0
RESULT_XML=$(echo ${TEST} | sed -e "s/${TEST_DIR}/${RESULTS_DIR}/" | sed -e "s/.py/.xml/")

# report which test is being run
# (in CI, if a test hangs, this logs *which* test is running *before* it hangs)
echo "=== RUNNING TEST: ${TEST} ==="

# this is a bit messy and brittle, but certain tests
# need to be run with specific options
if [[ "${TEST}" == *"kernels"* || "${TEST}" == *"samplers"* ]]; then
@@ -125,7 +129,18 @@
pytest ${CC_PYTEST_FLAGS} --junitxml=${RESULT_XML} ${TEST} || LOCAL_SUCCESS=$?
fi

SUCCESS=$((SUCCESS + LOCAL_SUCCESS))
# if a file gets exit code 0, we are good
if [[ $LOCAL_SUCCESS == 0 ]]; then
echo "=== PASSED TEST: ${TEST} ==="
# if a file does not run any tests, pytest reports exit code of 5
# since we skip full modules in our skipping strategy, this is common
elif [[ $LOCAL_SUCCESS == 5 ]]; then
echo "=== SKIPPED TEST: ${TEST} ==="
# otherwise, report failure
else
echo "=== FAILED TEST: ${TEST} ==="
SUCCESS=$((SUCCESS + LOCAL_SUCCESS))
fi

done

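For reference, the exit-code convention the updated script relies on is pytest's own: 0 for success and 5 when no tests are collected. A minimal Python illustration of the same pass/skip/fail mapping (illustrative only, not part of the commit):

import pytest

# pytest.ExitCode is pytest's public enum of exit codes.
assert int(pytest.ExitCode.OK) == 0
assert int(pytest.ExitCode.NO_TESTS_COLLECTED) == 5

def classify(exit_code: int) -> str:
    """Mirror the shell logic above: 0 -> passed, 5 -> skipped, anything else -> failed."""
    if exit_code == pytest.ExitCode.OK:
        return "PASSED"
    if exit_code == pytest.ExitCode.NO_TESTS_COLLECTED:
        return "SKIPPED"
    return "FAILED"

print(classify(5))  # -> SKIPPED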
2 changes: 1 addition & 1 deletion .github/workflows/nm-build.yml
@@ -98,7 +98,7 @@ jobs:

- name: set python
id: set_python
uses: ./.github/actions/nm-set-python/
uses: neuralmagic/nm-actions/actions/set-python@main
with:
python: ${{ inputs.python }}
venv: ${{ env.VENV_BASE }}
2 changes: 1 addition & 1 deletion .github/workflows/nm-nightly.yml
@@ -27,7 +27,7 @@ jobs:
test_label_solo: gcp-k8s-l4-solo
test_label_multi: ignore
test_timeout: 480
test_skip_list: neuralmagic/tests/skip-for-nightly.txt
test_skip_env_vars: neuralmagic/tests/test_skip_env_vars/full.txt

benchmark_label: gcp-k8s-l4-solo
benchmark_config_list_file: ./.github/data/nm_benchmark_remote_push_configs_list.txt
4 changes: 2 additions & 2 deletions .github/workflows/nm-test.yml
@@ -94,7 +94,7 @@ jobs:
nvcc_threads: 0

- name: install testmo
uses: ./.github/actions/nm-install-testmo/
uses: neuralmagic/nm-actions/actions/install-testmo@main

- name: create testmo run
id: create_testmo_run
@@ -131,7 +131,7 @@
- name: run buildkite script
run: |
cd tests && sudo bash ../.buildkite/download-images.sh
- name: setenv test skip
id: setenv_test_skip
uses: ./.github/actions/nm-set-env-test-skip
2 changes: 1 addition & 1 deletion .github/workflows/ruff.yml
@@ -25,7 +25,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install ruff==0.1.5 codespell==2.2.6 tomli==2.0.1 isort==5.13.2
pip install ruff==0.1.5 codespell==2.3.0 tomli==2.0.1 isort==5.13.2
- name: Analysing the code with ruff
run: |
ruff .
18 changes: 11 additions & 7 deletions CMakeLists.txt
@@ -8,6 +8,7 @@ message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
message(STATUS "Target device: ${VLLM_TARGET_DEVICE}")

include(${CMAKE_CURRENT_LIST_DIR}/cmake/utils.cmake)
include(${CMAKE_CURRENT_LIST_DIR}/cmake/dep.cmake)

#
# Supported python versions. These versions will be searched in order, the
@@ -179,17 +180,17 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
"csrc/quantization/gptq_marlin/gptq_marlin.cu"
"csrc/quantization/gptq_marlin/gptq_marlin_repack.cu"
"csrc/custom_all_reduce.cu"
"csrc/quantization/cutlass_w8a8/scaled_mm_dq_entry.cu"
"csrc/quantization/cutlass_w8a8/scaled_mm_dq_c2x.cu"
"csrc/quantization/cutlass_w8a8/scaled_mm_dq_c3x.cu")
"csrc/quantization/cutlass_w8a8/scaled_mm_entry.cu"
"csrc/quantization/cutlass_w8a8/scaled_mm_c2x.cu"
"csrc/quantization/cutlass_w8a8/scaled_mm_c3x.cu")

#
# The CUTLASS kernels for Hopper require sm90a to be enabled.
# This is done via the below gencode option, BUT that creates kernels for both sm90 and sm90a.
# That adds an extra 17MB to compiled binary, so instead we selectively enable it.
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER 12.0)
set_source_files_properties(
"csrc/quantization/cutlass_w8a8/scaled_mm_dq_c3x.cu"
"csrc/quantization/cutlass_w8a8/scaled_mm_c3x.cu"
PROPERTIES
COMPILE_FLAGS
"-gencode arch=compute_90a,code=sm_90a")
@@ -206,7 +207,8 @@ define_gpu_extension_target(
ARCHITECTURES ${VLLM_GPU_ARCHES}
INCLUDE_DIRECTORIES ${CUTLASS_INCLUDE_DIR};${CUTLASS_TOOLS_UTIL_INCLUDE_DIR}
USE_SABI 3
WITH_SOABI)
WITH_SOABI
LIBRARIES cmake_git_version_tracking)

#
# _moe_C extension
@@ -224,7 +226,8 @@ define_gpu_extension_target(
COMPILE_FLAGS ${VLLM_GPU_FLAGS}
ARCHITECTURES ${VLLM_GPU_ARCHES}
USE_SABI 3
WITH_SOABI)
WITH_SOABI
LIBRARIES cmake_git_version_tracking)

#
# _punica_C extension
@@ -276,7 +279,8 @@ if (VLLM_PUNICA_GPU_ARCHES)
COMPILE_FLAGS ${VLLM_PUNICA_GPU_FLAGS}
ARCHITECTURES ${VLLM_PUNICA_GPU_ARCHES}
USE_SABI 3
WITH_SOABI)
WITH_SOABI
LIBRARIES cmake_git_version_tracking)
else()
message(WARNING "Unable to create _punica_C target because none of the "
"requested architectures (${VLLM_GPU_ARCHES}) are supported, i.e. >= 8.0")
6 changes: 4 additions & 2 deletions Dockerfile
@@ -9,8 +9,8 @@
# prepare basic build environment
FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 AS dev

RUN apt-get update -y && \
apt-get install -y python3-pip git
RUN apt-get update -y \
&& apt-get install -y python3-pip git curl sudo

# Workaround for https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
@@ -27,6 +27,8 @@ RUN --mount=type=cache,target=/root/.cache/pip \
pip install -r requirements-cuda.txt

# install development dependencies
COPY requirements-lint.txt requirements-lint.txt
COPY requirements-test.txt requirements-test.txt
COPY requirements-dev.txt requirements-dev.txt
RUN --mount=type=cache,target=/root/.cache/pip \
pip install -r requirements-dev.txt
8 changes: 6 additions & 2 deletions Dockerfile.cpu
@@ -3,9 +3,13 @@
FROM ubuntu:22.04 AS cpu-test-1

RUN apt-get update -y \
&& apt-get install -y git wget vim numactl gcc-12 g++-12 python3 python3-pip \
&& apt-get install -y git wget vim numactl gcc-12 g++-12 python3 python3-pip libtcmalloc-minimal4 \
&& update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12

RUN echo 'export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:$LD_PRELOAD' >> ~/.bashrc

RUN pip install https://intel-extension-for-pytorch.s3.amazonaws.com/ipex_dev/cpu/intel_extension_for_pytorch-2.3.100%2Bgit0eb3473-cp310-cp310-linux_x86_64.whl

RUN pip install --upgrade pip \
&& pip install wheel packaging ninja "setuptools>=49.4.0" numpy

@@ -21,6 +25,6 @@ RUN VLLM_TARGET_DEVICE=cpu python3 setup.py install

WORKDIR /workspace/

RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks

CMD ["/bin/bash"]
19 changes: 19 additions & 0 deletions Dockerfile.tpu
@@ -0,0 +1,19 @@
ARG NIGHTLY_DATE="20240601"
ARG BASE_IMAGE="us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla:nightly_3.10_tpuvm_$NIGHTLY_DATE"

FROM $BASE_IMAGE

WORKDIR /workspace
COPY . /workspace/vllm

ENV VLLM_TARGET_DEVICE="tpu"
# Install aiohttp separately to avoid build errors.
RUN pip install aiohttp
# Install the TPU and Pallas dependencies.
RUN pip install torch_xla[tpu] -f https://storage.googleapis.com/libtpu-releases/index.html
RUN pip install torch_xla[pallas] -f https://storage.googleapis.com/jax-releases/jax_nightly_releases.html -f https://storage.googleapis.com/jax-releases/jaxlib_nightly_releases.html

# Build vLLM.
RUN cd /workspace/vllm && python setup.py develop

CMD ["/bin/bash"]
8 changes: 6 additions & 2 deletions benchmarks/backend_request_func.py
@@ -68,9 +68,13 @@ async def async_request_tgi(
chunk_bytes = chunk_bytes.strip()
if not chunk_bytes:
continue
chunk_bytes = chunk_bytes.decode("utf-8")

chunk = remove_prefix(chunk_bytes.decode("utf-8"),
"data:")
#NOTE: Sometimes TGI returns a ping response without
# any data, we should skip it.
if chunk_bytes.startswith(":"):
continue
chunk = remove_prefix(chunk_bytes, "data:")

data = json.loads(chunk)
timestamp = time.perf_counter()
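The added check follows the server-sent-events convention that lines beginning with ":" are comments/keep-alive pings and carry no payload. A standalone Python sketch of the same parsing, illustrative only (the remove_prefix helper here mirrors the one defined in this file):

import json

def remove_prefix(text: str, prefix: str) -> str:
    # Equivalent to str.removeprefix(), kept for Python < 3.9 compatibility.
    return text[len(prefix):] if text.startswith(prefix) else text

def parse_sse_chunk(chunk_bytes: bytes):
    """Return the decoded JSON payload, or None for empty / ping chunks."""
    chunk = chunk_bytes.strip().decode("utf-8")
    if not chunk or chunk.startswith(":"):  # ":" marks an SSE comment / ping
        return None
    return json.loads(remove_prefix(chunk, "data:"))

assert parse_sse_chunk(b": ping") is None
assert parse_sse_chunk(b'data:{"token": {"text": "hi"}}') == {"token": {"text": "hi"}}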
2 changes: 1 addition & 1 deletion benchmarks/benchmark_latency.py
@@ -189,7 +189,7 @@ def run_to_completion(profile_dir: Optional[str] = None):
"--device",
type=str,
default="cuda",
choices=["cuda", "cpu"],
choices=["cuda", "cpu", "tpu"],
help='device type for vLLM execution, supporting CUDA and CPU.')
parser.add_argument('--block-size',
type=int,
2 changes: 1 addition & 1 deletion benchmarks/benchmark_throughput.py
@@ -346,7 +346,7 @@ def main(args: argparse.Namespace):
"--device",
type=str,
default="cuda",
choices=["cuda", "cpu"],
choices=["cuda", "cpu", "tpu"],
help='device type for vLLM execution, supporting CUDA and CPU.')
parser.add_argument(
"--enable-prefix-caching",
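Both benchmark scripts above gain "tpu" as an accepted --device value. A trimmed-down argparse sketch of the option as changed (hypothetical standalone parser, for illustration only):

import argparse

parser = argparse.ArgumentParser(description="illustrative subset of the benchmark CLI")
parser.add_argument("--device",
                    type=str,
                    default="cuda",
                    choices=["cuda", "cpu", "tpu"],
                    help="device type for vLLM execution")

args = parser.parse_args(["--device", "tpu"])
print(args.device)  # -> tpu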
6 changes: 1 addition & 5 deletions benchmarks/cutlass_benchmarks/w8a8_benchmarks.py
@@ -76,11 +76,7 @@ def pytorch_fp8_impl_fast_accum(a: torch.tensor, b: torch.tensor,
def cutlass_impl(a: torch.tensor, b: torch.tensor, scale_a: torch.tensor,
scale_b: torch.tensor,
out_dtype: torch.dtype) -> torch.tensor:
return ops.cutlass_scaled_mm_dq(a,
b,
scale_a,
scale_b,
out_dtype=out_dtype)
return ops.cutlass_scaled_mm(a, b, scale_a, scale_b, out_dtype=out_dtype)


# bench
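This hunk tracks the kernel rename from ops.cutlass_scaled_mm_dq to ops.cutlass_scaled_mm; per the diff, the arguments are unchanged. A hypothetical compatibility shim (not part of vLLM) makes the mapping explicit:

# Hypothetical helper for callers still written against the old name.
# `ops` is expected to be vLLM's custom-ops module; only the op name changed.
def cutlass_scaled_mm_dq_compat(ops, a, b, scale_a, scale_b, out_dtype):
    """Forward an old-style cutlass_scaled_mm_dq call to the renamed kernel."""
    return ops.cutlass_scaled_mm(a, b, scale_a, scale_b, out_dtype=out_dtype)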
6 changes: 5 additions & 1 deletion cmake/cpu_extension.cmake
@@ -33,6 +33,7 @@ function (find_isa CPUINFO TARGET OUT)
endif()
endfunction()

find_isa(${CPUINFO} "avx2" AVX2_FOUND)
find_isa(${CPUINFO} "avx512f" AVX512_FOUND)

if (AVX512_FOUND)
@@ -53,8 +54,11 @@ if (AVX512_FOUND)
else()
message(WARNING "Disable AVX512-BF16 ISA support, no avx512_bf16 found in local CPU flags." " If cross-compilation is required, please set env VLLM_CPU_AVX512BF16=1.")
endif()
elseif (AVX2_FOUND)
list(APPEND CXX_COMPILE_FLAGS "-mavx2")
message(WARNING "vLLM CPU backend using AVX2 ISA")
else()
message(FATAL_ERROR "vLLM CPU backend requires AVX512 ISA support.")
message(FATAL_ERROR "vLLM CPU backend requires AVX512 or AVX2 ISA support.")
endif()

message(STATUS "CPU extension compile flags: ${CXX_COMPILE_FLAGS}")
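The find_isa() calls above substring-match the contents of /proc/cpuinfo. A rough Python analogue of the new AVX512-or-AVX2 selection logic (Linux-only sketch, not part of the build):

def has_isa(flag: str, cpuinfo_path: str = "/proc/cpuinfo") -> bool:
    """Substring check over the CPU flags, like find_isa() in cpu_extension.cmake."""
    with open(cpuinfo_path) as f:
        return flag in f.read()

if has_isa("avx512f"):
    print("building the vLLM CPU extension with AVX512 flags")
elif has_isa("avx2"):
    print("falling back to AVX2 (-mavx2)")
else:
    raise SystemExit("vLLM CPU backend requires AVX512 or AVX2 ISA support.")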
6 changes: 6 additions & 0 deletions cmake/dep.cmake
@@ -0,0 +1,6 @@
include(FetchContent)
FetchContent_Declare(cmake_git_version_tracking
GIT_REPOSITORY https://github.com/andrew-hardin/cmake-git-version-tracking.git
GIT_TAG 6c0cb87edd029ddfb403a8e24577c144a03605a6
)
FetchContent_MakeAvailable(cmake_git_version_tracking)

4 comments on commit 0dba00a

@github-actions

bigger_is_better

Benchmark suite Current: 0dba00a Previous: d8da97b Ratio
{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"} 2.4834617389958384 prompts/s
{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"} 953.649307774402 tokens/s

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions

bigger_is_better

Benchmark suite Current: 0dba00a Previous: d8da97b Ratio
{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.11.4 (main, Jun 7 2023, 11:01:02) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"} 2.493410950719893 prompts/s
{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.11.4 (main, Jun 7 2023, 11:01:02) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"} 957.4698050764389 tokens/s

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions

bigger_is_better

Benchmark suite Current: 0dba00a Previous: d8da97b Ratio
{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.8.17 (default, Jun 7 2023, 12:29:56) \n[GCC 11.3.0]", "torch_version": "2.3.0+cu121"} 2.487254606871566 prompts/s
{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.8.17 (default, Jun 7 2023, 12:29:56) \n[GCC 11.3.0]", "torch_version": "2.3.0+cu121"} 955.1057690386813 tokens/s

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions

bigger_is_better

Benchmark suite Current: 0dba00a Previous: d8da97b Ratio
{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.9.17 (main, Jun 7 2023, 12:34:12) \n[GCC 11.3.0]", "torch_version": "2.3.0+cu121"} 2.5014163422745868 prompts/s 2.4505808553524178 prompts/s 0.98
{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.9.17 (main, Jun 7 2023, 12:34:12) \n[GCC 11.3.0]", "torch_version": "2.3.0+cu121"} 960.5438754334413 tokens/s 941.0230484553284 tokens/s 0.98

This comment was automatically generated by workflow using github-action-benchmark.
