# nm-benchmark.yml
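#
# Reusable benchmark workflow: provisions the requested runner, installs the
# nm-vllm wheel, runs the configured benchmark suite, and publishes the
# results as workflow artifacts, to the shared Filestore, and (optionally) to
# the nm-gh-pages benchmark UI.
#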
name: benchmark
on:
  # makes workflow reusable
  workflow_call:
    inputs:
      label:
        description: "requested runner label (specifies instance)"
        type: string
        required: true
      benchmark_config_list_file:
        description: "path to a file containing the list of benchmark configs to run; for reference, see .github/data/nm_benchmark_configs_list.txt"
        type: string
        required: true
      timeout:
        description: "maximum time, in minutes, that the runner will be up"
        type: string
        required: true
      gitref:
        description: "git commit hash or branch name"
        type: string
        required: true
      python:
        description: "python version, e.g. 3.10.12"
        type: string
        required: true
      whl:
        description: "whl to test (the variable appears to be late-binding, so it is unusable outside 'download artifact')"
        type: string
        required: true
      push_benchmark_results_to_gh_pages:
        description: "When set to true, the workflow pushes all benchmarking results to the gh-pages UI"
        type: string
        required: true
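
  # NOTE: workflow_dispatch cannot reuse the workflow_call input definitions,
  # so the same inputs are redeclared below; only
  # push_benchmark_results_to_gh_pages differs (a choice with a default).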
  # makes workflow manually callable
  workflow_dispatch:
    inputs:
      label:
        description: "requested runner label (specifies instance)"
        type: string
        required: true
      benchmark_config_list_file:
        description: "path to a file containing the list of benchmark configs to run; for reference, see .github/data/nm_benchmark_configs_list.txt"
        type: string
        required: true
      timeout:
        description: "maximum time, in minutes, that the runner will be up"
        type: string
        required: true
      gitref:
        description: "git commit hash or branch name"
        type: string
        required: true
      python:
        description: "python version, e.g. 3.10.12"
        type: string
        required: true
      whl:
        description: "whl to test (the variable appears to be late-binding, so it is unusable outside 'download artifact')"
        type: string
        required: true
      push_benchmark_results_to_gh_pages:
        description: "When set to true, the workflow pushes all benchmarking results to the gh-pages UI"
        type: choice
        options:
          - 'true'
          - 'false'
        default: 'false'

env:
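  # Default destination for benchmark results on the shared Filestore cache.
  # The "mount EFS" step below overrides this to /EFS/benchmark_results when
  # the job runs on an AWS-labeled runner.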
  BENCHMARK_RESULTS: /model-cache/benchmark_results

jobs:
  BENCHMARK:
    runs-on: ${{ inputs.label }}
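    # the timeout input arrives as a string; fromJSON converts it to the
    # number that timeout-minutes expects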
    timeout-minutes: ${{ fromJSON(inputs.timeout) }}
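    # export the benchmark-JSON artifact name so the BENCHMARK_REPORT job can
    # download it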
    outputs:
      gh_action_benchmark_input_artifact_name: ${{ steps.set_gh_action_benchmark_input_artifact_name.outputs.gh_action_benchmark_input_artifact_name }}
    steps:
      - name: set python
        id: set_python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ inputs.python }}

      - name: install automation components
        run: |
          sudo apt-get update --fix-missing
          sudo apt-get install -y git-all
          sudo apt-get install -y curl

      - name: checkout repository code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          ref: ${{ inputs.gitref }}
          submodules: recursive

      - name: setenv
        id: setenv
        uses: ./.github/actions/nm-set-env/
        with:
          hf_token: ${{ secrets.NM_HF_TOKEN }}
          Gi_per_thread: 1
          nvcc_threads: 0

      - name: hf cache
        id: hf_cache
        uses: ./.github/actions/nm-hf-cache/
        with:
          fs_cache: ${{ secrets.HF_FS_CACHE }}
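
      # The artifact-based wheel install below is kept for reference but is
      # currently disabled; the wheel is installed from the Neural Magic PyPI
      # index in the "install whl" step instead.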
      # - name: download whl
      #   id: download
      #   uses: actions/download-artifact@v4
      #   with:
      #     name: ${{ inputs.whl }}
      #     path: ${{ inputs.whl }}
      # - name: install whl
      #   id: install_whl
      #   uses: ./.github/actions/nm-install-whl/
      #   with:
      #     python: ${{ inputs.python }}
      #     venv:
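
      # Install the published nm-vllm wheel, then rename the in-tree vllm/ and
      # csrc/ directories (presumably so benchmarks import the installed
      # package rather than the source checkout).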
      - name: install whl
        run: |
          pip install --extra-index-url https://pypi.neuralmagic.com/simple nm-vllm
          mv vllm vllm-ignore
          mv csrc csrc-ignore

      - name: run benchmarks
        uses: ./.github/actions/nm-benchmark/
        with:
          benchmark_config_list_file: ${{ inputs.benchmark_config_list_file }}
          output_directory: benchmark-results
          python: ${{ inputs.python }}
          venv:

      - name: take a look
        run: |
          ls -al benchmark-results

      - name: store benchmark result artifacts
        if: success()
        uses: actions/upload-artifact@v4
        with:
          name: ${{ github.run_id }}-${{ inputs.label }}-${{ inputs.python }}
          path: benchmark-results
          retention-days: 2
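
      # On AWS runners, mount the EFS share and point BENCHMARK_RESULTS at it
      # so results land on the shared filesystem.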
      - name: mount EFS
        run: |
          if [[ ${{ inputs.label }} == *aws* ]]; then
            sudo mkdir -m 777 -p /EFS
            sudo mount -t nfs4 -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,noresvport ${{ secrets.HF_FS_CACHE }}:/ /EFS
            sudo chown -R $(whoami):$(whoami) /EFS
            # update results target
            echo "BENCHMARK_RESULTS=/EFS/benchmark_results" >> $GITHUB_ENV
          fi

      - name: copy benchmark results to Filestore
        if: success()
        uses: ./.github/actions/nm-copy-benchmark-data-to-fs
        with:
          label: ${{ inputs.label }}
          python: ${{ inputs.python }}
          src: benchmark-results
          fs_dst: ${{ env.BENCHMARK_RESULTS }}
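
      # The GHA benchmark JSONs produced below are consumed by
      # github-action-benchmark in the BENCHMARK_REPORT job.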
      # Produce GHA benchmark JSONs
      - name: make github-action-benchmark JSONs
        uses: ./.github/actions/nm-produce-gha-benchmark-json
        with:
          vllm_benchmark_jsons_path: benchmark-results
          # Metrics that are "better" when the value is greater are stored here
          bigger_is_better_output_file_path: gh-action-benchmark-jsons/bigger_is_better.json
          # Metrics that are "better" when the value is smaller are stored here
          smaller_is_better_output_file_path: gh-action-benchmark-jsons/smaller_is_better.json
          # Metrics that we only want to observe are stored here
          observation_metrics_output_file_path: gh-action-benchmark-jsons/observation_metrics.json
          python: ${{ inputs.python }}
          venv:
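
      # The artifact name encodes the run id, runner label, and python version;
      # it is exported via the job output declared above.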
      - name: set gh action benchmark input artifact name
        id: set_gh_action_benchmark_input_artifact_name
        run: |
          GH_ACTION_BENCHMARK_INPUT_ARTIFACT_NAME=`echo "gh_action_benchmark_jsons-${{ github.run_id }}-${{ inputs.label }}-${{ inputs.python }}"`
          echo "gh_action_benchmark_input_artifact_name=$GH_ACTION_BENCHMARK_INPUT_ARTIFACT_NAME" >> $GITHUB_OUTPUT

      - name: store gh action benchmark input artifacts
        if: success()
        uses: actions/upload-artifact@v4
        with:
          name: ${{ steps.set_gh_action_benchmark_input_artifact_name.outputs.gh_action_benchmark_input_artifact_name }}
          path: gh-action-benchmark-jsons
          retention-days: 1

      - name: copy gh action benchmark JSONs to Filestore
        if: success()
        uses: ./.github/actions/nm-copy-benchmark-data-to-fs
        with:
          label: ${{ inputs.label }}
          python: ${{ inputs.python }}
          src: gh-action-benchmark-jsons
          fs_dst: ${{ env.BENCHMARK_RESULTS }}
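
  # Publish the benchmark JSONs produced above to the nm-gh-pages branch via
  # github-action-benchmark (push is gated on push_benchmark_results_to_gh_pages).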
  BENCHMARK_REPORT:
    needs: [BENCHMARK]
    runs-on: ubuntu-latest
    timeout-minutes: 20
    permissions:
      # Permissions required to be able to push to the nm-gh-pages branch
      contents: write
    steps:
      - name: checkout repository code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          ref: ${{ inputs.gitref }}
          submodules: recursive

      - name: download benchmark results artifact
        uses: actions/download-artifact@v4
        with:
          name: ${{ needs.BENCHMARK.outputs.gh_action_benchmark_input_artifact_name }}
          path: downloads

      - name: display structure of downloaded files
        run: ls -R ./downloads
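
      # Each reporting step below runs even if an earlier one failed
      # ((success() || failure())), but only when its JSON file was produced.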
      - name: nm-github-action-benchmark(bigger_is_better.json)
        # Absence of the file indicates that there were no "bigger_is_better" metrics
        if: (success() || failure()) && (hashFiles('downloads/bigger_is_better.json') != '')
        uses: ./.github/actions/nm-github-action-benchmark
        with:
          gh_action_benchmark_name: "bigger_is_better"
          gh_action_benchmark_json_file_path: "downloads/bigger_is_better.json"
          gh_action_benchmark_tool: "customBiggerIsBetter"
          gh_pages_branch: "nm-gh-pages"
          auto_push: ${{ inputs.push_benchmark_results_to_gh_pages }}
          reporting_enabled: "true"
          github_token: ${{ secrets.GITHUB_TOKEN }}

      - name: nm-github-action-benchmark(smaller_is_better.json)
        # Absence of the file indicates that there were no "smaller_is_better" metrics
        if: (success() || failure()) && (hashFiles('downloads/smaller_is_better.json') != '')
        uses: ./.github/actions/nm-github-action-benchmark
        with:
          gh_action_benchmark_name: "smaller_is_better"
          gh_action_benchmark_json_file_path: "downloads/smaller_is_better.json"
          gh_action_benchmark_tool: "customSmallerIsBetter"
          gh_pages_branch: "nm-gh-pages"
          auto_push: ${{ inputs.push_benchmark_results_to_gh_pages }}
          reporting_enabled: "true"
          github_token: ${{ secrets.GITHUB_TOKEN }}

      - name: nm-github-action-benchmark(observation_metrics.json)
        # Absence of the file indicates that there were no "observation" metrics
        if: (success() || failure()) && (hashFiles('downloads/observation_metrics.json') != '')
        uses: ./.github/actions/nm-github-action-benchmark
        with:
          gh_action_benchmark_name: "observation_metrics"
          gh_action_benchmark_json_file_path: "downloads/observation_metrics.json"
          # `github-action-benchmark` expects a tool name that is either
          # "customBiggerIsBetter" or "customSmallerIsBetter". This is a hack to
          # work around that. Since we mark the action to not report failures, this
          # is fine.
          gh_action_benchmark_tool: "customBiggerIsBetter"
          gh_pages_branch: "nm-gh-pages"
          auto_push: ${{ inputs.push_benchmark_results_to_gh_pages }}
          reporting_enabled: "false"
          github_token: ${{ secrets.GITHUB_TOKEN }}