Skip to content

Commit

Permalink
Merge branch 'mlcommons:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
arjunsuresh authored Oct 28, 2024
2 parents 233f588 + f74d16f commit 0ac8bdc
Show file tree
Hide file tree
Showing 30 changed files with 1,051 additions and 479 deletions.
95 changes: 89 additions & 6 deletions .github/workflows/build_wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,69 @@ on:
push:
branches:
- master
- loadgen-release
paths:
- loadgen/setup.py
- loadgen/**

jobs:
# Job: bump loadgen/VERSION.txt (patch component) on every qualifying push,
# unless the pushed commit already changed the version file by hand.
# Runs once on Ubuntu; the wheel-building job declares `needs: update_version`.
update_version:
  name: Update version only on ubuntu but used by windows and macos
  runs-on: ubuntu-latest
  permissions:
    # Required so the `git push` in the last step can write to the branch
    # when the repository's default GITHUB_TOKEN permission is read-only.
    contents: write
  steps:
    - uses: actions/checkout@v3
      with:
        # The default depth of 1 has no parent commit, so `HEAD~1` below
        # could not be resolved and the diff would always come back empty.
        fetch-depth: 2

    # Detect whether the version file was modified by the pushed commit.
    # Outputs: version_changed ('true'/'false'), new_version (may be empty).
    - name: Check if VERSION.txt file has changed
      id: version_changed
      run: |
        new_version=""
        # Match the exact file that the following steps read and commit.
        if git diff --name-only HEAD~1 | grep -qxF "loadgen/VERSION.txt"; then
          echo "VERSION.txt file has been modified"
          echo "version_changed=true" >> "$GITHUB_OUTPUT"
          new_version=$(cat loadgen/VERSION.txt)
        else
          echo "VERSION file has NOT been modified"
          echo "version_changed=false" >> "$GITHUB_OUTPUT"
        fi
        echo "new_version=$new_version" >> "$GITHUB_OUTPUT"

    # Auto-increment the patch number when the version was not edited by hand.
    # Output: new_version (the bumped "major.minor.patch" string).
    - name: Increment version if necessary
      id: do_version_increment
      if: steps.version_changed.outputs.version_changed == 'false'
      run: |
        cd loadgen
        # Check if VERSION file exists, else initialize it
        if [ ! -f VERSION.txt ]; then
          echo "0.0.0" > VERSION.txt
        fi
        version=$(cat VERSION.txt)
        IFS='.' read -r major minor patch <<< "$version"
        patch=$((patch + 1))
        new_version="$major.$minor.$patch"
        echo "$new_version" > VERSION.txt
        echo "New version: $new_version"
        echo "new_version=$new_version" >> "$GITHUB_OUTPUT"

    # Commit and push the bumped version file back to the branch.
    - name: Commit updated version
      if: steps.version_changed.outputs.version_changed == 'false'
      env:
        # Pass expression values through the environment instead of pasting
        # them into the script text, so their content cannot be run as shell.
        ACTOR: ${{ github.actor }}
        NEW_VERSION: ${{ steps.do_version_increment.outputs.new_version }}
      run: |
        cd loadgen
        git config --global user.name "$ACTOR"
        git config --global user.email "$ACTOR@users.noreply.github.com"
        git add VERSION.txt
        git commit -m "Increment version to $NEW_VERSION"
        git push
build_wheels:
name: Build wheels on ${{ matrix.os }}
needs: update_version
runs-on: ${{ matrix.os }}
environment: release
permissions:
# IMPORTANT: this permission is mandatory for trusted publishing
id-token: write
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macOS-latest]
os: [ubuntu-latest, windows-latest, macos-latest]

steps:
- uses: actions/checkout@v3
Expand All @@ -33,6 +81,41 @@ jobs:
- name: Build wheels
run: python -m cibuildwheel loadgen/ --output-dir wheels

# Save wheels as artifacts
- name: Upload built wheels
uses: actions/upload-artifact@v3
with:
name: wheels-${{ matrix.os }}
path: wheels

publish_wheels:
needs: build_wheels # Wait for the build_wheels job to complete
runs-on: ubuntu-latest # Only run this job on Linux
environment: release
permissions:
# IMPORTANT: this permission is mandatory for trusted publishing
id-token: write
steps:
- uses: actions/checkout@v3

# Download the built wheels from ubuntu
- name: Download Ubuntu wheels
uses: actions/download-artifact@v3
with:
name: wheels-ubuntu-latest
path: wheels
# Download the built wheels from macOS
- name: Download macOS wheels
uses: actions/download-artifact@v3
with:
name: wheels-macos-latest
path: wheels
# Download the built wheels from Windows
- name: Download Windows wheels
uses: actions/download-artifact@v3
with:
name: wheels-windows-latest
path: wheels
- name: Publish
uses: pypa/gh-action-pypi-publish@release/v1
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test-bert.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,4 @@ jobs:
python3 -m pip install cm4mlops
- name: Test BERT and end to end submission generation
run: |
cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --quiet --submitter="MLCommons" --hw_name=default --model=bert-99 --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --adr.compiler.tags=gcc --adr.inference-src.version=custom --adr.inference-src.tags=_repo.${{ github.event.pull_request.head.repo.html_url }},_branch.${{ github.event.pull_request.head.ref }}
cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --quiet --submitter="MLCommons" --hw_name=default --model=bert-99 --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --adr.compiler.tags=gcc --adr.inference-src.version=custom --adr.inference-src.tags=_repo.${{ github.event.pull_request.head.repo.html_url }},_branch.${{ github.event.pull_request.head.ref }} --adr.loadgen.version=custom
4 changes: 2 additions & 2 deletions .github/workflows/test-loadgen.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.8", "3.9", "3.10"]
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]

steps:
- uses: actions/checkout@v3
Expand All @@ -31,4 +31,4 @@ jobs:
python3 -m pip install cm4mlops
- name: Test Loadgen
run: |
cm run script --tags=get,mlperf,inference,loadgen --quiet --version=custom --adr.inference-src.tags=_repo.${{ github.event.pull_request.head.repo.html_url }},_branch.${{ github.event.pull_request.head.ref }}
cm run script --tags=get,mlperf,inference,loadgen --quiet --version=custom --adr.inference-src.tags=_repo.${{ github.event.pull_request.head.repo.html_url }},_branch.${{ github.event.pull_request.head.ref }} --adr.loadgen.tags=_no-compilation-warnings
3 changes: 2 additions & 1 deletion .github/workflows/test-resnet50.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ on:
branches: [ "master", "dev" ]
paths:
- vision/classification_and_detection/**
- loadgen/**
- tools/submission/**
- .github/workflows/test-resnet50.yml
- '!**.md'
Expand All @@ -33,4 +34,4 @@ jobs:
python3 -m pip install cm4mlops
- name: Test Resnet50 and end to end submission generation
run: |
cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --quiet --submitter="MLCommons" --hw_name=default --model=resnet50 --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --adr.inference-src.tags=_branch.${{ github.event.pull_request.head.ref }},_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src.version=custom
cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --quiet --submitter="MLCommons" --hw_name=default --model=resnet50 --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --adr.inference-src.tags=_branch.${{ github.event.pull_request.head.ref }},_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src.version=custom --adr.loadgen.version=custom
2 changes: 1 addition & 1 deletion .github/workflows/test-retinanet.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,4 @@ jobs:
python3 -m pip install cm4mlops
- name: Test Retinanet and end to end submission generation
run: |
cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --quiet --submitter="MLCommons" --hw_name=default --model=retinanet --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=10 --adr.compiler.tags=gcc --adr.inference-src.version=custom --adr.inference-src.tags=_repo.${{ github.event.pull_request.head.repo.html_url }},_branch.${{ github.event.pull_request.head.ref }}
cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --quiet --submitter="MLCommons" --hw_name=default --model=retinanet --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=10 --adr.compiler.tags=gcc --adr.inference-src.version=custom --adr.inference-src.tags=_repo.${{ github.event.pull_request.head.repo.html_url }},_branch.${{ github.event.pull_request.head.ref }} --adr.loadgen.version=custom
2 changes: 1 addition & 1 deletion compliance/nvidia/TEST06/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ This repository provides the config files and scripts to run and verify TEST 06

The purpose of this test is to ensure the consistency of the output of the LLM (Llama2 and Mixtral) model and avoid a potential EOS exploit. This test will make a performance run, with a limit of 100 samples and logging them into `mlperf_log_accuracy.json`. To achieve a passing result in this test, three criteria must be met:
- If the first token is reported independently (not applicable to the Offline scenario), it must match the first token of the model output for every query.
- For each query, the model output should only end with zero or one EOS token. The only exception for 2 EOS tokens is when the entire output sequences are EOS tokens (i.e. output is [eos_token_id, eos_token_id])
- For each query, the model output should only end with zero or one EOS token.
- The number of reported tokens should match the length of the output sequence.

## Requisites
Expand Down
3 changes: 1 addition & 2 deletions compliance/nvidia/TEST06/run_verification.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,7 @@ def eos_check(acc_data, dtype, eos_token_id=2):
if data[i] == eos_token_id:
n_eos_tokens += 1
if n_eos_tokens >= 2:
# Allow output to be [eos_token_id, eos_token_id]
return len(data) == 2
return False
if data[i] != eos_token_id:
break
i-=1
Expand Down
52 changes: 36 additions & 16 deletions language/bert/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,21 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from absl import flags
from absl import app
import subprocess
import mlperf_loadgen as lg
import argparse
import os
import sys
sys.path.insert(0, os.getcwd())
sys.path.insert(0, os.path.join(os.getcwd(), "..", "..", "lon"))
from absl import app
from absl import flags


def get_args():
parser = argparse.ArgumentParser()
parser.add_argument(
"--backend", choices=["tf", "pytorch", "onnxruntime", "tf_estimator", "ray"], default="tf", help="Backend")
"--backend", choices=["tf", "pytorch", "onnxruntime", "tf_estimator", "ray"], default="tf", help="Backend")
parser.add_argument("--scenario", choices=["SingleStream", "Offline",
"Server", "MultiStream"], default="Offline", help="Scenario")
parser.add_argument("--accuracy", action="store_true",
Expand All @@ -37,30 +38,36 @@ def get_args():
help="use quantized model (only valid for onnxruntime backend)")
parser.add_argument("--profile", action="store_true",
help="enable profiling (only valid for onnxruntime backend)")
parser.add_argument(
"--mlperf_conf", default="build/mlperf.conf", help="mlperf rules config")
parser.add_argument("--user_conf", default="user.conf",
help="user config for user LoadGen settings such as target QPS")
parser.add_argument("--audit_conf", default="audit.conf",
help="audit config for LoadGen settings during compliance runs")
parser.add_argument("--max_examples", type=int,
help="Maximum number of examples to consider (not limited by default)")
parser.add_argument("--network", choices=["sut","lon",None], default=None, help="Loadgen network mode")
parser.add_argument(
"--network",
choices=[
"sut",
"lon",
None],
default=None,
help="Loadgen network mode")
parser.add_argument('--node', type=str, default="")
parser.add_argument('--port', type=int, default=8000)
parser.add_argument('--sut_server', nargs="*", default= ['http://localhost:8000'],
help='Address of the server(s) under test.')
parser.add_argument('--sut_server', nargs="*", default=['http://localhost:8000'],
help='Address of the server(s) under test.')

args = parser.parse_args()
return args


scenario_map = {
"SingleStream": lg.TestScenario.SingleStream,
"Offline": lg.TestScenario.Offline,
"Server": lg.TestScenario.Server,
"MultiStream": lg.TestScenario.MultiStream
}
"SingleStream": lg.TestScenario.SingleStream,
"Offline": lg.TestScenario.Offline,
"Server": lg.TestScenario.Server,
"MultiStream": lg.TestScenario.MultiStream
}


def main():
args = get_args()
Expand Down Expand Up @@ -96,7 +103,8 @@ def main():

settings = lg.TestSettings()
settings.scenario = scenario_map[args.scenario]
settings.FromConfig(args.mlperf_conf, "bert", args.scenario)
# mlperf.conf is automatically loaded by the loadgen
# settings.FromConfig(args.mlperf_conf, "bert", args.scenario)
settings.FromConfig(args.user_conf, "bert", args.scenario)

if args.accuracy:
Expand All @@ -117,7 +125,14 @@ def main():

if args.network == "lon":
from network_LON import app, set_args, main as app_main
set_args(args, settings, log_settings, args.audit_conf, args.sut_server, args.backend, args.max_examples)
set_args(
args,
settings,
log_settings,
args.audit_conf,
args.sut_server,
args.backend,
args.max_examples)
app.run(app_main)

elif args.network == "sut":
Expand All @@ -128,7 +143,12 @@ def main():

else:
print("Running LoadGen test...")
lg.StartTestWithLogSettings(sut.sut, sut.qsl.qsl, settings, log_settings, args.audit_conf)
lg.StartTestWithLogSettings(
sut.sut,
sut.qsl.qsl,
settings,
log_settings,
args.audit_conf)
if args.accuracy and not os.environ.get("SKIP_VERIFY_ACCURACY"):
cmd = "python3 {:}/accuracy-squad.py {}".format(
os.path.dirname(os.path.abspath(__file__)),
Expand Down
Loading

0 comments on commit 0ac8bdc

Please sign in to comment.