Fix code version (#2026)

mlcommons · Jan 10, 2025 · d419782
1 parent a56a251
commit d419782
Showing 7 changed files with 12 additions and 21 deletions.
diff --git a/language/llama3.1-405b/evaluate-accuracy.py b/language/llama3.1-405b/evaluate-accuracy.py
@@ -138,6 +138,7 @@ def main():
     dataset_path = args.dataset_file
     checkpoint_path = args.checkpoint_path
     nltk.download("punkt")
+    nltk.download('punkt_tab')
 
     tokenizer = AutoTokenizer.from_pretrained(
         checkpoint_path,

diff --git a/language/mixtral-8x7b/README.md b/language/mixtral-8x7b/README.md
@@ -13,11 +13,6 @@ Please see the [new docs site](https://docs.mlcommons.org/inference/benchmarks/l
 
 ## Prepare environment
 
-Copy the mlperf.conf file to this folder.
-```
-cp ../../mlperf.conf .
-```
-
 For a CPU-only run:
 
 ```
@@ -136,7 +131,6 @@ wget https://inference.mlcommons-storage.org/mixtral_8x7b%2F2024.06.06_mixtral_1
 ```
 python -u main.py --scenario Offline \
                 --model-path ${CHECKPOINT_PATH} \
-                --mlperf-conf mlperf.conf \
                 --user-conf user.conf \
                 --total-sample-count 15000 \
                 --device cpu \
@@ -149,7 +143,6 @@ For a GPU-based run:
 ```
 python3 -u main.py --scenario Offline \
         --model-path ${CHECKPOINT_PATH} \
-        --mlperf-conf mlperf.conf \
         --user-conf user.conf \
         --total-sample-count 15000 \
         --dataset-path ${DATASET_PATH} \
@@ -162,7 +155,6 @@ python3 -u main.py --scenario Offline \
 ```
 python -u main.py --scenario Server \
                 --model-path ${CHECKPOINT_PATH} \
-                --mlperf-conf mlperf.conf \
                 --user-conf user.conf \
                 --total-sample-count 15000 \
                 --device cpu \
@@ -184,7 +176,6 @@ mkdir -p "run_outputs"  # The script will dump all the outputs to 'run_outputs'.
 python -u main.py --scenario Offline \
                 --model-path ${CHECKPOINT_PATH} \
                 --accuracy \
-                --mlperf-conf mlperf.conf \
                 --user-conf user.conf \
                 --total-sample-count 15000 \
                 --dataset-path ${DATASET_PATH} \
@@ -221,7 +212,6 @@ OUTPUT_LOG_DIR=server-accuracy-logs
 python -u main.py --scenario Server \
                 --model-path ${CHECKPOINT_PATH} \
                 --accuracy \
-                --mlperf-conf mlperf.conf \
                 --user-conf user.conf \
                 --total-sample-count 15000 \
                 --dataset-path ${DATASET_PATH} \

diff --git a/language/mixtral-8x7b/run_accuracy.sh b/language/mixtral-8x7b/run_accuracy.sh
@@ -6,7 +6,6 @@ mkdir -p "run_outputs"
 python3 -u main.py --scenario Offline \
         --model-path ${CHECKPOINT_PATH} \
         --accuracy \
-        --mlperf-conf mlperf.conf \
         --user-conf user.conf \
         --total-sample-count 15000 \
         --dataset-path ${DATASET_PATH} \

diff --git a/language/mixtral-8x7b/run_offline.sh b/language/mixtral-8x7b/run_offline.sh
@@ -3,7 +3,6 @@ DATASET_PATH="${DATASET_PATH:dataset/2024_06_06_mixtral_15k_v4.pkl}"
 
 python -u main.py --scenario Offline \
 		--model-path ${CHECKPOINT_PATH} \
-		--mlperf-conf mlperf.conf \
 		--user-conf user.conf \
 		--total-sample-count 15000 \
 		--dataset-path ${DATASET_PATH} \

diff --git a/language/mixtral-8x7b/run_server.sh b/language/mixtral-8x7b/run_server.sh
@@ -5,7 +5,6 @@ DATASET_PATH="${DATASET_PATH:dataset/2024_06_06_mixtral_15k_v4.pkl}"
 
 python -u main.py --scenario Server \
 		--model-path ${CHECKPOINT_PATH} \
-		--mlperf-conf mlperf.conf \
 		--user-conf user.conf \
 		--total-sample-count 15000 \
 		--dataset-path ${DATASET_PATH} \

diff --git a/main.py b/main.py
@@ -28,14 +28,16 @@ def mlperf_inference_implementation_readme(
         content = ""
 
         execution_envs = ["Docker", "Native"]
-        code_version = "r5.0-dev"
+        code_version = "r4.1-dev"
         implementation_run_options = []
 
         if model == "rnnt":
             code_version = "r4.0"
 
         if implementation == "reference":
             # Tip
+            if model != "rnnt":
+                code_version = "r5.0-dev"
             if "99.9" not in model and implementation_tips:
                 content += f"\n{pre_space}!!! tip\n\n"
                 content += f"{pre_space}    - MLCommons reference implementations are only meant to provide a rules compliant reference implementation for the submitters and in most cases are not best performing. If you want to benchmark any system, it is advisable to use the vendor MLPerf implementation for that system like Nvidia, Intel etc.\n\n"
@@ -502,7 +504,8 @@ def get_common_info(spaces, implementation, model):
         info += f"\n{pre_space}!!! tip\n\n"
         info += f"{pre_space}    - Number of threads could be adjusted using `--threads=#`, where `#` is the desired number of threads. This option works only if the implementation in use supports threading.\n\n"
         info += f"{pre_space}    - Batch size could be adjusted using `--batch_size=#`, where `#` is the desired batch size. This option works only if the implementation in use is supporting the given batch size.\n\n"
-        info += f"{pre_space}    - `_r4.1-dev` could also be given instead of `_r5.0-dev` if you want to run the benchmark with the MLPerf version being 4.1.\n\n"
+        if implementation.lower() == "reference":
+            info += f"{pre_space}    - `_r4.1-dev` could also be given instead of `_r5.0-dev` if you want to run the benchmark with the MLPerf version being 4.1.\n\n"
         if model == "rgat":
             info += f"{pre_space}    - Add `--env.CM_DATASET_IGBH_PATH=<Path to IGBH dataset>` if you have already downloaded the dataset. The path will be automatically mounted when using docker run.\n\n"
             info += f"{pre_space}    - Add `--env.CM_ML_MODEL_RGAT_CHECKPOINT_PATH=<Path to R-GAT model checkpoint>` if you have already downloaded the model. The path will be automatically mounted when using docker run.\n\n"

diff --git a/tools/submission/submission_checker.py b/tools/submission/submission_checker.py
@@ -378,17 +378,17 @@
             ),
             "mixtral-8x7b": (
                 "ROUGE1",
-                45.4911 * 0.99,
+                45.5989 * 0.99,
                 "ROUGE2",
-                23.2829 * 0.99,
+                23.3526 * 0.99,
                 "ROUGEL",
-                30.3615 * 0.99,
+                30.4608 * 0.99,
                 "TOKENS_PER_SAMPLE",
-                145.9 * 0.9,
+                144.84 * 0.9,
                 "gsm8k_accuracy",
-                73.78 * 0.99,
+                73.66 * 0.99,
                 "mbxp_accuracy",
-                60.12 * 0.99,
+                60.16 * 0.99,
             ),
             "llama3.1-405b": (
                 "ROUGEL",