diff --git a/.gitignore b/.gitignore
index 9545a7977..eba8bb341 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@ loadgen/build/
 libmlperf_loadgen.a
 __pycache__/
 generated/
+*.swp
diff --git a/language/llama3-405b/README.md b/language/llama3-405b/README.md
index dcc5344c4..26e2876a0 100644
--- a/language/llama3-405b/README.md
+++ b/language/llama3-405b/README.md
@@ -9,31 +9,61 @@ Please see the [new docs site](https://docs.mlcommons.org/inference/benchmarks/language/llama3-405b) for an automated way to run this benchmark across different available implementations and do an end-to-end submission with or without docker.
-
+
 ## Prepare environment
 
-Copy the mlperf.conf file to this folder.
-```
-cp ../../mlperf.conf .
+### Local Environment Run
+
+The following steps were tested on Ubuntu 22.04 with Python 3.10.
+
+- **Prerequisite for GPU runs:** Install the NVIDIA driver and CUDA 12.1.
+
+The following links contain the commands for installing the [NVIDIA Driver](https://developer.nvidia.com/datacenter-driver-downloads?target_os=Linux&target_arch=x86_64&Distribution=Ubuntu&target_version=22.04&target_type=deb_local) and [CUDA](https://developer.nvidia.com/cuda-12-1-0-download-archive?target_os=Linux&target_arch=x86_64&Distribution=Ubuntu&target_version=22.04&target_type=deb_local).
+
+- **Prerequisite:** Install conda.
+
+```bash
+mkdir -p ~/miniconda3
+wget https://repo.anaconda.com/miniconda/Miniconda3-py310_23.5.2-0-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh
+bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3
+rm ~/miniconda3/miniconda.sh
+~/miniconda3/bin/conda init
 ```
-For a CPU-only run:
+- Set the following helper variables:
+```bash
+export ROOT=$PWD/inference
+export LLAMA_FOLDER=$PWD/inference/language/llama3-405b
+export LOADGEN_FOLDER=$PWD/inference/loadgen
+export DATASET_FOLDER=$PWD/inference/language/llama3-405b/dataset
+```
+- Clone the inference repository:
+```bash
+git clone --recurse-submodules https://github.com/mlcommons/inference.git \
+ --depth 1
 ```
-conda create -n llama3-405b python=3.9
+
+- Create a conda environment:
+```bash
+conda create -y -n llama3-405b python=3.10
 conda activate llama3-405b
+conda install -y -c conda-forge libstdcxx-ng=12
+```
+- Install requirements and loadgen:
+```bash
+cd $LLAMA_FOLDER
 # Install packages
 pip install -r requirements.txt
+```
 
-export CUR_DIR=${PWD}
-cd /loadgen
-
-
-python -m pip install .
+```bash
+cd $LOADGEN_FOLDER
+pip install -e .
 ```
 
-For a GPU-based run:
+### Docker Run
 
 A dockerfile is provided, along with scripts to help launch it. First, add any docker volume mounts you want in `launch.sh`. There is a section at the top of the file that looks like:
@@ -54,10 +84,13 @@ MOUNTS=(
 /raid/data:/raid/data
 )
 ```
 
-Once you have added all your mounts, launch the container with `bash launch.sh`.
+Once you have added all your mounts, build and launch the container with `bash launch.sh`.
 
-Inside the container, set up the environment with `bash build.sh`. This will install all the dependencies from the
-CPU-only setup, as well as any GPU versions for applicable libraries like PyTorch.
+Now install all the dependencies:
+```
+pip install -r requirements.txt
+pip install -e ../../loadgen
+```
 
 ## Get Model
@@ -73,7 +106,7 @@ TODO: Host model and grant access to submitters
 export CHECKPOINT_PATH=Meta-Llama-3.1-405B-Instruct
 git lfs install
 git clone https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct ${CHECKPOINT_PATH}
-
+cd ${CHECKPOINT_PATH} && git checkout be673f326cab4cd22ccfef76109faf68e41aa5f1
 ```
 
 ## Get Dataset
@@ -109,6 +142,7 @@ rclone copy mlc-inference:mlcommons-inference-wg-public/llama3_405b/mlperf_llama
 ```
 python -u main.py --scenario Offline \
         --model-path ${CHECKPOINT_PATH} \
+        --batch-size 16 \
         --dtype float16 \
         --user-conf user.conf \
         --total-sample-count 8312 \
@@ -123,6 +157,7 @@ python -u main.py --scenario Server \
         --model-path ${CHECKPOINT_PATH} \
+        --batch-size 16 \
         --dtype float16 \
         --user-conf user.conf \
         --total-sample-count 8312 \
@@ -145,6 +180,7 @@ mkdir -p "run_outputs" # The script will dump all the outputs to 'run_outputs'.
 
 python -u main.py --scenario Offline \
         --model-path ${CHECKPOINT_PATH} \
+        --batch-size 16 \
         --accuracy \
         --dtype float16 \
         --user-conf user.conf \
@@ -172,6 +208,7 @@ OUTPUT_LOG_DIR=server-accuracy-logs
 
 python -u main.py --scenario Server \
         --model-path ${CHECKPOINT_PATH} \
+        --batch-size 16 \
         --accuracy \
         --dtype float16 \
         --user-conf user.conf \
@@ -201,4 +238,4 @@ Running the GPU implementation in FP16 precision resulted in the following FP16
 'tokens_per_sample': 684.68,
 }
 ```
-
+The accuracy target is 99% of the reference value for rougeL and exact_match, and 90% for tokens_per_sample.
diff --git a/language/llama3-405b/run_accuracy.sh b/language/llama3-405b/run_accuracy.sh
index 075245913..f1a8be404 100644
--- a/language/llama3-405b/run_accuracy.sh
+++ b/language/llama3-405b/run_accuracy.sh
@@ -5,6 +5,7 @@ mkdir -p "run_outputs"
 
 python3 -u main.py --scenario Offline \
         --model-path ${CHECKPOINT_PATH} \
+        --batch-size 16 \
         --accuracy \
         --mlperf-conf mlperf.conf \
         --user-conf user.conf \
@@ -17,5 +18,3 @@ python3 evaluate-accuracy.py --checkpoint-path ${CHECKPOINT_PATH} \
         --mlperf-accuracy-file offline_accuracy_loadgen_logs/mlperf_log_accuracy.json \
         --dataset-file ${DATASET_PATH} \
         --dtype int32
-
-python3 consolidate_results.py --dataset-path ${DATASET_PATH} --model-dir ${CHECKPOINT_PATH}
diff --git a/language/llama3-405b/run_offline.sh b/language/llama3-405b/run_offline.sh
index 89fa9e45f..b5ad1ded5 100644
--- a/language/llama3-405b/run_offline.sh
+++ b/language/llama3-405b/run_offline.sh
@@ -1,10 +1,13 @@
 CHECKPOINT_PATH="${CHECKPOINT_PATH:Meta-Llama-3.1-405B-Instruct}"
-DATASET_PATH="${DATASET_PATH:-open-orca-val-set.pkl}"
+DATASET_PATH="${DATASET_PATH:-mlperf_llama3.1_405b_dataset_8318.pkl}"
 
 python -u main.py --scenario Offline \
-    --model-path ${CHECKPOINT_PATH} \
-    --mlperf-conf mlperf.conf \
-    --user-conf user.conf \
-    --total-sample-count 8312 \
-    --dataset-path ${DATASET_PATH} \
-    --device cpu 2>&1 | tee server_log.log
+        --model-path ${CHECKPOINT_PATH} \
+        --batch-size 16 \
+        --dtype float16 \
+        --user-conf user.conf \
+        --total-sample-count 8312 \
+        --dataset-path ${DATASET_PATH} \
+        --output-log-dir output \
+        --tensor-parallel-size ${GPU_COUNT} \
+        --vllm 2>&1 | tee offline.log
diff --git a/language/llama3-405b/run_server.sh b/language/llama3-405b/run_server.sh
index fe2a31c43..7735b417e 100644
--- a/language/llama3-405b/run_server.sh
+++ b/language/llama3-405b/run_server.sh
@@ -1,12 +1,15 @@
 CHECKPOINT_PATH="${CHECKPOINT_PATH:Meta-Llama-3.1-405B-Instruct}"
-DATASET_PATH="${DATASET_PATH:-open-orca-val-set.pkl}" +DATASET_PATH="${DATASET_PATH:mlperf_llama3.1_405b_dataset_8318.pkl}" python -u main.py --scenario Server \ - --model-path ${CHECKPOINT_PATH} \ - --mlperf-conf mlperf.conf \ - --user-conf user.conf \ - --total-sample-count 8312 \ - --dataset-path ${DATASET_PATH} \ - --device cpu 2>&1 | tee server_log.log + --model-path ${CHECKPOINT_PATH} \ + --batch-size 16 \ + --dtype float16 \ + --user-conf user.conf \ + --total-sample-count 8312 \ + --dataset-path ${DATASET_PATH} \ + --output-log-dir output \ + --tensor-parallel-size ${GPU_COUNT} \ + --vllm 2>&1 | tee server.log diff --git a/language/llama3-405b/with_the_same_user b/language/llama3-405b/with_the_same_user new file mode 100755 index 000000000..cfa57902f --- /dev/null +++ b/language/llama3-405b/with_the_same_user @@ -0,0 +1,27 @@ +#!/usr/bin/env bash +# wkong: manually set the user info in env first + +set -ex + +if [ -z "$@" ]; then + COMMAND=(bash) +else + COMMAND=("$@") +fi + +apt-get update && apt-get install -y sudo + +getent group "${CI_BUILD_GID}" || addgroup --gid "${CI_BUILD_GID}" "${CI_BUILD_GROUP}" +getent passwd "${CI_BUILD_UID}" || adduser --gid "${CI_BUILD_GID}" --uid "${CI_BUILD_UID}" --gecos "${CI_BUILD_USER} (generated by with_the_same_user script)" --disabled-password --quiet "${CI_BUILD_USER}" + +usermod -a -G dip "${CI_BUILD_USER}" +usermod -a -G sudo "${CI_BUILD_USER}" +usermod -a -G root "${CI_BUILD_USER}" + +echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers + +sudo -H -u "#${CI_BUILD_UID}" --preserve-env \ + PATH="${PATH}" \ + LD_LIBRARY_PATH="${LD_LIBRARY_PATH}" \ + PYTHONPATH="${PYTHONPATH}" \ + ${COMMAND[@]}