
Commit a21fa6d
install latest openai and utility, and print out gpu memory before and after model run

dhuangnm authored and dhuangnm committed Jul 12, 2024
1 parent 493b69f commit a21fa6d
Showing 3 changed files with 19 additions and 4 deletions.
5 changes: 4 additions & 1 deletion .github/actions/nm-lm-eval/action.yml
@@ -23,7 +23,10 @@ runs:
 #pip3 install git+https://github.com/EleutherAI/lm-evaluation-harness.git@262f879a06aa5de869e5dd951d0ff2cf2f9ba380
 pip3 install git+https://github.com/EleutherAI/[email protected]
-pip3 install pytest openai==1.3.9
+#pip3 install pytest openai==1.3.9
+pip3 install pytest openai
+sudo apt-get install -y psmisc
 SUCCESS=0
 ./.github/scripts/nm-run-lm-eval-vllm.sh -c ${{ inputs.lm_eval_configuration }} || SUCCESS=$?
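For context, psmisc is the Ubuntu package that ships the fuser utility used by the updated script below, and dropping the ==1.3.9 pin lets pip resolve the latest openai release. A quick sanity check of the two new dependencies could look like this (a sketch, not part of the workflow itself):

# Confirm the unpinned openai client resolved to a current release, and that
# fuser (shipped by psmisc) is on PATH for the GPU-memory checks below.
python3 -c "import openai; print(openai.__version__)"
command -v fuser && fuser -V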
6 changes: 3 additions & 3 deletions .github/lm-eval-configs/full-large-models.txt
@@ -1,9 +1,9 @@
 Meta-Llama-3-70B-Instruct-FP8-KV.yaml
 Meta-Llama-3-70B-Instruct.yaml
-Qwen2-57B-A14B-Instruct.yaml
-Qwen2-72B-Instruct.yaml
-Phi-3-medium-4k-instruct.yaml
 Mixtral-8x22B-Instruct-v0.1-FP8.yaml
 Mixtral-8x7B-Instruct-v0.1-FP8.yaml
 Mixtral-8x22B-Instruct-v0.1.yaml
 Mixtral-8x7B-Instruct-v0.1.yaml
+Qwen2-57B-A14B-Instruct.yaml
+Qwen2-72B-Instruct.yaml
+Phi-3-medium-4k-instruct.yaml
12 changes: 12 additions & 0 deletions .github/scripts/nm-run-lm-eval-vllm.sh
@@ -41,9 +41,21 @@ do
 
 echo "=== RUNNING MODEL: $MODEL_CONFIG ==="
 
+echo "Checking GPU memory before running model:"
+echo "BEFORE==>" > log.txt
+fuser -v /dev/nvidia* 2>&1 | tee -a log.txt
+
 MODEL_CONFIG_PATH=$PWD/.github/lm-eval-configs/models/${MODEL_CONFIG}
 LM_EVAL_TEST_DATA_FILE=$MODEL_CONFIG_PATH pytest -s tests/accuracy/test_lm_eval_correctness.py || LOCAL_SUCCESS=$?
 
+echo "Checking GPU memory after model ran:"
+echo "AFTER==>" >> log.txt
+fuser -v /dev/nvidia* 2>&1 | tee -a log.txt
+
+# clean up leftover from GPU memory
+echo "Clean up leftover in gpu memory..."
+cat log.txt
+
 if [[ $LOCAL_SUCCESS == 0 ]]; then
 echo "=== PASSED MODEL: ${MODEL_CONFIG} ==="
 else
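fuser -v /dev/nvidia* lists every process that still has an NVIDIA device node open, which is how leftover GPU memory from a previous model run shows up; the 2>&1 | tee -a log.txt captures that report (fuser writes its verbose listing to stderr) while still echoing it to the console. Note that the added lines only record the before/after state in log.txt; actually reclaiming the memory would need a kill step, roughly like the following (hypothetical, not part of this commit):

# Kill any process still holding an NVIDIA device node so the next model
# starts on a clean GPU; fuser -k sends SIGKILL by default, and it returns
# nonzero when no process matched, hence the || true.
fuser -k /dev/nvidia* 2>/dev/null || true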
