llm perf
IlyasMoutawwakil committed May 17, 2024
1 parent 0b24af9 commit 1c5c077
Showing 2 changed files with 23 additions and 58 deletions.
39 changes: 17 additions & 22 deletions llm_perf/update_llm_perf_cuda_pytorch.py
@@ -1,19 +1,20 @@
 import os
+import traceback
 from itertools import product
 from logging import getLogger
 
-from llm_perf.utils import (
+from optimum_benchmark import Benchmark, BenchmarkConfig, BenchmarkReport, InferenceConfig, ProcessConfig, PyTorchConfig
+from optimum_benchmark.logging_utils import setup_logging
+
+from .utils import (
     CANONICAL_PRETRAINED_OPEN_LLM_LIST,
     GENERATE_KWARGS,
     INPUT_SHAPES,
     OPEN_LLM_LIST,
     PRETRAINED_OPEN_LLM_LIST,
-    errors_handler,
     is_benchmark_conducted,
     is_benchmark_supported,
 )
-from optimum_benchmark import Benchmark, BenchmarkConfig, InferenceConfig, ProcessConfig, PyTorchConfig
-from optimum_benchmark.logging_utils import setup_logging
 
 SUBSET = os.getenv("SUBSET", None)
 MACHINE = os.getenv("MACHINE", None)
@@ -108,6 +109,10 @@ def benchmark_cuda_pytorch(model, attn_implementation, weights_config):
         LOGGER.info(f"Skipping benchmark {benchmark_name} with model {model} since it is not supported")
         return
 
+    if is_benchmark_conducted(PUSH_REPO_ID, subfolder):
+        LOGGER.info(f"Skipping benchmark {benchmark_name} with model {model} since it was already conducted")
+        return
+
     launcher_config = ProcessConfig(device_isolation=True, device_isolation_action="kill")
     scenario_config = InferenceConfig(
         memory=True,
@@ -137,31 +142,21 @@ def benchmark_cuda_pytorch(model, attn_implementation, weights_config):
         name=benchmark_name, scenario=scenario_config, launcher=launcher_config, backend=backend_config
     )
 
-    if is_benchmark_conducted(benchmark_config, PUSH_REPO_ID, subfolder):
-        LOGGER.info(f"Skipping benchmark {benchmark_name} with model {model} since it was already conducted")
-        return
-
-    benchmark_config.push_to_hub(subfolder=subfolder, repo_id=PUSH_REPO_ID, private=True)
+    benchmark_config.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=subfolder, private=True)
 
     try:
         LOGGER.info(f"Running benchmark {benchmark_name} with model {model}")
         benchmark_report = Benchmark.launch(benchmark_config)
-        benchmark_report.push_to_hub(subfolder=subfolder, repo_id=PUSH_REPO_ID, private=True)
+        benchmark_report.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=subfolder, private=True)
         benchmark = Benchmark(config=benchmark_config, report=benchmark_report)
-        benchmark.push_to_hub(subfolder=subfolder, repo_id=PUSH_REPO_ID, private=True)
+        benchmark.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=subfolder, private=True)
 
-    except Exception as error:
+    except Exception:
         LOGGER.error(f"Benchmark {benchmark_name} failed with model {model}")
-        valid_error, benchmark_report = errors_handler(str(error))
-
-        if valid_error:
-            LOGGER.error("The error is a valid one, reporting it")
-            LOGGER.error(benchmark_report.error)
-            benchmark_report.push_to_hub(subfolder=subfolder, repo_id=PUSH_REPO_ID, private=True)
-        else:
-            LOGGER.error("The error is not valid, need to investigate")
-            LOGGER.error(benchmark_report.error)
-        return
+        benchmark_report = BenchmarkReport.from_dict({"traceback": traceback.format_exc()})
+        benchmark_report.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=subfolder, private=True)
+        benchmark = Benchmark(config=benchmark_config, report=benchmark_report)
+        benchmark.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=subfolder, private=True)
 
 
 if __name__ == "__main__":
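
Read on its own, the failure path that this commit introduces in benchmark_cuda_pytorch amounts to the sketch below. It only rearranges lines that appear in the diff above; benchmark_config, PUSH_REPO_ID, and subfolder are placeholders standing in for the values built earlier in the script, not new definitions from this commit.

import traceback

from optimum_benchmark import Benchmark, BenchmarkReport

try:
    # A successful run produces a regular benchmark report.
    benchmark_report = Benchmark.launch(benchmark_config)
except Exception:
    # A failed run now produces a report whose only content is the raw traceback,
    # instead of the classified error string previously returned by errors_handler.
    benchmark_report = BenchmarkReport.from_dict({"traceback": traceback.format_exc()})

# Both outcomes are pushed to the Hub the same way, so a stored report can be
# recognised as a failure by the presence of its "traceback" entry.
benchmark_report.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=subfolder, private=True)
benchmark = Benchmark(config=benchmark_config, report=benchmark_report)
benchmark.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=subfolder, private=True)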
42 changes: 6 additions & 36 deletions llm_perf/utils.py
@@ -1,5 +1,3 @@
-from typing import Tuple
-
 import pandas as pd
 
 from optimum_benchmark.report import BenchmarkReport
@@ -124,43 +122,15 @@
 ]
 
 
-def errors_handler(error: str) -> Tuple[bool, BenchmarkReport]:
-    valid_error = True
-    benchmark_report = BenchmarkReport.from_list(["error"])
-
-    if "torch.cuda.OutOfMemoryError" in error:
-        benchmark_report.error = "CUDA: Out of memory"
-    elif "gptq" in error and "assert outfeatures % 32 == 0" in error:
-        benchmark_report.error = "GPTQ: assert outfeatures % 32 == 0"
-    elif "gptq" in error and "assert infeatures % self.group_size == 0" in error:
-        benchmark_report.error = "GPTQ: assert infeatures % self.group_size == 0"
-    elif "support Flash Attention 2.0" in error:
-        benchmark_report.error = "Flash Attention 2.0: not supported yet"
-    elif "support an attention implementation through torch.nn.functional.scaled_dot_product_attention" in error:
-        benchmark_report.error = "SDPA: not supported yet"
-    elif "FlashAttention only support fp16 and bf16 data type" in error:
-        benchmark_report.error = "FlashAttention: only support fp16 and bf16 data type"
-    else:
-        benchmark_report.error = f"Unknown error: {error}"
-        valid_error = False
-
-    return valid_error, benchmark_report
-
-
-def is_benchmark_conducted(benchmark_config, push_repo_id, subfolder):
+def is_benchmark_conducted(push_repo_id, subfolder):
     try:
-        loaded_benchmark_config = benchmark_config.from_pretrained(repo_id=push_repo_id, subfolder=subfolder)
-        loaded_benchmark_dict = loaded_benchmark_config.to_dict()
-        benchmark_dict = benchmark_config.to_dict()
-        loaded_benchmark_dict.pop("environment")
-        benchmark_dict.pop("environment")
-        if loaded_benchmark_dict == benchmark_dict:
-            BenchmarkReport.from_pretrained(repo_id=push_repo_id, subfolder=subfolder)
+        report = BenchmarkReport.from_pretrained(repo_id=push_repo_id, subfolder=subfolder)
+        if hasattr("traceback", report):
+            return False
+        else:
             return True
     except Exception:
-        pass
-
-    return False
+        return False
 
 
 def is_benchmark_supported(weights_config, attn_implementation):
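
For reference, Python's builtin hasattr takes (object, name) in that order; written that way, the new conducted-check reads as the sketch below, whereas the committed line passes the arguments as hasattr("traceback", report). The intent, judging from the runner changes above, is to count a benchmark as conducted only when a previously pushed report exists and is not a failure report carrying a "traceback" entry; whether BenchmarkReport exposes that key as an attribute is an assumption here.

from optimum_benchmark.report import BenchmarkReport


def is_benchmark_conducted(push_repo_id, subfolder):
    try:
        # Load whatever report was last pushed for this subfolder of the results repo.
        report = BenchmarkReport.from_pretrained(repo_id=push_repo_id, subfolder=subfolder)
        # A failure report was built from {"traceback": ...} in the runner above;
        # assuming from_dict surfaces that key as an attribute, its presence means
        # the run failed and should be retried rather than skipped.
        if hasattr(report, "traceback"):
            return False
        else:
            return True
    except Exception:
        # No report could be loaded: the benchmark has not been conducted yet.
        return False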
