llm perf
IlyasMoutawwakil committed May 17, 2024
1 parent 0b24af9 commit 1c5c077
Showing 2 changed files with 23 additions and 58 deletions.
39 changes: 17 additions & 22 deletions llm_perf/update_llm_perf_cuda_pytorch.py
@@ -1,19 +1,20 @@
 import os
+import traceback
 from itertools import product
 from logging import getLogger
 
-from llm_perf.utils import (
+from optimum_benchmark import Benchmark, BenchmarkConfig, BenchmarkReport, InferenceConfig, ProcessConfig, PyTorchConfig
+from optimum_benchmark.logging_utils import setup_logging
+
+from .utils import (
     CANONICAL_PRETRAINED_OPEN_LLM_LIST,
     GENERATE_KWARGS,
     INPUT_SHAPES,
     OPEN_LLM_LIST,
     PRETRAINED_OPEN_LLM_LIST,
-    errors_handler,
     is_benchmark_conducted,
     is_benchmark_supported,
 )
-from optimum_benchmark import Benchmark, BenchmarkConfig, InferenceConfig, ProcessConfig, PyTorchConfig
-from optimum_benchmark.logging_utils import setup_logging
 
 SUBSET = os.getenv("SUBSET", None)
 MACHINE = os.getenv("MACHINE", None)
@@ -108,6 +109,10 @@ def benchmark_cuda_pytorch(model, attn_implementation, weights_config):
         LOGGER.info(f"Skipping benchmark {benchmark_name} with model {model} since it is not supported")
         return
 
+    if is_benchmark_conducted(PUSH_REPO_ID, subfolder):
+        LOGGER.info(f"Skipping benchmark {benchmark_name} with model {model} since it was already conducted")
+        return
+
     launcher_config = ProcessConfig(device_isolation=True, device_isolation_action="kill")
     scenario_config = InferenceConfig(
         memory=True,
@@ -137,31 +142,21 @@ def benchmark_cuda_pytorch(model, attn_implementation, weights_config):
         name=benchmark_name, scenario=scenario_config, launcher=launcher_config, backend=backend_config
     )
 
-    if is_benchmark_conducted(benchmark_config, PUSH_REPO_ID, subfolder):
-        LOGGER.info(f"Skipping benchmark {benchmark_name} with model {model} since it was already conducted")
-        return
-
-    benchmark_config.push_to_hub(subfolder=subfolder, repo_id=PUSH_REPO_ID, private=True)
+    benchmark_config.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=subfolder, private=True)
 
     try:
         LOGGER.info(f"Running benchmark {benchmark_name} with model {model}")
         benchmark_report = Benchmark.launch(benchmark_config)
-        benchmark_report.push_to_hub(subfolder=subfolder, repo_id=PUSH_REPO_ID, private=True)
+        benchmark_report.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=subfolder, private=True)
         benchmark = Benchmark(config=benchmark_config, report=benchmark_report)
-        benchmark.push_to_hub(subfolder=subfolder, repo_id=PUSH_REPO_ID, private=True)
+        benchmark.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=subfolder, private=True)
 
-    except Exception as error:
+    except Exception:
         LOGGER.error(f"Benchmark {benchmark_name} failed with model {model}")
-        valid_error, benchmark_report = errors_handler(str(error))
-
-        if valid_error:
-            LOGGER.error("The error is a valid one, reporting it")
-            LOGGER.error(benchmark_report.error)
-            benchmark_report.push_to_hub(subfolder=subfolder, repo_id=PUSH_REPO_ID, private=True)
-        else:
-            LOGGER.error("The error is not valid, need to investigate")
-            LOGGER.error(benchmark_report.error)
-        return
+        benchmark_report = BenchmarkReport.from_dict({"traceback": traceback.format_exc()})
+        benchmark_report.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=subfolder, private=True)
+        benchmark = Benchmark(config=benchmark_config, report=benchmark_report)
+        benchmark.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=subfolder, private=True)
 
 
 if __name__ == "__main__":
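
Read on its own, the failure path that this commit introduces in benchmark_cuda_pytorch amounts to the sketch below. It only rearranges lines that appear in the diff above; benchmark_config, PUSH_REPO_ID, and subfolder are placeholders standing in for the values built earlier in the script, not new definitions from this commit.

import traceback

from optimum_benchmark import Benchmark, BenchmarkReport

try:
    # A successful run produces a regular benchmark report.
    benchmark_report = Benchmark.launch(benchmark_config)
except Exception:
    # A failed run now produces a report whose only content is the raw traceback,
    # instead of the classified error string previously returned by errors_handler.
    benchmark_report = BenchmarkReport.from_dict({"traceback": traceback.format_exc()})

# Both outcomes are pushed to the Hub the same way, so a stored report can be
# recognised as a failure by the presence of its "traceback" entry.
benchmark_report.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=subfolder, private=True)
benchmark = Benchmark(config=benchmark_config, report=benchmark_report)
benchmark.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=subfolder, private=True)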
42 changes: 6 additions & 36 deletions llm_perf/utils.py
@@ -1,5 +1,3 @@
-from typing import Tuple
-
 import pandas as pd
 
 from optimum_benchmark.report import BenchmarkReport
@@ -124,43 +122,15 @@
 ]
 
 
-def errors_handler(error: str) -> Tuple[bool, BenchmarkReport]:
-    valid_error = True
-    benchmark_report = BenchmarkReport.from_list(["error"])
-
-    if "torch.cuda.OutOfMemoryError" in error:
-        benchmark_report.error = "CUDA: Out of memory"
-    elif "gptq" in error and "assert outfeatures % 32 == 0" in error:
-        benchmark_report.error = "GPTQ: assert outfeatures % 32 == 0"
-    elif "gptq" in error and "assert infeatures % self.group_size == 0" in error:
-        benchmark_report.error = "GPTQ: assert infeatures % self.group_size == 0"
-    elif "support Flash Attention 2.0" in error:
-        benchmark_report.error = "Flash Attention 2.0: not supported yet"
-    elif "support an attention implementation through torch.nn.functional.scaled_dot_product_attention" in error:
-        benchmark_report.error = "SDPA: not supported yet"
-    elif "FlashAttention only support fp16 and bf16 data type" in error:
-        benchmark_report.error = "FlashAttention: only support fp16 and bf16 data type"
-    else:
-        benchmark_report.error = f"Unknown error: {error}"
-        valid_error = False
-
-    return valid_error, benchmark_report
-
-
-def is_benchmark_conducted(benchmark_config, push_repo_id, subfolder):
+def is_benchmark_conducted(push_repo_id, subfolder):
     try:
-        loaded_benchmark_config = benchmark_config.from_pretrained(repo_id=push_repo_id, subfolder=subfolder)
-        loaded_benchmark_dict = loaded_benchmark_config.to_dict()
-        benchmark_dict = benchmark_config.to_dict()
-        loaded_benchmark_dict.pop("environment")
-        benchmark_dict.pop("environment")
-        if loaded_benchmark_dict == benchmark_dict:
-            BenchmarkReport.from_pretrained(repo_id=push_repo_id, subfolder=subfolder)
+        report = BenchmarkReport.from_pretrained(repo_id=push_repo_id, subfolder=subfolder)
+        if hasattr("traceback", report):
+            return False
+        else:
             return True
     except Exception:
-        pass
-
-    return False
+        return False
 
 
 def is_benchmark_supported(weights_config, attn_implementation):
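
For reference, Python's builtin hasattr takes (object, name) in that order; written that way, the new conducted-check reads as the sketch below, whereas the committed line passes the arguments as hasattr("traceback", report). The intent, judging from the runner changes above, is to count a benchmark as conducted only when a previously pushed report exists and is not a failure report carrying a "traceback" entry; whether BenchmarkReport exposes that key as an attribute is an assumption here.

from optimum_benchmark.report import BenchmarkReport


def is_benchmark_conducted(push_repo_id, subfolder):
    try:
        # Load whatever report was last pushed for this subfolder of the results repo.
        report = BenchmarkReport.from_pretrained(repo_id=push_repo_id, subfolder=subfolder)
        # A failure report was built from {"traceback": ...} in the runner above;
        # assuming from_dict surfaces that key as an attribute, its presence means
        # the run failed and should be retried rather than skipped.
        if hasattr(report, "traceback"):
            return False
        else:
            return True
    except Exception:
        # No report could be loaded: the benchmark has not been conducted yet.
        return False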
