From 26bb362eb57254d2601dbd8698fe329c3690a162 Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Thu, 26 Sep 2024 11:59:01 +0200
Subject: [PATCH] Unify report logging into log()/markdown() and move metric
 formatting into the trackers

---
 optimum_benchmark/benchmark/report.py       | 88 +++++++++----------
 .../launchers/inline/launcher.py            |  3 +
 .../scenarios/inference/scenario.py         | 27 ++----
 optimum_benchmark/trackers/energy.py        | 42 +++++++--
 optimum_benchmark/trackers/latency.py       | 56 +++++++++---
 optimum_benchmark/trackers/memory.py        | 23 ++++-
 6 files changed, 159 insertions(+), 80 deletions(-)

diff --git a/optimum_benchmark/benchmark/report.py b/optimum_benchmark/benchmark/report.py
index 812f4abbb..fa26c84e6 100644
--- a/optimum_benchmark/benchmark/report.py
+++ b/optimum_benchmark/benchmark/report.py
@@ -39,7 +39,37 @@ def aggregate(measurements: List["BenchmarkMeasurements"]) -> "BenchmarkMeasurem
         energy = Energy.aggregate([m.energy for m in measurements]) if m0.energy is not None else None
         efficiency = Efficiency.aggregate([m.efficiency for m in measurements]) if m0.efficiency is not None else None
 
-        return BenchmarkMeasurements(memory, latency, throughput, energy, efficiency)
+        return BenchmarkMeasurements(
+            memory=memory, latency=latency, throughput=throughput, energy=energy, efficiency=efficiency
+        )
+
+    def log(self, prefix: str = ""):
+        if self.memory is not None:
+            self.memory.log(prefix=prefix)
+        if self.latency is not None:
+            self.latency.log(prefix=prefix)
+        if self.throughput is not None:
+            self.throughput.log(prefix=prefix)
+        if self.energy is not None:
+            self.energy.log(prefix=prefix)
+        if self.efficiency is not None:
+            self.efficiency.log(prefix=prefix)
+
+    def markdown(self, prefix: str = "") -> str:
+        markdown = ""
+
+        if self.memory is not None:
+            markdown += self.memory.markdown(prefix=prefix)
+        if self.latency is not None:
+            markdown += self.latency.markdown(prefix=prefix)
+        if self.throughput is not None:
+            markdown += self.throughput.markdown(prefix=prefix)
+        if self.energy is not None:
+            markdown += self.energy.markdown(prefix=prefix)
+        if self.efficiency is not None:
+            markdown += self.efficiency.markdown(prefix=prefix)
+
+        return markdown
 
 
 @dataclass
@@ -59,58 +89,28 @@ def __post_init__(self):
         elif isinstance(getattr(self, target), dict):
             setattr(self, target, BenchmarkMeasurements(**getattr(self, target)))
 
-    def log_memory(self):
-        for target in self.to_dict().keys():
-            measurements: BenchmarkMeasurements = getattr(self, target)
-            if measurements.memory is not None:
-                measurements.memory.log(prefix=target)
-
-    def log_latency(self):
-        for target in self.to_dict().keys():
-            measurements: BenchmarkMeasurements = getattr(self, target)
-            if measurements.latency is not None:
-                measurements.latency.log(prefix=target)
+    @classmethod
+    def aggregate(cls, reports: List["BenchmarkReport"]) -> "BenchmarkReport":
+        aggregated_measurements = {}
+        for target in reports[0].to_dict().keys():
+            measurements = [getattr(report, target) for report in reports]
+            aggregated_measurements[target] = BenchmarkMeasurements.aggregate(measurements)
 
-    def log_throughput(self):
-        for target in self.to_dict().keys():
-            measurements: BenchmarkMeasurements = getattr(self, target)
-            if measurements.throughput is not None:
-                measurements.throughput.log(prefix=target)
+        return cls.from_dict(aggregated_measurements)
 
-    def log_energy(self):
+    def log(self):
         for target in self.to_dict().keys():
             measurements: BenchmarkMeasurements = getattr(self, target)
-            if measurements.energy is not None:
-                measurements.energy.log(prefix=target)
+            measurements.log(prefix=target)
 
-    def log_efficiency(self):
-        for target in self.to_dict().keys():
-            measurements: BenchmarkMeasurements = getattr(self, target)
-            if measurements.efficiency is not None:
-                measurements.efficiency.log(prefix=target)
+    def markdown(self) -> str:
+        markdown = ""
 
-    def log(self):
         for target in self.to_dict().keys():
             measurements: BenchmarkMeasurements = getattr(self, target)
-            if measurements.memory is not None:
-                measurements.memory.log(prefix=target)
-            if measurements.latency is not None:
-                measurements.latency.log(prefix=target)
-            if measurements.throughput is not None:
-                measurements.throughput.log(prefix=target)
-            if measurements.energy is not None:
-                measurements.energy.log(prefix=target)
-            if measurements.efficiency is not None:
-                measurements.efficiency.log(prefix=target)
+            markdown += measurements.markdown(prefix=target)
 
-    @classmethod
-    def aggregate(cls, reports: List["BenchmarkReport"]) -> "BenchmarkReport":
-        aggregated_measurements = {}
-        for target in reports[0].to_dict().keys():
-            measurements = [getattr(report, target) for report in reports]
-            aggregated_measurements[target] = BenchmarkMeasurements.aggregate(measurements)
-
-        return cls.from_dict(aggregated_measurements)
+        return markdown
 
     @classproperty
     def default_filename(self) -> str:
diff --git a/optimum_benchmark/launchers/inline/launcher.py b/optimum_benchmark/launchers/inline/launcher.py
index 05b0448c0..bc52f5232 100644
--- a/optimum_benchmark/launchers/inline/launcher.py
+++ b/optimum_benchmark/launchers/inline/launcher.py
@@ -13,5 +13,8 @@ def __init__(self, config: InlineConfig):
     def launch(self, worker: Callable[..., BenchmarkReport], worker_args: List[Any]) -> BenchmarkReport:
         self.logger.warning("The inline launcher is only recommended for debugging purposes and not for benchmarking")
 
+        report = worker(*worker_args)
+        report.log()
+
         return report
diff --git a/optimum_benchmark/scenarios/inference/scenario.py b/optimum_benchmark/scenarios/inference/scenario.py
index c0d9475e8..ef0a773e3 100644
--- a/optimum_benchmark/scenarios/inference/scenario.py
+++ b/optimum_benchmark/scenarios/inference/scenario.py
@@ -13,7 +13,7 @@
 from ..base import Scenario
 from .config import InferenceConfig
 
-PER_TOKEN_BACKENDS = ["pytorch", "onnxruntime", "openvino", "neural-compressor"]
+PER_TOKEN_BACKENDS = ["pytorch", "onnxruntime", "openvino", "neural-compressor", "ipex"]
 
 TEXT_GENERATION_DEFAULT_KWARGS = {
     "num_return_sequences": 1,
@@ -99,8 +99,6 @@ def run(self, backend: Backend[BackendConfigT]) -> BenchmarkReport:
             else:
                 self.run_inference_memory_tracking(backend)
 
-            self.report.log_memory()
-
         if self.config.latency or self.config.energy:
             # latency and energy are metrics that require some warmup
             if backend.config.task in TEXT_GENERATION_TASKS:
@@ -121,9 +119,6 @@ def run(self, backend: Backend[BackendConfigT]) -> BenchmarkReport:
             else:
                 self.run_latency_inference_tracking(backend)
 
-            self.report.log_latency()
-            self.report.log_throughput()
-
         if self.config.energy:
             if backend.config.task in TEXT_GENERATION_TASKS:
                 self.run_text_generation_energy_tracking(backend)
@@ -132,11 +127,9 @@ def run(self, backend: Backend[BackendConfigT]) -> BenchmarkReport:
             else:
                 self.run_inference_energy_tracking(backend)
 
-            self.report.log_energy()
-            self.report.log_efficiency()
-
         return self.report
 
+    # Warmup
     def warmup_text_generation(self, backend: Backend[BackendConfigT]):
         self.logger.info("\t+ Warming up backend for Text Generation")
         _ = backend.generate(self.inputs, self.config.generate_kwargs)
@@ -169,16 +162,14 @@ def run_model_loading_tracking(self, backend: Backend[BackendConfigT]):
            backend=backend.config.name, device=backend.config.device, device_ids=backend.config.device_ids
        )
 
-        context_stack = ExitStack()
-        if self.config.latency:
-            context_stack.enter_context(latency_tracker.track())
-        if self.config.memory:
-            context_stack.enter_context(memory_tracker.track())
-        if self.config.energy:
-            context_stack.enter_context(energy_tracker.track())
+        with ExitStack() as context_stack:
+            if self.config.latency:
+                context_stack.enter_context(latency_tracker.track())
+            if self.config.memory:
+                context_stack.enter_context(memory_tracker.track())
+            if self.config.energy:
+                context_stack.enter_context(energy_tracker.track())
 
-        with context_stack:
-            self.logger.info("\t+ Loading model for Inference")
             backend.load()
 
         if self.config.latency:
diff --git a/optimum_benchmark/trackers/energy.py b/optimum_benchmark/trackers/energy.py
index 6f904deb9..0307da41f 100644
--- a/optimum_benchmark/trackers/energy.py
+++ b/optimum_benchmark/trackers/energy.py
@@ -48,12 +48,27 @@ def aggregate(energies: List["Energy"]) -> "Energy":
 
         return Energy(cpu=cpu, gpu=gpu, ram=ram, total=total, unit=ENERGY_UNIT)
 
-    def log(self, prefix: str = "forward"):
-        LOGGER.info(f"\t\t+ {prefix} energy consumption:")
-        LOGGER.info(f"\t\t\t+ CPU: {self.cpu:f} ({self.unit})")
-        LOGGER.info(f"\t\t\t+ GPU: {self.gpu:f} ({self.unit})")
-        LOGGER.info(f"\t\t\t+ RAM: {self.ram:f} ({self.unit})")
-        LOGGER.info(f"\t\t\t+ total: {self.total:f} ({self.unit})")
+    def log(self, prefix: str = ""):
+        LOGGER.info(f"\t\t+ {prefix} energy:")
+        LOGGER.info(f"\t\t\t- cpu: {self.cpu:f} ({self.unit})")
+        LOGGER.info(f"\t\t\t- gpu: {self.gpu:f} ({self.unit})")
+        LOGGER.info(f"\t\t\t- ram: {self.ram:f} ({self.unit})")
+        LOGGER.info(f"\t\t\t- total: {self.total:f} ({self.unit})")
+
+    def markdown(self, prefix: str = "") -> str:
+        markdown = ""
+        markdown += "| ---------------------------------------- |\n"
+        markdown += "| {prefix} energy |\n"
+        markdown += "| ---------------------------------------- |\n"
+        markdown += "| metric | value (unit) |\n"
+        markdown += "| :-------- | ---------------------------: |\n"
+        markdown += "| cpu | {cpu:f} ({unit}) |\n"
+        markdown += "| gpu | {gpu:f} ({unit}) |\n"
+        markdown += "| ram | {ram:f} ({unit}) |\n"
+        markdown += "| total | {total:f} ({unit}) |\n"
+        markdown += "| ---------------------------------------- |\n"
+
+        return markdown.format(prefix=prefix, **asdict(self))
 
     def __sub__(self, other: "Energy") -> "Energy":
         """Enables subtraction of two Energy instances using the '-' operator."""
@@ -102,7 +117,20 @@ def from_energy(energy: "Energy", volume: int, unit: str) -> "Efficiency":
         return Efficiency(value=volume / energy.total if energy.total > 0 else 0, unit=unit)
 
     def log(self, prefix: str = ""):
-        LOGGER.info(f"\t\t+ {prefix} energy efficiency: {self.value:f} ({self.unit})")
+        LOGGER.info(f"\t\t+ {prefix} efficiency: {self.value:f} ({self.unit})")
+
+    def markdown(self, prefix: str = "") -> str:
+        markdown = ""
+
+        markdown += "| ------------------------------- |\n"
+        markdown += "| {prefix} efficiency |\n"
+        markdown += "| ------------------------------- |\n"
+        markdown += "| metric | value (unit) |\n"
+        markdown += "| :--------- | -----------------: |\n"
+        markdown += "| efficiency | {value:f} ({unit}) |\n"
+        markdown += "| ------------------------------- |\n"
+
+        return markdown.format(prefix=prefix, **asdict(self))
 
 
 class EnergyTracker:
diff --git a/optimum_benchmark/trackers/latency.py b/optimum_benchmark/trackers/latency.py
index 343a04d7c..2c42b82f5 100644
--- a/optimum_benchmark/trackers/latency.py
+++ b/optimum_benchmark/trackers/latency.py
@@ -1,6 +1,6 @@
 import time
 from contextlib import contextmanager
-from dataclasses import dataclass
+from dataclasses import asdict, dataclass
 from logging import getLogger
 from typing import List, Literal, Optional, Union
 
@@ -74,16 +74,40 @@ def from_values(values: List[float], unit: str) -> "Latency":
         )
 
     def log(self, prefix: str = ""):
-        stdev_percentage = 100 * self.stdev / self.mean if self.mean > 0 else 0
         LOGGER.info(f"\t\t+ {prefix} latency:")
         LOGGER.info(f"\t\t\t- count: {self.count}")
-        LOGGER.info(f"\t\t\t- total: {self.total:f} {self.unit}")
-        LOGGER.info(f"\t\t\t- mean: {self.mean:f} {self.unit}")
-        LOGGER.info(f"\t\t\t- stdev: {self.stdev:f} {self.unit} ({stdev_percentage:.2f}%)")
-        LOGGER.info(f"\t\t\t- p50: {self.p50:f} {self.unit}")
-        LOGGER.info(f"\t\t\t- p90: {self.p90:f} {self.unit}")
-        LOGGER.info(f"\t\t\t- p95: {self.p95:f} {self.unit}")
-        LOGGER.info(f"\t\t\t- p99: {self.p99:f} {self.unit}")
+        LOGGER.info(f"\t\t\t- total: {self.total:f} ({self.unit})")
+        LOGGER.info(f"\t\t\t- mean: {self.mean:f} ({self.unit})")
+        LOGGER.info(f"\t\t\t- p50: {self.p50:f} ({self.unit})")
+        LOGGER.info(f"\t\t\t- p90: {self.p90:f} ({self.unit})")
+        LOGGER.info(f"\t\t\t- p95: {self.p95:f} ({self.unit})")
+        LOGGER.info(f"\t\t\t- p99: {self.p99:f} ({self.unit})")
+        LOGGER.info(f"\t\t\t- stdev: {self.stdev:f} ({self.unit})")
+        LOGGER.info(f"\t\t\t- stdev_percentage: {self.stdev_percentage:.2f} (%)")
+
+    def markdown(self, prefix: str = "") -> str:
+        markdown = ""
+        markdown += "| -------------------------------------- |\n"
+        markdown += "| {prefix} latency |\n"
+        markdown += "| -------------------------------------- |\n"
+        markdown += "| metric | value (unit) |\n"
+        markdown += "| :-------- | -------------------------: |\n"
+        markdown += "| count | {count} |\n"
+        markdown += "| total | {total:f} ({unit}) |\n"
+        markdown += "| mean | {mean:f} ({unit}) |\n"
+        markdown += "| p50 | {p50:f} ({unit}) |\n"
+        markdown += "| p90 | {p90:f} ({unit}) |\n"
+        markdown += "| p95 | {p95:f} ({unit}) |\n"
+        markdown += "| p99 | {p99:f} ({unit}) |\n"
+        markdown += "| stdev | {stdev:f} ({unit}) |\n"
+        markdown += "| stdev_percentage | {stdev_percentage:.2f} (%) |\n"
+        markdown += "| -------------------------------------- |\n"
+
+        return markdown.format(prefix=prefix, stdev_percentage=self.stdev_percentage, **asdict(self))
+
+    @property
+    def stdev_percentage(self) -> float:
+        return 100 * self.stdev / self.mean if self.mean > 0 else 0
 
 
 @dataclass
@@ -109,9 +133,21 @@ def from_latency(latency: Latency, volume: int, unit: str) -> "Throughput":
         value = volume / latency.mean if latency.mean > 0 else 0
         return Throughput(value=value, unit=unit)
 
-    def log(self, prefix: str = "method"):
+    def log(self, prefix: str = ""):
         LOGGER.info(f"\t\t+ {prefix} throughput: {self.value:f} {self.unit}")
 
+    def markdown(self, prefix: str = "") -> str:
+        markdown = ""
+        markdown += "| ------------------------------- |\n"
+        markdown += "| {prefix} throughput |\n"
+        markdown += "| ------------------------------- |\n"
+        markdown += "| metric | value (unit) |\n"
+        markdown += "| :--------- | -----------------: |\n"
+        markdown += "| throughput | {value:f} ({unit}) |\n"
+        markdown += "| ------------------------------- |\n"
+
+        return markdown.format(prefix=prefix, **asdict(self))
+
 
 class LatencyTracker:
     def __init__(self, device: str, backend: str):
diff --git a/optimum_benchmark/trackers/memory.py b/optimum_benchmark/trackers/memory.py
index ba515d516..db6b4c9ea 100644
--- a/optimum_benchmark/trackers/memory.py
+++ b/optimum_benchmark/trackers/memory.py
@@ -1,6 +1,6 @@
 import os
 from contextlib import contextmanager
-from dataclasses import dataclass
+from dataclasses import asdict, dataclass
 from logging import getLogger
 from multiprocessing import Pipe, Process
 from multiprocessing.connection import Connection
@@ -90,6 +90,27 @@ def log(self, prefix: str = ""):
         if self.max_allocated is not None:
             LOGGER.info(f"\t\t\t- max allocated memory: {self.max_allocated:f} ({self.unit})")
 
+    def markdown(self, prefix: str = "") -> str:
+        markdown = ""
+        markdown += "| ----------------------------------------------------- |\n"
+        markdown += "| {prefix} memory |\n"
+        markdown += "| ----------------------------------------------------- |\n"
+        markdown += "| metric | value (unit) |\n"
+        markdown += "| :----- | ---------------------------------------------: |\n"
+        if self.max_ram is not None:
+            markdown += "| max RAM | {max_ram:f} ({unit}) |\n"
+        if self.max_global_vram is not None:
+            markdown += "| max global VRAM | {max_global_vram:f} ({unit}) |\n"
+        if self.max_process_vram is not None:
+            markdown += "| max process VRAM | {max_process_vram:f} ({unit}) |\n"
+        if self.max_reserved is not None:
+            markdown += "| max reserved memory | {max_reserved:f} ({unit}) |\n"
+        if self.max_allocated is not None:
+            markdown += "| max allocated memory | {max_allocated:f} ({unit}) |\n"
+        markdown += "| ----------------------------------------------------- |\n"
+
+        return markdown.format(prefix=prefix, **asdict(self))
+
 
 class MemoryTracker:
     def __init__(self, device: str, backend: str, device_ids: Optional[Union[str, int, List[int]]] = None):
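
---

A quick usage sketch of the unified logging/markdown API introduced above. This is
illustrative only: the timing values are made up, and it exercises just the methods
defined or kept by this patch (Latency.from_values, log, markdown).

    from optimum_benchmark.trackers.latency import Latency

    # Build a Latency measurement from raw per-call timings, in seconds
    # (illustrative values, not real benchmark output).
    latency = Latency.from_values([0.12, 0.11, 0.13, 0.12], unit="s")

    # A single log() entry point prints count, total, mean, percentiles and stdev.
    latency.log(prefix="forward")

    # markdown() renders the same statistics as a table, e.g. for PR comments.
    print(latency.markdown(prefix="forward"))

At the report level, BenchmarkReport.log() and BenchmarkReport.markdown() loop over
each target's BenchmarkMeasurements and dispatch to these per-metric methods, which
is what the inline launcher now relies on when it calls report.log() after the
worker returns.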