
Commit: test
IlyasMoutawwakil committed Sep 26, 2024
1 parent 561deca commit 26bb362
Showing 6 changed files with 159 additions and 80 deletions.
88 changes: 44 additions & 44 deletions optimum_benchmark/benchmark/report.py
@@ -39,7 +39,37 @@ def aggregate(measurements: List["BenchmarkMeasurements"]) -> "BenchmarkMeasurements":
energy = Energy.aggregate([m.energy for m in measurements]) if m0.energy is not None else None
efficiency = Efficiency.aggregate([m.efficiency for m in measurements]) if m0.efficiency is not None else None

return BenchmarkMeasurements(memory, latency, throughput, energy, efficiency)
return BenchmarkMeasurements(
memory=memory, latency=latency, throughput=throughput, energy=energy, efficiency=efficiency
)

def log(self, prefix: str = ""):
if self.memory is not None:
self.memory.log(prefix=prefix)
if self.latency is not None:
self.latency.log(prefix=prefix)
if self.throughput is not None:
self.throughput.log(prefix=prefix)
if self.energy is not None:
self.energy.log(prefix=prefix)
if self.efficiency is not None:
self.efficiency.log(prefix=prefix)

def markdown(self, prefix: str = "") -> str:
markdown = ""

if self.memory is not None:
markdown += self.memory.markdown(prefix=prefix)
if self.latency is not None:
markdown += self.latency.markdown(prefix=prefix)
if self.throughput is not None:
markdown += self.throughput.markdown(prefix=prefix)
if self.energy is not None:
markdown += self.energy.markdown(prefix=prefix)
if self.efficiency is not None:
markdown += self.efficiency.markdown(prefix=prefix)

return markdown


@dataclass
@@ -59,58 +89,28 @@ def __post_init__(self):
elif isinstance(getattr(self, target), dict):
setattr(self, target, BenchmarkMeasurements(**getattr(self, target)))

def log_memory(self):
for target in self.to_dict().keys():
measurements: BenchmarkMeasurements = getattr(self, target)
if measurements.memory is not None:
measurements.memory.log(prefix=target)

def log_latency(self):
for target in self.to_dict().keys():
measurements: BenchmarkMeasurements = getattr(self, target)
if measurements.latency is not None:
measurements.latency.log(prefix=target)
@classmethod
def aggregate(cls, reports: List["BenchmarkReport"]) -> "BenchmarkReport":
aggregated_measurements = {}
for target in reports[0].to_dict().keys():
measurements = [getattr(report, target) for report in reports]
aggregated_measurements[target] = BenchmarkMeasurements.aggregate(measurements)

def log_throughput(self):
for target in self.to_dict().keys():
measurements: BenchmarkMeasurements = getattr(self, target)
if measurements.throughput is not None:
measurements.throughput.log(prefix=target)
return cls.from_dict(aggregated_measurements)

def log_energy(self):
def log(self):
for target in self.to_dict().keys():
measurements: BenchmarkMeasurements = getattr(self, target)
if measurements.energy is not None:
measurements.energy.log(prefix=target)
measurements.log(prefix=target)

def log_efficiency(self):
for target in self.to_dict().keys():
measurements: BenchmarkMeasurements = getattr(self, target)
if measurements.efficiency is not None:
measurements.efficiency.log(prefix=target)
def markdown(self):
markdown = ""

def log(self):
for target in self.to_dict().keys():
measurements: BenchmarkMeasurements = getattr(self, target)
if measurements.memory is not None:
measurements.memory.log(prefix=target)
if measurements.latency is not None:
measurements.latency.log(prefix=target)
if measurements.throughput is not None:
measurements.throughput.log(prefix=target)
if measurements.energy is not None:
measurements.energy.log(prefix=target)
if measurements.efficiency is not None:
measurements.efficiency.log(prefix=target)
markdown += measurements.markdown(prefix=target)

@classmethod
def aggregate(cls, reports: List["BenchmarkReport"]) -> "BenchmarkReport":
aggregated_measurements = {}
for target in reports[0].to_dict().keys():
measurements = [getattr(report, target) for report in reports]
aggregated_measurements[target] = BenchmarkMeasurements.aggregate(measurements)

return cls.from_dict(aggregated_measurements)
return markdown

@classproperty
def default_filename(self) -> str:
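The net effect in this file: the five per-metric `log_*` methods collapse into a single `log()`, and a `markdown()` renderer is added at both the `BenchmarkMeasurements` and `BenchmarkReport` levels. A minimal sketch of the new surface; `report_a` and `report_b` stand in for `BenchmarkReport` instances from two runs and are not defined here:

```python
# Hypothetical usage sketch; report_a and report_b are assumed to be
# BenchmarkReport instances produced by two benchmark runs.
from optimum_benchmark.benchmark.report import BenchmarkReport

aggregated = BenchmarkReport.aggregate([report_a, report_b])
aggregated.log()              # replaces log_memory()/log_latency()/...
print(aggregated.markdown())  # markdown tables, one block per target metric
```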
3 changes: 3 additions & 0 deletions optimum_benchmark/launchers/inline/launcher.py
@@ -13,5 +13,8 @@ def __init__(self, config: InlineConfig):

def launch(self, worker: Callable[..., BenchmarkReport], worker_args: List[Any]) -> BenchmarkReport:
self.logger.warning("The inline launcher is only recommended for debugging purposes and not for benchmarking")

report = worker(*worker_args)
report.log()

return report
27 changes: 9 additions & 18 deletions optimum_benchmark/scenarios/inference/scenario.py
@@ -13,7 +13,7 @@
from ..base import Scenario
from .config import InferenceConfig

PER_TOKEN_BACKENDS = ["pytorch", "onnxruntime", "openvino", "neural-compressor"]
PER_TOKEN_BACKENDS = ["pytorch", "onnxruntime", "openvino", "neural-compressor", "ipex"]

TEXT_GENERATION_DEFAULT_KWARGS = {
"num_return_sequences": 1,
@@ -99,8 +99,6 @@ def run(self, backend: Backend[BackendConfigT]) -> BenchmarkReport:
else:
self.run_inference_memory_tracking(backend)

self.report.log_memory()

if self.config.latency or self.config.energy:
# latency and energy are metrics that require some warmup
if backend.config.task in TEXT_GENERATION_TASKS:
@@ -121,9 +119,6 @@ def run(self, backend: Backend[BackendConfigT]) -> BenchmarkReport:
else:
self.run_latency_inference_tracking(backend)

self.report.log_latency()
self.report.log_throughput()

if self.config.energy:
if backend.config.task in TEXT_GENERATION_TASKS:
self.run_text_generation_energy_tracking(backend)
@@ -132,11 +127,9 @@ def run(self, backend: Backend[BackendConfigT]) -> BenchmarkReport:
else:
self.run_inference_energy_tracking(backend)

self.report.log_energy()
self.report.log_efficiency()

return self.report

# Warmup
def warmup_text_generation(self, backend: Backend[BackendConfigT]):
self.logger.info("\t+ Warming up backend for Text Generation")
_ = backend.generate(self.inputs, self.config.generate_kwargs)
@@ -169,16 +162,14 @@ def run_model_loading_tracking(self, backend: Backend[BackendConfigT]):
backend=backend.config.name, device=backend.config.device, device_ids=backend.config.device_ids
)

context_stack = ExitStack()
if self.config.latency:
context_stack.enter_context(latency_tracker.track())
if self.config.memory:
context_stack.enter_context(memory_tracker.track())
if self.config.energy:
context_stack.enter_context(energy_tracker.track())
with ExitStack() as context_stack:
if self.config.latency:
context_stack.enter_context(latency_tracker.track())
if self.config.memory:
context_stack.enter_context(memory_tracker.track())
if self.config.energy:
context_stack.enter_context(energy_tracker.track())

with context_stack:
self.logger.info("\t+ Loading model for Inference")
backend.load()

if self.config.latency:
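The tracking refactor above moves the conditional `enter_context` calls inside a `with ExitStack() as context_stack:` block, so every tracker that was entered is guaranteed to exit even if model loading raises. A self-contained sketch of that pattern, using a stand-in tracker rather than the real optimum_benchmark classes:

```python
# Stand-in demonstration of the ExitStack pattern; `tracker` is a
# hypothetical context manager, not an optimum_benchmark class.
from contextlib import ExitStack, contextmanager

@contextmanager
def tracker(name: str):
    print(f"start {name} tracking")
    try:
        yield
    finally:
        print(f"stop {name} tracking")  # runs even if the body raises

enabled = {"latency": True, "memory": False, "energy": True}

with ExitStack() as stack:
    # Only the enabled trackers are entered; all entered contexts are
    # released when the block exits, normally or via an exception.
    for name, on in enabled.items():
        if on:
            stack.enter_context(tracker(name))
    print("loading model")
```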
42 changes: 35 additions & 7 deletions optimum_benchmark/trackers/energy.py
@@ -48,12 +48,27 @@ def aggregate(energies: List["Energy"]) -> "Energy":

return Energy(cpu=cpu, gpu=gpu, ram=ram, total=total, unit=ENERGY_UNIT)

def log(self, prefix: str = "forward"):
LOGGER.info(f"\t\t+ {prefix} energy consumption:")
LOGGER.info(f"\t\t\t+ CPU: {self.cpu:f} ({self.unit})")
LOGGER.info(f"\t\t\t+ GPU: {self.gpu:f} ({self.unit})")
LOGGER.info(f"\t\t\t+ RAM: {self.ram:f} ({self.unit})")
LOGGER.info(f"\t\t\t+ total: {self.total:f} ({self.unit})")
def log(self, prefix: str = ""):
LOGGER.info(f"\t\t+ {prefix} energy:")
LOGGER.info(f"\t\t\t- cpu: {self.cpu:f} ({self.unit})")
LOGGER.info(f"\t\t\t- gpu: {self.gpu:f} ({self.unit})")
LOGGER.info(f"\t\t\t- ram: {self.ram:f} ({self.unit})")
LOGGER.info(f"\t\t\t- total: {self.total:f} ({self.unit})")

def markdown(self, prefix: str = "") -> str:
markdown = ""
markdown += "| ---------------------------------------- |\n"
markdown += "| {prefix} energy |\n"
markdown += "| ---------------------------------------- |\n"
markdown += "| metric | value (unit) |\n"
markdown += "| :-------- | ---------------------------: |\n"
markdown += "| cpu | {self.cpu:f} ({self.unit}) |\n"
markdown += "| gpu | {self.gpu:f} ({self.unit}) |\n"
markdown += "| ram | {self.ram:f} ({self.unit}) |\n"
markdown += "| total | {self.total:f} ({self.unit}) |\n"
markdown += "| ---------------------------------------- |\n"

return markdown.format(prefix=prefix, **asdict(self))

def __sub__(self, other: "Energy") -> "Energy":
"""Enables subtraction of two Energy instances using the '-' operator."""
@@ -102,7 +117,20 @@ def from_energy(energy: "Energy", volume: int, unit: str) -> "Efficiency":
return Efficiency(value=volume / energy.total if energy.total > 0 else 0, unit=unit)

def log(self, prefix: str = ""):
LOGGER.info(f"\t\t+ {prefix} energy efficiency: {self.value:f} ({self.unit})")
LOGGER.info(f"\t\t+ {prefix} efficiency: {self.value:f} ({self.unit})")

def markdown(self, prefix: str = "") -> str:
markdown = ""

markdown += "| ------------------------------- |\n"
markdown += "| {prefix} efficiency |\n"
markdown += "| ------------------------------- |\n"
markdown += "| metric | value (unit) |\n"
markdown += "| :--------- | -----------------: |\n"
markdown += "| efficiency | {value:f} ({unit}) |\n"
markdown += "| ------------------------------- |\n"

return markdown.format(prefix=prefix, **asdict(self))


class EnergyTracker:
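Both new `markdown()` methods build a plain template and fill it with `str.format(**asdict(self))`, so placeholders must match the dataclass field names exactly; that is why the energy table uses `{cpu:f}` rather than `{self.cpu:f}` (the latter would raise `KeyError: 'self'`). A reduced sketch of the pattern, with an illustrative dataclass:

```python
# Reduced sketch of the template + asdict rendering pattern; `Reading`
# is illustrative, not a class from this diff.
from dataclasses import asdict, dataclass

@dataclass
class Reading:
    cpu: float
    gpu: float
    unit: str

template = (
    "| metric | value (unit)     |\n"
    "| :----- | ---------------: |\n"
    "| cpu    | {cpu:f} ({unit}) |\n"
    "| gpu    | {gpu:f} ({unit}) |\n"
)
# Placeholder names resolve against the dataclass fields from asdict().
print(template.format(**asdict(Reading(cpu=1.5, gpu=3.0, unit="kWh"))))
```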
56 changes: 46 additions & 10 deletions optimum_benchmark/trackers/latency.py
@@ -1,6 +1,6 @@
import time
from contextlib import contextmanager
from dataclasses import dataclass
from dataclasses import asdict, dataclass
from logging import getLogger
from typing import List, Literal, Optional, Union

@@ -74,16 +74,40 @@ def from_values(values: List[float], unit: str) -> "Latency":
)

def log(self, prefix: str = ""):
stdev_percentage = 100 * self.stdev / self.mean if self.mean > 0 else 0
LOGGER.info(f"\t\t+ {prefix} latency:")
LOGGER.info(f"\t\t\t- count: {self.count}")
LOGGER.info(f"\t\t\t- total: {self.total:f} {self.unit}")
LOGGER.info(f"\t\t\t- mean: {self.mean:f} {self.unit}")
LOGGER.info(f"\t\t\t- stdev: {self.stdev:f} {self.unit} ({stdev_percentage:.2f}%)")
LOGGER.info(f"\t\t\t- p50: {self.p50:f} {self.unit}")
LOGGER.info(f"\t\t\t- p90: {self.p90:f} {self.unit}")
LOGGER.info(f"\t\t\t- p95: {self.p95:f} {self.unit}")
LOGGER.info(f"\t\t\t- p99: {self.p99:f} {self.unit}")
LOGGER.info(f"\t\t\t- total: {self.total:f} ({self.unit})")
LOGGER.info(f"\t\t\t- mean: {self.mean:f} ({self.unit})")
LOGGER.info(f"\t\t\t- p50: {self.p50:f} ({self.unit})")
LOGGER.info(f"\t\t\t- p90: {self.p90:f} ({self.unit})")
LOGGER.info(f"\t\t\t- p95: {self.p95:f} ({self.unit})")
LOGGER.info(f"\t\t\t- p99: {self.p99:f} ({self.unit})")
LOGGER.info(f"\t\t\t- stdev: {self.stdev:f} ({self.unit})")
LOGGER.info(f"\t\t\t- stdev_: {self.stdev_percentage:.2f} (%)")

def markdown(self, prefix: str = "") -> str:
markdown = ""
markdown += "| -------------------------------------- |\n"
markdown += "| {prefix} latency |\n"
markdown += "| -------------------------------------- |\n"
markdown += "| metric | value (unit) |\n"
markdown += "| :-------- | -------------------------: |\n"
markdown += "| count | {count} |\n"
markdown += "| total | {total:f} ({unit}) |\n"
markdown += "| mean | {mean:f} ({unit}) |\n"
markdown += "| p50 | {p50:f} ({unit}) |\n"
markdown += "| p90 | {p90:f} ({unit}) |\n"
markdown += "| p95 | {p95:f} ({unit}) |\n"
markdown += "| p99 | {p99:f} ({unit}) |\n"
markdown += "| stdev | {stdev:f} ({unit}) |\n"
markdown += "| stdev_ | {stdev_percentage:.2f} (%) |\n"
markdown += "| -------------------------------------- |\n"

return markdown.format(prefix=prefix, stdev_percentage=self.stdev_percentage, **asdict(self))

@property
def stdev_percentage(self) -> float:
return 100 * self.stdev / self.mean if self.mean > 0 else 0


@dataclass
@@ -109,9 +133,21 @@ def from_latency(latency: Latency, volume: int, unit: str) -> "Throughput":
value = volume / latency.mean if latency.mean > 0 else 0
return Throughput(value=value, unit=unit)

def log(self, prefix: str = "method"):
def log(self, prefix: str = ""):
LOGGER.info(f"\t\t+ {prefix} throughput: {self.value:f} {self.unit}")

def markdown(self, prefix: str = "") -> str:
markdown = ""
markdown += "| ------------------------------- |\n"
markdown += "| {prefix} throughput |\n"
markdown += "| ------------------------------- |\n"
markdown += "| metric | value (unit) |\n"
markdown += "| :--------- | -----------------: |\n"
markdown += "| throughput | {value:f} ({unit}) |\n"
markdown += "| ------------------------------- |\n"

return markdown.format(prefix=prefix, **asdict(self))


class LatencyTracker:
def __init__(self, device: str, backend: str):
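`Throughput.from_latency` divides a volume (e.g. the number of samples processed per call) by the mean latency, guarding against a zero mean. Worked numbers, purely illustrative:

```python
# Illustrative arithmetic for Throughput.from_latency: volume / mean latency.
mean_latency_s = 0.025  # hypothetical mean latency per call, in seconds
volume = 16             # hypothetical samples processed per call

value = volume / mean_latency_s if mean_latency_s > 0 else 0
print(f"{value:f} samples/s")  # 640.000000 samples/s
```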
23 changes: 22 additions & 1 deletion optimum_benchmark/trackers/memory.py
@@ -1,6 +1,6 @@
import os
from contextlib import contextmanager
from dataclasses import dataclass
from dataclasses import asdict, dataclass
from logging import getLogger
from multiprocessing import Pipe, Process
from multiprocessing.connection import Connection
@@ -90,6 +90,27 @@ def log(self, prefix: str = ""):
if self.max_allocated is not None:
LOGGER.info(f"\t\t\t- max allocated memory: {self.max_allocated:f} ({self.unit})")

def markdown(self, prefix: str = "") -> str:
markdown = ""
markdown += "| ----------------------------------------------------- |\n"
markdown += "| {prefix} memory: |\n"
markdown += "| ----------------------------------------------------- |\n"
markdown += "| metric | value (unit) |\n"
markdown += "| ------ | -------------------------------------------- |\n"
if self.max_ram is not None:
markdown += "| max RAM | {max_ram:f} ({unit}) |\n"
if self.max_global_vram is not None:
markdown += "| max global VRAM | {max_global_vram:f} ({unit}) |\n"
if self.max_process_vram is not None:
markdown += "| max process VRAM | {max_process_vram:f} ({unit}) |\n"
if self.max_reserved is not None:
markdown += "| max reserved memory | {max_reserved:f} ({unit}) |\n"
if self.max_allocated is not None:
markdown += "| max allocated memory | {max_allocated:f} ({unit}) |\n"
markdown += "| ----------------------------------------------------- |\n"

return markdown.format(prefix=prefix, **asdict(self))


class MemoryTracker:
def __init__(self, device: str, backend: str, device_ids: Optional[Union[str, int, List[int]]] = None):
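`Memory.markdown()` appends a row only for the fields that are not `None`, yet still formats with every field via `**asdict(self)`; this works because `str.format` silently ignores unused keyword arguments. A one-line illustration (names hypothetical):

```python
# str.format ignores extra keyword arguments, so passing None fields that
# never appear in the template is safe (field names here are hypothetical).
row = "| max RAM | {max_ram:f} ({unit}) |"
print(row.format(max_ram=512.0, unit="MB", max_allocated=None))
```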
