
Commit: test
IlyasMoutawwakil committed Sep 26, 2024
1 parent 561deca commit 26bb362
Showing 6 changed files with 159 additions and 80 deletions.
88 changes: 44 additions & 44 deletions optimum_benchmark/benchmark/report.py
@@ -39,7 +39,37 @@ def aggregate(measurements: List["BenchmarkMeasurements"]) -> "BenchmarkMeasurements":
energy = Energy.aggregate([m.energy for m in measurements]) if m0.energy is not None else None
efficiency = Efficiency.aggregate([m.efficiency for m in measurements]) if m0.efficiency is not None else None

return BenchmarkMeasurements(memory, latency, throughput, energy, efficiency)
return BenchmarkMeasurements(
memory=memory, latency=latency, throughput=throughput, energy=energy, efficiency=efficiency
)

def log(self, prefix: str = ""):
if self.memory is not None:
self.memory.log(prefix=prefix)
if self.latency is not None:
self.latency.log(prefix=prefix)
if self.throughput is not None:
self.throughput.log(prefix=prefix)
if self.energy is not None:
self.energy.log(prefix=prefix)
if self.efficiency is not None:
self.efficiency.log(prefix=prefix)

def markdown(self, prefix: str = "") -> str:
markdown = ""

if self.memory is not None:
markdown += self.memory.markdown(prefix=prefix)
if self.latency is not None:
markdown += self.latency.markdown(prefix=prefix)
if self.throughput is not None:
markdown += self.throughput.markdown(prefix=prefix)
if self.energy is not None:
markdown += self.energy.markdown(prefix=prefix)
if self.efficiency is not None:
markdown += self.efficiency.markdown(prefix=prefix)

return markdown


@dataclass
@@ -59,58 +89,28 @@ def __post_init__(self):
elif isinstance(getattr(self, target), dict):
setattr(self, target, BenchmarkMeasurements(**getattr(self, target)))

def log_memory(self):
for target in self.to_dict().keys():
measurements: BenchmarkMeasurements = getattr(self, target)
if measurements.memory is not None:
measurements.memory.log(prefix=target)

def log_latency(self):
for target in self.to_dict().keys():
measurements: BenchmarkMeasurements = getattr(self, target)
if measurements.latency is not None:
measurements.latency.log(prefix=target)
@classmethod
def aggregate(cls, reports: List["BenchmarkReport"]) -> "BenchmarkReport":
aggregated_measurements = {}
for target in reports[0].to_dict().keys():
measurements = [getattr(report, target) for report in reports]
aggregated_measurements[target] = BenchmarkMeasurements.aggregate(measurements)

def log_throughput(self):
for target in self.to_dict().keys():
measurements: BenchmarkMeasurements = getattr(self, target)
if measurements.throughput is not None:
measurements.throughput.log(prefix=target)
return cls.from_dict(aggregated_measurements)

def log_energy(self):
def log(self):
for target in self.to_dict().keys():
measurements: BenchmarkMeasurements = getattr(self, target)
if measurements.energy is not None:
measurements.energy.log(prefix=target)
measurements.log(prefix=target)

def log_efficiency(self):
for target in self.to_dict().keys():
measurements: BenchmarkMeasurements = getattr(self, target)
if measurements.efficiency is not None:
measurements.efficiency.log(prefix=target)
def markdown(self):
markdown = ""

def log(self):
for target in self.to_dict().keys():
measurements: BenchmarkMeasurements = getattr(self, target)
if measurements.memory is not None:
measurements.memory.log(prefix=target)
if measurements.latency is not None:
measurements.latency.log(prefix=target)
if measurements.throughput is not None:
measurements.throughput.log(prefix=target)
if measurements.energy is not None:
measurements.energy.log(prefix=target)
if measurements.efficiency is not None:
measurements.efficiency.log(prefix=target)
markdown += measurements.markdown(prefix=target)

@classmethod
def aggregate(cls, reports: List["BenchmarkReport"]) -> "BenchmarkReport":
aggregated_measurements = {}
for target in reports[0].to_dict().keys():
measurements = [getattr(report, target) for report in reports]
aggregated_measurements[target] = BenchmarkMeasurements.aggregate(measurements)

return cls.from_dict(aggregated_measurements)
return markdown

@classproperty
def default_filename(self) -> str:
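The net effect in this file: the five per-metric `log_*` methods collapse into a single `log()`, and a `markdown()` renderer is added at both the `BenchmarkMeasurements` and `BenchmarkReport` levels. A minimal sketch of the new surface; `report_a` and `report_b` stand in for `BenchmarkReport` instances from two runs and are not defined here:

```python
# Hypothetical usage sketch; report_a and report_b are assumed to be
# BenchmarkReport instances produced by two benchmark runs.
from optimum_benchmark.benchmark.report import BenchmarkReport

aggregated = BenchmarkReport.aggregate([report_a, report_b])
aggregated.log()              # replaces log_memory()/log_latency()/...
print(aggregated.markdown())  # markdown tables, one block per target metric
```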
3 changes: 3 additions & 0 deletions optimum_benchmark/launchers/inline/launcher.py
@@ -13,5 +13,8 @@ def __init__(self, config: InlineConfig):

def launch(self, worker: Callable[..., BenchmarkReport], worker_args: List[Any]) -> BenchmarkReport:
self.logger.warning("The inline launcher is only recommended for debugging purposes and not for benchmarking")

report = worker(*worker_args)
report.log()

return report
27 changes: 9 additions & 18 deletions optimum_benchmark/scenarios/inference/scenario.py
@@ -13,7 +13,7 @@
from ..base import Scenario
from .config import InferenceConfig

PER_TOKEN_BACKENDS = ["pytorch", "onnxruntime", "openvino", "neural-compressor"]
PER_TOKEN_BACKENDS = ["pytorch", "onnxruntime", "openvino", "neural-compressor", "ipex"]

TEXT_GENERATION_DEFAULT_KWARGS = {
"num_return_sequences": 1,
@@ -99,8 +99,6 @@ def run(self, backend: Backend[BackendConfigT]) -> BenchmarkReport:
else:
self.run_inference_memory_tracking(backend)

self.report.log_memory()

if self.config.latency or self.config.energy:
# latency and energy are metrics that require some warmup
if backend.config.task in TEXT_GENERATION_TASKS:
@@ -121,9 +119,6 @@ def run(self, backend: Backend[BackendConfigT]) -> BenchmarkReport:
else:
self.run_latency_inference_tracking(backend)

self.report.log_latency()
self.report.log_throughput()

if self.config.energy:
if backend.config.task in TEXT_GENERATION_TASKS:
self.run_text_generation_energy_tracking(backend)
@@ -132,11 +127,9 @@ def run(self, backend: Backend[BackendConfigT]) -> BenchmarkReport:
else:
self.run_inference_energy_tracking(backend)

self.report.log_energy()
self.report.log_efficiency()

return self.report

# Warmup
def warmup_text_generation(self, backend: Backend[BackendConfigT]):
self.logger.info("\t+ Warming up backend for Text Generation")
_ = backend.generate(self.inputs, self.config.generate_kwargs)
@@ -169,16 +162,14 @@ def run_model_loading_tracking(self, backend: Backend[BackendConfigT]):
backend=backend.config.name, device=backend.config.device, device_ids=backend.config.device_ids
)

context_stack = ExitStack()
if self.config.latency:
context_stack.enter_context(latency_tracker.track())
if self.config.memory:
context_stack.enter_context(memory_tracker.track())
if self.config.energy:
context_stack.enter_context(energy_tracker.track())
with ExitStack() as context_stack:
if self.config.latency:
context_stack.enter_context(latency_tracker.track())
if self.config.memory:
context_stack.enter_context(memory_tracker.track())
if self.config.energy:
context_stack.enter_context(energy_tracker.track())

with context_stack:
self.logger.info("\t+ Loading model for Inference")
backend.load()

if self.config.latency:
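The tracking refactor above moves the conditional `enter_context` calls inside a `with ExitStack() as context_stack:` block, so every tracker that was entered is guaranteed to exit even if model loading raises. A self-contained sketch of that pattern, using a stand-in tracker rather than the real optimum_benchmark classes:

```python
# Stand-in demonstration of the ExitStack pattern; `tracker` is a
# hypothetical context manager, not an optimum_benchmark class.
from contextlib import ExitStack, contextmanager

@contextmanager
def tracker(name: str):
    print(f"start {name} tracking")
    try:
        yield
    finally:
        print(f"stop {name} tracking")  # runs even if the body raises

enabled = {"latency": True, "memory": False, "energy": True}

with ExitStack() as stack:
    # Only the enabled trackers are entered; all entered contexts are
    # released when the block exits, normally or via an exception.
    for name, on in enabled.items():
        if on:
            stack.enter_context(tracker(name))
    print("loading model")
```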
42 changes: 35 additions & 7 deletions optimum_benchmark/trackers/energy.py
@@ -48,12 +48,27 @@ def aggregate(energies: List["Energy"]) -> "Energy":

return Energy(cpu=cpu, gpu=gpu, ram=ram, total=total, unit=ENERGY_UNIT)

def log(self, prefix: str = "forward"):
LOGGER.info(f"\t\t+ {prefix} energy consumption:")
LOGGER.info(f"\t\t\t+ CPU: {self.cpu:f} ({self.unit})")
LOGGER.info(f"\t\t\t+ GPU: {self.gpu:f} ({self.unit})")
LOGGER.info(f"\t\t\t+ RAM: {self.ram:f} ({self.unit})")
LOGGER.info(f"\t\t\t+ total: {self.total:f} ({self.unit})")
def log(self, prefix: str = ""):
LOGGER.info(f"\t\t+ {prefix} energy:")
LOGGER.info(f"\t\t\t- cpu: {self.cpu:f} ({self.unit})")
LOGGER.info(f"\t\t\t- gpu: {self.gpu:f} ({self.unit})")
LOGGER.info(f"\t\t\t- ram: {self.ram:f} ({self.unit})")
LOGGER.info(f"\t\t\t- total: {self.total:f} ({self.unit})")

def markdown(self, prefix: str = "") -> str:
markdown = ""
markdown += "| ---------------------------------------- |\n"
markdown += "| {prefix} energy |\n"
markdown += "| ---------------------------------------- |\n"
markdown += "| metric | value (unit) |\n"
markdown += "| :-------- | ---------------------------: |\n"
markdown += "| cpu | {self.cpu:f} ({self.unit}) |\n"
markdown += "| gpu | {self.gpu:f} ({self.unit}) |\n"
markdown += "| ram | {self.ram:f} ({self.unit}) |\n"
markdown += "| total | {self.total:f} ({self.unit}) |\n"
markdown += "| ---------------------------------------- |\n"

return markdown.format(prefix=prefix, **asdict(self))

def __sub__(self, other: "Energy") -> "Energy":
"""Enables subtraction of two Energy instances using the '-' operator."""
@@ -102,7 +117,20 @@ def from_energy(energy: "Energy", volume: int, unit: str) -> "Efficiency":
return Efficiency(value=volume / energy.total if energy.total > 0 else 0, unit=unit)

def log(self, prefix: str = ""):
LOGGER.info(f"\t\t+ {prefix} energy efficiency: {self.value:f} ({self.unit})")
LOGGER.info(f"\t\t+ {prefix} efficiency: {self.value:f} ({self.unit})")

def markdown(self, prefix: str = "") -> str:
markdown = ""

markdown += "| ------------------------------- |\n"
markdown += "| {prefix} efficiency |\n"
markdown += "| ------------------------------- |\n"
markdown += "| metric | value (unit) |\n"
markdown += "| :--------- | -----------------: |\n"
markdown += "| efficiency | {value:f} ({unit}) |\n"
markdown += "| ------------------------------- |\n"

return markdown.format(prefix=prefix, **asdict(self))


class EnergyTracker:
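Both new `markdown()` methods build a plain template and fill it with `str.format(**asdict(self))`, so placeholders must match the dataclass field names exactly; that is why the energy table uses `{cpu:f}` rather than `{self.cpu:f}` (the latter would raise `KeyError: 'self'`). A reduced sketch of the pattern, with an illustrative dataclass:

```python
# Reduced sketch of the template + asdict rendering pattern; `Reading`
# is illustrative, not a class from this diff.
from dataclasses import asdict, dataclass

@dataclass
class Reading:
    cpu: float
    gpu: float
    unit: str

template = (
    "| metric | value (unit)     |\n"
    "| :----- | ---------------: |\n"
    "| cpu    | {cpu:f} ({unit}) |\n"
    "| gpu    | {gpu:f} ({unit}) |\n"
)
# Placeholder names resolve against the dataclass fields from asdict().
print(template.format(**asdict(Reading(cpu=1.5, gpu=3.0, unit="kWh"))))
```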
56 changes: 46 additions & 10 deletions optimum_benchmark/trackers/latency.py
@@ -1,6 +1,6 @@
import time
from contextlib import contextmanager
from dataclasses import dataclass
from dataclasses import asdict, dataclass
from logging import getLogger
from typing import List, Literal, Optional, Union

@@ -74,16 +74,40 @@ def from_values(values: List[float], unit: str) -> "Latency":
)

def log(self, prefix: str = ""):
stdev_percentage = 100 * self.stdev / self.mean if self.mean > 0 else 0
LOGGER.info(f"\t\t+ {prefix} latency:")
LOGGER.info(f"\t\t\t- count: {self.count}")
LOGGER.info(f"\t\t\t- total: {self.total:f} {self.unit}")
LOGGER.info(f"\t\t\t- mean: {self.mean:f} {self.unit}")
LOGGER.info(f"\t\t\t- stdev: {self.stdev:f} {self.unit} ({stdev_percentage:.2f}%)")
LOGGER.info(f"\t\t\t- p50: {self.p50:f} {self.unit}")
LOGGER.info(f"\t\t\t- p90: {self.p90:f} {self.unit}")
LOGGER.info(f"\t\t\t- p95: {self.p95:f} {self.unit}")
LOGGER.info(f"\t\t\t- p99: {self.p99:f} {self.unit}")
LOGGER.info(f"\t\t\t- total: {self.total:f} ({self.unit})")
LOGGER.info(f"\t\t\t- mean: {self.mean:f} ({self.unit})")
LOGGER.info(f"\t\t\t- p50: {self.p50:f} ({self.unit})")
LOGGER.info(f"\t\t\t- p90: {self.p90:f} ({self.unit})")
LOGGER.info(f"\t\t\t- p95: {self.p95:f} ({self.unit})")
LOGGER.info(f"\t\t\t- p99: {self.p99:f} ({self.unit})")
LOGGER.info(f"\t\t\t- stdev: {self.stdev:f} ({self.unit})")
LOGGER.info(f"\t\t\t- stdev_: {self.stdev_percentage:.2f} (%)")

def markdown(self, prefix: str = "") -> str:
markdown = ""
markdown += "| -------------------------------------- |\n"
markdown += "| {prefix} latency |\n"
markdown += "| -------------------------------------- |\n"
markdown += "| metric | value (unit) |\n"
markdown += "| :-------- | -------------------------: |\n"
markdown += "| count | {count} |\n"
markdown += "| total | {total:f} ({unit}) |\n"
markdown += "| mean | {mean:f} ({unit}) |\n"
markdown += "| p50 | {p50:f} ({unit}) |\n"
markdown += "| p90 | {p90:f} ({unit}) |\n"
markdown += "| p95 | {p95:f} ({unit}) |\n"
markdown += "| p99 | {p99:f} ({unit}) |\n"
markdown += "| stdev | {stdev:f} ({unit}) |\n"
markdown += "| stdev_ | {stdev_percentage:.2f} (%) |\n"
markdown += "| -------------------------------------- |\n"

return markdown.format(prefix=prefix, stdev_percentage=self.stdev_percentage, **asdict(self))

@property
def stdev_percentage(self) -> float:
return 100 * self.stdev / self.mean if self.mean > 0 else 0


@dataclass
@@ -109,9 +133,21 @@ def from_latency(latency: Latency, volume: int, unit: str) -> "Throughput":
value = volume / latency.mean if latency.mean > 0 else 0
return Throughput(value=value, unit=unit)

def log(self, prefix: str = "method"):
def log(self, prefix: str = ""):
LOGGER.info(f"\t\t+ {prefix} throughput: {self.value:f} {self.unit}")

def markdown(self, prefix: str = "") -> str:
markdown = ""
markdown += "| ------------------------------- |\n"
markdown += "| {prefix} throughput |\n"
markdown += "| ------------------------------- |\n"
markdown += "| metric | value (unit) |\n"
markdown += "| :--------- | -----------------: |\n"
markdown += "| throughput | {value:f} ({unit}) |\n"
markdown += "| ------------------------------- |\n"

return markdown.format(prefix=prefix, **asdict(self))


class LatencyTracker:
def __init__(self, device: str, backend: str):
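`Throughput.from_latency` divides a volume (e.g. the number of samples processed per call) by the mean latency, guarding against a zero mean. Worked numbers, purely illustrative:

```python
# Illustrative arithmetic for Throughput.from_latency: volume / mean latency.
mean_latency_s = 0.025  # hypothetical mean latency per call, in seconds
volume = 16             # hypothetical samples processed per call

value = volume / mean_latency_s if mean_latency_s > 0 else 0
print(f"{value:f} samples/s")  # 640.000000 samples/s
```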
23 changes: 22 additions & 1 deletion optimum_benchmark/trackers/memory.py
@@ -1,6 +1,6 @@
import os
from contextlib import contextmanager
from dataclasses import dataclass
from dataclasses import asdict, dataclass
from logging import getLogger
from multiprocessing import Pipe, Process
from multiprocessing.connection import Connection
@@ -90,6 +90,27 @@ def log(self, prefix: str = ""):
if self.max_allocated is not None:
LOGGER.info(f"\t\t\t- max allocated memory: {self.max_allocated:f} ({self.unit})")

def markdown(self, prefix: str = "") -> str:
markdown = ""
markdown += "| ----------------------------------------------------- |\n"
markdown += "| {prefix} memory: |\n"
markdown += "| ----------------------------------------------------- |\n"
markdown += "| metric | value (unit) |\n"
markdown += "| ------ | -------------------------------------------- |\n"
if self.max_ram is not None:
markdown += "| max RAM | {max_ram:f} ({unit}) |\n"
if self.max_global_vram is not None:
markdown += "| max global VRAM | {max_global_vram:f} ({unit}) |\n"
if self.max_process_vram is not None:
markdown += "| max process VRAM | {max_process_vram:f} ({unit}) |\n"
if self.max_reserved is not None:
markdown += "| max reserved memory | {max_reserved:f} ({unit}) |\n"
if self.max_allocated is not None:
markdown += "| max allocated memory | {max_allocated:f} ({unit}) |\n"
markdown += "| ----------------------------------------------------- |\n"

return markdown.format(prefix=prefix, **asdict(self))


class MemoryTracker:
def __init__(self, device: str, backend: str, device_ids: Optional[Union[str, int, List[int]]] = None):
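`Memory.markdown()` appends a row only for the fields that are not `None`, yet still formats with every field via `**asdict(self)`; this works because `str.format` silently ignores unused keyword arguments. A one-line illustration (names hypothetical):

```python
# str.format ignores extra keyword arguments, so passing None fields that
# never appear in the template is safe (field names here are hypothetical).
row = "| max RAM | {max_ram:f} ({unit}) |"
print(row.format(max_ram=512.0, unit="MB", max_allocated=None))
```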
