#!/usr/bin/env python3
# Compare results generated by `blimp-test --perf-report`. For each test common
# to both runs, we report the ratio of the average times and flag likely
# performance regressions.
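#
# Typical usage (the file names here are just examples):
#
#   ./perf_compare.py baseline.csv new.csv
#
# Each report is expected to be a CSV with a header row followed by one row per
# benchmark containing the columns unpacked by Result below: group, test,
# benchmark, iter, ops, cycles_per_ns, avg, std_dev, min, max.
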
import argparse
import csv
from functools import reduce
import math
import itertools


class Result(object):
    def __init__(self, line):
        (self.group, self.test, self.benchmark, self.iter, self.ops,
         self.cycles_per_ns, self.avg, self.std_dev, self.min, self.max) = line
        self.iter = int(self.iter)
        self.ops = int(self.ops)
        self.cycles_per_ns = float(self.cycles_per_ns)
        self.avg = float(self.avg)
        self.std_dev = float(self.std_dev)
        self.min = float(self.min)
        self.max = float(self.max)

        self.key = (self.group, self.test, self.benchmark)


def read_report(path):
    with open(path, "r") as f:
        rows = csv.reader(f)
        # Drop the first line, which is a header.
        next(rows)
        return {result.key: result
                for result in (Result(row) for row in rows)}


def is_regression(r1, r2):
    # Some of the benchmarks are multimodal, which results in a very large
    # standard deviation that is not representative of the true uncertainty of
    # the benchmark. For these benchmarks, we use the average of the averages as
    # a tolerance. This method is imprecise and not statistically meaningful. A
    # better approach would be to get an actual distribution, try to find the
    # modes, and compare them.
    tolerance1 = 1.5*r1.std_dev if r1.std_dev < r1.avg else r1.avg/2
    tolerance2 = 1.5*r2.std_dev if r2.std_dev < r2.avg else r2.avg/2
    return (r2.avg - tolerance2) > (r1.avg + tolerance1)
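
# A hypothetical worked example of the heuristic above (all numbers are made
# up): if the baseline has avg=100 and std_dev=10, its tolerance is 15; if the
# new run has avg=130 and std_dev=8, its tolerance is 12. Since
# 130 - 12 = 118 > 100 + 15 = 115, the test is flagged as a regression. A new
# run with avg=112 and std_dev=8 would not be flagged.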


def ratio_dev(r1, r2):
    assert r1.avg != 0
    assert r2.avg != 0
    ret = (r2.avg/r1.avg)*math.sqrt((r1.std_dev/r1.avg)**2 + (r2.std_dev/r2.avg)**2)
    if math.isnan(ret):
        print((r1.avg, r1.std_dev, r2.avg, r2.std_dev))
    return ret
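
# The expression in ratio_dev is standard first-order error propagation for a
# quotient, assuming the two runs are independent:
#
#   std_dev(avg2/avg1) ~= (avg2/avg1) * sqrt((std_dev1/avg1)**2 + (std_dev2/avg2)**2)
#
# so the reported uncertainty combines the relative spread of both runs.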


def compare_results(r1, r2):
    def rmax(field):
        result_max = max(len(getattr(r, field))
                         for r in itertools.chain(r1.values(), r2.values()))
        special_max = max(map(len, ["Geomean"]))
        return max(result_max, special_max)

    fmt_params = {
        "group_len": rmax("group"),
        "test_len": rmax("test"),
        "benchmark_len": rmax("benchmark"),
    }

    print(" {group:<{group_len}} {test:<{test_len}} {benchmark:<{benchmark_len}} Ratio (lower is better)".format(
        group="Group", test="Test", benchmark="Benchmark", **fmt_params))
    print("-"*80)

    fmt = "{regression}{group:<{group_len}} {test:<{test_len}} {benchmark:<{benchmark_len}} {ratio:3.3f} +- {std_dev:2.3f}"

    # Find all the tests which have results in both result sets, and compute the
    # ratio of the average times for each (lower is better).
    compared = [(r, r2[key], r2[key].avg/r.avg)
                for key, r in r1.items() if key in r2]

    # Sort from worst to best.
    compared.sort(key=lambda c: (c[2], c[0].key), reverse=True)

    # Print the results.
    for result1, result2, ratio in compared:
        print(fmt.format(
            group=result1.group,
            test=result1.test,
            benchmark=result1.benchmark,
            ratio=ratio,
            std_dev=ratio_dev(result1, result2),
            regression="*" if is_regression(result1, result2) else " ",
            **fmt_params))

    print("-"*80)
    geomean = reduce(lambda x, y: x*y, (c[2] for c in compared))**(1.0/len(compared))
    print(fmt.format(
        group="Geomean",
        test="",
        benchmark="",
        ratio=geomean,
        std_dev=float("nan"),
        regression=" ",
        **fmt_params))
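
# For reference, the table printed above looks roughly like this (the names and
# values are made up, and a leading '*' marks a suspected regression):
#
#   *core   parse   large_input   1.312 +- 0.041
#    core   eval    small_input   0.974 +- 0.012
#
# The final Geomean row is the geometric mean of all the ratios, so, e.g.,
# ratios of 0.8 and 1.25 cancel out to 1.0.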


def main():
    parser = argparse.ArgumentParser(
        description="compare performance reports generated by blimp-test")
    parser.add_argument("baseline", help="the baseline CSV to compare against")
    parser.add_argument("report", help="the new CSV to compare")
    args = parser.parse_args()

    compare_results(read_report(args.baseline), read_report(args.report))


if __name__ == "__main__":
    main()