-
Notifications
You must be signed in to change notification settings - Fork 8
/
report.py
executable file
·109 lines (87 loc) · 3.45 KB
/
report.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#!/usr/bin/env python3
"""Generate an accuracy report with confusion matrix for
a given SVM model on a test data set.
@author Dat Hoang
@date March 2011"""
import csv
import sys
from collections import defaultdict
from subprocess import Popen, PIPE
from optparse import OptionParser
from itertools import chain
SVM_PREDICT = "~/shared/svm-predict"
DELIMITER = '\t'
def parse_name_table(name_fd):
"""Parse a hash table for name fields."""
reader = csv.reader(name_fd, delimiter=DELIMITER)
return dict((int(i),name) for i, name in reader)
def gen_report(name_table, pairs, out_fp):
"""Generate report based on (actual, estimated) pairs."""
matrix = defaultdict(int)
actual_table = defaultdict(int)
estimated_table = defaultdict(int)
for (actual, estimated) in pairs:
matrix[(actual,estimated)] += 1
actual_table[actual] += 1
estimated_table[estimated] += 1
categories = set(chain(actual_table.keys(), estimated_table.keys()))
all_correct = sum(matrix[(c,c)] for c in categories)
all_total = sum(actual_table.values())
#overall accuracy
all_total = float(all_total) if all_total > 0 else 1.0
out_fp.write("\nOverall accuracy: %f\n" % (all_correct / all_total))
#confusion matrix
out_fp.write("\nX=SystemAnswer, Y=CorrectAnswer\n\n")
for c in categories:
out_fp.write("%-5s\t" % name_table.get(c, c))
out_fp.write("\t".join("%-4s" % matrix[(c, p)] for p in categories))
out_fp.write("\n")
#category results
out_fp.write("\nLabel\tCorrect\tTotal\tPrec\tRecall\tFmeas\n")
for c in categories:
correct = matrix[(c,c)]
total = actual_table[c]
precision = correct / float(estimated_table[c]) \
if estimated_table[c] != 0 else 0.0
recall = correct / float(total) \
if total != 0 else 0.0
fmeas = 2 * precision * recall / (precision + recall) \
if (precision + recall) != 0.0 else 0.0
out_fp.write("%-5s" % name_table.get(c, c))
out_fp.write("%6d\t%6d\t%6.3f\t%6.3f\t%6.3f\n" % \
(correct, total, precision, recall, fmeas))
def main():
global SVM_PREDICT
parser = OptionParser(
usage="""Usage: %prog [options] <model> <svm-file>
<model> := trained SVM model file
<svm-file> := SVM file to test""")
parser.add_option("--svm-predict", dest="svm_predict", metavar="PATHNAME",
type='string', default=SVM_PREDICT,
help="path of SVM predictor [default: %default]")
parser.add_option("-f", "--fields", dest="fields",
metavar="PATHNAME", type='string', default=None,
help="file of field mappings from integer to names")
(options, args) = parser.parse_args()
if len(args) != 2:
parser.print_usage(file=sys.stderr)
return 1
SVM_PREDICT = options.svm_predict
fields_file = options.fields
model_file = args[0]
svm_file = args[1]
if fields_file:
with open(fields_file, 'r') as field_fp:
table = parse_name_table(field_fp)
else:
table = {}
with open(svm_file, 'r') as ifp:
actual = [int(line.split()[0]) for line in ifp]
estimated = map(int,
Popen("%s %s %s /dev/stdout | head -n -1" %
(SVM_PREDICT, svm_file, model_file),
shell=True, stdout=PIPE).stdout.readlines())
gen_report(table, zip(actual, estimated), sys.stdout)
return 0
if __name__=="__main__":
sys.exit(main())