-
Notifications
You must be signed in to change notification settings - Fork 5
/
evaluate.py
131 lines (105 loc) · 4.69 KB
/
evaluate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
"""Compare the output of bulk run with a ground truth"""
import argparse
import json
import sys
import pandas as pd
import numpy as np
from src import clusterize
import matplotlib.pyplot as plt
def get_match(person, res):
    """Tell whether ``person`` is among the names predicted in ``res``.

    The comparison is case-insensitive on both sides.

    :param person: expected person name; the string ``'0'`` is the
        ground-truth marker for "nobody" and never counts as a match.
    :param res: iterable of mappings that each expose a ``'name'`` string.
    :return: ``True`` if any entry of ``res`` has the same name as ``person``,
        ``False`` otherwise.
    """
    person = person.lower()
    # Compare by value: identity (`is`) on a str literal only works by accident
    # of interning and triggers a SyntaxWarning on Python 3.8+.
    if person == '0':
        return False
    return any(r['name'].lower() == person for r in res)
def parse_person(x):
    """Turn a comma-separated ``'Last, First'`` label into ``'First Last'``.

    The ``'person'`` field of ``x`` is split on commas, the pieces are put in
    reverse order and joined with single spaces; leading and trailing
    whitespace of the final string is removed.

    :param x: mapping (for instance a DataFrame row) with a ``'person'`` string.
    :return: the reordered name.
    """
    name_parts = x['person'].split(',')
    return ' '.join(reversed(name_parts)).strip()
def _percentage(part, total):
    """Return ``part`` as a percentage of ``total``, or 0.0 when ``total`` is 0."""
    return part * 100 / total if total else 0.0


def _load_persons(gt):
    """Return the parsed person name of every ground-truth row, in row order."""
    persons = [parse_person(row) for _, row in gt.iterrows()]
    # Rename this ground-truth label before matching (presumably to the label
    # used on the prediction side -- TODO confirm).
    return ['Elisabeth II' if p == "Elizabeth d'Angleterre" else p for p in persons]


def _clusterize_all(res, confidence_threshold, dominant_ratio, weighted_dominant_ratio):
    """Run ``clusterize.main`` on every entry of ``res`` and return the list of results."""
    return [
        clusterize.main(clusterize.from_dict(entry), dominant_ratio=dominant_ratio,
                        weighted_dominant_ratio=weighted_dominant_ratio,
                        confidence_threshold=confidence_threshold,
                        merge_cluster=True, min_length=1)
        for entry in res
    ]


def main(results, ground_truth):
    """Compare bulk-run predictions with a ground truth and report the scores.

    Steps:

    1. Sweep the confidence threshold over ``np.arange(0.4, 1, step=0.05)``,
       printing TP / FP / FN / precision / recall for each value and plotting
       the resulting precision-recall curve.
    2. Re-run the clustering with a confidence threshold of 0.6 and print
       summary scores plus a per-person aggregate.
    3. Write the ground truth, enriched with ``match`` and ``locator`` columns,
       next to the input file with a ``_res.csv`` suffix.

    Ground-truth rows and result entries are paired positionally, so both
    inputs are assumed to be in the same order -- TODO confirm with the
    producer of the JSON file.

    :param results: path of the JSON file produced by the bulk run.
    :param ground_truth: path of the ground-truth CSV; the columns ``person``,
        ``media``, ``start`` and ``end`` are read.
    """
    gt = pd.read_csv(ground_truth)
    with open(results, 'r', encoding='utf-8') as f:
        res = json.load(f)

    persons = _load_persons(gt)
    # Rows labelled '0' carry no person to recognise.
    n_people = sum(1 for p in persons if p != '0')

    precision = []
    recall = []
    # Only the confidence threshold is swept; the two ratios stay fixed.
    wc = 0.4
    dom = 0.6
    for val in np.arange(0.4, 1, step=0.05):
        clusters = _clusterize_all(res, val, dom, wc)

        predictions = [len(x) > 0 for x in clusters]
        matches = [get_match(person, found) for person, found in zip(persons, clusters)]

        # see https://github.com/rafaelpadilla/Object-Detection-Metrics
        true_positive = np.sum(matches)  # hit
        false_positive = np.sum([not m and pred for pred, m in zip(predictions, matches)])  # wrong
        false_negative = np.sum([1 if not m and p != '0' else 0 for p, m in zip(persons, matches)])  # miss

        prec = true_positive / sum(predictions)  # (true_positive + false_positive)
        rec = true_positive / n_people  # (true_positive + false_negative)
        print('%.2f' % val, true_positive, false_positive, false_negative, prec, rec, sep='\t|\t')

        precision.append(prec)
        recall.append(rec)

    plt.figure(1)
    plt.plot(recall, precision)
    plt.xlabel('recall')
    plt.ylabel('precision')
    plt.show()

    # Detailed report at a single, fixed confidence threshold.
    clusters = _clusterize_all(res, 0.6, dom, wc)
    matches = [get_match(person, found) for person, found in zip(persons, clusters)]

    df = pd.DataFrame(persons, columns=['person'])
    df['match'] = matches
    df['n_prediction'] = [len(x) for x in clusters]
    ppl = df[df['person'] != '0']

    print('Scores: ')
    print(f'* Total: {len(persons)}')
    print('* Matches: %d (%.2f%%)' % (sum(matches), _percentage(sum(matches), len(persons))))
    unpredicted = len(df[df['n_prediction'] == 0])
    # Scaled by 100 like every other percentage in this report.
    print('* Unpredicted: %d (%.2f%%)' % (unpredicted, _percentage(unpredicted, len(persons))))

    print(f'* Total people: {len(ppl)}')
    matched_people = sum(ppl['match'])
    print('* Matches people: %d (%.2f%%)' % (matched_people, _percentage(matched_people, len(ppl))))
    unpredicted = len(ppl[ppl['n_prediction'] == 0])
    print('* Unpredicted people: %d (%.2f%%)' % (unpredicted, _percentage(unpredicted, len(ppl))))
    # "Wrong" = something was predicted for the row, but not the expected person.
    predicted = ppl[ppl['n_prediction'] != 0]
    wrong = len(predicted[predicted['match'] == False])  # noqa: E712 (element-wise comparison)
    print('* Wrong people: %d (%.2f%%)' % (wrong, _percentage(wrong, len(ppl))))

    ppl_aggr = df.groupby('person').agg({'match': ['count', 'sum']})
    ppl_aggr['perc'] = ppl_aggr['match']['sum'] / ppl_aggr['match']['count']
    print(ppl_aggr)

    gt['match'] = matches
    gt['locator'] = gt.apply(
        lambda x: f"https://okapi.ina.fr/antract/Media/AF/{x['media'].split('/')[-1]}.mp4#t={int(x['start'])},{int(x['end'])}",
        axis=1)
    gt.to_csv(ground_truth.rsplit('.', 1)[0] + '_res.csv')
def parse_arguments(argv):
    """Parse the command-line options of the evaluation script.

    :param argv: list of argument strings, without the program name.
    :return: namespace with ``input`` (the bulk-run JSON) and ``gt`` (the
        ground-truth CSV); both options are mandatory.
    """
    cli = argparse.ArgumentParser()
    # (flags, help text) for each mandatory string option.
    option_specs = (
        (('-i', '--input'), 'The JSON output of bulk run.'),
        (('--gt',), 'The ground truth csv'),
    )
    for flags, help_text in option_specs:
        cli.add_argument(*flags, type=str, required=True, help=help_text)
    return cli.parse_args(argv)
if __name__ == '__main__':
    # Example invocation:
    #   python evaluate.py -i results_antract.json --gt evaluation/dataset_antract.csv
    cli_args = parse_arguments(sys.argv[1:])
    main(cli_args.input, cli_args.gt)