-
Notifications
You must be signed in to change notification settings - Fork 175
/
Copy pathviou_tracker.py
237 lines (195 loc) · 10.2 KB
/
viou_tracker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
# ---------------------------------------------------------
# IOU Tracker
# Copyright (c) 2019 TU Berlin, Communication Systems Group
# Licensed under The MIT License [see LICENSE for details]
# Written by Erik Bochinski
# ---------------------------------------------------------
import cv2
import numpy as np
from lapsolver import solve_dense
from tqdm import tqdm
from time import time
from util import iou, load_mot
from vis_tracker import VisTracker
def track_viou(frames_path, detections, sigma_l, sigma_h, sigma_iou, t_min, ttl, tracker_type, keep_upper_height_ratio):
""" V-IOU Tracker.
See "Extending IOU Based Multi-Object Tracking by Visual Information by E. Bochinski, T. Senst, T. Sikora" for
more information.
Args:
frames_path (str): path to ALL frames.
string must contain a placeholder like {:07d} to be replaced with the frame numbers.
detections (list): list of detections per frame, usually generated by util.load_mot
sigma_l (float): low detection threshold.
sigma_h (float): high detection threshold.
sigma_iou (float): IOU threshold.
t_min (float): minimum track length in frames.
ttl (float): maximum number of frames to perform visual tracking.
this can fill 'gaps' of up to 2*ttl frames (ttl times forward and backward).
tracker_type (str): name of the visual tracker to use. see VisTracker for more details.
keep_upper_height_ratio (float): float between 0.0 and 1.0 that determines the ratio of height of the object
to track to the total height of the object used for visual tracking.
Returns:
list: list of tracks.
"""
if tracker_type == 'NONE':
assert ttl == 1, "ttl should not be larger than 1 if no visual tracker is selected"
tracks_active = []
tracks_extendable = []
tracks_finished = []
frame_buffer = []
for frame_num, detections_frame in enumerate(tqdm(detections), start=1):
# load frame and put into buffer
frame_path = frames_path.format(frame_num)
frame = cv2.imread(frame_path)
assert frame is not None, "could not read '{}'".format(frame_path)
frame_buffer.append(frame)
if len(frame_buffer) > ttl + 1:
frame_buffer.pop(0)
# apply low threshold to detections
dets = [det for det in detections_frame if det['score'] >= sigma_l]
track_ids, det_ids = associate(tracks_active, dets, sigma_iou)
updated_tracks = []
for track_id, det_id in zip(track_ids, det_ids):
tracks_active[track_id]['bboxes'].append(dets[det_id]['bbox'])
tracks_active[track_id]['max_score'] = max(tracks_active[track_id]['max_score'], dets[det_id]['score'])
tracks_active[track_id]['classes'].append(dets[det_id]['class'])
tracks_active[track_id]['det_counter'] += 1
if tracks_active[track_id]['ttl'] != ttl:
# reset visual tracker if active
tracks_active[track_id]['ttl'] = ttl
tracks_active[track_id]['visual_tracker'] = None
updated_tracks.append(tracks_active[track_id])
tracks_not_updated = [tracks_active[idx] for idx in set(range(len(tracks_active))).difference(set(track_ids))]
for track in tracks_not_updated:
if track['ttl'] > 0:
if track['ttl'] == ttl:
# init visual tracker
track['visual_tracker'] = VisTracker(tracker_type, track['bboxes'][-1], frame_buffer[-2],
keep_upper_height_ratio)
# viou forward update
ok, bbox = track['visual_tracker'].update(frame)
if not ok:
# visual update failed, track can still be extended
tracks_extendable.append(track)
continue
track['ttl'] -= 1
track['bboxes'].append(bbox)
updated_tracks.append(track)
else:
tracks_extendable.append(track)
# update the list of extendable tracks. tracks that are too old are moved to the finished_tracks. this should
# not be necessary but may improve the performance for large numbers of tracks (eg. for mot19)
tracks_extendable_updated = []
for track in tracks_extendable:
if track['start_frame'] + len(track['bboxes']) + ttl - track['ttl'] >= frame_num:
tracks_extendable_updated.append(track)
elif track['max_score'] >= sigma_h and track['det_counter'] >= t_min:
tracks_finished.append(track)
tracks_extendable = tracks_extendable_updated
new_dets = [dets[idx] for idx in set(range(len(dets))).difference(set(det_ids))]
dets_for_new = []
for det in new_dets:
finished = False
# go backwards and track visually
boxes = []
vis_tracker = VisTracker(tracker_type, det['bbox'], frame, keep_upper_height_ratio)
for f in reversed(frame_buffer[:-1]):
ok, bbox = vis_tracker.update(f)
if not ok:
# can not go further back as the visual tracker failed
break
boxes.append(bbox)
# sorting is not really necessary but helps to avoid different behaviour for different orderings
# preferring longer tracks for extension seems intuitive, LAP solving might be better
for track in sorted(tracks_extendable, key=lambda x: len(x['bboxes']), reverse=True):
offset = track['start_frame'] + len(track['bboxes']) + len(boxes) - frame_num
# association not optimal (LAP solving might be better)
# association is performed at the same frame, not adjacent ones
if 1 <= offset <= ttl - track['ttl'] and iou(track['bboxes'][-offset], bbox) >= sigma_iou:
if offset > 1:
# remove existing visually tracked boxes behind the matching frame
track['bboxes'] = track['bboxes'][:-offset+1]
track['bboxes'] += list(reversed(boxes))[1:]
track['bboxes'].append(det['bbox'])
track['max_score'] = max(track['max_score'], det['score'])
track['classes'].append(det['class'])
track['ttl'] = ttl
track['visual_tracker'] = None
tracks_extendable.remove(track)
if track in tracks_finished:
del tracks_finished[tracks_finished.index(track)]
updated_tracks.append(track)
finished = True
break
if finished:
break
if not finished:
dets_for_new.append(det)
# create new tracks
new_tracks = [{'bboxes': [det['bbox']], 'max_score': det['score'], 'start_frame': frame_num, 'ttl': ttl,
'classes': [det['class']], 'det_counter': 1, 'visual_tracker': None} for det in dets_for_new]
tracks_active = []
for track in updated_tracks + new_tracks:
if track['ttl'] == 0:
tracks_extendable.append(track)
else:
tracks_active.append(track)
# finish all remaining active and extendable tracks
tracks_finished = tracks_finished + \
[track for track in tracks_active + tracks_extendable
if track['max_score'] >= sigma_h and track['det_counter'] >= t_min]
# remove last visually tracked frames and compute the track classes
for track in tracks_finished:
if ttl != track['ttl']:
track['bboxes'] = track['bboxes'][:-(ttl - track['ttl'])]
track['class'] = max(set(track['classes']), key=track['classes'].count)
del track['visual_tracker']
return tracks_finished
def associate(tracks, detections, sigma_iou):
""" perform association between tracks and detections in a frame.
Args:
tracks (list): input tracks
detections (list): input detections
sigma_iou (float): minimum intersection-over-union of a valid association
Returns:
(tuple): tuple containing:
track_ids (numpy.array): 1D array with indexes of the tracks
det_ids (numpy.array): 1D array of the associated indexes of the detections
"""
costs = np.empty(shape=(len(tracks), len(detections)), dtype=np.float32)
for row, track in enumerate(tracks):
for col, detection in enumerate(detections):
costs[row, col] = 1 - iou(track['bboxes'][-1], detection['bbox'])
np.nan_to_num(costs)
costs[costs > 1 - sigma_iou] = np.nan
track_ids, det_ids = solve_dense(costs)
return track_ids, det_ids
def track_viou_matlab_wrapper(frames_path, detections, sigma_l, sigma_h, sigma_iou, t_min, ttl, tracker_type,
keep_upper_height_ratio=1.):
"""
Matlab wrapper of the v-iou tracker for the detrac evaluation toolkit.
Args:
detections (numpy.array): numpy array of detections, usually supplied by run_tracker.m
sigma_l (float): low detection threshold.
sigma_h (float): high detection threshold.
sigma_iou (float): IOU threshold.
t_min (float): minimum track length in frames.
Returns:
float: speed in frames per second.
list: list of tracks.
"""
detections = detections.reshape((7, -1)).transpose()
dets = load_mot(detections, with_classes=False)
start = time()
tracks = track_viou(frames_path+"img{:05d}.jpg", dets, sigma_l, sigma_h, sigma_iou, int(t_min), int(ttl), tracker_type, keep_upper_height_ratio)
end = time()
id_ = 1
out = []
for track in tracks:
for i, bbox in enumerate(track['bboxes']):
out += [float(bbox[0]), float(bbox[1]), float(bbox[2] - bbox[0]), float(bbox[3] - bbox[1]),
float(track['start_frame'] + i), float(id_)]
id_ += 1
num_frames = len(dets)
speed = num_frames / (end - start)
return speed, out