-
Notifications
You must be signed in to change notification settings - Fork 6
/
ssd_training.py
137 lines (117 loc) · 5.69 KB
/
ssd_training.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
"""SSD training utils."""
import tensorflow as tf
class MultiboxLoss(object):
"""Multibox loss with some helper functions.
# Arguments
num_classes: Number of classes including background.
alpha: Weight of L1-smooth loss.
neg_pos_ratio: Max ratio of negative to positive boxes in loss.
background_label_id: Id of background label.
negatives_for_hard: Number of negative boxes to consider
it there is no positive boxes in batch.
# References
https://arxiv.org/abs/1512.02325
# TODO
Add possibility for background label id be not zero
"""
def __init__(self, num_classes, alpha=1.0, neg_pos_ratio=3.0,
background_label_id=0, negatives_for_hard=100.0):
self.num_classes = num_classes
self.alpha = alpha
self.neg_pos_ratio = neg_pos_ratio
if background_label_id != 0:
raise Exception('Only 0 as background label id is supported')
self.background_label_id = background_label_id
self.negatives_for_hard = negatives_for_hard
def _l1_smooth_loss(self, y_true, y_pred):
"""Compute L1-smooth loss.
# Arguments
y_true: Ground truth bounding boxes,
tensor of shape (?, num_boxes, 4).
y_pred: Predicted bounding boxes,
tensor of shape (?, num_boxes, 4).
# Returns
l1_loss: L1-smooth loss, tensor of shape (?, num_boxes).
# References
https://arxiv.org/abs/1504.08083
"""
abs_loss = tf.abs(y_true - y_pred)
sq_loss = 0.5 * (y_true - y_pred)**2
l1_loss = tf.where(tf.less(abs_loss, 1.0), sq_loss, abs_loss - 0.5)
return tf.reduce_sum(l1_loss, -1)
def _softmax_loss(self, y_true, y_pred):
"""Compute softmax loss.
# Arguments
y_true: Ground truth targets,
tensor of shape (?, num_boxes, num_classes).
y_pred: Predicted logits,
tensor of shape (?, num_boxes, num_classes).
# Returns
softmax_loss: Softmax loss, tensor of shape (?, num_boxes).
"""
y_pred = tf.maximum(tf.minimum(y_pred, 1 - 1e-15), 1e-15)
softmax_loss = -tf.reduce_sum(y_true * tf.log(y_pred),
reduction_indices=-1)
return softmax_loss
def compute_loss(self, y_true, y_pred):
"""Compute mutlibox loss.
# Arguments
y_true: Ground truth targets,
tensor of shape (?, num_boxes, 4 + num_classes + 8),
priors in ground truth are fictitious,
y_true[:, :, -8] has 1 if prior should be penalized
or in other words is assigned to some ground truth box,
y_true[:, :, -7:] are all 0.
y_pred: Predicted logits,
tensor of shape (?, num_boxes, 4 + num_classes + 8).
# Returns
loss: Loss for prediction, tensor of shape (?,).
"""
batch_size = tf.shape(y_true)[0]
num_boxes = tf.to_float(tf.shape(y_true)[1])
# loss for all priors
conf_loss = self._softmax_loss(y_true[:, :, 4:-8],
y_pred[:, :, 4:-8])
loc_loss = self._l1_smooth_loss(y_true[:, :, :4],
y_pred[:, :, :4])
# get positives loss
num_pos = tf.reduce_sum(y_true[:, :, -8], reduction_indices=-1)
pos_loc_loss = tf.reduce_sum(loc_loss * y_true[:, :, -8],
reduction_indices=1)
pos_conf_loss = tf.reduce_sum(conf_loss * y_true[:, :, -8],
reduction_indices=1)
# get negatives loss, we penalize only confidence here
num_neg = tf.minimum(self.neg_pos_ratio * num_pos,
num_boxes - num_pos)
pos_num_neg_mask = tf.greater(num_neg, 0)
has_min = tf.to_float(tf.reduce_any(pos_num_neg_mask))
num_neg = tf.concat(axis=0, values=[num_neg,
[(1 - has_min) * self.negatives_for_hard]])
num_neg_batch = tf.reduce_min(tf.boolean_mask(num_neg,
tf.greater(num_neg, 0)))
num_neg_batch = tf.to_int32(num_neg_batch)
confs_start = 4 + self.background_label_id + 1
confs_end = confs_start + self.num_classes - 1
max_confs = tf.reduce_max(y_pred[:, :, confs_start:confs_end],
reduction_indices=2)
_, indices = tf.nn.top_k(max_confs * (1 - y_true[:, :, -8]),
k=num_neg_batch)
batch_idx = tf.expand_dims(tf.range(0, batch_size), 1)
batch_idx = tf.tile(batch_idx, (1, num_neg_batch))
full_indices = (tf.reshape(batch_idx, [-1]) * tf.to_int32(num_boxes) +
tf.reshape(indices, [-1]))
# full_indices = tf.concat(2, [tf.expand_dims(batch_idx, 2),
# tf.expand_dims(indices, 2)])
# neg_conf_loss = tf.gather_nd(conf_loss, full_indices)
neg_conf_loss = tf.gather(tf.reshape(conf_loss, [-1]),
full_indices)
neg_conf_loss = tf.reshape(neg_conf_loss,
[batch_size, num_neg_batch])
neg_conf_loss = tf.reduce_sum(neg_conf_loss, reduction_indices=1)
# loss is sum of positives and negatives
total_loss = pos_conf_loss + neg_conf_loss
total_loss /= (num_pos + tf.to_float(num_neg_batch))
num_pos = tf.where(tf.not_equal(num_pos, 0), num_pos,
tf.ones_like(num_pos))
total_loss += (self.alpha * pos_loc_loss) / num_pos
return total_loss