-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfaster_rcnn.py
130 lines (94 loc) · 5.72 KB
/
faster_rcnn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import sys
from importlib import import_module
import tensorflow as tf
from tensorflow.contrib import slim
from utils.anchor_utils import decode_bboxes
from utils.losses import smooth_l1_loss_rcnn, smooth_l1_loss_rcnn_ohem
import faster_rcnn_configs as frc
def faster_rcnn(features, rois, image_shape, is_training=True):
    """Build the second-stage (R-CNN) head on top of pooled ROI features.

    The ROIs are pooled from ``features``, run through the ``head`` function of
    the backbone module named by ``frc.BACKBONE``, passed through one
    batch-normalised fully connected layer, and finally through two linear
    layers that produce the class scores and the box regression outputs.

    Args:
        features: Feature map handed to ``roi_pooling``
            (presumably NHWC, as required by ``tf.image.crop_and_resize`` —
            TODO confirm against the backbone).
        rois: Region proposals handed to ``roi_pooling``, which unstacks them
            along axis 1 as ``(x1, y1, x2, y2)``.
        image_shape: Indexable whose element 0 is the image height and
            element 1 the image width.
        is_training: Forwarded to the backbone head and used as the
            ``trainable`` flag of the two output layers.

    Returns:
        A ``(cls_score, bbox_pred)`` tuple reshaped to
        ``[-1, frc.NUM_CLS + 1]`` and ``[-1, 4 * (frc.NUM_CLS + 1)]``.
    """
    with tf.variable_scope('rcnn'):
        # ROI pooling
        roi_features = roi_pooling(features, rois, image_shape)

        # Backbone modules are resolved by name from the 'backbones' directory,
        # which therefore has to be importable.
        if 'backbones' not in sys.path:
            sys.path.append('backbones')
        cnn = import_module(frc.BACKBONE, package='backbones')

        # Fully connected
        # Honour the caller's flag: this used to be hard-coded to True, which
        # built the head in training mode even when is_training=False.
        net_flatten = cnn.head(roi_features, is_training=is_training)

        # NOTE(review): the hidden layer width is frc.NUM_CLS, which looks
        # unusually narrow for a shared feature layer — confirm it is intended.
        # NOTE(review): slim.batch_norm is given no 'is_training' entry here,
        # so it runs with its default mode regardless of the flag — confirm
        # whether it should follow is_training as well.
        net_fc = slim.fully_connected(
            net_flatten, frc.NUM_CLS, activation_fn=None,
            normalizer_fn=slim.batch_norm,
            normalizer_params={'decay': 0.995, 'epsilon': 0.0001},
            weights_regularizer=slim.l2_regularizer(frc.L2_WEIGHT), scope='fc')

        with slim.arg_scope(
                [slim.fully_connected],
                weights_regularizer=slim.l2_regularizer(frc.L2_WEIGHT),
                weights_initializer=slim.variance_scaling_initializer(
                    1.0, mode='FAN_AVG', uniform=True),
                activation_fn=None, trainable=is_training):
            # One score per class plus one extra output
            # (presumably the background class — TODO confirm).
            cls_score = slim.fully_connected(net_fc, frc.NUM_CLS + 1, scope='cls_fc')
            # Four regression values for each of those outputs.
            bbox_pred = slim.fully_connected(net_fc, 4 * (frc.NUM_CLS + 1), scope='reg_fc')

        cls_score = tf.reshape(cls_score, [-1, frc.NUM_CLS + 1])
        bbox_pred = tf.reshape(bbox_pred, [-1, 4 * (frc.NUM_CLS + 1)])
        return cls_score, bbox_pred
def process_faster_rcnn(rois, bbox_pred, scores, image_shape):
    """Turn raw R-CNN outputs into per-class, NMS-filtered detections.

    For every class index in ``range(frc.NUM_CLS + 1)`` the regression output
    is decoded against ``rois``, clipped to the image, and filtered with
    non-max suppression; the survivors of all classes are concatenated.

    Args:
        rois: Proposals the regression outputs are decoded against.
        bbox_pred: Regression output, reshaped here to
            ``[-1, frc.NUM_CLS + 1, 4]``.
        scores: Per-class scores, unstacked along axis 1.
        image_shape: Indexable whose element 0 is the image height and
            element 1 the image width.

    Returns:
        ``(final_bboxes, final_scores, final_categories)``, each concatenated
        along axis 0 over all classes. Categories hold the class index in the
        dtype of the scores.
    """
    num_outputs = frc.NUM_CLS + 1

    def _clip(coord, upper):
        # Clamp a coordinate into [0, upper].
        return tf.maximum(0., tf.minimum(upper, coord))

    with tf.variable_scope('postprocess_faster_rcnn'):
        # Post-processing must not feed gradients back into the network.
        rois = tf.stop_gradient(rois)
        bbox_pred = tf.stop_gradient(tf.reshape(bbox_pred, [-1, num_outputs, 4]))
        scores = tf.stop_gradient(scores)

        deltas_by_class = tf.unstack(bbox_pred, axis=1)
        scores_by_class = tf.unstack(scores, axis=1)

        kept_boxes = []
        kept_scores = []
        kept_labels = []
        for cls_idx in range(num_outputs):
            cls_deltas = deltas_by_class[cls_idx]
            cls_scores = scores_by_class[cls_idx]

            # scale_factor is disabled here (frc.ROI_SCALE_FACTORS is not applied).
            decoded = decode_bboxes(cls_deltas, rois, scale_factor=None)

            # Clip the decoded boxes to the image bounds.
            x_min, y_min, x_max, y_max = tf.unstack(decoded, axis=1)
            height = tf.to_float(image_shape[0])
            width = tf.to_float(image_shape[1])
            clipped = tf.stack(
                [
                    _clip(x_min, width - 1),
                    _clip(y_min, height - 1),
                    _clip(x_max, width - 1),
                    _clip(y_max, height - 1),
                ],
                axis=1)

            # Per-class non-max suppression.
            selected = tf.image.non_max_suppression(
                clipped, cls_scores,
                frc.FASTER_RCNN_NMS_MAX_BOX_PER_CLASS,
                frc.FASTER_RCNN_NMS_IOU_THRESHOLD)
            boxes_after_nms = tf.gather(clipped, selected)
            scores_after_nms = tf.gather(cls_scores, selected)

            kept_boxes.append(boxes_after_nms)
            kept_scores.append(scores_after_nms)
            kept_labels.append(tf.ones_like(scores_after_nms) * cls_idx)

        final_bboxes = tf.concat(kept_boxes, axis=0, name='final_bboxes')
        final_scores = tf.concat(kept_scores, axis=0, name='final_scores')
        final_categories = tf.concat(kept_labels, axis=0, name='final_categories')
        return final_bboxes, final_scores, final_categories
def build_faster_rcnn_losses(bbox_pred, bbox_targets, cls_score, labels, num_cls):
    """Build the box-regression and classification losses of the R-CNN head.

    When ``frc.FASTER_RCNN_MINIBATCH_SIZE`` is ``-1`` both losses come from
    ``smooth_l1_loss_rcnn_ohem``; otherwise the box loss comes from
    ``smooth_l1_loss_rcnn`` and the class loss is the mean sparse softmax
    cross-entropy.

    Args:
        bbox_pred: Predicted box regression values.
        bbox_targets: Regression targets for the same ROIs.
        cls_score: Classification logits.
        labels: Integer class labels used for the sparse cross-entropy.
        num_cls: Class count forwarded to the smooth-L1 helpers.

    Returns:
        A ``(bbox_loss, cls_loss)`` tuple.
    """
    with tf.variable_scope('rcnn_losses'):
        per_roi_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=labels, logits=cls_score)

        use_ohem = frc.FASTER_RCNN_MINIBATCH_SIZE == -1
        if not use_ohem:
            bbox_loss = smooth_l1_loss_rcnn(bbox_pred, bbox_targets, labels, num_cls)
            cls_loss = tf.reduce_mean(per_roi_xent)
        else:
            # The OHEM helper receives the per-ROI cross-entropy and returns both losses.
            bbox_loss, cls_loss = smooth_l1_loss_rcnn_ohem(
                bbox_pred, bbox_targets, per_roi_xent, labels, num_cls,
                batch_size=frc.OHEM_BATCH_SIZE)

        return bbox_loss, cls_loss
def roi_pooling(features, rois, image_shape):
    """Crop each ROI out of ``features`` and max-pool it to a fixed size.

    Args:
        features: Feature map passed as the image argument of
            ``tf.image.crop_and_resize``.
        rois: Boxes unstacked along axis 1 as ``(x1, y1, x2, y2)``.
        image_shape: Indexable whose element 0 is the image height and
            element 1 the image width, used to normalise the ROIs.

    Returns:
        The cropped ROI features after max pooling.
    """
    with tf.variable_scope('roi_pooling'):
        height = tf.cast(image_shape[0], tf.float32)
        width = tf.cast(image_shape[1], tf.float32)
        num_rois = tf.shape(rois)[0]

        boxes = _normalize_rois(rois, height, width)

        # Every ROI is cropped from batch element 0.
        batch_indices = tf.zeros((num_rois,), tf.int32)
        crop_hw = [frc.FASTER_RCNN_ROI_SIZE, frc.FASTER_RCNN_ROI_SIZE]
        crops = tf.image.crop_and_resize(features, boxes, batch_indices,
                                         crop_size=crop_hw)

        pool_kernel = [frc.FASTER_RCNN_POOL_KERNEL_SIZE,
                       frc.FASTER_RCNN_POOL_KERNEL_SIZE]
        pooled = slim.max_pool2d(crops,
                                 kernel_size=pool_kernel,
                                 stride=frc.FASTER_RCNN_POOL_KERNEL_SIZE)
        return pooled
def _normalize_rois(rois, img_h, img_w):
    """Scale ``(x1, y1, x2, y2)`` ROIs by the image size and reorder them.

    Args:
        rois: Boxes unstacked along axis 1 as ``(x1, y1, x2, y2)``.
        img_h: Image height used to divide the y coordinates.
        img_w: Image width used to divide the x coordinates.

    Returns:
        Boxes stacked along axis 1 as ``[y1, x1, y2, x2]`` with gradients
        stopped.
    """
    x1, y1, x2, y2 = tf.unstack(rois, axis=1)

    # x coordinates are divided by the width, y coordinates by the height.
    scaled = [coord / extent
              for coord, extent in ((x1, img_w), (y1, img_h),
                                    (x2, img_w), (y2, img_h))]
    nx1, ny1, nx2, ny2 = scaled

    # Reordered to [y1, x1, y2, x2].
    boxes = tf.stack([ny1, nx1, ny2, nx2], axis=1)
    return tf.stop_gradient(boxes)