# mask_rcnn_r50_fpn_1x.yml
# Forked from PaddlePaddle/PaddleDetection.
# Architecture of detection, which is also the prefix of data feed module
architecture: MaskRCNN
# Data feed modules used for training, evaluation and test
train_feed: MaskRCNNTrainFeed
eval_feed: MaskRCNNEvalFeed
test_feed: MaskRCNNTestFeed
# Use GPU or CPU, true by default
use_gpu: true
# Maximum number of iterations.
# In rcnn models, max_iters is 180000 if lr schedule is 1x and batch_size is 1.
max_iters: 180000
# Snapshot period. If training and testing at the same time, the model is
# evaluated at each snapshot_iter. 10000 by default.
snapshot_iter: 10000
# Smooth the log output over the specified number of iterations, 20 by default.
log_smooth_window: 20
# The iteration interval at which the training log is displayed.
log_iter: 20
# The directory to save models.
save_dir: output
# The path of pretrained weights. If a URL is provided, the pretrain_weights
# are downloaded and decompressed automatically.
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar
# Evaluation method, COCO and VOC are available.
metric: COCO
# The path of final model for evaluation and test.
weights: output/mask_rcnn_r50_fpn_1x/model_final/
# Number of classes, 81 for COCO and 21 for VOC
num_classes: 81
# Mask RCNN architecture, see https://arxiv.org/abs/1703.06870
# Each entry selects the module used for that role; the modules themselves
# are configured in the top-level sections of the same name.
MaskRCNN:
  backbone: ResNet
  fpn: FPN
  roi_extractor: FPNRoIAlign
  rpn_head: FPNRPNHead
  bbox_assigner: BBoxAssigner
  bbox_head: BBoxHead
  mask_assigner: MaskAssigner
  mask_head: MaskHead
  rpn_only: false
# Backbone module
ResNet:
  # Index of stages using deformable conv v2, [] by default
  dcn_v2_stages: []
  # ResNet depth, 50 by default
  depth: 50
  # Stage index of returned feature maps, [2,3,4,5] by default
  feature_maps:
    - 2
    - 3
    - 4
    - 5
  # Stage index of backbone to freeze, 2 by default
  freeze_at: 2
  # Whether to freeze normalization layers, true by default
  freeze_norm: true
  # Weight decay for normalization layer weights, 0. by default
  norm_decay: 0.0
  # Normalization type, bn/sync_bn/affine_channel, affine_channel by default
  norm_type: affine_channel
  # ResNet variant, supports 'a', 'b', 'c', 'd' currently, b by default
  variant: b
# FPN module
FPN:
  # Whether to add extra convs in higher levels, false by default
  has_extra_convs: false
  # Highest level of the backbone feature map to use, 6 by default
  max_level: 6
  # Lowest level of the backbone feature map to use, 2 by default
  min_level: 2
  # FPN normalization type, bn/sync_bn/affine_channel, null by default
  norm_type: null
  # Number of feature channels, 256 by default
  num_chan: 256
  # Feature map scaling factors, [0.03125, 0.0625, 0.125, 0.25] by default
  spatial_scale:
    - 0.03125
    - 0.0625
    - 0.125
    - 0.25
# RPN module; for a non-FPN architecture, use RPNHead instead.
# Extracts proposals according to anchors and assigns box targets and
# score targets to selected proposals to compute the RPN loss. For the FPN
# architecture, the RPN is computed on each level and the proposals are
# collected together.
FPNRPNHead:
  # fluid.layers.anchor_generator
  # Generate anchors for RCNN models. Each position of input produces
  # N anchors. N = anchor_sizes * aspect_ratios. In FPNRPNHead, aspect_ratios
  # is provided and anchor_sizes depends on FPN levels and anchor_start_size.
  anchor_generator:
    aspect_ratios:
      - 0.5
      - 1.0
      - 2.0
    variance:
      - 1.0
      - 1.0
      - 1.0
      - 1.0
  # fluid.layers.rpn_target_assign
  # Assign classification and regression targets to each anchor according
  # to Intersection-over-Union (IoU) overlap between anchors and ground
  # truth boxes. The classification targets are binary class labels. The
  # positive labels are two kinds of anchors: the anchors with the highest
  # IoU overlap with a ground-truth box, or an anchor that has an IoU overlap
  # higher than rpn_positive_overlap with any ground-truth box.
  rpn_target_assign:
    rpn_batch_size_per_im: 256
    rpn_fg_fraction: 0.5
    rpn_negative_overlap: 0.3
    rpn_positive_overlap: 0.7
    rpn_straddle_thresh: 0.0
  # fluid.layers.generate_proposals in training
  # Generate RoIs according to each box with probability to be a foreground
  # object. The operation performs the following steps: transpose and resize
  # scores and bbox_deltas; calculate box locations as proposal candidates;
  # clip boxes to image; remove predicted boxes with small area; apply NMS to
  # get final proposals as output.
  train_proposal:
    min_size: 0.0
    nms_thresh: 0.7
    post_nms_top_n: 2000
    pre_nms_top_n: 2000
  # fluid.layers.generate_proposals in test
  test_proposal:
    min_size: 0.0
    nms_thresh: 0.7
    post_nms_top_n: 1000
    pre_nms_top_n: 1000
  # Size of anchor at the first scale, 32 by default
  anchor_start_size: 32
  # Highest level of FPN output, 6 by default
  max_level: 6
  # Lowest level of FPN output, 2 by default
  min_level: 2
  # Number of FPN output channels, 256 by default
  num_chan: 256
  # Number of classes in RPN output, 1 by default
  num_classes: 1
# RoI extractor module; for a non-FPN architecture, use RoIAlign instead.
# For the FPN architecture, proposals are distributed to different levels and
# RoI align is applied at each level. The outputs are then concatenated.
FPNRoIAlign:
  # The canonical FPN feature map level, 4 by default
  # NOTE(review): the key below is spelled "canconical"; it is kept as-is
  # because renaming it would change what the consumer reads — confirm.
  canconical_level: 4
  # The canonical FPN feature map size, 224 by default
  canonical_size: 224
  # The highest level of FPN layer, 5 by default
  max_level: 5
  # The lowest level of FPN layer, 2 by default
  min_level: 2
  # Number of sampling points, 0 by default
  sampling_ratio: 2
  # Box resolution, 7 by default
  box_resolution: 7
  # Mask RoI resolution, 14 by default
  mask_resolution: 14
# Mask head module
# Generates the mask output and computes the mask loss.
MaskHead:
  # Number of convolutions, 4 for FPN, 0 otherwise. 0 by default
  num_convs: 4
  # Size of the output mask, 14 by default
  resolution: 28
  # Dilation rate, 1 by default
  dilation: 1
  # Number of channels after first conv, 256 by default
  num_chan_reduced: 256
  # Number of output classes, 81 by default
  num_classes: 81
# fluid.layers.generate_proposal_labels
# Combine boxes and gt_boxes, and sample foreground proposals and background
# proposals. Then assign classification and regression targets to selected RoIs.
BBoxAssigner:
  batch_size_per_im: 512
  bbox_reg_weights:
    - 0.1
    - 0.1
    - 0.2
    - 0.2
  bg_thresh_hi: 0.5
  bg_thresh_lo: 0.0
  fg_fraction: 0.25
  fg_thresh: 0.5
  num_classes: 81
  shuffle_before_sample: true
# fluid.layers.generate_mask_labels
# Given the RoIs and corresponding labels, sample foreground RoIs.
# Assign mask targets to the selected RoIs, encoded as K binary masks
# of resolution M x M.
MaskAssigner:
  resolution: 28
  num_classes: 81
# BBox head module
# Faster bbox head following the RoI extractor, which also applies post
# processing such as NMS and box coding.
BBoxHead:
  # Head after RoI extractor, ResNetC5/TwoFCHead
  head: TwoFCHead
  # fluid.layers.multiclass_nms
  # Select a subset of detection bounding boxes that have high scores larger
  # than score_threshold. Then prune away boxes that have high IoU overlap
  # with already selected boxes by nms_threshold.
  # NOTE(review): the `nms` parent key was absent, leaving the three NMS
  # settings without a mapping of their own; it is restored here in parallel
  # with `box_coder` — confirm against the arguments BBoxHead expects.
  nms:
    keep_top_k: 100
    nms_threshold: 0.5
    score_threshold: 0.05
  # fluid.layers.box_coder
  box_coder:
    axis: 1
    box_normalized: false
    code_type: decode_center_size
    prior_box_var:
      - 0.1
      - 0.1
      - 0.2
      - 0.2
  num_classes: 81
# RCNN head with two fully connected layers
TwoFCHead:
  # The number of output channels, 1024 by default
  num_chan: 1024
# Learning rate configuration
LearningRate:
  # Base learning rate, 0.01 by default
  base_lr: 0.01
  # Learning rate schedulers, PiecewiseDecay and LinearWarmup by default
  schedulers:
    # fluid.layers.piecewise_decay
    # `values` has higher priority; if `values` is null, the learning rate is
    # multiplied by gamma at each milestone.
    - !PiecewiseDecay
      gamma: 0.1
      milestones:
        - 120000
        - 160000
      values: null
    # fluid.layers.linear_lr_warmup
    # The starting learning rate equals base_lr * start_factor
    - !LinearWarmup
      start_factor: 0.3333333333333333
      steps: 500
# Optimizer module
OptimizerBuilder:
  # fluid.optimizer
  optimizer:
    momentum: 0.9
    type: Momentum
  # fluid.regularizer
  regularizer:
    factor: 0.0001
    type: L2
# Data feed module for training
MaskRCNNTrainFeed:
  # Batch size per device, 1 by default
  batch_size: 1
  # Dataset module
  dataset:
    # Annotation file path
    annotation: annotations/instances_train2017.json
    # Dataset directory
    dataset_dir: dataset/coco
    # Directory where image files are stored
    image_dir: train2017
  # List of data fields needed
  fields:
    - image
    - im_info
    - im_id
    - gt_box
    - gt_label
    - is_crowd
    - gt_mask
  # List of image dims
  image_shape:
    - 3
    - 800
    - 1333
  # List of sample transformations to use
  sample_transforms:
    # Transform the image data to numpy format.
    - !DecodeImage
      to_rgb: true  # default: true
      with_mixup: false  # default: false
    # Flip images randomly
    # Transform the x coordinates of bboxes and segmentations
    - !RandomFlipImage
      is_mask_flip: true  # default: false
      # Whether bbox is normalized
      is_normalized: false  # default: false
      prob: 0.5  # default: 0.5
    # Normalize the image
    - !NormalizeImage
      # Whether the image is channel-first ([C, H, W]) instead of [H, W, C],
      # true by default
      is_channel_first: false
      # Whether to divide by 255, true by default
      is_scale: true
      # default: [0.485, 0.456, 0.406]
      mean:
        - 0.485
        - 0.456
        - 0.406
      # default: [1, 1, 1]
      std:
        - 0.229
        - 0.224
        - 0.225
    # Rescale image to the specified target size, capped at max_size
    - !ResizeImage
      # Resize method, cv2.INTER_LINEAR(1) by default
      interp: 1
      max_size: 1333
      target_size: 800
      use_cv2: true  # default: true
    # Change the channel layout
    - !Permute
      # Whether to output channel-first ([C, H, W]) instead of [H, W, C],
      # true by default
      channel_first: true
      to_bgr: false  # default: true
  # List of batch transformations to use
  batch_transforms:
    # Pad a batch of samples to same dimensions
    - !PadBatch
      pad_to_stride: 32  # default: 32
  # Drop last batch if size is uneven, false by default
  drop_last: false
  # Number of worker processes (or threads), 2 by default
  num_workers: 2
  # Number of samples, -1 represents all samples. -1 by default
  samples: -1
  # Whether samples should be shuffled, true by default
  shuffle: true
  # Whether to update im_info after padding, false by default
  use_padded_im_info: false
  # Whether to use multi-process, false by default
  use_process: false
# Data feed module for evaluation
MaskRCNNEvalFeed:
  # Batch size per device, 1 by default
  batch_size: 1
  # Dataset module
  dataset:
    # Annotation file path
    annotation: annotations/instances_val2017.json
    # Dataset directory
    dataset_dir: dataset/coco
    # Directory where image files are stored
    image_dir: val2017
  # List of data fields needed
  fields:
    - image
    - im_info
    - im_id
    - im_shape
  # List of image dims
  image_shape:
    - 3
    - 800
    - 1333
  # List of sample transformations to use
  sample_transforms:
    # Transform the image data to numpy format.
    - !DecodeImage
      to_rgb: true  # default: true
      with_mixup: false  # default: false
    # Normalize the image
    - !NormalizeImage
      # Whether the image is channel-first ([C, H, W]) instead of [H, W, C],
      # true by default
      is_channel_first: false
      # Whether to divide by 255, true by default
      is_scale: true
      # default: [0.485, 0.456, 0.406]
      mean:
        - 0.485
        - 0.456
        - 0.406
      # default: [1, 1, 1]
      std:
        - 0.229
        - 0.224
        - 0.225
    # Rescale image to the specified target size, capped at max_size
    - !ResizeImage
      # Resize method, cv2.INTER_LINEAR(1) by default
      interp: 1
      max_size: 1333
      target_size: 800
      use_cv2: true  # default: true
    # Change the channel layout
    - !Permute
      # Whether to output channel-first ([C, H, W]) instead of [H, W, C],
      # true by default
      channel_first: true
      to_bgr: false  # default: true
  # List of batch transformations to use
  batch_transforms:
    # Pad a batch of samples to same dimensions
    - !PadBatch
      pad_to_stride: 32  # default: 32
  # Drop last batch if size is uneven, false by default
  drop_last: false
  # Number of worker processes (or threads), 2 by default
  num_workers: 2
  # Number of samples, -1 represents all samples. -1 by default
  samples: -1
  # Whether samples should be shuffled, true by default
  shuffle: false
  # Whether to update im_info after padding, false by default
  use_padded_im_info: true
  # Whether to use multi-process, false by default
  use_process: false
# Data feed module for test
MaskRCNNTestFeed:
  # Batch size per device, 1 by default
  batch_size: 1
  # Dataset module
  dataset:
    # Annotation file path
    annotation: dataset/coco/annotations/instances_val2017.json
  # List of data fields needed
  fields:
    - image
    - im_info
    - im_id
    - im_shape
  # List of image dims
  image_shape:
    - 3
    - 800
    - 1333
  # List of sample transformations to use
  sample_transforms:
    # Transform the image data to numpy format.
    - !DecodeImage
      to_rgb: true  # default: true
      with_mixup: false  # default: false
    # Normalize the image
    - !NormalizeImage
      # Whether the image is channel-first ([C, H, W]) instead of [H, W, C],
      # true by default
      is_channel_first: false
      # Whether to divide by 255, true by default
      is_scale: true
      # default: [0.485, 0.456, 0.406]
      mean:
        - 0.485
        - 0.456
        - 0.406
      # default: [1, 1, 1]
      std:
        - 0.229
        - 0.224
        - 0.225
    # Change the channel layout
    - !Permute
      # Whether to output channel-first ([C, H, W]) instead of [H, W, C],
      # true by default
      channel_first: true
      to_bgr: false  # default: true
  # List of batch transformations to use
  batch_transforms:
    # Pad a batch of samples to same dimensions
    - !PadBatch
      pad_to_stride: 32  # default: 32
  # Drop last batch if size is uneven, false by default
  drop_last: false
  # Number of worker processes (or threads), 2 by default
  num_workers: 2
  # Number of samples, -1 represents all samples. -1 by default
  samples: -1
  # Whether samples should be shuffled, true by default
  shuffle: false
  # Whether to update im_info after padding, false by default
  use_padded_im_info: true
  # Whether to use multi-process, false by default
  use_process: false