# Forked from PaddlePaddle/PaddleDetection
# yolov3_darknet.yml — 323 lines (304 loc) · 10.2 KB
---
# Architecture of detection, which is also the prefix of the data feed module
architecture: YOLOv3
# Data feed modules for training, evaluation and test.
train_feed: YoloTrainFeed
eval_feed: YoloEvalFeed
test_feed: YoloTestFeed
# Use GPU or CPU, true by default.
use_gpu: true
# Maximum number of iterations.
# In the YOLOv3 model, the default iteration number trains for 270 epochs.
max_iters: 500200
# Smooth the log output over the specified number of iterations, 20 by default.
log_smooth_window: 20
# Iteration interval for displaying the training log.
log_iter: 20
# The directory to save models.
save_dir: output
# Snapshot period. If training and test run at the same time, the model is evaluated at each snapshot_iter. 2000 by default.
snapshot_iter: 2000
# Evaluation method, COCO and VOC are available.
metric: COCO
# The path of pretrained weights. If a url is provided, it will be downloaded and decompressed automatically.
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_pretrained.tar
# The path of the final model for evaluation and test.
weights: output/yolov3_darknet/model_final
# Number of classes, 80 for COCO and 20 for VOC.
num_classes: 80
# YOLOv3 architecture, see https://arxiv.org/abs/1804.02767
YOLOv3:
  backbone: DarkNet
  yolo_head: YOLOv3Head
# Backbone module
DarkNet:
  # Batch normalization type in training, sync_bn for synchronized batch normalization
  norm_type: sync_bn
  # L2 weight decay factor of the batch normalization layer
  norm_decay: 0.0
  # DarkNet convolution layer number, only 53 is supported currently
  depth: 53
# YOLOv3 head module
# Generates bbox output in evaluation and calculates loss in training
# fluid.layers.yolov3_loss / fluid.layers.yolo_box
YOLOv3Head:
  # Anchor masks of the 3 yolo_loss/yolo_box layers; each layer uses 3 anchors
  anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
  # 9 anchors for the 3 yolo_loss/yolo_box layers, generated by performing k-means on COCO gtboxes
  anchors: [[10, 13], [16, 30], [33, 23],
            [30, 61], [62, 45], [59, 119],
            [116, 90], [156, 198], [373, 326]]
  # L2 weight decay factor of the batch normalization layer
  norm_decay: 0.0
  # Ignore threshold for the yolo_loss layer, 0.7 by default.
  # Objectness loss is ignored if a prediction bbox overlaps a gtbox over ignore_thresh.
  ignore_thresh: 0.7
  # Whether to use label smoothing in the yolo_loss layer.
  # It is recommended to enable this only when num_classes is very big.
  label_smooth: true
  # fluid.layers.multiclass_nms
  # Non-max suppression for output prediction boxes, see multiclass_nms for the following parameters.
  # 1. Select detection bounding boxes with scores larger than score_threshold.
  # 2. Select detection bounding boxes with the largest nms_top_k scores.
  # 3. Suppress detection bounding boxes which have a high IoU overlap with already selected boxes.
  # 4. Keep the top keep_top_k detection bounding boxes as output.
  nms:
    # Which label is regarded as background and will be ignored, -1 for no background label.
    background_label: -1
    # Number of total bboxes to be kept per image after the NMS step.
    keep_top_k: 100
    # IoU threshold for NMS; a bbox with IoU over nms_threshold will be suppressed.
    nms_threshold: 0.45
    # Maximum number of detections kept, according to confidence, after filtering detections based on score_threshold.
    nms_top_k: 1000
    # Whether detections are normalized.
    normalized: false
    # Threshold to filter out bounding boxes with a low confidence score.
    score_threshold: 0.01
# Learning rate configuration
LearningRate:
  # Base learning rate for training, 1e-3 by default.
  base_lr: 0.001
  # Learning rate schedulers, PiecewiseDecay and LinearWarmup by default
  schedulers:
    # fluid.layers.piecewise_decay
    # Decay the learning rate by gamma at each milestone
    - !PiecewiseDecay
      gamma: 0.1
      milestones:
        - 400000
        - 450000
    # fluid.layers.linear_lr_warmup
    # The start learning rate equals base_lr * start_factor
    - !LinearWarmup
      start_factor: 0.0
      steps: 4000
# Optimizer module
OptimizerBuilder:
  # fluid.optimizer
  optimizer:
    momentum: 0.9
    type: Momentum
  # fluid.regularizer
  regularizer:
    factor: 0.0005
    type: L2
# Data feed module for training
YoloTrainFeed:
  # Batch size per device, 8 by default
  batch_size: 8
  # Dataset module
  dataset:
    # Dataset directory.
    dataset_dir: dataset/coco
    # Annotation file path.
    annotation: annotations/instances_train2017.json
    # Directory where image files are stored.
    image_dir: train2017
  # List of data fields needed.
  fields: [image, gt_box, gt_label, gt_score]
  # List of image dims
  image_shape: [3, 608, 608]
  # List of sample transformations to use.
  sample_transforms:
    # Read image data and decode to numpy.
    - !DecodeImage
      to_rgb: true
      # YOLOv3 uses image mixup in training.
      with_mixup: true
    # Mix up two images in training, a trick to improve performance.
    - !MixupImage
      alpha: 1.5  # default: 1.5
      beta: 1.5  # default: 1.5
    # Normalize gtbox to range [0, 1]
    - !NormalizeBox {}
    # Random color distort: brightness, contrast, hue, saturation.
    - !RandomDistort
      brightness_lower: 0.5
      brightness_prob: 0.5
      brightness_upper: 1.5
      contrast_lower: 0.5
      contrast_prob: 0.5
      contrast_upper: 1.5
      count: 4
      hue_lower: -18
      hue_prob: 0.5
      hue_upper: 18
      is_order: false
      saturation_lower: 0.5
      saturation_prob: 0.5
      saturation_upper: 1.5
    # Randomly expand the image and modify bounding boxes.
    # Operators:
    # 1. Scale the image width and height.
    # 2. Construct a new image with the new height and width.
    # 3. Fill the new image with the mean.
    # 4. Put the original image into the new image.
    # 5. Rescale the bounding boxes.
    # 6. Determine if the new bboxes are satisfied in the new image.
    - !ExpandImage
      # Max expand ratio, default 4.0.
      max_ratio: 4.0
      mean: [123.675, 116.28, 103.53]
      prob: 0.5
    # Randomly crop the image and modify bounding boxes.
    # Operators:
    # 1. Scale the image width and height.
    # 2. Crop the image according to a random sample.
    # 3. Rescale the bounding boxes.
    # 4. Determine if the new bboxes are satisfied in the new image.
    - !CropImage
      # Recrop the image if there is no bbox in the output cropped image.
      avoid_no_bbox: true
      batch_sampler: [[1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0],
                      [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 1.0],
                      [1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 1.0],
                      [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 1.0],
                      [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 1.0],
                      [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 1.0],
                      [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]]
      # Whether all bboxes must satisfy the IoU constraints.
      satisfy_all: false
    # Interpolate image to target_size with a random interpolation method:
    # cv2.INTER_NEAREST,
    # cv2.INTER_LINEAR,
    # cv2.INTER_AREA,
    # cv2.INTER_CUBIC,
    # cv2.INTER_LANCZOS4,
    - !RandomInterpImage
      max_size: 0
      target_size: 608
    # Flip the image and bounding boxes.
    # Operators:
    # 1. Flip the image numpy.
    # 2. Transform the bboxes' x coordinates. (Must judge whether the coordinates are normalized!)
    # 3. Transform the segmentations' x coordinates. (Must judge whether the coordinates are normalized!)
    - !RandomFlipImage
      is_mask_flip: false
      is_normalized: true
      prob: 0.5
    # Normalize the image.
    # Operators:
    # 1. (optional) Scale the image to [0, 1]
    # 2. Each pixel minus mean and is divided by std
    - !NormalizeImage
      is_channel_first: false
      is_scale: true
      mean: [0.485, 0.456, 0.406]
      std: [0.229, 0.224, 0.225]
    # Change data layout to [C, H, W].
    - !Permute
      channel_first: true
      to_bgr: false
  # List of batch transformations to use.
  batch_transforms:
    # Randomly reshape images in each mini-batch to different shapes.
    - !RandomShape
      sizes: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608]
  # YOLOv3 reads gtboxes into a zero-padded tensor with a max box number of 50.
  num_max_boxes: 50
  # YOLOv3 reads gtlabels without regarding background as label 0.
  with_background: false
  # Number of samples, -1 represents all samples. -1 by default.
  samples: -1
  # Whether samples should be shuffled, true by default.
  shuffle: true
  # Whether to drop the last batch if it has fewer images than batch_size.
  drop_last: true
  # Whether to use a multi-process reader in training.
  use_process: true
  # Number of multi-process reader workers.
  num_workers: 8
  # Buffer size for the reader.
  bufsize: 128
  # Number of epochs to apply image mixup.
  mixup_epoch: 250
# Data feed module for evaluation
YoloEvalFeed:
  batch_size: 8
  dataset:
    dataset_dir: dataset/coco
    annotation: annotations/instances_val2017.json
    image_dir: val2017
  batch_transforms: []
  fields: [image, im_size, im_id, gt_box, gt_label, is_difficult]
  image_shape: [3, 608, 608]
  sample_transforms:
    - !DecodeImage
      to_rgb: true
      with_mixup: false
    # Rescale the image to the specified target size, capped at max_size if max_size != 0.
    # If target_size is a list, a scale is selected randomly as the specified target size.
    - !ResizeImage
      interp: 2  # 2 for cv2.INTER_CUBIC
      max_size: 0
      target_size: 608
      use_cv2: true
    - !NormalizeImage
      is_channel_first: false
      is_scale: true
      mean: [0.485, 0.456, 0.406]
      std: [0.229, 0.224, 0.225]
    - !Permute
      channel_first: true
      to_bgr: false
  num_max_boxes: 50
  samples: -1
  shuffle: false
  drop_last: false
  # Use a multi-thread reader in evaluation mode.
  use_process: false
  # Thread number for the multi-thread reader.
  num_workers: 8
  with_background: false
# Data feed module for test
YoloTestFeed:
  batch_size: 1
  dataset:
    annotation: dataset/coco/annotations/instances_val2017.json
  batch_transforms: []
  fields: [image, im_size, im_id]
  sample_transforms:
    - !DecodeImage
      to_rgb: true
      with_mixup: false
    - !ResizeImage
      interp: 2
      max_size: 0
      target_size: 608
      use_cv2: true
    - !NormalizeImage
      is_channel_first: false
      is_scale: true
      mean: [0.485, 0.456, 0.406]
      std: [0.229, 0.224, 0.225]
    - !Permute
      channel_first: true
      to_bgr: false
  num_max_boxes: 50
  samples: -1
  shuffle: false
  drop_last: false
  # Use a multi-thread reader in test mode.
  use_process: false
  num_workers: 8
  with_background: false