import os.path as osp
# the task the model is used for: 'stereo' or 'flow'; default: 'stereo'
task = 'stereo'
# model settings
max_disp = 192
model = dict(
    meta_architecture="GeneralizedStereoModel",
    # max disparity
    max_disp=max_disp,
    # whether the model uses BatchNorm
    batch_norm=True,
    backbone=dict(
        type="PSMNet",
        # the number of input planes of the feature extraction backbone
        in_planes=3,
    ),
    cost_processor=dict(
        # concatenate the left and right features to form the cost volume, then aggregate it
        type='Concatenation',
        cost_computation=dict(
            # default: cat_fms
            type="default",
            # the maximum disparity of the search range at the feature resolution (1/4)
            max_disp=max_disp // 4,
            # the start disparity of the search range
            start_disp=0,
            # the step between neighboring disparity samples
            dilation=1,
        ),
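        # For intuition, a minimal sketch of what a concatenation-based cost
        # computation such as cat_fms does, assuming left/right feature maps of
        # shape [B, C, H, W]; the helper name and signature below are
        # illustrative, not the actual API:
        #
        #   def cat_cost_volume(left, right, max_disp, start_disp=0, dilation=1):
        #       B, C, H, W = left.shape
        #       D = (max_disp + dilation - 1) // dilation
        #       cost = left.new_zeros(B, 2 * C, D, H, W)
        #       for i, d in enumerate(range(start_disp, start_disp + max_disp, dilation)):
        #           if d > 0:
        #               # right feature shifted by disparity d, stacked under the left
        #               cost[:, :C, i, :, d:] = left[..., d:]
        #               cost[:, C:, i, :, d:] = right[..., :-d]
        #           else:
        #               cost[:, :C, i] = left
        #               cost[:, C:, i] = right
        #       return cost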
        cost_aggregator=dict(
            type="PSMNet",
            # the maximum disparity of the search range
            max_disp=max_disp,
            # the number of input planes of the cost aggregation sub-network
            in_planes=64,
        ),
    ),
    disp_predictor=dict(
        # default: FasterSoftArgmin
        type='FASTER',
        # the maximum disparity of the search range
        max_disp=max_disp,
        # the start disparity of the search range
        start_disp=0,
        # the step between neighboring disparity samples
        dilation=1,
        # the temperature coefficient of the soft argmin
        alpha=1.0,
        # whether to normalize the estimated cost volume
        normalize=True,
    ),
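    # For intuition, a minimal sketch of the soft argmin disparity regression
    # configured above (after GC-Net/PSMNet), assuming a cost volume of shape
    # [B, D, H, W]; the helper below is illustrative, not the actual API:
    #
    #   import torch
    #   import torch.nn.functional as F
    #
    #   def soft_argmin(cost, max_disp, start_disp=0, dilation=1, alpha=1.0):
    #       disps = torch.arange(start_disp, start_disp + max_disp, dilation,
    #                            device=cost.device, dtype=cost.dtype)
    #       # softmax over the disparity dimension turns the scaled volume into
    #       # a per-pixel probability distribution; alpha is the temperature
    #       prob = F.softmax(cost * alpha, dim=1)
    #       # expected disparity under that distribution
    #       return (prob * disps.view(1, -1, 1, 1)).sum(dim=1, keepdim=True)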
    losses=dict(
        l1_loss=dict(
            # the maximum disparity of the search range
            max_disp=max_disp,
            # weights for the losses at different scales
            weights=(1.0, 0.7, 0.5),
            # weight of the L1 loss relative to other loss types
            weight=1.0,
        ),
    ),
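    # For intuition, a minimal sketch of a multi-scale smooth L1 loss using the
    # weights above (PSMNet supervises three predicted maps); the masking of
    # disparities outside (0, max_disp) and all names below are assumptions:
    #
    #   import torch.nn.functional as F
    #
    #   def multi_scale_loss(preds, gt, max_disp, weights=(1.0, 0.7, 0.5), weight=1.0):
    #       # supervise only pixels with valid ground-truth disparity
    #       mask = (gt > 0) & (gt < max_disp)
    #       total = sum(w * F.smooth_l1_loss(p[mask], gt[mask])
    #                   for w, p in zip(weights, preds))
    #       return weight * total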
    eval=dict(
        # evaluate the disparity map within (lower_bound, upper_bound)
        lower_bound=0,
        upper_bound=max_disp,
        # evaluate the disparity map in both occluded and non-occluded areas
        eval_occlusion=True,
        # return the cost volume after regularization for visualization
        is_cost_return=False,
        # whether to move the cost volume from GPU to CPU
        is_cost_to_cpu=True,
    ),
)
# dataset settings
dataset_type = 'SceneFlow'
# data_root = 'datasets/{}/'.format(dataset_type)
# annfile_root = osp.join(data_root, 'annotations')
# root = '/home/youmin/'
root = '/node01/jobs/io/out/youmin/'
data_root = osp.join(root, 'data/StereoMatching/', dataset_type)
annfile_root = osp.join(root, 'data/annotations/', dataset_type)
# If you don't want to visualize the results, just comment out the vis data.
# For downloading the data and using it for debugging, please refer to DATA.md and GETTING_STARTED.md respectively.
vis_data_root = osp.join(root, 'data/visualization_data/', dataset_type)
vis_annfile_root = osp.join(vis_data_root, 'annotations')
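# the standard ImageNet mean/std on the 0-255 scale; images are normalized per channel as (img - mean) / std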
img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375])
data = dict(
    # whether the disparity of the dataset is sparse,
    # e.g., SceneFlow is not sparse, but KITTI is sparse
    sparse=False,
    imgs_per_gpu=3,
    workers_per_gpu=16,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        annfile=osp.join(annfile_root, 'cleanpass_train.json'),
        input_shape=[256, 512],
        use_right_disp=False,
        **img_norm_cfg,
    ),
    eval=dict(
        type=dataset_type,
        data_root=data_root,
        annfile=osp.join(annfile_root, 'cleanpass_test.json'),
        input_shape=[544, 960],
        use_right_disp=False,
        **img_norm_cfg,
    ),
    # If you don't want to visualize the results, just comment out the vis data
    vis=dict(
        type=dataset_type,
        data_root=vis_data_root,
        annfile=osp.join(vis_annfile_root, 'vis_test.json'),
        input_shape=[544, 960],
        **img_norm_cfg,
    ),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        annfile=osp.join(annfile_root, 'cleanpass_test.json'),
        input_shape=[544, 960],
        use_right_disp=False,
        **img_norm_cfg,
    ),
)
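# note: with imgs_per_gpu=3 and gpus=4 (set below), the effective training batch size is 3 * 4 = 12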
optimizer = dict(type='RMSprop', lr=0.001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[10]
)
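# i.e., assuming the mmcv-style hook: the learning rate warms up linearly from
# warmup_ratio * lr (~3.3e-4) to lr (1e-3) over the first 500 iterations, then
# decays by mmcv's default factor of 0.1 at the epochs listed in step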
checkpoint_config = dict(
    interval=1
)
log_config = dict(
    interval=10,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook'),
    ]
)
# https://nvidia.github.io/apex/amp.html
apex = dict(
    # whether to use apex.synced_bn
    synced_bn=True,
    # whether to use apex for mixed-precision training
    use_mixed_precision=False,
    # the model weight type: float16 or float32
    type="float16",
    # the factor by which apex scales the loss value
    loss_scale=16,
)
total_epochs = 10
# each model returns several disparity maps, but not all of them need to be
# evaluated; the framework evaluates only the disparity maps at the indexes given here
eval_disparity_id = [0, 1, 2]
gpus = 4
dist_params = dict(backend='nccl')
log_level = 'INFO'
validate = True
load_from = None
resume_from = None
workflow = [('train', 1)]
work_dir = osp.join(root, 'exps/PSMNet/scene_flow')
# For test
checkpoint = osp.join(work_dir, 'epoch_10.pth')
out_dir = osp.join(work_dir, 'epoch_10')
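# For reference, a minimal sketch of how an mmcv-style config like this is
# typically loaded (the config path here is an assumption; illustrative only):
#
#   from mmcv import Config
#
#   cfg = Config.fromfile('configs/PSMNet/scene_flow.py')
#   print(cfg.model.max_disp)     # 192
#   print(cfg.data.imgs_per_gpu)  # 3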