From bfdc5d5d90531f8309e04e26b5630b38941c2981 Mon Sep 17 00:00:00 2001 From: Range King Date: Fri, 3 Mar 2023 10:07:08 +0800 Subject: [PATCH 01/38] [Fix] Unifying metafile.yml (#9849) --- configs/fsaf/metafile.yml | 4 ++-- configs/paa/metafile.yml | 7 +++++++ configs/yolact/metafile.yml | 3 +++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/configs/fsaf/metafile.yml b/configs/fsaf/metafile.yml index 2d524b6aea7..daaad0d3a86 100644 --- a/configs/fsaf/metafile.yml +++ b/configs/fsaf/metafile.yml @@ -56,7 +56,7 @@ Models: - Task: Object Detection Dataset: COCO Metrics: - box AP: 39.3 (37.9) + box AP: 39.3 Weights: https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_r101_fpn_1x_coco/fsaf_r101_fpn_1x_coco-9e71098f.pth - Name: fsaf_x101-64x4d_fpn_1x_coco @@ -76,5 +76,5 @@ Models: - Task: Object Detection Dataset: COCO Metrics: - box AP: 42.4 (41.0) + box AP: 42.4 Weights: https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_x101_64x4d_fpn_1x_coco/fsaf_x101_64x4d_fpn_1x_coco-e3f6e6fd.pth diff --git a/configs/paa/metafile.yml b/configs/paa/metafile.yml index a2a39ffd8ba..078b974971d 100644 --- a/configs/paa/metafile.yml +++ b/configs/paa/metafile.yml @@ -24,6 +24,7 @@ Models: Config: configs/paa/paa_r50_fpn_1x_coco.py Metadata: Training Memory (GB): 3.7 + Epochs: 12 Results: - Task: Object Detection Dataset: COCO @@ -36,6 +37,7 @@ Models: Config: configs/paa/paa_r50_fpn_1.5x_coco.py Metadata: Training Memory (GB): 3.7 + Epochs: 18 Results: - Task: Object Detection Dataset: COCO @@ -48,6 +50,7 @@ Models: Config: configs/paa/paa_r50_fpn_2x_coco.py Metadata: Training Memory (GB): 3.7 + Epochs: 24 Results: - Task: Object Detection Dataset: COCO @@ -60,6 +63,7 @@ Models: Config: configs/paa/paa_r50_fpn_ms-3x_coco.py Metadata: Training Memory (GB): 3.7 + Epochs: 36 Results: - Task: Object Detection Dataset: COCO @@ -72,6 +76,7 @@ Models: Config: configs/paa/paa_r101_fpn_1x_coco.py Metadata: Training Memory (GB): 6.2 + Epochs: 12 Results: - Task: Object Detection Dataset: COCO @@ -84,6 +89,7 @@ Models: Config: configs/paa/paa_r101_fpn_2x_coco.py Metadata: Training Memory (GB): 6.2 + Epochs: 24 Results: - Task: Object Detection Dataset: COCO @@ -96,6 +102,7 @@ Models: Config: configs/paa/paa_r101_fpn_ms-3x_coco.py Metadata: Training Memory (GB): 6.2 + Epochs: 36 Results: - Task: Object Detection Dataset: COCO diff --git a/configs/yolact/metafile.yml b/configs/yolact/metafile.yml index 6b01a94c9bd..9ca76b3d391 100644 --- a/configs/yolact/metafile.yml +++ b/configs/yolact/metafile.yml @@ -24,6 +24,7 @@ Models: Metadata: Training Resources: 1x V100 GPU Batch Size: 8 + Epochs: 55 inference time (ms/im): - value: 23.53 hardware: V100 @@ -43,6 +44,7 @@ Models: Config: configs/yolact/yolact_r50_8xb8-55e_coco.py Metadata: Batch Size: 64 + Epochs: 55 inference time (ms/im): - value: 23.53 hardware: V100 @@ -63,6 +65,7 @@ Models: Metadata: Training Resources: 1x V100 GPU Batch Size: 8 + Epochs: 55 inference time (ms/im): - value: 29.85 hardware: V100 From 95fac563d17c4b6523ac7813ef808fa0a1501205 Mon Sep 17 00:00:00 2001 From: Ghlerrix <114851291+Ghlerrix@users.noreply.github.com> Date: Mon, 6 Mar 2023 23:13:00 +0800 Subject: [PATCH 02/38] [Fix]: fix FPS benchmark interface (#9865) --- mmdet/utils/benchmark.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mmdet/utils/benchmark.py b/mmdet/utils/benchmark.py index 18070c05fd2..1714b464740 100644 --- a/mmdet/utils/benchmark.py +++ b/mmdet/utils/benchmark.py @@ -160,7 +160,6 @@ def __init__(self, print_log('before build: 
', self.logger) print_process_memory(self._process, self.logger) - self.cfg.model.pretrained = None self.model = self._init_model(checkpoint, is_fuse_conv_bn) # Because multiple processes will occupy additional CPU resources, @@ -213,7 +212,7 @@ def run_once(self) -> dict: start_time = time.perf_counter() with torch.no_grad(): - self.model(data, return_loss=False) + self.model.test_step(data) torch.cuda.synchronize() elapsed = time.perf_counter() - start_time From bc1bfa12c70861c880470fa8435be6771e6cdf92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Haian=20Huang=28=E6=B7=B1=E5=BA=A6=E7=9C=B8=29?= Date: Tue, 7 Mar 2023 15:51:07 +0800 Subject: [PATCH 03/38] [Fix]: Fix metafile error (#9885) --- configs/deformable_detr/metafile.yml | 12 ++++++------ configs/detr/metafile.yml | 4 ++-- configs/mask2former/metafile.yml | 28 ++++++++++++++-------------- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/configs/deformable_detr/metafile.yml b/configs/deformable_detr/metafile.yml index abb85492cbb..0fba0ba09e6 100644 --- a/configs/deformable_detr/metafile.yml +++ b/configs/deformable_detr/metafile.yml @@ -28,8 +28,8 @@ Models: - Task: Object Detection Dataset: COCO Metrics: - box AP: 44.5 - Weights: https://download.openmmlab.com/mmdetection/v2.0/deformable_detr/deformable_detr_r50_16x2_50e_coco/deformable_detr_r50_16x2_50e_coco_20210419_220030-a12b9512.pth + box AP: 44.3 + Weights: https://download.openmmlab.com/mmdetection/v3.0/deformable_detr/deformable-detr_r50_16xb2-50e_coco/deformable-detr_r50_16xb2-50e_coco_20221029_210934-6bc7d21b.pth - Name: deformable-detr_refine_r50_16xb2-50e_coco In Collection: Deformable DETR @@ -40,8 +40,8 @@ Models: - Task: Object Detection Dataset: COCO Metrics: - box AP: 46.1 - Weights: https://download.openmmlab.com/mmdetection/v2.0/deformable_detr/deformable_detr_refine_r50_16x2_50e_coco/deformable_detr_refine_r50_16x2_50e_coco_20210419_220503-5f5dff21.pth + box AP: 46.2 + Weights: https://download.openmmlab.com/mmdetection/v3.0/deformable_detr/deformable-detr-refine_r50_16xb2-50e_coco/deformable-detr-refine_r50_16xb2-50e_coco_20221022_225303-844e0f93.pth - Name: deformable-detr_refine_twostage_r50_16xb2-50e_coco In Collection: Deformable DETR @@ -52,5 +52,5 @@ Models: - Task: Object Detection Dataset: COCO Metrics: - box AP: 46.8 - Weights: https://download.openmmlab.com/mmdetection/v2.0/deformable_detr/deformable_detr_twostage_refine_r50_16x2_50e_coco/deformable_detr_twostage_refine_r50_16x2_50e_coco_20210419_220613-9d28ab72.pth + box AP: 47.0 + Weights: https://download.openmmlab.com/mmdetection/v3.0/deformable_detr/deformable-detr-refine-twostage_r50_16xb2-50e_coco/deformable-detr-refine-twostage_r50_16xb2-50e_coco_20221021_184714-acc8a5ff.pth diff --git a/configs/detr/metafile.yml b/configs/detr/metafile.yml index 6b7f45eca9e..a9132dff022 100644 --- a/configs/detr/metafile.yml +++ b/configs/detr/metafile.yml @@ -29,5 +29,5 @@ Models: - Task: Object Detection Dataset: COCO Metrics: - box AP: 40.1 - Weights: https://download.openmmlab.com/mmdetection/v2.0/detr/detr_r50_8x2_150e_coco/detr_r50_8x2_150e_coco_20201130_194835-2c4b8974.pth + box AP: 39.9 + Weights: https://download.openmmlab.com/mmdetection/v3.0/detr/detr_r50_8xb2-150e_coco/detr_r50_8xb2-150e_coco_20221023_153551-436d03e8.pth diff --git a/configs/mask2former/metafile.yml b/configs/mask2former/metafile.yml index 1de7a4e6821..3321239213f 100644 --- a/configs/mask2former/metafile.yml +++ b/configs/mask2former/metafile.yml @@ -36,7 +36,7 @@ Models: Dataset: COCO Metrics: PQ: 54.5 - Weights: 
https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic_20220329_225200-c7b94355.pth + Weights: https://download.openmmlab.com/mmdetection/v3.0/mask2former/mask2former_swin-s-p4-w7-224_8xb2-lsj-50e_coco-panoptic/mask2former_swin-s-p4-w7-224_8xb2-lsj-50e_coco-panoptic_20220329_225200-4a16ded7.pth - Name: mask2former_r101_8xb2-lsj-50e_coco In Collection: Mask2Former Config: configs/mask2former/mask2former_r101_8xb2-lsj-50e_coco.py @@ -52,7 +52,7 @@ Models: Dataset: COCO Metrics: mask AP: 44.0 - Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco/mask2former_r101_lsj_8x2_50e_coco_20220426_100250-c50b6fa6.pth + Weights: https://download.openmmlab.com/mmdetection/v3.0/mask2former/mask2former_r101_8xb2-lsj-50e_coco/mask2former_r101_8xb2-lsj-50e_coco_20220426_100250-ecf181e2.pth - Name: mask2former_r101_8xb2-lsj-50e_coco-panoptic In Collection: Mask2Former Config: configs/mask2former/mask2former_r101_8xb2-lsj-50e_coco-panoptic.py @@ -72,7 +72,7 @@ Models: Dataset: COCO Metrics: PQ: 52.4 - Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco-panoptic/mask2former_r101_lsj_8x2_50e_coco-panoptic_20220329_225104-c54e64c9.pth + Weights: https://download.openmmlab.com/mmdetection/v3.0/mask2former/mask2former_r101_8xb2-lsj-50e_coco-panoptic/mask2former_r101_8xb2-lsj-50e_coco-panoptic_20220329_225104-c74d4d71.pth - Name: mask2former_r50_8xb2-lsj-50e_coco-panoptic In Collection: Mask2Former Config: configs/mask2former/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py @@ -83,16 +83,16 @@ Models: - Task: Object Detection Dataset: COCO Metrics: - box AP: 44.8 + box AP: 44.5 - Task: Instance Segmentation Dataset: COCO Metrics: - mask AP: 41.9 + mask AP: 41.8 - Task: Panoptic Segmentation Dataset: COCO Metrics: - PQ: 51.9 - Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r50_lsj_8x2_50e_coco-panoptic/mask2former_r50_lsj_8x2_50e_coco-panoptic_20220326_224516-11a44721.pth + PQ: 52.0 + Weights: https://download.openmmlab.com/mmdetection/v3.0/mask2former/mask2former_r50_8xb2-lsj-50e_coco-panoptic/mask2former_r50_8xb2-lsj-50e_coco-panoptic_20230118_125535-54df384a.pth - Name: mask2former_swin-t-p4-w7-224_8xb2-lsj-50e_coco-panoptic In Collection: Mask2Former Config: configs/mask2former/mask2former_swin-t-p4-w7-224_8xb2-lsj-50e_coco-panoptic.py @@ -112,7 +112,7 @@ Models: Dataset: COCO Metrics: PQ: 53.4 - Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic_20220326_224553-fc567107.pth + Weights: https://download.openmmlab.com/mmdetection/v3.0/mask2former/mask2former_swin-t-p4-w7-224_8xb2-lsj-50e_coco-panoptic/mask2former_swin-t-p4-w7-224_8xb2-lsj-50e_coco-panoptic_20220326_224553-3ec9e0ae.pth - Name: mask2former_r50_8xb2-lsj-50e_coco In Collection: Mask2Former Config: configs/mask2former/mask2former_r50_8xb2-lsj-50e_coco.py @@ -128,7 +128,7 @@ Models: Dataset: COCO Metrics: mask AP: 42.9 - Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r50_lsj_8x2_50e_coco/mask2former_r50_lsj_8x2_50e_coco_20220506_191028-8e96e88b.pth + Weights: https://download.openmmlab.com/mmdetection/v3.0/mask2former/mask2former_r50_8xb2-lsj-50e_coco/mask2former_r50_8xb2-lsj-50e_coco_20220506_191028-41b088b6.pth - Name: 
mask2former_swin-l-p4-w12-384-in21k_16xb1-lsj-100e_coco-panoptic In Collection: Mask2Former Config: configs/mask2former/mask2former_swin-l-p4-w12-384-in21k_16xb1-lsj-100e_coco-panoptic.py @@ -148,7 +148,7 @@ Models: Dataset: COCO Metrics: PQ: 57.6 - Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic_20220407_104949-d4919c44.pth + Weights: https://download.openmmlab.com/mmdetection/v3.0/mask2former/mask2former_swin-l-p4-w12-384-in21k_16xb1-lsj-100e_coco-panoptic/mask2former_swin-l-p4-w12-384-in21k_16xb1-lsj-100e_coco-panoptic_20220407_104949-82f8d28d.pth - Name: mask2former_swin-b-p4-w12-384-in21k_8xb2-lsj-50e_coco-panoptic In Collection: Mask2Former Config: configs/mask2former/mask2former_swin-b-p4-w12-384-in21k_8xb2-lsj-50e_coco-panoptic.py @@ -168,7 +168,7 @@ Models: Dataset: COCO Metrics: PQ: 56.3 - Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic_20220329_230021-3bb8b482.pth + Weights: https://download.openmmlab.com/mmdetection/v3.0/mask2former/mask2former_swin-b-p4-w12-384-in21k_8xb2-lsj-50e_coco-panoptic/mask2former_swin-b-p4-w12-384-in21k_8xb2-lsj-50e_coco-panoptic_20220329_230021-05ec7315.pth - Name: mask2former_swin-b-p4-w12-384_8xb2-lsj-50e_coco-panoptic In Collection: Mask2Former Config: configs/mask2former/mask2former_swin-b-p4-w12-384_8xb2-lsj-50e_coco-panoptic.py @@ -188,7 +188,7 @@ Models: Dataset: COCO Metrics: PQ: 55.1 - Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic_20220331_002244-c149a9e9.pth + Weights: https://download.openmmlab.com/mmdetection/v3.0/mask2former/mask2former_swin-b-p4-w12-384_8xb2-lsj-50e_coco-panoptic/mask2former_swin-b-p4-w12-384_8xb2-lsj-50e_coco-panoptic_20220331_002244-8a651d82.pth - Name: mask2former_swin-t-p4-w7-224_8xb2-lsj-50e_coco In Collection: Mask2Former Config: configs/mask2former/mask2former_swin-t-p4-w7-224_8xb2-lsj-50e_coco.py @@ -204,7 +204,7 @@ Models: Dataset: COCO Metrics: mask AP: 44.7 - Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco_20220508_091649-4a943037.pth + Weights: https://download.openmmlab.com/mmdetection/v3.0/mask2former/mask2former_swin-t-p4-w7-224_8xb2-lsj-50e_coco/mask2former_swin-t-p4-w7-224_8xb2-lsj-50e_coco_20220508_091649-01b0f990.pth - Name: mask2former_swin-s-p4-w7-224_8xb2-lsj-50e_coco In Collection: Mask2Former Config: configs/mask2former/mask2former_swin-s-p4-w7-224_8xb2-lsj-50e_coco.py @@ -220,4 +220,4 @@ Models: Dataset: COCO Metrics: mask AP: 46.1 - Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco_20220504_001756-743b7d99.pth + Weights: https://download.openmmlab.com/mmdetection/v3.0/mask2former/mask2former_swin-s-p4-w7-224_8xb2-lsj-50e_coco/mask2former_swin-s-p4-w7-224_8xb2-lsj-50e_coco_20220504_001756-c9d0c4f2.pth From e7eeeb98dc4a04b0e5e1a784f9918fb79411b4a6 Mon Sep 17 00:00:00 2001 From: takuoko Date: Tue, 7 Mar 2023 00:52:20 -0700 Subject: [PATCH 04/38] [Feature] Release DINO Swin-L model (#9864) --- configs/dino/README.md | 7 +++-- 
.../dino/dino-5scale_swin-l_8xb2-12e_coco.py | 31 +++++++++++++++++++ configs/dino/metafile.yml | 12 +++++++ 3 files changed, 47 insertions(+), 3 deletions(-) create mode 100644 configs/dino/dino-5scale_swin-l_8xb2-12e_coco.py diff --git a/configs/dino/README.md b/configs/dino/README.md index 0f1d4eb9702..8512f7f8c7f 100644 --- a/configs/dino/README.md +++ b/configs/dino/README.md @@ -14,9 +14,10 @@ We present DINO (DETR with Improved deNoising anchOr boxes), a state-of-the-art ## Results and Models -| Backbone | Model | Lr schd | box AP | Config | Download | -| :------: | :---------: | :-----: | :----: | :------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| R-50 | DINO-4scale | 12e | 49.0 | [config](./dino-4scale_r50_8xb2-12e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v3.0/dino/dino-4scale_r50_8xb2-12e_coco/dino-4scale_r50_8xb2-12e_coco_20221202_182705-55b2bba2.pth) \| [log](https://download.openmmlab.com/mmdetection/v3.0/dino/dino-4scale_r50_8xb2-12e_coco/dino-4scale_r50_8xb2-12e_coco_20221202_182705.log.json) | +| Backbone | Model | Lr schd | box AP | Config | Download | +| :------: | :---------: | :-----: | :----: | :---------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50 | DINO-4scale | 12e | 49.0 | [config](./dino-4scale_r50_8xb2-12e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v3.0/dino/dino-4scale_r50_8xb2-12e_coco/dino-4scale_r50_8xb2-12e_coco_20221202_182705-55b2bba2.pth) \| [log](https://download.openmmlab.com/mmdetection/v3.0/dino/dino-4scale_r50_8xb2-12e_coco/dino-4scale_r50_8xb2-12e_coco_20221202_182705.log.json) | +| Swin-L | DINO-5scale | 12e | 57.2 | [config](./dino-5scale_swin-l_8xb2-12e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v3.0/dino/dino-5scale_swin-l_8xb2-12e_coco/dino-5scale_swin-l_8xb2-12e_coco_20230228_072924-a654145f.pth) \| [log](https://download.openmmlab.com/mmdetection/v3.0/dino/dino-5scale_swin-l_8xb2-12e_coco/dino-5scale_swin-l_8xb2-12e_coco_20230228_072924.log) | ### NOTE diff --git a/configs/dino/dino-5scale_swin-l_8xb2-12e_coco.py b/configs/dino/dino-5scale_swin-l_8xb2-12e_coco.py new file mode 100644 index 00000000000..fd94e9936c7 --- /dev/null +++ b/configs/dino/dino-5scale_swin-l_8xb2-12e_coco.py @@ -0,0 +1,31 @@ +_base_ = './dino-4scale_r50_8xb2-12e_coco.py' + +fp16 = dict(loss_scale=512.) 
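+# (Annotation, not part of the original patch: `fp16` requests
+# mixed-precision training with a static loss scale; scaling the loss by
+# 512 keeps small FP16 gradients from underflowing.)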
+pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22k.pth' # noqa +num_levels = 5 +model = dict( + num_feature_levels=num_levels, + backbone=dict( + _delete_=True, + type='SwinTransformer', + pretrain_img_size=384, + embed_dims=192, + depths=[2, 2, 18, 2], + num_heads=[6, 12, 24, 48], + window_size=12, + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.2, + patch_norm=True, + out_indices=(0, 1, 2, 3), + # Please only add indices that would be used + # in FPN, otherwise some parameter will not be used + with_cp=True, + convert_weights=True, + init_cfg=dict(type='Pretrained', checkpoint=pretrained)), + neck=dict(in_channels=[192, 384, 768, 1536], num_outs=num_levels), + encoder=dict(layer_cfg=dict(self_attn_cfg=dict(num_levels=num_levels))), + decoder=dict(layer_cfg=dict(cross_attn_cfg=dict(num_levels=num_levels)))) diff --git a/configs/dino/metafile.yml b/configs/dino/metafile.yml index 5b68a41abf9..2f61fb38431 100644 --- a/configs/dino/metafile.yml +++ b/configs/dino/metafile.yml @@ -48,3 +48,15 @@ Models: Results: - Task: Object Detection Dataset: COCO + + - Name: dino-5scale_swin-l_8xb2-12e_coco.py + In Collection: DINO + Config: configs/dino/dino-5scale_swin-l_8xb2-12e_coco.py + Metadata: + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 57.2 + Weights: https://download.openmmlab.com/mmdetection/v3.0/dino/dino-5scale_swin-l_8xb2-12e_coco/dino-5scale_swin-l_8xb2-12e_coco_20230228_072924-a654145f.pth From 37ae7d1a084293e5dda2e471b677e2e672633ff1 Mon Sep 17 00:00:00 2001 From: RangiLyu Date: Tue, 7 Mar 2023 19:14:47 +0800 Subject: [PATCH 05/38] [Fix] Fix cropping polygon mask (#9858) --- mmdet/structures/mask/structures.py | 39 ++++++++++++++++++++++++++--- requirements/runtime.txt | 1 + 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/mmdet/structures/mask/structures.py b/mmdet/structures/mask/structures.py index 7e51e128dea..b4fdd27570b 100644 --- a/mmdet/structures/mask/structures.py +++ b/mmdet/structures/mask/structures.py @@ -7,6 +7,7 @@ import mmcv import numpy as np import pycocotools.mask as maskUtils +import shapely.geometry as geometry import torch from mmcv.ops.roi_align import roi_align @@ -753,16 +754,46 @@ def crop(self, bbox): if len(self.masks) == 0: cropped_masks = PolygonMasks([], h, w) else: + # reference: https://github.com/facebookresearch/fvcore/blob/main/fvcore/transforms/transform.py # noqa + crop_box = geometry.box(x1, y1, x2, y2).buffer(0.0) cropped_masks = [] + # suppress shapely warnings util it incorporates GEOS>=3.11.2 + # reference: https://github.com/shapely/shapely/issues/1345 + initial_settings = np.seterr() + np.seterr(invalid='ignore') for poly_per_obj in self.masks: cropped_poly_per_obj = [] for p in poly_per_obj: - # pycocotools will clip the boundary p = p.copy() - p[0::2] = p[0::2] - bbox[0] - p[1::2] = p[1::2] - bbox[1] - cropped_poly_per_obj.append(p) + p = geometry.Polygon(p.reshape(-1, 2)).buffer(0.0) + # polygon must be valid to perform intersection. 
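+                # (Annotation, not part of the original patch:
+                # `buffer(0.0)` above is the usual Shapely idiom for
+                # repairing self-intersecting rings; a geometry that is
+                # still invalid would make `intersection` raise, hence
+                # the skip below.)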
+ if not p.is_valid: + continue + cropped = p.intersection(crop_box) + if cropped.is_empty: + continue + if isinstance(cropped, + geometry.collection.BaseMultipartGeometry): + cropped = cropped.geoms + else: + cropped = [cropped] + # one polygon may be cropped to multiple ones + for poly in cropped: + # ignore lines or points + if not isinstance( + poly, geometry.Polygon) or not poly.is_valid: + continue + coords = np.asarray(poly.exterior.coords) + # remove an extra identical vertex at the end + coords = coords[:-1] + coords[:, 0] -= x1 + coords[:, 1] -= y1 + cropped_poly_per_obj.append(coords.reshape(-1)) + # a dummy polygon to avoid misalignment between masks and boxes + if len(cropped_poly_per_obj) == 0: + cropped_poly_per_obj = [np.array([0, 0, 0, 0, 0, 0])] cropped_masks.append(cropped_poly_per_obj) + np.seterr(**initial_settings) cropped_masks = PolygonMasks(cropped_masks, h, w) return cropped_masks diff --git a/requirements/runtime.txt b/requirements/runtime.txt index c815aef86c2..f5d31051927 100644 --- a/requirements/runtime.txt +++ b/requirements/runtime.txt @@ -2,5 +2,6 @@ matplotlib numpy pycocotools scipy +shapely six terminaltables From 93f8186dfc4aa1650e356e45f9967bd0253e3af8 Mon Sep 17 00:00:00 2001 From: zwhus <121282623+zwhus@users.noreply.github.com> Date: Fri, 10 Mar 2023 21:41:31 +0800 Subject: [PATCH 06/38] [Feature] Support EfficientDet in projects (#9810) --- projects/EfficientDet/README.md | 46 +++-- ...det_effb0_bifpn_8xb16-crop512-300e_coco.py | 175 ++++++++++++++++++ ...fb3_bifpn_8xb16-crop896-300e_coco-90cls.py | 175 ++++++++++++++++++ ...det_effb3_bifpn_8xb16-crop896-300e_coco.py | 175 ++++++++++++++++++ ...effb0_bifpn_8xb16-crop512-300e_coco_tf.py} | 33 ++-- projects/EfficientDet/convert_tf_to_pt.py | 97 +++++----- .../EfficientDet/efficientdet/__init__.py | 18 +- projects/EfficientDet/efficientdet/bifpn.py | 24 +-- .../efficientdet/efficientdet_head.py | 161 ++++++++++++++-- .../EfficientDet/efficientdet/huber_loss.py | 91 +++++++++ .../{ => tensorflow}/anchor_generator.py | 0 .../{ => tensorflow}/api_wrappers/__init__.py | 0 .../{ => tensorflow}/api_wrappers/coco_api.py | 1 - .../{ => tensorflow}/coco_90class.py | 0 .../{ => tensorflow}/coco_90metric.py | 0 .../trans_max_iou_assigner.py | 0 .../{ => tensorflow}/yxyx_bbox_coder.py | 0 projects/EfficientDet/efficientdet/utils.py | 81 +++----- 18 files changed, 911 insertions(+), 166 deletions(-) create mode 100644 projects/EfficientDet/configs/efficientdet_effb0_bifpn_8xb16-crop512-300e_coco.py create mode 100644 projects/EfficientDet/configs/efficientdet_effb3_bifpn_8xb16-crop896-300e_coco-90cls.py create mode 100644 projects/EfficientDet/configs/efficientdet_effb3_bifpn_8xb16-crop896-300e_coco.py rename projects/EfficientDet/configs/{efficientdet_effb0_bifpn_16xb8-crop512-300e_coco.py => tensorflow/efficientdet_effb0_bifpn_8xb16-crop512-300e_coco_tf.py} (88%) create mode 100644 projects/EfficientDet/efficientdet/huber_loss.py rename projects/EfficientDet/efficientdet/{ => tensorflow}/anchor_generator.py (100%) rename projects/EfficientDet/efficientdet/{ => tensorflow}/api_wrappers/__init__.py (100%) rename projects/EfficientDet/efficientdet/{ => tensorflow}/api_wrappers/coco_api.py (98%) rename projects/EfficientDet/efficientdet/{ => tensorflow}/coco_90class.py (100%) rename projects/EfficientDet/efficientdet/{ => tensorflow}/coco_90metric.py (100%) rename projects/EfficientDet/efficientdet/{ => tensorflow}/trans_max_iou_assigner.py (100%) rename projects/EfficientDet/efficientdet/{ => 
tensorflow}/yxyx_bbox_coder.py (100%) diff --git a/projects/EfficientDet/README.md b/projects/EfficientDet/README.md index 7bc073f0df5..4de4d9700cb 100644 --- a/projects/EfficientDet/README.md +++ b/projects/EfficientDet/README.md @@ -22,6 +22,10 @@ In contrast to other feature pyramid network, such as FPN, FPN + PAN, NAS-FPN, B ## Usage +## Official TensorFlow Model + +This project also supports [official tensorflow model](https://github.com/google/automl), it uses 90 categories and yxyx box encoding in training. If you want to use the original model weight to get official results, please refer to the following steps. + ### Model conversion Firstly, download EfficientDet [weights](https://github.com/google/automl/tree/master/efficientdet) and unzip, please use the following command @@ -47,20 +51,40 @@ python projects/EfficientDet/convert_tf_to_pt.py --backbone {BACKBONE_NAME} --te In MMDetection's root directory, run the following command to test the model: ```bash -python tools/test.py projects/EfficientDet/configs/efficientdet_effb0_bifpn_8xb16-crop512-300e_coco.py ${CHECKPOINT_PATH} +python tools/test.py projects/EfficientDet/configs/tensorflow/efficientdet_effb0_bifpn_8xb16-crop512-300e_coco_tf.py ${CHECKPOINT_PATH} +``` + +## Reproduce Model + +For convenience, we recommend the current implementation version, it uses 80 categories and xyxy encoding in training. On this basis, a higher result was finally achieved. + +### Training commands + +In MMDetection's root directory, run the following command to train the model: + +```bash +python tools/train.py projects/EfficientDet/configs/efficientdet_effb3_bifpn_8xb16-crop896-300e_coco.py +``` + +### Testing commands + +In MMDetection's root directory, run the following command to test the model: + +```bash +python tools/test.py projects/EfficientDet/configs/efficientdet_effb3_bifpn_8xb16-crop896-300e_coco.py ${CHECKPOINT_PATH} ``` ## Results -Based on mmdetection, this project aligns the test accuracy of the [official model](https://github.com/google/automl). -
-If you want to reproduce the test results, you need to convert model weights first, then run the test command. -
-The training accuracy will also be aligned with the official in the future +Based on mmdetection, this project aligns the accuracy of the [official model](https://github.com/google/automl). + +| Method | Backbone | Pretrained Model | Training set | Test set | Epoch | Val Box AP | Official AP | Download | +| :------------------------------------------------------------------------------------------------------------------: | :-------------: | :--------------: | :------------: | :----------: | :---: | :--------: | :---------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| [efficientdet-d0\*](projects/EfficientDet/configs/tensorflow/efficientdet_effb0_bifpn_8xb16-crop512-300e_coco_tf.py) | efficientnet-b0 | ImageNet | COCO2017 Train | COCO2017 Val | 300 | 34.4 | 34.3 | | +| [efficientdet-d3](projects/EfficientDet/configs/efficientdet_effb3_bifpn_8xb16-crop896-300e_coco.py) | efficientnet-b3 | ImageNet | COCO2017 Train | COCO2017 Val | 300 | 47.2 | 46.8 | [model](https://download.openmmlab.com/mmdetection/v3.0/efficientdet/efficientdet_effb3_bifpn_8xb16-crop896-300e_coco/efficientdet_effb3_bifpn_8xb16-crop896-300e_coco_20230223_122457-e6f7a833.pth) \| [log](https://download.openmmlab.com/mmdetection/v3.0/efficientdet/efficientdet_effb3_bifpn_8xb16-crop896-300e_coco/efficientdet_effb3_bifpn_8xb16-crop896-300e_coco_20230223_122457.log.json) | -| Method | Backbone | Pretrained Model | Training set | Test set | Epoch | Val Box AP | Official AP | -| :------------------------------------------------------------------------------: | :-------------: | :--------------: | :------------: | :----------: | :---: | :--------: | :---------: | -| [efficientdet-d0](./configs/efficientdet_effb0_bifpn_8xb16-crop512-300e_coco.py) | efficientnet-b0 | ImageNet | COCO2017 Train | COCO2017 Val | 300 | 34.4 | 34.3 | +**Note**: +\*means use [official tensorflow model](https://github.com/google/automl) weights to test. ## Citation @@ -99,9 +123,9 @@ A project does not necessarily have to be finished in a single PR, but it's esse -- [ ] Milestone 2: Indicates a successful model implementation. +- [x] Milestone 2: Indicates a successful model implementation. 
- - [ ] Training-time correctness + - [x] Training-time correctness diff --git a/projects/EfficientDet/configs/efficientdet_effb0_bifpn_8xb16-crop512-300e_coco.py b/projects/EfficientDet/configs/efficientdet_effb0_bifpn_8xb16-crop512-300e_coco.py new file mode 100644 index 00000000000..8ccbc85a479 --- /dev/null +++ b/projects/EfficientDet/configs/efficientdet_effb0_bifpn_8xb16-crop512-300e_coco.py @@ -0,0 +1,175 @@ +_base_ = [ + 'mmdet::_base_/datasets/coco_detection.py', + 'mmdet::_base_/schedules/schedule_1x.py', + 'mmdet::_base_/default_runtime.py' +] +custom_imports = dict( + imports=['projects.EfficientDet.efficientdet'], allow_failed_imports=False) + +image_size = 512 +batch_augments = [ + dict(type='BatchFixedSizePad', size=(image_size, image_size)) +] +dataset_type = 'CocoDataset' +evalute_type = 'CocoMetric' +norm_cfg = dict(type='SyncBN', requires_grad=True, eps=1e-3, momentum=0.01) +checkpoint = 'https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b0_3rdparty_8xb32-aa-advprop_in1k_20220119-26434485.pth' # noqa +model = dict( + type='EfficientDet', + data_preprocessor=dict( + type='DetDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_size_divisor=image_size, + batch_augments=batch_augments), + backbone=dict( + type='EfficientNet', + arch='b0', + drop_path_rate=0.2, + out_indices=(3, 4, 5), + frozen_stages=0, + conv_cfg=dict(type='Conv2dSamePadding'), + norm_cfg=norm_cfg, + norm_eval=False, + init_cfg=dict( + type='Pretrained', prefix='backbone', checkpoint=checkpoint)), + neck=dict( + type='BiFPN', + num_stages=3, + in_channels=[40, 112, 320], + out_channels=64, + start_level=0, + norm_cfg=norm_cfg), + bbox_head=dict( + type='EfficientDetSepBNHead', + num_classes=80, + num_ins=5, + in_channels=64, + feat_channels=64, + stacked_convs=3, + norm_cfg=norm_cfg, + anchor_generator=dict( + type='AnchorGenerator', + octave_base_scale=4, + scales_per_octave=3, + ratios=[1.0, 0.5, 2.0], + strides=[8, 16, 32, 64, 128], + center_offset=0.5), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=1.5, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='HuberLoss', beta=0.1, loss_weight=50)), + # training and testing settings + train_cfg=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0, + ignore_iof_thr=-1), + sampler=dict( + type='PseudoSampler'), # Focal loss should use PseudoSampler + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict( + type='soft_nms', + iou_threshold=0.3, + sigma=0.5, + min_score=1e-3, + method='gaussian'), + max_per_img=100)) + +# dataset settings +train_pipeline = [ + dict( + type='LoadImageFromFile', + file_client_args={{_base_.file_client_args}}), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='RandomResize', + scale=(image_size, image_size), + ratio_range=(0.1, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=(image_size, image_size)), + dict(type='RandomFlip', prob=0.5), + dict(type='PackDetInputs') +] +test_pipeline = [ + dict( + type='LoadImageFromFile', + file_client_args={{_base_.file_client_args}}), + dict(type='Resize', scale=(image_size, image_size), keep_ratio=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PackDetInputs', + meta_keys=('img_id', 'img_path', 
'ori_shape', 'img_shape', + 'scale_factor')) +] + +train_dataloader = dict( + batch_size=16, + num_workers=8, + dataset=dict(type=dataset_type, pipeline=train_pipeline)) +val_dataloader = dict(dataset=dict(type=dataset_type, pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict(type=evalute_type) +test_evaluator = val_evaluator + +optim_wrapper = dict( + optimizer=dict(lr=0.16, weight_decay=4e-5), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True), + clip_grad=dict(max_norm=10, norm_type=2)) + +# learning policy +max_epochs = 300 +param_scheduler = [ + dict(type='LinearLR', start_factor=0.1, by_epoch=False, begin=0, end=917), + dict( + type='CosineAnnealingLR', + eta_min=0.0, + begin=1, + T_max=299, + end=300, + by_epoch=True, + convert_to_iter_based=True) +] +train_cfg = dict(max_epochs=max_epochs, val_interval=1) + +vis_backends = [ + dict(type='LocalVisBackend'), + dict(type='TensorboardVisBackend') +] +visualizer = dict( + type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') + +default_hooks = dict(checkpoint=dict(type='CheckpointHook', interval=15)) +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49) +] +# cudnn_benchmark=True can accelerate fix-size training +env_cfg = dict(cudnn_benchmark=True) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (16 samples per GPU) +auto_scale_lr = dict(base_batch_size=128) diff --git a/projects/EfficientDet/configs/efficientdet_effb3_bifpn_8xb16-crop896-300e_coco-90cls.py b/projects/EfficientDet/configs/efficientdet_effb3_bifpn_8xb16-crop896-300e_coco-90cls.py new file mode 100644 index 00000000000..e1ff4d7d147 --- /dev/null +++ b/projects/EfficientDet/configs/efficientdet_effb3_bifpn_8xb16-crop896-300e_coco-90cls.py @@ -0,0 +1,175 @@ +_base_ = [ + 'mmdet::_base_/datasets/coco_detection.py', + 'mmdet::_base_/schedules/schedule_1x.py', + 'mmdet::_base_/default_runtime.py' +] +custom_imports = dict( + imports=['projects.EfficientDet.efficientdet'], allow_failed_imports=False) + +image_size = 896 +batch_augments = [ + dict(type='BatchFixedSizePad', size=(image_size, image_size)) +] +dataset_type = 'Coco90Dataset' +evalute_type = 'Coco90Metric' +norm_cfg = dict(type='SyncBN', requires_grad=True, eps=1e-3, momentum=0.01) +checkpoint = 'https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b3_3rdparty_8xb32-aa-advprop_in1k_20220119-53b41118.pth' # noqa +model = dict( + type='EfficientDet', + data_preprocessor=dict( + type='DetDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_size_divisor=image_size, + batch_augments=batch_augments), + backbone=dict( + type='EfficientNet', + arch='b3', + drop_path_rate=0.3, + out_indices=(3, 4, 5), + frozen_stages=0, + conv_cfg=dict(type='Conv2dSamePadding'), + norm_cfg=norm_cfg, + norm_eval=False, + init_cfg=dict( + type='Pretrained', prefix='backbone', checkpoint=checkpoint)), + neck=dict( + type='BiFPN', + num_stages=6, + in_channels=[48, 136, 384], + out_channels=160, + start_level=0, + norm_cfg=norm_cfg), + bbox_head=dict( + type='EfficientDetSepBNHead', + num_classes=90, + num_ins=5, + in_channels=160, + feat_channels=160, + stacked_convs=4, + norm_cfg=norm_cfg, + anchor_generator=dict( + type='AnchorGenerator', + octave_base_scale=4, + scales_per_octave=3, + ratios=[1.0, 0.5, 2.0], + strides=[8, 16, 
32, 64, 128], + center_offset=0.5), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=1.5, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='HuberLoss', beta=0.1, loss_weight=50)), + # training and testing settings + train_cfg=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0, + ignore_iof_thr=-1), + sampler=dict( + type='PseudoSampler'), # Focal loss should use PseudoSampler + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict( + type='soft_nms', + iou_threshold=0.3, + sigma=0.5, + min_score=1e-3, + method='gaussian'), + max_per_img=100)) + +# dataset settings +train_pipeline = [ + dict( + type='LoadImageFromFile', + file_client_args={{_base_.file_client_args}}), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='RandomResize', + scale=(image_size, image_size), + ratio_range=(0.1, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=(image_size, image_size)), + dict(type='RandomFlip', prob=0.5), + dict(type='PackDetInputs') +] +test_pipeline = [ + dict( + type='LoadImageFromFile', + file_client_args={{_base_.file_client_args}}), + dict(type='Resize', scale=(image_size, image_size), keep_ratio=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] + +train_dataloader = dict( + batch_size=16, + num_workers=8, + dataset=dict(type=dataset_type, pipeline=train_pipeline)) +val_dataloader = dict(dataset=dict(type=dataset_type, pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict(type=evalute_type) +test_evaluator = val_evaluator + +optim_wrapper = dict( + optimizer=dict(lr=0.16, weight_decay=4e-5), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True), + clip_grad=dict(max_norm=10, norm_type=2)) + +# learning policy +max_epochs = 300 +param_scheduler = [ + dict(type='LinearLR', start_factor=0.1, by_epoch=False, begin=0, end=917), + dict( + type='CosineAnnealingLR', + eta_min=0.0, + begin=1, + T_max=299, + end=300, + by_epoch=True, + convert_to_iter_based=True) +] +train_cfg = dict(max_epochs=max_epochs, val_interval=1) + +vis_backends = [ + dict(type='LocalVisBackend'), + dict(type='TensorboardVisBackend') +] +visualizer = dict( + type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') + +default_hooks = dict(checkpoint=dict(type='CheckpointHook', interval=15)) +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49) +] +# cudnn_benchmark=True can accelerate fix-size training +env_cfg = dict(cudnn_benchmark=True) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. 
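+# (Annotation, not part of the original patch: the automatic scaling is
+# applied only when training is launched with the `--auto-scale-lr` flag
+# of tools/train.py.)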
+# base_batch_size = (8 GPUs) x (16 samples per GPU) +auto_scale_lr = dict(base_batch_size=128) diff --git a/projects/EfficientDet/configs/efficientdet_effb3_bifpn_8xb16-crop896-300e_coco.py b/projects/EfficientDet/configs/efficientdet_effb3_bifpn_8xb16-crop896-300e_coco.py new file mode 100644 index 00000000000..5d9a6b6fe93 --- /dev/null +++ b/projects/EfficientDet/configs/efficientdet_effb3_bifpn_8xb16-crop896-300e_coco.py @@ -0,0 +1,175 @@ +_base_ = [ + 'mmdet::_base_/datasets/coco_detection.py', + 'mmdet::_base_/schedules/schedule_1x.py', + 'mmdet::_base_/default_runtime.py' +] +custom_imports = dict( + imports=['projects.EfficientDet.efficientdet'], allow_failed_imports=False) + +image_size = 896 +batch_augments = [ + dict(type='BatchFixedSizePad', size=(image_size, image_size)) +] +dataset_type = 'CocoDataset' +evalute_type = 'CocoMetric' +norm_cfg = dict(type='SyncBN', requires_grad=True, eps=1e-3, momentum=0.01) +checkpoint = 'https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b3_3rdparty_8xb32-aa-advprop_in1k_20220119-53b41118.pth' # noqa +model = dict( + type='EfficientDet', + data_preprocessor=dict( + type='DetDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_size_divisor=image_size, + batch_augments=batch_augments), + backbone=dict( + type='EfficientNet', + arch='b3', + drop_path_rate=0.3, + out_indices=(3, 4, 5), + frozen_stages=0, + conv_cfg=dict(type='Conv2dSamePadding'), + norm_cfg=norm_cfg, + norm_eval=False, + init_cfg=dict( + type='Pretrained', prefix='backbone', checkpoint=checkpoint)), + neck=dict( + type='BiFPN', + num_stages=6, + in_channels=[48, 136, 384], + out_channels=160, + start_level=0, + norm_cfg=norm_cfg), + bbox_head=dict( + type='EfficientDetSepBNHead', + num_classes=80, + num_ins=5, + in_channels=160, + feat_channels=160, + stacked_convs=4, + norm_cfg=norm_cfg, + anchor_generator=dict( + type='AnchorGenerator', + octave_base_scale=4, + scales_per_octave=3, + ratios=[1.0, 0.5, 2.0], + strides=[8, 16, 32, 64, 128], + center_offset=0.5), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=1.5, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='HuberLoss', beta=0.1, loss_weight=50)), + # training and testing settings + train_cfg=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0, + ignore_iof_thr=-1), + sampler=dict( + type='PseudoSampler'), # Focal loss should use PseudoSampler + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict( + type='soft_nms', + iou_threshold=0.3, + sigma=0.5, + min_score=1e-3, + method='gaussian'), + max_per_img=100)) + +# dataset settings +train_pipeline = [ + dict( + type='LoadImageFromFile', + file_client_args={{_base_.file_client_args}}), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='RandomResize', + scale=(image_size, image_size), + ratio_range=(0.1, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=(image_size, image_size)), + dict(type='RandomFlip', prob=0.5), + dict(type='PackDetInputs') +] +test_pipeline = [ + dict( + type='LoadImageFromFile', + file_client_args={{_base_.file_client_args}}), + dict(type='Resize', scale=(image_size, image_size), keep_ratio=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PackDetInputs', + 
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] + +train_dataloader = dict( + batch_size=16, + num_workers=8, + dataset=dict(type=dataset_type, pipeline=train_pipeline)) +val_dataloader = dict(dataset=dict(type=dataset_type, pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict(type=evalute_type) +test_evaluator = val_evaluator + +optim_wrapper = dict( + optimizer=dict(lr=0.16, weight_decay=4e-5), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True), + clip_grad=dict(max_norm=10, norm_type=2)) + +# learning policy +max_epochs = 300 +param_scheduler = [ + dict(type='LinearLR', start_factor=0.1, by_epoch=False, begin=0, end=917), + dict( + type='CosineAnnealingLR', + eta_min=0.0, + begin=1, + T_max=299, + end=300, + by_epoch=True, + convert_to_iter_based=True) +] +train_cfg = dict(max_epochs=max_epochs, val_interval=1) + +vis_backends = [ + dict(type='LocalVisBackend'), + dict(type='TensorboardVisBackend') +] +visualizer = dict( + type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') + +default_hooks = dict(checkpoint=dict(type='CheckpointHook', interval=15)) +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49) +] +# cudnn_benchmark=True can accelerate fix-size training +env_cfg = dict(cudnn_benchmark=True) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (16 samples per GPU) +auto_scale_lr = dict(base_batch_size=128) diff --git a/projects/EfficientDet/configs/efficientdet_effb0_bifpn_16xb8-crop512-300e_coco.py b/projects/EfficientDet/configs/tensorflow/efficientdet_effb0_bifpn_8xb16-crop512-300e_coco_tf.py similarity index 88% rename from projects/EfficientDet/configs/efficientdet_effb0_bifpn_16xb8-crop512-300e_coco.py rename to projects/EfficientDet/configs/tensorflow/efficientdet_effb0_bifpn_8xb16-crop512-300e_coco_tf.py index 080b7963b95..00200cdf718 100644 --- a/projects/EfficientDet/configs/efficientdet_effb0_bifpn_16xb8-crop512-300e_coco.py +++ b/projects/EfficientDet/configs/tensorflow/efficientdet_effb0_bifpn_8xb16-crop512-300e_coco_tf.py @@ -7,11 +7,11 @@ imports=['projects.EfficientDet.efficientdet'], allow_failed_imports=False) image_size = 512 -dataset_type = 'Coco90Dataset' -evalute_type = 'Coco90Metric' batch_augments = [ dict(type='BatchFixedSizePad', size=(image_size, image_size)) ] +dataset_type = 'Coco90Dataset' +evalute_type = 'Coco90Metric' norm_cfg = dict(type='SyncBN', requires_grad=True, eps=1e-3, momentum=0.01) checkpoint = 'https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b0_3rdparty_8xb32-aa-advprop_in1k_20220119-26434485.pth' # noqa model = dict( @@ -29,6 +29,7 @@ drop_path_rate=0.2, out_indices=(3, 4, 5), frozen_stages=0, + conv_cfg=dict(type='Conv2dSamePadding'), norm_cfg=norm_cfg, norm_eval=False, init_cfg=dict( @@ -62,10 +63,10 @@ loss_cls=dict( type='FocalLoss', use_sigmoid=True, - gamma=2.0, + gamma=1.5, alpha=0.25, loss_weight=1.0), - loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0)), + loss_bbox=dict(type='HuberLoss', beta=0.1, loss_weight=50)), # training and testing settings train_cfg=dict( assigner=dict( @@ -120,7 +121,7 @@ train_dataloader = dict( batch_size=16, - num_workers=16, + num_workers=8, dataset=dict(type=dataset_type, pipeline=train_pipeline)) val_dataloader = dict(dataset=dict(type=dataset_type, pipeline=test_pipeline)) 
test_dataloader = val_dataloader @@ -129,8 +130,10 @@ test_evaluator = val_evaluator optim_wrapper = dict( - optimizer=dict(lr=0.16), - paramwise_cfg=dict(norm_decay_mult=0, bypass_duplicate=True)) + optimizer=dict(lr=0.16, weight_decay=4e-5), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True), + clip_grad=dict(max_norm=10, norm_type=2)) # learning policy max_epochs = 300 @@ -138,10 +141,10 @@ dict(type='LinearLR', start_factor=0.1, by_epoch=False, begin=0, end=917), dict( type='CosineAnnealingLR', - eta_min=0.0016, + eta_min=0.0, begin=1, - T_max=284, - end=285, + T_max=299, + end=300, by_epoch=True, convert_to_iter_based=True) ] @@ -155,10 +158,18 @@ type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') default_hooks = dict(checkpoint=dict(type='CheckpointHook', interval=15)) +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49) +] # cudnn_benchmark=True can accelerate fix-size training env_cfg = dict(cudnn_benchmark=True) # NOTE: `auto_scale_lr` is for automatically scaling LR, # USER SHOULD NOT CHANGE ITS VALUES. -# base_batch_size = (8 GPUs) x (32 samples per GPU) +# base_batch_size = (8 GPUs) x (16 samples per GPU) auto_scale_lr = dict(base_batch_size=128) diff --git a/projects/EfficientDet/convert_tf_to_pt.py b/projects/EfficientDet/convert_tf_to_pt.py index 6132a6ba241..f3b127f2aaf 100644 --- a/projects/EfficientDet/convert_tf_to_pt.py +++ b/projects/EfficientDet/convert_tf_to_pt.py @@ -164,8 +164,8 @@ def convert_key(model_name, bifpn_repeats, weights): elif seg[0] == 'resample_p6': prefix = 'neck.bifpn.0.p5_to_p6.0' mapping = { - 'conv2d/kernel': 'down_conv.conv.weight', - 'conv2d/bias': 'down_conv.conv.bias', + 'conv2d/kernel': 'down_conv.weight', + 'conv2d/bias': 'down_conv.bias', 'bn/beta': 'bn.bias', 'bn/gamma': 'bn.weight', 'bn/moving_mean': 'bn.running_mean', @@ -180,11 +180,11 @@ def convert_key(model_name, bifpn_repeats, weights): if fnode_id == 0: mapping = { 'op_after_combine5/conv/depthwise_kernel': - 'conv6_up.depthwise_conv.conv.weight', + 'conv6_up.depthwise_conv.weight', 'op_after_combine5/conv/pointwise_kernel': - 'conv6_up.pointwise_conv.conv.weight', + 'conv6_up.pointwise_conv.weight', 'op_after_combine5/conv/bias': - 'conv6_up.pointwise_conv.conv.bias', + 'conv6_up.pointwise_conv.bias', 'op_after_combine5/bn/beta': 'conv6_up.bn.bias', 'op_after_combine5/bn/gamma': @@ -208,11 +208,11 @@ def convert_key(model_name, bifpn_repeats, weights): elif fnode_id == 1: base_mapping = { 'op_after_combine6/conv/depthwise_kernel': - 'conv5_up.depthwise_conv.conv.weight', + 'conv5_up.depthwise_conv.weight', 'op_after_combine6/conv/pointwise_kernel': - 'conv5_up.pointwise_conv.conv.weight', + 'conv5_up.pointwise_conv.weight', 'op_after_combine6/conv/bias': - 'conv5_up.pointwise_conv.conv.bias', + 'conv5_up.pointwise_conv.bias', 'op_after_combine6/bn/beta': 'conv5_up.bn.bias', 'op_after_combine6/bn/gamma': @@ -225,9 +225,9 @@ def convert_key(model_name, bifpn_repeats, weights): if fpn_idx == 0: mapping = { 'resample_0_2_6/conv2d/kernel': - 'p5_down_channel.down_conv.conv.weight', + 'p5_down_channel.down_conv.weight', 'resample_0_2_6/conv2d/bias': - 'p5_down_channel.down_conv.conv.bias', + 'p5_down_channel.down_conv.bias', 'resample_0_2_6/bn/beta': 'p5_down_channel.bn.bias', 'resample_0_2_6/bn/gamma': @@ -252,11 +252,11 @@ def convert_key(model_name, bifpn_repeats, weights): elif fnode_id == 2: base_mapping = { 'op_after_combine7/conv/depthwise_kernel': 
- 'conv4_up.depthwise_conv.conv.weight', + 'conv4_up.depthwise_conv.weight', 'op_after_combine7/conv/pointwise_kernel': - 'conv4_up.pointwise_conv.conv.weight', + 'conv4_up.pointwise_conv.weight', 'op_after_combine7/conv/bias': - 'conv4_up.pointwise_conv.conv.bias', + 'conv4_up.pointwise_conv.bias', 'op_after_combine7/bn/beta': 'conv4_up.bn.bias', 'op_after_combine7/bn/gamma': @@ -269,9 +269,9 @@ def convert_key(model_name, bifpn_repeats, weights): if fpn_idx == 0: mapping = { 'resample_0_1_7/conv2d/kernel': - 'p4_down_channel.down_conv.conv.weight', + 'p4_down_channel.down_conv.weight', 'resample_0_1_7/conv2d/bias': - 'p4_down_channel.down_conv.conv.bias', + 'p4_down_channel.down_conv.bias', 'resample_0_1_7/bn/beta': 'p4_down_channel.bn.bias', 'resample_0_1_7/bn/gamma': @@ -297,11 +297,11 @@ def convert_key(model_name, bifpn_repeats, weights): base_mapping = { 'op_after_combine8/conv/depthwise_kernel': - 'conv3_up.depthwise_conv.conv.weight', + 'conv3_up.depthwise_conv.weight', 'op_after_combine8/conv/pointwise_kernel': - 'conv3_up.pointwise_conv.conv.weight', + 'conv3_up.pointwise_conv.weight', 'op_after_combine8/conv/bias': - 'conv3_up.pointwise_conv.conv.bias', + 'conv3_up.pointwise_conv.bias', 'op_after_combine8/bn/beta': 'conv3_up.bn.bias', 'op_after_combine8/bn/gamma': @@ -314,9 +314,9 @@ def convert_key(model_name, bifpn_repeats, weights): if fpn_idx == 0: mapping = { 'resample_0_0_8/conv2d/kernel': - 'p3_down_channel.down_conv.conv.weight', + 'p3_down_channel.down_conv.weight', 'resample_0_0_8/conv2d/bias': - 'p3_down_channel.down_conv.conv.bias', + 'p3_down_channel.down_conv.bias', 'resample_0_0_8/bn/beta': 'p3_down_channel.bn.bias', 'resample_0_0_8/bn/gamma': @@ -341,11 +341,11 @@ def convert_key(model_name, bifpn_repeats, weights): elif fnode_id == 4: base_mapping = { 'op_after_combine9/conv/depthwise_kernel': - 'conv4_down.depthwise_conv.conv.weight', + 'conv4_down.depthwise_conv.weight', 'op_after_combine9/conv/pointwise_kernel': - 'conv4_down.pointwise_conv.conv.weight', + 'conv4_down.pointwise_conv.weight', 'op_after_combine9/conv/bias': - 'conv4_down.pointwise_conv.conv.bias', + 'conv4_down.pointwise_conv.bias', 'op_after_combine9/bn/beta': 'conv4_down.bn.bias', 'op_after_combine9/bn/gamma': @@ -358,9 +358,9 @@ def convert_key(model_name, bifpn_repeats, weights): if fpn_idx == 0: mapping = { 'resample_0_1_9/conv2d/kernel': - 'p4_level_connection.down_conv.conv.weight', + 'p4_level_connection.down_conv.weight', 'resample_0_1_9/conv2d/bias': - 'p4_level_connection.down_conv.conv.bias', + 'p4_level_connection.down_conv.bias', 'resample_0_1_9/bn/beta': 'p4_level_connection.bn.bias', 'resample_0_1_9/bn/gamma': @@ -387,11 +387,11 @@ def convert_key(model_name, bifpn_repeats, weights): elif fnode_id == 5: base_mapping = { 'op_after_combine10/conv/depthwise_kernel': - 'conv5_down.depthwise_conv.conv.weight', + 'conv5_down.depthwise_conv.weight', 'op_after_combine10/conv/pointwise_kernel': - 'conv5_down.pointwise_conv.conv.weight', + 'conv5_down.pointwise_conv.weight', 'op_after_combine10/conv/bias': - 'conv5_down.pointwise_conv.conv.bias', + 'conv5_down.pointwise_conv.bias', 'op_after_combine10/bn/beta': 'conv5_down.bn.bias', 'op_after_combine10/bn/gamma': @@ -404,9 +404,9 @@ def convert_key(model_name, bifpn_repeats, weights): if fpn_idx == 0: mapping = { 'resample_0_2_10/conv2d/kernel': - 'p5_level_connection.down_conv.conv.weight', + 'p5_level_connection.down_conv.weight', 'resample_0_2_10/conv2d/bias': - 'p5_level_connection.down_conv.conv.bias', + 
'p5_level_connection.down_conv.bias', 'resample_0_2_10/bn/beta': 'p5_level_connection.bn.bias', 'resample_0_2_10/bn/gamma': @@ -433,11 +433,11 @@ def convert_key(model_name, bifpn_repeats, weights): elif fnode_id == 6: base_mapping = { 'op_after_combine11/conv/depthwise_kernel': - 'conv6_down.depthwise_conv.conv.weight', + 'conv6_down.depthwise_conv.weight', 'op_after_combine11/conv/pointwise_kernel': - 'conv6_down.pointwise_conv.conv.weight', + 'conv6_down.pointwise_conv.weight', 'op_after_combine11/conv/bias': - 'conv6_down.pointwise_conv.conv.bias', + 'conv6_down.pointwise_conv.bias', 'op_after_combine11/bn/beta': 'conv6_down.bn.bias', 'op_after_combine11/bn/gamma': @@ -463,11 +463,11 @@ def convert_key(model_name, bifpn_repeats, weights): elif fnode_id == 7: base_mapping = { 'op_after_combine12/conv/depthwise_kernel': - 'conv7_down.depthwise_conv.conv.weight', + 'conv7_down.depthwise_conv.weight', 'op_after_combine12/conv/pointwise_kernel': - 'conv7_down.pointwise_conv.conv.weight', + 'conv7_down.pointwise_conv.weight', 'op_after_combine12/conv/bias': - 'conv7_down.pointwise_conv.conv.bias', + 'conv7_down.pointwise_conv.bias', 'op_after_combine12/bn/beta': 'conv7_down.bn.bias', 'op_after_combine12/bn/gamma': @@ -492,9 +492,9 @@ def convert_key(model_name, bifpn_repeats, weights): if 'box-predict' in seg[1]: prefix = '.'.join(['bbox_head', 'reg_header']) base_mapping = { - 'depthwise_kernel': 'depthwise_conv.conv.weight', - 'pointwise_kernel': 'pointwise_conv.conv.weight', - 'bias': 'pointwise_conv.conv.bias' + 'depthwise_kernel': 'depthwise_conv.weight', + 'pointwise_kernel': 'pointwise_conv.weight', + 'bias': 'pointwise_conv.bias' } suffix = base_mapping['/'.join(seg[2:])] if 'depthwise_conv' in suffix: @@ -522,9 +522,9 @@ def convert_key(model_name, bifpn_repeats, weights): ['bbox_head', 'reg_conv_list', str(bbox_conv_idx)]) base_mapping = { - 'depthwise_kernel': 'depthwise_conv.conv.weight', - 'pointwise_kernel': 'pointwise_conv.conv.weight', - 'bias': 'pointwise_conv.conv.bias' + 'depthwise_kernel': 'depthwise_conv.weight', + 'pointwise_kernel': 'pointwise_conv.weight', + 'bias': 'pointwise_conv.bias' } suffix = base_mapping['/'.join(seg[2:])] if 'depthwise_conv' in suffix: @@ -534,9 +534,9 @@ def convert_key(model_name, bifpn_repeats, weights): if 'class-predict' in seg[1]: prefix = '.'.join(['bbox_head', 'cls_header']) base_mapping = { - 'depthwise_kernel': 'depthwise_conv.conv.weight', - 'pointwise_kernel': 'pointwise_conv.conv.weight', - 'bias': 'pointwise_conv.conv.bias' + 'depthwise_kernel': 'depthwise_conv.weight', + 'pointwise_kernel': 'pointwise_conv.weight', + 'bias': 'pointwise_conv.bias' } suffix = base_mapping['/'.join(seg[2:])] if 'depthwise_conv' in suffix: @@ -564,9 +564,9 @@ def convert_key(model_name, bifpn_repeats, weights): ['bbox_head', 'cls_conv_list', str(cls_conv_idx)]) base_mapping = { - 'depthwise_kernel': 'depthwise_conv.conv.weight', - 'pointwise_kernel': 'pointwise_conv.conv.weight', - 'bias': 'pointwise_conv.conv.bias' + 'depthwise_kernel': 'depthwise_conv.weight', + 'pointwise_kernel': 'pointwise_conv.weight', + 'bias': 'pointwise_conv.bias' } suffix = base_mapping['/'.join(seg[2:])] if 'depthwise_conv' in suffix: @@ -616,7 +616,6 @@ def main(): n: torch.as_tensor(tf2pth(reader.get_tensor(n))) for (n, _) in reader.get_variable_to_shape_map().items() } - print(weights.keys()) bifpn_repeats = repeat_map[int(model_name[14])] out = convert_key(model_name, bifpn_repeats, weights) result = {'state_dict': out} diff --git 
a/projects/EfficientDet/efficientdet/__init__.py b/projects/EfficientDet/efficientdet/__init__.py index dca95d53a35..b6c66bcc353 100644 --- a/projects/EfficientDet/efficientdet/__init__.py +++ b/projects/EfficientDet/efficientdet/__init__.py @@ -1,14 +1,16 @@ -from .anchor_generator import YXYXAnchorGenerator from .bifpn import BiFPN -from .coco_90class import Coco90Dataset -from .coco_90metric import Coco90Metric from .efficientdet import EfficientDet from .efficientdet_head import EfficientDetSepBNHead -from .trans_max_iou_assigner import TransMaxIoUAssigner -from .yxyx_bbox_coder import YXYXDeltaXYWHBBoxCoder +from .huber_loss import HuberLoss +from .tensorflow.anchor_generator import YXYXAnchorGenerator +from .tensorflow.coco_90class import Coco90Dataset +from .tensorflow.coco_90metric import Coco90Metric +from .tensorflow.trans_max_iou_assigner import TransMaxIoUAssigner +from .tensorflow.yxyx_bbox_coder import YXYXDeltaXYWHBBoxCoder +from .utils import Conv2dSamePadding __all__ = [ - 'EfficientDet', 'BiFPN', 'EfficientDetSepBNHead', 'YXYXAnchorGenerator', - 'YXYXDeltaXYWHBBoxCoder', 'Coco90Dataset', 'Coco90Metric', - 'TransMaxIoUAssigner' + 'EfficientDet', 'BiFPN', 'HuberLoss', 'EfficientDetSepBNHead', + 'Conv2dSamePadding', 'Coco90Dataset', 'Coco90Metric', + 'YXYXAnchorGenerator', 'TransMaxIoUAssigner', 'YXYXDeltaXYWHBBoxCoder' ] diff --git a/projects/EfficientDet/efficientdet/bifpn.py b/projects/EfficientDet/efficientdet/bifpn.py index 114af7b16c7..56356c3c555 100644 --- a/projects/EfficientDet/efficientdet/bifpn.py +++ b/projects/EfficientDet/efficientdet/bifpn.py @@ -1,5 +1,3 @@ -# Copyright (c) OpenMMLab. All rights reserved. -# Modified from https://github.com/zylo117/Yet-Another-EfficientDet-Pytorch from typing import List import torch @@ -9,21 +7,19 @@ from mmdet.registry import MODELS from mmdet.utils import MultiConfig, OptConfigType -from .utils import (DepthWiseConvBlock, DownChannelBlock, MaxPool2dSamePadding, - MemoryEfficientSwish) +from .utils import DepthWiseConvBlock, DownChannelBlock, MaxPool2dSamePadding class BiFPNStage(nn.Module): - ''' + """ in_channels: List[int], input dim for P3, P4, P5 out_channels: int, output dim for P2 - P7 first_time: int, whether is the first bifpnstage - num_outs: int, BiFPN need feature maps num - use_swish: whether use MemoryEfficientSwish + conv_bn_act_pattern: bool, whether use conv_bn_act_pattern norm_cfg: (:obj:`ConfigDict` or dict, optional): Config dict for normalization layer. 
epsilon: float, hyperparameter in fusion features - ''' + """ def __init__(self, in_channels: List[int], @@ -31,7 +27,6 @@ def __init__(self, first_time: bool = False, apply_bn_for_resampling: bool = True, conv_bn_act_pattern: bool = False, - use_meswish: bool = True, norm_cfg: OptConfigType = dict( type='BN', momentum=1e-2, eps=1e-3), epsilon: float = 1e-4) -> None: @@ -42,7 +37,6 @@ def __init__(self, self.first_time = first_time self.apply_bn_for_resampling = apply_bn_for_resampling self.conv_bn_act_pattern = conv_bn_act_pattern - self.use_meswish = use_meswish self.norm_cfg = norm_cfg self.epsilon = epsilon @@ -173,7 +167,7 @@ def __init__(self, torch.ones(2, dtype=torch.float32), requires_grad=True) self.p7_w2_relu = nn.ReLU() - self.swish = MemoryEfficientSwish() if use_meswish else Swish() + self.swish = Swish() def combine(self, x): if not self.conv_bn_act_pattern: @@ -268,7 +262,7 @@ def forward(self, x): @MODELS.register_module() class BiFPN(BaseModule): - ''' + """ num_stages: int, bifpn number of repeats in_channels: List[int], input dim for P3, P4, P5 out_channels: int, output dim for P2 - P7 @@ -276,11 +270,10 @@ class BiFPN(BaseModule): epsilon: float, hyperparameter in fusion features apply_bn_for_resampling: bool, whether use bn after resampling conv_bn_act_pattern: bool, whether use conv_bn_act_pattern - use_swish: whether use MemoryEfficientSwish norm_cfg: (:obj:`ConfigDict` or dict, optional): Config dict for normalization layer. init_cfg: MultiConfig: init method - ''' + """ def __init__(self, num_stages: int, @@ -290,11 +283,9 @@ def __init__(self, epsilon: float = 1e-4, apply_bn_for_resampling: bool = True, conv_bn_act_pattern: bool = False, - use_meswish: bool = True, norm_cfg: OptConfigType = dict( type='BN', momentum=1e-2, eps=1e-3), init_cfg: MultiConfig = None) -> None: - super().__init__(init_cfg=init_cfg) self.start_level = start_level self.bifpn = nn.Sequential(*[ @@ -304,7 +295,6 @@ def __init__(self, first_time=True if _ == 0 else False, apply_bn_for_resampling=apply_bn_for_resampling, conv_bn_act_pattern=conv_bn_act_pattern, - use_meswish=use_meswish, norm_cfg=norm_cfg, epsilon=epsilon) for _ in range(num_stages) ]) diff --git a/projects/EfficientDet/efficientdet/efficientdet_head.py b/projects/EfficientDet/efficientdet/efficientdet_head.py index 6ed6521d091..ae3efbe2c7d 100644 --- a/projects/EfficientDet/efficientdet/efficientdet_head.py +++ b/projects/EfficientDet/efficientdet/efficientdet_head.py @@ -1,30 +1,34 @@ # Copyright (c) OpenMMLab. All rights reserved. -from typing import Tuple +from typing import List, Tuple +import torch import torch.nn as nn -from mmcv.cnn.bricks import build_norm_layer +from mmcv.cnn.bricks import Swish, build_norm_layer from mmengine.model import bias_init_with_prob from torch import Tensor from mmdet.models.dense_heads.anchor_head import AnchorHead +from mmdet.models.utils import images_to_levels, multi_apply from mmdet.registry import MODELS -from mmdet.utils import OptConfigType, OptMultiConfig -from .utils import DepthWiseConvBlock, MemoryEfficientSwish +from mmdet.structures.bbox import cat_boxes, get_box_tensor +from mmdet.utils import (InstanceList, OptConfigType, OptInstanceList, + OptMultiConfig, reduce_mean) +from .utils import DepthWiseConvBlock @MODELS.register_module() class EfficientDetSepBNHead(AnchorHead): """EfficientDetHead with separate BN. - num_classes (int): Number of categories excluding the background - category. in_channels (int): Number of channels in the input feature map. 
- feat_channels (int): Number of hidden channels. stacked_convs (int): Number - of repetitions of conv norm_cfg (dict): Config dict for normalization - layer. anchor_generator (dict): Config dict for anchor generator bbox_coder - (dict): Config of bounding box coder. loss_cls (dict): Config of - classification loss. loss_bbox (dict): Config of localization loss. - train_cfg (dict): Training config of anchor head. test_cfg (dict): Testing - config of anchor head. init_cfg (dict or list[dict], optional): + num_classes (int): Number of categories num_ins (int): Number of the input + feature map. in_channels (int): Number of channels in the input feature + map. feat_channels (int): Number of hidden channels. stacked_convs (int): + Number of repetitions of conv norm_cfg (dict): Config dict for + normalization layer. anchor_generator (dict): Config dict for anchor + generator bbox_coder (dict): Config of bounding box coder. loss_cls (dict): + Config of classification loss. loss_bbox (dict): Config of localization + loss. train_cfg (dict): Training config of anchor head. test_cfg (dict): + Testing config of anchor head. init_cfg (dict or list[dict], optional): Initialization config dict. """ @@ -83,17 +87,17 @@ def _init_layers(self) -> None: apply_norm=False) self.reg_header = DepthWiseConvBlock( self.in_channels, self.num_base_priors * 4, apply_norm=False) - self.swish = MemoryEfficientSwish() + self.swish = Swish() def init_weights(self) -> None: """Initialize weights of the head.""" for m in self.reg_conv_list: - nn.init.constant_(m.pointwise_conv.conv.bias, 0.0) + nn.init.constant_(m.pointwise_conv.bias, 0.0) for m in self.cls_conv_list: - nn.init.constant_(m.pointwise_conv.conv.bias, 0.0) + nn.init.constant_(m.pointwise_conv.bias, 0.0) bias_cls = bias_init_with_prob(0.01) - nn.init.constant_(self.cls_header.pointwise_conv.conv.bias, bias_cls) - nn.init.constant_(self.reg_header.pointwise_conv.conv.bias, 0.0) + nn.init.constant_(self.cls_header.pointwise_conv.bias, bias_cls) + nn.init.constant_(self.reg_header.pointwise_conv.bias, 0.0) def forward_single_bbox(self, feat: Tensor, level_id: int, i: int) -> Tensor: @@ -134,3 +138,124 @@ def forward(self, feats: Tuple[Tensor]) -> tuple: cls_scores.append(cls_score) return cls_scores, bbox_preds + + def loss_by_feat( + self, + cls_scores: List[Tensor], + bbox_preds: List[Tensor], + batch_gt_instances: InstanceList, + batch_img_metas: List[dict], + batch_gt_instances_ignore: OptInstanceList = None) -> dict: + """Calculate the loss based on the features extracted by the detection + head. + + Args: + cls_scores (list[Tensor]): Box scores for each scale level + has shape (N, num_anchors * num_classes, H, W). + bbox_preds (list[Tensor]): Box energies / deltas for each scale + level with shape (N, num_anchors * 4, H, W). + batch_gt_instances (list[:obj:`InstanceData`]): Batch of + gt_instance. It usually includes ``bboxes`` and ``labels`` + attributes. + batch_img_metas (list[dict]): Meta information of each image, e.g., + image size, scaling factor, etc. + batch_gt_instances_ignore (list[:obj:`InstanceData`], optional): + Batch of gt_instances_ignore. It includes ``bboxes`` attribute + data that is ignored during training and testing. + Defaults to None. + + Returns: + dict: A dictionary of loss components. 
+ """ + featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores] + assert len(featmap_sizes) == self.prior_generator.num_levels + + device = cls_scores[0].device + + anchor_list, valid_flag_list = self.get_anchors( + featmap_sizes, batch_img_metas, device=device) + cls_reg_targets = self.get_targets( + anchor_list, + valid_flag_list, + batch_gt_instances, + batch_img_metas, + batch_gt_instances_ignore=batch_gt_instances_ignore) + (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list, + avg_factor) = cls_reg_targets + + # anchor number of multi levels + num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]] + # concat all level anchors and flags to a single tensor + concat_anchor_list = [] + for i in range(len(anchor_list)): + concat_anchor_list.append(cat_boxes(anchor_list[i])) + all_anchor_list = images_to_levels(concat_anchor_list, + num_level_anchors) + + avg_factor = reduce_mean( + torch.tensor(avg_factor, dtype=torch.float, device=device)).item() + avg_factor = max(avg_factor, 1.0) + losses_cls, losses_bbox = multi_apply( + self.loss_by_feat_single, + cls_scores, + bbox_preds, + all_anchor_list, + labels_list, + label_weights_list, + bbox_targets_list, + bbox_weights_list, + avg_factor=avg_factor) + return dict(loss_cls=losses_cls, loss_bbox=losses_bbox) + + def loss_by_feat_single(self, cls_score: Tensor, bbox_pred: Tensor, + anchors: Tensor, labels: Tensor, + label_weights: Tensor, bbox_targets: Tensor, + bbox_weights: Tensor, avg_factor: int) -> tuple: + """Calculate the loss of a single scale level based on the features + extracted by the detection head. + + Args: + cls_score (Tensor): Box scores for each scale level + Has shape (N, num_anchors * num_classes, H, W). + bbox_pred (Tensor): Box energies / deltas for each scale + level with shape (N, num_anchors * 4, H, W). + anchors (Tensor): Box reference for each scale level with shape + (N, num_total_anchors, 4). + labels (Tensor): Labels of each anchors with shape + (N, num_total_anchors). + label_weights (Tensor): Label weights of each anchor with shape + (N, num_total_anchors) + bbox_targets (Tensor): BBox regression targets of each anchor + weight shape (N, num_total_anchors, 4). + bbox_weights (Tensor): BBox regression loss weights of each anchor + with shape (N, num_total_anchors, 4). + avg_factor (int): Average factor that is used to average the loss. + + Returns: + tuple: loss components. + """ + + # classification loss + labels = labels.reshape(-1) + label_weights = label_weights.reshape(-1) + cls_score = cls_score.permute(0, 2, 3, + 1).reshape(-1, self.cls_out_channels) + loss_cls = self.loss_cls( + cls_score, labels, label_weights, avg_factor=avg_factor) + # regression loss + target_dim = bbox_targets.size(-1) + bbox_targets = bbox_targets.reshape(-1, target_dim) + bbox_weights = bbox_weights.reshape(-1, target_dim) + bbox_pred = bbox_pred.permute(0, 2, 3, + 1).reshape(-1, + self.bbox_coder.encode_size) + if self.reg_decoded_bbox: + # When the regression loss (e.g. `IouLoss`, `GIouLoss`) + # is applied directly on the decoded bounding boxes, it + # decodes the already encoded coordinates to absolute format. 
+ anchors = anchors.reshape(-1, anchors.size(-1)) + bbox_pred = self.bbox_coder.decode(anchors, bbox_pred) + bbox_pred = get_box_tensor(bbox_pred) + loss_bbox = self.loss_bbox( + bbox_pred, bbox_targets, bbox_weights, avg_factor=avg_factor * 4) + return loss_cls, loss_bbox diff --git a/projects/EfficientDet/efficientdet/huber_loss.py b/projects/EfficientDet/efficientdet/huber_loss.py new file mode 100644 index 00000000000..091963fa9d6 --- /dev/null +++ b/projects/EfficientDet/efficientdet/huber_loss.py @@ -0,0 +1,91 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import Optional + +import torch +import torch.nn as nn +from torch import Tensor + +from mmdet.models.losses.utils import weighted_loss +from mmdet.registry import MODELS + + +@weighted_loss +def huber_loss(pred: Tensor, target: Tensor, beta: float = 1.0) -> Tensor: + """Huber loss. + + Args: + pred (Tensor): The prediction. + target (Tensor): The learning target of the prediction. + beta (float, optional): The threshold in the piecewise function. + Defaults to 1.0. + + Returns: + Tensor: Calculated loss + """ + assert beta > 0 + if target.numel() == 0: + return pred.sum() * 0 + + assert pred.size() == target.size() + diff = torch.abs(pred - target) + loss = torch.where(diff < beta, 0.5 * diff * diff, + beta * diff - 0.5 * beta * beta) + return loss + + +@MODELS.register_module() +class HuberLoss(nn.Module): + """Huber loss. + + Args: + beta (float, optional): The threshold in the piecewise function. + Defaults to 1.0. + reduction (str, optional): The method to reduce the loss. + Options are "none", "mean" and "sum". Defaults to "mean". + loss_weight (float, optional): The weight of loss. + """ + + def __init__(self, + beta: float = 1.0, + reduction: str = 'mean', + loss_weight: float = 1.0) -> None: + super().__init__() + self.beta = beta + self.reduction = reduction + self.loss_weight = loss_weight + + def forward(self, + pred: Tensor, + target: Tensor, + weight: Optional[Tensor] = None, + avg_factor: Optional[int] = None, + reduction_override: Optional[str] = None, + **kwargs) -> Tensor: + """Forward function. + + Args: + pred (Tensor): The prediction. + target (Tensor): The learning target of the prediction. + weight (Tensor, optional): The weight of loss for each + prediction. Defaults to None. + avg_factor (int, optional): Average factor that is used to average + the loss. Defaults to None. + reduction_override (str, optional): The reduction method used to + override the original reduction method of the loss. + Defaults to None. 
+ + Returns: + Tensor: Calculated loss + """ + assert reduction_override in (None, 'none', 'mean', 'sum') + reduction = ( + reduction_override if reduction_override else self.reduction) + loss_bbox = self.loss_weight * huber_loss( + pred, + target, + weight, + beta=self.beta, + reduction=reduction, + avg_factor=avg_factor, + **kwargs) + return loss_bbox diff --git a/projects/EfficientDet/efficientdet/anchor_generator.py b/projects/EfficientDet/efficientdet/tensorflow/anchor_generator.py similarity index 100% rename from projects/EfficientDet/efficientdet/anchor_generator.py rename to projects/EfficientDet/efficientdet/tensorflow/anchor_generator.py diff --git a/projects/EfficientDet/efficientdet/api_wrappers/__init__.py b/projects/EfficientDet/efficientdet/tensorflow/api_wrappers/__init__.py similarity index 100% rename from projects/EfficientDet/efficientdet/api_wrappers/__init__.py rename to projects/EfficientDet/efficientdet/tensorflow/api_wrappers/__init__.py diff --git a/projects/EfficientDet/efficientdet/api_wrappers/coco_api.py b/projects/EfficientDet/efficientdet/tensorflow/api_wrappers/coco_api.py similarity index 98% rename from projects/EfficientDet/efficientdet/api_wrappers/coco_api.py rename to projects/EfficientDet/efficientdet/tensorflow/api_wrappers/coco_api.py index ffaf33e0185..142f27d7f94 100644 --- a/projects/EfficientDet/efficientdet/api_wrappers/coco_api.py +++ b/projects/EfficientDet/efficientdet/tensorflow/api_wrappers/coco_api.py @@ -30,7 +30,6 @@ def get_ann_ids(self, img_ids=[], cat_ids=[], area_rng=[], iscrowd=None): return self.getAnnIds(img_ids, cat_ids, area_rng, iscrowd) def get_cat_ids(self, cat_names=[], sup_names=[], cat_ids=[]): - # return self.getCatIds(cat_names, sup_names, cat_ids) cat_ids_coco = self.getCatIds(cat_names, sup_names, cat_ids) if None in cat_names: index = [i for i, v in enumerate(cat_names) if v is not None] diff --git a/projects/EfficientDet/efficientdet/coco_90class.py b/projects/EfficientDet/efficientdet/tensorflow/coco_90class.py similarity index 100% rename from projects/EfficientDet/efficientdet/coco_90class.py rename to projects/EfficientDet/efficientdet/tensorflow/coco_90class.py diff --git a/projects/EfficientDet/efficientdet/coco_90metric.py b/projects/EfficientDet/efficientdet/tensorflow/coco_90metric.py similarity index 100% rename from projects/EfficientDet/efficientdet/coco_90metric.py rename to projects/EfficientDet/efficientdet/tensorflow/coco_90metric.py diff --git a/projects/EfficientDet/efficientdet/trans_max_iou_assigner.py b/projects/EfficientDet/efficientdet/tensorflow/trans_max_iou_assigner.py similarity index 100% rename from projects/EfficientDet/efficientdet/trans_max_iou_assigner.py rename to projects/EfficientDet/efficientdet/tensorflow/trans_max_iou_assigner.py diff --git a/projects/EfficientDet/efficientdet/yxyx_bbox_coder.py b/projects/EfficientDet/efficientdet/tensorflow/yxyx_bbox_coder.py similarity index 100% rename from projects/EfficientDet/efficientdet/yxyx_bbox_coder.py rename to projects/EfficientDet/efficientdet/tensorflow/yxyx_bbox_coder.py
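As a quick sanity check of the `HuberLoss` added above, here is a minimal usage sketch (illustrative only; it assumes the mmdetection root is on `PYTHONPATH` so that `projects.EfficientDet.efficientdet.huber_loss` is importable): residuals smaller than `beta` fall in the quadratic branch, everything beyond it in the linear branch.

import torch
from projects.EfficientDet.efficientdet.huber_loss import HuberLoss

loss_fn = HuberLoss(beta=1.0, reduction='none')
pred = torch.zeros(3)
target = torch.tensor([0.5, 1.0, 3.0])
# |diff| < beta  -> 0.5 * diff ** 2                 = 0.125
# |diff| >= beta -> beta * |diff| - 0.5 * beta ** 2 = 0.5, 2.5
print(loss_fn(pred, target))  # tensor([0.1250, 0.5000, 2.5000])

Note that with `beta=1.0` this coincides with the smooth L1 loss already used elsewhere in mmdetection; the separate module mirrors the loss used by the TensorFlow EfficientDet reference.

diff --git a/projects/EfficientDet/efficientdet/utils.py b/projects/EfficientDet/efficientdet/utils.py index 5fc898a64a7..9c30a01fc8b 100644 --- a/projects/EfficientDet/efficientdet/utils.py +++ b/projects/EfficientDet/efficientdet/utils.py @@ -1,4 +1,3 @@ -# Copyright (c) OpenMMLab. All rights reserved.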
import math from typing import Tuple, Union @@ -6,67 +5,49 @@ import torch.nn as nn from mmcv.cnn.bricks import Swish, build_norm_layer from torch.nn import functional as F +from torch.nn.init import _calculate_fan_in_and_fan_out, trunc_normal_ +from mmdet.registry import MODELS from mmdet.utils import OptConfigType -class SwishImplementation(torch.autograd.Function): +def variance_scaling_trunc(tensor, gain=1.): + fan_in, _ = _calculate_fan_in_and_fan_out(tensor) + gain /= max(1.0, fan_in) + std = math.sqrt(gain) / .87962566103423978 + return trunc_normal_(tensor, 0., std) - @staticmethod - def forward(ctx, i): - result = i * torch.sigmoid(i) - ctx.save_for_backward(i) - return result - @staticmethod - def backward(ctx, grad_output): - i = ctx.saved_variables[0] - sigmoid_i = torch.sigmoid(i) - return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i))) - - -class MemoryEfficientSwish(nn.Module): - - def forward(self, x): - return SwishImplementation.apply(x) - - -class Conv2dSamePadding(nn.Module): +@MODELS.register_module() +class Conv2dSamePadding(nn.Conv2d): def __init__(self, in_channels: int, out_channels: int, kernel_size: Union[int, Tuple[int, int]], stride: Union[int, Tuple[int, int]] = 1, + padding: Union[int, Tuple[int, int]] = 0, + dilation: Union[int, Tuple[int, int]] = 1, groups: int = 1, bias: bool = True): - super().__init__() - self.conv = nn.Conv2d( - in_channels, - out_channels, - kernel_size, - stride=stride, - bias=bias, - groups=groups) - self.stride = self.conv.stride - self.kernel_size = self.conv.kernel_size - - def forward(self, x): - h, w = x.shape[-2:] - extra_h = (math.ceil(w / self.stride[1]) - - 1) * self.stride[1] - w + self.kernel_size[1] - extra_v = (math.ceil(h / self.stride[0]) - - 1) * self.stride[0] - h + self.kernel_size[0] - - left = extra_h // 2 - right = extra_h - left - top = extra_v // 2 - bottom = extra_v - top - + super().__init__(in_channels, out_channels, kernel_size, stride, 0, + dilation, groups, bias) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + img_h, img_w = x.size()[-2:] + kernel_h, kernel_w = self.weight.size()[-2:] + extra_w = (math.ceil(img_w / self.stride[1]) - + 1) * self.stride[1] - img_w + kernel_w + extra_h = (math.ceil(img_h / self.stride[0]) - + 1) * self.stride[0] - img_h + kernel_h + + left = extra_w // 2 + right = extra_w - left + top = extra_h // 2 + bottom = extra_h - top x = F.pad(x, [left, right, top, bottom]) - x = self.conv(x) - - return x + return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, + self.dilation, self.groups) class MaxPool2dSamePadding(nn.Module): @@ -112,7 +93,6 @@ def __init__( out_channels: int, apply_norm: bool = True, conv_bn_act_pattern: bool = False, - use_meswish: bool = True, norm_cfg: OptConfigType = dict(type='BN', momentum=1e-2, eps=1e-3) ) -> None: super(DepthWiseConvBlock, self).__init__() @@ -132,7 +112,7 @@ def __init__( self.apply_activation = conv_bn_act_pattern if self.apply_activation: - self.swish = MemoryEfficientSwish() if use_meswish else Swish() + self.swish = Swish() def forward(self, x): x = self.depthwise_conv(x) @@ -153,7 +133,6 @@ def __init__( out_channels: int, apply_norm: bool = True, conv_bn_act_pattern: bool = False, - use_meswish: bool = True, norm_cfg: OptConfigType = dict(type='BN', momentum=1e-2, eps=1e-3) ) -> None: super(DownChannelBlock, self).__init__() @@ -163,7 +142,7 @@ def __init__( self.bn = build_norm_layer(norm_cfg, num_features=out_channels)[1] self.apply_activation = conv_bn_act_pattern if self.apply_activation: - 
self.swish = MemoryEfficientSwish() if use_meswish else Swish() + self.swish = Swish() def forward(self, x): x = self.down_conv(x) From 40b8e1407079a6b3911db5fbd51716e84dfff6ab Mon Sep 17 00:00:00 2001 From: JosonChan <57584090+JosonChan1998@users.noreply.github.com> Date: Mon, 13 Mar 2023 21:18:52 +0800 Subject: [PATCH 07/38] [Fix] albu augmentation with mask shape (#9918) --- mmdet/datasets/transforms/transforms.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/mmdet/datasets/transforms/transforms.py b/mmdet/datasets/transforms/transforms.py index 129fe9202db..c954dada442 100644 --- a/mmdet/datasets/transforms/transforms.py +++ b/mmdet/datasets/transforms/transforms.py @@ -1574,16 +1574,15 @@ def _postprocess_results( results['masks'] = np.array( [results['masks'][i] for i in results['idx_mapper']]) results['masks'] = ori_masks.__class__( - results['masks'], results['image'].shape[0], - results['image'].shape[1]) + results['masks'], ori_masks.height, ori_masks.width) if (not len(results['idx_mapper']) and self.skip_img_without_anno): return None elif 'masks' in results: - results['masks'] = ori_masks.__class__( - results['masks'], results['image'].shape[0], - results['image'].shape[1]) + results['masks'] = ori_masks.__class__(results['masks'], + ori_masks.height, + ori_masks.width) return results From c3200601832c9124a83df663b9234d4369adbd9e Mon Sep 17 00:00:00 2001 From: RangiLyu Date: Mon, 13 Mar 2023 21:28:49 +0800 Subject: [PATCH 08/38] [Fix] Update metafile with missed model checkpoints (#9890) --- configs/albu_example/metafile.yml | 17 ++++ configs/boxinst/README.md | 7 +- configs/boxinst/metafile.yml | 52 +++++++++++ configs/centernet/metafile.yml | 13 +++ configs/lvis/metafile.yml | 128 ++++++++++++++++++++++++++ configs/rpn/metafile.yml | 127 +++++++++++++++++++++++++ configs/soft_teacher/metafile.yml | 18 ++++ configs/strong_baselines/metafile.yml | 24 +++++ model-index.yml | 13 ++- 9 files changed, 394 insertions(+), 5 deletions(-) create mode 100644 configs/albu_example/metafile.yml create mode 100644 configs/boxinst/metafile.yml create mode 100644 configs/lvis/metafile.yml create mode 100644 configs/rpn/metafile.yml create mode 100644 configs/soft_teacher/metafile.yml create mode 100644 configs/strong_baselines/metafile.yml diff --git a/configs/albu_example/metafile.yml b/configs/albu_example/metafile.yml new file mode 100644 index 00000000000..3b54bdf1568 --- /dev/null +++ b/configs/albu_example/metafile.yml @@ -0,0 +1,17 @@ +Models: + - Name: mask-rcnn_r50_fpn_albu-1x_coco + In Collection: Mask R-CNN + Config: configs/albu_example/mask-rcnn_r50_fpn_albu-1x_coco.py + Metadata: + Training Memory (GB): 4.4 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 38.0 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 34.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/albu_example/mask_rcnn_r50_fpn_albu_1x_coco/mask_rcnn_r50_fpn_albu_1x_coco_20200208-ab203bcd.pth diff --git a/configs/boxinst/README.md b/configs/boxinst/README.md index 6f015a1d16b..f6f01c5d27b 100644 --- a/configs/boxinst/README.md +++ b/configs/boxinst/README.md @@ -15,9 +15,10 @@ of learning masks in instance segmentation, with no modification to the segmenta ## Results and Models -| Backbone | Style | MS train | Lr schd | bbox AP | mask AP | Config | Download | -| :------: | :-----: | :------: | :-----: | :-----: | :-----: | :----------------------------------------: | :----------------------: | -| R-50 | pytorch | Y | 1x | 39.4 | 30.8 |
[config](./boxinst_r50_fpn_ms-90k_coco.py) | [model](<>) \| [log](<>) | +| Backbone | Style | MS train | Lr schd | bbox AP | mask AP | Config | Download | +| :------: | :-----: | :------: | :-----: | :-----: | :-----: | :-----------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50 | pytorch | Y | 1x | 39.6 | 31.1 | [config](./boxinst_r50_fpn_ms-90k_coco.py) | [model](https://download.openmmlab.com/mmdetection/v3.0/boxinst/boxinst_r50_fpn_ms-90k_coco/boxinst_r50_fpn_ms-90k_coco_20221228_163052-6add751a.pth) \| [log](https://download.openmmlab.com/mmdetection/v3.0/boxinst/boxinst_r50_fpn_ms-90k_coco/boxinst_r50_fpn_ms-90k_coco_20221228_163052.log.json) | +| R-101 | pytorch | Y | 1x | 41.8 | 32.7 | [config](./boxinst_r101_fpn_ms-90k_coco.py) | [model](https://download.openmmlab.com/mmdetection/v3.0/boxinst/boxinst_r101_fpn_ms-90k_coco/boxinst_r101_fpn_ms-90k_coco_20221229_145106-facf375b.pth) \| [log](https://download.openmmlab.com/mmdetection/v3.0/boxinst/boxinst_r101_fpn_ms-90k_coco/boxinst_r101_fpn_ms-90k_coco_20221229_145106.log.json) | ## Citation diff --git a/configs/boxinst/metafile.yml b/configs/boxinst/metafile.yml new file mode 100644 index 00000000000..c97fcdcd636 --- /dev/null +++ b/configs/boxinst/metafile.yml @@ -0,0 +1,52 @@ +Collections: + - Name: BoxInst + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x A100 GPUs + Architecture: + - ResNet + - FPN + - CondInst + Paper: + URL: https://arxiv.org/abs/2012.02310 + Title: 'BoxInst: High-Performance Instance Segmentation with Box Annotations' + README: configs/boxinst/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v3.0.0rc6/mmdet/models/detectors/boxinst.py#L8 + Version: v3.0.0rc6 + +Models: + - Name: boxinst_r50_fpn_ms-90k_coco + In Collection: BoxInst + Config: configs/boxinst/boxinst_r50_fpn_ms-90k_coco.py + Metadata: + Iterations: 90000 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 39.6 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 31.1 + Weights: https://download.openmmlab.com/mmdetection/v3.0/boxinst/boxinst_r50_fpn_ms-90k_coco/boxinst_r50_fpn_ms-90k_coco_20221228_163052-6add751a.pth + + - Name: boxinst_r101_fpn_ms-90k_coco + In Collection: BoxInst + Config: configs/boxinst/boxinst_r101_fpn_ms-90k_coco.py + Metadata: + Iterations: 90000 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.8 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 32.7 + Weights: https://download.openmmlab.com/mmdetection/v3.0/boxinst/boxinst_r101_fpn_ms-90k_coco/boxinst_r101_fpn_ms-90k_coco_20221229_145106-facf375b.pth diff --git a/configs/centernet/metafile.yml b/configs/centernet/metafile.yml index 578a5996789..13ea6659d3f 100644 --- a/configs/centernet/metafile.yml +++ b/configs/centernet/metafile.yml @@ -44,3 +44,16 @@ Models: Metrics: box AP: 25.9 Weights: https://download.openmmlab.com/mmdetection/v2.0/centernet/centernet_resnet18_140e_coco/centernet_resnet18_140e_coco_20210705_093630-bb5b3bf7.pth + + - Name: centernet-update_r50-caffe_fpn_ms-1x_coco + In Collection: CenterNet + Config: configs/centernet/centernet-update_r50-caffe_fpn_ms-1x_coco.py + Metadata: +
Batch Size: 16 + Training Memory (GB): 3.3 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.2 diff --git a/configs/lvis/metafile.yml b/configs/lvis/metafile.yml new file mode 100644 index 00000000000..f8def96c7e5 --- /dev/null +++ b/configs/lvis/metafile.yml @@ -0,0 +1,128 @@ +Models: + - Name: mask-rcnn_r50_fpn_sample1e-3_ms-2x_lvis-v0.5 + In Collection: Mask R-CNN + Config: configs/lvis/mask-rcnn_r50_fpn_sample1e-3_ms-2x_lvis-v0.5.py + Metadata: + Epochs: 24 + Results: + - Task: Object Detection + Dataset: LVIS v0.5 + Metrics: + box AP: 26.1 + - Task: Instance Segmentation + Dataset: LVIS v0.5 + Metrics: + mask AP: 25.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis-dbd06831.pth + + - Name: mask-rcnn_r101_fpn_sample1e-3_ms-2x_lvis-v0.5 + In Collection: Mask R-CNN + Config: configs/lvis/mask-rcnn_r101_fpn_sample1e-3_ms-2x_lvis-v0.5.py + Metadata: + Epochs: 24 + Results: + - Task: Object Detection + Dataset: LVIS v0.5 + Metrics: + box AP: 27.1 + - Task: Instance Segmentation + Dataset: LVIS v0.5 + Metrics: + mask AP: 27.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_2x_lvis-54582ee2.pth + + - Name: mask-rcnn_x101-32x4d_fpn_sample1e-3_ms-2x_lvis-v0.5 + In Collection: Mask R-CNN + Config: configs/lvis/mask-rcnn_x101-32x4d_fpn_sample1e-3_ms-2x_lvis-v0.5.py + Metadata: + Epochs: 24 + Results: + - Task: Object Detection + Dataset: LVIS v0.5 + Metrics: + box AP: 26.7 + - Task: Instance Segmentation + Dataset: LVIS v0.5 + Metrics: + mask AP: 26.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_2x_lvis-3cf55ea2.pth + + - Name: mask-rcnn_x101-64x4d_fpn_sample1e-3_ms-2x_lvis-v0.5 + In Collection: Mask R-CNN + Config: configs/lvis/mask-rcnn_x101-64x4d_fpn_sample1e-3_ms-2x_lvis-v0.5.py + Metadata: + Epochs: 24 + Results: + - Task: Object Detection + Dataset: LVIS v0.5 + Metrics: + box AP: 26.4 + - Task: Instance Segmentation + Dataset: LVIS v0.5 + Metrics: + mask AP: 26.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_2x_lvis-1c99a5ad.pth + + - Name: mask-rcnn_r50_fpn_sample1e-3_ms-1x_lvis-v1 + In Collection: Mask R-CNN + Config: configs/lvis/mask-rcnn_r50_fpn_sample1e-3_ms-1x_lvis-v1.py + Metadata: + Epochs: 12 + Results: + - Task: Object Detection + Dataset: LVIS v1 + Metrics: + box AP: 22.5 + - Task: Instance Segmentation + Dataset: LVIS v1 + Metrics: + mask AP: 21.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1-aa78ac3d.pth + + - Name: mask-rcnn_r101_fpn_sample1e-3_ms-1x_lvis-v1 + In Collection: Mask R-CNN + Config: configs/lvis/mask-rcnn_r101_fpn_sample1e-3_ms-1x_lvis-v1.py + Metadata: + Epochs: 12 + Results: + - Task: Object Detection + Dataset: LVIS v1 + Metrics: + box AP: 24.6 + - Task: Instance Segmentation + Dataset: LVIS v1 + Metrics: + mask AP: 23.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_r101_fpn_sample1e-3_mstrain_1x_lvis_v1-ec55ce32.pth + + - Name: mask-rcnn_x101-32x4d_fpn_sample1e-3_ms-1x_lvis-v1 + In Collection: Mask R-CNN 
+ Config: configs/lvis/mask-rcnn_x101-32x4d_fpn_sample1e-3_ms-1x_lvis-v1.py + Metadata: + Epochs: 12 + Results: + - Task: Object Detection + Dataset: LVIS v1 + Metrics: + box AP: 26.7 + - Task: Instance Segmentation + Dataset: LVIS v1 + Metrics: + mask AP: 25.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_1x_lvis_v1-ebbc5c81.pth + + - Name: mask-rcnn_x101-64x4d_fpn_sample1e-3_ms-1x_lvis-v1 + In Collection: Mask R-CNN + Config: configs/lvis/mask-rcnn_x101-64x4d_fpn_sample1e-3_ms-1x_lvis-v1.py + Metadata: + Epochs: 12 + Results: + - Task: Object Detection + Dataset: LVIS v1 + Metrics: + box AP: 27.2 + - Task: Instance Segmentation + Dataset: LVIS v1 + Metrics: + mask AP: 25.8 + Weights: https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_1x_lvis_v1-43d9edfe.pth diff --git a/configs/rpn/metafile.yml b/configs/rpn/metafile.yml new file mode 100644 index 00000000000..9796ead6d2e --- /dev/null +++ b/configs/rpn/metafile.yml @@ -0,0 +1,127 @@ +Collections: + - Name: RPN + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - FPN + - ResNet + Paper: + URL: https://arxiv.org/abs/1506.01497 + Title: "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks" + README: configs/rpn/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/detectors/rpn.py#L6 + Version: v2.0.0 + +Models: + - Name: rpn_r50-caffe_fpn_1x_coco + In Collection: RPN + Config: configs/rpn/rpn_r50-caffe_fpn_1x_coco.py + Metadata: + Training Memory (GB): 3.5 + Training Resources: 8x V100 GPUs + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + AR@1000: 58.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r50_caffe_fpn_1x_coco/rpn_r50_caffe_fpn_1x_coco_20200531-5b903a37.pth + + - Name: rpn_r50_fpn_1x_coco + In Collection: RPN + Config: configs/rpn/rpn_r50_fpn_1x_coco.py + Metadata: + Training Memory (GB): 3.8 + Training Resources: 8x V100 GPUs + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + AR@1000: 58.2 + Weights: https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r50_fpn_1x_coco/rpn_r50_fpn_1x_coco_20200218-5525fa2e.pth + + - Name: rpn_r50_fpn_2x_coco + In Collection: RPN + Config: configs/rpn/rpn_r50_fpn_2x_coco.py + Metadata: + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + AR@1000: 58.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r50_fpn_2x_coco/rpn_r50_fpn_2x_coco_20200131-0728c9b3.pth + + - Name: rpn_r101-caffe_fpn_1x_coco + In Collection: RPN + Config: configs/rpn/rpn_r101-caffe_fpn_1x_coco.py + Metadata: + Training Memory (GB): 5.4 + Training Resources: 8x V100 GPUs + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + AR@1000: 60.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_r101_caffe_fpn_1x_coco/rpn_r101_caffe_fpn_1x_coco_20200531-0629a2e2.pth + + - Name: rpn_x101-32x4d_fpn_1x_coco + In Collection: RPN + Config: configs/rpn/rpn_x101-32x4d_fpn_1x_coco.py + Metadata: + Training Memory (GB): 7.0 + Training Resources: 8x V100 GPUs + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + AR@1000: 60.6 + Weights:
https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_x101_32x4d_fpn_1x_coco/rpn_x101_32x4d_fpn_1x_coco_20200219-b02646c6.pth + + - Name: rpn_x101-32x4d_fpn_2x_coco + In Collection: RPN + Config: configs/rpn/rpn_x101-32x4d_fpn_2x_coco.py + Metadata: + Training Resources: 8x V100 GPUs + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + AR@1000: 61.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_x101_32x4d_fpn_2x_coco/rpn_x101_32x4d_fpn_2x_coco_20200208-d22bd0bb.pth + + - Name: rpn_x101-64x4d_fpn_1x_coco + In Collection: RPN + Config: configs/rpn/rpn_x101-64x4d_fpn_1x_coco.py + Metadata: + Training Memory (GB): 10.1 + Training Resources: 8x V100 GPUs + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + AR@1000: 61.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_x101_64x4d_fpn_1x_coco/rpn_x101_64x4d_fpn_1x_coco_20200208-cde6f7dd.pth + + - Name: rpn_x101-64x4d_fpn_2x_coco + In Collection: RPN + Config: configs/rpn/rpn_x101-64x4d_fpn_2x_coco.py + Metadata: + Training Resources: 8x V100 GPUs + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + AR@1000: 61.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/rpn/rpn_x101_64x4d_fpn_2x_coco/rpn_x101_64x4d_fpn_2x_coco_20200208-c65f524f.pth diff --git a/configs/soft_teacher/metafile.yml b/configs/soft_teacher/metafile.yml new file mode 100644 index 00000000000..a9fb3c2e312 --- /dev/null +++ b/configs/soft_teacher/metafile.yml @@ -0,0 +1,18 @@ +Collections: + - Name: SoftTeacher + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - FPN + - ResNet + Paper: + URL: https://arxiv.org/abs/2106.09018 + Title: "End-to-End Semi-Supervised Object Detection with Soft Teacher" + README: configs/soft_teacher/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v3.0.0rc1/mmdet/models/detectors/soft_teacher.py#L20 + Version: v3.0.0rc1 diff --git a/configs/strong_baselines/metafile.yml b/configs/strong_baselines/metafile.yml new file mode 100644 index 00000000000..f72c07e64b6 --- /dev/null +++ b/configs/strong_baselines/metafile.yml @@ -0,0 +1,24 @@ +Models: + - Name: mask-rcnn_r50-caffe_fpn_rpn-2conv_4conv1fc_syncbn-all_lsj-100e_coco + In Collection: Mask R-CNN + Config: configs/strong_baselines/mask-rcnn_r50-caffe_fpn_rpn-2conv_4conv1fc_syncbn-all_lsj-100e_coco.py + Metadata: + Epochs: 100 + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + - LSJ + Training Resources: 8x V100 GPUs + Architecture: + - ResNet + - FPN + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 44.7 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 40.4 diff --git a/model-index.yml b/model-index.yml index 1e71f450d8d..41f1491a02b 100644 --- a/model-index.yml +++ b/model-index.yml @@ -1,20 +1,26 @@ Import: + - configs/albu_example/metafile.yml - configs/atss/metafile.yml - configs/autoassign/metafile.yml + - configs/boxinst/metafile.yml - configs/carafe/metafile.yml - configs/cascade_rcnn/metafile.yml - configs/cascade_rpn/metafile.yml - configs/centernet/metafile.yml - configs/centripetalnet/metafile.yml - - configs/cornernet/metafile.yml - configs/condinst/metafile.yml + - configs/conditional_detr/metafile.yml + - configs/cornernet/metafile.yml - configs/convnext/metafile.yml + - configs/crowddet/metafile.yml + - configs/dab_detr/metafile.yml - configs/dcn/metafile.yml -
configs/dcnv2/metafile.yml - configs/ddod/metafile.yml - configs/deformable_detr/metafile.yml - configs/detectors/metafile.yml - configs/detr/metafile.yml + - configs/dino/metafile.yml - configs/double_heads/metafile.yml - configs/dyhead/metafile.yml - configs/dynamic_rcnn/metafile.yml @@ -40,6 +46,7 @@ Import: - configs/lad/metafile.yml - configs/ld/metafile.yml - configs/libra_rcnn/metafile.yml + - configs/lvis/metafile.yml - configs/mask2former/metafile.yml - configs/mask_rcnn/metafile.yml - configs/maskformer/metafile.yml @@ -54,13 +61,13 @@ Import: - configs/pisa/metafile.yml - configs/point_rend/metafile.yml - configs/queryinst/metafile.yml - - configs/rtmdet/metafile.yml - configs/regnet/metafile.yml - configs/reppoints/metafile.yml - configs/res2net/metafile.yml - configs/resnest/metafile.yml - configs/resnet_strikes_back/metafile.yml - configs/retinanet/metafile.yml + - configs/rpn/metafile.yml - configs/rtmdet/metafile.yml - configs/sabl/metafile.yml - configs/scnet/metafile.yml @@ -68,9 +75,11 @@ Import: - configs/seesaw_loss/metafile.yml - configs/simple_copy_paste/metafile.yml - configs/sparse_rcnn/metafile.yml + - configs/soft_teacher/metafile.yml - configs/solo/metafile.yml - configs/solov2/metafile.yml - configs/ssd/metafile.yml + - configs/strong_baselines/metafile.yml - configs/swin/metafile.yml - configs/tridentnet/metafile.yml - configs/tood/metafile.yml From 5113bd815e887535f3d9436c785584b593ccc9e1 Mon Sep 17 00:00:00 2001 From: RangiLyu Date: Fri, 17 Mar 2023 10:09:31 +0800 Subject: [PATCH 09/38] [Fix] Fix RTMDetIns prior generator device. (#9964) --- mmdet/models/dense_heads/rtmdet_ins_head.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mmdet/models/dense_heads/rtmdet_ins_head.py b/mmdet/models/dense_heads/rtmdet_ins_head.py index e355bdb79f8..729a4492f0b 100644 --- a/mmdet/models/dense_heads/rtmdet_ins_head.py +++ b/mmdet/models/dense_heads/rtmdet_ins_head.py @@ -565,7 +565,7 @@ def _mask_predict_by_feat_single(self, mask_feat: Tensor, kernels: Tensor, mask_feat.unsqueeze(0) coord = self.prior_generator.single_level_grid_priors( - (h, w), level_idx=0).reshape(1, -1, 2) + (h, w), level_idx=0, device=mask_feat.device).reshape(1, -1, 2) num_inst = priors.shape[0] points = priors[:, :2].reshape(-1, 1, 2) strides = priors[:, 2:].reshape(-1, 1, 2) From 06d338c7e91633ac797e9ec78bc3ec53bbf3b0c1 Mon Sep 17 00:00:00 2001 From: JosonChan <57584090+JosonChan1998@users.noreply.github.com> Date: Fri, 17 Mar 2023 13:55:08 +0800 Subject: [PATCH 10/38] [Feature] add BoxInst resnet-101 config (#9967) Co-authored-by: RangiLyu --- configs/boxinst/boxinst_r101_fpn_ms-90k_coco.py | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 configs/boxinst/boxinst_r101_fpn_ms-90k_coco.py diff --git a/configs/boxinst/boxinst_r101_fpn_ms-90k_coco.py b/configs/boxinst/boxinst_r101_fpn_ms-90k_coco.py new file mode 100644 index 00000000000..ab2b11628a7 --- /dev/null +++ b/configs/boxinst/boxinst_r101_fpn_ms-90k_coco.py @@ -0,0 +1,8 @@ +_base_ = './boxinst_r50_fpn_ms-90k_coco.py' + +# model settings +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) From 0f5cd10f5d2ce15c5cd4f96fe46668979da921fc Mon Sep 17 00:00:00 2001 From: RangiLyu Date: Fri, 17 Mar 2023 14:56:33 +0800 Subject: [PATCH 11/38] [Fix] Fix img_shape in data pipeline (#9966) --- mmdet/datasets/transforms/transforms.py | 20 +++++++++---------- mmdet/structures/det_data_sample.py | 8 ++++---- .../test_transforms/test_transforms.py | 10 
++++++++++ 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/mmdet/datasets/transforms/transforms.py b/mmdet/datasets/transforms/transforms.py index c954dada442..b844d0a3fe7 100644 --- a/mmdet/datasets/transforms/transforms.py +++ b/mmdet/datasets/transforms/transforms.py @@ -722,7 +722,7 @@ def _crop_data(self, results: dict, crop_size: Tuple[int, int], img = img[crop_y1:crop_y2, crop_x1:crop_x2, ...] img_shape = img.shape results['img'] = img - results['img_shape'] = img_shape + results['img_shape'] = img_shape[:2] # crop bboxes accordingly and clip to the image boundary if results.get('gt_bboxes', None) is not None: @@ -1510,7 +1510,7 @@ def transform(self, results: dict) -> Union[dict, None]: return None # back to the original format results = self.mapper(results, self.keymap_back) - results['img_shape'] = results['img'].shape + results['img_shape'] = results['img'].shape[:2] return results def _preprocess_results(self, results: dict) -> tuple: @@ -1861,7 +1861,7 @@ def _train_aug(self, results): if len(gt_bboxes) == 0: results['img'] = cropped_img - results['img_shape'] = cropped_img.shape + results['img_shape'] = cropped_img.shape[:2] return results # if image do not have valid bbox, any crop patch is valid. @@ -1870,7 +1870,7 @@ def _train_aug(self, results): continue results['img'] = cropped_img - results['img_shape'] = cropped_img.shape + results['img_shape'] = cropped_img.shape[:2] x0, y0, x1, y1 = patch @@ -1936,7 +1936,7 @@ def _test_aug(self, results): cropped_img, border, _ = self._crop_image_and_paste( img, [h // 2, w // 2], [target_h, target_w]) results['img'] = cropped_img - results['img_shape'] = cropped_img.shape + results['img_shape'] = cropped_img.shape[:2] results['border'] = border return results @@ -2240,7 +2240,7 @@ def transform(self, results: dict) -> dict: mosaic_ignore_flags = mosaic_ignore_flags[inside_inds] results['img'] = mosaic_img - results['img_shape'] = mosaic_img.shape + results['img_shape'] = mosaic_img.shape[:2] results['gt_bboxes'] = mosaic_bboxes results['gt_bboxes_labels'] = mosaic_bboxes_labels results['gt_ignore_flags'] = mosaic_ignore_flags @@ -2522,7 +2522,7 @@ def transform(self, results: dict) -> dict: mixup_gt_ignore_flags = mixup_gt_ignore_flags[inside_inds] results['img'] = mixup_img.astype(np.uint8) - results['img_shape'] = mixup_img.shape + results['img_shape'] = mixup_img.shape[:2] results['gt_bboxes'] = mixup_gt_bboxes results['gt_bboxes_labels'] = mixup_gt_bboxes_labels results['gt_ignore_flags'] = mixup_gt_ignore_flags @@ -2645,7 +2645,7 @@ def transform(self, results: dict) -> dict: dsize=(width, height), borderValue=self.border_val) results['img'] = img - results['img_shape'] = img.shape + results['img_shape'] = img.shape[:2] bboxes = results['gt_bboxes'] num_bboxes = len(bboxes) @@ -3334,7 +3334,7 @@ def transform(self, results: dict) -> dict: mosaic_ignore_flags = mosaic_ignore_flags[inside_inds] results['img'] = mosaic_img - results['img_shape'] = mosaic_img.shape + results['img_shape'] = mosaic_img.shape[:2] results['gt_bboxes'] = mosaic_bboxes results['gt_bboxes_labels'] = mosaic_bboxes_labels results['gt_ignore_flags'] = mosaic_ignore_flags @@ -3614,7 +3614,7 @@ def transform(self, results: dict) -> dict: mixup_gt_masks = mixup_gt_masks[inside_inds] results['img'] = mixup_img.astype(np.uint8) - results['img_shape'] = mixup_img.shape + results['img_shape'] = mixup_img.shape[:2] results['gt_bboxes'] = mixup_gt_bboxes results['gt_bboxes_labels'] = mixup_gt_bboxes_labels results['gt_ignore_flags'] = 
mixup_gt_ignore_flags diff --git a/mmdet/structures/det_data_sample.py b/mmdet/structures/det_data_sample.py index 71bc404a269..d7b7f354a85 100644 --- a/mmdet/structures/det_data_sample.py +++ b/mmdet/structures/det_data_sample.py @@ -30,8 +30,8 @@ class DetDataSample(BaseDataElement): >>> from mmdet.structures import DetDataSample >>> data_sample = DetDataSample() - >>> img_meta = dict(img_shape=(800, 1196, 3), - ... pad_shape=(800, 1216, 3)) + >>> img_meta = dict(img_shape=(800, 1196), + ... pad_shape=(800, 1216)) >>> gt_instances = InstanceData(metainfo=img_meta) >>> gt_instances.bboxes = torch.rand((5, 4)) >>> gt_instances.labels = torch.rand((5,)) @@ -48,8 +48,8 @@ class DetDataSample(BaseDataElement): gt_instances: Date: Fri, 17 Mar 2023 14:57:29 +0800 Subject: [PATCH 12/38] [Refactor] migrate File I/O to the newest mmengine (#9709) --- .../_base_/datasets/cityscapes_detection.py | 28 +- .../_base_/datasets/cityscapes_instance.py | 35 ++- configs/_base_/datasets/coco_detection.py | 24 +- configs/_base_/datasets/coco_instance.py | 24 +- .../_base_/datasets/coco_instance_semantic.py | 24 +- configs/_base_/datasets/coco_panoptic.py | 30 +- configs/_base_/datasets/deepfashion.py | 30 +- configs/_base_/datasets/lvis_v0.5_instance.py | 24 +- .../_base_/datasets/objects365v1_detection.py | 24 +- .../_base_/datasets/objects365v2_detection.py | 24 +- .../_base_/datasets/openimages_detection.py | 22 +- .../_base_/datasets/semi_coco_detection.py | 29 +- configs/_base_/datasets/voc0712.py | 28 +- .../mask-rcnn_r50_fpn_albu-1x_coco.py | 4 +- ...nternet-update_r50-caffe_fpn_ms-1x_coco.py | 4 +- ...ernet_r18-dcnv2_8xb16-crop512-140e_coco.py | 12 +- configs/centernet/centernet_tta.py | 5 +- ...glass104_16xb6-crop511-210e-mstest_coco.py | 6 +- configs/common/lsj-100e_coco-detection.py | 25 +- configs/common/lsj-100e_coco-instance.py | 25 +- configs/common/ms-90k_coco.py | 24 +- configs/common/ms-poly-90k_coco-instance.py | 24 +- configs/common/ms-poly_3x_coco-instance.py | 24 +- configs/common/ms_3x_coco-instance.py | 24 +- configs/common/ms_3x_coco.py | 24 +- configs/common/ssj_270k_coco-instance.py | 25 +- configs/common/ssj_scp_270k_coco-instance.py | 17 +- ...7_fpn_4conv1fc-giou_amp-ms-crop-3x_coco.py | 4 +- ...onvnext-t-p4-w7_fpn_amp-ms-crop-3x_coco.py | 4 +- ...rnet_hourglass104_8xb6-210e-mstest_coco.py | 7 +- ...owddet-rcnn_r50_fpn_8xb2-30e_crowdhuman.py | 30 +- .../dab_detr/dab-detr_r50_8xb2-50e_coco.py | 4 +- .../deformable-detr_r50_16xb2-50e_coco.py | 4 +- configs/detr/detr_r50_8xb2-150e_coco.py | 4 +- configs/dino/dino-4scale_r50_8xb2-12e_coco.py | 4 +- .../atss_r50-caffe_fpn_dyhead_1x_coco.py | 8 +- ...tss_swin-l-p4-w12_fpn_dyhead_ms-2x_coco.py | 11 +- ...etinanet_effb3_fpn_8xb4-crop896-1x_coco.py | 8 +- configs/fast_rcnn/README.md | 4 +- .../fast_rcnn/fast-rcnn_r50_fpn_1x_coco.py | 8 +- ...01-caffe_fpn_gn-head_ms-640-800-2x_coco.py | 4 +- ...50-caffe_fpn_gn-head_ms-640-800-2x_coco.py | 4 +- ...01-64x4d_fpn_gn-head_ms-640-800-2x_coco.py | 4 +- ...n_gn-head-align_ms-640-800-4xb4-2x_coco.py | 4 +- ...n_gn-head-align_ms-640-800-4xb4-2x_coco.py | 4 +- .../faster-rcnn_r50_fpn_crop640-50e_coco.py | 8 +- .../fpg/mask-rcnn_r50_fpn_crop640-50e_coco.py | 8 +- configs/gfl/gfl_r50_fpn_ms-2x_coco.py | 4 +- .../ga-retinanet_r101-caffe_fpn_ms-2x.py | 4 +- ...v2p-w32-gn-head_ms-640-800-4xb4-2x_coco.py | 4 +- configs/htc/htc_r50_fpn_1x_coco.py | 4 +- ...de-mask-rcnn_r50_fpn_instaboost-4x_coco.py | 4 +- .../mask-rcnn_r50_fpn_instaboost-4x_coco.py | 4 +- .../ld/ld_r101-gflv1-r101-dcn_fpn_2x_coco.py | 4 +- 
...k2former_r50_8xb2-lsj-50e_coco-panoptic.py | 12 +- .../mask2former_r50_8xb2-lsj-50e_coco.py | 13 +- .../mask-rcnn_r50-caffe_fpn_ms-1x_coco.py | 4 +- ...mask-rcnn_r50-caffe_fpn_ms-poly-1x_coco.py | 4 +- .../mask-rcnn_r50_fpn_poly-1x_coco.py | 4 +- ...ask-rcnn_x101-32x8d_fpn_ms-poly-1x_coco.py | 4 +- .../retinanet_r50_fpn_crop640-50e_coco.py | 8 +- .../openimages/ssd300_32xb8-36e_openimages.py | 6 +- configs/paa/paa_r50_fpn_ms-3x_coco.py | 4 +- ...faster-rcnn_r50-caffe-c4_ms-18k_voc0712.py | 14 +- .../faster-rcnn_r50_fpn_1x_voc0712-cocofmt.py | 14 +- ...n_300-proposals_crop-ms-480-800-3x_coco.py | 4 +- .../queryinst_r50_fpn_ms-480-800-3x_coco.py | 4 +- .../mask-rcnn_regnetx-3.2GF_fpn_ms-3x_coco.py | 4 +- ...s50_fpn_syncbn-backbone+head_ms-1x_coco.py | 4 +- ...n_syncbn-backbone+head_ms-range-1x_coco.py | 4 +- ...n_syncbn-backbone+head_ms-range-1x_coco.py | 4 +- ...s50_fpn_syncbn-backbone+head_ms-1x_coco.py | 4 +- configs/retinanet/retinanet_tta.py | 2 +- configs/rpn/rpn_r50_fpn_1x_coco.py | 2 +- .../rtmdet/rtmdet-ins_l_8xb32-300e_coco.py | 8 +- .../rtmdet/rtmdet-ins_s_8xb32-300e_coco.py | 8 +- .../rtmdet/rtmdet-ins_tiny_8xb32-300e_coco.py | 4 +- configs/rtmdet/rtmdet_l_8xb32-300e_coco.py | 12 +- configs/rtmdet/rtmdet_s_8xb32-300e_coco.py | 8 +- configs/rtmdet/rtmdet_tiny_8xb32-300e_coco.py | 4 +- configs/rtmdet/rtmdet_tta.py | 2 +- ...etinanet_r101-gn_fpn_ms-480-960-2x_coco.py | 4 +- ...etinanet_r101-gn_fpn_ms-640-800-2x_coco.py | 4 +- ...01_fpn_seesaw-loss_random-ms-2x_lvis-v1.py | 4 +- ...pn_seesaw-loss_sample1e-3-ms-2x_lvis-v1.py | 4 +- ...50_fpn_seesaw-loss_random-ms-2x_lvis-v1.py | 4 +- ...pn_seesaw-loss_sample1e-3-ms-2x_lvis-v1.py | 4 +- ...mask-rcnn_r50-mocov2-pre_fpn_ms-2x_coco.py | 4 +- .../mask-rcnn_r50-swav-pre_fpn_ms-2x_coco.py | 4 +- .../decoupled-solo-light_r50_fpn_3x_coco.py | 8 +- configs/solo/solo_r50_fpn_3x_coco.py | 4 +- .../solov2/solov2-light_r50_fpn_ms-3x_coco.py | 8 +- configs/solov2/solov2_r50_fpn_ms-3x_coco.py | 4 +- ...n_300-proposals_crop-ms-480-800-3x_coco.py | 4 +- .../sparse-rcnn_r50_fpn_ms-480-800-3x_coco.py | 4 +- configs/ssd/ssd300_coco.py | 7 +- configs/ssd/ssd512_coco.py | 4 +- ...2conv_4conv1fc_syncbn-all_lsj-100e_coco.py | 8 +- ...k-rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco.py | 4 +- configs/tood/tood_r50_fpn_ms-2x_coco.py | 4 +- .../tridentnet_r50-caffe_ms-1x_coco.py | 4 +- configs/vfnet/vfnet_r50_fpn_1x_coco.py | 8 +- configs/vfnet/vfnet_r50_fpn_ms-2x_coco.py | 8 +- configs/yolact/yolact_r50_1xb8-55e_coco.py | 6 +- configs/yolo/yolov3_d53_8xb8-320-273e_coco.py | 13 +- .../yolo/yolov3_d53_8xb8-ms-416-273e_coco.py | 12 +- .../yolo/yolov3_d53_8xb8-ms-608-273e_coco.py | 24 +- .../yolov3_mobilenetv2_8xb24-320-300e_coco.py | 12 +- ...lov3_mobilenetv2_8xb24-ms-416-300e_coco.py | 24 +- configs/yolof/yolof_r50-c5_8xb8-1x_coco.py | 8 +- configs/yolox/yolox_s_8xb8-300e_coco.py | 24 +- configs/yolox/yolox_tiny_8xb8-300e_coco.py | 10 +- configs/yolox/yolox_tta.py | 2 +- .../advanced_guides/customize_transforms.md | 2 +- docs/en/advanced_guides/transforms.md | 4 +- docs/en/user_guides/config.md | 17 +- docs/en/user_guides/semi_det.md | 4 +- docs/en/user_guides/test.md | 4 +- docs/zh_cn/user_guides/config.md | 7 +- docs/zh_cn/user_guides/semi_det.md | 17 +- mmdet/datasets/base_det_dataset.py | 23 +- mmdet/datasets/coco.py | 5 +- mmdet/datasets/coco_panoptic.py | 8 +- mmdet/datasets/crowdhuman.py | 8 +- mmdet/datasets/lvis.py | 9 +- mmdet/datasets/objects365.py | 8 +- mmdet/datasets/openimages.py | 43 +-- mmdet/datasets/transforms/loading.py | 61 ++-- 
mmdet/datasets/transforms/wrappers.py | 18 +- mmdet/datasets/xml_style.py | 17 +- mmdet/engine/hooks/visualization_hook.py | 22 +- mmdet/evaluation/functional/__init__.py | 4 +- .../evaluation/functional/cityscapes_utils.py | 270 ++++++++++++++++++ mmdet/evaluation/functional/panoptic_utils.py | 33 +-- mmdet/evaluation/metrics/cityscapes_metric.py | 121 +++++--- mmdet/evaluation/metrics/coco_metric.py | 24 +- .../metrics/coco_panoptic_metric.py | 30 +- mmdet/evaluation/metrics/crowdhuman_metric.py | 26 +- .../metrics/dump_proposals_metric.py | 20 +- mmdet/evaluation/metrics/lvis_metric.py | 23 +- mmdet/models/test_time_augs/det_tta.py | 2 +- mmdet/testing/_utils.py | 8 +- ...cnn_convnext-v2-b_fpn_lsj-3x-fcmae_coco.py | 2 +- ...enternet2_swin-b_fpn_4x_lvis-coco-in21k.py | 2 +- ...posals_1-step_crop-ms-480-800-450k_coco.py | 4 +- ...det_effb0_bifpn_8xb16-crop512-300e_coco.py | 8 +- ...fb3_bifpn_8xb16-crop896-300e_coco-90cls.py | 8 +- ...det_effb3_bifpn_8xb16-crop896-300e_coco.py | 8 +- ..._effb0_bifpn_8xb16-crop512-300e_coco_tf.py | 8 +- .../efficientdet/tensorflow/coco_90class.py | 5 +- .../efficientdet/tensorflow/coco_90metric.py | 15 +- .../sparseinst_r50_iam_8xb8-ms-270k_coco.py | 4 +- requirements/mminstall.txt | 2 +- .../test_transforms/test_loading.py | 4 +- .../test_metrics/test_cityscapes_metric.py | 9 - .../test_metrics/test_coco_metric.py | 2 + tools/misc/get_crowdhuman_id_hw.py | 9 +- tools/misc/get_image_metas.py | 5 +- 158 files changed, 1255 insertions(+), 842 deletions(-) create mode 100644 mmdet/evaluation/functional/cityscapes_utils.py diff --git a/configs/_base_/datasets/cityscapes_detection.py b/configs/_base_/datasets/cityscapes_detection.py index a037fb838fa..caeba6bfcd2 100644 --- a/configs/_base_/datasets/cityscapes_detection.py +++ b/configs/_base_/datasets/cityscapes_detection.py @@ -2,8 +2,23 @@ dataset_type = 'CityscapesDataset' data_root = 'data/cityscapes/' +# Example of using a different file client +# Method 1: simply set the data root and let the file I/O module +# automatically infer from prefix (LMDB and Memcache are not supported yet) + +# data_root = 's3://openmmlab/datasets/segmentation/cityscapes/' + +# Method 2: Use `backend_args` (`file_client_args` in versions before 3.0.0rc6) +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# './data/': 's3://openmmlab/datasets/segmentation/', +# 'data/': 's3://openmmlab/datasets/segmentation/' +# })) +backend_args = None + train_pipeline = [ - dict(type='LoadImageFromFile'), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='LoadAnnotations', with_bbox=True), dict( type='RandomResize', @@ -14,7 +29,7 @@ ] test_pipeline = [ - dict(type='LoadImageFromFile'), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='Resize', scale=(2048, 1024), keep_ratio=True), # If you don't have a gt annotation, delete the pipeline dict(type='LoadAnnotations', with_bbox=True), @@ -39,7 +54,8 @@ ann_file='annotations/instancesonly_filtered_gtFine_train.json', data_prefix=dict(img='leftImg8bit/train/'), filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline))) + pipeline=train_pipeline, + backend_args=backend_args))) val_dataloader = dict( batch_size=1, @@ -54,13 +70,15 @@ data_prefix=dict(img='leftImg8bit/val/'), test_mode=True, filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=test_pipeline)) + pipeline=test_pipeline, + backend_args=backend_args)) test_dataloader = val_dataloader val_evaluator = dict( type='CocoMetric', ann_file=data_root +
'annotations/instancesonly_filtered_gtFine_val.json', - metric='bbox') + metric='bbox', + backend_args=backend_args) test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/cityscapes_instance.py b/configs/_base_/datasets/cityscapes_instance.py index 0254af3f97a..136403136c6 100644 --- a/configs/_base_/datasets/cityscapes_instance.py +++ b/configs/_base_/datasets/cityscapes_instance.py @@ -2,8 +2,23 @@ dataset_type = 'CityscapesDataset' data_root = 'data/cityscapes/' +# Example of using a different file client +# Method 1: simply set the data root and let the file I/O module +# automatically infer from prefix (LMDB and Memcache are not supported yet) + +# data_root = 's3://openmmlab/datasets/segmentation/cityscapes/' + +# Method 2: Use backend_args (file_client_args in versions before 3.0.0rc6) +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# './data/': 's3://openmmlab/datasets/segmentation/', +# 'data/': 's3://openmmlab/datasets/segmentation/' +# })) +backend_args = None + train_pipeline = [ - dict(type='LoadImageFromFile'), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict( type='RandomResize', @@ -14,7 +29,7 @@ ] test_pipeline = [ - dict(type='LoadImageFromFile'), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='Resize', scale=(2048, 1024), keep_ratio=True), # If you don't have a gt annotation, delete the pipeline dict(type='LoadAnnotations', with_bbox=True, with_mask=True), @@ -39,7 +54,8 @@ ann_file='annotations/instancesonly_filtered_gtFine_train.json', data_prefix=dict(img='leftImg8bit/train/'), filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline))) + pipeline=train_pipeline, + backend_args=backend_args))) val_dataloader = dict( batch_size=1, @@ -54,7 +70,8 @@ data_prefix=dict(img='leftImg8bit/val/'), test_mode=True, filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=test_pipeline)) + pipeline=test_pipeline, + backend_args=backend_args)) test_dataloader = val_dataloader @@ -63,13 +80,13 @@ type='CocoMetric', ann_file=data_root + 'annotations/instancesonly_filtered_gtFine_val.json', - metric=['bbox', 'segm']), + metric=['bbox', 'segm'], + backend_args=backend_args), dict( type='CityScapesMetric', - ann_file=data_root + - 'annotations/instancesonly_filtered_gtFine_val.json', - seg_prefix=data_root + '/gtFine/val', - outfile_prefix='./work_dirs/cityscapes_metric/instance') + seg_prefix=data_root + 'gtFine/val', + outfile_prefix='./work_dirs/cityscapes_metric/instance', + backend_args=backend_args) ] test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/coco_detection.py b/configs/_base_/datasets/coco_detection.py index fcd9859f135..fdf8dfad947 100644 --- a/configs/_base_/datasets/coco_detection.py +++ b/configs/_base_/datasets/coco_detection.py @@ -2,23 +2,30 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' -# file_client_args = dict( +# Example of using a different file client +# Method 1: simply set the data root and let the file I/O module +# automatically infer from prefix (LMDB and Memcache are not supported yet) + +# data_root = 's3://openmmlab/datasets/detection/coco/' + +# Method 2: Use `backend_args` (`file_client_args` in versions before 3.0.0rc6) +# backend_args = dict( # backend='petrel', # path_mapping=dict({ # './data/': 's3://openmmlab/datasets/detection/', # 'data/': 's3://openmmlab/datasets/detection/' # })) -file_client_args = dict(backend='disk') +backend_args = None train_pipeline = [ -
dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='LoadAnnotations', with_bbox=True), dict(type='Resize', scale=(1333, 800), keep_ratio=True), dict(type='RandomFlip', prob=0.5), dict(type='PackDetInputs') ] test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='Resize', scale=(1333, 800), keep_ratio=True), # If you don't have a gt annotation, delete the pipeline dict(type='LoadAnnotations', with_bbox=True), @@ -39,7 +46,8 @@ ann_file='annotations/instances_train2017.json', data_prefix=dict(img='train2017/'), filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline)) + pipeline=train_pipeline, + backend_args=backend_args)) val_dataloader = dict( batch_size=1, num_workers=2, @@ -52,14 +60,16 @@ ann_file='annotations/instances_val2017.json', data_prefix=dict(img='val2017/'), test_mode=True, - pipeline=test_pipeline)) + pipeline=test_pipeline, + backend_args=backend_args)) test_dataloader = val_dataloader val_evaluator = dict( type='CocoMetric', ann_file=data_root + 'annotations/instances_val2017.json', metric='bbox', - format_only=False) + format_only=False, + backend_args=backend_args) test_evaluator = val_evaluator # inference on test dataset and diff --git a/configs/_base_/datasets/coco_instance.py b/configs/_base_/datasets/coco_instance.py index 878d8b4915e..e91cb354038 100644 --- a/configs/_base_/datasets/coco_instance.py +++ b/configs/_base_/datasets/coco_instance.py @@ -2,23 +2,30 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' -# file_client_args = dict( +# Example to use different file client +# Method 1: simply set the data root and let the file I/O module +# automatically infer from prefix (not support LMDB and Memcache yet) + +# data_root = 's3://openmmlab/datasets/detection/coco/' + +# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6 +# backend_args = dict( # backend='petrel', # path_mapping=dict({ # './data/': 's3://openmmlab/datasets/detection/', # 'data/': 's3://openmmlab/datasets/detection/' # })) -file_client_args = dict(backend='disk') +backend_args = None train_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict(type='Resize', scale=(1333, 800), keep_ratio=True), dict(type='RandomFlip', prob=0.5), dict(type='PackDetInputs') ] test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='Resize', scale=(1333, 800), keep_ratio=True), # If you don't have a gt annotation, delete the pipeline dict(type='LoadAnnotations', with_bbox=True, with_mask=True), @@ -39,7 +46,8 @@ ann_file='annotations/instances_train2017.json', data_prefix=dict(img='train2017/'), filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline)) + pipeline=train_pipeline, + backend_args=backend_args)) val_dataloader = dict( batch_size=1, num_workers=2, @@ -52,14 +60,16 @@ ann_file='annotations/instances_val2017.json', data_prefix=dict(img='val2017/'), test_mode=True, - pipeline=test_pipeline)) + pipeline=test_pipeline, + backend_args=backend_args)) test_dataloader = val_dataloader val_evaluator = dict( type='CocoMetric', ann_file=data_root + 'annotations/instances_val2017.json', metric=['bbox', 
'segm'], - format_only=False) + format_only=False, + backend_args=backend_args) test_evaluator = val_evaluator # inference on test dataset and diff --git a/configs/_base_/datasets/coco_instance_semantic.py b/configs/_base_/datasets/coco_instance_semantic.py index 12652d02c6b..cc961863306 100644 --- a/configs/_base_/datasets/coco_instance_semantic.py +++ b/configs/_base_/datasets/coco_instance_semantic.py @@ -2,16 +2,23 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' -# file_client_args = dict( +# Example to use different file client +# Method 1: simply set the data root and let the file I/O module +# automatically infer from prefix (not support LMDB and Memcache yet) + +# data_root = 's3://openmmlab/datasets/detection/coco/' + +# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6 +# backend_args = dict( # backend='petrel', # path_mapping=dict({ # './data/': 's3://openmmlab/datasets/detection/', # 'data/': 's3://openmmlab/datasets/detection/' # })) -file_client_args = dict(backend='disk') +backend_args = None train_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict( type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True), dict(type='Resize', scale=(1333, 800), keep_ratio=True), @@ -19,7 +26,7 @@ dict(type='PackDetInputs') ] test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='Resize', scale=(1333, 800), keep_ratio=True), # If you don't have a gt annotation, delete the pipeline dict( @@ -42,7 +49,8 @@ ann_file='annotations/instances_train2017.json', data_prefix=dict(img='train2017/', seg='stuffthingmaps/train2017/'), filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline)) + pipeline=train_pipeline, + backend_args=backend_args)) val_dataloader = dict( batch_size=1, @@ -56,7 +64,8 @@ ann_file='annotations/instances_val2017.json', data_prefix=dict(img='val2017/'), test_mode=True, - pipeline=test_pipeline)) + pipeline=test_pipeline, + backend_args=backend_args)) test_dataloader = val_dataloader @@ -64,5 +73,6 @@ type='CocoMetric', ann_file=data_root + 'annotations/instances_val2017.json', metric=['bbox', 'segm'], - format_only=False) + format_only=False, + backend_args=backend_args) test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/coco_panoptic.py b/configs/_base_/datasets/coco_panoptic.py index 021d80b2807..2d75660f4b4 100644 --- a/configs/_base_/datasets/coco_panoptic.py +++ b/configs/_base_/datasets/coco_panoptic.py @@ -1,26 +1,33 @@ # dataset settings dataset_type = 'CocoPanopticDataset' data_root = 'data/coco/' -# file_client_args = dict( +# Example to use different file client +# Method 1: simply set the data root and let the file I/O module +# automatically infer from prefix (not support LMDB and Memcache yet) + +# data_root = 's3://openmmlab/datasets/detection/coco/' + +# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6 +# backend_args = dict( # backend='petrel', # path_mapping=dict({ # './data/': 's3://openmmlab/datasets/detection/', # 'data/': 's3://openmmlab/datasets/detection/' # })) -file_client_args = dict(backend='disk') +backend_args = None train_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), - dict(type='LoadPanopticAnnotations', file_client_args=file_client_args), + dict(type='LoadImageFromFile', 
backend_args=backend_args), + dict(type='LoadPanopticAnnotations', backend_args=backend_args), dict(type='Resize', scale=(1333, 800), keep_ratio=True), dict(type='RandomFlip', prob=0.5), dict(type='PackDetInputs') ] test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='Resize', scale=(1333, 800), keep_ratio=True), - dict(type='LoadPanopticAnnotations', file_client_args=file_client_args), + dict(type='LoadPanopticAnnotations', backend_args=backend_args), dict( type='PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', @@ -40,7 +47,8 @@ data_prefix=dict( img='train2017/', seg='annotations/panoptic_train2017/'), filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline)) + pipeline=train_pipeline, + backend_args=backend_args)) val_dataloader = dict( batch_size=1, num_workers=2, @@ -53,15 +61,15 @@ ann_file='annotations/panoptic_val2017.json', data_prefix=dict(img='val2017/', seg='annotations/panoptic_val2017/'), test_mode=True, - pipeline=test_pipeline)) + pipeline=test_pipeline, + backend_args=backend_args)) test_dataloader = val_dataloader val_evaluator = dict( type='CocoPanopticMetric', ann_file=data_root + 'annotations/panoptic_val2017.json', seg_prefix=data_root + 'annotations/panoptic_val2017/', - file_client_args=file_client_args, -) + backend_args=backend_args) test_evaluator = val_evaluator # inference on test dataset and diff --git a/configs/_base_/datasets/deepfashion.py b/configs/_base_/datasets/deepfashion.py index bb70eeed7d0..a93dc7152f7 100644 --- a/configs/_base_/datasets/deepfashion.py +++ b/configs/_base_/datasets/deepfashion.py @@ -2,23 +2,30 @@ dataset_type = 'DeepFashionDataset' data_root = 'data/DeepFashion/In-shop/' -# file_client_args = dict( +# Example to use different file client +# Method 1: simply set the data root and let the file I/O module +# automatically infer from prefix (not support LMDB and Memcache yet) + +# data_root = 's3://openmmlab/datasets/detection/coco/' + +# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6 +# backend_args = dict( # backend='petrel', # path_mapping=dict({ # './data/': 's3://openmmlab/datasets/detection/', # 'data/': 's3://openmmlab/datasets/detection/' # })) -file_client_args = dict(backend='disk') +backend_args = None train_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict(type='Resize', scale=(750, 1101), keep_ratio=True), dict(type='RandomFlip', prob=0.5), dict(type='PackDetInputs') ] test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='Resize', scale=(750, 1101), keep_ratio=True), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict( @@ -41,7 +48,8 @@ ann_file='Anno/segmentation/DeepFashion_segmentation_train.json', data_prefix=dict(img='Img/'), filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline))) + pipeline=train_pipeline, + backend_args=backend_args))) val_dataloader = dict( batch_size=1, num_workers=2, @@ -54,7 +62,8 @@ ann_file='Anno/segmentation/DeepFashion_segmentation_query.json', data_prefix=dict(img='Img/'), test_mode=True, - pipeline=test_pipeline)) + pipeline=test_pipeline, + backend_args=backend_args)) test_dataloader = dict( batch_size=1, 
num_workers=2, @@ -67,17 +76,20 @@ ann_file='Anno/segmentation/DeepFashion_segmentation_gallery.json', data_prefix=dict(img='Img/'), test_mode=True, - pipeline=test_pipeline)) + pipeline=test_pipeline, + backend_args=backend_args)) val_evaluator = dict( type='CocoMetric', ann_file=data_root + 'Anno/segmentation/DeepFashion_segmentation_query.json', metric=['bbox', 'segm'], - format_only=False) + format_only=False, + backend_args=backend_args) test_evaluator = dict( type='CocoMetric', ann_file=data_root + 'Anno/segmentation/DeepFashion_segmentation_gallery.json', metric=['bbox', 'segm'], - format_only=False) + format_only=False, + backend_args=backend_args) diff --git a/configs/_base_/datasets/lvis_v0.5_instance.py b/configs/_base_/datasets/lvis_v0.5_instance.py index f8f65f2b5e8..d0ca44efb6d 100644 --- a/configs/_base_/datasets/lvis_v0.5_instance.py +++ b/configs/_base_/datasets/lvis_v0.5_instance.py @@ -2,16 +2,23 @@ dataset_type = 'LVISV05Dataset' data_root = 'data/lvis_v0.5/' -# file_client_args = dict( +# Example to use different file client +# Method 1: simply set the data root and let the file I/O module +# automatically infer from prefix (not support LMDB and Memcache yet) + +# data_root = 's3://openmmlab/datasets/detection/lvis_v0.5/' + +# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6 +# backend_args = dict( # backend='petrel', # path_mapping=dict({ # './data/': 's3://openmmlab/datasets/detection/', # 'data/': 's3://openmmlab/datasets/detection/' # })) -file_client_args = dict(backend='disk') +backend_args = None train_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict( type='RandomChoiceResize', @@ -22,7 +29,7 @@ dict(type='PackDetInputs') ] test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='Resize', scale=(1333, 800), keep_ratio=True), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict( @@ -46,7 +53,8 @@ ann_file='annotations/lvis_v0.5_train.json', data_prefix=dict(img='train2017/'), filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline))) + pipeline=train_pipeline, + backend_args=backend_args))) val_dataloader = dict( batch_size=1, num_workers=2, @@ -59,11 +67,13 @@ ann_file='annotations/lvis_v0.5_val.json', data_prefix=dict(img='val2017/'), test_mode=True, - pipeline=test_pipeline)) + pipeline=test_pipeline, + backend_args=backend_args)) test_dataloader = val_dataloader val_evaluator = dict( type='LVISMetric', ann_file=data_root + 'annotations/lvis_v0.5_val.json', - metric=['bbox', 'segm']) + metric=['bbox', 'segm'], + backend_args=backend_args) test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/objects365v1_detection.py b/configs/_base_/datasets/objects365v1_detection.py index 7112f67c338..ee398698608 100644 --- a/configs/_base_/datasets/objects365v1_detection.py +++ b/configs/_base_/datasets/objects365v1_detection.py @@ -2,23 +2,30 @@ dataset_type = 'Objects365V1Dataset' data_root = 'data/Objects365/Obj365_v1/' -# file_client_args = dict( +# Example to use different file client +# Method 1: simply set the data root and let the file I/O module +# automatically infer from prefix (not support LMDB and Memcache yet) + +# data_root = 's3://openmmlab/datasets/detection/coco/' + +# Method 2: Use `backend_args`, `file_client_args` in 
versions before 3.0.0rc6 +# backend_args = dict( # backend='petrel', # path_mapping=dict({ # './data/': 's3://openmmlab/datasets/detection/', # 'data/': 's3://openmmlab/datasets/detection/' # })) -file_client_args = dict(backend='disk') +backend_args = None train_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='LoadAnnotations', with_bbox=True), dict(type='Resize', scale=(1333, 800), keep_ratio=True), dict(type='RandomFlip', prob=0.5), dict(type='PackDetInputs') ] test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='Resize', scale=(1333, 800), keep_ratio=True), # If you don't have a gt annotation, delete the pipeline dict(type='LoadAnnotations', with_bbox=True), @@ -39,7 +46,8 @@ ann_file='annotations/objects365_train.json', data_prefix=dict(img='train/'), filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline)) + pipeline=train_pipeline, + backend_args=backend_args)) val_dataloader = dict( batch_size=1, num_workers=2, @@ -52,7 +60,8 @@ ann_file='annotations/objects365_val.json', data_prefix=dict(img='val/'), test_mode=True, - pipeline=test_pipeline)) + pipeline=test_pipeline, + backend_args=backend_args)) test_dataloader = val_dataloader val_evaluator = dict( @@ -60,5 +69,6 @@ ann_file=data_root + 'annotations/objects365_val.json', metric='bbox', sort_categories=True, - format_only=False) + format_only=False, + backend_args=backend_args) test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/objects365v2_detection.py b/configs/_base_/datasets/objects365v2_detection.py index 017d8c01a62..b25a7ba901b 100644 --- a/configs/_base_/datasets/objects365v2_detection.py +++ b/configs/_base_/datasets/objects365v2_detection.py @@ -2,23 +2,30 @@ dataset_type = 'Objects365V2Dataset' data_root = 'data/Objects365/Obj365_v2/' -# file_client_args = dict( +# Example to use different file client +# Method 1: simply set the data root and let the file I/O module +# automatically infer from prefix (not support LMDB and Memcache yet) + +# data_root = 's3://openmmlab/datasets/detection/coco/' + +# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6 +# backend_args = dict( # backend='petrel', # path_mapping=dict({ # './data/': 's3://openmmlab/datasets/detection/', # 'data/': 's3://openmmlab/datasets/detection/' # })) -file_client_args = dict(backend='disk') +backend_args = None train_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='LoadAnnotations', with_bbox=True), dict(type='Resize', scale=(1333, 800), keep_ratio=True), dict(type='RandomFlip', prob=0.5), dict(type='PackDetInputs') ] test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='Resize', scale=(1333, 800), keep_ratio=True), # If you don't have a gt annotation, delete the pipeline dict(type='LoadAnnotations', with_bbox=True), @@ -39,7 +46,8 @@ ann_file='annotations/zhiyuan_objv2_train.json', data_prefix=dict(img='train/'), filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline)) + pipeline=train_pipeline, + backend_args=backend_args)) val_dataloader = dict( batch_size=1, num_workers=2, @@ -52,12 +60,14 @@ ann_file='annotations/zhiyuan_objv2_val.json', 
data_prefix=dict(img='val/'), test_mode=True, - pipeline=test_pipeline)) + pipeline=test_pipeline, + backend_args=backend_args)) test_dataloader = val_dataloader val_evaluator = dict( type='CocoMetric', ann_file=data_root + 'annotations/zhiyuan_objv2_val.json', metric='bbox', - format_only=False) + format_only=False, + backend_args=backend_args) test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/openimages_detection.py b/configs/_base_/datasets/openimages_detection.py index 9d99fb27800..129661b405c 100644 --- a/configs/_base_/datasets/openimages_detection.py +++ b/configs/_base_/datasets/openimages_detection.py @@ -2,24 +2,30 @@ dataset_type = 'OpenImagesDataset' data_root = 'data/OpenImages/' -# file_client_args = dict( +# Example to use different file client +# Method 1: simply set the data root and let the file I/O module +# automatically infer from prefix (not support LMDB and Memcache yet) + +# data_root = 's3://openmmlab/datasets/detection/coco/' + +# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6 +# backend_args = dict( # backend='petrel', # path_mapping=dict({ # './data/': 's3://openmmlab/datasets/detection/', # 'data/': 's3://openmmlab/datasets/detection/' # })) - -file_client_args = dict(backend='disk') +backend_args = None train_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='LoadAnnotations', with_bbox=True), dict(type='Resize', scale=(1024, 800), keep_ratio=True), dict(type='RandomFlip', prob=0.5), dict(type='PackDetInputs') ] test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='Resize', scale=(1024, 800), keep_ratio=True), # avoid bboxes being resized dict(type='LoadAnnotations', with_bbox=True), @@ -44,7 +50,8 @@ label_file='annotations/class-descriptions-boxable.csv', hierarchy_file='annotations/bbox_labels_600_hierarchy.json', meta_file='annotations/train-image-metas.pkl', - pipeline=train_pipeline)) + pipeline=train_pipeline, + backend_args=backend_args)) val_dataloader = dict( batch_size=1, num_workers=0, @@ -61,7 +68,8 @@ meta_file='annotations/validation-image-metas.pkl', image_level_ann_file='annotations/validation-' 'annotations-human-imagelabels-boxable.csv', - pipeline=test_pipeline)) + pipeline=test_pipeline, + backend_args=backend_args)) test_dataloader = val_dataloader val_evaluator = dict( diff --git a/configs/_base_/datasets/semi_coco_detection.py b/configs/_base_/datasets/semi_coco_detection.py index 02b729804a2..694f25f841e 100644 --- a/configs/_base_/datasets/semi_coco_detection.py +++ b/configs/_base_/datasets/semi_coco_detection.py @@ -2,13 +2,20 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' -# file_client_args = dict( +# Example to use different file client +# Method 1: simply set the data root and let the file I/O module +# automatically infer from prefix (not support LMDB and Memcache yet) + +# data_root = 's3://openmmlab/datasets/detection/coco/' + +# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6 +# backend_args = dict( # backend='petrel', # path_mapping=dict({ # './data/': 's3://openmmlab/datasets/detection/', # 'data/': 's3://openmmlab/datasets/detection/' # })) -file_client_args = dict(backend='disk') +backend_args = None color_space = [ [dict(type='ColorTransform')], @@ -36,7 +43,7 @@ # pipeline used to augment labeled data, # which will be sent to 
student model for supervised training. sup_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='LoadAnnotations', with_bbox=True), dict(type='RandomResize', scale=scale, keep_ratio=True), dict(type='RandomFlip', prob=0.5), @@ -82,7 +89,7 @@ # pipeline used to augment unlabeled data into different views unsup_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='LoadEmptyAnnotations'), dict( type='MultiBranch', @@ -93,7 +100,7 @@ ] test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='Resize', scale=(1333, 800), keep_ratio=True), dict( type='PackDetInputs', @@ -122,7 +129,8 @@ ann_file='annotations/instances_train2017.json', data_prefix=dict(img='train2017/'), filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=sup_pipeline) + pipeline=sup_pipeline, + backend_args=backend_args) unlabeled_dataset = dict( type=dataset_type, @@ -130,7 +138,8 @@ ann_file='annotations/instances_unlabeled2017.json', data_prefix=dict(img='unlabeled2017/'), filter_cfg=dict(filter_empty_gt=False), - pipeline=unsup_pipeline) + pipeline=unsup_pipeline, + backend_args=backend_args) train_dataloader = dict( batch_size=batch_size, @@ -155,7 +164,8 @@ ann_file='annotations/instances_val2017.json', data_prefix=dict(img='val2017/'), test_mode=True, - pipeline=test_pipeline)) + pipeline=test_pipeline, + backend_args=backend_args)) test_dataloader = val_dataloader @@ -163,5 +173,6 @@ type='CocoMetric', ann_file=data_root + 'annotations/instances_val2017.json', metric='bbox', - format_only=False) + format_only=False, + backend_args=backend_args) test_evaluator = val_evaluator diff --git a/configs/_base_/datasets/voc0712.py b/configs/_base_/datasets/voc0712.py index 34330e40400..47f5e6563b7 100644 --- a/configs/_base_/datasets/voc0712.py +++ b/configs/_base_/datasets/voc0712.py @@ -2,23 +2,30 @@ dataset_type = 'VOCDataset' data_root = 'data/VOCdevkit/' -# file_client_args = dict( +# Example to use different file client +# Method 1: simply set the data root and let the file I/O module +# automatically infer from prefix (not support LMDB and Memcache yet) + +# data_root = 's3://openmmlab/datasets/segmentation/VOCdevkit/' + +# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6 +# backend_args = dict( # backend='petrel', # path_mapping=dict({ -# './data/': 's3://openmmlab/datasets/detection/', -# 'data/': 's3://openmmlab/datasets/detection/' +# './data/': 's3://openmmlab/datasets/segmentation/', +# 'data/': 's3://openmmlab/datasets/segmentation/' # })) -file_client_args = dict(backend='disk') +backend_args = None train_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='LoadAnnotations', with_bbox=True), dict(type='Resize', scale=(1000, 600), keep_ratio=True), dict(type='RandomFlip', prob=0.5), dict(type='PackDetInputs') ] test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='Resize', scale=(1000, 600), keep_ratio=True), # avoid bboxes being resized dict(type='LoadAnnotations', with_bbox=True), @@ -50,7 +57,8 @@ data_prefix=dict(sub_data_root='VOC2007/'), filter_cfg=dict( 
filter_empty_gt=True, min_size=32, bbox_min_size=32), - pipeline=train_pipeline), + pipeline=train_pipeline, + backend_args=backend_args), dict( type=dataset_type, data_root=data_root, @@ -58,7 +66,8 @@ data_prefix=dict(sub_data_root='VOC2012/'), filter_cfg=dict( filter_empty_gt=True, min_size=32, bbox_min_size=32), - pipeline=train_pipeline) + pipeline=train_pipeline, + backend_args=backend_args) ]))) val_dataloader = dict( @@ -73,7 +82,8 @@ ann_file='VOC2007/ImageSets/Main/test.txt', data_prefix=dict(sub_data_root='VOC2007/'), test_mode=True, - pipeline=test_pipeline)) + pipeline=test_pipeline, + backend_args=backend_args)) test_dataloader = val_dataloader # Pascal VOC2007 uses `11points` as default evaluate mode, while PASCAL diff --git a/configs/albu_example/mask-rcnn_r50_fpn_albu-1x_coco.py b/configs/albu_example/mask-rcnn_r50_fpn_albu-1x_coco.py index 8a797d41fe5..b8a2780e99b 100644 --- a/configs/albu_example/mask-rcnn_r50_fpn_albu-1x_coco.py +++ b/configs/albu_example/mask-rcnn_r50_fpn_albu-1x_coco.py @@ -41,9 +41,7 @@ p=0.1), ] train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict(type='Resize', scale=(1333, 800), keep_ratio=True), dict( diff --git a/configs/centernet/centernet-update_r50-caffe_fpn_ms-1x_coco.py b/configs/centernet/centernet-update_r50-caffe_fpn_ms-1x_coco.py index 5e5a24ee5e4..1f6e2b3919d 100644 --- a/configs/centernet/centernet-update_r50-caffe_fpn_ms-1x_coco.py +++ b/configs/centernet/centernet-update_r50-caffe_fpn_ms-1x_coco.py @@ -64,9 +64,7 @@ # single-scale training is about 39.3 train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='RandomChoiceResize', diff --git a/configs/centernet/centernet_r18-dcnv2_8xb16-crop512-140e_coco.py b/configs/centernet/centernet_r18-dcnv2_8xb16-crop512-140e_coco.py index 83b07195971..732a55d59ad 100644 --- a/configs/centernet/centernet_r18-dcnv2_8xb16-crop512-140e_coco.py +++ b/configs/centernet/centernet_r18-dcnv2_8xb16-crop512-140e_coco.py @@ -39,9 +39,7 @@ test_cfg=dict(topk=100, local_maximum_kernel=3, max_per_img=100)) train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='PhotoMetricDistortion', @@ -67,8 +65,8 @@ test_pipeline = [ dict( type='LoadImageFromFile', - to_float32=True, - file_client_args={{_base_.file_client_args}}), + backend_args={{_base_.backend_args}}, + to_float32=True), # don't need Resize dict( type='RandomCenterCropPad', @@ -102,7 +100,9 @@ ann_file='annotations/instances_train2017.json', data_prefix=dict(img='train2017/'), filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline))) + pipeline=train_pipeline, + backend_args={{_base_.backend_args}}, + ))) val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) test_dataloader = val_dataloader diff --git a/configs/centernet/centernet_tta.py b/configs/centernet/centernet_tta.py index 0c68914267e..edd7b03ecde 100644 --- a/configs/centernet/centernet_tta.py +++ b/configs/centernet/centernet_tta.py @@ -5,10 +5,7 @@ tta_cfg=dict(nms=dict(type='nms', iou_threshold=0.5), max_per_img=100)) 
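For reference, `backend_args=None` defers the choice of storage backend to mmengine's file I/O helpers, which infer it from the path prefix (plain paths go to disk, `s3://` to Petrel, `http(s)://` to HTTP). A minimal sketch of that behaviour, assuming mmengine >= 0.2 and a hypothetical image path:

import mmcv
from mmengine.fileio import get

backend_args = None  # None: infer the backend from the path prefix
img_path = 'data/coco/val2017/000000000139.jpg'  # hypothetical local path
img_bytes = get(img_path, backend_args=backend_args)  # raw encoded bytes
img = mmcv.imfrombytes(img_bytes)  # decode to a BGR ndarray, as the loaders do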
tta_pipeline = [ - dict( - type='LoadImageFromFile', - to_float32=True, - file_client_args=dict(backend='disk')), + dict(type='LoadImageFromFile', to_float32=True, backend_args=None), dict( type='TestTimeAug', transforms=[ diff --git a/configs/centripetalnet/centripetalnet_hourglass104_16xb6-crop511-210e-mstest_coco.py b/configs/centripetalnet/centripetalnet_hourglass104_16xb6-crop511-210e-mstest_coco.py index 043496f3da2..dd629edb2e8 100644 --- a/configs/centripetalnet/centripetalnet_hourglass104_16xb6-crop511-210e-mstest_coco.py +++ b/configs/centripetalnet/centripetalnet_hourglass104_16xb6-crop511-210e-mstest_coco.py @@ -45,9 +45,7 @@ # data settings train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='PhotoMetricDistortion', @@ -77,7 +75,7 @@ dict( type='LoadImageFromFile', to_float32=True, - file_client_args={{_base_.file_client_args}}), + backend_args={{_base_.backend_args}}), # don't need Resize dict( type='RandomCenterCropPad', diff --git a/configs/common/lsj-100e_coco-detection.py b/configs/common/lsj-100e_coco-detection.py index b03e33809da..bb631e5d5c1 100644 --- a/configs/common/lsj-100e_coco-detection.py +++ b/configs/common/lsj-100e_coco-detection.py @@ -4,17 +4,23 @@ data_root = 'data/coco/' image_size = (1024, 1024) -file_client_args = dict(backend='disk') -# comment out the code below to use different file client -# file_client_args = dict( +# Example to use different file client +# Method 1: simply set the data root and let the file I/O module +# automatically infer from prefix (not support LMDB and Memcache yet) + +# data_root = 's3://openmmlab/datasets/detection/coco/' + +# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6 +# backend_args = dict( # backend='petrel', # path_mapping=dict({ # './data/': 's3://openmmlab/datasets/detection/', # 'data/': 's3://openmmlab/datasets/detection/' # })) +backend_args = None train_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='LoadAnnotations', with_bbox=True), dict( type='RandomResize', @@ -32,7 +38,7 @@ dict(type='PackDetInputs') ] test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='Resize', scale=(1333, 800), keep_ratio=True), dict(type='LoadAnnotations', with_bbox=True), dict( @@ -56,7 +62,8 @@ ann_file='annotations/instances_train2017.json', data_prefix=dict(img='train2017/'), filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline))) + pipeline=train_pipeline, + backend_args=backend_args))) val_dataloader = dict( batch_size=1, num_workers=2, @@ -69,14 +76,16 @@ ann_file='annotations/instances_val2017.json', data_prefix=dict(img='val2017/'), test_mode=True, - pipeline=test_pipeline)) + pipeline=test_pipeline, + backend_args=backend_args)) test_dataloader = val_dataloader val_evaluator = dict( type='CocoMetric', ann_file=data_root + 'annotations/instances_val2017.json', metric='bbox', - format_only=False) + format_only=False, + backend_args=backend_args) test_evaluator = val_evaluator max_epochs = 25 diff --git a/configs/common/lsj-100e_coco-instance.py b/configs/common/lsj-100e_coco-instance.py index b00ab686126..6e62729d639 100644 --- a/configs/common/lsj-100e_coco-instance.py +++ 
b/configs/common/lsj-100e_coco-instance.py @@ -4,17 +4,23 @@ data_root = 'data/coco/' image_size = (1024, 1024) -file_client_args = dict(backend='disk') -# comment out the code below to use different file client -# file_client_args = dict( +# Example to use different file client +# Method 1: simply set the data root and let the file I/O module +# automatically infer from prefix (not support LMDB and Memcache yet) + +# data_root = 's3://openmmlab/datasets/detection/coco/' + +# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6 +# backend_args = dict( # backend='petrel', # path_mapping=dict({ # './data/': 's3://openmmlab/datasets/detection/', # 'data/': 's3://openmmlab/datasets/detection/' # })) +backend_args = None train_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict( type='RandomResize', @@ -32,7 +38,7 @@ dict(type='PackDetInputs') ] test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='Resize', scale=(1333, 800), keep_ratio=True), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict( @@ -56,7 +62,8 @@ ann_file='annotations/instances_train2017.json', data_prefix=dict(img='train2017/'), filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline))) + pipeline=train_pipeline, + backend_args=backend_args))) val_dataloader = dict( batch_size=1, num_workers=2, @@ -69,14 +76,16 @@ ann_file='annotations/instances_val2017.json', data_prefix=dict(img='val2017/'), test_mode=True, - pipeline=test_pipeline)) + pipeline=test_pipeline, + backend_args=backend_args)) test_dataloader = val_dataloader val_evaluator = dict( type='CocoMetric', ann_file=data_root + 'annotations/instances_val2017.json', metric=['bbox', 'segm'], - format_only=False) + format_only=False, + backend_args=backend_args) test_evaluator = val_evaluator max_epochs = 25 diff --git a/configs/common/ms-90k_coco.py b/configs/common/ms-90k_coco.py index 7d7b5f35975..e2d6c3dafb6 100644 --- a/configs/common/ms-90k_coco.py +++ b/configs/common/ms-90k_coco.py @@ -3,20 +3,27 @@ # dataset settings dataset_type = 'CocoDataset' data_root = 'data/coco/' -# file_client_args = dict( +# Example to use different file client +# Method 1: simply set the data root and let the file I/O module +# automatically infer from prefix (not support LMDB and Memcache yet) + +# data_root = 's3://openmmlab/datasets/detection/coco/' + +# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6 +# backend_args = dict( # backend='petrel', # path_mapping=dict({ # './data/': 's3://openmmlab/datasets/detection/', # 'data/': 's3://openmmlab/datasets/detection/' # })) -file_client_args = dict(backend='disk') +backend_args = None # Align with Detectron2 backend = 'pillow' train_pipeline = [ dict( type='LoadImageFromFile', - file_client_args=file_client_args, + backend_args=backend_args, imdecode_backend=backend), dict(type='LoadAnnotations', with_bbox=True), dict( @@ -31,7 +38,7 @@ test_pipeline = [ dict( type='LoadImageFromFile', - file_client_args=file_client_args, + backend_args=backend_args, imdecode_backend=backend), dict(type='Resize', scale=(1333, 800), keep_ratio=True, backend=backend), dict(type='LoadAnnotations', with_bbox=True), @@ -53,7 +60,8 @@ ann_file='annotations/instances_train2017.json', 
data_prefix=dict(img='train2017/'), filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline)) + pipeline=train_pipeline, + backend_args=backend_args)) val_dataloader = dict( batch_size=1, num_workers=2, @@ -67,14 +75,16 @@ ann_file='annotations/instances_val2017.json', data_prefix=dict(img='val2017/'), test_mode=True, - pipeline=test_pipeline)) + pipeline=test_pipeline, + backend_args=backend_args)) test_dataloader = val_dataloader val_evaluator = dict( type='CocoMetric', ann_file=data_root + 'annotations/instances_val2017.json', metric='bbox', - format_only=False) + format_only=False, + backend_args=backend_args) test_evaluator = val_evaluator # training schedule for 90k diff --git a/configs/common/ms-poly-90k_coco-instance.py b/configs/common/ms-poly-90k_coco-instance.py index 2a2deb5bf00..d5566b3c3b8 100644 --- a/configs/common/ms-poly-90k_coco-instance.py +++ b/configs/common/ms-poly-90k_coco-instance.py @@ -3,20 +3,27 @@ # dataset settings dataset_type = 'CocoDataset' data_root = 'data/coco/' -# file_client_args = dict( +# Example to use different file client +# Method 1: simply set the data root and let the file I/O module +# automatically infer from prefix (not support LMDB and Memcache yet) + +# data_root = 's3://openmmlab/datasets/detection/coco/' + +# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6 +# backend_args = dict( # backend='petrel', # path_mapping=dict({ # './data/': 's3://openmmlab/datasets/detection/', # 'data/': 's3://openmmlab/datasets/detection/' # })) -file_client_args = dict(backend='disk') +backend_args = None # Align with Detectron2 backend = 'pillow' train_pipeline = [ dict( type='LoadImageFromFile', - file_client_args=file_client_args, + backend_args=backend_args, imdecode_backend=backend), dict( type='LoadAnnotations', @@ -35,7 +42,7 @@ test_pipeline = [ dict( type='LoadImageFromFile', - file_client_args=file_client_args, + backend_args=backend_args, imdecode_backend=backend), dict(type='Resize', scale=(1333, 800), keep_ratio=True, backend=backend), dict( @@ -61,7 +68,8 @@ ann_file='annotations/instances_train2017.json', data_prefix=dict(img='train2017/'), filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline)) + pipeline=train_pipeline, + backend_args=backend_args)) val_dataloader = dict( batch_size=1, num_workers=2, @@ -75,14 +83,16 @@ ann_file='annotations/instances_val2017.json', data_prefix=dict(img='val2017/'), test_mode=True, - pipeline=test_pipeline)) + pipeline=test_pipeline, + backend_args=backend_args)) test_dataloader = val_dataloader val_evaluator = dict( type='CocoMetric', ann_file=data_root + 'annotations/instances_val2017.json', metric=['bbox', 'segm'], - format_only=False) + format_only=False, + backend_args=backend_args) test_evaluator = val_evaluator # training schedule for 90k diff --git a/configs/common/ms-poly_3x_coco-instance.py b/configs/common/ms-poly_3x_coco-instance.py index 6a3d5e5569d..04072f9b84c 100644 --- a/configs/common/ms-poly_3x_coco-instance.py +++ b/configs/common/ms-poly_3x_coco-instance.py @@ -3,18 +3,25 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' -# file_client_args = dict( +# Example to use different file client +# Method 1: simply set the data root and let the file I/O module +# automatically infer from prefix (not support LMDB and Memcache yet) + +# data_root = 's3://openmmlab/datasets/detection/coco/' + +# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6 +# backend_args = dict( # backend='petrel', # 
path_mapping=dict({ # './data/': 's3://openmmlab/datasets/detection/', # 'data/': 's3://openmmlab/datasets/detection/' # })) -file_client_args = dict(backend='disk') +backend_args = None # In mstrain 3x config, img_scale=[(1333, 640), (1333, 800)], # multiscale_mode='range' train_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict( type='LoadAnnotations', with_bbox=True, @@ -27,7 +34,7 @@ dict(type='PackDetInputs'), ] test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='Resize', scale=(1333, 800), keep_ratio=True), dict( type='LoadAnnotations', @@ -55,7 +62,8 @@ ann_file='annotations/instances_train2017.json', data_prefix=dict(img='train2017/'), filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline))) + pipeline=train_pipeline, + backend_args=backend_args))) val_dataloader = dict( batch_size=2, num_workers=2, @@ -68,13 +76,15 @@ ann_file='annotations/instances_val2017.json', data_prefix=dict(img='val2017/'), test_mode=True, - pipeline=test_pipeline)) + pipeline=test_pipeline, + backend_args=backend_args)) test_dataloader = val_dataloader val_evaluator = dict( type='CocoMetric', ann_file=data_root + 'annotations/instances_val2017.json', - metric=['bbox', 'segm']) + metric=['bbox', 'segm'], + backend_args=backend_args) test_evaluator = val_evaluator # training schedule for 3x with `RepeatDataset` diff --git a/configs/common/ms_3x_coco-instance.py b/configs/common/ms_3x_coco-instance.py index cae37d176ac..840a2437b30 100644 --- a/configs/common/ms_3x_coco-instance.py +++ b/configs/common/ms_3x_coco-instance.py @@ -4,16 +4,23 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' -# file_client_args = dict( +# Example to use different file client +# Method 1: simply set the data root and let the file I/O module +# automatically infer from prefix (not support LMDB and Memcache yet) + +# data_root = 's3://openmmlab/datasets/detection/coco/' + +# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6 +# backend_args = dict( # backend='petrel', # path_mapping=dict({ # './data/': 's3://openmmlab/datasets/detection/', # 'data/': 's3://openmmlab/datasets/detection/' # })) -file_client_args = dict(backend='disk') +backend_args = None train_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict( type='RandomResize', scale=[(1333, 640), (1333, 800)], @@ -22,7 +29,7 @@ dict(type='PackDetInputs') ] test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='Resize', scale=(1333, 800), keep_ratio=True), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict( @@ -42,7 +49,8 @@ ann_file='annotations/instances_train2017.json', data_prefix=dict(img='train2017/'), filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline)) + pipeline=train_pipeline, + backend_args=backend_args)) val_dataloader = dict( batch_size=2, num_workers=2, @@ -58,13 +66,15 @@ ann_file='annotations/instances_val2017.json', data_prefix=dict(img='val2017/'), test_mode=True, - pipeline=test_pipeline))) + pipeline=test_pipeline, + backend_args=backend_args))) test_dataloader = val_dataloader val_evaluator = dict( 
type='CocoMetric', ann_file=data_root + 'annotations/instances_val2017.json', - metric='bbox') + metric='bbox', + backend_args=backend_args) test_evaluator = val_evaluator # training schedule for 3x with `RepeatDataset` diff --git a/configs/common/ms_3x_coco.py b/configs/common/ms_3x_coco.py index 0ca42634478..facbb34cf05 100644 --- a/configs/common/ms_3x_coco.py +++ b/configs/common/ms_3x_coco.py @@ -4,16 +4,23 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' -# file_client_args = dict( +# Example to use different file client +# Method 1: simply set the data root and let the file I/O module +# automatically infer from prefix (not support LMDB and Memcache yet) + +# data_root = 's3://openmmlab/datasets/detection/coco/' + +# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6 +# backend_args = dict( # backend='petrel', # path_mapping=dict({ # './data/': 's3://openmmlab/datasets/detection/', # 'data/': 's3://openmmlab/datasets/detection/' # })) -file_client_args = dict(backend='disk') +backend_args = None train_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='LoadAnnotations', with_bbox=True), dict( type='RandomResize', scale=[(1333, 640), (1333, 800)], @@ -22,7 +29,7 @@ dict(type='PackDetInputs') ] test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='Resize', scale=(1333, 800), keep_ratio=True), dict(type='LoadAnnotations', with_bbox=True), dict( @@ -45,7 +52,8 @@ ann_file='annotations/instances_train2017.json', data_prefix=dict(img='train2017/'), filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline))) + pipeline=train_pipeline, + backend_args=backend_args))) val_dataloader = dict( batch_size=1, num_workers=2, @@ -58,13 +66,15 @@ ann_file='annotations/instances_val2017.json', data_prefix=dict(img='val2017/'), test_mode=True, - pipeline=test_pipeline)) + pipeline=test_pipeline, + backend_args=backend_args)) test_dataloader = val_dataloader val_evaluator = dict( type='CocoMetric', ann_file=data_root + 'annotations/instances_val2017.json', - metric='bbox') + metric='bbox', + backend_args=backend_args) test_evaluator = val_evaluator # training schedule for 3x with `RepeatDataset` diff --git a/configs/common/ssj_270k_coco-instance.py b/configs/common/ssj_270k_coco-instance.py index 677f375e1a1..7407644fd59 100644 --- a/configs/common/ssj_270k_coco-instance.py +++ b/configs/common/ssj_270k_coco-instance.py @@ -5,19 +5,25 @@ image_size = (1024, 1024) -file_client_args = dict(backend='disk') -# comment out the code below to use different file client -# file_client_args = dict( +# Example to use different file client +# Method 1: simply set the data root and let the file I/O module +# automatically infer from prefix (not support LMDB and Memcache yet) + +# data_root = 's3://openmmlab/datasets/detection/coco/' + +# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6 +# backend_args = dict( # backend='petrel', # path_mapping=dict({ # './data/': 's3://openmmlab/datasets/detection/', # 'data/': 's3://openmmlab/datasets/detection/' # })) +backend_args = None # Standard Scale Jittering (SSJ) resizes and crops an image # with a resize range of 0.8 to 1.25 of the original image size. 
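The SSJ comment above boils down to a resize-then-crop pair; a minimal standalone sketch with this config's values (standard mmdet transforms, parameters assumed from the surrounding hunks):

# Standard Scale Jittering: jitter the target scale to 0.8-1.25x of
# image_size (so 1024 becomes roughly 819-1280), then crop back to a
# fixed 1024x1024 canvas.
image_size = (1024, 1024)
ssj_transforms = [
    dict(
        type='RandomResize',
        scale=image_size,
        ratio_range=(0.8, 1.25),
        keep_ratio=True),
    dict(
        type='RandomCrop',
        crop_type='absolute_range',
        crop_size=image_size,
        recompute_bbox=True,
        allow_negative_crop=True),
]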
train_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict( type='RandomResize', @@ -35,7 +41,7 @@ dict(type='PackDetInputs') ] test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='Resize', scale=(1333, 800), keep_ratio=True), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict( @@ -55,7 +61,8 @@ ann_file='annotations/instances_train2017.json', data_prefix=dict(img='train2017/'), filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline)) + pipeline=train_pipeline, + backend_args=backend_args)) val_dataloader = dict( batch_size=1, num_workers=2, @@ -68,14 +75,16 @@ ann_file='annotations/instances_val2017.json', data_prefix=dict(img='val2017/'), test_mode=True, - pipeline=test_pipeline)) + pipeline=test_pipeline, + backend_args=backend_args)) test_dataloader = val_dataloader val_evaluator = dict( type='CocoMetric', ann_file=data_root + 'annotations/instances_val2017.json', metric=['bbox', 'segm'], - format_only=False) + format_only=False, + backend_args=backend_args) test_evaluator = val_evaluator # The model is trained by 270k iterations with batch_size 64, diff --git a/configs/common/ssj_scp_270k_coco-instance.py b/configs/common/ssj_scp_270k_coco-instance.py index 2289f2f6234..06159dd4031 100644 --- a/configs/common/ssj_scp_270k_coco-instance.py +++ b/configs/common/ssj_scp_270k_coco-instance.py @@ -5,19 +5,25 @@ image_size = (1024, 1024) -file_client_args = dict(backend='disk') -# comment out the code below to use different file client -# file_client_args = dict( +# Example to use different file client +# Method 1: simply set the data root and let the file I/O module +# automatically infer from prefix (not support LMDB and Memcache yet) + +# data_root = 's3://openmmlab/datasets/detection/coco/' + +# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6 +# backend_args = dict( # backend='petrel', # path_mapping=dict({ # './data/': 's3://openmmlab/datasets/detection/', # 'data/': 's3://openmmlab/datasets/detection/' # })) +backend_args = None # Standard Scale Jittering (SSJ) resizes and crops an image # with a resize range of 0.8 to 1.25 of the original image size. 
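This file layers Simple Copy-Paste on top of SSJ; a sketch of how the two pipelines split the work (wrapper and transform names as used by mmdet, wiring assumed from the hunks below):

# The inner dataset yields SSJ-augmented samples via load_pipeline;
# MultiImageMixDataset then runs train_pipeline, whose CopyPaste step
# pastes instances from a second sample into the current one.
train_pipeline = [dict(type='CopyPaste', max_num_pasted=100)]
train_dataloader = dict(
    dataset=dict(
        type='MultiImageMixDataset',
        dataset=dict(type=dataset_type, pipeline=load_pipeline),
        pipeline=train_pipeline))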
load_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict( type='RandomResize', @@ -49,5 +55,6 @@ ann_file='annotations/instances_train2017.json', data_prefix=dict(img='train2017/'), filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=load_pipeline), + pipeline=load_pipeline, + backend_args=backend_args), pipeline=train_pipeline)) diff --git a/configs/convnext/cascade-mask-rcnn_convnext-t-p4-w7_fpn_4conv1fc-giou_amp-ms-crop-3x_coco.py b/configs/convnext/cascade-mask-rcnn_convnext-t-p4-w7_fpn_4conv1fc-giou_amp-ms-crop-3x_coco.py index 53edb391921..1e031e90d52 100644 --- a/configs/convnext/cascade-mask-rcnn_convnext-t-p4-w7_fpn_4conv1fc-giou_amp-ms-crop-3x_coco.py +++ b/configs/convnext/cascade-mask-rcnn_convnext-t-p4-w7_fpn_4conv1fc-giou_amp-ms-crop-3x_coco.py @@ -85,9 +85,7 @@ # augmentation strategy originates from DETR / Sparse RCNN train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict(type='RandomFlip', prob=0.5), dict( diff --git a/configs/convnext/mask-rcnn_convnext-t-p4-w7_fpn_amp-ms-crop-3x_coco.py b/configs/convnext/mask-rcnn_convnext-t-p4-w7_fpn_amp-ms-crop-3x_coco.py index e9932c44b03..23d46e289eb 100644 --- a/configs/convnext/mask-rcnn_convnext-t-p4-w7_fpn_amp-ms-crop-3x_coco.py +++ b/configs/convnext/mask-rcnn_convnext-t-p4-w7_fpn_amp-ms-crop-3x_coco.py @@ -26,9 +26,7 @@ # augmentation strategy originates from DETR / Sparse RCNN train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict(type='RandomFlip', prob=0.5), dict( diff --git a/configs/cornernet/cornernet_hourglass104_8xb6-210e-mstest_coco.py b/configs/cornernet/cornernet_hourglass104_8xb6-210e-mstest_coco.py index 38c43a1c9f2..20751ef4af1 100644 --- a/configs/cornernet/cornernet_hourglass104_8xb6-210e-mstest_coco.py +++ b/configs/cornernet/cornernet_hourglass104_8xb6-210e-mstest_coco.py @@ -45,9 +45,7 @@ # data settings train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='PhotoMetricDistortion', @@ -78,7 +76,8 @@ dict( type='LoadImageFromFile', to_float32=True, - file_client_args={{_base_.file_client_args}}), + backend_args={{_base_.backend_args}}, + ), # don't need Resize dict( type='RandomCenterCropPad', diff --git a/configs/crowddet/crowddet-rcnn_r50_fpn_8xb2-30e_crowdhuman.py b/configs/crowddet/crowddet-rcnn_r50_fpn_8xb2-30e_crowdhuman.py index 97ec2db3c01..8815be77d49 100644 --- a/configs/crowddet/crowddet-rcnn_r50_fpn_8xb2-30e_crowdhuman.py +++ b/configs/crowddet/crowddet-rcnn_r50_fpn_8xb2-30e_crowdhuman.py @@ -132,9 +132,24 @@ dataset_type = 'CrowdHumanDataset' data_root = 'data/CrowdHuman/' -file_client_args = dict(backend='disk') + +# Example to use different file client +# Method 1: simply set the data root and let the file I/O module +# automatically infer from prefix (not support LMDB and Memcache yet) + +# data_root = 's3://openmmlab/datasets/tracking/CrowdHuman/' + +# Method 2: Use `backend_args`, 
`file_client_args` in versions before 3.0.0rc6 +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# './data/': 's3://openmmlab/datasets/tracking/', +# 'data/': 's3://openmmlab/datasets/tracking/' +# })) +backend_args = None + train_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='LoadAnnotations', with_bbox=True), dict(type='RandomFlip', prob=0.5), dict( @@ -143,7 +158,7 @@ 'flip_direction')) ] test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='Resize', scale=(1400, 800), keep_ratio=True), # avoid bboxes being resized dict(type='LoadAnnotations', with_bbox=True), @@ -165,7 +180,8 @@ ann_file='annotation_train.odgt', data_prefix=dict(img='Images/'), filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline)) + pipeline=train_pipeline, + backend_args=backend_args)) val_dataloader = dict( batch_size=1, num_workers=2, @@ -178,13 +194,15 @@ ann_file='annotation_val.odgt', data_prefix=dict(img='Images/'), test_mode=True, - pipeline=test_pipeline)) + pipeline=test_pipeline, + backend_args=backend_args)) test_dataloader = val_dataloader val_evaluator = dict( type='CrowdHumanMetric', ann_file=data_root + 'annotation_val.odgt', - metric=['AP', 'MR', 'JI']) + metric=['AP', 'MR', 'JI'], + backend_args=backend_args) test_evaluator = val_evaluator train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=30, val_interval=1) diff --git a/configs/dab_detr/dab-detr_r50_8xb2-50e_coco.py b/configs/dab_detr/dab-detr_r50_8xb2-50e_coco.py index 723f7b1340e..314ed97e2d8 100644 --- a/configs/dab_detr/dab-detr_r50_8xb2-50e_coco.py +++ b/configs/dab_detr/dab-detr_r50_8xb2-50e_coco.py @@ -93,9 +93,7 @@ # train_pipeline, NOTE the img_scale and the Pad's size_divisor is different # from the default setting in mmdet. train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict(type='RandomFlip', prob=0.5), dict( diff --git a/configs/deformable_detr/deformable-detr_r50_16xb2-50e_coco.py b/configs/deformable_detr/deformable-detr_r50_16xb2-50e_coco.py index b2f064cc511..e0dee411c8e 100644 --- a/configs/deformable_detr/deformable-detr_r50_16xb2-50e_coco.py +++ b/configs/deformable_detr/deformable-detr_r50_16xb2-50e_coco.py @@ -81,9 +81,7 @@ # train_pipeline, NOTE the img_scale and the Pad's size_divisor is different # from the default setting in mmdet. train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict(type='RandomFlip', prob=0.5), dict( diff --git a/configs/detr/detr_r50_8xb2-150e_coco.py b/configs/detr/detr_r50_8xb2-150e_coco.py index 1aba1c3c1ca..aaa15410532 100644 --- a/configs/detr/detr_r50_8xb2-150e_coco.py +++ b/configs/detr/detr_r50_8xb2-150e_coco.py @@ -89,9 +89,7 @@ # train_pipeline, NOTE the img_scale and the Pad's size_divisor is different # from the default setting in mmdet. 
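The per-model configs here pull the shared value in with `{{_base_.backend_args}}`; a minimal sketch of that mmengine interpolation (base path assumed):

# `{{_base_.backend_args}}` is substituted at config-parse time with the
# `backend_args` defined in the inherited _base_ dataset file.
_base_ = '../_base_/datasets/coco_detection.py'  # defines backend_args = None
train_pipeline = [
    dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
    dict(type='LoadAnnotations', with_bbox=True),
]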
train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict(type='RandomFlip', prob=0.5), dict( diff --git a/configs/dino/dino-4scale_r50_8xb2-12e_coco.py b/configs/dino/dino-4scale_r50_8xb2-12e_coco.py index eb5f2a44704..5831f898b4a 100644 --- a/configs/dino/dino-4scale_r50_8xb2-12e_coco.py +++ b/configs/dino/dino-4scale_r50_8xb2-12e_coco.py @@ -88,9 +88,7 @@ # train_pipeline, NOTE the img_scale and the Pad's size_divisor is different # from the default setting in mmdet. train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict(type='RandomFlip', prob=0.5), dict( diff --git a/configs/dyhead/atss_r50-caffe_fpn_dyhead_1x_coco.py b/configs/dyhead/atss_r50-caffe_fpn_dyhead_1x_coco.py index cbaf9a7c9b3..8716f1226cb 100644 --- a/configs/dyhead/atss_r50-caffe_fpn_dyhead_1x_coco.py +++ b/configs/dyhead/atss_r50-caffe_fpn_dyhead_1x_coco.py @@ -82,18 +82,14 @@ optim_wrapper = dict(optimizer=dict(lr=0.01)) train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict(type='Resize', scale=(1333, 800), keep_ratio=True, backend='pillow'), dict(type='RandomFlip', prob=0.5), dict(type='PackDetInputs') ] test_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='Resize', scale=(1333, 800), keep_ratio=True, backend='pillow'), dict(type='LoadAnnotations', with_bbox=True), dict( diff --git a/configs/dyhead/atss_swin-l-p4-w12_fpn_dyhead_ms-2x_coco.py b/configs/dyhead/atss_swin-l-p4-w12_fpn_dyhead_ms-2x_coco.py index ffc7f44a745..f537b9dc9b1 100644 --- a/configs/dyhead/atss_swin-l-p4-w12_fpn_dyhead_ms-2x_coco.py +++ b/configs/dyhead/atss_swin-l-p4-w12_fpn_dyhead_ms-2x_coco.py @@ -90,9 +90,7 @@ # dataset settings train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='RandomResize', @@ -103,9 +101,7 @@ dict(type='PackDetInputs') ] test_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='Resize', scale=(2000, 1200), keep_ratio=True, backend='pillow'), dict(type='LoadAnnotations', with_bbox=True), dict( @@ -124,7 +120,8 @@ ann_file='annotations/instances_train2017.json', data_prefix=dict(img='train2017/'), filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline))) + pipeline=train_pipeline, + backend_args={{_base_.backend_args}}))) val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) test_dataloader = val_dataloader diff --git a/configs/efficientnet/retinanet_effb3_fpn_8xb4-crop896-1x_coco.py b/configs/efficientnet/retinanet_effb3_fpn_8xb4-crop896-1x_coco.py index 039ed5fdc05..2d0d9cefd0b 100644 --- a/configs/efficientnet/retinanet_effb3_fpn_8xb4-crop896-1x_coco.py +++ b/configs/efficientnet/retinanet_effb3_fpn_8xb4-crop896-1x_coco.py @@ -41,9 +41,7 @@ # dataset 
settings train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='RandomResize', @@ -55,9 +53,7 @@ dict(type='PackDetInputs') ] test_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='Resize', scale=image_size, keep_ratio=True), dict(type='LoadAnnotations', with_bbox=True), dict( diff --git a/configs/fast_rcnn/README.md b/configs/fast_rcnn/README.md index 91342474482..cd582ec8c6f 100644 --- a/configs/fast_rcnn/README.md +++ b/configs/fast_rcnn/README.md @@ -68,7 +68,7 @@ The `pred_instance` is an `InstanceData` containing the sorted boxes and scores train_pipeline = [ dict( type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + backend_args={{_base_.backend_args}}), dict(type='LoadProposals', num_max_proposals=2000), dict(type='LoadAnnotations', with_bbox=True), dict( @@ -82,7 +82,7 @@ The `pred_instance` is an `InstanceData` containing the sorted boxes and scores test_pipeline = [ dict( type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + backend_args={{_base_.backend_args}}), dict(type='LoadProposals', num_max_proposals=None), dict( type='ProposalBroadcaster', diff --git a/configs/fast_rcnn/fast-rcnn_r50_fpn_1x_coco.py b/configs/fast_rcnn/fast-rcnn_r50_fpn_1x_coco.py index 5008292330f..daefe2d2d28 100644 --- a/configs/fast_rcnn/fast-rcnn_r50_fpn_1x_coco.py +++ b/configs/fast_rcnn/fast-rcnn_r50_fpn_1x_coco.py @@ -4,9 +4,7 @@ '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' ] train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadProposals', num_max_proposals=2000), dict(type='LoadAnnotations', with_bbox=True), dict( @@ -18,9 +16,7 @@ dict(type='PackDetInputs') ] test_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadProposals', num_max_proposals=None), dict( type='ProposalBroadcaster', diff --git a/configs/fcos/fcos_r101-caffe_fpn_gn-head_ms-640-800-2x_coco.py b/configs/fcos/fcos_r101-caffe_fpn_gn-head_ms-640-800-2x_coco.py index 0b8039c1e71..859b45c94b2 100644 --- a/configs/fcos/fcos_r101-caffe_fpn_gn-head_ms-640-800-2x_coco.py +++ b/configs/fcos/fcos_r101-caffe_fpn_gn-head_ms-640-800-2x_coco.py @@ -10,9 +10,7 @@ # dataset settings train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='RandomChoiceResize', diff --git a/configs/fcos/fcos_r50-caffe_fpn_gn-head_ms-640-800-2x_coco.py b/configs/fcos/fcos_r50-caffe_fpn_gn-head_ms-640-800-2x_coco.py index 9888dd8f25f..12e9160d812 100644 --- a/configs/fcos/fcos_r50-caffe_fpn_gn-head_ms-640-800-2x_coco.py +++ b/configs/fcos/fcos_r50-caffe_fpn_gn-head_ms-640-800-2x_coco.py @@ -2,9 +2,7 @@ # dataset settings train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( 
type='RandomChoiceResize', diff --git a/configs/fcos/fcos_x101-64x4d_fpn_gn-head_ms-640-800-2x_coco.py b/configs/fcos/fcos_x101-64x4d_fpn_gn-head_ms-640-800-2x_coco.py index 3f58665dce5..aae1fceea58 100644 --- a/configs/fcos/fcos_x101-64x4d_fpn_gn-head_ms-640-800-2x_coco.py +++ b/configs/fcos/fcos_x101-64x4d_fpn_gn-head_ms-640-800-2x_coco.py @@ -24,9 +24,7 @@ # dataset settings train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='RandomChoiceResize', diff --git a/configs/foveabox/fovea_r101_fpn_gn-head-align_ms-640-800-4xb4-2x_coco.py b/configs/foveabox/fovea_r101_fpn_gn-head-align_ms-640-800-4xb4-2x_coco.py index 1ab77bf7458..e1852d581fc 100644 --- a/configs/foveabox/fovea_r101_fpn_gn-head-align_ms-640-800-4xb4-2x_coco.py +++ b/configs/foveabox/fovea_r101_fpn_gn-head-align_ms-640-800-4xb4-2x_coco.py @@ -8,9 +8,7 @@ with_deform=True, norm_cfg=dict(type='GN', num_groups=32, requires_grad=True))) train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='RandomChoiceResize', diff --git a/configs/foveabox/fovea_r50_fpn_gn-head-align_ms-640-800-4xb4-2x_coco.py b/configs/foveabox/fovea_r50_fpn_gn-head-align_ms-640-800-4xb4-2x_coco.py index be240259f3a..5690bcae08c 100644 --- a/configs/foveabox/fovea_r50_fpn_gn-head-align_ms-640-800-4xb4-2x_coco.py +++ b/configs/foveabox/fovea_r50_fpn_gn-head-align_ms-640-800-4xb4-2x_coco.py @@ -4,9 +4,7 @@ with_deform=True, norm_cfg=dict(type='GN', num_groups=32, requires_grad=True))) train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='RandomChoiceResize', diff --git a/configs/fpg/faster-rcnn_r50_fpn_crop640-50e_coco.py b/configs/fpg/faster-rcnn_r50_fpn_crop640-50e_coco.py index 019105dbfac..46211de03f3 100644 --- a/configs/fpg/faster-rcnn_r50_fpn_crop640-50e_coco.py +++ b/configs/fpg/faster-rcnn_r50_fpn_crop640-50e_coco.py @@ -16,9 +16,7 @@ data_root = 'data/coco/' train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='RandomResize', @@ -35,9 +33,7 @@ ] test_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='Resize', scale=image_size, keep_ratio=True), dict( type='PackDetInputs', diff --git a/configs/fpg/mask-rcnn_r50_fpn_crop640-50e_coco.py b/configs/fpg/mask-rcnn_r50_fpn_crop640-50e_coco.py index baaf9a4b1e5..08ca5b6ffd8 100644 --- a/configs/fpg/mask-rcnn_r50_fpn_crop640-50e_coco.py +++ b/configs/fpg/mask-rcnn_r50_fpn_crop640-50e_coco.py @@ -22,9 +22,7 @@ data_root = 'data/coco/' train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict( type='RandomResize', @@ -41,9 +39,7 @@ ] test_pipeline = [ - dict( - type='LoadImageFromFile', - 
file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='Resize', scale=image_size, keep_ratio=True), dict( type='PackDetInputs', diff --git a/configs/gfl/gfl_r50_fpn_ms-2x_coco.py b/configs/gfl/gfl_r50_fpn_ms-2x_coco.py index cb1137e01df..22770eb1019 100644 --- a/configs/gfl/gfl_r50_fpn_ms-2x_coco.py +++ b/configs/gfl/gfl_r50_fpn_ms-2x_coco.py @@ -17,9 +17,7 @@ # multi-scale training train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='RandomResize', scale=[(1333, 480), (1333, 800)], diff --git a/configs/guided_anchoring/ga-retinanet_r101-caffe_fpn_ms-2x.py b/configs/guided_anchoring/ga-retinanet_r101-caffe_fpn_ms-2x.py index 459a1900241..012e89b8338 100644 --- a/configs/guided_anchoring/ga-retinanet_r101-caffe_fpn_ms-2x.py +++ b/configs/guided_anchoring/ga-retinanet_r101-caffe_fpn_ms-2x.py @@ -1,9 +1,7 @@ _base_ = './ga-retinanet_r101-caffe_fpn_1x_coco.py' train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='RandomResize', scale=[(1333, 480), (1333, 960)], diff --git a/configs/hrnet/fcos_hrnetv2p-w32-gn-head_ms-640-800-4xb4-2x_coco.py b/configs/hrnet/fcos_hrnetv2p-w32-gn-head_ms-640-800-4xb4-2x_coco.py index 3c107c8f1b7..4c977bf31ed 100644 --- a/configs/hrnet/fcos_hrnetv2p-w32-gn-head_ms-640-800-4xb4-2x_coco.py +++ b/configs/hrnet/fcos_hrnetv2p-w32-gn-head_ms-640-800-4xb4-2x_coco.py @@ -7,9 +7,7 @@ bgr_to_rgb=False)) train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='RandomChoiceResize', diff --git a/configs/htc/htc_r50_fpn_1x_coco.py b/configs/htc/htc_r50_fpn_1x_coco.py index 03ddb61ab1d..3573f1f6980 100644 --- a/configs/htc/htc_r50_fpn_1x_coco.py +++ b/configs/htc/htc_r50_fpn_1x_coco.py @@ -20,9 +20,7 @@ type='CrossEntropyLoss', ignore_index=255, loss_weight=0.2)))) train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict( type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True), dict(type='Resize', scale=(1333, 800), keep_ratio=True), diff --git a/configs/instaboost/cascade-mask-rcnn_r50_fpn_instaboost-4x_coco.py b/configs/instaboost/cascade-mask-rcnn_r50_fpn_instaboost-4x_coco.py index 00165fb0342..f7736cf5756 100644 --- a/configs/instaboost/cascade-mask-rcnn_r50_fpn_instaboost-4x_coco.py +++ b/configs/instaboost/cascade-mask-rcnn_r50_fpn_instaboost-4x_coco.py @@ -1,9 +1,7 @@ _base_ = '../cascade_rcnn/cascade-mask-rcnn_r50_fpn_1x_coco.py' train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict( type='InstaBoost', action_candidate=('normal', 'horizontal', 'skip'), diff --git a/configs/instaboost/mask-rcnn_r50_fpn_instaboost-4x_coco.py b/configs/instaboost/mask-rcnn_r50_fpn_instaboost-4x_coco.py index 4e90eda8387..0a8c9be81f0 100644 --- a/configs/instaboost/mask-rcnn_r50_fpn_instaboost-4x_coco.py +++ 
b/configs/instaboost/mask-rcnn_r50_fpn_instaboost-4x_coco.py @@ -1,9 +1,7 @@ _base_ = '../mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py' train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict( type='InstaBoost', action_candidate=('normal', 'horizontal', 'skip'), diff --git a/configs/ld/ld_r101-gflv1-r101-dcn_fpn_2x_coco.py b/configs/ld/ld_r101-gflv1-r101-dcn_fpn_2x_coco.py index 681c9e086c2..a7e928bdc23 100644 --- a/configs/ld/ld_r101-gflv1-r101-dcn_fpn_2x_coco.py +++ b/configs/ld/ld_r101-gflv1-r101-dcn_fpn_2x_coco.py @@ -38,9 +38,7 @@ # multi-scale training train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='RandomResize', scale=[(1333, 480), (1333, 800)], diff --git a/configs/mask2former/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py b/configs/mask2former/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py index 1a20244299b..c53e981bf0d 100644 --- a/configs/mask2former/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py +++ b/configs/mask2former/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py @@ -150,12 +150,16 @@ # dataset settings data_root = 'data/coco/' train_pipeline = [ - dict(type='LoadImageFromFile', to_float32=True), + dict( + type='LoadImageFromFile', + to_float32=True, + backend_args={{_base_.backend_args}}), dict( type='LoadPanopticAnnotations', with_bbox=True, with_mask=True, - with_seg=True), + with_seg=True, + backend_args={{_base_.backend_args}}), dict(type='RandomFlip', prob=0.5), # large scale jittering dict( @@ -179,12 +183,12 @@ type='CocoPanopticMetric', ann_file=data_root + 'annotations/panoptic_val2017.json', seg_prefix=data_root + 'annotations/panoptic_val2017/', - ), + backend_args={{_base_.backend_args}}), dict( type='CocoMetric', ann_file=data_root + 'annotations/instances_val2017.json', metric=['bbox', 'segm'], - ) + backend_args={{_base_.backend_args}}) ] test_evaluator = val_evaluator diff --git a/configs/mask2former/mask2former_r50_8xb2-lsj-50e_coco.py b/configs/mask2former/mask2former_r50_8xb2-lsj-50e_coco.py index 6bc9fd7472a..24a17f58c54 100644 --- a/configs/mask2former/mask2former_r50_8xb2-lsj-50e_coco.py +++ b/configs/mask2former/mask2former_r50_8xb2-lsj-50e_coco.py @@ -36,7 +36,10 @@ # dataset settings train_pipeline = [ - dict(type='LoadImageFromFile', to_float32=True), + dict( + type='LoadImageFromFile', + to_float32=True, + backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict(type='RandomFlip', prob=0.5), # large scale jittering @@ -57,7 +60,10 @@ ] test_pipeline = [ - dict(type='LoadImageFromFile', to_float32=True), + dict( + type='LoadImageFromFile', + to_float32=True, + backend_args={{_base_.backend_args}}), dict(type='Resize', scale=(1333, 800), keep_ratio=True), # If you don't have a gt annotation, delete the pipeline dict(type='LoadAnnotations', with_bbox=True, with_mask=True), @@ -89,5 +95,6 @@ type='CocoMetric', ann_file=data_root + 'annotations/instances_val2017.json', metric=['bbox', 'segm'], - format_only=False) + format_only=False, + backend_args={{_base_.backend_args}}) test_evaluator = val_evaluator diff --git a/configs/mask_rcnn/mask-rcnn_r50-caffe_fpn_ms-1x_coco.py b/configs/mask_rcnn/mask-rcnn_r50-caffe_fpn_ms-1x_coco.py index 6c0f1bde7aa..7702ae14a9c 100644 --- 
a/configs/mask_rcnn/mask-rcnn_r50-caffe_fpn_ms-1x_coco.py +++ b/configs/mask_rcnn/mask-rcnn_r50-caffe_fpn_ms-1x_coco.py @@ -14,9 +14,7 @@ checkpoint='open-mmlab://detectron2/resnet50_caffe'))) train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict( type='RandomChoiceResize', diff --git a/configs/mask_rcnn/mask-rcnn_r50-caffe_fpn_ms-poly-1x_coco.py b/configs/mask_rcnn/mask-rcnn_r50-caffe_fpn_ms-poly-1x_coco.py index dd57d035f08..94d94dd3613 100644 --- a/configs/mask_rcnn/mask-rcnn_r50-caffe_fpn_ms-poly-1x_coco.py +++ b/configs/mask_rcnn/mask-rcnn_r50-caffe_fpn_ms-poly-1x_coco.py @@ -13,9 +13,7 @@ type='Pretrained', checkpoint='open-mmlab://detectron2/resnet50_caffe'))) train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict( type='LoadAnnotations', with_bbox=True, diff --git a/configs/mask_rcnn/mask-rcnn_r50_fpn_poly-1x_coco.py b/configs/mask_rcnn/mask-rcnn_r50_fpn_poly-1x_coco.py index 193dcd1930f..826180ce0a8 100644 --- a/configs/mask_rcnn/mask-rcnn_r50_fpn_poly-1x_coco.py +++ b/configs/mask_rcnn/mask-rcnn_r50_fpn_poly-1x_coco.py @@ -5,9 +5,7 @@ ] train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict( type='LoadAnnotations', with_bbox=True, diff --git a/configs/mask_rcnn/mask-rcnn_x101-32x8d_fpn_ms-poly-1x_coco.py b/configs/mask_rcnn/mask-rcnn_x101-32x8d_fpn_ms-poly-1x_coco.py index a743aaea952..6ee204d9000 100644 --- a/configs/mask_rcnn/mask-rcnn_x101-32x8d_fpn_ms-poly-1x_coco.py +++ b/configs/mask_rcnn/mask-rcnn_x101-32x8d_fpn_ms-poly-1x_coco.py @@ -22,9 +22,7 @@ checkpoint='open-mmlab://detectron2/resnext101_32x8d'))) train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict( type='LoadAnnotations', with_bbox=True, diff --git a/configs/nas_fpn/retinanet_r50_fpn_crop640-50e_coco.py b/configs/nas_fpn/retinanet_r50_fpn_crop640-50e_coco.py index 6062a7601f4..11c34f6758a 100644 --- a/configs/nas_fpn/retinanet_r50_fpn_crop640-50e_coco.py +++ b/configs/nas_fpn/retinanet_r50_fpn_crop640-50e_coco.py @@ -24,9 +24,7 @@ # dataset settings train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='RandomResize', @@ -38,9 +36,7 @@ dict(type='PackDetInputs') ] test_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='Resize', scale=(640, 640), keep_ratio=True), dict(type='LoadAnnotations', with_bbox=True), dict( diff --git a/configs/openimages/ssd300_32xb8-36e_openimages.py b/configs/openimages/ssd300_32xb8-36e_openimages.py index 9847ef5302b..9cb51cae00a 100644 --- a/configs/openimages/ssd300_32xb8-36e_openimages.py +++ b/configs/openimages/ssd300_32xb8-36e_openimages.py @@ -11,9 +11,7 @@ data_root = 'data/OpenImages/' input_size = 300 train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + 
dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='PhotoMetricDistortion', @@ -35,7 +33,7 @@ dict(type='PackDetInputs') ] test_pipeline = [ - dict(type='LoadImageFromFile'), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='Resize', scale=(input_size, input_size), keep_ratio=False), # avoid bboxes being resized dict(type='LoadAnnotations', with_bbox=True), diff --git a/configs/paa/paa_r50_fpn_ms-3x_coco.py b/configs/paa/paa_r50_fpn_ms-3x_coco.py index 803ceeca0ec..fed8b90a0fd 100644 --- a/configs/paa/paa_r50_fpn_ms-3x_coco.py +++ b/configs/paa/paa_r50_fpn_ms-3x_coco.py @@ -18,9 +18,7 @@ train_cfg = dict(max_epochs=max_epochs) train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='RandomResize', scale=[(1333, 640), (1333, 800)], diff --git a/configs/pascal_voc/faster-rcnn_r50-caffe-c4_ms-18k_voc0712.py b/configs/pascal_voc/faster-rcnn_r50-caffe-c4_ms-18k_voc0712.py index 7a3c34367d7..dddc0bbdf33 100644 --- a/configs/pascal_voc/faster-rcnn_r50-caffe-c4_ms-18k_voc0712.py +++ b/configs/pascal_voc/faster-rcnn_r50-caffe-c4_ms-18k_voc0712.py @@ -7,9 +7,7 @@ # dataset settings train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='RandomChoiceResize', @@ -21,9 +19,7 @@ dict(type='PackDetInputs') ] test_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='Resize', scale=(1333, 800), keep_ratio=True), # avoid bboxes being resized dict(type='LoadAnnotations', with_bbox=True), @@ -45,14 +41,16 @@ ann_file='VOC2007/ImageSets/Main/trainval.txt', data_prefix=dict(sub_data_root='VOC2007/'), filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline), + pipeline=train_pipeline, + backend_args={{_base_.backend_args}}), dict( type='VOCDataset', data_root={{_base_.data_root}}, ann_file='VOC2012/ImageSets/Main/trainval.txt', data_prefix=dict(sub_data_root='VOC2012/'), filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline) + pipeline=train_pipeline, + backend_args={{_base_.backend_args}}) ])) val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) diff --git a/configs/pascal_voc/faster-rcnn_r50_fpn_1x_voc0712-cocofmt.py b/configs/pascal_voc/faster-rcnn_r50_fpn_1x_voc0712-cocofmt.py index d8bfd043a2e..0b0aa41d67f 100644 --- a/configs/pascal_voc/faster-rcnn_r50_fpn_1x_voc0712-cocofmt.py +++ b/configs/pascal_voc/faster-rcnn_r50_fpn_1x_voc0712-cocofmt.py @@ -22,18 +22,14 @@ data_root = 'data/VOCdevkit/' train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict(type='Resize', scale=(1000, 600), keep_ratio=True), dict(type='RandomFlip', prob=0.5), dict(type='PackDetInputs') ] test_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='Resize', scale=(1000, 600), keep_ratio=True), # avoid bboxes 
being resized dict(type='LoadAnnotations', with_bbox=True), @@ -54,7 +50,8 @@ data_prefix=dict(img=''), metainfo=METAINFO, filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline))) + pipeline=train_pipeline, + backend_args={{_base_.backend_args}}))) val_dataloader = dict( dataset=dict( type=dataset_type, @@ -68,7 +65,8 @@ type='CocoMetric', ann_file=data_root + 'annotations/voc07_test.json', metric='bbox', - format_only=False) + format_only=False, + backend_args={{_base_.backend_args}}) test_evaluator = val_evaluator # training schedule, the dataset is repeated 3 times, so the diff --git a/configs/queryinst/queryinst_r50_fpn_300-proposals_crop-ms-480-800-3x_coco.py b/configs/queryinst/queryinst_r50_fpn_300-proposals_crop-ms-480-800-3x_coco.py index 1f5ada6ead4..33ab061267b 100644 --- a/configs/queryinst/queryinst_r50_fpn_300-proposals_crop-ms-480-800-3x_coco.py +++ b/configs/queryinst/queryinst_r50_fpn_300-proposals_crop-ms-480-800-3x_coco.py @@ -9,9 +9,7 @@ # augmentation strategy originates from DETR. train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict(type='RandomFlip', prob=0.5), dict( diff --git a/configs/queryinst/queryinst_r50_fpn_ms-480-800-3x_coco.py b/configs/queryinst/queryinst_r50_fpn_ms-480-800-3x_coco.py index 4e4434982bc..6b99374ef43 100644 --- a/configs/queryinst/queryinst_r50_fpn_ms-480-800-3x_coco.py +++ b/configs/queryinst/queryinst_r50_fpn_ms-480-800-3x_coco.py @@ -1,9 +1,7 @@ _base_ = './queryinst_r50_fpn_1x_coco.py' train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict( type='RandomChoiceResize', diff --git a/configs/regnet/mask-rcnn_regnetx-3.2GF_fpn_ms-3x_coco.py b/configs/regnet/mask-rcnn_regnetx-3.2GF_fpn_ms-3x_coco.py index 3fc02ffbbdb..36482c939dc 100644 --- a/configs/regnet/mask-rcnn_regnetx-3.2GF_fpn_ms-3x_coco.py +++ b/configs/regnet/mask-rcnn_regnetx-3.2GF_fpn_ms-3x_coco.py @@ -27,9 +27,7 @@ num_outs=5)) train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict( type='RandomChoiceResize', diff --git a/configs/resnest/cascade-mask-rcnn_s50_fpn_syncbn-backbone+head_ms-1x_coco.py b/configs/resnest/cascade-mask-rcnn_s50_fpn_syncbn-backbone+head_ms-1x_coco.py index 25ddc7a1a60..c6ef41c05cd 100644 --- a/configs/resnest/cascade-mask-rcnn_s50_fpn_syncbn-backbone+head_ms-1x_coco.py +++ b/configs/resnest/cascade-mask-rcnn_s50_fpn_syncbn-backbone+head_ms-1x_coco.py @@ -83,9 +83,7 @@ mask_head=dict(norm_cfg=norm_cfg))) train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict( type='LoadAnnotations', with_bbox=True, diff --git a/configs/resnest/cascade-rcnn_s50_fpn_syncbn-backbone+head_ms-range-1x_coco.py b/configs/resnest/cascade-rcnn_s50_fpn_syncbn-backbone+head_ms-range-1x_coco.py index 97a3970e8b2..7ce7b56320a 100644 --- a/configs/resnest/cascade-rcnn_s50_fpn_syncbn-backbone+head_ms-range-1x_coco.py +++ 
b/configs/resnest/cascade-rcnn_s50_fpn_syncbn-backbone+head_ms-range-1x_coco.py @@ -81,9 +81,7 @@ ], )) train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='RandomResize', scale=[(1333, 640), (1333, 800)], diff --git a/configs/resnest/faster-rcnn_s50_fpn_syncbn-backbone+head_ms-range-1x_coco.py b/configs/resnest/faster-rcnn_s50_fpn_syncbn-backbone+head_ms-range-1x_coco.py index f64dcdc2518..8f0ec6e07af 100644 --- a/configs/resnest/faster-rcnn_s50_fpn_syncbn-backbone+head_ms-range-1x_coco.py +++ b/configs/resnest/faster-rcnn_s50_fpn_syncbn-backbone+head_ms-range-1x_coco.py @@ -27,9 +27,7 @@ norm_cfg=norm_cfg))) train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='RandomResize', scale=[(1333, 640), (1333, 800)], diff --git a/configs/resnest/mask-rcnn_s50_fpn_syncbn-backbone+head_ms-1x_coco.py b/configs/resnest/mask-rcnn_s50_fpn_syncbn-backbone+head_ms-1x_coco.py index 309228fea62..c6f27000862 100644 --- a/configs/resnest/mask-rcnn_s50_fpn_syncbn-backbone+head_ms-1x_coco.py +++ b/configs/resnest/mask-rcnn_s50_fpn_syncbn-backbone+head_ms-1x_coco.py @@ -28,9 +28,7 @@ mask_head=dict(norm_cfg=norm_cfg))) train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict( type='LoadAnnotations', with_bbox=True, diff --git a/configs/retinanet/retinanet_tta.py b/configs/retinanet/retinanet_tta.py index d56563ea780..d0f37e0ab25 100644 --- a/configs/retinanet/retinanet_tta.py +++ b/configs/retinanet/retinanet_tta.py @@ -4,7 +4,7 @@ img_scales = [(1333, 800), (666, 400), (2000, 1200)] tta_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), + dict(type='LoadImageFromFile', backend_args=None), dict( type='TestTimeAug', transforms=[[ diff --git a/configs/rpn/rpn_r50_fpn_1x_coco.py b/configs/rpn/rpn_r50_fpn_1x_coco.py index 692ff9e6650..7fe88d395b8 100644 --- a/configs/rpn/rpn_r50_fpn_1x_coco.py +++ b/configs/rpn/rpn_r50_fpn_1x_coco.py @@ -17,7 +17,7 @@ # type='CocoMetric', # ann_file=data_root + 'annotations/instances_val2017.json', # metric='proposal_fast', -# file_client_args={{_base_.file_client_args}}, +# backend_args={{_base_.backend_args}}, # format_only=False) # ] diff --git a/configs/rtmdet/rtmdet-ins_l_8xb32-300e_coco.py b/configs/rtmdet/rtmdet-ins_l_8xb32-300e_coco.py index 1ecacab8044..6b4b9240a64 100644 --- a/configs/rtmdet/rtmdet-ins_l_8xb32-300e_coco.py +++ b/configs/rtmdet/rtmdet-ins_l_8xb32-300e_coco.py @@ -32,9 +32,7 @@ ) train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict( type='LoadAnnotations', with_bbox=True, @@ -67,9 +65,7 @@ train_dataloader = dict(pin_memory=True, dataset=dict(pipeline=train_pipeline)) train_pipeline_stage2 = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict( type='LoadAnnotations', with_bbox=True, diff --git a/configs/rtmdet/rtmdet-ins_s_8xb32-300e_coco.py b/configs/rtmdet/rtmdet-ins_s_8xb32-300e_coco.py index 
7785f2ff208..28bc21cc93b 100644 --- a/configs/rtmdet/rtmdet-ins_s_8xb32-300e_coco.py +++ b/configs/rtmdet/rtmdet-ins_s_8xb32-300e_coco.py @@ -10,9 +10,7 @@ bbox_head=dict(in_channels=128, feat_channels=128)) train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict( type='LoadAnnotations', with_bbox=True, @@ -43,9 +41,7 @@ ] train_pipeline_stage2 = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict( type='LoadAnnotations', with_bbox=True, diff --git a/configs/rtmdet/rtmdet-ins_tiny_8xb32-300e_coco.py b/configs/rtmdet/rtmdet-ins_tiny_8xb32-300e_coco.py index 33b62878027..954f911614e 100644 --- a/configs/rtmdet/rtmdet-ins_tiny_8xb32-300e_coco.py +++ b/configs/rtmdet/rtmdet-ins_tiny_8xb32-300e_coco.py @@ -12,9 +12,7 @@ bbox_head=dict(in_channels=96, feat_channels=96)) train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict( type='LoadAnnotations', with_bbox=True, diff --git a/configs/rtmdet/rtmdet_l_8xb32-300e_coco.py b/configs/rtmdet/rtmdet_l_8xb32-300e_coco.py index fc623fcc635..e4c46aadbda 100644 --- a/configs/rtmdet/rtmdet_l_8xb32-300e_coco.py +++ b/configs/rtmdet/rtmdet_l_8xb32-300e_coco.py @@ -62,9 +62,7 @@ ) train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict(type='CachedMosaic', img_scale=(640, 640), pad_val=114.0), dict( @@ -86,9 +84,7 @@ ] train_pipeline_stage2 = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='RandomResize', @@ -103,9 +99,7 @@ ] test_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='Resize', scale=(640, 640), keep_ratio=True), dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))), dict( diff --git a/configs/rtmdet/rtmdet_s_8xb32-300e_coco.py b/configs/rtmdet/rtmdet_s_8xb32-300e_coco.py index 355918147cb..cbf76247b74 100644 --- a/configs/rtmdet/rtmdet_s_8xb32-300e_coco.py +++ b/configs/rtmdet/rtmdet_s_8xb32-300e_coco.py @@ -10,9 +10,7 @@ bbox_head=dict(in_channels=128, feat_channels=128, exp_on_reg=False)) train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict(type='CachedMosaic', img_scale=(640, 640), pad_val=114.0), dict( @@ -34,9 +32,7 @@ ] train_pipeline_stage2 = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='RandomResize', diff --git a/configs/rtmdet/rtmdet_tiny_8xb32-300e_coco.py b/configs/rtmdet/rtmdet_tiny_8xb32-300e_coco.py index e05c4b169c1..a686f4a7f0c 100644 --- a/configs/rtmdet/rtmdet_tiny_8xb32-300e_coco.py +++ b/configs/rtmdet/rtmdet_tiny_8xb32-300e_coco.py @@ -12,9 +12,7 @@ 
bbox_head=dict(in_channels=96, feat_channels=96, exp_on_reg=False)) train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='CachedMosaic', diff --git a/configs/rtmdet/rtmdet_tta.py b/configs/rtmdet/rtmdet_tta.py index f4e003541e9..f7adcbc712a 100644 --- a/configs/rtmdet/rtmdet_tta.py +++ b/configs/rtmdet/rtmdet_tta.py @@ -4,7 +4,7 @@ img_scales = [(640, 640), (320, 320), (960, 960)] tta_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), + dict(type='LoadImageFromFile', backend_args=None), dict( type='TestTimeAug', transforms=[ diff --git a/configs/sabl/sabl-retinanet_r101-gn_fpn_ms-480-960-2x_coco.py b/configs/sabl/sabl-retinanet_r101-gn_fpn_ms-480-960-2x_coco.py index 6d6e932d177..dc7209aebad 100644 --- a/configs/sabl/sabl-retinanet_r101-gn_fpn_ms-480-960-2x_coco.py +++ b/configs/sabl/sabl-retinanet_r101-gn_fpn_ms-480-960-2x_coco.py @@ -54,9 +54,7 @@ debug=False)) # dataset settings train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='RandomResize', scale=[(1333, 480), (1333, 960)], diff --git a/configs/sabl/sabl-retinanet_r101-gn_fpn_ms-640-800-2x_coco.py b/configs/sabl/sabl-retinanet_r101-gn_fpn_ms-640-800-2x_coco.py index 083c0c129c6..ac5f6d9811d 100644 --- a/configs/sabl/sabl-retinanet_r101-gn_fpn_ms-640-800-2x_coco.py +++ b/configs/sabl/sabl-retinanet_r101-gn_fpn_ms-640-800-2x_coco.py @@ -54,9 +54,7 @@ debug=False)) # dataset settings train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='RandomResize', scale=[(1333, 480), (1333, 800)], diff --git a/configs/seesaw_loss/cascade-mask-rcnn_r101_fpn_seesaw-loss_random-ms-2x_lvis-v1.py b/configs/seesaw_loss/cascade-mask-rcnn_r101_fpn_seesaw-loss_random-ms-2x_lvis-v1.py index 9bb8df4cfb3..2a1a87d4203 100644 --- a/configs/seesaw_loss/cascade-mask-rcnn_r101_fpn_seesaw-loss_random-ms-2x_lvis-v1.py +++ b/configs/seesaw_loss/cascade-mask-rcnn_r101_fpn_seesaw-loss_random-ms-2x_lvis-v1.py @@ -80,9 +80,7 @@ # dataset settings train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict( type='RandomChoiceResize', diff --git a/configs/seesaw_loss/cascade-mask-rcnn_r101_fpn_seesaw-loss_sample1e-3-ms-2x_lvis-v1.py b/configs/seesaw_loss/cascade-mask-rcnn_r101_fpn_seesaw-loss_sample1e-3-ms-2x_lvis-v1.py index dd02b596675..0e7b4df9136 100644 --- a/configs/seesaw_loss/cascade-mask-rcnn_r101_fpn_seesaw-loss_sample1e-3-ms-2x_lvis-v1.py +++ b/configs/seesaw_loss/cascade-mask-rcnn_r101_fpn_seesaw-loss_sample1e-3-ms-2x_lvis-v1.py @@ -80,9 +80,7 @@ # dataset settings train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict( type='RandomChoiceResize', diff --git a/configs/seesaw_loss/mask-rcnn_r50_fpn_seesaw-loss_random-ms-2x_lvis-v1.py 
b/configs/seesaw_loss/mask-rcnn_r50_fpn_seesaw-loss_random-ms-2x_lvis-v1.py index 6f103768235..25c646c9c75 100644 --- a/configs/seesaw_loss/mask-rcnn_r50_fpn_seesaw-loss_random-ms-2x_lvis-v1.py +++ b/configs/seesaw_loss/mask-rcnn_r50_fpn_seesaw-loss_random-ms-2x_lvis-v1.py @@ -23,9 +23,7 @@ # dataset settings train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict( type='RandomChoiceResize', diff --git a/configs/seesaw_loss/mask-rcnn_r50_fpn_seesaw-loss_sample1e-3-ms-2x_lvis-v1.py b/configs/seesaw_loss/mask-rcnn_r50_fpn_seesaw-loss_sample1e-3-ms-2x_lvis-v1.py index 3106cc55bc7..d60320e0b78 100644 --- a/configs/seesaw_loss/mask-rcnn_r50_fpn_seesaw-loss_sample1e-3-ms-2x_lvis-v1.py +++ b/configs/seesaw_loss/mask-rcnn_r50_fpn_seesaw-loss_sample1e-3-ms-2x_lvis-v1.py @@ -23,9 +23,7 @@ # dataset settings train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict( type='RandomChoiceResize', diff --git a/configs/selfsup_pretrain/mask-rcnn_r50-mocov2-pre_fpn_ms-2x_coco.py b/configs/selfsup_pretrain/mask-rcnn_r50-mocov2-pre_fpn_ms-2x_coco.py index c73bf9e1a17..ddaebf5558a 100644 --- a/configs/selfsup_pretrain/mask-rcnn_r50-mocov2-pre_fpn_ms-2x_coco.py +++ b/configs/selfsup_pretrain/mask-rcnn_r50-mocov2-pre_fpn_ms-2x_coco.py @@ -13,9 +13,7 @@ type='Pretrained', checkpoint='./mocov2_r50_800ep_pretrain.pth'))) train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict( type='RandomResize', scale=[(1333, 640), (1333, 800)], diff --git a/configs/selfsup_pretrain/mask-rcnn_r50-swav-pre_fpn_ms-2x_coco.py b/configs/selfsup_pretrain/mask-rcnn_r50-swav-pre_fpn_ms-2x_coco.py index 8182cab1936..c393e0b3604 100644 --- a/configs/selfsup_pretrain/mask-rcnn_r50-swav-pre_fpn_ms-2x_coco.py +++ b/configs/selfsup_pretrain/mask-rcnn_r50-swav-pre_fpn_ms-2x_coco.py @@ -13,9 +13,7 @@ type='Pretrained', checkpoint='./swav_800ep_pretrain.pth.tar'))) train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict( type='RandomResize', scale=[(1333, 640), (1333, 800)], diff --git a/configs/solo/decoupled-solo-light_r50_fpn_3x_coco.py b/configs/solo/decoupled-solo-light_r50_fpn_3x_coco.py index 47b0cc1f09c..fc35df3c3cb 100644 --- a/configs/solo/decoupled-solo-light_r50_fpn_3x_coco.py +++ b/configs/solo/decoupled-solo-light_r50_fpn_3x_coco.py @@ -25,9 +25,7 @@ norm_cfg=dict(type='GN', num_groups=32, requires_grad=True))) train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict( type='RandomChoiceResize', @@ -38,9 +36,7 @@ dict(type='PackDetInputs') ] test_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), 
dict(type='Resize', scale=(852, 512), keep_ratio=True), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict( diff --git a/configs/solo/solo_r50_fpn_3x_coco.py b/configs/solo/solo_r50_fpn_3x_coco.py index c30d41f6d92..98a9505538c 100644 --- a/configs/solo/solo_r50_fpn_3x_coco.py +++ b/configs/solo/solo_r50_fpn_3x_coco.py @@ -1,9 +1,7 @@ _base_ = './solo_r50_fpn_1x_coco.py' train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict( type='RandomChoiceResize', diff --git a/configs/solov2/solov2-light_r50_fpn_ms-3x_coco.py b/configs/solov2/solov2-light_r50_fpn_ms-3x_coco.py index eb1e854d5ae..cf0a7f779c0 100644 --- a/configs/solov2/solov2-light_r50_fpn_ms-3x_coco.py +++ b/configs/solov2/solov2-light_r50_fpn_ms-3x_coco.py @@ -10,9 +10,7 @@ # dataset settings train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict( type='RandomChoiceResize', @@ -23,9 +21,7 @@ dict(type='PackDetInputs') ] test_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='Resize', scale=(448, 768), keep_ratio=True), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict( diff --git a/configs/solov2/solov2_r50_fpn_ms-3x_coco.py b/configs/solov2/solov2_r50_fpn_ms-3x_coco.py index b51cff8e594..ec20b7dd6b9 100644 --- a/configs/solov2/solov2_r50_fpn_ms-3x_coco.py +++ b/configs/solov2/solov2_r50_fpn_ms-3x_coco.py @@ -1,9 +1,7 @@ _base_ = './solov2_r50_fpn_1x_coco.py' train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict( type='RandomChoiceResize', diff --git a/configs/sparse_rcnn/sparse-rcnn_r50_fpn_300-proposals_crop-ms-480-800-3x_coco.py b/configs/sparse_rcnn/sparse-rcnn_r50_fpn_300-proposals_crop-ms-480-800-3x_coco.py index 98a7398f969..93edc0314b5 100644 --- a/configs/sparse_rcnn/sparse-rcnn_r50_fpn_300-proposals_crop-ms-480-800-3x_coco.py +++ b/configs/sparse_rcnn/sparse-rcnn_r50_fpn_300-proposals_crop-ms-480-800-3x_coco.py @@ -7,9 +7,7 @@ # augmentation strategy originates from DETR. 
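The "originates from DETR" comment refers to a `RandomChoice` between plain multi-scale resizing and a resize-crop-resize branch. A condensed sketch of that strategy follows; the scale lists are abbreviated illustrations of the usual 480-800 range, not values copied from this diff:

```python
train_pipeline = [
    dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='RandomFlip', prob=0.5),
    dict(
        type='RandomChoice',
        transforms=[
            # Branch 1: plain multi-scale resize.
            [
                dict(
                    type='RandomChoiceResize',
                    scales=[(480, 1333), (640, 1333), (800, 1333)],
                    keep_ratio=True)
            ],
            # Branch 2: resize, take a random crop, then resize again.
            [
                dict(
                    type='RandomChoiceResize',
                    scales=[(400, 1333), (500, 1333), (600, 1333)],
                    keep_ratio=True),
                dict(
                    type='RandomCrop',
                    crop_type='absolute_range',
                    crop_size=(384, 600),
                    allow_negative_crop=True),
                dict(
                    type='RandomChoiceResize',
                    scales=[(480, 1333), (640, 1333), (800, 1333)],
                    keep_ratio=True)
            ]
        ]),
    dict(type='PackDetInputs')
]
```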
train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict(type='RandomFlip', prob=0.5), dict( diff --git a/configs/sparse_rcnn/sparse-rcnn_r50_fpn_ms-480-800-3x_coco.py b/configs/sparse_rcnn/sparse-rcnn_r50_fpn_ms-480-800-3x_coco.py index f7c7a4a4de5..156028d7cdd 100644 --- a/configs/sparse_rcnn/sparse-rcnn_r50_fpn_ms-480-800-3x_coco.py +++ b/configs/sparse_rcnn/sparse-rcnn_r50_fpn_ms-480-800-3x_coco.py @@ -1,9 +1,7 @@ _base_ = './sparse-rcnn_r50_fpn_1x_coco.py' train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='RandomChoiceResize', diff --git a/configs/ssd/ssd300_coco.py b/configs/ssd/ssd300_coco.py index 4ce1a3c314b..796d25c9053 100644 --- a/configs/ssd/ssd300_coco.py +++ b/configs/ssd/ssd300_coco.py @@ -6,7 +6,7 @@ # dataset settings input_size = 300 train_pipeline = [ - dict(type='LoadImageFromFile'), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='Expand', @@ -28,7 +28,7 @@ dict(type='PackDetInputs') ] test_pipeline = [ - dict(type='LoadImageFromFile'), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='Resize', scale=(input_size, input_size), keep_ratio=False), dict(type='LoadAnnotations', with_bbox=True), dict( @@ -50,7 +50,8 @@ ann_file='annotations/instances_train2017.json', data_prefix=dict(img='train2017/'), filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline))) + pipeline=train_pipeline, + backend_args={{_base_.backend_args}}))) val_dataloader = dict(batch_size=8, dataset=dict(pipeline=test_pipeline)) test_dataloader = val_dataloader diff --git a/configs/ssd/ssd512_coco.py b/configs/ssd/ssd512_coco.py index 16140be2d24..7acd6144202 100644 --- a/configs/ssd/ssd512_coco.py +++ b/configs/ssd/ssd512_coco.py @@ -20,7 +20,7 @@ # dataset settings train_pipeline = [ - dict(type='LoadImageFromFile'), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='Expand', @@ -42,7 +42,7 @@ dict(type='PackDetInputs') ] test_pipeline = [ - dict(type='LoadImageFromFile'), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='Resize', scale=(input_size, input_size), keep_ratio=False), dict(type='LoadAnnotations', with_bbox=True), dict( diff --git a/configs/strong_baselines/mask-rcnn_r50-caffe_fpn_rpn-2conv_4conv1fc_syncbn-all_lsj-100e_coco.py b/configs/strong_baselines/mask-rcnn_r50-caffe_fpn_rpn-2conv_4conv1fc_syncbn-all_lsj-100e_coco.py index 3f809cc5ad8..70e92a82e0c 100644 --- a/configs/strong_baselines/mask-rcnn_r50-caffe_fpn_rpn-2conv_4conv1fc_syncbn-all_lsj-100e_coco.py +++ b/configs/strong_baselines/mask-rcnn_r50-caffe_fpn_rpn-2conv_4conv1fc_syncbn-all_lsj-100e_coco.py @@ -37,9 +37,7 @@ mask_head=dict(norm_cfg=head_norm_cfg))) train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict( type='RandomResize', @@ -57,9 +55,7 @@ dict(type='PackDetInputs') ] test_pipeline = [ - dict( - type='LoadImageFromFile', - 
file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='Resize', scale=(1333, 800), keep_ratio=True), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict( diff --git a/configs/swin/mask-rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco.py b/configs/swin/mask-rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco.py index 37448b0b77d..7024b73249c 100644 --- a/configs/swin/mask-rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco.py +++ b/configs/swin/mask-rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco.py @@ -30,9 +30,7 @@ # augmentation strategy originates from DETR / Sparse RCNN train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict(type='RandomFlip', prob=0.5), dict( diff --git a/configs/tood/tood_r50_fpn_ms-2x_coco.py b/configs/tood/tood_r50_fpn_ms-2x_coco.py index 93d1d47521d..ffb296dccee 100644 --- a/configs/tood/tood_r50_fpn_ms-2x_coco.py +++ b/configs/tood/tood_r50_fpn_ms-2x_coco.py @@ -19,9 +19,7 @@ # multi-scale training train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='RandomResize', scale=[(1333, 480), (1333, 800)], diff --git a/configs/tridentnet/tridentnet_r50-caffe_ms-1x_coco.py b/configs/tridentnet/tridentnet_r50-caffe_ms-1x_coco.py index a3a88908b9e..806d20b90c9 100644 --- a/configs/tridentnet/tridentnet_r50-caffe_ms-1x_coco.py +++ b/configs/tridentnet/tridentnet_r50-caffe_ms-1x_coco.py @@ -1,9 +1,7 @@ _base_ = 'tridentnet_r50-caffe_1x_coco.py' train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='RandomChoiceResize', diff --git a/configs/vfnet/vfnet_r50_fpn_1x_coco.py b/configs/vfnet/vfnet_r50_fpn_1x_coco.py index d45e5824086..99bc3b5f4c7 100644 --- a/configs/vfnet/vfnet_r50_fpn_1x_coco.py +++ b/configs/vfnet/vfnet_r50_fpn_1x_coco.py @@ -64,18 +64,14 @@ # data setting train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict(type='Resize', scale=(1333, 800), keep_ratio=True), dict(type='RandomFlip', prob=0.5), dict(type='PackDetInputs') ] test_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='Resize', scale=(1333, 800), keep_ratio=True), dict(type='LoadAnnotations', with_bbox=True), dict( diff --git a/configs/vfnet/vfnet_r50_fpn_ms-2x_coco.py b/configs/vfnet/vfnet_r50_fpn_ms-2x_coco.py index 95ce40fa1ac..0f8eed298e8 100644 --- a/configs/vfnet/vfnet_r50_fpn_ms-2x_coco.py +++ b/configs/vfnet/vfnet_r50_fpn_ms-2x_coco.py @@ -1,8 +1,6 @@ _base_ = './vfnet_r50_fpn_1x_coco.py' train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='RandomResize', scale=[(1333, 480), (1333, 960)], @@ -11,9 +9,7 @@ dict(type='PackDetInputs') ] 
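Throughout these hunks, `{{_base_.backend_args}}` is MMEngine's reference syntax for a variable defined in an inherited base config: the placeholder is substituted with the base value when the config is parsed, so storage settings are declared once. A minimal sketch of the pattern, where the base file path is illustrative:

```python
# In the base config, e.g. configs/_base_/datasets/coco_detection.py:
backend_args = None

# In any config that inherits from it:
_base_ = '../_base_/datasets/coco_detection.py'
train_pipeline = [
    # Substituted by MMEngine at parse time; with the base above this
    # resolves to backend_args=None.
    dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
    dict(type='LoadAnnotations', with_bbox=True),
]
```

Switching every derived config to a remote backend then only requires changing the single `backend_args` definition in the base file.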
test_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='Resize', scale=(1333, 800), keep_ratio=True), dict(type='LoadAnnotations', with_bbox=True), dict( diff --git a/configs/yolact/yolact_r50_1xb8-55e_coco.py b/configs/yolact/yolact_r50_1xb8-55e_coco.py index 4866f04ddf4..b7dabf1548a 100644 --- a/configs/yolact/yolact_r50_1xb8-55e_coco.py +++ b/configs/yolact/yolact_r50_1xb8-55e_coco.py @@ -95,9 +95,7 @@ mask_thr_binary=0.5)) # dataset settings train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict(type='FilterAnnotations', min_gt_bbox_wh=(4.0, 4.0)), dict( @@ -120,7 +118,7 @@ dict(type='PackDetInputs') ] test_pipeline = [ - dict(type='LoadImageFromFile'), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='Resize', scale=(input_size, input_size), keep_ratio=False), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict( diff --git a/configs/yolo/yolov3_d53_8xb8-320-273e_coco.py b/configs/yolo/yolov3_d53_8xb8-320-273e_coco.py index f1ae4248a8d..a3d08dd7706 100644 --- a/configs/yolo/yolov3_d53_8xb8-320-273e_coco.py +++ b/configs/yolo/yolov3_d53_8xb8-320-273e_coco.py @@ -1,17 +1,8 @@ _base_ = './yolov3_d53_8xb8-ms-608-273e_coco.py' -# dataset settings -# file_client_args = dict( -# backend='petrel', -# path_mapping=dict({ -# './data/': 's3://openmmlab/datasets/detection/', -# 'data/': 's3://openmmlab/datasets/detection/' -# })) - -file_client_args = dict(backend='disk') input_size = (320, 320) train_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), # `mean` and `to_rgb` should be the same with the `preprocess_cfg` dict(type='Expand', mean=[0, 0, 0], to_rgb=True, ratio_range=(1, 2)), @@ -25,7 +16,7 @@ dict(type='PackDetInputs') ] test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='Resize', scale=input_size, keep_ratio=True), dict(type='LoadAnnotations', with_bbox=True), dict( diff --git a/configs/yolo/yolov3_d53_8xb8-ms-416-273e_coco.py b/configs/yolo/yolov3_d53_8xb8-ms-416-273e_coco.py index be098c8352d..ca0127e83ed 100644 --- a/configs/yolo/yolov3_d53_8xb8-ms-416-273e_coco.py +++ b/configs/yolo/yolov3_d53_8xb8-ms-416-273e_coco.py @@ -1,15 +1,7 @@ _base_ = './yolov3_d53_8xb8-ms-608-273e_coco.py' -# dataset settings -# file_client_args = dict( -# backend='petrel', -# path_mapping=dict({ -# './data/': 's3://openmmlab/datasets/detection/', -# 'data/': 's3://openmmlab/datasets/detection/' -# })) -file_client_args = dict(backend='disk') train_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), # `mean` and `to_rgb` should be the same with the `preprocess_cfg` dict(type='Expand', mean=[0, 0, 0], to_rgb=True, ratio_range=(1, 2)), @@ -23,7 +15,7 @@ dict(type='PackDetInputs') ] test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), 
dict(type='Resize', scale=(416, 416), keep_ratio=True), dict(type='LoadAnnotations', with_bbox=True), dict( diff --git a/configs/yolo/yolov3_d53_8xb8-ms-608-273e_coco.py b/configs/yolo/yolov3_d53_8xb8-ms-608-273e_coco.py index 287e09485cb..d4a36dfdaaf 100644 --- a/configs/yolo/yolov3_d53_8xb8-ms-608-273e_coco.py +++ b/configs/yolo/yolov3_d53_8xb8-ms-608-273e_coco.py @@ -66,16 +66,23 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' -# file_client_args = dict( +# Example of using a different file client +# Method 1: simply set the data root and let the file I/O module +# automatically infer it from the prefix (LMDB and Memcache are not supported yet) + +# data_root = 's3://openmmlab/datasets/detection/coco/' + +# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6 +# backend_args = dict( # backend='petrel', # path_mapping=dict({ # './data/': 's3://openmmlab/datasets/detection/', # 'data/': 's3://openmmlab/datasets/detection/' # })) -file_client_args = dict(backend='disk') +backend_args = None train_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='LoadAnnotations', with_bbox=True), dict( type='Expand', @@ -92,7 +99,7 @@ dict(type='PackDetInputs') ] test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='Resize', scale=(608, 608), keep_ratio=True), dict(type='LoadAnnotations', with_bbox=True), dict( @@ -113,7 +120,8 @@ ann_file='annotations/instances_train2017.json', data_prefix=dict(img='train2017/'), filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline)) + pipeline=train_pipeline, + backend_args=backend_args)) val_dataloader = dict( batch_size=1, num_workers=2, @@ -126,13 +134,15 @@ ann_file='annotations/instances_val2017.json', data_prefix=dict(img='val2017/'), test_mode=True, - pipeline=test_pipeline)) + pipeline=test_pipeline, + backend_args=backend_args)) test_dataloader = val_dataloader val_evaluator = dict( type='CocoMetric', ann_file=data_root + 'annotations/instances_val2017.json', - metric='bbox') + metric='bbox', + backend_args=backend_args) test_evaluator = val_evaluator train_cfg = dict(max_epochs=273, val_interval=7) diff --git a/configs/yolo/yolov3_mobilenetv2_8xb24-320-300e_coco.py b/configs/yolo/yolov3_mobilenetv2_8xb24-320-300e_coco.py index a8eb5dd1647..07b39373432 100644 --- a/configs/yolo/yolov3_mobilenetv2_8xb24-320-300e_coco.py +++ b/configs/yolo/yolov3_mobilenetv2_8xb24-320-300e_coco.py @@ -9,17 +9,9 @@ [(10, 15), (24, 36), (72, 42)]]))) # yapf:enable -# file_client_args = dict( -# backend='petrel', -# path_mapping=dict({ -# './data/': 's3://openmmlab/datasets/detection/', -# 'data/': 's3://openmmlab/datasets/detection/' -# })) -file_client_args = dict(backend='disk') - input_size = (320, 320) train_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), # `mean` and `to_rgb` should be the same with the `preprocess_cfg` dict( @@ -37,7 +29,7 @@ dict(type='PackDetInputs') ] test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='Resize', scale=input_size, keep_ratio=True), dict(type='LoadAnnotations', with_bbox=True), dict( diff --git
a/configs/yolo/yolov3_mobilenetv2_8xb24-ms-416-300e_coco.py b/configs/yolo/yolov3_mobilenetv2_8xb24-ms-416-300e_coco.py index 67116f4000f..9a161b66fe9 100644 --- a/configs/yolo/yolov3_mobilenetv2_8xb24-ms-416-300e_coco.py +++ b/configs/yolo/yolov3_mobilenetv2_8xb24-ms-416-300e_coco.py @@ -67,16 +67,23 @@ dataset_type = 'CocoDataset' data_root = 'data/coco/' -# file_client_args = dict( +# Example of using a different file client +# Method 1: simply set the data root and let the file I/O module +# automatically infer it from the prefix (LMDB and Memcache are not supported yet) + +# data_root = 's3://openmmlab/datasets/detection/coco/' + +# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6 +# backend_args = dict( # backend='petrel', # path_mapping=dict({ # './data/': 's3://openmmlab/datasets/detection/', # 'data/': 's3://openmmlab/datasets/detection/' # })) -file_client_args = dict(backend='disk') +backend_args = None train_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='LoadAnnotations', with_bbox=True), dict( type='Expand', @@ -93,7 +100,7 @@ dict(type='PackDetInputs') ] test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='Resize', scale=(416, 416), keep_ratio=True), dict(type='LoadAnnotations', with_bbox=True), dict( @@ -117,7 +124,8 @@ ann_file='annotations/instances_train2017.json', data_prefix=dict(img='train2017/'), filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline))) + pipeline=train_pipeline, + backend_args=backend_args))) val_dataloader = dict( batch_size=24, num_workers=4, @@ -130,13 +138,15 @@ ann_file='annotations/instances_val2017.json', data_prefix=dict(img='val2017/'), test_mode=True, - pipeline=test_pipeline)) + pipeline=test_pipeline, + backend_args=backend_args)) test_dataloader = val_dataloader val_evaluator = dict( type='CocoMetric', ann_file=data_root + 'annotations/instances_val2017.json', - metric='bbox') + metric='bbox', + backend_args=backend_args) test_evaluator = val_evaluator train_cfg = dict(max_epochs=30) diff --git a/configs/yolof/yolof_r50-c5_8xb8-1x_coco.py b/configs/yolof/yolof_r50-c5_8xb8-1x_coco.py index b2637799712..5ea228e3e32 100644 --- a/configs/yolof/yolof_r50-c5_8xb8-1x_coco.py +++ b/configs/yolof/yolof_r50-c5_8xb8-1x_coco.py @@ -89,9 +89,7 @@ ] train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict(type='Resize', scale=(1333, 800), keep_ratio=True), dict(type='RandomFlip', prob=0.5), @@ -99,9 +97,7 @@ dict(type='PackDetInputs') ] test_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='Resize', scale=(1333, 800), keep_ratio=True), dict(type='LoadAnnotations', with_bbox=True), dict( diff --git a/configs/yolox/yolox_s_8xb8-300e_coco.py b/configs/yolox/yolox_s_8xb8-300e_coco.py index 0e6bb2d1dc8..3e324eb5b99 100644 --- a/configs/yolox/yolox_s_8xb8-300e_coco.py +++ b/configs/yolox/yolox_s_8xb8-300e_coco.py @@ -73,13 +73,20 @@ data_root = 'data/coco/' dataset_type = 'CocoDataset' -# file_client_args = dict( +# Example of using a different file client +# Method 1: simply set the data root and let the file I/O module +# 
automatically infer the backend from the prefix (no LMDB/Memcached yet)
+
+# data_root = 's3://openmmlab/datasets/detection/coco/'
+
+# Method 2: use `backend_args` (named `file_client_args` before 3.0.0rc6)
+# backend_args = dict(
 #     backend='petrel',
 #     path_mapping=dict({
 #         './data/': 's3://openmmlab/datasets/detection/',
 #         'data/': 's3://openmmlab/datasets/detection/'
 #     }))
-file_client_args = dict(backend='disk')
+backend_args = None

 train_pipeline = [
     dict(type='Mosaic', img_scale=img_scale, pad_val=114.0),
     dict(
@@ -120,14 +127,15 @@
         ann_file='annotations/instances_train2017.json',
         data_prefix=dict(img='train2017/'),
         pipeline=[
-            dict(type='LoadImageFromFile', file_client_args=file_client_args),
+            dict(type='LoadImageFromFile', backend_args=backend_args),
             dict(type='LoadAnnotations', with_bbox=True)
         ],
-        filter_cfg=dict(filter_empty_gt=False, min_size=32)),
+        filter_cfg=dict(filter_empty_gt=False, min_size=32),
+        backend_args=backend_args),
     pipeline=train_pipeline)

 test_pipeline = [
-    dict(type='LoadImageFromFile', file_client_args=file_client_args),
+    dict(type='LoadImageFromFile', backend_args=backend_args),
     dict(type='Resize', scale=img_scale, keep_ratio=True),
     dict(
         type='Pad',
@@ -158,13 +166,15 @@
         ann_file='annotations/instances_val2017.json',
         data_prefix=dict(img='val2017/'),
         test_mode=True,
-        pipeline=test_pipeline))
+        pipeline=test_pipeline,
+        backend_args=backend_args))
 test_dataloader = val_dataloader

 val_evaluator = dict(
     type='CocoMetric',
     ann_file=data_root + 'annotations/instances_val2017.json',
-    metric='bbox')
+    metric='bbox',
+    backend_args=backend_args)
 test_evaluator = val_evaluator

 # training settings
diff --git a/configs/yolox/yolox_tiny_8xb8-300e_coco.py b/configs/yolox/yolox_tiny_8xb8-300e_coco.py
index b15480bed0a..86f7e9a6191 100644
--- a/configs/yolox/yolox_tiny_8xb8-300e_coco.py
+++ b/configs/yolox/yolox_tiny_8xb8-300e_coco.py
@@ -15,14 +15,6 @@

 img_scale = (640, 640)  # width, height

-# file_client_args = dict(
-#     backend='petrel',
-#     path_mapping=dict({
-#         './data/': 's3://openmmlab/datasets/detection/',
-#         'data/': 's3://openmmlab/datasets/detection/'
-#     }))
-file_client_args = dict(backend='disk')
-
 train_pipeline = [
     dict(type='Mosaic', img_scale=img_scale, pad_val=114.0),
     dict(
@@ -44,7 +36,7 @@
 ]

 test_pipeline = [
-    dict(type='LoadImageFromFile', file_client_args=file_client_args),
+    dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
     dict(type='Resize', scale=(416, 416), keep_ratio=True),
     dict(
         type='Pad',
diff --git a/configs/yolox/yolox_tta.py b/configs/yolox/yolox_tta.py
index 8e86f26f5ac..e65244be6e1 100644
--- a/configs/yolox/yolox_tta.py
+++ b/configs/yolox/yolox_tta.py
@@ -4,7 +4,7 @@
 img_scales = [(640, 640), (320, 320), (960, 960)]

 tta_pipeline = [
-    dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
+    dict(type='LoadImageFromFile', backend_args=None),
     dict(
         type='TestTimeAug',
         transforms=[
diff --git a/docs/en/advanced_guides/customize_transforms.md b/docs/en/advanced_guides/customize_transforms.md
index 870861b7d74..ae4ff47ef7e 100644
--- a/docs/en/advanced_guides/customize_transforms.md
+++ b/docs/en/advanced_guides/customize_transforms.md
@@ -32,7 +32,7 @@
    custom_imports = dict(imports=['path.to.my_pipeline'], allow_failed_imports=False)

    train_pipeline = [
-       dict(type='LoadImageFromFile', file_client_args=file_client_args),
+       dict(type='LoadImageFromFile', backend_args=backend_args),
        dict(type='LoadAnnotations', with_bbox=True),
        dict(type='Resize', scale=(1333, 800), keep_ratio=True),
dict(type='RandomFlip', prob=0.5), diff --git a/docs/en/advanced_guides/transforms.md b/docs/en/advanced_guides/transforms.md index 8820a3cf129..034621c9fc5 100644 --- a/docs/en/advanced_guides/transforms.md +++ b/docs/en/advanced_guides/transforms.md @@ -17,7 +17,7 @@ Here is a pipeline example for Faster R-CNN. ```python train_pipeline = [ # Training data processing pipeline - dict(type='LoadImageFromFile'), # First pipeline to load images from file path + dict(type='LoadImageFromFile', backend_args=backend_args), # First pipeline to load images from file path dict( type='LoadAnnotations', # Second pipeline to load annotations for current image with_bbox=True), # Whether to use bounding box, True for detection @@ -32,7 +32,7 @@ train_pipeline = [ # Training data processing pipeline dict(type='PackDetInputs') # Pipeline that formats the annotation data and decides which keys in the data should be packed into data_samples ] test_pipeline = [ # Testing data processing pipeline - dict(type='LoadImageFromFile', file_client_args=file_client_args), # First pipeline to load images from file path + dict(type='LoadImageFromFile', backend_args=backend_args), # First pipeline to load images from file path dict(type='Resize', scale=(1333, 800), keep_ratio=True), # Pipeline that resize the images dict( type='PackDetInputs', # Pipeline that formats the annotation data and decides which keys in the data should be packed into data_samples diff --git a/docs/en/user_guides/config.md b/docs/en/user_guides/config.md index d08b2a731eb..2ee3bc9bf68 100644 --- a/docs/en/user_guides/config.md +++ b/docs/en/user_guides/config.md @@ -176,10 +176,10 @@ model = dict( ```python dataset_type = 'CocoDataset' # Dataset type, this will be used to define the dataset data_root = 'data/coco/' # Root path of data -file_client_args = dict(backend='disk') # file client arguments +backend_args = None # Arguments to instantiate the corresponding file backend train_pipeline = [ # Training data processing pipeline - dict(type='LoadImageFromFile', file_client_args=file_client_args), # First pipeline to load images from file path + dict(type='LoadImageFromFile', backend_args=backend_args), # First pipeline to load images from file path dict( type='LoadAnnotations', # Second pipeline to load annotations for current image with_bbox=True, # Whether to use bounding box, True for detection @@ -196,7 +196,7 @@ train_pipeline = [ # Training data processing pipeline dict(type='PackDetInputs') # Pipeline that formats the annotation data and decides which keys in the data should be packed into data_samples ] test_pipeline = [ # Testing data processing pipeline - dict(type='LoadImageFromFile', file_client_args=file_client_args), # First pipeline to load images from file path + dict(type='LoadImageFromFile', backend_args=backend_args), # First pipeline to load images from file path dict(type='Resize', scale=(1333, 800), keep_ratio=True), # Pipeline that resizes the images dict( type='PackDetInputs', # Pipeline that formats the annotation data and decides which keys in the data should be packed into data_samples @@ -217,7 +217,8 @@ train_dataloader = dict( # Train dataloader config ann_file='annotations/instances_train2017.json', # Path of annotation file data_prefix=dict(img='train2017/'), # Prefix of image path filter_cfg=dict(filter_empty_gt=True, min_size=32), # Config of filtering images and annotations - pipeline=train_pipeline)) + pipeline=train_pipeline, + backend_args=backend_args)) val_dataloader = dict( # Validation dataloader config 
batch_size=1, # Batch size of a single GPU. If batch-size > 1, the extra padding area may influence the performance. num_workers=2, # Worker to pre-fetch data for each single GPU @@ -232,7 +233,8 @@ val_dataloader = dict( # Validation dataloader config ann_file='annotations/instances_val2017.json', data_prefix=dict(img='val2017/'), test_mode=True, # Turn on the test mode of the dataset to avoid filtering annotations or images - pipeline=test_pipeline)) + pipeline=test_pipeline, + backend_args=backend_args)) test_dataloader = val_dataloader # Testing dataloader config ``` @@ -243,7 +245,8 @@ val_evaluator = dict( # Validation evaluator config type='CocoMetric', # The coco metric used to evaluate AR, AP, and mAP for detection and instance segmentation ann_file=data_root + 'annotations/instances_val2017.json', # Annotation file path metric=['bbox', 'segm'], # Metrics to be evaluated, `bbox` for detection and `segm` for instance segmentation - format_only=False) + format_only=False, + backend_args=backend_args) test_evaluator = val_evaluator # Testing evaluator config ``` @@ -529,7 +532,7 @@ train_pipeline = [ dict(type='PackDetInputs') ] test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile'), dict(type='Resize', scale=(1333, 800), keep_ratio=True), dict( type='PackDetInputs', diff --git a/docs/en/user_guides/semi_det.md b/docs/en/user_guides/semi_det.md index 6cf5538e539..94ec3d670c8 100644 --- a/docs/en/user_guides/semi_det.md +++ b/docs/en/user_guides/semi_det.md @@ -117,7 +117,7 @@ We adopt a teacher-student joint training semi-supervised object detection frame # pipeline used to augment labeled data, # which will be sent to student model for supervised training. sup_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='LoadAnnotations', with_bbox=True), dict(type='RandomResize', scale=scale, keep_ratio=True), dict(type='RandomFlip', prob=0.5), @@ -164,7 +164,7 @@ strong_pipeline = [ # pipeline used to augment unlabeled data into different views unsup_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='LoadEmptyAnnotations'), dict( type='MultiBranch', diff --git a/docs/en/user_guides/test.md b/docs/en/user_guides/test.md index 333ccfbed1b..4a0d77b0f5f 100644 --- a/docs/en/user_guides/test.md +++ b/docs/en/user_guides/test.md @@ -219,7 +219,7 @@ tta_model = dict( tta_pipeline = [ dict(type='LoadImageFromFile', - file_client_args=dict(backend='disk')), + backend_args=None), dict( type='TestTimeAug', transforms=[[ @@ -274,7 +274,7 @@ tta_model = dict( img_scales = [(1333, 800), (666, 400), (2000, 1200)] tta_pipeline = [ dict(type='LoadImageFromFile', - file_client_args=dict(backend='disk')), + backend_args=None), dict( type='TestTimeAug', transforms=[[ diff --git a/docs/zh_cn/user_guides/config.md b/docs/zh_cn/user_guides/config.md index a3dc0f26635..319c78ac312 100644 --- a/docs/zh_cn/user_guides/config.md +++ b/docs/zh_cn/user_guides/config.md @@ -176,10 +176,9 @@ model = dict( ```python dataset_type = 'CocoDataset' # 数据集类型,这将被用来定义数据集。 data_root = 'data/coco/' # 数据的根路径。 -file_client_args = dict(backend='disk') # 文件读取后端的配置,默认从硬盘读取 train_pipeline = [ # 训练数据处理流程 - dict(type='LoadImageFromFile', file_client_args=file_client_args), # 第 1 个流程,从文件路径里加载图像。 + dict(type='LoadImageFromFile'), # 第 1 个流程,从文件路径里加载图像。 dict( 
type='LoadAnnotations',  # 第 2 个流程,对于当前图像,加载它的注释信息。
         with_bbox=True,  # 是否使用标注框(bounding box), 目标检测需要设置为 True。
@@ -196,7 +195,7 @@ train_pipeline = [  # 训练数据处理流程
     dict(type='PackDetInputs')  # 将数据转换为检测器输入格式的流程
 ]
 test_pipeline = [  # 测试数据处理流程
-    dict(type='LoadImageFromFile', file_client_args=file_client_args),  # 第 1 个流程,从文件路径里加载图像。
+    dict(type='LoadImageFromFile'),  # 第 1 个流程,从文件路径里加载图像。
     dict(type='Resize', scale=(1333, 800), keep_ratio=True),  # 变化图像大小的流程。
     dict(
         type='PackDetInputs',  # 将数据转换为检测器输入格式的流程
@@ -519,7 +518,7 @@ train_pipeline = [
     dict(type='PackDetInputs')
 ]
 test_pipeline = [
-    dict(type='LoadImageFromFile', file_client_args=file_client_args),
+    dict(type='LoadImageFromFile'),
     dict(type='Resize', scale=(1333, 800), keep_ratio=True),
     dict(
         type='PackDetInputs',
diff --git a/docs/zh_cn/user_guides/semi_det.md b/docs/zh_cn/user_guides/semi_det.md
index 4665e40260c..a223523705c 100644
--- a/docs/zh_cn/user_guides/semi_det.md
+++ b/docs/zh_cn/user_guides/semi_det.md
@@ -4,12 +4,13 @@

 按照以下流程进行半监督目标检测:

-- [准备和拆分数据集](#准备和拆分数据集)
-- [配置多分支数据流程](#配置多分支数据流程)
-- [配置加载半监督数据](#配置半监督数据加载)
-- [配置半监督模型](#配置半监督模型)
-- [配置 MeanTeacherHook](#配置MeanTeacherHook)
-- [配置 TeacherStudentValLoop](#配置TeacherStudentValLoop)
+- [半监督目标检测](#半监督目标检测)
+  - [准备和拆分数据集](#准备和拆分数据集)
+  - [配置多分支数据流程](#配置多分支数据流程)
+  - [配置半监督数据加载](#配置半监督数据加载)
+  - [配置半监督模型](#配置半监督模型)
+  - [配置MeanTeacherHook](#配置meanteacherhook)
+  - [配置TeacherStudentValLoop](#配置teacherstudentvalloop)

 ## 准备和拆分数据集

@@ -116,7 +117,7 @@ mmdetection
 # pipeline used to augment labeled data,
 # which will be sent to student model for supervised training.
 sup_pipeline = [
-    dict(type='LoadImageFromFile', file_client_args=file_client_args),
+    dict(type='LoadImageFromFile', backend_args=backend_args),
     dict(type='LoadAnnotations', with_bbox=True),
     dict(type='RandomResize', scale=scale, keep_ratio=True),
     dict(type='RandomFlip', prob=0.5),
@@ -163,7 +164,7 @@ strong_pipeline = [
 # pipeline used to augment unlabeled data into different views
 unsup_pipeline = [
-    dict(type='LoadImageFromFile', file_client_args=file_client_args),
+    dict(type='LoadImageFromFile', backend_args=backend_args),
     dict(type='LoadEmptyAnnotations'),
     dict(
         type='MultiBranch',
diff --git a/mmdet/datasets/base_det_dataset.py b/mmdet/datasets/base_det_dataset.py
index 55598ef267a..379cc4e9f63 100644
--- a/mmdet/datasets/base_det_dataset.py
+++ b/mmdet/datasets/base_det_dataset.py
@@ -3,7 +3,7 @@
 from typing import List, Optional

 from mmengine.dataset import BaseDataset
-from mmengine.fileio import FileClient, load
+from mmengine.fileio import load
 from mmengine.utils import is_abs

 from ..registry import DATASETS
@@ -15,21 +15,28 @@ class BaseDetDataset(BaseDataset):

     Args:
         proposal_file (str, optional): Proposals file path.
             Defaults to None.
-        file_client_args (dict): Arguments to instantiate a FileClient.
-            See :class:`mmengine.fileio.FileClient` for details.
-            Defaults to ``dict(backend='disk')``.
+        file_client_args (dict): Arguments to instantiate the
+            corresponding backend in mmdet <= 3.0.0rc6. Defaults to None.
+        backend_args (dict, optional): Arguments to instantiate the
+            corresponding backend. Defaults to None.
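+
+        Example (an editor's sketch of the two ways to select a storage
+        backend; the petrel mapping below mirrors the commented-out
+        configs in this patch)::
+
+            >>> backend_args = None  # local disk, the default
+            >>> backend_args = dict(  # ceph via petrel
+            ...     backend='petrel',
+            ...     path_mapping=dict({
+            ...         './data/': 's3://openmmlab/datasets/detection/',
+            ...         'data/': 's3://openmmlab/datasets/detection/'
+            ...     }))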
""" def __init__(self, *args, seg_map_suffix: str = '.png', proposal_file: Optional[str] = None, - file_client_args: dict = dict(backend='disk'), + file_client_args: dict = None, + backend_args: dict = None, **kwargs) -> None: self.seg_map_suffix = seg_map_suffix self.proposal_file = proposal_file - self.file_client_args = file_client_args - self.file_client = FileClient(**file_client_args) + self.backend_args = backend_args + if file_client_args is not None: + raise RuntimeError( + 'The `file_client_args` is deprecated, ' + 'please use `backend_args` instead, please refer to' + 'https://github.com/open-mmlab/mmdetection/blob/dev-3.x/configs/_base_/datasets/coco_detection.py' # noqa: E501 + ) super().__init__(*args, **kwargs) def full_init(self) -> None: @@ -88,7 +95,7 @@ def load_proposals(self) -> None: if not is_abs(self.proposal_file): self.proposal_file = osp.join(self.data_root, self.proposal_file) proposals_list = load( - self.proposal_file, file_client_args=self.file_client_args) + self.proposal_file, backend_args=self.backend_args) assert len(self.data_list) == len(proposals_list) for data_info in self.data_list: img_path = data_info['img_path'] diff --git a/mmdet/datasets/coco.py b/mmdet/datasets/coco.py index 873f635d0b0..f95dd8cb414 100644 --- a/mmdet/datasets/coco.py +++ b/mmdet/datasets/coco.py @@ -3,6 +3,8 @@ import os.path as osp from typing import List, Union +from mmengine.fileio import get_local_path + from mmdet.registry import DATASETS from .api_wrappers import COCO from .base_det_dataset import BaseDetDataset @@ -60,7 +62,8 @@ def load_data_list(self) -> List[dict]: Returns: List[dict]: A list of annotation. """ # noqa: E501 - with self.file_client.get_local_path(self.ann_file) as local_path: + with get_local_path( + self.ann_file, backend_args=self.backend_args) as local_path: self.coco = self.COCOAPI(local_path) # The order of returned `cat_ids` will not # change with the order of the `classes` diff --git a/mmdet/datasets/coco_panoptic.py b/mmdet/datasets/coco_panoptic.py index 917456ac137..33d4189e6c4 100644 --- a/mmdet/datasets/coco_panoptic.py +++ b/mmdet/datasets/coco_panoptic.py @@ -168,7 +168,9 @@ def __init__(self, pipeline: List[Union[dict, Callable]] = [], test_mode: bool = False, lazy_init: bool = False, - max_refetch: int = 1000) -> None: + max_refetch: int = 1000, + backend_args: dict = None, + **kwargs) -> None: super().__init__( ann_file=ann_file, metainfo=metainfo, @@ -180,7 +182,9 @@ def __init__(self, pipeline=pipeline, test_mode=test_mode, lazy_init=lazy_init, - max_refetch=max_refetch) + max_refetch=max_refetch, + backend_args=backend_args, + **kwargs) def parse_data_info(self, raw_data_info: dict) -> dict: """Parse raw annotation to target format. diff --git a/mmdet/datasets/crowdhuman.py b/mmdet/datasets/crowdhuman.py index fd67d2a5cc2..650176ee545 100644 --- a/mmdet/datasets/crowdhuman.py +++ b/mmdet/datasets/crowdhuman.py @@ -7,7 +7,7 @@ import mmcv from mmengine.dist import get_rank -from mmengine.fileio import dump, load +from mmengine.fileio import dump, get, get_text, load from mmengine.logging import print_log from mmengine.utils import ProgressBar @@ -66,8 +66,8 @@ def load_data_list(self) -> List[dict]: Returns: List[dict]: A list of annotation. 
""" # noqa: E501 - anno_strs = self.file_client.get_text( - self.ann_file).strip().split('\n') + anno_strs = get_text( + self.ann_file, backend_args=self.backend_args).strip().split('\n') print_log('loading CrowdHuman annotation...', level=logging.INFO) data_list = [] prog_bar = ProgressBar(len(anno_strs)) @@ -110,7 +110,7 @@ def parse_data_info(self, raw_data_info: dict) -> Union[dict, List[dict]]: data_info['img_id'] = raw_data_info['ID'] if not self.extra_ann_exist: - img_bytes = self.file_client.get(img_path) + img_bytes = get(img_path, backend_args=self.backend_args) img = mmcv.imfrombytes(img_bytes, backend='cv2') data_info['height'], data_info['width'] = img.shape[:2] self.extra_anns[raw_data_info['ID']] = img.shape[:2] diff --git a/mmdet/datasets/lvis.py b/mmdet/datasets/lvis.py index f24fec4971b..b9629f5d463 100644 --- a/mmdet/datasets/lvis.py +++ b/mmdet/datasets/lvis.py @@ -3,6 +3,8 @@ import warnings from typing import List +from mmengine.fileio import get_local_path + from mmdet.registry import DATASETS from .coco import CocoDataset @@ -285,7 +287,8 @@ def load_data_list(self) -> List[dict]: raise ImportError( 'Package lvis is not installed. Please run "pip install git+https://github.com/lvis-dataset/lvis-api.git".' # noqa: E501 ) - with self.file_client.get_local_path(self.ann_file) as local_path: + with get_local_path( + self.ann_file, backend_args=self.backend_args) as local_path: self.lvis = LVIS(local_path) self.cat_ids = self.lvis.get_cat_ids() self.cat2label = {cat_id: i for i, cat_id in enumerate(self.cat_ids)} @@ -597,7 +600,9 @@ def load_data_list(self) -> List[dict]: raise ImportError( 'Package lvis is not installed. Please run "pip install git+https://github.com/lvis-dataset/lvis-api.git".' # noqa: E501 ) - self.lvis = LVIS(self.ann_file) + with get_local_path( + self.ann_file, backend_args=self.backend_args) as local_path: + self.lvis = LVIS(local_path) self.cat_ids = self.lvis.get_cat_ids() self.cat2label = {cat_id: i for i, cat_id in enumerate(self.cat_ids)} self.cat_img_map = copy.deepcopy(self.lvis.cat_img_map) diff --git a/mmdet/datasets/objects365.py b/mmdet/datasets/objects365.py index 92e3fe14325..e99869bfa30 100644 --- a/mmdet/datasets/objects365.py +++ b/mmdet/datasets/objects365.py @@ -3,6 +3,8 @@ import os.path as osp from typing import List +from mmengine.fileio import get_local_path + from mmdet.registry import DATASETS from .api_wrappers import COCO from .coco import CocoDataset @@ -102,7 +104,8 @@ def load_data_list(self) -> List[dict]: Returns: List[dict]: A list of annotation. """ # noqa: E501 - with self.file_client.get_local_path(self.ann_file) as local_path: + with get_local_path( + self.ann_file, backend_args=self.backend_args) as local_path: self.coco = self.COCOAPI(local_path) # 'categories' list in objects365_train.json and objects365_val.json @@ -234,7 +237,8 @@ def load_data_list(self) -> List[dict]: Returns: List[dict]: A list of annotation. 
""" # noqa: E501 - with self.file_client.get_local_path(self.ann_file) as local_path: + with get_local_path( + self.ann_file, backend_args=self.backend_args) as local_path: self.coco = self.COCOAPI(local_path) # The order of returned `cat_ids` will not # change with the order of the `classes` diff --git a/mmdet/datasets/openimages.py b/mmdet/datasets/openimages.py index a6994071de1..a3c6c8ec44f 100644 --- a/mmdet/datasets/openimages.py +++ b/mmdet/datasets/openimages.py @@ -5,7 +5,7 @@ from typing import Dict, List, Optional import numpy as np -from mmengine.fileio import load +from mmengine.fileio import get_local_path, load from mmengine.utils import is_abs from mmdet.registry import DATASETS @@ -25,9 +25,8 @@ class OpenImagesDataset(BaseDetDataset): hierarchy_file (str): The file path of the class hierarchy. image_level_ann_file (str): Human-verified image level annotation, which is used in evaluation. - file_client_args (dict): Arguments to instantiate a FileClient. - See :class:`mmengine.fileio.FileClient` for details. - Defaults to ``dict(backend='disk')``. + backend_args (dict, optional): Arguments to instantiate the + corresponding backend. Defaults to None. """ METAINFO: dict = dict(dataset_type='oid_v6') @@ -66,7 +65,8 @@ def load_data_list(self) -> List[dict]: self._metainfo['RELATION_MATRIX'] = relation_matrix data_list = [] - with self.file_client.get_local_path(self.ann_file) as local_path: + with get_local_path( + self.ann_file, backend_args=self.backend_args) as local_path: with open(local_path, 'r') as f: reader = csv.reader(f) last_img_id = None @@ -123,9 +123,7 @@ def load_data_list(self) -> List[dict]: # add image metas to data list img_metas = load( - self.meta_file, - file_format='pkl', - file_client_args=self.file_client_args) + self.meta_file, file_format='pkl', backend_args=self.backend_args) assert len(img_metas) == len(data_list) for i, meta in enumerate(img_metas): img_id = data_list[i]['img_id'] @@ -167,7 +165,8 @@ def _parse_label_file(self, label_file: str) -> tuple: index_list = [] classes_names = [] - with self.file_client.get_local_path(label_file) as local_path: + with get_local_path( + label_file, backend_args=self.backend_args) as local_path: with open(local_path, 'r') as f: reader = csv.reader(f) for line in reader: @@ -201,7 +200,9 @@ def _parse_img_level_ann(self, """ item_lists = defaultdict(list) - with self.file_client.get_local_path(img_level_ann_file) as local_path: + with get_local_path( + img_level_ann_file, + backend_args=self.backend_args) as local_path: with open(local_path, 'r') as f: reader = csv.reader(f) for i, line in enumerate(reader): @@ -230,9 +231,7 @@ def _get_relation_matrix(self, hierarchy_file: str) -> np.ndarray: """ # noqa hierarchy = load( - hierarchy_file, - file_format='json', - file_client_args=self.file_client_args) + hierarchy_file, file_format='json', backend_args=self.backend_args) class_num = len(self._metainfo['classes']) relation_matrix = np.eye(class_num, class_num) relation_matrix = self._convert_hierarchy_tree(hierarchy, @@ -336,7 +335,8 @@ def load_data_list(self) -> List[dict]: self._metainfo['RELATION_MATRIX'] = relation_matrix data_list = [] - with self.file_client.get_local_path(self.ann_file) as local_path: + with get_local_path( + self.ann_file, backend_args=self.backend_args) as local_path: with open(local_path, 'r') as f: lines = f.readlines() i = 0 @@ -368,9 +368,7 @@ def load_data_list(self) -> List[dict]: # add image metas to data list img_metas = load( - self.meta_file, - file_format='pkl', - 
file_client_args=self.file_client_args) + self.meta_file, file_format='pkl', backend_args=self.backend_args) assert len(img_metas) == len(data_list) for i, meta in enumerate(img_metas): img_id = osp.split(data_list[i]['img_path'])[-1][:-4] @@ -413,7 +411,8 @@ def _parse_label_file(self, label_file: str) -> tuple: label_list = [] id_list = [] index_mapping = {} - with self.file_client.get_local_path(label_file) as local_path: + with get_local_path( + label_file, backend_args=self.backend_args) as local_path: with open(local_path, 'r') as f: reader = csv.reader(f) for line in reader: @@ -445,8 +444,9 @@ def _parse_img_level_ann(self, image_level_ann_file): """ item_lists = defaultdict(list) - with self.file_client.get_local_path( - image_level_ann_file) as local_path: + with get_local_path( + image_level_ann_file, + backend_args=self.backend_args) as local_path: with open(local_path, 'r') as f: reader = csv.reader(f) i = -1 @@ -478,6 +478,7 @@ def _get_relation_matrix(self, hierarchy_file: str) -> np.ndarray: relationship between the parent class and the child class, of shape (class_num, class_num). """ - with self.file_client.get_local_path(hierarchy_file) as local_path: + with get_local_path( + hierarchy_file, backend_args=self.backend_args) as local_path: class_label_tree = np.load(local_path, allow_pickle=True) return class_label_tree[1:, 1:] diff --git a/mmdet/datasets/transforms/loading.py b/mmdet/datasets/transforms/loading.py index f3092d40354..69e9a0ac621 100644 --- a/mmdet/datasets/transforms/loading.py +++ b/mmdet/datasets/transforms/loading.py @@ -8,7 +8,7 @@ from mmcv.transforms import BaseTransform from mmcv.transforms import LoadAnnotations as MMCV_LoadAnnotations from mmcv.transforms import LoadImageFromFile -from mmengine.fileio import FileClient +from mmengine.fileio import get from mmengine.structures import BaseDataElement from mmdet.registry import TRANSFORMS @@ -88,9 +88,10 @@ class LoadMultiChannelImageFromFiles(BaseTransform): argument for :func:``mmcv.imfrombytes``. See :func:``mmcv.imfrombytes`` for details. Defaults to 'cv2'. - file_client_args (dict): Arguments to instantiate a FileClient. - See :class:`mmengine.fileio.FileClient` for details. - Defaults to ``dict(backend='disk')``. + file_client_args (dict): Arguments to instantiate the + corresponding backend in mmdet <= 3.0.0rc6. Defaults to None. + backend_args (dict, optional): Arguments to instantiate the + corresponding backend in mmdet >= 3.0.0rc7. Defaults to None. 
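+
+    Examples:
+        >>> # Editor's sketch (file names are illustrative): stack several
+        >>> # single-channel files into one multi-channel image along the
+        >>> # last axis, reading from the local disk.
+        >>> transform = LoadMultiChannelImageFromFiles(backend_args=None)
+        >>> results = transform(
+        ...     dict(img_path=['ch0.tif', 'ch1.tif']))  # doctest: +SKIP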
""" def __init__( @@ -98,13 +99,19 @@ def __init__( to_float32: bool = False, color_type: str = 'unchanged', imdecode_backend: str = 'cv2', - file_client_args: dict = dict(backend='disk') + file_client_args: dict = None, + backend_args: dict = None, ) -> None: self.to_float32 = to_float32 self.color_type = color_type self.imdecode_backend = imdecode_backend - self.file_client_args = file_client_args.copy() - self.file_client = FileClient(**self.file_client_args) + self.backend_args = backend_args + if file_client_args is not None: + raise RuntimeError( + 'The `file_client_args` is deprecated, ' + 'please use `backend_args` instead, please refer to' + 'https://github.com/open-mmlab/mmdetection/blob/dev-3.x/configs/_base_/datasets/coco_detection.py' # noqa: E501 + ) def transform(self, results: dict) -> dict: """Transform functions to load multiple images and get images meta @@ -120,7 +127,7 @@ def transform(self, results: dict) -> dict: assert isinstance(results['img_path'], list) img = [] for name in results['img_path']: - img_bytes = self.file_client.get(name) + img_bytes = get(name, backend_args=self.backend_args) img.append( mmcv.imfrombytes( img_bytes, @@ -140,7 +147,7 @@ def __repr__(self): f'to_float32={self.to_float32}, ' f"color_type='{self.color_type}', " f"imdecode_backend='{self.imdecode_backend}', " - f'file_client_args={self.file_client_args})') + f'backend_args={self.backend_args})') return repr_str @@ -236,9 +243,8 @@ class LoadAnnotations(MMCV_LoadAnnotations): argument for :func:``mmcv.imfrombytes``. See :fun:``mmcv.imfrombytes`` for details. Defaults to 'cv2'. - file_client_args (dict): Arguments to instantiate a FileClient. - See :class:``mmengine.fileio.FileClient`` for details. - Defaults to ``dict(backend='disk')``. + backend_args (dict, optional): Arguments to instantiate the + corresponding backend. Defaults to None. """ def __init__(self, @@ -404,7 +410,7 @@ def __repr__(self) -> str: repr_str += f'with_seg={self.with_seg}, ' repr_str += f'poly2mask={self.poly2mask}, ' repr_str += f"imdecode_backend='{self.imdecode_backend}', " - repr_str += f'file_client_args={self.file_client_args})' + repr_str += f'backend_args={self.backend_args})' return repr_str @@ -501,21 +507,18 @@ class LoadPanopticAnnotations(LoadAnnotations): argument for :func:``mmcv.imfrombytes``. See :fun:``mmcv.imfrombytes`` for details. Defaults to 'cv2'. - file_client_args (dict): Arguments to instantiate a FileClient. - See :class:``mmengine.fileio.FileClient`` for details. - Defaults to ``dict(backend='disk')``. + backend_args (dict, optional): Arguments to instantiate the + corresponding backend in mmdet >= 3.0.0rc7. Defaults to None. 
""" - def __init__( - self, - with_bbox: bool = True, - with_label: bool = True, - with_mask: bool = True, - with_seg: bool = True, - box_type: str = 'hbox', - imdecode_backend: str = 'cv2', - file_client_args: dict = dict(backend='disk') - ) -> None: + def __init__(self, + with_bbox: bool = True, + with_label: bool = True, + with_mask: bool = True, + with_seg: bool = True, + box_type: str = 'hbox', + imdecode_backend: str = 'cv2', + backend_args: dict = None) -> None: try: from panopticapi import utils except ImportError: @@ -525,7 +528,6 @@ def __init__( 'panopticapi.git.') self.rgb2id = utils.rgb2id - self.file_client = FileClient(**file_client_args) super(LoadPanopticAnnotations, self).__init__( with_bbox=with_bbox, with_label=with_label, @@ -534,7 +536,7 @@ def __init__( with_keypoints=False, box_type=box_type, imdecode_backend=imdecode_backend, - file_client_args=file_client_args) + backend_args=backend_args) def _load_masks_and_semantic_segs(self, results: dict) -> None: """Private function to load mask and semantic segmentation annotations. @@ -550,7 +552,8 @@ def _load_masks_and_semantic_segs(self, results: dict) -> None: if results.get('seg_map_path', None) is None: return - img_bytes = self.file_client.get(results['seg_map_path']) + img_bytes = get( + results['seg_map_path'], backend_args=self.backend_args) pan_png = mmcv.imfrombytes( img_bytes, flag='color', channel_order='rgb').squeeze() pan_png = self.rgb2id(pan_png) diff --git a/mmdet/datasets/transforms/wrappers.py b/mmdet/datasets/transforms/wrappers.py index e5daf64fa22..3a17711c06b 100644 --- a/mmdet/datasets/transforms/wrappers.py +++ b/mmdet/datasets/transforms/wrappers.py @@ -28,8 +28,7 @@ class MultiBranch(BaseTransform): Examples: >>> branch_field = ['sup', 'unsup_teacher', 'unsup_student'] >>> sup_pipeline = [ - >>> dict(type='LoadImageFromFile', - >>> file_client_args=dict(backend='disk')), + >>> dict(type='LoadImageFromFile'), >>> dict(type='LoadAnnotations', with_bbox=True), >>> dict(type='Resize', scale=(1333, 800), keep_ratio=True), >>> dict(type='RandomFlip', prob=0.5), @@ -39,8 +38,7 @@ class MultiBranch(BaseTransform): >>> sup=dict(type='PackDetInputs')) >>> ] >>> weak_pipeline = [ - >>> dict(type='LoadImageFromFile', - >>> file_client_args=dict(backend='disk')), + >>> dict(type='LoadImageFromFile'), >>> dict(type='LoadAnnotations', with_bbox=True), >>> dict(type='Resize', scale=(1333, 800), keep_ratio=True), >>> dict(type='RandomFlip', prob=0.0), @@ -50,8 +48,7 @@ class MultiBranch(BaseTransform): >>> sup=dict(type='PackDetInputs')) >>> ] >>> strong_pipeline = [ - >>> dict(type='LoadImageFromFile', - >>> file_client_args=dict(backend='disk')), + >>> dict(type='LoadImageFromFile'), >>> dict(type='LoadAnnotations', with_bbox=True), >>> dict(type='Resize', scale=(1333, 800), keep_ratio=True), >>> dict(type='RandomFlip', prob=1.0), @@ -61,8 +58,7 @@ class MultiBranch(BaseTransform): >>> sup=dict(type='PackDetInputs')) >>> ] >>> unsup_pipeline = [ - >>> dict(type='LoadImageFromFile', - >>> file_client_args=file_client_args), + >>> dict(type='LoadImageFromFile'), >>> dict(type='LoadEmptyAnnotations'), >>> dict( >>> type='MultiBranch', @@ -75,15 +71,15 @@ class MultiBranch(BaseTransform): >>> unsup_branch = Compose(unsup_pipeline) >>> print(sup_branch) >>> Compose( - >>> LoadImageFromFile(ignore_empty=False, to_float32=False, color_type='color', imdecode_backend='cv2', file_client_args={'backend': 'disk'}) # noqa - >>> LoadAnnotations(with_bbox=True, with_label=True, with_mask=False, with_seg=False, 
poly2mask=True, imdecode_backend='cv2', file_client_args={'backend': 'disk'}) # noqa + >>> LoadImageFromFile(ignore_empty=False, to_float32=False, color_type='color', imdecode_backend='cv2') # noqa + >>> LoadAnnotations(with_bbox=True, with_label=True, with_mask=False, with_seg=False, poly2mask=True, imdecode_backend='cv2') # noqa >>> Resize(scale=(1333, 800), scale_factor=None, keep_ratio=True, clip_object_border=True), backend=cv2), interpolation=bilinear) # noqa >>> RandomFlip(prob=0.5, direction=horizontal) >>> MultiBranch(branch_pipelines=['sup']) >>> ) >>> print(unsup_branch) >>> Compose( - >>> LoadImageFromFile(ignore_empty=False, to_float32=False, color_type='color', imdecode_backend='cv2', file_client_args={'backend': 'disk'}) # noqa + >>> LoadImageFromFile(ignore_empty=False, to_float32=False, color_type='color', imdecode_backend='cv2') # noqa >>> LoadEmptyAnnotations(with_bbox=True, with_label=True, with_mask=False, with_seg=False, seg_ignore_label=255) # noqa >>> MultiBranch(branch_pipelines=['unsup_teacher', 'unsup_student']) >>> ) diff --git a/mmdet/datasets/xml_style.py b/mmdet/datasets/xml_style.py index 4f1ba5965d5..da0d2b261f1 100644 --- a/mmdet/datasets/xml_style.py +++ b/mmdet/datasets/xml_style.py @@ -4,7 +4,7 @@ from typing import List, Optional, Union import mmcv -from mmengine.fileio import list_from_file +from mmengine.fileio import get, get_local_path, list_from_file from mmdet.registry import DATASETS from .base_det_dataset import BaseDetDataset @@ -17,9 +17,8 @@ class XMLDataset(BaseDetDataset): Args: img_subdir (str): Subdir where images are stored. Default: JPEGImages. ann_subdir (str): Subdir where annotations are. Default: Annotations. - file_client_args (dict): Arguments to instantiate a FileClient. - See :class:`mmengine.fileio.FileClient` for details. - Defaults to ``dict(backend='disk')``. + backend_args (dict, optional): Arguments to instantiate the + corresponding backend. Defaults to None. 
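+
+    Example (editor's sketch; the classic VOC layout)::
+
+        >>> dataset_cfg = dict(
+        ...     img_subdir='JPEGImages',
+        ...     ann_subdir='Annotations',
+        ...     backend_args=None)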
""" def __init__(self, @@ -49,8 +48,7 @@ def load_data_list(self) -> List[dict]: } data_list = [] - img_ids = list_from_file( - self.ann_file, file_client_args=self.file_client_args) + img_ids = list_from_file(self.ann_file, backend_args=self.backend_args) for img_id in img_ids: file_name = osp.join(self.img_subdir, f'{img_id}.jpg') xml_path = osp.join(self.sub_data_root, self.ann_subdir, @@ -90,8 +88,9 @@ def parse_data_info(self, img_info: dict) -> Union[dict, List[dict]]: data_info['xml_path'] = img_info['xml_path'] # deal with xml file - with self.file_client.get_local_path( - img_info['xml_path']) as local_path: + with get_local_path( + img_info['xml_path'], + backend_args=self.backend_args) as local_path: raw_ann_info = ET.parse(local_path) root = raw_ann_info.getroot() size = root.find('size') @@ -99,7 +98,7 @@ def parse_data_info(self, img_info: dict) -> Union[dict, List[dict]]: width = int(size.find('width').text) height = int(size.find('height').text) else: - img_bytes = self.file_client.get(img_path) + img_bytes = get(img_path, backend_args=self.backend_args) img = mmcv.imfrombytes(img_bytes, backend='cv2') height, width = img.shape[:2] del img, img_bytes diff --git a/mmdet/engine/hooks/visualization_hook.py b/mmdet/engine/hooks/visualization_hook.py index 1319ee55ac0..a8372433bd3 100644 --- a/mmdet/engine/hooks/visualization_hook.py +++ b/mmdet/engine/hooks/visualization_hook.py @@ -4,7 +4,7 @@ from typing import Optional, Sequence import mmcv -from mmengine.fileio import FileClient +from mmengine.fileio import get from mmengine.hooks import Hook from mmengine.runner import Runner from mmengine.utils import mkdir_or_exist @@ -42,9 +42,8 @@ class DetVisualizationHook(Hook): wait_time (float): The interval of show (s). Defaults to 0. test_out_dir (str, optional): directory where painted images will be saved in testing process. - file_client_args (dict): Arguments to instantiate a FileClient. - See :class:`mmengine.fileio.FileClient` for details. - Defaults to ``dict(backend='disk')``. + backend_args (dict, optional): Arguments to instantiate the + corresponding backend. Defaults to None. """ def __init__(self, @@ -54,7 +53,7 @@ def __init__(self, show: bool = False, wait_time: float = 0., test_out_dir: Optional[str] = None, - file_client_args: dict = dict(backend='disk')): + backend_args: dict = None): self._visualizer: Visualizer = Visualizer.get_current_instance() self.interval = interval self.score_thr = score_thr @@ -68,8 +67,7 @@ def __init__(self, 'needs to be excluded.') self.wait_time = wait_time - self.file_client_args = file_client_args.copy() - self.file_client = None + self.backend_args = backend_args self.draw = draw self.test_out_dir = test_out_dir self._test_index = 0 @@ -88,16 +86,13 @@ def after_val_iter(self, runner: Runner, batch_idx: int, data_batch: dict, if self.draw is False: return - if self.file_client is None: - self.file_client = FileClient(**self.file_client_args) - # There is no guarantee that the same batch of images # is visualized for each evaluation. 
        total_curr_iter = runner.iter + batch_idx

         # Visualize only the first data
         img_path = outputs[0].img_path
-        img_bytes = self.file_client.get(img_path)
+        img_bytes = get(img_path, backend_args=self.backend_args)
         img = mmcv.imfrombytes(img_bytes, channel_order='rgb')

         if total_curr_iter % self.interval == 0:
@@ -129,14 +124,11 @@ def after_test_iter(self, runner: Runner, batch_idx: int, data_batch: dict,
                 self.test_out_dir)
             mkdir_or_exist(self.test_out_dir)

-        if self.file_client is None:
-            self.file_client = FileClient(**self.file_client_args)
-
         for data_sample in outputs:
             self._test_index += 1

             img_path = data_sample.img_path
-            img_bytes = self.file_client.get(img_path)
+            img_bytes = get(img_path, backend_args=self.backend_args)
             img = mmcv.imfrombytes(img_bytes, channel_order='rgb')

             out_file = None
diff --git a/mmdet/evaluation/functional/__init__.py b/mmdet/evaluation/functional/__init__.py
index 6125ba74cd5..6f139f7bc4f 100644
--- a/mmdet/evaluation/functional/__init__.py
+++ b/mmdet/evaluation/functional/__init__.py
@@ -1,5 +1,6 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from .bbox_overlaps import bbox_overlaps
+from .cityscapes_utils import evaluateImgLists
 from .class_names import (cityscapes_classes, coco_classes,
                           coco_panoptic_classes, dataset_aliases, get_classes,
                           imagenet_det_classes, imagenet_vid_classes,
@@ -18,5 +19,6 @@
     'print_recall_summary', 'plot_num_recall', 'plot_iou_recall',
     'oid_v6_classes', 'oid_challenge_classes', 'INSTANCE_OFFSET',
     'pq_compute_single_core', 'pq_compute_multi_core', 'bbox_overlaps',
-    'objects365v1_classes', 'objects365v2_classes', 'coco_panoptic_classes'
+    'objects365v1_classes', 'objects365v2_classes', 'coco_panoptic_classes',
+    'evaluateImgLists'
 ]
diff --git a/mmdet/evaluation/functional/cityscapes_utils.py b/mmdet/evaluation/functional/cityscapes_utils.py
new file mode 100644
index 00000000000..e72cd171ce2
--- /dev/null
+++ b/mmdet/evaluation/functional/cityscapes_utils.py
@@ -0,0 +1,278 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# Copyright (c) https://github.com/mcordts/cityscapesScripts
+# A wrapper of `cityscapesscripts` that supports loading groundtruth
+# images through the file backend specified by `backend_args`.
+import json
+import os
+import sys
+from pathlib import Path
+from typing import Optional, Union
+
+import cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling as CSEval  # noqa: E501
+import mmcv
+import numpy as np
+from cityscapesscripts.evaluation.instance import Instance
+from cityscapesscripts.helpers.csHelpers import id2label  # noqa: E501
+from cityscapesscripts.helpers.csHelpers import labels, writeDict2JSON
+from mmengine.fileio import get
+
+
+def evaluateImgLists(prediction_list: list,
+                     groundtruth_list: list,
+                     args: CSEval.CArgs,
+                     backend_args: Optional[dict] = None,
+                     dump_matches: bool = False) -> dict:
+    """A wrapper of obj:``cityscapesscripts.evaluation.
+
+    evalInstanceLevelSemanticLabeling.evaluateImgLists``. Supports loading
+    groundtruth images from a file backend.
+    Args:
+        prediction_list (list): A list of prediction txt files.
+        groundtruth_list (list): A list of groundtruth image files.
+        args (CSEval.CArgs): A global object setting in
+            obj:``cityscapesscripts.evaluation.
+            evalInstanceLevelSemanticLabeling``
+        backend_args (dict, optional): Arguments to instantiate the
+            corresponding file backend. Defaults to None.
+        dump_matches (bool): Whether to dump matches.json. Defaults to False.
+    Returns:
+        dict: The computed metric.
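+
+    Example (editor's sketch; the file names are illustrative and
+    ``CSEval.args`` carries the library defaults)::
+
+        >>> res = evaluateImgLists(
+        ...     ['frankfurt_000000_pred.txt'],
+        ...     ['frankfurt_000000_gtFine_instanceIds.png'],
+        ...     CSEval.args, backend_args=None)  # doctest: +SKIP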
+    """
+    # determine labels of interest
+    CSEval.setInstanceLabels(args)
+    # get dictionary of all ground truth instances
+    gt_instances = getGtInstances(
+        groundtruth_list, args, backend_args=backend_args)
+    # match predictions and ground truth
+    matches = matchGtWithPreds(prediction_list, groundtruth_list,
+                               gt_instances, args, backend_args)
+    if dump_matches:
+        CSEval.writeDict2JSON(matches, 'matches.json')
+    # evaluate matches
+    apScores = CSEval.evaluateMatches(matches, args)
+    # averages
+    avgDict = CSEval.computeAverages(apScores, args)
+    # result dict
+    resDict = CSEval.prepareJSONDataForResults(avgDict, apScores, args)
+    if args.JSONOutput:
+        # create output folder if necessary
+        path = os.path.dirname(args.exportFile)
+        CSEval.ensurePath(path)
+        # Write APs to JSON
+        CSEval.writeDict2JSON(resDict, args.exportFile)
+
+    CSEval.printResults(avgDict, args)
+
+    return resDict
+
+
+def matchGtWithPreds(prediction_list: list,
+                     groundtruth_list: list,
+                     gt_instances: dict,
+                     args: CSEval.CArgs,
+                     backend_args=None):
+    """A wrapper of obj:``cityscapesscripts.evaluation.
+
+    evalInstanceLevelSemanticLabeling.matchGtWithPreds``. Supports loading
+    groundtruth images from a file backend.
+    Args:
+        prediction_list (list): A list of prediction txt files.
+        groundtruth_list (list): A list of groundtruth image files.
+        gt_instances (dict): Groundtruth dict.
+        args (CSEval.CArgs): A global object setting in
+            obj:``cityscapesscripts.evaluation.
+            evalInstanceLevelSemanticLabeling``
+        backend_args (dict, optional): Arguments to instantiate the
+            corresponding file backend. Defaults to None.
+    Returns:
+        dict: The processed prediction and groundtruth result.
+    """
+    matches: dict = dict()
+    if not args.quiet:
+        print(f'Matching {len(prediction_list)} pairs of images...')
+
+    count = 0
+    for (pred, gt) in zip(prediction_list, groundtruth_list):
+        # Read input files
+        gt_image = readGTImage(gt, backend_args)
+        pred_info = readPredInfo(pred)
+        # Get and filter ground truth instances
+        unfiltered_instances = gt_instances[gt]
+        cur_gt_instances_orig = CSEval.filterGtInstances(
+            unfiltered_instances, args)
+
+        # Try to assign all predictions
+        (cur_gt_instances,
+         cur_pred_instances) = CSEval.assignGt2Preds(cur_gt_instances_orig,
+                                                     gt_image, pred_info, args)
+
+        # append to global dict
+        matches[gt] = {}
+        matches[gt]['groundTruth'] = cur_gt_instances
+        matches[gt]['prediction'] = cur_pred_instances
+
+        count += 1
+        if not args.quiet:
+            print(f'\rImages Processed: {count}', end=' ')
+            sys.stdout.flush()
+
+    if not args.quiet:
+        print('')
+
+    return matches
+
+
+def readGTImage(image_file: Union[str, Path],
+                backend_args: Optional[dict] = None) -> np.ndarray:
+    """Read an image from path.
+
+    Same as obj:``cityscapesscripts.evaluation.
+    evalInstanceLevelSemanticLabeling.readGTImage``, but supports loading
+    the groundtruth image from a file backend.
+    Args:
+        image_file (str or Path): Either a str or pathlib.Path.
+        backend_args (dict, optional): Arguments to instantiate the
+            corresponding file backend. It may contain a `backend` key that
+            names the file backend explicitly; if present, that backend is
+            initialized with the remaining values, otherwise the backend
+            is selected based on the prefix of the file path.
+            Defaults to None.
+    Returns:
+        np.ndarray: The groundtruth image.
+    """
+    img_bytes = get(image_file, backend_args=backend_args)
+    img = mmcv.imfrombytes(img_bytes, flag='unchanged', backend='pillow')
+    return img
+
+
+def readPredInfo(prediction_file: str) -> dict:
+    """A wrapper of obj:``cityscapesscripts.evaluation.
+
+    evalInstanceLevelSemanticLabeling.readPredInfo``.
+    Args:
+        prediction_file (str): The prediction txt file.
+    Returns:
+        dict: The processed prediction results.
+    """
+
+    printError = CSEval.printError
+
+    predInfo = {}
+    if (not os.path.isfile(prediction_file)):
+        printError(f"Infofile '{prediction_file}' "
+                   'for the predictions not found.')
+    with open(prediction_file) as f:
+        for line in f:
+            splittedLine = line.split(' ')
+            if len(splittedLine) != 3:
+                printError('Invalid prediction file. Expected content: '
+                           'relPathPrediction1 labelIDPrediction1 '
+                           'confidencePrediction1')
+            if os.path.isabs(splittedLine[0]):
+                printError('Invalid prediction file. First entry in each '
+                           'line must be a relative path.')
+
+            filename = os.path.join(
+                os.path.dirname(prediction_file), splittedLine[0])
+
+            imageInfo = {}
+            imageInfo['labelID'] = int(float(splittedLine[1]))
+            imageInfo['conf'] = float(splittedLine[2])  # type: ignore
+            predInfo[filename] = imageInfo
+
+    return predInfo
+
+
+def getGtInstances(groundtruth_list: list,
+                   args: CSEval.CArgs,
+                   backend_args: Optional[dict] = None) -> dict:
+    """A wrapper of obj:``cityscapesscripts.evaluation.
+
+    evalInstanceLevelSemanticLabeling.getGtInstances``. Supports loading
+    groundtruth images from a file backend.
+    Args:
+        groundtruth_list (list): A list of groundtruth image files.
+        args (CSEval.CArgs): A global object setting in
+            obj:``cityscapesscripts.evaluation.
+            evalInstanceLevelSemanticLabeling``
+        backend_args (dict, optional): Arguments to instantiate the
+            corresponding file backend. Defaults to None.
+    Returns:
+        dict: The groundtruth instances of all images.
+    """
+    # if there is a global statistics json, then load it
+    if (os.path.isfile(args.gtInstancesFile)):
+        if not args.quiet:
+            print('Loading ground truth instances from JSON.')
+        with open(args.gtInstancesFile) as json_file:
+            gt_instances = json.load(json_file)
+    # otherwise create it
+    else:
+        if (not args.quiet):
+            print('Creating ground truth instances from png files.')
+        gt_instances = instances2dict(
+            groundtruth_list, args, backend_args=backend_args)
+        writeDict2JSON(gt_instances, args.gtInstancesFile)
+
+    return gt_instances
+
+
+def instances2dict(image_list: list,
+                   args: CSEval.CArgs,
+                   backend_args: Optional[dict] = None) -> dict:
+    """A wrapper of obj:``cityscapesscripts.evaluation.
+
+    evalInstanceLevelSemanticLabeling.instances2dict``. Supports loading
+    groundtruth images from a file backend.
+    Args:
+        image_list (list): A list of image files.
+        args (CSEval.CArgs): A global object setting in
+            obj:``cityscapesscripts.evaluation.
+            evalInstanceLevelSemanticLabeling``
+        backend_args (dict, optional): Arguments to instantiate the
+            corresponding file backend. Defaults to None.
+    Returns:
+        dict: The processed groundtruth results.
+ """ + imgCount = 0 + instanceDict = {} + + if not isinstance(image_list, list): + image_list = [image_list] + + if not args.quiet: + print(f'Processing {len(image_list)} images...') + + for image_name in image_list: + # Load image + img_bytes = get(image_name, backend_args=backend_args) + imgNp = mmcv.imfrombytes(img_bytes, flag='unchanged', backend='pillow') + + # Initialize label categories + instances: dict = {} + for label in labels: + instances[label.name] = [] + + # Loop through all instance ids in instance image + for instanceId in np.unique(imgNp): + instanceObj = Instance(imgNp, instanceId) + + instances[id2label[instanceObj.labelID].name].append( + instanceObj.toDict()) + + instanceDict[image_name] = instances + imgCount += 1 + + if not args.quiet: + print(f'\rImages Processed: {imgCount}', end=' ') + sys.stdout.flush() + + return instanceDict diff --git a/mmdet/evaluation/functional/panoptic_utils.py b/mmdet/evaluation/functional/panoptic_utils.py index 77c6cd22ec1..6faa8ed52bc 100644 --- a/mmdet/evaluation/functional/panoptic_utils.py +++ b/mmdet/evaluation/functional/panoptic_utils.py @@ -1,7 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. # Copyright (c) 2018, Alexander Kirillov -# This file supports `file_client` for `panopticapi`, +# This file supports `backend_args` for `panopticapi`, # the source code is copied from `panopticapi`, # only the way to load the gt images is modified. import multiprocessing @@ -9,7 +9,7 @@ import mmcv import numpy as np -from mmengine.fileio import FileClient +from mmengine.fileio import get # A custom value to distinguish instance ID and category ID; need to # be greater than the number of categories. @@ -32,7 +32,7 @@ def pq_compute_single_core(proc_id, gt_folder, pred_folder, categories, - file_client=None, + backend_args=None, print_log=False): """The single core function to evaluate the metric of Panoptic Segmentation. @@ -45,8 +45,8 @@ def pq_compute_single_core(proc_id, gt_folder (str): The path of the ground truth images. pred_folder (str): The path of the prediction images. categories (str): The categories of the dataset. - file_client (object): The file client of the dataset. If None, - the backend will be set to `disk`. + backend_args (object): The Backend of the dataset. If None, + the backend will be set to `local`. print_log (bool): Whether to print the log. Defaults to False. """ if PQStat is None: @@ -55,10 +55,6 @@ def pq_compute_single_core(proc_id, 'pip install git+https://github.com/cocodataset/' 'panopticapi.git.') - if file_client is None: - file_client_args = dict(backend='disk') - file_client = FileClient(**file_client_args) - pq_stat = PQStat() idx = 0 @@ -68,9 +64,10 @@ def pq_compute_single_core(proc_id, proc_id, idx, len(annotation_set))) idx += 1 # The gt images can be on the local disk or `ceph`, so we use - # file_client here. - img_bytes = file_client.get( - os.path.join(gt_folder, gt_ann['file_name'])) + # backend here. + img_bytes = get( + os.path.join(gt_folder, gt_ann['file_name']), + backend_args=backend_args) pan_gt = mmcv.imfrombytes(img_bytes, flag='color', channel_order='rgb') pan_gt = rgb2id(pan_gt) @@ -181,7 +178,7 @@ def pq_compute_multi_core(matched_annotations_list, gt_folder, pred_folder, categories, - file_client=None, + backend_args=None, nproc=32): """Evaluate the metrics of Panoptic Segmentation with multithreading. @@ -194,8 +191,8 @@ def pq_compute_multi_core(matched_annotations_list, gt_folder (str): The path of the ground truth images. 
pred_folder (str): The path of the prediction images. categories (str): The categories of the dataset. - file_client (object): The file client of the dataset. If None, - the backend will be set to `disk`. + backend_args (object): The file client of the dataset. If None, + the backend will be set to `local`. nproc (int): Number of processes for panoptic quality computing. Defaults to 32. When `nproc` exceeds the number of cpu cores, the number of cpu cores is used. @@ -206,10 +203,6 @@ def pq_compute_multi_core(matched_annotations_list, 'pip install git+https://github.com/cocodataset/' 'panopticapi.git.') - if file_client is None: - file_client_args = dict(backend='disk') - file_client = FileClient(**file_client_args) - cpu_num = min(nproc, multiprocessing.cpu_count()) annotations_split = np.array_split(matched_annotations_list, cpu_num) @@ -220,7 +213,7 @@ def pq_compute_multi_core(matched_annotations_list, for proc_id, annotation_set in enumerate(annotations_split): p = workers.apply_async(pq_compute_single_core, (proc_id, annotation_set, gt_folder, - pred_folder, categories, file_client)) + pred_folder, categories, backend_args)) processes.append(p) # Close the process pool, otherwise it will lead to memory diff --git a/mmdet/evaluation/metrics/cityscapes_metric.py b/mmdet/evaluation/metrics/cityscapes_metric.py index 2b28100aff4..23edbf964b1 100644 --- a/mmdet/evaluation/metrics/cityscapes_metric.py +++ b/mmdet/evaluation/metrics/cityscapes_metric.py @@ -2,26 +2,26 @@ import os import os.path as osp import shutil +import tempfile from collections import OrderedDict from typing import Dict, Optional, Sequence import mmcv import numpy as np -from mmengine.dist import is_main_process, master_only +from mmengine.dist import is_main_process from mmengine.evaluator import BaseMetric from mmengine.logging import MMLogger from mmdet.registry import METRICS try: - import cityscapesscripts - from cityscapesscripts.evaluation import \ - evalInstanceLevelSemanticLabeling as CSEval - from cityscapesscripts.helpers import labels as CSLabels + import cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling as CSEval # noqa: E501 + import cityscapesscripts.helpers.labels as CSLabels + + from mmdet.evaluation.functional.cityscapes_utils import evaluateImgLists + HAS_CITYSCAPESAPI = True except ImportError: - cityscapesscripts = None - CSLabels = None - CSEval = None + HAS_CITYSCAPESAPI = False @METRICS.register_module() @@ -40,8 +40,6 @@ class CityScapesMetric(BaseMetric): evaluation. It is useful when you want to format the result to a specific format and submit it to the test server. Defaults to False. - keep_results (bool): Whether to keep the results. When ``format_only`` - is True, ``keep_results`` must be True. Defaults to False. collect_device (str): Device name used for collecting results from different ranks during distributed training. Must be 'cpu' or 'gpu'. Defaults to 'cpu'. @@ -49,6 +47,12 @@ class CityScapesMetric(BaseMetric): names to disambiguate homonymous metrics of different evaluators. If prefix is not provided in the argument, self.default_prefix will be used instead. Defaults to None. + dump_matches (bool): Whether dump matches.json file during evaluating. + Defaults to False. + file_client_args (dict, optional): Arguments to instantiate the + corresponding backend in mmdet <= 3.0.0rc6. Defaults to None. + backend_args (dict, optional): Arguments to instantiate the + corresponding backend. Defaults to None. 
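+
+    Examples:
+        >>> # Editor's sketch of a typical evaluator config (the paths are
+        >>> # illustrative):
+        >>> val_evaluator = dict(
+        ...     type='CityScapesMetric',
+        ...     seg_prefix='data/cityscapes/gtFine/val',
+        ...     outfile_prefix='./work_dirs/cityscapes_metric/instance')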
""" default_prefix: Optional[str] = 'cityscapes' @@ -56,33 +60,59 @@ def __init__(self, outfile_prefix: str, seg_prefix: Optional[str] = None, format_only: bool = False, - keep_results: bool = False, collect_device: str = 'cpu', - prefix: Optional[str] = None) -> None: - if cityscapesscripts is None: - raise RuntimeError('Please run "pip install cityscapesscripts" to ' - 'install cityscapesscripts first.') - - assert outfile_prefix, 'outfile_prefix must be not None.' - - if format_only: - assert keep_results, 'keep_results must be True when ' - 'format_only is True' - + prefix: Optional[str] = None, + dump_matches: bool = False, + file_client_args: dict = None, + backend_args: dict = None) -> None: + + if not HAS_CITYSCAPESAPI: + raise RuntimeError('Failed to import `cityscapesscripts`.' + 'Please try to install official ' + 'cityscapesscripts by ' + '"pip install cityscapesscripts"') super().__init__(collect_device=collect_device, prefix=prefix) + + self.tmp_dir = None self.format_only = format_only - self.keep_results = keep_results - self.seg_out_dir = osp.abspath(f'{outfile_prefix}.results') - self.seg_prefix = seg_prefix + if self.format_only: + assert outfile_prefix is not None, 'outfile_prefix must be not' + 'None when format_only is True, otherwise the result files will' + 'be saved to a temp directory which will be cleaned up at the end.' + else: + assert seg_prefix is not None, '`seg_prefix` is necessary when ' + 'computing the CityScapes metrics' + + if outfile_prefix is None: + self.tmp_dir = tempfile.TemporaryDirectory() + self.outfile_prefix = osp.join(self.tmp_dir.name, 'results') + else: + # the directory to save predicted panoptic segmentation mask + self.outfile_prefix = osp.join(outfile_prefix, 'results') # type: ignore # yapf: disable # noqa: E501 + + dir_name = osp.expanduser(self.outfile_prefix) + + if osp.exists(dir_name) and is_main_process(): + logger: MMLogger = MMLogger.get_current_instance() + logger.info('remove previous results.') + shutil.rmtree(dir_name) + os.makedirs(dir_name, exist_ok=True) + + self.backend_args = backend_args + if file_client_args is not None: + raise RuntimeError( + 'The `file_client_args` is deprecated, ' + 'please use `backend_args` instead, please refer to' + 'https://github.com/open-mmlab/mmdetection/blob/dev-3.x/configs/_base_/datasets/coco_detection.py' # noqa: E501 + ) - if is_main_process(): - os.makedirs(self.seg_out_dir, exist_ok=True) + self.seg_prefix = seg_prefix + self.dump_matches = dump_matches - @master_only def __del__(self) -> None: - """Clean up.""" - if not self.keep_results: - shutil.rmtree(self.seg_out_dir) + """Clean up the results if necessary.""" + if self.tmp_dir is not None: + self.tmp_dir.cleanup() # TODO: data_batch is no longer needed, consider adjusting the # parameter position @@ -102,7 +132,7 @@ def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None: pred = data_sample['pred_instances'] filename = data_sample['img_path'] basename = osp.splitext(osp.basename(filename))[0] - pred_txt = osp.join(self.seg_out_dir, basename + '_pred.txt') + pred_txt = osp.join(self.outfile_prefix, basename + '_pred.txt') result['pred_txt'] = pred_txt labels = pred['labels'].cpu().numpy() masks = pred['masks'].cpu().numpy().astype(np.uint8) @@ -118,7 +148,8 @@ def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None: class_name = self.dataset_meta['classes'][label] class_id = CSLabels.name2label[class_name].id png_filename = osp.join( - self.seg_out_dir, basename + f'_{i}_{class_name}.png') 
+                        self.outfile_prefix,
+                        basename + f'_{i}_{class_name}.png')
                     mmcv.imwrite(mask, png_filename)
                     f.write(f'{osp.basename(png_filename)} '
                             f'{class_id} {mask_score}\n')
@@ -127,8 +160,7 @@ def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None:
             gt = dict()
             img_path = filename.replace('leftImg8bit.png',
                                         'gtFine_instanceIds.png')
-            img_path = img_path.replace('leftImg8bit', 'gtFine')
-            gt['file_name'] = osp.join(self.seg_prefix, img_path)
+            gt['file_name'] = img_path.replace('leftImg8bit', 'gtFine')
 
             self.results.append((gt, result))
 
@@ -146,25 +178,26 @@ def compute_metrics(self, results: list) -> Dict[str, float]:
 
         if self.format_only:
             logger.info(
-                f'results are saved to {osp.dirname(self.seg_out_dir)}')
+                f'results are saved to {osp.dirname(self.outfile_prefix)}')
             return OrderedDict()
         logger.info('starts to compute metric')
 
         gts, preds = zip(*results)
         # set global states in cityscapes evaluation API
-        CSEval.args.cityscapesPath = osp.join(self.seg_prefix, '../..')
-        CSEval.args.predictionPath = self.seg_out_dir
-        CSEval.args.predictionWalk = None
+        gt_instances_file = osp.join(self.outfile_prefix, 'gtInstances.json')  # type: ignore # yapf: disable # noqa: E501
         CSEval.args.JSONOutput = False
         CSEval.args.colorized = False
-        CSEval.args.gtInstancesFile = osp.join(self.seg_out_dir,
-                                               'gtInstances.json')
+        CSEval.args.gtInstancesFile = gt_instances_file
 
         groundTruthImgList = [gt['file_name'] for gt in gts]
         predictionImgList = [pred['pred_txt'] for pred in preds]
-        CSEval_results = CSEval.evaluateImgLists(predictionImgList,
-                                                 groundTruthImgList,
-                                                 CSEval.args)['averages']
+        CSEval_results = evaluateImgLists(
+            predictionImgList,
+            groundTruthImgList,
+            CSEval.args,
+            self.backend_args,
+            dump_matches=self.dump_matches)['averages']
+
         eval_results = OrderedDict()
         eval_results['mAP'] = CSEval_results['allAp']
         eval_results['AP@50'] = CSEval_results['allAp50%']
diff --git a/mmdet/evaluation/metrics/coco_metric.py b/mmdet/evaluation/metrics/coco_metric.py
index bd56803da3d..1ca33c50cfa 100644
--- a/mmdet/evaluation/metrics/coco_metric.py
+++ b/mmdet/evaluation/metrics/coco_metric.py
@@ -9,7 +9,7 @@
 import numpy as np
 import torch
 from mmengine.evaluator import BaseMetric
-from mmengine.fileio import FileClient, dump, load
+from mmengine.fileio import dump, get_local_path, load
 from mmengine.logging import MMLogger
 from terminaltables import AsciiTable
 
@@ -50,9 +50,10 @@ class CocoMetric(BaseMetric):
         outfile_prefix (str, optional): The prefix of json files. It includes
             the file path and the prefix of filename, e.g., "a/b/prefix".
             If not specified, a temp file will be created. Defaults to None.
-        file_client_args (dict): Arguments to instantiate a FileClient.
-            See :class:`mmengine.fileio.FileClient` for details.
-            Defaults to ``dict(backend='disk')``.
+        file_client_args (dict, optional): Arguments to instantiate the
+            corresponding backend in mmdet <= 3.0.0rc6. Defaults to None.
+        backend_args (dict, optional): Arguments to instantiate the
+            corresponding backend. Defaults to None.
         collect_device (str): Device name used for collecting results from
             different ranks during distributed training. Must be 'cpu' or
             'gpu'. Defaults to 'cpu'.
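[Editor's note] To make the docstring change above concrete, here is a minimal sketch of how a user-side evaluator config migrates under this refactor; the annotation path is a placeholder:

```python
# mmdet <= 3.0.0rc6: storage options were passed through FileClient.
val_evaluator = dict(
    type='CocoMetric',
    ann_file='data/coco/annotations/instances_val2017.json',  # placeholder
    metric='bbox',
    file_client_args=dict(backend='disk'))

# After this patch: pass `backend_args`; None selects the local backend.
val_evaluator = dict(
    type='CocoMetric',
    ann_file='data/coco/annotations/instances_val2017.json',  # placeholder
    metric='bbox',
    backend_args=None)
```

As the hunks below show, passing the old `file_client_args` now raises a `RuntimeError` instead of being silently accepted.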
@@ -74,7 +75,8 @@ def __init__(self, metric_items: Optional[Sequence[str]] = None, format_only: bool = False, outfile_prefix: Optional[str] = None, - file_client_args: dict = dict(backend='disk'), + file_client_args: dict = None, + backend_args: dict = None, collect_device: str = 'cpu', prefix: Optional[str] = None, sort_categories: bool = False) -> None: @@ -108,13 +110,19 @@ def __init__(self, self.outfile_prefix = outfile_prefix - self.file_client_args = file_client_args - self.file_client = FileClient(**file_client_args) + self.backend_args = backend_args + if file_client_args is not None: + raise RuntimeError( + 'The `file_client_args` is deprecated, ' + 'please use `backend_args` instead, please refer to' + 'https://github.com/open-mmlab/mmdetection/blob/dev-3.x/configs/_base_/datasets/coco_detection.py' # noqa: E501 + ) # if ann_file is not specified, # initialize coco api with the converted dataset if ann_file is not None: - with self.file_client.get_local_path(ann_file) as local_path: + with get_local_path( + ann_file, backend_args=self.backend_args) as local_path: self._coco_api = COCO(local_path) if sort_categories: # 'categories' list in objects365_train.json and diff --git a/mmdet/evaluation/metrics/coco_panoptic_metric.py b/mmdet/evaluation/metrics/coco_panoptic_metric.py index bafe275925a..1ccf796d917 100644 --- a/mmdet/evaluation/metrics/coco_panoptic_metric.py +++ b/mmdet/evaluation/metrics/coco_panoptic_metric.py @@ -8,7 +8,7 @@ import mmcv import numpy as np from mmengine.evaluator import BaseMetric -from mmengine.fileio import FileClient, dump, load +from mmengine.fileio import dump, get_local_path, load from mmengine.logging import MMLogger, print_log from terminaltables import AsciiTable @@ -56,9 +56,10 @@ class CocoPanopticMetric(BaseMetric): nproc (int): Number of processes for panoptic quality computing. Defaults to 32. When ``nproc`` exceeds the number of cpu cores, the number of cpu cores is used. - file_client_args (dict): Arguments to instantiate a FileClient. - See :class:`mmengine.fileio.FileClient` for details. - Defaults to ``dict(backend='disk')``. + file_client_args (dict, optional): Arguments to instantiate the + corresponding backend in mmdet <= 3.0.0rc6. Defaults to None. + backend_args (dict, optional): Arguments to instantiate the + corresponding backend. Defaults to None. collect_device (str): Device name used for collecting results from different ranks during distributed training. Must be 'cpu' or 'gpu'. Defaults to 'cpu'. 
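[Editor's note] The loading pattern these metrics converge on is `mmengine.fileio.get_local_path`, which yields a local copy of a possibly remote file and cleans it up on exit. A minimal sketch, assuming the COCO wrapper used by these metrics and a placeholder annotation path:

```python
from mmengine.fileio import get_local_path

from mmdet.datasets.api_wrappers import COCO

# backend_args=None falls back to the local-disk backend; a non-None dict
# selects another storage backend (see the petrel example further down).
with get_local_path(
        'data/coco/annotations/instances_val2017.json',  # placeholder
        backend_args=None) as local_path:
    coco_api = COCO(local_path)
```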
@@ -76,7 +77,8 @@ def __init__(self, format_only: bool = False, outfile_prefix: Optional[str] = None, nproc: int = 32, - file_client_args: dict = dict(backend='disk'), + file_client_args: dict = None, + backend_args: dict = None, collect_device: str = 'cpu', prefix: Optional[str] = None) -> None: if panopticapi is None: @@ -108,19 +110,23 @@ def __init__(self, self.cat_ids = None self.cat2label = None - self.file_client_args = file_client_args - self.file_client = FileClient(**file_client_args) + self.backend_args = backend_args + if file_client_args is not None: + raise RuntimeError( + 'The `file_client_args` is deprecated, ' + 'please use `backend_args` instead, please refer to' + 'https://github.com/open-mmlab/mmdetection/blob/dev-3.x/configs/_base_/datasets/coco_detection.py' # noqa: E501 + ) if ann_file: - with self.file_client.get_local_path(ann_file) as local_path: + with get_local_path( + ann_file, backend_args=self.backend_args) as local_path: self._coco_api = COCOPanoptic(local_path) self.categories = self._coco_api.cats else: self._coco_api = None self.categories = None - self.file_client = FileClient(**file_client_args) - def __del__(self) -> None: """Clean up.""" if self.tmp_dir is not None: @@ -370,7 +376,7 @@ def _compute_batch_pq_stats(self, data_samples: Sequence[dict]): gt_folder=self.seg_prefix, pred_folder=self.seg_out_dir, categories=categories, - file_client=self.file_client) + backend_args=self.backend_args) self.results.append(pq_stats) @@ -497,7 +503,7 @@ def compute_metrics(self, results: list) -> Dict[str, float]: gt_folder, pred_folder, self.categories, - file_client=self.file_client, + backend_args=self.backend_args, nproc=self.nproc) else: diff --git a/mmdet/evaluation/metrics/crowdhuman_metric.py b/mmdet/evaluation/metrics/crowdhuman_metric.py index a16f4351cde..3bec5b53685 100644 --- a/mmdet/evaluation/metrics/crowdhuman_metric.py +++ b/mmdet/evaluation/metrics/crowdhuman_metric.py @@ -9,7 +9,7 @@ import numpy as np from mmengine.evaluator import BaseMetric -from mmengine.fileio import FileClient, dump, load +from mmengine.fileio import dump, get_text, load from mmengine.logging import MMLogger from scipy.sparse import csr_matrix from scipy.sparse.csgraph import maximum_bipartite_matching @@ -38,9 +38,10 @@ class CrowdHumanMetric(BaseMetric): outfile_prefix (str, optional): The prefix of json files. It includes the file path and the prefix of filename, e.g., "a/b/prefix". If not specified, a temp file will be created. Defaults to None. - file_client_args (dict): Arguments to instantiate a FileClient. - See :class:`mmengine.fileio.FileClient` for details. - Defaults to ``dict(backend='disk')``. + file_client_args (dict, optional): Arguments to instantiate the + corresponding backend in mmdet <= 3.0.0rc6. Defaults to None. + backend_args (dict, optional): Arguments to instantiate the + corresponding backend. Defaults to None. collect_device (str): Device name used for collecting results from different ranks during distributed training. Must be 'cpu' or 'gpu'. Defaults to 'cpu'. 
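[Editor's note] For a non-default backend, `backend_args` takes the same shape everywhere; the Ceph settings below are copied from the `replace_to_ceph` test utility later in this patch:

```python
# Read annotations and images from S3 via the petrel backend
# instead of the local disk.
backend_args = dict(
    backend='petrel',
    path_mapping=dict({
        './data/': 's3://openmmlab/datasets/detection/',
        'data/': 's3://openmmlab/datasets/detection/'
    }))
```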
@@ -68,7 +69,8 @@ def __init__(self, metric: Union[str, List[str]] = ['AP', 'MR', 'JI'], format_only: bool = False, outfile_prefix: Optional[str] = None, - file_client_args: dict = dict(backend='disk'), + file_client_args: dict = None, + backend_args: dict = None, collect_device: str = 'cpu', prefix: Optional[str] = None, eval_mode: int = 0, @@ -93,8 +95,13 @@ def __init__(self, 'None when format_only is True, otherwise the result files will' 'be saved to a temp directory which will be cleaned up at the end.' self.outfile_prefix = outfile_prefix - self.file_client_args = file_client_args - self.file_client = FileClient(**file_client_args) + self.backend_args = backend_args + if file_client_args is not None: + raise RuntimeError( + 'The `file_client_args` is deprecated, ' + 'please use `backend_args` instead, please refer to' + 'https://github.com/open-mmlab/mmdetection/blob/dev-3.x/configs/_base_/datasets/coco_detection.py' # noqa: E501 + ) assert eval_mode in [0, 1, 2], \ "Unknown eval mode. mr_ref should be one of '0', '1', '2'." @@ -221,10 +228,11 @@ def load_eval_samples(self, result_file): Returns: Dict[Image]: The detection result packaged by Image """ - gt_str = self.file_client.get_text(self.ann_file).strip().split('\n') + gt_str = get_text( + self.ann_file, backend_args=self.backend_args).strip().split('\n') gt_records = [json.loads(line) for line in gt_str] - pred_records = load(result_file) + pred_records = load(result_file, backend_args=self.backend_args) eval_samples = dict() for gt_record, pred_record in zip(gt_records, pred_records): assert gt_record['ID'] == pred_record['ID'], \ diff --git a/mmdet/evaluation/metrics/dump_proposals_metric.py b/mmdet/evaluation/metrics/dump_proposals_metric.py index 06ecc78d69b..68dc2d5ab84 100644 --- a/mmdet/evaluation/metrics/dump_proposals_metric.py +++ b/mmdet/evaluation/metrics/dump_proposals_metric.py @@ -22,9 +22,10 @@ class DumpProposals(BaseMetric): proposals_file (str): Proposals file path. Defaults to 'proposals.pkl'. num_max_proposals (int, optional): Maximum number of proposals to dump. If not specified, all proposals will be dumped. - file_client_args (dict): Arguments to instantiate a FileClient. - See :class:`mmengine.fileio.FileClient` for details. - Defaults to ``dict(backend='disk')``. + file_client_args (dict, optional): Arguments to instantiate the + corresponding backend in mmdet <= 3.0.0rc6. Defaults to None. + backend_args (dict, optional): Arguments to instantiate the + corresponding backend. Defaults to None. collect_device (str): Device name used for collecting results from different ranks during distributed training. Must be 'cpu' or 'gpu'. Defaults to 'cpu'. @@ -40,13 +41,20 @@ def __init__(self, output_dir: str = '', proposals_file: str = 'proposals.pkl', num_max_proposals: Optional[int] = None, - file_client_args: dict = dict(backend='disk'), + file_client_args: dict = None, + backend_args: dict = None, collect_device: str = 'cpu', prefix: Optional[str] = None) -> None: super().__init__(collect_device=collect_device, prefix=prefix) self.num_max_proposals = num_max_proposals # TODO: update after mmengine finish refactor fileio. 
- self.file_client_args = file_client_args + self.backend_args = backend_args + if file_client_args is not None: + raise RuntimeError( + 'The `file_client_args` is deprecated, ' + 'please use `backend_args` instead, please refer to' + 'https://github.com/open-mmlab/mmdetection/blob/dev-3.x/configs/_base_/datasets/coco_detection.py' # noqa: E501 + ) self.output_dir = output_dir assert proposals_file.endswith(('.pkl', '.pickle')), \ 'The output file must be a pkl file.' @@ -106,6 +114,6 @@ def compute_metrics(self, results: list) -> dict: dump( dump_results, file=self.proposals_file, - file_client_args=self.file_client_args) + backend_args=self.backend_args) logger.info(f'Results are saved at {self.proposals_file}') return {} diff --git a/mmdet/evaluation/metrics/lvis_metric.py b/mmdet/evaluation/metrics/lvis_metric.py index 388c097d5ff..b4b3dd44f9a 100644 --- a/mmdet/evaluation/metrics/lvis_metric.py +++ b/mmdet/evaluation/metrics/lvis_metric.py @@ -7,6 +7,7 @@ from typing import Dict, List, Optional, Sequence, Union import numpy as np +from mmengine.fileio import get_local_path from mmengine.logging import MMLogger from terminaltables import AsciiTable @@ -62,6 +63,10 @@ class LVISMetric(CocoMetric): names to disambiguate homonymous metrics of different evaluators. If prefix is not provided in the argument, self.default_prefix will be used instead. Defaults to None. + file_client_args (dict, optional): Arguments to instantiate the + corresponding backend in mmdet <= 3.0.0rc6. Defaults to None. + backend_args (dict, optional): Arguments to instantiate the + corresponding backend. Defaults to None. """ default_prefix: Optional[str] = 'lvis' @@ -76,7 +81,9 @@ def __init__(self, format_only: bool = False, outfile_prefix: Optional[str] = None, collect_device: str = 'cpu', - prefix: Optional[str] = None) -> None: + prefix: Optional[str] = None, + file_client_args: dict = None, + backend_args: dict = None) -> None: if lvis is None: raise RuntimeError( 'Package lvis is not installed. Please run "pip install ' @@ -110,10 +117,22 @@ def __init__(self, 'be saved to a temp directory which will be cleaned up at the end.' 
self.outfile_prefix = outfile_prefix + self.backend_args = backend_args + if file_client_args is not None: + raise RuntimeError( + 'The `file_client_args` is deprecated, ' + 'please use `backend_args` instead, please refer to' + 'https://github.com/open-mmlab/mmdetection/blob/dev-3.x/configs/_base_/datasets/coco_detection.py' # noqa: E501 + ) # if ann_file is not specified, # initialize lvis api with the converted dataset - self._lvis_api = LVIS(ann_file) if ann_file else None + if ann_file is not None: + with get_local_path( + ann_file, backend_args=self.backend_args) as local_path: + self._lvis_api = LVIS(local_path) + else: + self._lvis_api = None # handle dataset lazy init self.cat_ids = None diff --git a/mmdet/models/test_time_augs/det_tta.py b/mmdet/models/test_time_augs/det_tta.py index 66f0817a9f8..95f91db9e12 100644 --- a/mmdet/models/test_time_augs/det_tta.py +++ b/mmdet/models/test_time_augs/det_tta.py @@ -27,7 +27,7 @@ class DetTTAModel(BaseTTAModel): >>> >>> tta_pipeline = [ >>> dict(type='LoadImageFromFile', - >>> file_client_args=dict(backend='disk')), + >>> backend_args=None), >>> dict( >>> type='TestTimeAug', >>> transforms=[[ diff --git a/mmdet/testing/_utils.py b/mmdet/testing/_utils.py index 471a6bd3a7b..ce74376250e 100644 --- a/mmdet/testing/_utils.py +++ b/mmdet/testing/_utils.py @@ -274,7 +274,7 @@ def demo_mm_sampling_results(proposals_list, # TODO: Support full ceph def replace_to_ceph(cfg): - file_client_args = dict( + backend_args = dict( backend='petrel', path_mapping=dict({ './data/': 's3://openmmlab/datasets/detection/', @@ -286,12 +286,12 @@ def _process_pipeline(dataset, name): def replace_img(pipeline): if pipeline['type'] == 'LoadImageFromFile': - pipeline['file_client_args'] = file_client_args + pipeline['backend_args'] = backend_args def replace_ann(pipeline): if pipeline['type'] == 'LoadAnnotations' or pipeline[ 'type'] == 'LoadPanopticAnnotations': - pipeline['file_client_args'] = file_client_args + pipeline['backend_args'] = backend_args if 'pipeline' in dataset: replace_img(dataset.pipeline[0]) @@ -307,7 +307,7 @@ def replace_ann(pipeline): def _process_evaluator(evaluator, name): if evaluator['type'] == 'CocoPanopticMetric': - evaluator['file_client_args'] = file_client_args + evaluator['backend_args'] = backend_args # half ceph _process_pipeline(cfg.train_dataloader.dataset, cfg.filename) diff --git a/projects/ConvNeXt-V2/configs/mask-rcnn_convnext-v2-b_fpn_lsj-3x-fcmae_coco.py b/projects/ConvNeXt-V2/configs/mask-rcnn_convnext-v2-b_fpn_lsj-3x-fcmae_coco.py index f5815d8ecdf..95b960df92f 100644 --- a/projects/ConvNeXt-V2/configs/mask-rcnn_convnext-v2-b_fpn_lsj-3x-fcmae_coco.py +++ b/projects/ConvNeXt-V2/configs/mask-rcnn_convnext-v2-b_fpn_lsj-3x-fcmae_coco.py @@ -31,7 +31,7 @@ rcnn=dict(nms=dict(type='soft_nms')))) train_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), + dict(type='LoadImageFromFile', backend_args=_base_.backend_args), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict( type='RandomResize', diff --git a/projects/Detic/configs/detic_centernet2_swin-b_fpn_4x_lvis-coco-in21k.py b/projects/Detic/configs/detic_centernet2_swin-b_fpn_4x_lvis-coco-in21k.py index 19a17aea7bc..d554c40ec20 100644 --- a/projects/Detic/configs/detic_centernet2_swin-b_fpn_4x_lvis-coco-in21k.py +++ b/projects/Detic/configs/detic_centernet2_swin-b_fpn_4x_lvis-coco-in21k.py @@ -252,7 +252,7 @@ test_pipeline = [ dict( type='LoadImageFromFile', - file_client_args=_base_.file_client_args, + 
backend_args=_base_.backend_args, imdecode_backend=backend), dict(type='Resize', scale=(1333, 800), keep_ratio=True, backend=backend), dict( diff --git a/projects/DiffusionDet/configs/diffusiondet_r50_fpn_500-proposals_1-step_crop-ms-480-800-450k_coco.py b/projects/DiffusionDet/configs/diffusiondet_r50_fpn_500-proposals_1-step_crop-ms-480-800-450k_coco.py index 310cdc4cf2b..187cdc39734 100644 --- a/projects/DiffusionDet/configs/diffusiondet_r50_fpn_500-proposals_1-step_crop-ms-480-800-450k_coco.py +++ b/projects/DiffusionDet/configs/diffusiondet_r50_fpn_500-proposals_1-step_crop-ms-480-800-450k_coco.py @@ -95,7 +95,7 @@ train_pipeline = [ dict( type='LoadImageFromFile', - file_client_args=_base_.file_client_args, + backend_args=_base_.backend_args, imdecode_backend=backend), dict(type='LoadAnnotations', with_bbox=True), dict(type='RandomFlip', prob=0.5), @@ -136,7 +136,7 @@ test_pipeline = [ dict( type='LoadImageFromFile', - file_client_args=_base_.file_client_args, + backend_args=_base_.backend_args, imdecode_backend=backend), dict(type='Resize', scale=(1333, 800), keep_ratio=True, backend=backend), # If you don't have a gt annotation, delete the pipeline diff --git a/projects/EfficientDet/configs/efficientdet_effb0_bifpn_8xb16-crop512-300e_coco.py b/projects/EfficientDet/configs/efficientdet_effb0_bifpn_8xb16-crop512-300e_coco.py index 8ccbc85a479..c7a3b309237 100644 --- a/projects/EfficientDet/configs/efficientdet_effb0_bifpn_8xb16-crop512-300e_coco.py +++ b/projects/EfficientDet/configs/efficientdet_effb0_bifpn_8xb16-crop512-300e_coco.py @@ -94,9 +94,7 @@ # dataset settings train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='RandomResize', @@ -108,9 +106,7 @@ dict(type='PackDetInputs') ] test_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='Resize', scale=(image_size, image_size), keep_ratio=True), dict(type='LoadAnnotations', with_bbox=True), dict( diff --git a/projects/EfficientDet/configs/efficientdet_effb3_bifpn_8xb16-crop896-300e_coco-90cls.py b/projects/EfficientDet/configs/efficientdet_effb3_bifpn_8xb16-crop896-300e_coco-90cls.py index e1ff4d7d147..fe82a5e1b94 100644 --- a/projects/EfficientDet/configs/efficientdet_effb3_bifpn_8xb16-crop896-300e_coco-90cls.py +++ b/projects/EfficientDet/configs/efficientdet_effb3_bifpn_8xb16-crop896-300e_coco-90cls.py @@ -94,9 +94,7 @@ # dataset settings train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='RandomResize', @@ -108,9 +106,7 @@ dict(type='PackDetInputs') ] test_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='Resize', scale=(image_size, image_size), keep_ratio=True), dict(type='LoadAnnotations', with_bbox=True), dict( diff --git a/projects/EfficientDet/configs/efficientdet_effb3_bifpn_8xb16-crop896-300e_coco.py b/projects/EfficientDet/configs/efficientdet_effb3_bifpn_8xb16-crop896-300e_coco.py index 5d9a6b6fe93..2079e2ac65a 100644 --- a/projects/EfficientDet/configs/efficientdet_effb3_bifpn_8xb16-crop896-300e_coco.py 
+++ b/projects/EfficientDet/configs/efficientdet_effb3_bifpn_8xb16-crop896-300e_coco.py @@ -94,9 +94,7 @@ # dataset settings train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='RandomResize', @@ -108,9 +106,7 @@ dict(type='PackDetInputs') ] test_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='Resize', scale=(image_size, image_size), keep_ratio=True), dict(type='LoadAnnotations', with_bbox=True), dict( diff --git a/projects/EfficientDet/configs/tensorflow/efficientdet_effb0_bifpn_8xb16-crop512-300e_coco_tf.py b/projects/EfficientDet/configs/tensorflow/efficientdet_effb0_bifpn_8xb16-crop512-300e_coco_tf.py index 00200cdf718..bf3d3fc1799 100644 --- a/projects/EfficientDet/configs/tensorflow/efficientdet_effb0_bifpn_8xb16-crop512-300e_coco_tf.py +++ b/projects/EfficientDet/configs/tensorflow/efficientdet_effb0_bifpn_8xb16-crop512-300e_coco_tf.py @@ -94,9 +94,7 @@ # dataset settings train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='RandomResize', @@ -108,9 +106,7 @@ dict(type='PackDetInputs') ] test_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='Resize', scale=(image_size, image_size), keep_ratio=True), dict(type='LoadAnnotations', with_bbox=True), dict( diff --git a/projects/EfficientDet/efficientdet/tensorflow/coco_90class.py b/projects/EfficientDet/efficientdet/tensorflow/coco_90class.py index b0742af0be9..d2996ccb8fc 100644 --- a/projects/EfficientDet/efficientdet/tensorflow/coco_90class.py +++ b/projects/EfficientDet/efficientdet/tensorflow/coco_90class.py @@ -3,6 +3,8 @@ import os.path as osp from typing import List, Union +from mmengine.fileio import get_local_path + from mmdet.datasets.base_det_dataset import BaseDetDataset from mmdet.registry import DATASETS from .api_wrappers import COCO @@ -62,7 +64,8 @@ def load_data_list(self) -> List[dict]: Returns: List[dict]: A list of annotation. """ # noqa: E501 - with self.file_client.get_local_path(self.ann_file) as local_path: + with get_local_path( + self.ann_file, backend_args=self.backend_args) as local_path: self.coco = self.COCOAPI(local_path) # The order of returned `cat_ids` will not # change with the order of the `classes` diff --git a/projects/EfficientDet/efficientdet/tensorflow/coco_90metric.py b/projects/EfficientDet/efficientdet/tensorflow/coco_90metric.py index 7bc12d00956..eed65224018 100644 --- a/projects/EfficientDet/efficientdet/tensorflow/coco_90metric.py +++ b/projects/EfficientDet/efficientdet/tensorflow/coco_90metric.py @@ -8,7 +8,7 @@ import numpy as np from mmengine.evaluator import BaseMetric -from mmengine.fileio import FileClient, dump, load +from mmengine.fileio import dump, get_local_path, load from mmengine.logging import MMLogger from terminaltables import AsciiTable @@ -49,9 +49,8 @@ class Coco90Metric(BaseMetric): outfile_prefix (str, optional): The prefix of json files. It includes the file path and the prefix of filename, e.g., "a/b/prefix". 
If not specified, a temp file will be created. Defaults to None. - file_client_args (dict): Arguments to instantiate a FileClient. - See :class:`mmengine.fileio.FileClient` for details. - Defaults to ``dict(backend='disk')``. + backend_args (dict, optional): Arguments to instantiate the + corresponding backend. Defaults to None. collect_device (str): Device name used for collecting results from different ranks during distributed training. Must be 'cpu' or 'gpu'. Defaults to 'cpu'. @@ -71,7 +70,7 @@ def __init__(self, metric_items: Optional[Sequence[str]] = None, format_only: bool = False, outfile_prefix: Optional[str] = None, - file_client_args: dict = dict(backend='disk'), + backend_args: dict = None, collect_device: str = 'cpu', prefix: Optional[str] = None) -> None: super().__init__(collect_device=collect_device, prefix=prefix) @@ -104,13 +103,13 @@ def __init__(self, self.outfile_prefix = outfile_prefix - self.file_client_args = file_client_args - self.file_client = FileClient(**file_client_args) + self.backend_args = backend_args # if ann_file is not specified, # initialize coco api with the converted dataset if ann_file is not None: - with self.file_client.get_local_path(ann_file) as local_path: + with get_local_path( + ann_file, backend_args=self.backend_args) as local_path: self._coco_api = COCO(local_path) else: self._coco_api = None diff --git a/projects/SparseInst/configs/sparseinst_r50_iam_8xb8-ms-270k_coco.py b/projects/SparseInst/configs/sparseinst_r50_iam_8xb8-ms-270k_coco.py index 030675d2609..7a85f52398d 100644 --- a/projects/SparseInst/configs/sparseinst_r50_iam_8xb8-ms-270k_coco.py +++ b/projects/SparseInst/configs/sparseinst_r50_iam_8xb8-ms-270k_coco.py @@ -76,7 +76,7 @@ train_pipeline = [ dict( type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}, + backend_args={{_base_.backend_args}}, imdecode_backend=backend), dict( type='LoadAnnotations', @@ -96,7 +96,7 @@ test_pipeline = [ dict( type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}, + backend_args={{_base_.backend_args}}, imdecode_backend=backend), dict(type='Resize', scale=(640, 853), keep_ratio=True, backend=backend), dict( diff --git a/requirements/mminstall.txt b/requirements/mminstall.txt index 8faef45b260..abe49081a48 100644 --- a/requirements/mminstall.txt +++ b/requirements/mminstall.txt @@ -1,2 +1,2 @@ mmcv>=2.0.0rc4,<2.1.0 -mmengine>=0.4.0,<1.0.0 +mmengine>=0.6.0,<1.0.0 diff --git a/tests/test_datasets/test_transforms/test_loading.py b/tests/test_datasets/test_transforms/test_loading.py index 49d912cc274..a4fcf4e087c 100644 --- a/tests/test_datasets/test_transforms/test_loading.py +++ b/tests/test_datasets/test_transforms/test_loading.py @@ -122,7 +122,7 @@ def test_repr(self): 'with_label=False, with_mask=False, ' 'with_seg=False, poly2mask=True, ' "imdecode_backend='cv2', " - 'file_client_args=None)')) + 'backend_args=None)')) class TestFilterAnnotations(unittest.TestCase): @@ -387,7 +387,7 @@ def test_rper(self): 'to_float32=False, ' "color_type='unchanged', " "imdecode_backend='cv2', " - "file_client_args={'backend': 'disk'})")) + 'backend_args=None)')) class TestLoadProposals(unittest.TestCase): diff --git a/tests/test_evaluation/test_metrics/test_cityscapes_metric.py b/tests/test_evaluation/test_metrics/test_cityscapes_metric.py index 91a4f745dd6..d0ee6eb9ba4 100644 --- a/tests/test_evaluation/test_metrics/test_cityscapes_metric.py +++ b/tests/test_evaluation/test_metrics/test_cityscapes_metric.py @@ -30,13 +30,6 @@ def test_init(self): with 
self.assertRaises(AssertionError):
             CityScapesMetric(outfile_prefix=None)
 
-        # test with format_only=True, keep_results=False
-        with self.assertRaises(AssertionError):
-            CityScapesMetric(
-                outfile_prefix=self.tmp_dir.name + 'test',
-                format_only=True,
-                keep_results=False)
-
     @unittest.skipIf(cityscapesscripts is None,
                      'cityscapesscripts is not installed.')
     def test_evaluate(self):
@@ -86,7 +79,6 @@ def test_evaluate(self):
         metric = CityScapesMetric(
             seg_prefix=self.seg_prefix,
             format_only=False,
-            keep_results=False,
             outfile_prefix=self.outfile_prefix)
         metric.dataset_meta = dict(
             classes=('person', 'rider', 'car', 'truck', 'bus', 'train',
@@ -101,7 +93,6 @@ def test_evaluate(self):
         metric = CityScapesMetric(
             seg_prefix=self.seg_prefix,
             format_only=True,
-            keep_results=True,
             outfile_prefix=self.outfile_prefix)
         metric.dataset_meta = dict(
             classes=('person', 'rider', 'car', 'truck', 'bus', 'train',
diff --git a/tools/misc/get_crowdhuman_id_hw.py b/tools/misc/get_crowdhuman_id_hw.py
index b3ab3748daf..8ed9142a423 100644
--- a/tools/misc/get_crowdhuman_id_hw.py
+++ b/tools/misc/get_crowdhuman_id_hw.py
@@ -15,7 +15,7 @@
 import mmcv
 from mmengine.config import Config
-from mmengine.fileio import FileClient, dump
+from mmengine.fileio import dump, get, get_text
 from mmengine.logging import print_log
 
@@ -37,11 +37,10 @@ def parse_args():
 def get_image_metas(anno_str, img_prefix):
     id_hw = {}
-    file_client = FileClient(backend='disk')
     anno_dict = json.loads(anno_str)
     img_path = osp.join(img_prefix, f"{anno_dict['ID']}.jpg")
     img_id = anno_dict['ID']
-    img_bytes = file_client.get(img_path)
+    img_bytes = get(img_path)
     img = mmcv.imfrombytes(img_bytes, backend='cv2')
     id_hw[img_id] = img.shape[:2]
     return id_hw
@@ -52,8 +51,6 @@ def main():
     # get ann_file and img_prefix from config files
     cfg = Config.fromfile(args.config)
-    file_client_args = cfg.get('file_client_args', dict(backend='disk'))
-    file_client = FileClient(**file_client_args)
     dataset = args.dataset
     dataloader_cfg = cfg.get(f'{dataset}_dataloader')
     ann_file = osp.join(dataloader_cfg.dataset.data_root,
@@ -64,7 +61,7 @@ def main():
     # load image metas
     print_log(
         f'loading CrowdHuman {dataset} annotation...', level=logging.INFO)
-    anno_strs = file_client.get_text(ann_file).strip().split('\n')
+    anno_strs = get_text(ann_file).strip().split('\n')
     pool = Pool(args.nproc)
     # get image metas with multiple processes
     id_hw_temp = pool.starmap(
diff --git a/tools/misc/get_image_metas.py b/tools/misc/get_image_metas.py
index 03b35cb2c0d..5644fa8c1ab 100644
--- a/tools/misc/get_image_metas.py
+++ b/tools/misc/get_image_metas.py
@@ -14,7 +14,7 @@
 import mmcv
 from mmengine.config import Config
-from mmengine.fileio import FileClient, dump
+from mmengine.fileio import dump, get
 
 def parse_args():
@@ -69,12 +69,11 @@ def get_metas_from_txt_style_ann_file(ann_file):
 
 def get_image_metas(data_info, img_prefix):
-    file_client =
FileClient(backend='disk') filename = data_info.get('filename', None) if filename is not None: if img_prefix is not None: filename = osp.join(img_prefix, filename) - img_bytes = file_client.get(filename) + img_bytes = get(filename) img = mmcv.imfrombytes(img_bytes, flag='color') shape = img.shape meta = dict(filename=filename, ori_shape=shape) From 3f89a036c5e02f97f8f74f5508e0fe03486bbad2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Haian=20Huang=28=E6=B7=B1=E5=BA=A6=E7=9C=B8=29?= Date: Fri, 17 Mar 2023 15:26:32 +0800 Subject: [PATCH 13/38] Fix some cfg error (#9866) --- ...ade-rpn_fast-rcnn_r50-caffe_fpn_1x_coco.py | 74 +++---------------- configs/centripetalnet/README.md | 2 +- ...glass104_16xb6-crop511-210e-mstest_coco.py | 49 +++++++++++- configs/cornernet/README.md | 2 +- ...rnet_hourglass104_8xb6-210e-mstest_coco.py | 49 +++++++++++- ...sk-rcnn_r50-dconv-c3-c5_fpn_amp-1x_coco.py | 5 +- ...k-rcnn_r50-mdconv-c3-c5_fpn_amp-1x_coco.py | 5 +- .../faster-rcnn_r101-caffe_fpn_ms-3x_coco.py | 38 ---------- .../faster-rcnn_r50-caffe-c4_ms-1x_coco.py | 40 ++-------- .../faster-rcnn_r50-caffe-dc5_1x_coco.py | 32 -------- .../faster-rcnn_r50-caffe-dc5_ms-1x_coco.py | 46 +++--------- .../faster-rcnn_r50-caffe-dc5_ms-3x_coco.py | 20 ++++- .../faster-rcnn_r50-caffe_c4-1x_coco.py | 34 --------- .../faster-rcnn_r50-caffe_fpn_90k_coco.py | 31 +++++--- .../faster-rcnn_r50-caffe_fpn_ms-1x_coco.py | 43 +++-------- .../faster-rcnn_r50-caffe_fpn_ms-2x_coco.py | 20 ++++- .../faster-rcnn_r50-caffe_fpn_ms-3x_coco.py | 38 ---------- .../faster-rcnn_r50-caffe_fpn_ms-90k_coco.py | 30 +++++--- .../faster-rcnn_r50-tnr-pre_fpn_1x_coco.py | 7 +- .../faster-rcnn_r50_fpn_amp-1x_coco.py | 7 +- .../faster-rcnn_x101-32x8d_fpn_ms-3x_coco.py | 53 ++----------- .../lad/lad_r101-paa-r50_fpn_2xb8_coco_1x.py | 3 - .../lad/lad_r50-paa-r101_fpn_2xb8_coco_1x.py | 3 - .../cascade-mask-rcnn_r50_fpn_1x_coco_v1.py | 1 - .../retinanet_r50-caffe_fpn_1x_coco_v1.py | 39 ++-------- configs/legacy_1.x/ssd300_coco_v1.py | 64 ---------------- .../libra-fast-rcnn_r50_fpn_1x_coco.py | 22 +++--- .../mask-rcnn_r50_fpn_1x-wandb_coco.py | 29 +++----- .../retinanet_r50-caffe_fpn_ms-1x_coco.py | 2 +- .../retinanet_r50_fpn_amp-1x_coco.py | 7 +- configs/wider_face/ssd300_24e_widerface.py | 34 +++++---- 31 files changed, 283 insertions(+), 546 deletions(-) diff --git a/configs/cascade_rpn/cascade-rpn_fast-rcnn_r50-caffe_fpn_1x_coco.py b/configs/cascade_rpn/cascade-rpn_fast-rcnn_r50-caffe_fpn_1x_coco.py index d977e78d975..ba23ce90652 100644 --- a/configs/cascade_rpn/cascade-rpn_fast-rcnn_r50-caffe_fpn_1x_coco.py +++ b/configs/cascade_rpn/cascade-rpn_fast-rcnn_r50-caffe_fpn_1x_coco.py @@ -1,17 +1,5 @@ -_base_ = '../fast_rcnn/fast-rcnn_r50_fpn_1x_coco.py' +_base_ = '../fast_rcnn/fast-rcnn_r50-caffe_fpn_1x_coco.py' model = dict( - backbone=dict( - type='ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=False), - norm_eval=True, - style='caffe', - init_cfg=dict( - type='Pretrained', - checkpoint='open-mmlab://detectron2/resnet50_caffe')), roi_head=dict( bbox_head=dict( bbox_coder=dict(target_stds=[0.04, 0.04, 0.08, 0.08]), @@ -25,53 +13,15 @@ pos_iou_thr=0.65, neg_iou_thr=0.65, min_pos_iou=0.65), sampler=dict(num=256))), test_cfg=dict(rcnn=dict(score_thr=1e-3))) -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict( - mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) -train_pipeline = [ - dict(type='LoadImageFromFile'), - 
dict(type='LoadProposals', num_max_proposals=300), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadProposals', num_max_proposals=300), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='ToTensor', keys=['proposals']), - dict( - type='ToDataContainer', - fields=[dict(key='proposals', stack=False)]), - dict(type='Collect', keys=['img', 'proposals']), - ]) -] -# TODO support proposals input -data = dict( - train=dict( - proposal_file=data_root + - 'proposals/crpn_r50_caffe_fpn_1x_train2017.pkl', - pipeline=train_pipeline), - val=dict( - proposal_file=data_root + - 'proposals/crpn_r50_caffe_fpn_1x_val2017.pkl', - pipeline=test_pipeline), - test=dict( - proposal_file=data_root + - 'proposals/crpn_r50_caffe_fpn_1x_val2017.pkl', - pipeline=test_pipeline)) + +# MMEngine support the following two ways, users can choose +# according to convenience +# train_dataloader = dict(dataset=dict(proposal_file='proposals/crpn_r50_caffe_fpn_1x_train2017.pkl')) # noqa +_base_.train_dataloader.dataset.proposal_file = 'proposals/crpn_r50_caffe_fpn_1x_train2017.pkl' # noqa + +# val_dataloader = dict(dataset=dict(proposal_file='proposals/crpn_r50_caffe_fpn_1x_val2017.pkl')) # noqa +# test_dataloader = val_dataloader +_base_.val_dataloader.dataset.proposal_file = 'proposals/crpn_r50_caffe_fpn_1x_val2017.pkl' # noqa +test_dataloader = _base_.val_dataloader + optim_wrapper = dict(clip_grad=dict(max_norm=35, norm_type=2)) diff --git a/configs/centripetalnet/README.md b/configs/centripetalnet/README.md index 4f06f45d38b..21edbd261af 100644 --- a/configs/centripetalnet/README.md +++ b/configs/centripetalnet/README.md @@ -20,7 +20,7 @@ Keypoint-based detectors have achieved pretty-well performance. However, incorre Note: -- TTA setting is single-scale and `flip=True`. +- TTA setting is single-scale and `flip=True`. If you want to reproduce the TTA performance, please add `--tta` in the test command. - The model we released is the best checkpoint rather than the latest checkpoint (box AP 44.8 vs 44.6 in our experiment). 
## Citation diff --git a/configs/centripetalnet/centripetalnet_hourglass104_16xb6-crop511-210e-mstest_coco.py b/configs/centripetalnet/centripetalnet_hourglass104_16xb6-crop511-210e-mstest_coco.py index dd629edb2e8..b757ffd16dc 100644 --- a/configs/centripetalnet/centripetalnet_hourglass104_16xb6-crop511-210e-mstest_coco.py +++ b/configs/centripetalnet/centripetalnet_hourglass104_16xb6-crop511-210e-mstest_coco.py @@ -45,7 +45,7 @@ # data settings train_pipeline = [ - dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), + dict(type='LoadImageFromFile', backend_args=_base_.backend_args), dict(type='LoadAnnotations', with_bbox=True), dict( type='PhotoMetricDistortion', @@ -70,12 +70,11 @@ dict(type='PackDetInputs'), ] -# TODO: mstest is not currently implemented test_pipeline = [ dict( type='LoadImageFromFile', to_float32=True, - backend_args={{_base_.backend_args}}), + backend_args=_base_.backend_args), # don't need Resize dict( type='RandomCenterCropPad', @@ -136,3 +135,47 @@ # USER SHOULD NOT CHANGE ITS VALUES. # base_batch_size = (16 GPUs) x (6 samples per GPU) auto_scale_lr = dict(base_batch_size=96) + +tta_model = dict( + type='DetTTAModel', + tta_cfg=dict( + nms=dict(type='soft_nms', iou_threshold=0.5, method='gaussian'), + max_per_img=100)) + +tta_pipeline = [ + dict( + type='LoadImageFromFile', + to_float32=True, + backend_args=_base_.backend_args), + dict( + type='TestTimeAug', + transforms=[ + [ + # ``RandomFlip`` must be placed before ``RandomCenterCropPad``, + # otherwise bounding box coordinates after flipping cannot be + # recovered correctly. + dict(type='RandomFlip', prob=1.), + dict(type='RandomFlip', prob=0.) + ], + [ + dict( + type='RandomCenterCropPad', + crop_size=None, + ratios=None, + border=None, + test_mode=True, + test_pad_mode=['logical_or', 127], + mean=data_preprocessor['mean'], + std=data_preprocessor['std'], + # Image data is not converted to rgb. + to_rgb=data_preprocessor['bgr_to_rgb']) + ], + [dict(type='LoadAnnotations', with_bbox=True)], + [ + dict( + type='PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'flip', 'flip_direction', 'border')) + ] + ]) +] diff --git a/configs/cornernet/README.md b/configs/cornernet/README.md index 21f74278fcc..e44964d8eac 100644 --- a/configs/cornernet/README.md +++ b/configs/cornernet/README.md @@ -22,7 +22,7 @@ We propose CornerNet, a new approach to object detection where we detect an obje Note: -- TTA setting is single-scale and `flip=True`. +- TTA setting is single-scale and `flip=True`. If you want to reproduce the TTA performance, please add `--tta` in the test command. - Experiments with `images_per_gpu=6` are conducted on Tesla V100-SXM2-32GB, `images_per_gpu=3` are conducted on GeForce GTX 1080 Ti. - Here are the descriptions of each experiment setting: - 10 x 5: 10 GPUs with 5 images per gpu. This is the same setting as that reported in the original paper. 
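[Editor's note] Both README notes above point users at the new `--tta` flag. Roughly, the test entry point rewires the config before building the runner when the flag is set; a sketch of the effect under that assumption (attribute names follow the `tta_model`/`tta_pipeline` keys added in the configs below and may differ slightly from the actual `tools/test.py`):

```python
# Sketch: enabling test-time augmentation from the command line.
if args.tta:
    assert 'tta_model' in cfg and 'tta_pipeline' in cfg, (
        'the config must define tta_model and tta_pipeline to use --tta')
    cfg.test_dataloader.dataset.pipeline = cfg.tta_pipeline  # TTA transforms
    cfg.tta_model.module = cfg.model  # wrap the plain detector
    cfg.model = cfg.tta_model  # e.g. DetTTAModel with soft-NMS merging
```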
diff --git a/configs/cornernet/cornernet_hourglass104_8xb6-210e-mstest_coco.py b/configs/cornernet/cornernet_hourglass104_8xb6-210e-mstest_coco.py index 20751ef4af1..bdb46fff164 100644 --- a/configs/cornernet/cornernet_hourglass104_8xb6-210e-mstest_coco.py +++ b/configs/cornernet/cornernet_hourglass104_8xb6-210e-mstest_coco.py @@ -45,7 +45,7 @@ # data settings train_pipeline = [ - dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), + dict(type='LoadImageFromFile', backend_args=_base_.backend_args), dict(type='LoadAnnotations', with_bbox=True), dict( type='PhotoMetricDistortion', @@ -71,12 +71,11 @@ dict(type='PackDetInputs'), ] -# TODO: mstest is not currently implemented test_pipeline = [ dict( type='LoadImageFromFile', to_float32=True, - backend_args={{_base_.backend_args}}, + backend_args=_base_.backend_args, ), # don't need Resize dict( @@ -138,3 +137,47 @@ # USER SHOULD NOT CHANGE ITS VALUES. # base_batch_size = (8 GPUs) x (6 samples per GPU) auto_scale_lr = dict(base_batch_size=48) + +tta_model = dict( + type='DetTTAModel', + tta_cfg=dict( + nms=dict(type='soft_nms', iou_threshold=0.5, method='gaussian'), + max_per_img=100)) + +tta_pipeline = [ + dict( + type='LoadImageFromFile', + to_float32=True, + backend_args=_base_.backend_args), + dict( + type='TestTimeAug', + transforms=[ + [ + # ``RandomFlip`` must be placed before ``RandomCenterCropPad``, + # otherwise bounding box coordinates after flipping cannot be + # recovered correctly. + dict(type='RandomFlip', prob=1.), + dict(type='RandomFlip', prob=0.) + ], + [ + dict( + type='RandomCenterCropPad', + crop_size=None, + ratios=None, + border=None, + test_mode=True, + test_pad_mode=['logical_or', 127], + mean=data_preprocessor['mean'], + std=data_preprocessor['std'], + # Image data is not converted to rgb. + to_rgb=data_preprocessor['bgr_to_rgb']) + ], + [dict(type='LoadAnnotations', with_bbox=True)], + [ + dict( + type='PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'flip', 'flip_direction', 'border')) + ] + ]) +] diff --git a/configs/dcn/mask-rcnn_r50-dconv-c3-c5_fpn_amp-1x_coco.py b/configs/dcn/mask-rcnn_r50-dconv-c3-c5_fpn_amp-1x_coco.py index 38b73b6ea6e..9d01594314a 100644 --- a/configs/dcn/mask-rcnn_r50-dconv-c3-c5_fpn_amp-1x_coco.py +++ b/configs/dcn/mask-rcnn_r50-dconv-c3-c5_fpn_amp-1x_coco.py @@ -4,4 +4,7 @@ dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), stage_with_dcn=(False, True, True, True))) -fp16 = dict(loss_scale=512.) +# MMEngine support the following two ways, users can choose +# according to convenience +# optim_wrapper = dict(type='AmpOptimWrapper') +_base_.optim_wrapper.type = 'AmpOptimWrapper' diff --git a/configs/dcnv2/mask-rcnn_r50-mdconv-c3-c5_fpn_amp-1x_coco.py b/configs/dcnv2/mask-rcnn_r50-mdconv-c3-c5_fpn_amp-1x_coco.py index 4d5cffdb183..3b3894c2d61 100644 --- a/configs/dcnv2/mask-rcnn_r50-mdconv-c3-c5_fpn_amp-1x_coco.py +++ b/configs/dcnv2/mask-rcnn_r50-mdconv-c3-c5_fpn_amp-1x_coco.py @@ -4,4 +4,7 @@ dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False), stage_with_dcn=(False, True, True, True))) -fp16 = dict(loss_scale=512.) 
+# MMEngine support the following two ways, users can choose +# according to convenience +# optim_wrapper = dict(type='AmpOptimWrapper') +_base_.optim_wrapper.type = 'AmpOptimWrapper' diff --git a/configs/faster_rcnn/faster-rcnn_r101-caffe_fpn_ms-3x_coco.py b/configs/faster_rcnn/faster-rcnn_r101-caffe_fpn_ms-3x_coco.py index 72e738b153c..1cdb4d4973e 100644 --- a/configs/faster_rcnn/faster-rcnn_r101-caffe_fpn_ms-3x_coco.py +++ b/configs/faster_rcnn/faster-rcnn_r101-caffe_fpn_ms-3x_coco.py @@ -9,41 +9,3 @@ init_cfg=dict( type='Pretrained', checkpoint='open-mmlab://detectron2/resnet101_caffe'))) - -# use caffe img_norm -img_norm_cfg = dict( - mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict( - type='Resize', - img_scale=[(1333, 640), (1333, 800)], - multiscale_mode='range', - keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] - -data = dict( - train=dict(dataset=dict(pipeline=train_pipeline)), - val=dict(pipeline=test_pipeline), - test=dict(pipeline=test_pipeline)) diff --git a/configs/faster_rcnn/faster-rcnn_r50-caffe-c4_ms-1x_coco.py b/configs/faster_rcnn/faster-rcnn_r50-caffe-c4_ms-1x_coco.py index b8fb5efd002..d4949d04ac2 100644 --- a/configs/faster_rcnn/faster-rcnn_r50-caffe-c4_ms-1x_coco.py +++ b/configs/faster_rcnn/faster-rcnn_r50-caffe-c4_ms-1x_coco.py @@ -1,38 +1,14 @@ _base_ = './faster-rcnn_r50-caffe_c4-1x_coco.py' -# use caffe img_norm -img_norm_cfg = dict( - mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) + train_pipeline = [ - dict(type='LoadImageFromFile'), + dict(type='LoadImageFromFile', backend_args=_base_.backend_args), dict(type='LoadAnnotations', with_bbox=True), dict( - type='Resize', - img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), - (1333, 768), (1333, 800)], - multiscale_mode='value', + type='RandomChoiceResize', + scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), (1333, 768), + (1333, 800)], keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), + dict(type='RandomFlip', prob=0.5), + dict(type='PackDetInputs') ] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - train=dict(pipeline=train_pipeline), - val=dict(pipeline=test_pipeline), - test=dict(pipeline=test_pipeline)) +_base_.train_dataloader.dataset.pipeline = train_pipeline diff --git a/configs/faster_rcnn/faster-rcnn_r50-caffe-dc5_1x_coco.py 
b/configs/faster_rcnn/faster-rcnn_r50-caffe-dc5_1x_coco.py index d24b5e08bc0..8952a5c9c6c 100644 --- a/configs/faster_rcnn/faster-rcnn_r50-caffe-dc5_1x_coco.py +++ b/configs/faster_rcnn/faster-rcnn_r50-caffe-dc5_1x_coco.py @@ -3,35 +3,3 @@ '../_base_/datasets/coco_detection.py', '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' ] -# use caffe img_norm -img_norm_cfg = dict( - mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - train=dict(pipeline=train_pipeline), - val=dict(pipeline=test_pipeline), - test=dict(pipeline=test_pipeline)) diff --git a/configs/faster_rcnn/faster-rcnn_r50-caffe-dc5_ms-1x_coco.py b/configs/faster_rcnn/faster-rcnn_r50-caffe-dc5_ms-1x_coco.py index d3eb21ecfdf..99a6fcc7d7a 100644 --- a/configs/faster_rcnn/faster-rcnn_r50-caffe-dc5_ms-1x_coco.py +++ b/configs/faster_rcnn/faster-rcnn_r50-caffe-dc5_ms-1x_coco.py @@ -1,42 +1,14 @@ -_base_ = [ - '../_base_/models/faster-rcnn_r50-caffe-dc5.py', - '../_base_/datasets/coco_detection.py', - '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' -] -# use caffe img_norm -img_norm_cfg = dict( - mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +_base_ = 'faster-rcnn_r50-caffe-dc5_1x_coco.py' + train_pipeline = [ - dict(type='LoadImageFromFile'), + dict(type='LoadImageFromFile', backend_args=_base_.backend_args), dict(type='LoadAnnotations', with_bbox=True), dict( - type='Resize', - img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), - (1333, 768), (1333, 800)], - multiscale_mode='value', + type='RandomChoiceResize', + scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), (1333, 768), + (1333, 800)], keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) + dict(type='RandomFlip', prob=0.5), + dict(type='PackDetInputs') ] -data = dict( - train=dict(pipeline=train_pipeline), - val=dict(pipeline=test_pipeline), - test=dict(pipeline=test_pipeline)) +_base_.train_dataloader.dataset.pipeline = train_pipeline diff --git a/configs/faster_rcnn/faster-rcnn_r50-caffe-dc5_ms-3x_coco.py b/configs/faster_rcnn/faster-rcnn_r50-caffe-dc5_ms-3x_coco.py index 72404a689da..27063468a70 100644 --- 
a/configs/faster_rcnn/faster-rcnn_r50-caffe-dc5_ms-3x_coco.py +++ b/configs/faster_rcnn/faster-rcnn_r50-caffe-dc5_ms-3x_coco.py @@ -1,4 +1,18 @@ _base_ = './faster-rcnn_r50-caffe-dc5_ms-1x_coco.py' -# learning policy -lr_config = dict(step=[28, 34]) -runner = dict(type='EpochBasedRunner', max_epochs=36) + +# MMEngine support the following two ways, users can choose +# according to convenience +# param_scheduler = [ +# dict( +# type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500), # noqa +# dict( +# type='MultiStepLR', +# begin=0, +# end=12, +# by_epoch=True, +# milestones=[28, 34], +# gamma=0.1) +# ] +_base_.param_scheduler[1].milestones = [28, 34] + +train_cfg = dict(max_epochs=36) diff --git a/configs/faster_rcnn/faster-rcnn_r50-caffe_c4-1x_coco.py b/configs/faster_rcnn/faster-rcnn_r50-caffe_c4-1x_coco.py index d68c7a77460..0888fc01790 100644 --- a/configs/faster_rcnn/faster-rcnn_r50-caffe_c4-1x_coco.py +++ b/configs/faster_rcnn/faster-rcnn_r50-caffe_c4-1x_coco.py @@ -3,37 +3,3 @@ '../_base_/datasets/coco_detection.py', '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' ] -# use caffe img_norm -img_norm_cfg = dict( - mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - train=dict(pipeline=train_pipeline), - val=dict(pipeline=test_pipeline), - test=dict(pipeline=test_pipeline)) -# optimizer -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) diff --git a/configs/faster_rcnn/faster-rcnn_r50-caffe_fpn_90k_coco.py b/configs/faster_rcnn/faster-rcnn_r50-caffe_fpn_90k_coco.py index f15b203831b..27f49355f3b 100644 --- a/configs/faster_rcnn/faster-rcnn_r50-caffe_fpn_90k_coco.py +++ b/configs/faster_rcnn/faster-rcnn_r50-caffe_fpn_90k_coco.py @@ -1,15 +1,22 @@ _base_ = 'faster-rcnn_r50-caffe_fpn_1x_coco.py' +max_iter = 90000 -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=0.001, - step=[60000, 80000]) +param_scheduler = [ + dict( + type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500), + dict( + type='MultiStepLR', + begin=0, + end=max_iter, + by_epoch=False, + milestones=[60000, 80000], + gamma=0.1) +] -# Runner type -runner = dict(_delete_=True, type='IterBasedRunner', max_iters=90000) - -checkpoint_config = dict(interval=10000) -evaluation = dict(interval=10000, metric='bbox') +train_cfg = dict( + _delete_=True, + type='IterBasedTrainLoop', + max_iters=max_iter, + val_interval=10000) +default_hooks = dict(checkpoint=dict(by_epoch=False, interval=10000)) +log_processor = dict(by_epoch=False) diff --git a/configs/faster_rcnn/faster-rcnn_r50-caffe_fpn_ms-1x_coco.py b/configs/faster_rcnn/faster-rcnn_r50-caffe_fpn_ms-1x_coco.py index 8158f4f80c1..7daa03d90a5 100644 --- 
a/configs/faster_rcnn/faster-rcnn_r50-caffe_fpn_ms-1x_coco.py +++ b/configs/faster_rcnn/faster-rcnn_r50-caffe_fpn_ms-1x_coco.py @@ -13,40 +13,19 @@ init_cfg=dict( type='Pretrained', checkpoint='open-mmlab://detectron2/resnet50_caffe'))) -# use caffe img_norm -img_norm_cfg = dict( - mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) + train_pipeline = [ - dict(type='LoadImageFromFile'), + dict(type='LoadImageFromFile', backend_args=_base_.backend_args), dict(type='LoadAnnotations', with_bbox=True), dict( - type='Resize', - img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), - (1333, 768), (1333, 800)], - multiscale_mode='value', + type='RandomChoiceResize', + scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), (1333, 768), + (1333, 800)], keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), + dict(type='RandomFlip', prob=0.5), + dict(type='PackDetInputs') ] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - train=dict(pipeline=train_pipeline), - val=dict(pipeline=test_pipeline), - test=dict(pipeline=test_pipeline)) +# MMEngine support the following two ways, users can choose +# according to convenience +# train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) +_base_.train_dataloader.dataset.pipeline = train_pipeline diff --git a/configs/faster_rcnn/faster-rcnn_r50-caffe_fpn_ms-2x_coco.py b/configs/faster_rcnn/faster-rcnn_r50-caffe_fpn_ms-2x_coco.py index 73ae4f7053e..44d320ea01b 100644 --- a/configs/faster_rcnn/faster-rcnn_r50-caffe_fpn_ms-2x_coco.py +++ b/configs/faster_rcnn/faster-rcnn_r50-caffe_fpn_ms-2x_coco.py @@ -1,4 +1,18 @@ _base_ = './faster-rcnn_r50-caffe_fpn_ms-1x_coco.py' -# learning policy -lr_config = dict(step=[16, 23]) -runner = dict(type='EpochBasedRunner', max_epochs=24) + +# MMEngine support the following two ways, users can choose +# according to convenience +# param_scheduler = [ +# dict( +# type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500), # noqa +# dict( +# type='MultiStepLR', +# begin=0, +# end=12, +# by_epoch=True, +# milestones=[16, 23], +# gamma=0.1) +# ] +_base_.param_scheduler[1].milestones = [16, 23] + +train_cfg = dict(max_epochs=24) diff --git a/configs/faster_rcnn/faster-rcnn_r50-caffe_fpn_ms-3x_coco.py b/configs/faster_rcnn/faster-rcnn_r50-caffe_fpn_ms-3x_coco.py index c65e1a79324..365f6439241 100644 --- a/configs/faster_rcnn/faster-rcnn_r50-caffe_fpn_ms-3x_coco.py +++ b/configs/faster_rcnn/faster-rcnn_r50-caffe_fpn_ms-3x_coco.py @@ -13,41 +13,3 @@ init_cfg=dict( type='Pretrained', checkpoint='open-mmlab://detectron2/resnet50_caffe'))) - -# use caffe img_norm -img_norm_cfg = dict( - mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict( - type='Resize', - img_scale=[(1333, 640), (1333, 800)], - multiscale_mode='range', - keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', 
size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] - -data = dict( - train=dict(dataset=dict(pipeline=train_pipeline)), - val=dict(pipeline=test_pipeline), - test=dict(pipeline=test_pipeline)) diff --git a/configs/faster_rcnn/faster-rcnn_r50-caffe_fpn_ms-90k_coco.py b/configs/faster_rcnn/faster-rcnn_r50-caffe_fpn_ms-90k_coco.py index 3c0106a59b2..6b9b3eb0e79 100644 --- a/configs/faster_rcnn/faster-rcnn_r50-caffe_fpn_ms-90k_coco.py +++ b/configs/faster_rcnn/faster-rcnn_r50-caffe_fpn_ms-90k_coco.py @@ -1,15 +1,23 @@ _base_ = 'faster-rcnn_r50-caffe_fpn_ms-1x_coco.py' -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=0.001, - step=[60000, 80000]) +max_iter = 90000 -# Runner type -runner = dict(_delete_=True, type='IterBasedRunner', max_iters=90000) +param_scheduler = [ + dict( + type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500), + dict( + type='MultiStepLR', + begin=0, + end=max_iter, + by_epoch=False, + milestones=[60000, 80000], + gamma=0.1) +] -checkpoint_config = dict(interval=10000) -evaluation = dict(interval=10000, metric='bbox') +train_cfg = dict( + _delete_=True, + type='IterBasedTrainLoop', + max_iters=max_iter, + val_interval=10000) +default_hooks = dict(checkpoint=dict(by_epoch=False, interval=10000)) +log_processor = dict(by_epoch=False) diff --git a/configs/faster_rcnn/faster-rcnn_r50-tnr-pre_fpn_1x_coco.py b/configs/faster_rcnn/faster-rcnn_r50-tnr-pre_fpn_1x_coco.py index 7d952f2825d..7b3e5dedbe8 100644 --- a/configs/faster_rcnn/faster-rcnn_r50-tnr-pre_fpn_1x_coco.py +++ b/configs/faster_rcnn/faster-rcnn_r50-tnr-pre_fpn_1x_coco.py @@ -9,9 +9,6 @@ backbone=dict(init_cfg=dict(type='Pretrained', checkpoint=checkpoint))) # `lr` and `weight_decay` have been searched to be optimal. -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.0001, - weight_decay=0.1, +optim_wrapper = dict( + optimizer=dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.1), paramwise_cfg=dict(norm_decay_mult=0., bypass_duplicate=True)) diff --git a/configs/faster_rcnn/faster-rcnn_r50_fpn_amp-1x_coco.py b/configs/faster_rcnn/faster-rcnn_r50_fpn_amp-1x_coco.py index 4cecb8738b0..f765deaef1d 100644 --- a/configs/faster_rcnn/faster-rcnn_r50_fpn_amp-1x_coco.py +++ b/configs/faster_rcnn/faster-rcnn_r50_fpn_amp-1x_coco.py @@ -1,3 +1,6 @@ _base_ = './faster-rcnn_r50_fpn_1x_coco.py' -# fp16 settings -fp16 = dict(loss_scale=512.) 
+ +# MMEngine support the following two ways, users can choose +# according to convenience +# optim_wrapper = dict(type='AmpOptimWrapper') +_base_.optim_wrapper.type = 'AmpOptimWrapper' diff --git a/configs/faster_rcnn/faster-rcnn_x101-32x8d_fpn_ms-3x_coco.py b/configs/faster_rcnn/faster-rcnn_x101-32x8d_fpn_ms-3x_coco.py index 2ca1a16116b..28d6290be7a 100644 --- a/configs/faster_rcnn/faster-rcnn_x101-32x8d_fpn_ms-3x_coco.py +++ b/configs/faster_rcnn/faster-rcnn_x101-32x8d_fpn_ms-3x_coco.py @@ -1,5 +1,13 @@ _base_ = ['../common/ms_3x_coco.py', '../_base_/models/faster-rcnn_r50_fpn.py'] model = dict( + # ResNeXt-101-32x8d model trained with Caffe2 at FB, + # so the mean and std need to be changed. + data_preprocessor=dict( + type='DetDataPreprocessor', + mean=[103.530, 116.280, 123.675], + std=[57.375, 57.120, 58.395], + bgr_to_rgb=False, + pad_size_divisor=32), backbone=dict( type='ResNeXt', depth=101, @@ -13,48 +21,3 @@ init_cfg=dict( type='Pretrained', checkpoint='open-mmlab://detectron2/resnext101_32x8d'))) - -# ResNeXt-101-32x8d model trained with Caffe2 at FB, -# so the mean and std need to be changed. -img_norm_cfg = dict( - mean=[103.530, 116.280, 123.675], - std=[57.375, 57.120, 58.395], - to_rgb=False) - -# In mstrain 3x config, img_scale=[(1333, 640), (1333, 800)], -# multiscale_mode='range' -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict( - type='Resize', - img_scale=[(1333, 640), (1333, 800)], - multiscale_mode='range', - keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] - -# Use RepeatDataset to speed up training -data = dict( - train=dict(dataset=dict(pipeline=train_pipeline)), - val=dict(pipeline=test_pipeline), - test=dict(pipeline=test_pipeline)) diff --git a/configs/lad/lad_r101-paa-r50_fpn_2xb8_coco_1x.py b/configs/lad/lad_r101-paa-r50_fpn_2xb8_coco_1x.py index 36681eb0f19..d61d08638a0 100644 --- a/configs/lad/lad_r101-paa-r50_fpn_2xb8_coco_1x.py +++ b/configs/lad/lad_r101-paa-r50_fpn_2xb8_coco_1x.py @@ -125,6 +125,3 @@ max_per_img=100)) train_dataloader = dict(batch_size=8, num_workers=4) optim_wrapper = dict(type='AmpOptimWrapper', optimizer=dict(lr=0.01)) - -# TODO: MMEngine does not support fp16 yet. -# fp16 = dict(loss_scale=512.) diff --git a/configs/lad/lad_r50-paa-r101_fpn_2xb8_coco_1x.py b/configs/lad/lad_r50-paa-r101_fpn_2xb8_coco_1x.py index 434bc77be77..f7eaf2bfba1 100644 --- a/configs/lad/lad_r50-paa-r101_fpn_2xb8_coco_1x.py +++ b/configs/lad/lad_r50-paa-r101_fpn_2xb8_coco_1x.py @@ -124,6 +124,3 @@ max_per_img=100)) train_dataloader = dict(batch_size=8, num_workers=4) optim_wrapper = dict(type='AmpOptimWrapper', optimizer=dict(lr=0.01)) - -# TODO: MMEngine does not support fp16 yet. -# fp16 = dict(loss_scale=512.) 
diff --git a/configs/legacy_1.x/cascade-mask-rcnn_r50_fpn_1x_coco_v1.py b/configs/legacy_1.x/cascade-mask-rcnn_r50_fpn_1x_coco_v1.py index 2aa3a757e15..f948a7a9c10 100644 --- a/configs/legacy_1.x/cascade-mask-rcnn_r50_fpn_1x_coco_v1.py +++ b/configs/legacy_1.x/cascade-mask-rcnn_r50_fpn_1x_coco_v1.py @@ -76,4 +76,3 @@ output_size=14, sampling_ratio=2, aligned=False)))) -dist_params = dict(backend='nccl', port=29515) diff --git a/configs/legacy_1.x/retinanet_r50-caffe_fpn_1x_coco_v1.py b/configs/legacy_1.x/retinanet_r50-caffe_fpn_1x_coco_v1.py index a63d248c435..49abc31a002 100644 --- a/configs/legacy_1.x/retinanet_r50-caffe_fpn_1x_coco_v1.py +++ b/configs/legacy_1.x/retinanet_r50-caffe_fpn_1x_coco_v1.py @@ -1,5 +1,12 @@ _base_ = './retinanet_r50_fpn_1x_coco_v1.py' model = dict( + data_preprocessor=dict( + type='DetDataPreprocessor', + # use caffe img_norm + mean=[102.9801, 115.9465, 122.7717], + std=[1.0, 1.0, 1.0], + bgr_to_rgb=False, + pad_size_divisor=32), backbone=dict( norm_cfg=dict(requires_grad=False), norm_eval=True, @@ -7,35 +14,3 @@ init_cfg=dict( type='Pretrained', checkpoint='open-mmlab://detectron/resnet50_caffe'))) -# use caffe img_norm -img_norm_cfg = dict( - mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - train=dict(pipeline=train_pipeline), - val=dict(pipeline=test_pipeline), - test=dict(pipeline=test_pipeline)) diff --git a/configs/legacy_1.x/ssd300_coco_v1.py b/configs/legacy_1.x/ssd300_coco_v1.py index 65ccc1e542c..e5ffc633a9b 100644 --- a/configs/legacy_1.x/ssd300_coco_v1.py +++ b/configs/legacy_1.x/ssd300_coco_v1.py @@ -18,67 +18,3 @@ type='LegacyDeltaXYWHBBoxCoder', target_means=[.0, .0, .0, .0], target_stds=[0.1, 0.1, 0.2, 0.2]))) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile', to_float32=True), - dict(type='LoadAnnotations', with_bbox=True), - dict( - type='PhotoMetricDistortion', - brightness_delta=32, - contrast_range=(0.5, 1.5), - saturation_range=(0.5, 1.5), - hue_delta=18), - dict( - type='Expand', - mean=img_norm_cfg['mean'], - to_rgb=img_norm_cfg['to_rgb'], - ratio_range=(1, 4)), - dict( - type='MinIoURandomCrop', - min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), - min_crop_size=0.3), - dict(type='Resize', img_scale=(300, 300), keep_ratio=False), - dict(type='Normalize', **img_norm_cfg), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(300, 300), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=False), - 
dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - samples_per_gpu=8, - workers_per_gpu=3, - train=dict( - _delete_=True, - type='RepeatDataset', - times=5, - dataset=dict( - type=dataset_type, - ann_file=data_root + 'annotations/instances_train2017.json', - img_prefix=data_root + 'train2017/', - pipeline=train_pipeline)), - val=dict(pipeline=test_pipeline), - test=dict(pipeline=test_pipeline)) -# optimizer -optimizer = dict(type='SGD', lr=2e-3, momentum=0.9, weight_decay=5e-4) -optimizer_config = dict(_delete_=True) -dist_params = dict(backend='nccl', port=29555) - -# NOTE: `auto_scale_lr` is for automatically scaling LR, -# USER SHOULD NOT CHANGE ITS VALUES. -# base_batch_size = (8 GPUs) x (8 samples per GPU) -auto_scale_lr = dict(base_batch_size=64) diff --git a/configs/libra_rcnn/libra-fast-rcnn_r50_fpn_1x_coco.py b/configs/libra_rcnn/libra-fast-rcnn_r50_fpn_1x_coco.py index 9d4a4e41ce0..2efe440ce36 100644 --- a/configs/libra_rcnn/libra-fast-rcnn_r50_fpn_1x_coco.py +++ b/configs/libra_rcnn/libra-fast-rcnn_r50_fpn_1x_coco.py @@ -38,13 +38,15 @@ floor_thr=-1, floor_fraction=0, num_bins=3))))) -# dataset settings -dataset_type = 'CocoDataset' -data_root = 'data/coco/' -data = dict( - train=dict(proposal_file=data_root + - 'libra_proposals/rpn_r50_fpn_1x_train2017.pkl'), - val=dict(proposal_file=data_root + - 'libra_proposals/rpn_r50_fpn_1x_val2017.pkl'), - test=dict(proposal_file=data_root + - 'libra_proposals/rpn_r50_fpn_1x_val2017.pkl')) + +# MMEngine support the following two ways, users can choose +# according to convenience +# _base_.train_dataloader.dataset.proposal_file = 'libra_proposals/rpn_r50_fpn_1x_train2017.pkl' # noqa +train_dataloader = dict( + dataset=dict(proposal_file='libra_proposals/rpn_r50_fpn_1x_train2017.pkl')) + +# _base_.val_dataloader.dataset.proposal_file = 'libra_proposals/rpn_r50_fpn_1x_val2017.pkl' # noqa +# test_dataloader = _base_.val_dataloader +val_dataloader = dict( + dataset=dict(proposal_file='libra_proposals/rpn_r50_fpn_1x_val2017.pkl')) +test_dataloader = val_dataloader diff --git a/configs/mask_rcnn/mask-rcnn_r50_fpn_1x-wandb_coco.py b/configs/mask_rcnn/mask-rcnn_r50_fpn_1x-wandb_coco.py index c5107210457..364e0aa42aa 100644 --- a/configs/mask_rcnn/mask-rcnn_r50_fpn_1x-wandb_coco.py +++ b/configs/mask_rcnn/mask-rcnn_r50_fpn_1x-wandb_coco.py @@ -1,27 +1,16 @@ -# TODO: Awaiting refactoring _base_ = [ '../_base_/models/mask-rcnn_r50_fpn.py', '../_base_/datasets/coco_instance.py', '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' ] -# Set evaluation interval -evaluation = dict(interval=2) -# Set checkpoint interval -checkpoint_config = dict(interval=4) +vis_backends = [dict(type='LocalVisBackend'), dict(type='WandBVisBackend')] +visualizer = dict(vis_backends=vis_backends) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - dict(type='MMDetWandbHook', - init_kwargs={ - 'project': 'mmdetection', - 'group': 'maskrcnn-r50-fpn-1x-coco' - }, - interval=50, - log_checkpoint=True, - log_checkpoint_metadata=True, - num_eval_images=100) - ]) +# MMEngine support the following two ways, users can choose +# according to convenience +# default_hooks = dict(checkpoint=dict(interval=4)) +_base_.default_hooks.checkpoint.interval = 4 + +# train_cfg = dict(val_interval=2) +_base_.train_cfg.val_interval = 2 diff --git a/configs/retinanet/retinanet_r50-caffe_fpn_ms-1x_coco.py 
b/configs/retinanet/retinanet_r50-caffe_fpn_ms-1x_coco.py index e42a52746ad..24b6d60078f 100644 --- a/configs/retinanet/retinanet_r50-caffe_fpn_ms-1x_coco.py +++ b/configs/retinanet/retinanet_r50-caffe_fpn_ms-1x_coco.py @@ -4,7 +4,7 @@ dict(type='LoadImageFromFile'), dict(type='LoadAnnotations', with_bbox=True), dict( - type='RandomResize', + type='RandomChoiceResize', scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), (1333, 768), (1333, 800)], keep_ratio=True), diff --git a/configs/retinanet/retinanet_r50_fpn_amp-1x_coco.py b/configs/retinanet/retinanet_r50_fpn_amp-1x_coco.py index 6b6cebe48a1..acf5266337b 100644 --- a/configs/retinanet/retinanet_r50_fpn_amp-1x_coco.py +++ b/configs/retinanet/retinanet_r50_fpn_amp-1x_coco.py @@ -1,3 +1,6 @@ _base_ = './retinanet_r50_fpn_1x_coco.py' -# fp16 settings -fp16 = dict(loss_scale=512.) + +# MMEngine support the following two ways, users can choose +# according to convenience +# optim_wrapper = dict(type='AmpOptimWrapper') +_base_.optim_wrapper.type = 'AmpOptimWrapper' diff --git a/configs/wider_face/ssd300_24e_widerface.py b/configs/wider_face/ssd300_24e_widerface.py index cb16dae0ae3..0447f3032b9 100644 --- a/configs/wider_face/ssd300_24e_widerface.py +++ b/configs/wider_face/ssd300_24e_widerface.py @@ -3,18 +3,24 @@ '../_base_/default_runtime.py' ] model = dict(bbox_head=dict(num_classes=1)) -# optimizer -optimizer = dict(type='SGD', lr=0.012, momentum=0.9, weight_decay=5e-4) -optimizer_config = dict() -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=1000, - warmup_ratio=0.001, - step=[16, 20]) -# runtime settings -runner = dict(type='EpochBasedRunner', max_epochs=24) -log_config = dict(interval=1) -# TODO add auto-scale-lr after a series of experiments +max_epochs = 24 +param_scheduler = [ + dict( + type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, + end=1000), + dict( + type='MultiStepLR', + begin=0, + end=max_epochs, + by_epoch=True, + milestones=[16, 20], + gamma=0.1) +] + +optim_wrapper = dict(clip_grad=dict(max_norm=35, norm_type=2)) + +train_cfg = dict( + type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=1) +default_hooks = dict(logger=dict(interval=1)) +log_processor = dict(window_size=1) From 9800f5952dc5daedbfb043cf0a34b4f30dd906f6 Mon Sep 17 00:00:00 2001 From: BigDong Date: Fri, 17 Mar 2023 22:39:58 +0800 Subject: [PATCH 14/38] [Fix] delete `data_root` in `CocoOccludedSeparatedMetric` to fix bug (#9969) --- mmdet/evaluation/metrics/coco_occluded_metric.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/mmdet/evaluation/metrics/coco_occluded_metric.py b/mmdet/evaluation/metrics/coco_occluded_metric.py index 544ff4426ba..81235a04e6e 100644 --- a/mmdet/evaluation/metrics/coco_occluded_metric.py +++ b/mmdet/evaluation/metrics/coco_occluded_metric.py @@ -1,6 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. 
- -import os.path as osp from typing import Dict, List, Optional, Union import mmengine @@ -68,11 +66,6 @@ def __init__( metric: Union[str, List[str]] = ['bbox', 'segm'], **kwargs) -> None: super().__init__(*args, metric=metric, **kwargs) - # load from local file - if osp.isfile(occluded_ann) and not osp.isabs(occluded_ann): - occluded_ann = osp.join(self.data_root, occluded_ann) - if osp.isfile(separated_ann) and not osp.isabs(separated_ann): - separated_ann = osp.join(self.data_root, separated_ann) self.occluded_ann = load(occluded_ann) self.separated_ann = load(separated_ann) self.score_thr = score_thr From e862054981971adccff7176476717388b080f1d5 Mon Sep 17 00:00:00 2001 From: RangiLyu Date: Fri, 17 Mar 2023 22:43:12 +0800 Subject: [PATCH 15/38] [Fix] Fix inference benchmark ut. (#9971) --- tests/test_utils/test_benchmark.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_utils/test_benchmark.py b/tests/test_utils/test_benchmark.py index b4dcb7ebc46..939a7eca4e5 100644 --- a/tests/test_utils/test_benchmark.py +++ b/tests/test_utils/test_benchmark.py @@ -4,9 +4,9 @@ import unittest import torch -import torch.nn as nn from mmengine import Config, MMLogger from mmengine.dataset import Compose +from mmengine.model import BaseModel from torch.utils.data import Dataset from mmdet.registry import DATASETS, MODELS @@ -16,12 +16,12 @@ @MODELS.register_module() -class ToyDetector(nn.Module): +class ToyDetector(BaseModel): def __init__(self, *args, **kwargs): super().__init__() - def forward(self, data_batch, return_loss=False): + def forward(self, *args, **kwargs): pass From 1d0a1956d1830c1c64031a9bda66cfc78c43f100 Mon Sep 17 00:00:00 2001 From: zwhus <121282623+zwhus@users.noreply.github.com> Date: Sat, 18 Mar 2023 14:11:47 +0800 Subject: [PATCH 16/38] [Fix] Fix cityscapes import error in downstream projects (#9984) --- .../evaluation/functional/cityscapes_utils.py | 58 ++++++++++++++----- mmdet/evaluation/metrics/cityscapes_metric.py | 2 +- 2 files changed, 46 insertions(+), 14 deletions(-) diff --git a/mmdet/evaluation/functional/cityscapes_utils.py b/mmdet/evaluation/functional/cityscapes_utils.py index e72cd171ce2..5ced3680dee 100644 --- a/mmdet/evaluation/functional/cityscapes_utils.py +++ b/mmdet/evaluation/functional/cityscapes_utils.py @@ -8,18 +8,26 @@ from pathlib import Path from typing import Optional, Union -import cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling as CSEval # noqa: E501 import mmcv import numpy as np -from cityscapesscripts.evaluation.instance import Instance -from cityscapesscripts.helpers.csHelpers import id2label # noqa: E501 -from cityscapesscripts.helpers.csHelpers import labels, writeDict2JSON from mmengine.fileio import get +try: + import cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling as CSEval # noqa: E501 + from cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling import \ + CArgs # noqa: E501 + from cityscapesscripts.evaluation.instance import Instance + from cityscapesscripts.helpers.csHelpers import (id2label, labels, + writeDict2JSON) + HAS_CITYSCAPESAPI = True +except ImportError: + CArgs = object + HAS_CITYSCAPESAPI = False + def evaluateImgLists(prediction_list: list, groundtruth_list: list, - args: CSEval.CArgs, + args: CArgs, backend_args: Optional[dict] = None, dump_matches: bool = False) -> dict: """A wrapper of obj:``cityscapesscripts.evaluation. @@ -29,7 +37,7 @@ def evaluateImgLists(prediction_list: list, Args: prediction_list (list): A list of prediction txt file. 
groundtruth_list (list): A list of groundtruth image file. - args (CSEval.CArgs): A global object setting in + args (CArgs): A global object setting in obj:``cityscapesscripts.evaluation. evalInstanceLevelSemanticLabeling`` backend_args (dict, optional): Arguments to instantiate the @@ -38,6 +46,11 @@ def evaluateImgLists(prediction_list: list, Returns: dict: The computed metric. """ + if not HAS_CITYSCAPESAPI: + raise RuntimeError('Failed to import `cityscapesscripts`.' + 'Please try to install official ' + 'cityscapesscripts by ' + '"pip install cityscapesscripts"') # determine labels of interest CSEval.setInstanceLabels(args) # get dictionary of all ground truth instances @@ -69,7 +82,7 @@ def evaluateImgLists(prediction_list: list, def matchGtWithPreds(prediction_list: list, groundtruth_list: list, gt_instances: dict, - args: CSEval.CArgs, + args: CArgs, backend_args=None): """A wrapper of obj:``cityscapesscripts.evaluation. @@ -79,7 +92,7 @@ def matchGtWithPreds(prediction_list: list, prediction_list (list): A list of prediction txt file. groundtruth_list (list): A list of groundtruth image file. gt_instances (dict): Groundtruth dict. - args (CSEval.CArgs): A global object setting in + args (CArgs): A global object setting in obj:``cityscapesscripts.evaluation. evalInstanceLevelSemanticLabeling`` backend_args (dict, optional): Arguments to instantiate the @@ -87,6 +100,11 @@ def matchGtWithPreds(prediction_list: list, Returns: dict: The processed prediction and groundtruth result. """ + if not HAS_CITYSCAPESAPI: + raise RuntimeError('Failed to import `cityscapesscripts`.' + 'Please try to install official ' + 'cityscapesscripts by ' + '"pip install cityscapesscripts"') matches: dict = dict() if not args.quiet: print(f'Matching {len(prediction_list)} pairs of images...') @@ -154,7 +172,11 @@ def readPredInfo(prediction_file: str) -> dict: Returns: dict: The processed prediction results. """ - + if not HAS_CITYSCAPESAPI: + raise RuntimeError('Failed to import `cityscapesscripts`.' + 'Please try to install official ' + 'cityscapesscripts by ' + '"pip install cityscapesscripts"') printError = CSEval.printError predInfo = {} @@ -184,7 +206,7 @@ def readPredInfo(prediction_file: str) -> dict: def getGtInstances(groundtruth_list: list, - args: CSEval.CArgs, + args: CArgs, backend_args: Optional[dict] = None) -> dict: """A wrapper of obj:``cityscapesscripts.evaluation. @@ -192,7 +214,7 @@ def getGtInstances(groundtruth_list: list, groundtruth image from file backend. Args: groundtruth_list (list): A list of groundtruth image file. - args (CSEval.CArgs): A global object setting in + args (CArgs): A global object setting in obj:``cityscapesscripts.evaluation. evalInstanceLevelSemanticLabeling`` backend_args (dict, optional): Arguments to instantiate the @@ -200,6 +222,11 @@ def getGtInstances(groundtruth_list: list, Returns: dict: The computed metric. """ + if not HAS_CITYSCAPESAPI: + raise RuntimeError('Failed to import `cityscapesscripts`.' + 'Please try to install official ' + 'cityscapesscripts by ' + '"pip install cityscapesscripts"') # if there is a global statistics json, then load it if (os.path.isfile(args.gtInstancesFile)): if not args.quiet: @@ -218,7 +245,7 @@ def getGtInstances(groundtruth_list: list, def instances2dict(image_list: list, - args: CSEval.CArgs, + args: CArgs, backend_args: Optional[dict] = None) -> dict: """A wrapper of obj:``cityscapesscripts.evaluation. @@ -226,7 +253,7 @@ def instances2dict(image_list: list, groundtruth image from file backend. 
Args: image_list (list): A list of image file. - args (CSEval.CArgs): A global object setting in + args (CArgs): A global object setting in obj:``cityscapesscripts.evaluation. evalInstanceLevelSemanticLabeling`` backend_args (dict, optional): Arguments to instantiate the @@ -234,6 +261,11 @@ def instances2dict(image_list: list, Returns: dict: The processed groundtruth results. """ + if not HAS_CITYSCAPESAPI: + raise RuntimeError('Failed to import `cityscapesscripts`.' + 'Please try to install official ' + 'cityscapesscripts by ' + '"pip install cityscapesscripts"') imgCount = 0 instanceDict = {} diff --git a/mmdet/evaluation/metrics/cityscapes_metric.py b/mmdet/evaluation/metrics/cityscapes_metric.py index 23edbf964b1..84c35390bee 100644 --- a/mmdet/evaluation/metrics/cityscapes_metric.py +++ b/mmdet/evaluation/metrics/cityscapes_metric.py @@ -18,7 +18,7 @@ import cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling as CSEval # noqa: E501 import cityscapesscripts.helpers.labels as CSLabels - from mmdet.evaluation.functional.cityscapes_utils import evaluateImgLists + from mmdet.evaluation.functional import evaluateImgLists HAS_CITYSCAPESAPI = True except ImportError: HAS_CITYSCAPESAPI = False From 764c098580dec70bd4a482c2e281241d0901b7b3 Mon Sep 17 00:00:00 2001 From: RangiLyu Date: Wed, 22 Mar 2023 10:14:00 +0800 Subject: [PATCH 17/38] [Doc] Config migration guide. (#9960) Co-authored-by: Range King --- docs/en/index.rst | 2 +- .../migration/api_and_registry_migration.md | 1 + docs/en/migration/config_migration.md | 819 ++++++++++++++++++ docs/en/migration/dataset_migration.md | 1 + docs/en/migration/migration.md | 12 + docs/en/migration/migration_faq.md | 1 + docs/en/migration/model_migration.md | 1 + docs/en/overview.md | 2 + docs/zh_cn/index.rst | 2 +- .../migration/api_and_registry_migration.md | 1 + docs/zh_cn/migration/config_migration.md | 814 +++++++++++++++++ docs/zh_cn/migration/dataset_migration.md | 1 + docs/zh_cn/migration/migration.md | 12 + docs/zh_cn/migration/migration_faq.md | 1 + docs/zh_cn/migration/model_migration.md | 1 + docs/zh_cn/overview.md | 2 + 16 files changed, 1671 insertions(+), 2 deletions(-) create mode 100644 docs/en/migration/api_and_registry_migration.md create mode 100644 docs/en/migration/config_migration.md create mode 100644 docs/en/migration/dataset_migration.md create mode 100644 docs/en/migration/migration.md create mode 100644 docs/en/migration/migration_faq.md create mode 100644 docs/en/migration/model_migration.md create mode 100644 docs/zh_cn/migration/api_and_registry_migration.md create mode 100644 docs/zh_cn/migration/config_migration.md create mode 100644 docs/zh_cn/migration/dataset_migration.md create mode 100644 docs/zh_cn/migration/migration.md create mode 100644 docs/zh_cn/migration/migration_faq.md create mode 100644 docs/zh_cn/migration/model_migration.md diff --git a/docs/en/index.rst b/docs/en/index.rst index 285954487bb..32c5952a4ae 100644 --- a/docs/en/index.rst +++ b/docs/en/index.rst @@ -24,7 +24,7 @@ Welcome to MMDetection's documentation! :maxdepth: 1 :caption: Migration - migration.md + migration/migration.md .. 
toctree:: :maxdepth: 1 diff --git a/docs/en/migration/api_and_registry_migration.md b/docs/en/migration/api_and_registry_migration.md new file mode 100644 index 00000000000..72bfd3aec8e --- /dev/null +++ b/docs/en/migration/api_and_registry_migration.md @@ -0,0 +1 @@ +# Migrate API and Registry from MMDetection 2.x to 3.x diff --git a/docs/en/migration/config_migration.md b/docs/en/migration/config_migration.md new file mode 100644 index 00000000000..20fe0bb7e0f --- /dev/null +++ b/docs/en/migration/config_migration.md @@ -0,0 +1,819 @@ +# Migrate Configuration File from MMDetection 2.x to 3.x + +The configuration file of MMDetection 3.x has undergone significant changes in comparison to the 2.x version. This document explains how to migrate 2.x configuration files to 3.x. + +In the previous tutorial [Learn about Configs](../user_guides/config.md), we used Mask R-CNN as an example to introduce the configuration file structure of MMDetection 3.x. Here, we will follow the same structure to demonstrate how to migrate 2.x configuration files to 3.x. + +## Model Configuration + +There have been no major changes to the model configuration in 3.x compared to 2.x. For the model's backbone, neck, head, as well as train_cfg and test_cfg, the parameters remain the same as in version 2.x. + +On the other hand, we have added the `DataPreprocessor` module in MMDetection 3.x. The configuration for the `DataPreprocessor` module is located in `model.data_preprocessor`. It is used to preprocess the input data, such as normalizing input images and padding images of different sizes into batches, and loading images from memory to VRAM. This configuration replaces the `Normalize` and `Pad` modules in `train_pipeline` and `test_pipeline` of the earlier version. + + + + + + + + + +
2.x Config + +```python +# Image normalization parameters +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True) +pipeline=[ + ..., + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), # Padding the image to multiples of 32 + ... +] +``` + +
3.x Config

+```python
+model = dict(
+    data_preprocessor=dict(
+        type='DetDataPreprocessor',
+        # Image normalization parameters
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=True,
+        # Image padding parameters
+        pad_mask=True,  # In instance segmentation, the mask needs to be padded
+        pad_size_divisor=32)  # Padding the image to multiples of 32
+)
+```
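+Since the normalization statistics now live on the model, a child config that
+needs different statistics overrides `model.data_preprocessor` instead of
+editing every pipeline. A minimal sketch, assuming a hypothetical base config;
+the BGR mean/std values follow the Caffe-style configs elsewhere in this patch:
+
+```python
+_base_ = './mask-rcnn_r50_fpn_1x_coco.py'  # hypothetical base config
+
+model = dict(
+    data_preprocessor=dict(
+        # Caffe-pretrained backbones expect BGR input without std scaling
+        mean=[103.530, 116.280, 123.675],
+        std=[1.0, 1.0, 1.0],
+        bgr_to_rgb=False))
+```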
+
+## Dataset and Evaluator Configuration
+
+The dataset and evaluator configurations have undergone major changes compared to version 2.x. We will introduce the migration from version 2.x to version 3.x from three aspects: Dataloader and Dataset, Data transform pipeline, and Evaluator configuration.
+
+### Dataloader and Dataset Configuration
+
+In the new version, we keep the data loading settings consistent with PyTorch's official DataLoader, making them easier for users to understand and adopt.
+We put the data loading settings for training, validation, and testing separately in `train_dataloader`, `val_dataloader`, and `test_dataloader`, so users can set different parameters for each of these dataloaders.
+The input parameters are basically the same as those required by the [PyTorch DataLoader](https://pytorch.org/docs/stable/data.html?highlight=dataloader#torch.utils.data.DataLoader).
+
+In this way, parameters that were not configurable in version 2.x, such as `sampler`, `batch_sampler`, and `persistent_workers`, are now exposed in the configuration file, so that users can set dataloader parameters more flexibly.
+
+Users can set the dataset configuration through `train_dataloader.dataset`, `val_dataloader.dataset`, and `test_dataloader.dataset`, which correspond to `data.train`, `data.val`, and `data.test` in version 2.x.
2.x Config + +```python +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline)) +``` + +
3.x Config + +```python +train_dataloader = dict( + batch_size=2, + num_workers=2, + persistent_workers=True, # Avoid recreating subprocesses after each iteration + sampler=dict(type='DefaultSampler', shuffle=True), # Default sampler, supports both distributed and non-distributed training + batch_sampler=dict(type='AspectRatioBatchSampler'), # Default batch_sampler, used to ensure that images in the batch have similar aspect ratios, so as to better utilize graphics memory + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='annotations/instances_train2017.json', + data_prefix=dict(img='train2017/'), + filter_cfg=dict(filter_empty_gt=True, min_size=32), + pipeline=train_pipeline)) +# In version 3.x, validation and test dataloaders can be configured independently +val_dataloader = dict( + batch_size=1, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='annotations/instances_val2017.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=test_pipeline)) +test_dataloader = val_dataloader # The configuration of the testing dataloader is the same as that of the validation dataloader, which is omitted here + +``` + +
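+Because these fields mirror `torch.utils.data.DataLoader`, adjusting the
+loading behavior is a plain override in a child config. A minimal sketch with
+illustrative values, assuming a hypothetical base config:
+
+```python
+_base_ = './faster-rcnn_r50_fpn_1x_coco.py'  # hypothetical base config
+
+train_dataloader = dict(
+    batch_size=4,   # forwarded to DataLoader(batch_size=...)
+    num_workers=8)  # forwarded to DataLoader(num_workers=...)
+```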
+ +### Data Transform Pipeline Configuration + +As mentioned earlier, we have separated the normalization and padding configurations for images from the `train_pipeline` and `test_pipeline`, and have placed them in `model.data_preprocessor` instead. Hence, in the 3.x version of the pipeline, we no longer require the `Normalize` and `Pad` transforms. + +At the same time, we have also refactored the transform responsible for packing the data format, and have merged the `Collect` and `DefaultFormatBundle` transforms into `PackDetInputs`. This transform is responsible for packing the data from the data pipeline into the input format of the model. For more details on the input format conversion, please refer to the [data flow documentation](../advanced_guides/data_flow.md). + +Below, we will use the `train_pipeline` of Mask R-CNN as an example, to demonstrate how to migrate from the 2.x configuration to the 3.x configuration: + + + + + + + + + +
2.x Config + +```python +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +``` + +
3.x Config + +```python +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', prob=0.5), + dict(type='PackDetInputs') +] +``` + +
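+For instance segmentation, the same pipeline additionally loads masks, and
+`PackDetInputs` packs them together with the boxes and labels. A sketch:
+
+```python
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
+    dict(type='Resize', scale=(1333, 800), keep_ratio=True),
+    dict(type='RandomFlip', prob=0.5),
+    dict(type='PackDetInputs')  # replaces DefaultFormatBundle + Collect
+]
+```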
+
+For the `test_pipeline`, apart from removing the `Normalize` and `Pad` transforms, we have also separated test-time augmentation (TTA) from the normal testing process and removed `MultiScaleFlipAug`. For more information on how to use the new TTA version, please refer to the [TTA documentation](../advanced_guides/tta.md).
+
+Below, we will again use the `test_pipeline` of Mask R-CNN as an example to demonstrate how to migrate from the 2.x configuration to the 3.x configuration:
2.x Config + +```python +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +``` + +
3.x Config + +```python +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(1333, 800), keep_ratio=True), + dict( + type='PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] +``` + +
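+For reference, the separated TTA path mentioned above is configured through a
+dedicated `tta_model` and `tta_pipeline`. The following is a condensed sketch
+(scales and the NMS threshold are illustrative); see the TTA documentation for
+the complete version:
+
+```python
+tta_model = dict(
+    type='DetTTAModel',
+    tta_cfg=dict(nms=dict(type='nms', iou_threshold=0.5), max_per_img=100))
+
+tta_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='TestTimeAug',  # enumerates combinations of the sub-transforms
+        transforms=[
+            [dict(type='Resize', scale=(1333, 800), keep_ratio=True)],
+            [dict(type='RandomFlip', prob=1.0),
+             dict(type='RandomFlip', prob=0.0)],
+            [dict(
+                type='PackDetInputs',
+                meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+                           'scale_factor', 'flip', 'flip_direction'))],
+        ])
+]
+```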
+ +In addition, we have also refactored some data augmentation transforms. The following table lists the mapping between the transforms used in the 2.x version and the 3.x version: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Name2.x Config3.x Config
Resize + +```python +dict(type='Resize', + img_scale=(1333, 800), + keep_ratio=True) +``` + + + +```python +dict(type='Resize', + scale=(1333, 800), + keep_ratio=True) +``` + +
RandomResize + +```python +dict( + type='Resize', + img_scale=[ + (1333, 640), (1333, 800)], + multiscale_mode='range', + keep_ratio=True) +``` + + + +```python +dict( + type='RandomResize', + scale=[ + (1333, 640), (1333, 800)], + keep_ratio=True) +``` + +
RandomChoiceResize + +```python +dict( + type='Resize', + img_scale=[ + (1333, 640), (1333, 672), + (1333, 704), (1333, 736), + (1333, 768), (1333, 800)], + multiscale_mode='value', + keep_ratio=True) +``` + + + +```python +dict( + type='RandomChoiceResize', + scales=[ + (1333, 640), (1333, 672), + (1333, 704), (1333, 736), + (1333, 768), (1333, 800)], + keep_ratio=True) +``` + +
RandomFlip + +```python +dict(type='RandomFlip', flip_ratio=0.5) +``` + + + +```python +dict(type='RandomFlip', prob=0.5) +``` + +
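+The difference between the two multiscale variants carries over from 2.x:
+`RandomResize` samples a scale continuously between two endpoints (the old
+`multiscale_mode='range'`), while `RandomChoiceResize` picks one scale from a
+discrete list (the old `multiscale_mode='value'`). A side-by-side sketch:
+
+```python
+# continuous sampling between (1333, 640) and (1333, 800)
+range_resize = dict(
+    type='RandomResize', scale=[(1333, 640), (1333, 800)], keep_ratio=True)
+
+# discrete choice among the listed scales
+choice_resize = dict(
+    type='RandomChoiceResize',
+    scales=[(1333, 640), (1333, 800)],
+    keep_ratio=True)
+```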
+
+### Evaluator Configuration
+
+In version 3.x, model accuracy evaluation is no longer tied to the dataset, but is instead accomplished through the use of an Evaluator.
+The Evaluator configuration is divided into two parts: `val_evaluator` and `test_evaluator`. The `val_evaluator` is used for validation dataset evaluation, while the `test_evaluator` is used for testing dataset evaluation.
+This corresponds to the `evaluation` field in version 2.x.
+
+The following table shows the corresponding relationship between Evaluators in version 2.x and 3.x.
Metric Name2.x Config3.x Config
COCO + +```python +data = dict( + val=dict( + type='CocoDataset', + ann_file=data_root + 'annotations/instances_val2017.json')) +evaluation = dict(metric=['bbox', 'segm']) +``` + + + +```python +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/instances_val2017.json', + metric=['bbox', 'segm'], + format_only=False) +``` + +
Pascal VOC + +```python +data = dict( + val=dict( + type=dataset_type, + ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt')) +evaluation = dict(metric='mAP') +``` + + + +```python +val_evaluator = dict( + type='VOCMetric', + metric='mAP', + eval_mode='11points') +``` + +
OpenImages + +```python +data = dict( + val=dict( + type='OpenImagesDataset', + ann_file=data_root + 'annotations/validation-annotations-bbox.csv', + img_prefix=data_root + 'OpenImages/validation/', + label_file=data_root + 'annotations/class-descriptions-boxable.csv', + hierarchy_file=data_root + + 'annotations/bbox_labels_600_hierarchy.json', + meta_file=data_root + 'annotations/validation-image-metas.pkl', + image_level_ann_file=data_root + + 'annotations/validation-annotations-human-imagelabels-boxable.csv')) +evaluation = dict(interval=1, metric='mAP') +``` + + + +```python +val_evaluator = dict( + type='OpenImagesMetric', + iou_thrs=0.5, + ioa_thrs=0.5, + use_group_of=True, + get_supercategory=True) +``` + +
CityScapes + +```python +data = dict( + val=dict( + type='CityScapesDataset', + ann_file=data_root + + 'annotations/instancesonly_filtered_gtFine_val.json', + img_prefix=data_root + 'leftImg8bit/val/', + pipeline=test_pipeline)) +evaluation = dict(metric=['bbox', 'segm']) +``` + + + +```python +val_evaluator = [ + dict( + type='CocoMetric', + ann_file=data_root + + 'annotations/instancesonly_filtered_gtFine_val.json', + metric=['bbox', 'segm']), + dict( + type='CityScapesMetric', + ann_file=data_root + + 'annotations/instancesonly_filtered_gtFine_val.json', + seg_prefix=data_root + '/gtFine/val', + outfile_prefix='./work_dirs/cityscapes_metric/instance') +] +``` + +
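+The 3.x evaluators can also dump results for test-server submission instead of
+computing metrics. A sketch for COCO test-dev, assuming `data_root` is defined
+as above; the annotation file and output prefix are illustrative:
+
+```python
+test_evaluator = dict(
+    type='CocoMetric',
+    ann_file=data_root + 'annotations/image_info_test-dev2017.json',
+    metric=['bbox', 'segm'],
+    format_only=True,  # only save the results, no metrics computed
+    outfile_prefix='./work_dirs/coco_detection/test')
+```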
+ +## Configuration for Training and Testing + + + + + + + + + +
2.x Config + +```python +runner = dict( + type='EpochBasedRunner', # Type of training loop + max_epochs=12) # Maximum number of training epochs +evaluation = dict(interval=2) # Interval for evaluation, check the performance every 2 epochs +``` + +
3.x Config + +```python +train_cfg = dict( + type='EpochBasedTrainLoop', # Type of training loop, please refer to https://github.com/open-mmlab/mmengine/blob/main/mmengine/runner/loops.py + max_epochs=12, # Maximum number of training epochs + val_interval=2) # Interval for validation, check the performance every 2 epochs +val_cfg = dict(type='ValLoop') # Type of validation loop +test_cfg = dict(type='TestLoop') # Type of testing loop +``` + +
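+Iteration-based training uses the same field with a different loop type. The
+following sketch mirrors the 90k-iteration Faster R-CNN configs earlier in
+this patch:
+
+```python
+train_cfg = dict(
+    _delete_=True,  # discard the epoch-based loop inherited from the base
+    type='IterBasedTrainLoop',
+    max_iters=90000,
+    val_interval=10000)
+default_hooks = dict(checkpoint=dict(by_epoch=False, interval=10000))
+log_processor = dict(by_epoch=False)  # report logs by iteration
+```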
+
+## Optimization Configuration
+
+The configuration for the optimizer and gradient clipping has moved to the `optim_wrapper` field.
+The following table shows the correspondences for the optimizer configuration between the 2.x and 3.x versions:
2.x Config + +```python +optimizer = dict( + type='SGD', # Optimizer: Stochastic Gradient Descent + lr=0.02, # Base learning rate + momentum=0.9, # SGD with momentum + weight_decay=0.0001) # Weight decay +optimizer_config = dict(grad_clip=None) # Configuration for gradient clipping, set to None to disable +``` + +
3.x Config + +```python +optim_wrapper = dict( # Configuration for the optimizer wrapper + type='OptimWrapper', # Type of optimizer wrapper, you can switch to AmpOptimWrapper to enable mixed precision training + optimizer=dict( # Optimizer configuration, supports various PyTorch optimizers, please refer to https://pytorch.org/docs/stable/optim.html#algorithms + type='SGD', # SGD + lr=0.02, # Base learning rate + momentum=0.9, # SGD with momentum + weight_decay=0.0001), # Weight decay + clip_grad=None, # Configuration for gradient clipping, set to None to disable. For usage, please see https://mmengine.readthedocs.io/en/latest/tutorials/optimizer.html + ) +``` + +
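+Mixed precision training, configured through the standalone `fp16` field in
+2.x, is now enabled by switching the wrapper type, as the amp configs earlier
+in this patch do:
+
+```python
+# replaces the 2.x `fp16 = dict(loss_scale=512.)` setting
+optim_wrapper = dict(
+    type='AmpOptimWrapper',
+    optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001))
+```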
+
+The learning rate configuration has also moved from the `lr_config` field to the `param_scheduler` field. The `param_scheduler` configuration is closer in design to PyTorch's learning rate schedulers and more flexible. The following table shows the correspondences for the learning rate configuration between the 2.x and 3.x versions:
2.x Config + +```python +lr_config = dict( + policy='step', # Use multi-step learning rate strategy during training + warmup='linear', # Use linear learning rate warmup + warmup_iters=500, # End warmup at iteration 500 + warmup_ratio=0.001, # Coefficient for learning rate warmup + step=[8, 11], # Learning rate decay at which epochs + gamma=0.1) # Learning rate decay coefficient + +``` + +
3.x Config + +```python +param_scheduler = [ + dict( + type='LinearLR', # Use linear learning rate warmup + start_factor=0.001, # Coefficient for learning rate warmup + by_epoch=False, # Update the learning rate during warmup at each iteration + begin=0, # Starting from the first iteration + end=500), # End at the 500th iteration + dict( + type='MultiStepLR', # Use multi-step learning rate strategy during training + by_epoch=True, # Update the learning rate at each epoch + begin=0, # Starting from the first epoch + end=12, # Ending at the 12th epoch + milestones=[8, 11], # Learning rate decay at which epochs + gamma=0.1) # Learning rate decay coefficient +] + +``` + +
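+The list form also makes it easy to combine schedulers. A sketch of linear
+warmup followed by cosine decay, using MMEngine's `CosineAnnealingLR`; the
+`eta_min` value is illustrative:
+
+```python
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
+    dict(
+        type='CosineAnnealingLR',
+        T_max=12,      # period of the cosine schedule, in epochs here
+        eta_min=1e-5,  # minimum learning rate
+        begin=0,
+        end=12,
+        by_epoch=True)
+]
+```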
+
+For information on how to migrate other learning rate adjustment policies, please refer to the [learning rate migration document of MMEngine](https://mmengine.readthedocs.io/en/latest/migration/param_scheduler.html).
+
+## Migration of Other Configurations
+
+### Configuration for Saving Checkpoints
Function2.x Config3.x Config
Set Save Interval + +```python +checkpoint_config = dict( + interval=1) +``` + + + +```python +default_hooks = dict( + checkpoint=dict( + type='CheckpointHook', + interval=1)) +``` + +
Save Best Model + +```python +evaluation = dict( + save_best='auto') +``` + + + +```python +default_hooks = dict( + checkpoint=dict( + type='CheckpointHook', + save_best='auto')) +``` + +
Keep Latest Model + +```python +checkpoint_config = dict( + max_keep_ckpts=3) +``` + + + +```python +default_hooks = dict( + checkpoint=dict( + type='CheckpointHook', + max_keep_ckpts=3)) +``` + +
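+The three options above live on the same hook and can be combined. A sketch:
+
+```python
+default_hooks = dict(
+    checkpoint=dict(
+        type='CheckpointHook',
+        interval=1,         # save every epoch
+        save_best='auto',   # additionally track the best checkpoint
+        max_keep_ckpts=3))  # keep only the three latest checkpoints
+```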
+
+### Logging Configuration
+
+In MMDetection 3.x, log collection and log visualization are handled by the logger and visualizer of MMEngine, respectively. The following table compares the configurations for printing and visualizing logs between MMDetection 2.x and 3.x.
Function2.x Config3.x Config
Set Log Printing Interval + +```python +log_config = dict(interval=50) +``` + + + +```python +default_hooks = dict( + logger=dict(type='LoggerHook', interval=50)) +# Optional: set moving average window size +log_processor = dict( + type='LogProcessor', window_size=50) +``` + +
Use TensorBoard or WandB to visualize logs + +```python +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook'), + dict(type='MMDetWandbHook', + init_kwargs={ + 'project': 'mmdetection', + 'group': 'maskrcnn-r50-fpn-1x-coco' + }, + interval=50, + log_checkpoint=True, + log_checkpoint_metadata=True, + num_eval_images=100) + ]) +``` + + + +```python +vis_backends = [ + dict(type='LocalVisBackend'), + dict(type='TensorboardVisBackend'), + dict(type='WandbVisBackend', + init_kwargs={ + 'project': 'mmdetection', + 'group': 'maskrcnn-r50-fpn-1x-coco' + }) +] +visualizer = dict( + type='DetLocalVisualizer', + vis_backends=vis_backends, + name='visualizer') +``` + +
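+When changing the printing interval, it usually makes sense to keep the
+moving-average window of the log processor in step with it. A sketch with
+illustrative values:
+
+```python
+default_hooks = dict(logger=dict(type='LoggerHook', interval=20))
+log_processor = dict(type='LogProcessor', window_size=20, by_epoch=True)
+```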
+
+For visualization-related tutorials, please refer to the [Visualization Tutorial](../user_guides/visualization.md) of MMDetection.
+
+### Runtime Configuration
+
+The runtime configuration fields have been adjusted in version 3.x; the specific correspondences are as follows:
2.x Config3.x Config
+ +```python +cudnn_benchmark = False +opencv_num_threads = 0 +mp_start_method = 'fork' +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None + + +``` + + + +```python +env_cfg = dict( + cudnn_benchmark=False, + mp_cfg=dict(mp_start_method='fork', + opencv_num_threads=0), + dist_cfg=dict(backend='nccl')) +log_level = 'INFO' +load_from = None +resume = False +``` + +
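+Note that `resume` is now a boolean that cooperates with `load_from`: with
+`load_from=None` and `resume=True`, training automatically resumes from the
+latest checkpoint found in `work_dir`. A sketch resuming from an explicit
+checkpoint (the path is hypothetical):
+
+```python
+load_from = 'work_dirs/my_exp/epoch_8.pth'  # hypothetical checkpoint path
+resume = True  # restore optimizer and scheduler state from this checkpoint
+```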
diff --git a/docs/en/migration/dataset_migration.md b/docs/en/migration/dataset_migration.md new file mode 100644 index 00000000000..75d093298e0 --- /dev/null +++ b/docs/en/migration/dataset_migration.md @@ -0,0 +1 @@ +# Migrate dataset from MMDetection 2.x to 3.x diff --git a/docs/en/migration/migration.md b/docs/en/migration/migration.md new file mode 100644 index 00000000000..ec6a2f891b1 --- /dev/null +++ b/docs/en/migration/migration.md @@ -0,0 +1,12 @@ +# Migrating from MMDetection 2.x to 3.x + +MMDetection 3.x is a significant update that includes many changes to API and configuration files. This document aims to help users migrate from MMDetection 2.x to 3.x. +We divided the migration guide into the following sections: + +- [Configuration file migration](./config_migration.md) +- [API and Registry migration](./api_and_registry_migration.md) +- [Dataset migration](./dataset_migration.md) +- [Model migration](./model_migration.md) +- [Frequently Asked Questions](./migration_faq.md) + +If you encounter any problems during the migration process, feel free to raise an issue. We also welcome contributions to this document. diff --git a/docs/en/migration/migration_faq.md b/docs/en/migration/migration_faq.md new file mode 100644 index 00000000000..a6e3c356c27 --- /dev/null +++ b/docs/en/migration/migration_faq.md @@ -0,0 +1 @@ +# Migration FAQ diff --git a/docs/en/migration/model_migration.md b/docs/en/migration/model_migration.md new file mode 100644 index 00000000000..04e280879fc --- /dev/null +++ b/docs/en/migration/model_migration.md @@ -0,0 +1 @@ +# Migrate models from MMDetection 2.x to 3.x diff --git a/docs/en/overview.md b/docs/en/overview.md index f78b658f017..39fb6f51564 100644 --- a/docs/en/overview.md +++ b/docs/en/overview.md @@ -50,3 +50,5 @@ Here is a detailed step-by-step guide to learn more about MMDetection: - [Basic Concepts](https://mmdetection.readthedocs.io/en/dev-3.x/advanced_guides/index.html#basic-concepts) - [Component Customization](https://mmdetection.readthedocs.io/en/dev-3.x/advanced_guides/index.html#component-customization) + +4. For users of MMDetection 2.x version, we provide a guide to help you adapt to the new version. You can find it in the [migration guide](./migration/migration.md). diff --git a/docs/zh_cn/index.rst b/docs/zh_cn/index.rst index 280e1ecacf6..58a4d8a52d3 100644 --- a/docs/zh_cn/index.rst +++ b/docs/zh_cn/index.rst @@ -24,7 +24,7 @@ Welcome to MMDetection's documentation! :maxdepth: 1 :caption: 迁移版本 - migration.md + migration/migration.md .. 
toctree:: :maxdepth: 1 diff --git a/docs/zh_cn/migration/api_and_registry_migration.md b/docs/zh_cn/migration/api_and_registry_migration.md new file mode 100644 index 00000000000..66e1c340806 --- /dev/null +++ b/docs/zh_cn/migration/api_and_registry_migration.md @@ -0,0 +1 @@ +# 将 API 和注册器从 MMDetection 2.x 迁移至 3.x diff --git a/docs/zh_cn/migration/config_migration.md b/docs/zh_cn/migration/config_migration.md new file mode 100644 index 00000000000..c4f9c8e3d2d --- /dev/null +++ b/docs/zh_cn/migration/config_migration.md @@ -0,0 +1,814 @@ +# 将配置文件从 MMDetection 2.x 迁移至 3.x + +MMDetection 3.x 的配置文件与 2.x 相比有较大变化,这篇文档将介绍如何将 2.x 的配置文件迁移到 3.x。 + +在前面的[配置文件教程](../user_guides/config.md)中,我们以 Mask R-CNN 为例介绍了 MMDetection 3.x 的配置文件结构,这里我们将按同样的结构介绍如何将 2.x 的配置文件迁移至 3.x。 + +## 模型配置 + +模型的配置与 2.x 相比并没有太大变化,对于模型的 backbone,neck,head,以及 train_cfg 和 test_cfg,它们的参数与 2.x 版本的参数保持一致。 + +不同的是,我们在 3.x 版本的模型中新增了 `DataPreprocessor` 模块。 +`DataPreprocessor` 模块的配置位于 `model.data_preprocessor` 中,它用于对输入数据进行预处理,例如对输入图像进行归一化,将不同大小的图片进行 padding 从而组成 batch,将图像从内存中读取到显存中等。这部分配置取代了原本存在于 train_pipeline 和 test_pipeline 中的 `Normalize` 和 `Pad`。 + + + + + + + + + +
原配置 + +```python +# 图像归一化参数 +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True) +pipeline=[ + ..., + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), # 图像 padding 到 32 的倍数 + ... +] +``` + +
新配置 + +```python +model = dict( + data_preprocessor=dict( + type='DetDataPreprocessor', + # 图像归一化参数 + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + # 图像 padding 参数 + pad_mask=True, # 在实例分割中,需要将 mask 也进行 padding + pad_size_divisor=32) # 图像 padding 到 32 的倍数 +) +``` + +
+ +## 数据集和评测器配置 + +数据集和评测部分的配置相比 2.x 版本有较大的变化。我们将从 Dataloader 和 Dataset,Data transform pipeline,以及评测器配置三个方面介绍如何将 2.x 版本的配置迁移到 3.x 版本。 + +### Dataloader 和 Dataset 配置 + +在新版本中,我们将数据加载的设置与 PyTorch 官方的 DataLoader 保持一致,这样可以使用户更容易理解和上手。 +我们将训练、验证和测试的数据加载设置分别放在 `train_dataloader`,`val_dataloader` 和 `test_dataloader` 中,用户可以分别对这些 dataloader 设置不同的参数,其输入参数与 [PyTorch 的 Dataloader](https://pytorch.org/docs/stable/data.html?highlight=dataloader#torch.utils.data.DataLoader) 所需要的参数基本一致。 + +通过这种方式,我们将 2.x 版本中不可配置的 `sampler`,`batch_sampler`,`persistent_workers` 等参数都放到了配置文件中,使得用户可以更加灵活地设置数据加载的参数。 + +用户可以通过 `train_dataloader.dataset`,`val_dataloader.dataset` 和 `test_dataloader.dataset` 来设置数据集的配置,它们分别对应 2.x 版本中的 `data.train`,`data.val` 和 `data.test`。 + + + + + + + + + +
原配置 + +```python +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline)) +``` + +
新配置 + +```python +train_dataloader = dict( + batch_size=2, + num_workers=2, + persistent_workers=True, # 避免每次迭代后 dataloader 重新创建子进程 + sampler=dict(type='DefaultSampler', shuffle=True), # 默认的 sampler,同时支持分布式训练和非分布式训练 + batch_sampler=dict(type='AspectRatioBatchSampler'), # 默认的 batch_sampler,用于保证 batch 中的图片具有相似的长宽比,从而可以更好地利用显存 + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='annotations/instances_train2017.json', + data_prefix=dict(img='train2017/'), + filter_cfg=dict(filter_empty_gt=True, min_size=32), + pipeline=train_pipeline)) +# 在 3.x 版本中可以独立配置验证和测试的 dataloader +val_dataloader = dict( + batch_size=1, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='annotations/instances_val2017.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=test_pipeline)) +test_dataloader = val_dataloader # 测试 dataloader 的配置与验证 dataloader 的配置相同,这里省略 +``` + +
+ +### Data transform pipeline 配置 + +上文中提到,我们将图像 normalize 和 padding 的配置从 `train_pipeline` 和 `test_pipeline` 中独立出来,放到了 `model.data_preprocessor` 中,因此在 3.x 版本的 pipeline 中,我们不再需要 `Normalize` 和 `Pad` 这两个 transform。 + +同时,我们也对负责数据格式打包的 transform 进行了重构,将 `Collect` 和 `DefaultFormatBundle` 这两个 transform 合并为了 `PackDetInputs`,它负责将 data pipeline 中的数据打包成模型的输入格式,关于输入格式的转换,详见[数据流文档](../advanced_guides/data_flow.md)。 + +下面以 Mask R-CNN 1x 的 train_pipeline 为例,介绍如何将 2.x 版本的配置迁移到 3.x 版本: + + + + + + + + + +
原配置 + +```python +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +``` + +
新配置 + +```python +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', prob=0.5), + dict(type='PackDetInputs') +] +``` + +
+ +对于 test_pipeline,除了将 `Normalize` 和 `Pad` 这两个 transform 去掉之外,我们也将测试时的数据增强(TTA)与普通的测试流程分开,移除了 `MultiScaleFlipAug`。关于新版的 TTA 如何使用,详见[TTA 文档](../advanced_guides/tta.md)。 + +下面同样以 Mask R-CNN 1x 的 test_pipeline 为例,介绍如何将 2.x 版本的配置迁移到 3.x 版本: + + + + + + + + + +
原配置 + +```python +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +``` + +
新配置 + +```python +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=(1333, 800), keep_ratio=True), + dict( + type='PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] +``` + +
+ +除此之外,我们还对一些数据增强进行了重构,下表列出了 2.x 版本中的 transform 与 3.x 版本中的 transform 的对应关系: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
名称原配置新配置
Resize + +```python +dict(type='Resize', + img_scale=(1333, 800), + keep_ratio=True) +``` + + + +```python +dict(type='Resize', + scale=(1333, 800), + keep_ratio=True) +``` + +
RandomResize + +```python +dict( + type='Resize', + img_scale=[ + (1333, 640), (1333, 800)], + multiscale_mode='range', + keep_ratio=True) +``` + + + +```python +dict( + type='RandomResize', + scale=[ + (1333, 640), (1333, 800)], + keep_ratio=True) +``` + +
RandomChoiceResize + +```python +dict( + type='Resize', + img_scale=[ + (1333, 640), (1333, 672), + (1333, 704), (1333, 736), + (1333, 768), (1333, 800)], + multiscale_mode='value', + keep_ratio=True) +``` + + + +```python +dict( + type='RandomChoiceResize', + scales=[ + (1333, 640), (1333, 672), + (1333, 704), (1333, 736), + (1333, 768), (1333, 800)], + keep_ratio=True) +``` + +
RandomFlip + +```python +dict(type='RandomFlip', + flip_ratio=0.5) +``` + + + +```python +dict(type='RandomFlip', + prob=0.5) +``` + +
+ +### 评测器配置 + +在 3.x 版本中,模型精度评测不再与数据集绑定,而是通过评测器(Evaluator)来完成。 +评测器配置分为 val_evaluator 和 test_evaluator 两部分,其中 val_evaluator 用于验证集评测,test_evaluator 用于测试集评测,对应 2.x 版本中的 evaluation 字段。 +下表列出了 2.x 版本与 3.x 版本中的评测器的对应关系: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
评测指标名称原配置新配置
COCO + +```python +data = dict( + val=dict( + type='CocoDataset', + ann_file=data_root + 'annotations/instances_val2017.json')) +evaluation = dict(metric=['bbox', 'segm']) +``` + + + +```python +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/instances_val2017.json', + metric=['bbox', 'segm'], + format_only=False) +``` + +
Pascal VOC + +```python +data = dict( + val=dict( + type=dataset_type, + ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt')) +evaluation = dict(metric='mAP') +``` + + + +```python +val_evaluator = dict( + type='VOCMetric', + metric='mAP', + eval_mode='11points') +``` + +
OpenImages + +```python +data = dict( + val=dict( + type='OpenImagesDataset', + ann_file=data_root + 'annotations/validation-annotations-bbox.csv', + img_prefix=data_root + 'OpenImages/validation/', + label_file=data_root + 'annotations/class-descriptions-boxable.csv', + hierarchy_file=data_root + + 'annotations/bbox_labels_600_hierarchy.json', + meta_file=data_root + 'annotations/validation-image-metas.pkl', + image_level_ann_file=data_root + + 'annotations/validation-annotations-human-imagelabels-boxable.csv')) +evaluation = dict(interval=1, metric='mAP') +``` + + + +```python +val_evaluator = dict( + type='OpenImagesMetric', + iou_thrs=0.5, + ioa_thrs=0.5, + use_group_of=True, + get_supercategory=True) +``` + +
CityScapes + +```python +data = dict( + val=dict( + type='CityScapesDataset', + ann_file=data_root + + 'annotations/instancesonly_filtered_gtFine_val.json', + img_prefix=data_root + 'leftImg8bit/val/', + pipeline=test_pipeline)) +evaluation = dict(metric=['bbox', 'segm']) +``` + + + +```python +val_evaluator = [ + dict( + type='CocoMetric', + ann_file=data_root + + 'annotations/instancesonly_filtered_gtFine_val.json', + metric=['bbox', 'segm']), + dict( + type='CityScapesMetric', + ann_file=data_root + + 'annotations/instancesonly_filtered_gtFine_val.json', + seg_prefix=data_root + '/gtFine/val', + outfile_prefix='./work_dirs/cityscapes_metric/instance') +] +``` + +
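+
+如果需要在没有标注的测试集(例如 COCO test-dev)上只生成提交用的结果文件而不做评测,可以打开 `format_only`,大致写法如下(其中的标注文件名为假设值):
+
+```python
+test_evaluator = dict(
+    type='CocoMetric',
+    ann_file=data_root + 'annotations/image_info_test-dev2017.json',  # 假设的标注文件
+    metric=['bbox', 'segm'],
+    format_only=True,  # 只保存预测结果,不计算精度
+    outfile_prefix='./work_dirs/coco_instance/test')
+```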
+ +## 训练和测试的配置 + + + + + + + + + +
原配置 + +```python +runner = dict( + type='EpochBasedRunner', # 训练循环的类型 + max_epochs=12) # 最大训练轮次 +evaluation = dict(interval=2) # 验证间隔。每 2 个 epoch 验证一次 +``` + +
新配置 + +```python +train_cfg = dict( + type='EpochBasedTrainLoop', # 训练循环的类型,请参考 https://github.com/open-mmlab/mmengine/blob/main/mmengine/runner/loops.py + max_epochs=12, # 最大训练轮次 + val_interval=2) # 验证间隔。每 2 个 epoch 验证一次 +val_cfg = dict(type='ValLoop') # 验证循环的类型 +test_cfg = dict(type='TestLoop') # 测试循环的类型 +``` + +
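+
+如果 2.x 中使用的是 `IterBasedRunner`(按 iteration 训练),可参考下面的示意进行迁移(迭代数仅为演示):
+
+```python
+train_cfg = dict(
+    type='IterBasedTrainLoop',  # 按 iteration 训练的循环类型
+    max_iters=90000,  # 最大迭代次数
+    val_interval=10000)  # 每 10000 个 iteration 验证一次
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+```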
+ +## 优化相关配置 + +优化器以及梯度裁剪的配置都移至 optim_wrapper 字段中。下表列出了 2.x 版本与 3.x 版本中的优化器配置的对应关系: + + + + + + + + + +
原配置 + +```python +optimizer = dict( + type='SGD', # 随机梯度下降优化器 + lr=0.02, # 基础学习率 + momentum=0.9, # 带动量的随机梯度下降 + weight_decay=0.0001) # 权重衰减 +optimizer_config = dict(grad_clip=None) # 梯度裁剪的配置,设置为 None 关闭梯度裁剪 +``` + +
新配置 + +```python +optim_wrapper = dict( # 优化器封装的配置 + type='OptimWrapper', # 优化器封装的类型。可以切换至 AmpOptimWrapper 来启用混合精度训练 + optimizer=dict( # 优化器配置。支持 PyTorch 的各种优化器。请参考 https://pytorch.org/docs/stable/optim.html#algorithms + type='SGD', # 随机梯度下降优化器 + lr=0.02, # 基础学习率 + momentum=0.9, # 带动量的随机梯度下降 + weight_decay=0.0001), # 权重衰减 + clip_grad=None, # 梯度裁剪的配置,设置为 None 关闭梯度裁剪。使用方法请见 https://mmengine.readthedocs.io/en/latest/tutorials/optimizer.html + ) +``` + +
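+
+例如,想同时开启混合精度训练和梯度裁剪时,大致可以这样写(裁剪阈值为示意值):
+
+```python
+optim_wrapper = dict(
+    type='AmpOptimWrapper',  # 自动混合精度训练的优化器封装
+    optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001),
+    clip_grad=dict(max_norm=35, norm_type=2))  # 按 L2 范数裁剪梯度
+```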
+ +学习率的配置也从 lr_config 字段中移至 param_scheduler 字段中。param_scheduler 的配置更贴近 PyTorch 的学习率调整策略,更加灵活。下表列出了 2.x 版本与 3.x 版本中的学习率配置的对应关系: + + + + + + + + + +
原配置 + +```python +lr_config = dict( + policy='step', # 在训练过程中使用 multi step 学习率策略 + warmup='linear', # 使用线性学习率预热 + warmup_iters=500, # 到第 500 个 iteration 结束预热 + warmup_ratio=0.001, # 学习率预热的系数 + step=[8, 11], # 在哪几个 epoch 进行学习率衰减 + gamma=0.1) # 学习率衰减系数 +``` + +
新配置 + +```python +param_scheduler = [ + dict( + type='LinearLR', # 使用线性学习率预热 + start_factor=0.001, # 学习率预热的系数 + by_epoch=False, # 按 iteration 更新预热学习率 + begin=0, # 从第一个 iteration 开始 + end=500), # 到第 500 个 iteration 结束 + dict( + type='MultiStepLR', # 在训练过程中使用 multi step 学习率策略 + by_epoch=True, # 按 epoch 更新学习率 + begin=0, # 从第一个 epoch 开始 + end=12, # 到第 12 个 epoch 结束 + milestones=[8, 11], # 在哪几个 epoch 进行学习率衰减 + gamma=0.1) # 学习率衰减系数 +] +``` + +
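+
+其他策略的迁移方式类似,例如 2.x 中的 `policy='CosineAnnealing'` 大致对应如下写法(epoch 数仅为示意):
+
+```python
+param_scheduler = [
+    dict(type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
+    dict(
+        type='CosineAnnealingLR',  # 余弦退火学习率策略
+        by_epoch=True,
+        begin=0,
+        end=12,
+        T_max=12)  # 余弦周期长度(epoch)
+]
+```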
+ +关于其他的学习率调整策略的迁移,请参考 MMEngine 的[学习率迁移文档](https://mmengine.readthedocs.io/zh_CN/latest/migration/param_scheduler.html)。 + +## 其他配置的迁移 + +### 保存 checkpoint 的配置 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
功能 | 原配置 | 新配置
设置保存间隔 + +```python +checkpoint_config = dict( + interval=1) +``` + + + +```python +default_hooks = dict( + checkpoint=dict( + type='CheckpointHook', + interval=1)) +``` + +
保存最佳模型 + +```python +evaluation = dict( + save_best='auto') +``` + + + +```python +default_hooks = dict( + checkpoint=dict( + type='CheckpointHook', + save_best='auto')) +``` + +
只保留最新的几个模型 + +```python +checkpoint_config = dict( + max_keep_ckpts=3) +``` + + + +```python +default_hooks = dict( + checkpoint=dict( + type='CheckpointHook', + max_keep_ckpts=3)) +``` + +
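+
+上面三项可以合并到同一个 `CheckpointHook` 中,例如:
+
+```python
+default_hooks = dict(
+    checkpoint=dict(
+        type='CheckpointHook',
+        interval=1,  # 保存间隔
+        save_best='auto',  # 同时保存最佳模型
+        max_keep_ckpts=3))  # 只保留最新的 3 个权重
+```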
+ +### 日志的配置 + +3.x 版本中,日志的打印和可视化由 MMEngine 中的 logger 和 visualizer 分别完成。下表列出了 2.x 版本与 3.x 版本中的日志配置的对应关系: + + + + + + + + + + + + + + + + + + + + + + + + +
功能 | 原配置 | 新配置
设置日志打印间隔 + +```python +log_config = dict( + interval=50) +``` + + + +```python +default_hooks = dict( + logger=dict( + type='LoggerHook', + interval=50)) +# 可选: 配置日志打印数值的平滑窗口大小 +log_processor = dict( + type='LogProcessor', + window_size=50) +``` + +
使用 TensorBoard 或 WandB 可视化日志 + +```python +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook'), + dict(type='MMDetWandbHook', + init_kwargs={ + 'project': 'mmdetection', + 'group': 'maskrcnn-r50-fpn-1x-coco' + }, + interval=50, + log_checkpoint=True, + log_checkpoint_metadata=True, + num_eval_images=100) + ]) +``` + + + +```python +vis_backends = [ + dict(type='LocalVisBackend'), + dict(type='TensorboardVisBackend'), + dict(type='WandbVisBackend', + init_kwargs={ + 'project': 'mmdetection', + 'group': 'maskrcnn-r50-fpn-1x-coco' + }) +] +visualizer = dict( + type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') +``` + +
+ +关于可视化相关的教程,请参考 MMDetection 的[可视化教程](../user_guides/visualization.md)。 + +### Runtime 的配置 + +3.x 版本中 runtime 的配置字段有所调整,具体的对应关系如下: + + + + + + + + + + + + + + + + +
原配置 | 新配置
+ +```python +cudnn_benchmark = False +opencv_num_threads = 0 +mp_start_method = 'fork' +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None + + +``` + + + +```python +env_cfg = dict( + cudnn_benchmark=False, + mp_cfg=dict(mp_start_method='fork', + opencv_num_threads=0), + dist_cfg=dict(backend='nccl')) +log_level = 'INFO' +load_from = None +resume = False +``` + +
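+
+另外,2.x 中的 `resume_from` 在 3.x 中由 `load_from` 与 `resume` 两个字段共同实现,大致用法如下(权重路径为假设值):
+
+```python
+# 从指定权重恢复训练(同时恢复优化器状态与迭代进度)
+load_from = 'work_dirs/mask-rcnn_r50_fpn_1x_coco/epoch_8.pth'  # 假设的路径
+resume = True
+# 若只想加载权重、从头开始训练,则保持 resume = False
+```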
diff --git a/docs/zh_cn/migration/dataset_migration.md b/docs/zh_cn/migration/dataset_migration.md new file mode 100644 index 00000000000..c379b9f1b7b --- /dev/null +++ b/docs/zh_cn/migration/dataset_migration.md @@ -0,0 +1 @@ +# 将数据集从 MMDetection 2.x 迁移至 3.x diff --git a/docs/zh_cn/migration/migration.md b/docs/zh_cn/migration/migration.md new file mode 100644 index 00000000000..d706856fa82 --- /dev/null +++ b/docs/zh_cn/migration/migration.md @@ -0,0 +1,12 @@ +# 从 MMDetection 2.x 迁移至 3.x + +MMDetection 3.x 版本是一个重大更新,包含了许多 API 和配置文件的变化。本文档旨在帮助用户从 MMDetection 2.x 版本迁移到 3.x 版本。 +我们将迁移指南分为以下几个部分: + +- [配置文件迁移](./config_migration.md) +- [API 和 Registry 迁移](./api_and_registry_migration.md) +- [数据集迁移](./dataset_migration.md) +- [模型迁移](./model_migration.md) +- [常见问题](./migration_faq.md) + +如果您在迁移过程中遇到任何问题,欢迎在 issue 中提出。我们也欢迎您为本文档做出贡献。 diff --git a/docs/zh_cn/migration/migration_faq.md b/docs/zh_cn/migration/migration_faq.md new file mode 100644 index 00000000000..208a138b25d --- /dev/null +++ b/docs/zh_cn/migration/migration_faq.md @@ -0,0 +1 @@ +# 迁移 FAQ diff --git a/docs/zh_cn/migration/model_migration.md b/docs/zh_cn/migration/model_migration.md new file mode 100644 index 00000000000..d7992440228 --- /dev/null +++ b/docs/zh_cn/migration/model_migration.md @@ -0,0 +1 @@ +# 将模型从 MMDetection 2.x 迁移至 3.x diff --git a/docs/zh_cn/overview.md b/docs/zh_cn/overview.md index b27ead66357..deaa5a2c173 100644 --- a/docs/zh_cn/overview.md +++ b/docs/zh_cn/overview.md @@ -50,3 +50,5 @@ MMDetection 由 7 个主要部分组成,apis、structures、datasets、models - [基础概念](https://mmdetection.readthedocs.io/zh_CN/dev-3.x/advanced_guides/index.html#basic-concepts) - [组件定制](https://mmdetection.readthedocs.io/zh_CN/dev-3.x/advanced_guides/index.html#component-customization) + +4. 对于 MMDetection 2.x 版本的用户,我们提供了[迁移指南](./migration/migration.md),帮助您完成新版本的适配。 From 9ba04a803a9618c2aadc9fc09acbaf180b5d1890 Mon Sep 17 00:00:00 2001 From: takuoko Date: Wed, 22 Mar 2023 11:16:16 +0900 Subject: [PATCH 18/38] [Feature] Release DINO Swin-L 36e model (#9927) --- configs/dino/README.md | 1 + configs/dino/dino-5scale_swin-l_8xb2-36e_coco.py | 13 +++++++++++++ configs/dino/metafile.yml | 12 ++++++++++++ 3 files changed, 26 insertions(+) create mode 100644 configs/dino/dino-5scale_swin-l_8xb2-36e_coco.py diff --git a/configs/dino/README.md b/configs/dino/README.md index 8512f7f8c7f..54f51d598ef 100644 --- a/configs/dino/README.md +++ b/configs/dino/README.md @@ -18,6 +18,7 @@ We present DINO (DETR with Improved deNoising anchOr boxes), a state-of-the-art | :------: | :---------: | :-----: | :----: | :---------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | | R-50 | DINO-4scale | 12e | 49.0 | [config](./dino-4scale_r50_8xb2-12e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v3.0/dino/dino-4scale_r50_8xb2-12e_coco/dino-4scale_r50_8xb2-12e_coco_20221202_182705-55b2bba2.pth) \| [log](https://download.openmmlab.com/mmdetection/v3.0/dino/dino-4scale_r50_8xb2-12e_coco/dino-4scale_r50_8xb2-12e_coco_20221202_182705.log.json) | | Swin-L | DINO-5scale | 12e | 57.2 | [config](./dino-5scale_swin-l_8xb2-12e_coco.py) | 
[model](https://download.openmmlab.com/mmdetection/v3.0/dino/dino-5scale_swin-l_8xb2-12e_coco/dino-5scale_swin-l_8xb2-12e_coco_20230228_072924-a654145f.pth) \| [log](https://download.openmmlab.com/mmdetection/v3.0/dino/dino-5scale_swin-l_8xb2-12e_coco/dino-5scale_swin-l_8xb2-12e_coco_20230228_072924.log) | +| Swin-L | DINO-5scale | 36e | 58.4 | [config](./dino-5scale_swin-l_8xb2-36e_coco.py) | [model](https://github.com/RistoranteRist/mmlab-weights/releases/download/dino-swinl/dino-5scale_swin-l_8xb2-36e_coco-5486e051.pth) \| [log](https://github.com/RistoranteRist/mmlab-weights/releases/download/dino-swinl/20230307_032359.log) | ### NOTE diff --git a/configs/dino/dino-5scale_swin-l_8xb2-36e_coco.py b/configs/dino/dino-5scale_swin-l_8xb2-36e_coco.py new file mode 100644 index 00000000000..d55a38e61d4 --- /dev/null +++ b/configs/dino/dino-5scale_swin-l_8xb2-36e_coco.py @@ -0,0 +1,13 @@ +_base_ = './dino-5scale_swin-l_8xb2-12e_coco.py' +max_epochs = 36 +train_cfg = dict( + type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=1) +param_scheduler = [ + dict( + type='MultiStepLR', + begin=0, + end=max_epochs, + by_epoch=True, + milestones=[27, 33], + gamma=0.1) +] diff --git a/configs/dino/metafile.yml b/configs/dino/metafile.yml index 2f61fb38431..89dcb23e509 100644 --- a/configs/dino/metafile.yml +++ b/configs/dino/metafile.yml @@ -60,3 +60,15 @@ Models: Metrics: box AP: 57.2 Weights: https://download.openmmlab.com/mmdetection/v3.0/dino/dino-5scale_swin-l_8xb2-12e_coco/dino-5scale_swin-l_8xb2-12e_coco_20230228_072924-a654145f.pth + + - Name: dino-5scale_swin-l_8xb2-36e_coco.py + In Collection: DINO + Config: configs/dino/dino-5scale_swin-l_8xb2-36e_coco.py + Metadata: + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 58.4 + Weights: https://github.com/RistoranteRist/mmlab-weights/releases/download/dino-swinl/dino-5scale_swin-l_8xb2-36e_coco-5486e051.pth From 9c9a86656c524b04e6ce76b7fe8566fb291d38fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Haian=20Huang=28=E6=B7=B1=E5=BA=A6=E7=9C=B8=29?= <1286304229@qq.com> Date: Wed, 22 Mar 2023 10:18:04 +0800 Subject: [PATCH 19/38] Refine data_prepare docs (#9935) Co-authored-by: Range King --- docs/zh_cn/user_guides/dataset_prepare.md | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/docs/zh_cn/user_guides/dataset_prepare.md b/docs/zh_cn/user_guides/dataset_prepare.md index e03127bdb68..4ebbd668a72 100644 --- a/docs/zh_cn/user_guides/dataset_prepare.md +++ b/docs/zh_cn/user_guides/dataset_prepare.md @@ -1,17 +1,14 @@ -## 数据集准备(待更新) +## 数据集准备 -为了测试一个模型的精度,我们通常会在标准数据集上对其进行测试。MMDetection 支持多个公共数据集,包括 [COCO](https://cocodataset.org/) , -[Pascal VOC](http://host.robots.ox.ac.uk/pascal/VOC) ,[Cityscapes](https://www.cityscapes-dataset.com/) 等等。 -这一部分将会介绍如何在支持的数据集上测试现有模型。 +MMDetection 支持多个公共数据集,包括 [COCO](https://cocodataset.org/), [Pascal VOC](http://host.robots.ox.ac.uk/pascal/VOC), [Cityscapes](https://www.cityscapes-dataset.com/) 和 [其他更多数据集](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/_base_/datasets)。 -一些公共数据集,比如 Pascal VOC 及其镜像数据集,或者 COCO 等数据集都可以从官方网站或者镜像网站获取。 -注意:在检测任务中,Pascal VOC 2012 是 Pascal VOC 2007 的无交集扩展,我们通常将两者一起使用。 -我们建议将数据集下载,然后解压到项目外部的某个文件夹内,然后通过符号链接的方式,将数据集根目录链接到 `$MMDETECTION/data` 文件夹下,格式如下所示。 -如果你的文件夹结构和下方不同的话,你需要在配置文件中改变对应的路径。 -我们提供了下载 COCO 等数据集的脚本,你可以运行 `python tools/misc/download_dataset.py --dataset-name coco2017` 下载 COCO 数据集。 -对于中国境内的用户,我们也推荐通过开源数据平台 [OpenDataLab](https://opendatalab.com/?source=OpenMMLab%20GitHub) 来下载数据,以获得更好的下载体验。 
+一些公共数据集,比如 Pascal VOC 及其镜像数据集,或者 COCO 等数据集都可以从官方网站或者镜像网站获取。注意:在检测任务中,Pascal VOC 2012 是 Pascal VOC 2007 的无交集扩展,我们通常将两者一起使用。 我们建议将数据集下载,然后解压到项目外部的某个文件夹内,然后通过符号链接的方式,将数据集根目录链接到 `$MMDETECTION/data` 文件夹下, 如果你的文件夹结构和下方不同的话,你需要在配置文件中改变对应的路径。 -```plain +我们提供了下载 COCO 等数据集的脚本,你可以运行 `python tools/misc/download_dataset.py --dataset-name coco2017` 下载 COCO 数据集。 对于中国境内的用户,我们也推荐通过开源数据平台 [OpenDataLab](https://opendatalab.com/?source=OpenMMLab%20GitHub) 来下载数据,以获得更好的下载体验。 + +更多用法请参考[数据集下载](./useful_tools.md#dataset-download) + +```text mmdetection ├── mmdet ├── tools @@ -37,7 +34,7 @@ mmdetection 有些模型需要额外的 [COCO-stuff](http://calvin.inf.ed.ac.uk/wp-content/uploads/data/cocostuffdataset/stuffthingmaps_trainval2017.zip) 数据集,比如 HTC,DetectoRS 和 SCNet,你可以下载并解压它们到 `coco` 文件夹下。文件夹会是如下结构: -```plain +```text mmdetection ├── data │ ├── coco From 892bcdad0cb7f693c4e2745e4d187b21dd1fddae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=96=9B=E5=AE=9A=E8=B0=94=E7=9A=84=E5=8A=A0=E8=8F=B2?= =?UTF-8?q?=E7=8C=AB?= <62554593+zgzhengSEU@users.noreply.github.com> Date: Wed, 22 Mar 2023 10:19:45 +0800 Subject: [PATCH 20/38] [Docs] update zh_cn doc error (#9883) Co-authored-by: Range King --- docs/en/notes/faq.md | 2 +- docs/zh_cn/notes/faq.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/notes/faq.md b/docs/en/notes/faq.md index f93b4a84f47..cfea240c720 100644 --- a/docs/en/notes/faq.md +++ b/docs/en/notes/faq.md @@ -169,7 +169,7 @@ We list some common troubles faced by many users and their corresponding solutio - Save the best model - It can be turned on by configuring `default_hooks = dict(checkpoint=dict(type='CheckpointHook', interval=1, save_best='auto'),`. In the case of the `auto` parameter, the first key in the returned evaluation result will be used as the basis for selecting the best model. You can also directly set the key in the evaluation result to manually set it, for example, `save_best='mAP'`. + It can be turned on by configuring `default_hooks = dict(checkpoint=dict(type='CheckpointHook', interval=1, save_best='auto'),`. In the case of the `auto` parameter, the first key in the returned evaluation result will be used as the basis for selecting the best model. You can also directly set the key in the evaluation result to manually set it, for example, `save_best='coco/bbox_mAP'`. 
## Evaluation

diff --git a/docs/zh_cn/notes/faq.md b/docs/zh_cn/notes/faq.md
index bca80ba18ba..bc52cf6e202 100644
--- a/docs/zh_cn/notes/faq.md
+++ b/docs/zh_cn/notes/faq.md
@@ -172,7 +172,7 @@ PYTHONPATH="$(dirname $0)/..":$PYTHONPATH

 - 训练中保存最好模型

-  可以通过配置 `evaluation = dict(save_best=‘auto’)`开启。在 auto 参数情况下会根据返回的验证结果中的第一个 key 作为选择最优模型的依据,你也可以直接设置评估结果中的 key 来手动设置,例如 `evaluation = dict(save_best=‘mAP’)`。
+  可以通过配置 `default_hooks = dict(checkpoint=dict(type='CheckpointHook', interval=1, save_best='auto'))` 开启。在 `auto` 参数情况下会根据返回的验证结果中的第一个 key 作为选择最优模型的依据,你也可以直接设置评估结果中的 key 来手动设置,例如 `save_best='coco/bbox_mAP'`。

 - 在 Resume 训练中使用 `ExpMomentumEMAHook`

From 22eaded5fda11479946e9147439ae2e69808047d Mon Sep 17 00:00:00 2001
From: "Mr.Li" <1055271769@qq.com>
Date: Fri, 24 Mar 2023 16:38:59 +0800
Subject: [PATCH 21/38] [Doc]: Add more social networking links(dev-3.x) (#10021)

---
 README.md       | 10 ++++++++--
 README_zh-CN.md | 20 ++++++++++++++++++++
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 2cbbe559f1f..07d9b920fa9 100644
--- a/README.md
+++ b/README.md
@@ -43,9 +43,9 @@ English | [简体中文](README_zh-CN.md)

 ## Introduction

diff --git a/README_zh-CN.md b/README_zh-CN.md
index 7f68b926957..7c345369ce5 100644
--- a/README_zh-CN.md
+++ b/README_zh-CN.md
@@ -41,6 +41,26 @@

+<!-- 社交平台链接区块(HTML 图标与链接标签从略) -->
+ ## 简介 MMDetection 是一个基于 PyTorch 的目标检测开源工具箱。它是 [OpenMMLab](https://openmmlab.com/) 项目的一部分。 From 05f42355edb901e41264e680b7f225f6a4a23689 Mon Sep 17 00:00:00 2001 From: yechenzhi <136920488@qq.com> Date: Tue, 28 Mar 2023 09:59:20 +0800 Subject: [PATCH 22/38] Fix conflict bug in train_cfg (#10030) --- configs/solov2/solov2_r50_fpn_ms-3x_coco.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/solov2/solov2_r50_fpn_ms-3x_coco.py b/configs/solov2/solov2_r50_fpn_ms-3x_coco.py index ec20b7dd6b9..d6f09827efb 100644 --- a/configs/solov2/solov2_r50_fpn_ms-3x_coco.py +++ b/configs/solov2/solov2_r50_fpn_ms-3x_coco.py @@ -15,7 +15,7 @@ # training schedule for 3x max_epochs = 36 -train_cfg = dict(by_epoch=True, max_epochs=max_epochs) +train_cfg = dict(max_epochs=max_epochs) # learning rate param_scheduler = [ From 1d06209db5a1d147800e669e86c962f776360ceb Mon Sep 17 00:00:00 2001 From: LinXiaoZheng <90811472+Zheng-LinXiao@users.noreply.github.com> Date: Wed, 29 Mar 2023 14:57:14 +0800 Subject: [PATCH 23/38] [Docs] Add rtmder config introduce (#10042) --- configs/rtmdet/README.md | 73 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/configs/rtmdet/README.md b/configs/rtmdet/README.md index b17a916b022..593e7b607ac 100644 --- a/configs/rtmdet/README.md +++ b/configs/rtmdet/README.md @@ -378,3 +378,76 @@ result = inference_model( img='demo/resources/det.jpg', device='cuda:0') ``` + +### Model Config + +In MMDetection's config, we use `model` to set up detection algorithm components. In addition to neural network components such as `backbone`, `neck`, etc, it also requires `data_preprocessor`, `train_cfg`, and `test_cfg`. `data_preprocessor` is responsible for processing a batch of data output by dataloader. `train_cfg`, and `test_cfg` in the model config are for training and testing hyperparameters of the components.Taking RTMDet as an example, we will introduce each field in the config according to different function modules: + +```python +model = dict( + type='RTMDet', # The name of detector + data_preprocessor=dict( # The config of data preprocessor, usually includes image normalization and padding + type='DetDataPreprocessor', # The type of the data preprocessor. Refer to https://mmdetection.readthedocs.io/en/3.x/api.html#mmdet.models.data_preprocessors.DetDataPreprocessor + mean=[103.53, 116.28, 123.675], # Mean values used to pre-training the pre-trained backbone models, ordered in R, G, B + std=[57.375, 57.12, 58.395], # Standard variance used to pre-training the pre-trained backbone models, ordered in R, G, B + bgr_to_rgb=False, # whether to convert image from BGR to RGB + batch_augments=None), # Batch-level augmentations + backbone=dict( # The config of backbone + type='CSPNeXt', # The type of backbone network. Refer to https://mmdetection.readthedocs.io/en/3.x/api.html#mmdet.models.backbones.CSPNeXt + arch='P5', # Architecture of CSPNeXt, from {P5, P6}. Defaults to P5 + expand_ratio=0.5, # Ratio to adjust the number of channels of the hidden layer. Defaults to 0.5 + deepen_factor=1, # Depth multiplier, multiply number of blocks in CSP layer by this amount. Defaults to 1.0 + widen_factor=1, # Width multiplier, multiply number of channels in each layer by this amount. Defaults to 1.0 + channel_attention=True, # Whether to add channel attention in each stage. Defaults to True + norm_cfg=dict(type='SyncBN'), # Dictionary to construct and config norm layer. 
Defaults to dict(type='BN', requires_grad=True)
+        act_cfg=dict(type='SiLU', inplace=True)), # Config dict for activation layer. Defaults to dict(type='SiLU')
+    neck=dict(
+        type='CSPNeXtPAFPN', # The type of neck is CSPNeXtPAFPN. Refer to https://mmdetection.readthedocs.io/en/3.x/api.html#mmdet.models.necks.CSPNeXtPAFPN
+        in_channels=[256, 512, 1024], # Number of input channels per scale
+        out_channels=256, # Number of output channels (used at each scale)
+        num_csp_blocks=3, # Number of bottlenecks in CSPLayer. Defaults to 3
+        expand_ratio=0.5, # Ratio to adjust the number of channels of the hidden layer. Default: 0.5
+        norm_cfg=dict(type='SyncBN'), # Config dict for normalization layer. Default: dict(type='BN')
+        act_cfg=dict(type='SiLU', inplace=True)), # Config dict for activation layer. Default: dict(type='Swish')
+    bbox_head=dict(
+        type='RTMDetSepBNHead', # The type of bbox_head is RTMDetSepBNHead. RTMDetHead with separated BN layers and shared conv layers. Refer to https://mmdetection.readthedocs.io/en/3.x/api.html#mmdet.models.dense_heads.RTMDetSepBNHead
+        num_classes=80, # Number of categories excluding the background category
+        in_channels=256, # Number of channels in the input feature map
+        stacked_convs=2, # Number of stacked convolutions in the head. Defaults to 2
+        feat_channels=256, # Feature channels of convolutional layers in the head
+        anchor_generator=dict( # The config of anchor generator
+            type='MlvlPointGenerator', # The methods use MlvlPointGenerator. Refer to https://github.com/open-mmlab/mmdetection/blob/3.x/mmdet/models/task_modules/prior_generators/point_generator.py#L92
+            offset=0, # The offset of points, the value is normalized with corresponding stride. Defaults to 0.5
+            strides=[8, 16, 32]), # Strides of anchors in multiple feature levels in order (w, h)
+        bbox_coder=dict(type='DistancePointBBoxCoder'), # Distance Point BBox coder. This coder encodes gt bboxes (x1, y1, x2, y2) into (top, bottom, left, right) and decodes them back to the original format. Refer to https://github.com/open-mmlab/mmdetection/blob/3.x/mmdet/models/task_modules/coders/distance_point_bbox_coder.py#L9
+        loss_cls=dict( # Config of loss function for the classification branch
+            type='QualityFocalLoss', # Type of loss for classification branch. Refer to https://mmdetection.readthedocs.io/en/3.x/api.html#mmdet.models.losses.QualityFocalLoss
+            use_sigmoid=True, # Whether sigmoid operation is conducted in QFL. Defaults to True
+            beta=2.0, # The beta parameter for calculating the modulating factor. Defaults to 2.0
+            loss_weight=1.0), # Loss weight of current loss
+        loss_bbox=dict( # Config of loss function for the regression branch
+            type='GIoULoss', # Type of loss. Refer to https://mmdetection.readthedocs.io/en/3.x/api.html#mmdet.models.losses.GIoULoss
+            loss_weight=2.0), # Loss weight of the regression branch
+        with_objectness=False, # Whether to add an objectness branch. Defaults to True
+        exp_on_reg=True, # Whether to use .exp() in regression
+        share_conv=True, # Whether to share conv layers between stages. Defaults to True
+        pred_kernel_size=1, # Kernel size of prediction layer. Defaults to 1
+        norm_cfg=dict(type='SyncBN'), # Config dict for normalization layer. Defaults to dict(type='BN', momentum=0.03, eps=0.001)
+        act_cfg=dict(type='SiLU', inplace=True)), # Config dict for activation layer. Defaults to dict(type='SiLU')
+    train_cfg=dict( # Config of training hyperparameters for ATSS
+        assigner=dict( # Config of assigner
+            type='DynamicSoftLabelAssigner', # Type of assigner. 
DynamicSoftLabelAssigner computes matching between predictions and ground truth with dynamic soft label assignment. Refer to https://github.com/open-mmlab/mmdetection/blob/3.x/mmdet/models/task_modules/assigners/dynamic_soft_label_assigner.py#L40 + topk=13), # Select top-k predictions to calculate dynamic k best matches for each gt. Defaults to 13 + allowed_border=-1, # The border allowed after padding for valid anchors + pos_weight=-1, # The weight of positive samples during training + debug=False), # Whether to set the debug mode + test_cfg=dict( # Config for testing hyperparameters for ATSS + nms_pre=30000, # The number of boxes before NMS + min_bbox_size=0, # The allowed minimal box size + score_thr=0.001, # Threshold to filter out boxes + nms=dict( # Config of NMS in the second stage + type='nms', # Type of NMS + iou_threshold=0.65), # NMS threshold + max_per_img=300), # Max number of detections of each image +) +``` From a4c94134f0bcb82686184dbb69f7f9011ef87d72 Mon Sep 17 00:00:00 2001 From: LYMDLUT <70597027+LYMDLUT@users.noreply.github.com> Date: Wed, 29 Mar 2023 15:10:38 +0800 Subject: [PATCH 24/38] Fix conditional DETR AP and Log (#9889) Co-authored-by: lym --- configs/conditional_detr/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/conditional_detr/README.md b/configs/conditional_detr/README.md index e36ea20565a..4043571c576 100644 --- a/configs/conditional_detr/README.md +++ b/configs/conditional_detr/README.md @@ -25,7 +25,7 @@ We provide the config files and models for Conditional DETR: [Conditional DETR f | Backbone | Model | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | | :------: | :--------------: | :-----: | :------: | :------------: | :----: | :-----------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| R-50 | Conditional DETR | 50e | | | 40.9 | [config](./conditional-detr_r50_8xb2-50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v3.0/conditional_detr/conditional-detr_r50_8xb2-50e_coco/conditional-detr_r50_8xb2-50e_coco_20221121_180202-c83a1dc0.pth) \| [log](https://download.openmmlab.com/mmdetection/v3.0/conditional_detr/conditional-detr_r50_8xb2-50e_coco/conditional-detr_r50_8xb2-50e_coco_20221121_180202.log.json) | +| R-50 | Conditional DETR | 50e | | | 41.1 | [config](./conditional-detr_r50_8xb2-50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v3.0/conditional_detr/conditional-detr_r50_8xb2-50e_coco/conditional-detr_r50_8xb2-50e_coco_20221121_180202-c83a1dc0.pth) \| [log](https://download.openmmlab.com/mmdetection/v3.0/conditional_detr/conditional-detr_r50_8xb2-50e_coco/conditional-detr_r50_8xb2-50e_coco_20221121_180202.log.json) | ## Citation From b0be9bd4ac556ef26cfddefa1bcb523fc1eaba78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Haian=20Huang=28=E6=B7=B1=E5=BA=A6=E7=9C=B8=29?= <1286304229@qq.com> Date: Thu, 30 Mar 2023 10:51:56 +0800 Subject: [PATCH 25/38] Add visualization docs (#9938) --- docs/en/user_guides/visualization.md | 90 ++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/docs/en/user_guides/visualization.md b/docs/en/user_guides/visualization.md index f0fa8b81498..dade26ed688 100644 --- 
a/docs/en/user_guides/visualization.md +++ b/docs/en/user_guides/visualization.md @@ -1 +1,91 @@ # Visualization + +Before reading this tutorial, it is recommended to read MMEngine's [Visualization](https://github.com/open-mmlab/mmengine/blob/main/docs/en/advanced_tutorials/visualization.md) documentation to get a first glimpse of the `Visualizer` definition and usage. + +In brief, the [`Visualizer`](mmengine.visualization.Visualizer) is implemented in MMEngine to meet the daily visualization needs, and contains three main functions: + +- Implement common drawing APIs, such as [`draw_bboxes`](mmengine.visualization.Visualizer.draw_bboxes) which implements bounding box drawing functions, [`draw_lines`](mmengine.visualization.Visualizer.draw_lines) implements the line drawing function. +- Support writing visualization results, learning rate curves, loss function curves, and verification accuracy curves to various backends, including local disks and common deep learning training logging tools such as [TensorBoard](https://www.tensorflow.org/tensorboard) and [Wandb](https://wandb.ai/site). +- Support calling anywhere in the code to visualize or record intermediate states of the model during training or testing, such as feature maps and validation results. + +Based on MMEngine's Visualizer, MMDet comes with a variety of pre-built visualization tools that can be used by the user by simply modifying the following configuration files. + +- The `tools/analysis_tools/browse_dataset.py` script provides a dataset visualization function that draws images and corresponding annotations after Data Transforms, as described in [`browse_dataset.py`](useful_tools.md#Visualization). +- MMEngine implements `LoggerHook`, which uses `Visualizer` to write the learning rate, loss and evaluation results to the backend set by `Visualizer`. Therefore, by modifying the `Visualizer` backend in the configuration file, for example to ` TensorBoardVISBackend` or `WandbVISBackend`, you can implement logging to common training logging tools such as `TensorBoard` or `WandB`, thus making it easy for users to use these visualization tools to analyze and monitor the training process. +- The `VisualizerHook` is implemented in MMDet, which uses the `Visualizer` to visualize or store the prediction results of the validation or prediction phase into the backend set by the `Visualizer`, so by modifying the `Visualizer` backend in the configuration file, for example, to ` TensorBoardVISBackend` or `WandbVISBackend`, you can implement storing the predicted images to `TensorBoard` or `Wandb`. + +## Configuration + +Thanks to the use of the registration mechanism, in MMDet we can set the behavior of the `Visualizer` by modifying the configuration file. Usually, we define the default configuration for the visualizer in `configs/_base_/default_runtime.py`, see [configuration tutorial](config.md) for details. + +```Python +vis_backends = [dict(type='LocalVisBackend')] +visualizer = dict( + type='DetLocalVisualizer', + vis_backends=vis_backends, + name='visualizer') +``` + +Based on the above example, we can see that the configuration of `Visualizer` consists of two main parts, namely, the type of `Visualizer` and the visualization backend `vis_backends` it uses. + +- Users can directly use `DetLocalVisualizer` to visualize labels or predictions for support tasks. 
+- MMDet sets the visualization backend `vis_backend` to the local visualization backend `LocalVisBackend` by default, saving all visualization results and other training information in a local folder. + +## Storage + +MMDet uses the local visualization backend [`LocalVisBackend`](mmengine.visualization.LocalVisBackend) by default, and the model loss, learning rate, model evaluation accuracy and visualization The information stored in `VisualizerHook` and `LoggerHook`, including loss, learning rate, evaluation accuracy will be saved to the `{work_dir}/{config_name}/{time}/{vis_data}` folder by default. In addition, MMDet also supports other common visualization backends, such as `TensorboardVisBackend` and `WandbVisBackend`, and you only need to change the `vis_backends` type in the configuration file to the corresponding visualization backend. For example, you can store data to `TensorBoard` and `Wandb` by simply inserting the following code block into the configuration file. + +```Python +# https://mmengine.readthedocs.io/en/latest/api/visualization.html +_base_.visualizer.vis_backends = [ + dict(type='LocalVisBackend'), # + dict(type='TensorboardVisBackend'), + dict(type='WandbVisBackend'),] +``` + +## Plot + +### Plot the prediction results + +MMDet mainly uses [`DetVisualizationHook`](mmdet.engine.hooks.DetVisualizationHook) to plot the prediction results of validation and test, by default `DetVisualizationHook` is off, and the default configuration is as follows. + +```Python +visualization=dict( # user visualization of validation and test results + type='DetVisualizationHook', + draw=False, + interval=1, + show=False) +``` + +The following table shows the parameters supported by `DetVisualizationHook`. + +| Parameters | Description | +| :--------: | :-----------------------------------------------------------------------------------------------------------: | +| draw | The DetVisualizationHook is turned on and off by the enable parameter, which is the default state. | +| interval | Controls how much iteration to store or display the results of a val or test if VisualizationHook is enabled. | +| show | Controls whether to visualize the results of val or test. | + +If you want to enable `DetVisualizationHook` related functions and configurations during training or testing, you only need to modify the configuration, take `configs/rtmdet/rtmdet_tiny_8xb32-300e_coco.py` as an example, draw annotations and predictions at the same time, and display the images, the configuration can be modified as follows + +```Python +visualization = _base_.default_hooks.visualization +visualization.update(dict(draw=True, show=True)) +``` + +
+ +The `test.py` procedure is further simplified by providing the `--show` and `--show-dir` parameters to visualize the annotation and prediction results during the test without modifying the configuration. + +```Shell +# Show test results +python tools/test.py configs/rtmdet/rtmdet_tiny_8xb32-300e_coco.py https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet_tiny_8xb32-300e_coco/rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth --show + +# Specify where to store the prediction results +python tools/test.py configs/rtmdet/rtmdet_tiny_8xb32-300e_coco.py https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet_tiny_8xb32-300e_coco/rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth --show-dir imgs/ +``` + +
From d8f9e1b2bff1658da6f4d5a5d226833e74c82a4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Haian=20Huang=28=E6=B7=B1=E5=BA=A6=E7=9C=B8=29?= <1286304229@qq.com> Date: Thu, 30 Mar 2023 11:39:37 +0800 Subject: [PATCH 26/38] Update some docs (#10046) --- .../advanced_guides/customize_transforms.md | 2 +- docs/en/advanced_guides/transforms.md | 2 +- docs/en/user_guides/test.md | 2 +- docs/zh_cn/advanced_guides/conventions.md | 2 +- .../advanced_guides/customize_transforms.md | 32 ++- .../user_guides/robustness_benchmarking.md | 2 +- docs/zh_cn/user_guides/test.md | 214 +++++++++++------- 7 files changed, 147 insertions(+), 109 deletions(-) diff --git a/docs/en/advanced_guides/customize_transforms.md b/docs/en/advanced_guides/customize_transforms.md index ae4ff47ef7e..5fe84e9f7c9 100644 --- a/docs/en/advanced_guides/customize_transforms.md +++ b/docs/en/advanced_guides/customize_transforms.md @@ -32,7 +32,7 @@ custom_imports = dict(imports=['path.to.my_pipeline'], allow_failed_imports=False) train_pipeline = [ - dict(type='LoadImageFromFile', backend_args=backend_args), + dict(type='LoadImageFromFile'), dict(type='LoadAnnotations', with_bbox=True), dict(type='Resize', scale=(1333, 800), keep_ratio=True), dict(type='RandomFlip', prob=0.5), diff --git a/docs/en/advanced_guides/transforms.md b/docs/en/advanced_guides/transforms.md index 034621c9fc5..4db036ae5c2 100644 --- a/docs/en/advanced_guides/transforms.md +++ b/docs/en/advanced_guides/transforms.md @@ -1,4 +1,4 @@ -# Data Transforms +# Data Transforms (Need to update) ## Design of Data transforms pipeline diff --git a/docs/en/user_guides/test.md b/docs/en/user_guides/test.md index 4a0d77b0f5f..302dd5949c2 100644 --- a/docs/en/user_guides/test.md +++ b/docs/en/user_guides/test.md @@ -79,7 +79,7 @@ Assuming that you have already downloaded the checkpoints to the directory `chec ```shell python tools/test.py \ - configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc.py \ + configs/pascal_voc/faster-rcnn_r50_fpn_1x_voc0712.py \ checkpoints/faster_rcnn_r50_fpn_1x_voc0712_20200624-c9895d40.pth ``` diff --git a/docs/zh_cn/advanced_guides/conventions.md b/docs/zh_cn/advanced_guides/conventions.md index 261f5ed5eb7..9fb1f14c898 100644 --- a/docs/zh_cn/advanced_guides/conventions.md +++ b/docs/zh_cn/advanced_guides/conventions.md @@ -1,4 +1,4 @@ -# 默认约定(待更新) +# 默认约定 如果你想把 MMDetection 修改为自己的项目,请遵循下面的约定。 diff --git a/docs/zh_cn/advanced_guides/customize_transforms.md b/docs/zh_cn/advanced_guides/customize_transforms.md index b51f96b7f2d..aa40717904a 100644 --- a/docs/zh_cn/advanced_guides/customize_transforms.md +++ b/docs/zh_cn/advanced_guides/customize_transforms.md @@ -1,25 +1,26 @@ -# 自定义数据预处理流程(待更新) +# 自定义数据预处理流程 1. 在任意文件里写一个新的流程,例如在 `my_pipeline.py`,它以一个字典作为输入并且输出一个字典: ```python import random - from mmdet.datasets import PIPELINES + from mmcv.transforms import BaseTransform + from mmdet.registry import TRANSFORMS - @PIPELINES.register_module() - class MyTransform: + @TRANSFORMS.register_module() + class MyTransform(BaseTransform): """Add your transform Args: p (float): Probability of shifts. Default 0.5. 
""" - def __init__(self, p=0.5): - self.p = p + def __init__(self, prob=0.5): + self.prob = prob - def __call__(self, results): - if random.random() > self.p: + def transform(self, results): + if random.random() > self.prob: results['dummy'] = True return results ``` @@ -29,18 +30,13 @@ ```python custom_imports = dict(imports=['path.to.my_pipeline'], allow_failed_imports=False) - img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='MyTransform', p=0.2), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), + dict(type='Resize', scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', prob=0.5), + dict(type='MyTransform', prob=0.2), + dict(type='PackDetInputs') ] ``` @@ -48,4 +44,4 @@ 如果想要可视化数据增强处理流程的结果,可以使用 `tools/misc/browse_dataset.py` 直观 地浏览检测数据集(图像和标注信息),或将图像保存到指定目录。 - 使用方法请参考[日志分析](../useful_tools.md) + 使用方法请参考[可视化文档](../user_guides/visualization.md) diff --git a/docs/zh_cn/user_guides/robustness_benchmarking.md b/docs/zh_cn/user_guides/robustness_benchmarking.md index d9c66a70f15..e95c79a91f1 100644 --- a/docs/zh_cn/user_guides/robustness_benchmarking.md +++ b/docs/zh_cn/user_guides/robustness_benchmarking.md @@ -1,4 +1,4 @@ -# 检测器鲁棒性检查 (待更新) +# 检测器鲁棒性检查 ## 介绍 diff --git a/docs/zh_cn/user_guides/test.md b/docs/zh_cn/user_guides/test.md index 0cd70cfa9f8..96e28c89219 100644 --- a/docs/zh_cn/user_guides/test.md +++ b/docs/zh_cn/user_guides/test.md @@ -1,4 +1,4 @@ -# 测试现有模型(待更新) +# 测试现有模型 我们提供了测试脚本,能够测试一个现有模型在所有数据集(COCO,Pascal VOC,Cityscapes 等)上的性能。我们支持在如下环境下测试: @@ -15,7 +15,6 @@ python tools/test.py \ ${CONFIG_FILE} \ ${CHECKPOINT_FILE} \ [--out ${RESULT_FILE}] \ - [--eval ${EVAL_METRICS}] \ [--show] # CPU 测试:禁用 GPU 并运行单 GPU 测试脚本 @@ -24,7 +23,6 @@ python tools/test.py \ ${CONFIG_FILE} \ ${CHECKPOINT_FILE} \ [--out ${RESULT_FILE}] \ - [--eval ${EVAL_METRICS}] \ [--show] # 单节点多 GPU 测试 @@ -32,8 +30,7 @@ bash tools/dist_test.sh \ ${CONFIG_FILE} \ ${CHECKPOINT_FILE} \ ${GPU_NUM} \ - [--out ${RESULT_FILE}] \ - [--eval ${EVAL_METRICS}] + [--out ${RESULT_FILE}] ``` `tools/dist_test.sh` 也支持多节点测试,不过需要依赖 PyTorch 的 [启动工具](https://pytorch.org/docs/stable/distributed.html#launch-utility) 。 @@ -41,12 +38,9 @@ bash tools/dist_test.sh \ 可选参数: - `RESULT_FILE`: 结果文件名称,需以 .pkl 形式存储。如果没有声明,则不将结果存储到文件。 -- `EVAL_METRICS`: 需要测试的度量指标。可选值是取决于数据集的,比如 `proposal_fast`,`proposal`,`bbox`,`segm` 是 COCO 数据集的可选值,`mAP`,`recall` 是 Pascal VOC 数据集的可选值。Cityscapes 数据集可以测试 `cityscapes` 和所有 COCO 数据集支持的度量指标。 - `--show`: 如果开启,检测结果将被绘制在图像上,以一个新窗口的形式展示。它只适用于单 GPU 的测试,是用于调试和可视化的。请确保使用此功能时,你的 GUI 可以在环境中打开。否则,你可能会遇到这么一个错误 `cannot connect to X server`。 - `--show-dir`: 如果指明,检测结果将会被绘制在图像上并保存到指定目录。它只适用于单 GPU 的测试,是用于调试和可视化的。即使你的环境中没有 GUI,这个选项也可使用。 -- `--show-score-thr`: 如果指明,得分低于此阈值的检测结果将会被移除。 - `--cfg-options`: 如果指明,这里的键值对将会被合并到配置文件中。 -- `--eval-options`: 如果指明,这里的键值对将会作为字典参数被传入 `dataset.evaluation()` 函数中,仅在测试阶段使用。 ### 样例 @@ -70,63 +64,56 @@ bash tools/dist_test.sh \ --show-dir rtmdet_l_8xb32-300e_coco_results ``` -3. 在 Pascal VOC 数据集上测试 Faster R-CNN,不保存测试结果,测试 `mAP`。配置文件和 checkpoint 文件 [在此](https://github.com/open-mmlab/mmdetection/tree/master/configs/pascal_voc) 。 +3. 
在 Pascal VOC 数据集上测试 Faster R-CNN,不保存测试结果,测试 `mAP`。配置文件和 checkpoint 文件 [在此](../../../configs/pascal_voc) 。

   ```shell
   python tools/test.py \
       configs/pascal_voc/faster-rcnn_r50_fpn_1x_voc0712.py \
       checkpoints/faster_rcnn_r50_fpn_1x_voc0712_20200624-c9895d40.pth
   ```

4. 使用 8 块 GPU 测试 Mask R-CNN,测试 `bbox` 和 `mAP`。配置文件和 checkpoint 文件 [在此](../../../configs/mask_rcnn) 。

   ```shell
   ./tools/dist_test.sh \
       configs/mask-rcnn_r50_fpn_1x_coco.py \
       checkpoints/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth \
       8 \
       --out results.pkl
   ```

5. 使用 8 块 GPU 测试 Mask R-CNN,测试**每类**的 `bbox` 和 `mAP`。配置文件和 checkpoint 文件 [在此](../../../configs/mask_rcnn) 。

   ```shell
   ./tools/dist_test.sh \
       configs/mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py \
       checkpoints/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth \
       8
   ```

   该命令生成两个 JSON 文件 `./work_dirs/coco_instance/test.bbox.json` 和 `./work_dirs/coco_instance/test.segm.json`。

6. 在 COCO test-dev 数据集上,使用 8 块 GPU 测试 Mask R-CNN,并生成 JSON 文件提交到官方评测服务器,配置文件和 checkpoint 文件 [在此](../../../configs/mask_rcnn) 。你可以在 [config](../../../configs/_base_/datasets/coco_instance.py) 中,用注释里给出的 test_evaluator 和 test_dataloader 替换默认的 test_evaluator 和 test_dataloader,然后运行:

   ```shell
   ./tools/dist_test.sh \
       configs/mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py \
       checkpoints/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth \
       8
   ```

   这行命令生成两个 JSON 文件 `./work_dirs/coco_instance/test.bbox.json` 和 `./work_dirs/coco_instance/test.segm.json`。

7. 
在 Cityscapes 数据集上,使用 8 块 GPU 测试 Mask R-CNN,生成 txt 和 png 文件,并上传到官方评测服务器。配置文件和 checkpoint 文件 [在此](../../../configs/cityscapes) 。 你可以在 [config](./././configs/_base_/datasets/cityscapes_instance.py) 的注释中用 test_evaluator 和 test_dataloader 替换原来的 test_evaluator 和 test_dataloader,然后运行: ```shell ./tools/dist_test.sh \ - configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py \ + configs/cityscapes/mask-rcnn_r50_fpn_1x_cityscapes.py \ checkpoints/mask_rcnn_r50_fpn_1x_cityscapes_20200227-afe51d5a.pth \ - 8 \ - --format-only \ - --options "txtfile_prefix=./mask_rcnn_cityscapes_test_results" + 8 ``` -生成的 png 和 txt 文件在 `./mask_rcnn_cityscapes_test_results` 文件夹下。 + 生成的 png 和 txt 文件在 `./work_dirs/cityscapes_metric` 文件夹下。 ### 不使用 Ground Truth 标注进行测试 @@ -154,8 +141,6 @@ python tools/dataset_converters/images2coco.py \ python tools/test.py \ ${CONFIG_FILE} \ ${CHECKPOINT_FILE} \ - --format-only \ - --options ${JSONFILE_PREFIX} \ [--show] # CPU 测试:禁用 GPU 并运行单 GPU 测试脚本 @@ -164,7 +149,6 @@ python tools/test.py \ ${CONFIG_FILE} \ ${CHECKPOINT_FILE} \ [--out ${RESULT_FILE}] \ - [--eval ${EVAL_METRICS}] \ [--show] # 单节点多 GPU 测试 @@ -172,8 +156,6 @@ bash tools/dist_test.sh \ ${CONFIG_FILE} \ ${CHECKPOINT_FILE} \ ${GPU_NUM} \ - --format-only \ - --options ${JSONFILE_PREFIX} \ [--show] ``` @@ -182,14 +164,12 @@ bash tools/dist_test.sh \ ```sh ./tools/dist_test.sh \ - configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py \ + configs/mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py \ checkpoints/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth \ - 8 \ - -format-only \ - --options "jsonfile_prefix=./mask_rcnn_test-dev_results" + 8 ``` -这行命令生成两个 JSON 文件 `mask_rcnn_test-dev_results.bbox.json` 和 `mask_rcnn_test-dev_results.segm.json`。 +这行命令生成两个 JSON 文件 `./work_dirs/coco_instance/test.bbox.jso` 和 `./work_dirs/coco_instance/test.segm.jsonn`。 ### 批量推理 @@ -197,47 +177,109 @@ MMDetection 在测试模式下,既支持单张图片的推理,也支持对 开启批量推理的配置文件修改方法为: ```shell -data = dict(train=dict(...), val=dict(...), test=dict(samples_per_gpu=2, ...)) +data = dict(train_dataloader=dict(...), val_dataloader=dict(...), test_dataloader=dict(batch_size=2, ...)) ``` -或者你可以通过将 `--cfg-options` 设置为 `--cfg-options data.test.samples_per_gpu=2` 来开启它。 - -### 弃用 ImageToTensor - -在测试模式下,弃用 `ImageToTensor` 流程,取而代之的是 `DefaultFormatBundle`。建议在你的测试数据流程的配置文件中手动替换它,如: - -```python -# (已弃用)使用 ImageToTensor -pipelines = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', mean=[0, 0, 0], std=[1, 1, 1]), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) - ] - -# (建议使用)手动将 ImageToTensor 替换为 DefaultFormatBundle -pipelines = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', mean=[0, 0, 0], std=[1, 1, 1]), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img']), - ]) - ] +或者你可以通过将 `--cfg-options` 设置为 `--cfg-options test_dataloader.batch_size=` 来开启它。 + +## 测试时增强 (TTA) + +测试时增强 (TTA) 是一种在测试阶段使用的数据增强策略。它对同一张图片应用不同的增强,例如翻转和缩放,用于模型推理,然后将每个增强后的图像的预测结果合并,以获得更准确的预测结果。为了让用户更容易使用 TTA,MMEngine 提供了 [BaseTTAModel](https://mmengine.readthedocs.io/en/latest/api/generated/mmengine.model.BaseTTAModel.html#mmengine.model.BaseTTAModel) 类,允许用户根据自己的需求通过简单地扩展 BaseTTAModel 类来实现不同的 
TTA 策略。 + +在 MMDetection 中,我们提供了 [DetTTAModel](../../../mmdet/models/test_time_augs/det_tta.py) 类,它继承自 BaseTTAModel。 + +### 使用案例 + +使用 TTA 需要两个步骤。首先,你需要在配置文件中添加 `tta_model` 和 `tta_pipeline`: + +```shell +tta_model = dict( + type='DetTTAModel', + tta_cfg=dict(nms=dict( + type='nms', + iou_threshold=0.5), + max_per_img=100)) + +tta_pipeline = [ + dict(type='LoadImageFromFile', + backend_args=None), + dict( + type='TestTimeAug', + transforms=[[ + dict(type='Resize', scale=(1333, 800), keep_ratio=True) + ], [ # It uses 2 flipping transformations (flipping and not flipping). + dict(type='RandomFlip', prob=1.), + dict(type='RandomFlip', prob=0.) + ], [ + dict( + type='PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', + 'img_shape', 'scale_factor', 'flip', + 'flip_direction')) + ]])] ``` + +第二步,运行测试脚本时,设置 `--tta` 参数,如下所示: + +```shell +# 单 GPU 测试 +python tools/test.py \ + ${CONFIG_FILE} \ + ${CHECKPOINT_FILE} \ + [--tta] + +# CPU 测试:禁用 GPU 并运行单 GPU 测试脚本 +export CUDA_VISIBLE_DEVICES=-1 +python tools/test.py \ + ${CONFIG_FILE} \ + ${CHECKPOINT_FILE} \ + [--out ${RESULT_FILE}] \ + [--tta] + +# 多 GPU 测试 +bash tools/dist_test.sh \ + ${CONFIG_FILE} \ + ${CHECKPOINT_FILE} \ + ${GPU_NUM} \ + [--tta] +``` + +你也可以自己修改 TTA 配置,例如添加缩放增强: + +```shell +tta_model = dict( + type='DetTTAModel', + tta_cfg=dict(nms=dict( + type='nms', + iou_threshold=0.5), + max_per_img=100)) + +img_scales = [(1333, 800), (666, 400), (2000, 1200)] +tta_pipeline = [ + dict(type='LoadImageFromFile', + backend_args=None), + dict( + type='TestTimeAug', + transforms=[[ + dict(type='Resize', scale=s, keep_ratio=True) for s in img_scales + ], [ + dict(type='RandomFlip', prob=1.), + dict(type='RandomFlip', prob=0.) + ], [ + dict( + type='PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', + 'img_shape', 'scale_factor', 'flip', + 'flip_direction')) + ]])] +``` + +以上数据增强管道将首先对图像执行 3 个多尺度转换,然后执行 2 个翻转转换(翻转和不翻转),最后使用 PackDetInputs 将图像打包到最终结果中。 +这里有更多的 TTA 使用案例供您参考: + +- [RetinaNet](../../../configs/retinanet/retinanet_tta.py) +- [CenterNet](../../../configs/centernet/centernet_tta.py) +- [YOLOX](../../../configs/rtmdet/rtmdet_tta.py) +- [RTMDet](../../../configs/yolox/yolox_tta.py) + +更多高级用法和 TTA 的数据流,请参考 [MMEngine](https://mmengine.readthedocs.io/en/latest/advanced_tutorials/test_time_augmentation.html#data-flow)。我们将在后续支持实例分割 TTA。 From a2f33dbcd10ca7e1419e471bb0d19f869235c5f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Haian=20Huang=28=E6=B7=B1=E5=BA=A6=E7=9C=B8=29?= <1286304229@qq.com> Date: Thu, 30 Mar 2023 14:52:57 +0800 Subject: [PATCH 27/38] Fix accepting an unexpected argument local-rank in PyTorch 2.0 (#10050) --- tools/test.py | 5 ++++- tools/train.py | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tools/test.py b/tools/test.py index 0b487f3eddd..b30bbdb1c5c 100644 --- a/tools/test.py +++ b/tools/test.py @@ -52,7 +52,10 @@ def parse_args(): default='none', help='job launcher') parser.add_argument('--tta', action='store_true') - parser.add_argument('--local_rank', type=int, default=0) + # When using PyTorch version >= 2.0.0, the `torch.distributed.launch` + # will pass the `--local-rank` parameter to `tools/train.py` instead + # of `--local_rank`. 
+ parser.add_argument('--local_rank', '--local-rank', type=int, default=0) args = parser.parse_args() if 'LOCAL_RANK' not in os.environ: os.environ['LOCAL_RANK'] = str(args.local_rank) diff --git a/tools/train.py b/tools/train.py index eb21359487b..54e91794930 100644 --- a/tools/train.py +++ b/tools/train.py @@ -46,7 +46,10 @@ def parse_args(): choices=['none', 'pytorch', 'slurm', 'mpi'], default='none', help='job launcher') - parser.add_argument('--local_rank', type=int, default=0) + # When using PyTorch version >= 2.0.0, the `torch.distributed.launch` + # will pass the `--local-rank` parameter to `tools/train.py` instead + # of `--local_rank`. + parser.add_argument('--local_rank', '--local-rank', type=int, default=0) args = parser.parse_args() if 'LOCAL_RANK' not in os.environ: os.environ['LOCAL_RANK'] = str(args.local_rank) From 4a0b0c3143a71de12ad9c127f1cf2dce377bff0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Haian=20Huang=28=E6=B7=B1=E5=BA=A6=E7=9C=B8=29?= <1286304229@qq.com> Date: Fri, 31 Mar 2023 10:19:16 +0800 Subject: [PATCH 28/38] Support setting the `cache_size_limit` parameter of dynamo in PyTorch 2.0 (#10054) --- docs/en/notes/faq.md | 36 ++++++++++++++++++++++++++++++++++++ docs/zh_cn/notes/faq.md | 36 ++++++++++++++++++++++++++++++++++++ mmdet/utils/__init__.py | 6 ++++-- mmdet/utils/setup_env.py | 28 ++++++++++++++++++++++++++++ tools/test.py | 5 +++++ tools/train.py | 6 ++++++ 6 files changed, 115 insertions(+), 2 deletions(-) diff --git a/docs/en/notes/faq.md b/docs/en/notes/faq.md index cfea240c720..e1948125401 100644 --- a/docs/en/notes/faq.md +++ b/docs/en/notes/faq.md @@ -2,6 +2,42 @@ We list some common troubles faced by many users and their corresponding solutions here. Feel free to enrich the list if you find any frequent issues and have ways to help others to solve them. If the contents here do not cover your issue, please create an issue using the [provided templates](https://github.com/open-mmlab/mmdetection/blob/master/.github/ISSUE_TEMPLATE/error-report.md/) and make sure you fill in all required information in the template. +## PyTorch 2.0 Support + +The vast majority of algorithms in MMDetection now support PyTorch 2.0 and its `torch.compile` function. Users only need to install MMDetection 3.0.0rc7 or later versions to enjoy this feature. If any unsupported algorithms are found during use, please feel free to give us feedback. We also welcome contributions from the community to benchmark the speed improvement brought by using the `torch.compile` function. + +To enable the `torch.compile` function, simply add `--cfg-options compile=True` after `train.py` or `test.py`. For example, to enable `torch.compile` for RTMDet, you can use the following command: + +```shell +# Single GPU +python tools/train.py configs/rtmdet/rtmdet_s_8xb32-300e_coco.py --cfg-options compile=True + +# Single node multiple GPUs +./tools/dist_train.sh configs/rtmdet/rtmdet_s_8xb32-300e_coco.py 8 --cfg-options compile=True + +# Single node multiple GPUs + AMP +./tools/dist_train.sh configs/rtmdet/rtmdet_s_8xb32-300e_coco.py 8 --cfg-options compile=True --amp +``` + +It is important to note that PyTorch 2.0's support for dynamic shapes is not yet fully developed. In most object detection algorithms, not only are the input shapes dynamic, but the loss calculation and post-processing parts are also dynamic. This can lead to slower training speeds when using the `torch.compile` function. Therefore, if you wish to enable the `torch.compile` function, you should follow these principles: + +1. 
Input images to the network are fixed shape, not multi-scale +2. set `torch._dynamo.config.cache_size_limit` parameter. TorchDynamo will convert and cache the Python bytecode, and the compiled functions will be stored in the cache. When the next check finds that the function needs to be recompiled, the function will be recompiled and cached. However, if the number of recompilations exceeds the maximum value set (64), the function will no longer be cached or recompiled. As mentioned above, the loss calculation and post-processing parts of the object detection algorithm are also dynamically calculated, and these functions need to be recompiled every time. Therefore, setting the `torch._dynamo.config.cache_size_limit` parameter to a smaller value can effectively reduce the compilation time + +In MMDetection, you can set the `torch._dynamo.config.cache_size_limit` parameter through the environment variable `DYNAMO_CACHE_SIZE_LIMIT`. For example, the command is as follows: + +```shell +# Single GPU +export DYNAMO_CACHE_SIZE_LIMIT = 4 +python tools/train.py configs/rtmdet/rtmdet_s_8xb32-300e_coco.py --cfg-options compile=True + +# Single node multiple GPUs +export DYNAMO_CACHE_SIZE_LIMIT = 4 +./tools/dist_train.sh configs/rtmdet/rtmdet_s_8xb32-300e_coco.py 8 --cfg-options compile=True +``` + +About the common questions about PyTorch 2.0's dynamo, you can refer to [here](https://pytorch.org/docs/stable/dynamo/faq.html) + ## Installation - Compatibility issue between MMCV and MMDetection; "ConvWS is already registered in conv layer"; "AssertionError: MMCV==xxx is used but incompatible. Please install mmcv>=xxx, \<=xxx." diff --git a/docs/zh_cn/notes/faq.md b/docs/zh_cn/notes/faq.md index bc52cf6e202..dd2bbb7ee7b 100644 --- a/docs/zh_cn/notes/faq.md +++ b/docs/zh_cn/notes/faq.md @@ -2,6 +2,42 @@ 我们在这里列出了使用时的一些常见问题及其相应的解决方案。 如果您发现有一些问题被遗漏,请随时提 PR 丰富这个列表。 如果您无法在此获得帮助,请使用 [issue模板](https://github.com/open-mmlab/mmdetection/blob/master/.github/ISSUE_TEMPLATE/error-report.md/)创建问题,但是请在模板中填写所有必填信息,这有助于我们更快定位问题。 +## PyTorch 2.0 支持 + +MMDetection 目前绝大部分算法已经支持了 PyTorch 2.0 及其 `torch.compile` 功能, 用户只需要安装 MMDetection 3.0.0rc7 及其以上版本即可。如果你在使用中发现有不支持的算法,欢迎给我们反馈。我们也非常欢迎社区贡献者来 benchmark 对比 `torch.compile` 功能所带来的速度提升。 + +如果你想启动 `torch.compile` 功能,只需要在 `train.py` 或者 `test.py` 后面加上 `--cfg-options compile=True`。 以 RTMDet 为例,你可以使用以下命令启动 `torch.compile` 功能: + +```shell +# 单卡 +python tools/train.py configs/rtmdet/rtmdet_s_8xb32-300e_coco.py --cfg-options compile=True + +# 单机 8 卡 +./tools/dist_train.sh configs/rtmdet/rtmdet_s_8xb32-300e_coco.py 8 --cfg-options compile=True + +# 单机 8 卡 + AMP 混合精度训练 +./tools/dist_train.sh configs/rtmdet/rtmdet_s_8xb32-300e_coco.py 8 --cfg-options compile=True --amp +``` + +需要特别注意的是,PyTorch 2.0 对于动态 shape 支持不是非常完善,目标检测算法中大部分不仅输入 shape 是动态的,而且 loss 计算和后处理过程中也是动态的,这会导致在开启 `torch.compile` 功能后训练速度会变慢。基于此,如果你想启动 `torch.compile` 功能,则应该遵循如下原则: + +1. 输入到网络的图片是固定 shape 的,而非多尺度的 +2. 
设置 `torch._dynamo.config.cache_size_limit` 参数。TorchDynamo 会将 Python 字节码转换并缓存,已编译的函数会被存入缓存中。当下一次检查发现需要重新编译时,该函数会被重新编译并缓存。但是如果重编译次数超过预设的最大值(64),则该函数将不再被缓存或重新编译。前面说过目标检测算法中的 loss 计算和后处理部分也是动态计算的,这些函数需要在每次迭代中重新编译。因此将 `torch._dynamo.config.cache_size_limit` 参数设置得更小一些可以有效减少编译时间 + +在 MMDetection 中可以通过环境变量 `DYNAMO_CACHE_SIZE_LIMIT` 设置 `torch._dynamo.config.cache_size_limit` 参数,以 RTMDet 为例,命令如下所示: + +```shell +# 单卡 +export DYNAMO_CACHE_SIZE_LIMIT = 4 +python tools/train.py configs/rtmdet/rtmdet_s_8xb32-300e_coco.py --cfg-options compile=True + +# 单机 8 卡 +export DYNAMO_CACHE_SIZE_LIMIT = 4 +./tools/dist_train.sh configs/rtmdet/rtmdet_s_8xb32-300e_coco.py 8 --cfg-options compile=True +``` + +关于 PyTorch 2.0 的 dynamo 常见问题,可以参考 [这里](https://pytorch.org/docs/stable/dynamo/faq.html) + ## 安装 - MMCV 与 MMDetection 的兼容问题: "ConvWS is already registered in conv layer"; "AssertionError: MMCV==xxx is used but incompatible. Please install mmcv>=xxx, \<=xxx." diff --git a/mmdet/utils/__init__.py b/mmdet/utils/__init__.py index 12047895936..1a864342563 100644 --- a/mmdet/utils/__init__.py +++ b/mmdet/utils/__init__.py @@ -8,7 +8,8 @@ from .misc import (find_latest_checkpoint, get_test_pipeline_cfg, update_data_root) from .replace_cfg_vals import replace_cfg_vals -from .setup_env import register_all_modules, setup_multi_processes +from .setup_env import (register_all_modules, setup_cache_size_limit_of_dynamo, + setup_multi_processes) from .split_batch import split_batch from .typing_utils import (ConfigType, InstanceList, MultiConfig, OptConfigType, OptInstanceList, OptMultiConfig, @@ -21,5 +22,6 @@ 'AvoidCUDAOOM', 'all_reduce_dict', 'allreduce_grads', 'reduce_mean', 'sync_random_seed', 'ConfigType', 'InstanceList', 'MultiConfig', 'OptConfigType', 'OptInstanceList', 'OptMultiConfig', 'OptPixelList', - 'PixelList', 'RangeType', 'get_test_pipeline_cfg' + 'PixelList', 'RangeType', 'get_test_pipeline_cfg', + 'setup_cache_size_limit_of_dynamo' ] diff --git a/mmdet/utils/setup_env.py b/mmdet/utils/setup_env.py index 0e56218db96..a7b37845a88 100644 --- a/mmdet/utils/setup_env.py +++ b/mmdet/utils/setup_env.py @@ -1,5 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. import datetime +import logging import os import platform import warnings @@ -7,6 +8,33 @@ import cv2 import torch.multiprocessing as mp from mmengine import DefaultScope +from mmengine.logging import print_log +from mmengine.utils import digit_version + + +def setup_cache_size_limit_of_dynamo(): + """Setup cache size limit of dynamo. + + Note: Due to the dynamic shape of the loss calculation and + post-processing parts in the object detection algorithm, these + functions must be compiled every time they are run. + Setting a large value for torch._dynamo.config.cache_size_limit + may result in repeated compilation, which can slow down training + and testing speed. Therefore, we need to set the default value of + cache_size_limit smaller. An empirical value is 4. 
+ """ + + import torch + if digit_version(torch.__version__) >= digit_version('2.0.0'): + if 'DYNAMO_CACHE_SIZE_LIMIT' in os.environ: + import torch._dynamo + cache_size_limit = int(os.environ['DYNAMO_CACHE_SIZE_LIMIT']) + torch._dynamo.config.cache_size_limit = cache_size_limit + print_log( + f'torch._dynamo.config.cache_size_limit is force ' + f'set to {cache_size_limit}.', + logger='current', + level=logging.WARNING) def setup_multi_processes(cfg): diff --git a/tools/test.py b/tools/test.py index b30bbdb1c5c..ac7a1d09966 100644 --- a/tools/test.py +++ b/tools/test.py @@ -12,6 +12,7 @@ from mmdet.engine.hooks.utils import trigger_visualization_hook from mmdet.evaluation import DumpDetResults from mmdet.registry import RUNNERS +from mmdet.utils import setup_cache_size_limit_of_dynamo # TODO: support fuse_conv_bn and format_only @@ -65,6 +66,10 @@ def parse_args(): def main(): args = parse_args() + # Reduce the number of repeated compilations and improve + # testing speed. + setup_cache_size_limit_of_dynamo() + # load config cfg = Config.fromfile(args.config) cfg.launcher = args.launcher diff --git a/tools/train.py b/tools/train.py index 54e91794930..177346a5a4d 100644 --- a/tools/train.py +++ b/tools/train.py @@ -9,6 +9,8 @@ from mmengine.registry import RUNNERS from mmengine.runner import Runner +from mmdet.utils import setup_cache_size_limit_of_dynamo + def parse_args(): parser = argparse.ArgumentParser(description='Train a detector') @@ -60,6 +62,10 @@ def parse_args(): def main(): args = parse_args() + # Reduce the number of repeated compilations and improve + # training speed. + setup_cache_size_limit_of_dynamo() + # load config cfg = Config.fromfile(args.config) cfg.launcher = args.launcher From 9783ee1b806e575f1b2eb9ca572e06c6c76a0ef1 Mon Sep 17 00:00:00 2001 From: RangiLyu Date: Fri, 31 Mar 2023 10:48:05 +0800 Subject: [PATCH 29/38] [Fix] Fix common/ms_3x_coco-instance.py (#10056) --- configs/common/ms_3x_coco-instance.py | 36 +++++++++++++-------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/configs/common/ms_3x_coco-instance.py b/configs/common/ms_3x_coco-instance.py index 840a2437b30..f80cf88e9b1 100644 --- a/configs/common/ms_3x_coco-instance.py +++ b/configs/common/ms_3x_coco-instance.py @@ -43,31 +43,31 @@ persistent_workers=True, sampler=dict(type='DefaultSampler', shuffle=True), batch_sampler=dict(type='AspectRatioBatchSampler'), - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file='annotations/instances_train2017.json', - data_prefix=dict(img='train2017/'), - filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=train_pipeline, - backend_args=backend_args)) -val_dataloader = dict( - batch_size=2, - num_workers=2, - persistent_workers=True, - drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), dataset=dict( type='RepeatDataset', times=3, dataset=dict( type=dataset_type, data_root=data_root, - ann_file='annotations/instances_val2017.json', - data_prefix=dict(img='val2017/'), - test_mode=True, - pipeline=test_pipeline, + ann_file='annotations/instances_train2017.json', + data_prefix=dict(img='train2017/'), + filter_cfg=dict(filter_empty_gt=True, min_size=32), + pipeline=train_pipeline, backend_args=backend_args))) +val_dataloader = dict( + batch_size=1, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='annotations/instances_val2017.json', + 
data_prefix=dict(img='val2017/'),
+        test_mode=True,
+        pipeline=test_pipeline,
+        backend_args=backend_args))
 test_dataloader = val_dataloader
 
 val_evaluator = dict(

From fa12f9560e754316881c1cff18bbedaa20826e98 Mon Sep 17 00:00:00 2001
From: zwhus <121282623+zwhus@users.noreply.github.com>
Date: Fri, 31 Mar 2023 10:55:46 +0800
Subject: [PATCH 30/38] Fix compute flops error (#10051)

---
 tools/analysis_tools/get_flops.py | 78 ++++++++++---------------------
 1 file changed, 24 insertions(+), 54 deletions(-)

diff --git a/tools/analysis_tools/get_flops.py b/tools/analysis_tools/get_flops.py
index 3ee808d1a1c..ebd6d0f9f76 100644
--- a/tools/analysis_tools/get_flops.py
+++ b/tools/analysis_tools/get_flops.py
@@ -4,6 +4,7 @@
 from functools import partial
 from pathlib import Path
 
+import numpy as np
 import torch
 from mmengine.config import Config, DictAction
 from mmengine.logging import MMLogger
@@ -24,11 +25,10 @@ def parse_args():
     parser = argparse.ArgumentParser(description='Get a detector flops')
     parser.add_argument('config', help='train config file path')
     parser.add_argument(
-        '--shape',
+        '--num-images',
        type=int,
-        nargs='+',
-        default=[1280, 800],
-        help='input image size')
+        default=100,
+        help='number of images used to compute the average FLOPs')
     parser.add_argument(
         '--cfg-options',
         nargs='+',
@@ -56,8 +56,9 @@ def inference(args, logger):
         logger.error(f'{config_name} not found.')
 
     cfg = Config.fromfile(args.config)
+    cfg.val_dataloader.batch_size = 1
     cfg.work_dir = tempfile.TemporaryDirectory().name
-    cfg.log_level = 'WARN'
+
     if args.cfg_options is not None:
         cfg.merge_from_dict(args.cfg_options)
@@ -74,56 +75,24 @@ def inference(args, logger):
         cfg['model']['roi_head']['mask_head']['norm_cfg'] = dict(
             type='SyncBN', requires_grad=True)
 
-    if len(args.shape) == 1:
-        h = w = args.shape[0]
-    elif len(args.shape) == 2:
-        h, w = args.shape
-    else:
-        raise ValueError('invalid input shape')
     result = {}
-
-    # Supports two ways to calculate flops,
-    # 1. randomly generate a picture
-    # 2. 
load a picture from the dataset - # In two stage detectors, _forward need batch_samples to get - # rpn_results_list, then use rpn_results_list to compute flops, - # so only the second way is supported - try: - model = MODELS.build(cfg.model) - if torch.cuda.is_available(): - model.cuda() - model = revert_sync_batchnorm(model) - data_batch = {'inputs': [torch.rand(3, h, w)], 'batch_samples': [None]} - data = model.data_preprocessor(data_batch) - result['ori_shape'] = (h, w) - result['pad_shape'] = data['inputs'].shape[-2:] - model.eval() - outputs = get_model_complexity_info( - model, - None, - inputs=data['inputs'], - show_table=False, - show_arch=False) - flops = outputs['flops'] - params = outputs['params'] - result['compute_type'] = 'direct: randomly generate a picture' - - except TypeError: - logger.warning( - 'Failed to directly get FLOPs, try to get flops with real data') - data_loader = Runner.build_dataloader(cfg.val_dataloader) - data_batch = next(iter(data_loader)) - model = MODELS.build(cfg.model) - if torch.cuda.is_available(): - model = model.cuda() - model = revert_sync_batchnorm(model) - model.eval() - _forward = model.forward + avg_flops = [] + data_loader = Runner.build_dataloader(cfg.val_dataloader) + model = MODELS.build(cfg.model) + if torch.cuda.is_available(): + model = model.cuda() + model = revert_sync_batchnorm(model) + model.eval() + _forward = model.forward + + for idx, data_batch in enumerate(data_loader): + if idx == args.num_images: + break data = model.data_preprocessor(data_batch) result['ori_shape'] = data['data_samples'][0].ori_shape result['pad_shape'] = data['data_samples'][0].pad_shape - - del data_loader + if hasattr(data['data_samples'][0], 'batch_input_shape'): + result['pad_shape'] = data['data_samples'][0].batch_input_shape model.forward = partial(_forward, data_samples=data['data_samples']) outputs = get_model_complexity_info( model, @@ -131,13 +100,14 @@ def inference(args, logger): inputs=data['inputs'], show_table=False, show_arch=False) - flops = outputs['flops'] + avg_flops.append(outputs['flops']) params = outputs['params'] result['compute_type'] = 'dataloader: load a picture from the dataset' + del data_loader - flops = _format_size(flops) + mean_flops = _format_size(int(np.average(avg_flops))) params = _format_size(params) - result['flops'] = flops + result['flops'] = mean_flops result['params'] = params return result From f771df555abfe1147ab755e2630caa1f00cafb60 Mon Sep 17 00:00:00 2001 From: Kevin Ye <2016110079@email.szu.edu.cn> Date: Fri, 31 Mar 2023 11:35:04 +0800 Subject: [PATCH 31/38] Support output mAP details for each category (#10033) --- mmdet/evaluation/metrics/coco_metric.py | 33 ++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/mmdet/evaluation/metrics/coco_metric.py b/mmdet/evaluation/metrics/coco_metric.py index 1ca33c50cfa..00c8421c254 100644 --- a/mmdet/evaluation/metrics/coco_metric.py +++ b/mmdet/evaluation/metrics/coco_metric.py @@ -519,6 +519,7 @@ def compute_metrics(self, results: list) -> Dict[str, float]: results_per_category = [] for idx, cat_id in enumerate(self.cat_ids): + t = [] # area range index 0: all area ranges # max dets index -1: typically 100 per image nm = self._coco_api.loadCats(cat_id)[0] @@ -528,14 +529,38 @@ def compute_metrics(self, results: list) -> Dict[str, float]: ap = np.mean(precision) else: ap = float('nan') - results_per_category.append( - (f'{nm["name"]}', f'{round(ap, 3)}')) + t.append(f'{nm["name"]}') + t.append(f'{round(ap, 3)}') 
eval_results[f'{nm["name"]}_precision'] = round(ap, 3) - num_columns = min(6, len(results_per_category) * 2) + # indexes of IoU @50 and @75 + for iou in [0, 5]: + precision = precisions[iou, :, idx, 0, -1] + precision = precision[precision > -1] + if precision.size: + ap = np.mean(precision) + else: + ap = float('nan') + t.append(f'{round(ap, 3)}') + + # indexes of area of small, median and large + for area in [1, 2, 3]: + precision = precisions[:, :, idx, area, -1] + precision = precision[precision > -1] + if precision.size: + ap = np.mean(precision) + else: + ap = float('nan') + t.append(f'{round(ap, 3)}') + results_per_category.append(tuple(t)) + + num_columns = len(results_per_category[0]) results_flatten = list( itertools.chain(*results_per_category)) - headers = ['category', 'AP'] * (num_columns // 2) + headers = [ + 'category', 'mAP', 'mAP_50', 'mAP_75', 'mAP_s', + 'mAP_m', 'mAP_l' + ] results_2d = itertools.zip_longest(*[ results_flatten[i::num_columns] for i in range(num_columns) From 50c74636c11c60cd3741b4bf3bef3af026cf5c1c Mon Sep 17 00:00:00 2001 From: xiejiajiannb <46562271+xiejiajiannb@users.noreply.github.com> Date: Mon, 3 Apr 2023 09:55:13 +0800 Subject: [PATCH 32/38] Translating visualization documents into English (#10058) --- docs/zh_cn/user_guides/visualization.md | 94 ++++++++++++++++++++++++- 1 file changed, 93 insertions(+), 1 deletion(-) diff --git a/docs/zh_cn/user_guides/visualization.md b/docs/zh_cn/user_guides/visualization.md index 04aa43c3ed6..f90ab6d49fd 100644 --- a/docs/zh_cn/user_guides/visualization.md +++ b/docs/zh_cn/user_guides/visualization.md @@ -1 +1,93 @@ -# 可视化(待更新) +# 可视化 + +在阅读本教程之前,建议先阅读 MMEngine 的 [Visualization](https://github.com/open-mmlab/mmengine/blob/main/docs/en/advanced_tutorials/visualization.md) 文档,以对 `Visualizer` 的定义和用法有一个初步的了解。 + +简而言之,`Visualizer` 在 MMEngine 中实现以满足日常可视化需求,并包含以下三个主要功能: + +- 实现通用的绘图 API,例如 [`draw_bboxes`](mmengine.visualization.Visualizer.draw_bboxes) 实现了绘制边界框的功能,[`draw_lines`](mmengine.visualization.Visualizer.draw_lines) 实现了绘制线条的功能。 +- 支持将可视化结果、学习率曲线、损失函数曲线以及验证精度曲线写入到各种后端中,包括本地磁盘以及常见的深度学习训练日志工具,例如 [TensorBoard](https://www.tensorflow.org/tensorboard) 和 [Wandb](https://wandb.ai/site)。 +- 支持在代码的任何位置调用以可视化或记录模型在训练或测试期间的中间状态,例如特征图和验证结果。 + +基于 MMEngine 的 `Visualizer`,MMDet 提供了各种预构建的可视化工具,用户可以通过简单地修改以下配置文件来使用它们。 + +- `tools/analysis_tools/browse_dataset.py` 脚本提供了一个数据集可视化功能,可以在数据经过数据转换后绘制图像和相应的注释,具体描述请参见[`browse_dataset.py`](useful_tools.md#Visualization)。 + +- MMEngine实现了`LoggerHook`,使用`Visualizer`将学习率、损失和评估结果写入由`Visualizer`设置的后端。因此,通过修改配置文件中的`Visualizer`后端,例如修改为`TensorBoardVISBackend`或`WandbVISBackend`,可以实现日志记录到常用的训练日志工具,如`TensorBoard`或`WandB`,从而方便用户使用这些可视化工具来分析和监控训练过程。 + +- 在MMDet中实现了`VisualizerHook`,它使用`Visualizer`将验证或预测阶段的预测结果可视化或存储到由`Visualizer`设置的后端。因此,通过修改配置文件中的`Visualizer`后端,例如修改为`TensorBoardVISBackend`或`WandbVISBackend`,可以将预测图像存储到`TensorBoard`或`Wandb`中。 + +## 配置 + +由于使用了注册机制,在MMDet中我们可以通过修改配置文件来设置`Visualizer`的行为。通常,我们会在`configs/_base_/default_runtime.py`中为可视化器定义默认配置,详细信息请参见[配置教程](config.md)。 + +```Python +vis_backends = [dict(type='LocalVisBackend')] +visualizer = dict( + type='DetLocalVisualizer', + vis_backends=vis_backends, + name='visualizer') +``` + +基于上面的例子,我们可以看到`Visualizer`的配置由两个主要部分组成,即`Visualizer`类型和其使用的可视化后端`vis_backends`。 + +- 用户可直接使用`DetLocalVisualizer`来可视化支持任务的标签或预测结果。 +- MMDet默认将可视化后端`vis_backend`设置为本地可视化后端`LocalVisBackend`,将所有可视化结果和其他训练信息保存在本地文件夹中。 + +## 存储 + 
+MMDet默认使用本地可视化后端[`LocalVisBackend`](mmengine.visualization.LocalVisBackend),`VisualizerHook`和`LoggerHook`中存储的模型损失、学习率、模型评估精度和可视化信息,包括损失、学习率、评估精度将默认保存到`{work_dir}/{config_name}/{time}/{vis_data}`文件夹中。此外,MMDet还支持其他常见的可视化后端,例如`TensorboardVisBackend`和`WandbVisBackend`,您只需要在配置文件中更改`vis_backends`类型为相应的可视化后端即可。例如,只需在配置文件中插入以下代码块即可将数据存储到`TensorBoard`和`Wandb`中。 + +```Python +# https://mmengine.readthedocs.io/en/latest/api/visualization.html +_base_.visualizer.vis_backends = [ + dict(type='LocalVisBackend'), # + dict(type='TensorboardVisBackend'), + dict(type='WandbVisBackend'),] +``` + +## 绘图 + +### 绘制预测结果 + +MMDet主要使用[`DetVisualizationHook`](mmdet.engine.hooks.DetVisualizationHook)来绘制验证和测试的预测结果,默认情况下`DetVisualizationHook`是关闭的,其默认配置如下。 + +```Python +visualization=dict( #用户可视化验证和测试结果 + type='DetVisualizationHook', + draw=False, + interval=1, + show=False) +``` + +以下表格展示了`DetVisualizationHook`支持的参数。 + +| 参数 | 描述 | +| :------: | :------------------------------------------------------------------------------: | +| draw | DetVisualizationHook通过enable参数打开和关闭,默认状态为关闭。 | +| interval | 控制在DetVisualizationHook启用时存储或显示验证或测试结果的间隔,单位为迭代次数。 | +| show | 控制是否可视化验证或测试的结果。 | + +如果您想在训练或测试期间启用 `DetVisualizationHook` 相关功能和配置,您只需要修改配置文件,以 `configs/rtmdet/rtmdet_tiny_8xb32-300e_coco.py` 为例,同时绘制注释和预测,并显示图像,配置文件可以修改如下: + +```Python +visualization = _base_.default_hooks.visualization +visualization.update(dict(draw=True, show=True)) +``` + +
+`test.py`程序提供了`--show`和`--show-dir`参数,可以在测试过程中可视化注释和预测结果,而不需要修改配置文件,从而进一步简化了测试过程。
+
+```Shell
+# 展示测试结果
+python tools/test.py configs/rtmdet/rtmdet_tiny_8xb32-300e_coco.py https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet_tiny_8xb32-300e_coco/rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth --show
+
+# 指定存储预测结果的位置
+python tools/test.py configs/rtmdet/rtmdet_tiny_8xb32-300e_coco.py https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet_tiny_8xb32-300e_coco/rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth --show-dir imgs/
+```
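+
+除了修改配置或使用 `--show`/`--show-dir` 参数之外,也可以在自己的脚本中直接调用可视化器绘制预测结果。下面是一个极简示例,仅作示意:其中 `demo/demo.jpg` 是 MMDetection 仓库自带的演示图片,配置与权重沿用上文 RTMDet 的示例,均可替换为本地任意文件:
+
+```Python
+import mmcv
+
+from mmdet.apis import inference_detector, init_detector
+from mmdet.registry import VISUALIZERS
+
+config_file = 'configs/rtmdet/rtmdet_tiny_8xb32-300e_coco.py'
+checkpoint_file = 'rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth'
+
+model = init_detector(config_file, checkpoint_file, device='cpu')
+image = mmcv.imread('demo/demo.jpg', channel_order='rgb')
+result = inference_detector(model, image)
+
+# 可视化器从模型配置构建,并复用模型的类别名与配色信息
+visualizer = VISUALIZERS.build(model.cfg.visualizer)
+visualizer.dataset_meta = model.dataset_meta
+visualizer.add_datasample(
+    'result', image, data_sample=result, draw_gt=False, show=True)
+```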
From 7cff63f4106a309fb733d7140b85b437719ddb8a Mon Sep 17 00:00:00 2001 From: RangiLyu Date: Mon, 3 Apr 2023 14:31:09 +0800 Subject: [PATCH 33/38] [Refactor] Refactor WIDERFace dataset (#9970) Co-authored-by: luochunhua Co-authored-by: huanghaian --- configs/_base_/datasets/wider_face.py | 112 ++++++++++-------- .../retinanet_r50_fpn_1x_widerface.py | 10 ++ configs/wider_face/ssd300_24e_widerface.py | 26 ---- .../wider_face/ssd300_8xb32-24e_widerface.py | 64 ++++++++++ mmdet/datasets/wider_face.py | 92 +++++++++----- mmdet/datasets/xml_style.py | 34 +++++- .../WIDERFace/WIDER_train/0--Parade/.gitkeep | 0 .../Annotations/0_Parade_marchingband_1_5.xml | 1 + tests/data/WIDERFace/train.txt | 1 + tests/test_datasets/test_wider_face.py | 28 +++++ 10 files changed, 258 insertions(+), 110 deletions(-) create mode 100644 configs/wider_face/retinanet_r50_fpn_1x_widerface.py delete mode 100644 configs/wider_face/ssd300_24e_widerface.py create mode 100644 configs/wider_face/ssd300_8xb32-24e_widerface.py create mode 100644 tests/data/WIDERFace/WIDER_train/0--Parade/.gitkeep create mode 100644 tests/data/WIDERFace/WIDER_train/Annotations/0_Parade_marchingband_1_5.xml create mode 100644 tests/data/WIDERFace/train.txt create mode 100644 tests/test_datasets/test_wider_face.py diff --git a/configs/_base_/datasets/wider_face.py b/configs/_base_/datasets/wider_face.py index d1d649be42b..7042bc46e87 100644 --- a/configs/_base_/datasets/wider_face.py +++ b/configs/_base_/datasets/wider_face.py @@ -1,63 +1,73 @@ # dataset settings dataset_type = 'WIDERFaceDataset' data_root = 'data/WIDERFace/' -img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) +# Example to use different file client +# Method 1: simply set the data root and let the file I/O module +# automatically infer from prefix (not support LMDB and Memcache yet) + +# data_root = 's3://openmmlab/datasets/detection/cityscapes/' + +# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6 +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# './data/': 's3://openmmlab/datasets/detection/', +# 'data/': 's3://openmmlab/datasets/detection/' +# })) +backend_args = None + +img_scale = (640, 640) # VGA resolution + train_pipeline = [ - dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadImageFromFile', backend_args=backend_args), dict(type='LoadAnnotations', with_bbox=True), - dict( - type='PhotoMetricDistortion', - brightness_delta=32, - contrast_range=(0.5, 1.5), - saturation_range=(0.5, 1.5), - hue_delta=18), - dict( - type='Expand', - mean=img_norm_cfg['mean'], - to_rgb=img_norm_cfg['to_rgb'], - ratio_range=(1, 4)), - dict( - type='MinIoURandomCrop', - min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), - min_crop_size=0.3), - dict(type='Resize', img_scale=(300, 300), keep_ratio=False), - dict(type='Normalize', **img_norm_cfg), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), + dict(type='Resize', scale=img_scale, keep_ratio=True), + dict(type='RandomFlip', prob=0.5), + dict(type='PackDetInputs') ] test_pipeline = [ - dict(type='LoadImageFromFile'), + dict(type='LoadImageFromFile', backend_args=backend_args), + dict(type='Resize', scale=img_scale, keep_ratio=True), + dict(type='LoadAnnotations', with_bbox=True), dict( - type='MultiScaleFlipAug', - img_scale=(300, 300), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=False), - dict(type='Normalize', **img_norm_cfg), - 
dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) + type='PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) ] -data = dict( - samples_per_gpu=60, - workers_per_gpu=2, - train=dict( - type='RepeatDataset', - times=2, - dataset=dict( - type=dataset_type, - ann_file=data_root + 'train.txt', - img_prefix=data_root + 'WIDER_train/', - min_size=17, - pipeline=train_pipeline)), - val=dict( + +train_dataloader = dict( + batch_size=2, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=True), + batch_sampler=dict(type='AspectRatioBatchSampler'), + dataset=dict( type=dataset_type, - ann_file=data_root + 'val.txt', - img_prefix=data_root + 'WIDER_val/', - pipeline=test_pipeline), - test=dict( + data_root=data_root, + ann_file='train.txt', + data_prefix=dict(img='WIDER_train'), + filter_cfg=dict(filter_empty_gt=True, bbox_min_size=17, min_size=32), + pipeline=train_pipeline)) + +val_dataloader = dict( + batch_size=1, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( type=dataset_type, - ann_file=data_root + 'val.txt', - img_prefix=data_root + 'WIDER_val/', + data_root=data_root, + ann_file='val.txt', + data_prefix=dict(img='WIDER_val'), + test_mode=True, pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict( + # TODO: support WiderFace-Evaluation for easy, medium, hard cases + type='VOCMetric', + metric='mAP', + eval_mode='11points') +test_evaluator = val_evaluator diff --git a/configs/wider_face/retinanet_r50_fpn_1x_widerface.py b/configs/wider_face/retinanet_r50_fpn_1x_widerface.py new file mode 100644 index 00000000000..78067255f8f --- /dev/null +++ b/configs/wider_face/retinanet_r50_fpn_1x_widerface.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/retinanet_r50_fpn.py', + '../_base_/datasets/wider_face.py', '../_base_/schedules/schedule_1x.py', + '../_base_/default_runtime.py' +] +# model settings +model = dict(bbox_head=dict(num_classes=1)) +# optimizer +optim_wrapper = dict( + optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)) diff --git a/configs/wider_face/ssd300_24e_widerface.py b/configs/wider_face/ssd300_24e_widerface.py deleted file mode 100644 index 0447f3032b9..00000000000 --- a/configs/wider_face/ssd300_24e_widerface.py +++ /dev/null @@ -1,26 +0,0 @@ -_base_ = [ - '../_base_/models/ssd300.py', '../_base_/datasets/wider_face.py', - '../_base_/default_runtime.py' -] -model = dict(bbox_head=dict(num_classes=1)) - -max_epochs = 24 -param_scheduler = [ - dict( - type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, - end=1000), - dict( - type='MultiStepLR', - begin=0, - end=max_epochs, - by_epoch=True, - milestones=[16, 20], - gamma=0.1) -] - -optim_wrapper = dict(clip_grad=dict(max_norm=35, norm_type=2)) - -train_cfg = dict( - type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=1) -default_hooks = dict(logger=dict(interval=1)) -log_processor = dict(window_size=1) diff --git a/configs/wider_face/ssd300_8xb32-24e_widerface.py b/configs/wider_face/ssd300_8xb32-24e_widerface.py new file mode 100644 index 00000000000..02c3c927f78 --- /dev/null +++ b/configs/wider_face/ssd300_8xb32-24e_widerface.py @@ -0,0 +1,64 @@ +_base_ = [ + '../_base_/models/ssd300.py', '../_base_/datasets/wider_face.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_2x.py' +] +model = 
dict(bbox_head=dict(num_classes=1)) + +train_pipeline = [ + dict(type='LoadImageFromFile', backend_args=_base_.backend_args), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict( + type='Expand', + mean={{_base_.model.data_preprocessor.mean}}, + to_rgb={{_base_.model.data_preprocessor.bgr_to_rgb}}, + ratio_range=(1, 4)), + dict( + type='MinIoURandomCrop', + min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), + min_crop_size=0.3), + dict(type='Resize', scale=(300, 300), keep_ratio=False), + dict(type='RandomFlip', prob=0.5), + dict(type='PackDetInputs') +] + +test_pipeline = [ + dict(type='LoadImageFromFile', backend_args=_base_.backend_args), + dict(type='Resize', scale=(300, 300), keep_ratio=False), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] + +dataset_type = 'WIDERFaceDataset' +data_root = 'data/WIDERFace/' +train_dataloader = dict( + batch_size=32, num_workers=8, dataset=dict(pipeline=train_pipeline)) + +val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) +test_dataloader = val_dataloader + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, + end=1000), + dict(type='MultiStepLR', by_epoch=True, milestones=[16, 20], gamma=0.1) +] + +# optimizer +optim_wrapper = dict( + optimizer=dict(lr=0.012, momentum=0.9, weight_decay=5e-4), + clip_grad=dict(max_norm=35, norm_type=2)) + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (32 samples per GPU) +auto_scale_lr = dict(base_batch_size=256) diff --git a/mmdet/datasets/wider_face.py b/mmdet/datasets/wider_face.py index 9edeb80eb55..62c7fff869a 100644 --- a/mmdet/datasets/wider_face.py +++ b/mmdet/datasets/wider_face.py @@ -2,9 +2,12 @@ import os.path as osp import xml.etree.ElementTree as ET -from mmengine.fileio import list_from_file +from mmengine.dist import is_main_process +from mmengine.fileio import get_local_path, list_from_file +from mmengine.utils import ProgressBar from mmdet.registry import DATASETS +from mmdet.utils.typing_utils import List, Union from .xml_style import XMLDataset @@ -17,36 +20,71 @@ class WIDERFaceDataset(XMLDataset): """ METAINFO = {'classes': ('face', ), 'palette': [(0, 255, 0)]} - def __init__(self, **kwargs): - super(WIDERFaceDataset, self).__init__(**kwargs) + def load_data_list(self) -> List[dict]: + """Load annotation from XML style ann_file. - def load_annotations(self, ann_file): - """Load annotation from WIDERFace XML style annotation file. + Returns: + list[dict]: Annotation info from XML file. + """ + assert self._metainfo.get('classes', None) is not None, \ + 'classes in `XMLDataset` can not be None.' 
+ self.cat2label = { + cat: i + for i, cat in enumerate(self._metainfo['classes']) + } + + data_list = [] + img_ids = list_from_file(self.ann_file, backend_args=self.backend_args) + + # loading process takes around 10 mins + if is_main_process(): + prog_bar = ProgressBar(len(img_ids)) + + for img_id in img_ids: + raw_img_info = {} + raw_img_info['img_id'] = img_id + raw_img_info['file_name'] = f'{img_id}.jpg' + parsed_data_info = self.parse_data_info(raw_img_info) + data_list.append(parsed_data_info) + + if is_main_process(): + prog_bar.update() + return data_list + + def parse_data_info(self, img_info: dict) -> Union[dict, List[dict]]: + """Parse raw annotation to target format. Args: - ann_file (str): Path of XML file. + img_info (dict): Raw image information, usually it includes + `img_id`, `file_name`, and `xml_path`. Returns: - list[dict]: Annotation info from XML file. + Union[dict, List[dict]]: Parsed annotation. """ + data_info = {} + img_id = img_info['img_id'] + xml_path = osp.join(self.data_prefix['img'], 'Annotations', + f'{img_id}.xml') + data_info['img_id'] = img_id + data_info['xml_path'] = xml_path - data_infos = [] - img_ids = list_from_file(ann_file) - for img_id in img_ids: - filename = f'{img_id}.jpg' - xml_path = osp.join(self.img_prefix, 'Annotations', - f'{img_id}.xml') - tree = ET.parse(xml_path) - root = tree.getroot() - size = root.find('size') - width = int(size.find('width').text) - height = int(size.find('height').text) - folder = root.find('folder').text - data_infos.append( - dict( - id=img_id, - filename=osp.join(folder, filename), - width=width, - height=height)) - - return data_infos + # deal with xml file + with get_local_path( + xml_path, backend_args=self.backend_args) as local_path: + raw_ann_info = ET.parse(local_path) + root = raw_ann_info.getroot() + size = root.find('size') + width = int(size.find('width').text) + height = int(size.find('height').text) + folder = root.find('folder').text + img_path = osp.join(self.data_prefix['img'], folder, + img_info['file_name']) + data_info['img_path'] = img_path + + data_info['height'] = height + data_info['width'] = width + + # Coordinates are in range [0, width - 1 or height - 1] + data_info['instances'] = self._parse_instance_info( + raw_ann_info, minus_one=False) + return data_info diff --git a/mmdet/datasets/xml_style.py b/mmdet/datasets/xml_style.py index da0d2b261f1..f5a6d8ca9b9 100644 --- a/mmdet/datasets/xml_style.py +++ b/mmdet/datasets/xml_style.py @@ -106,6 +106,24 @@ def parse_data_info(self, img_info: dict) -> Union[dict, List[dict]]: data_info['height'] = height data_info['width'] = width + data_info['instances'] = self._parse_instance_info( + raw_ann_info, minus_one=True) + + return data_info + + def _parse_instance_info(self, + raw_ann_info: ET, + minus_one: bool = True) -> List[dict]: + """parse instance information. + + Args: + raw_ann_info (ElementTree): ElementTree object. + minus_one (bool): Whether to subtract 1 from the coordinates. + Defaults to True. + + Returns: + List[dict]: List of instances. 
+ """ instances = [] for obj in raw_ann_info.findall('object'): instance = {} @@ -116,11 +134,16 @@ def parse_data_info(self, img_info: dict) -> Union[dict, List[dict]]: difficult = 0 if difficult is None else int(difficult.text) bnd_box = obj.find('bndbox') bbox = [ - int(float(bnd_box.find('xmin').text)) - 1, - int(float(bnd_box.find('ymin').text)) - 1, - int(float(bnd_box.find('xmax').text)) - 1, - int(float(bnd_box.find('ymax').text)) - 1 + int(float(bnd_box.find('xmin').text)), + int(float(bnd_box.find('ymin').text)), + int(float(bnd_box.find('xmax').text)), + int(float(bnd_box.find('ymax').text)) ] + + # VOC needs to subtract 1 from the coordinates + if minus_one: + bbox = [x - 1 for x in bbox] + ignore = False if self.bbox_min_size is not None: assert not self.test_mode @@ -135,8 +158,7 @@ def parse_data_info(self, img_info: dict) -> Union[dict, List[dict]]: instance['bbox'] = bbox instance['bbox_label'] = self.cat2label[name] instances.append(instance) - data_info['instances'] = instances - return data_info + return instances def filter_data(self) -> List[dict]: """Filter annotations according to filter_cfg. diff --git a/tests/data/WIDERFace/WIDER_train/0--Parade/.gitkeep b/tests/data/WIDERFace/WIDER_train/0--Parade/.gitkeep new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/data/WIDERFace/WIDER_train/Annotations/0_Parade_marchingband_1_5.xml b/tests/data/WIDERFace/WIDER_train/Annotations/0_Parade_marchingband_1_5.xml new file mode 100644 index 00000000000..a8577debcaf --- /dev/null +++ b/tests/data/WIDERFace/WIDER_train/Annotations/0_Parade_marchingband_1_5.xml @@ -0,0 +1 @@ +0--Parade0_Parade_marchingband_1_5.jpg./WIDER_train/images/0--Parade/0_Parade_marchingband_1_5.jpgUnknown102468330faceUnspecified00495177532228faceUnspecified00406203444251faceUnspecified00316213354255faceUnspecified00221226259268faceUnspecified002320855243faceUnspecified00919166960209faceUnspecified00784152822197faceUnspecified00714190753235faceUnspecified00594185626221faceUnspecified00146215179248 diff --git a/tests/data/WIDERFace/train.txt b/tests/data/WIDERFace/train.txt new file mode 100644 index 00000000000..e8f06ff7164 --- /dev/null +++ b/tests/data/WIDERFace/train.txt @@ -0,0 +1 @@ +0_Parade_marchingband_1_5 diff --git a/tests/test_datasets/test_wider_face.py b/tests/test_datasets/test_wider_face.py new file mode 100644 index 00000000000..aad3869b005 --- /dev/null +++ b/tests/test_datasets/test_wider_face.py @@ -0,0 +1,28 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import unittest + +import cv2 +import numpy as np + +from mmdet.datasets import WIDERFaceDataset + + +class TestWIDERFaceDataset(unittest.TestCase): + + def setUp(self) -> None: + img_path = 'tests/data/WIDERFace/WIDER_train/0--Parade/0_Parade_marchingband_1_5.jpg' # noqa: E501 + dummy_img = np.zeros((683, 1024, 3), dtype=np.uint8) + cv2.imwrite(img_path, dummy_img) + + def test_wider_face_dataset(self): + dataset = WIDERFaceDataset( + data_root='tests/data/WIDERFace', + ann_file='train.txt', + data_prefix=dict(img='WIDER_train'), + pipeline=[]) + dataset.full_init() + self.assertEqual(len(dataset), 1) + + data_list = dataset.load_data_list() + self.assertEqual(len(data_list), 1) + self.assertEqual(len(data_list[0]['instances']), 10) From 4e267f4d6b1f129c6e1949e41a46bbd6f112cd3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Haian=20Huang=28=E6=B7=B1=E5=BA=A6=E7=9C=B8=29?= <1286304229@qq.com> Date: Mon, 3 Apr 2023 14:50:50 +0800 Subject: [PATCH 34/38] Fix metainfo of softteacher (#10073) --- model-index.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/model-index.yml b/model-index.yml index 41f1491a02b..d810c14e03d 100644 --- a/model-index.yml +++ b/model-index.yml @@ -75,7 +75,6 @@ Import: - configs/seesaw_loss/metafile.yml - configs/simple_copy_paste/metafile.yml - configs/sparse_rcnn/metafile.yml - - configs/soft_teacher/metafile.yml - configs/solo/metafile.yml - configs/solov2/metafile.yml - configs/ssd/metafile.yml From 17a29a5211370e6b585fc5d7332f130dc3f984df Mon Sep 17 00:00:00 2001 From: vansin <60632596+vansinhu@users.noreply.github.com> Date: Mon, 3 Apr 2023 19:29:54 +0800 Subject: [PATCH 35/38] [Feature] Support Semi-automatic annotation Base Label-Studio (#10039) Co-authored-by: Range King --- docs/en/user_guides/index.rst | 1 + docs/en/user_guides/label_studio.md | 256 ++++++++++++++++++ docs/zh_cn/user_guides/index.rst | 1 + docs/zh_cn/user_guides/label_studio.md | 255 +++++++++++++++++ .../LabelStudio/backend_template/_wsgi.py | 145 ++++++++++ .../backend_template/mmdetection.py | 148 ++++++++++ projects/LabelStudio/readme.md | 3 + 7 files changed, 809 insertions(+) create mode 100644 docs/en/user_guides/label_studio.md create mode 100644 docs/zh_cn/user_guides/label_studio.md create mode 100644 projects/LabelStudio/backend_template/_wsgi.py create mode 100644 projects/LabelStudio/backend_template/mmdetection.py create mode 100644 projects/LabelStudio/readme.md diff --git a/docs/en/user_guides/index.rst b/docs/en/user_guides/index.rst index 0a9582a4c7d..7986451893b 100644 --- a/docs/en/user_guides/index.rst +++ b/docs/en/user_guides/index.rst @@ -32,3 +32,4 @@ Useful Tools visualization.md robustness_benchmarking.md deploy.md + label_studio.md diff --git a/docs/en/user_guides/label_studio.md b/docs/en/user_guides/label_studio.md new file mode 100644 index 00000000000..07a1e84a2e2 --- /dev/null +++ b/docs/en/user_guides/label_studio.md @@ -0,0 +1,256 @@ +# Semi-automatic Object Detection Annotation with MMDetection and Label-Studio + +Annotation data is a time-consuming and laborious task. This article introduces how to perform semi-automatic annotation using the RTMDet algorithm in MMDetection in conjunction with Label-Studio software. Specifically, using RTMDet to predict image annotations and then refining the annotations with Label-Studio. Community users can refer to this process and methodology and apply it to other fields. 
+
+- RTMDet: RTMDet is a high-precision single-stage object detection algorithm developed by OpenMMLab, open-sourced in the MMDetection object detection toolbox. Its open-source license is Apache 2.0, and it can be used freely without restrictions by industrial users.
+
+- [Label Studio](https://github.com/heartexlabs/label-studio) is an excellent annotation software covering the functionality of dataset annotation in areas such as image classification, object detection, and segmentation.
+
+In this article, we will use [cat](https://download.openmmlab.com/mmyolo/data/cat_dataset.zip) images for semi-automatic annotation.
+
+## Environment Configuration
+
+To begin with, you need to create a virtual environment and then install PyTorch and MMCV. In this article, we will specify the versions of PyTorch and MMCV. Next, you can install MMDetection, Label-Studio, and label-studio-ml-backend using the following steps:
+
+Create a virtual environment:
+
+```shell
+conda create -n rtmdet python=3.9 -y
+conda activate rtmdet
+```
+
+Install PyTorch:
+
+```shell
+# Linux and Windows CPU only
+pip install torch==1.10.1+cpu torchvision==0.11.2+cpu torchaudio==0.10.1 -f https://download.pytorch.org/whl/cpu/torch_stable.html
+# Linux and Windows CUDA 11.3
+pip install torch==1.10.1+cu113 torchvision==0.11.2+cu113 torchaudio==0.10.1 -f https://download.pytorch.org/whl/cu113/torch_stable.html
+# OSX
+pip install torch==1.10.1 torchvision==0.11.2 torchaudio==0.10.1
+```
+
+Install MMCV:
+
+```shell
+pip install -U openmim
+mim install "mmcv>=2.0.0rc0"
+# Installing mmcv will automatically install mmengine
+```
+
+Install MMDetection:
+
+```shell
+git clone https://github.com/open-mmlab/mmdetection -b dev-3.x
+cd mmdetection
+pip install -v -e .
+```
+
+Install Label-Studio and label-studio-ml-backend:
+
+```shell
+# Installing Label-Studio may take some time; if the version is not found, please use the official source
+pip install label-studio==1.7.2
+pip install label-studio-ml==1.0.9
+```
+
+Download the RTMDet weights:
+
+```shell
+cd path/to/mmdetection
+mkdir work_dirs
+cd work_dirs
+wget https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet_m_8xb32-300e_coco/rtmdet_m_8xb32-300e_coco_20220719_112220-229f527c.pth
+```
+
+## Start the Service
+
+Start the RTMDet backend inference service:
+
+```shell
+cd path/to/mmdetection
+
+label-studio-ml start projects/LabelStudio/backend_template --with \
+config_file=configs/rtmdet/rtmdet_m_8xb32-300e_coco.py \
+checkpoint_file=./work_dirs/rtmdet_m_8xb32-300e_coco_20220719_112220-229f527c.pth \
+device=cpu \
+--port 8003
+# Set device=cpu to use CPU inference, and replace cpu with cuda:0 to use GPU inference.
+```
+
+![](https://cdn.vansin.top/picgo20230330131601.png)
+
+The RTMDet backend inference service has now been started. To configure it in the Label-Studio web system, use http://localhost:8003 as the backend inference service.
+
+Now, start the Label-Studio web service:
+
+```shell
+label-studio start
+```
+
+![](https://cdn.vansin.top/picgo20230330132913.png)
+
+Open your web browser and go to http://localhost:8080/ to see the Label-Studio interface.
+
+![](https://cdn.vansin.top/picgo20230330133118.png)
+
+Register a user and then create an RTMDet-Semiautomatic-Label project.
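+
+Before importing data and connecting the backend in the web UI, you can optionally verify that the model itself loads and predicts correctly. The backend simply wraps MMDetection's standard Python inference API (`init_detector`/`inference_detector`), so a minimal sanity-check sketch looks like the following, where `demo/demo.jpg` is the demo image shipped with the MMDetection repository and any local image works:
+
+```python
+from mmdet.apis import inference_detector, init_detector
+
+# Same config and checkpoint as used for the backend service above.
+config_file = 'configs/rtmdet/rtmdet_m_8xb32-300e_coco.py'
+checkpoint_file = './work_dirs/rtmdet_m_8xb32-300e_coco_20220719_112220-229f527c.pth'
+
+model = init_detector(config_file, checkpoint_file, device='cpu')
+result = inference_detector(model, 'demo/demo.jpg')
+
+# The result is a DetDataSample; predicted boxes, labels and scores
+# live in result.pred_instances.
+print(result.pred_instances.bboxes[:5])
+print(result.pred_instances.scores[:5])
+```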
+ +![](https://cdn.vansin.top/picgo20230330133333.png) + +Download the example cat images by running the following command and import them using the Data Import button: + +```shell +cd path/to/mmetection +mkdir data && cd data + +wget https://download.openmmlab.com/mmyolo/data/cat_dataset.zip && unzip cat_dataset.zip +``` + +![](https://cdn.vansin.top/picgo20230330133628.png) + +![](https://cdn.vansin.top/picgo20230330133715.png) + +Then, select the Object Detection With Bounding Boxes template. + +![](https://cdn.vansin.top/picgo20230330133807.png) + +```shell +airplane +apple +backpack +banana +baseball_bat +baseball_glove +bear +bed +bench +bicycle +bird +boat +book +bottle +bowl +broccoli +bus +cake +car +carrot +cat +cell_phone +chair +clock +couch +cow +cup +dining_table +dog +donut +elephant +fire_hydrant +fork +frisbee +giraffe +hair_drier +handbag +horse +hot_dog +keyboard +kite +knife +laptop +microwave +motorcycle +mouse +orange +oven +parking_meter +person +pizza +potted_plant +refrigerator +remote +sandwich +scissors +sheep +sink +skateboard +skis +snowboard +spoon +sports_ball +stop_sign +suitcase +surfboard +teddy_bear +tennis_racket +tie +toaster +toilet +toothbrush +traffic_light +train +truck +tv +umbrella +vase +wine_glass +zebra +``` + +Then, copy and add the above categories to Label-Studio and click Save. + +![](https://cdn.vansin.top/picgo20230330134027.png) + +In the Settings, click Add Model to add the RTMDet backend inference service. + +![](https://cdn.vansin.top/picgo20230330134320.png) + +Click Validate and Save, and then click Start Labeling. + +![](https://cdn.vansin.top/picgo20230330134424.png) + +If you see Connected as shown below, the backend inference service has been successfully added. + +![](https://cdn.vansin.top/picgo20230330134554.png) + +## Start Semi-Automatic Labeling + +Click on Label to start labeling. + +![](https://cdn.vansin.top/picgo20230330134804.png) + +We can see that the RTMDet backend inference service has successfully returned the predicted results and displayed them on the image. However, we noticed that the predicted bounding boxes for the cats are a bit too large and not very accurate. + +![](https://cdn.vansin.top/picgo20230403104419.png) + +We manually adjust the position of the cat bounding box, and then click Submit to complete the annotation of this image. + +![](https://cdn.vansin.top/picgo/20230403105923.png) + +After submitting all images, click export to export the labeled dataset in COCO format. + +![](https://cdn.vansin.top/picgo20230330135921.png) + +Use VS Code to open the unzipped folder to see the labeled dataset, which includes the images and the annotation files in JSON format. + +![](https://cdn.vansin.top/picgo20230330140321.png) + +At this point, the semi-automatic labeling is complete. We can use this dataset to train a more accurate model in MMDetection and then continue semi-automatic labeling on newly collected images with this model. This way, we can iteratively expand the high-quality dataset and improve the accuracy of the model. + +## Use MMYOLO as the Backend Inference Service + +If you want to use Label-Studio in MMYOLO, you can refer to replacing the config_file and checkpoint_file with the configuration file and weight file of MMYOLO when starting the backend inference service. 
+ +```shell +cd path/to/mmetection + +label-studio-ml start projects/LabelStudio/backend_template --with \ +config_file= path/to/mmyolo_config.py \ +checkpoint_file= path/to/mmyolo_weights.pth \ +device=cpu \ +--port 8003 +# device=cpu is for using CPU inference. If using GPU inference, replace cpu with cuda:0. +``` + +Rotation object detection and instance segmentation are still under development, please stay tuned. diff --git a/docs/zh_cn/user_guides/index.rst b/docs/zh_cn/user_guides/index.rst index 0c413db58f0..5abc50ad1cd 100644 --- a/docs/zh_cn/user_guides/index.rst +++ b/docs/zh_cn/user_guides/index.rst @@ -31,3 +31,4 @@ MMDetection 在 `Model Zoo =2.0.0rc0" +# 安装 mmcv 的过程中会自动安装 mmengine +``` + +安装 MMDetection + +```shell +git clone https://github.com/open-mmlab/mmdetection -b dev-3.x +cd mmdetection +pip install -v -e . +``` + +安装 Label-Studio 和 label-studio-ml-backend + +```shell +# 安装 label-studio 需要一段时间,如果找不到版本请使用官方源 +pip install label-studio==1.7.2 +pip install label-studio-ml==1.0.9 +``` + +下载rtmdet权重 + +```shell +cd path/to/mmetection +mkdir work_dirs +cd work_dirs +wget https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet_m_8xb32-300e_coco/rtmdet_m_8xb32-300e_coco_20220719_112220-229f527c.pth +``` + +## 启动服务 + +启动 RTMDet 后端推理服务: + +```shell +cd path/to/mmetection + +label-studio-ml start projects/LabelStudio/backend_template --with \ +config_file=configs/rtmdet/rtmdet_m_8xb32-300e_coco.py \ +checkpoint_file=./work_dirs/rtmdet_m_8xb32-300e_coco_20220719_112220-229f527c.pth \ +device=cpu \ +--port 8003 +# device=cpu 为使用 CPU 推理,如果使用 GPU 推理,将 cpu 替换为 cuda:0 +``` + +![](https://cdn.vansin.top/picgo20230330131601.png) + +此时,RTMDet 后端推理服务已经启动,后续在 Label-Studio Web 系统中配置 http://localhost:8003 后端推理服务即可。 + +现在启动 Label-Studio 网页服务: + +```shell +label-studio start +``` + +![](https://cdn.vansin.top/picgo20230330132913.png) + +打开浏览器访问 [http://localhost:8080/](http://localhost:8080/) 即可看到 Label-Studio 的界面。 + +![](https://cdn.vansin.top/picgo20230330133118.png) + +我们注册一个用户,然后创建一个 RTMDet-Semiautomatic-Label 项目。 + +![](https://cdn.vansin.top/picgo20230330133333.png) + +我们通过下面的方式下载好示例的喵喵图片,点击 Data Import 导入需要标注的猫图片。 + +```shell +cd path/to/mmetection +mkdir data && cd data + +wget https://download.openmmlab.com/mmyolo/data/cat_dataset.zip && unzip cat_dataset.zip +``` + +![](https://cdn.vansin.top/picgo20230330133628.png) + +![](https://cdn.vansin.top/picgo20230330133715.png) + +然后选择 Object Detection With Bounding Boxes 模板 + +![](https://cdn.vansin.top/picgo20230330133807.png) + +```shell +airplane +apple +backpack +banana +baseball_bat +baseball_glove +bear +bed +bench +bicycle +bird +boat +book +bottle +bowl +broccoli +bus +cake +car +carrot +cat +cell_phone +chair +clock +couch +cow +cup +dining_table +dog +donut +elephant +fire_hydrant +fork +frisbee +giraffe +hair_drier +handbag +horse +hot_dog +keyboard +kite +knife +laptop +microwave +motorcycle +mouse +orange +oven +parking_meter +person +pizza +potted_plant +refrigerator +remote +sandwich +scissors +sheep +sink +skateboard +skis +snowboard +spoon +sports_ball +stop_sign +suitcase +surfboard +teddy_bear +tennis_racket +tie +toaster +toilet +toothbrush +traffic_light +train +truck +tv +umbrella +vase +wine_glass +zebra +``` + +然后将上述类别复制添加到 Label-Studio,然后点击 Save。 + +![](https://cdn.vansin.top/picgo20230330134027.png) + +然后在设置中点击 Add Model 添加 RTMDet 后端推理服务。 + +![](https://cdn.vansin.top/picgo20230330134320.png) + +点击 Validate and Save,然后点击 Start Labeling。 + +![](https://cdn.vansin.top/picgo20230330134424.png) + +看到如下 Connected 
就说明后端推理服务添加成功。
+
+![](https://cdn.vansin.top/picgo20230330134554.png)
+
+## 开始半自动化标注
+
+点击 Label 开始标注
+
+![](https://cdn.vansin.top/picgo20230330134804.png)
+
+我们可以看到 RTMDet 后端推理服务已经成功返回了预测结果并显示在图片上,我们可以发现这个喵喵预测的框有点大。
+
+![](https://cdn.vansin.top/picgo20230403104419.png)
+
+我们手工拖动框,修正一下框的位置,得到以下修正过后的标注,然后点击 Submit,本张图片就标注完毕了。
+
+![](https://cdn.vansin.top/picgo/20230403105923.png)
+
+我们 submit 完毕所有图片后,点击 export 导出 COCO 格式的数据集,就能把标注好的数据集的压缩包导出来了。
+
+![](https://cdn.vansin.top/picgo20230330135921.png)
+
+用 VS Code 打开解压后的文件夹,可以看到标注好的数据集,包含了图片和 json 格式的标注文件。
+
+![](https://cdn.vansin.top/picgo20230330140321.png)
+
+到此半自动化标注就完成了。我们可以用这个数据集在 MMDetection 中训练出精度更高的模型,然后再用这个模型继续半自动化标注新采集的图片,这样就可以不断迭代,扩充高质量数据集,提高模型的精度。
+
+## 使用 MMYOLO 作为后端推理服务
+
+如果想在 MMYOLO 中使用 Label-Studio,只需在启动后端推理服务时,将 config_file 和 checkpoint_file 替换为 MMYOLO 的配置文件和权重文件即可。
+
+```shell
+cd path/to/mmdetection
+
+label-studio-ml start projects/LabelStudio/backend_template --with \
+config_file=path/to/mmyolo_config.py \
+checkpoint_file=path/to/mmyolo_weights.pth \
+device=cpu \
+--port 8003
+# device=cpu 为使用 CPU 推理,如果使用 GPU 推理,将 cpu 替换为 cuda:0
+```
+
+旋转目标检测和实例分割还在支持中,敬请期待。
diff --git a/projects/LabelStudio/backend_template/_wsgi.py b/projects/LabelStudio/backend_template/_wsgi.py
new file mode 100644
index 00000000000..1f8fb68cdf8
--- /dev/null
+++ b/projects/LabelStudio/backend_template/_wsgi.py
@@ -0,0 +1,145 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+import json
+import logging
+import logging.config
+import os
+
+logging.config.dictConfig({
+    'version': 1,
+    'formatters': {
+        'standard': {
+            'format':
+            '[%(asctime)s] [%(levelname)s] [%(name)s::%(funcName)s::%(lineno)d] %(message)s'  # noqa E501
+        }
+    },
+    'handlers': {
+        'console': {
+            'class': 'logging.StreamHandler',
+            'level': 'DEBUG',
+            'stream': 'ext://sys.stdout',
+            'formatter': 'standard'
+        }
+    },
+    'root': {
+        'level': 'ERROR',
+        'handlers': ['console'],
+        'propagate': True
+    }
+})
+
+_DEFAULT_CONFIG_PATH = os.path.join(os.path.dirname(__file__), 'config.json')
+
+
+def get_kwargs_from_config(config_path=_DEFAULT_CONFIG_PATH):
+    if not os.path.exists(config_path):
+        return dict()
+    with open(config_path) as f:
+        config = json.load(f)
+    assert isinstance(config, dict)
+    return config
+
+
+if __name__ == '__main__':
+
+    from label_studio_ml.api import init_app
+
+    from projects.LabelStudio.backend_template.mmdetection import MMDetection
+
+    parser = argparse.ArgumentParser(description='Label studio')
+    parser.add_argument(
+        '-p',
+        '--port',
+        dest='port',
+        type=int,
+        default=9090,
+        help='Server port')
+    parser.add_argument(
+        '--host', dest='host', type=str, default='0.0.0.0', help='Server host')
+    parser.add_argument(
+        '--kwargs',
+        '--with',
+        dest='kwargs',
+        metavar='KEY=VAL',
+        nargs='+',
+        type=lambda kv: kv.split('='),
+        help='Additional LabelStudioMLBase model initialization kwargs')
+    parser.add_argument(
+        '-d',
+        '--debug',
+        dest='debug',
+        action='store_true',
+        help='Switch debug mode')
+    parser.add_argument(
+        '--log-level',
+        dest='log_level',
+        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'],
+        default=None,
+        help='Logging level')
+    parser.add_argument(
+        '--model-dir',
+        dest='model_dir',
+        default=os.path.dirname(__file__),
+        help='Directory where models are stored',
+    )
+    parser.add_argument(
+        '--check',
+        dest='check',
+        action='store_true',
+        help='Validate model instance before launching server')
+
+    args = parser.parse_args()
+
+    # setup logging level
+    if args.log_level:
+        logging.root.setLevel(args.log_level)
+
+    def isfloat(value):
+        try:
+            float(value)
+            return True
+        except ValueError:
+            return False
+
+    def parse_kwargs():
+        param = dict()
+        for k, v in args.kwargs:
+            if v.isdigit():
+                param[k] = int(v)
+            elif v == 'True' or v == 'true':
+                param[k] = True
+            elif v == 'False' or v == 'false':
+                param[k] = False
+            elif isfloat(v):
+                param[k] = float(v)
+            else:
+                param[k] = v
+        return param
+
+    kwargs = get_kwargs_from_config()
+
+    if args.kwargs:
+        kwargs.update(parse_kwargs())
+
+    if args.check:
+        print('Check "' + MMDetection.__name__ + '" instance creation..')
+        model = MMDetection(**kwargs)
+
+    app = init_app(
+        model_class=MMDetection,
+        model_dir=os.environ.get('MODEL_DIR', args.model_dir),
+        redis_queue=os.environ.get('RQ_QUEUE_NAME', 'default'),
+        redis_host=os.environ.get('REDIS_HOST', 'localhost'),
+        redis_port=os.environ.get('REDIS_PORT', 6379),
+        **kwargs)
+
+    app.run(host=args.host, port=args.port, debug=args.debug)
+
+else:
+    # for uWSGI use
+    app = init_app(
+        model_class=MMDetection,
+        model_dir=os.environ.get('MODEL_DIR', os.path.dirname(__file__)),
+        redis_queue=os.environ.get('RQ_QUEUE_NAME', 'default'),
+        redis_host=os.environ.get('REDIS_HOST', 'localhost'),
+        redis_port=os.environ.get('REDIS_PORT', 6379))
diff --git a/projects/LabelStudio/backend_template/mmdetection.py b/projects/LabelStudio/backend_template/mmdetection.py
new file mode 100644
index 00000000000..f25e80e8fc9
--- /dev/null
+++ b/projects/LabelStudio/backend_template/mmdetection.py
@@ -0,0 +1,148 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import io
+import json
+import logging
+import os
+from urllib.parse import urlparse
+
+import boto3
+from botocore.exceptions import ClientError
+from label_studio_ml.model import LabelStudioMLBase
+from label_studio_ml.utils import (DATA_UNDEFINED_NAME, get_image_size,
+                                   get_single_tag_keys)
+from label_studio_tools.core.utils.io import get_data_dir
+
+from mmdet.apis import inference_detector, init_detector
+
+logger = logging.getLogger(__name__)
+
+
+class MMDetection(LabelStudioMLBase):
+    """Object detector based on https://github.com/open-mmlab/mmdetection."""
+
+    def __init__(self,
+                 config_file=None,
+                 checkpoint_file=None,
+                 image_dir=None,
+                 labels_file=None,
+                 score_threshold=0.5,
+                 device='cpu',
+                 **kwargs):
+
+        super(MMDetection, self).__init__(**kwargs)
+        config_file = config_file or os.environ['config_file']
+        checkpoint_file = checkpoint_file or os.environ['checkpoint_file']
+        self.config_file = config_file
+        self.checkpoint_file = checkpoint_file
+        self.labels_file = labels_file
+        # default Label Studio image upload folder
+        upload_dir = os.path.join(get_data_dir(), 'media', 'upload')
+        self.image_dir = image_dir or upload_dir
+        logger.debug(
+            f'{self.__class__.__name__} reads images from {self.image_dir}')
+        if self.labels_file and os.path.exists(self.labels_file):
+            # load an optional mapping from model labels to project labels
+            with open(self.labels_file) as f:
+                self.label_map = json.load(f)
+        else:
+            self.label_map = {}
+
+        self.from_name, self.to_name, self.value, self.labels_in_config = get_single_tag_keys(  # noqa E501
+            self.parsed_label_config, 'RectangleLabels', 'Image')
+        schema = list(self.parsed_label_config.values())[0]
+        self.labels_in_config = set(self.labels_in_config)
+
+        # Collect label maps from `predicted_values="airplane,car"` attribute in