From cf0a3e8fc6b4d835f5f50b9c000ab4a3812950e1 Mon Sep 17 00:00:00 2001 From: Hongyuan Zhang <66273343+Alias-z@users.noreply.github.com> Date: Fri, 31 May 2024 20:30:02 +0200 Subject: [PATCH 1/8] Fix TypeError: 'GeometryCollection' object is not subscriptable when slicing COCO Solution to https://github.com/obss/sahi/discussions/1011 by adding a filter for Polygon only --- sahi/utils/coco.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/sahi/utils/coco.py b/sahi/utils/coco.py index e84f2f3d3..a7cc76e4b 100644 --- a/sahi/utils/coco.py +++ b/sahi/utils/coco.py @@ -14,7 +14,7 @@ from typing import Dict, List, Optional, Set, Union import numpy as np -from shapely import MultiPolygon +from shapely import MultiPolygon, Polygon, GeometryCollection from shapely.validation import make_valid from tqdm import tqdm @@ -224,12 +224,29 @@ def __init__( self._shapely_annotation = shapely_annotation def get_sliced_coco_annotation(self, slice_bbox: List[int]): + + def filter_polygons(geometry): + """ + This function checks if the geometry is a Polygon or MultiPolygon and filters accordingly. + It returns a MultiPolygon made only from Polygon components. + """ + if isinstance(geometry, Polygon): + return MultiPolygon([geometry]) + elif isinstance(geometry, MultiPolygon): + return geometry + elif isinstance(geometry, GeometryCollection): + polygons = [geom for geom in geometry.geoms if isinstance(geom, Polygon)] + if polygons: + return MultiPolygon(polygons) + return MultiPolygon() # Return an empty MultiPolygon if no Polygon geometries are found + shapely_polygon = box(slice_bbox[0], slice_bbox[1], slice_bbox[2], slice_bbox[3]) samp = self._shapely_annotation.multipolygon if not samp.is_valid: valid = make_valid(samp) - if not isinstance(valid, MultiPolygon): - valid = MultiPolygon([valid]) + valid = filter_polygons(valid) + # if not isinstance(valid, MultiPolygon): + # valid = MultiPolygon([valid]) self._shapely_annotation.multipolygon = valid intersection_shapely_annotation = self._shapely_annotation.get_intersection(shapely_polygon) return CocoAnnotation.from_shapely_annotation( From 0afbf5be30ca1cd46f89c30b767c8815f153881b Mon Sep 17 00:00:00 2001 From: Hongyuan Zhang <66273343+Alias-z@users.noreply.github.com> Date: Sun, 2 Jun 2024 20:00:30 +0200 Subject: [PATCH 2/8] Test lint fixing with isort and black --- sahi/utils/coco.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/sahi/utils/coco.py b/sahi/utils/coco.py index a7cc76e4b..de7d11eb8 100644 --- a/sahi/utils/coco.py +++ b/sahi/utils/coco.py @@ -14,7 +14,7 @@ from typing import Dict, List, Optional, Set, Union import numpy as np -from shapely import MultiPolygon, Polygon, GeometryCollection +from shapely import GeometryCollection, MultiPolygon, Polygon from shapely.validation import make_valid from tqdm import tqdm @@ -227,8 +227,10 @@ def get_sliced_coco_annotation(self, slice_bbox: List[int]): def filter_polygons(geometry): """ - This function checks if the geometry is a Polygon or MultiPolygon and filters accordingly. - It returns a MultiPolygon made only from Polygon components. + Filters out and returns only Polygon or MultiPolygon components of a geometry. + If geometry is a Polygon, it converts it into a MultiPolygon. + If it's a GeometryCollection, it filters to create a MultiPolygon from any Polygons in the collection. + Returns an empty MultiPolygon if no Polygon or MultiPolygon components are found. """ if isinstance(geometry, Polygon): return MultiPolygon([geometry]) @@ -236,17 +238,14 @@ def filter_polygons(geometry): return geometry elif isinstance(geometry, GeometryCollection): polygons = [geom for geom in geometry.geoms if isinstance(geom, Polygon)] - if polygons: - return MultiPolygon(polygons) - return MultiPolygon() # Return an empty MultiPolygon if no Polygon geometries are found + return MultiPolygon(polygons) if polygons else MultiPolygon() + return MultiPolygon() shapely_polygon = box(slice_bbox[0], slice_bbox[1], slice_bbox[2], slice_bbox[3]) samp = self._shapely_annotation.multipolygon if not samp.is_valid: valid = make_valid(samp) valid = filter_polygons(valid) - # if not isinstance(valid, MultiPolygon): - # valid = MultiPolygon([valid]) self._shapely_annotation.multipolygon = valid intersection_shapely_annotation = self._shapely_annotation.get_intersection(shapely_polygon) return CocoAnnotation.from_shapely_annotation( From fad86cfdf97b1649bb22ecabaf5086b088e817ba Mon Sep 17 00:00:00 2001 From: Hongyuan Zhang <66273343+Alias-z@users.noreply.github.com> Date: Sun, 2 Jun 2024 22:53:28 +0200 Subject: [PATCH 3/8] Attempt with run_code_style format --- sahi/utils/coco.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sahi/utils/coco.py b/sahi/utils/coco.py index de7d11eb8..942561cb7 100644 --- a/sahi/utils/coco.py +++ b/sahi/utils/coco.py @@ -224,7 +224,6 @@ def __init__( self._shapely_annotation = shapely_annotation def get_sliced_coco_annotation(self, slice_bbox: List[int]): - def filter_polygons(geometry): """ Filters out and returns only Polygon or MultiPolygon components of a geometry. From edc260a8d17b4b95f1bac0c9ee1f125f64bace4b Mon Sep 17 00:00:00 2001 From: Hongyuan Zhang <66273343+Alias-z@users.noreply.github.com> Date: Tue, 4 Jun 2024 10:05:50 +0200 Subject: [PATCH 4/8] Update has_mask method for mmdet models --- sahi/models/mmdet.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sahi/models/mmdet.py b/sahi/models/mmdet.py index c64ea87c7..fe3e13688 100644 --- a/sahi/models/mmdet.py +++ b/sahi/models/mmdet.py @@ -190,8 +190,9 @@ def has_mask(self): """ Returns if model output contains segmentation mask """ - has_mask = self.model.model.with_mask - return has_mask + # has_mask = self.model.model.with_mask + dataloader = self.model.cfg["train_dataloader"]["dataset"]["dataset"]["pipeline"] + return any(isinstance(item, dict) and item.get("with_mask", False) for item in dataloader) @property def category_names(self): From 68ad269e2354120c07b040ff714cfcf9563e087e Mon Sep 17 00:00:00 2001 From: Hongyuan Zhang <66273343+Alias-z@users.noreply.github.com> Date: Tue, 4 Jun 2024 22:34:00 +0200 Subject: [PATCH 5/8] update mmdet test configs --- .../cascade-mask-rcnn_r50_fpn_1x_coco.py | 478 ++++++++++++++- .../retinanet/retinanet_r50_fpn_1x_coco.py | 379 +++++++++++- .../models/mmdet/retinanet/retinanet_tta.py | 28 - .../mmdet/yolox/yolox_s_8xb8-300e_coco.py | 219 ------- .../mmdet/yolox/yolox_tiny_8x8_300e_coco.py | 548 ++++++++++++++++++ .../mmdet/yolox/yolox_tiny_8xb8-300e_coco.py | 43 -- tests/data/models/mmdet/yolox/yolox_tta.py | 37 -- .../cascade-mask-rcnn_r50_fpn.py | 167 ------ .../cascade-mask-rcnn_r50_fpn_1x_coco.py | 6 - .../cascade_mask_rcnn_r50_fpn.py | 215 ------- .../cascade_mask_rcnn_r50_fpn_1x_coco.py | 1 - .../cascade_mask_rcnn_r50_fpn_1x_coco_v280.py | 1 - .../cascade_mask_rcnn_r50_fpn_v280.py | 215 ------- .../mmdet_cascade_mask_rcnn/coco_instance.py | 52 -- .../default_runtime.py | 15 - .../mmdet_cascade_mask_rcnn/schedule_1x.py | 6 - .../models/mmdet_retinanet/coco_detection.py | 52 -- .../models/mmdet_retinanet/default_runtime.py | 15 - .../mmdet_retinanet/retinanet_r50_fpn.py | 50 -- .../retinanet_r50_fpn_1x_coco.py | 3 - .../retinanet_r50_fpn_1x_coco_v280.py | 3 - .../mmdet_retinanet/retinanet_r50_fpn_v280.py | 48 -- .../models/mmdet_retinanet/schedule_1x.py | 6 - .../mmdet_yolox/yolox_tiny_8x8_300e_coco.py | 163 ------ 24 files changed, 1390 insertions(+), 1360 deletions(-) delete mode 100644 tests/data/models/mmdet/retinanet/retinanet_tta.py delete mode 100644 tests/data/models/mmdet/yolox/yolox_s_8xb8-300e_coco.py create mode 100644 tests/data/models/mmdet/yolox/yolox_tiny_8x8_300e_coco.py delete mode 100644 tests/data/models/mmdet/yolox/yolox_tiny_8xb8-300e_coco.py delete mode 100644 tests/data/models/mmdet/yolox/yolox_tta.py delete mode 100644 tests/data/models/mmdet_cascade_mask_rcnn/cascade-mask-rcnn_r50_fpn.py delete mode 100644 tests/data/models/mmdet_cascade_mask_rcnn/cascade-mask-rcnn_r50_fpn_1x_coco.py delete mode 100644 tests/data/models/mmdet_cascade_mask_rcnn/cascade_mask_rcnn_r50_fpn.py delete mode 100644 tests/data/models/mmdet_cascade_mask_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py delete mode 100644 tests/data/models/mmdet_cascade_mask_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco_v280.py delete mode 100644 tests/data/models/mmdet_cascade_mask_rcnn/cascade_mask_rcnn_r50_fpn_v280.py delete mode 100644 tests/data/models/mmdet_cascade_mask_rcnn/coco_instance.py delete mode 100644 tests/data/models/mmdet_cascade_mask_rcnn/default_runtime.py delete mode 100644 tests/data/models/mmdet_cascade_mask_rcnn/schedule_1x.py delete mode 100644 tests/data/models/mmdet_retinanet/coco_detection.py delete mode 100644 tests/data/models/mmdet_retinanet/default_runtime.py delete mode 100644 tests/data/models/mmdet_retinanet/retinanet_r50_fpn.py delete mode 100644 tests/data/models/mmdet_retinanet/retinanet_r50_fpn_1x_coco.py delete mode 100644 tests/data/models/mmdet_retinanet/retinanet_r50_fpn_1x_coco_v280.py delete mode 100644 tests/data/models/mmdet_retinanet/retinanet_r50_fpn_v280.py delete mode 100644 tests/data/models/mmdet_retinanet/schedule_1x.py delete mode 100644 tests/data/models/mmdet_yolox/yolox_tiny_8x8_300e_coco.py diff --git a/tests/data/models/mmdet/cascade_mask_rcnn/cascade-mask-rcnn_r50_fpn_1x_coco.py b/tests/data/models/mmdet/cascade_mask_rcnn/cascade-mask-rcnn_r50_fpn_1x_coco.py index 77b3ebac4..c06d9fc57 100644 --- a/tests/data/models/mmdet/cascade_mask_rcnn/cascade-mask-rcnn_r50_fpn_1x_coco.py +++ b/tests/data/models/mmdet/cascade_mask_rcnn/cascade-mask-rcnn_r50_fpn_1x_coco.py @@ -1,6 +1,474 @@ -_base_ = [ - "../_base_/models/cascade-mask-rcnn_r50_fpn.py", - "../_base_/datasets/coco_instance.py", - "../_base_/schedules/schedule_1x.py", - "../_base_/default_runtime.py", +auto_scale_lr = dict(base_batch_size=16, enable=False) +backend_args = None +data_root = "data/coco/" +dataset_type = "CocoDataset" +default_hooks = dict( + checkpoint=dict(interval=1, type="CheckpointHook"), + logger=dict(interval=50, type="LoggerHook"), + param_scheduler=dict(type="ParamSchedulerHook"), + sampler_seed=dict(type="DistSamplerSeedHook"), + timer=dict(type="IterTimerHook"), + visualization=dict(type="DetVisualizationHook"), +) +default_scope = "mmdet" +env_cfg = dict( + cudnn_benchmark=False, dist_cfg=dict(backend="nccl"), mp_cfg=dict(mp_start_method="fork", opencv_num_threads=0) +) +load_from = None +log_level = "INFO" +log_processor = dict(by_epoch=True, type="LogProcessor", window_size=50) +model = dict( + backbone=dict( + depth=50, + frozen_stages=1, + init_cfg=dict(checkpoint="torchvision://resnet50", type="Pretrained"), + norm_cfg=dict(requires_grad=True, type="BN"), + norm_eval=True, + num_stages=4, + out_indices=( + 0, + 1, + 2, + 3, + ), + style="pytorch", + type="ResNet", + ), + data_preprocessor=dict( + bgr_to_rgb=True, + mean=[ + 123.675, + 116.28, + 103.53, + ], + pad_mask=True, + pad_size_divisor=32, + std=[ + 58.395, + 57.12, + 57.375, + ], + type="DetDataPreprocessor", + ), + neck=dict( + in_channels=[ + 256, + 512, + 1024, + 2048, + ], + num_outs=5, + out_channels=256, + type="FPN", + ), + roi_head=dict( + bbox_head=[ + dict( + bbox_coder=dict( + target_means=[ + 0.0, + 0.0, + 0.0, + 0.0, + ], + target_stds=[ + 0.1, + 0.1, + 0.2, + 0.2, + ], + type="DeltaXYWHBBoxCoder", + ), + fc_out_channels=1024, + in_channels=256, + loss_bbox=dict(beta=1.0, loss_weight=1.0, type="SmoothL1Loss"), + loss_cls=dict(loss_weight=1.0, type="CrossEntropyLoss", use_sigmoid=False), + num_classes=80, + reg_class_agnostic=True, + roi_feat_size=7, + type="Shared2FCBBoxHead", + ), + dict( + bbox_coder=dict( + target_means=[ + 0.0, + 0.0, + 0.0, + 0.0, + ], + target_stds=[ + 0.05, + 0.05, + 0.1, + 0.1, + ], + type="DeltaXYWHBBoxCoder", + ), + fc_out_channels=1024, + in_channels=256, + loss_bbox=dict(beta=1.0, loss_weight=1.0, type="SmoothL1Loss"), + loss_cls=dict(loss_weight=1.0, type="CrossEntropyLoss", use_sigmoid=False), + num_classes=80, + reg_class_agnostic=True, + roi_feat_size=7, + type="Shared2FCBBoxHead", + ), + dict( + bbox_coder=dict( + target_means=[ + 0.0, + 0.0, + 0.0, + 0.0, + ], + target_stds=[ + 0.033, + 0.033, + 0.067, + 0.067, + ], + type="DeltaXYWHBBoxCoder", + ), + fc_out_channels=1024, + in_channels=256, + loss_bbox=dict(beta=1.0, loss_weight=1.0, type="SmoothL1Loss"), + loss_cls=dict(loss_weight=1.0, type="CrossEntropyLoss", use_sigmoid=False), + num_classes=80, + reg_class_agnostic=True, + roi_feat_size=7, + type="Shared2FCBBoxHead", + ), + ], + bbox_roi_extractor=dict( + featmap_strides=[ + 4, + 8, + 16, + 32, + ], + out_channels=256, + roi_layer=dict(output_size=7, sampling_ratio=0, type="RoIAlign"), + type="SingleRoIExtractor", + ), + mask_head=dict( + conv_out_channels=256, + in_channels=256, + loss_mask=dict(loss_weight=1.0, type="CrossEntropyLoss", use_mask=True), + num_classes=80, + num_convs=4, + type="FCNMaskHead", + ), + mask_roi_extractor=dict( + featmap_strides=[ + 4, + 8, + 16, + 32, + ], + out_channels=256, + roi_layer=dict(output_size=14, sampling_ratio=0, type="RoIAlign"), + type="SingleRoIExtractor", + ), + num_stages=3, + stage_loss_weights=[ + 1, + 0.5, + 0.25, + ], + type="CascadeRoIHead", + ), + rpn_head=dict( + anchor_generator=dict( + ratios=[ + 0.5, + 1.0, + 2.0, + ], + scales=[ + 8, + ], + strides=[ + 4, + 8, + 16, + 32, + 64, + ], + type="AnchorGenerator", + ), + bbox_coder=dict( + target_means=[ + 0.0, + 0.0, + 0.0, + 0.0, + ], + target_stds=[ + 1.0, + 1.0, + 1.0, + 1.0, + ], + type="DeltaXYWHBBoxCoder", + ), + feat_channels=256, + in_channels=256, + loss_bbox=dict(beta=0.1111111111111111, loss_weight=1.0, type="SmoothL1Loss"), + loss_cls=dict(loss_weight=1.0, type="CrossEntropyLoss", use_sigmoid=True), + type="RPNHead", + ), + test_cfg=dict( + rcnn=dict(mask_thr_binary=0.5, max_per_img=100, nms=dict(iou_threshold=0.5, type="nms"), score_thr=0.05), + rpn=dict(max_per_img=1000, min_bbox_size=0, nms=dict(iou_threshold=0.7, type="nms"), nms_pre=1000), + ), + train_cfg=dict( + rcnn=[ + dict( + assigner=dict( + ignore_iof_thr=-1, + match_low_quality=False, + min_pos_iou=0.5, + neg_iou_thr=0.5, + pos_iou_thr=0.5, + type="MaxIoUAssigner", + ), + debug=False, + mask_size=28, + pos_weight=-1, + sampler=dict(add_gt_as_proposals=True, neg_pos_ub=-1, num=512, pos_fraction=0.25, type="RandomSampler"), + ), + dict( + assigner=dict( + ignore_iof_thr=-1, + match_low_quality=False, + min_pos_iou=0.6, + neg_iou_thr=0.6, + pos_iou_thr=0.6, + type="MaxIoUAssigner", + ), + debug=False, + mask_size=28, + pos_weight=-1, + sampler=dict(add_gt_as_proposals=True, neg_pos_ub=-1, num=512, pos_fraction=0.25, type="RandomSampler"), + ), + dict( + assigner=dict( + ignore_iof_thr=-1, + match_low_quality=False, + min_pos_iou=0.7, + neg_iou_thr=0.7, + pos_iou_thr=0.7, + type="MaxIoUAssigner", + ), + debug=False, + mask_size=28, + pos_weight=-1, + sampler=dict(add_gt_as_proposals=True, neg_pos_ub=-1, num=512, pos_fraction=0.25, type="RandomSampler"), + ), + ], + rpn=dict( + allowed_border=0, + assigner=dict( + ignore_iof_thr=-1, + match_low_quality=True, + min_pos_iou=0.3, + neg_iou_thr=0.3, + pos_iou_thr=0.7, + type="MaxIoUAssigner", + ), + debug=False, + pos_weight=-1, + sampler=dict(add_gt_as_proposals=False, neg_pos_ub=-1, num=256, pos_fraction=0.5, type="RandomSampler"), + ), + rpn_proposal=dict(max_per_img=2000, min_bbox_size=0, nms=dict(iou_threshold=0.7, type="nms"), nms_pre=2000), + ), + type="CascadeRCNN", +) +optim_wrapper = dict(optimizer=dict(lr=0.02, momentum=0.9, type="SGD", weight_decay=0.0001), type="OptimWrapper") +param_scheduler = [ + dict(begin=0, by_epoch=False, end=500, start_factor=0.001, type="LinearLR"), + dict( + begin=0, + by_epoch=True, + end=12, + gamma=0.1, + milestones=[ + 8, + 11, + ], + type="MultiStepLR", + ), ] +resume = False +test_cfg = dict(type="TestLoop") +test_dataloader = dict( + batch_size=1, + dataset=dict( + ann_file="annotations/instances_val2017.json", + backend_args=None, + data_prefix=dict(img="val2017/"), + data_root="data/coco/", + pipeline=[ + dict(backend_args=None, type="LoadImageFromFile"), + dict( + keep_ratio=True, + scale=( + 1333, + 800, + ), + type="Resize", + ), + dict(type="LoadAnnotations", with_bbox=True, with_mask=True), + dict( + meta_keys=( + "img_id", + "img_path", + "ori_shape", + "img_shape", + "scale_factor", + ), + type="PackDetInputs", + ), + ], + test_mode=True, + type="CocoDataset", + ), + drop_last=False, + num_workers=2, + persistent_workers=True, + sampler=dict(shuffle=False, type="DefaultSampler"), +) +test_evaluator = dict( + ann_file="data/coco/annotations/instances_val2017.json", + backend_args=None, + format_only=False, + metric=[ + "bbox", + "segm", + ], + type="CocoMetric", +) +test_pipeline = [ + dict(backend_args=None, type="LoadImageFromFile"), + dict( + keep_ratio=True, + scale=( + 1333, + 800, + ), + type="Resize", + ), + dict(type="LoadAnnotations", with_bbox=True, with_mask=True), + dict( + meta_keys=( + "img_id", + "img_path", + "ori_shape", + "img_shape", + "scale_factor", + ), + type="PackDetInputs", + ), +] +train_cfg = dict(max_epochs=12, type="EpochBasedTrainLoop", val_interval=1) +train_dataloader = dict( + batch_sampler=dict(type="AspectRatioBatchSampler"), + batch_size=2, + dataset=dict( + ann_file="annotations/instances_train2017.json", + backend_args=None, + data_prefix=dict(img="train2017/"), + data_root="data/coco/", + filter_cfg=dict(filter_empty_gt=True, min_size=32), + pipeline=[ + dict(backend_args=None, type="LoadImageFromFile"), + dict(type="LoadAnnotations", with_bbox=True, with_mask=True), + dict( + keep_ratio=True, + scale=( + 1333, + 800, + ), + type="Resize", + ), + dict(prob=0.5, type="RandomFlip"), + dict(type="PackDetInputs"), + ], + type="CocoDataset", + ), + num_workers=2, + persistent_workers=True, + sampler=dict(shuffle=True, type="DefaultSampler"), +) +train_pipeline = [ + dict(backend_args=None, type="LoadImageFromFile"), + dict(type="LoadAnnotations", with_bbox=True, with_mask=True), + dict( + keep_ratio=True, + scale=( + 1333, + 800, + ), + type="Resize", + ), + dict(prob=0.5, type="RandomFlip"), + dict(type="PackDetInputs"), +] +val_cfg = dict(type="ValLoop") +val_dataloader = dict( + batch_size=1, + dataset=dict( + ann_file="annotations/instances_val2017.json", + backend_args=None, + data_prefix=dict(img="val2017/"), + data_root="data/coco/", + pipeline=[ + dict(backend_args=None, type="LoadImageFromFile"), + dict( + keep_ratio=True, + scale=( + 1333, + 800, + ), + type="Resize", + ), + dict(type="LoadAnnotations", with_bbox=True, with_mask=True), + dict( + meta_keys=( + "img_id", + "img_path", + "ori_shape", + "img_shape", + "scale_factor", + ), + type="PackDetInputs", + ), + ], + test_mode=True, + type="CocoDataset", + ), + drop_last=False, + num_workers=2, + persistent_workers=True, + sampler=dict(shuffle=False, type="DefaultSampler"), +) +val_evaluator = dict( + ann_file="data/coco/annotations/instances_val2017.json", + backend_args=None, + format_only=False, + metric=[ + "bbox", + "segm", + ], + type="CocoMetric", +) +vis_backends = [ + dict(type="LocalVisBackend"), +] +visualizer = dict( + name="visualizer", + type="DetLocalVisualizer", + vis_backends=[ + dict(type="LocalVisBackend"), + ], +) diff --git a/tests/data/models/mmdet/retinanet/retinanet_r50_fpn_1x_coco.py b/tests/data/models/mmdet/retinanet/retinanet_r50_fpn_1x_coco.py index f3264da93..c39c3e707 100644 --- a/tests/data/models/mmdet/retinanet/retinanet_r50_fpn_1x_coco.py +++ b/tests/data/models/mmdet/retinanet/retinanet_r50_fpn_1x_coco.py @@ -1,10 +1,369 @@ -_base_ = [ - "../_base_/models/retinanet_r50_fpn.py", - "../_base_/datasets/coco_detection.py", - "../_base_/schedules/schedule_1x.py", - "../_base_/default_runtime.py", - "./retinanet_tta.py", -] - -# optimizer -optim_wrapper = dict(optimizer=dict(type="SGD", lr=0.01, momentum=0.9, weight_decay=0.0001)) +auto_scale_lr = dict(base_batch_size=16, enable=False) +backend_args = None +data_root = "data/coco/" +dataset_type = "CocoDataset" +default_hooks = dict( + checkpoint=dict(interval=1, type="CheckpointHook"), + logger=dict(interval=50, type="LoggerHook"), + param_scheduler=dict(type="ParamSchedulerHook"), + sampler_seed=dict(type="DistSamplerSeedHook"), + timer=dict(type="IterTimerHook"), + visualization=dict(type="DetVisualizationHook"), +) +default_scope = "mmdet" +env_cfg = dict( + cudnn_benchmark=False, dist_cfg=dict(backend="nccl"), mp_cfg=dict(mp_start_method="fork", opencv_num_threads=0) +) +img_scales = [ + ( + 1333, + 800, + ), + ( + 666, + 400, + ), + ( + 2000, + 1200, + ), +] +load_from = None +log_level = "INFO" +log_processor = dict(by_epoch=True, type="LogProcessor", window_size=50) +model = dict( + backbone=dict( + depth=50, + frozen_stages=1, + init_cfg=dict(checkpoint="torchvision://resnet50", type="Pretrained"), + norm_cfg=dict(requires_grad=True, type="BN"), + norm_eval=True, + num_stages=4, + out_indices=( + 0, + 1, + 2, + 3, + ), + style="pytorch", + type="ResNet", + ), + bbox_head=dict( + anchor_generator=dict( + octave_base_scale=4, + ratios=[ + 0.5, + 1.0, + 2.0, + ], + scales_per_octave=3, + strides=[ + 8, + 16, + 32, + 64, + 128, + ], + type="AnchorGenerator", + ), + bbox_coder=dict( + target_means=[ + 0.0, + 0.0, + 0.0, + 0.0, + ], + target_stds=[ + 1.0, + 1.0, + 1.0, + 1.0, + ], + type="DeltaXYWHBBoxCoder", + ), + feat_channels=256, + in_channels=256, + loss_bbox=dict(loss_weight=1.0, type="L1Loss"), + loss_cls=dict(alpha=0.25, gamma=2.0, loss_weight=1.0, type="FocalLoss", use_sigmoid=True), + num_classes=80, + stacked_convs=4, + type="RetinaHead", + ), + data_preprocessor=dict( + bgr_to_rgb=True, + mean=[ + 123.675, + 116.28, + 103.53, + ], + pad_size_divisor=32, + std=[ + 58.395, + 57.12, + 57.375, + ], + type="DetDataPreprocessor", + ), + neck=dict( + add_extra_convs="on_input", + in_channels=[ + 256, + 512, + 1024, + 2048, + ], + num_outs=5, + out_channels=256, + start_level=1, + type="FPN", + ), + test_cfg=dict( + max_per_img=100, min_bbox_size=0, nms=dict(iou_threshold=0.5, type="nms"), nms_pre=1000, score_thr=0.05 + ), + train_cfg=dict( + allowed_border=-1, + assigner=dict(ignore_iof_thr=-1, min_pos_iou=0, neg_iou_thr=0.4, pos_iou_thr=0.5, type="MaxIoUAssigner"), + debug=False, + pos_weight=-1, + sampler=dict(type="PseudoSampler"), + ), + type="RetinaNet", +) +optim_wrapper = dict(optimizer=dict(lr=0.01, momentum=0.9, type="SGD", weight_decay=0.0001), type="OptimWrapper") +param_scheduler = [ + dict(begin=0, by_epoch=False, end=500, start_factor=0.001, type="LinearLR"), + dict( + begin=0, + by_epoch=True, + end=12, + gamma=0.1, + milestones=[ + 8, + 11, + ], + type="MultiStepLR", + ), +] +resume = False +test_cfg = dict(type="TestLoop") +test_dataloader = dict( + batch_size=1, + dataset=dict( + ann_file="annotations/instances_val2017.json", + backend_args=None, + data_prefix=dict(img="val2017/"), + data_root="data/coco/", + pipeline=[ + dict(backend_args=None, type="LoadImageFromFile"), + dict( + keep_ratio=True, + scale=( + 1333, + 800, + ), + type="Resize", + ), + dict(type="LoadAnnotations", with_bbox=True), + dict( + meta_keys=( + "img_id", + "img_path", + "ori_shape", + "img_shape", + "scale_factor", + ), + type="PackDetInputs", + ), + ], + test_mode=True, + type="CocoDataset", + ), + drop_last=False, + num_workers=2, + persistent_workers=True, + sampler=dict(shuffle=False, type="DefaultSampler"), +) +test_evaluator = dict( + ann_file="data/coco/annotations/instances_val2017.json", + backend_args=None, + format_only=False, + metric="bbox", + type="CocoMetric", +) +test_pipeline = [ + dict(backend_args=None, type="LoadImageFromFile"), + dict( + keep_ratio=True, + scale=( + 1333, + 800, + ), + type="Resize", + ), + dict(type="LoadAnnotations", with_bbox=True), + dict( + meta_keys=( + "img_id", + "img_path", + "ori_shape", + "img_shape", + "scale_factor", + ), + type="PackDetInputs", + ), +] +train_cfg = dict(max_epochs=12, type="EpochBasedTrainLoop", val_interval=1) +train_dataloader = dict( + batch_sampler=dict(type="AspectRatioBatchSampler"), + batch_size=2, + dataset=dict( + ann_file="annotations/instances_train2017.json", + backend_args=None, + data_prefix=dict(img="train2017/"), + data_root="data/coco/", + filter_cfg=dict(filter_empty_gt=True, min_size=32), + pipeline=[ + dict(backend_args=None, type="LoadImageFromFile"), + dict(type="LoadAnnotations", with_bbox=True), + dict( + keep_ratio=True, + scale=( + 1333, + 800, + ), + type="Resize", + ), + dict(prob=0.5, type="RandomFlip"), + dict(type="PackDetInputs"), + ], + type="CocoDataset", + ), + num_workers=2, + persistent_workers=True, + sampler=dict(shuffle=True, type="DefaultSampler"), +) +train_pipeline = [ + dict(backend_args=None, type="LoadImageFromFile"), + dict(type="LoadAnnotations", with_bbox=True), + dict( + keep_ratio=True, + scale=( + 1333, + 800, + ), + type="Resize", + ), + dict(prob=0.5, type="RandomFlip"), + dict(type="PackDetInputs"), +] +tta_model = dict(tta_cfg=dict(max_per_img=100, nms=dict(iou_threshold=0.5, type="nms")), type="DetTTAModel") +tta_pipeline = [ + dict(backend_args=None, type="LoadImageFromFile"), + dict( + transforms=[ + [ + dict( + keep_ratio=True, + scale=( + 1333, + 800, + ), + type="Resize", + ), + dict( + keep_ratio=True, + scale=( + 666, + 400, + ), + type="Resize", + ), + dict( + keep_ratio=True, + scale=( + 2000, + 1200, + ), + type="Resize", + ), + ], + [ + dict(prob=1.0, type="RandomFlip"), + dict(prob=0.0, type="RandomFlip"), + ], + [ + dict(type="LoadAnnotations", with_bbox=True), + ], + [ + dict( + meta_keys=( + "img_id", + "img_path", + "ori_shape", + "img_shape", + "scale_factor", + "flip", + "flip_direction", + ), + type="PackDetInputs", + ), + ], + ], + type="TestTimeAug", + ), +] +val_cfg = dict(type="ValLoop") +val_dataloader = dict( + batch_size=1, + dataset=dict( + ann_file="annotations/instances_val2017.json", + backend_args=None, + data_prefix=dict(img="val2017/"), + data_root="data/coco/", + pipeline=[ + dict(backend_args=None, type="LoadImageFromFile"), + dict( + keep_ratio=True, + scale=( + 1333, + 800, + ), + type="Resize", + ), + dict(type="LoadAnnotations", with_bbox=True), + dict( + meta_keys=( + "img_id", + "img_path", + "ori_shape", + "img_shape", + "scale_factor", + ), + type="PackDetInputs", + ), + ], + test_mode=True, + type="CocoDataset", + ), + drop_last=False, + num_workers=2, + persistent_workers=True, + sampler=dict(shuffle=False, type="DefaultSampler"), +) +val_evaluator = dict( + ann_file="data/coco/annotations/instances_val2017.json", + backend_args=None, + format_only=False, + metric="bbox", + type="CocoMetric", +) +vis_backends = [ + dict(type="LocalVisBackend"), +] +visualizer = dict( + name="visualizer", + type="DetLocalVisualizer", + vis_backends=[ + dict(type="LocalVisBackend"), + ], +) diff --git a/tests/data/models/mmdet/retinanet/retinanet_tta.py b/tests/data/models/mmdet/retinanet/retinanet_tta.py deleted file mode 100644 index 2457b68b4..000000000 --- a/tests/data/models/mmdet/retinanet/retinanet_tta.py +++ /dev/null @@ -1,28 +0,0 @@ -tta_model = dict(type="DetTTAModel", tta_cfg=dict(nms=dict(type="nms", iou_threshold=0.5), max_per_img=100)) - -img_scales = [(1333, 800), (666, 400), (2000, 1200)] -tta_pipeline = [ - dict(type="LoadImageFromFile", backend_args=None), - dict( - type="TestTimeAug", - transforms=[ - [dict(type="Resize", scale=s, keep_ratio=True) for s in img_scales], - [dict(type="RandomFlip", prob=1.0), dict(type="RandomFlip", prob=0.0)], - [dict(type="LoadAnnotations", with_bbox=True)], - [ - dict( - type="PackDetInputs", - meta_keys=( - "img_id", - "img_path", - "ori_shape", - "img_shape", - "scale_factor", - "flip", - "flip_direction", - ), - ) - ], - ], - ), -] diff --git a/tests/data/models/mmdet/yolox/yolox_s_8xb8-300e_coco.py b/tests/data/models/mmdet/yolox/yolox_s_8xb8-300e_coco.py deleted file mode 100644 index 155dc3818..000000000 --- a/tests/data/models/mmdet/yolox/yolox_s_8xb8-300e_coco.py +++ /dev/null @@ -1,219 +0,0 @@ -_base_ = ["../_base_/schedules/schedule_1x.py", "../_base_/default_runtime.py", "./yolox_tta.py"] - -img_scale = (640, 640) # width, height - -# model settings -model = dict( - type="YOLOX", - data_preprocessor=dict( - type="DetDataPreprocessor", - pad_size_divisor=32, - batch_augments=[dict(type="BatchSyncRandomResize", random_size_range=(480, 800), size_divisor=32, interval=10)], - ), - backbone=dict( - type="CSPDarknet", - deepen_factor=0.33, - widen_factor=0.5, - out_indices=(2, 3, 4), - use_depthwise=False, - spp_kernal_sizes=(5, 9, 13), - norm_cfg=dict(type="BN", momentum=0.03, eps=0.001), - act_cfg=dict(type="Swish"), - ), - neck=dict( - type="YOLOXPAFPN", - in_channels=[128, 256, 512], - out_channels=128, - num_csp_blocks=1, - use_depthwise=False, - upsample_cfg=dict(scale_factor=2, mode="nearest"), - norm_cfg=dict(type="BN", momentum=0.03, eps=0.001), - act_cfg=dict(type="Swish"), - ), - bbox_head=dict( - type="YOLOXHead", - num_classes=80, - in_channels=128, - feat_channels=128, - stacked_convs=2, - strides=(8, 16, 32), - use_depthwise=False, - norm_cfg=dict(type="BN", momentum=0.03, eps=0.001), - act_cfg=dict(type="Swish"), - loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=True, reduction="sum", loss_weight=1.0), - loss_bbox=dict(type="IoULoss", mode="square", eps=1e-16, reduction="sum", loss_weight=5.0), - loss_obj=dict(type="CrossEntropyLoss", use_sigmoid=True, reduction="sum", loss_weight=1.0), - loss_l1=dict(type="L1Loss", reduction="sum", loss_weight=1.0), - ), - train_cfg=dict(assigner=dict(type="SimOTAAssigner", center_radius=2.5)), - # In order to align the source code, the threshold of the val phase is - # 0.01, and the threshold of the test phase is 0.001. - test_cfg=dict(score_thr=0.01, nms=dict(type="nms", iou_threshold=0.65)), -) - -# dataset settings -data_root = "data/coco/" -dataset_type = "CocoDataset" - -# Example to use different file client -# Method 1: simply set the data root and let the file I/O module -# automatically infer from prefix (not support LMDB and Memcache yet) - -# data_root = 's3://openmmlab/datasets/detection/coco/' - -# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6 -# backend_args = dict( -# backend='petrel', -# path_mapping=dict({ -# './data/': 's3://openmmlab/datasets/detection/', -# 'data/': 's3://openmmlab/datasets/detection/' -# })) -backend_args = None - -train_pipeline = [ - dict(type="Mosaic", img_scale=img_scale, pad_val=114.0), - dict( - type="RandomAffine", - scaling_ratio_range=(0.1, 2), - # img_scale is (width, height) - border=(-img_scale[0] // 2, -img_scale[1] // 2), - ), - dict(type="MixUp", img_scale=img_scale, ratio_range=(0.8, 1.6), pad_val=114.0), - dict(type="YOLOXHSVRandomAug"), - dict(type="RandomFlip", prob=0.5), - # According to the official implementation, multi-scale - # training is not considered here but in the - # 'mmdet/models/detectors/yolox.py'. - # Resize and Pad are for the last 15 epochs when Mosaic, - # RandomAffine, and MixUp are closed by YOLOXModeSwitchHook. - dict(type="Resize", scale=img_scale, keep_ratio=True), - dict( - type="Pad", - pad_to_square=True, - # If the image is three-channel, the pad value needs - # to be set separately for each channel. - pad_val=dict(img=(114.0, 114.0, 114.0)), - ), - dict(type="FilterAnnotations", min_gt_bbox_wh=(1, 1), keep_empty=False), - dict(type="PackDetInputs"), -] - -train_dataset = dict( - # use MultiImageMixDataset wrapper to support mosaic and mixup - type="MultiImageMixDataset", - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file="annotations/instances_train2017.json", - data_prefix=dict(img="train2017/"), - pipeline=[ - dict(type="LoadImageFromFile", backend_args=backend_args), - dict(type="LoadAnnotations", with_bbox=True), - ], - filter_cfg=dict(filter_empty_gt=False, min_size=32), - backend_args=backend_args, - ), - pipeline=train_pipeline, -) - -test_pipeline = [ - dict(type="LoadImageFromFile", backend_args=backend_args), - dict(type="Resize", scale=img_scale, keep_ratio=True), - dict(type="Pad", pad_to_square=True, pad_val=dict(img=(114.0, 114.0, 114.0))), - dict(type="LoadAnnotations", with_bbox=True), - dict(type="PackDetInputs", meta_keys=("img_id", "img_path", "ori_shape", "img_shape", "scale_factor")), -] - -train_dataloader = dict( - batch_size=8, - num_workers=4, - persistent_workers=True, - sampler=dict(type="DefaultSampler", shuffle=True), - dataset=train_dataset, -) -val_dataloader = dict( - batch_size=8, - num_workers=4, - persistent_workers=True, - drop_last=False, - sampler=dict(type="DefaultSampler", shuffle=False), - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file="annotations/instances_val2017.json", - data_prefix=dict(img="val2017/"), - test_mode=True, - pipeline=test_pipeline, - backend_args=backend_args, - ), -) -test_dataloader = val_dataloader - -val_evaluator = dict( - type="CocoMetric", - ann_file=data_root + "annotations/instances_val2017.json", - metric="bbox", - backend_args=backend_args, -) -test_evaluator = val_evaluator - -# training settings -max_epochs = 300 -num_last_epochs = 15 -interval = 10 - -train_cfg = dict(max_epochs=max_epochs, val_interval=interval) - -# optimizer -# default 8 gpu -base_lr = 0.01 -optim_wrapper = dict( - type="OptimWrapper", - optimizer=dict(type="SGD", lr=base_lr, momentum=0.9, weight_decay=5e-4, nesterov=True), - paramwise_cfg=dict(norm_decay_mult=0.0, bias_decay_mult=0.0), -) - -# learning rate -param_scheduler = [ - dict( - # use quadratic formula to warm up 5 epochs - # and lr is updated by iteration - # TODO: fix default scope in get function - type="mmdet.QuadraticWarmupLR", - by_epoch=True, - begin=0, - end=5, - convert_to_iter_based=True, - ), - dict( - # use cosine lr from 5 to 285 epoch - type="CosineAnnealingLR", - eta_min=base_lr * 0.05, - begin=5, - T_max=max_epochs - num_last_epochs, - end=max_epochs - num_last_epochs, - by_epoch=True, - convert_to_iter_based=True, - ), - dict( - # use fixed lr during last 15 epochs - type="ConstantLR", - by_epoch=True, - factor=1, - begin=max_epochs - num_last_epochs, - end=max_epochs, - ), -] - -default_hooks = dict(checkpoint=dict(interval=interval, max_keep_ckpts=3)) # only keep latest 3 checkpoints - -custom_hooks = [ - dict(type="YOLOXModeSwitchHook", num_last_epochs=num_last_epochs, priority=48), - dict(type="SyncNormHook", priority=48), - dict(type="EMAHook", ema_type="ExpMomentumEMA", momentum=0.0001, update_buffers=True, priority=49), -] - -# NOTE: `auto_scale_lr` is for automatically scaling LR, -# USER SHOULD NOT CHANGE ITS VALUES. -# base_batch_size = (8 GPUs) x (8 samples per GPU) -auto_scale_lr = dict(base_batch_size=64) diff --git a/tests/data/models/mmdet/yolox/yolox_tiny_8x8_300e_coco.py b/tests/data/models/mmdet/yolox/yolox_tiny_8x8_300e_coco.py new file mode 100644 index 000000000..4829ac6c9 --- /dev/null +++ b/tests/data/models/mmdet/yolox/yolox_tiny_8x8_300e_coco.py @@ -0,0 +1,548 @@ +auto_scale_lr = dict(base_batch_size=64, enable=False) +backend_args = None +base_lr = 0.01 +custom_hooks = [ + dict(num_last_epochs=15, priority=48, type="YOLOXModeSwitchHook"), + dict(priority=48, type="SyncNormHook"), + dict(ema_type="ExpMomentumEMA", momentum=0.0001, priority=49, type="EMAHook", update_buffers=True), +] +data_root = "data/coco/" +dataset_type = "CocoDataset" +default_hooks = dict( + checkpoint=dict(interval=10, max_keep_ckpts=3, type="CheckpointHook"), + logger=dict(interval=50, type="LoggerHook"), + param_scheduler=dict(type="ParamSchedulerHook"), + sampler_seed=dict(type="DistSamplerSeedHook"), + timer=dict(type="IterTimerHook"), + visualization=dict(type="DetVisualizationHook"), +) +default_scope = "mmdet" +env_cfg = dict( + cudnn_benchmark=False, dist_cfg=dict(backend="nccl"), mp_cfg=dict(mp_start_method="fork", opencv_num_threads=0) +) +img_scale = ( + 640, + 640, +) +img_scales = [ + ( + 640, + 640, + ), + ( + 320, + 320, + ), + ( + 960, + 960, + ), +] +interval = 10 +load_from = None +log_level = "INFO" +log_processor = dict(by_epoch=True, type="LogProcessor", window_size=50) +max_epochs = 300 +model = dict( + backbone=dict( + act_cfg=dict(type="Swish"), + deepen_factor=0.33, + norm_cfg=dict(eps=0.001, momentum=0.03, type="BN"), + out_indices=( + 2, + 3, + 4, + ), + spp_kernal_sizes=( + 5, + 9, + 13, + ), + type="CSPDarknet", + use_depthwise=False, + widen_factor=0.375, + ), + bbox_head=dict( + act_cfg=dict(type="Swish"), + feat_channels=96, + in_channels=96, + loss_bbox=dict(eps=1e-16, loss_weight=5.0, mode="square", reduction="sum", type="IoULoss"), + loss_cls=dict(loss_weight=1.0, reduction="sum", type="CrossEntropyLoss", use_sigmoid=True), + loss_l1=dict(loss_weight=1.0, reduction="sum", type="L1Loss"), + loss_obj=dict(loss_weight=1.0, reduction="sum", type="CrossEntropyLoss", use_sigmoid=True), + norm_cfg=dict(eps=0.001, momentum=0.03, type="BN"), + num_classes=80, + stacked_convs=2, + strides=( + 8, + 16, + 32, + ), + type="YOLOXHead", + use_depthwise=False, + ), + data_preprocessor=dict( + batch_augments=[ + dict( + interval=10, + random_size_range=( + 320, + 640, + ), + size_divisor=32, + type="BatchSyncRandomResize", + ), + ], + pad_size_divisor=32, + type="DetDataPreprocessor", + ), + neck=dict( + act_cfg=dict(type="Swish"), + in_channels=[ + 96, + 192, + 384, + ], + norm_cfg=dict(eps=0.001, momentum=0.03, type="BN"), + num_csp_blocks=1, + out_channels=96, + type="YOLOXPAFPN", + upsample_cfg=dict(mode="nearest", scale_factor=2), + use_depthwise=False, + ), + test_cfg=dict(nms=dict(iou_threshold=0.65, type="nms"), score_thr=0.01), + train_cfg=dict(assigner=dict(center_radius=2.5, type="SimOTAAssigner")), + type="YOLOX", +) +num_last_epochs = 15 +optim_wrapper = dict( + optimizer=dict(lr=0.01, momentum=0.9, nesterov=True, type="SGD", weight_decay=0.0005), + paramwise_cfg=dict(bias_decay_mult=0.0, norm_decay_mult=0.0), + type="OptimWrapper", +) +param_scheduler = [ + dict(begin=0, by_epoch=True, convert_to_iter_based=True, end=5, type="mmdet.QuadraticWarmupLR"), + dict( + T_max=285, begin=5, by_epoch=True, convert_to_iter_based=True, end=285, eta_min=0.0005, type="CosineAnnealingLR" + ), + dict(begin=285, by_epoch=True, end=300, factor=1, type="ConstantLR"), +] +resume = False +test_cfg = dict(type="TestLoop") +test_dataloader = dict( + batch_size=8, + dataset=dict( + ann_file="annotations/instances_val2017.json", + backend_args=None, + data_prefix=dict(img="val2017/"), + data_root="data/coco/", + pipeline=[ + dict(backend_args=None, type="LoadImageFromFile"), + dict( + keep_ratio=True, + scale=( + 416, + 416, + ), + type="Resize", + ), + dict( + pad_to_square=True, + pad_val=dict( + img=( + 114.0, + 114.0, + 114.0, + ) + ), + type="Pad", + ), + dict(type="LoadAnnotations", with_bbox=True), + dict( + meta_keys=( + "img_id", + "img_path", + "ori_shape", + "img_shape", + "scale_factor", + ), + type="PackDetInputs", + ), + ], + test_mode=True, + type="CocoDataset", + ), + drop_last=False, + num_workers=4, + persistent_workers=True, + sampler=dict(shuffle=False, type="DefaultSampler"), +) +test_evaluator = dict( + ann_file="data/coco/annotations/instances_val2017.json", backend_args=None, metric="bbox", type="CocoMetric" +) +test_pipeline = [ + dict(backend_args=None, type="LoadImageFromFile"), + dict( + keep_ratio=True, + scale=( + 416, + 416, + ), + type="Resize", + ), + dict( + pad_to_square=True, + pad_val=dict( + img=( + 114.0, + 114.0, + 114.0, + ) + ), + type="Pad", + ), + dict(type="LoadAnnotations", with_bbox=True), + dict( + meta_keys=( + "img_id", + "img_path", + "ori_shape", + "img_shape", + "scale_factor", + ), + type="PackDetInputs", + ), +] +train_cfg = dict(max_epochs=300, type="EpochBasedTrainLoop", val_interval=10) +train_dataloader = dict( + batch_size=8, + dataset=dict( + dataset=dict( + ann_file="annotations/instances_train2017.json", + backend_args=None, + data_prefix=dict(img="train2017/"), + data_root="data/coco/", + filter_cfg=dict(filter_empty_gt=False, min_size=32), + pipeline=[ + dict(backend_args=None, type="LoadImageFromFile"), + dict(type="LoadAnnotations", with_bbox=True), + ], + type="CocoDataset", + ), + pipeline=[ + dict( + img_scale=( + 640, + 640, + ), + pad_val=114.0, + type="Mosaic", + ), + dict( + border=( + -320, + -320, + ), + scaling_ratio_range=( + 0.5, + 1.5, + ), + type="RandomAffine", + ), + dict(type="YOLOXHSVRandomAug"), + dict(prob=0.5, type="RandomFlip"), + dict( + keep_ratio=True, + scale=( + 640, + 640, + ), + type="Resize", + ), + dict( + pad_to_square=True, + pad_val=dict( + img=( + 114.0, + 114.0, + 114.0, + ) + ), + type="Pad", + ), + dict( + keep_empty=False, + min_gt_bbox_wh=( + 1, + 1, + ), + type="FilterAnnotations", + ), + dict(type="PackDetInputs"), + ], + type="MultiImageMixDataset", + ), + num_workers=4, + persistent_workers=True, + sampler=dict(shuffle=True, type="DefaultSampler"), +) +train_dataset = dict( + dataset=dict( + ann_file="annotations/instances_train2017.json", + backend_args=None, + data_prefix=dict(img="train2017/"), + data_root="data/coco/", + filter_cfg=dict(filter_empty_gt=False, min_size=32), + pipeline=[ + dict(backend_args=None, type="LoadImageFromFile"), + dict(type="LoadAnnotations", with_bbox=True), + ], + type="CocoDataset", + ), + pipeline=[ + dict( + img_scale=( + 640, + 640, + ), + pad_val=114.0, + type="Mosaic", + ), + dict( + border=( + -320, + -320, + ), + scaling_ratio_range=( + 0.1, + 2, + ), + type="RandomAffine", + ), + dict( + img_scale=( + 640, + 640, + ), + pad_val=114.0, + ratio_range=( + 0.8, + 1.6, + ), + type="MixUp", + ), + dict(type="YOLOXHSVRandomAug"), + dict(prob=0.5, type="RandomFlip"), + dict( + keep_ratio=True, + scale=( + 640, + 640, + ), + type="Resize", + ), + dict( + pad_to_square=True, + pad_val=dict( + img=( + 114.0, + 114.0, + 114.0, + ) + ), + type="Pad", + ), + dict( + keep_empty=False, + min_gt_bbox_wh=( + 1, + 1, + ), + type="FilterAnnotations", + ), + dict(type="PackDetInputs"), + ], + type="MultiImageMixDataset", +) +train_pipeline = [ + dict( + img_scale=( + 640, + 640, + ), + pad_val=114.0, + type="Mosaic", + ), + dict( + border=( + -320, + -320, + ), + scaling_ratio_range=( + 0.5, + 1.5, + ), + type="RandomAffine", + ), + dict(type="YOLOXHSVRandomAug"), + dict(prob=0.5, type="RandomFlip"), + dict( + keep_ratio=True, + scale=( + 640, + 640, + ), + type="Resize", + ), + dict( + pad_to_square=True, + pad_val=dict( + img=( + 114.0, + 114.0, + 114.0, + ) + ), + type="Pad", + ), + dict( + keep_empty=False, + min_gt_bbox_wh=( + 1, + 1, + ), + type="FilterAnnotations", + ), + dict(type="PackDetInputs"), +] +tta_model = dict(tta_cfg=dict(max_per_img=100, nms=dict(iou_threshold=0.65, type="nms")), type="DetTTAModel") +tta_pipeline = [ + dict(backend_args=None, type="LoadImageFromFile"), + dict( + transforms=[ + [ + dict( + keep_ratio=True, + scale=( + 640, + 640, + ), + type="Resize", + ), + dict( + keep_ratio=True, + scale=( + 320, + 320, + ), + type="Resize", + ), + dict( + keep_ratio=True, + scale=( + 960, + 960, + ), + type="Resize", + ), + ], + [ + dict(prob=1.0, type="RandomFlip"), + dict(prob=0.0, type="RandomFlip"), + ], + [ + dict( + pad_to_square=True, + pad_val=dict( + img=( + 114.0, + 114.0, + 114.0, + ) + ), + type="Pad", + ), + ], + [ + dict(type="LoadAnnotations", with_bbox=True), + ], + [ + dict( + meta_keys=( + "img_id", + "img_path", + "ori_shape", + "img_shape", + "scale_factor", + "flip", + "flip_direction", + ), + type="PackDetInputs", + ), + ], + ], + type="TestTimeAug", + ), +] +val_cfg = dict(type="ValLoop") +val_dataloader = dict( + batch_size=8, + dataset=dict( + ann_file="annotations/instances_val2017.json", + backend_args=None, + data_prefix=dict(img="val2017/"), + data_root="data/coco/", + pipeline=[ + dict(backend_args=None, type="LoadImageFromFile"), + dict( + keep_ratio=True, + scale=( + 416, + 416, + ), + type="Resize", + ), + dict( + pad_to_square=True, + pad_val=dict( + img=( + 114.0, + 114.0, + 114.0, + ) + ), + type="Pad", + ), + dict(type="LoadAnnotations", with_bbox=True), + dict( + meta_keys=( + "img_id", + "img_path", + "ori_shape", + "img_shape", + "scale_factor", + ), + type="PackDetInputs", + ), + ], + test_mode=True, + type="CocoDataset", + ), + drop_last=False, + num_workers=4, + persistent_workers=True, + sampler=dict(shuffle=False, type="DefaultSampler"), +) +val_evaluator = dict( + ann_file="data/coco/annotations/instances_val2017.json", backend_args=None, metric="bbox", type="CocoMetric" +) +vis_backends = [ + dict(type="LocalVisBackend"), +] +visualizer = dict( + name="visualizer", + type="DetLocalVisualizer", + vis_backends=[ + dict(type="LocalVisBackend"), + ], +) diff --git a/tests/data/models/mmdet/yolox/yolox_tiny_8xb8-300e_coco.py b/tests/data/models/mmdet/yolox/yolox_tiny_8xb8-300e_coco.py deleted file mode 100644 index b4de1ce5a..000000000 --- a/tests/data/models/mmdet/yolox/yolox_tiny_8xb8-300e_coco.py +++ /dev/null @@ -1,43 +0,0 @@ -_base_ = "./yolox_s_8xb8-300e_coco.py" - -# model settings -model = dict( - data_preprocessor=dict( - batch_augments=[dict(type="BatchSyncRandomResize", random_size_range=(320, 640), size_divisor=32, interval=10)] - ), - backbone=dict(deepen_factor=0.33, widen_factor=0.375), - neck=dict(in_channels=[96, 192, 384], out_channels=96), - bbox_head=dict(in_channels=96, feat_channels=96), -) - -img_scale = (640, 640) # width, height - -train_pipeline = [ - dict(type="Mosaic", img_scale=img_scale, pad_val=114.0), - dict( - type="RandomAffine", - scaling_ratio_range=(0.5, 1.5), - # img_scale is (width, height) - border=(-img_scale[0] // 2, -img_scale[1] // 2), - ), - dict(type="YOLOXHSVRandomAug"), - dict(type="RandomFlip", prob=0.5), - # Resize and Pad are for the last 15 epochs when Mosaic and - # RandomAffine are closed by YOLOXModeSwitchHook. - dict(type="Resize", scale=img_scale, keep_ratio=True), - dict(type="Pad", pad_to_square=True, pad_val=dict(img=(114.0, 114.0, 114.0))), - dict(type="FilterAnnotations", min_gt_bbox_wh=(1, 1), keep_empty=False), - dict(type="PackDetInputs"), -] - -test_pipeline = [ - dict(type="LoadImageFromFile", backend_args={{_base_.backend_args}}), - dict(type="Resize", scale=(416, 416), keep_ratio=True), - dict(type="Pad", pad_to_square=True, pad_val=dict(img=(114.0, 114.0, 114.0))), - dict(type="LoadAnnotations", with_bbox=True), - dict(type="PackDetInputs", meta_keys=("img_id", "img_path", "ori_shape", "img_shape", "scale_factor")), -] - -train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) -val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) -test_dataloader = val_dataloader diff --git a/tests/data/models/mmdet/yolox/yolox_tta.py b/tests/data/models/mmdet/yolox/yolox_tta.py deleted file mode 100644 index 7d0b62283..000000000 --- a/tests/data/models/mmdet/yolox/yolox_tta.py +++ /dev/null @@ -1,37 +0,0 @@ -tta_model = dict(type="DetTTAModel", tta_cfg=dict(nms=dict(type="nms", iou_threshold=0.65), max_per_img=100)) - -img_scales = [(640, 640), (320, 320), (960, 960)] -tta_pipeline = [ - dict(type="LoadImageFromFile", backend_args=None), - dict( - type="TestTimeAug", - transforms=[ - [dict(type="Resize", scale=s, keep_ratio=True) for s in img_scales], - [ - # ``RandomFlip`` must be placed before ``Pad``, otherwise - # bounding box coordinates after flipping cannot be - # recovered correctly. - dict(type="RandomFlip", prob=1.0), - dict(type="RandomFlip", prob=0.0), - ], - [ - dict(type="Pad", pad_to_square=True, pad_val=dict(img=(114.0, 114.0, 114.0))), - ], - [dict(type="LoadAnnotations", with_bbox=True)], - [ - dict( - type="PackDetInputs", - meta_keys=( - "img_id", - "img_path", - "ori_shape", - "img_shape", - "scale_factor", - "flip", - "flip_direction", - ), - ) - ], - ], - ), -] diff --git a/tests/data/models/mmdet_cascade_mask_rcnn/cascade-mask-rcnn_r50_fpn.py b/tests/data/models/mmdet_cascade_mask_rcnn/cascade-mask-rcnn_r50_fpn.py deleted file mode 100644 index 53406bc3f..000000000 --- a/tests/data/models/mmdet_cascade_mask_rcnn/cascade-mask-rcnn_r50_fpn.py +++ /dev/null @@ -1,167 +0,0 @@ -# model settings -model = dict( - type="CascadeRCNN", - data_preprocessor=dict( - type="DetDataPreprocessor", - mean=[123.675, 116.28, 103.53], - std=[58.395, 57.12, 57.375], - bgr_to_rgb=True, - pad_mask=True, - pad_size_divisor=32, - ), - backbone=dict( - type="ResNet", - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type="BN", requires_grad=True), - norm_eval=True, - style="pytorch", - init_cfg=dict(type="Pretrained", checkpoint="torchvision://resnet50"), - ), - neck=dict(type="FPN", in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5), - rpn_head=dict( - type="RPNHead", - in_channels=256, - feat_channels=256, - anchor_generator=dict(type="AnchorGenerator", scales=[8], ratios=[0.5, 1.0, 2.0], strides=[4, 8, 16, 32, 64]), - bbox_coder=dict(type="DeltaXYWHBBoxCoder", target_means=[0.0, 0.0, 0.0, 0.0], target_stds=[1.0, 1.0, 1.0, 1.0]), - loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type="SmoothL1Loss", beta=1.0 / 9.0, loss_weight=1.0), - ), - roi_head=dict( - type="CascadeRoIHead", - num_stages=3, - stage_loss_weights=[1, 0.5, 0.25], - bbox_roi_extractor=dict( - type="SingleRoIExtractor", - roi_layer=dict(type="RoIAlign", output_size=7, sampling_ratio=0), - out_channels=256, - featmap_strides=[4, 8, 16, 32], - ), - bbox_head=[ - dict( - type="Shared2FCBBoxHead", - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=80, - bbox_coder=dict( - type="DeltaXYWHBBoxCoder", target_means=[0.0, 0.0, 0.0, 0.0], target_stds=[0.1, 0.1, 0.2, 0.2] - ), - reg_class_agnostic=True, - loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type="SmoothL1Loss", beta=1.0, loss_weight=1.0), - ), - dict( - type="Shared2FCBBoxHead", - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=80, - bbox_coder=dict( - type="DeltaXYWHBBoxCoder", target_means=[0.0, 0.0, 0.0, 0.0], target_stds=[0.05, 0.05, 0.1, 0.1] - ), - reg_class_agnostic=True, - loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type="SmoothL1Loss", beta=1.0, loss_weight=1.0), - ), - dict( - type="Shared2FCBBoxHead", - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=80, - bbox_coder=dict( - type="DeltaXYWHBBoxCoder", - target_means=[0.0, 0.0, 0.0, 0.0], - target_stds=[0.033, 0.033, 0.067, 0.067], - ), - reg_class_agnostic=True, - loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type="SmoothL1Loss", beta=1.0, loss_weight=1.0), - ), - ], - mask_roi_extractor=dict( - type="SingleRoIExtractor", - roi_layer=dict(type="RoIAlign", output_size=14, sampling_ratio=0), - out_channels=256, - featmap_strides=[4, 8, 16, 32], - ), - mask_head=dict( - type="FCNMaskHead", - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=80, - loss_mask=dict(type="CrossEntropyLoss", use_mask=True, loss_weight=1.0), - ), - ), - # model training and testing settings - train_cfg=dict( - rpn=dict( - assigner=dict( - type="MaxIoUAssigner", - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - match_low_quality=True, - ignore_iof_thr=-1, - ), - sampler=dict(type="RandomSampler", num=256, pos_fraction=0.5, neg_pos_ub=-1, add_gt_as_proposals=False), - allowed_border=0, - pos_weight=-1, - debug=False, - ), - rpn_proposal=dict(nms_pre=2000, max_per_img=2000, nms=dict(type="nms", iou_threshold=0.7), min_bbox_size=0), - rcnn=[ - dict( - assigner=dict( - type="MaxIoUAssigner", - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - match_low_quality=False, - ignore_iof_thr=-1, - ), - sampler=dict(type="RandomSampler", num=512, pos_fraction=0.25, neg_pos_ub=-1, add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False, - ), - dict( - assigner=dict( - type="MaxIoUAssigner", - pos_iou_thr=0.6, - neg_iou_thr=0.6, - min_pos_iou=0.6, - match_low_quality=False, - ignore_iof_thr=-1, - ), - sampler=dict(type="RandomSampler", num=512, pos_fraction=0.25, neg_pos_ub=-1, add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False, - ), - dict( - assigner=dict( - type="MaxIoUAssigner", - pos_iou_thr=0.7, - neg_iou_thr=0.7, - min_pos_iou=0.7, - match_low_quality=False, - ignore_iof_thr=-1, - ), - sampler=dict(type="RandomSampler", num=512, pos_fraction=0.25, neg_pos_ub=-1, add_gt_as_proposals=True), - mask_size=28, - pos_weight=-1, - debug=False, - ), - ], - ), - test_cfg=dict( - rpn=dict(nms_pre=1000, max_per_img=1000, nms=dict(type="nms", iou_threshold=0.7), min_bbox_size=0), - rcnn=dict(score_thr=0.05, nms=dict(type="nms", iou_threshold=0.5), max_per_img=100, mask_thr_binary=0.5), - ), -) diff --git a/tests/data/models/mmdet_cascade_mask_rcnn/cascade-mask-rcnn_r50_fpn_1x_coco.py b/tests/data/models/mmdet_cascade_mask_rcnn/cascade-mask-rcnn_r50_fpn_1x_coco.py deleted file mode 100644 index 77b3ebac4..000000000 --- a/tests/data/models/mmdet_cascade_mask_rcnn/cascade-mask-rcnn_r50_fpn_1x_coco.py +++ /dev/null @@ -1,6 +0,0 @@ -_base_ = [ - "../_base_/models/cascade-mask-rcnn_r50_fpn.py", - "../_base_/datasets/coco_instance.py", - "../_base_/schedules/schedule_1x.py", - "../_base_/default_runtime.py", -] diff --git a/tests/data/models/mmdet_cascade_mask_rcnn/cascade_mask_rcnn_r50_fpn.py b/tests/data/models/mmdet_cascade_mask_rcnn/cascade_mask_rcnn_r50_fpn.py deleted file mode 100644 index 2f6cba0f6..000000000 --- a/tests/data/models/mmdet_cascade_mask_rcnn/cascade_mask_rcnn_r50_fpn.py +++ /dev/null @@ -1,215 +0,0 @@ -# model settings -model = dict( - type="CascadeRCNN", - pretrained="torchvision://resnet50", - backbone=dict( - type="ResNet", - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type="BN", requires_grad=True), - norm_eval=True, - style="pytorch", - ), - neck=dict(type="FPN", in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5), - rpn_head=dict( - type="RPNHead", - in_channels=256, - feat_channels=256, - anchor_generator=dict( - type="AnchorGenerator", - scales=[8], - ratios=[0.5, 1.0, 2.0], - strides=[4, 8, 16, 32, 64], - ), - bbox_coder=dict( - type="DeltaXYWHBBoxCoder", - target_means=[0.0, 0.0, 0.0, 0.0], - target_stds=[1.0, 1.0, 1.0, 1.0], - ), - loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type="SmoothL1Loss", beta=1.0 / 9.0, loss_weight=1.0), - ), - roi_head=dict( - type="CascadeRoIHead", - num_stages=3, - stage_loss_weights=[1, 0.5, 0.25], - bbox_roi_extractor=dict( - type="SingleRoIExtractor", - roi_layer=dict(type="RoIAlign", output_size=7, sampling_ratio=0), - out_channels=256, - featmap_strides=[4, 8, 16, 32], - ), - bbox_head=[ - dict( - type="Shared2FCBBoxHead", - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=80, - bbox_coder=dict( - type="DeltaXYWHBBoxCoder", - target_means=[0.0, 0.0, 0.0, 0.0], - target_stds=[0.1, 0.1, 0.2, 0.2], - ), - reg_class_agnostic=True, - loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type="SmoothL1Loss", beta=1.0, loss_weight=1.0), - ), - dict( - type="Shared2FCBBoxHead", - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=80, - bbox_coder=dict( - type="DeltaXYWHBBoxCoder", - target_means=[0.0, 0.0, 0.0, 0.0], - target_stds=[0.05, 0.05, 0.1, 0.1], - ), - reg_class_agnostic=True, - loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type="SmoothL1Loss", beta=1.0, loss_weight=1.0), - ), - dict( - type="Shared2FCBBoxHead", - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=80, - bbox_coder=dict( - type="DeltaXYWHBBoxCoder", - target_means=[0.0, 0.0, 0.0, 0.0], - target_stds=[0.033, 0.033, 0.067, 0.067], - ), - reg_class_agnostic=True, - loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type="SmoothL1Loss", beta=1.0, loss_weight=1.0), - ), - ], - mask_roi_extractor=dict( - type="SingleRoIExtractor", - roi_layer=dict(type="RoIAlign", output_size=14, sampling_ratio=0), - out_channels=256, - featmap_strides=[4, 8, 16, 32], - ), - mask_head=dict( - type="FCNMaskHead", - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=80, - loss_mask=dict(type="CrossEntropyLoss", use_mask=True, loss_weight=1.0), - ), - ), - # model training and testing settings - train_cfg=dict( - rpn=dict( - assigner=dict( - type="MaxIoUAssigner", - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - match_low_quality=True, - ignore_iof_thr=-1, - ), - sampler=dict( - type="RandomSampler", - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False, - ), - allowed_border=0, - pos_weight=-1, - debug=False, - ), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_per_img=2000, - nms=dict(type="nms", iou_threshold=0.7), - min_bbox_size=0, - ), - rcnn=[ - dict( - assigner=dict( - type="MaxIoUAssigner", - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - match_low_quality=False, - ignore_iof_thr=-1, - ), - sampler=dict( - type="RandomSampler", - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True, - ), - mask_size=28, - pos_weight=-1, - debug=False, - ), - dict( - assigner=dict( - type="MaxIoUAssigner", - pos_iou_thr=0.6, - neg_iou_thr=0.6, - min_pos_iou=0.6, - match_low_quality=False, - ignore_iof_thr=-1, - ), - sampler=dict( - type="RandomSampler", - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True, - ), - mask_size=28, - pos_weight=-1, - debug=False, - ), - dict( - assigner=dict( - type="MaxIoUAssigner", - pos_iou_thr=0.7, - neg_iou_thr=0.7, - min_pos_iou=0.7, - match_low_quality=False, - ignore_iof_thr=-1, - ), - sampler=dict( - type="RandomSampler", - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True, - ), - mask_size=28, - pos_weight=-1, - debug=False, - ), - ], - ), - test_cfg=dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_per_img=1000, - nms=dict(type="nms", iou_threshold=0.7), - min_bbox_size=0, - ), - rcnn=dict( - score_thr=0.05, - nms=dict(type="nms", iou_threshold=0.5), - max_per_img=100, - mask_thr_binary=0.5, - ), - ), -) diff --git a/tests/data/models/mmdet_cascade_mask_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py b/tests/data/models/mmdet_cascade_mask_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py deleted file mode 100644 index 8c747b1a3..000000000 --- a/tests/data/models/mmdet_cascade_mask_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py +++ /dev/null @@ -1 +0,0 @@ -_base_ = ["cascade_mask_rcnn_r50_fpn.py", "coco_instance.py", "schedule_1x.py", "default_runtime.py"] diff --git a/tests/data/models/mmdet_cascade_mask_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco_v280.py b/tests/data/models/mmdet_cascade_mask_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco_v280.py deleted file mode 100644 index c2c341585..000000000 --- a/tests/data/models/mmdet_cascade_mask_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco_v280.py +++ /dev/null @@ -1 +0,0 @@ -_base_ = ["cascade_mask_rcnn_r50_fpn_v280.py", "coco_instance.py", "schedule_1x.py", "default_runtime.py"] diff --git a/tests/data/models/mmdet_cascade_mask_rcnn/cascade_mask_rcnn_r50_fpn_v280.py b/tests/data/models/mmdet_cascade_mask_rcnn/cascade_mask_rcnn_r50_fpn_v280.py deleted file mode 100644 index 70f4afd21..000000000 --- a/tests/data/models/mmdet_cascade_mask_rcnn/cascade_mask_rcnn_r50_fpn_v280.py +++ /dev/null @@ -1,215 +0,0 @@ -# model settings -model = dict( - type="CascadeRCNN", - pretrained="torchvision://resnet50", - backbone=dict( - type="ResNet", - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type="BN", requires_grad=True), - norm_eval=True, - style="pytorch", - ), - neck=dict(type="FPN", in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5), - rpn_head=dict( - type="RPNHead", - in_channels=256, - feat_channels=256, - anchor_generator=dict( - type="AnchorGenerator", - scales=[8], - ratios=[0.5, 1.0, 2.0], - strides=[4, 8, 16, 32, 64], - ), - bbox_coder=dict( - type="DeltaXYWHBBoxCoder", - target_means=[0.0, 0.0, 0.0, 0.0], - target_stds=[1.0, 1.0, 1.0, 1.0], - ), - loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0), - loss_bbox=dict(type="SmoothL1Loss", beta=1.0 / 9.0, loss_weight=1.0), - ), - roi_head=dict( - type="CascadeRoIHead", - num_stages=3, - stage_loss_weights=[1, 0.5, 0.25], - bbox_roi_extractor=dict( - type="SingleRoIExtractor", - roi_layer=dict(type="RoIAlign", output_size=7, sampling_ratio=0), - out_channels=256, - featmap_strides=[4, 8, 16, 32], - ), - bbox_head=[ - dict( - type="Shared2FCBBoxHead", - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=80, - bbox_coder=dict( - type="DeltaXYWHBBoxCoder", - target_means=[0.0, 0.0, 0.0, 0.0], - target_stds=[0.1, 0.1, 0.2, 0.2], - ), - reg_class_agnostic=True, - loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type="SmoothL1Loss", beta=1.0, loss_weight=1.0), - ), - dict( - type="Shared2FCBBoxHead", - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=80, - bbox_coder=dict( - type="DeltaXYWHBBoxCoder", - target_means=[0.0, 0.0, 0.0, 0.0], - target_stds=[0.05, 0.05, 0.1, 0.1], - ), - reg_class_agnostic=True, - loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type="SmoothL1Loss", beta=1.0, loss_weight=1.0), - ), - dict( - type="Shared2FCBBoxHead", - in_channels=256, - fc_out_channels=1024, - roi_feat_size=7, - num_classes=80, - bbox_coder=dict( - type="DeltaXYWHBBoxCoder", - target_means=[0.0, 0.0, 0.0, 0.0], - target_stds=[0.033, 0.033, 0.067, 0.067], - ), - reg_class_agnostic=True, - loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), - loss_bbox=dict(type="SmoothL1Loss", beta=1.0, loss_weight=1.0), - ), - ], - mask_roi_extractor=dict( - type="SingleRoIExtractor", - roi_layer=dict(type="RoIAlign", output_size=14, sampling_ratio=0), - out_channels=256, - featmap_strides=[4, 8, 16, 32], - ), - mask_head=dict( - type="FCNMaskHead", - num_convs=4, - in_channels=256, - conv_out_channels=256, - num_classes=80, - loss_mask=dict(type="CrossEntropyLoss", use_mask=True, loss_weight=1.0), - ), - ), -) -# model training and testing settings -train_cfg = dict( - rpn=dict( - assigner=dict( - type="MaxIoUAssigner", - pos_iou_thr=0.7, - neg_iou_thr=0.3, - min_pos_iou=0.3, - match_low_quality=True, - ignore_iof_thr=-1, - ), - sampler=dict( - type="RandomSampler", - num=256, - pos_fraction=0.5, - neg_pos_ub=-1, - add_gt_as_proposals=False, - ), - allowed_border=0, - pos_weight=-1, - debug=False, - ), - rpn_proposal=dict( - nms_across_levels=False, - nms_pre=2000, - nms_post=2000, - max_num=2000, - nms_thr=0.7, - min_bbox_size=0, - ), - rcnn=[ - dict( - assigner=dict( - type="MaxIoUAssigner", - pos_iou_thr=0.5, - neg_iou_thr=0.5, - min_pos_iou=0.5, - match_low_quality=False, - ignore_iof_thr=-1, - ), - sampler=dict( - type="RandomSampler", - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True, - ), - mask_size=28, - pos_weight=-1, - debug=False, - ), - dict( - assigner=dict( - type="MaxIoUAssigner", - pos_iou_thr=0.6, - neg_iou_thr=0.6, - min_pos_iou=0.6, - match_low_quality=False, - ignore_iof_thr=-1, - ), - sampler=dict( - type="RandomSampler", - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True, - ), - mask_size=28, - pos_weight=-1, - debug=False, - ), - dict( - assigner=dict( - type="MaxIoUAssigner", - pos_iou_thr=0.7, - neg_iou_thr=0.7, - min_pos_iou=0.7, - match_low_quality=False, - ignore_iof_thr=-1, - ), - sampler=dict( - type="RandomSampler", - num=512, - pos_fraction=0.25, - neg_pos_ub=-1, - add_gt_as_proposals=True, - ), - mask_size=28, - pos_weight=-1, - debug=False, - ), - ], -) -test_cfg = dict( - rpn=dict( - nms_across_levels=False, - nms_pre=1000, - nms_post=1000, - max_num=1000, - nms_thr=0.7, - min_bbox_size=0, - ), - rcnn=dict( - score_thr=0.05, - nms=dict(type="nms", iou_threshold=0.5), - max_per_img=100, - mask_thr_binary=0.5, - ), -) diff --git a/tests/data/models/mmdet_cascade_mask_rcnn/coco_instance.py b/tests/data/models/mmdet_cascade_mask_rcnn/coco_instance.py deleted file mode 100644 index 1661d18cd..000000000 --- a/tests/data/models/mmdet_cascade_mask_rcnn/coco_instance.py +++ /dev/null @@ -1,52 +0,0 @@ -dataset_type = "CocoDataset" -data_root = "data/coco/" -img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type="LoadImageFromFile"), - dict(type="LoadAnnotations", with_bbox=True, with_mask=True), - dict(type="Resize", img_scale=(1333, 800), keep_ratio=True), - dict(type="RandomFlip", flip_ratio=0.5), - dict(type="Normalize", **img_norm_cfg), - dict(type="Pad", size_divisor=32), - dict(type="DefaultFormatBundle"), - dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels", "gt_masks"]), -] -test_pipeline = [ - dict(type="LoadImageFromFile"), - dict( - type="MultiScaleFlipAug", - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type="Resize", keep_ratio=True), - dict(type="RandomFlip"), - dict(type="Normalize", **img_norm_cfg), - dict(type="Pad", size_divisor=32), - dict(type="DefaultFormatBundle"), - dict(type="Collect", keys=["img"]), - ], - ), -] -data = dict( - samples_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + "annotations/instances_train2017.json", - img_prefix=data_root + "train2017/", - pipeline=train_pipeline, - ), - val=dict( - type=dataset_type, - ann_file=data_root + "annotations/instances_val2017.json", - img_prefix=data_root + "val2017/", - pipeline=test_pipeline, - ), - test=dict( - type=dataset_type, - ann_file=data_root + "annotations/instances_val2017.json", - img_prefix=data_root + "val2017/", - pipeline=test_pipeline, - ), -) -evaluation = dict(metric=["bbox", "segm"]) diff --git a/tests/data/models/mmdet_cascade_mask_rcnn/default_runtime.py b/tests/data/models/mmdet_cascade_mask_rcnn/default_runtime.py deleted file mode 100644 index 75ee67b9f..000000000 --- a/tests/data/models/mmdet_cascade_mask_rcnn/default_runtime.py +++ /dev/null @@ -1,15 +0,0 @@ -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type="TextLoggerHook"), - # dict(type='TensorboardLoggerHook') - ], -) -# yapf:enable -dist_params = dict(backend="nccl") -log_level = "INFO" -load_from = None -resume_from = None -workflow = [("train", 1)] diff --git a/tests/data/models/mmdet_cascade_mask_rcnn/schedule_1x.py b/tests/data/models/mmdet_cascade_mask_rcnn/schedule_1x.py deleted file mode 100644 index cc7fa00a8..000000000 --- a/tests/data/models/mmdet_cascade_mask_rcnn/schedule_1x.py +++ /dev/null @@ -1,6 +0,0 @@ -# optimizer -optimizer = dict(type="SGD", lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=None) -# learning policy -lr_config = dict(policy="step", warmup="linear", warmup_iters=500, warmup_ratio=0.001, step=[8, 11]) -total_epochs = 12 diff --git a/tests/data/models/mmdet_retinanet/coco_detection.py b/tests/data/models/mmdet_retinanet/coco_detection.py deleted file mode 100644 index 67dbc3efb..000000000 --- a/tests/data/models/mmdet_retinanet/coco_detection.py +++ /dev/null @@ -1,52 +0,0 @@ -dataset_type = "CocoDataset" -data_root = "data/coco/" -img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type="LoadImageFromFile"), - dict(type="LoadAnnotations", with_bbox=True), - dict(type="Resize", img_scale=(1333, 800), keep_ratio=True), - dict(type="RandomFlip", flip_ratio=0.5), - dict(type="Normalize", **img_norm_cfg), - dict(type="Pad", size_divisor=32), - dict(type="DefaultFormatBundle"), - dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]), -] -test_pipeline = [ - dict(type="LoadImageFromFile"), - dict( - type="MultiScaleFlipAug", - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type="Resize", keep_ratio=True), - dict(type="RandomFlip"), - dict(type="Normalize", **img_norm_cfg), - dict(type="Pad", size_divisor=32), - dict(type="DefaultFormatBundle"), - dict(type="Collect", keys=["img"]), - ], - ), -] -data = dict( - samples_per_gpu=2, - workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=data_root + "annotations/instances_train2017.json", - img_prefix=data_root + "train2017/", - pipeline=train_pipeline, - ), - val=dict( - type=dataset_type, - ann_file=data_root + "annotations/instances_val2017.json", - img_prefix=data_root + "val2017/", - pipeline=test_pipeline, - ), - test=dict( - type=dataset_type, - ann_file=data_root + "annotations/instances_val2017.json", - img_prefix=data_root + "val2017/", - pipeline=test_pipeline, - ), -) -evaluation = dict(interval=1, metric="bbox") diff --git a/tests/data/models/mmdet_retinanet/default_runtime.py b/tests/data/models/mmdet_retinanet/default_runtime.py deleted file mode 100644 index 75ee67b9f..000000000 --- a/tests/data/models/mmdet_retinanet/default_runtime.py +++ /dev/null @@ -1,15 +0,0 @@ -checkpoint_config = dict(interval=1) -# yapf:disable -log_config = dict( - interval=50, - hooks=[ - dict(type="TextLoggerHook"), - # dict(type='TensorboardLoggerHook') - ], -) -# yapf:enable -dist_params = dict(backend="nccl") -log_level = "INFO" -load_from = None -resume_from = None -workflow = [("train", 1)] diff --git a/tests/data/models/mmdet_retinanet/retinanet_r50_fpn.py b/tests/data/models/mmdet_retinanet/retinanet_r50_fpn.py deleted file mode 100644 index 4f4f60db2..000000000 --- a/tests/data/models/mmdet_retinanet/retinanet_r50_fpn.py +++ /dev/null @@ -1,50 +0,0 @@ -# model settings -model = dict( - type="RetinaNet", - pretrained="torchvision://resnet50", - backbone=dict( - type="ResNet", - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type="BN", requires_grad=True), - norm_eval=True, - style="pytorch", - ), - neck=dict( - type="FPN", - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs="on_input", - num_outs=5, - ), - bbox_head=dict( - type="RetinaHead", - num_classes=80, - in_channels=256, - stacked_convs=4, - feat_channels=256, - anchor_generator=dict( - type="AnchorGenerator", - octave_base_scale=4, - scales_per_octave=3, - ratios=[0.5, 1.0, 2.0], - strides=[8, 16, 32, 64, 128], - ), - bbox_coder=dict(type="DeltaXYWHBBoxCoder", target_means=[0.0, 0.0, 0.0, 0.0], target_stds=[1.0, 1.0, 1.0, 1.0]), - loss_cls=dict(type="FocalLoss", use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=1.0), - loss_bbox=dict(type="L1Loss", loss_weight=1.0), - ), - # training and testing settings - train_cfg=dict( - assigner=dict(type="MaxIoUAssigner", pos_iou_thr=0.5, neg_iou_thr=0.4, min_pos_iou=0, ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - debug=False, - ), - test_cfg=dict( - nms_pre=1000, min_bbox_size=0, score_thr=0.05, nms=dict(type="nms", iou_threshold=0.5), max_per_img=100 - ), -) diff --git a/tests/data/models/mmdet_retinanet/retinanet_r50_fpn_1x_coco.py b/tests/data/models/mmdet_retinanet/retinanet_r50_fpn_1x_coco.py deleted file mode 100644 index 75ffb614b..000000000 --- a/tests/data/models/mmdet_retinanet/retinanet_r50_fpn_1x_coco.py +++ /dev/null @@ -1,3 +0,0 @@ -_base_ = ["retinanet_r50_fpn.py", "coco_detection.py", "schedule_1x.py", "default_runtime.py"] -# optimizer -optimizer = dict(type="SGD", lr=0.01, momentum=0.9, weight_decay=0.0001) diff --git a/tests/data/models/mmdet_retinanet/retinanet_r50_fpn_1x_coco_v280.py b/tests/data/models/mmdet_retinanet/retinanet_r50_fpn_1x_coco_v280.py deleted file mode 100644 index fdf669a8f..000000000 --- a/tests/data/models/mmdet_retinanet/retinanet_r50_fpn_1x_coco_v280.py +++ /dev/null @@ -1,3 +0,0 @@ -_base_ = ["retinanet_r50_fpn_v280.py", "coco_detection.py", "schedule_1x.py", "default_runtime.py"] -# optimizer -optimizer = dict(type="SGD", lr=0.01, momentum=0.9, weight_decay=0.0001) diff --git a/tests/data/models/mmdet_retinanet/retinanet_r50_fpn_v280.py b/tests/data/models/mmdet_retinanet/retinanet_r50_fpn_v280.py deleted file mode 100644 index cefd366d0..000000000 --- a/tests/data/models/mmdet_retinanet/retinanet_r50_fpn_v280.py +++ /dev/null @@ -1,48 +0,0 @@ -# model settings -model = dict( - type="RetinaNet", - pretrained="torchvision://resnet50", - backbone=dict( - type="ResNet", - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type="BN", requires_grad=True), - norm_eval=True, - style="pytorch", - ), - neck=dict( - type="FPN", - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs="on_input", - num_outs=5, - ), - bbox_head=dict( - type="RetinaHead", - num_classes=80, - in_channels=256, - stacked_convs=4, - feat_channels=256, - anchor_generator=dict( - type="AnchorGenerator", - octave_base_scale=4, - scales_per_octave=3, - ratios=[0.5, 1.0, 2.0], - strides=[8, 16, 32, 64, 128], - ), - bbox_coder=dict(type="DeltaXYWHBBoxCoder", target_means=[0.0, 0.0, 0.0, 0.0], target_stds=[1.0, 1.0, 1.0, 1.0]), - loss_cls=dict(type="FocalLoss", use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=1.0), - loss_bbox=dict(type="L1Loss", loss_weight=1.0), - ), -) -# training and testing settings -train_cfg = dict( - assigner=dict(type="MaxIoUAssigner", pos_iou_thr=0.5, neg_iou_thr=0.4, min_pos_iou=0, ignore_iof_thr=-1), - allowed_border=-1, - pos_weight=-1, - debug=False, -) -test_cfg = dict(nms_pre=1000, min_bbox_size=0, score_thr=0.05, nms=dict(type="nms", iou_threshold=0.5), max_per_img=100) diff --git a/tests/data/models/mmdet_retinanet/schedule_1x.py b/tests/data/models/mmdet_retinanet/schedule_1x.py deleted file mode 100644 index cc7fa00a8..000000000 --- a/tests/data/models/mmdet_retinanet/schedule_1x.py +++ /dev/null @@ -1,6 +0,0 @@ -# optimizer -optimizer = dict(type="SGD", lr=0.02, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict(grad_clip=None) -# learning policy -lr_config = dict(policy="step", warmup="linear", warmup_iters=500, warmup_ratio=0.001, step=[8, 11]) -total_epochs = 12 diff --git a/tests/data/models/mmdet_yolox/yolox_tiny_8x8_300e_coco.py b/tests/data/models/mmdet_yolox/yolox_tiny_8x8_300e_coco.py deleted file mode 100644 index b7040cb1f..000000000 --- a/tests/data/models/mmdet_yolox/yolox_tiny_8x8_300e_coco.py +++ /dev/null @@ -1,163 +0,0 @@ -optimizer = dict( - type="SGD", - lr=0.01, - momentum=0.9, - weight_decay=0.0005, - nesterov=True, - paramwise_cfg=dict(norm_decay_mult=0.0, bias_decay_mult=0.0), -) -optimizer_config = dict(grad_clip=None) -lr_config = dict( - policy="YOLOX", - warmup="exp", - by_epoch=False, - warmup_by_epoch=True, - warmup_ratio=1, - warmup_iters=5, - num_last_epochs=15, - min_lr_ratio=0.05, -) -runner = dict(type="EpochBasedRunner", max_epochs=300) -checkpoint_config = dict(interval=10) -log_config = dict(interval=50, hooks=[dict(type="TextLoggerHook")]) -custom_hooks = [ - dict(type="YOLOXModeSwitchHook", num_last_epochs=15, priority=48), - dict(type="SyncNormHook", num_last_epochs=15, interval=10, priority=48), - dict(type="ExpMomentumEMAHook", resume_from=None, momentum=0.0001, priority=49), -] -dist_params = dict(backend="nccl") -log_level = "INFO" -load_from = None -resume_from = None -workflow = [("train", 1)] -img_scale = (640, 640) -model = dict( - type="YOLOX", - input_size=(640, 640), - random_size_range=(10, 20), - random_size_interval=10, - backbone=dict(type="CSPDarknet", deepen_factor=0.33, widen_factor=0.375), - neck=dict(type="YOLOXPAFPN", in_channels=[96, 192, 384], out_channels=96, num_csp_blocks=1), - bbox_head=dict(type="YOLOXHead", num_classes=80, in_channels=96, feat_channels=96), - train_cfg=dict(assigner=dict(type="SimOTAAssigner", center_radius=2.5)), - test_cfg=dict(score_thr=0.01, nms=dict(type="nms", iou_threshold=0.65)), -) -data_root = "data/coco/" -dataset_type = "CocoDataset" -train_pipeline = [ - dict(type="Mosaic", img_scale=(640, 640), pad_val=114.0), - dict(type="RandomAffine", scaling_ratio_range=(0.5, 1.5), border=(-320, -320)), - dict(type="YOLOXHSVRandomAug"), - dict(type="RandomFlip", flip_ratio=0.5), - dict(type="Resize", img_scale=(640, 640), keep_ratio=True), - dict(type="Pad", pad_to_square=True, pad_val=dict(img=(114.0, 114.0, 114.0))), - dict(type="FilterAnnotations", min_gt_bbox_wh=(1, 1), keep_empty=False), - dict(type="DefaultFormatBundle"), - dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]), -] -train_dataset = dict( - type="MultiImageMixDataset", - dataset=dict( - type="CocoDataset", - ann_file="data/coco/annotations/instances_train2017.json", - img_prefix="data/coco/train2017/", - pipeline=[dict(type="LoadImageFromFile"), dict(type="LoadAnnotations", with_bbox=True)], - filter_empty_gt=False, - ), - pipeline=[ - dict(type="Mosaic", img_scale=(640, 640), pad_val=114.0), - dict(type="RandomAffine", scaling_ratio_range=(0.5, 1.5), border=(-320, -320)), - dict(type="YOLOXHSVRandomAug"), - dict(type="RandomFlip", flip_ratio=0.5), - dict(type="Resize", img_scale=(640, 640), keep_ratio=True), - dict(type="Pad", pad_to_square=True, pad_val=dict(img=(114.0, 114.0, 114.0))), - dict(type="FilterAnnotations", min_gt_bbox_wh=(1, 1), keep_empty=False), - dict(type="DefaultFormatBundle"), - dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]), - ], -) -test_pipeline = [ - dict(type="LoadImageFromFile"), - dict( - type="MultiScaleFlipAug", - img_scale=(416, 416), - flip=False, - transforms=[ - dict(type="Resize", keep_ratio=True), - dict(type="RandomFlip"), - dict(type="Pad", pad_to_square=True, pad_val=dict(img=(114.0, 114.0, 114.0))), - dict(type="DefaultFormatBundle"), - dict(type="Collect", keys=["img"]), - ], - ), -] -data = dict( - samples_per_gpu=8, - workers_per_gpu=4, - persistent_workers=True, - train=dict( - type="MultiImageMixDataset", - dataset=dict( - type="CocoDataset", - ann_file="data/coco/annotations/instances_train2017.json", - img_prefix="data/coco/train2017/", - pipeline=[dict(type="LoadImageFromFile"), dict(type="LoadAnnotations", with_bbox=True)], - filter_empty_gt=False, - ), - pipeline=[ - dict(type="Mosaic", img_scale=(640, 640), pad_val=114.0), - dict(type="RandomAffine", scaling_ratio_range=(0.5, 1.5), border=(-320, -320)), - dict(type="YOLOXHSVRandomAug"), - dict(type="RandomFlip", flip_ratio=0.5), - dict(type="Resize", img_scale=(640, 640), keep_ratio=True), - dict(type="Pad", pad_to_square=True, pad_val=dict(img=(114.0, 114.0, 114.0))), - dict(type="FilterAnnotations", min_gt_bbox_wh=(1, 1), keep_empty=False), - dict(type="DefaultFormatBundle"), - dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]), - ], - ), - val=dict( - type="CocoDataset", - ann_file="data/coco/annotations/instances_val2017.json", - img_prefix="data/coco/val2017/", - pipeline=[ - dict(type="LoadImageFromFile"), - dict( - type="MultiScaleFlipAug", - img_scale=(416, 416), - flip=False, - transforms=[ - dict(type="Resize", keep_ratio=True), - dict(type="RandomFlip"), - dict(type="Pad", pad_to_square=True, pad_val=dict(img=(114.0, 114.0, 114.0))), - dict(type="DefaultFormatBundle"), - dict(type="Collect", keys=["img"]), - ], - ), - ], - ), - test=dict( - type="CocoDataset", - ann_file="data/coco/annotations/instances_val2017.json", - img_prefix="data/coco/val2017/", - pipeline=[ - dict(type="LoadImageFromFile"), - dict( - type="MultiScaleFlipAug", - img_scale=(416, 416), - flip=False, - transforms=[ - dict(type="Resize", keep_ratio=True), - dict(type="RandomFlip"), - dict(type="Pad", pad_to_square=True, pad_val=dict(img=(114.0, 114.0, 114.0))), - dict(type="DefaultFormatBundle"), - dict(type="Collect", keys=["img"]), - ], - ), - ], - ), -) -max_epochs = 300 -num_last_epochs = 15 -interval = 10 -evaluation = dict(save_best="auto", interval=10, dynamic_intervals=[(285, 1)], metric="bbox") From 0156fb9409cf96cdaec25a35c3c2975a96a115f8 Mon Sep 17 00:00:00 2001 From: Hongyuan Zhang <66273343+Alias-z@users.noreply.github.com> Date: Tue, 4 Jun 2024 22:46:15 +0200 Subject: [PATCH 6/8] Revert "update mmdet test configs" This reverts commit 68ad269e2354120c07b040ff714cfcf9563e087e. --- .../cascade-mask-rcnn_r50_fpn_1x_coco.py | 478 +-------------- .../retinanet/retinanet_r50_fpn_1x_coco.py | 379 +----------- .../models/mmdet/retinanet/retinanet_tta.py | 28 + .../mmdet/yolox/yolox_s_8xb8-300e_coco.py | 219 +++++++ .../mmdet/yolox/yolox_tiny_8x8_300e_coco.py | 548 ------------------ .../mmdet/yolox/yolox_tiny_8xb8-300e_coco.py | 43 ++ tests/data/models/mmdet/yolox/yolox_tta.py | 37 ++ .../cascade-mask-rcnn_r50_fpn.py | 167 ++++++ .../cascade-mask-rcnn_r50_fpn_1x_coco.py | 6 + .../cascade_mask_rcnn_r50_fpn.py | 215 +++++++ .../cascade_mask_rcnn_r50_fpn_1x_coco.py | 1 + .../cascade_mask_rcnn_r50_fpn_1x_coco_v280.py | 1 + .../cascade_mask_rcnn_r50_fpn_v280.py | 215 +++++++ .../mmdet_cascade_mask_rcnn/coco_instance.py | 52 ++ .../default_runtime.py | 15 + .../mmdet_cascade_mask_rcnn/schedule_1x.py | 6 + .../models/mmdet_retinanet/coco_detection.py | 52 ++ .../models/mmdet_retinanet/default_runtime.py | 15 + .../mmdet_retinanet/retinanet_r50_fpn.py | 50 ++ .../retinanet_r50_fpn_1x_coco.py | 3 + .../retinanet_r50_fpn_1x_coco_v280.py | 3 + .../mmdet_retinanet/retinanet_r50_fpn_v280.py | 48 ++ .../models/mmdet_retinanet/schedule_1x.py | 6 + .../mmdet_yolox/yolox_tiny_8x8_300e_coco.py | 163 ++++++ 24 files changed, 1360 insertions(+), 1390 deletions(-) create mode 100644 tests/data/models/mmdet/retinanet/retinanet_tta.py create mode 100644 tests/data/models/mmdet/yolox/yolox_s_8xb8-300e_coco.py delete mode 100644 tests/data/models/mmdet/yolox/yolox_tiny_8x8_300e_coco.py create mode 100644 tests/data/models/mmdet/yolox/yolox_tiny_8xb8-300e_coco.py create mode 100644 tests/data/models/mmdet/yolox/yolox_tta.py create mode 100644 tests/data/models/mmdet_cascade_mask_rcnn/cascade-mask-rcnn_r50_fpn.py create mode 100644 tests/data/models/mmdet_cascade_mask_rcnn/cascade-mask-rcnn_r50_fpn_1x_coco.py create mode 100644 tests/data/models/mmdet_cascade_mask_rcnn/cascade_mask_rcnn_r50_fpn.py create mode 100644 tests/data/models/mmdet_cascade_mask_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py create mode 100644 tests/data/models/mmdet_cascade_mask_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco_v280.py create mode 100644 tests/data/models/mmdet_cascade_mask_rcnn/cascade_mask_rcnn_r50_fpn_v280.py create mode 100644 tests/data/models/mmdet_cascade_mask_rcnn/coco_instance.py create mode 100644 tests/data/models/mmdet_cascade_mask_rcnn/default_runtime.py create mode 100644 tests/data/models/mmdet_cascade_mask_rcnn/schedule_1x.py create mode 100644 tests/data/models/mmdet_retinanet/coco_detection.py create mode 100644 tests/data/models/mmdet_retinanet/default_runtime.py create mode 100644 tests/data/models/mmdet_retinanet/retinanet_r50_fpn.py create mode 100644 tests/data/models/mmdet_retinanet/retinanet_r50_fpn_1x_coco.py create mode 100644 tests/data/models/mmdet_retinanet/retinanet_r50_fpn_1x_coco_v280.py create mode 100644 tests/data/models/mmdet_retinanet/retinanet_r50_fpn_v280.py create mode 100644 tests/data/models/mmdet_retinanet/schedule_1x.py create mode 100644 tests/data/models/mmdet_yolox/yolox_tiny_8x8_300e_coco.py diff --git a/tests/data/models/mmdet/cascade_mask_rcnn/cascade-mask-rcnn_r50_fpn_1x_coco.py b/tests/data/models/mmdet/cascade_mask_rcnn/cascade-mask-rcnn_r50_fpn_1x_coco.py index c06d9fc57..77b3ebac4 100644 --- a/tests/data/models/mmdet/cascade_mask_rcnn/cascade-mask-rcnn_r50_fpn_1x_coco.py +++ b/tests/data/models/mmdet/cascade_mask_rcnn/cascade-mask-rcnn_r50_fpn_1x_coco.py @@ -1,474 +1,6 @@ -auto_scale_lr = dict(base_batch_size=16, enable=False) -backend_args = None -data_root = "data/coco/" -dataset_type = "CocoDataset" -default_hooks = dict( - checkpoint=dict(interval=1, type="CheckpointHook"), - logger=dict(interval=50, type="LoggerHook"), - param_scheduler=dict(type="ParamSchedulerHook"), - sampler_seed=dict(type="DistSamplerSeedHook"), - timer=dict(type="IterTimerHook"), - visualization=dict(type="DetVisualizationHook"), -) -default_scope = "mmdet" -env_cfg = dict( - cudnn_benchmark=False, dist_cfg=dict(backend="nccl"), mp_cfg=dict(mp_start_method="fork", opencv_num_threads=0) -) -load_from = None -log_level = "INFO" -log_processor = dict(by_epoch=True, type="LogProcessor", window_size=50) -model = dict( - backbone=dict( - depth=50, - frozen_stages=1, - init_cfg=dict(checkpoint="torchvision://resnet50", type="Pretrained"), - norm_cfg=dict(requires_grad=True, type="BN"), - norm_eval=True, - num_stages=4, - out_indices=( - 0, - 1, - 2, - 3, - ), - style="pytorch", - type="ResNet", - ), - data_preprocessor=dict( - bgr_to_rgb=True, - mean=[ - 123.675, - 116.28, - 103.53, - ], - pad_mask=True, - pad_size_divisor=32, - std=[ - 58.395, - 57.12, - 57.375, - ], - type="DetDataPreprocessor", - ), - neck=dict( - in_channels=[ - 256, - 512, - 1024, - 2048, - ], - num_outs=5, - out_channels=256, - type="FPN", - ), - roi_head=dict( - bbox_head=[ - dict( - bbox_coder=dict( - target_means=[ - 0.0, - 0.0, - 0.0, - 0.0, - ], - target_stds=[ - 0.1, - 0.1, - 0.2, - 0.2, - ], - type="DeltaXYWHBBoxCoder", - ), - fc_out_channels=1024, - in_channels=256, - loss_bbox=dict(beta=1.0, loss_weight=1.0, type="SmoothL1Loss"), - loss_cls=dict(loss_weight=1.0, type="CrossEntropyLoss", use_sigmoid=False), - num_classes=80, - reg_class_agnostic=True, - roi_feat_size=7, - type="Shared2FCBBoxHead", - ), - dict( - bbox_coder=dict( - target_means=[ - 0.0, - 0.0, - 0.0, - 0.0, - ], - target_stds=[ - 0.05, - 0.05, - 0.1, - 0.1, - ], - type="DeltaXYWHBBoxCoder", - ), - fc_out_channels=1024, - in_channels=256, - loss_bbox=dict(beta=1.0, loss_weight=1.0, type="SmoothL1Loss"), - loss_cls=dict(loss_weight=1.0, type="CrossEntropyLoss", use_sigmoid=False), - num_classes=80, - reg_class_agnostic=True, - roi_feat_size=7, - type="Shared2FCBBoxHead", - ), - dict( - bbox_coder=dict( - target_means=[ - 0.0, - 0.0, - 0.0, - 0.0, - ], - target_stds=[ - 0.033, - 0.033, - 0.067, - 0.067, - ], - type="DeltaXYWHBBoxCoder", - ), - fc_out_channels=1024, - in_channels=256, - loss_bbox=dict(beta=1.0, loss_weight=1.0, type="SmoothL1Loss"), - loss_cls=dict(loss_weight=1.0, type="CrossEntropyLoss", use_sigmoid=False), - num_classes=80, - reg_class_agnostic=True, - roi_feat_size=7, - type="Shared2FCBBoxHead", - ), - ], - bbox_roi_extractor=dict( - featmap_strides=[ - 4, - 8, - 16, - 32, - ], - out_channels=256, - roi_layer=dict(output_size=7, sampling_ratio=0, type="RoIAlign"), - type="SingleRoIExtractor", - ), - mask_head=dict( - conv_out_channels=256, - in_channels=256, - loss_mask=dict(loss_weight=1.0, type="CrossEntropyLoss", use_mask=True), - num_classes=80, - num_convs=4, - type="FCNMaskHead", - ), - mask_roi_extractor=dict( - featmap_strides=[ - 4, - 8, - 16, - 32, - ], - out_channels=256, - roi_layer=dict(output_size=14, sampling_ratio=0, type="RoIAlign"), - type="SingleRoIExtractor", - ), - num_stages=3, - stage_loss_weights=[ - 1, - 0.5, - 0.25, - ], - type="CascadeRoIHead", - ), - rpn_head=dict( - anchor_generator=dict( - ratios=[ - 0.5, - 1.0, - 2.0, - ], - scales=[ - 8, - ], - strides=[ - 4, - 8, - 16, - 32, - 64, - ], - type="AnchorGenerator", - ), - bbox_coder=dict( - target_means=[ - 0.0, - 0.0, - 0.0, - 0.0, - ], - target_stds=[ - 1.0, - 1.0, - 1.0, - 1.0, - ], - type="DeltaXYWHBBoxCoder", - ), - feat_channels=256, - in_channels=256, - loss_bbox=dict(beta=0.1111111111111111, loss_weight=1.0, type="SmoothL1Loss"), - loss_cls=dict(loss_weight=1.0, type="CrossEntropyLoss", use_sigmoid=True), - type="RPNHead", - ), - test_cfg=dict( - rcnn=dict(mask_thr_binary=0.5, max_per_img=100, nms=dict(iou_threshold=0.5, type="nms"), score_thr=0.05), - rpn=dict(max_per_img=1000, min_bbox_size=0, nms=dict(iou_threshold=0.7, type="nms"), nms_pre=1000), - ), - train_cfg=dict( - rcnn=[ - dict( - assigner=dict( - ignore_iof_thr=-1, - match_low_quality=False, - min_pos_iou=0.5, - neg_iou_thr=0.5, - pos_iou_thr=0.5, - type="MaxIoUAssigner", - ), - debug=False, - mask_size=28, - pos_weight=-1, - sampler=dict(add_gt_as_proposals=True, neg_pos_ub=-1, num=512, pos_fraction=0.25, type="RandomSampler"), - ), - dict( - assigner=dict( - ignore_iof_thr=-1, - match_low_quality=False, - min_pos_iou=0.6, - neg_iou_thr=0.6, - pos_iou_thr=0.6, - type="MaxIoUAssigner", - ), - debug=False, - mask_size=28, - pos_weight=-1, - sampler=dict(add_gt_as_proposals=True, neg_pos_ub=-1, num=512, pos_fraction=0.25, type="RandomSampler"), - ), - dict( - assigner=dict( - ignore_iof_thr=-1, - match_low_quality=False, - min_pos_iou=0.7, - neg_iou_thr=0.7, - pos_iou_thr=0.7, - type="MaxIoUAssigner", - ), - debug=False, - mask_size=28, - pos_weight=-1, - sampler=dict(add_gt_as_proposals=True, neg_pos_ub=-1, num=512, pos_fraction=0.25, type="RandomSampler"), - ), - ], - rpn=dict( - allowed_border=0, - assigner=dict( - ignore_iof_thr=-1, - match_low_quality=True, - min_pos_iou=0.3, - neg_iou_thr=0.3, - pos_iou_thr=0.7, - type="MaxIoUAssigner", - ), - debug=False, - pos_weight=-1, - sampler=dict(add_gt_as_proposals=False, neg_pos_ub=-1, num=256, pos_fraction=0.5, type="RandomSampler"), - ), - rpn_proposal=dict(max_per_img=2000, min_bbox_size=0, nms=dict(iou_threshold=0.7, type="nms"), nms_pre=2000), - ), - type="CascadeRCNN", -) -optim_wrapper = dict(optimizer=dict(lr=0.02, momentum=0.9, type="SGD", weight_decay=0.0001), type="OptimWrapper") -param_scheduler = [ - dict(begin=0, by_epoch=False, end=500, start_factor=0.001, type="LinearLR"), - dict( - begin=0, - by_epoch=True, - end=12, - gamma=0.1, - milestones=[ - 8, - 11, - ], - type="MultiStepLR", - ), +_base_ = [ + "../_base_/models/cascade-mask-rcnn_r50_fpn.py", + "../_base_/datasets/coco_instance.py", + "../_base_/schedules/schedule_1x.py", + "../_base_/default_runtime.py", ] -resume = False -test_cfg = dict(type="TestLoop") -test_dataloader = dict( - batch_size=1, - dataset=dict( - ann_file="annotations/instances_val2017.json", - backend_args=None, - data_prefix=dict(img="val2017/"), - data_root="data/coco/", - pipeline=[ - dict(backend_args=None, type="LoadImageFromFile"), - dict( - keep_ratio=True, - scale=( - 1333, - 800, - ), - type="Resize", - ), - dict(type="LoadAnnotations", with_bbox=True, with_mask=True), - dict( - meta_keys=( - "img_id", - "img_path", - "ori_shape", - "img_shape", - "scale_factor", - ), - type="PackDetInputs", - ), - ], - test_mode=True, - type="CocoDataset", - ), - drop_last=False, - num_workers=2, - persistent_workers=True, - sampler=dict(shuffle=False, type="DefaultSampler"), -) -test_evaluator = dict( - ann_file="data/coco/annotations/instances_val2017.json", - backend_args=None, - format_only=False, - metric=[ - "bbox", - "segm", - ], - type="CocoMetric", -) -test_pipeline = [ - dict(backend_args=None, type="LoadImageFromFile"), - dict( - keep_ratio=True, - scale=( - 1333, - 800, - ), - type="Resize", - ), - dict(type="LoadAnnotations", with_bbox=True, with_mask=True), - dict( - meta_keys=( - "img_id", - "img_path", - "ori_shape", - "img_shape", - "scale_factor", - ), - type="PackDetInputs", - ), -] -train_cfg = dict(max_epochs=12, type="EpochBasedTrainLoop", val_interval=1) -train_dataloader = dict( - batch_sampler=dict(type="AspectRatioBatchSampler"), - batch_size=2, - dataset=dict( - ann_file="annotations/instances_train2017.json", - backend_args=None, - data_prefix=dict(img="train2017/"), - data_root="data/coco/", - filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=[ - dict(backend_args=None, type="LoadImageFromFile"), - dict(type="LoadAnnotations", with_bbox=True, with_mask=True), - dict( - keep_ratio=True, - scale=( - 1333, - 800, - ), - type="Resize", - ), - dict(prob=0.5, type="RandomFlip"), - dict(type="PackDetInputs"), - ], - type="CocoDataset", - ), - num_workers=2, - persistent_workers=True, - sampler=dict(shuffle=True, type="DefaultSampler"), -) -train_pipeline = [ - dict(backend_args=None, type="LoadImageFromFile"), - dict(type="LoadAnnotations", with_bbox=True, with_mask=True), - dict( - keep_ratio=True, - scale=( - 1333, - 800, - ), - type="Resize", - ), - dict(prob=0.5, type="RandomFlip"), - dict(type="PackDetInputs"), -] -val_cfg = dict(type="ValLoop") -val_dataloader = dict( - batch_size=1, - dataset=dict( - ann_file="annotations/instances_val2017.json", - backend_args=None, - data_prefix=dict(img="val2017/"), - data_root="data/coco/", - pipeline=[ - dict(backend_args=None, type="LoadImageFromFile"), - dict( - keep_ratio=True, - scale=( - 1333, - 800, - ), - type="Resize", - ), - dict(type="LoadAnnotations", with_bbox=True, with_mask=True), - dict( - meta_keys=( - "img_id", - "img_path", - "ori_shape", - "img_shape", - "scale_factor", - ), - type="PackDetInputs", - ), - ], - test_mode=True, - type="CocoDataset", - ), - drop_last=False, - num_workers=2, - persistent_workers=True, - sampler=dict(shuffle=False, type="DefaultSampler"), -) -val_evaluator = dict( - ann_file="data/coco/annotations/instances_val2017.json", - backend_args=None, - format_only=False, - metric=[ - "bbox", - "segm", - ], - type="CocoMetric", -) -vis_backends = [ - dict(type="LocalVisBackend"), -] -visualizer = dict( - name="visualizer", - type="DetLocalVisualizer", - vis_backends=[ - dict(type="LocalVisBackend"), - ], -) diff --git a/tests/data/models/mmdet/retinanet/retinanet_r50_fpn_1x_coco.py b/tests/data/models/mmdet/retinanet/retinanet_r50_fpn_1x_coco.py index c39c3e707..f3264da93 100644 --- a/tests/data/models/mmdet/retinanet/retinanet_r50_fpn_1x_coco.py +++ b/tests/data/models/mmdet/retinanet/retinanet_r50_fpn_1x_coco.py @@ -1,369 +1,10 @@ -auto_scale_lr = dict(base_batch_size=16, enable=False) -backend_args = None -data_root = "data/coco/" -dataset_type = "CocoDataset" -default_hooks = dict( - checkpoint=dict(interval=1, type="CheckpointHook"), - logger=dict(interval=50, type="LoggerHook"), - param_scheduler=dict(type="ParamSchedulerHook"), - sampler_seed=dict(type="DistSamplerSeedHook"), - timer=dict(type="IterTimerHook"), - visualization=dict(type="DetVisualizationHook"), -) -default_scope = "mmdet" -env_cfg = dict( - cudnn_benchmark=False, dist_cfg=dict(backend="nccl"), mp_cfg=dict(mp_start_method="fork", opencv_num_threads=0) -) -img_scales = [ - ( - 1333, - 800, - ), - ( - 666, - 400, - ), - ( - 2000, - 1200, - ), -] -load_from = None -log_level = "INFO" -log_processor = dict(by_epoch=True, type="LogProcessor", window_size=50) -model = dict( - backbone=dict( - depth=50, - frozen_stages=1, - init_cfg=dict(checkpoint="torchvision://resnet50", type="Pretrained"), - norm_cfg=dict(requires_grad=True, type="BN"), - norm_eval=True, - num_stages=4, - out_indices=( - 0, - 1, - 2, - 3, - ), - style="pytorch", - type="ResNet", - ), - bbox_head=dict( - anchor_generator=dict( - octave_base_scale=4, - ratios=[ - 0.5, - 1.0, - 2.0, - ], - scales_per_octave=3, - strides=[ - 8, - 16, - 32, - 64, - 128, - ], - type="AnchorGenerator", - ), - bbox_coder=dict( - target_means=[ - 0.0, - 0.0, - 0.0, - 0.0, - ], - target_stds=[ - 1.0, - 1.0, - 1.0, - 1.0, - ], - type="DeltaXYWHBBoxCoder", - ), - feat_channels=256, - in_channels=256, - loss_bbox=dict(loss_weight=1.0, type="L1Loss"), - loss_cls=dict(alpha=0.25, gamma=2.0, loss_weight=1.0, type="FocalLoss", use_sigmoid=True), - num_classes=80, - stacked_convs=4, - type="RetinaHead", - ), - data_preprocessor=dict( - bgr_to_rgb=True, - mean=[ - 123.675, - 116.28, - 103.53, - ], - pad_size_divisor=32, - std=[ - 58.395, - 57.12, - 57.375, - ], - type="DetDataPreprocessor", - ), - neck=dict( - add_extra_convs="on_input", - in_channels=[ - 256, - 512, - 1024, - 2048, - ], - num_outs=5, - out_channels=256, - start_level=1, - type="FPN", - ), - test_cfg=dict( - max_per_img=100, min_bbox_size=0, nms=dict(iou_threshold=0.5, type="nms"), nms_pre=1000, score_thr=0.05 - ), - train_cfg=dict( - allowed_border=-1, - assigner=dict(ignore_iof_thr=-1, min_pos_iou=0, neg_iou_thr=0.4, pos_iou_thr=0.5, type="MaxIoUAssigner"), - debug=False, - pos_weight=-1, - sampler=dict(type="PseudoSampler"), - ), - type="RetinaNet", -) -optim_wrapper = dict(optimizer=dict(lr=0.01, momentum=0.9, type="SGD", weight_decay=0.0001), type="OptimWrapper") -param_scheduler = [ - dict(begin=0, by_epoch=False, end=500, start_factor=0.001, type="LinearLR"), - dict( - begin=0, - by_epoch=True, - end=12, - gamma=0.1, - milestones=[ - 8, - 11, - ], - type="MultiStepLR", - ), -] -resume = False -test_cfg = dict(type="TestLoop") -test_dataloader = dict( - batch_size=1, - dataset=dict( - ann_file="annotations/instances_val2017.json", - backend_args=None, - data_prefix=dict(img="val2017/"), - data_root="data/coco/", - pipeline=[ - dict(backend_args=None, type="LoadImageFromFile"), - dict( - keep_ratio=True, - scale=( - 1333, - 800, - ), - type="Resize", - ), - dict(type="LoadAnnotations", with_bbox=True), - dict( - meta_keys=( - "img_id", - "img_path", - "ori_shape", - "img_shape", - "scale_factor", - ), - type="PackDetInputs", - ), - ], - test_mode=True, - type="CocoDataset", - ), - drop_last=False, - num_workers=2, - persistent_workers=True, - sampler=dict(shuffle=False, type="DefaultSampler"), -) -test_evaluator = dict( - ann_file="data/coco/annotations/instances_val2017.json", - backend_args=None, - format_only=False, - metric="bbox", - type="CocoMetric", -) -test_pipeline = [ - dict(backend_args=None, type="LoadImageFromFile"), - dict( - keep_ratio=True, - scale=( - 1333, - 800, - ), - type="Resize", - ), - dict(type="LoadAnnotations", with_bbox=True), - dict( - meta_keys=( - "img_id", - "img_path", - "ori_shape", - "img_shape", - "scale_factor", - ), - type="PackDetInputs", - ), -] -train_cfg = dict(max_epochs=12, type="EpochBasedTrainLoop", val_interval=1) -train_dataloader = dict( - batch_sampler=dict(type="AspectRatioBatchSampler"), - batch_size=2, - dataset=dict( - ann_file="annotations/instances_train2017.json", - backend_args=None, - data_prefix=dict(img="train2017/"), - data_root="data/coco/", - filter_cfg=dict(filter_empty_gt=True, min_size=32), - pipeline=[ - dict(backend_args=None, type="LoadImageFromFile"), - dict(type="LoadAnnotations", with_bbox=True), - dict( - keep_ratio=True, - scale=( - 1333, - 800, - ), - type="Resize", - ), - dict(prob=0.5, type="RandomFlip"), - dict(type="PackDetInputs"), - ], - type="CocoDataset", - ), - num_workers=2, - persistent_workers=True, - sampler=dict(shuffle=True, type="DefaultSampler"), -) -train_pipeline = [ - dict(backend_args=None, type="LoadImageFromFile"), - dict(type="LoadAnnotations", with_bbox=True), - dict( - keep_ratio=True, - scale=( - 1333, - 800, - ), - type="Resize", - ), - dict(prob=0.5, type="RandomFlip"), - dict(type="PackDetInputs"), -] -tta_model = dict(tta_cfg=dict(max_per_img=100, nms=dict(iou_threshold=0.5, type="nms")), type="DetTTAModel") -tta_pipeline = [ - dict(backend_args=None, type="LoadImageFromFile"), - dict( - transforms=[ - [ - dict( - keep_ratio=True, - scale=( - 1333, - 800, - ), - type="Resize", - ), - dict( - keep_ratio=True, - scale=( - 666, - 400, - ), - type="Resize", - ), - dict( - keep_ratio=True, - scale=( - 2000, - 1200, - ), - type="Resize", - ), - ], - [ - dict(prob=1.0, type="RandomFlip"), - dict(prob=0.0, type="RandomFlip"), - ], - [ - dict(type="LoadAnnotations", with_bbox=True), - ], - [ - dict( - meta_keys=( - "img_id", - "img_path", - "ori_shape", - "img_shape", - "scale_factor", - "flip", - "flip_direction", - ), - type="PackDetInputs", - ), - ], - ], - type="TestTimeAug", - ), -] -val_cfg = dict(type="ValLoop") -val_dataloader = dict( - batch_size=1, - dataset=dict( - ann_file="annotations/instances_val2017.json", - backend_args=None, - data_prefix=dict(img="val2017/"), - data_root="data/coco/", - pipeline=[ - dict(backend_args=None, type="LoadImageFromFile"), - dict( - keep_ratio=True, - scale=( - 1333, - 800, - ), - type="Resize", - ), - dict(type="LoadAnnotations", with_bbox=True), - dict( - meta_keys=( - "img_id", - "img_path", - "ori_shape", - "img_shape", - "scale_factor", - ), - type="PackDetInputs", - ), - ], - test_mode=True, - type="CocoDataset", - ), - drop_last=False, - num_workers=2, - persistent_workers=True, - sampler=dict(shuffle=False, type="DefaultSampler"), -) -val_evaluator = dict( - ann_file="data/coco/annotations/instances_val2017.json", - backend_args=None, - format_only=False, - metric="bbox", - type="CocoMetric", -) -vis_backends = [ - dict(type="LocalVisBackend"), -] -visualizer = dict( - name="visualizer", - type="DetLocalVisualizer", - vis_backends=[ - dict(type="LocalVisBackend"), - ], -) +_base_ = [ + "../_base_/models/retinanet_r50_fpn.py", + "../_base_/datasets/coco_detection.py", + "../_base_/schedules/schedule_1x.py", + "../_base_/default_runtime.py", + "./retinanet_tta.py", +] + +# optimizer +optim_wrapper = dict(optimizer=dict(type="SGD", lr=0.01, momentum=0.9, weight_decay=0.0001)) diff --git a/tests/data/models/mmdet/retinanet/retinanet_tta.py b/tests/data/models/mmdet/retinanet/retinanet_tta.py new file mode 100644 index 000000000..2457b68b4 --- /dev/null +++ b/tests/data/models/mmdet/retinanet/retinanet_tta.py @@ -0,0 +1,28 @@ +tta_model = dict(type="DetTTAModel", tta_cfg=dict(nms=dict(type="nms", iou_threshold=0.5), max_per_img=100)) + +img_scales = [(1333, 800), (666, 400), (2000, 1200)] +tta_pipeline = [ + dict(type="LoadImageFromFile", backend_args=None), + dict( + type="TestTimeAug", + transforms=[ + [dict(type="Resize", scale=s, keep_ratio=True) for s in img_scales], + [dict(type="RandomFlip", prob=1.0), dict(type="RandomFlip", prob=0.0)], + [dict(type="LoadAnnotations", with_bbox=True)], + [ + dict( + type="PackDetInputs", + meta_keys=( + "img_id", + "img_path", + "ori_shape", + "img_shape", + "scale_factor", + "flip", + "flip_direction", + ), + ) + ], + ], + ), +] diff --git a/tests/data/models/mmdet/yolox/yolox_s_8xb8-300e_coco.py b/tests/data/models/mmdet/yolox/yolox_s_8xb8-300e_coco.py new file mode 100644 index 000000000..155dc3818 --- /dev/null +++ b/tests/data/models/mmdet/yolox/yolox_s_8xb8-300e_coco.py @@ -0,0 +1,219 @@ +_base_ = ["../_base_/schedules/schedule_1x.py", "../_base_/default_runtime.py", "./yolox_tta.py"] + +img_scale = (640, 640) # width, height + +# model settings +model = dict( + type="YOLOX", + data_preprocessor=dict( + type="DetDataPreprocessor", + pad_size_divisor=32, + batch_augments=[dict(type="BatchSyncRandomResize", random_size_range=(480, 800), size_divisor=32, interval=10)], + ), + backbone=dict( + type="CSPDarknet", + deepen_factor=0.33, + widen_factor=0.5, + out_indices=(2, 3, 4), + use_depthwise=False, + spp_kernal_sizes=(5, 9, 13), + norm_cfg=dict(type="BN", momentum=0.03, eps=0.001), + act_cfg=dict(type="Swish"), + ), + neck=dict( + type="YOLOXPAFPN", + in_channels=[128, 256, 512], + out_channels=128, + num_csp_blocks=1, + use_depthwise=False, + upsample_cfg=dict(scale_factor=2, mode="nearest"), + norm_cfg=dict(type="BN", momentum=0.03, eps=0.001), + act_cfg=dict(type="Swish"), + ), + bbox_head=dict( + type="YOLOXHead", + num_classes=80, + in_channels=128, + feat_channels=128, + stacked_convs=2, + strides=(8, 16, 32), + use_depthwise=False, + norm_cfg=dict(type="BN", momentum=0.03, eps=0.001), + act_cfg=dict(type="Swish"), + loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=True, reduction="sum", loss_weight=1.0), + loss_bbox=dict(type="IoULoss", mode="square", eps=1e-16, reduction="sum", loss_weight=5.0), + loss_obj=dict(type="CrossEntropyLoss", use_sigmoid=True, reduction="sum", loss_weight=1.0), + loss_l1=dict(type="L1Loss", reduction="sum", loss_weight=1.0), + ), + train_cfg=dict(assigner=dict(type="SimOTAAssigner", center_radius=2.5)), + # In order to align the source code, the threshold of the val phase is + # 0.01, and the threshold of the test phase is 0.001. + test_cfg=dict(score_thr=0.01, nms=dict(type="nms", iou_threshold=0.65)), +) + +# dataset settings +data_root = "data/coco/" +dataset_type = "CocoDataset" + +# Example to use different file client +# Method 1: simply set the data root and let the file I/O module +# automatically infer from prefix (not support LMDB and Memcache yet) + +# data_root = 's3://openmmlab/datasets/detection/coco/' + +# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6 +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# './data/': 's3://openmmlab/datasets/detection/', +# 'data/': 's3://openmmlab/datasets/detection/' +# })) +backend_args = None + +train_pipeline = [ + dict(type="Mosaic", img_scale=img_scale, pad_val=114.0), + dict( + type="RandomAffine", + scaling_ratio_range=(0.1, 2), + # img_scale is (width, height) + border=(-img_scale[0] // 2, -img_scale[1] // 2), + ), + dict(type="MixUp", img_scale=img_scale, ratio_range=(0.8, 1.6), pad_val=114.0), + dict(type="YOLOXHSVRandomAug"), + dict(type="RandomFlip", prob=0.5), + # According to the official implementation, multi-scale + # training is not considered here but in the + # 'mmdet/models/detectors/yolox.py'. + # Resize and Pad are for the last 15 epochs when Mosaic, + # RandomAffine, and MixUp are closed by YOLOXModeSwitchHook. + dict(type="Resize", scale=img_scale, keep_ratio=True), + dict( + type="Pad", + pad_to_square=True, + # If the image is three-channel, the pad value needs + # to be set separately for each channel. + pad_val=dict(img=(114.0, 114.0, 114.0)), + ), + dict(type="FilterAnnotations", min_gt_bbox_wh=(1, 1), keep_empty=False), + dict(type="PackDetInputs"), +] + +train_dataset = dict( + # use MultiImageMixDataset wrapper to support mosaic and mixup + type="MultiImageMixDataset", + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file="annotations/instances_train2017.json", + data_prefix=dict(img="train2017/"), + pipeline=[ + dict(type="LoadImageFromFile", backend_args=backend_args), + dict(type="LoadAnnotations", with_bbox=True), + ], + filter_cfg=dict(filter_empty_gt=False, min_size=32), + backend_args=backend_args, + ), + pipeline=train_pipeline, +) + +test_pipeline = [ + dict(type="LoadImageFromFile", backend_args=backend_args), + dict(type="Resize", scale=img_scale, keep_ratio=True), + dict(type="Pad", pad_to_square=True, pad_val=dict(img=(114.0, 114.0, 114.0))), + dict(type="LoadAnnotations", with_bbox=True), + dict(type="PackDetInputs", meta_keys=("img_id", "img_path", "ori_shape", "img_shape", "scale_factor")), +] + +train_dataloader = dict( + batch_size=8, + num_workers=4, + persistent_workers=True, + sampler=dict(type="DefaultSampler", shuffle=True), + dataset=train_dataset, +) +val_dataloader = dict( + batch_size=8, + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type="DefaultSampler", shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file="annotations/instances_val2017.json", + data_prefix=dict(img="val2017/"), + test_mode=True, + pipeline=test_pipeline, + backend_args=backend_args, + ), +) +test_dataloader = val_dataloader + +val_evaluator = dict( + type="CocoMetric", + ann_file=data_root + "annotations/instances_val2017.json", + metric="bbox", + backend_args=backend_args, +) +test_evaluator = val_evaluator + +# training settings +max_epochs = 300 +num_last_epochs = 15 +interval = 10 + +train_cfg = dict(max_epochs=max_epochs, val_interval=interval) + +# optimizer +# default 8 gpu +base_lr = 0.01 +optim_wrapper = dict( + type="OptimWrapper", + optimizer=dict(type="SGD", lr=base_lr, momentum=0.9, weight_decay=5e-4, nesterov=True), + paramwise_cfg=dict(norm_decay_mult=0.0, bias_decay_mult=0.0), +) + +# learning rate +param_scheduler = [ + dict( + # use quadratic formula to warm up 5 epochs + # and lr is updated by iteration + # TODO: fix default scope in get function + type="mmdet.QuadraticWarmupLR", + by_epoch=True, + begin=0, + end=5, + convert_to_iter_based=True, + ), + dict( + # use cosine lr from 5 to 285 epoch + type="CosineAnnealingLR", + eta_min=base_lr * 0.05, + begin=5, + T_max=max_epochs - num_last_epochs, + end=max_epochs - num_last_epochs, + by_epoch=True, + convert_to_iter_based=True, + ), + dict( + # use fixed lr during last 15 epochs + type="ConstantLR", + by_epoch=True, + factor=1, + begin=max_epochs - num_last_epochs, + end=max_epochs, + ), +] + +default_hooks = dict(checkpoint=dict(interval=interval, max_keep_ckpts=3)) # only keep latest 3 checkpoints + +custom_hooks = [ + dict(type="YOLOXModeSwitchHook", num_last_epochs=num_last_epochs, priority=48), + dict(type="SyncNormHook", priority=48), + dict(type="EMAHook", ema_type="ExpMomentumEMA", momentum=0.0001, update_buffers=True, priority=49), +] + +# NOTE: `auto_scale_lr` is for automatically scaling LR, +# USER SHOULD NOT CHANGE ITS VALUES. +# base_batch_size = (8 GPUs) x (8 samples per GPU) +auto_scale_lr = dict(base_batch_size=64) diff --git a/tests/data/models/mmdet/yolox/yolox_tiny_8x8_300e_coco.py b/tests/data/models/mmdet/yolox/yolox_tiny_8x8_300e_coco.py deleted file mode 100644 index 4829ac6c9..000000000 --- a/tests/data/models/mmdet/yolox/yolox_tiny_8x8_300e_coco.py +++ /dev/null @@ -1,548 +0,0 @@ -auto_scale_lr = dict(base_batch_size=64, enable=False) -backend_args = None -base_lr = 0.01 -custom_hooks = [ - dict(num_last_epochs=15, priority=48, type="YOLOXModeSwitchHook"), - dict(priority=48, type="SyncNormHook"), - dict(ema_type="ExpMomentumEMA", momentum=0.0001, priority=49, type="EMAHook", update_buffers=True), -] -data_root = "data/coco/" -dataset_type = "CocoDataset" -default_hooks = dict( - checkpoint=dict(interval=10, max_keep_ckpts=3, type="CheckpointHook"), - logger=dict(interval=50, type="LoggerHook"), - param_scheduler=dict(type="ParamSchedulerHook"), - sampler_seed=dict(type="DistSamplerSeedHook"), - timer=dict(type="IterTimerHook"), - visualization=dict(type="DetVisualizationHook"), -) -default_scope = "mmdet" -env_cfg = dict( - cudnn_benchmark=False, dist_cfg=dict(backend="nccl"), mp_cfg=dict(mp_start_method="fork", opencv_num_threads=0) -) -img_scale = ( - 640, - 640, -) -img_scales = [ - ( - 640, - 640, - ), - ( - 320, - 320, - ), - ( - 960, - 960, - ), -] -interval = 10 -load_from = None -log_level = "INFO" -log_processor = dict(by_epoch=True, type="LogProcessor", window_size=50) -max_epochs = 300 -model = dict( - backbone=dict( - act_cfg=dict(type="Swish"), - deepen_factor=0.33, - norm_cfg=dict(eps=0.001, momentum=0.03, type="BN"), - out_indices=( - 2, - 3, - 4, - ), - spp_kernal_sizes=( - 5, - 9, - 13, - ), - type="CSPDarknet", - use_depthwise=False, - widen_factor=0.375, - ), - bbox_head=dict( - act_cfg=dict(type="Swish"), - feat_channels=96, - in_channels=96, - loss_bbox=dict(eps=1e-16, loss_weight=5.0, mode="square", reduction="sum", type="IoULoss"), - loss_cls=dict(loss_weight=1.0, reduction="sum", type="CrossEntropyLoss", use_sigmoid=True), - loss_l1=dict(loss_weight=1.0, reduction="sum", type="L1Loss"), - loss_obj=dict(loss_weight=1.0, reduction="sum", type="CrossEntropyLoss", use_sigmoid=True), - norm_cfg=dict(eps=0.001, momentum=0.03, type="BN"), - num_classes=80, - stacked_convs=2, - strides=( - 8, - 16, - 32, - ), - type="YOLOXHead", - use_depthwise=False, - ), - data_preprocessor=dict( - batch_augments=[ - dict( - interval=10, - random_size_range=( - 320, - 640, - ), - size_divisor=32, - type="BatchSyncRandomResize", - ), - ], - pad_size_divisor=32, - type="DetDataPreprocessor", - ), - neck=dict( - act_cfg=dict(type="Swish"), - in_channels=[ - 96, - 192, - 384, - ], - norm_cfg=dict(eps=0.001, momentum=0.03, type="BN"), - num_csp_blocks=1, - out_channels=96, - type="YOLOXPAFPN", - upsample_cfg=dict(mode="nearest", scale_factor=2), - use_depthwise=False, - ), - test_cfg=dict(nms=dict(iou_threshold=0.65, type="nms"), score_thr=0.01), - train_cfg=dict(assigner=dict(center_radius=2.5, type="SimOTAAssigner")), - type="YOLOX", -) -num_last_epochs = 15 -optim_wrapper = dict( - optimizer=dict(lr=0.01, momentum=0.9, nesterov=True, type="SGD", weight_decay=0.0005), - paramwise_cfg=dict(bias_decay_mult=0.0, norm_decay_mult=0.0), - type="OptimWrapper", -) -param_scheduler = [ - dict(begin=0, by_epoch=True, convert_to_iter_based=True, end=5, type="mmdet.QuadraticWarmupLR"), - dict( - T_max=285, begin=5, by_epoch=True, convert_to_iter_based=True, end=285, eta_min=0.0005, type="CosineAnnealingLR" - ), - dict(begin=285, by_epoch=True, end=300, factor=1, type="ConstantLR"), -] -resume = False -test_cfg = dict(type="TestLoop") -test_dataloader = dict( - batch_size=8, - dataset=dict( - ann_file="annotations/instances_val2017.json", - backend_args=None, - data_prefix=dict(img="val2017/"), - data_root="data/coco/", - pipeline=[ - dict(backend_args=None, type="LoadImageFromFile"), - dict( - keep_ratio=True, - scale=( - 416, - 416, - ), - type="Resize", - ), - dict( - pad_to_square=True, - pad_val=dict( - img=( - 114.0, - 114.0, - 114.0, - ) - ), - type="Pad", - ), - dict(type="LoadAnnotations", with_bbox=True), - dict( - meta_keys=( - "img_id", - "img_path", - "ori_shape", - "img_shape", - "scale_factor", - ), - type="PackDetInputs", - ), - ], - test_mode=True, - type="CocoDataset", - ), - drop_last=False, - num_workers=4, - persistent_workers=True, - sampler=dict(shuffle=False, type="DefaultSampler"), -) -test_evaluator = dict( - ann_file="data/coco/annotations/instances_val2017.json", backend_args=None, metric="bbox", type="CocoMetric" -) -test_pipeline = [ - dict(backend_args=None, type="LoadImageFromFile"), - dict( - keep_ratio=True, - scale=( - 416, - 416, - ), - type="Resize", - ), - dict( - pad_to_square=True, - pad_val=dict( - img=( - 114.0, - 114.0, - 114.0, - ) - ), - type="Pad", - ), - dict(type="LoadAnnotations", with_bbox=True), - dict( - meta_keys=( - "img_id", - "img_path", - "ori_shape", - "img_shape", - "scale_factor", - ), - type="PackDetInputs", - ), -] -train_cfg = dict(max_epochs=300, type="EpochBasedTrainLoop", val_interval=10) -train_dataloader = dict( - batch_size=8, - dataset=dict( - dataset=dict( - ann_file="annotations/instances_train2017.json", - backend_args=None, - data_prefix=dict(img="train2017/"), - data_root="data/coco/", - filter_cfg=dict(filter_empty_gt=False, min_size=32), - pipeline=[ - dict(backend_args=None, type="LoadImageFromFile"), - dict(type="LoadAnnotations", with_bbox=True), - ], - type="CocoDataset", - ), - pipeline=[ - dict( - img_scale=( - 640, - 640, - ), - pad_val=114.0, - type="Mosaic", - ), - dict( - border=( - -320, - -320, - ), - scaling_ratio_range=( - 0.5, - 1.5, - ), - type="RandomAffine", - ), - dict(type="YOLOXHSVRandomAug"), - dict(prob=0.5, type="RandomFlip"), - dict( - keep_ratio=True, - scale=( - 640, - 640, - ), - type="Resize", - ), - dict( - pad_to_square=True, - pad_val=dict( - img=( - 114.0, - 114.0, - 114.0, - ) - ), - type="Pad", - ), - dict( - keep_empty=False, - min_gt_bbox_wh=( - 1, - 1, - ), - type="FilterAnnotations", - ), - dict(type="PackDetInputs"), - ], - type="MultiImageMixDataset", - ), - num_workers=4, - persistent_workers=True, - sampler=dict(shuffle=True, type="DefaultSampler"), -) -train_dataset = dict( - dataset=dict( - ann_file="annotations/instances_train2017.json", - backend_args=None, - data_prefix=dict(img="train2017/"), - data_root="data/coco/", - filter_cfg=dict(filter_empty_gt=False, min_size=32), - pipeline=[ - dict(backend_args=None, type="LoadImageFromFile"), - dict(type="LoadAnnotations", with_bbox=True), - ], - type="CocoDataset", - ), - pipeline=[ - dict( - img_scale=( - 640, - 640, - ), - pad_val=114.0, - type="Mosaic", - ), - dict( - border=( - -320, - -320, - ), - scaling_ratio_range=( - 0.1, - 2, - ), - type="RandomAffine", - ), - dict( - img_scale=( - 640, - 640, - ), - pad_val=114.0, - ratio_range=( - 0.8, - 1.6, - ), - type="MixUp", - ), - dict(type="YOLOXHSVRandomAug"), - dict(prob=0.5, type="RandomFlip"), - dict( - keep_ratio=True, - scale=( - 640, - 640, - ), - type="Resize", - ), - dict( - pad_to_square=True, - pad_val=dict( - img=( - 114.0, - 114.0, - 114.0, - ) - ), - type="Pad", - ), - dict( - keep_empty=False, - min_gt_bbox_wh=( - 1, - 1, - ), - type="FilterAnnotations", - ), - dict(type="PackDetInputs"), - ], - type="MultiImageMixDataset", -) -train_pipeline = [ - dict( - img_scale=( - 640, - 640, - ), - pad_val=114.0, - type="Mosaic", - ), - dict( - border=( - -320, - -320, - ), - scaling_ratio_range=( - 0.5, - 1.5, - ), - type="RandomAffine", - ), - dict(type="YOLOXHSVRandomAug"), - dict(prob=0.5, type="RandomFlip"), - dict( - keep_ratio=True, - scale=( - 640, - 640, - ), - type="Resize", - ), - dict( - pad_to_square=True, - pad_val=dict( - img=( - 114.0, - 114.0, - 114.0, - ) - ), - type="Pad", - ), - dict( - keep_empty=False, - min_gt_bbox_wh=( - 1, - 1, - ), - type="FilterAnnotations", - ), - dict(type="PackDetInputs"), -] -tta_model = dict(tta_cfg=dict(max_per_img=100, nms=dict(iou_threshold=0.65, type="nms")), type="DetTTAModel") -tta_pipeline = [ - dict(backend_args=None, type="LoadImageFromFile"), - dict( - transforms=[ - [ - dict( - keep_ratio=True, - scale=( - 640, - 640, - ), - type="Resize", - ), - dict( - keep_ratio=True, - scale=( - 320, - 320, - ), - type="Resize", - ), - dict( - keep_ratio=True, - scale=( - 960, - 960, - ), - type="Resize", - ), - ], - [ - dict(prob=1.0, type="RandomFlip"), - dict(prob=0.0, type="RandomFlip"), - ], - [ - dict( - pad_to_square=True, - pad_val=dict( - img=( - 114.0, - 114.0, - 114.0, - ) - ), - type="Pad", - ), - ], - [ - dict(type="LoadAnnotations", with_bbox=True), - ], - [ - dict( - meta_keys=( - "img_id", - "img_path", - "ori_shape", - "img_shape", - "scale_factor", - "flip", - "flip_direction", - ), - type="PackDetInputs", - ), - ], - ], - type="TestTimeAug", - ), -] -val_cfg = dict(type="ValLoop") -val_dataloader = dict( - batch_size=8, - dataset=dict( - ann_file="annotations/instances_val2017.json", - backend_args=None, - data_prefix=dict(img="val2017/"), - data_root="data/coco/", - pipeline=[ - dict(backend_args=None, type="LoadImageFromFile"), - dict( - keep_ratio=True, - scale=( - 416, - 416, - ), - type="Resize", - ), - dict( - pad_to_square=True, - pad_val=dict( - img=( - 114.0, - 114.0, - 114.0, - ) - ), - type="Pad", - ), - dict(type="LoadAnnotations", with_bbox=True), - dict( - meta_keys=( - "img_id", - "img_path", - "ori_shape", - "img_shape", - "scale_factor", - ), - type="PackDetInputs", - ), - ], - test_mode=True, - type="CocoDataset", - ), - drop_last=False, - num_workers=4, - persistent_workers=True, - sampler=dict(shuffle=False, type="DefaultSampler"), -) -val_evaluator = dict( - ann_file="data/coco/annotations/instances_val2017.json", backend_args=None, metric="bbox", type="CocoMetric" -) -vis_backends = [ - dict(type="LocalVisBackend"), -] -visualizer = dict( - name="visualizer", - type="DetLocalVisualizer", - vis_backends=[ - dict(type="LocalVisBackend"), - ], -) diff --git a/tests/data/models/mmdet/yolox/yolox_tiny_8xb8-300e_coco.py b/tests/data/models/mmdet/yolox/yolox_tiny_8xb8-300e_coco.py new file mode 100644 index 000000000..b4de1ce5a --- /dev/null +++ b/tests/data/models/mmdet/yolox/yolox_tiny_8xb8-300e_coco.py @@ -0,0 +1,43 @@ +_base_ = "./yolox_s_8xb8-300e_coco.py" + +# model settings +model = dict( + data_preprocessor=dict( + batch_augments=[dict(type="BatchSyncRandomResize", random_size_range=(320, 640), size_divisor=32, interval=10)] + ), + backbone=dict(deepen_factor=0.33, widen_factor=0.375), + neck=dict(in_channels=[96, 192, 384], out_channels=96), + bbox_head=dict(in_channels=96, feat_channels=96), +) + +img_scale = (640, 640) # width, height + +train_pipeline = [ + dict(type="Mosaic", img_scale=img_scale, pad_val=114.0), + dict( + type="RandomAffine", + scaling_ratio_range=(0.5, 1.5), + # img_scale is (width, height) + border=(-img_scale[0] // 2, -img_scale[1] // 2), + ), + dict(type="YOLOXHSVRandomAug"), + dict(type="RandomFlip", prob=0.5), + # Resize and Pad are for the last 15 epochs when Mosaic and + # RandomAffine are closed by YOLOXModeSwitchHook. + dict(type="Resize", scale=img_scale, keep_ratio=True), + dict(type="Pad", pad_to_square=True, pad_val=dict(img=(114.0, 114.0, 114.0))), + dict(type="FilterAnnotations", min_gt_bbox_wh=(1, 1), keep_empty=False), + dict(type="PackDetInputs"), +] + +test_pipeline = [ + dict(type="LoadImageFromFile", backend_args={{_base_.backend_args}}), + dict(type="Resize", scale=(416, 416), keep_ratio=True), + dict(type="Pad", pad_to_square=True, pad_val=dict(img=(114.0, 114.0, 114.0))), + dict(type="LoadAnnotations", with_bbox=True), + dict(type="PackDetInputs", meta_keys=("img_id", "img_path", "ori_shape", "img_shape", "scale_factor")), +] + +train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) +val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) +test_dataloader = val_dataloader diff --git a/tests/data/models/mmdet/yolox/yolox_tta.py b/tests/data/models/mmdet/yolox/yolox_tta.py new file mode 100644 index 000000000..7d0b62283 --- /dev/null +++ b/tests/data/models/mmdet/yolox/yolox_tta.py @@ -0,0 +1,37 @@ +tta_model = dict(type="DetTTAModel", tta_cfg=dict(nms=dict(type="nms", iou_threshold=0.65), max_per_img=100)) + +img_scales = [(640, 640), (320, 320), (960, 960)] +tta_pipeline = [ + dict(type="LoadImageFromFile", backend_args=None), + dict( + type="TestTimeAug", + transforms=[ + [dict(type="Resize", scale=s, keep_ratio=True) for s in img_scales], + [ + # ``RandomFlip`` must be placed before ``Pad``, otherwise + # bounding box coordinates after flipping cannot be + # recovered correctly. + dict(type="RandomFlip", prob=1.0), + dict(type="RandomFlip", prob=0.0), + ], + [ + dict(type="Pad", pad_to_square=True, pad_val=dict(img=(114.0, 114.0, 114.0))), + ], + [dict(type="LoadAnnotations", with_bbox=True)], + [ + dict( + type="PackDetInputs", + meta_keys=( + "img_id", + "img_path", + "ori_shape", + "img_shape", + "scale_factor", + "flip", + "flip_direction", + ), + ) + ], + ], + ), +] diff --git a/tests/data/models/mmdet_cascade_mask_rcnn/cascade-mask-rcnn_r50_fpn.py b/tests/data/models/mmdet_cascade_mask_rcnn/cascade-mask-rcnn_r50_fpn.py new file mode 100644 index 000000000..53406bc3f --- /dev/null +++ b/tests/data/models/mmdet_cascade_mask_rcnn/cascade-mask-rcnn_r50_fpn.py @@ -0,0 +1,167 @@ +# model settings +model = dict( + type="CascadeRCNN", + data_preprocessor=dict( + type="DetDataPreprocessor", + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_mask=True, + pad_size_divisor=32, + ), + backbone=dict( + type="ResNet", + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type="BN", requires_grad=True), + norm_eval=True, + style="pytorch", + init_cfg=dict(type="Pretrained", checkpoint="torchvision://resnet50"), + ), + neck=dict(type="FPN", in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5), + rpn_head=dict( + type="RPNHead", + in_channels=256, + feat_channels=256, + anchor_generator=dict(type="AnchorGenerator", scales=[8], ratios=[0.5, 1.0, 2.0], strides=[4, 8, 16, 32, 64]), + bbox_coder=dict(type="DeltaXYWHBBoxCoder", target_means=[0.0, 0.0, 0.0, 0.0], target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type="SmoothL1Loss", beta=1.0 / 9.0, loss_weight=1.0), + ), + roi_head=dict( + type="CascadeRoIHead", + num_stages=3, + stage_loss_weights=[1, 0.5, 0.25], + bbox_roi_extractor=dict( + type="SingleRoIExtractor", + roi_layer=dict(type="RoIAlign", output_size=7, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32], + ), + bbox_head=[ + dict( + type="Shared2FCBBoxHead", + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type="DeltaXYWHBBoxCoder", target_means=[0.0, 0.0, 0.0, 0.0], target_stds=[0.1, 0.1, 0.2, 0.2] + ), + reg_class_agnostic=True, + loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type="SmoothL1Loss", beta=1.0, loss_weight=1.0), + ), + dict( + type="Shared2FCBBoxHead", + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type="DeltaXYWHBBoxCoder", target_means=[0.0, 0.0, 0.0, 0.0], target_stds=[0.05, 0.05, 0.1, 0.1] + ), + reg_class_agnostic=True, + loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type="SmoothL1Loss", beta=1.0, loss_weight=1.0), + ), + dict( + type="Shared2FCBBoxHead", + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type="DeltaXYWHBBoxCoder", + target_means=[0.0, 0.0, 0.0, 0.0], + target_stds=[0.033, 0.033, 0.067, 0.067], + ), + reg_class_agnostic=True, + loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type="SmoothL1Loss", beta=1.0, loss_weight=1.0), + ), + ], + mask_roi_extractor=dict( + type="SingleRoIExtractor", + roi_layer=dict(type="RoIAlign", output_size=14, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32], + ), + mask_head=dict( + type="FCNMaskHead", + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=80, + loss_mask=dict(type="CrossEntropyLoss", use_mask=True, loss_weight=1.0), + ), + ), + # model training and testing settings + train_cfg=dict( + rpn=dict( + assigner=dict( + type="MaxIoUAssigner", + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + match_low_quality=True, + ignore_iof_thr=-1, + ), + sampler=dict(type="RandomSampler", num=256, pos_fraction=0.5, neg_pos_ub=-1, add_gt_as_proposals=False), + allowed_border=0, + pos_weight=-1, + debug=False, + ), + rpn_proposal=dict(nms_pre=2000, max_per_img=2000, nms=dict(type="nms", iou_threshold=0.7), min_bbox_size=0), + rcnn=[ + dict( + assigner=dict( + type="MaxIoUAssigner", + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + match_low_quality=False, + ignore_iof_thr=-1, + ), + sampler=dict(type="RandomSampler", num=512, pos_fraction=0.25, neg_pos_ub=-1, add_gt_as_proposals=True), + mask_size=28, + pos_weight=-1, + debug=False, + ), + dict( + assigner=dict( + type="MaxIoUAssigner", + pos_iou_thr=0.6, + neg_iou_thr=0.6, + min_pos_iou=0.6, + match_low_quality=False, + ignore_iof_thr=-1, + ), + sampler=dict(type="RandomSampler", num=512, pos_fraction=0.25, neg_pos_ub=-1, add_gt_as_proposals=True), + mask_size=28, + pos_weight=-1, + debug=False, + ), + dict( + assigner=dict( + type="MaxIoUAssigner", + pos_iou_thr=0.7, + neg_iou_thr=0.7, + min_pos_iou=0.7, + match_low_quality=False, + ignore_iof_thr=-1, + ), + sampler=dict(type="RandomSampler", num=512, pos_fraction=0.25, neg_pos_ub=-1, add_gt_as_proposals=True), + mask_size=28, + pos_weight=-1, + debug=False, + ), + ], + ), + test_cfg=dict( + rpn=dict(nms_pre=1000, max_per_img=1000, nms=dict(type="nms", iou_threshold=0.7), min_bbox_size=0), + rcnn=dict(score_thr=0.05, nms=dict(type="nms", iou_threshold=0.5), max_per_img=100, mask_thr_binary=0.5), + ), +) diff --git a/tests/data/models/mmdet_cascade_mask_rcnn/cascade-mask-rcnn_r50_fpn_1x_coco.py b/tests/data/models/mmdet_cascade_mask_rcnn/cascade-mask-rcnn_r50_fpn_1x_coco.py new file mode 100644 index 000000000..77b3ebac4 --- /dev/null +++ b/tests/data/models/mmdet_cascade_mask_rcnn/cascade-mask-rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = [ + "../_base_/models/cascade-mask-rcnn_r50_fpn.py", + "../_base_/datasets/coco_instance.py", + "../_base_/schedules/schedule_1x.py", + "../_base_/default_runtime.py", +] diff --git a/tests/data/models/mmdet_cascade_mask_rcnn/cascade_mask_rcnn_r50_fpn.py b/tests/data/models/mmdet_cascade_mask_rcnn/cascade_mask_rcnn_r50_fpn.py new file mode 100644 index 000000000..2f6cba0f6 --- /dev/null +++ b/tests/data/models/mmdet_cascade_mask_rcnn/cascade_mask_rcnn_r50_fpn.py @@ -0,0 +1,215 @@ +# model settings +model = dict( + type="CascadeRCNN", + pretrained="torchvision://resnet50", + backbone=dict( + type="ResNet", + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type="BN", requires_grad=True), + norm_eval=True, + style="pytorch", + ), + neck=dict(type="FPN", in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5), + rpn_head=dict( + type="RPNHead", + in_channels=256, + feat_channels=256, + anchor_generator=dict( + type="AnchorGenerator", + scales=[8], + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64], + ), + bbox_coder=dict( + type="DeltaXYWHBBoxCoder", + target_means=[0.0, 0.0, 0.0, 0.0], + target_stds=[1.0, 1.0, 1.0, 1.0], + ), + loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type="SmoothL1Loss", beta=1.0 / 9.0, loss_weight=1.0), + ), + roi_head=dict( + type="CascadeRoIHead", + num_stages=3, + stage_loss_weights=[1, 0.5, 0.25], + bbox_roi_extractor=dict( + type="SingleRoIExtractor", + roi_layer=dict(type="RoIAlign", output_size=7, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32], + ), + bbox_head=[ + dict( + type="Shared2FCBBoxHead", + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type="DeltaXYWHBBoxCoder", + target_means=[0.0, 0.0, 0.0, 0.0], + target_stds=[0.1, 0.1, 0.2, 0.2], + ), + reg_class_agnostic=True, + loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type="SmoothL1Loss", beta=1.0, loss_weight=1.0), + ), + dict( + type="Shared2FCBBoxHead", + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type="DeltaXYWHBBoxCoder", + target_means=[0.0, 0.0, 0.0, 0.0], + target_stds=[0.05, 0.05, 0.1, 0.1], + ), + reg_class_agnostic=True, + loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type="SmoothL1Loss", beta=1.0, loss_weight=1.0), + ), + dict( + type="Shared2FCBBoxHead", + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type="DeltaXYWHBBoxCoder", + target_means=[0.0, 0.0, 0.0, 0.0], + target_stds=[0.033, 0.033, 0.067, 0.067], + ), + reg_class_agnostic=True, + loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type="SmoothL1Loss", beta=1.0, loss_weight=1.0), + ), + ], + mask_roi_extractor=dict( + type="SingleRoIExtractor", + roi_layer=dict(type="RoIAlign", output_size=14, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32], + ), + mask_head=dict( + type="FCNMaskHead", + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=80, + loss_mask=dict(type="CrossEntropyLoss", use_mask=True, loss_weight=1.0), + ), + ), + # model training and testing settings + train_cfg=dict( + rpn=dict( + assigner=dict( + type="MaxIoUAssigner", + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + match_low_quality=True, + ignore_iof_thr=-1, + ), + sampler=dict( + type="RandomSampler", + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False, + ), + allowed_border=0, + pos_weight=-1, + debug=False, + ), + rpn_proposal=dict( + nms_across_levels=False, + nms_pre=2000, + nms_post=2000, + max_per_img=2000, + nms=dict(type="nms", iou_threshold=0.7), + min_bbox_size=0, + ), + rcnn=[ + dict( + assigner=dict( + type="MaxIoUAssigner", + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + match_low_quality=False, + ignore_iof_thr=-1, + ), + sampler=dict( + type="RandomSampler", + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True, + ), + mask_size=28, + pos_weight=-1, + debug=False, + ), + dict( + assigner=dict( + type="MaxIoUAssigner", + pos_iou_thr=0.6, + neg_iou_thr=0.6, + min_pos_iou=0.6, + match_low_quality=False, + ignore_iof_thr=-1, + ), + sampler=dict( + type="RandomSampler", + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True, + ), + mask_size=28, + pos_weight=-1, + debug=False, + ), + dict( + assigner=dict( + type="MaxIoUAssigner", + pos_iou_thr=0.7, + neg_iou_thr=0.7, + min_pos_iou=0.7, + match_low_quality=False, + ignore_iof_thr=-1, + ), + sampler=dict( + type="RandomSampler", + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True, + ), + mask_size=28, + pos_weight=-1, + debug=False, + ), + ], + ), + test_cfg=dict( + rpn=dict( + nms_across_levels=False, + nms_pre=1000, + nms_post=1000, + max_per_img=1000, + nms=dict(type="nms", iou_threshold=0.7), + min_bbox_size=0, + ), + rcnn=dict( + score_thr=0.05, + nms=dict(type="nms", iou_threshold=0.5), + max_per_img=100, + mask_thr_binary=0.5, + ), + ), +) diff --git a/tests/data/models/mmdet_cascade_mask_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py b/tests/data/models/mmdet_cascade_mask_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py new file mode 100644 index 000000000..8c747b1a3 --- /dev/null +++ b/tests/data/models/mmdet_cascade_mask_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1 @@ +_base_ = ["cascade_mask_rcnn_r50_fpn.py", "coco_instance.py", "schedule_1x.py", "default_runtime.py"] diff --git a/tests/data/models/mmdet_cascade_mask_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco_v280.py b/tests/data/models/mmdet_cascade_mask_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco_v280.py new file mode 100644 index 000000000..c2c341585 --- /dev/null +++ b/tests/data/models/mmdet_cascade_mask_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco_v280.py @@ -0,0 +1 @@ +_base_ = ["cascade_mask_rcnn_r50_fpn_v280.py", "coco_instance.py", "schedule_1x.py", "default_runtime.py"] diff --git a/tests/data/models/mmdet_cascade_mask_rcnn/cascade_mask_rcnn_r50_fpn_v280.py b/tests/data/models/mmdet_cascade_mask_rcnn/cascade_mask_rcnn_r50_fpn_v280.py new file mode 100644 index 000000000..70f4afd21 --- /dev/null +++ b/tests/data/models/mmdet_cascade_mask_rcnn/cascade_mask_rcnn_r50_fpn_v280.py @@ -0,0 +1,215 @@ +# model settings +model = dict( + type="CascadeRCNN", + pretrained="torchvision://resnet50", + backbone=dict( + type="ResNet", + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type="BN", requires_grad=True), + norm_eval=True, + style="pytorch", + ), + neck=dict(type="FPN", in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5), + rpn_head=dict( + type="RPNHead", + in_channels=256, + feat_channels=256, + anchor_generator=dict( + type="AnchorGenerator", + scales=[8], + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64], + ), + bbox_coder=dict( + type="DeltaXYWHBBoxCoder", + target_means=[0.0, 0.0, 0.0, 0.0], + target_stds=[1.0, 1.0, 1.0, 1.0], + ), + loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type="SmoothL1Loss", beta=1.0 / 9.0, loss_weight=1.0), + ), + roi_head=dict( + type="CascadeRoIHead", + num_stages=3, + stage_loss_weights=[1, 0.5, 0.25], + bbox_roi_extractor=dict( + type="SingleRoIExtractor", + roi_layer=dict(type="RoIAlign", output_size=7, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32], + ), + bbox_head=[ + dict( + type="Shared2FCBBoxHead", + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type="DeltaXYWHBBoxCoder", + target_means=[0.0, 0.0, 0.0, 0.0], + target_stds=[0.1, 0.1, 0.2, 0.2], + ), + reg_class_agnostic=True, + loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type="SmoothL1Loss", beta=1.0, loss_weight=1.0), + ), + dict( + type="Shared2FCBBoxHead", + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type="DeltaXYWHBBoxCoder", + target_means=[0.0, 0.0, 0.0, 0.0], + target_stds=[0.05, 0.05, 0.1, 0.1], + ), + reg_class_agnostic=True, + loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type="SmoothL1Loss", beta=1.0, loss_weight=1.0), + ), + dict( + type="Shared2FCBBoxHead", + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type="DeltaXYWHBBoxCoder", + target_means=[0.0, 0.0, 0.0, 0.0], + target_stds=[0.033, 0.033, 0.067, 0.067], + ), + reg_class_agnostic=True, + loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type="SmoothL1Loss", beta=1.0, loss_weight=1.0), + ), + ], + mask_roi_extractor=dict( + type="SingleRoIExtractor", + roi_layer=dict(type="RoIAlign", output_size=14, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32], + ), + mask_head=dict( + type="FCNMaskHead", + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=80, + loss_mask=dict(type="CrossEntropyLoss", use_mask=True, loss_weight=1.0), + ), + ), +) +# model training and testing settings +train_cfg = dict( + rpn=dict( + assigner=dict( + type="MaxIoUAssigner", + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + match_low_quality=True, + ignore_iof_thr=-1, + ), + sampler=dict( + type="RandomSampler", + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False, + ), + allowed_border=0, + pos_weight=-1, + debug=False, + ), + rpn_proposal=dict( + nms_across_levels=False, + nms_pre=2000, + nms_post=2000, + max_num=2000, + nms_thr=0.7, + min_bbox_size=0, + ), + rcnn=[ + dict( + assigner=dict( + type="MaxIoUAssigner", + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + match_low_quality=False, + ignore_iof_thr=-1, + ), + sampler=dict( + type="RandomSampler", + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True, + ), + mask_size=28, + pos_weight=-1, + debug=False, + ), + dict( + assigner=dict( + type="MaxIoUAssigner", + pos_iou_thr=0.6, + neg_iou_thr=0.6, + min_pos_iou=0.6, + match_low_quality=False, + ignore_iof_thr=-1, + ), + sampler=dict( + type="RandomSampler", + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True, + ), + mask_size=28, + pos_weight=-1, + debug=False, + ), + dict( + assigner=dict( + type="MaxIoUAssigner", + pos_iou_thr=0.7, + neg_iou_thr=0.7, + min_pos_iou=0.7, + match_low_quality=False, + ignore_iof_thr=-1, + ), + sampler=dict( + type="RandomSampler", + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True, + ), + mask_size=28, + pos_weight=-1, + debug=False, + ), + ], +) +test_cfg = dict( + rpn=dict( + nms_across_levels=False, + nms_pre=1000, + nms_post=1000, + max_num=1000, + nms_thr=0.7, + min_bbox_size=0, + ), + rcnn=dict( + score_thr=0.05, + nms=dict(type="nms", iou_threshold=0.5), + max_per_img=100, + mask_thr_binary=0.5, + ), +) diff --git a/tests/data/models/mmdet_cascade_mask_rcnn/coco_instance.py b/tests/data/models/mmdet_cascade_mask_rcnn/coco_instance.py new file mode 100644 index 000000000..1661d18cd --- /dev/null +++ b/tests/data/models/mmdet_cascade_mask_rcnn/coco_instance.py @@ -0,0 +1,52 @@ +dataset_type = "CocoDataset" +data_root = "data/coco/" +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type="LoadImageFromFile"), + dict(type="LoadAnnotations", with_bbox=True, with_mask=True), + dict(type="Resize", img_scale=(1333, 800), keep_ratio=True), + dict(type="RandomFlip", flip_ratio=0.5), + dict(type="Normalize", **img_norm_cfg), + dict(type="Pad", size_divisor=32), + dict(type="DefaultFormatBundle"), + dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels", "gt_masks"]), +] +test_pipeline = [ + dict(type="LoadImageFromFile"), + dict( + type="MultiScaleFlipAug", + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type="Resize", keep_ratio=True), + dict(type="RandomFlip"), + dict(type="Normalize", **img_norm_cfg), + dict(type="Pad", size_divisor=32), + dict(type="DefaultFormatBundle"), + dict(type="Collect", keys=["img"]), + ], + ), +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + "annotations/instances_train2017.json", + img_prefix=data_root + "train2017/", + pipeline=train_pipeline, + ), + val=dict( + type=dataset_type, + ann_file=data_root + "annotations/instances_val2017.json", + img_prefix=data_root + "val2017/", + pipeline=test_pipeline, + ), + test=dict( + type=dataset_type, + ann_file=data_root + "annotations/instances_val2017.json", + img_prefix=data_root + "val2017/", + pipeline=test_pipeline, + ), +) +evaluation = dict(metric=["bbox", "segm"]) diff --git a/tests/data/models/mmdet_cascade_mask_rcnn/default_runtime.py b/tests/data/models/mmdet_cascade_mask_rcnn/default_runtime.py new file mode 100644 index 000000000..75ee67b9f --- /dev/null +++ b/tests/data/models/mmdet_cascade_mask_rcnn/default_runtime.py @@ -0,0 +1,15 @@ +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type="TextLoggerHook"), + # dict(type='TensorboardLoggerHook') + ], +) +# yapf:enable +dist_params = dict(backend="nccl") +log_level = "INFO" +load_from = None +resume_from = None +workflow = [("train", 1)] diff --git a/tests/data/models/mmdet_cascade_mask_rcnn/schedule_1x.py b/tests/data/models/mmdet_cascade_mask_rcnn/schedule_1x.py new file mode 100644 index 000000000..cc7fa00a8 --- /dev/null +++ b/tests/data/models/mmdet_cascade_mask_rcnn/schedule_1x.py @@ -0,0 +1,6 @@ +# optimizer +optimizer = dict(type="SGD", lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict(policy="step", warmup="linear", warmup_iters=500, warmup_ratio=0.001, step=[8, 11]) +total_epochs = 12 diff --git a/tests/data/models/mmdet_retinanet/coco_detection.py b/tests/data/models/mmdet_retinanet/coco_detection.py new file mode 100644 index 000000000..67dbc3efb --- /dev/null +++ b/tests/data/models/mmdet_retinanet/coco_detection.py @@ -0,0 +1,52 @@ +dataset_type = "CocoDataset" +data_root = "data/coco/" +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type="LoadImageFromFile"), + dict(type="LoadAnnotations", with_bbox=True), + dict(type="Resize", img_scale=(1333, 800), keep_ratio=True), + dict(type="RandomFlip", flip_ratio=0.5), + dict(type="Normalize", **img_norm_cfg), + dict(type="Pad", size_divisor=32), + dict(type="DefaultFormatBundle"), + dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]), +] +test_pipeline = [ + dict(type="LoadImageFromFile"), + dict( + type="MultiScaleFlipAug", + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type="Resize", keep_ratio=True), + dict(type="RandomFlip"), + dict(type="Normalize", **img_norm_cfg), + dict(type="Pad", size_divisor=32), + dict(type="DefaultFormatBundle"), + dict(type="Collect", keys=["img"]), + ], + ), +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + "annotations/instances_train2017.json", + img_prefix=data_root + "train2017/", + pipeline=train_pipeline, + ), + val=dict( + type=dataset_type, + ann_file=data_root + "annotations/instances_val2017.json", + img_prefix=data_root + "val2017/", + pipeline=test_pipeline, + ), + test=dict( + type=dataset_type, + ann_file=data_root + "annotations/instances_val2017.json", + img_prefix=data_root + "val2017/", + pipeline=test_pipeline, + ), +) +evaluation = dict(interval=1, metric="bbox") diff --git a/tests/data/models/mmdet_retinanet/default_runtime.py b/tests/data/models/mmdet_retinanet/default_runtime.py new file mode 100644 index 000000000..75ee67b9f --- /dev/null +++ b/tests/data/models/mmdet_retinanet/default_runtime.py @@ -0,0 +1,15 @@ +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type="TextLoggerHook"), + # dict(type='TensorboardLoggerHook') + ], +) +# yapf:enable +dist_params = dict(backend="nccl") +log_level = "INFO" +load_from = None +resume_from = None +workflow = [("train", 1)] diff --git a/tests/data/models/mmdet_retinanet/retinanet_r50_fpn.py b/tests/data/models/mmdet_retinanet/retinanet_r50_fpn.py new file mode 100644 index 000000000..4f4f60db2 --- /dev/null +++ b/tests/data/models/mmdet_retinanet/retinanet_r50_fpn.py @@ -0,0 +1,50 @@ +# model settings +model = dict( + type="RetinaNet", + pretrained="torchvision://resnet50", + backbone=dict( + type="ResNet", + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type="BN", requires_grad=True), + norm_eval=True, + style="pytorch", + ), + neck=dict( + type="FPN", + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs="on_input", + num_outs=5, + ), + bbox_head=dict( + type="RetinaHead", + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + anchor_generator=dict( + type="AnchorGenerator", + octave_base_scale=4, + scales_per_octave=3, + ratios=[0.5, 1.0, 2.0], + strides=[8, 16, 32, 64, 128], + ), + bbox_coder=dict(type="DeltaXYWHBBoxCoder", target_means=[0.0, 0.0, 0.0, 0.0], target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict(type="FocalLoss", use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=1.0), + loss_bbox=dict(type="L1Loss", loss_weight=1.0), + ), + # training and testing settings + train_cfg=dict( + assigner=dict(type="MaxIoUAssigner", pos_iou_thr=0.5, neg_iou_thr=0.4, min_pos_iou=0, ignore_iof_thr=-1), + allowed_border=-1, + pos_weight=-1, + debug=False, + ), + test_cfg=dict( + nms_pre=1000, min_bbox_size=0, score_thr=0.05, nms=dict(type="nms", iou_threshold=0.5), max_per_img=100 + ), +) diff --git a/tests/data/models/mmdet_retinanet/retinanet_r50_fpn_1x_coco.py b/tests/data/models/mmdet_retinanet/retinanet_r50_fpn_1x_coco.py new file mode 100644 index 000000000..75ffb614b --- /dev/null +++ b/tests/data/models/mmdet_retinanet/retinanet_r50_fpn_1x_coco.py @@ -0,0 +1,3 @@ +_base_ = ["retinanet_r50_fpn.py", "coco_detection.py", "schedule_1x.py", "default_runtime.py"] +# optimizer +optimizer = dict(type="SGD", lr=0.01, momentum=0.9, weight_decay=0.0001) diff --git a/tests/data/models/mmdet_retinanet/retinanet_r50_fpn_1x_coco_v280.py b/tests/data/models/mmdet_retinanet/retinanet_r50_fpn_1x_coco_v280.py new file mode 100644 index 000000000..fdf669a8f --- /dev/null +++ b/tests/data/models/mmdet_retinanet/retinanet_r50_fpn_1x_coco_v280.py @@ -0,0 +1,3 @@ +_base_ = ["retinanet_r50_fpn_v280.py", "coco_detection.py", "schedule_1x.py", "default_runtime.py"] +# optimizer +optimizer = dict(type="SGD", lr=0.01, momentum=0.9, weight_decay=0.0001) diff --git a/tests/data/models/mmdet_retinanet/retinanet_r50_fpn_v280.py b/tests/data/models/mmdet_retinanet/retinanet_r50_fpn_v280.py new file mode 100644 index 000000000..cefd366d0 --- /dev/null +++ b/tests/data/models/mmdet_retinanet/retinanet_r50_fpn_v280.py @@ -0,0 +1,48 @@ +# model settings +model = dict( + type="RetinaNet", + pretrained="torchvision://resnet50", + backbone=dict( + type="ResNet", + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type="BN", requires_grad=True), + norm_eval=True, + style="pytorch", + ), + neck=dict( + type="FPN", + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs="on_input", + num_outs=5, + ), + bbox_head=dict( + type="RetinaHead", + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + anchor_generator=dict( + type="AnchorGenerator", + octave_base_scale=4, + scales_per_octave=3, + ratios=[0.5, 1.0, 2.0], + strides=[8, 16, 32, 64, 128], + ), + bbox_coder=dict(type="DeltaXYWHBBoxCoder", target_means=[0.0, 0.0, 0.0, 0.0], target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict(type="FocalLoss", use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=1.0), + loss_bbox=dict(type="L1Loss", loss_weight=1.0), + ), +) +# training and testing settings +train_cfg = dict( + assigner=dict(type="MaxIoUAssigner", pos_iou_thr=0.5, neg_iou_thr=0.4, min_pos_iou=0, ignore_iof_thr=-1), + allowed_border=-1, + pos_weight=-1, + debug=False, +) +test_cfg = dict(nms_pre=1000, min_bbox_size=0, score_thr=0.05, nms=dict(type="nms", iou_threshold=0.5), max_per_img=100) diff --git a/tests/data/models/mmdet_retinanet/schedule_1x.py b/tests/data/models/mmdet_retinanet/schedule_1x.py new file mode 100644 index 000000000..cc7fa00a8 --- /dev/null +++ b/tests/data/models/mmdet_retinanet/schedule_1x.py @@ -0,0 +1,6 @@ +# optimizer +optimizer = dict(type="SGD", lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict(policy="step", warmup="linear", warmup_iters=500, warmup_ratio=0.001, step=[8, 11]) +total_epochs = 12 diff --git a/tests/data/models/mmdet_yolox/yolox_tiny_8x8_300e_coco.py b/tests/data/models/mmdet_yolox/yolox_tiny_8x8_300e_coco.py new file mode 100644 index 000000000..b7040cb1f --- /dev/null +++ b/tests/data/models/mmdet_yolox/yolox_tiny_8x8_300e_coco.py @@ -0,0 +1,163 @@ +optimizer = dict( + type="SGD", + lr=0.01, + momentum=0.9, + weight_decay=0.0005, + nesterov=True, + paramwise_cfg=dict(norm_decay_mult=0.0, bias_decay_mult=0.0), +) +optimizer_config = dict(grad_clip=None) +lr_config = dict( + policy="YOLOX", + warmup="exp", + by_epoch=False, + warmup_by_epoch=True, + warmup_ratio=1, + warmup_iters=5, + num_last_epochs=15, + min_lr_ratio=0.05, +) +runner = dict(type="EpochBasedRunner", max_epochs=300) +checkpoint_config = dict(interval=10) +log_config = dict(interval=50, hooks=[dict(type="TextLoggerHook")]) +custom_hooks = [ + dict(type="YOLOXModeSwitchHook", num_last_epochs=15, priority=48), + dict(type="SyncNormHook", num_last_epochs=15, interval=10, priority=48), + dict(type="ExpMomentumEMAHook", resume_from=None, momentum=0.0001, priority=49), +] +dist_params = dict(backend="nccl") +log_level = "INFO" +load_from = None +resume_from = None +workflow = [("train", 1)] +img_scale = (640, 640) +model = dict( + type="YOLOX", + input_size=(640, 640), + random_size_range=(10, 20), + random_size_interval=10, + backbone=dict(type="CSPDarknet", deepen_factor=0.33, widen_factor=0.375), + neck=dict(type="YOLOXPAFPN", in_channels=[96, 192, 384], out_channels=96, num_csp_blocks=1), + bbox_head=dict(type="YOLOXHead", num_classes=80, in_channels=96, feat_channels=96), + train_cfg=dict(assigner=dict(type="SimOTAAssigner", center_radius=2.5)), + test_cfg=dict(score_thr=0.01, nms=dict(type="nms", iou_threshold=0.65)), +) +data_root = "data/coco/" +dataset_type = "CocoDataset" +train_pipeline = [ + dict(type="Mosaic", img_scale=(640, 640), pad_val=114.0), + dict(type="RandomAffine", scaling_ratio_range=(0.5, 1.5), border=(-320, -320)), + dict(type="YOLOXHSVRandomAug"), + dict(type="RandomFlip", flip_ratio=0.5), + dict(type="Resize", img_scale=(640, 640), keep_ratio=True), + dict(type="Pad", pad_to_square=True, pad_val=dict(img=(114.0, 114.0, 114.0))), + dict(type="FilterAnnotations", min_gt_bbox_wh=(1, 1), keep_empty=False), + dict(type="DefaultFormatBundle"), + dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]), +] +train_dataset = dict( + type="MultiImageMixDataset", + dataset=dict( + type="CocoDataset", + ann_file="data/coco/annotations/instances_train2017.json", + img_prefix="data/coco/train2017/", + pipeline=[dict(type="LoadImageFromFile"), dict(type="LoadAnnotations", with_bbox=True)], + filter_empty_gt=False, + ), + pipeline=[ + dict(type="Mosaic", img_scale=(640, 640), pad_val=114.0), + dict(type="RandomAffine", scaling_ratio_range=(0.5, 1.5), border=(-320, -320)), + dict(type="YOLOXHSVRandomAug"), + dict(type="RandomFlip", flip_ratio=0.5), + dict(type="Resize", img_scale=(640, 640), keep_ratio=True), + dict(type="Pad", pad_to_square=True, pad_val=dict(img=(114.0, 114.0, 114.0))), + dict(type="FilterAnnotations", min_gt_bbox_wh=(1, 1), keep_empty=False), + dict(type="DefaultFormatBundle"), + dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]), + ], +) +test_pipeline = [ + dict(type="LoadImageFromFile"), + dict( + type="MultiScaleFlipAug", + img_scale=(416, 416), + flip=False, + transforms=[ + dict(type="Resize", keep_ratio=True), + dict(type="RandomFlip"), + dict(type="Pad", pad_to_square=True, pad_val=dict(img=(114.0, 114.0, 114.0))), + dict(type="DefaultFormatBundle"), + dict(type="Collect", keys=["img"]), + ], + ), +] +data = dict( + samples_per_gpu=8, + workers_per_gpu=4, + persistent_workers=True, + train=dict( + type="MultiImageMixDataset", + dataset=dict( + type="CocoDataset", + ann_file="data/coco/annotations/instances_train2017.json", + img_prefix="data/coco/train2017/", + pipeline=[dict(type="LoadImageFromFile"), dict(type="LoadAnnotations", with_bbox=True)], + filter_empty_gt=False, + ), + pipeline=[ + dict(type="Mosaic", img_scale=(640, 640), pad_val=114.0), + dict(type="RandomAffine", scaling_ratio_range=(0.5, 1.5), border=(-320, -320)), + dict(type="YOLOXHSVRandomAug"), + dict(type="RandomFlip", flip_ratio=0.5), + dict(type="Resize", img_scale=(640, 640), keep_ratio=True), + dict(type="Pad", pad_to_square=True, pad_val=dict(img=(114.0, 114.0, 114.0))), + dict(type="FilterAnnotations", min_gt_bbox_wh=(1, 1), keep_empty=False), + dict(type="DefaultFormatBundle"), + dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]), + ], + ), + val=dict( + type="CocoDataset", + ann_file="data/coco/annotations/instances_val2017.json", + img_prefix="data/coco/val2017/", + pipeline=[ + dict(type="LoadImageFromFile"), + dict( + type="MultiScaleFlipAug", + img_scale=(416, 416), + flip=False, + transforms=[ + dict(type="Resize", keep_ratio=True), + dict(type="RandomFlip"), + dict(type="Pad", pad_to_square=True, pad_val=dict(img=(114.0, 114.0, 114.0))), + dict(type="DefaultFormatBundle"), + dict(type="Collect", keys=["img"]), + ], + ), + ], + ), + test=dict( + type="CocoDataset", + ann_file="data/coco/annotations/instances_val2017.json", + img_prefix="data/coco/val2017/", + pipeline=[ + dict(type="LoadImageFromFile"), + dict( + type="MultiScaleFlipAug", + img_scale=(416, 416), + flip=False, + transforms=[ + dict(type="Resize", keep_ratio=True), + dict(type="RandomFlip"), + dict(type="Pad", pad_to_square=True, pad_val=dict(img=(114.0, 114.0, 114.0))), + dict(type="DefaultFormatBundle"), + dict(type="Collect", keys=["img"]), + ], + ), + ], + ), +) +max_epochs = 300 +num_last_epochs = 15 +interval = 10 +evaluation = dict(save_best="auto", interval=10, dynamic_intervals=[(285, 1)], metric="bbox") From 9c8aff5da984e7e85be4a7fdf9563e734ce199da Mon Sep 17 00:00:00 2001 From: Hongyuan Zhang <66273343+Alias-z@users.noreply.github.com> Date: Tue, 4 Jun 2024 23:05:12 +0200 Subject: [PATCH 7/8] update has_mask method --- sahi/models/mmdet.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sahi/models/mmdet.py b/sahi/models/mmdet.py index fe3e13688..5702b207e 100644 --- a/sahi/models/mmdet.py +++ b/sahi/models/mmdet.py @@ -191,8 +191,9 @@ def has_mask(self): Returns if model output contains segmentation mask """ # has_mask = self.model.model.with_mask - dataloader = self.model.cfg["train_dataloader"]["dataset"]["dataset"]["pipeline"] - return any(isinstance(item, dict) and item.get("with_mask", False) for item in dataloader) + train_pipeline = self.model.cfg["train_dataloader"]['dataset']['pipeline'] + has_mask = any(isinstance(item, dict) and any('mask' in key for key in item.keys()) for item in train_pipeline) + return has_mask @property def category_names(self): From 9f26c66a7eab1a0b8353de539ddb3ec635556439 Mon Sep 17 00:00:00 2001 From: Hongyuan Zhang <66273343+Alias-z@users.noreply.github.com> Date: Tue, 4 Jun 2024 23:11:08 +0200 Subject: [PATCH 8/8] Reformat with black and isort --- sahi/models/mmdet.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sahi/models/mmdet.py b/sahi/models/mmdet.py index 5702b207e..b4b363d3c 100644 --- a/sahi/models/mmdet.py +++ b/sahi/models/mmdet.py @@ -191,8 +191,8 @@ def has_mask(self): Returns if model output contains segmentation mask """ # has_mask = self.model.model.with_mask - train_pipeline = self.model.cfg["train_dataloader"]['dataset']['pipeline'] - has_mask = any(isinstance(item, dict) and any('mask' in key for key in item.keys()) for item in train_pipeline) + train_pipeline = self.model.cfg["train_dataloader"]["dataset"]["pipeline"] + has_mask = any(isinstance(item, dict) and any("mask" in key for key in item.keys()) for item in train_pipeline) return has_mask @property