Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Keypoint & Instance Segmentation Training Hyperparameters #163

Merged
merged 9 commits into from
Feb 3, 2025
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions configs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,7 @@ training_strategy:
momentum: 0.937
weight_decay: 0.0005
nesterov: True
cosine_annealing: True
```

## Exporter
Expand Down
1 change: 1 addition & 0 deletions configs/detection_heavy_model.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ loader:
dataset_name: coco_test

trainer:
precision: "16-mixed"
preprocessing:
train_image_size: [384, 512]
keep_aspect_ratio: true
Expand Down
1 change: 1 addition & 0 deletions configs/detection_light_model.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ loader:
dataset_name: coco_test

trainer:
precision: "16-mixed"
preprocessing:
train_image_size: [384, 512]
keep_aspect_ratio: true
Expand Down
16 changes: 12 additions & 4 deletions configs/instance_segmentation_heavy_model.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ loader:
dataset_name: coco_test

trainer:
precision: "16-mixed"
preprocessing:
train_image_size: [384, 512]
keep_aspect_ratio: true
Expand All @@ -27,10 +28,10 @@ trainer:

batch_size: 8
epochs: &epochs 300
accumulate_grad_batches: 8 # For best results, always accumulate gradients to effectively use 64 batch size
n_workers: 8
n_workers: 4
validation_interval: 10
n_log_images: 8
gradient_clip_val: 10

callbacks:
- name: EMACallback
Expand All @@ -40,15 +41,22 @@ trainer:
decay_tau: 2000
- name: ExportOnTrainEnd
- name: TestOnTrainEnd
- name: GradientAccumulationScheduler
params:
scheduling: # warmup phase is 3 epochs
0: 1
1: 4
2: 8 # For best results, always accumulate gradients to effectively use 64 batch size

training_strategy:
name: "TripleLRSGDStrategy"
params:
warmup_epochs: 3
warmup_bias_lr: 0.1
warmup_bias_lr: 0.0
warmup_momentum: 0.8
lr: 0.01
lre: 0.0001
momentum: 0.937
weight_decay: 0.0005
nesterov: True
nesterov: True
cosine_annealing: False
16 changes: 12 additions & 4 deletions configs/instance_segmentation_light_model.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ loader:
dataset_name: coco_test

trainer:
precision: "16-mixed"
preprocessing:
train_image_size: [384, 512]
keep_aspect_ratio: true
Expand All @@ -27,10 +28,10 @@ trainer:

batch_size: 8
epochs: &epochs 300
accumulate_grad_batches: 8 # For best results, always accumulate gradients to effectively use 64 batch size
n_workers: 8
n_workers: 4
validation_interval: 10
n_log_images: 8
gradient_clip_val: 10

callbacks:
- name: EMACallback
Expand All @@ -40,15 +41,22 @@ trainer:
decay_tau: 2000
- name: ExportOnTrainEnd
- name: TestOnTrainEnd
- name: GradientAccumulationScheduler
params:
scheduling: # warmup phase is 3 epochs
0: 1
1: 4
2: 8 # For best results, always accumulate gradients to effectively use 64 batch size

training_strategy:
name: "TripleLRSGDStrategy"
params:
warmup_epochs: 3
warmup_bias_lr: 0.1
warmup_bias_lr: 0.0
warmup_momentum: 0.8
lr: 0.01
lre: 0.0001
momentum: 0.937
weight_decay: 0.0005
nesterov: True
nesterov: True
cosine_annealing: False
18 changes: 13 additions & 5 deletions configs/keypoint_bbox_heavy_model.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,19 @@ model:
params:
variant: heavy
loss_params:
iou_type: "siou"
iou_type: "ciou"
n_warmup_epochs: 0 # No assigner warmup
iou_loss_weight: 60 # Should be 7.5 * accumulate_grad_batches for best results
class_loss_weight: 4 # Should be 0.5 * accumulate_grad_batches for best results
regr_kpts_loss_weight: 96 # Should be 12 * accumulate_grad_batches for best results
vis_kpts_loss_weight: 16 # Should be 2 * accumulate_grad_batches for best results
vis_kpts_loss_weight: 8 # Should be 1 * accumulate_grad_batches for best results

loader:
params:
dataset_name: coco_test

trainer:
precision: "16-mixed"
preprocessing:
train_image_size: [384, 512]
keep_aspect_ratio: true
Expand All @@ -33,7 +34,7 @@ trainer:
n_workers: 4
validation_interval: 10
n_log_images: 8
accumulate_grad_batches: 8 # For best results, always accumulate gradients to effectively use 64 batch size
gradient_clip_val: 10

callbacks:
- name: EMACallback
Expand All @@ -43,15 +44,22 @@ trainer:
decay_tau: 2000
- name: ExportOnTrainEnd
- name: TestOnTrainEnd
- name: GradientAccumulationScheduler
params:
scheduling: # warmup phase is 3 epochs
0: 1
1: 4
2: 8 # For best results, always accumulate gradients to effectively use 64 batch size

training_strategy:
name: "TripleLRSGDStrategy"
params:
warmup_epochs: 3
warmup_bias_lr: 0.1
warmup_bias_lr: 0.0
warmup_momentum: 0.8
lr: 0.01
lre: 0.0001
momentum: 0.937
weight_decay: 0.0005
nesterov: True
nesterov: True
cosine_annealing: False
16 changes: 12 additions & 4 deletions configs/keypoint_bbox_light_model.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,19 @@ model:
params:
variant: light
loss_params:
iou_type: "siou"
iou_type: "ciou"
n_warmup_epochs: 0 # No assigner warmup
iou_loss_weight: 60 # Should be 7.5 * accumulate_grad_batches for best results
class_loss_weight: 4 # Should be 0.5 * accumulate_grad_batches for best results
regr_kpts_loss_weight: 96 # Should be 12 * accumulate_grad_batches for best results
vis_kpts_loss_weight: 16 # Should be 2 * accumulate_grad_batches for best results
vis_kpts_loss_weight: 8 # Should be 1 * accumulate_grad_batches for best results

loader:
params:
dataset_name: coco_test

trainer:
precision: "16-mixed"
preprocessing:
train_image_size: [384, 512]
keep_aspect_ratio: true
Expand All @@ -33,7 +34,7 @@ trainer:
n_workers: 4
validation_interval: 10
n_log_images: 8
accumulate_grad_batches: 8 # For best results, always accumulate gradients to effectively use 64 batch size
gradient_clip_val: 10

callbacks:
- name: EMACallback
Expand All @@ -43,15 +44,22 @@ trainer:
decay_tau: 2000
- name: ExportOnTrainEnd
- name: TestOnTrainEnd
- name: GradientAccumulationScheduler
params:
scheduling: # warmup phase is 3 epochs
0: 1
1: 4
2: 8 # For best results, always accumulate gradients to effectively use 64 batch size

training_strategy:
name: "TripleLRSGDStrategy"
params:
warmup_epochs: 3
warmup_bias_lr: 0.1
warmup_bias_lr: 0.0
warmup_momentum: 0.8
lr: 0.01
lre: 0.0001
momentum: 0.937
weight_decay: 0.0005
nesterov: True
cosine_annealing: False
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def __init__(
iou_loss_weight: float = 7.5,
viz_pw: float = 1.0,
regr_kpts_loss_weight: float = 12,
vis_kpts_loss_weight: float = 2.0,
vis_kpts_loss_weight: float = 1.0,
sigmas: list[float] | None = None,
area_factor: float | None = None,
**kwargs: Any,
Expand All @@ -59,7 +59,7 @@ def __init__(
@type regr_kpts_loss_weight: float
@param regr_kpts_loss_weight: Weight of regression loss for keypoints. Defaults to 12.0. For optimal results, multiply with accumulate_grad_batches.
@type vis_kpts_loss_weight: float
@param vis_kpts_loss_weight: Weight of visibility loss for keypoints. Defaults to 2.0. For optimal results, multiply with accumulate_grad_batches.
@param vis_kpts_loss_weight: Weight of visibility loss for keypoints. Defaults to 1.0. For optimal results, multiply with accumulate_grad_batches.
@type iou_loss_weight: float
@param iou_loss_weight: Weight of IoU loss. Defaults to 7.5. For optimal results, multiply with accumulate_grad_batches.
@type sigmas: list[float] | None
Expand Down Expand Up @@ -105,7 +105,7 @@ def prepare(
target_kpts = self.get_label(labels, TaskType.KEYPOINTS)
target_kpts = insert_class(target_kpts, target_bbox)

batch_size = pred_scores.shape[0]
self.batch_size = pred_scores.shape[0]
n_kpts = (target_kpts.shape[1] - 2) // 3

self._init_parameters(feats)
Expand All @@ -114,14 +114,16 @@ def prepare(
pred_kpts = self.dist2kpts_noscale(
self.anchor_points_strided,
pred_kpts.view(
batch_size,
self.batch_size,
-1,
n_kpts,
3,
),
)

target_bbox = self._preprocess_bbox_target(target_bbox, batch_size)
target_bbox = self._preprocess_bbox_target(
target_bbox, self.batch_size
)

gt_bbox_labels = target_bbox[:, :, :1]
gt_xyxy = target_bbox[:, :, 1:]
Expand All @@ -141,7 +143,7 @@ def prepare(
)

batched_kpts = self._preprocess_kpts_target(
target_kpts, batch_size, self.gt_kpts_scale
target_kpts, self.batch_size, self.gt_kpts_scale
)
assigned_gt_idx_expanded = assigned_gt_idx.unsqueeze(-1).unsqueeze(-1)
selected_keypoints = batched_kpts.gather(
Expand Down Expand Up @@ -239,7 +241,7 @@ def forward(
"visibility": visibility_loss.detach(),
}

return loss, sub_losses
return loss * self.batch_size, sub_losses

def _preprocess_kpts_target(
self, kpts_target: Tensor, batch_size: int, scale_tensor: Tensor
Expand Down
10 changes: 5 additions & 5 deletions luxonis_train/attached_modules/metrics/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@ For more information, see [object-keypoint-similarity](https://learnopencv.com/o

**Params**

| Key | Type | Default value | Description |
| ------------------ | --------------------- | ------------- | --------------------------------------------------------------------- |
| `sigmas` | `list[float] \| None` | `None` | List of sigmas for each keypoint. If `None`, the COCO sigmas are used |
| `area_factor` | `float` | `0.53` | Factor by which to multiply the bounding box area |
| `use_cocoeval_oks` | `bool` | `True` | Whether to use the same OKS formula as in COCO evaluation |
| Key | Type | Default value | Description |
| ------------------ | --------------------- | ------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `sigmas` | `list[float] \| None` | `None` | List of sigma values for each keypoint. If `None`, the COCO sigmas are used when the COCO dataset is provided. Otherwise, a default sigma value of 1/n_keypoints is applied. |
| `area_factor` | `float` | `0.53` | Factor by which to multiply the bounding box area |
| `use_cocoeval_oks` | `bool` | `True` | Whether to use the same OKS formula as in COCO evaluation |

## MeanAveragePrecision

Expand Down
28 changes: 27 additions & 1 deletion luxonis_train/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,32 @@ def check_graph(self) -> Self:
raise ValueError("No outputs specified.")
return self

@model_validator(mode="after")
def check_for_invalid_characters(self) -> Self:
    """Reject node and attached-module identifiers that contain a '/'.

    Iterates over C{self.nodes}, C{self.losses}, C{self.metrics} and
    C{self.visualizers}. For C{AttachedModuleConfig} entries the
    C{attached_to} and C{name} fields are checked; for
    C{ModelNodeConfig} entries the C{alias} and C{name} fields are
    checked. Falsy values (e.g. C{None} or an empty string) are skipped.

    @rtype: Self
    @return: The validated instance itself.
    @raise ValueError: If any checked field contains a '/' character.
    """
    # The message does not depend on the module being inspected, so it
    # is built once instead of on every pass of the outer loop.
    invalid_char_error_message = (
        "Name, alias or attached module contains a '/', which is not allowed. "
        "Please rename the node or module to remove any '/' characters."
    )
    for modules in [
        self.nodes,
        self.losses,
        self.metrics,
        self.visualizers,
    ]:
        for module in modules:
            # The two checks are kept independent (not `elif`) so that
            # an object matching both config types is validated on all
            # of its identifier fields.
            if isinstance(module, AttachedModuleConfig):
                if (module.attached_to and "/" in module.attached_to) or (
                    module.name and "/" in module.name
                ):
                    raise ValueError(invalid_char_error_message)

            if isinstance(module, ModelNodeConfig):
                if (module.alias and "/" in module.alias) or (
                    module.name and "/" in module.name
                ):
                    raise ValueError(invalid_char_error_message)
    return self

@model_validator(mode="after")
def check_unique_names(self) -> Self:
for modules in [
Expand Down Expand Up @@ -402,7 +428,7 @@ class TrainerConfig(BaseModelExtraForbid):
deterministic: bool | Literal["warn"] | None = None
smart_cfg_auto_populate: bool = True
batch_size: PositiveInt = 32
accumulate_grad_batches: PositiveInt = 1
accumulate_grad_batches: PositiveInt | None = None
gradient_clip_val: NonNegativeFloat | None = None
gradient_clip_algorithm: Literal["norm", "value"] | None = None
use_weighted_sampler: bool = False
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,13 @@ def nodes(self) -> list[ModelNodeConfig]:
return [
ModelNodeConfig(
name=self.backbone,
alias=f"{self.task_name}/{self.backbone}",
alias=f"{self.task_name}-{self.backbone}",
params=self.backbone_params,
),
ModelNodeConfig(
name="DiscSubNetHead",
alias=f"{self.task_name}/DiscSubNetHead",
inputs=[f"{self.task_name}/{self.backbone}"],
alias=f"{self.task_name}-DiscSubNetHead",
inputs=[f"{self.task_name}-{self.backbone}"],
params=self.disc_subnet_params,
),
]
Expand All @@ -90,7 +90,7 @@ def losses(self) -> list[LossModuleConfig]:
return [
LossModuleConfig(
name="ReconstructionSegmentationLoss",
attached_to=f"{self.task_name}/DiscSubNetHead",
attached_to=f"{self.task_name}-DiscSubNetHead",
params=self.loss_params,
weight=1.0,
)
Expand All @@ -102,7 +102,7 @@ def metrics(self) -> list[MetricModuleConfig]:
return [
MetricModuleConfig(
name="JaccardIndex",
attached_to=f"{self.task_name}/DiscSubNetHead",
attached_to=f"{self.task_name}-DiscSubNetHead",
params={"num_classes": 2, "task": "multiclass"},
is_main_metric=True,
),
Expand All @@ -115,7 +115,7 @@ def visualizers(self) -> list[AttachedModuleConfig]:
return [
AttachedModuleConfig(
name="SegmentationVisualizer",
attached_to=f"{self.task_name}/DiscSubNetHead",
attached_to=f"{self.task_name}-DiscSubNetHead",
params=self.visualizer_params,
)
]
Loading
Loading