diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 97189ad9..c9355abb 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -20,4 +20,4 @@ repos:
     hooks:
       - id: mdformat
         additional_dependencies:
-          - mdformat-gfm==0.3.6
\ No newline at end of file
+          - mdformat-gfm==0.3.6
diff --git a/configs/embeddings_model.yaml b/configs/embeddings_model.yaml
new file mode 100644
index 00000000..0e5308d9
--- /dev/null
+++ b/configs/embeddings_model.yaml
@@ -0,0 +1,45 @@
+loader:
+  name: CustomReIDLoader
+  params:
+    dataset_name: ParkingLot
+
+model:
+  name: reid_test
+  nodes:
+    - name: GhostFaceNetV2
+
+    - name: GhostFaceNetHead
+      alias: color-embeddings
+      metadata_task_override: color
+      params:
+        embedding_size: 16
+
+      losses:
+        - name: SupConLoss
+          params:
+            miner: MultiSimilarityMiner
+            distance: CosineSimilarity
+            reducer: ThresholdReducer
+            reducer_params:
+              high: 0.3
+            regularizer: LpRegularizer
+
+      metrics:
+        - name: ClosestIsPositiveAccuracy
+
+        - name: MedianDistances
+
+      visualizers:
+        - name: EmbeddingsVisualizer
+
+trainer:
+  preprocessing:
+    train_image_size: [256, 256]
+
+  batch_size: 16
+  epochs: 100
+  validation_interval: 10
+  n_log_images: 8
+
+  callbacks:
+    - name: ExportOnTrainEnd
diff --git a/luxonis_train/attached_modules/base_attached_module.py b/luxonis_train/attached_modules/base_attached_module.py
index a5f14761..7e177735 100644
--- a/luxonis_train/attached_modules/base_attached_module.py
+++ b/luxonis_train/attached_modules/base_attached_module.py
@@ -1,13 +1,13 @@
 import logging
 from abc import ABC
 from contextlib import suppress
-from typing import Generic
+from typing import Generic, get_args
 
 from luxonis_ml.utils.registry import AutoRegisterMeta
 from torch import Size, Tensor, nn
 from typing_extensions import TypeVarTuple, Unpack
 
-from luxonis_train.enums import TaskType
+from luxonis_train.enums import Task, TaskType
 from luxonis_train.nodes import BaseNode
 from luxonis_train.utils import IncompatibleException, Labels, Packet
 
@@ -57,19 +57,29 @@ class BaseAttachedModule(
         labels I{or} segmentation labels.
""" - supported_tasks: list[TaskType | tuple[TaskType, ...]] | None = None + supported_tasks: list[Task | tuple[Task, ...]] | None = None def __init__(self, *, node: BaseNode | None = None): super().__init__() self._node = node self._epoch = 0 - self.required_labels: list[TaskType] = [] - if self._node and self.supported_tasks: + self.required_labels: list[Task] = [] + if self._node is not None and self.supported_tasks: + for tasks in self.supported_tasks: + if not isinstance(tasks, tuple): + tasks = (tasks,) + for task in tasks: + if isinstance(task, TaskType): + continue + task.name = self.node.metadata_task_override.get( + task.name, task.name + ) + module_supported = [ label.value - if isinstance(label, TaskType) - else f"({' + '.join(label)})" + if isinstance(label, Task) + else f"({' + '.join(map(str, label))})" for label in self.supported_tasks ] module_supported = f"[{', '.join(module_supported)}]" @@ -81,7 +91,7 @@ def __init__(self, *, node: BaseNode | None = None): ) node_tasks = set(self.node.tasks) for required_labels in self.supported_tasks: - if isinstance(required_labels, TaskType): + if isinstance(required_labels, Task): required_labels = [required_labels] else: required_labels = list(required_labels) @@ -159,7 +169,7 @@ def class_names(self) -> list[str]: return self.node.class_names @property - def node_tasks(self) -> list[TaskType]: + def node_tasks(self) -> list[Task]: """Getter for the tasks of the attached node. @type: dict[TaskType, str] @@ -201,11 +211,11 @@ def get_label( @raises ValueError: If the module requires multiple labels and the C{task_type} is not provided. @raises IncompatibleException: If the label is not found in the labels dictionary. """ - return self._get_label(labels, task_type)[0] + return self._get_label(labels, task_type) def _get_label( - self, labels: Labels, task_type: TaskType | None = None - ) -> tuple[Tensor, TaskType]: + self, labels: Labels, task_type: Task | None = None + ) -> Tensor: if task_type is None: if len(self.required_labels) == 1: task_type = self.required_labels[0] @@ -221,7 +231,7 @@ def _get_label( f"Available labels: {list(labels.keys())}. " f"Missing label: '{task}'." ) - return labels[task], task_type + return labels[task] raise ValueError( f"{self.name} requires multiple labels. You must provide the " @@ -229,7 +239,7 @@ def _get_label( ) def get_input_tensors( - self, inputs: Packet[Tensor], task_type: TaskType | str | None = None + self, inputs: Packet[Tensor], task_type: Task | str | None = None ) -> list[Tensor]: """Extracts the input tensors from the packet. @@ -259,7 +269,7 @@ def get_input_tensors( For such cases, the C{prepare} method should be overridden. 
""" if task_type is not None: - if isinstance(task_type, TaskType): + if isinstance(task_type, Task): if task_type not in self.node_tasks: raise IncompatibleException( f"Task {task_type.value} is not supported by the node " @@ -345,24 +355,45 @@ def prepare( set(self.supported_tasks) & set(self.node_tasks) ) x = self.get_input_tensors(inputs) - if labels is None or len(labels) == 0: + if labels is None or not labels: return x, None # type: ignore - label, task_type = self._get_label(labels) - if task_type in [TaskType.CLASSIFICATION, TaskType.SEGMENTATION]: + + label = self._get_label(labels) + generics = self._get_generic_params() + if generics is None or generics[0].__name__ == "Unpack": + return x, label # type: ignore + + if len(generics) != 2: + raise RuntimeError( + f"The type signature of '{self.name}' implies a complicated " + f"custom module ({self.name}[{', '.join(g.__name__ for g in generics)}]). " + "Please implement your own `prepare` method. The default " + "`prepare` works only when the generic type of the module " + "is `[Tensor | list[Tensor], Tensor]`." + ) + + if generics[0] is Tensor: if len(x) == 1: x = x[0] else: logger.warning( - f"Module {self.name} expects a single tensor as input, " + f"Module '{self.name}' expects a single tensor as input, " f"but got {len(x)} tensors. Using the last tensor. " f"If this is not the desired behavior, please override the " "`prepare` method of the attached module or the `wrap` " - f"method of {self.node.name}." + f"method of '{self.node.name}'." ) x = x[-1] return x, label # type: ignore + def _get_generic_params(self) -> tuple[type, ...] | None: + cls = type(self) + try: + return get_args(cls.__orig_bases__[0]) # type: ignore + except Exception: + return None + def _check_node_type_override(self) -> None: if "node" not in self.__annotations__: return diff --git a/luxonis_train/attached_modules/losses/README.md b/luxonis_train/attached_modules/losses/README.md index ffe218d4..32d853dc 100644 --- a/luxonis_train/attached_modules/losses/README.md +++ b/luxonis_train/attached_modules/losses/README.md @@ -12,8 +12,9 @@ List of all the available loss functions. - [`AdaptiveDetectionLoss`](#adaptivedetectionloss) - [`EfficientKeypointBBoxLoss`](#efficientkeypointbboxloss) - [`FOMOLocalizationLoss`](#fomolocalizationLoss) -- \[`PrecisionDFLDetectionLoss`\] (# precisiondfldetectionloss) -- \[`PrecisionDFLSegmentationLoss`\] (# precisiondflsegmentationloss) +- [Embedding Losses](#embedding-losses) +- [`PrecisionDFLDetectionLoss`](#precisiondfldetectionloss) +- [`PrecisionDFLSegmentationLoss`](#precisiondflsegmentationloss) ## `CrossEntropyLoss` @@ -124,6 +125,48 @@ Adapted from [here](https://arxiv.org/abs/2108.07610). | --------------- | ------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | `object_weight` | `float` | `500` | Weight for the objects in the loss calculation. Training with a larger `object_weight` in the loss parameters may result in more false positives (FP), but it will improve accuracy. 
 |
+## Embedding Losses
+
+We support the following losses taken from [pytorch-metric-learning](https://kevinmusgrave.github.io/pytorch-metric-learning/losses/):
+
+- [AngularLoss](https://kevinmusgrave.github.io/pytorch-metric-learning/losses/#angularloss)
+- [CircleLoss](https://kevinmusgrave.github.io/pytorch-metric-learning/losses/#circleloss)
+- [ContrastiveLoss](https://kevinmusgrave.github.io/pytorch-metric-learning/losses/#contrastiveloss)
+- [DynamicSoftMarginLoss](https://kevinmusgrave.github.io/pytorch-metric-learning/losses/#dynamicsoftmarginloss)
+- [FastAPLoss](https://kevinmusgrave.github.io/pytorch-metric-learning/losses/#fastaploss)
+- [HistogramLoss](https://kevinmusgrave.github.io/pytorch-metric-learning/losses/#histogramloss)
+- [InstanceLoss](https://kevinmusgrave.github.io/pytorch-metric-learning/losses/#instanceloss)
+- [IntraPairVarianceLoss](https://kevinmusgrave.github.io/pytorch-metric-learning/losses/#intrapairvarianceloss)
+- [GeneralizedLiftedStructureLoss](https://kevinmusgrave.github.io/pytorch-metric-learning/losses/#generalizedliftedstructureloss)
+- [LiftedStructureLoss](https://kevinmusgrave.github.io/pytorch-metric-learning/losses/#liftedstructureloss)
+- [MarginLoss](https://kevinmusgrave.github.io/pytorch-metric-learning/losses/#marginloss)
+- [MultiSimilarityLoss](https://kevinmusgrave.github.io/pytorch-metric-learning/losses/#multisimilarityloss)
+- [NPairsLoss](https://kevinmusgrave.github.io/pytorch-metric-learning/losses/#npairsloss)
+- [NCALoss](https://kevinmusgrave.github.io/pytorch-metric-learning/losses/#ncaloss)
+- [NTXentLoss](https://kevinmusgrave.github.io/pytorch-metric-learning/losses/#ntxentloss)
+- [PNPLoss](https://kevinmusgrave.github.io/pytorch-metric-learning/losses/#pnploss)
+- [RankedListLoss](https://kevinmusgrave.github.io/pytorch-metric-learning/losses/#rankedlistloss)
+- [SignalToNoiseRatioContrastiveLoss](https://kevinmusgrave.github.io/pytorch-metric-learning/losses/#signaltonoisecontrastiveloss)
+- [SupConLoss](https://kevinmusgrave.github.io/pytorch-metric-learning/losses/#supconloss)
+- [ThresholdConsistentMarginLoss](https://kevinmusgrave.github.io/pytorch-metric-learning/losses/#thresholdconsistentmarginloss)
+- [TripletMarginLoss](https://kevinmusgrave.github.io/pytorch-metric-learning/losses/#tripletmarginloss)
+- [TupletMarginLoss](https://kevinmusgrave.github.io/pytorch-metric-learning/losses/#tupletmarginloss)
+
+**Parameters:**
+
+For loss-specific parameters, see the documentation pages linked above. In addition to the loss-specific parameters, the following parameters are available:
+
+| Key                  | Type   | Default value | Description |
+| -------------------- | ------ | ------------- | ----------- |
+| `miner`              | `str`  | `None`        | Name of the miner to use with the loss. If `None`, no miner is used. All miners from [pytorch-metric-learning](https://kevinmusgrave.github.io/pytorch-metric-learning/miners/) are supported. |
+| `miner_params`       | `dict` | `None`        | Parameters for the miner. |
+| `distance`           | `str`  | `None`        | Name of the distance metric to use with the loss. If `None`, the loss's default distance is used. All distance metrics from [pytorch-metric-learning](https://kevinmusgrave.github.io/pytorch-metric-learning/distances/) are supported. |
+| `distance_params`    | `dict` | `None`        | Parameters for the distance metric. |
+| `reducer`            | `str`  | `None`        | Name of the reducer to use with the loss. If `None`, the loss's default reducer is used. All reducers from [pytorch-metric-learning](https://kevinmusgrave.github.io/pytorch-metric-learning/reducers/) are supported. |
+| `reducer_params`     | `dict` | `None`        | Parameters for the reducer. |
+| `regularizer`        | `str`  | `None`        | Name of the regularizer to use with the loss. If `None`, no regularizer is used. All regularizers from [pytorch-metric-learning](https://kevinmusgrave.github.io/pytorch-metric-learning/regularizers/) are supported. |
+| `regularizer_params` | `dict` | `None`        | Parameters for the regularizer. |
+
 ## `PrecisionDFLDetectionLoss`
 
 Adapted from [here](https://arxiv.org/pdf/2207.02696.pdf) and [here](https://arxiv.org/pdf/2209.02976.pdf).
diff --git a/luxonis_train/attached_modules/losses/__init__.py b/luxonis_train/attached_modules/losses/__init__.py
index 32b33174..6c4e882d 100644
--- a/luxonis_train/attached_modules/losses/__init__.py
+++ b/luxonis_train/attached_modules/losses/__init__.py
@@ -3,6 +3,7 @@
 from .bce_with_logits import BCEWithLogitsLoss
 from .cross_entropy import CrossEntropyLoss
 from .efficient_keypoint_bbox_loss import EfficientKeypointBBoxLoss
+from .embedding_losses import EmbeddingLossWrapper
 from .fomo_localization_loss import FOMOLocalizationLoss
 from .ohem_bce_with_logits import OHEMBCEWithLogitsLoss
 from .ohem_cross_entropy import OHEMCrossEntropyLoss
@@ -28,6 +29,7 @@
     "OHEMCrossEntropyLoss",
     "OHEMBCEWithLogitsLoss",
     "FOMOLocalizationLoss",
+    "EmbeddingLossWrapper",
     "PrecisionDFLDetectionLoss",
     "PrecisionDFLSegmentationLoss",
 ]
diff --git a/luxonis_train/attached_modules/losses/embedding_losses.py b/luxonis_train/attached_modules/losses/embedding_losses.py
new file mode 100644
index 00000000..b7b3518e
--- /dev/null
+++ b/luxonis_train/attached_modules/losses/embedding_losses.py
@@ -0,0 +1,139 @@
+import logging
+
+import pytorch_metric_learning.distances as pml_distances
+import pytorch_metric_learning.losses as pml_losses
+import pytorch_metric_learning.miners as pml_miners
+import pytorch_metric_learning.reducers as pml_reducers
+import pytorch_metric_learning.regularizers as pml_regularizers
+from pytorch_metric_learning.losses import CrossBatchMemory
+from torch import Tensor
+
+from luxonis_train.enums import Metadata
+from luxonis_train.nodes.base_node import BaseNode
+from luxonis_train.nodes.heads.ghostfacenet_head import GhostFaceNetHead
+from luxonis_train.utils.types import Kwargs
+
+from .base_loss import BaseLoss
+
+logger = logging.getLogger(__name__)
+
+EMBEDDING_LOSSES = [
+    "AngularLoss",
+    "CircleLoss",
+    "ContrastiveLoss",
+    "DynamicSoftMarginLoss",
+    "FastAPLoss",
+    "HistogramLoss",
+    "InstanceLoss",
+    "IntraPairVarianceLoss",
+    "GeneralizedLiftedStructureLoss",
+    "LiftedStructureLoss",
+    "MarginLoss",
+    "MultiSimilarityLoss",
+    "NPairsLoss",
+    "NCALoss",
+    "NTXentLoss",
+    "PNPLoss",
+    "RankedListLoss",
+    "SignalToNoiseRatioContrastiveLoss",
+    "SupConLoss",
+    "ThresholdConsistentMarginLoss",
+    "TripletMarginLoss",
+    "TupletMarginLoss",
+]
+
+for _loss_name in EMBEDDING_LOSSES:
+
+    class EmbeddingLossWrapper(
+        BaseLoss[Tensor, Tensor], register_name=_loss_name
+    ):
+        node: GhostFaceNetHead
+        supported_tasks = [Metadata("id")]
+        miner: pml_miners.BaseMiner | None
+
+        # Bound at class-creation time for each loop iteration; reading the
+        # loop variable `_loss_name` from inside a method would late-bind to
+        # the last name in `EMBEDDING_LOSSES` (ruff B023).
+        _pml_loss_name: str = _loss_name
+
+        def __init__(
+            self,
+            *,
+            miner: str | None = None,
+            miner_params: Kwargs | None = None,
+            distance: str | None = None,
+            distance_params: Kwargs | None = None,
+            reducer: str | None = None,
+            reducer_params: Kwargs | None = None,
+            regularizer: str | None = None,
+            regularizer_params: Kwargs | None = None,
+            node: BaseNode | None = None,
+            **kwargs,
+        ):
+            super().__init__(node=node)
+            loss_name = self._pml_loss_name
+
+            if not hasattr(pml_losses, loss_name):
+                raise ValueError(
+                    f"Loss {loss_name} not found in pytorch-metric-learning"
+                )
+            Loss = getattr(pml_losses, loss_name)
+
+            if reducer is not None:
+                if not hasattr(pml_reducers, reducer):
+                    raise ValueError(
+                        f"Reducer {reducer} not found in pytorch-metric-learning"
+                    )
+                Reducer = getattr(pml_reducers, reducer)
+                kwargs["reducer"] = Reducer(**(reducer_params or {}))
+            if regularizer is not None:
+                if not hasattr(pml_regularizers, regularizer):
+                    raise ValueError(
+                        f"Regularizer {regularizer} not found in pytorch-metric-learning"
+                    )
+                Regularizer = getattr(pml_regularizers, regularizer)
+                kwargs["embedding_regularizer"] = Regularizer(
+                    **(regularizer_params or {})
+                )
+            if distance is not None:
+                if not hasattr(pml_distances, distance):
+                    raise ValueError(
+                        f"Distance {distance} not found in pytorch-metric-learning"
+                    )
+                Distance = getattr(pml_distances, distance)
+                kwargs["distance"] = Distance(**(distance_params or {}))
+
+            if miner is not None:
+                if not hasattr(pml_miners, miner):
+                    raise ValueError(
+                        f"Miner {miner} not found in pytorch-metric-learning"
+                    )
+                Miner = getattr(pml_miners, miner)
+                self.miner = Miner(**(miner_params or {}))
+            else:
+                self.miner = None
+
+            self.loss = Loss(**kwargs)
+
+            if self.node.cross_batch_memory_size is not None:
+                if loss_name in CrossBatchMemory.supported_losses():
+                    self.loss = CrossBatchMemory(
+                        self.loss,
+                        embedding_size=self.node.embedding_size,
+                        miner=self.miner,
+                    )
+                else:
+                    logger.warning(
+                        f"'CrossBatchMemory' is not supported for {loss_name}. "
+                        "Ignoring cross_batch_memory_size."
+                    )
+
+        def forward(self, inputs: Tensor, target: Tensor) -> Tensor:
+            if self.miner is not None:
+                hard_pairs = self.miner(inputs, target)
+                return self.loss(inputs, target, hard_pairs)
+            return self.loss(inputs, target)
+
+        @property
+        def name(self) -> str:
+            return self._pml_loss_name
diff --git a/luxonis_train/attached_modules/metrics/README.md b/luxonis_train/attached_modules/metrics/README.md
index 42f42fcb..59021576 100644
--- a/luxonis_train/attached_modules/metrics/README.md
+++ b/luxonis_train/attached_modules/metrics/README.md
@@ -8,6 +8,8 @@ List of all the available metrics.
 - [ObjectKeypointSimilarity](#objectkeypointsimilarity)
 - [MeanAveragePrecision](#meanaverageprecision)
 - [MeanAveragePrecisionKeypoints](#meanaverageprecisionkeypoints)
+- [ClosestIsPositiveAccuracy](#closestispositiveaccuracy)
+- [MedianDistances](#mediandistances)
 
 ## Torchmetrics
 
@@ -63,3 +65,13 @@ Evaluation leverages COCO evaluation framework (COCOeval) to assess mAP performa
 | `area_factor` | `float` | `0.53` | Factor by which to multiply the bounding box area |
 | `max_dets` | `int` | `20` | Maximum number of detections per image |
 | `box_fotmat` | `Literal["xyxy", "xywh", "cxcywh"]` | `"xyxy"` | Format of the bounding boxes |
+
+## ClosestIsPositiveAccuracy
+
+Computes how often the sample closest to a query embedding is a positive one, i.e. shares the query's identity.
+Needs to be connected to the `GhostFaceNetHead` node.
+
+## MedianDistances
+
+Computes the median distances between query embeddings, their closest samples, and their closest positive samples.
+Needs to be connected to the `GhostFaceNetHead` node.
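For reference, the composition performed by `EmbeddingLossWrapper` can be reproduced standalone. A minimal sketch (assuming `pytorch-metric-learning` is installed) of what the `SupConLoss` entry from `configs/embeddings_model.yaml` resolves to; the batch contents are illustrative:

```python
import torch
from pytorch_metric_learning import distances, losses, miners, reducers, regularizers

# The wrapper looks these components up by name from the YAML `params` section.
loss_fn = losses.SupConLoss(
    distance=distances.CosineSimilarity(),
    reducer=reducers.ThresholdReducer(high=0.3),
    embedding_regularizer=regularizers.LpRegularizer(),
)
miner = miners.MultiSimilarityMiner()

embeddings = torch.randn(16, 16)     # (batch_size, embedding_size)
ids = torch.randint(0, 4, (16,))     # "metadata/id" labels
hard_pairs = miner(embeddings, ids)  # mine informative pairs first
loss = loss_fn(embeddings, ids, hard_pairs)
```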
diff --git a/luxonis_train/attached_modules/metrics/__init__.py b/luxonis_train/attached_modules/metrics/__init__.py index cdd0b3ac..df72a785 100644 --- a/luxonis_train/attached_modules/metrics/__init__.py +++ b/luxonis_train/attached_modules/metrics/__init__.py @@ -1,5 +1,6 @@ from .base_metric import BaseMetric from .confusion_matrix import ConfusionMatrix +from .embedding_metrics import ClosestIsPositiveAccuracy, MedianDistances from .mean_average_precision import MeanAveragePrecision from .mean_average_precision_keypoints import MeanAveragePrecisionKeypoints from .object_keypoint_similarity import ObjectKeypointSimilarity @@ -15,5 +16,7 @@ "ObjectKeypointSimilarity", "Precision", "Recall", + "ClosestIsPositiveAccuracy", "ConfusionMatrix", + "MedianDistances", ] diff --git a/luxonis_train/attached_modules/metrics/embedding_metrics.py b/luxonis_train/attached_modules/metrics/embedding_metrics.py new file mode 100644 index 00000000..b09d42f6 --- /dev/null +++ b/luxonis_train/attached_modules/metrics/embedding_metrics.py @@ -0,0 +1,208 @@ +import torch +from torch import Tensor + +from luxonis_train.enums import Metadata +from luxonis_train.nodes.heads.ghostfacenet_head import GhostFaceNetHead + +from .base_metric import BaseMetric + +# Converted from https://omoindrot.github.io/triplet-loss#offline-and-online-triplet-mining +# to PyTorch from TensorFlow + + +class ClosestIsPositiveAccuracy(BaseMetric[Tensor, Tensor]): + supported_tasks = [Metadata("id")] + node: GhostFaceNetHead + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.cross_batch_memory_size = self.node.cross_batch_memory_size + self.add_state("cross_batch_memory", default=[], dist_reduce_fx="cat") + self.add_state( + "correct_predictions", + default=torch.tensor(0), + dist_reduce_fx="sum", + ) + self.add_state( + "total_predictions", default=torch.tensor(0), dist_reduce_fx="sum" + ) + + def update(self, inputs: Tensor, target: Tensor): + embeddings, labels = inputs, target + + if self.cross_batch_memory_size is not None: + self.cross_batch_memory.extend(list(zip(embeddings, labels))) + + if len(self.cross_batch_memory) > self.cross_batch_memory_size: + self.cross_batch_memory = self.cross_batch_memory[ + -self.cross_batch_memory_size : + ] + + if len(self.cross_batch_memory) < self.cross_batch_memory_size: + return + + embeddings, labels = zip(*self.cross_batch_memory) + embeddings = torch.stack(embeddings) + labels = torch.stack(labels) + + pairwise_distances = _pairwise_distances(embeddings) + pairwise_distances.fill_diagonal_(float("inf")) + + closest_indices = torch.argmin(pairwise_distances, dim=1) + closest_labels = labels[closest_indices] + + positive_mask = _get_anchor_positive_triplet_mask(labels) + num_positives = positive_mask.sum(dim=1) + has_at_least_one_positive_and_negative = (num_positives > 0) & ( + num_positives < len(labels) + ) + + filtered_labels = labels[has_at_least_one_positive_and_negative] + filtered_closest_labels = closest_labels[ + has_at_least_one_positive_and_negative + ] + + correct_predictions = ( + filtered_labels == filtered_closest_labels + ).sum() + + self.correct_predictions += correct_predictions + self.total_predictions += len(filtered_labels) + + def compute(self): + return self.correct_predictions / self.total_predictions + + +class MedianDistances(BaseMetric[Tensor, Tensor]): + supported_tasks = [Metadata("id")] + node: GhostFaceNetHead + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.cross_batch_memory_size = self.node.cross_batch_memory_size + 
self.add_state("cross_batch_memory", default=[], dist_reduce_fx="cat") + self.add_state("all_distances", default=[], dist_reduce_fx="cat") + self.add_state("closest_distances", default=[], dist_reduce_fx="cat") + self.add_state("positive_distances", default=[], dist_reduce_fx="cat") + self.add_state( + "closest_vs_positive_distances", default=[], dist_reduce_fx="cat" + ) + + def update(self, inputs: Tensor, target: Tensor): + embeddings, labels = inputs, target + + if self.cross_batch_memory_size is not None: + self.cross_batch_memory.extend(list(zip(embeddings, labels))) + + if len(self.cross_batch_memory) > self.cross_batch_memory_size: + self.cross_batch_memory = self.cross_batch_memory[ + -self.cross_batch_memory_size : + ] + + if len(self.cross_batch_memory) < self.cross_batch_memory_size: + return + + embeddings, labels = zip(*self.cross_batch_memory) + embeddings = torch.stack(embeddings) + labels = torch.stack(labels) + + pairwise_distances = _pairwise_distances(embeddings) + self.all_distances.append( + pairwise_distances[ + torch.triu(torch.ones_like(pairwise_distances), diagonal=1) + == 1 + ].flatten() + ) + + pairwise_distances.fill_diagonal_(float("inf")) + + closest_distances, _ = torch.min(pairwise_distances, dim=1) + self.closest_distances.append(closest_distances) + + positive_mask = _get_anchor_positive_triplet_mask(labels).bool() + + only_positive_distances = pairwise_distances.clone() + only_positive_distances[~positive_mask] = float("inf") + + closest_positive_distances, _ = torch.min( + only_positive_distances, dim=1 + ) + + non_inf_mask = closest_positive_distances != float("inf") + difference = closest_positive_distances - closest_distances + difference = difference[non_inf_mask] + + self.closest_vs_positive_distances.append(difference) + self.positive_distances.append( + closest_positive_distances[non_inf_mask] + ) + + def compute(self): + if len(self.all_distances) == 0: + return { + "MedianDistance": torch.tensor(float("nan")), + "MedianClosestDistance": torch.tensor(float("nan")), + "MedianClosestPositiveDistance": torch.tensor(float("nan")), + "MedianClosestVsClosestPositiveDistance": torch.tensor( + float("nan") + ), + } + + all_distances = torch.cat(self.all_distances) + closest_distances = torch.cat(self.closest_distances) + positive_distances = torch.cat(self.positive_distances) + closest_vs_positive_distances = torch.cat( + self.closest_vs_positive_distances + ) + + return { + "MedianDistance": torch.median(all_distances), + "MedianClosestDistance": torch.median(closest_distances), + "MedianClosestPositiveDistance": torch.median(positive_distances), + "MedianClosestVsClosestPositiveDistance": torch.median( + closest_vs_positive_distances + ), + } + + +def _pairwise_distances(embeddings, squared=False): + """Compute the 2D matrix of distances between all the embeddings. + + @param embeddings: tensor of shape (batch_size, embed_dim) + @type embeddings: torch.Tensor + @param squared: If true, output is the pairwise squared euclidean + distance matrix. If false, output is the pairwise euclidean + distance matrix. 
+    @type squared: bool
+    @return: pairwise_distances: tensor of shape (batch_size,
+        batch_size)
+    @rtype: torch.Tensor
+    """
+    dot_product = torch.matmul(embeddings, embeddings.t())
+
+    square_norm = torch.diag(dot_product)
+
+    distances = (
+        square_norm.unsqueeze(0) - 2.0 * dot_product + square_norm.unsqueeze(1)
+    )
+    # Clamp on-device; `torch.max(distances, torch.tensor(0.0))` would mix
+    # devices when the embeddings live on the GPU.
+    distances = torch.clamp(distances, min=0.0)
+
+    if not squared:
+        mask = (distances == 0.0).float()
+        distances = distances + mask * 1e-16
+
+        distances = torch.sqrt(distances)
+
+        distances = distances * (1.0 - mask)
+
+    return distances
+
+
+def _get_anchor_positive_triplet_mask(labels):
+    indices_equal = torch.eye(
+        labels.shape[0], dtype=torch.bool, device=labels.device
+    )
+    indices_not_equal = ~indices_equal
+    labels_equal = labels.unsqueeze(0) == labels.unsqueeze(1)
+    mask = indices_not_equal & labels_equal
+    return mask
diff --git a/luxonis_train/attached_modules/metrics/torchmetrics.py b/luxonis_train/attached_modules/metrics/torchmetrics.py
index c222cb78..553ce31c 100644
--- a/luxonis_train/attached_modules/metrics/torchmetrics.py
+++ b/luxonis_train/attached_modules/metrics/torchmetrics.py
@@ -12,7 +12,7 @@
 logger = logging.getLogger(__name__)
 
 
-class TorchMetricWrapper(BaseMetric[Tensor]):
+class TorchMetricWrapper(BaseMetric[Tensor, Tensor]):
     Metric: type[torchmetrics.Metric]
 
     def __init__(self, **kwargs: Any):
diff --git a/luxonis_train/attached_modules/visualizers/README.md b/luxonis_train/attached_modules/visualizers/README.md
index 03daa87f..afef5066 100644
--- a/luxonis_train/attached_modules/visualizers/README.md
+++ b/luxonis_train/attached_modules/visualizers/README.md
@@ -7,6 +7,8 @@ Visualizers are used to render the output of a node. They are used in the `visua
 - [`BBoxVisualizer`](#bboxvisualizer)
 - [`ClassificationVisualizer`](#classificationvisualizer)
 - [`KeypointVisualizer`](#keypointvisualizer)
+- [`SegmentationVisualizer`](#segmentationvisualizer)
+- [`EmbeddingsVisualizer`](#embeddingsvisualizer)
 - [`MultiVisualizer`](#multivisualizer)
 
 ## `BBoxVisualizer`
@@ -72,6 +74,14 @@ Visualizer for bounding boxes.
 
 ![class_viz_example](https://github.com/luxonis/luxonis-train/blob/main/media/example_viz/class.png)
 
+## `EmbeddingsVisualizer`
+
+**Parameters:**
+
+| Key | Type | Default value | Description |
+| ------------------- | ------- | ------------- | ----------------------------------------------------------------------------------------------------------------------------- |
+| `z_score_threshold` | `float` | `3.0` | Threshold for z-score filtering. Embeddings with a z-score higher than this value are considered outliers and are not drawn. |
+
 ## `MultiVisualizer`
 
 Special type of meta-visualizer that combines several visualizers into one. The combined visualizers share canvas.
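`_pairwise_distances` relies on the expansion ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2 to obtain all pairwise distances from a single matrix product. A quick numeric sanity check of that identity (a sketch; `torch.cdist` is used only as the reference):

```python
import torch

emb = torch.randn(8, 16)  # (batch_size, embed_dim)
dot = emb @ emb.t()
sq = dot.diag()           # squared norm of each embedding

# Same formula as `_pairwise_distances`, minus the epsilon masking it
# applies to keep gradients finite at zero distance.
fast = (sq.unsqueeze(0) - 2.0 * dot + sq.unsqueeze(1)).clamp_min(0.0).sqrt()
slow = torch.cdist(emb, emb)

assert torch.allclose(fast, slow, atol=1e-4)
```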
diff --git a/luxonis_train/attached_modules/visualizers/__init__.py b/luxonis_train/attached_modules/visualizers/__init__.py
index 1bd65f50..7389aa57 100644
--- a/luxonis_train/attached_modules/visualizers/__init__.py
+++ b/luxonis_train/attached_modules/visualizers/__init__.py
@@ -1,6 +1,7 @@
 from .base_visualizer import BaseVisualizer
 from .bbox_visualizer import BBoxVisualizer
 from .classification_visualizer import ClassificationVisualizer
+from .embeddings_visualizer import EmbeddingsVisualizer
 from .instance_segmentation_visualizer import InstanceSegmentationVisualizer
 from .keypoint_visualizer import KeypointVisualizer
 from .multi_visualizer import MultiVisualizer
@@ -24,6 +25,7 @@
     "KeypointVisualizer",
     "MultiVisualizer",
     "SegmentationVisualizer",
+    "EmbeddingsVisualizer",
     "InstanceSegmentationVisualizer",
     "combine_visualizations",
     "draw_bounding_box_labels",
diff --git a/luxonis_train/attached_modules/visualizers/embeddings_visualizer.py b/luxonis_train/attached_modules/visualizers/embeddings_visualizer.py
new file mode 100644
index 00000000..da483705
--- /dev/null
+++ b/luxonis_train/attached_modules/visualizers/embeddings_visualizer.py
@@ -0,0 +1,133 @@
+import logging
+from collections.abc import Callable
+
+import numpy as np
+import seaborn as sns
+from luxonis_ml.data.utils import ColorMap
+from matplotlib import pyplot as plt
+from sklearn.decomposition import PCA
+from torch import Tensor
+
+from luxonis_train.enums import Metadata
+
+from .base_visualizer import BaseVisualizer
+from .utils import figure_to_torch
+
+logger = logging.getLogger(__name__)
+
+
+class EmbeddingsVisualizer(BaseVisualizer[Tensor, Tensor]):
+    supported_tasks = [Metadata("id")]
+
+    def __init__(self, z_score_threshold: float = 3, **kwargs):
+        """Visualizer for embedding tasks like reID.
+
+        @type z_score_threshold: float
+        @param z_score_threshold: Embeddings whose 2D projection has a
+            z-score above this threshold are treated as outliers and
+            are not drawn.
+        """
+        super().__init__(**kwargs)
+        self.colors = ColorMap()
+        self.z_score_threshold = z_score_threshold
+
+    def forward(
+        self,
+        label_canvas: Tensor,
+        prediction_canvas: Tensor,
+        embeddings: Tensor,
+        ids: Tensor,
+    ) -> tuple[Tensor, Tensor]:
+        """Creates a visualization of the embeddings.
+
+        @type label_canvas: Tensor
+        @param label_canvas: The canvas to draw the labels on.
+        @type prediction_canvas: Tensor
+        @param prediction_canvas: The canvas to draw the predictions on.
+        @type embeddings: Tensor
+        @param embeddings: The embeddings to visualize.
+        @type ids: Tensor
+        @param ids: The ids to visualize.
+        @rtype: tuple[Tensor, Tensor]
+        @return: A KDE plot and a scatter plot of the embeddings
+            projected to 2D.
+ """ + + embeddings_np = embeddings.detach().cpu().numpy() + ids_np = ids.detach().cpu().numpy().astype(int) + + pca = PCA(n_components=2) + embeddings_2d = pca.fit_transform(embeddings_np) + embeddings_2d, ids_np = self._filter_outliers(embeddings_2d, ids_np) + + kdeplot = self.plot_to_tensor(embeddings_2d, ids_np, self.kde_plot) + scatterplot = self.plot_to_tensor( + embeddings_2d, ids_np, self.scatter_plot + ) + + return kdeplot, scatterplot + + def _get_color(self, label: int) -> tuple[float, float, float]: + r, g, b = self.colors[label] + return r / 255, g / 255, b / 255 + + def _filter_outliers( + self, points: np.ndarray, ids: np.ndarray + ) -> tuple[np.ndarray, np.ndarray]: + mean = np.mean(points, axis=0) + std_dev = np.std(points, axis=0) + z_scores = (points - mean) / std_dev + + mask = (np.abs(z_scores) < self.z_score_threshold).all(axis=1) + logger.info(f"Filtered out {len(points) - mask.sum()} outliers") + return points[mask], ids[mask] + + @staticmethod + def plot_to_tensor( + embeddings_2d: np.ndarray, + ids_np: np.ndarray, + plot_func: Callable[[plt.Axes, np.ndarray, np.ndarray], None], + ) -> Tensor: + fig, ax = plt.subplots(figsize=(10, 10)) + ax.set_xlim(embeddings_2d[:, 0].min(), embeddings_2d[:, 0].max()) + ax.set_ylim(embeddings_2d[:, 1].min(), embeddings_2d[:, 1].max()) + + plot_func(ax, embeddings_2d, ids_np) + ax.axis("off") + + tensor_image = figure_to_torch(fig, width=512, height=512).unsqueeze(0) + plt.close(fig) + return tensor_image + + def kde_plot( + self, ax: plt.Axes, emb: np.ndarray, labels: np.ndarray + ) -> None: + for label in np.unique(labels): + subset = emb[labels == label] + color = self._get_color(label) + sns.kdeplot( + x=subset[:, 0], + y=subset[:, 1], + color=color, + alpha=0.9, + bw_adjust=1.5, + fill=True, + warn_singular=False, + ax=ax, + ) + + def scatter_plot( + self, ax: plt.Axes, emb: np.ndarray, labels: np.ndarray + ) -> None: + unique_labels = np.unique(labels) + palette = {lbl: self._get_color(lbl) for lbl in unique_labels} + sns.scatterplot( + x=emb[:, 0], + y=emb[:, 1], + hue=labels, + palette=palette, + alpha=0.9, + s=300, + legend=False, + ax=ax, + ) diff --git a/luxonis_train/attached_modules/visualizers/utils.py b/luxonis_train/attached_modules/visualizers/utils.py index 1a571eca..ac95046b 100644 --- a/luxonis_train/attached_modules/visualizers/utils.py +++ b/luxonis_train/attached_modules/visualizers/utils.py @@ -427,6 +427,6 @@ def resize_to_match( case _: raise ValueError( "Visualization should be either a single tensor or a tuple of " - "two tensors or a tuple of a tensor and a list of tensors." + "two tensors or a tuple of a tensor and a list of tensors. " f"Got: `{type(visualization)}`." ) diff --git a/luxonis_train/callbacks/ema.py b/luxonis_train/callbacks/ema.py index 63166ad3..a5ecd995 100644 --- a/luxonis_train/callbacks/ema.py +++ b/luxonis_train/callbacks/ema.py @@ -36,7 +36,7 @@ def __init__( @type decay_tau: float @param decay_tau: Decay tau for the moving average. 
""" - super(ModelEma, self).__init__() + super().__init__() model.eval() self.state_dict_ema = deepcopy(model.state_dict()) model.train() diff --git a/luxonis_train/config/config.py b/luxonis_train/config/config.py index 159a39fa..fa770505 100644 --- a/luxonis_train/config/config.py +++ b/luxonis_train/config/config.py @@ -67,6 +67,7 @@ class ModelNodeConfig(BaseModelExtraForbid): freezing: FreezingConfig = FreezingConfig() remove_on_export: bool = False task_name: str = "" + metadata_task_override: str | dict[str, str] | None = None params: Params = {} @@ -98,13 +99,14 @@ def validate_nodes(cls, nodes: Any) -> Any: names = [] last_body_index: int | None = None for i, node in enumerate(nodes): - name = node.get("alias", node.get("name")) + name = node.get("name") if name is None: raise ValueError( f"Node {i} does not specify the `name` field." ) if "Head" in name and last_body_index is None: last_body_index = i - 1 + name = node.get("alias") or name names.append(name) if i > 0 and "inputs" not in node and "input_sources" not in node: if last_body_index is not None: @@ -243,7 +245,7 @@ def check_attached_modules(cls, data: Params) -> Params: else: warnings.warn( f"Field `model.{section}` is deprecated. " - f"Please specify `{section}`under " + f"Please specify `{section}` under " "the node they are attached to." ) for node in data["nodes"]: diff --git a/luxonis_train/enums.py b/luxonis_train/enums.py index 09d38fb2..d82e8378 100644 --- a/luxonis_train/enums.py +++ b/luxonis_train/enums.py @@ -1,12 +1,27 @@ +from dataclasses import dataclass from enum import Enum +from typing import TypeAlias class TaskType(str, Enum): - """Tasks supported by nodes in LuxonisTrain.""" - CLASSIFICATION = "classification" SEGMENTATION = "segmentation" INSTANCE_SEGMENTATION = "instance_segmentation" BOUNDINGBOX = "boundingbox" KEYPOINTS = "keypoints" ARRAY = "array" + + +@dataclass(unsafe_hash=True) +class Metadata: + name: str + + @property + def value(self): + return f"metadata/{self.name}" + + def __str__(self) -> str: + return self.value + + +Task: TypeAlias = TaskType | Metadata diff --git a/luxonis_train/loaders/base_loader.py b/luxonis_train/loaders/base_loader.py index db97ac00..b0c7a79e 100644 --- a/luxonis_train/loaders/base_loader.py +++ b/luxonis_train/loaders/base_loader.py @@ -249,6 +249,11 @@ def get_n_keypoints(self) -> dict[str, int] | None: """ return None + def get_metadata_types( + self, + ) -> dict[str, dict[str, type[int] | type[float] | type[str]]]: + return {} + def dict_numpy_to_torch( self, numpy_dictionary: dict[str, np.ndarray] ) -> dict[str, Tensor]: @@ -260,10 +265,14 @@ def dict_numpy_to_torch( @rtype: dict[str, torch.Tensor] @return: Dictionary of torch tensors. """ - return { - task: torch.tensor(array).float() - for task, array in numpy_dictionary.items() - } + torch_dictionary = {} + + for task, array in numpy_dictionary.items(): + if array.dtype.kind in "U": + array = np.array([ord(c) for c in array[0]], dtype=np.int32) + torch_dictionary[task] = torch.tensor(array, dtype=torch.float32) + + return torch_dictionary def read_image(self, path: str) -> npt.NDArray[np.float32]: """Reads an image from a file. 
diff --git a/luxonis_train/loaders/luxonis_loader_torch.py b/luxonis_train/loaders/luxonis_loader_torch.py
index 4267cced..531b4b1f 100644
--- a/luxonis_train/loaders/luxonis_loader_torch.py
+++ b/luxonis_train/loaders/luxonis_loader_torch.py
@@ -127,6 +127,15 @@ def get_n_keypoints(self) -> dict[str, int]:
         skeletons = self.dataset.get_skeletons()
         return {task: len(skeletons[task][0]) for task in skeletons}
 
+    @override
+    def get_metadata_types(
+        self,
+    ) -> dict[str, type[int] | type[float] | type[str]]:
+        return {
+            k: {"float": float, "int": int, "str": str, "Category": int}[v]
+            for k, v in self.dataset.get_metadata_types().items()
+        }
+
     def augment_test_image(self, img: Tensor) -> Tensor:
         if self.loader.augmentations is None:
             return img
diff --git a/luxonis_train/loaders/utils.py b/luxonis_train/loaders/utils.py
index aed4df94..aa6b9fb4 100644
--- a/luxonis_train/loaders/utils.py
+++ b/luxonis_train/loaders/utils.py
@@ -1,5 +1,5 @@
 import torch
-from luxonis_ml.data.utils import get_task_type
+from luxonis_ml.data.utils import get_task_type, task_is_metadata
 from torch import Tensor
 
 from luxonis_train.utils.types import Labels
@@ -39,15 +39,13 @@ def collate_fn(
             label_box: list[Tensor] = []
             for i, ann in enumerate(annos):
                 new_ann = torch.zeros((ann.shape[0], ann.shape[1] + 1))
-                # add target image index for build_targets()
+                # add batch index to separate boxes from different images
                 new_ann[:, 0] = i
                 new_ann[:, 1:] = ann
                 label_box.append(new_ann)
             out_labels[task] = torch.cat(label_box, 0)
-
-        elif task_type == "instance_segmentation":
-            masks = [label[task] for label in labels]
-            out_labels[task] = torch.cat(masks, 0)
+        elif task_type == "instance_segmentation" or task_is_metadata(task):
+            out_labels[task] = torch.cat(annos, 0)
         else:
             out_labels[task] = torch.stack(annos, 0)
 
diff --git a/luxonis_train/models/luxonis_lightning.py b/luxonis_train/models/luxonis_lightning.py
index 17aea732..2b7252c8 100644
--- a/luxonis_train/models/luxonis_lightning.py
+++ b/luxonis_train/models/luxonis_lightning.py
@@ -161,7 +161,7 @@ def __init__(
             dict
         )
 
-        self._logged_images = 0
+        self._logged_images = defaultdict(int)
 
         frozen_nodes: list[tuple[str, int]] = []
         nodes: dict[str, tuple[type[BaseNode], Kwargs]] = {}
@@ -191,13 +191,13 @@
                 f"Node {node_name} does not have the `task_name` parameter set. "
                 "Please specify the `task_name` parameter for each head node. "
             )
-
         nodes[node_name] = (
             Node,
             {
                 **node_cfg.params,
                 "task_name": node_cfg.task_name,
                 "remove_on_export": node_cfg.remove_on_export,
+                "metadata_task_override": node_cfg.metadata_task_override,
             },
         )
 
@@ -312,15 +312,10 @@ def _initiate_nodes(
             for source_name, shape in shapes.items()
         }
 
-        for (
-            node_name,
-            (
-                Node,
-                node_kwargs,
-            ),
-            node_input_names,
-            _,
-        ) in traverse_graph(self.graph, nodes):
+        for node_name, (
+            Node,
+            node_kwargs,
+        ), node_input_names, _ in traverse_graph(self.graph, nodes):
             node_dummy_inputs: list[Packet[Tensor]] = []
             """List of dummy input packets for the node.
@@ -774,8 +769,13 @@ def _evaluation_step( ) -> dict[str, Tensor]: inputs, labels = batch images = None - if self._logged_images < self.cfg.trainer.n_log_images: + if not self._logged_images: images = get_denormalized_images(self.cfg, inputs) + for value in self._logged_images.values(): + if value < self.cfg.trainer.n_log_images: + images = get_denormalized_images(self.cfg, inputs) + break + outputs = self.forward( inputs, labels, @@ -790,17 +790,16 @@ def _evaluation_step( logged_images = self._logged_images for node_name, visualizations in outputs.visualizations.items(): for viz_name, viz_batch in visualizations.items(): - logged_images = self._logged_images for viz in viz_batch: - if logged_images >= self.cfg.trainer.n_log_images: - break + name = f"{mode}/visualizations/{node_name}/{viz_name}" + if logged_images[name] >= self.cfg.trainer.n_log_images: + continue self.logger.log_image( - f"{mode}/visualizations/{node_name}/{viz_name}/{logged_images}", + f"{name}/{logged_images[name]}", viz.detach().cpu().numpy().transpose(1, 2, 0), step=self.current_epoch, ) - logged_images += 1 - self._logged_images = logged_images + logged_images[name] += 1 return step_output @@ -840,7 +839,7 @@ def _evaluation_epoch_end(self, mode: Literal["test", "val"]) -> None: ) self.validation_step_outputs.clear() - self._logged_images = 0 + self._logged_images.clear() def configure_callbacks(self) -> list[pl.Callback]: """Configures Pytorch Lightning callbacks.""" diff --git a/luxonis_train/nodes/README.md b/luxonis_train/nodes/README.md index ab139d04..3d881ddf 100644 --- a/luxonis_train/nodes/README.md +++ b/luxonis_train/nodes/README.md @@ -18,6 +18,7 @@ arbitrarily as long as the two nodes are compatible with each other. We've group - [`DDRNet`](#ddrnet) - [`RecSubNet`](#recsubnet) - [`EfficientViT`](#efficientvit) + - [`GhostFaceNetV2`](#ghostfacenetv2) - [Necks](#necks) - [`RepPANNeck`](#reppanneck) - [Heads](#heads) @@ -29,9 +30,11 @@ arbitrarily as long as the two nodes are compatible with each other. We've group - [`DDRNetSegmentationHead`](#ddrnetsegmentationhead) - [`DiscSubNetHead`](#discsubnet) - [`FOMOHead`](#fomohead) + - [`GhostFaceNetHead`](#ghostfacenethead) - [`PrecisionBBoxHead`](#precisionbboxhead) - [`PrecisionSegmentBBoxHead`](#precisionsegmentbboxhead) - Every node takes these parameters: + +Every node takes these parameters: | Key | Type | Default value | Description | | ------------------ | ------------- | ------------- | --------------------------------------------------------------------------- | @@ -188,6 +191,14 @@ Adapted from [here](https://arxiv.org/abs/2205.14756) | `expand_ratio` | `int` | `4` | Factor by which channels expand in the local module | | `dim` | `int` | `None` | Dimension size for each attention head | +### `GhostFaceNetV2` + +**Parameters:** + +| Key | Type | Default value | Description | +| --------- | --------------- | ------------- | --------------------------- | +| `variant` | `Literal["V2"]` | `"V2"` | The variant of the network. | + ## Neck ### `RepPANNeck` @@ -293,7 +304,15 @@ Adapted from [here](https://arxiv.org/abs/2108.07610). | `conv_channels` | `int` | `16` | Number of output channels for each convolutional layer. | | `use_nms` | `bool` | `False` | If True, enable NMS. This can reduce FP, but it will also reduce TP for close neighbors. 
| -## `PrecisionBBoxHead` +### `GhostFaceNetHead` + +**Parameters:** + +| Key | Type | Default value | Description | +| ---------------- | ----- | ------------- | ---------------------------------------- | +| `embedding_size` | `int` | `512` | The size of the output embedding vector. | + +### `PrecisionBBoxHead` Adapted from [here](https://arxiv.org/pdf/2207.02696.pdf) and [here](https://arxiv.org/pdf/2209.02976.pdf). @@ -307,7 +326,7 @@ Adapted from [here](https://arxiv.org/pdf/2207.02696.pdf) and [here](https://arx | `iou_thres` | `float` | `0.45` | IoU threshold for non-maxima-suppression (used for evaluation) | | `max_det` | `int` | `300` | Max number of detections for non-maxima-suppression (used for evaluation) | -## `PrecisionSegmentBBoxHead` +### `PrecisionSegmentBBoxHead` Adapted from [here](https://arxiv.org/pdf/2207.02696.pdf) and [here](https://arxiv.org/pdf/2209.02976.pdf). diff --git a/luxonis_train/nodes/backbones/__init__.py b/luxonis_train/nodes/backbones/__init__.py index 441086b7..520296e8 100644 --- a/luxonis_train/nodes/backbones/__init__.py +++ b/luxonis_train/nodes/backbones/__init__.py @@ -3,6 +3,7 @@ from .efficientnet import EfficientNet from .efficientrep import EfficientRep from .efficientvit import EfficientViT +from .ghostfacenet import GhostFaceNetV2 from .micronet import MicroNet from .mobilenetv2 import MobileNetV2 from .mobileone import MobileOne @@ -23,5 +24,6 @@ "ResNet", "DDRNet", "RecSubNet", + "GhostFaceNetV2", "EfficientViT", ] diff --git a/luxonis_train/nodes/backbones/ddrnet/ddrnet.py b/luxonis_train/nodes/backbones/ddrnet/ddrnet.py index 2698c26d..6bee5dfc 100644 --- a/luxonis_train/nodes/backbones/ddrnet/ddrnet.py +++ b/luxonis_train/nodes/backbones/ddrnet/ddrnet.py @@ -148,7 +148,7 @@ def __init__( out_channels=highres_channels, kernel_size=1, bias=False, - activation=nn.Identity(), + activation=False, ) ) self.down3.append( @@ -159,7 +159,7 @@ def __init__( stride=2, padding=1, bias=False, - activation=nn.Identity(), + activation=False, ) ) self.layer3_skip.append( @@ -180,7 +180,7 @@ def __init__( out_channels=highres_channels, kernel_size=1, bias=False, - activation=nn.Identity(), + activation=False, ) self.down4 = nn.Sequential( @@ -200,7 +200,7 @@ def __init__( stride=2, padding=1, bias=False, - activation=nn.Identity(), + activation=False, ), ) diff --git a/luxonis_train/nodes/backbones/ghostfacenet/__init__.py b/luxonis_train/nodes/backbones/ghostfacenet/__init__.py new file mode 100644 index 00000000..c24d9afb --- /dev/null +++ b/luxonis_train/nodes/backbones/ghostfacenet/__init__.py @@ -0,0 +1,3 @@ +from .ghostfacenet import GhostFaceNetV2 + +__all__ = ["GhostFaceNetV2"] diff --git a/luxonis_train/nodes/backbones/ghostfacenet/blocks.py b/luxonis_train/nodes/backbones/ghostfacenet/blocks.py new file mode 100644 index 00000000..118d61ac --- /dev/null +++ b/luxonis_train/nodes/backbones/ghostfacenet/blocks.py @@ -0,0 +1,188 @@ +import math +from typing import Literal + +import torch +import torch.nn.functional as F +from torch import Tensor, nn + +from luxonis_train.nodes.backbones.micronet.blocks import _make_divisible +from luxonis_train.nodes.blocks import SqueezeExciteBlock +from luxonis_train.nodes.blocks.blocks import ConvModule + + +class GhostModuleV2(nn.Module): + def __init__( + self, + in_channels: int, + out_channels: int, + mode: Literal["original", "attn"], + kernel_size: int = 1, + ratio: int = 2, + dw_size: int = 3, + stride: int = 1, + use_prelu: bool = True, + ): + super().__init__() + self.mode = mode + self.out_channels 
= out_channels + intermediate_channels = math.ceil(out_channels / ratio) + new_channels = intermediate_channels * (ratio - 1) + self.primary_conv = ConvModule( + in_channels, + intermediate_channels, + kernel_size, + stride, + kernel_size // 2, + activation=nn.PReLU() if use_prelu else False, + ) + self.cheap_operation = ConvModule( + intermediate_channels, + new_channels, + dw_size, + 1, + dw_size // 2, + groups=intermediate_channels, + activation=nn.PReLU() if use_prelu else False, + ) + + if self.mode == "attn": + self.short_conv = nn.Sequential( + ConvModule( + in_channels, + out_channels, + kernel_size, + stride, + kernel_size // 2, + activation=False, + ), + ConvModule( + out_channels, + out_channels, + kernel_size=(1, 5), + stride=1, + padding=(0, 2), + groups=out_channels, + activation=False, + ), + ConvModule( + out_channels, + out_channels, + kernel_size=(5, 1), + stride=1, + padding=(2, 0), + groups=out_channels, + activation=False, + ), + nn.AvgPool2d(kernel_size=2, stride=2), + nn.Sigmoid(), + ) + + def forward(self, x: Tensor) -> Tensor: + x1 = self.primary_conv(x) + x2 = self.cheap_operation(x1) + out = torch.cat([x1, x2], dim=1) + if self.mode == "original": + return out[:, : self.out_channels, ...] + + return out[:, : self.out_channels, ...] * F.interpolate( + self.short_conv(x), + size=(out.shape[-2], out.shape[-1]), + mode="nearest", + ) + + +class GhostBottleneckV2(nn.Module): + def __init__( + self, + in_channels: int, + intermediate_channels: int, + out_channels: int, + dw_kernel_size: int = 3, + stride: int = 1, + se_ratio: float = 0.0, + *, + layer_id: int, + ): + super().__init__() + has_se = se_ratio is not None and se_ratio > 0.0 + self.stride = stride + + # Point-wise expansion + if layer_id <= 1: + self.ghost1 = GhostModuleV2( + in_channels, + intermediate_channels, + use_prelu=True, + mode="original", + ) + else: + self.ghost1 = GhostModuleV2( + in_channels, intermediate_channels, use_prelu=True, mode="attn" + ) + + # Depth-wise convolution + if self.stride > 1: + self.conv_dw = nn.Conv2d( + intermediate_channels, + intermediate_channels, + dw_kernel_size, + stride=stride, + padding=(dw_kernel_size - 1) // 2, + groups=intermediate_channels, + bias=False, + ) + self.bn_dw = nn.BatchNorm2d(intermediate_channels) + + # Squeeze-and-excitation + if has_se: + reduced_chs = _make_divisible(intermediate_channels * se_ratio, 4) + self.se = SqueezeExciteBlock( + intermediate_channels, reduced_chs, True, activation=nn.PReLU() + ) + else: + self.se = None + + self.ghost2 = GhostModuleV2( + intermediate_channels, + out_channels, + use_prelu=False, + mode="original", + ) + + # shortcut + if in_channels == out_channels and self.stride == 1: + self.shortcut = nn.Identity() + else: + self.shortcut = nn.Sequential( + nn.Conv2d( + in_channels, + in_channels, + dw_kernel_size, + stride=stride, + padding=(dw_kernel_size - 1) // 2, + groups=in_channels, + bias=False, + ), + nn.BatchNorm2d(in_channels), + nn.Conv2d( + in_channels, + out_channels, + 1, + stride=1, + padding=0, + bias=False, + ), + nn.BatchNorm2d(out_channels), + ) + + def forward(self, x): + residual = x + x = self.ghost1(x) + if self.stride > 1: + x = self.conv_dw(x) + x = self.bn_dw(x) + if self.se is not None: + x = self.se(x) + x = self.ghost2(x) + x += self.shortcut(residual) + return x diff --git a/luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py b/luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py new file mode 100644 index 00000000..2ad0227c --- /dev/null +++ 
b/luxonis_train/nodes/backbones/ghostfacenet/ghostfacenet.py
@@ -0,0 +1,103 @@
+# Original source: https://github.com/Hazqeel09/ellzaf_ml/blob/main/ellzaf_ml/models/ghostfacenetsv2.py
+import math
+from typing import Literal
+
+import torch.nn as nn
+from torch import Tensor
+
+from luxonis_train.nodes.backbones.ghostfacenet.variants import get_variant
+from luxonis_train.nodes.backbones.micronet.blocks import _make_divisible
+from luxonis_train.nodes.base_node import BaseNode
+from luxonis_train.nodes.blocks import ConvModule
+
+
+class GhostFaceNetV2(BaseNode[Tensor, Tensor]):
+    in_channels: int
+    in_width: int
+
+    def __init__(self, variant: Literal["V2"] = "V2", **kwargs):
+        """GhostFaceNetsV2 backbone.
+
+        GhostFaceNetsV2 is a convolutional neural network architecture
+        focused on face recognition, but it is adaptable to generic
+        embedding tasks. It is based on the GhostNet architecture and
+        uses Ghost BottleneckV2 blocks.
+
+        Source: U{https://github.com/Hazqeel09/ellzaf_ml/blob/main/ellzaf_ml/models/ghostfacenetsv2.py}
+
+        @license: U{MIT License
+            }
+
+        @see: U{GhostFaceNets: Lightweight Face Recognition Model From Cheap Operations
+            }
+
+        @type variant: Literal["V2"]
+        @param variant: Variant of the GhostFaceNets embedding model.
+            Defaults to "V2" (which is the only variant available).
+        """
+        super().__init__(**kwargs)
+
+        var = get_variant(variant)
+        output_channel = _make_divisible(int(16 * var.width), 4)
+        input_channel = output_channel
+
+        stages: list[nn.Module] = [
+            ConvModule(
+                self.in_channels,
+                output_channel,
+                kernel_size=3,
+                stride=2,
+                padding=1,
+                activation=nn.PReLU(),
+            )
+        ]
+        layer_id = 0
+        for cfg in var.block_configs:
+            layers = []
+            for b_cfg in cfg:
+                output_channel = _make_divisible(
+                    b_cfg.output_channels * var.width, 4
+                )
+                hidden_channel = _make_divisible(
+                    b_cfg.expand_size * var.width, 4
+                )
+                layers.append(
+                    var.block(
+                        input_channel,
+                        hidden_channel,
+                        output_channel,
+                        b_cfg.kernel_size,
+                        b_cfg.stride,
+                        se_ratio=b_cfg.se_ratio,
+                        layer_id=layer_id,
+                    )
+                )
+                input_channel = output_channel
+                layer_id += 1
+            stages.append(nn.Sequential(*layers))
+
+        output_channel = _make_divisible(b_cfg.expand_size * var.width, 4)
+        stages.append(
+            ConvModule(
+                input_channel,
+                output_channel,
+                kernel_size=1,
+                activation=nn.PReLU(),
+            )
+        )
+
+        self.blocks = nn.Sequential(*stages)
+
+        self._init_weights()
+
+    def _init_weights(self) -> None:
+        for m in self.modules():
+            if isinstance(m, (nn.Conv2d, nn.Linear)):
+                fan_in, _ = nn.init._calculate_fan_in_and_fan_out(m.weight)
+                negative_slope = 0.25
+                m.weight.data.normal_(
+                    0, math.sqrt(2.0 / (fan_in * (1 + negative_slope**2)))
+                )
+            if isinstance(m, nn.BatchNorm2d):
+                m.momentum = 0.9
+                m.eps = 1e-5
+
+    def forward(self, x: Tensor) -> Tensor:
+        return self.blocks(x)
diff --git a/luxonis_train/nodes/backbones/ghostfacenet/variants.py b/luxonis_train/nodes/backbones/ghostfacenet/variants.py
new file mode 100644
index 00000000..9e09befc
--- /dev/null
+++ b/luxonis_train/nodes/backbones/ghostfacenet/variants.py
@@ -0,0 +1,181 @@
+from typing import Literal
+
+from pydantic import BaseModel
+from torch import nn
+
+from .blocks import GhostBottleneckV2
+
+
+class BlockConfig(BaseModel):
+    kernel_size: int
+    expand_size: int
+    output_channels: int
+    stride: int
+    se_ratio: float
+
+
+class GhostFaceNetsVariant(BaseModel):
+    """Variant of the GhostFaceNets embedding model.
+
+    @type width: int
+    @param width: Width multiplier. Increases complexity and number of
+        parameters. Defaults to 1.
+    @type block: type[nn.Module]
+    @param block: Ghost BottleneckV2 block to use. Defaults to
+        GhostBottleneckV2.
+    @type block_configs: list[list[BlockConfig]]
+    @param block_configs: List of Ghost BottleneckV2 configurations,
+        one inner list per stage.
+    """
+
+    width: int
+    block: type[nn.Module]
+    block_configs: list[list[BlockConfig]]
+
+
+V2 = GhostFaceNetsVariant(
+    width=1,
+    block=GhostBottleneckV2,
+    block_configs=[
+        [
+            BlockConfig(
+                kernel_size=3,
+                expand_size=16,
+                output_channels=16,
+                se_ratio=0.0,
+                stride=1,
+            )
+        ],
+        [
+            BlockConfig(
+                kernel_size=3,
+                expand_size=48,
+                output_channels=24,
+                se_ratio=0.0,
+                stride=2,
+            )
+        ],
+        [
+            BlockConfig(
+                kernel_size=3,
+                expand_size=72,
+                output_channels=24,
+                se_ratio=0.0,
+                stride=1,
+            )
+        ],
+        [
+            BlockConfig(
+                kernel_size=5,
+                expand_size=72,
+                output_channels=40,
+                se_ratio=0.25,
+                stride=2,
+            )
+        ],
+        [
+            BlockConfig(
+                kernel_size=5,
+                expand_size=120,
+                output_channels=40,
+                se_ratio=0.25,
+                stride=1,
+            )
+        ],
+        [
+            BlockConfig(
+                kernel_size=3,
+                expand_size=240,
+                output_channels=80,
+                se_ratio=0.0,
+                stride=2,
+            )
+        ],
+        [
+            BlockConfig(
+                kernel_size=3,
+                expand_size=200,
+                output_channels=80,
+                se_ratio=0.0,
+                stride=1,
+            ),
+            BlockConfig(
+                kernel_size=3,
+                expand_size=184,
+                output_channels=80,
+                se_ratio=0.0,
+                stride=1,
+            ),
+            BlockConfig(
+                kernel_size=3,
+                expand_size=184,
+                output_channels=80,
+                se_ratio=0.0,
+                stride=1,
+            ),
+            BlockConfig(
+                kernel_size=3,
+                expand_size=480,
+                output_channels=112,
+                se_ratio=0.25,
+                stride=1,
+            ),
+            BlockConfig(
+                kernel_size=3,
+                expand_size=672,
+                output_channels=112,
+                se_ratio=0.25,
+                stride=1,
+            ),
+        ],
+        [
+            BlockConfig(
+                kernel_size=5,
+                expand_size=672,
+                output_channels=160,
+                se_ratio=0.25,
+                stride=2,
+            )
+        ],
+        [
+            BlockConfig(
+                kernel_size=5,
+                expand_size=960,
+                output_channels=160,
+                se_ratio=0.0,
+                stride=1,
+            ),
+            BlockConfig(
+                kernel_size=5,
+                expand_size=960,
+                output_channels=160,
+                se_ratio=0.25,
+                stride=1,
+            ),
+            BlockConfig(
+                kernel_size=5,
+                expand_size=960,
+                output_channels=160,
+                se_ratio=0.0,
+                stride=1,
+            ),
+            BlockConfig(
+                kernel_size=5,
+                expand_size=960,
+                output_channels=160,
+                se_ratio=0.25,
+                stride=1,
+            ),
+        ],
+    ],
+)
+
+
+def get_variant(variant: Literal["V2"]) -> GhostFaceNetsVariant:
+    variants = {"V2": V2}
+    if variant not in variants:  # pragma: no cover
+        raise ValueError(
+            "GhostFaceNets model variant should be in "
+            f"{list(variants.keys())}, got {variant}."
+ ) + return variants[variant].model_copy() diff --git a/luxonis_train/nodes/backbones/micronet/blocks.py b/luxonis_train/nodes/backbones/micronet/blocks.py index 3da5e15e..a1fd8f13 100644 --- a/luxonis_train/nodes/backbones/micronet/blocks.py +++ b/luxonis_train/nodes/backbones/micronet/blocks.py @@ -145,7 +145,7 @@ def _create_lite_block( out_channels=out_channels, kernel_size=1, groups=group2, - activation=nn.Identity(), + activation=False, ), DYShiftMax( out_channels, @@ -179,7 +179,7 @@ def _create_transition_block( out_channels=intermediate_channels, kernel_size=1, groups=group1, - activation=nn.Identity(), + activation=False, ), DYShiftMax( intermediate_channels, @@ -217,7 +217,7 @@ def _create_full_block( out_channels=intermediate_channels, kernel_size=1, groups=groups_1[0], - activation=nn.Identity(), + activation=False, ), DYShiftMax( intermediate_channels, @@ -256,7 +256,7 @@ def _create_full_block( out_channels=out_channels, kernel_size=1, groups=group1, - activation=nn.Identity(), + activation=False, ), DYShiftMax( out_channels, @@ -357,7 +357,7 @@ def __init__( self.avg_pool = nn.AdaptiveAvgPool2d(1) - squeeze_channels = self._make_divisible(in_channels // reduction, 4) + squeeze_channels = _make_divisible(in_channels // reduction, 4) self.fc = nn.Sequential( nn.Linear(in_channels, squeeze_channels), @@ -413,16 +413,14 @@ def forward(self, x: Tensor) -> Tensor: return out - def _make_divisible( - self, value: int, divisor: int, min_value: int | None = None - ) -> int: - if min_value is None: - min_value = divisor - new_v = max(min_value, int(value + divisor / 2) // divisor * divisor) - # Make sure that round down does not go down by more than 10%. - if new_v < 0.9 * value: - new_v += divisor - return new_v + +def _make_divisible(value: int, divisor: int) -> int: + min_value = divisor + new_v = max(min_value, int(value + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. 
 
 
 class SpatialSepConvSF(nn.Module):
diff --git a/luxonis_train/nodes/backbones/mobileone/blocks.py b/luxonis_train/nodes/backbones/mobileone/blocks.py
index 4b926038..54017aa0 100644
--- a/luxonis_train/nodes/backbones/mobileone/blocks.py
+++ b/luxonis_train/nodes/backbones/mobileone/blocks.py
@@ -91,7 +91,7 @@ def __init__(
                     stride=self.stride,
                     padding=padding,
                     groups=self.groups,
-                    activation=nn.Identity(),
+                    activation=False,
                 )
             )
         self.rbr_conv: list[nn.Sequential] = nn.ModuleList(rbr_conv)  # type: ignore
@@ -106,7 +106,7 @@ def __init__(
                 stride=self.stride,
                 padding=0,
                 groups=self.groups,
-                activation=nn.Identity(),
+                activation=False,
             )
 
     def forward(self, inputs: Tensor) -> Tensor:
diff --git a/luxonis_train/nodes/backbones/rexnetv1.py b/luxonis_train/nodes/backbones/rexnetv1.py
index 6567586a..c34dbafe 100644
--- a/luxonis_train/nodes/backbones/rexnetv1.py
+++ b/luxonis_train/nodes/backbones/rexnetv1.py
@@ -202,7 +202,7 @@ def __init__(
                 in_channels=dw_channels,
                 out_channels=channels,
                 kernel_size=1,
-                activation=nn.Identity(),
+                activation=False,
             )
         )
 
diff --git a/luxonis_train/nodes/base_node.py b/luxonis_train/nodes/base_node.py
index 0a9a208a..409a9fe3 100644
--- a/luxonis_train/nodes/base_node.py
+++ b/luxonis_train/nodes/base_node.py
@@ -9,7 +9,7 @@
 from torch import Size, Tensor, nn
 from typeguard import TypeCheckError, check_type
 
-from luxonis_train.enums import TaskType
+from luxonis_train.enums import Metadata, Task, TaskType
 from luxonis_train.utils import (
     AttachIndexType,
     DatasetMetadata,
@@ -107,7 +107,7 @@ def wrap(output: Tensor) -> Packet[Tensor]:
     """
 
     attach_index: AttachIndexType
-    tasks: list[TaskType] | None = None
+    tasks: list[Task] | None = None
 
     def __init__(
         self,
@@ -122,6 +122,7 @@ def __init__(
         export_output_names: list[str] | None = None,
         attach_index: AttachIndexType | None = None,
         task_name: str | None = None,
+        metadata_task_override: str | dict[str, str] | None = None,
     ):
         """Constructor for the C{BaseNode}.
 
@@ -176,6 +177,35 @@ def __init__(
                 f"argument for node '{self.name}' was not provided."
             )
         self.task_name = task_name or ""
+        self.metadata_task_override = {}
+        if metadata_task_override is not None:
+            if self.tasks is None:
+                raise ValueError(
+                    "Metadata task override can only be used with nodes that define tasks."
+                )
+            n_metadata_tasks = sum(
+                1 for task in self.tasks if isinstance(task, Metadata)
+            )
+            if n_metadata_tasks > 1 and isinstance(
+                metadata_task_override, str
+            ):
+                raise ValueError(
+                    f"Node '{self.name}' defines multiple metadata tasks, "
+                    "but only a single task name was provided for "
+                    "`metadata_task_override`. Provide a dictionary "
+                    "mapping default names to new names instead."
+                )
+            for task in self.tasks:
+                if not isinstance(task, Metadata):
+                    continue
+
+                if isinstance(metadata_task_override, dict):
+                    new_name = metadata_task_override.get(task.name, task.name)
+                else:
+                    new_name = metadata_task_override
+
+                self.metadata_task_override[task.name] = new_name
+                task.name = new_name
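+                # Example (illustrative): with tasks = [Metadata("id")] and
+                # metadata_task_override="color", the "id" metadata task is
+                # renamed so labels are read from the "color" metadata field.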
 
         if getattr(self, "attach_index", None) is None:
             parameters = inspect.signature(self.forward).parameters
diff --git a/luxonis_train/nodes/blocks/blocks.py b/luxonis_train/nodes/blocks/blocks.py
index fa9912a8..dfacfeab 100644
--- a/luxonis_train/nodes/blocks/blocks.py
+++ b/luxonis_train/nodes/blocks/blocks.py
@@ -165,13 +165,13 @@ def __init__(
         self,
         in_channels: int,
         out_channels: int,
-        kernel_size: int,
-        stride: int = 1,
-        padding: int = 0,
-        dilation: int = 1,
+        kernel_size: int | tuple[int, int],
+        stride: int | tuple[int, int] = 1,
+        padding: int | tuple[int, int] = 0,
+        dilation: int | tuple[int, int] = 1,
         groups: int = 1,
         bias: bool = False,
-        activation: nn.Module | None = None,
+        activation: nn.Module | None | Literal[False] = None,
         use_norm: bool = True,
     ):
         """Conv2d + Optional BN + Activation.
@@ -192,12 +192,13 @@ def __init__(
         @param groups: Groups. Defaults to 1.
         @type bias: bool
         @param bias: Whether to use bias. Defaults to False.
-        @type activation: L{nn.Module} | None
-        @param activation: Activation function. If None then nn.ReLU.
+        @type activation: L{nn.Module} | None | Literal[False]
+        @param activation: Activation function. If None, C{nn.ReLU} is
+            used; if False, no activation is applied. Defaults to None.
         @type use_norm: bool
         @param use_norm: Whether to use normalization. Defaults to True.
         """
-        super().__init__(
+        blocks: list[nn.Module] = [
             nn.Conv2d(
                 in_channels,
                 out_channels,
@@ -208,9 +209,15 @@ def __init__(
                 groups,
                 bias,
             ),
-            nn.BatchNorm2d(out_channels) if use_norm else nn.Identity(),
-            activation or nn.ReLU(),
-        )
+        ]
+
+        if use_norm:
+            blocks.append(nn.BatchNorm2d(out_channels))
+
+        if activation is not False:
+            blocks.append(activation or nn.ReLU())
+
+        super().__init__(*blocks)
 
 
 class DWConvModule(ConvModule):
@@ -443,7 +450,7 @@ def __init__(
             stride=stride,
             padding=padding,
             groups=groups,
-            activation=nn.Identity(),
+            activation=False,
         )
         self.rbr_1x1 = ConvModule(
             in_channels=in_channels,
@@ -452,7 +459,7 @@ def __init__(
             stride=stride,
             padding=padding_11,
             groups=groups,
-            activation=nn.Identity(),
+            activation=False,
         )
 
     def forward(self, x: Tensor) -> Tensor:
@@ -728,7 +735,7 @@ def __init__(self, in_channels: int, out_channels: int):
                 in_channels=out_channels,
                 out_channels=out_channels,
                 kernel_size=1,
-                activation=nn.Identity(),
+                activation=False,
             ),
             nn.Sigmoid(),
         )
@@ -768,7 +775,7 @@ def __init__(
                 in_channels=out_channels,
                 out_channels=out_channels // reduction,
                 kernel_size=1,
-                activation=nn.Identity(),
+                activation=False,
             ),
             nn.Sigmoid(),
         )
diff --git a/luxonis_train/nodes/heads/__init__.py b/luxonis_train/nodes/heads/__init__.py
index 6ebcf816..96843207 100644
--- a/luxonis_train/nodes/heads/__init__.py
+++ b/luxonis_train/nodes/heads/__init__.py
@@ -6,6 +6,7 @@
 from .efficient_bbox_head import EfficientBBoxHead
 from .efficient_keypoint_bbox_head import EfficientKeypointBBoxHead
 from .fomo_head import FOMOHead
+from .ghostfacenet_head import GhostFaceNetHead
 from .precision_bbox_head import PrecisionBBoxHead
 from .precision_seg_bbox_head import PrecisionSegmentBBoxHead
 from .segmentation_head import SegmentationHead
@@ -19,6 +20,7 @@
     "SegmentationHead",
     "DDRNetSegmentationHead",
     "DiscSubNetHead",
+    "GhostFaceNetHead",
     "FOMOHead",
     "PrecisionBBoxHead",
"PrecisionSegmentBBoxHead", diff --git a/luxonis_train/nodes/heads/ghostfacenet_head.py b/luxonis_train/nodes/heads/ghostfacenet_head.py new file mode 100644 index 00000000..4b7dcb06 --- /dev/null +++ b/luxonis_train/nodes/heads/ghostfacenet_head.py @@ -0,0 +1,82 @@ +# Original source: https://github.com/Hazqeel09/ellzaf_ml/blob/main/ellzaf_ml/models/ghostfacenetsv2.py +import math + +import torch.nn as nn +from torch import Tensor + +from luxonis_train.enums import Metadata +from luxonis_train.nodes.base_node import BaseNode +from luxonis_train.nodes.blocks.blocks import ConvModule + + +class GhostFaceNetHead(BaseNode[Tensor, list[Tensor]]): + in_channels: int + in_width: int + tasks = [Metadata("id")] + + def __init__( + self, + embedding_size: int = 512, + cross_batch_memory_size: int | None = None, + dropout: float = 0.2, + **kwargs, + ): + """GhostFaceNetV2 backbone. + + GhostFaceNetV2 is a convolutional neural network architecture focused on face recognition, but it is + adaptable to generic embedding tasks. It is based on the GhostNet architecture and uses Ghost BottleneckV2 blocks. + + Source: U{https://github.com/Hazqeel09/ellzaf_ml/blob/main/ellzaf_ml/models/ghostfacenetsv2.py} + + @license: U{MIT License + } + + @see: U{GhostFaceNets: Lightweight Face Recognition Model From Cheap Operations + } + + @type embedding_size: int + @param embedding_size: Size of the embedding. Defaults to 512. + @type cross_batch_memory_size: int | None + @param cross_batch_memory_size: Size of the cross-batch memory. Defaults to None. + @type dropout: float + @param dropout: Dropout rate. Defaults to 0.2. + """ + super().__init__(**kwargs) + self.embedding_size = embedding_size + self.cross_batch_memory_size = cross_batch_memory_size + _, H, W = self.original_in_shape + + self.head = nn.Sequential( + ConvModule( + self.in_channels, + self.in_channels, + kernel_size=( + H // 32 if H % 32 == 0 else H // 32 + 1, + W // 32 if W % 32 == 0 else W // 32 + 1, + ), + groups=self.in_channels, + activation=False, + ), + nn.Dropout(dropout), + nn.Conv2d( + self.in_channels, embedding_size, kernel_size=1, bias=False + ), + nn.Flatten(), + nn.BatchNorm1d(embedding_size), + ) + self._init_weights() + + def _init_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear): + fan_in, _ = nn.init._calculate_fan_in_and_fan_out(m.weight) + negative_slope = 0.25 + m.weight.data.normal_( + 0, math.sqrt(2.0 / (fan_in * (1 + negative_slope**2))) + ) + if isinstance(m, nn.BatchNorm2d): + m.momentum = 0.9 + m.eps = 1e-5 + + def forward(self, x: Tensor) -> Tensor: + return self.head(x) diff --git a/luxonis_train/utils/dataset_metadata.py b/luxonis_train/utils/dataset_metadata.py index fdbec775..2c10905c 100644 --- a/luxonis_train/utils/dataset_metadata.py +++ b/luxonis_train/utils/dataset_metadata.py @@ -62,7 +62,7 @@ def n_classes(self, task: str | None = None) -> int: for classes in self._classes.values(): if len(classes) != n_classes: raise RuntimeError( - "The dataset contains different number of classes for different tasks." + "The dataset contains different number of classes for different tasks. " "Please specify the 'task' argument to get the number of classes." ) return n_classes @@ -90,7 +90,7 @@ def n_keypoints(self, task: str | None = None) -> int: for n in self._n_keypoints.values(): if n != n_keypoints: raise RuntimeError( - "The dataset contains different number of keypoints for different tasks." + "The dataset contains different number of keypoints for different tasks. 
" "Please specify the 'task' argument to get the number of keypoints." ) return n_keypoints diff --git a/requirements.txt b/requirements.txt index 5ef87b3a..bd4e663a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,3 +18,6 @@ mlflow>=2.10.0 psutil>=5.0.0 tabulate>=0.9.0 grad-cam>=1.5.4 +pytorch_metric_learning>=2.7.0 +scikit-learn>=1.5.0 +seaborn>=1.16.0 diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index ab2fb1e8..92fc8720 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -4,10 +4,12 @@ from pathlib import Path from typing import Any +import cv2 import gdown +import numpy as np import pytest import torchvision -from luxonis_ml.data import LuxonisDataset +from luxonis_ml.data import Category, LuxonisDataset from luxonis_ml.data.parsers import LuxonisParser from luxonis_ml.utils import environ @@ -38,13 +40,40 @@ def parking_lot_dataset() -> LuxonisDataset: url = "gs://luxonis-test-bucket/luxonis-ml-test-data/D1_ParkingLot_Native.zip" parser = LuxonisParser( url, - dataset_name="_D1_ParkingLot", + dataset_name="D1_ParkingLot", delete_existing=True, save_dir=WORK_DIR, ) return parser.parse(random_split=True) +@pytest.fixture(scope="session") +def embedding_dataset() -> LuxonisDataset: + img_dir = WORK_DIR / "embedding_images" + img_dir.mkdir(exist_ok=True) + + def generator(): + for i in range(100): + color = [(255, 0, 0), (0, 255, 0), (0, 0, 255)][i % 3] + img = np.full((100, 100, 3), color, dtype=np.uint8) + img[i, i] = 255 + cv2.imwrite(str(img_dir / f"image_{i}.png"), img) + + yield { + "file": img_dir / f"image_{i}.png", + "annotation": { + "metadata": { + "color": Category(["red", "green", "blue"][i % 3]), + }, + }, + } + + dataset = LuxonisDataset("embedding_test", delete_existing=True) + dataset.add(generator()) + dataset.make_splits() + return dataset + + @pytest.fixture(scope="session") def coco_dataset() -> LuxonisDataset: dataset_name = "coco_test" diff --git a/tests/integration/test_embeddings.py b/tests/integration/test_embeddings.py new file mode 100644 index 00000000..ea1a4868 --- /dev/null +++ b/tests/integration/test_embeddings.py @@ -0,0 +1,15 @@ +from luxonis_ml.data import LuxonisDataset + +from luxonis_train.core import LuxonisModel + + +def test_embeddings_model(embedding_dataset: LuxonisDataset): + model = LuxonisModel( + cfg="configs/embeddings_model.yaml", + opts={ + "loader.params.dataset_name": embedding_dataset.dataset_name, + "trainer.epochs": 1, + "trainer.validation_interval": 1, + }, + ) + model.train() diff --git a/tests/unittests/test_base_attached_module.py b/tests/unittests/test_base_attached_module.py index c7cd1508..450242b9 100644 --- a/tests/unittests/test_base_attached_module.py +++ b/tests/unittests/test_base_attached_module.py @@ -148,12 +148,12 @@ def test_prepare(inputs: Packet[Tensor], labels: Labels): det_head = DummyDetectionHead() assert seg_loss.prepare(inputs, labels) == ( - SEGMENTATION_ARRAY, + [SEGMENTATION_ARRAY], SEGMENTATION_ARRAY, ) inputs["/segmentation"].append(FEATURES_ARRAY) assert seg_loss.prepare(inputs, labels) == ( - FEATURES_ARRAY, + [SEGMENTATION_ARRAY, FEATURES_ARRAY], SEGMENTATION_ARRAY, ) diff --git a/tests/unittests/test_callbacks/test_ema.py b/tests/unittests/test_callbacks/test_ema.py index e9d2db7f..51fc2505 100644 --- a/tests/unittests/test_callbacks/test_ema.py +++ b/tests/unittests/test_callbacks/test_ema.py @@ -9,7 +9,7 @@ class SimpleModel(LightningModule): def __init__(self): - super(SimpleModel, self).__init__() + super().__init__() 
         self.layer = torch.nn.Linear(2, 2)
 
     def forward(self, x):