add ! validate image size

unicef · Oct 28, 2024 · b56973d · b56973d
1 parent 77e832e
commit b56973d
Show file tree

Hide file tree

Showing 21 changed files with 56 additions and 49 deletions.
diff --git a/src/hope_dedup_engine/apps/api/admin/duplicate.py b/src/hope_dedup_engine/apps/api/admin/duplicate.py
@@ -1,10 +1,7 @@
 from django.contrib.admin import ModelAdmin, register
 
-from adminfilters.filters import (
-    DjangoLookupFilter,
-    NumberFilter,
-    RelatedFieldComboFilter,
-)
+from adminfilters.autocomplete import AutoCompleteFilter
+from adminfilters.filters import DjangoLookupFilter, NumberFilter
 from adminfilters.mixin import AdminFiltersMixin
 
 from hope_dedup_engine.apps.api.models import Duplicate
@@ -20,7 +17,7 @@ class DuplicateAdmin(AdminFiltersMixin, ModelAdmin):
         "second_reference_pk",
     )
     list_filter = (
-        ("deduplication_set", RelatedFieldComboFilter),
+        ("deduplication_set", AutoCompleteFilter),
         ("score", NumberFilter),
         DjangoLookupFilter,
     )

diff --git a/src/hope_dedup_engine/apps/api/admin/image.py b/src/hope_dedup_engine/apps/api/admin/image.py
@@ -1,7 +1,8 @@
 from django.contrib.admin import ModelAdmin, register
 
+from adminfilters.autocomplete import AutoCompleteFilter
 from adminfilters.dates import DateRangeFilter
-from adminfilters.filters import DjangoLookupFilter, RelatedFieldComboFilter
+from adminfilters.filters import DjangoLookupFilter
 from adminfilters.mixin import AdminFiltersMixin
 
 from hope_dedup_engine.apps.api.models import Image
@@ -17,7 +18,7 @@ class ImageAdmin(AdminFiltersMixin, ModelAdmin):
     )
 
     list_filter = (
-        ("deduplication_set", RelatedFieldComboFilter),
+        ("deduplication_set", AutoCompleteFilter),
         ("created_at", DateRangeFilter),
         DjangoLookupFilter,
     )
@@ -27,6 +28,3 @@ def has_add_permission(self, request):
 
     def has_change_permission(self, request, obj=None):
         return False
-
-    def has_delete_permission(self, request, obj=None):
-        return obj is not None
diff --git a/src/hope_dedup_engine/apps/api/deduplication/process.py b/src/hope_dedup_engine/apps/api/deduplication/process.py
@@ -94,10 +94,11 @@ def find_duplicates(deduplication_set_id: str, serialized_lock: str) -> None:
         deduplication_set.state = deduplication_set.State.CLEAN
         deduplication_set.save()
 
-        if lock_enabled:
-            lock.release()
-
     except Exception:
         deduplication_set.state = DeduplicationSet.State.ERROR
         deduplication_set.save()
         raise
+
+    finally:
+        if lock_enabled:
+            lock.release()
diff --git a/src/hope_dedup_engine/apps/api/models/deduplication.py b/src/hope_dedup_engine/apps/api/models/deduplication.py
@@ -18,7 +18,7 @@ class Config(models.Model):
     )
 
     def __str__(self) -> str:
-        return " | ".join(
+        return f"{self.pk}: " + " | ".join(
             f"{field.name}: {getattr(self, field.name)}"
             for field in self._meta.fields
             if field.name not in ("id",)

diff --git a/src/hope_dedup_engine/apps/core/exceptions.py b/src/hope_dedup_engine/apps/core/exceptions.py
@@ -16,3 +16,12 @@ class DownloaderKeyError(Exception):
     def __init__(self, key: str) -> None:
         self.key = key
         super().__init__(f"Downloader key '{key}' does not exist.")
+
+
+class NotCompliantImageError(Exception):
+    """
+    Exception raised when an image is not compliant with the expected parameters.
+    """
+
+    def __init__(self, message: str) -> None:
+        super().__init__(message)
diff --git a/src/hope_dedup_engine/apps/faces/services/duplication_detector.py b/src/hope_dedup_engine/apps/faces/services/duplication_detector.py
@@ -133,16 +133,14 @@ def find_duplicates(self) -> Generator[tuple[str, str, float], None, None]:
                 min_distance = None
                 for encoding1 in encodings1:
                     distances = face_recognition.face_distance(encodings2, encoding1)
-                    current_min = min(distances) if np.any(distances) else float("inf")
+                    current_min = min(distances) if np.any(distances) else 0
                     if min_distance is None or current_min < min_distance:
                         min_distance = current_min
 
-                if min_distance is not None:
-                    print(
-                        f"Minimum distance between {path1} and {path2}: {min_distance}"
-                    )
-
-                if min_distance < self.face_distance_threshold:
+                if (
+                    min_distance is not None
+                    and min_distance < self.face_distance_threshold
+                ):
                     yield (path1, path2, round(min_distance, 5))
 
         except Exception as e:

diff --git a/src/hope_dedup_engine/apps/faces/services/image_processor.py b/src/hope_dedup_engine/apps/faces/services/image_processor.py
@@ -11,6 +11,7 @@
 import numpy as np
 from constance import config
 
+from hope_dedup_engine.apps.core.exceptions import NotCompliantImageError
 from hope_dedup_engine.apps.faces.managers import DNNInferenceManager, StorageManager
 
 
@@ -97,19 +98,19 @@ def _get_face_detections_dnn(
                 # Decode image from binary buffer to 3D numpy array (height, width, channels of BlueGreenRed color space)
                 image = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
             (h, w) = image.shape[:2]
+            _h, _w = (
+                self.blob_from_image_cfg.shape["height"],
+                self.blob_from_image_cfg.shape["width"],
+            )
+            if h < _h or w < _w:
+                raise NotCompliantImageError(
+                    f"Image {filename} too small: '{h}x{w}'. It needs to be at least '{_h}x{_w}'."
+                )
+
             # Create a blob (4D tensor) from the image
             blob = cv2.dnn.blobFromImage(
-                image=cv2.resize(
-                    image,
-                    dsize=(
-                        self.blob_from_image_cfg.shape["height"],
-                        self.blob_from_image_cfg.shape["width"],
-                    ),
-                ),
-                size=(
-                    self.blob_from_image_cfg.shape["height"],
-                    self.blob_from_image_cfg.shape["width"],
-                ),
+                image=cv2.resize(image, dsize=(_h, _w)),
+                size=(_h, _w),
                 scalefactor=self.blob_from_image_cfg.scale_factor,
                 mean=self.blob_from_image_cfg.mean_values,
             )
@@ -161,7 +162,9 @@ def encode_face(self, filename: str, encodings_filename: str) -> None:
             encodings: list[np.ndarray[np.float32, Any]] = []
             face_regions = self._get_face_detections_dnn(filename)
             if not face_regions:
-                self.logger.warning("No face regions detected in image %s", filename)
+                raise NotCompliantImageError(
+                    f"No face regions detected in image '{filename}'."
+                )
             else:
 
                 for region in face_regions:

diff --git a/src/hope_dedup_engine/config/fragments/constance.py b/src/hope_dedup_engine/config/fragments/constance.py
@@ -40,7 +40,7 @@
         "tuple_field",
     ),
     "FACE_DETECTION_CONFIDENCE": (
-        0.5,
+        0.7,
         """
         Specifies the minimum confidence score required for a detected face to be considered valid. Detections
         with confidence scores below this threshold are discarded as likely false positives.
@@ -67,7 +67,7 @@
         int,
     ),
     "FACE_ENCODINGS_MODEL": (
-        "small",
+        "large",
         """
         Specifies the model type used for encoding face landmarks. It can be either 'small' which is faster and
         detects only 5 key facial landmarks, or 'large' which is more precise and identifies 68 key facial landmarks
@@ -76,11 +76,12 @@
         "face_encodings_model",
     ),
     "FACE_DISTANCE_THRESHOLD": (
-        0.4,
+        0.26,
         """
-        Specifies the maximum allowable distance between two face embeddings for them to be considered a match. It helps
-        determine if two faces belong to the same person by setting a threshold for similarity. Lower values result in
-        stricter matching, while higher values allow for more lenient matches.
+        Specifies the maximum allowable distance between two face embeddings for them to be considered a match.
+        This tolerance threshold is crucial for assessing whether two faces belong to the same individual,
+        as it establishes the similarity limit. Lower values result in stricter matching, while higher values allow
+        for more lenient matches.
         """,
         float,
     ),

diff --git a/tests/extras/demoapp/demo_images/Aaron_Eckhart_0001.jpg b/tests/extras/demoapp/demo_images/Aaron_Eckhart_0001.jpg
diff --git a/tests/extras/demoapp/demo_images/Aaron_Guiel_0001.jpg b/tests/extras/demoapp/demo_images/Aaron_Guiel_0001.jpg
diff --git a/tests/extras/demoapp/demo_images/Aaron_Peirsol_0001.jpg b/tests/extras/demoapp/demo_images/Aaron_Peirsol_0001.jpg
diff --git a/tests/extras/demoapp/demo_images/Aaron_Peirsol_0002.jpg b/tests/extras/demoapp/demo_images/Aaron_Peirsol_0002.jpg
diff --git a/tests/extras/demoapp/demo_images/Cathy_Freeman_0001.jpg b/tests/extras/demoapp/demo_images/Cathy_Freeman_0001.jpg
diff --git a/tests/extras/demoapp/demo_images/Cathy_Freeman_0002.jpg b/tests/extras/demoapp/demo_images/Cathy_Freeman_0002.jpg
diff --git a/tests/extras/demoapp/demo_images/Ziwang_Xu_0001.jpg b/tests/extras/demoapp/demo_images/Ziwang_Xu_0001.jpg
diff --git a/tests/extras/demoapp/demo_images/Zoe_Ball_0001.jpg b/tests/extras/demoapp/demo_images/Zoe_Ball_0001.jpg
diff --git a/tests/extras/demoapp/demo_images/too_small.jpg b/tests/extras/demoapp/demo_images/too_small.jpg
diff --git a/tests/extras/demoapp/demo_images/without_face.jpg b/tests/extras/demoapp/demo_images/without_face.jpg
diff --git a/tests/faces/conftest.py b/tests/faces/conftest.py
@@ -129,7 +129,7 @@ def mock_image_processor(
 @pytest.fixture
 def image_bytes_io():
     img_byte_arr = BytesIO()
-    image = Image.new("RGB", (100, 100), color="red")
+    image = Image.new("RGB", (300, 300), color="red")
     image.save(img_byte_arr, format="JPEG")
     img_byte_arr.seek(0)
     img_byte_arr.fake_open = lambda *_: BytesIO(img_byte_arr.getvalue())

diff --git a/tests/faces/faces_const.py b/tests/faces/faces_const.py
@@ -8,7 +8,7 @@
     ["ignore_file.jpg", "ignore_file2.jpg"],
     ["ignore_file4.jpg", "ignore_file3.jpg"],
 ]
-FACE_DISTANCE_THRESHOLD = 0.4
+FACE_DISTANCE_THRESHOLD = 0.26
 
 DNN_FILE = {
     "name": FILENAME,
@@ -44,8 +44,8 @@
 }
 FACE_REGIONS_INVALID: Final[list[list[tuple[int, int, int, int]]]] = [[], [(0, 0, 10)]]
 FACE_REGIONS_VALID: Final[list[tuple[int, int, int, int]]] = [
-    (10, 10, 20, 20),
-    (30, 30, 40, 40),
+    (40, 40, 80, 80),
+    (120, 120, 160, 160),
 ]
 BLOB_FROM_IMAGE_SCALE_FACTOR: Final[float] = 1.0
 BLOB_FROM_IMAGE_MEAN_VALUES: Final[tuple[float, float, float]] = (104.0, 177.0, 123.0)
@@ -56,8 +56,8 @@
     (0, 0, 0.15, 0.1, 0.1, 0.2, 0.2),  # with confidence 0.15 -> invalid detection
 ]
 IMAGE_SIZE: Final[tuple[int, int, int]] = (
-    100,
-    100,
+    400,
+    400,
     3,
 )  # Size of the image after decoding (h, w, number of channels)
 RESIZED_IMAGE_SIZE: Final[tuple[int, int, int]] = (

diff --git a/tests/faces/test_duplication_detector.py b/tests/faces/test_duplication_detector.py
@@ -205,12 +205,12 @@ def open_mock(filename, mode="rb"):
                 (
                     "test_file.jpg",
                     "test_file2.jpg",
-                    0.36,
-                ),  # config.FACE_DISTANCE_THRESHOLD + 0.04
+                    0.22,
+                ),  # config.FACE_DISTANCE_THRESHOLD - 0.04
                 (
                     "test_file.jpg",
                     "test_file3.jpg",
-                    0.2,
+                    0.06,
                 ),  # config.FACE_DISTANCE_THRESHOLD - 0.2
                 # last pair will not be included in the result because the distance is greater than the threshold
                 # ("test_file2.jpg", "test_file3.jpg", 0.3), # config.FACE_DISTANCE_THRESHOLD + 0.04