diff --git a/src/hope_dedup_engine/apps/api/admin/duplicate.py b/src/hope_dedup_engine/apps/api/admin/duplicate.py index 4ab05214..e4e3c3ec 100644 --- a/src/hope_dedup_engine/apps/api/admin/duplicate.py +++ b/src/hope_dedup_engine/apps/api/admin/duplicate.py @@ -1,10 +1,7 @@ from django.contrib.admin import ModelAdmin, register -from adminfilters.filters import ( - DjangoLookupFilter, - NumberFilter, - RelatedFieldComboFilter, -) +from adminfilters.autocomplete import AutoCompleteFilter +from adminfilters.filters import DjangoLookupFilter, NumberFilter from adminfilters.mixin import AdminFiltersMixin from hope_dedup_engine.apps.api.models import Duplicate @@ -20,7 +17,7 @@ class DuplicateAdmin(AdminFiltersMixin, ModelAdmin): "second_reference_pk", ) list_filter = ( - ("deduplication_set", RelatedFieldComboFilter), + ("deduplication_set", AutoCompleteFilter), ("score", NumberFilter), DjangoLookupFilter, ) diff --git a/src/hope_dedup_engine/apps/api/admin/image.py b/src/hope_dedup_engine/apps/api/admin/image.py index 8b721863..b82a2a6d 100644 --- a/src/hope_dedup_engine/apps/api/admin/image.py +++ b/src/hope_dedup_engine/apps/api/admin/image.py @@ -1,7 +1,8 @@ from django.contrib.admin import ModelAdmin, register +from adminfilters.autocomplete import AutoCompleteFilter from adminfilters.dates import DateRangeFilter -from adminfilters.filters import DjangoLookupFilter, RelatedFieldComboFilter +from adminfilters.filters import DjangoLookupFilter from adminfilters.mixin import AdminFiltersMixin from hope_dedup_engine.apps.api.models import Image @@ -17,7 +18,7 @@ class ImageAdmin(AdminFiltersMixin, ModelAdmin): ) list_filter = ( - ("deduplication_set", RelatedFieldComboFilter), + ("deduplication_set", AutoCompleteFilter), ("created_at", DateRangeFilter), DjangoLookupFilter, ) @@ -27,6 +28,3 @@ def has_add_permission(self, request): def has_change_permission(self, request, obj=None): return False - - def has_delete_permission(self, request, obj=None): - return obj is not None diff --git a/src/hope_dedup_engine/apps/api/deduplication/process.py b/src/hope_dedup_engine/apps/api/deduplication/process.py index c6a69737..33bfb67c 100644 --- a/src/hope_dedup_engine/apps/api/deduplication/process.py +++ b/src/hope_dedup_engine/apps/api/deduplication/process.py @@ -94,10 +94,11 @@ def find_duplicates(deduplication_set_id: str, serialized_lock: str) -> None: deduplication_set.state = deduplication_set.State.CLEAN deduplication_set.save() - if lock_enabled: - lock.release() - except Exception: deduplication_set.state = DeduplicationSet.State.ERROR deduplication_set.save() raise + + finally: + if lock_enabled: + lock.release() diff --git a/src/hope_dedup_engine/apps/api/models/deduplication.py b/src/hope_dedup_engine/apps/api/models/deduplication.py index 95cb6955..8f894515 100644 --- a/src/hope_dedup_engine/apps/api/models/deduplication.py +++ b/src/hope_dedup_engine/apps/api/models/deduplication.py @@ -18,7 +18,7 @@ class Config(models.Model): ) def __str__(self) -> str: - return " | ".join( + return f"{self.pk}: " + " | ".join( f"{field.name}: {getattr(self, field.name)}" for field in self._meta.fields if field.name not in ("id",) diff --git a/src/hope_dedup_engine/apps/core/exceptions.py b/src/hope_dedup_engine/apps/core/exceptions.py index 365ffc75..a6bafdfb 100644 --- a/src/hope_dedup_engine/apps/core/exceptions.py +++ b/src/hope_dedup_engine/apps/core/exceptions.py @@ -16,3 +16,12 @@ class DownloaderKeyError(Exception): def __init__(self, key: str) -> None: self.key = key super().__init__(f"Downloader key '{key}' does not exist.") + + +class NotCompliantImageError(Exception): + """ + Exception raised when an image is not compliant with the expected parameters. + """ + + def __init__(self, message: str) -> None: + super().__init__(message) diff --git a/src/hope_dedup_engine/apps/faces/services/duplication_detector.py b/src/hope_dedup_engine/apps/faces/services/duplication_detector.py index d7b60a35..56c36a9d 100644 --- a/src/hope_dedup_engine/apps/faces/services/duplication_detector.py +++ b/src/hope_dedup_engine/apps/faces/services/duplication_detector.py @@ -125,22 +125,24 @@ def find_duplicates(self) -> Generator[tuple[str, str, float], None, None]: encodings_all = self._load_encodings_all() for path1, path2 in combinations(existed_images_name, 2): - min_distance = self.face_distance_threshold encodings1 = encodings_all.get(path1) encodings2 = encodings_all.get(path2) if encodings1 is None or encodings2 is None: continue + min_distance = None for encoding1 in encodings1: - if ( - current_min := min( - face_recognition.face_distance(encodings2, encoding1) - ) - ) < min_distance: + distances = face_recognition.face_distance(encodings2, encoding1) + current_min = min(distances) if np.any(distances) else 0 + if min_distance is None or current_min < min_distance: min_distance = current_min - if min_distance < self.face_distance_threshold: + if ( + min_distance is not None + and min_distance < self.face_distance_threshold + ): yield (path1, path2, round(min_distance, 5)) + except Exception as e: self.logger.exception( "Error finding duplicates for images %s", self.filenames diff --git a/src/hope_dedup_engine/apps/faces/services/image_processor.py b/src/hope_dedup_engine/apps/faces/services/image_processor.py index 7e3a0bad..ead049fe 100644 --- a/src/hope_dedup_engine/apps/faces/services/image_processor.py +++ b/src/hope_dedup_engine/apps/faces/services/image_processor.py @@ -11,6 +11,7 @@ import numpy as np from constance import config +from hope_dedup_engine.apps.core.exceptions import NotCompliantImageError from hope_dedup_engine.apps.faces.managers import DNNInferenceManager, StorageManager @@ -97,19 +98,19 @@ def _get_face_detections_dnn( # Decode image from binary buffer to 3D numpy array (height, width, channels of BlueGreeRed color space) image = cv2.imdecode(img_array, cv2.IMREAD_COLOR) (h, w) = image.shape[:2] + _h, _w = ( + self.blob_from_image_cfg.shape["height"], + self.blob_from_image_cfg.shape["width"], + ) + if h < _h or w < _w: + raise NotCompliantImageError( + f"Image {filename} too small: '{h}x{w}'. It needs to be at least '{_h}x{_w}'." + ) + # Create a blob (4D tensor) from the image blob = cv2.dnn.blobFromImage( - image=cv2.resize( - image, - dsize=( - self.blob_from_image_cfg.shape["height"], - self.blob_from_image_cfg.shape["width"], - ), - ), - size=( - self.blob_from_image_cfg.shape["height"], - self.blob_from_image_cfg.shape["width"], - ), + image=cv2.resize(image, dsize=(_h, _w)), + size=(_h, _w), scalefactor=self.blob_from_image_cfg.scale_factor, mean=self.blob_from_image_cfg.mean_values, ) @@ -147,6 +148,30 @@ def _get_face_detections_dnn( raise e return face_regions + def _preprocess_image(self, filename: str) -> np.ndarray: + """ + This function retrieves an image from the 'images' storage, reads it as an array of bytes, + and decodes it into a color image. + + The image's color space is first converted from BGR (Blue, Green, Red) to YUV. Histogram equalization is then + applied to the Y channel (luminance) to enhance the contrast of the image. + + Finally, the image is converted to RGB color space for further processing. + + Args: + filename (str): The filename of the image to preprocess. + + Returns: + np.ndarray: The preprocessed image as a NumPy array in RGB format. + """ + with self.storages.get_storage("images").open(filename, "rb") as img_file: + img_array = np.asarray(bytearray(img_file.read()), dtype=np.uint8) + image = cv2.cvtColor( + cv2.imdecode(img_array, cv2.IMREAD_COLOR), cv2.COLOR_BGR2YUV + ) + image[:, :, 0] = cv2.equalizeHist(image[:, :, 0]) + return cv2.cvtColor(image, cv2.COLOR_YUV2RGB) + def encode_face(self, filename: str, encodings_filename: str) -> None: """ Encode faces detected in an image and save the encodings to storage. @@ -156,19 +181,20 @@ def encode_face(self, filename: str, encodings_filename: str) -> None: encodings_filename (str): The filename to save the face encodings. """ try: - with self.storages.get_storage("images").open(filename, "rb") as img_file: - image = face_recognition.load_image_file(img_file) encodings: list[np.ndarray[np.float32, Any]] = [] + image = self._preprocess_image(filename) face_regions = self._get_face_detections_dnn(filename) if not face_regions: - self.logger.warning("No face regions detected in image %s", filename) + raise NotCompliantImageError( + f"No face regions detected in image '{filename}'." + ) else: for region in face_regions: if isinstance(region, (list, tuple)) and len(region) == 4: top, right, bottom, left = region face_encodings = face_recognition.face_encodings( image, - [(top, right, bottom, left)], + [(right, bottom, left, top)], num_jitters=self.face_encodings_cfg.num_jitters, model=self.face_encodings_cfg.model, ) diff --git a/src/hope_dedup_engine/config/fragments/constance.py b/src/hope_dedup_engine/config/fragments/constance.py index 32fe0b38..bbb8281c 100644 --- a/src/hope_dedup_engine/config/fragments/constance.py +++ b/src/hope_dedup_engine/config/fragments/constance.py @@ -40,7 +40,7 @@ "tuple_field", ), "FACE_DETECTION_CONFIDENCE": ( - 0.5, + 0.7, """ Specifies the minimum confidence score required for a detected face to be considered valid. Detections with confidence scores below this threshold are discarded as likely false positives. @@ -67,7 +67,7 @@ int, ), "FACE_ENCODINGS_MODEL": ( - "small", + "large", """ Specifies the model type used for encoding face landmarks. It can be either 'small' which is faster and detects only 5 key facial landmarks, or 'large' which is more precise and identifies 68 key facial landmarks @@ -76,11 +76,12 @@ "face_encodings_model", ), "FACE_DISTANCE_THRESHOLD": ( - 0.4, + 0.26, """ - Specifies the maximum allowable distance between two face embeddings for them to be considered a match. It helps - determine if two faces belong to the same person by setting a threshold for similarity. Lower values result in - stricter matching, while higher values allow for more lenient matches. + Specifies the maximum allowable distance between two face embeddings for them to be considered a match. + This tolerance threshold is crucial for assessing whether two faces belong to the same individual, + as it establishes the similarity limit. Lower values result in stricter matching, while higher values allow + for more lenient matches. """, float, ), diff --git a/tests/extras/demoapp/demo_images/Aaron_Eckhart_0001.jpg b/tests/extras/demoapp/demo_images/Aaron_Eckhart_0001.jpg index 4d2fb8db..3da86181 100644 Binary files a/tests/extras/demoapp/demo_images/Aaron_Eckhart_0001.jpg and b/tests/extras/demoapp/demo_images/Aaron_Eckhart_0001.jpg differ diff --git a/tests/extras/demoapp/demo_images/Aaron_Guiel_0001.jpg b/tests/extras/demoapp/demo_images/Aaron_Guiel_0001.jpg index c2fb5d0f..bac4a9f8 100644 Binary files a/tests/extras/demoapp/demo_images/Aaron_Guiel_0001.jpg and b/tests/extras/demoapp/demo_images/Aaron_Guiel_0001.jpg differ diff --git a/tests/extras/demoapp/demo_images/Aaron_Peirsol_0001.jpg b/tests/extras/demoapp/demo_images/Aaron_Peirsol_0001.jpg index b1cc3287..df2b0923 100644 Binary files a/tests/extras/demoapp/demo_images/Aaron_Peirsol_0001.jpg and b/tests/extras/demoapp/demo_images/Aaron_Peirsol_0001.jpg differ diff --git a/tests/extras/demoapp/demo_images/Aaron_Peirsol_0002.jpg b/tests/extras/demoapp/demo_images/Aaron_Peirsol_0002.jpg index cf561ab4..d934ad0e 100644 Binary files a/tests/extras/demoapp/demo_images/Aaron_Peirsol_0002.jpg and b/tests/extras/demoapp/demo_images/Aaron_Peirsol_0002.jpg differ diff --git a/tests/extras/demoapp/demo_images/Cathy_Freeman_0001.jpg b/tests/extras/demoapp/demo_images/Cathy_Freeman_0001.jpg index f2d6b5d8..c3921aea 100644 Binary files a/tests/extras/demoapp/demo_images/Cathy_Freeman_0001.jpg and b/tests/extras/demoapp/demo_images/Cathy_Freeman_0001.jpg differ diff --git a/tests/extras/demoapp/demo_images/Cathy_Freeman_0002.jpg b/tests/extras/demoapp/demo_images/Cathy_Freeman_0002.jpg index e4f8f62c..233d3f1c 100644 Binary files a/tests/extras/demoapp/demo_images/Cathy_Freeman_0002.jpg and b/tests/extras/demoapp/demo_images/Cathy_Freeman_0002.jpg differ diff --git a/tests/extras/demoapp/demo_images/Ziwang_Xu_0001.jpg b/tests/extras/demoapp/demo_images/Ziwang_Xu_0001.jpg index 2665ebc0..b8495b82 100644 Binary files a/tests/extras/demoapp/demo_images/Ziwang_Xu_0001.jpg and b/tests/extras/demoapp/demo_images/Ziwang_Xu_0001.jpg differ diff --git a/tests/extras/demoapp/demo_images/Zoe_Ball_0001.jpg b/tests/extras/demoapp/demo_images/Zoe_Ball_0001.jpg index f26223d2..c41d655f 100644 Binary files a/tests/extras/demoapp/demo_images/Zoe_Ball_0001.jpg and b/tests/extras/demoapp/demo_images/Zoe_Ball_0001.jpg differ diff --git a/tests/extras/demoapp/demo_images/too_small.jpg b/tests/extras/demoapp/demo_images/too_small.jpg new file mode 100644 index 00000000..60fca585 Binary files /dev/null and b/tests/extras/demoapp/demo_images/too_small.jpg differ diff --git a/tests/extras/demoapp/demo_images/without_face.jpg b/tests/extras/demoapp/demo_images/without_face.jpg index e3b70996..d870b9ba 100644 Binary files a/tests/extras/demoapp/demo_images/without_face.jpg and b/tests/extras/demoapp/demo_images/without_face.jpg differ diff --git a/tests/extras/demoapp/scripts/base_case b/tests/extras/demoapp/scripts/base_case index bcce8600..0d23a587 100755 --- a/tests/extras/demoapp/scripts/base_case +++ b/tests/extras/demoapp/scripts/base_case @@ -1,5 +1,7 @@ #!/usr/bin/env bash +start_time=$(date +%s) + source "$(dirname "0")/.common" if [[ $# -ne 1 ]] ; then @@ -9,16 +11,16 @@ fi ./create_deduplication_set "$1" | jq -r ".id" | xargs ./use_deduplication_set -./create_image Aaron_Eckhart_0001.jpg -./create_image Aaron_Guiel_0001.jpg -./create_image Aaron_Peirsol_0001.jpg -./create_image Aaron_Peirsol_0002.jpg -./create_image Cathy_Freeman_0001.jpg -./create_image Cathy_Freeman_0002.jpg -./create_image without_face.jpg -./create_image Ziwang_Xu_0001.jpg -./create_image Zoe_Ball_0001.jpg +for file in ../demo_images/*.{jpg,png}; do + if [[ -f "$file" ]]; then + ./create_image $(basename "$file") + fi +done + ./process_deduplication_set ./show_duplicates + +duration=$(( $(date +%s) - start_time )) +echo "Duration: $duration seconds." diff --git a/tests/faces/conftest.py b/tests/faces/conftest.py index a2bb53e2..28014c71 100644 --- a/tests/faces/conftest.py +++ b/tests/faces/conftest.py @@ -129,7 +129,7 @@ def mock_image_processor( @pytest.fixture def image_bytes_io(): img_byte_arr = BytesIO() - image = Image.new("RGB", (100, 100), color="red") + image = Image.new("RGB", (300, 300), color="red") image.save(img_byte_arr, format="JPEG") img_byte_arr.seek(0) img_byte_arr.fake_open = lambda *_: BytesIO(img_byte_arr.getvalue()) diff --git a/tests/faces/faces_const.py b/tests/faces/faces_const.py index 89ac631e..ab9beffd 100644 --- a/tests/faces/faces_const.py +++ b/tests/faces/faces_const.py @@ -8,7 +8,7 @@ ["ignore_file.jpg", "ignore_file2.jpg"], ["ignore_file4.jpg", "ignore_file3.jpg"], ] -FACE_DISTANCE_THRESHOLD = 0.4 +FACE_DISTANCE_THRESHOLD = 0.26 DNN_FILE = { "name": FILENAME, @@ -44,8 +44,8 @@ } FACE_REGIONS_INVALID: Final[list[list[tuple[int, int, int, int]]]] = [[], [(0, 0, 10)]] FACE_REGIONS_VALID: Final[list[tuple[int, int, int, int]]] = [ - (10, 10, 20, 20), - (30, 30, 40, 40), + (40, 40, 80, 80), + (120, 120, 160, 160), ] BLOB_FROM_IMAGE_SCALE_FACTOR: Final[float] = 1.0 BLOB_FROM_IMAGE_MEAN_VALUES: Final[tuple[float, float, float]] = (104.0, 177.0, 123.0) @@ -56,8 +56,8 @@ (0, 0, 0.15, 0.1, 0.1, 0.2, 0.2), # with confidence 0.15 -> invalid detection ] IMAGE_SIZE: Final[tuple[int, int, int]] = ( - 100, - 100, + 400, + 400, 3, ) # Size of the image after decoding (h, w, number of channels) RESIZED_IMAGE_SIZE: Final[tuple[int, int, int]] = ( diff --git a/tests/faces/test_duplication_detector.py b/tests/faces/test_duplication_detector.py index c55c1016..7c6773e6 100644 --- a/tests/faces/test_duplication_detector.py +++ b/tests/faces/test_duplication_detector.py @@ -205,12 +205,12 @@ def open_mock(filename, mode="rb"): ( "test_file.jpg", "test_file2.jpg", - 0.36, - ), # config.FACE_DISTANCE_THRESHOLD + 0.04 + 0.22, + ), # config.FACE_DISTANCE_THRESHOLD - 0.04 ( "test_file.jpg", "test_file3.jpg", - 0.2, + 0.06, ), # config.FACE_DISTANCE_THRESHOLD - 0.2 # last pair will not be included in the result because the distance is greater than the threshold # ("test_file2.jpg", "test_file3.jpg", 0.44), # config.FACE_DISTANCE_THRESHOLD + 0.04 diff --git a/tests/faces/test_image_processor.py b/tests/faces/test_image_processor.py index 1347eb75..8331ccb9 100644 --- a/tests/faces/test_image_processor.py +++ b/tests/faces/test_image_processor.py @@ -131,7 +131,6 @@ def test_encode_face(mock_image_processor, image_bytes_io, face_regions): patch.object( mock_image_processor, "_get_face_detections_dnn", return_value=face_regions ) as mock_get_face_detections_dnn, - patch.object(face_recognition, "load_image_file") as mock_load_image_file, patch.object(face_recognition, "face_encodings") as mock_face_encodings, ): mock_image_processor.encode_face(FILENAME, FILENAME_ENCODED) @@ -139,7 +138,6 @@ def test_encode_face(mock_image_processor, image_bytes_io, face_regions): mock_get_face_detections_dnn.assert_called_once() mocked_image_open.assert_called_with(FILENAME, "rb") assert mocked_image_open.side_effect == image_bytes_io.fake_open - mock_load_image_file.assert_called() if face_regions == FACE_REGIONS_VALID: mocked_encoded_open.assert_called_with(FILENAME_ENCODED, "wb") @@ -152,10 +150,7 @@ def test_encode_face(mock_image_processor, image_bytes_io, face_regions): @pytest.mark.parametrize( "method, exception_str", - ( - (str("load_image_file"), "Test load_image_file exception"), - (str("face_encodings"), "Test face_encodings exception"), - ), + ((str("face_encodings"), "Test face_encodings exception"),), ) def test_encode_face_exception_handling( mock_image_processor, mock_net, method: str, exception_str