Skip to content

Commit

Permalink
chg ! config for deduplicationset
Browse files Browse the repository at this point in the history
  • Loading branch information
vitali-yanushchyk-valor committed Nov 4, 2024
1 parent 17dca4a commit 86cdf13
Show file tree
Hide file tree
Showing 27 changed files with 1,040 additions and 572 deletions.
1,090 changes: 632 additions & 458 deletions pdm.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ dependencies = [
"flower>=2.0.1",
"setuptools>=74.1.2",
"django-smart-env>=0.1.0",
"jsonschema>=4.23.0",
]

[build-system]
Expand Down
2 changes: 1 addition & 1 deletion pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,6 @@ markers =
python_files=test_*.py
filterwarnings =
ignore::DeprecationWarning
ignore::django.utils.deprecation.RemovedInDjango51Warning
ignore::django.utils.deprecation.RemovedInDjango60Warning
ignore::coverage.exceptions.CoverageWarning
ignore::coverage.exceptions.CoverageWarning:
55 changes: 54 additions & 1 deletion src/hope_dedup_engine/apps/api/admin/config.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,61 @@
import json
from typing import Any

from django.contrib import messages
from django.contrib.admin import ModelAdmin, register
from django.http import HttpRequest, HttpResponse
from django.shortcuts import redirect, render
from django.urls import path, reverse

from hope_dedup_engine.apps.api.models import Config


@register(Config)
class ConfigAdmin(ModelAdmin):
    """Admin for ``Config`` with a confirmation step for configs in use.

    When a changed config is referenced by at least one deduplication set,
    the submitted form data is parked in the session and the user is sent to
    a confirmation page; confirming re-applies that data to the object.
    """

    list_display = ("name", "settings")

    def get_urls(self):
        """Return the admin URLs with the confirmation view listed first."""
        custom_urls = [
            path(
                "confirm-save/<int:object_id>/",
                self.admin_site.admin_view(self.confirm_save),
                name="confirm_save_config",
            ),
        ]
        return custom_urls + super().get_urls()

    def response_change(self, request: HttpRequest, obj: Any) -> HttpResponse:
        """Redirect to the confirmation page when ``obj`` is used by sets.

        NOTE(review): Django invokes ``response_change`` after the object has
        already been saved, so the confirmation page cannot prevent the first
        write -- confirm that this flow is what is intended.
        """
        dd_set = ", ".join(str(d) for d in obj.deduplicationset_set.all())
        if not dd_set:
            return super().response_change(request, obj)

        request.session["unsaved_data"] = request.POST.dict()
        confirm_url = reverse("admin:confirm_save_config", args=[obj.pk])
        self.message_user(
            request,
            f"Related deduplication sets {dd_set} was found. Please confirm saving.",
            level=messages.WARNING,
        )
        return redirect(confirm_url)

    def confirm_save(self, request, object_id):  # pragma: no cover
        """Show the confirmation page (GET) or apply the parked data (POST).

        On POST the form data stored by ``response_change`` is removed from
        the session, copied onto the object field by field and saved; the
        user is then redirected to the changelist.
        """
        changelist_url = reverse("admin:api_config_changelist")
        obj = self.get_object(request, object_id)
        if obj is None:
            # get_object() returns None for an unknown pk; bail out instead
            # of failing later on setattr()/save().
            self.message_user(
                request,
                f"Config with ID {object_id} was not found.",
                level=messages.ERROR,
            )
            return redirect(changelist_url)

        if request.method == "POST":
            # pop(): the parked data must not be re-applied by a later visit.
            form_data = request.session.pop("unsaved_data", None)
            if form_data:
                # The raw POST also carries the CSRF token and submit-button
                # names; only real, editable model fields may be assigned.
                model_fields = {
                    f.name: f
                    for f in obj._meta.fields
                    if f.editable and not f.primary_key
                }
                for field, raw_value in form_data.items():
                    model_field = model_fields.get(field)
                    if model_field is None:
                        continue
                    if raw_value == "" and model_field.null:
                        # Mirror the admin form: an empty input on a nullable
                        # field means NULL (keeps ``unique`` on name usable).
                        value = None
                    elif field == "settings":
                        value = json.loads(raw_value)
                    else:
                        value = raw_value
                    setattr(obj, field, value)
                obj.save()

            return redirect(changelist_url)

        return render(
            request,
            "admin/api/config/confirm_save.html",
            {
                "object": obj,
                "form_data": request.session.get("unsaved_data"),
            },
        )
1 change: 1 addition & 0 deletions src/hope_dedup_engine/apps/api/admin/deduplicationset.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ class DeduplicationSetAdmin(AdminFiltersMixin, ExtraButtonsMixin, ModelAdmin):
"name",
"reference_pk",
"state_value",
"config",
"created_at",
"updated_at",
"deleted",
Expand Down
12 changes: 5 additions & 7 deletions src/hope_dedup_engine/apps/api/deduplication/adapters.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from collections.abc import Generator

from constance import config
from typing import Any

from hope_dedup_engine.apps.api.deduplication.registry import DuplicateKeyPair
from hope_dedup_engine.apps.api.models import DeduplicationSet
Expand All @@ -22,13 +21,12 @@ def run(self) -> Generator[DuplicateKeyPair, None, None]:
"reference_pk", "filename"
)
}
face_distance_threshold: float = (
self.deduplication_set.config
and self.deduplication_set.config.face_distance_threshold
) or config.FACE_DISTANCE_THRESHOLD
ds_config: dict[str, Any] = (
self.deduplication_set.config and self.deduplication_set.config.settings
) or {}
# ignored key pairs are not handled correctly in DuplicationDetector
detector = DuplicationDetector(
tuple[str](filename_to_reference_pk.keys()), face_distance_threshold
tuple[str](filename_to_reference_pk.keys()), ds_config
)
for first_filename, second_filename, distance in detector.find_duplicates():
yield filename_to_reference_pk[first_filename], filename_to_reference_pk[
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Generated by Django 5.0.7 on 2024-10-31 06:31

import django.db.models.deletion
from django.db import migrations, models


class Migration(migrations.Migration):
    """Reshape ``Config``: drop the threshold column, add ``name``/``settings``.

    Also turns ``DeduplicationSet.config`` into a plain ``ForeignKey``.
    """

    dependencies = [
        ("api", "0008_ignoredfilenamepair"),
    ]

    operations = [
        # The column is dropped outright; this migration contains no step
        # that copies existing threshold values anywhere else.
        migrations.RemoveField(
            model_name="config",
            name="face_distance_threshold",
        ),
        # Optional, unique, indexed label for a config.
        migrations.AddField(
            model_name="config",
            name="name",
            field=models.CharField(
                blank=True, db_index=True, max_length=128, null=True, unique=True
            ),
        ),
        # Free-form JSON settings; defaults to an empty dict for existing rows.
        migrations.AddField(
            model_name="config",
            name="settings",
            field=models.JSONField(blank=True, default=dict, null=True),
        ),
        # A ForeignKey lets several deduplication sets reference one config;
        # deleting the config nulls the reference instead of cascading.
        migrations.AlterField(
            model_name="deduplicationset",
            name="config",
            field=models.ForeignKey(
                null=True, on_delete=django.db.models.deletion.SET_NULL, to="api.config"
            ),
        ),
    ]
2 changes: 1 addition & 1 deletion src/hope_dedup_engine/apps/api/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from hope_dedup_engine.apps.api.models.auth import HDEToken # noqa: F401
from hope_dedup_engine.apps.api.models.config import Config # noqa: F401
from hope_dedup_engine.apps.api.models.deduplication import ( # noqa: F401
Config,
DeduplicationSet,
Duplicate,
Image,
Expand Down
25 changes: 25 additions & 0 deletions src/hope_dedup_engine/apps/api/models/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from django.core.exceptions import ValidationError
from django.db import models

from jsonschema import ValidationError as JSONSchemaValidationError

from hope_dedup_engine.apps.api.utils.config_schema import (
DefaultValidatingValidator,
settings_schema,
)


class Config(models.Model):
    """Optionally named bundle of deduplication settings stored as JSON.

    ``settings`` is validated against ``settings_schema`` in :meth:`clean`.
    """

    name = models.CharField(
        max_length=128, unique=True, null=True, blank=True, db_index=True
    )
    settings = models.JSONField(default=dict, null=True, blank=True)

    def __str__(self) -> str:
        return f"{self.name}" if self.name else f"ID: {self.pk}"

    def clean(self) -> None:
        """Validate ``settings`` against the schema, filling in defaults.

        ``DefaultValidatingValidator`` inserts schema defaults for missing
        keys, so ``self.settings`` may be modified in place by this call.

        Raises:
            ValidationError: keyed on ``settings`` when the value does not
                conform to ``settings_schema``.
        """
        # The field is declared null=True/blank=True, so an absent value is
        # legitimate and must not be reported as a schema type error.
        if self.settings is None:
            return
        try:
            DefaultValidatingValidator(settings_schema).validate(self.settings)
        except JSONSchemaValidationError as e:
            # Chain the cause so the original schema error stays in tracebacks.
            raise ValidationError({"settings": e.message}) from e
17 changes: 1 addition & 16 deletions src/hope_dedup_engine/apps/api/models/deduplication.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from uuid import uuid4

from django.conf import settings
from django.core.validators import MaxValueValidator, MinValueValidator
from django.db import models

from hope_dedup_engine.apps.api.utils.notification import send_notification
Expand All @@ -11,20 +10,6 @@
REFERENCE_PK_LENGTH: Final[int] = 100


class Config(models.Model):
    """Configuration record holding the face distance threshold."""

    face_distance_threshold = models.FloatField(
        validators=[MinValueValidator(0.1), MaxValueValidator(1.0)],
        null=True,
    )

    def __str__(self) -> str:
        # "<pk>: name: value | name: value ..." for every field except the id.
        described = [
            f"{field.name}: {getattr(self, field.name)}"
            for field in self._meta.fields
            if field.name != "id"
        ]
        return f"{self.pk}: " + " | ".join(described)

class DeduplicationSet(models.Model):
"""
Bucket for entries we want to deduplicate
Expand Down Expand Up @@ -69,7 +54,7 @@ class State(models.IntegerChoices):
)
updated_at = models.DateTimeField(auto_now=True)
notification_url = models.CharField(max_length=255, null=True, blank=True)
config = models.OneToOneField(Config, null=True, on_delete=models.SET_NULL)
config = models.ForeignKey("Config", null=True, on_delete=models.SET_NULL)

@property
def state(self) -> State:
Expand Down
24 changes: 13 additions & 11 deletions src/hope_dedup_engine/apps/api/serializers.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,32 @@
from typing import Any

from jsonschema import Draft202012Validator
from jsonschema import ValidationError as JSONSchemaValidationError
from rest_framework import serializers

from hope_dedup_engine.apps.api.models import DeduplicationSet
from hope_dedup_engine.apps.api.models import Config, DeduplicationSet
from hope_dedup_engine.apps.api.models.deduplication import (
Config,
Duplicate,
IgnoredFilenamePair,
IgnoredReferencePkPair,
Image,
)

CONFIG = "config"
from hope_dedup_engine.apps.api.utils.config_schema import settings_schema


class ConfigSerializer(serializers.ModelSerializer):
    """Serialize ``Config`` (every field except ``id``) and check ``settings``."""

    class Meta:
        model = Config
        exclude = ("id",)

    def validate_settings(self, value):
        """Validate ``settings`` against ``settings_schema``.

        Uses the plain Draft 2020-12 validator, so ``value`` is returned
        unchanged when it is valid.

        Raises:
            serializers.ValidationError: when ``value`` violates the schema.
        """
        try:
            Draft202012Validator(settings_schema).validate(value)
        except JSONSchemaValidationError as e:
            # Chain the cause so the underlying schema error is not lost.
            raise serializers.ValidationError(
                f"Settings validation error: {e.message}"
            ) from e
        return value


class DeduplicationSetSerializer(serializers.ModelSerializer):
state = serializers.CharField(source="get_state_value_display", read_only=True)
Expand All @@ -36,22 +44,16 @@ class Meta:
"updated_by",
)

def create(self, validated_data) -> DeduplicationSet:
    """Create a deduplication set, creating its nested config when given.

    The ``config`` entry is always removed from ``validated_data``; a
    ``Config`` row is created only when that entry is non-empty.
    """
    # pop() with a default also removes a present-but-falsy entry (None, {}),
    # which would otherwise collide with the explicit ``config=`` keyword
    # below and raise TypeError.
    config_data = validated_data.pop(CONFIG, None)
    config = Config.objects.create(**config_data) if config_data else None
    return DeduplicationSet.objects.create(config=config, **validated_data)


class CreateConfigSerializer(ConfigSerializer):
    """Config serializer for create requests; adds nothing to its parent."""


class CreateDeduplicationSetSerializer(serializers.ModelSerializer):
    """Input serializer for creating a deduplication set.

    Only ``reference_pk`` and ``notification_url`` are accepted. No nested
    ``config`` field is declared: a declared field that is missing from
    ``Meta.fields`` is rejected by DRF, and the earlier ``fields`` assignment
    that listed it was dead code overridden by the later one.
    """

    class Meta:
        model = DeduplicationSet
        fields = ("reference_pk", "notification_url")


class ImageSerializer(serializers.ModelSerializer):
Expand Down
97 changes: 97 additions & 0 deletions src/hope_dedup_engine/apps/api/utils/config_schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
from django.conf import settings

from constance import config
from jsonschema import Draft202012Validator, validators

# JSON Schema (Draft 2020-12) for ``Config.settings``.
#
# String defaults of the form "constance.config.<NAME>" are placeholders:
# ``extend_with_default`` resolves them against the live constance config at
# validation time instead of freezing a value at import time.
settings_schema: dict = {
    "type": "object",
    "properties": {
        "detection": {
            "type": "object",
            "properties": {
                "confidence": {
                    "type": "number",
                    "exclusiveMinimum": 0,
                    "maximum": 1.0,
                    "default": "constance.config.FACE_DETECTION_CONFIDENCE",
                },
            },
            "default": {},
        },
        "recognition": {
            "type": "object",
            "properties": {
                "num_jitters": {
                    "type": "integer",
                    "minimum": 1,
                    "default": "constance.config.FACE_ENCODINGS_NUM_JITTERS",
                },
                "model": {
                    "type": "string",
                    # Allowed values are the first element of each choice
                    # configured for the constance "face_encodings_model"
                    # field. A list (not a tuple) keeps the schema a valid
                    # JSON document.
                    "enum": [
                        ch[0]
                        for ch in settings.CONSTANCE_ADDITIONAL_FIELDS.get(
                            "face_encodings_model"
                        )[1].get("choices")
                    ],
                    "default": "constance.config.FACE_ENCODINGS_MODEL",
                },
                "preprocessors": {
                    # The key must be the string "type"; the bare name
                    # ``type`` is the builtin class, which the validator
                    # ignores, leaving this property unchecked.
                    "type": "array",
                    "items": {
                        "type": "string",
                        "enum": ["contrast"],
                    },
                    "uniqueItems": True,
                    "default": [],
                },
            },
            "default": {},
        },
        "duplicates": {
            "type": "object",
            "properties": {
                "tolerance": {
                    "type": "number",
                    "exclusiveMinimum": 0,
                    "maximum": 1.0,
                    "default": "constance.config.FACE_DISTANCE_THRESHOLD",
                },
            },
            "default": {},
        },
    },
}


def extend_with_default(validator_class):
    """Return a validator class that also fills in schema defaults.

    The returned class behaves like ``validator_class`` except that, while
    processing a ``properties`` keyword, every property missing from the
    instance is set to its subschema's ``default``. A string default starting
    with ``"constance.config."`` is replaced by the attribute of that name on
    the constance ``config`` object (``None`` when it does not exist).

    Validation therefore mutates the instance being validated.
    """
    from copy import deepcopy

    validate_properties = validator_class.VALIDATORS["properties"]
    constance_prefix = "constance.config."

    def set_defaults(validator, properties, instance, schema):
        # Defaults only make sense on JSON objects; anything else is left for
        # the regular keywords (e.g. "type") to report.
        if validator.is_type(instance, "object"):
            for prop_name, subschema in properties.items():
                if not isinstance(subschema, dict) or "default" not in subschema:
                    continue
                default_value = subschema["default"]
                if isinstance(default_value, str) and default_value.startswith(
                    constance_prefix
                ):
                    config_name = default_value.split(".")[-1]
                    default_value = getattr(config, config_name, None)
                else:
                    # Copy container defaults: inserting the schema's own
                    # dict/list would let nested defaults be written into
                    # the schema and be shared by every validated instance.
                    default_value = deepcopy(default_value)

                instance.setdefault(prop_name, default_value)

        yield from validate_properties(validator, properties, instance, schema)

    return validators.extend(
        validator_class,
        {"properties": set_defaults},
    )


# Draft 2020-12 validator whose "properties" handling is replaced by the one
# built in extend_with_default, i.e. validating also sets missing defaults.
DefaultValidatingValidator = extend_with_default(Draft202012Validator)
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def handle(self, *args: Any, **options: dict[str, Any]) -> None:
self.halt(FileNotFoundError(MESSAGES["not_exist"] % storage.src))
self.stdout.write(MESSAGES["storage_success"] % storage.name)
logger.info(MESSAGES["storage_success"] % storage.name)
except (CommandError, SystemCheckError) as e:
except (CommandError, FileNotFoundError, SystemCheckError) as e:
self.stdout.write(
self.style.ERROR(MESSAGES["failed"] % (storage.name, e))
)
Expand Down
Loading

0 comments on commit 86cdf13

Please sign in to comment.