From f3cfa7967d7418a8a88a4fcfa6200c142f392b52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Odini?= Date: Wed, 6 Nov 2024 20:35:53 +0100 Subject: [PATCH 1/2] refactor(proofs): run OCR in post_save signal instead of create (#549) --- open_prices/api/proofs/views.py | 6 ------ open_prices/proofs/models.py | 11 +++++++++++ open_prices/proofs/tests.py | 6 +++--- open_prices/proofs/utils.py | 2 +- 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/open_prices/api/proofs/views.py b/open_prices/api/proofs/views.py index acd11bbd..dba60502 100644 --- a/open_prices/api/proofs/views.py +++ b/open_prices/api/proofs/views.py @@ -1,6 +1,4 @@ -from django.conf import settings from django_filters.rest_framework import DjangoFilterBackend -from django_q.tasks import async_task from drf_spectacular.utils import extend_schema from rest_framework import filters, mixins, status, viewsets from rest_framework.decorators import action @@ -77,10 +75,6 @@ def upload(self, request: Request) -> Response: status=status.HTTP_400_BAD_REQUEST, ) file_path, mimetype, image_thumb_path = store_file(request.data.get("file")) - async_task( - "open_prices.proofs.utils.run_ocr_task", - f"{settings.IMAGES_DIR}/{file_path}", - ) proof_create_data = { "file_path": file_path, "mimetype": mimetype, diff --git a/open_prices/proofs/models.py b/open_prices/proofs/models.py index a420c391..ea398928 100644 --- a/open_prices/proofs/models.py +++ b/open_prices/proofs/models.py @@ -6,6 +6,7 @@ from django.db.models import Count, signals from django.dispatch import receiver from django.utils import timezone +from django_q.tasks import async_task from open_prices.common import constants, utils from open_prices.locations import constants as location_constants @@ -299,6 +300,16 @@ def set_missing_fields_from_prices(self): self.save() +@receiver(signals.post_save, sender=Proof) +def proof_post_save_run_ocr(sender, instance, created, **kwargs): + if not settings.TESTING: + if created: + async_task( + "open_prices.proofs.utils.fetch_and_save_ocr_data", + f"{settings.IMAGES_DIR}/{instance.file_path}", + ) + + @receiver(signals.post_save, sender=Proof) def proof_post_save_update_prices(sender, instance, created, **kwargs): if not created: diff --git a/open_prices/proofs/tests.py b/open_prices/proofs/tests.py index 59622c8d..085bd4ff 100644 --- a/open_prices/proofs/tests.py +++ b/open_prices/proofs/tests.py @@ -14,7 +14,7 @@ from open_prices.proofs import constants as proof_constants from open_prices.proofs.factories import ProofFactory from open_prices.proofs.models import Proof -from open_prices.proofs.utils import run_ocr_task +from open_prices.proofs.utils import fetch_and_save_ocr_data LOCATION_OSM_NODE_652825274 = { "type": location_constants.TYPE_OSM, @@ -311,7 +311,7 @@ def test_proof_update(self): class RunOCRTaskTest(TestCase): - def test_run_ocr_task_success(self): + def test_fetch_and_save_ocr_data_success(self): response_data = {"responses": [{"textAnnotations": [{"description": "test"}]}]} with self.settings(GOOGLE_CLOUD_VISION_API_KEY="test_api_key"): # mock call to run_ocr_on_image @@ -323,7 +323,7 @@ def test_run_ocr_task_success(self): image_path = Path(f"{tmpdirname}/test.jpg") with image_path.open("w") as f: f.write("test") - run_ocr_task(image_path) + fetch_and_save_ocr_data(image_path) mock_run_ocr_on_image.assert_called_once_with( image_path, "test_api_key" ) diff --git a/open_prices/proofs/utils.py b/open_prices/proofs/utils.py index 3832151e..0d98d2c3 100644 --- a/open_prices/proofs/utils.py +++ b/open_prices/proofs/utils.py @@ -169,7 +169,7 @@ def run_ocr_on_image(image_path: Path | str, api_key: str) -> dict[str, Any] | N return r.json() -def run_ocr_task(image_path: Path | str, override: bool = False) -> None: +def fetch_and_save_ocr_data(image_path: Path | str, override: bool = False) -> None: """Run OCR on the image stored at the given path and save the result to a JSON file. From 2f53957d3b87a01e861a88fa27e6075d01a34f20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Odini?= Date: Wed, 6 Nov 2024 20:51:28 +0100 Subject: [PATCH 2/2] chore: update dependency openfoodfacts-python (#550) --- poetry.lock | 12 ++++++++---- pyproject.toml | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index eaa709e4..2f87fe46 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1858,13 +1858,13 @@ files = [ [[package]] name = "openfoodfacts" -version = "0.2.1" +version = "2.2.0" description = "Official Python SDK of Open Food Facts" optional = false python-versions = "<4.0,>=3.8.1" files = [ - {file = "openfoodfacts-0.2.1-py3-none-any.whl", hash = "sha256:4bc0581f7e0a949078a6e5aba996f925f3f996e207927cf1c156fe36e3b16de8"}, - {file = "openfoodfacts-0.2.1.tar.gz", hash = "sha256:dfe65884e00dad09a9d1168d227697d501b2c54ece24279446595dabaa9fc91e"}, + {file = "openfoodfacts-2.2.0-py3-none-any.whl", hash = "sha256:172c953fd5727ffbe23bd2839afebc15cc346c3e186976e98b3d37026a90ba5b"}, + {file = "openfoodfacts-2.2.0.tar.gz", hash = "sha256:e79ec1292238078de5e4857a19a7ccc66ca99b766311a9d7e60e9bf5315ac762"}, ] [package.dependencies] @@ -1872,6 +1872,10 @@ pydantic = ">=2.0.0,<3.0.0" requests = ">=2.20.0" tqdm = ">=4.0.0,<5.0.0" +[package.extras] +pillow = ["Pillow (>=9.3,<10.4)"] +redis = ["redis[hiredis] (>=5.1.0,<5.2.0)"] + [[package]] name = "osmpythontools" version = "0.3.5" @@ -3382,4 +3386,4 @@ viz = ["matplotlib", "nc-time-axis", "seaborn"] [metadata] lock-version = "2.0" python-versions = "~3.11" -content-hash = "0afde6a4649fc6ee2305387c9c46262803139ccf88b0c2734df756f41e0a3ab7" +content-hash = "e4378c226cfd0a0ba4ff465ace8264ec1f11969fd4d0c8e8494c87184b8ba806" diff --git a/pyproject.toml b/pyproject.toml index 3128b0cb..ccd8bf10 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,7 +43,7 @@ readme = "README.md" [tool.poetry.dependencies] python = "~3.11" Babel = "~2.13.1" -openfoodfacts = "0.2.1" +openfoodfacts = "^2.2.0" psycopg2-binary = "~2.9.9" python-multipart = "~0.0.7" requests = "~2.31.0"