From 22b8e2df7cad7a00551ce9af6574567e850ef19d Mon Sep 17 00:00:00 2001 From: Michael Wood Date: Mon, 11 Nov 2024 17:17:01 +0000 Subject: [PATCH] rewrite_quality_data: Always use latest publisher object Instead of writing the quality/aggregate data to the publisher object that is associated with the datagetter run from the source file, always write it to the latest set of publishers. --- .../management/commands/rewrite_quality_data.py | 11 ++++++++++- datastore/db/models.py | 1 + 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/datastore/data_quality/management/commands/rewrite_quality_data.py b/datastore/data_quality/management/commands/rewrite_quality_data.py index f9564050..c8bb799d 100644 --- a/datastore/data_quality/management/commands/rewrite_quality_data.py +++ b/datastore/data_quality/management/commands/rewrite_quality_data.py @@ -98,7 +98,16 @@ def handle(self, *args, **options): ) def process_publishers(source_file): - publisher = source_file.get_publisher() + """Updates the publisher data with aggregates and quality data relating to their source files""" + + # We want to store the quality and aggregate data against the latest version of the publisher + # object rather than the version from the getter_run that this source file came from. + # This is so that when we serialise the latest publishers we get the latest aggregate and + # quality data regardless of when the source file entered the system. 
+ publisher = db.Publisher.objects.get( + getter_run=db.GetterRun.latest(), + prefix=source_file.data["publisher"]["prefix"], + ) print(publisher) diff --git a/datastore/db/models.py b/datastore/db/models.py index c4685cf7..5f4cb249 100644 --- a/datastore/db/models.py +++ b/datastore/db/models.py @@ -185,6 +185,7 @@ def get_distribution(self): return self.data["distribution"][0] def get_publisher(self): + """returns the Publisher object for this source file""" return Publisher.objects.get( getter_run=self.getter_run, prefix=self.data["publisher"]["prefix"] )