diff --git a/datastore/data_quality/management/commands/rewrite_quality_data.py b/datastore/data_quality/management/commands/rewrite_quality_data.py index f9564050..c8bb799d 100644 --- a/datastore/data_quality/management/commands/rewrite_quality_data.py +++ b/datastore/data_quality/management/commands/rewrite_quality_data.py @@ -98,7 +98,16 @@ def handle(self, *args, **options): ) def process_publishers(source_file): - publisher = source_file.get_publisher() + """Update the publisher data with aggregates and quality data relating to their source files.""" + + # We want to store the quality and aggregate data against the latest version of the publisher + # object rather than the version from the getter_run that this source file came from. + # This is so that when we serialise the latest publishers we get the latest aggregate and + # quality data regardless of when the source file entered the system. + publisher = db.Publisher.objects.get( + getter_run=db.GetterRun.latest(), + prefix=source_file.data["publisher"]["prefix"], + ) print(publisher) diff --git a/datastore/db/models.py b/datastore/db/models.py index c4685cf7..5f4cb249 100644 --- a/datastore/db/models.py +++ b/datastore/db/models.py @@ -185,6 +185,7 @@ def get_distribution(self): return self.data["distribution"][0] def get_publisher(self): + """Return the Publisher matching this source file's getter_run and its data["publisher"]["prefix"].""" return Publisher.objects.get( getter_run=self.getter_run, prefix=self.data["publisher"]["prefix"] )