From 19a0828d70fb1af0721d368c9cab4a266b9be1b6 Mon Sep 17 00:00:00 2001
From: Dylan Hillerbrand
Date: Wed, 2 Oct 2024 20:00:28 -0400
Subject: [PATCH] feat(source ids): add command to fix dact_id and fragmentarium_id formats

---
 .../commands/reformat_source_ids.py           | 85 +++++++++++++++++++
 .../tests/test_reformat_source_ids.py         | 80 +++++++++++++++++
 2 files changed, 165 insertions(+)
 create mode 100644 django/cantusdb_project/main_app/management/commands/reformat_source_ids.py
 create mode 100644 django/cantusdb_project/main_app/tests/test_reformat_source_ids.py

diff --git a/django/cantusdb_project/main_app/management/commands/reformat_source_ids.py b/django/cantusdb_project/main_app/management/commands/reformat_source_ids.py
new file mode 100644
index 000000000..b11f3941a
--- /dev/null
+++ b/django/cantusdb_project/main_app/management/commands/reformat_source_ids.py
@@ -0,0 +1,85 @@
+"""
+A command designed to do a one-time reformatting of DACT IDs and Fragment
+IDs in the database.
+
+Fragment IDs should be of the form "F-XXXX" where XXXX is some alphanumeric.
+Fragment IDs are currently assumed to be in the form "F-XXXX" or "XXXX".
+DACT IDs should be of the form "D:0XXXX" where XXXX is the Fragment ID alphanumeric.
+DACT IDs are currently assumed to be in the form "0XXXX" or "D-0XXXX".
+
+This command simply adds the prefix "F-" to all Fragment IDs and "D:" to all
+DACT IDs where they are missing. IDs that are already in the target format are
+left untouched, so the command can safely be run more than once.
+"""
+
+from django.core.management.base import BaseCommand
+
+from main_app.models import Source
+
+
+def _reformat_dact_id(dact_id):
+    """
+    Return dact_id in the form "D:0XXXX", or None if its format is not
+    one of "0XXXX", "D-0XXXX" or "D:0XXXX".
+    """
+    if len(dact_id) == 5 and dact_id.startswith("0"):
+        return f"D:{dact_id}"
+    if len(dact_id) == 7 and dact_id.startswith("D-0"):
+        return f"D:{dact_id[2:]}"
+    if len(dact_id) == 7 and dact_id.startswith("D:0"):
+        # Already in the target format (e.g. the command was run before).
+        return dact_id
+    return None
+
+
+def _reformat_fragmentarium_id(fragmentarium_id):
+    """
+    Return fragmentarium_id in the form "F-XXXX", or None if its format is
+    not one of "XXXX" or "F-XXXX".
+    """
+    if len(fragmentarium_id) == 4:
+        return f"F-{fragmentarium_id}"
+    if len(fragmentarium_id) == 6 and fragmentarium_id.startswith("F-"):
+        return fragmentarium_id
+    return None
+
+
+class Command(BaseCommand):
+    help = "Reformat DACT IDs and Fragment IDs in the database."
+
+    def handle(self, *args, **options):
+        """
+        Reformat the DACT ID and Fragment ID of every source.
+
+        A source is saved only when at least one of its IDs was rewritten.
+        IDs in an unrecognized format are left as they are and reported
+        with a warning on stdout.
+        """
+        for source in Source.objects.all():
+            changed = False
+            if source.dact_id:
+                new_dact_id = _reformat_dact_id(source.dact_id)
+                if new_dact_id is None:
+                    self.stdout.write(
+                        self.style.WARNING(
+                            f"{source.id} | DACT ID {source.dact_id} is not in the correct format."
+                        )
+                    )
+                elif new_dact_id != source.dact_id:
+                    source.dact_id = new_dact_id
+                    changed = True
+            if source.fragmentarium_id:
+                new_fragmentarium_id = _reformat_fragmentarium_id(
+                    source.fragmentarium_id
+                )
+                if new_fragmentarium_id is None:
+                    self.stdout.write(
+                        self.style.WARNING(
+                            f"{source.id} | Fragment ID {source.fragmentarium_id} is not in the correct format."
+                        )
+                    )
+                elif new_fragmentarium_id != source.fragmentarium_id:
+                    source.fragmentarium_id = new_fragmentarium_id
+                    changed = True
+            if changed:
+                source.save()
diff --git a/django/cantusdb_project/main_app/tests/test_reformat_source_ids.py b/django/cantusdb_project/main_app/tests/test_reformat_source_ids.py
new file mode 100644
index 000000000..7148d786e
--- /dev/null
+++ b/django/cantusdb_project/main_app/tests/test_reformat_source_ids.py
@@ -0,0 +1,80 @@
+from io import StringIO
+
+from django.test import TestCase
+from django.core.management import call_command
+
+from main_app.models import Source
+from main_app.tests.make_fakes import make_fake_institution, make_fake_segment
+
+
+class TestReformatSourceIDs(TestCase):
+    def test_command(self):
+        segment = make_fake_segment()
+        fake_inst = make_fake_institution()
+        correct_source_1 = Source.objects.create(
+            segment=segment,
+            shelfmark="Correct Source 1",
+            holding_institution=fake_inst,
+            dact_id="0a1b3",
+            fragmentarium_id="a1b3",
+        )
+        correct_source_2 = Source.objects.create(
+            segment=segment,
+            shelfmark="Correct Source 2",
+            holding_institution=fake_inst,
+            dact_id="D-0a1b3",
+            fragmentarium_id="F-a1b3",
+        )
+        already_formatted_source = Source.objects.create(
+            segment=segment,
+            shelfmark="Already formatted source",
+            holding_institution=fake_inst,
+            dact_id="D:0a1b3",
+            fragmentarium_id="F-a1b3",
+        )
+        source_with_no_ids = Source.objects.create(
+            segment=segment,
+            shelfmark="Source with no IDs",
+            holding_institution=fake_inst,
+        )
+        source_with_incorrect_ids = Source.objects.create(
+            segment=segment,
+            shelfmark="Source with incorrect IDs",
+            holding_institution=fake_inst,
+            dact_id="a1b3",
+            fragmentarium_id="F-1b3",
+        )
+
+        out = StringIO()
+        call_command("reformat_source_ids", stdout=out, no_color=True)
+        self.assertEqual(Source.objects.get(pk=correct_source_1.pk).dact_id, "D:0a1b3")
+        self.assertEqual(
+            Source.objects.get(pk=correct_source_1.pk).fragmentarium_id, "F-a1b3"
+        )
+        self.assertEqual(Source.objects.get(pk=correct_source_2.pk).dact_id, "D:0a1b3")
+        self.assertEqual(
+            Source.objects.get(pk=correct_source_2.pk).fragmentarium_id, "F-a1b3"
+        )
+        self.assertEqual(
+            Source.objects.get(pk=already_formatted_source.pk).dact_id, "D:0a1b3"
+        )
+        self.assertEqual(
+            Source.objects.get(pk=already_formatted_source.pk).fragmentarium_id,
+            "F-a1b3",
+        )
+        self.assertIsNone(Source.objects.get(pk=source_with_no_ids.pk).dact_id)
+        self.assertIsNone(Source.objects.get(pk=source_with_no_ids.pk).fragmentarium_id)
+        self.assertEqual(
+            Source.objects.get(pk=source_with_incorrect_ids.pk).dact_id, "a1b3"
+        )
+        self.assertEqual(
+            Source.objects.get(pk=source_with_incorrect_ids.pk).fragmentarium_id,
+            "F-1b3",
+        )
+
+        warnings = out.getvalue()
+        # Only IDs in an unrecognized format are reported.
+        self.assertIn("DACT ID a1b3 is not in the correct format.", warnings)
+        self.assertIn("Fragment ID F-1b3 is not in the correct format.", warnings)
+        self.assertNotIn("DACT ID D:0a1b3", warnings)
+        self.assertNotIn("Fragment ID F-a1b3", warnings)