Skip to content

Commit

Permalink
Merge pull request #1651 from dchiller/i1644-clean-frag-and-dact-ids
Browse files Browse the repository at this point in the history
Add command to standardize formatting of DACT IDs and Fragmentarium IDs
  • Loading branch information
dchiller authored Oct 4, 2024
2 parents 9282e98 + 19a0828 commit bcfac9b
Show file tree
Hide file tree
Showing 2 changed files with 105 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""
A command designed to do a one-time reformatting of DACT IDs and Fragment
IDs in the database.
Fragment IDs should be of the form "F-XXXX" where XXXX is some alphanumeric.
Fragment IDs are currently assumed to be in the form "F-XXXX" or "XXXX".
DACT IDs should be of the form "D:0XXXX" where XXXX is the Fragment ID alphanumeric.
DACT IDs are currently assumed to be in the form "0XXXX" or "D-0XXXX".
This command adds the prefix "F-" to Fragment IDs that lack it, and rewrites
DACT IDs to use the "D:" prefix (adding it when missing, or replacing "D-").
"""

from django.core.management.base import BaseCommand

from main_app.models import Source


class Command(BaseCommand):
    """Normalize the DACT ID and Fragment ID of every ``Source``.

    DACT IDs are rewritten to ``D:0XXXX`` and Fragment IDs to ``F-XXXX``.
    Values that are already normalized are left alone, so the command can be
    run more than once without emitting spurious warnings. Values that match
    none of the recognized shapes are reported on stdout and left unchanged.
    """

    help = "Reformat DACT IDs and Fragment IDs in the database."

    def handle(self, *args, **options):
        """Walk all sources, normalize their IDs and save the changed ones.

        Args:
            *args: Positional arguments passed by Django (unused).
            **options: Parsed command-line options (unused).
        """
        for source in Source.objects.all():
            changed = False

            if source.dact_id:
                new_dact_id = self._normalize_dact_id(source.dact_id)
                if new_dact_id is None:
                    self.stdout.write(
                        self.style.WARNING(
                            f"{source.id} | DACT ID {source.dact_id} is not in the correct format."
                        )
                    )
                elif new_dact_id != source.dact_id:
                    source.dact_id = new_dact_id
                    changed = True

            if source.fragmentarium_id:
                new_fragment_id = self._normalize_fragment_id(
                    source.fragmentarium_id
                )
                if new_fragment_id is None:
                    self.stdout.write(
                        self.style.WARNING(
                            f"{source.id} | Fragment ID {source.fragmentarium_id} is not in the correct format."
                        )
                    )
                elif new_fragment_id != source.fragmentarium_id:
                    source.fragmentarium_id = new_fragment_id
                    changed = True

            # Only write rows whose IDs actually changed; untouched sources
            # do not need to be rewritten.
            if changed:
                source.save()

    @staticmethod
    def _normalize_dact_id(dact_id):
        """Return ``dact_id`` as ``D:0XXXX``, or ``None`` if unrecognized."""
        if len(dact_id) == 5 and dact_id.startswith("0"):
            return f"D:{dact_id}"
        if len(dact_id) == 7 and dact_id.startswith("D-0"):
            return f"D:{dact_id[2:]}"
        if len(dact_id) == 7 and dact_id.startswith("D:0"):
            # Already normalized (e.g. by an earlier run of this command).
            return dact_id
        return None

    @staticmethod
    def _normalize_fragment_id(fragment_id):
        """Return ``fragment_id`` as ``F-XXXX``, or ``None`` if unrecognized."""
        if len(fragment_id) == 4:
            return f"F-{fragment_id}"
        if len(fragment_id) == 6 and fragment_id.startswith("F-"):
            # Already normalized.
            return fragment_id
        return None
56 changes: 56 additions & 0 deletions django/cantusdb_project/main_app/tests/test_reformat_source_ids.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from django.test import TestCase
from django.core.management import call_command

from main_app.models import Source
from main_app.tests.make_fakes import make_fake_institution, make_fake_segment


class TestReformatSourceIDs(TestCase):
    """End-to-end check of the ``reformat_source_ids`` management command."""

    def test_command(self):
        """Convertible IDs are normalized; absent or malformed IDs are kept."""
        # Fields every fixture source shares.
        shared_fields = {
            "segment": make_fake_segment(),
            "holding_institution": make_fake_institution(),
        }

        unprefixed = Source.objects.create(
            shelfmark="Correct Source 1",
            dact_id="0a1b3",
            fragmentarium_id="a1b3",
            **shared_fields,
        )
        old_style_prefixed = Source.objects.create(
            shelfmark="Correct Source 2",
            dact_id="D-0a1b3",
            fragmentarium_id="F-a1b3",
            **shared_fields,
        )
        without_ids = Source.objects.create(
            shelfmark="Source with no IDs",
            **shared_fields,
        )
        malformed = Source.objects.create(
            shelfmark="Source with incorrect IDs",
            dact_id="a1b3",
            fragmentarium_id="F-1b3",
            **shared_fields,
        )

        call_command("reformat_source_ids")

        # (source, expected dact_id, expected fragmentarium_id)
        expectations = (
            (unprefixed, "D:0a1b3", "F-a1b3"),
            (old_style_prefixed, "D:0a1b3", "F-a1b3"),
            (malformed, "a1b3", "F-1b3"),
        )
        for original, expected_dact, expected_fragment in expectations:
            # Re-fetch so the assertions see what the command persisted.
            stored = Source.objects.get(pk=original.pk)
            self.assertEqual(stored.dact_id, expected_dact)
            self.assertEqual(stored.fragmentarium_id, expected_fragment)

        stored_without_ids = Source.objects.get(pk=without_ids.pk)
        self.assertIsNone(stored_without_ids.dact_id)
        self.assertIsNone(stored_without_ids.fragmentarium_id)

0 comments on commit bcfac9b

Please sign in to comment.