diff --git a/api/scpca_portal/management/commands/create_portal_metadata.py b/api/scpca_portal/management/commands/create_portal_metadata.py index a3f9108c..36a2e7fa 100644 --- a/api/scpca_portal/management/commands/create_portal_metadata.py +++ b/api/scpca_portal/management/commands/create_portal_metadata.py @@ -3,7 +3,7 @@ from django.conf import settings from django.core.management.base import BaseCommand -from scpca_portal import common +from scpca_portal import common, s3 from scpca_portal.config.logging import get_and_configure_logger from scpca_portal.models import ComputedFile, Project @@ -11,32 +11,36 @@ class Command(BaseCommand): - help = """Creates a computed file and zip for portal-wide metadata, - saves the instance to the databse, and - uploads the zip file to S3 bucket. + help = """Creates a computed file for portal-wide metadata. + Saves generated computed file to the db. + Optionally uploads file to s3 and cleans up output data. """ - @staticmethod - def clean_up_output_data(): - """Cleans up the output data files after processing the computed file""" - logger.info("Cleaning up output data") - # This static method may not be required using buffers - def add_arguments(self, parser): parser.add_argument( "--clean-up-output-data", action=BooleanOptionalAction, default=settings.PRODUCTION ) + parser.add_argument( + "--update-s3", action=BooleanOptionalAction, default=settings.UPDATE_S3_DATA + ) def handle(self, *args, **kwargs): self.create_portal_metadata(**kwargs) - def create_portal_metadata(self, **kwargs): + def create_portal_metadata(self, clean_up_output_data: bool, update_s3: bool, **kwargs): logger.info("Creating the portal-wide metadata computed file") - computed_file = ComputedFile.get_portal_metadata_file( + if computed_file := ComputedFile.get_portal_metadata_file( Project.objects.all(), common.GENERATED_PORTAL_METADATA_DOWNLOAD_CONFIG - ) + ): + if update_s3: + logger.info("Updating the zip file in S3") + 
s3.upload_output_file(computed_file.s3_key, computed_file.s3_bucket) + + logger.info("Saving the object to the database") + computed_file.save() - if kwargs["clean_up_output_data"]: - self.clean_up_output_data() + if clean_up_output_data: + logger.info("Cleaning up the output directory") + computed_file.clean_up_local_computed_file() return computed_file diff --git a/api/scpca_portal/models/computed_file.py b/api/scpca_portal/models/computed_file.py index a9f417cb..dab0ec33 100644 --- a/api/scpca_portal/models/computed_file.py +++ b/api/scpca_portal/models/computed_file.py @@ -43,7 +43,6 @@ class OutputFileFormats: format = models.TextField(choices=OutputFileFormats.CHOICES, null=True) includes_merged = models.BooleanField(default=False) modality = models.TextField(choices=OutputFileModalities.CHOICES, null=True) - portal_metadata_only = models.BooleanField(default=False) metadata_only = models.BooleanField(default=False) portal_metadata_only = models.BooleanField(default=False) s3_bucket = models.TextField() @@ -133,7 +132,11 @@ def get_portal_metadata_file(cls, projects, download_config: Dict) -> Self: ) computed_file = cls( - portal_metadata_only=True, + format=download_config.get("format"), + modality=download_config.get("modality"), + includes_merged=download_config.get("includes_merged"), + metadata_only=download_config.get("metadata_only"), + portal_metadata_only=download_config.get("portal_metadata_only"), s3_bucket=settings.AWS_S3_OUTPUT_BUCKET_NAME, s3_key=common.PORTAL_METADATA_COMPUTED_FILE_NAME, size_in_bytes=zip_file_path.stat().st_size, diff --git a/api/scpca_portal/test/management/commands/test_create_portal_metadata.py b/api/scpca_portal/test/management/commands/test_create_portal_metadata.py index d4182252..cb343bb9 100644 --- a/api/scpca_portal/test/management/commands/test_create_portal_metadata.py +++ b/api/scpca_portal/test/management/commands/test_create_portal_metadata.py @@ -1,6 +1,8 @@ import csv import shutil from io import TextIOWrapper 
+from typing import Dict +from unittest.mock import patch from zipfile import ZipFile from django.conf import settings @@ -29,9 +31,6 @@ def tearDownClass(cls): super().tearDownClass() shutil.rmtree(common.OUTPUT_DATA_PATH, ignore_errors=True) - def assertProjectReadmeContains(self, text, zip_file): - self.assertIn(text, zip_file.read("README.md").decode("utf-8")) - def load_test_data(self): # Expected object counts PROJECTS_COUNT = 3 @@ -54,21 +53,61 @@ def load_test_data(self): self.assertEqual(Sample.objects.all().count(), SAMPLES_COUNT) self.assertEqual(Library.objects.all().count(), LIBRARIES_COUNT) - def test_create_portal_metadata(self): + # TODO: After PR #839 is merged into dev, add readme file format testing + def assertProjectReadmeContains(self, text, zip_file): + self.assertIn(text, zip_file.read(README_FILE).decode("utf-8")) + + def assertFields(self, computed_file, expected_fields: Dict): + for expected_key, expected_value in expected_fields.items(): + actual_value = getattr(computed_file, expected_key) + message = f"Expected {expected_value}, received {actual_value} on '{expected_key}'" + self.assertEqual(actual_value, expected_value, message) + + def assertEqualWithVariance(self, value, expected, variance=50): + # Make sure the given value is within the range of expected bounds + message = f"{value} is out of range" + self.assertGreaterEqual(value, expected - variance, message) + self.assertLessEqual(value, expected + variance, message) + + @patch("scpca_portal.management.commands.create_portal_metadata.s3.upload_output_file") + def test_create_portal_metadata(self, mock_upload_output_file): + # Set up the database for test self.load_test_data() - self.processor.create_portal_metadata(clean_up_output_data=False) + # Create the portal metadata computed file + self.processor.create_portal_metadata(clean_up_output_data=False, update_s3=True) + + # Test the computed file + computed_files = ComputedFile.objects.filter(portal_metadata_only=True) + # Make 
sure the computed file is created and singular + self.assertEqual(computed_files.count(), 1) + computed_file = computed_files.first() + # Make sure the computed file size is within the expected range + self.assertEqualWithVariance(computed_file.size_in_bytes, 8430) + # Make sure all fields match the download configuration values + download_config = { + "modality": None, + "format": None, + "includes_merged": False, + "metadata_only": True, + "portal_metadata_only": True, + } + self.assertFields(computed_file, download_config) + # Make sure mock_upload_output_file is called once + mock_upload_output_file.assert_called_once_with( + computed_file.s3_key, computed_file.s3_bucket + ) + # Test the content of the generated zip file zip_file_path = ComputedFile.get_local_file_path( common.GENERATED_PORTAL_METADATA_DOWNLOAD_CONFIG ) - with ZipFile(zip_file_path) as zip: - # Test the content of the generated zip file + with ZipFile(zip_file_path) as zip_file: # There are 2 file: # ├── README.md # |── metadata.tsv expected_file_count = 2 # Make sure the zip has the exact number of expected files - files = set(zip.namelist()) + files = set(zip_file.namelist()) self.assertEqual(len(files), expected_file_count) self.assertIn(README_FILE, files) self.assertIn(METADATA_FILE, files) @@ -76,9 +115,9 @@ def test_create_portal_metadata(self): expected_text = ( "This download includes associated metadata for samples from all projects" ) - self.assertProjectReadmeContains(expected_text, zip) + self.assertProjectReadmeContains(expected_text, zip_file) # metadata.tsv - with zip.open(METADATA_FILE) as metadata_file: + with zip_file.open(METADATA_FILE) as metadata_file: csv_reader = csv.DictReader( TextIOWrapper(metadata_file, "utf-8"), delimiter=common.TAB,