813 - Persist portal metadata computed file and upload after creation #825
Changes from 93 commits
create_portal_metadata management command:

@@ -3,40 +3,46 @@
 from django.conf import settings
 from django.core.management.base import BaseCommand

-from scpca_portal import common
+from scpca_portal import common, s3
 from scpca_portal.config.logging import get_and_configure_logger
 from scpca_portal.models import ComputedFile, Project

 logger = get_and_configure_logger(__name__)


 class Command(BaseCommand):
-    help = """Creates a computed file and zip for portal-wide metadata,
-    saves the instance to the databse, and
-    uploads the zip file to S3 bucket.
+    help = """Creates a computed file for portal-wide metadata.
+    Saves generated computed file to the db.
+    Optionally uploads file to s3 and cleans up output data.
     """

     @staticmethod
     def clean_up_output_data():
         """Cleans up the output data files after processing the computed file"""
         logger.info("Cleaning up output data")
         # This static method may not be required using buffers

     def add_arguments(self, parser):
         parser.add_argument(
             "--clean-up-output-data", action=BooleanOptionalAction, default=settings.PRODUCTION
         )
         parser.add_argument(
             "--update-s3", action=BooleanOptionalAction, default=settings.UPDATE_S3_DATA
         )

     def handle(self, *args, **kwargs):
         self.create_portal_metadata(**kwargs)

-    def create_portal_metadata(self, **kwargs):
+    def create_portal_metadata(self, clean_up_output_data: bool, update_s3: bool, **kwargs):
         logger.info("Creating the portal-wide metadata computed file")
         computed_file = ComputedFile.get_portal_metadata_file(
             Project.objects.all(), common.GENERATED_PORTAL_METADATA_DOWNLOAD_CONFIG
         )

-        if kwargs["clean_up_output_data"]:
-            self.clean_up_output_data()
+        if computed_file:
+            logger.info("Saving the object to the database")
+            computed_file.save()
+
+            if update_s3:
+                logger.info("Updating the zip file in S3")
+                s3.upload_output_file(computed_file.s3_key, computed_file.s3_bucket)
Review comment on lines +35 to +37: Let's move updating above saving it to the DB, so that if it fails on upload we exit and the file is unavailable. (A sketch of this reordering follows the diff below.)
+
+            if clean_up_output_data:
+                logger.info("Cleaning up the output directory")
+                computed_file.clean_up_local_computed_file()
+
+        return computed_file
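A minimal sketch of the reordering suggested in the review comment above, with the S3 upload moved ahead of the database save. It reuses only names that appear in the diff (s3.upload_output_file, clean_up_local_computed_file, GENERATED_PORTAL_METADATA_DOWNLOAD_CONFIG), shows just the method body, and relies on the command module's existing imports; it is not the committed implementation.

```python
def create_portal_metadata(self, clean_up_output_data: bool, update_s3: bool, **kwargs):
    logger.info("Creating the portal-wide metadata computed file")
    computed_file = ComputedFile.get_portal_metadata_file(
        Project.objects.all(), common.GENERATED_PORTAL_METADATA_DOWNLOAD_CONFIG
    )

    if computed_file:
        # Upload first: if the S3 call raises, the save below never runs,
        # so the database never points at a file that is missing from the bucket.
        if update_s3:
            logger.info("Updating the zip file in S3")
            s3.upload_output_file(computed_file.s3_key, computed_file.s3_bucket)

        logger.info("Saving the object to the database")
        computed_file.save()

        if clean_up_output_data:
            logger.info("Cleaning up the output directory")
            computed_file.clean_up_local_computed_file()

    return computed_file
```

Either ordering is driven by the same flags declared in add_arguments, so invocation stays something like `python manage.py create_portal_metadata --update-s3 --no-clean-up-output-data` (BooleanOptionalAction generates the `--no-` variants).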
Test for the create_portal_metadata command:

@@ -1,6 +1,7 @@
 import csv
 import shutil
 from io import TextIOWrapper
+from unittest.mock import patch
 from zipfile import ZipFile

 from django.conf import settings

@@ -29,9 +30,6 @@ def tearDownClass(cls):
         super().tearDownClass()
         shutil.rmtree(common.OUTPUT_DATA_PATH, ignore_errors=True)

-    def assertProjectReadmeContains(self, text, zip_file):
-        self.assertIn(text, zip_file.read("README.md").decode("utf-8"))
-
     def load_test_data(self):
         # Expected object counts
         PROJECTS_COUNT = 3

@@ -54,31 +52,56 @@ def load_test_data(self):
         self.assertEqual(Sample.objects.all().count(), SAMPLES_COUNT)
         self.assertEqual(Library.objects.all().count(), LIBRARIES_COUNT)

-    def test_create_portal_metadata(self):
-        # TODO: After PR #839 is merged into dev, add readme file format testing
+    def assertProjectReadmeContains(self, text, zip_file):
+        self.assertIn(text, zip_file.read(README_FILE).decode("utf-8"))
Review comment on lines +56 to +58: Good point, we should draft the issue to do this instead of just having the comment.
Reply: I've gone ahead and filed the issue here #869 👍!
+
+    @patch("scpca_portal.management.commands.create_portal_metadata.s3.upload_output_file")
+    def test_create_portal_metadata(self, mock_upload_output_file):
         # Set up the database for test
         self.load_test_data()
-        self.processor.create_portal_metadata(clean_up_output_data=False)
+        # Create the portal metadata computed file
+        self.processor.create_portal_metadata(clean_up_output_data=False, update_s3=True)

-        # Test the computed file
+        # Make sure the computed file is created and singular
         computed_files = ComputedFile.objects.filter(portal_metadata_only=True)
         self.assertEqual(computed_files.count(), 1)
         computed_file = computed_files.first()
+        # Make sure the computed file size is as expected
+        self.assertEqual(computed_file.size_in_bytes, 8460)
Review comment on the size assertion (with a suggested change): If you find that the value changes and you cannot reliably assert that the value matches, you can create a test that allows for some wiggle room.
Reply: This is such a great alternative! I've applied it at daccc79 👍 I've also added a new method called …
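One possible shape for such a "wiggle room" assertion, as a sketch only: the class name, helper name, and delta below are illustrative, not the actual method added in daccc79.

```python
import unittest


class SizeAssertionMixin(unittest.TestCase):
    """Illustrative mixin; the real test class and helper name may differ."""

    def assertSizeInBytesWithinDelta(self, actual_size, expected_size, delta=100):
        """Pass if actual_size is within +/- delta bytes of expected_size."""
        self.assertAlmostEqual(actual_size, expected_size, delta=delta)


# Usage in the test, replacing the exact equality check:
# self.assertSizeInBytesWithinDelta(computed_file.size_in_bytes, 8460)
```

unittest's assertAlmostEqual with a delta keyword keeps the intent (the file is roughly the expected size) without the test breaking every time the metadata payload shifts by a few bytes.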
+
+        # Make sure all fields match the download configuration values
+        self.assertIsNone(computed_file.format)
+        self.assertIsNone(computed_file.modality)
+        self.assertFalse(computed_file.includes_merged)
+        self.assertTrue(computed_file.metadata_only)
+        self.assertTrue(computed_file.portal_metadata_only)
+
+        mock_upload_output_file.assert_called_once_with(
+            computed_file.s3_key, computed_file.s3_bucket
+        )
+
-        # Test the content of the generated zip file
         zip_file_path = ComputedFile.get_local_file_path(
             common.GENERATED_PORTAL_METADATA_DOWNLOAD_CONFIG
         )
-        with ZipFile(zip_file_path) as zip:
+        # Test the content of the generated zip file
+        with ZipFile(zip_file_path) as zip_file:
             # There are 2 file:
             # ├── README.md
             # |── metadata.tsv
             expected_file_count = 2
             # Make sure the zip has the exact number of expected files
-            files = set(zip.namelist())
+            files = set(zip_file.namelist())
             self.assertEqual(len(files), expected_file_count)
             self.assertIn(README_FILE, files)
             self.assertIn(METADATA_FILE, files)
             # README.md
             expected_text = (
                 "This download includes associated metadata for samples from all projects"
             )
-            self.assertProjectReadmeContains(expected_text, zip)
+            self.assertProjectReadmeContains(expected_text, zip_file)
             # metadata.tsv
-            with zip.open(METADATA_FILE) as metadata_file:
+            with zip_file.open(METADATA_FILE) as metadata_file:
                 csv_reader = csv.DictReader(
                     TextIOWrapper(metadata_file, "utf-8"),
                     delimiter=common.TAB,
Review comment: You can use the walrus operator here.
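The comment does not pin down the exact line; one likely candidate is the assign-then-test pattern in create_portal_metadata. A sketch of that rewrite, using only names from the diff above:

```python
# Before: assign, then test
computed_file = ComputedFile.get_portal_metadata_file(
    Project.objects.all(), common.GENERATED_PORTAL_METADATA_DOWNLOAD_CONFIG
)
if computed_file:
    ...

# After: the walrus operator (Python 3.8+) binds and tests in one step
if computed_file := ComputedFile.get_portal_metadata_file(
    Project.objects.all(), common.GENERATED_PORTAL_METADATA_DOWNLOAD_CONFIG
):
    ...
```

Note that `computed_file` is still bound after the `if`, so a trailing `return computed_file` keeps working.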