-
-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
915 - Add generate computed file command #936
915 - Add generate computed file command #936
Conversation
…ted_file_callback and loader::_create_computed_file
api/scpca_portal/loader.py
Outdated
|
||
def _create_computed_file_callback(future, *, update_s3: bool, clean_up_output_data: bool) -> None: | ||
""" | ||
Wrap multiprocessing logic by grabbing computed file future and uploading it tohe s3. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Spelling: "tohe" in the docstring above is a typo (presumably "to").
self, | ||
project_id: str, | ||
sample_id: str, | ||
download_config: Dict, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should be a string that references a dict in common.py
api/scpca_portal/loader.py
Outdated
if project: | ||
computed_file = ComputedFile.get_project_file( | ||
project, download_config, project.get_output_file_name(download_config) | ||
) | ||
_create_computed_file(computed_file, update_s3, clean_up_output_data=False) | ||
elif sample: | ||
computed_file = ComputedFile.get_sample_file( | ||
sample, | ||
download_config, | ||
sample.get_output_file_name(download_config), | ||
Lock(), # this should be removed when CF::get_sample_file is refactored | ||
) | ||
_create_computed_file(computed_file, update_s3, clean_up_output_data=False) | ||
sample.project.update_downloadable_sample_count() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if project: | |
computed_file = ComputedFile.get_project_file( | |
project, download_config, project.get_output_file_name(download_config) | |
) | |
_create_computed_file(computed_file, update_s3, clean_up_output_data=False) | |
elif sample: | |
computed_file = ComputedFile.get_sample_file( | |
sample, | |
download_config, | |
sample.get_output_file_name(download_config), | |
Lock(), # this should be removed when CF::get_sample_file is refactored | |
) | |
_create_computed_file(computed_file, update_s3, clean_up_output_data=False) | |
sample.project.update_downloadable_sample_count() | |
if project and computed_file := ComputedFile.get_project_file(project, download_config): | |
_create_computed_file(computed_file, update_s3, clean_up_output_data=False) | |
if sample and computed_file := ComputedFile.get_sample_file(sample, download_config): | |
_create_computed_file(computed_file, update_s3, clean_up_output_data=False) | |
sample.project.update_downloadable_sample_count() |
I think you can make `Lock()` the default value for a lock in `get_sample_file`. Otherwise rewriting it this way makes it a bit clearer.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks mostly correct, just some bugs / changes that we should discuss again before implementing.
projects = ( | ||
Project.objects.filter(project_computed_files__is_null=True) | ||
if not project_id | ||
else [Project.objects.filter(scpca_id=project_id).first()] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
else [Project.objects.filter(scpca_id=project_id).first()] | |
else Project.objects.filter(scpca_id=project_id) |
if download_config_name not in common.PROJECT_DOWNLOAD_CONFIGS.keys(): | ||
logger.error(f"{download_config_name} is not a valid project download config name.") | ||
logger.info( | ||
f"Here are the correct project download_config names: " |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
f"Here are the correct project download_config names: " | |
f"Here are valid download_config_name values for projects: " |
if download_config_name not in common.SAMPLE_DOWNLOAD_CONFIGS.keys(): | ||
logger.error(f"{download_config_name} is not a valid sample download config name.") | ||
logger.info( | ||
f"Here are the correct sample download_config names: " |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
f"Here are the correct sample download_config names: " | |
f"Here are valid download_config_name values for samples: " |
f"Here are the correct sample download_config names: " | ||
f"{common.SAMPLE_DOWNLOAD_CONFIGS.keys()}" | ||
) | ||
download_config = common.PROJECT_DOWNLOAD_CONFIGS[download_config_name] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
download_config = common.PROJECT_DOWNLOAD_CONFIGS[download_config_name] | |
download_config = common.SAMPLE_DOWNLOAD_CONFIGS[download_config_name] |
api/scpca_portal/models/project.py
Outdated
@@ -205,9 +205,9 @@ def get_output_file_name(self, download_config: Dict) -> str: | |||
def get_computed_file(self, download_config: Dict) -> ComputedFile: | |||
"Return the project computed file that matches the passed download_config." | |||
if download_config["metadata_only"]: | |||
return self.computed_files.filter(metadata_only=True).first() | |||
return self.project_computed_files.filter(metadata_only=True).first() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
return self.project_computed_files.filter(metadata_only=True).first() | |
return self.computed_files.filter(metadata_only=True).first() |
Let's keep using the property for now.
api/scpca_portal/models/project.py
Outdated
|
||
return self.computed_files.filter( | ||
return self.project_computed_files.filter( |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
return self.project_computed_files.filter( | |
return self.computed_files.filter( |
api/scpca_portal/models/sample.py
Outdated
@@ -123,7 +123,7 @@ def get_metadata(self) -> Dict: | |||
|
|||
def get_computed_file(self, download_config: Dict) -> ComputedFile: | |||
"Return the sample computed file that matches the passed download_config." | |||
return self.computed_files.filter( | |||
return self.sample_computed_files.filter( |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
return self.sample_computed_files.filter( | |
return self.computed_files.filter( |
Issue Number
Closes #915 and #841
Purpose/Implementation Notes
This PR makes 3 additions:
- `loader::generate_computed_file` method
- `generate_computed_file` management command, which will be used as the entrypoint with Batch jobs
- `dispatch_to_batch` management command, responsible for iterating over all permutations of projects, samples and download_configs which need computed files generated for them
Types of changes
What types of changes does your code introduce?
Functional tests
List out the functional tests you've completed to verify your changes work locally.
Checklist
Screenshots
N/A