Skip to content

Commit

Permalink
Merge pull request #682 from AlexsLemonade/dev
Browse files Browse the repository at this point in the history
Production Deploy
  • Loading branch information
davidsmejia authored Apr 26, 2024
2 parents 6384685 + 4dc3d08 commit 1458dfa
Show file tree
Hide file tree
Showing 46 changed files with 509 additions and 329 deletions.
1 change: 1 addition & 0 deletions .github/PULL_REQUEST_TEMPLATE.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ What types of changes does your code introduce?
<!-- Remove any which your PR isn't -->

- Bugfix (non-breaking change which fixes an issue)
- Refactor (addresses code organization and design mentioned in the corresponding issue)
- New feature (non-breaking change which adds functionality)
- Breaking change (fix or feature that would cause existing functionality to not work as expected)

Expand Down
17 changes: 17 additions & 0 deletions api/scpca_portal/migrations/0041_remove_computedfile_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Generated by Django 3.2.18 on 2024-04-23 20:32

from django.db import migrations


class Migration(migrations.Migration):
    """Remove the ``type`` field from the ``computedfile`` model."""

    # Must be applied after the previous auto-generated migration of this app.
    dependencies = [("scpca_portal", "0040_auto_20240412_1531")]

    operations = [
        migrations.RemoveField(model_name="computedfile", name="type"),
    ]
25 changes: 25 additions & 0 deletions api/scpca_portal/migrations/0042_auto_20240423_2045.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Generated by Django 3.2.18 on 2024-04-23 20:45

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add ``has_multiplexed_data`` to ``computedfile`` and redefine ``modality`` choices."""

    # Runs directly after the migration that removed ``computedfile.type``.
    dependencies = [("scpca_portal", "0041_remove_computedfile_type")]

    operations = [
        # New boolean flag on computed files; it defaults to False.
        migrations.AddField(
            model_name="computedfile",
            name="has_multiplexed_data",
            field=models.BooleanField(default=False),
        ),
        # ``modality`` is declared with exactly two choices: single-cell and spatial.
        # NOTE(review): this operation only changes the field definition; rows that
        # already store some other modality value are not rewritten here -- confirm
        # that computed files are regenerated (or add a data migration) on deploy.
        migrations.AlterField(
            model_name="computedfile",
            name="modality",
            field=models.TextField(
                choices=[("SINGLE_CELL", "Single Cell"), ("SPATIAL", "Spatial")],
            ),
        ),
    ]
1 change: 1 addition & 0 deletions api/scpca_portal/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ class Meta:

has_bulk_rna_seq = models.BooleanField(default=False)
has_cite_seq_data = models.BooleanField(default=False)
has_multiplexed_data = models.BooleanField(default=False)


class TimestampedModel(models.Model):
Expand Down
71 changes: 25 additions & 46 deletions api/scpca_portal/models/computed_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,36 +27,15 @@ class MetadataFilenames:
SINGLE_CELL_METADATA_FILE_NAME = "single_cell_metadata.tsv"
SPATIAL_METADATA_FILE_NAME = "spatial_metadata.tsv"

# TODO(ark): these values are redundant and need to be refactored in order not to violate DRY.
class OutputFileModalities:
MULTIPLEXED = "MULTIPLEXED"
SINGLE_CELL = "SINGLE_CELL"
SPATIAL = "SPATIAL"

CHOICES = (
(MULTIPLEXED, "Multiplexed"),
(SINGLE_CELL, "Single Cell"),
(SPATIAL, "Spatial"),
)

class OutputFileTypes:
PROJECT_MULTIPLEXED_ZIP = "PROJECT_MULTIPLEXED_ZIP"
PROJECT_SPATIAL_ZIP = "PROJECT_SPATIAL_ZIP"
PROJECT_ZIP = "PROJECT_ZIP"

SAMPLE_MULTIPLEXED_ZIP = "SAMPLE_MULTIPLEXED_ZIP"
SAMPLE_SPATIAL_ZIP = "SAMPLE_SPATIAL_ZIP"
SAMPLE_ZIP = "SAMPLE_ZIP"

CHOICES = (
(PROJECT_MULTIPLEXED_ZIP, "Project Multiplexed ZIP"),
(PROJECT_SPATIAL_ZIP, "Project Spatial ZIP"),
(PROJECT_ZIP, "Project ZIP"),
(SAMPLE_MULTIPLEXED_ZIP, "Sample Multiplexed ZIP"),
(SAMPLE_SPATIAL_ZIP, "Sample Spatial ZIP"),
(SAMPLE_ZIP, "Sample ZIP"),
)

class OutputFileFormats:
ANN_DATA = "ANN_DATA"
SINGLE_CELL_EXPERIMENT = "SINGLE_CELL_EXPERIMENT"
Expand Down Expand Up @@ -102,7 +81,6 @@ class OutputFileFormats:
s3_bucket = models.TextField()
s3_key = models.TextField()
size_in_bytes = models.BigIntegerField()
type = models.TextField(choices=OutputFileTypes.CHOICES)
workflow_version = models.TextField()
includes_celltype_report = models.BooleanField(default=False)

Expand Down Expand Up @@ -139,13 +117,13 @@ def get_project_merged_file(
computed_file_name = project.output_merged_anndata_computed_file_name
readme_file_path = ComputedFile.README_ANNDATA_MERGED_FILE_PATH
project_file_mapping[
f"{project.input_merged_data_path}/{project.scpca_id}_merged_rna.hdf5"
] = f"{project.scpca_id}_merged_rna.hdf5"
f"{project.input_merged_data_path}/{project.scpca_id}_merged_rna.h5ad"
] = f"{project.scpca_id}_merged_rna.h5ad"

if project.has_cite_seq_data:
project_file_mapping[
f"{project.input_merged_data_path}/{project.scpca_id}_merged_adt.hdf5"
] = f"{project.scpca_id}_merged_adt.hdf5"
f"{project.input_merged_data_path}/{project.scpca_id}_merged_adt.h5ad"
] = f"{project.scpca_id}_merged_adt.h5ad"
else:
if not project.includes_merged_sce:
return None
Expand All @@ -163,7 +141,6 @@ def get_project_merged_file(
project=project,
s3_bucket=settings.AWS_S3_BUCKET_NAME,
s3_key=computed_file_name,
type=cls.OutputFileTypes.PROJECT_ZIP,
workflow_version=utils.join_workflow_versions(workflow_versions),
)

Expand Down Expand Up @@ -206,11 +183,10 @@ def get_project_multiplexed_file(

computed_file = cls(
format=file_format,
modality=cls.OutputFileModalities.MULTIPLEXED,
modality=cls.OutputFileModalities.SINGLE_CELL,
project=project,
s3_bucket=settings.AWS_S3_BUCKET_NAME,
s3_key=project.output_multiplexed_computed_file_name,
type=cls.OutputFileTypes.PROJECT_MULTIPLEXED_ZIP,
workflow_version=utils.join_workflow_versions(workflow_versions),
)

Expand All @@ -234,6 +210,7 @@ def get_project_multiplexed_file(

computed_file.has_bulk_rna_seq = project.has_bulk_rna_seq
computed_file.has_cite_seq_data = project.has_cite_seq_data
computed_file.has_multiplexed_data = project.has_multiplexed_data
computed_file.size_in_bytes = computed_file.zip_file_path.stat().st_size
computed_file.includes_celltype_report = project.samples.filter(is_cell_line=False).exists()

Expand All @@ -258,7 +235,6 @@ def get_project_single_cell_file(
project=project,
s3_bucket=settings.AWS_S3_BUCKET_NAME,
s3_key=computed_file_name,
type=cls.OutputFileTypes.PROJECT_ZIP,
workflow_version=utils.join_workflow_versions(workflow_versions),
)

Expand Down Expand Up @@ -295,7 +271,6 @@ def get_project_spatial_file(
project=project,
s3_bucket=settings.AWS_S3_BUCKET_NAME,
s3_key=project.output_spatial_computed_file_name,
type=cls.OutputFileTypes.PROJECT_SPATIAL_ZIP,
workflow_version=utils.join_workflow_versions(workflow_versions),
)

Expand Down Expand Up @@ -327,11 +302,10 @@ def get_sample_multiplexed_file(
"""
computed_file = cls(
format=file_format,
modality=cls.OutputFileModalities.MULTIPLEXED,
modality=cls.OutputFileModalities.SINGLE_CELL,
s3_bucket=settings.AWS_S3_BUCKET_NAME,
s3_key=sample.output_multiplexed_computed_file_name,
sample=sample,
type=cls.OutputFileTypes.SAMPLE_MULTIPLEXED_ZIP,
workflow_version=utils.join_workflow_versions(workflow_versions),
)

Expand Down Expand Up @@ -387,6 +361,7 @@ def get_sample_multiplexed_file(

computed_file.has_bulk_rna_seq = False # Sample downloads can't contain bulk data.
computed_file.has_cite_seq_data = sample.has_cite_seq_data
computed_file.has_multiplexed_data = sample.has_multiplexed_data
computed_file.size_in_bytes = computed_file.zip_file_path.stat().st_size
computed_file.includes_celltype_report = includes_celltype_report

Expand All @@ -406,10 +381,10 @@ def get_sample_single_cell_file(cls, sample, libraries, workflow_versions, file_
file_name = sample.output_single_cell_anndata_computed_file_name
readme_file_path = ComputedFile.README_ANNDATA_FILE_PATH
common_file_suffixes = [
"filtered_rna.hdf5",
"processed_rna.hdf5",
"filtered_rna.h5ad",
"processed_rna.h5ad",
"qc.html",
"unfiltered_rna.hdf5",
"unfiltered_rna.h5ad",
]
else:
file_name = sample.output_single_cell_computed_file_name
Expand All @@ -425,9 +400,9 @@ def get_sample_single_cell_file(cls, sample, libraries, workflow_versions, file_
common_file_suffixes.append("celltype-report.html")

cite_seq_anndata_file_suffixes = [
"filtered_adt.hdf5",
"processed_adt.hdf5",
"unfiltered_adt.hdf5",
"filtered_adt.h5ad",
"processed_adt.h5ad",
"unfiltered_adt.h5ad",
]

computed_file = cls(
Expand All @@ -436,7 +411,6 @@ def get_sample_single_cell_file(cls, sample, libraries, workflow_versions, file_
s3_bucket=settings.AWS_S3_BUCKET_NAME,
s3_key=file_name,
sample=sample,
type=cls.OutputFileTypes.SAMPLE_ZIP,
workflow_version=utils.join_workflow_versions(workflow_versions),
)

Expand Down Expand Up @@ -484,7 +458,6 @@ def get_sample_spatial_file(cls, sample, libraries, workflow_versions, file_form
s3_bucket=settings.AWS_S3_BUCKET_NAME,
s3_key=sample.output_spatial_computed_file_name,
sample=sample,
type=cls.OutputFileTypes.SAMPLE_SPATIAL_ZIP,
workflow_version=utils.join_workflow_versions(workflow_versions),
)

Expand Down Expand Up @@ -521,19 +494,25 @@ def download_url(self):

@property
def is_project_multiplexed_zip(self):
return self.type == ComputedFile.OutputFileTypes.PROJECT_MULTIPLEXED_ZIP
return (
self.modality == ComputedFile.OutputFileModalities.SINGLE_CELL
and self.has_multiplexed_data
)

@property
def is_project_zip(self):
return self.type == ComputedFile.OutputFileTypes.PROJECT_ZIP
def is_project_single_cell_zip(self):
return (
self.modality == ComputedFile.OutputFileModalities.SINGLE_CELL
and not self.has_multiplexed_data
)

@property
def is_project_spatial_zip(self):
return self.type == ComputedFile.OutputFileTypes.PROJECT_SPATIAL_ZIP
return self.modality == ComputedFile.OutputFileModalities.SPATIAL

@property
def metadata_file_name(self):
if self.is_project_multiplexed_zip or self.is_project_zip:
if self.is_project_multiplexed_zip or self.is_project_single_cell_zip:
return ComputedFile.MetadataFilenames.SINGLE_CELL_METADATA_FILE_NAME
if self.is_project_spatial_zip:
return ComputedFile.MetadataFilenames.SPATIAL_METADATA_FILE_NAME
Expand Down
17 changes: 9 additions & 8 deletions api/scpca_portal/models/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ class Meta:
diagnoses_counts = models.TextField(blank=True, null=True)
disease_timings = models.TextField()
downloadable_sample_count = models.IntegerField(default=0)
has_multiplexed_data = models.BooleanField(default=False)
has_single_cell_data = models.BooleanField(default=False)
has_spatial_data = models.BooleanField(default=False)
human_readable_pi_name = models.TextField()
Expand Down Expand Up @@ -103,7 +102,9 @@ def input_samples_metadata_file_path(self):
def multiplexed_computed_file(self):
try:
return self.project_computed_files.get(
type=ComputedFile.OutputFileTypes.PROJECT_MULTIPLEXED_ZIP
modality=ComputedFile.OutputFileModalities.SINGLE_CELL,
format=ComputedFile.OutputFileFormats.SINGLE_CELL_EXPERIMENT,
has_multiplexed_data=True,
)
except ComputedFile.DoesNotExist:
pass
Expand Down Expand Up @@ -149,8 +150,8 @@ def single_cell_computed_file(self):
try:
return self.project_computed_files.get(
format=ComputedFile.OutputFileFormats.SINGLE_CELL_EXPERIMENT,
modality=ComputedFile.OutputFileModalities.SINGLE_CELL,
includes_merged=False,
type=ComputedFile.OutputFileTypes.PROJECT_ZIP,
)
except ComputedFile.DoesNotExist:
pass
Expand All @@ -160,8 +161,8 @@ def single_cell_merged_computed_file(self):
try:
return self.project_computed_files.get(
format=ComputedFile.OutputFileFormats.SINGLE_CELL_EXPERIMENT,
modality=ComputedFile.OutputFileModalities.SINGLE_CELL,
includes_merged=True,
type=ComputedFile.OutputFileTypes.PROJECT_ZIP,
)
except ComputedFile.DoesNotExist:
pass
Expand All @@ -171,8 +172,8 @@ def single_cell_anndata_computed_file(self):
try:
return self.project_computed_files.get(
format=ComputedFile.OutputFileFormats.ANN_DATA,
modality=ComputedFile.OutputFileModalities.SINGLE_CELL,
includes_merged=False,
type=ComputedFile.OutputFileTypes.PROJECT_ZIP,
)
except ComputedFile.DoesNotExist:
pass
Expand All @@ -182,8 +183,8 @@ def single_cell_anndata_merged_computed_file(self):
try:
return self.project_computed_files.get(
format=ComputedFile.OutputFileFormats.ANN_DATA,
modality=ComputedFile.OutputFileModalities.SINGLE_CELL,
includes_merged=True,
type=ComputedFile.OutputFileTypes.PROJECT_ZIP,
)
except ComputedFile.DoesNotExist:
pass
Expand All @@ -192,7 +193,7 @@ def single_cell_anndata_merged_computed_file(self):
def spatial_computed_file(self):
try:
return self.project_computed_files.get(
type=ComputedFile.OutputFileTypes.PROJECT_SPATIAL_ZIP
modality=ComputedFile.OutputFileModalities.SPATIAL
)
except ComputedFile.DoesNotExist:
pass
Expand Down Expand Up @@ -1010,7 +1011,7 @@ def load_data(self, sample_id=None, **kwargs) -> None:
sample_metadata["has_cite_seq_data"] = has_cite_seq_data
sample_metadata["has_single_cell_data"] = has_single_cell_data
sample_metadata["has_spatial_data"] = has_spatial_data
sample_metadata["includes_anndata"] = len(list(Path(sample_dir).glob("*.hdf5"))) > 0
sample_metadata["includes_anndata"] = len(list(Path(sample_dir).glob("*.h5ad"))) > 0
sample_metadata["sample_cell_count_estimate"] = sample_cell_count_estimate
sample_metadata["seq_units"] = ", ".join(sorted(sample_seq_units, key=str.lower))
sample_metadata["technologies"] = ", ".join(sorted(sample_technologies, key=str.lower))
Expand Down
12 changes: 8 additions & 4 deletions api/scpca_portal/models/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,9 @@ def output_spatial_metadata_file_path(self):
def multiplexed_computed_file(self):
try:
return self.sample_computed_files.get(
type=ComputedFile.OutputFileTypes.SAMPLE_MULTIPLEXED_ZIP
modality=ComputedFile.OutputFileModalities.SINGLE_CELL,
format=ComputedFile.OutputFileFormats.SINGLE_CELL_EXPERIMENT,
has_multiplexed_data=True,
)
except ComputedFile.DoesNotExist:
pass
Expand All @@ -175,7 +177,8 @@ def single_cell_computed_file(self):
try:
return self.sample_computed_files.get(
format=ComputedFile.OutputFileFormats.SINGLE_CELL_EXPERIMENT,
type=ComputedFile.OutputFileTypes.SAMPLE_ZIP,
modality=ComputedFile.OutputFileModalities.SINGLE_CELL,
has_multiplexed_data=False,
)
except ComputedFile.DoesNotExist:
pass
Expand All @@ -185,7 +188,8 @@ def single_cell_anndata_computed_file(self):
try:
return self.sample_computed_files.get(
format=ComputedFile.OutputFileFormats.ANN_DATA,
type=ComputedFile.OutputFileTypes.SAMPLE_ZIP,
modality=ComputedFile.OutputFileModalities.SINGLE_CELL,
has_multiplexed_data=False,
)
except ComputedFile.DoesNotExist:
pass
Expand All @@ -194,7 +198,7 @@ def single_cell_anndata_computed_file(self):
def spatial_computed_file(self):
try:
return self.sample_computed_files.get(
type=ComputedFile.OutputFileTypes.SAMPLE_SPATIAL_ZIP
modality=ComputedFile.OutputFileModalities.SPATIAL
)
except ComputedFile.DoesNotExist:
pass
Expand Down
2 changes: 1 addition & 1 deletion api/scpca_portal/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ class Meta:
"format",
"has_bulk_rna_seq",
"has_cite_seq_data",
"has_multiplexed_data",
"id",
"includes_merged",
"modality",
Expand All @@ -38,7 +39,6 @@ class Meta:
"s3_key",
"sample",
"size_in_bytes",
"type",
"updated_at",
"workflow_version",
)
Expand Down
Loading

0 comments on commit 1458dfa

Please sign in to comment.