Skip to content

Commit

Permalink
Merge pull request #931 from AlexsLemonade/feature/multiplexed-metada…
Browse files Browse the repository at this point in the history
…ta-columns

Merge feature/multiplexed-metadata-columns into dev
  • Loading branch information
avrohomgottlieb authored Oct 10, 2024
2 parents dd8fb4c + 41a8cc1 commit 8bbfdb8
Show file tree
Hide file tree
Showing 13 changed files with 88 additions and 78 deletions.
3 changes: 1 addition & 2 deletions api/scpca_portal/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,7 @@
"demux_samples",
"total_reads",
"mapped_reads",
"sample_cell_count_estimate",
"sample_cell_estimate", # ONLY FOR MULTIPLEXED
"demux_cell_count_estimate", # ONLY FOR MULTIPLEXED
"unfiltered_cells",
"filtered_cell_count",
"processed_cells",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 3.2.25 on 2024-10-10 17:43

from django.db import migrations


class Migration(migrations.Migration):
    """Rename the ``sample`` model field ``demux_cell_count_estimate``.

    The field becomes ``demux_cell_count_estimate_sum``; this is a pure
    rename operation applied on top of migration 0052.
    """

    # Must run after the previous scpca_portal migration.
    dependencies = [("scpca_portal", "0052_auto_20240929_1357")]

    operations = [
        migrations.RenameField(
            model_name="sample",
            old_name="demux_cell_count_estimate",
            new_name="demux_cell_count_estimate_sum",
        ),
    ]
43 changes: 15 additions & 28 deletions api/scpca_portal/models/library.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,43 +172,30 @@ def get_sample_libraries_from_download_config(
def get_local_path_from_data_file_path(data_file_path: Path) -> Path:
return settings.INPUT_DATA_PATH / data_file_path

def get_metadata(self) -> Dict:
library_metadata = {
"scpca_library_id": self.scpca_id,
}

def get_metadata(self, demux_cell_count_estimate_id) -> Dict:
excluded_metadata_attributes = {
"scpca_sample_id",
"has_citeseq",
# for multiplexed samples, this is handled at the sample level
"sample_cell_estimates",
}
library_metadata.update(
{
key: value
for key, value in self.metadata.items()
if key not in excluded_metadata_attributes
}
)
library_metadata = {
key: value
for key, value in self.metadata.items()
if key not in excluded_metadata_attributes
}

if self.is_multiplexed:
library_metadata["demux_cell_count_estimate"] = self.metadata["sample_cell_estimates"][
demux_cell_count_estimate_id
]

return library_metadata

def get_combined_library_metadata(self) -> List[Dict]:
combined_metadatas = []
for sample in self.samples.all():
metadata = self.project.get_metadata() | sample.get_metadata() | self.get_metadata()
# Estimate attributes per modality:
# Single Cell: "sample_cell_count_estimate"
# Single Cell Multiplexed: "sample_cell_estimates"
# Spatial: None
if self.modality == Library.Modalities.SPATIAL or self.is_multiplexed:
del metadata["sample_cell_count_estimate"]
if not self.is_multiplexed:
del metadata["sample_cell_estimate"]

combined_metadatas.append(metadata)

return combined_metadatas
return [
self.project.get_metadata() | sample.get_metadata() | self.get_metadata(sample.scpca_id)
for sample in self.samples.all()
]

def get_download_config_file_paths(self, download_config: Dict) -> List[Path]:
"""
Expand Down
5 changes: 2 additions & 3 deletions api/scpca_portal/models/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,9 +392,8 @@ def update_sample_aggregate_properties(self):
"scpca_id", flat=True
)
)
# Sum demux_cell_count_estimate from all related library's
# sample_cell_estimates for that sample.
sample.demux_cell_count_estimate = sum(
# Sum of all related libraries' sample_cell_estimates for that sample.
sample.demux_cell_count_estimate_sum = sum(
library.metadata["sample_cell_estimates"].get(sample.scpca_id, 0)
for library in multiplexed_libraries
)
Expand Down
24 changes: 6 additions & 18 deletions api/scpca_portal/models/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class Modalities:

age = models.TextField()
age_timing = models.TextField()
demux_cell_count_estimate = models.IntegerField(null=True)
demux_cell_count_estimate_sum = models.IntegerField(null=True)
diagnosis = models.TextField(blank=True, null=True)
disease_timing = models.TextField(blank=True, null=True)
has_multiplexed_data = models.BooleanField(default=False)
Expand Down Expand Up @@ -107,29 +107,17 @@ def additional_metadata(self):
}

def get_metadata(self) -> Dict:
sample_metadata = {
"scpca_sample_id": self.scpca_id,
}

excluded_metadata_attributes = {
"scpca_project_id",
"submitter", # included in project metadata under the name pi_name
}
sample_metadata.update(
{
key: value
for key, value in self.metadata.items()
if key not in excluded_metadata_attributes
}
)

derived_attributes = {
"demux_cell_count_estimate",
"sample_cell_count_estimate",
"includes_anndata",
sample_metadata = {
key: value
for key, value in self.metadata.items()
if key not in excluded_metadata_attributes
}
sample_metadata.update({key: getattr(self, key) for key in derived_attributes})
sample_metadata["sample_cell_estimate"] = sample_metadata.pop("demux_cell_count_estimate")
sample_metadata["includes_anndata"] = self.includes_anndata

return sample_metadata

Expand Down
2 changes: 1 addition & 1 deletion api/scpca_portal/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ class Meta:
"age_timing",
"computed_files",
"created_at",
"demux_cell_count_estimate",
"demux_cell_count_estimate_sum",
"diagnosis",
"disease_timing",
"has_bulk_rna_seq",
Expand Down
12 changes: 6 additions & 6 deletions api/scpca_portal/test/expected_values/computed_file_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class SINGLE_CELL_SCE:
"metadata_only": False,
"s3_bucket": settings.AWS_S3_OUTPUT_BUCKET_NAME,
"s3_key": "SCPCP999990_SINGLE-CELL_SINGLE-CELL-EXPERIMENT.zip",
"size_in_bytes": 9077,
"size_in_bytes": 9040,
"workflow_version": "development",
"includes_celltype_report": True,
}
Expand Down Expand Up @@ -70,7 +70,7 @@ class SINGLE_CELL_SCE_MULTIPLEXED:
"metadata_only": False,
"s3_bucket": settings.AWS_S3_OUTPUT_BUCKET_NAME,
"s3_key": "SCPCP999991_SINGLE-CELL_SINGLE-CELL-EXPERIMENT_MULTIPLEXED.zip",
"size_in_bytes": 9511,
"size_in_bytes": 9478,
"workflow_version": "development",
"includes_celltype_report": True,
}
Expand Down Expand Up @@ -102,7 +102,7 @@ class SINGLE_CELL_SCE_MERGED:
"metadata_only": False,
"s3_bucket": settings.AWS_S3_OUTPUT_BUCKET_NAME,
"s3_key": "SCPCP999990_SINGLE-CELL_SINGLE-CELL-EXPERIMENT_MERGED.zip",
"size_in_bytes": 8474,
"size_in_bytes": 8437,
"workflow_version": "development",
"includes_celltype_report": True,
}
Expand Down Expand Up @@ -138,7 +138,7 @@ class SINGLE_CELL_ANN_DATA:
"metadata_only": False,
"s3_bucket": settings.AWS_S3_OUTPUT_BUCKET_NAME,
"s3_key": "SCPCP999990_SINGLE-CELL_ANN-DATA.zip",
"size_in_bytes": 9492,
"size_in_bytes": 9455,
"workflow_version": "development",
"includes_celltype_report": True,
}
Expand Down Expand Up @@ -170,7 +170,7 @@ class SINGLE_CELL_ANN_DATA_MERGED:
"metadata_only": False,
"s3_bucket": settings.AWS_S3_OUTPUT_BUCKET_NAME,
"s3_key": "SCPCP999990_SINGLE-CELL_ANN-DATA_MERGED.zip",
"size_in_bytes": 8600,
"size_in_bytes": 8563,
"workflow_version": "development",
"includes_celltype_report": True,
}
Expand Down Expand Up @@ -229,7 +229,7 @@ class ALL_METADATA:
"metadata_only": True,
"s3_bucket": settings.AWS_S3_OUTPUT_BUCKET_NAME,
"s3_key": "SCPCP999990_ALL_METADATA.zip",
"size_in_bytes": 5185,
"size_in_bytes": 5145,
"workflow_version": "development",
"includes_celltype_report": True,
}
6 changes: 3 additions & 3 deletions api/scpca_portal/test/expected_values/computed_file_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class SINGLE_CELL_SCE:
"metadata_only": False,
"s3_bucket": settings.AWS_S3_OUTPUT_BUCKET_NAME,
"s3_key": "SCPCS999990_SINGLE-CELL_SINGLE-CELL-EXPERIMENT.zip",
"size_in_bytes": 7089,
"size_in_bytes": 7057,
"workflow_version": "development",
"includes_celltype_report": True,
}
Expand Down Expand Up @@ -60,7 +60,7 @@ class SINGLE_CELL_ANN_DATA:
"metadata_only": False,
"s3_bucket": settings.AWS_S3_OUTPUT_BUCKET_NAME,
"s3_key": "SCPCS999990_SINGLE-CELL_ANN-DATA.zip",
"size_in_bytes": 7401,
"size_in_bytes": 7369,
"workflow_version": "development",
"includes_celltype_report": True,
}
Expand Down Expand Up @@ -129,7 +129,7 @@ class MULTIPLEXED_SINGLE_CELL_SCE:
"metadata_only": False,
"s3_bucket": settings.AWS_S3_OUTPUT_BUCKET_NAME,
"s3_key": "SCPCS999992-SCPCS999993_SINGLE-CELL_SINGLE-CELL-EXPERIMENT_MULTIPLEXED.zip",
"size_in_bytes": 7145,
"size_in_bytes": 7150,
"workflow_version": "development",
"includes_celltype_report": True,
}
8 changes: 4 additions & 4 deletions api/scpca_portal/test/expected_values/project_SCPCP999990.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class Sample_SCPCS999990:
VALUES = {
"age": "2",
"age_timing": "diagnosis",
"demux_cell_count_estimate": None,
"demux_cell_count_estimate_sum": None,
"diagnosis": "diagnosis1",
"disease_timing": "Initial diagnosis",
"has_bulk_rna_seq": False,
Expand All @@ -82,7 +82,7 @@ class Sample_SCPCS999991:
VALUES = {
"age": "2",
"age_timing": "collection",
"demux_cell_count_estimate": None,
"demux_cell_count_estimate_sum": None,
"diagnosis": "diagnosis2",
"disease_timing": "Initial diagnosis",
"has_bulk_rna_seq": False,
Expand All @@ -109,7 +109,7 @@ class Sample_SCPCS999994:
VALUES = {
"age": "2",
"age_timing": "collection",
"demux_cell_count_estimate": None,
"demux_cell_count_estimate_sum": None,
"diagnosis": "diagnosis5",
"disease_timing": "Initial diagnosis",
"has_bulk_rna_seq": True,
Expand All @@ -136,7 +136,7 @@ class Sample_SCPCS999997:
VALUES = {
"age": "2",
"age_timing": "collection",
"demux_cell_count_estimate": None,
"demux_cell_count_estimate_sum": None,
"diagnosis": "diagnosis5",
"disease_timing": "Initial diagnosis",
"has_bulk_rna_seq": False,
Expand Down
6 changes: 3 additions & 3 deletions api/scpca_portal/test/expected_values/project_SCPCP999991.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ class Sample_SCPCS999992:
VALUES = {
"age": "2",
"age_timing": "unknown",
"demux_cell_count_estimate": 0,
"demux_cell_count_estimate_sum": 0,
"diagnosis": "diagnosis4",
"disease_timing": "Initial diagnosis",
"has_bulk_rna_seq": False,
Expand All @@ -75,7 +75,7 @@ class Sample_SCPCS999993:
VALUES = {
"age": "2",
"age_timing": "diagnosis",
"demux_cell_count_estimate": 0,
"demux_cell_count_estimate_sum": 0,
"diagnosis": "diagnosis3",
"disease_timing": "Initial diagnosis",
"has_bulk_rna_seq": False,
Expand All @@ -102,7 +102,7 @@ class Sample_SCPCS999995:
VALUES = {
"age": "2",
"age_timing": "unknown",
"demux_cell_count_estimate": None,
"demux_cell_count_estimate_sum": None,
"diagnosis": "diagnosis6",
"disease_timing": "Initial diagnosis",
"has_bulk_rna_seq": False,
Expand Down
4 changes: 2 additions & 2 deletions api/scpca_portal/test/expected_values/project_SCPCP999992.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ class Sample_SCPCS999996:
VALUES = {
"age": "2",
"age_timing": "diagnosis",
"demux_cell_count_estimate": None,
"demux_cell_count_estimate_sum": None,
"diagnosis": "diagnosis7",
"disease_timing": "Initial diagnosis",
"has_bulk_rna_seq": False,
Expand All @@ -78,7 +78,7 @@ class Sample_SCPCS999998:
VALUES = {
"age": "2",
"age_timing": "unknown",
"demux_cell_count_estimate": None,
"demux_cell_count_estimate_sum": None,
"diagnosis": "diagnosis7",
"disease_timing": "Initial diagnosis",
"has_bulk_rna_seq": False,
Expand Down
31 changes: 25 additions & 6 deletions api/scpca_portal/test/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,15 +134,34 @@ class Meta:
workflow_version = "development"
# With factory_boy, factory instances share attributes by default.
# Use a lazy declaration (LazyFunction / LazyAttribute) to populate the metadata dict so that changes don't propagate to all instances.
metadata = factory.LazyFunction(
lambda: {
"technology": "10Xv3.1",
"seq_unit": "nucleus",
"is_multiplexed": True,
metadata = factory.LazyAttribute(
lambda library_obj: {
"scpca_library_id": library_obj.scpca_id,
"scpca_sample_id": "SCPCS000000",
"technology": "10Xv3",
"seq_unit": "cell",
"is_multiplexed": False,
"has_citeseq": False,
"has_cellhash": True,
"has_cellhash": False,
"processed_cells": 2633,
"filtered_cells": 3424,
"unfiltered_cells": 61980,
"droplet_filtering_method": "emptyDropsCellRanger",
"total_reads": 121894873,
"mapped_reads": 90729577,
"genome_assembly": "Homo_sapiens.GRCh38.104",
"mapping_index": "Homo_sapiens.GRCh38.104.spliced_intron.txome",
"transcript_type": "total;spliced",
"cell_filtering_method": "miQC",
"normalization_method": "deconvolution",
"min_gene_cutoff": 200,
"prob_compromised_cutoff": 0.75,
"date_processed": "2024-09-10T17:11:52+0000",
"salmon_version": "1.9.0",
"alevin_fry_version": "0.7.0",
"workflow": "https://github.com/AlexsLemonade/scpca-nf",
"workflow_version": "development",
"workflow_commit": "319b074caf152f68e6f0bac58af5bcf4481eba2d",
}
)

Expand Down
4 changes: 2 additions & 2 deletions client/src/components/ProjectSamplesTable.js
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ export const ProjectSamplesTable = ({
accessor: ({ sample_cell_count_estimate: count }) => count || 'N/A'
},
{
id: 'demux_cell_count_estimate',
id: 'demux_cell_count_estimate_sum',
Header: () => (
<Box direction="row" align="center">
Est. Demux Sample Counts&nbsp;
Expand All @@ -204,7 +204,7 @@ export const ProjectSamplesTable = ({
&nbsp;&nbsp;
</Box>
),
accessor: ({ demux_cell_count_estimate: count }) => count || 'N/A',
accessor: ({ demux_cell_count_estimate_sum: count }) => count || 'N/A',
isVisible: hasMultiplexedData
},
{
Expand Down

0 comments on commit 8bbfdb8

Please sign in to comment.