Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

903 - Update Test Bucket to 2024-09-10 #908

Merged
merged 4 commits into from
Sep 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion api/scpca_portal/config/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class Local(Common):
AWS_REGION = None

# AWS S3
TEST_INPUT_BUCKET_NAME = "scpca-portal-public-test-inputs/2024-07-19/"
TEST_INPUT_BUCKET_NAME = "scpca-portal-public-test-inputs/2024-09-10/"
AWS_S3_INPUT_BUCKET_NAME = TEST_INPUT_BUCKET_NAME if Common.TEST else "scpca-portal-inputs"
AWS_S3_OUTPUT_BUCKET_NAME = "scpca-local-data"

Expand Down
13 changes: 13 additions & 0 deletions api/scpca_portal/metadata_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
("citation_doi", "doi", None),
]

# Maps a project metadata key to a callable that takes the raw value and returns its replacement.
PROJECT_METADATA_VALUES_TRANSFORMS = {
    # Semicolon-delimited diagnoses become a sorted, comma-separated string. Whitespace around
    # each entry is trimmed and empty entries are dropped, so "b; a" and "a;;b" both give "a, b".
    "diagnoses": lambda d: ", ".join(sorted(s.strip() for s in d.split(";") if s.strip())),
}

LIBRARY_METADATA_KEYS = [
("library_id", "scpca_library_id", None),
("sample_id", "scpca_sample_id", None),
Expand All @@ -48,6 +50,7 @@ def load_projects_metadata(*, filter_on_project_id: str = None):

for project_metadata in projects_metadata:
transform_keys(project_metadata, PROJECT_METADATA_KEYS)
transform_values(project_metadata, PROJECT_METADATA_VALUES_TRANSFORMS)

if filter_on_project_id:
return [pm for pm in projects_metadata if pm["scpca_project_id"] == filter_on_project_id]
Expand Down Expand Up @@ -84,6 +87,16 @@ def transform_keys(data_dict: Dict, key_transforms: List[Tuple]):
return data_dict


def transform_values(data_dict: Dict, value_transforms: Dict):
    """
    Transform values in data dict according to transformation functions in value transform dict.

    value_transforms maps a key of data_dict to a callable; the callable receives the current
    value stored under that key and returns the value that replaces it.

    Keys listed in value_transforms that are absent from data_dict are skipped rather than
    raising KeyError, so one transform table can be applied to dicts carrying only some fields.

    data_dict is modified in place and is also returned.
    """
    for key, transformation_function in value_transforms.items():
        # Nothing to transform when the dict does not carry this field.
        if key not in data_dict:
            continue
        data_dict[key] = transformation_function(data_dict[key])

    return data_dict


class MetadataFilenames:
SINGLE_CELL_METADATA_FILE_NAME = "single_cell_metadata.tsv"
SPATIAL_METADATA_FILE_NAME = "spatial_metadata.tsv"
Expand Down
3 changes: 0 additions & 3 deletions api/scpca_portal/models/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,6 @@ def update_project_aggregate_properties(self):
"""

additional_metadata_keys = set()
diagnoses = set()
diagnoses_counts = Counter()
disease_timings = set()
modalities = set()
Expand All @@ -457,7 +456,6 @@ def update_project_aggregate_properties(self):

for sample in self.samples.all():
additional_metadata_keys.update(sample.additional_metadata.keys())
diagnoses.add(sample.diagnosis)
diagnoses_counts.update({sample.diagnosis: 1})
disease_timings.add(sample.disease_timing)
modalities.update(sample.modalities)
Expand Down Expand Up @@ -487,7 +485,6 @@ def update_project_aggregate_properties(self):
additional_metadata_keys.remove("multiplexed_with")

self.additional_metadata_keys = ", ".join(sorted(additional_metadata_keys, key=str.lower))
self.diagnoses = ", ".join(sorted(diagnoses))
self.diagnoses_counts = ", ".join(diagnoses_strings)
self.disease_timings = ", ".join(disease_timings)
self.modalities = sorted(modalities)
Expand Down
12 changes: 6 additions & 6 deletions api/scpca_portal/test/expected_values/project_SCPCP999990.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ class Project_SCPCP999990:
"SCPCP999990/bulk/SCPCP999990_bulk_metadata.tsv",
"SCPCP999990/bulk/SCPCP999990_bulk_quant.tsv",
],
"diagnoses": "diagnosis1, diagnosis2, diagnosis5, diagnosis8",
"diagnoses_counts": "diagnosis1 (1), diagnosis2 (1), diagnosis5 (1), diagnosis8 (1)",
"diagnoses": "diagnosis1, diagnosis2, diagnosis5",
"diagnoses_counts": "diagnosis1 (1), diagnosis2 (1), diagnosis5 (2)",
"disease_timings": "Initial diagnosis",
# This value is not determined until after computed file generation, and should be 3
"downloadable_sample_count": 0,
Expand Down Expand Up @@ -67,7 +67,7 @@ class Sample_SCPCS999990:
"is_cell_line": False,
"is_xenograft": False,
"multiplexed_with": [],
"sample_cell_count_estimate": 3432,
"sample_cell_count_estimate": 3424,
"scpca_id": SCPCA_ID,
"sex": "M",
"seq_units": "cell",
Expand Down Expand Up @@ -137,7 +137,7 @@ class Sample_SCPCS999997:
"age": "2",
"age_timing": "collection",
"demux_cell_count_estimate": None,
"diagnosis": "diagnosis8",
"diagnosis": "diagnosis5",
"disease_timing": "Initial diagnosis",
"has_bulk_rna_seq": False,
"has_cite_seq_data": False,
Expand All @@ -148,7 +148,7 @@ class Sample_SCPCS999997:
"is_cell_line": False,
"is_xenograft": False,
"multiplexed_with": [],
"sample_cell_count_estimate": 1568,
"sample_cell_count_estimate": 1570,
"scpca_id": SCPCA_ID,
"sex": "M",
"seq_units": "cell",
Expand Down Expand Up @@ -260,7 +260,7 @@ class Summary3:

class Summary4:
VALUES = {
"diagnosis": "diagnosis8",
"diagnosis": "diagnosis5",
"sample_count": 1,
"seq_unit": "cell",
"technology": "10Xv3",
Expand Down
10 changes: 5 additions & 5 deletions api/scpca_portal/test/expected_values/project_SCPCP999991.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class Sample_SCPCS999992:
"age": "2",
"age_timing": "unknown",
"demux_cell_count_estimate": 0,
"diagnosis": "diagnosis3",
"diagnosis": "diagnosis4",
"disease_timing": "Initial diagnosis",
"has_bulk_rna_seq": False,
"has_cite_seq_data": False,
Expand All @@ -76,7 +76,7 @@ class Sample_SCPCS999993:
"age": "2",
"age_timing": "diagnosis",
"demux_cell_count_estimate": 0,
"diagnosis": "diagnosis4",
"diagnosis": "diagnosis3",
"disease_timing": "Initial diagnosis",
"has_bulk_rna_seq": False,
"has_cite_seq_data": False,
Expand Down Expand Up @@ -114,7 +114,7 @@ class Sample_SCPCS999995:
"is_cell_line": False,
"is_xenograft": False,
"multiplexed_with": [],
"sample_cell_count_estimate": 3433,
"sample_cell_count_estimate": 3428,
"scpca_id": SCPCA_ID,
"sex": "M",
"seq_units": "cell",
Expand Down Expand Up @@ -167,15 +167,15 @@ class Library_SCPCL999995:

class Summary1:
VALUES = {
"diagnosis": "diagnosis3",
"diagnosis": "diagnosis4",
"sample_count": 1,
"seq_unit": "nucleus",
"technology": "10Xv3.1",
}

class Summary2:
VALUES = {
"diagnosis": "diagnosis4",
"diagnosis": "diagnosis3",
"sample_count": 1,
"seq_unit": "nucleus",
"technology": "10Xv3.1",
Expand Down
10 changes: 5 additions & 5 deletions api/scpca_portal/test/expected_values/project_SCPCP999992.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ class Project_SCPCP999992:
"SCPCP999992/merged/SCPCP999992_merged_adt.h5ad",
"SCPCP999992/merged/SCPCP999992_merged_rna.h5ad",
],
"diagnoses": "diagnosis7, diagnosis9",
"diagnoses_counts": "diagnosis7 (1), diagnosis9 (1)",
"diagnoses": "diagnosis7",
"diagnoses_counts": "diagnosis7 (2)",
"disease_timings": "Initial diagnosis",
# This value is not determined until after computed file generation, and should be 2
"downloadable_sample_count": 0,
Expand Down Expand Up @@ -79,7 +79,7 @@ class Sample_SCPCS999998:
"age": "2",
"age_timing": "unknown",
"demux_cell_count_estimate": None,
"diagnosis": "diagnosis9",
"diagnosis": "diagnosis7",
"disease_timing": "Initial diagnosis",
"has_bulk_rna_seq": False,
"has_cite_seq_data": True,
Expand All @@ -90,7 +90,7 @@ class Sample_SCPCS999998:
"is_cell_line": False,
"is_xenograft": False,
"multiplexed_with": [],
"sample_cell_count_estimate": 5245,
"sample_cell_count_estimate": 5247,
"scpca_id": SCPCA_ID,
"sex": "M",
"seq_units": "cell",
Expand Down Expand Up @@ -161,7 +161,7 @@ class Summary1:

class Summary2:
VALUES = {
"diagnosis": "diagnosis9",
"diagnosis": "diagnosis7",
"sample_count": 1,
"seq_unit": "cell",
"technology": "10Xv2_5prime",
Expand Down
2 changes: 1 addition & 1 deletion api/scpca_portal/test/views/test_filter_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def test_get(self):
self.assertEqual(response.status_code, status.HTTP_200_OK)

response = response.json()
self.assertEqual(len(response["diagnoses"]), 4)
self.assertEqual(len(response["diagnoses"]), 2)
self.assertEqual(len(response["modalities"]), 1) # CITE-seq only.
self.assertEqual(len(response["seq_units"]), 2)
self.assertEqual(len(response["technologies"]), 5)
Loading