diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8a7034f..8934385 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -15,3 +15,11 @@ as well as set up an auto-formatter commit hook. - Don't depend on `core__` tables. - Allows folks to build this study even if they can't or haven't built `core` - Allows `core` to smooth over data oddities we might be interested in + +- For non-CUBE tables (like quality `q_` metric summaries or `c_resources_per_pt`), + always try to have a `cumulus__all` row for the second grouping column, + as well as the normal row -- which can hold the group value, + be NULL if no grouping exists, or maybe `cumulus__none` if the row + represents all resources without a group value. + This way, downstream visualizers can reliably roll-up detailed views into + more abstract views. diff --git a/cumulus_library_data_metrics/base.py b/cumulus_library_data_metrics/base.py index 9f703a0..4cb590c 100644 --- a/cumulus_library_data_metrics/base.py +++ b/cumulus_library_data_metrics/base.py @@ -19,6 +19,11 @@ def __init__(self): super().__init__() self.display_text = f"Creating {self.name} tables…" self.summary_entries = {} + # A "group" is a value in the second column - a secondary characteristic like "field" + # (group examples: "code", "valueCodeableConcept") or a stratifier like "profile" + # (group examples: "Lab", "Note"). These will "roll-up" to the resource level in the + # summary with a "cumulus__all" group entry row. + self.summary_groups = {} self.queries = [] self.schemas = {} @@ -26,32 +31,34 @@ def __init__(self): # some of them are not present (notably DocRef.context.period.start). self.date_fields = copy.deepcopy(resource_info.DATES) - def make_table_fragment(self, src: str, stratifier: str | None = None): + def make_table_fragment(self, src: str, group: str | None = None): key = src.lower() - if stratifier: - key += f"_{stratifier.lower().replace(' ', '_')}" + if group: + key += f"_{group.lower().replace(' ', '_')}" return key def add_summary_entry( - self, src: str, stratifier: str | None = None, *, denominator: str | None = None + self, src: str, group: str | None = None, *, denominator: str | None = None ) -> None: # These are all flags for the summary-table-builder jinja. - key = self.make_table_fragment(src, stratifier) + key = self.make_table_fragment(src, group) self.summary_entries[key] = { "src": src, - "stratifier": stratifier, + "group": group, "denominator": denominator, } + self.summary_groups.setdefault(src, set()).add(group) - def make_summary(self, stratifier_column: str | None = None) -> None: + def make_summary(self, group_column: str | None = None) -> None: """Makes a summary table, from all the individual metric tables""" # Always define *something* even if we don't use it, so that consuming visualizations - # can assume a consistent two-column definition of resource + stratifier. - stratifier_column = stratifier_column or "stratifier" + # can assume a consistent two-column definition of resource + group. + group_column = group_column or "subgroup" sql = self.render_sql( "../base.summary", entries=self.summary_entries, - stratifier_column=stratifier_column, + group_column=group_column, + group_values=self.summary_groups, metric=self.name, ) self.queries.append(sql) diff --git a/cumulus_library_data_metrics/base.summary.jinja b/cumulus_library_data_metrics/base.summary.jinja index ee360c3..691e7f7 100644 --- a/cumulus_library_data_metrics/base.summary.jinja +++ b/cumulus_library_data_metrics/base.summary.jinja @@ -2,28 +2,74 @@ CREATE TABLE data_metrics__{{ metric }}_summary AS ( WITH {%- for entry_key, summary_info in entries.items() %} {{ entry_key }}_numerator AS ( - SELECT '{{ entry_key }}' AS entry_key, COUNT(id) AS numerator + SELECT COUNT(id) AS numerator FROM data_metrics__{{ metric }}_{{ entry_key }} ), {{ entry_key }}_denominator AS ( - {% if summary_info["denominator"] and summary_info["denominator"].strip() %} + {% if summary_info["denominator"] %} WITH denominator_slice AS {{ summary_info["denominator"] }} {% else %} WITH denominator_slice AS (SELECT id from {{ summary_info["src"] }}) {% endif %} - SELECT '{{ entry_key }}' AS entry_key, COUNT(id) AS denominator + SELECT COUNT(id) AS denominator FROM denominator_slice ), {%- endfor %} + -- Prepare roll-up "all resources" entries by unifying numerators and grabbing distinct IDs + {% for resource, groups in group_values.items() %} + {% set all_entry_key = resource.lower() + "_all" %} + + {{ all_entry_key }}_numerator_ids AS ( + {% for group_entry_key, summary_info in entries.items() %} + {% if summary_info["src"] == resource %} + SELECT id FROM data_metrics__{{ metric }}_{{ group_entry_key }} + UNION + {% endif %} + {% endfor %} + SELECT id FROM {{ resource }} WHERE 1=0 -- empty table to finish loop + ), + {{ all_entry_key }}_numerator AS ( + SELECT COUNT(DISTINCT id) AS numerator FROM {{ all_entry_key }}_numerator_ids + ), + + -- And now the denominators + {% set has_custom_denominator = false %} + {% for entry_key, summary_info in entries.items() %} + {% if summary_info["src"] == resource and summary_info["denominator"] %} + {% set has_custom_denominator = true %} + {% endif %} + {% endfor %} + {% if has_custom_denominator %} + {{ all_entry_key }}_denominator_ids AS ( + {% for group_entry_key, summary_info in entries.items() %} + {% if summary_info["src"] == resource %} + SELECT id FROM {{ summary_info["denominator"] }} + UNION + {% endif %} + {% endfor %} + SELECT id FROM {{ resource }} WHERE 1=0 -- empty table to finish loop + ), + {{ all_entry_key }}_denominator AS ( + SELECT COUNT(DISTINCT id) AS denominator FROM {{ all_entry_key }}_denominator_ids + ), + {% else %} + {{ all_entry_key }}_denominator AS ( + SELECT COUNT(id) AS denominator FROM {{ resource }} + ), + {% endif %} + + {% set _ = entries.update({all_entry_key: {"src": resource, "group": "cumulus__all"} }) %} + {% endfor %} + union_table AS ( {%- for entry_key, summary_info in entries.items() %} SELECT '{{ summary_info["src"] }}' AS resource, - {% if summary_info["stratifier"] %} - '{{ summary_info["stratifier"] }}' AS {{ stratifier_column }}, + {% if summary_info["group"] %} + '{{ summary_info["group"] }}' AS {{ group_column }}, {% else %} - CAST(NULL AS VARCHAR) AS {{ stratifier_column }}, + CAST(NULL AS VARCHAR) AS {{ group_column }}, {% endif %} numerator, denominator, @@ -35,8 +81,7 @@ CREATE TABLE data_metrics__{{ metric }}_summary AS ( END AS DECIMAL(5, 2)) AS percentage FROM {{ entry_key }}_numerator AS numerator_table - INNER JOIN {{ entry_key }}_denominator AS denominator_table - ON numerator_table.entry_key = denominator_table.entry_key + CROSS JOIN {{ entry_key }}_denominator AS denominator_table {%- if not loop.last %} UNION {%- endif -%} diff --git a/cumulus_library_data_metrics/c_resources_per_pt/c_resources_per_pt.jinja b/cumulus_library_data_metrics/c_resources_per_pt/c_resources_per_pt.jinja index 4ca3ad8..574abd1 100644 --- a/cumulus_library_data_metrics/c_resources_per_pt/c_resources_per_pt.jinja +++ b/cumulus_library_data_metrics/c_resources_per_pt/c_resources_per_pt.jinja @@ -72,7 +72,19 @@ SELECT MAX(num_resources) AS "max" FROM summed_counts UNION +-- duplicate the above for consistency with flagging +-- "no category available" situations via a NULL category +SELECT + 'cumulus__all' AS resource, + CAST(NULL AS VARCHAR) AS category, + CAST(AVG(num_resources) AS DECIMAL(18, 2)) AS average, + CAST(STDDEV_POP(num_resources) AS DECIMAL(18, 2)) AS std_dev, + MAX(num_resources) AS "max" +FROM summed_counts +UNION {%- for resource in patient_fields %} +-- If you change this cumulus_all block, also change its +-- duplicate block below where we use NULL as the category. SELECT '{{ resource }}' AS resource, 'cumulus__all' AS category, @@ -97,6 +109,22 @@ AND counts.patient = patient_refs.ref AND counts.category = cat_values.category GROUP BY cat_values.category HAVING MAX(num_resources) > 0 +{% else %} +-- copy of cumulus__all above, but with a NULL category, +-- for consistency: there's always a roll-up and a detailed row, +-- and this is how we treat other "no categorization available" +-- metrics like rows in q_date_recent. +-- Note how this is different than "cumulus__none" here, because +-- there just isn't a categorization we even look for. +UNION +SELECT + '{{ resource }}' AS resource, + CAST(NULL AS VARCHAR) AS category, + CAST(AVG(num_resources) AS DECIMAL(18, 2)) AS average, + CAST(STDDEV_POP(num_resources) AS DECIMAL(18, 2)) AS std_dev, + MAX(num_resources) AS "max" +FROM summed_{{ resource|lower }}_counts +WHERE resource = '{{ resource }}' {% endif %} {%- if not loop.last %} UNION diff --git a/cumulus_library_data_metrics/q_ref_target_pop/q_ref_target_pop.py b/cumulus_library_data_metrics/q_ref_target_pop/q_ref_target_pop.py index c5091cb..f093103 100644 --- a/cumulus_library_data_metrics/q_ref_target_pop/q_ref_target_pop.py +++ b/cumulus_library_data_metrics/q_ref_target_pop/q_ref_target_pop.py @@ -24,4 +24,4 @@ def add_metric_queries(self) -> None: self.make_table(src="MedicationRequest", dest="Patient", field="subject") self.make_table(src="Observation", dest="Patient", field="subject") self.make_table(src="Procedure", dest="Patient", field="subject") - self.make_summary(stratifier_column="target") + self.make_summary(group_column="target") diff --git a/cumulus_library_data_metrics/q_ref_target_valid/q_ref_target_valid.py b/cumulus_library_data_metrics/q_ref_target_valid/q_ref_target_valid.py index f99cfd0..440c24e 100644 --- a/cumulus_library_data_metrics/q_ref_target_valid/q_ref_target_valid.py +++ b/cumulus_library_data_metrics/q_ref_target_valid/q_ref_target_valid.py @@ -47,4 +47,4 @@ def add_metric_queries(self) -> None: self.make_table(src="Observation", dest="Encounter", field="encounter") self.make_table(src="Procedure", dest="Patient", field="subject") self.make_table(src="Procedure", dest="Encounter", field="encounter") - self.make_summary(stratifier_column="target") + self.make_summary(group_column="target") diff --git a/cumulus_library_data_metrics/q_system_use/q_system_use.py b/cumulus_library_data_metrics/q_system_use/q_system_use.py index cd10b3c..391119b 100644 --- a/cumulus_library_data_metrics/q_system_use/q_system_use.py +++ b/cumulus_library_data_metrics/q_system_use/q_system_use.py @@ -73,4 +73,4 @@ def add_metric_queries(self) -> None: "http://www.cms.gov/Medicare/Coding/ICD10", ], ) - self.make_summary(stratifier_column="field") + self.make_summary(group_column="field") diff --git a/cumulus_library_data_metrics/q_valid_us_core_v4/q_valid_us_core_v4.py b/cumulus_library_data_metrics/q_valid_us_core_v4/q_valid_us_core_v4.py index 8f79b71..1a5d51b 100644 --- a/cumulus_library_data_metrics/q_valid_us_core_v4/q_valid_us_core_v4.py +++ b/cumulus_library_data_metrics/q_valid_us_core_v4/q_valid_us_core_v4.py @@ -19,4 +19,4 @@ def make_table(self, **kwargs) -> None: def add_metric_queries(self) -> None: super().add_metric_queries() - self.make_summary(stratifier_column="profile") + self.make_summary(group_column="profile") diff --git a/tests/data/c_resources_per_pt/general/expected_summary.csv b/tests/data/c_resources_per_pt/general/expected_summary.csv index 80d3359..f561cb4 100644 --- a/tests/data/c_resources_per_pt/general/expected_summary.csv +++ b/tests/data/c_resources_per_pt/general/expected_summary.csv @@ -1,6 +1,9 @@ resource,category,average,std_dev,max +cumulus__all,,3.00,0.82,4 cumulus__all,cumulus__all,3.00,0.82,4 +ServiceRequest,,0.00,0.00,0 ServiceRequest,cumulus__all,0.00,0.00,0 +Procedure,,0.00,0.00,0 Procedure,cumulus__all,0.00,0.00,0 Observation,cumulus__all,0.00,0.00,0 MedicationRequest,outpatient,1.33,0.47,2 @@ -8,10 +11,12 @@ MedicationRequest,inpatient,0.33,0.47,1 MedicationRequest,discharge; inpatient,0.33,0.47,1 MedicationRequest,cumulus__none,0.33,0.47,1 MedicationRequest,cumulus__all,2.33,1.25,4 +Immunization,,0.00,0.00,0 Immunization,cumulus__all,0.00,0.00,0 Encounter,cumulus__all,0.00,0.00,0 DocumentReference,cumulus__all,0.00,0.00,0 DiagnosticReport,cumulus__all,0.00,0.00,0 +Device,,0.00,0.00,0 Device,cumulus__all,0.00,0.00,0 Condition,cumulus__none,0.67,0.47,1 Condition,cumulus__all,0.67,0.47,1 diff --git a/tests/data/q_date_recent/general/expected_summary.csv b/tests/data/q_date_recent/general/expected_summary.csv index 3d59982..567826c 100644 --- a/tests/data/q_date_recent/general/expected_summary.csv +++ b/tests/data/q_date_recent/general/expected_summary.csv @@ -1,10 +1,19 @@ -resource,stratifier,numerator,denominator,percentage +resource,subgroup,numerator,denominator,percentage Procedure,,2,5,40.00 +Procedure,cumulus__all,2,5,40.00 Observation,,0,0,0.00 +Observation,cumulus__all,0,0,0.00 MedicationRequest,,0,0,0.00 +MedicationRequest,cumulus__all,0,0,0.00 Immunization,,0,0,0.00 +Immunization,cumulus__all,0,0,0.00 Encounter,,1,4,25.00 +Encounter,cumulus__all,1,4,25.00 DocumentReference,,0,0,0.00 +DocumentReference,cumulus__all,0,0,0.00 DiagnosticReport,,0,0,0.00 +DiagnosticReport,cumulus__all,0,0,0.00 Condition,,2,4,50.00 +Condition,cumulus__all,2,4,50.00 AllergyIntolerance,,0,0,0.00 +AllergyIntolerance,cumulus__all,0,0,0.00 diff --git a/tests/data/q_ref_target_pop/general/expected_summary.csv b/tests/data/q_ref_target_pop/general/expected_summary.csv index 3ac98db..385529f 100644 --- a/tests/data/q_ref_target_pop/general/expected_summary.csv +++ b/tests/data/q_ref_target_pop/general/expected_summary.csv @@ -1,13 +1,23 @@ resource,target,numerator,denominator,percentage # Various edge cases here in procedure +Procedure,cumulus__all,1,4,25.00 Procedure,Patient,1,4,25.00 # Rest are just short happy-path checks to confirm that we look at the right json field +Observation,cumulus__all,0,1,0.00 Observation,Patient,0,1,0.00 +MedicationRequest,cumulus__all,0,1,0.00 MedicationRequest,Patient,0,1,0.00 +Immunization,cumulus__all,0,1,0.00 Immunization,Patient,0,1,0.00 +Encounter,cumulus__all,0,1,0.00 Encounter,Patient,0,1,0.00 +DocumentReference,cumulus__all,0,1,0.00 DocumentReference,Patient,0,1,0.00 +DiagnosticReport,cumulus__all,0,1,0.00 DiagnosticReport,Patient,0,1,0.00 +Device,cumulus__all,0,1,0.00 Device,Patient,0,1,0.00 +Condition,cumulus__all,0,1,0.00 Condition,Patient,0,1,0.00 +AllergyIntolerance,cumulus__all,0,1,0.00 AllergyIntolerance,Patient,0,1,0.00 diff --git a/tests/data/q_ref_target_valid/general/expected_procedure_encounter.csv b/tests/data/q_ref_target_valid/general/expected_procedure_encounter.csv index 01ec6d9..ce5d781 100644 --- a/tests/data/q_ref_target_valid/general/expected_procedure_encounter.csv +++ b/tests/data/q_ref_target_valid/general/expected_procedure_encounter.csv @@ -1,2 +1,3 @@ id,status,target bad-encounter,,Encounter/Nope +bad-both,,Encounter/Nope diff --git a/tests/data/q_ref_target_valid/general/expected_summary.csv b/tests/data/q_ref_target_valid/general/expected_summary.csv index 89c70fb..1b79417 100644 --- a/tests/data/q_ref_target_valid/general/expected_summary.csv +++ b/tests/data/q_ref_target_valid/general/expected_summary.csv @@ -1,22 +1,32 @@ resource,target,numerator,denominator,percentage # Various edge cases here in procedure -Procedure,Patient,1,9,11.11 -Procedure,Encounter,1,9,11.11 +Procedure,cumulus__all,3,10,30.00 +Procedure,Patient,2,10,20.00 +Procedure,Encounter,2,10,20.00 # Rest are just short happy-path checks to confirm that we look at the right json field +Observation,cumulus__all,0,1,0.00 Observation,Patient,0,1,0.00 Observation,Encounter,0,1,0.00 +MedicationRequest,cumulus__all,0,1,0.00 MedicationRequest,Patient,0,1,0.00 MedicationRequest,Encounter,0,1,0.00 +Immunization,cumulus__all,0,1,0.00 Immunization,Patient,0,1,0.00 Immunization,Encounter,0,1,0.00 +Encounter,cumulus__all,0,1,0.00 Encounter,Patient,0,1,0.00 # Except DocRefs also have some extra cases around encounter array support +DocumentReference,cumulus__all,3,6,50.00 DocumentReference,Patient,0,6,0.00 DocumentReference,Encounter,3,6,50.00 +DiagnosticReport,cumulus__all,0,1,0.00 DiagnosticReport,Patient,0,1,0.00 DiagnosticReport,Encounter,0,1,0.00 +Device,cumulus__all,0,1,0.00 Device,Patient,0,1,0.00 +Condition,cumulus__all,0,1,0.00 Condition,Patient,0,1,0.00 Condition,Encounter,0,1,0.00 +AllergyIntolerance,cumulus__all,0,1,0.00 AllergyIntolerance,Patient,0,1,0.00 AllergyIntolerance,Encounter,0,1,0.00 diff --git a/tests/data/q_ref_target_valid/general/procedure/0.ndjson b/tests/data/q_ref_target_valid/general/procedure/0.ndjson index 9ea8ad5..2bc93c6 100644 --- a/tests/data/q_ref_target_valid/general/procedure/0.ndjson +++ b/tests/data/q_ref_target_valid/general/procedure/0.ndjson @@ -1,6 +1,7 @@ {"resourceType": "Procedure", "id": "valid", "subject": {"reference": "Patient/A"}, "encounter": {"reference": "Encounter/A"}} {"resourceType": "Procedure", "id": "bad-patient", "subject": {"reference": "Patient/Nope"}, "encounter": {"reference": "Encounter/A"}} {"resourceType": "Procedure", "id": "bad-encounter", "subject": {"reference": "Patient/A"}, "encounter": {"reference": "Encounter/Nope"}} +{"resourceType": "Procedure", "id": "bad-both", "subject": {"reference": "Patient/Nope"}, "encounter": {"reference": "Encounter/Nope"}} {"resourceType": "Procedure", "id": "non-patient", "subject": {"reference": "Device/A"}, "encounter": {"reference": "Encounter/A"}} {"resourceType": "Procedure", "id": "non-encounter", "subject": {"reference": "Patient/A"}, "encounter": {"reference": "Device/A"}} {"resourceType": "Procedure", "id": "missing-patient-ref", "subject": {"display": "Patient A"}, "encounter": {"reference": "Encounter/A"}} diff --git a/tests/data/q_system_use/general/expected_summary.csv b/tests/data/q_system_use/general/expected_summary.csv index 6ac73f5..b230bf9 100644 --- a/tests/data/q_system_use/general/expected_summary.csv +++ b/tests/data/q_system_use/general/expected_summary.csv @@ -1,14 +1,24 @@ resource,field,numerator,denominator,percentage # Various edge cases here in procedure +Procedure,cumulus__all,4,12,33.33 Procedure,code,4,12,33.33 # Rest are often short happy-path checks to confirm that we look at the right json field Observation,valueCodeableConcept,0,2,0.00 +Observation,cumulus__all,0,2,0.00 Observation,code,0,2,0.00 MedicationRequest,medicationCodeableConcept,0,1,0.00 +MedicationRequest,cumulus__all,0,1,0.00 +Medication,cumulus__all,0,1,0.00 Medication,code,0,1,0.00 Immunization,vaccineCode,0,1,0.00 +Immunization,cumulus__all,0,1,0.00 DocumentReference,type,1,3,33.33 +DocumentReference,cumulus__all,1,3,33.33 +DiagnosticReport,cumulus__all,0,1,0.00 DiagnosticReport,code,0,1,0.00 Device,type,0,1,0.00 +Device,cumulus__all,0,1,0.00 +Condition,cumulus__all,0,1,0.00 Condition,code,0,1,0.00 +AllergyIntolerance,cumulus__all,0,1,0.00 AllergyIntolerance,code,0,1,0.00 diff --git a/tests/data/q_valid_us_core_v4/general/expected_summary.csv b/tests/data/q_valid_us_core_v4/general/expected_summary.csv index c73b55b..281c1b7 100644 --- a/tests/data/q_valid_us_core_v4/general/expected_summary.csv +++ b/tests/data/q_valid_us_core_v4/general/expected_summary.csv @@ -1,15 +1,26 @@ resource,profile,numerator,denominator,percentage Procedure,,0,0,0.00 +Procedure,cumulus__all,0,0,0.00 Patient,,0,0,0.00 +Patient,cumulus__all,0,0,0.00 +Observation,cumulus__all,3,3,100.00 Observation,Vital Signs,0,0,0.00 -Observation,Smoking Status,0,0,0.00 -Observation,Laboratory,0,0,0.00 +Observation,Smoking Status,2,2,100.00 +Observation,Laboratory,2,2,100.00 MedicationRequest,,0,0,0.00 +MedicationRequest,cumulus__all,0,0,0.00 Medication,,0,0,0.00 +Medication,cumulus__all,0,0,0.00 Immunization,,0,0,0.00 +Immunization,cumulus__all,0,0,0.00 Encounter,,2,3,66.67 +Encounter,cumulus__all,2,3,66.67 DocumentReference,,0,0,0.00 +DocumentReference,cumulus__all,0,0,0.00 +DiagnosticReport,cumulus__all,0,0,0.00 DiagnosticReport,Note,0,0,0.00 DiagnosticReport,Lab,0,0,0.00 Condition,,0,0,0.00 +Condition,cumulus__all,0,0,0.00 AllergyIntolerance,,0,0,0.00 +AllergyIntolerance,cumulus__all,0,0,0.00 diff --git a/tests/data/q_valid_us_core_v4/general/observation/0.ndjson b/tests/data/q_valid_us_core_v4/general/observation/0.ndjson new file mode 100644 index 0000000..7aaaf03 --- /dev/null +++ b/tests/data/q_valid_us_core_v4/general/observation/0.ndjson @@ -0,0 +1,3 @@ +{"resourceType": "Observation", "id": "lab-nothing", "category": [{"coding": [{"code": "laboratory", "system": "http://terminology.hl7.org/CodeSystem/observation-category"}]}]} +{"resourceType": "Observation", "id": "smoking-nothing", "code": {"coding": [{"system": "http://loinc.org", "code": "72166-2"}]}} +{"resourceType": "Observation", "id": "both-nothing", "code": {"coding": [{"system": "http://loinc.org", "code": "72166-2"}]}, "category": [{"coding": [{"code": "laboratory", "system": "http://terminology.hl7.org/CodeSystem/observation-category"}]}]}