diff --git a/code/ARAX/ARAXQuery/ARAX_expander.py b/code/ARAX/ARAXQuery/ARAX_expander.py index 9fdcb8e40..197831546 100644 --- a/code/ARAX/ARAXQuery/ARAX_expander.py +++ b/code/ARAX/ARAXQuery/ARAX_expander.py @@ -27,6 +27,7 @@ from openapi_server.models.q_node import QNode from openapi_server.models.edge import Edge from openapi_server.models.attribute_constraint import AttributeConstraint +from openapi_server.models.attribute import Attribute from Expand.kg2_querier import KG2Querier from Expand.trapi_querier import TRAPIQuerier @@ -59,6 +60,9 @@ def __init__(self): "aggregator_knowledge_source": {"==": "*"}} self.supported_qedge_qualifier_constraints = {"biolink:qualified_predicate", "biolink:object_direction_qualifier", "biolink:object_aspect_qualifier"} + self.higher_level_treats_predicates = {"biolink:treats_or_applied_or_studied_to_treat", + "biolink:applied_to_treat", + "biolink:studied_to_treat"} def describe_me(self): """ @@ -504,6 +508,16 @@ def apply(self, response, input_parameters, mode: str = "ARAX"): for kedge_key in kedges_to_remove: if kedge_key in overarching_kg.edges_by_qg_id[qedge_key]: del overarching_kg.edges_by_qg_id[qedge_key][kedge_key] + # Remove KG2 SemMedDB treats_or_applied-type edges if this is an inferred treats query + if alter_kg2_treats_edges: + edge_keys_to_remove = {edge_key for edge_key, edge in overarching_kg.edges_by_qg_id[qedge_key].items() + if edge.predicate in self.higher_level_treats_predicates and + any(source.resource_id == "infores:rtx-kg2" for source in edge.sources) and + any(source.resource_id == "infores:semmeddb" for source in edge.sources)} + log.debug(f"Removing {len(edge_keys_to_remove)} KG2 semmeddb treats_or_applied-type edges " + f"fulfilling {qedge_key}") + for edge_key in edge_keys_to_remove: + del overarching_kg.edges_by_qg_id[qedge_key][edge_key] if mode != "RTXKG2": # Apply any kryptonite ("not") qedges @@ -579,11 +593,17 @@ def apply(self, response, input_parameters, mode: str = "ARAX"): # Second half of patch for #2328; edit KG2 'treats_or_applied_or_studied_to_treat' edges to just 'treats' if mode != "RTXKG2" and do_issue_2328_patch and inferred_qedge_keys: num_edges_altered = 0 - higher_level_treats_predicates = {"biolink:treats_or_applied_or_studied_to_treat", - "biolink:applied_to_treat"} for edge in message.knowledge_graph.edges.values(): is_kg2_edge = any(source.resource_id == "infores:rtx-kg2" for source in edge.sources) - if is_kg2_edge and edge.predicate in higher_level_treats_predicates: + if is_kg2_edge and edge.predicate in self.higher_level_treats_predicates: + # Record the original KG2 predicate in an attribute + edge.attributes.append(Attribute(attribute_type_id="biolink:original_predicate", + value=edge.predicate, + value_type_id="biolink:predicate", + description="Predicate as it appears in RTX-KG2, prior to " + "alteration by ARAX.", + attribute_source="infores:arax")) + # Then change the predicate to treats edge.predicate = "biolink:treats" num_edges_altered += 1 if num_edges_altered: @@ -1132,7 +1152,7 @@ def _apply_any_kryptonite_edges(organized_kg: QGOrganizedKnowledgeGraph, full_qu organized_kg.edges_by_qg_id[qedge_key].pop(edge_key) if not organized_kg.edges_by_qg_id[qedge_key]: - log.warning(f"All {qedge_key} edges have been deleted due to an Exclude=true (i.e., 'kryptonite') edge!") + log.warning(f"All {qedge_key} edges have been deleted!") @staticmethod def _prune_kg(qnode_key_to_prune: str, prune_threshold: int, kg: QGOrganizedKnowledgeGraph, diff --git a/code/ARAX/ARAXQuery/Expand/trapi_querier.py b/code/ARAX/ARAXQuery/Expand/trapi_querier.py index bc24ee31e..9c256e0df 100644 --- a/code/ARAX/ARAXQuery/Expand/trapi_querier.py +++ b/code/ARAX/ARAXQuery/Expand/trapi_querier.py @@ -56,6 +56,9 @@ async def answer_one_hop_query_async(self, query_graph: QueryGraph, """ This function answers a one-hop (single-edge) query using the specified KP. :param query_graph: A TRAPI query graph. + :param alter_kg2_treats_edges: If true, will query KG2 for higher-level treats-type predicates instead of just + 'treats'. Any higher-level returned edges will later be altered to have 'treats' + predicates in ARAX_expander.py. :return: An (almost) TRAPI knowledge graph containing all of the nodes and edges returned as results for the query. (Organized by QG IDs.) """ @@ -90,10 +93,11 @@ async def answer_one_hop_query_async(self, query_graph: QueryGraph, # Patch to address lack of answers from KG2 for treats queries after treats refactor #2328 if alter_kg2_treats_edges and self.kp_infores_curie == "infores:rtx-kg2": for qedge in qg_copy.edges.values(): # Note there's only ever one qedge per QG here - qedge.predicates = ["biolink:treats_or_applied_or_studied_to_treat" if predicate == "biolink:treats" else predicate - for predicate in qedge.predicates] - log.info(f"For querying infores:rtx-kg2, edited the QG for this single-hop query to use " - f"the biolink:treats_or_applied_or_studied_to_treat predicate (instead of biolink:treats)") + qedge.predicates = list(set(qedge.predicates).union({"biolink:treats_or_applied_or_studied_to_treat", + "biolink:applied_to_treat", + "biolink:studied_to_treat"})) + log.info(f"For querying infores:rtx-kg2, edited {qedge_key} to use higher treats-type predicates: " + f"{qedge.predicates}") # Answer the query using the KP and load its answers into our object model final_kg = await self._answer_query_using_kp_async(qg_copy) diff --git a/code/ARAX/test/test_ARAX_expand.py b/code/ARAX/test/test_ARAX_expand.py index 38563a98d..02207a0e4 100644 --- a/code/ARAX/test/test_ARAX_expand.py +++ b/code/ARAX/test/test_ARAX_expand.py @@ -1531,7 +1531,15 @@ def test_treats_patch_issue_2328(): "object": "disease", "subject": "chemical", "predicates": ["biolink:treats"], - "knowledge_type": "inferred" + "knowledge_type": "inferred", + "attribute_constraints": [ + { + "id": "knowledge_source", + "name": "knowledge source", + "value": ["infores:rtx-kg2"], + "operator": "==" + } + ] } } } @@ -1541,9 +1549,11 @@ def test_treats_patch_issue_2328(): if any(source.resource_id == "infores:rtx-kg2" for source in edge.sources)] print(f"Answer includes {len(kg2_edges_treats)} edges from KG2") assert kg2_edges_treats + print(kg2_edges_treats) for edge in kg2_edges_treats: assert edge.predicate == "biolink:treats" assert edge.attributes + assert not any(source.resource_id == "infores:semmeddb" for source in edge.sources) # Verify that the predicate editing doesn't happen outside of inferred mode query = { @@ -1559,7 +1569,15 @@ def test_treats_patch_issue_2328(): "t_edge": { "object": "disease", "subject": "chemical", - "predicates": ["biolink:treats_or_applied_or_studied_to_treat", "biolink:applied_to_treat"] + "predicates": ["biolink:treats_or_applied_or_studied_to_treat", "biolink:applied_to_treat"], + "attribute_constraints": [ + { + "id": "knowledge_source", + "name": "knowledge source", + "value": ["infores:rtx-kg2"], + "operator": "==" + } + ] } } } @@ -1570,6 +1588,7 @@ def test_treats_patch_issue_2328(): print(f"Answer includes {len(kg2_edges_treats_or)} edges from KG2") assert kg2_edges_treats_or assert any(edge for edge in kg2_edges_treats_or if edge.predicate == "biolink:treats_or_applied_or_studied_to_treat") + assert any(edge for edge in kg2_edges_treats_or if edge.predicate == "biolink:applied_to_treat") if __name__ == "__main__":