From b39d677365e8d7e73c4b40e920583faf6da3bcb6 Mon Sep 17 00:00:00 2001 From: souzadevinicius Date: Thu, 19 Sep 2024 15:10:59 +0100 Subject: [PATCH] add hpo queries notebook --- notebooks/hpo.ipynb | 136 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 notebooks/hpo.ipynb diff --git a/notebooks/hpo.ipynb b/notebooks/hpo.ipynb new file mode 100644 index 0000000..605e285 --- /dev/null +++ b/notebooks/hpo.ipynb @@ -0,0 +1,136 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "import os\n", + "from py2neo import Graph\n", + "from pathlib import Path\n", + "from pandas import DataFrame\n", + "\n", + "graph = Graph(\"bolt://localhost:8687\")\n", + "\n", + "# df = DataFrame(graph.run(\"\"\"\n", + "# MATCH (id:Id { id:\"chebi:5063\"})<-[:id]-(n:GraphNode)\n", + "# RETURN n.`grebi:name`[0] as name\n", + "# \"\"\").data())\n", + "\n", + "# print(df)\n", + "\n", + "\n", + "#Returns all HP terms. Each term node definitely corresponds to an HP term, but may ALSO correspond to an MP term due to the mappings.\n", + "#This also means that the relationships may come from either HP or MP.\n", + "#\n", + "df = DataFrame(graph.run(\"\"\"\n", + "MATCH (id:Id { id:\"hp:0000001\"})<-[:id]-(hpo_root_term:GraphNode)\n", + " <-[:`biolink:broad_match`]-(term:GraphNode)\n", + " -[outgoing_edge]->(n:GraphNode)\n", + "RETURN term.`grebi:name`[0] AS from, type(outgoing_edge) AS edge, n.`grebi:name`[0] AS to\n", + "\"\"\").data())\n", + "\n", + "df.to_csv(\"all_hp_all_out.csv\", index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "# This version of the above query filters the relationships to those asserted by HP only (not MP)\n", + "df = DataFrame(graph.run(\"\"\"\n", + "MATCH (id:Id { id:\"hp:0000001\"})<-[:id]-(hpo_root_term:GraphNode)\n", + " <-[:`biolink:broad_match`]-(term:GraphNode)\n", + " -[outgoing_edge]->(n:GraphNode)\n", + " WHERE \"OLS.hp\" IN outgoing_edge.`grebi:datasources`\n", + "RETURN term.`grebi:name`[0] AS from, type(outgoing_edge) AS edge, n.`grebi:name`[0] AS to\n", + "\"\"\").data())\n", + "\n", + "df.to_csv(\"all_hp_all_out_hp_only.csv\", index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "# This version of the above query filters the relationships to those asserted by HP only (not MP)\n", + "# Also adds identifiers in the results\n", + "\n", + "df = DataFrame(graph.run(\"\"\"\n", + "MATCH (id:Id { id:\"hp:0000001\"})<-[:id]-(hpo_root_term:GraphNode)\n", + " <-[:`biolink:broad_match`]-(term:GraphNode)\n", + " -[outgoing_edge]->(n:GraphNode)\n", + " WHERE \"OLS.hp\" IN outgoing_edge.`grebi:datasources`\n", + "RETURN\n", + " [id in term.id WHERE id =~ \"hp:[0-9]*\" | id][0] AS from_id,\n", + " term.`grebi:name`[0] AS from_label,\n", + " type(outgoing_edge) AS edge,\n", + " n.id AS to_ids,\n", + " n.`grebi:name`[0] AS to_label\n", + "\"\"\").data())\n", + "\n", + "df.to_csv(\"all_hp_all_out_hp_outgoing.csv\", index=False)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "# This query returns all incoming edges to all HP terms\n", + "# Note that the incoming edges may target either the HP terms or their equivalent MP terms\n", + "# and there is currently no way to differentiate!\n", + "# We will prob have to make two different versions of the Neo4j, one with merged mappings and one without\n", + "\n", + "df = DataFrame(graph.run(\"\"\"\n", + "MATCH (id:Id { id:\"hp:0000001\"})<-[:id]-(hpo_root_term:GraphNode)\n", + " <-[:`biolink:broad_match`]-(term:GraphNode)\n", + " <-[incoming_edge]-(n:GraphNode)\n", + "RETURN\n", + " [id in term.id WHERE id =~ \"hp:[0-9]*\" | id][0] AS to_id,\n", + " term.`grebi:name`[0] AS to_label,\n", + " type(incoming_edge) AS edge,\n", + " n.id AS from_ids,\n", + " n.`grebi:name`[0] AS from_label\n", + "\"\"\").data())\n", + "\n", + "df.to_csv(\"all_hp_all_in_hp.csv\", index=False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}