-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
211 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,211 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import sys, os, io, json\n", | ||
"from neo4j import GraphDatabase\n", | ||
"from py2neo import Graph\n", | ||
"from pathlib import Path\n", | ||
"from pandas import DataFrame\n", | ||
"import pandas as pd\n", | ||
"import networkx as nx\n", | ||
"\n", | ||
"# Bolt endpoint of the Neo4j instance; set the NEO4J_URI environment variable to override it.\n", | ||
"NEO4J_URI = os.environ.get(\"NEO4J_URI\", \"bolt://localhost:7687\")\n", | ||
"\n", | ||
"# py2neo handle used by the query cells below.\n", | ||
"graph = Graph(NEO4J_URI)\n", | ||
"# Official neo4j driver for the same endpoint, created without authentication.\n", | ||
"driver = GraphDatabase.driver(NEO4J_URI, auth=None)\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"3475900\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# Store num_desc (count of distinct descendants reached over biolink:broad_match edges)\n", | ||
"# on every ontology class that has at least one descendant, and report the largest count.\n", | ||
"\n", | ||
"num_desc_query = \"\"\"\n", | ||
"MATCH (ancestor:`ols:Class`)<-[:`biolink:broad_match`*1..]-(subclass:`ols:Class`)\n", | ||
"WITH ancestor, count(DISTINCT subclass) AS num_desc\n", | ||
"SET ancestor.num_desc = num_desc\n", | ||
"RETURN max(num_desc) AS max_num_desc\n", | ||
"\"\"\"\n", | ||
"df = DataFrame(graph.run(num_desc_query).data())\n", | ||
"\n", | ||
"# The query returns a single aggregated row; take its only value.\n", | ||
"max_num_desc = df.loc[0, 'max_num_desc']\n", | ||
"print(max_num_desc)\n", | ||
"\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 14, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Set ic on every ontology class to 1 - num_desc / max_num_desc, so a class with\n", | ||
"# 0 descendants gets ic = 1 and upper level classes like owl:Thing, bfo continuant\n", | ||
"# get very low ic scores (< 0.02).\n", | ||
"# Classes without descendants are never matched by the num_desc query, so their num_desc\n", | ||
"# is missing; coalesce() treats it as 0. Without it the expression evaluates to null and\n", | ||
"# those classes end up with no ic property at all.\n", | ||
"\n", | ||
"df = DataFrame(graph.run(\"\"\"\n", | ||
"MATCH (cl:`ols:Class`)\n", | ||
"SET cl.ic = 1.0 - (coalesce(cl.num_desc, 0)/$max_num_desc)\n", | ||
"\"\"\", {\n", | ||
"    'max_num_desc': float(max_num_desc)\n", | ||
"}).data())\n", | ||
"\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Index the ic property of GraphNode nodes for the threshold queries below.\n", | ||
"# IF NOT EXISTS keeps this cell re-runnable: a plain CREATE INDEX fails once the index exists.\n", | ||
"df = DataFrame(graph.run(\"\"\"\n", | ||
"CREATE INDEX ic IF NOT EXISTS FOR (n:GraphNode) ON (n.ic)\n", | ||
"\"\"\").data())" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Which nodes have an IC score of less than 0.5?" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 24, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"| | name | ic |\n", | ||
"|---:|:------------------------------|------------:|\n", | ||
"| 0 | entity | 0 |\n", | ||
"| 1 | Thing | 2.47418e-05 |\n", | ||
"| 2 | entity | 2.56049e-05 |\n", | ||
"| 3 | experimental factor | 0.00913087 |\n", | ||
"| 4 | bfo:continuant | 0.0175767 |\n", | ||
"| 5 | bfo:independent_continuant | 0.0793748 |\n", | ||
"| 6 | material entity | 0.0793815 |\n", | ||
"| 7 | object | 0.0867643 |\n", | ||
"| 8 | biological entity | 0.0906039 |\n", | ||
"| 9 | organismal entity | 0.251327 |\n", | ||
"| 10 | obi:organism | 0.253235 |\n", | ||
"| 11 | obo:ncbitaxon.owl | 0.253238 |\n", | ||
"| 12 | cellular organisms or viruses | 0.259099 |\n", | ||
"| 13 | NCBI_taxonomy:131567 | 0.33141 |\n", | ||
"| 14 | Archaea or Eukaryota | 0.493794 |\n", | ||
"| 15 | Eukaryota | 0.498043 |\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# List the name and ic of every GraphNode whose ic is below the threshold.\n", | ||
"# The threshold is passed as the $max_ic query parameter; ORDER BY makes the row\n", | ||
"# order deterministic instead of depending on the query plan.\n", | ||
"df = DataFrame(graph.run(\"\"\"\n", | ||
"MATCH (n:GraphNode) WHERE n.ic < $max_ic\n", | ||
"RETURN n.`grebi:name`[0] AS name, n.ic AS ic\n", | ||
"ORDER BY ic\n", | ||
"\"\"\", {\n", | ||
"    'max_ic': 0.5\n", | ||
"}).data())\n", | ||
"\n", | ||
"print(df.to_markdown())" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 28, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"| | name | ic |\n", | ||
"|---:|:---------------------------------------------------|------------:|\n", | ||
"| 0 | entity | 0 |\n", | ||
"| 1 | Thing | 2.47418e-05 |\n", | ||
"| 2 | entity | 2.56049e-05 |\n", | ||
"| 3 | experimental factor | 0.00913087 |\n", | ||
"| 4 | bfo:continuant | 0.0175767 |\n", | ||
"| 5 | bfo:independent_continuant | 0.0793748 |\n", | ||
"| 6 | material entity | 0.0793815 |\n", | ||
"| 7 | object | 0.0867643 |\n", | ||
"| 8 | biological entity | 0.0906039 |\n", | ||
"| 9 | organismal entity | 0.251327 |\n", | ||
"| 10 | obi:organism | 0.253235 |\n", | ||
"| 11 | obo:ncbitaxon.owl | 0.253238 |\n", | ||
"| 12 | cellular organisms or viruses | 0.259099 |\n", | ||
"| 13 | NCBI_taxonomy:131567 | 0.33141 |\n", | ||
"| 14 | Archaea or Eukaryota | 0.493794 |\n", | ||
"| 15 | Eukaryota | 0.498043 |\n", | ||
"| 16 | Unikonta | 0.591569 |\n", | ||
"| 17 | Fungi/Metazoa group | 0.592667 |\n", | ||
"| 18 | Metazoa | 0.650527 |\n", | ||
"| 19 | Eumetazoa | 0.652455 |\n", | ||
"| 20 | Bilateria | 0.657087 |\n", | ||
"| 21 | NCBI_taxonomy:33317 | 0.695427 |\n", | ||
"| 22 | Ecdysozoa | 0.72271 |\n", | ||
"| 23 | NCBI_taxonomy:88770 | 0.728683 |\n", | ||
"| 24 | Arthropoda | 0.729258 |\n", | ||
"| 25 | Mandibulata | 0.752957 |\n", | ||
"| 26 | NCBI_taxonomy:197562 | 0.754289 |\n", | ||
"| 27 | Viridiplantae or Bacteria or Euglenozoa or Archaea | 0.755665 |\n", | ||
"| 28 | Viridiplantae or Archaea or Bacteria | 0.756341 |\n", | ||
"| 29 | Viridiplantae or Bacteria or Euglenozoa | 0.759915 |\n", | ||
"| 30 | Viridiplantae or Bacteria | 0.76059 |\n", | ||
"| 31 | Hexapoda | 0.76777 |\n", | ||
"| 32 | Insecta | 0.773244 |\n", | ||
"| 33 | Dicondylia | 0.77332 |\n", | ||
"| 34 | NCBI_taxonomy:7496 | 0.773414 |\n", | ||
"| 35 | Fungi or Bacteria or Archaea | 0.775881 |\n", | ||
"| 36 | NCBI_taxonomy:33340 | 0.776196 |\n", | ||
"| 37 | Fungi or Bacteria | 0.780131 |\n", | ||
"| 38 | Endopterygota | 0.799735 |\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# List the name and ic of every GraphNode whose ic is below the threshold.\n", | ||
"# The threshold is passed as the $max_ic query parameter; ORDER BY makes the row\n", | ||
"# order deterministic instead of depending on the query plan.\n", | ||
"df = DataFrame(graph.run(\"\"\"\n", | ||
"MATCH (n:GraphNode) WHERE n.ic < $max_ic\n", | ||
"RETURN n.`grebi:name`[0] AS name, n.ic AS ic\n", | ||
"ORDER BY ic\n", | ||
"\"\"\", {\n", | ||
"    'max_ic': 0.8\n", | ||
"}).data())\n", | ||
"\n", | ||
"print(df.to_markdown())" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": ".venv", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.12.4" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |