From 7c19443e49d78237a539080d78ee0951695334ff Mon Sep 17 00:00:00 2001 From: James McLaughlin Date: Wed, 8 Jan 2025 16:05:39 +0000 Subject: [PATCH] add primekg --- dataload/00_fetch_data/primekg/fetch.sh | 5 ++++ dataload/01_ingest/primekg.py | 26 +++++++++++++++++++ .../configs/datasource_configs/primekg.yaml | 6 +++++ .../configs/subgraph_configs/ebi_monarch.json | 6 +++-- .../ebi_monarch_xspecies.json | 6 +++-- .../subgraph_configs/src/ebi_monarch.py | 3 ++- 6 files changed, 47 insertions(+), 5 deletions(-) create mode 100755 dataload/00_fetch_data/primekg/fetch.sh create mode 100755 dataload/01_ingest/primekg.py create mode 100644 dataload/configs/datasource_configs/primekg.yaml diff --git a/dataload/00_fetch_data/primekg/fetch.sh b/dataload/00_fetch_data/primekg/fetch.sh new file mode 100755 index 0000000..9b9a4ee --- /dev/null +++ b/dataload/00_fetch_data/primekg/fetch.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +curl -L https://dataverse.harvard.edu/api/access/datafile/6180620 > kg.csv + + diff --git a/dataload/01_ingest/primekg.py b/dataload/01_ingest/primekg.py new file mode 100755 index 0000000..c8757c8 --- /dev/null +++ b/dataload/01_ingest/primekg.py @@ -0,0 +1,26 @@ + +import pandas +import sys +import json + +df = pandas.read_csv(sys.stdin, dtype=str) + +for row in df.to_dict(orient="records"): + + x_id = row['x_source'] + ':' + row['x_id'] + y_id = row['y_source'] + ':' + row['y_id'] + + res = { + 'id': x_id, + 'grebi:name': row['x_name'] + } + + res["primekg:" + row['relation']] = { + 'grebi:value': y_id, + 'grebi:properties': {f"primekg:{key}": value for key, value in row.items()} + } + + print(json.dumps(res)) + + + \ No newline at end of file diff --git a/dataload/configs/datasource_configs/primekg.yaml b/dataload/configs/datasource_configs/primekg.yaml new file mode 100644 index 0000000..0aa8f6e --- /dev/null +++ b/dataload/configs/datasource_configs/primekg.yaml @@ -0,0 +1,6 @@ +name: PrimeKG +enabled: true +ingests: + - globs: ["./00_fetch_data/primekg/kg.csv"] + command: $GREBI_DATALOAD_HOME/01_ingest/primekg.py + diff --git a/dataload/configs/subgraph_configs/ebi_monarch.json b/dataload/configs/subgraph_configs/ebi_monarch.json index d54c35b..68e0528 100644 --- a/dataload/configs/subgraph_configs/ebi_monarch.json +++ b/dataload/configs/subgraph_configs/ebi_monarch.json @@ -30,7 +30,8 @@ "mondo:0000001", "efo:0000408", "chebi:36080", - "chebi:24431" + "chebi:24431", + "biolink:ChemicalEntity" ], "additional_equivalence_groups": [ [ @@ -167,6 +168,7 @@ "./configs/datasource_configs/robokop_string.yaml", "./configs/datasource_configs/robokop_textmining.yaml", "./configs/datasource_configs/robokop_viralproteome.yaml", - "./configs/datasource_configs/mesh.yaml" + "./configs/datasource_configs/mesh.yaml", + "./configs/datasource_configs/primekg.yaml" ] } diff --git a/dataload/configs/subgraph_configs/ebi_monarch_xspecies.json b/dataload/configs/subgraph_configs/ebi_monarch_xspecies.json index 40e8050..490f380 100644 --- a/dataload/configs/subgraph_configs/ebi_monarch_xspecies.json +++ b/dataload/configs/subgraph_configs/ebi_monarch_xspecies.json @@ -31,7 +31,8 @@ "mondo:0000001", "efo:0000408", "chebi:36080", - "chebi:24431" + "chebi:24431", + "biolink:ChemicalEntity" ], "additional_equivalence_groups": [ [ @@ -168,6 +169,7 @@ "./configs/datasource_configs/robokop_string.yaml", "./configs/datasource_configs/robokop_textmining.yaml", "./configs/datasource_configs/robokop_viralproteome.yaml", - "./configs/datasource_configs/mesh.yaml" + "./configs/datasource_configs/mesh.yaml", + "./configs/datasource_configs/primekg.yaml" ] } diff --git a/dataload/configs/subgraph_configs/src/ebi_monarch.py b/dataload/configs/subgraph_configs/src/ebi_monarch.py index 0234822..6ee6241 100644 --- a/dataload/configs/subgraph_configs/src/ebi_monarch.py +++ b/dataload/configs/subgraph_configs/src/ebi_monarch.py @@ -37,7 +37,8 @@ "./configs/datasource_configs/robokop_string.yaml", "./configs/datasource_configs/robokop_textmining.yaml", "./configs/datasource_configs/robokop_viralproteome.yaml", - "./configs/datasource_configs/mesh.yaml" + "./configs/datasource_configs/mesh.yaml", + "./configs/datasource_configs/primekg.yaml" ] if __name__ == '__main__':