Skip to content

Commit

Permalink
make config more modular, exclude impc observations from default graph
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesamcl committed Nov 5, 2024
1 parent 7e1b528 commit 4dd9ba8
Show file tree
Hide file tree
Showing 18 changed files with 1,056 additions and 255 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,6 @@ graph.db
*.swp
.nextflow*
work
__pycache__
*.pyc

32 changes: 0 additions & 32 deletions configs/datasource_configs/impc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,6 @@ ingests:
--json-rename-field mgiGeneAccessionId:id
--json-inject-type impc:MouseGene
--json-inject-key-prefix impc:'
- globs: ["/nfs/production/parkinso/spot/jmcl/impc-kg/categorical_observation_json/*.json.gz"]
command: '
grebi_transform_jsonl
--json-rename-field observationId:id
--json-inject-type impc:CategoricalObservation
--json-inject-key-prefix impc:'
- globs: ["/nfs/production/parkinso/spot/jmcl/impc-kg/embryo_specimen_json/*.json.gz"]
command: '
grebi_transform_jsonl
Expand Down Expand Up @@ -68,38 +62,12 @@ ingests:
--json-rename-field procedureId:id
--json-inject-type impc:Procedure
--json-inject-key-prefix impc:'
- globs: ["/nfs/production/parkinso/spot/jmcl/impc-kg/specimen_experiment_json/*.json.gz"]
command: '
grebi_transform_jsonl
--json-rename-field experimentId:id
--json-inject-type impc:SpecimenExperiment
--json-inject-key-prefix impc:'
- globs: ["/nfs/production/parkinso/spot/jmcl/impc-kg/text_observation_json/*.json.gz"]
command: '
grebi_transform_jsonl
--json-rename-field observationId:id
--json-inject-type impc:TextObservation
--json-inject-key-prefix impc:'
- globs: ["/nfs/production/parkinso/spot/jmcl/impc-kg/time_series_observation_json/*.json.gz"]
command: '
grebi_transform_jsonl
--json-rename-field observationId:id
--json-inject-type impc:TimeSeriesObservation
--json-inject-key-prefix impc:'
- globs: ["/nfs/production/parkinso/spot/jmcl/impc-kg/unidimensional_observation_json/*.json.gz"]
command: '
grebi_transform_jsonl
--json-rename-field observationId:id
--json-inject-type impc:UnidimensionalObservation
--json-inject-key-prefix impc:'
- globs: ["/nfs/production/parkinso/spot/jmcl/impc-kg/statistical_result_json/*.json.gz"]
command: '
grebi_transform_jsonl
--json-rename-field statisticalResultId:id
--json-inject-type impc:StatisticalResult
--json-inject-key-prefix impc:
--json-de-nest-field potentialPhenotypes.id
--json-de-nest-field statisticalMethod.name'
- globs: ["/nfs/production/parkinso/spot/jmcl/impc-kg/human_gene_json/*.json.gz"]
command: '
grebi_transform_jsonl
Expand Down
35 changes: 35 additions & 0 deletions configs/datasource_configs/impc_observations.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Datasource config listing the IMPC observation, statistical-result and
# specimen-experiment JSON dumps as separate ingests.
# Each ingest pairs a list of file globs with a grebi_transform_jsonl command line.
# Flag meanings below are read from the flag names only — confirm against
# grebi_transform_jsonl before relying on them.
# NOTE(review): categorical, time-series and unidimensional observations are
# listed here, but there is no text_observation_json ingest — confirm the
# omission is intentional.
name: IMPC
enabled: true
ingests:
# Categorical observations: observationId is renamed to id.
- globs: ["/nfs/production/parkinso/spot/jmcl/impc-kg/categorical_observation_json/*.json.gz"]
command: '
grebi_transform_jsonl
--json-rename-field observationId:id
--json-inject-type impc:CategoricalObservation
--json-inject-key-prefix impc:'
# Time-series observations: observationId is renamed to id.
- globs: ["/nfs/production/parkinso/spot/jmcl/impc-kg/time_series_observation_json/*.json.gz"]
command: '
grebi_transform_jsonl
--json-rename-field observationId:id
--json-inject-type impc:TimeSeriesObservation
--json-inject-key-prefix impc:'
# Unidimensional observations: observationId is renamed to id.
- globs: ["/nfs/production/parkinso/spot/jmcl/impc-kg/unidimensional_observation_json/*.json.gz"]
command: '
grebi_transform_jsonl
--json-rename-field observationId:id
--json-inject-type impc:UnidimensionalObservation
--json-inject-key-prefix impc:'
# Statistical results: the only ingest that also passes --json-de-nest-field
# (for potentialPhenotypes.id and statisticalMethod.name).
- globs: ["/nfs/production/parkinso/spot/jmcl/impc-kg/statistical_result_json/*.json.gz"]
command: '
grebi_transform_jsonl
--json-rename-field statisticalResultId:id
--json-inject-type impc:StatisticalResult
--json-inject-key-prefix impc:
--json-de-nest-field potentialPhenotypes.id
--json-de-nest-field statisticalMethod.name'
# Specimen experiments: experimentId is renamed to id.
- globs: ["/nfs/production/parkinso/spot/jmcl/impc-kg/specimen_experiment_json/*.json.gz"]
command: '
grebi_transform_jsonl
--json-rename-field experimentId:id
--json-inject-type impc:SpecimenExperiment
--json-inject-key-prefix impc:'
6 changes: 3 additions & 3 deletions configs/pipeline_configs/ebi.json
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{
"subgraphs": [
"ebi_full_monarch",
"ebi_monarch_xspecies",
"impc",
"monarch",
"hra_kg",
"hett"
"hra_kg"
]
}
8 changes: 8 additions & 0 deletions configs/subgraph_configs/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@


# Default goal: regenerate every subgraph config JSON from its generator in src/.
all: ebi_monarch_xspecies.json ebi_monarch.json hett.json hra_kg.json impc.json monarch.json

# <name>.json is the captured stdout of src/<name>.py.
# The recipe line must start with a tab character.
%.json: src/%.py
	python3 $< > $@

# The shell redirect creates the target before python3 runs, so a failing
# generator would otherwise leave an empty or truncated .json that make
# treats as up to date. Delete the target when its recipe fails.
.DELETE_ON_ERROR:

.PHONY: all
172 changes: 172 additions & 0 deletions configs/subgraph_configs/ebi_monarch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
{
"id": "EBI_MONARCH",
"name": "EBI Resources and MONARCH Initiative KG",
"bytes_per_merged_file": 1073741824,
"identifier_props": [
"id",
"owl:equivalentClass",
"owl:equivalentProperty",
"owl:sameAs",
"grebi:hashId",
"grebi:equivalentTo",
"ols:iri",
"ols:shortForm",
"hgnc:ensembl_gene_id",
"obo:chebi/inchi",
"obo:chebi/inchikey",
"obo:chebi/smiles",
"impc:pmId",
"impc:humanGeneAccId",
"monarch:iri",
"skos:exactMatch",
"ncit:P368",
"ncit:C98965",
"dcterms:identifier",
"oboinowl:hasAlternativeId",
"robokop:equivalent_identifiers",
"mesh.vocab:identifier"
],
"type_superclasses": [
"mondo:0000001",
"efo:0000408",
"chebi:36080",
"chebi:24431"
],
"additional_equivalence_groups": [
[
"grebi:name",
"ols:label",
"rdfs:label",
"monarch:name",
"impc:name",
"reactome:displayName",
"dcterms:title",
"ncit:Preferred_Name",
"robokop:name"
],
[
"grebi:description",
"iao:definition",
"monarch:description",
"ols:definition",
"robokop:description"
],
[
"grebi:synonym",
"monarch:synonym",
"iao:alternative_label",
"ols:synonym",
"oboinowl:hasExactSynonym",
"dcterms:alternative"
],
[
"mondo:0000001",
"ogms:0000031"
],
[
"biolink:broad_match",
"skos:broader",
"skos:broadMatch",
"ols:directAncestor"
],
[
"biolink:subclass_of",
"ols:directParent",
"rdfs:subClassOf",
"rdfs:subPropertyOf"
],
[
"rdfs:isDefinedBy",
"ols:ontologyIri",
"ols:ontologyId"
]
],
"exclude_props": [
"ols:hierarchicalProperty",
"ols:synonymProperty",
"ols:curie",
"ols:shortForm",
"ols:ontologyPreferredPrefix",
"ols:iri",
"ols:uri",
"ols:imported",
"ols:hasHierarchicalParents",
"ols:hasHierarchicalChildren",
"ols:hasDirectParents",
"ols:hasDirectChildren",
"ols:numDescendants",
"ols:numHierarchicalDescendants",
"oboinowl:id",
"oboinowl:url",
"monarch:iri",
"cco:hasDocument",
"cco:hasMolecule"
],
"exclude_edges": [],
"exclude_self_referential_edges": [
"foaf:page",
"rdfs:seeAlso",
"oboinowl:hasDbXref",
"biolink:subclass_of",
"biolink:broad_match",
"ols:ontology_purl",
"ols:ontologyId",
"cheminf:000407",
"cheminf:InChIKey",
"biolink:interacts_with",
"reactome:url",
"dc:Identifier",
"hgnc:agr",
"hgnc:gencc",
"monarch:xref",
"hgnc:uniprot_ids",
"hgnc:omim_id",
"hgnc:entrez_id",
"reactome:referenceGene",
"reactome:identifier",
"reactome:crossReference",
"edam:Ensembl_gene_id",
"obo:pr#has_gene_template",
"ols:relatedTo",
"ols:relatedFrom",
"robokop:smiles",
"ctd:ChemicalURL",
"ctd:UniProtIDs"
],
"datasource_configs": [
"./configs/datasource_configs/gwas.yaml",
"./configs/datasource_configs/hgnc.yaml",
"./configs/datasource_configs/impc.yaml",
"./configs/datasource_configs/sssom.yaml",
"./configs/datasource_configs/ols.yaml",
"./configs/datasource_configs/reactome.yaml",
"./configs/datasource_configs/ubergraph.yaml",
"./configs/datasource_configs/otar.yaml",
"./configs/datasource_configs/monarch.yaml",
"./configs/datasource_configs/metabolights.yaml",
"./configs/datasource_configs/mondo_efo.yaml",
"./configs/datasource_configs/ctd.yaml",
"./configs/datasource_configs/hett_pesticides_appril.yaml",
"./configs/datasource_configs/hett_pesticides_eu.yaml",
"./configs/datasource_configs/hett_pesticides_gb.yaml",
"./configs/datasource_configs/aopwiki.yaml",
"./configs/datasource_configs/chembl.yaml",
"./configs/datasource_configs/robokop_alliance.yaml",
"./configs/datasource_configs/robokop_binding.yaml",
"./configs/datasource_configs/robokop_cam.yaml",
"./configs/datasource_configs/robokop_drugcentral.yaml",
"./configs/datasource_configs/robokop_gtex.yaml",
"./configs/datasource_configs/robokop_gtopdb.yaml",
"./configs/datasource_configs/robokop_hetionet.yaml",
"./configs/datasource_configs/robokop_hgoa.yaml",
"./configs/datasource_configs/robokop_hmdb.yaml",
"./configs/datasource_configs/robokop_icees.yaml",
"./configs/datasource_configs/robokop_intact.yaml",
"./configs/datasource_configs/robokop_panther.yaml",
"./configs/datasource_configs/robokop_pharos.yaml",
"./configs/datasource_configs/robokop_string.yaml",
"./configs/datasource_configs/robokop_textmining.yaml",
"./configs/datasource_configs/robokop_viralproteome.yaml",
"./configs/datasource_configs/mesh.yaml"
]
}
Loading

0 comments on commit 4dd9ba8

Please sign in to comment.