From 1eebb7c99918406dbe6a5e18e8fcc63d340d8cea Mon Sep 17 00:00:00 2001 From: Marco Cano Date: Tue, 30 Apr 2024 09:54:52 -0700 Subject: [PATCH 1/2] script to fix niaid datasets --- scripts/fix_niaid_datasets.py | 42 +++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 scripts/fix_niaid_datasets.py diff --git a/scripts/fix_niaid_datasets.py b/scripts/fix_niaid_datasets.py new file mode 100644 index 00000000..534a25ad --- /dev/null +++ b/scripts/fix_niaid_datasets.py @@ -0,0 +1,42 @@ +import logging + +from discovery.model.dataset import Dataset + +niaidContext = { +"schema": "http://schema.org/", +"rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", +"rdfs": "http://www.w3.org/2000/01/rdf-schema#", +"bioschemas": "https://discovery.biothings.io/view/bioschemas/", +"niaid": "https://discovery.biothings.io/view/niaid/", +"nde": "https://discovery.biothings.io/view/nde/" +} + +def updateContext(): + """ + Update datasets missing @context and update @types with outdated names + """ + ids = [] + needs_renaming =[] + docs = Dataset.search() + for doc in docs.scan(): + d = doc.to_dict() + context = d.get("@context") + datasetID = getattr(getattr(doc, 'meta', None), 'id', None) + typ = getattr(doc, "@type", None) + if not context: + ids.append(datasetID) + # doc.update(**{"@context": niaidContext}) + if 'niaid:Niaid' in typ: + needs_renaming.append(typ) + # doc.update(**{"@type": "niaid:Dataset"}) + + print(len(ids)) + print(ids) + print("++++++++++++++++++") + print(len(needs_renaming)) + print(needs_renaming) + + + +if __name__ == "__main__": + updateContext() From 10590ce23fd25333d3356cf11339ba3f141081be Mon Sep 17 00:00:00 2001 From: Marco Cano Date: Thu, 2 May 2024 14:16:24 -0700 Subject: [PATCH 2/2] feat: :sparkles: add script to fix niaid datasets missing context --- scripts/fix_niaid_datasets.py | 46 +++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/scripts/fix_niaid_datasets.py b/scripts/fix_niaid_datasets.py index 534a25ad..282328ed 100644 --- a/scripts/fix_niaid_datasets.py +++ b/scripts/fix_niaid_datasets.py @@ -1,40 +1,44 @@ -import logging - from discovery.model.dataset import Dataset niaidContext = { "schema": "http://schema.org/", "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", "rdfs": "http://www.w3.org/2000/01/rdf-schema#", -"bioschemas": "https://discovery.biothings.io/view/bioschemas/", -"niaid": "https://discovery.biothings.io/view/niaid/", -"nde": "https://discovery.biothings.io/view/nde/" +"bioschemas": "https://discovery.biothings.io/ns/bioschemas/", +"niaid": "https://discovery.biothings.io/ns/niaid/", } def updateContext(): """ Update datasets missing @context and update @types with outdated names """ - ids = [] - needs_renaming =[] + # ids = [] + # needs_renaming =[] docs = Dataset.search() for doc in docs.scan(): - d = doc.to_dict() - context = d.get("@context") - datasetID = getattr(getattr(doc, 'meta', None), 'id', None) - typ = getattr(doc, "@type", None) + context = getattr(doc, '@context', None) + # dataset_id = getattr(getattr(doc, 'meta', None), 'id', None) + clss_type = getattr(doc, "@type", None) if not context: - ids.append(datasetID) - # doc.update(**{"@context": niaidContext}) - if 'niaid:Niaid' in typ: - needs_renaming.append(typ) - # doc.update(**{"@type": "niaid:Dataset"}) + if ":" not in clss_type: + # add context and fix class name + added_prefix = "niaid:" + clss_type + # ids.append(added_prefix) + doc.update(**{"@context": niaidContext, "@type": added_prefix}) + else: + # add context + # ids.append('JUST CONTEXT') + doc.update(**{"@context": niaidContext}) + if 'niaid:Niaid' in clss_type: + updated_name = clss_type.replace("niaid:Niaid", "niaid:") + # needs_renaming.append(clss_type + " >>> " + updated_name) + doc.update(**{"@type": updated_name}) - print(len(ids)) - print(ids) - print("++++++++++++++++++") - print(len(needs_renaming)) - print(needs_renaming) + # print(len(ids)) + # print(ids) + # print("++++++++++++++++++") + # print(len(needs_renaming)) + # print(needs_renaming)