Skip to content

Commit

Permalink
Merge pull request #255 from biothings/fix-datasets
Browse files Browse the repository at this point in the history
Fix datasets with missing context or outdated names
  • Loading branch information
marcodarko authored May 2, 2024
2 parents 488a3c1 + 10590ce commit 9bdefc4
Showing 1 changed file with 46 additions and 0 deletions.
46 changes: 46 additions & 0 deletions scripts/fix_niaid_datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from discovery.model.dataset import Dataset

# Value written to a dataset's "@context" field when it has none: each key is
# a namespace prefix and each value is the base URL that prefix stands for.
niaidContext = {
    # General-purpose vocabularies.
    "schema": "http://schema.org/",
    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
    # Namespaces hosted under discovery.biothings.io.
    "bioschemas": "https://discovery.biothings.io/ns/bioschemas/",
    "niaid": "https://discovery.biothings.io/ns/niaid/",
}

def updateContext():
    """
    Update datasets missing @context and update @types with outdated names.

    Scans every document returned by ``Dataset.search()`` and, for each one:

    * sets ``@context`` to ``niaidContext`` when the document has no context;
    * prefixes ``@type`` with ``niaid:`` when the context was missing and the
      type has no ``:`` in it;
    * replaces ``niaid:Niaid`` with ``niaid:`` inside ``@type``, whether or
      not the context was missing.

    All fixes for a document are sent in one ``doc.update`` call, and a
    document that needs no fix is not written at all. When ``@type`` is
    missing, empty, or not a string, only the context fix is applied.

    Returns:
        None.
    """
    for doc in Dataset.search().scan():
        # The field names start with "@", so they are not valid Python
        # identifiers and have to be read with getattr.
        context = getattr(doc, "@context", None)
        clss_type = getattr(doc, "@type", None)
        changes = {}

        if not context:
            changes["@context"] = niaidContext

        # The substring checks below raise TypeError on None, and prefixing an
        # empty string would store the meaningless type "niaid:", so only a
        # non-empty string is eligible for type fixes.
        if isinstance(clss_type, str) and clss_type:
            fixed_type = clss_type
            if not context and ":" not in fixed_type:
                fixed_type = "niaid:" + fixed_type
            # Check the already-prefixed name so that a bare "NiaidX" ends up
            # as "niaid:X" in this run instead of needing a second one.
            if "niaid:Niaid" in fixed_type:
                fixed_type = fixed_type.replace("niaid:Niaid", "niaid:")
            if fixed_type != clss_type:
                changes["@type"] = fixed_type

        if changes:
            doc.update(**changes)



# Run the fix only when this file is executed as a script, so that importing
# the module does not modify any datasets.
if __name__ == "__main__":
    updateContext()

0 comments on commit 9bdefc4

Please sign in to comment.