From 64119bd84e6306695f80bca52de8cf9b7dbe8da2 Mon Sep 17 00:00:00 2001
From: Al Niessner
Date: Tue, 1 Aug 2023 14:55:31 -0700
Subject: [PATCH] minor updates for running

---
 src/pds/registrysweepers/repairkit/__init__.py  | 9 ++++++---
 src/pds/registrysweepers/repairkit/allarrays.py | 6 ++++++
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/pds/registrysweepers/repairkit/__init__.py b/src/pds/registrysweepers/repairkit/__init__.py
index 9ce094d..1cd49ec 100644
--- a/src/pds/registrysweepers/repairkit/__init__.py
+++ b/src/pds/registrysweepers/repairkit/__init__.py
@@ -56,11 +56,14 @@ def run(base_url:str,
     host = Host(password, base_url, username, verify_host_certs)
     query = {"match_all":{}}
     for document in query_registry_db(host, query, {}):
+        id = document['_id']
+        src = document['_source']
         repairs = {}
-        for fieldname,data in document.items():
+        log.debug (f'working on document: {id}')
+        for fieldname,data in src.items():
             for regex,funcs in REPAIR_TOOLS.items():
-                if regex(filename):
-                    repairs.update(func(document, fieldname) for func in funcs)
+                if regex(fieldname):
+                    for func in funcs: repairs.update(func(src, fieldname))
         if repairs:
             log.info(f'Writing repairs to document: {id}')
             write_update_docs(host, {id:repairs})
diff --git a/src/pds/registrysweepers/repairkit/allarrays.py b/src/pds/registrysweepers/repairkit/allarrays.py
index cddc26a..cee304c 100644
--- a/src/pds/registrysweepers/repairkit/allarrays.py
+++ b/src/pds/registrysweepers/repairkit/allarrays.py
@@ -1,6 +1,12 @@
 '''change single strings to array of strings'''
+import logging
+
+log = logging.getLogger(__name__)
+
+
 
 def repair (document:{}, fieldname:str)->bool:
+    log.debug(f'checking {fieldname}')
     if isinstance (document[fieldname], str):
         log.info (f'found string for {fieldname} where it should be an array')
         return {fieldname:[document[fieldname]]}