generated from NASA-PDS/template-repo-python
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #54 from NASA-PDS/registry-api-349
New repairkit sweeper for updating metadata loaded with older versions of Harvest to use lists instead of single-value strings
- Loading branch information
Showing
6 changed files
with
120 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
"""repairkit is an executable package | ||
The reason repairkit is an executable package is for extension as new repairs | ||
are needed in the future. They can be added by updating the REPAIR_TOOLS mapping | ||
with the new field name and functional requirements. All the additions can then | ||
be modules with this executable package. | ||
""" | ||
import logging | ||
import re | ||
from typing import Union | ||
|
||
from pds.registrysweepers.utils import configure_logging | ||
from pds.registrysweepers.utils import Host | ||
from pds.registrysweepers.utils import query_registry_db | ||
from pds.registrysweepers.utils import write_updated_docs | ||
|
||
from . import allarrays | ||
|
||
""" | ||
dictionary repair tools is {field_name:[funcs]} where field_name can be: | ||
1: re.compile().fullmatch for the equivalent of "fred" == "fred" | ||
2: re.compile().match for more complex matching of subparts of the string | ||
and funcs are: | ||
def function_name (document:{}, fieldname:str)->{} | ||
and the return an empty {} if no changes and {fieldname:new_value} for repairs | ||
Examples | ||
re.compile("^ops:Info/.+").match("ops:Info/ops:filesize")->match object | ||
re.compile("^ops:Info/.+").fullmatch("ops:Info/ops:filesize")->match object | ||
re.compile("^ops:Info/").match("ops:Info/ops:filesize")->match object | ||
re.compile("^ops:Info/").fullmatch("ops:Info/ops:filesize")->None | ||
To get str_a == str_b, re.compile(str_a).fullmatch | ||
""" | ||
|
||
REPAIR_TOOLS = { | ||
re.compile("^ops:Data_File_Info/").match: [allarrays.repair], | ||
re.compile("^ops:Label_File_Info/").match: [allarrays.repair], | ||
} | ||
|
||
log = logging.getLogger(__name__) | ||
|
||
|
||
def run( | ||
base_url: str, | ||
username: str, | ||
password: str, | ||
verify_host_certs: bool = True, | ||
log_filepath: Union[str, None] = None, | ||
log_level: int = logging.INFO, | ||
): | ||
configure_logging(filepath=log_filepath, log_level=log_level) | ||
log.info("starting CLI processing") | ||
host = Host(password, base_url, username, verify_host_certs) | ||
for document in query_registry_db(host, {"match_all": {}}, {}): | ||
id = document["_id"] | ||
src = document["_source"] | ||
repairs = {} | ||
log.debug(f"working on document: {id}") | ||
for fieldname, data in src.items(): | ||
for regex, funcs in REPAIR_TOOLS.items(): | ||
if regex(fieldname): | ||
for func in funcs: | ||
repairs.update(func(src, fieldname)) | ||
if repairs: | ||
log.info(f"Writing repairs to document: {id}") | ||
write_updated_docs(host, {id: repairs}) | ||
return |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
from pds.registrysweepers.repairkit import run | ||
from pds.registrysweepers.utils import parse_args | ||
|
||
args = parse_args(description="sweep through the registry documents and fix common errors") | ||
|
||
run( | ||
base_url=args.base_URL, | ||
username=args.username, | ||
password=args.password, | ||
verify_host_certs=not args.insecure, | ||
log_level=args.log_level, | ||
log_filepath=args.log_file, | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
"""change single strings to array of strings""" | ||
import logging | ||
from typing import Dict | ||
|
||
log = logging.getLogger(__name__) | ||
|
||
|
||
def repair(document: Dict, fieldname: str) -> Dict: | ||
log.debug(f"checking {fieldname}") | ||
if isinstance(document[fieldname], str): | ||
log.info(f"found string for {fieldname} where it should be an array") | ||
return {fieldname: [document[fieldname]]} | ||
return {} |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
import unittest | ||
|
||
from pds.registrysweepers.repairkit import allarrays | ||
|
||
|
||
class AllArrays(unittest.TestCase): | ||
def test_valid_field(self): | ||
src = {"apple": ["orange"]} | ||
repair = allarrays.repair(src, "apple") | ||
self.assertEqual({}, repair) | ||
|
||
def test_invalid_field(self): | ||
src = {"apple": "orange"} | ||
repair = allarrays.repair(src, "apple") | ||
self.assertEqual({"apple": ["orange"]}, repair) | ||
|
||
|
||
if __name__ == "__main__": | ||
unittest.main() |