From 2bc57b563f5310db5b734b8ec717eba241cfb32c Mon Sep 17 00:00:00 2001 From: ryboselm Date: Thu, 7 Apr 2022 00:42:11 -0400 Subject: [PATCH] add clean_whitespace option to fuzzy retrieval function --- regolith/tools.py | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/regolith/tools.py b/regolith/tools.py index 3ee0d5443..50186dcc2 100644 --- a/regolith/tools.py +++ b/regolith/tools.py @@ -916,7 +916,7 @@ def document_by_value(documents, address, value): return g_doc -def fuzzy_retrieval(documents, sources, value, case_sensitive=True): +def fuzzy_retrieval(documents, sources, value, case_sensitive=True, clean_whitespace=True): """Retrieve a document from the documents where value is compared against multiple potential sources @@ -930,6 +930,8 @@ def fuzzy_retrieval(documents, sources, value, case_sensitive=True): The value to compare against to find the document of interest case_sensitive: Bool When true will match case (Default = True) + clean_whitespace: Bool + When true will strip whitespaces off front and end of string (Default = True) Returns ------- @@ -938,7 +940,7 @@ def fuzzy_retrieval(documents, sources, value, case_sensitive=True): Examples -------- - >>> fuzzy_retrieval(people, ['aka', 'name'], 'pi_name', case_sensitive = False) + >>> fuzzy_retrieval(people, ['aka', 'name'], 'pi_name', case_sensitive = False, clean_whitespace = True) This would get the person entry for which either the alias or the name was ``pi_name``. @@ -951,15 +953,28 @@ def fuzzy_retrieval(documents, sources, value, case_sensitive=True): if not isinstance(ret, list): ret = [ret] returns.extend(ret) - if not case_sensitive: - returns = [reti.lower() for reti in returns if - isinstance(reti, str)] - if isinstance(value, str): - if value.lower() in frozenset(returns): + if not clean_whitespace: + if not case_sensitive: + returns = [reti.lower() for reti in returns if + isinstance(reti, str)] + if isinstance(value, str): + if value.lower() in frozenset(returns): + return doc + else: + if value in frozenset(returns): return doc else: - if value in frozenset(returns): - return doc + if not case_sensitive: + returns = [reti.lower().strip() for reti in returns if + isinstance(reti, str)] + if isinstance(value, str): + if value.lower().strip() in frozenset(returns): + return doc + else: + returns = [reti.strip() for reti in returns if isinstance(reti, str)] + if isinstance(value, str): + if value.strip() in frozenset(returns): + return doc def number_suffix(number):