From 7609846a67d1f006c270d8d31b7f13bd219a92da Mon Sep 17 00:00:00 2001
From: Griffin Roupe
Date: Fri, 20 May 2022 15:40:40 -0400
Subject: [PATCH 01/18] Adds full-text search method to GraphInterface

---
 PLATER/services/util/graph_adapter.py | 52 +++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/PLATER/services/util/graph_adapter.py b/PLATER/services/util/graph_adapter.py
index 842832e..de8d855 100644
--- a/PLATER/services/util/graph_adapter.py
+++ b/PLATER/services/util/graph_adapter.py
@@ -1,5 +1,6 @@
 import base64
 import traceback
+import re
 
 import httpx
 
@@ -44,6 +45,57 @@ def find_biolink_leaves(self, biolink_concepts: list):
         leaf_set = all_concepts - ancestry_set
         return leaf_set
 
+    def search(self, query, indexes, fields=None, options={
+        "prefix_search": False
+    }):
+        """
+        Execute a query against the graph's RediSearch indexes
+        :param query: Search query.
+        :type query: str
+        :param indexes: List of indexes to search against.
+        :type indexes: list
+        :param [fields]: List of properties to search against. If None, searches all fields. Note that this argument is unimplemented and will be ignored.
+        :type [fields]: list
+        :param [options]: Additional configuration options specifying how the search should be executed against the graph.
+        :type [options]: dict
+        :return: List of nodes and search scores
+        :rtype: List[dict]
+        """
+        prefix_search = options.get("prefix_search", False)
+        # It seems that stop words and token characters don't tokenize properly and simply break within
+        # redisgraph's current RediSearch implementation (https://github.com/RedisGraph/RedisGraph/issues/1638)
+        stop_words = [
+            'a', 'is', 'the', 'an', 'and', 'are', 'as', 'at', 'be', 'but', 'by', 'for', 'if', 'in', 'into', 'it',
+            'no', 'not', 'of', 'on', 'or', 'such', 'that', 'their', 'then', 'there', 'these', 'they', 'this', 'to',
+            'was', 'will', 'with'
+        ]
+        token_chars = [
+            ',', '.', '<', '>', '{', '}', '[', ']', '"', "'", ':', ';', '!', '@', '#', '$', '%', '^', '&', '*', '(',
+            ')', '-', '+', '=', '~'
+        ]
+        re_stop_words = r"\b(" + "|".join(stop_words) + r")\b\s*"
+        re_token_chars = "[" + re.escape("".join(token_chars)) + "]"
+        cleaned_query = re.sub(re_stop_words, "", query)
+        cleaned_query = re.sub(re_token_chars, " ", cleaned_query)
+        if prefix_search: cleaned_query += "*"
+        # Have to execute multi-index searches in a rudimentary way due to the limitations of redisearch in redisgraph.
+        statements = [
+            f"""
+            CALL db.idx.fulltext.queryNodes('{index}', '{cleaned_query}')
+            YIELD node, score
+            RETURN node, score
+            """
+            for index in indexes
+        ]
+        query = "UNION".join(statements)
+        logger.info(f"starting search query {query} on graph...")
+        logger.debug(f"cleaned query: {cleaned_query}")
+        result = self.driver.run_sync(query)
+        hits = self.convert_to_dict(result)
+        for hit in hits:
+            hit["node"] = dict(dict(hit["node"])["properties"])
+        return hits
+
     def get_schema(self, force_update=False):
         """
         Gets the schema of the graph. To be used by. Also generates graph summary
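The cleaning step in this first patch is easiest to see in isolation. The following is a minimal standalone sketch of the same stop-word and token-character scrubbing (the word lists are abbreviated, and the sample query is purely illustrative):

```python
import re

# Abbreviated copies of the lists from the patch above.
stop_words = ['a', 'is', 'the', 'an', 'and', 'of', 'on', 'with']
token_chars = [',', '.', '<', '>', '{', '}', '[', ']', '"', "'", ':', ';']

re_stop_words = r"\b(" + "|".join(stop_words) + r")\b\s*"
re_token_chars = "[" + re.escape("".join(token_chars)) + "]"

query = "the fanconi anemia, type a"           # illustrative input
cleaned = re.sub(re_stop_words, "", query)     # strip stop words
cleaned = re.sub(re_token_chars, " ", cleaned) # token chars become spaces
print(repr(cleaned))                           # -> 'fanconi anemia  type '
```

Note the trailing space and the double space in the output: the former is what patch 02's `.strip()` removes before a `*` suffix is appended for prefix search, and the latter is what patch 11 later collapses.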
From 39f893b8bd77e98781759c339f96d5abd91a2d5a Mon Sep 17 00:00:00 2001
From: Griffin Roupe
Date: Fri, 20 May 2022 16:28:41 -0400
Subject: [PATCH 02/18] Strip search query to avoid syntax errors triggered by
 prefix_search

---
 PLATER/services/util/graph_adapter.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/PLATER/services/util/graph_adapter.py b/PLATER/services/util/graph_adapter.py
index de8d855..93c49a6 100644
--- a/PLATER/services/util/graph_adapter.py
+++ b/PLATER/services/util/graph_adapter.py
@@ -77,6 +77,7 @@ def search(self, query, indexes, fields=None, options={
         re_token_chars = "[" + re.escape("".join(token_chars)) + "]"
         cleaned_query = re.sub(re_stop_words, "", query)
         cleaned_query = re.sub(re_token_chars, " ", cleaned_query)
+        cleaned_query = cleaned_query.strip()
         if prefix_search: cleaned_query += "*"
         # Have to execute multi-index searches in a rudimentary way due to the limitations of redisearch in redisgraph.
         statements = [

From 71762b45a5c467cced2480d8d31b7f13bd219a92 Mon Sep 17 00:00:00 2001
From: Griffin Roupe
Date: Fri, 20 May 2022 17:12:22 -0400
Subject: [PATCH 03/18] Adds result limit option to limit search results

---
 PLATER/services/util/graph_adapter.py | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/PLATER/services/util/graph_adapter.py b/PLATER/services/util/graph_adapter.py
index 93c49a6..9d9aa82 100644
--- a/PLATER/services/util/graph_adapter.py
+++ b/PLATER/services/util/graph_adapter.py
@@ -46,7 +46,8 @@ def find_biolink_leaves(self, biolink_concepts: list):
         return leaf_set
 
     def search(self, query, indexes, fields=None, options={
-        "prefix_search": False
+        "prefix_search": False,
+        "query_limit": 50
     }):
         """
         Execute a query against the graph's RediSearch indexes
@@ -62,6 +63,7 @@ def search(self, query, indexes, fields=None, options={
         :rtype: List[dict]
         """
         prefix_search = options.get("prefix_search", False)
+        query_limit = options.get("query_limit", 50)
         # It seems that stop words and token characters don't tokenize properly and simply break within
         # redisgraph's current RediSearch implementation (https://github.com/RedisGraph/RedisGraph/issues/1638)
         stop_words = [
@@ -79,12 +81,29 @@ def search(self, query, indexes, fields=None, options={
         cleaned_query = re.sub(re_token_chars, " ", cleaned_query)
         cleaned_query = cleaned_query.strip()
         if prefix_search: cleaned_query += "*"
         # Have to execute multi-index searches in a rudimentary way due to the limitations of redisearch in redisgraph.
+        # Divide the query limit evenly between each statement so that, for example, if a user searches two indexes for a term,
+        # they won't end up with 50 results from the first index and 0 from the second because the query limit is 50.
+        # Instead they'll get 25 from the first index, and 25 from the second.
+        per_statement_limit = query_limit // len(indexes)
+        remainder = query_limit % len(indexes)
+        per_statement_limits = {index: per_statement_limit for index in indexes}
+        # Distribute the remainder across each statement limit.
+        # So that, for example, if the limit is 50 and there are 3 indexes, it'll be distributed as {index0: 17, index1: 17, index2: 16}
+        i = 0
+        while remainder > 0:
+            per_statement_limits[indexes[i]] += 1
+            remainder -= 1
+            i += 1
+            if i == len(indexes):
+                i = 0
         statements = [
             f"""
             CALL db.idx.fulltext.queryNodes('{index}', '{cleaned_query}')
            YIELD node, score
             RETURN node, score
+            LIMIT {per_statement_limits[index]}
             """
             for index in indexes
         ]
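Patch 03's limit-splitting logic can be restated as a small pure function. In the sketch below the index names are hypothetical, and the modulo increment is equivalent to the patch's wrap-around `if` check:

```python
def split_limit(query_limit, indexes):
    """Divide query_limit across indexes, handing out the remainder round-robin."""
    base = query_limit // len(indexes)
    remainder = query_limit % len(indexes)
    limits = {index: base for index in indexes}
    i = 0
    while remainder > 0:
        limits[indexes[i]] += 1
        remainder -= 1
        i = (i + 1) % len(indexes)  # wrap around, as the patch does with its if-check
    return limits

# Hypothetical index names, for illustration only.
print(split_limit(50, ["name_index", "synonym_index", "id_index"]))
# -> {'name_index': 17, 'synonym_index': 17, 'id_index': 16}
```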
From a0f7c403f48acc75da9b40031e6f585d2f9ef801 Mon Sep 17 00:00:00 2001
From: Griffin Roupe
Date: Fri, 20 May 2022 17:24:32 -0400
Subject: [PATCH 04/18] Adds ORDER BY to ensure best results are returned when
 query results are limited

---
 PLATER/services/util/graph_adapter.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/PLATER/services/util/graph_adapter.py b/PLATER/services/util/graph_adapter.py
index 9d9aa82..fcd88cc 100644
--- a/PLATER/services/util/graph_adapter.py
+++ b/PLATER/services/util/graph_adapter.py
@@ -98,11 +98,14 @@ def search(self, query, indexes, fields=None, options={
             i += 1
             if i == len(indexes):
                 i = 0
+        # Note that although the native Lucene implementation used by Neo4j will always return hits ordered by descending score
+        # i.e. highest to lowest score order, RediSearch does not do this, so an ORDER BY statement is necessary.
         statements = [
             f"""
             CALL db.idx.fulltext.queryNodes('{index}', '{cleaned_query}')
             YIELD node, score
             RETURN node, score
+            ORDER BY score, DESC
             LIMIT {per_statement_limits[index]}
             """
             for index in indexes

From f38386f34233f4d72a21a465c1252049a3fa1aec Mon Sep 17 00:00:00 2001
From: Griffin Roupe
Date: Fri, 20 May 2022 18:27:08 -0400
Subject: [PATCH 05/18] Sort unionized hit scores for multiquery (multiple
 indexes) searches

---
 PLATER/services/util/graph_adapter.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/PLATER/services/util/graph_adapter.py b/PLATER/services/util/graph_adapter.py
index fcd88cc..da3a474 100644
--- a/PLATER/services/util/graph_adapter.py
+++ b/PLATER/services/util/graph_adapter.py
@@ -117,6 +117,7 @@ def search(self, query, indexes, fields=None, options={
         hits = self.convert_to_dict(result)
         for hit in hits:
             hit["node"] = dict(dict(hit["node"])["properties"])
+        hits.sort(key=lambda hit: hit["score"])
         return hits
 
     def get_schema(self, force_update=False):

From fe9f2f5aaf588bc8d4ed2882096455254cb3a7f6 Mon Sep 17 00:00:00 2001
From: Griffin Roupe
Date: Fri, 20 May 2022 19:47:40 -0400
Subject: [PATCH 06/18] Turns str score into int

---
 PLATER/services/util/graph_adapter.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/PLATER/services/util/graph_adapter.py b/PLATER/services/util/graph_adapter.py
index da3a474..f0d803e 100644
--- a/PLATER/services/util/graph_adapter.py
+++ b/PLATER/services/util/graph_adapter.py
@@ -117,6 +117,7 @@ def search(self, query, indexes, fields=None, options={
         hits = self.convert_to_dict(result)
         for hit in hits:
             hit["node"] = dict(dict(hit["node"])["properties"])
+            hit["score"] = int(hit["score"])
         hits.sort(key=lambda hit: hit["score"])
         return hits
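Patches 04 through 06 interact: each UNION branch is ordered and limited on the server, but the concatenated result is not globally sorted, and the graph returns scores as strings. A short sketch with made-up hits shows why the client-side sort and cast are still needed at this point in the series (the comments note the later refinements):

```python
# Made-up hits, as if two UNION branches came back already sorted per index.
hits_index_a = [{"node": {"id": "A:1"}, "score": "9"},
                {"node": {"id": "A:2"}, "score": "2"}]
hits_index_b = [{"node": {"id": "B:1"}, "score": "5"}]

hits = hits_index_a + hits_index_b        # what the UNION effectively yields
for hit in hits:
    hit["score"] = int(hit["score"])      # patch 06's cast (float as of patch 09)
hits.sort(key=lambda hit: hit["score"])   # patch 05's sort (descending as of patch 08)
print([hit["node"]["id"] for hit in hits])  # -> ['A:2', 'B:1', 'A:1']
```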
From 5439cbc5a6e8507b482ebb07b852ff294b1832c4 Mon Sep 17 00:00:00 2001
From: Griffin Roupe
Date: Fri, 20 May 2022 19:53:47 -0400
Subject: [PATCH 07/18] Fix desc order by statement

---
 PLATER/services/util/graph_adapter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/PLATER/services/util/graph_adapter.py b/PLATER/services/util/graph_adapter.py
index f0d803e..68111b9 100644
--- a/PLATER/services/util/graph_adapter.py
+++ b/PLATER/services/util/graph_adapter.py
@@ -105,7 +105,7 @@ def search(self, query, indexes, fields=None, options={
             CALL db.idx.fulltext.queryNodes('{index}', '{cleaned_query}')
             YIELD node, score
             RETURN node, score
-            ORDER BY score, DESC
+            ORDER BY score DESC
             LIMIT {per_statement_limits[index]}
             """
             for index in indexes

From 22e223236e3013eb2fe7c3cd6bd9d121eeb40cdd Mon Sep 17 00:00:00 2001
From: Griffin Roupe
Date: Fri, 20 May 2022 19:58:15 -0400
Subject: [PATCH 08/18] Fix post-query sorting to be descending for
 consistency

---
 PLATER/services/util/graph_adapter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/PLATER/services/util/graph_adapter.py b/PLATER/services/util/graph_adapter.py
index 68111b9..27cadc3 100644
--- a/PLATER/services/util/graph_adapter.py
+++ b/PLATER/services/util/graph_adapter.py
@@ -118,7 +118,7 @@ def search(self, query, indexes, fields=None, options={
         for hit in hits:
             hit["node"] = dict(dict(hit["node"])["properties"])
             hit["score"] = int(hit["score"])
-        hits.sort(key=lambda hit: hit["score"])
+        hits.sort(key=lambda hit: hit["score"], reverse=True)
         return hits
 
     def get_schema(self, force_update=False):

From b645995ecc926c4fe00ebe1dd445aa4c83892364 Mon Sep 17 00:00:00 2001
From: Griffin Roupe
Date: Fri, 20 May 2022 21:27:27 -0400
Subject: [PATCH 09/18] Change field int cast to float cast

---
 PLATER/services/util/graph_adapter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/PLATER/services/util/graph_adapter.py b/PLATER/services/util/graph_adapter.py
index 27cadc3..690d769 100644
--- a/PLATER/services/util/graph_adapter.py
+++ b/PLATER/services/util/graph_adapter.py
@@ -117,7 +117,7 @@ def search(self, query, indexes, fields=None, options={
         hits = self.convert_to_dict(result)
         for hit in hits:
             hit["node"] = dict(dict(hit["node"])["properties"])
-            hit["score"] = int(hit["score"])
+            hit["score"] = float(hit["score"])
         hits.sort(key=lambda hit: hit["score"], reverse=True)
         return hits

From 9924d5182b546f14d0e00a157495a2197f421718 Mon Sep 17 00:00:00 2001
From: Griffin Roupe
Date: Mon, 23 May 2022 16:15:31 -0400
Subject: [PATCH 10/18] Add levenshtein distance fuzzy searching

---
 PLATER/services/util/graph_adapter.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/PLATER/services/util/graph_adapter.py b/PLATER/services/util/graph_adapter.py
index 690d769..bae8272 100644
--- a/PLATER/services/util/graph_adapter.py
+++ b/PLATER/services/util/graph_adapter.py
@@ -47,6 +47,7 @@ def find_biolink_leaves(self, biolink_concepts: list):
 
     def search(self, query, indexes, fields=None, options={
         "prefix_search": False,
+        "levenshtein_distance": 0,
         "query_limit": 50
     }):
@@ -63,6 +64,7 @@ def search(self, query, indexes, fields=None, options={
         :rtype: List[dict]
         """
         prefix_search = options.get("prefix_search", False)
+        levenshtein_distance = options.get("levenshtein_distance", 0)
         query_limit = options.get("query_limit", 50)
         # It seems that stop words and token characters don't tokenize properly and simply break within
         # redisgraph's current RediSearch implementation (https://github.com/RedisGraph/RedisGraph/issues/1638)
@@ -81,6 +83,7 @@ def search(self, query, indexes, fields=None, options={
         cleaned_query = re.sub(re_token_chars, " ", cleaned_query)
         cleaned_query = cleaned_query.strip()
         if prefix_search: cleaned_query += "*"
+        elif levenshtein_distance: cleaned_query = ("%" * levenshtein_distance) + cleaned_query + ("%" * levenshtein_distance)
         # Have to execute multi-index searches in a rudimentary way due to the limitations of redisearch in redisgraph.
         # Divide the query limit evenly between each statement so that, for example, if a user searches two indexes for a term,
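Patch 10 leans on RediSearch's fuzzy-match syntax, in which a term wrapped in N `%` characters matches anything within Levenshtein distance N. A sketch of the wrapping as it stands at this point, where the whole cleaned query is wrapped as one unit:

```python
def fuzz(cleaned_query, levenshtein_distance):
    # RediSearch reads %term% as distance 1, %%term%% as distance 2, etc.
    pct = "%" * levenshtein_distance
    return pct + cleaned_query + pct

print(fuzz("anemia", 1))           # -> %anemia%
print(fuzz("fanconi anemia", 2))   # -> %%fanconi anemia%%
# Wrapping a multi-word query as a single unit misbehaves, which is what
# patches 12 and 16 rework into per-term wrapping.
```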
From a3bba032787bc46a370c4f274b7caa60342dbef1 Mon Sep 17 00:00:00 2001
From: Griffin Roupe
Date: Mon, 23 May 2022 16:49:56 -0400
Subject: [PATCH 11/18] Add fix for multiple spaces in a row messing up search

---
 PLATER/services/util/graph_adapter.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/PLATER/services/util/graph_adapter.py b/PLATER/services/util/graph_adapter.py
index bae8272..b337720 100644
--- a/PLATER/services/util/graph_adapter.py
+++ b/PLATER/services/util/graph_adapter.py
@@ -81,6 +81,8 @@ def search(self, query, indexes, fields=None, options={
         re_token_chars = "[" + re.escape("".join(token_chars)) + "]"
         cleaned_query = re.sub(re_stop_words, "", query)
         cleaned_query = re.sub(re_token_chars, " ", cleaned_query)
+        # Replace more than 1 consecutive space with just 1 space, since multi-spaces can mess up the search alg.
+        cleaned_query = re.sub(" +", " ", cleaned_query)
         cleaned_query = cleaned_query.strip()
         if prefix_search: cleaned_query += "*"
         elif levenshtein_distance: cleaned_query = ("%" * levenshtein_distance) + cleaned_query + ("%" * levenshtein_distance)

From 63e57e77551654742a805944ee68608c49fa1959 Mon Sep 17 00:00:00 2001
From: Griffin Roupe
Date: Mon, 23 May 2022 17:05:01 -0400
Subject: [PATCH 12/18] Remove LD for now

---
 PLATER/services/util/graph_adapter.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/PLATER/services/util/graph_adapter.py b/PLATER/services/util/graph_adapter.py
index b337720..e2af8d6 100644
--- a/PLATER/services/util/graph_adapter.py
+++ b/PLATER/services/util/graph_adapter.py
@@ -81,11 +81,16 @@ def search(self, query, indexes, fields=None, options={
         re_token_chars = "[" + re.escape("".join(token_chars)) + "]"
         cleaned_query = re.sub(re_stop_words, "", query)
         cleaned_query = re.sub(re_token_chars, " ", cleaned_query)
-        # Replace more than 1 consecutive space with just 1 space, since multi-spaces can mess up the search alg.
+        # Replace more than 1 consecutive space with just 1 space.
         cleaned_query = re.sub(" +", " ", cleaned_query)
         cleaned_query = cleaned_query.strip()
         if prefix_search: cleaned_query += "*"
-        elif levenshtein_distance: cleaned_query = ("%" * levenshtein_distance) + cleaned_query + ("%" * levenshtein_distance)
+        # elif levenshtein_distance:
+        #     # Enforced maximum LD by Redisearch.
+        #     if levenshtein_distance > 3: levenshtein_distance = 3
+        #     levenshtein_str = "%" * levenshtein_distance
+        #     cleaned_query = levenshtein_str + re.sub(" ", levenshtein_str + " " + levenshtein_str, cleaned_query) + levenshtein_str
+        # elif levenshtein_distance: cleaned_query = ("%" * levenshtein_distance) + cleaned_query + ("%" * levenshtein_distance)
         # Have to execute multi-index searches in a rudimentary way due to the limitations of redisearch in redisgraph.
         # Divide the query limit evenly between each statement so that, for example, if a user searches two indexes for a term,
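Patch 11 exists because the token-character pass substitutes spaces, so an input like `type-a, anemia` ends up with runs of spaces that break tokenization. A one-liner illustrates the collapse-then-strip fix (the input string is illustrative):

```python
import re

cleaned = " type a  anemia "            # e.g. after '-' and ',' became spaces
cleaned = re.sub(" +", " ", cleaned)    # collapse runs of spaces (patch 11)
cleaned = cleaned.strip()               # trim the ends (patch 02)
print(repr(cleaned))                    # -> 'type a anemia'
```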
From 5241d181dc9378336248e8334512707ff517059c Mon Sep 17 00:00:00 2001
From: Griffin Roupe
Date: Tue, 31 May 2022 12:53:02 -0400
Subject: [PATCH 13/18] Add optional cypher postprocessing option

---
 PLATER/services/util/graph_adapter.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/PLATER/services/util/graph_adapter.py b/PLATER/services/util/graph_adapter.py
index e2af8d6..96ba8e9 100644
--- a/PLATER/services/util/graph_adapter.py
+++ b/PLATER/services/util/graph_adapter.py
@@ -47,8 +47,9 @@ def find_biolink_leaves(self, biolink_concepts: list):
 
     def search(self, query, indexes, fields=None, options={
         "prefix_search": False,
+        "postprocessing_cypher": "",
         "levenshtein_distance": 0,
-        "query_limit": 50
+        "query_limit": 50,
     }):
         """
         Execute a query against the graph's RediSearch indexes
@@ -114,6 +115,7 @@ def search(self, query, indexes, fields=None, options={
             f"""
             CALL db.idx.fulltext.queryNodes('{index}', '{cleaned_query}')
             YIELD node, score
+            {postprocessing_cypher}
             RETURN node, score
             ORDER BY score DESC
             LIMIT {per_statement_limits[index]}

From c30a0def48a74036c1b45cd9e75da1891ae7ad2b Mon Sep 17 00:00:00 2001
From: Griffin Roupe
Date: Tue, 31 May 2022 12:56:43 -0400
Subject: [PATCH 14/18] Fix cypher postprocess

---
 PLATER/services/util/graph_adapter.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/PLATER/services/util/graph_adapter.py b/PLATER/services/util/graph_adapter.py
index 96ba8e9..ec474c9 100644
--- a/PLATER/services/util/graph_adapter.py
+++ b/PLATER/services/util/graph_adapter.py
@@ -65,6 +65,7 @@ def search(self, query, indexes, fields=None, options={
         :rtype: List[dict]
         """
         prefix_search = options.get("prefix_search", False)
+        postprocessing_cypher = options.get("postprocessing_cypher", "")
         levenshtein_distance = options.get("levenshtein_distance", 0)
         query_limit = options.get("query_limit", 50)

From 67fd769b42229979ca51c5abfa0cb3473ae2542a Mon Sep 17 00:00:00 2001
From: Griffin Roupe
Date: Wed, 1 Jun 2022 14:08:20 -0400
Subject: [PATCH 15/18] Add label data to hits

---
 PLATER/services/util/graph_adapter.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/PLATER/services/util/graph_adapter.py b/PLATER/services/util/graph_adapter.py
index ec474c9..7dad210 100644
--- a/PLATER/services/util/graph_adapter.py
+++ b/PLATER/services/util/graph_adapter.py
@@ -129,6 +129,7 @@ def search(self, query, indexes, fields=None, options={
         result = self.driver.run_sync(query)
         hits = self.convert_to_dict(result)
         for hit in hits:
+            hit["labels"] = dict(hit["node"])["labels"]
             hit["node"] = dict(dict(hit["node"])["properties"])
             hit["score"] = float(hit["score"])
         hits.sort(key=lambda hit: hit["score"], reverse=True)

From 2adca0994e7b3cd4c869e8772c7725fdbf4b99ef Mon Sep 17 00:00:00 2001
From: Griffin Roupe
Date: Wed, 1 Jun 2022 14:56:27 -0400
Subject: [PATCH 16/18] Add LD back after further consideration

---
 PLATER/services/util/graph_adapter.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/PLATER/services/util/graph_adapter.py b/PLATER/services/util/graph_adapter.py
index 7dad210..600287e 100644
--- a/PLATER/services/util/graph_adapter.py
+++ b/PLATER/services/util/graph_adapter.py
@@ -87,12 +87,11 @@ def search(self, query, indexes, fields=None, options={
         # Replace more than 1 consecutive space with just 1 space.
         cleaned_query = re.sub(" +", " ", cleaned_query)
         cleaned_query = cleaned_query.strip()
         if prefix_search: cleaned_query += "*"
-        # elif levenshtein_distance:
-        #     # Enforced maximum LD by Redisearch.
-        #     if levenshtein_distance > 3: levenshtein_distance = 3
-        #     levenshtein_str = "%" * levenshtein_distance
-        #     cleaned_query = levenshtein_str + re.sub(" ", levenshtein_str + " " + levenshtein_str, cleaned_query) + levenshtein_str
-        # elif levenshtein_distance: cleaned_query = ("%" * levenshtein_distance) + cleaned_query + ("%" * levenshtein_distance)
+        elif levenshtein_distance:
+            # Enforced maximum LD by Redisearch.
+            if levenshtein_distance > 3: levenshtein_distance = 3
+            levenshtein_str = "%" * levenshtein_distance  # e.g. LD = 3; "short phrase" => "%%%short%%% %%%phrase%%%"
+            cleaned_query = levenshtein_str + re.sub(" ", levenshtein_str + " " + levenshtein_str, cleaned_query) + levenshtein_str
         # Have to execute multi-index searches in a rudimentary way due to the limitations of redisearch in redisgraph.
         # Divide the query limit evenly between each statement so that, for example, if a user searches two indexes for a term,
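Patch 16 restores fuzzy search in per-term form: every space-separated term gets its own `%` markers, with the distance capped at 3, RediSearch's maximum. (The `postprocessing_cypher` hook from patch 13 is orthogonal; it is simply spliced between `YIELD` and `RETURN` in each generated statement.) Isolated, the per-term transformation looks like this:

```python
import re

def fuzz_terms(cleaned_query, levenshtein_distance):
    # Cap at RediSearch's enforced maximum Levenshtein distance.
    ld = min(levenshtein_distance, 3)
    pct = "%" * ld
    # Wrap each space-separated term: "short phrase" -> "%%%short%%% %%%phrase%%%"
    return pct + re.sub(" ", pct + " " + pct, cleaned_query) + pct

print(fuzz_terms("short phrase", 3))  # -> %%%short%%% %%%phrase%%%
print(fuzz_terms("anemia", 5))        # -> %%%anemia%%% (distance capped at 3)
```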
From 93f0a78af9360fd895dc315c416fa7e33055caa6 Mon Sep 17 00:00:00 2001
From: Griffin Roupe
Date: Thu, 2 Jun 2022 13:46:52 -0400
Subject: [PATCH 17/18] Return distinct nodes to avoid duplicates stemming
 from non-matching postprocessing cypher statements

---
 PLATER/services/util/graph_adapter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/PLATER/services/util/graph_adapter.py b/PLATER/services/util/graph_adapter.py
index 3d91838..4ebb3ca 100644
--- a/PLATER/services/util/graph_adapter.py
+++ b/PLATER/services/util/graph_adapter.py
@@ -116,7 +116,7 @@ def search(self, query, indexes, fields=None, options={
             CALL db.idx.fulltext.queryNodes('{index}', '{cleaned_query}')
             YIELD node, score
             {postprocessing_cypher}
-            RETURN node, score
+            RETURN distinct(node), score
             ORDER BY score DESC
             LIMIT {per_statement_limits[index]}
             """

From 765a1e979f110b5d2b7a103a1891ae7ad8eaefd5 Mon Sep 17 00:00:00 2001
From: Griffin Roupe
Date: Wed, 8 Jun 2022 12:21:15 -0400
Subject: [PATCH 18/18] Return hits & search terms

---
 PLATER/services/util/graph_adapter.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/PLATER/services/util/graph_adapter.py b/PLATER/services/util/graph_adapter.py
index 4ebb3ca..d20fffb 100644
--- a/PLATER/services/util/graph_adapter.py
+++ b/PLATER/services/util/graph_adapter.py
@@ -86,8 +86,9 @@ def search(self, query, indexes, fields=None, options={
         # Replace more than 1 consecutive space with just 1 space.
         cleaned_query = re.sub(" +", " ", cleaned_query)
         cleaned_query = cleaned_query.strip()
+        search_terms = cleaned_query.split(" ")
         if prefix_search: cleaned_query += "*"
-        elif levenshtein_distance:
+        if levenshtein_distance:
             # Enforced maximum LD by Redisearch.
             if levenshtein_distance > 3: levenshtein_distance = 3
             levenshtein_str = "%" * levenshtein_distance  # e.g. LD = 3; "short phrase" => "%%%short%%% %%%phrase%%%"
             cleaned_query = levenshtein_str + re.sub(" ", levenshtein_str + " " + levenshtein_str, cleaned_query) + levenshtein_str
@@ -132,7 +133,10 @@ def search(self, query, indexes, fields=None, options={
             hit["node"] = dict(dict(hit["node"])["properties"])
             hit["score"] = float(hit["score"])
         hits.sort(key=lambda hit: hit["score"], reverse=True)
-        return hits
+        return {
+            "hits": hits,
+            "search_terms": search_terms
+        }
 
     def get_schema(self, force_update=False):
         """
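Taken together, the series leaves `search` returning a dict of score-sorted hits plus the tokenized search terms. A hypothetical call site follows; the index name, option values, and the `graph_interface` wiring are illustrative, not taken from the patches:

```python
# Assuming an already-initialized GraphInterface instance named graph_interface.
results = graph_interface.search(
    "fanconi anemia",
    indexes=["node_name_index"],      # hypothetical RediSearch index name
    options={
        "prefix_search": False,
        "postprocessing_cypher": "",
        "levenshtein_distance": 1,    # %term% fuzzy matching per term
        "query_limit": 10,
    },
)
for hit in results["hits"]:           # sorted by score, descending
    print(hit["score"], hit["labels"], hit["node"].get("name"))
print(results["search_terms"])        # -> ['fanconi', 'anemia']
```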