Merge pull request stanfordnlp#1026 from ofermend/vectara_retriever_update

support multiple corpora in vectara retriever
inayet · May 15, 2024 · 17d76b7 · 17d76b7
2 parents 05b3557 + c74b45e
commit 17d76b7
Showing 1 changed file with 12 additions and 8 deletions.
diff --git a/dspy/retrieve/vectara_rm.py b/dspy/retrieve/vectara_rm.py
@@ -18,7 +18,7 @@ class VectaraRM(dspy.Retrieve):
     """
     A retrieval module that uses Vectara to return the top passages for a given query.
 
-    Assumes that a Vectara corpus has been created and populated with the following payload:
+    Assumes that one or more Vectara corpora have been created and populated with the following payload:
         - document: The text of the passage
 
     Args:
@@ -67,17 +67,21 @@ def __init__(
     def _vectara_query(
         self,
         query: str,
-        limit: int = 3,
+        limit: int = 5,
     ) -> List[str]:
         """Query Vectara index to get the top k matching passages.
         Args:
             query: query string
         """
-        corpus_key = {
-            "customerId": self._vectara_customer_id,
-            "corpusId": self._vectara_corpus_id,
-            "lexicalInterpolationConfig": {"lambda": 0.025 },
-        }
+        # If multiple corpus ids are provided (comma-separated), build one corpus key per id;
+        # for a single id, `split(',')` returns a one-element list, so that corpus id is kept as-is
+        corpus_key = [
+            {
+                "customerId": self._vectara_customer_id,
+                "corpusId": corpus_id,
+                "lexicalInterpolationConfig": {"lambda": 0.025 },
+            } for corpus_id in self._vectara_corpus_id.split(',')
+        ]
 
         data = {
             "query": [
@@ -91,7 +95,7 @@ def _vectara_query(
                         "startTag": START_SNIPPET,
                         "endTag": END_SNIPPET,
                     },
-                    "corpusKey": [corpus_key],
+                    "corpusKey": corpus_key,
                 },
             ],
         }