Skip to content

Commit

Permalink
Merge pull request stanfordnlp#1026 from ofermend/vectara_retriever_u…
Browse files Browse the repository at this point in the history
…pdate

support multiple corpora in vectara retriever
  • Loading branch information
arnavsinghvi11 authored May 15, 2024
2 parents 05b3557 + c74b45e commit 17d76b7
Showing 1 changed file with 12 additions and 8 deletions.
20 changes: 12 additions & 8 deletions dspy/retrieve/vectara_rm.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class VectaraRM(dspy.Retrieve):
"""
A retrieval module that uses Vectara to return the top passages for a given query.
Assumes that a Vectara corpus has been created and populated with the following payload:
Assumes that one or more Vectara corpora have been created and populated with the following payload:
- document: The text of the passage
Args:
Expand Down Expand Up @@ -67,17 +67,21 @@ def __init__(
def _vectara_query(
self,
query: str,
limit: int = 3,
limit: int = 5,
) -> List[str]:
"""Query Vectara index to get the top k matching passages.
Args:
query: query string
"""
corpus_key = {
"customerId": self._vectara_customer_id,
"corpusId": self._vectara_corpus_id,
"lexicalInterpolationConfig": {"lambda": 0.025 },
}
# If multiple corpus ids are provided (comma-separated), create one corpus key per id.
# With a single corpus id, `split(',')` returns a one-element list, so the single id is retained.
corpus_key = [
{
"customerId": self._vectara_customer_id,
"corpusId": corpus_id,
"lexicalInterpolationConfig": {"lambda": 0.025 },
} for corpus_id in self._vectara_corpus_id.split(',')
]

data = {
"query": [
Expand All @@ -91,7 +95,7 @@ def _vectara_query(
"startTag": START_SNIPPET,
"endTag": END_SNIPPET,
},
"corpusKey": [corpus_key],
"corpusKey": corpus_key,
},
],
}
Expand Down

0 comments on commit 17d76b7

Please sign in to comment.