Skip to content

Commit

Permalink
Fixed NumpyVectorStore.__eq__'s NotImplemented case (#613)
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesbraza authored Oct 19, 2024
1 parent 701e6a9 commit b2faca3
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 5 deletions.
14 changes: 11 additions & 3 deletions paperqa/llms.py
Original file line number Diff line number Diff line change
Expand Up @@ -845,13 +845,21 @@ class NumpyVectorStore(VectorStore):
_embeddings_matrix: np.ndarray | None = None

def __eq__(self, other) -> bool:
    """Return whether ``other`` is a vector store with equivalent contents.

    Two stores are equal when their ``texts``, ``texts_hashes`` and
    ``mmr_lambda`` compare equal and their embeddings matrices are either
    both ``None`` or have the same shape and are element-wise close (per
    ``np.allclose``).

    Args:
        other: Object to compare against.

    Returns:
        ``True`` or ``False`` when ``other`` is an instance of this store's
        type; ``NotImplemented`` otherwise, so Python can try the reflected
        comparison instead of this method raising.
    """
    if not isinstance(other, type(self)):
        # Returning (not raising) NotImplemented is the data-model protocol
        # for "this operand does not know how to compare with that type".
        return NotImplemented
    if not (
        self.texts == other.texts
        and self.texts_hashes == other.texts_hashes
        and self.mmr_lambda == other.mmr_lambda
    ):
        return False
    mine, theirs = self._embeddings_matrix, other._embeddings_matrix
    if mine is None or theirs is None:
        # An unpopulated matrix only equals another unpopulated matrix.
        return mine is None and theirs is None
    # `==` on ndarrays is element-wise and has no single truth value, so use
    # np.allclose. Check shapes first: allclose broadcasts, which would make
    # e.g. (2, 3) vs (1, 3) compare equal and incompatible shapes raise.
    return mine.shape == theirs.shape and bool(np.allclose(mine, theirs))

def clear(self) -> None:
Expand Down
33 changes: 31 additions & 2 deletions tests/test_paperqa.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import pickle
import textwrap
from collections.abc import AsyncIterable
from copy import deepcopy
from io import BytesIO
from pathlib import Path

Expand Down Expand Up @@ -610,7 +611,7 @@ def test_duplicate(stub_data_dir: Path) -> None:
), "Unique documents should be hashed as unique"


def test_custom_embedding(stub_data_dir: Path) -> None:
def test_docs_with_custom_embedding(subtests: SubTests, stub_data_dir: Path) -> None:
class MyEmbeds(EmbeddingModel):
name: str = "my_embed"

Expand All @@ -625,7 +626,35 @@ async def embed_documents(self, texts):
citation="WikiMedia Foundation, 2023, Accessed now",
embedding_model=MyEmbeds(),
)
assert docs.texts[0].embedding == [1, 2, 3]
with subtests.test(msg="confirm-embedding"):
assert docs.texts[0].embedding == [1, 2, 3]

with subtests.test(msg="copying-before-get-evidence"):
# Before getting evidence, shallow and deep copies are the same
docs_shallow_copy = Docs(
texts_index=type(docs.texts_index)(**docs.texts_index.model_dump()),
**docs.model_dump(exclude={"texts_index"}),
)
docs_deep_copy = deepcopy(docs)
assert (
docs.texts_index
== docs_shallow_copy.texts_index
== docs_deep_copy.texts_index
)

with subtests.test(msg="copying-after-get-evidence"):
# After getting evidence, a shallow copy of Docs is not the same because its
# texts index gets lazily populated, while a deep copy should preserve it
docs.get_evidence(
"What country is Frederick Bates from?", embedding_model=MyEmbeds()
)
docs_shallow_copy = Docs(
texts_index=type(docs.texts_index)(**docs.texts_index.model_dump()),
**docs.model_dump(exclude={"texts_index"}),
)
docs_deep_copy = deepcopy(docs)
assert docs.texts_index != docs_shallow_copy.texts_index
assert docs.texts_index == docs_deep_copy.texts_index


def test_sparse_embedding(stub_data_dir: Path) -> None:
Expand Down

0 comments on commit b2faca3

Please sign in to comment.