From 76137122a484213220131f662417311867554fc8 Mon Sep 17 00:00:00 2001 From: Morgan Gallant Date: Thu, 6 Jun 2024 08:38:55 -0700 Subject: [PATCH] Some smaller updates (#27) * add test for specifying schema in row-based upsert Signed-off-by: Morgan Gallant * allow int attribute values Signed-off-by: Morgan Gallant * grrr Signed-off-by: Morgan Gallant * bump to v0.1.13 Signed-off-by: Morgan Gallant --------- Signed-off-by: Morgan Gallant --- pyproject.toml | 2 +- tests/test_bm25.py | 23 +++++++++++++++++++++++ turbopuffer/namespace.py | 8 ++++---- turbopuffer/vectors.py | 4 ++-- 4 files changed, 30 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3f503f6..964913c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "turbopuffer" -version = "0.1.11" +version = "0.1.13" description = "Python Client for accessing the turbopuffer API" authors = ["turbopuffer Inc. "] homepage = "https://turbopuffer.com" diff --git a/tests/test_bm25.py b/tests/test_bm25.py index 3b7b67a..1950f13 100644 --- a/tests/test_bm25.py +++ b/tests/test_bm25.py @@ -66,3 +66,26 @@ def test_bm25(): rank_by=["Sum", [["blabla", "BM25", "walrus tusk"], ["blabla", "BM25", "jumping fox"]]] ) assert [item.id for item in results] == [2, 6] + + # Upsert with row-based upsert format + ns.upsert( + [ + tpuf.VectorRow(id=8, vector=[0.8, 0.8], attributes={ "blabla": "row based upsert format is cool" }), + ], + schema=schema, + ) + + # Upsert with the dict format + ns.upsert( + [ + {'id': 9, 'vector': [0.9, 0.9], 'attributes': {"blabla": "dict format of row based upsert also works, but isn't typed as well"}}, + ], + schema=schema, + ) + + # Query to make sure the new row(s) is there + results = ns.query({ + "top_k": 10, + "rank_by": ["blabla", "BM25", "row based upsert"] + }) + assert [item.id for item in results] == [8, 9] diff --git a/turbopuffer/namespace.py b/turbopuffer/namespace.py index d76cd23..4bc4096 100644 --- a/turbopuffer/namespace.py +++ b/turbopuffer/namespace.py @@ -100,7 +100,7 @@ def created_at(self) -> Optional[datetime]: def upsert(self, ids: Union[List[int], List[str]], vectors: List[List[float]], - attributes: Optional[Dict[str, List[Optional[str]]]] = None, + attributes: Optional[Dict[str, List[Optional[Union[str, int]]]]] = None, schema: Optional[Dict] = None, distance_metric: Optional[str] = None) -> None: """ @@ -112,7 +112,7 @@ def upsert(self, ... @overload - def upsert(self, data: Union[dict, VectorColumns], distance_metric: Optional[str] = None) -> None: + def upsert(self, data: Union[dict, VectorColumns], distance_metric: Optional[str] = None, schema: Optional[Dict] = None) -> None: """ Creates or updates multiple vectors provided in a column-oriented layout. If this call succeeds, data is guaranteed to be durably written to object storage. @@ -123,7 +123,7 @@ def upsert(self, data: Union[dict, VectorColumns], distance_metric: Optional[str @overload def upsert(self, data: Union[Iterable[dict], Iterable[VectorRow]], - distance_metric: Optional[str] = None) -> None: + distance_metric: Optional[str] = None, schema: Optional[Dict] = None) -> None: """ Creates or updates a multiple vectors provided as a list or iterator. If this call succeeds, data is guaranteed to be durably written to object storage. @@ -134,7 +134,7 @@ def upsert(self, data: Union[Iterable[dict], Iterable[VectorRow]], @overload def upsert(self, data: VectorResult, - distance_metric: Optional[str] = None) -> None: + distance_metric: Optional[str] = None, schema: Optional[Dict] = None) -> None: """ Creates or updates multiple vectors. If this call succeeds, data is guaranteed to be durably written to object storage. diff --git a/turbopuffer/vectors.py b/turbopuffer/vectors.py index c3efb16..23a4315 100644 --- a/turbopuffer/vectors.py +++ b/turbopuffer/vectors.py @@ -27,7 +27,7 @@ class VectorRow: id: Union[int, str] vector: Optional[List[float]] = None - attributes: Optional[Dict[str, Optional[str]]] = None + attributes: Optional[Dict[str, Optional[Union[str, int]]]] = None dist: Optional[float] = None @@ -75,7 +75,7 @@ class VectorColumns: ids: Union[List[int], List[str]] vectors: List[Optional[List[float]]] - attributes: Optional[Dict[str, List[Optional[str]]]] = None + attributes: Optional[Dict[str, List[Optional[Union[str, int]]]]] = None distances: Optional[List[float]] = None