Commit 86fef65: Added llmclient dependency (#175)

maykcaldas authored Dec 12, 2024
1 parent: abf65c3
Showing 26 changed files with 768 additions and 1,721 deletions.
.pre-commit-config.yaml (1 change: 0 additions & 1 deletion)

@@ -72,7 +72,6 @@ repos:
- fastapi>=0.109 # Match pyproject.toml
- fhaviary>=0.8.2 # Match pyproject.toml
- httpx
- - litellm>=1.40.15,!=1.49.4,!=1.49.5,!=1.49.6 # Match pyproject.toml
- numpy>=1.20 # Match pyproject.toml
- pydantic~=2.0 # Match pyproject.toml
- tenacity
ldp/alg/optimizer/ape.py (5 changes: 3 additions & 2 deletions)

@@ -7,6 +7,8 @@
from typing import Any, Self, cast

from aviary.core import Message
+ from llmclient import LLMResult
+ from llmclient import MultipleCompletionLLMModel as LLMModel
from pydantic import (
BaseModel,
ConfigDict,
@@ -21,7 +23,6 @@
from ldp.alg.optimizer.opt import Optimizer
from ldp.data_structures import Trajectory
from ldp.graph import LLMCallOp, OpResult, PromptOp
- from ldp.llms import LLMModel, LLMResult

logger = logging.getLogger(__name__)

@@ -285,7 +286,7 @@ async def _get_updated_prompt(
)
),
]
- result = await self.llm.call(messages, output_type=OutputPrompt)
+ result = await self.llm.call_single(messages, output_type=OutputPrompt)
message_content = cast(str, cast(list[Message], result.messages)[-1].content)
try:
return OutputPrompt.model_validate_json(message_content).prompt
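In the new API, a single completion is requested explicitly via call_single. A minimal sketch of the pattern this file migrates to (the model name and prompt are hypothetical, and LLMModel(name=...) construction is assumed to match the prior ldp.llms interface):

import asyncio

from aviary.core import Message
from llmclient import MultipleCompletionLLMModel as LLMModel


async def main() -> None:
    llm = LLMModel(name="gpt-4o-mini")  # hypothetical model name
    messages = [Message(content="Rewrite this prompt to be clearer: ...")]
    # call_single replaces the old ldp.llms call(); it returns one LLMResult
    result = await llm.call_single(messages)
    print(result.messages[-1].content)


asyncio.run(main())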
ldp/graph/common_ops.py (23 changes: 12 additions & 11 deletions)

@@ -13,16 +13,15 @@
import numpy as np
import tree
from aviary.core import Message, Tool, ToolRequestMessage, is_coroutine_callable
- from pydantic import BaseModel

- from ldp.llms import (
+ from llmclient import (
EmbeddingModel,
HybridEmbeddingModel,
- LiteEmbeddingModel,
- LLMModel,
+ LiteLLMEmbeddingModel,
LLMResult,
SparseEmbeddingModel,
)
+ from llmclient import MultipleCompletionLLMModel as LLMModel
+ from pydantic import BaseModel

from .gradient_estimators import assign_constant_grads
from .memory import Memory, MemoryModel, UIndexMemoryModel
@@ -254,7 +253,9 @@ async def forward(
# if no tools are provided, tool_choice must be 'none'
tool_choice = "none"

- result = await model.call(messages=msgs, tools=tools, tool_choice=tool_choice)
+ result = await model.call_single(
+ messages=msgs, tools=tools, tool_choice=tool_choice
+ )
if result.messages is None:
raise ValueError("No messages returned")

@@ -306,7 +307,7 @@ async def compute_logprob(
# are consistent - not sure we'd be sampling from the same distribution as N independent samples.
# TODO: think about whether sampling params besides temperature need to be accounted for, like top_p
results = await asyncio.gather(*[
- model.call(temperature=1, **model_kwargs)
+ model.call_single(temperature=1, **model_kwargs)
for _ in range(self.num_samples_partition_estimate)
])
temp_factor = 1.0 / temperature - 1.0
@@ -406,18 +407,18 @@ def __init__(
emb_models: list[EmbeddingModel] = []
if dense_embedding_dim > 0:
emb_models.append(
- LiteEmbeddingModel(
+ LiteLLMEmbeddingModel(
name=dense_embedding,
- dimensions=dense_embedding_dim,
+ ndim=dense_embedding_dim,
embed_kwargs=embedding_model_kwargs,
)
)
if sparse_embedding_dim > 0:
- emb_models.append(SparseEmbeddingModel(dimensions=sparse_embedding_dim))
+ emb_models.append(SparseEmbeddingModel(ndim=sparse_embedding_dim))
self.embedding = HybridEmbeddingModel(models=emb_models)

async def forward(self, string_input: str) -> np.ndarray:
- return await self.embedding.embed_text(string_input)
+ return np.array(await self.embedding.embed_document(string_input))

@classmethod
def backward(
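Besides call becoming call_single, this file picks up three embedding renames: LiteEmbeddingModel to LiteLLMEmbeddingModel, dimensions to ndim, and embed_text to embed_document, with the last now returning a plain list that callers wrap in np.array. A rough sketch of the new hybrid setup (the embedding name is hypothetical; the output size assumes the hybrid model concatenates its sub-models' vectors, as the old ldp.llms implementation did):

import asyncio

import numpy as np
from llmclient import (
    HybridEmbeddingModel,
    LiteLLMEmbeddingModel,
    SparseEmbeddingModel,
)


async def main() -> None:
    dense = LiteLLMEmbeddingModel(name="text-embedding-3-small", ndim=512)
    sparse = SparseEmbeddingModel(ndim=256)
    hybrid = HybridEmbeddingModel(models=[dense, sparse])
    # embed_document returns a list of floats, hence the np.array wrapping
    vector = np.array(await hybrid.embed_document("hello world"))
    print(vector.shape)  # expected (768,): 512 dense + 256 sparse


asyncio.run(main())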
ldp/graph/memory.py (14 changes: 7 additions & 7 deletions)

@@ -7,6 +7,7 @@

import numpy as np
import numpy.typing as npt
+ from llmclient import EmbeddingModel
from pydantic import (
BaseModel,
ConfigDict,
@@ -18,8 +19,6 @@
)
from usearch.index import Index

- from ldp.llms import EmbeddingModel
-
if TYPE_CHECKING:
from .common_ops import MemoryOp
from .op_utils import CallID
@@ -128,7 +127,7 @@ def enforce_empty(cls, v: dict) -> dict:

async def add_memory(self, memory: Memory) -> None:
key = await self._add_to_index(
- embedding=await self.embedding_model.embed_text(memory.query)
+ embedding=np.array(await self.embedding_model.embed_document(memory.query))
)
self.memories[key] = memory

@@ -138,7 +137,8 @@ async def get_memory(
self, query: str, matches: int = DEFAULT_MEMORY_MATCHES
) -> list[Memory]:
return await self._search_index(
- embedding=await self.embedding_model.embed_text(query), matches=matches
+ embedding=np.array(await self.embedding_model.embed_document(query)),
+ matches=matches,
)

def __len__(self) -> int:
@@ -148,7 +148,7 @@ def __len__(self) -> int:
async def safe_access_index(self) -> AsyncIterator[TIndex]:
"""Get the internal Index under the protection of an internal Lock."""
# pylint bug, SEE: https://github.com/pylint-dev/pylint/issues/9813
- async with self._index_lock: # pylint: disable=not-async-context-manager
+ async with self._index_lock:
yield self._index

@abstractmethod
Expand All @@ -167,9 +167,9 @@ class UIndexMemoryModel(MemoryModel[Index]):

def __init__(self, **kwargs):
super().__init__(**kwargs)
- if not self.embedding_model.dimensions:
+ if not self.embedding_model.ndim:
raise TypeError("Specify dimensions to the embedding model.")
- self._index = Index(ndim=self.embedding_model.dimensions)
+ self._index = Index(ndim=self.embedding_model.ndim)

async def _add_to_index(self, embedding: np.ndarray) -> int:
async with self.safe_access_index() as index:
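After the rename, UIndexMemoryModel sizes its usearch index from the embedding model's ndim. A sketch of constructing one, under the assumptions visible in this diff (that embedding_model is a constructor field and that LiteLLMEmbeddingModel is an acceptable EmbeddingModel; the embedding name is hypothetical):

from ldp.graph.memory import UIndexMemoryModel
from llmclient import LiteLLMEmbeddingModel

# ndim must be set on the embedding model, or __init__ raises TypeError
memory_model = UIndexMemoryModel(
    embedding_model=LiteLLMEmbeddingModel(name="text-embedding-3-small", ndim=512)
)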
ldp/graph/modules/reflect.py (3 changes: 1 addition & 2 deletions)

@@ -5,8 +5,7 @@

from ldp.graph import ConfigOp, FxnOp, LLMCallOp, PromptOp, compute_graph
from ldp.graph.ops import ResultOrValue
- from ldp.llms import append_to_sys
- from ldp.llms.prompts import indent_xml
+ from ldp.llms import append_to_sys, indent_xml


class ReflectModuleConfig(BaseModel):
ldp/llms/__init__.py (19 changes: 11 additions & 8 deletions)
@@ -1,21 +1,24 @@
- from .chat import (
- JSONSchemaValidationError,
- LLMModel,
+ from llmclient import (
LLMResult,
MultipleCompletionLLMModel,
sum_logprobs,
validate_json_completion,
)
- from .embeddings import (
+ from llmclient import MultipleCompletionLLMModel as LLMModel
+ from llmclient.embeddings import (
EmbeddingModel,
EmbeddingModes,
HybridEmbeddingModel,
- LiteEmbeddingModel,
+ LiteLLMEmbeddingModel,
SparseEmbeddingModel,
)
+ from llmclient.exceptions import (
+ JSONSchemaValidationError,
+ )

from .prompts import (
append_to_messages,
append_to_sys,
indent_xml,
prepend_sys,
prepend_sys_and_append_sys,
)
@@ -27,11 +30,11 @@
"JSONSchemaValidationError",
"LLMModel",
"LLMResult",
"LiteEmbeddingModel",
"MultipleCompletionLLMModel",
"LiteLLMEmbeddingModel",
"SparseEmbeddingModel",
"append_to_messages",
"append_to_sys",
"indent_xml",
"prepend_sys",
"prepend_sys_and_append_sys",
"sum_logprobs",
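The net effect for downstream code is that ldp.llms becomes a thin re-export shim over llmclient, so existing imports keep resolving:

# After this commit, these names re-export from llmclient:
from ldp.llms import LLMModel, LLMResult, LiteLLMEmbeddingModel
from llmclient import MultipleCompletionLLMModel

# LLMModel is an alias for llmclient's MultipleCompletionLLMModel
assert LLMModel is MultipleCompletionLLMModel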