Merge pull request #537 from cheshire-cat-ai/develop

* Fix plugin settings loading * Add `settings_model` function to get the plugin settings model * Validating plugin settings before saving them * Fix `example` deprecation warning * Support Ollama * added user_ids * Update cheshire_cat.py * Update websocket.py * Update websocket.py * Update websocket.py * Update websocket.py * Update websocket.py * updated tests * Moved path methods in utils.py and added deprecation warning for plugins * Removed get_test_plugin_path * Plugin folder as MadHatter object attribute * Mock `cat.utils.get_plugin_path()` * Get the plugin path with utils * Removed unnecessary modules * fix shared working memory overwritten each messages * Refactoring * Support Fastembed * fixed coroutine error * Update utils.py * added GET conversation history * fixed endpoint * added user_id in header * Update headers.py * fixed tests * update working memory, callback and websocket endpoint to handle notification on multiple users * Update plugin.py * Update plugin.py * Add utility to get the path of the current plugin * cat as singleton * issue way * fixed singleton * fix tests for iuser ws messages and memory recall * cat as singleton ready to go * Update auth_static.py * Update test_memory_by_user.py * hotfix plugin loader * Fix hooks before_cat_recall inverted default Default configs passed to hooks are inverted, fixed * FIX: docker build - Dockerfile: COPY whole cat folder in the resulting image - github actions for multiplatform build * Update embedder.py * changed default embedder for fastembedding * changed default embedder for sentence-transformers * Fix issue when using Fastembed (#532) * Update Dockerfile * Fixed bug with 307 http code * Revert "Merge branch 'cheshire-cat-ai:main' into embedder" This reverts commit f7a6138, reversing changes made to 3756f0c. 
--------- Co-authored-by: Piero Savastano <[email protected]> * Bump qdrant container * version 1.3 --------- Co-authored-by: Nicola Procopio <[email protected]> Co-authored-by: Emanuele Morrone <[email protected]> Co-authored-by: valentimarco <[email protected]> Co-authored-by: Dany <[email protected]> Co-authored-by: Nicorb <[email protected]> Co-authored-by: Riccardo Albero <[email protected]> Co-authored-by: Nicola <[email protected]> Co-authored-by: Samuele Barzaghi <[email protected]> Co-authored-by: Adelina G <[email protected]>
cheshire-cat-ai · Nov 2, 2023 · 0334330 · 0334330
2 parents 464bbe3 + ea3e13e
commit 0334330
Show file tree

Hide file tree

Showing 29 changed files with 462 additions and 231 deletions.
diff --git a/.github/workflows/tag.yml b/.github/workflows/tag.yml
@@ -36,6 +36,17 @@ jobs:
     steps:
       - name: Checkout repository
         uses: actions/checkout@v3
+
+      # Set up QEMU emulation so images for non-native platforms (e.g. linux/arm64) can be built.
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
+      # Set up Docker Buildx, the builder used for the multi-platform (linux/amd64 + linux/arm64) build.
+      - name: Set up docker Buildx
+        uses: docker/setup-buildx-action@v3
+        with:
+          platforms: linux/amd64,linux/arm64
+
       # Uses the `docker/login-action`
       # action to log in to the Container registry using the account and password that will publish the packages.
       # Once published, the packages are scoped to the account defined here.

diff --git a/core/Dockerfile b/core/Dockerfile
@@ -4,7 +4,7 @@ FROM python:3.10.11-slim-bullseye
 ### PREPARE BUILD WITH NECESSARY FILES AND FOLDERS ###
 RUN mkdir -p /app && mkdir -p /admin
 COPY ./pyproject.toml /app/pyproject.toml
-COPY ./cat/plugins /app/cat/plugins
+COPY ./cat /app/cat
 COPY ./install_plugin_dependencies.py /app/install_plugin_dependencies.py
 
 ### SYSTEM SETUP ###

diff --git a/core/cat/factory/custom_embedder.py b/core/cat/factory/custom_embedder.py
@@ -64,4 +64,25 @@ def embed_query(self, text: str) -> List[float]:
         ret = httpx.post(self.url, data=payload, timeout=None)
         ret.raise_for_status()
         return ret.json()['data'][0]['embedding']
+
+class CustomFastembedEmbeddings(Embeddings):
+    """Embed text through a remote Fastembed HTTP server.
+
+    All work is delegated to the server reachable at ``url``:
+
+    * ``POST {url}/embeddings`` is called once at construction time with the
+      model name and maximum sequence length.
+    * ``POST {url}/embeddings/document`` embeds a list of texts.
+    * ``POST {url}/embeddings/prompt`` embeds a single query.
+
+    Every call raises ``httpx.HTTPStatusError`` on a 4xx/5xx response.
+    """
+
+    def __init__(self, url, model, max_length) -> None:
+        # Drop a trailing slash so the f-strings below never produce
+        # "//embeddings".
+        self.url = url.rstrip("/")
+        output = httpx.post(
+            f"{self.url}/embeddings",
+            json={"model": model, "max_length": max_length},
+            follow_redirects=True,
+            timeout=None,
+        )
+        output.raise_for_status()
+
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        """Return the server's embeddings for ``texts``.
+
+        The decoded JSON body is returned as-is; it is expected to be one
+        vector per input text (NOTE(review): confirm against the server).
+        """
+        # `json=` serialises the payload and sets the JSON Content-Type;
+        # passing a raw string through `data=` is deprecated in httpx.
+        # Redirects are followed here too, as in `__init__`.
+        ret = httpx.post(
+            f"{self.url}/embeddings/document",
+            json={"document": texts},
+            follow_redirects=True,
+            timeout=None,
+        )
+        ret.raise_for_status()
+        return ret.json()
+
+    def embed_query(self, text: str) -> List[float]:
+        """Return the server's embedding for the single query ``text``."""
+        ret = httpx.post(
+            f"{self.url}/embeddings/prompt",
+            json={"prompt": text},
+            follow_redirects=True,
+            timeout=None,
+        )
+        ret.raise_for_status()
+        return ret.json()
 
diff --git a/core/cat/factory/custom_llm.py b/core/cat/factory/custom_llm.py
@@ -3,6 +3,7 @@
 import requests
 from langchain.llms.base import LLM
 from langchain.llms.openai import OpenAI
+from langchain.llms.ollama import Ollama
 
 
 class LLMDefault(LLM):
@@ -86,4 +87,3 @@ def __init__(self, **kwargs):
 
         self.url = kwargs['url']
         self.openai_api_base = os.path.join(self.url, "v1")
-
diff --git a/core/cat/factory/embedder.py b/core/cat/factory/embedder.py
@@ -2,7 +2,7 @@
 import langchain
 from pydantic import BaseModel, ConfigDict
 
-from cat.factory.custom_embedder import DumbEmbedder, CustomOpenAIEmbeddings
+from cat.factory.custom_embedder import CustomFastembedEmbeddings, DumbEmbedder, CustomOpenAIEmbeddings
 
 
 # Base class to manage LLM configuration.
@@ -108,7 +108,7 @@ class EmbedderCohereConfig(EmbedderSettings):
 
 
 class EmbedderHuggingFaceHubConfig(EmbedderSettings):
-    repo_id: str = "sentence-transformers/all-MiniLM-L12-v2"
+    repo_id: str = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
     huggingfacehub_api_token: str
     _pyclass: Type = langchain.embeddings.HuggingFaceHubEmbeddings
 
@@ -119,6 +119,20 @@ class EmbedderHuggingFaceHubConfig(EmbedderSettings):
         }
     )
 
+class EmbedderFastEmbedConfig(EmbedderSettings):
+    # Settings for the Fastembed embedder: a required server `url`, plus
+    # defaults for the embedding `model` and the `max_length` limit.
+    # NOTE(review): presumably these fields are handed to `_pyclass` as keyword
+    # arguments (CustomFastembedEmbeddings takes url, model, max_length) —
+    # confirm against EmbedderSettings.
+    url: str
+    model: str = "intfloat/multilingual-e5-large"
+    max_length: int = 512
+
+    # Embedder implementation associated with this configuration.
+    _pyclass: Type = CustomFastembedEmbeddings
+
+    # Extra JSON-schema metadata (display name and description) for this model.
+    model_config = ConfigDict(
+        json_schema_extra = {
+            "humanReadableName": "Fast Embedder",
+            "description": "Configuration for Fast embeddings",
+        }
+    )
+
 
 SUPPORTED_EMDEDDING_MODELS = [
     EmbedderDumbConfig,
@@ -128,6 +142,7 @@ class EmbedderHuggingFaceHubConfig(EmbedderSettings):
     EmbedderAzureOpenAIConfig,
     EmbedderCohereConfig,
     EmbedderHuggingFaceHubConfig,
+    EmbedderFastEmbedConfig
 ]
 
 

diff --git a/core/cat/factory/llm.py b/core/cat/factory/llm.py
@@ -1,6 +1,7 @@
 import langchain
 from langchain.chat_models import ChatOpenAI, AzureChatOpenAI
 from langchain.llms import OpenAI, AzureOpenAI
+from langchain.llms.ollama import Ollama
 
 from typing import Dict, List, Type
 import json
@@ -272,6 +273,24 @@ class LLMGooglePalmConfig(LLMSettings):
         }
     )
 
+class LLMOllamaConfig(LLMSettings):
+    # Settings for an Ollama-served language model. Only `base_url` is
+    # required; every other field has a default.
+    # NOTE(review): the field names match parameters of langchain's `Ollama`
+    # class and are presumably forwarded to it unchanged — confirm against
+    # LLMSettings.
+    base_url: str
+    model: str = "llama2"
+    num_ctx: int = 2048
+    repeat_last_n: int = 64
+    repeat_penalty: float = 1.1
+    temperature: float = 0.8
+
+    # LLM implementation associated with this configuration.
+    _pyclass: Type = Ollama
+
+    # Extra JSON-schema metadata: display name, description and a link to the
+    # Ollama model library.
+    model_config = ConfigDict(
+        json_schema_extra = {
+            "humanReadableName": "Ollama",
+            "description": "Configuration for Ollama",
+            "link": "https://ollama.ai/library"
+        }
+    )
+
 
 SUPPORTED_LANGUAGE_MODELS = [
     LLMDefaultConfig,
@@ -286,7 +305,8 @@ class LLMGooglePalmConfig(LLMSettings):
     LLMAzureOpenAIConfig,
     LLMAzureChatOpenAIConfig,
     LLMAnthropicConfig,
-    LLMGooglePalmConfig
+    LLMGooglePalmConfig,
+    LLMOllamaConfig
 ]
 
 # LLM_SCHEMAS contains metadata to let any client know

diff --git a/core/cat/api_auth.py → core/cat/headers.py b/core/cat/api_auth.py → core/cat/headers.py
@@ -51,3 +51,11 @@ def check_api_key(request: Request, api_key: str = Security(api_key_header)) ->
             status_code=403,
             detail={"error": "Invalid API Key"}
         )
+
+
+def check_user_id(request: Request) -> str:
+    """Return the user id carried by the request headers.
+
+    Reads the ``user_id`` header. When the header is missing or empty, the
+    default id ``"user"`` is returned instead.
+    """
+    return request.headers.get("user_id") or "user"
diff --git a/core/cat/looking_glass/agent_manager.py b/core/cat/looking_glass/agent_manager.py
@@ -11,6 +11,7 @@
 from cat.looking_glass import prompts
 from cat.looking_glass.callbacks import NewTokenHandler
 from cat.looking_glass.output_parser import ToolOutputParser
+from cat.memory.working_memory import WorkingMemory
 from cat.utils import verbal_timedelta
 from cat.log import log
 
@@ -72,10 +73,9 @@ def execute_tool_agent(self, agent_input, allowed_tools):
         return out
 
 
-    def execute_memory_chain(self, agent_input, prompt_prefix, prompt_suffix):
+    def execute_memory_chain(self, agent_input, prompt_prefix, prompt_suffix, working_memory: WorkingMemory):
 
         input_variables = [i for i in agent_input.keys() if i in prompt_prefix + prompt_suffix]
-
         # memory chain (second step)
         memory_prompt = PromptTemplate(
             template = prompt_prefix + prompt_suffix,
@@ -88,13 +88,13 @@ def execute_memory_chain(self, agent_input, prompt_prefix, prompt_suffix):
             verbose=True
         )
 
-        out = memory_chain(agent_input, callbacks=[NewTokenHandler(self.cat)])
+        out = memory_chain(agent_input, callbacks=[NewTokenHandler(self.cat, working_memory)])
         out["output"] = out["text"]
         del out["text"]
         return out
 
 
-    def execute_agent(self):
+    def execute_agent(self, working_memory):
         """Instantiate the Agent with tools.
 
         The method formats the main prompt and gather the allowed tools. It also instantiates a conversational Agent
@@ -106,11 +106,10 @@ def execute_agent(self):
             Instance of the Agent provided with a set of tools.
         """
         mad_hatter = self.cat.mad_hatter
-        working_memory = self.cat.working_memory
 
         # prepare input to be passed to the agent.
         #   Info will be extracted from working memory
-        agent_input = self.format_agent_input()
+        agent_input = self.format_agent_input(working_memory)
         agent_input = mad_hatter.execute_hook("before_agent_starts", agent_input)
         # should we run the default agent?
         fast_reply = {}
@@ -161,7 +160,7 @@ def execute_agent(self):
                     agent_input["tools_output"] = "## Tools output: \n" + tools_result["output"] if tools_result["output"] else ""
 
                     # Execute the memory chain
-                    out = self.execute_memory_chain(agent_input, prompt_prefix, prompt_suffix)
+                    out = self.execute_memory_chain(agent_input, prompt_prefix, prompt_suffix, working_memory)
 
                     # If some tools are used, the intermediate steps are added to the agent output
                     out["intermediate_steps"] = used_tools
@@ -178,11 +177,11 @@ def execute_agent(self):
         #Adding the tools_output key in agent input, needed by the memory chain
         agent_input["tools_output"] = ""
         # Execute the memory chain
-        out = self.execute_memory_chain(agent_input, prompt_prefix, prompt_suffix)
+        out = self.execute_memory_chain(agent_input, prompt_prefix, prompt_suffix, working_memory)
 
         return out
 
-    def format_agent_input(self):
+    def format_agent_input(self, working_memory):
         """Format the input for the Agent.
 
         The method formats the strings of recalled memories and chat history that will be provided to the Langchain
@@ -206,7 +205,7 @@ def format_agent_input(self):
         agent_prompt_chat_history
         """
 
-        working_memory = self.cat.working_memory
+
 
         # format memories to be inserted in the prompt
         episodic_memory_formatted_content = self.agent_prompt_episodic_memories(

diff --git a/core/cat/looking_glass/callbacks.py b/core/cat/looking_glass/callbacks.py
@@ -4,8 +4,9 @@
 
 class NewTokenHandler(BaseCallbackHandler):
 
-    def __init__(self, cat):
+    def __init__(self, cat, working_memory):
         self.cat = cat
+        self.working_memory = working_memory
 
     def on_llm_new_token(self, token: str, **kwargs) -> None:
-        self.cat.send_ws_message(token, "chat_token")
+        self.cat.send_ws_message(token, "chat_token", self.working_memory)