Skip to content

Commit

Permalink
Merge pull request #537 from cheshire-cat-ai/develop
Browse files Browse the repository at this point in the history
* Fix plugin settings loading

* Add `settings_model` function to get the plugin settings model

* Validating plugin settings before saving them

* Fix `example` deprecation warning

* Support Ollama

* added user_ids

* Update cheshire_cat.py

* Update websocket.py

* Update websocket.py

* Update websocket.py

* Update websocket.py

* Update websocket.py

* updated tests

* Moved path methods in utils.py and added deprecation warning for plugins

* Removed  get_test_plugin_path

* Plugin folder as MadHatter object attribute

* Mock `cat.utils.get_plugin_path()`

* Get the plugin path with utils

* Removed unnecessary modules

* fix shared working memory being overwritten on each message

* Refactoring

* Support Fastembed

* fixed coroutine error

* Update utils.py

* added GET conversation history

* fixed endpoint

* added user_id in header

* Update headers.py

* fixed tests

* update working memory, callback and websocket endpoint to handle notification on multiple users

* Update plugin.py

* Update plugin.py

* Add utility to get the path of the current plugin

* cat as singleton

* issue way

* fixed singleton

* fix tests for user ws messages and memory recall

* cat as singleton ready to go

* Update auth_static.py

* Update test_memory_by_user.py

* hotfix plugin loader

* Fix hooks before_cat_recall inverted default

Default configs passed to hooks are inverted, fixed

* FIX: docker build

- Dockerfile: COPY whole cat folder in the resulting image
- github actions for multiplatform build

* Update embedder.py

* changed default embedder for fastembedding

* changed default embedder for sentence-transformers

* Fix issue when using Fastembed  (#532)

* Update Dockerfile

* Fixed bug with 307 http code

* Revert "Merge branch 'cheshire-cat-ai:main' into embedder"

This reverts commit f7a6138, reversing
changes made to 3756f0c.

---------

Co-authored-by: Piero Savastano <[email protected]>

* Bump qdrant container

* version 1.3

---------

Co-authored-by: Nicola Procopio <[email protected]>
Co-authored-by: Emanuele Morrone <[email protected]>
Co-authored-by: valentimarco <[email protected]>
Co-authored-by: Dany <[email protected]>
Co-authored-by: Nicorb <[email protected]>
Co-authored-by: Riccardo Albero <[email protected]>
Co-authored-by: Nicola <[email protected]>
Co-authored-by: Samuele Barzaghi <[email protected]>
Co-authored-by: Adelina G <[email protected]>
  • Loading branch information
10 people authored Nov 2, 2023
2 parents 464bbe3 + ea3e13e commit 0334330
Show file tree
Hide file tree
Showing 29 changed files with 462 additions and 231 deletions.
11 changes: 11 additions & 0 deletions .github/workflows/tag.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,17 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@v3

# Uses `docker/setup-qemu-action@v3` to set up QEMU for the multi-platform build.
- name: Set up QEMU
uses: docker/setup-qemu-action@v3

# Uses `docker/setup-buildx-action@v3` to set up Docker Buildx for the platforms listed below.
- name: Set up docker Buildx
uses: docker/setup-buildx-action@v3
with:
platforms: linux/amd64,linux/arm64

# Uses the `docker/login-action`
# action to log in to the Container registry using the account and password that will publish the packages.
# Once published, the packages are scoped to the account defined here.
Expand Down
2 changes: 1 addition & 1 deletion core/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ FROM python:3.10.11-slim-bullseye
### PREPARE BUILD WITH NECESSARY FILES AND FOLDERS ###
RUN mkdir -p /app && mkdir -p /admin
COPY ./pyproject.toml /app/pyproject.toml
COPY ./cat/plugins /app/cat/plugins
COPY ./cat /app/cat
COPY ./install_plugin_dependencies.py /app/install_plugin_dependencies.py

### SYSTEM SETUP ###
Expand Down
21 changes: 21 additions & 0 deletions core/cat/factory/custom_embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,4 +64,25 @@ def embed_query(self, text: str) -> List[float]:
ret = httpx.post(self.url, data=payload, timeout=None)
ret.raise_for_status()
return ret.json()['data'][0]['embedding']

class CustomFastembedEmbeddings(Embeddings):
    """Embeddings client that delegates to a remote Fastembed HTTP server.

    Every method issues a blocking ``httpx.post`` with no timeout and raises
    ``httpx.HTTPStatusError`` when the server replies with an error status.
    """

    def __init__(self, url, model, max_length) -> None:
        """Store the server URL and send the model settings to the server.

        Args:
            url: Base URL of the Fastembed server (no trailing slash expected,
                since endpoint paths are appended with a leading ``/``).
            model: Model name, sent as ``model`` in the JSON body.
            max_length: Sent as ``max_length`` in the JSON body.
        """
        self.url = url
        # The response body is not needed; only the status is checked.
        httpx.post(
            f"{url}/embeddings",
            json={"model": model, "max_length": max_length},
            follow_redirects=True,
            timeout=None,
        ).raise_for_status()

    def embed_documents(self, texts: List[str]):
        """Embed a list of documents.

        Posts ``{"document": texts}`` to ``/embeddings/document`` and returns
        the decoded JSON response unchanged.
        """
        payload = json.dumps({"document": texts})
        # `content=` is httpx's parameter for a raw, pre-encoded body; passing
        # a string through `data=` is deprecated. Redirects are followed here
        # as well, matching the constructor.
        ret = httpx.post(
            f"{self.url}/embeddings/document",
            content=payload,
            follow_redirects=True,
            timeout=None,
        )
        ret.raise_for_status()
        return ret.json()

    def embed_query(self, text: str) -> List[float]:
        """Embed a single query string.

        Posts ``{"prompt": text}`` to ``/embeddings/prompt`` and returns the
        decoded JSON response unchanged.
        """
        payload = json.dumps({"prompt": text})
        ret = httpx.post(
            f"{self.url}/embeddings/prompt",
            content=payload,
            follow_redirects=True,
            timeout=None,
        )
        ret.raise_for_status()
        return ret.json()

2 changes: 1 addition & 1 deletion core/cat/factory/custom_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import requests
from langchain.llms.base import LLM
from langchain.llms.openai import OpenAI
from langchain.llms.ollama import Ollama


class LLMDefault(LLM):
Expand Down Expand Up @@ -86,4 +87,3 @@ def __init__(self, **kwargs):

self.url = kwargs['url']
self.openai_api_base = os.path.join(self.url, "v1")

19 changes: 17 additions & 2 deletions core/cat/factory/embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import langchain
from pydantic import BaseModel, ConfigDict

from cat.factory.custom_embedder import DumbEmbedder, CustomOpenAIEmbeddings
from cat.factory.custom_embedder import CustomFastembedEmbeddings, DumbEmbedder, CustomOpenAIEmbeddings


# Base class to manage embedder configuration.
Expand Down Expand Up @@ -108,7 +108,7 @@ class EmbedderCohereConfig(EmbedderSettings):


class EmbedderHuggingFaceHubConfig(EmbedderSettings):
repo_id: str = "sentence-transformers/all-MiniLM-L12-v2"
repo_id: str = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
huggingfacehub_api_token: str
_pyclass: Type = langchain.embeddings.HuggingFaceHubEmbeddings

Expand All @@ -119,6 +119,20 @@ class EmbedderHuggingFaceHubConfig(EmbedderSettings):
}
)

# Settings for the Fastembed embedder, backed by CustomFastembedEmbeddings.
# The field names match the parameters of CustomFastembedEmbeddings.__init__
# (url, model, max_length); presumably the settings are forwarded to it as
# keyword arguments — confirm against EmbedderSettings.
class EmbedderFastEmbedConfig(EmbedderSettings):
    # URL of the Fastembed server; required, no default.
    url: str
    # Name of the embedding model to use.
    model: str = "intfloat/multilingual-e5-large"
    # Maximum input length sent to the server (presumably in tokens — TODO confirm).
    max_length: int = 512

    # Class that this configuration is associated with.
    _pyclass: Type = CustomFastembedEmbeddings

    # Extra metadata merged into the JSON schema of this model.
    model_config = ConfigDict(
        json_schema_extra = {
            "humanReadableName": "Fast Embedder",
            "description": "Configuration for Fast embeddings",
        }
    )


SUPPORTED_EMDEDDING_MODELS = [
EmbedderDumbConfig,
Expand All @@ -128,6 +142,7 @@ class EmbedderHuggingFaceHubConfig(EmbedderSettings):
EmbedderAzureOpenAIConfig,
EmbedderCohereConfig,
EmbedderHuggingFaceHubConfig,
EmbedderFastEmbedConfig
]


Expand Down
22 changes: 21 additions & 1 deletion core/cat/factory/llm.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import langchain
from langchain.chat_models import ChatOpenAI, AzureChatOpenAI
from langchain.llms import OpenAI, AzureOpenAI
from langchain.llms.ollama import Ollama

from typing import Dict, List, Type
import json
Expand Down Expand Up @@ -272,6 +273,24 @@ class LLMGooglePalmConfig(LLMSettings):
}
)

# Settings for an Ollama-served language model, backed by langchain's Ollama
# LLM class. Field names presumably mirror the parameters of that class —
# confirm against the langchain Ollama documentation.
class LLMOllamaConfig(LLMSettings):
    # URL of the Ollama server; required, no default.
    base_url: str
    # Name of the model to use.
    model: str = "llama2"
    # Presumably the context window size in tokens — TODO confirm.
    num_ctx: int = 2048
    # Presumably how far back the model looks to prevent repetition — TODO confirm.
    repeat_last_n: int = 64
    # Presumably the strength of the repetition penalty — TODO confirm.
    repeat_penalty: float = 1.1
    # Sampling temperature.
    temperature: float = 0.8

    # Class that this configuration is associated with.
    _pyclass: Type = Ollama

    # Extra metadata merged into the JSON schema of this model.
    model_config = ConfigDict(
        json_schema_extra = {
            "humanReadableName": "Ollama",
            "description": "Configuration for Ollama",
            "link": "https://ollama.ai/library"
        }
    )


SUPPORTED_LANGUAGE_MODELS = [
LLMDefaultConfig,
Expand All @@ -286,7 +305,8 @@ class LLMGooglePalmConfig(LLMSettings):
LLMAzureOpenAIConfig,
LLMAzureChatOpenAIConfig,
LLMAnthropicConfig,
LLMGooglePalmConfig
LLMGooglePalmConfig,
LLMOllamaConfig
]

# LLM_SCHEMAS contains metadata to let any client know
Expand Down
8 changes: 8 additions & 0 deletions core/cat/api_auth.py → core/cat/headers.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,11 @@ def check_api_key(request: Request, api_key: str = Security(api_key_header)) ->
status_code=403,
detail={"error": "Invalid API Key"}
)


def check_user_id(request: Request) -> str:
    """Return the user id carried by the ``user_id`` request header.

    Falls back to the literal ``"user"`` when the header is missing or empty.
    """
    return request.headers.get("user_id") or "user"
19 changes: 9 additions & 10 deletions core/cat/looking_glass/agent_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from cat.looking_glass import prompts
from cat.looking_glass.callbacks import NewTokenHandler
from cat.looking_glass.output_parser import ToolOutputParser
from cat.memory.working_memory import WorkingMemory
from cat.utils import verbal_timedelta
from cat.log import log

Expand Down Expand Up @@ -72,10 +73,9 @@ def execute_tool_agent(self, agent_input, allowed_tools):
return out


def execute_memory_chain(self, agent_input, prompt_prefix, prompt_suffix):
def execute_memory_chain(self, agent_input, prompt_prefix, prompt_suffix, working_memory: WorkingMemory):

input_variables = [i for i in agent_input.keys() if i in prompt_prefix + prompt_suffix]

# memory chain (second step)
memory_prompt = PromptTemplate(
template = prompt_prefix + prompt_suffix,
Expand All @@ -88,13 +88,13 @@ def execute_memory_chain(self, agent_input, prompt_prefix, prompt_suffix):
verbose=True
)

out = memory_chain(agent_input, callbacks=[NewTokenHandler(self.cat)])
out = memory_chain(agent_input, callbacks=[NewTokenHandler(self.cat, working_memory)])
out["output"] = out["text"]
del out["text"]
return out


def execute_agent(self):
def execute_agent(self, working_memory):
"""Instantiate the Agent with tools.
The method formats the main prompt and gather the allowed tools. It also instantiates a conversational Agent
Expand All @@ -106,11 +106,10 @@ def execute_agent(self):
Instance of the Agent provided with a set of tools.
"""
mad_hatter = self.cat.mad_hatter
working_memory = self.cat.working_memory

# prepare input to be passed to the agent.
# Info will be extracted from working memory
agent_input = self.format_agent_input()
agent_input = self.format_agent_input(working_memory)
agent_input = mad_hatter.execute_hook("before_agent_starts", agent_input)
# should we ran the default agent?
fast_reply = {}
Expand Down Expand Up @@ -161,7 +160,7 @@ def execute_agent(self):
agent_input["tools_output"] = "## Tools output: \n" + tools_result["output"] if tools_result["output"] else ""

# Execute the memory chain
out = self.execute_memory_chain(agent_input, prompt_prefix, prompt_suffix)
out = self.execute_memory_chain(agent_input, prompt_prefix, prompt_suffix, working_memory)

# If some tools are used the intermediate step are added to the agent output
out["intermediate_steps"] = used_tools
Expand All @@ -178,11 +177,11 @@ def execute_agent(self):
#Adding the tools_output key in agent input, needed by the memory chain
agent_input["tools_output"] = ""
# Execute the memory chain
out = self.execute_memory_chain(agent_input, prompt_prefix, prompt_suffix)
out = self.execute_memory_chain(agent_input, prompt_prefix, prompt_suffix, working_memory)

return out

def format_agent_input(self):
def format_agent_input(self, working_memory):
"""Format the input for the Agent.
The method formats the strings of recalled memories and chat history that will be provided to the Langchain
Expand All @@ -206,7 +205,7 @@ def format_agent_input(self):
agent_prompt_chat_history
"""

working_memory = self.cat.working_memory


# format memories to be inserted in the prompt
episodic_memory_formatted_content = self.agent_prompt_episodic_memories(
Expand Down
5 changes: 3 additions & 2 deletions core/cat/looking_glass/callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@

class NewTokenHandler(BaseCallbackHandler):

def __init__(self, cat):
def __init__(self, cat, working_memory):
self.cat = cat
self.working_memory = working_memory

def on_llm_new_token(self, token: str, **kwargs) -> None:
self.cat.send_ws_message(token, "chat_token")
self.cat.send_ws_message(token, "chat_token", self.working_memory)
Loading

0 comments on commit 0334330

Please sign in to comment.