Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cohere support #145

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions holmes/core/tool_calling_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,21 +80,27 @@ def check_llm(self, model, api_key):
if api_key:
os.environ[api_key_env_var] = api_key
model_requirements = litellm.validate_environment(model=model)
if not model_requirements["keys_in_environment"]:
if not model_requirements["keys_in_environment"] and "cohere" not in model:
raise Exception(f"model {model} requires the following environment variables: {model_requirements['missing_keys']}")
elif "cohere" in model and not os.environ.get("COHERE_API_KEY", None):
raise Exception(f"model {model} requires the following environment variable: COHERE_API_KEY")

# this unfortunately does not seem to work for azure if the deployment name is not a well-known model name
#if not litellm.supports_function_calling(model=model):
# raise Exception(f"model {model} does not support function calling. You must use HolmesGPT with a model that supports function calling.")
def get_context_window_size(self) -> int:
    """Return the maximum number of input tokens the current model accepts.

    Cohere's command-r-plus is special-cased with its known 128k window
    because litellm's cost table may not carry an entry for it; every
    other model is looked up in ``litellm.model_cost``.
    """
    uses_command_r_plus = "cohere/command-r-plus" in self.model
    if uses_command_r_plus:
        return 128000
    model_info = litellm.model_cost[self.model]
    return model_info['max_input_tokens']

def count_tokens_for_message(self, messages: list[dict]) -> int:
    """Return the token count that *messages* consume for the current model.

    Delegates entirely to ``litellm.token_counter`` so the counting rules
    always match the configured model's tokenizer.
    """
    token_total = litellm.token_counter(model=self.model, messages=messages)
    return token_total

def get_maximum_output_token(self) -> int:
    """Return the maximum number of output tokens the current model may emit.

    Cohere's command-r-plus is special-cased (4000 tokens) because
    litellm's cost table may not carry an entry for it; every other model
    is looked up in ``litellm.model_cost``.
    """
    # NOTE: the unconditional `return litellm.model_cost[...]` that
    # preceded this check (a stale pre-diff line left in the scraped text)
    # made the cohere branch unreachable; it has been removed.
    if "cohere/command-r-plus" in self.model:
        return 4000
    return litellm.model_cost[self.model]['max_output_tokens']

def call(self, system_prompt, user_prompt, post_process_prompt: Optional[str] = None, response_format: dict = None) -> LLMResult:
messages = [
Expand Down Expand Up @@ -254,12 +260,11 @@ def truncate_messages_to_fit_context(self, messages: list, max_context_size: int

tool_call_messages = [message for message in messages if message["role"] == "tool"]

if message_size_without_tools >= (max_context_size - maximum_output_token):
if message_size_without_tools >= (max_context_size - maximum_output_token):
logging.error(f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the model's context window for input.")
raise Exception(f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the model's context window for input.")

tool_size = min(10000, int((max_context_size - message_size_without_tools - maximum_output_token) / len(tool_call_messages)))

for message in messages:
if message["role"] == "tool":
message["content"] = message["content"][:tool_size]
Expand Down
Loading