feat: improve logging and add optional perf logging (#248)
Mostly minor changes:

- Remove the use of `Console` from server.py. This cascades into other
  changes downstream.
- Add latency logs behind a `LOG_PERFORMANCE` env var (a usage sketch
  follows the example output below):
  - Add a wrapper for all API calls to measure latency at the API level.
  - Add detailed performance monitoring to the core `call()` function of
    tool_calling_llm.
- Align the HTTP server (uvicorn) logging pattern with the other logging
  calls.

Example of perf metrics for tool_calling_llm:

```
2025-01-14 09:41:17.177 INFO     tool_calling_llm.call(TOTAL) 8167ms
2025-01-14 09:41:17.177 INFO     	tool_calling_llm.call(get_all_tools_openai_format) +0ms  0ms
2025-01-14 09:41:17.177 INFO     	tool_calling_llm.call(start iteration 0) +0ms  0ms
2025-01-14 09:41:17.177 INFO     	tool_calling_llm.call(count tokens) +6ms  6ms
2025-01-14 09:41:17.177 INFO     	tool_calling_llm.call(llm.completion) +1831ms  1837ms
2025-01-14 09:41:17.177 INFO     	tool_calling_llm.call(pre-tool-calls) +0ms  1837ms
2025-01-14 09:41:17.177 INFO     	tool_calling_llm.call(tool completed fetch_finding_by_id) +570ms  2407ms
2025-01-14 09:41:17.177 INFO     	tool_calling_llm.call(end iteration 0) +0ms  2407ms
2025-01-14 09:41:17.177 INFO     	tool_calling_llm.call(start iteration 1) +0ms  2407ms
2025-01-14 09:41:17.177 INFO     	tool_calling_llm.call(count tokens) +7ms  2415ms
2025-01-14 09:41:17.177 INFO     	tool_calling_llm.call(llm.completion) +5752ms  8167ms
```
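
A minimal, runnable sketch of how a trace like the one above is produced with the `PerformanceTiming` class this commit adds in holmes/core/performance_timing.py (shown later in the diff). The loop below simulates the LLM workload, so the labels are illustrative; it assumes `LOG_PERFORMANCE` is set to a non-empty value:

```python
import logging
import time

from holmes.core.performance_timing import PerformanceTiming

logging.basicConfig(level=logging.INFO)

perf_timing = PerformanceTiming("tool_calling_llm.call")
for i in range(2):
    perf_timing.measure(f"start iteration {i}")
    time.sleep(0.05)                       # stand-in for llm.completion()
    perf_timing.measure("llm.completion")
perf_timing.end()  # logs the TOTAL line, then one line per measure() call
```

When `LOG_PERFORMANCE` is unset, `__init__`, `measure()`, and `end()` all return early, so the instrumentation can stay in place at negligible cost.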
nherment authored and moshemorad committed Jan 27, 2025
1 parent b074e30 commit b3ed4fd
Showing 9 changed files with 147 additions and 71 deletions.
2 changes: 2 additions & 0 deletions holmes/common/env_vars.py

```diff
@@ -20,3 +20,5 @@ def load_bool(env_var, default: bool):
 HOLMES_POST_PROCESSING_PROMPT = os.environ.get("HOLMES_POST_PROCESSING_PROMPT", "")
 ROBUSTA_AI = load_bool("ROBUSTA_AI", False)
 ROBUSTA_API_ENDPOINT = os.environ.get("ROBUSTA_API_ENDPOINT", "https://api.robusta.dev")
+
+LOG_PERFORMANCE = os.environ.get("LOG_PERFORMANCE", None)
```
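
Worth noting: unlike the `load_bool` helper used elsewhere in this file, the flag is kept as a raw string (or `None`), so the gate is plain truthiness. A small illustration of the consequence:

```python
import os

# Same pattern as env_vars.py above: keep the raw string (or None)
# and let callers test truthiness.
LOG_PERFORMANCE = os.environ.get("LOG_PERFORMANCE", None)

# Unset or ""          -> falsy  -> perf logging off
# Any non-empty string -> truthy -> perf logging on (even "false" or "0")
print(f"perf logging enabled: {bool(LOG_PERFORMANCE)}")
```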
60 changes: 24 additions & 36 deletions holmes/config.py

```diff
@@ -12,12 +12,11 @@
 
 from pydantic import FilePath, SecretStr, Field
 from pydash.arrays import concat
-from rich.console import Console
 
 
 from holmes.core.runbooks import RunbookManager
 from holmes.core.supabase_dal import SupabaseDal
-from holmes.core.tool_calling_llm import (IssueInvestigator,
+from holmes.core.tool_calling_llm import (IssueInvestigator,
                                           ToolCallingLLM,
                                           ToolExecutor)
 from holmes.core.tools import ToolsetPattern, get_matching_toolsets, ToolsetStatusEnum, ToolsetTag
@@ -33,9 +32,7 @@
                                      load_toolsets_from_file)
 from holmes.utils.pydantic_utils import RobustaBaseConfig, load_model_from_file
 from holmes.utils.definitions import CUSTOM_TOOLSET_LOCATION
-from pydantic import ValidationError
-from holmes.utils.holmes_sync_toolsets import load_custom_toolsets_config, merge_and_override_bultin_toolsets_with_toolsets_config, extract_config_from_custom_toolsets_config
-from holmes.core.tools import YAMLToolset
+from holmes.utils.holmes_sync_toolsets import load_custom_toolsets_config, merge_and_override_bultin_toolsets_with_toolsets_config
 from holmes.common.env_vars import ROBUSTA_CONFIG_PATH
 from holmes.utils.definitions import RobustaConfig
 import re
@@ -112,7 +109,7 @@ class Config(RobustaBaseConfig):
 
     custom_runbooks: List[FilePath] = []
     custom_toolsets: List[FilePath] = []
-
+
     enabled_toolsets_names: List[str] = Field(default_factory=list)
 
     opensearch_clusters: Optional[List[Dict]] = None  # Passed through to opensearchpy.OpenSearch constructor
@@ -145,15 +142,8 @@ def load_from_env(cls):
             if val is not None:
                 kwargs[field_name] = val
         kwargs["cluster_name"] = Config.__get_cluster_name()
-        configs = extract_config_from_custom_toolsets_config()
-        combined = {}
-        for item in configs:
-            if isinstance(item, dict):
-                for key, value in item.items():
-                    combined[key] = value
-        combined.update(kwargs)
-        return cls(**combined)
-
+        return cls(**kwargs)
+
     @staticmethod
     def __get_cluster_name() -> Optional[str]:
         config_file_path = ROBUSTA_CONFIG_PATH
@@ -174,20 +164,20 @@ def __get_cluster_name() -> Optional[str]:
         return None
 
     def create_console_tool_executor(
-        self, console: Console, allowed_toolsets: ToolsetPattern, dal:Optional[SupabaseDal]
+        self, allowed_toolsets: ToolsetPattern, dal:Optional[SupabaseDal]
    ) -> ToolExecutor:
         """
-        Creates ToolExecutor for the cli
+        Creates ToolExecutor for the cli
         """
-        default_toolsets = [toolset for toolset in load_builtin_toolsets(dal, opensearch_clusters=self.opensearch_clusters) if any(tag in (ToolsetTag.CORE, ToolsetTag.CLI) for tag in toolset.tags)]
+        default_toolsets = [toolset for toolset in load_builtin_toolsets(dal) if any(tag in (ToolsetTag.CORE, ToolsetTag.CLI) for tag in toolset.tags)]
 
         if allowed_toolsets == "*":
             matching_toolsets = default_toolsets
         else:
             matching_toolsets = get_matching_toolsets(
                 default_toolsets, allowed_toolsets.split(",")
-            )
+            )
 
         # Enable all matching toolsets that have CORE or CLI tag
         for toolset in matching_toolsets:
             toolset.enabled = True
@@ -199,7 +189,7 @@ def create_console_tool_executor(
             toolsets_loaded_from_config,
             matched_default_toolsets_by_name,
         )
-
+
         for toolset in filtered_toolsets_by_name.values():
             if toolset.enabled:
                 toolset.check_prerequisites()
@@ -213,22 +203,22 @@ def create_console_tool_executor(
                 logging.info(f"Disabled toolset: {ts.name} from {ts.get_path()})")
             elif ts.get_status() == ToolsetStatusEnum.FAILED:
                 logging.info(f"Failed loading toolset {ts.name} from {ts.get_path()}: ({ts.get_error()})")
 
         for ts in default_toolsets:
             if ts.name not in filtered_toolsets_by_name.keys():
                 logging.debug(f"Toolset {ts.name} from {ts.get_path()} was filtered out due to allowed_toolsets value")
 
         enabled_tools = concat(*[ts.tools for ts in enabled_toolsets])
         logging.debug(
             f"Starting AI session with tools: {[t.name for t in enabled_tools]}"
         )
         return ToolExecutor(enabled_toolsets)
 
     def create_tool_executor(
-        self, console: Console, dal:Optional[SupabaseDal]
+        self, dal:Optional[SupabaseDal]
     ) -> ToolExecutor:
         """
-        Creates ToolExecutor for the server endpoints
+        Creates ToolExecutor for the server endpoints
         """
 
         all_toolsets = load_builtin_toolsets(dal=dal, opensearch_clusters=self.opensearch_clusters)
@@ -245,21 +235,21 @@ def create_tool_executor(
             f"Starting AI session with tools: {[t.name for t in enabled_tools]}"
         )
         return ToolExecutor(enabled_toolsets)
 
     def create_console_toolcalling_llm(
-        self, console: Console, allowed_toolsets: ToolsetPattern, dal:Optional[SupabaseDal] = None
+        self, allowed_toolsets: ToolsetPattern, dal:Optional[SupabaseDal] = None
     ) -> ToolCallingLLM:
-        tool_executor = self.create_console_tool_executor(console, allowed_toolsets, dal)
+        tool_executor = self.create_console_tool_executor(allowed_toolsets, dal)
         return ToolCallingLLM(
             tool_executor,
             self.max_steps,
             self._get_llm()
         )
 
     def create_toolcalling_llm(
-        self, console: Console, dal:Optional[SupabaseDal] = None
+        self, dal:Optional[SupabaseDal] = None
     ) -> ToolCallingLLM:
-        tool_executor = self.create_tool_executor(console, dal)
+        tool_executor = self.create_tool_executor(dal)
         return ToolCallingLLM(
             tool_executor,
             self.max_steps,
@@ -268,25 +258,23 @@ def create_toolcalling_llm(
 
     def create_issue_investigator(
         self,
-        console: Console,
         dal: Optional[SupabaseDal] = None
     ) -> IssueInvestigator:
         all_runbooks = load_builtin_runbooks()
         for runbook_path in self.custom_runbooks:
             all_runbooks.extend(load_runbooks_from_file(runbook_path))
 
         runbook_manager = RunbookManager(all_runbooks)
-        tool_executor = self.create_tool_executor(console, dal)
+        tool_executor = self.create_tool_executor(dal)
         return IssueInvestigator(
             tool_executor,
             runbook_manager,
             self.max_steps,
             self._get_llm()
         )
 
     def create_console_issue_investigator(
         self,
-        console: Console,
         allowed_toolsets: ToolsetPattern,
         dal: Optional[SupabaseDal] = None
     ) -> IssueInvestigator:
@@ -295,7 +283,7 @@ def create_console_issue_investigator(
             all_runbooks.extend(load_runbooks_from_file(runbook_path))
 
         runbook_manager = RunbookManager(all_runbooks)
-        tool_executor = self.create_console_tool_executor(console, allowed_toolsets, dal)
+        tool_executor = self.create_console_tool_executor(allowed_toolsets, dal)
         return IssueInvestigator(
             tool_executor,
             runbook_manager,
```
8 changes: 3 additions & 5 deletions holmes/core/investigation.py

```diff
@@ -1,4 +1,5 @@
+
 from typing import Optional
 from rich.console import Console
 from holmes.common.env_vars import HOLMES_POST_PROCESSING_PROMPT
 from holmes.config import Config
@@ -8,7 +9,7 @@
 from holmes.utils.robusta import load_robusta_api_key
 
 
-def investigate_issues(investigate_request: InvestigateRequest, dal: SupabaseDal, config: Config, console:Console):
+def investigate_issues(investigate_request: InvestigateRequest, dal: SupabaseDal, config: Config):
     load_robusta_api_key(dal=dal, config=config)
     context = dal.get_issue_data(
         investigate_request.context.get("robusta_issue_id")
@@ -23,9 +24,7 @@ def investigate_issues(investigate_request: InvestigateRequest, dal: SupabaseDal
     if context:
         raw_data["extra_context"] = context
 
-    ai = config.create_issue_investigator(
-        console, dal=dal
-    )
+    ai = config.create_issue_investigator(dal=dal)
     issue = Issue(
         id=context["id"] if context else "",
         name=investigate_request.title,
@@ -37,7 +36,6 @@ def investigate_issues(investigate_request: InvestigateRequest, dal: SupabaseDal
     investigation = ai.investigate(
         issue,
         prompt=investigate_request.prompt_template,
-        console=console,
         post_processing_prompt=HOLMES_POST_PROCESSING_PROMPT,
         instructions=resource_instructions,
         global_instructions=global_instructions
```
4 changes: 2 additions & 2 deletions holmes/core/llm.py

```diff
@@ -100,12 +100,12 @@ def check_llm(self, model:str, api_key:Optional[str]):
                 "https://docs.litellm.ai/docs/providers/watsonx#usage---models-in-deployment-spaces"
             )
         else:
-            #
+            #
             api_key_env_var = f"{provider.upper()}_API_KEY"
             if api_key:
                 os.environ[api_key_env_var] = api_key
             model_requirements = litellm.validate_environment(model=model)
-
+
             if not model_requirements["keys_in_environment"]:
                 raise Exception(f"model {model} requires the following environment variables: {model_requirements['missing_keys']}")
 
```
57 changes: 57 additions & 0 deletions holmes/core/performance_timing.py

```diff
@@ -0,0 +1,57 @@
+import time
+import logging
+
+from functools import wraps
+
+from holmes.common.env_vars import (
+    LOG_PERFORMANCE,
+)
+
+class PerformanceTiming:
+    def __init__(self, name):
+        if not LOG_PERFORMANCE:
+            return
+        self.ended = False
+
+        self.name = name
+        self.start_time = time.time()
+        self.last_measure_time = self.start_time
+        self.last_measure_label = "Start"
+        self.timings = []
+
+    def measure(self, label):
+        if not LOG_PERFORMANCE:
+            return
+        if self.ended:
+            raise Exception("cannot measure a perf timing that is already ended")
+        current_time = time.time()
+
+        time_since_start = int((current_time - self.start_time) * 1000)
+        time_since_last = int((current_time - self.last_measure_time) * 1000)
+
+        self.timings.append((label, time_since_last, time_since_start))
+
+        self.last_measure_time = current_time
+        self.last_measure_label = label
+
+    def end(self):
+        if not LOG_PERFORMANCE:
+            return
+        self.ended = True
+        current_time = time.time()
+        time_since_start = int((current_time - self.start_time) * 1000)
+        message = f'{self.name}(TOTAL) {time_since_start}ms'
+        logging.info(message)
+        for label, time_since_last, time_since_start in self.timings:
+            logging.info(f'\t{self.name}({label}) +{time_since_last}ms  {time_since_start}ms')
+
+def log_function_timing(func):
+    @wraps(func)
+    def function_timing_wrapper(*args, **kwargs):
+        start_time = time.perf_counter()
+        result = func(*args, **kwargs)
+        end_time = time.perf_counter()
+        total_time = int((end_time - start_time) * 1000)
+        logging.info(f'Function "{func.__name__}()" took {total_time}ms')
+        return result
+    return function_timing_wrapper
```
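
The module also defines the `log_function_timing` decorator used to wrap API calls. Note that, as written, the decorator logs unconditionally rather than checking `LOG_PERFORMANCE`. A runnable sketch with a hypothetical handler:

```python
import logging
import time

from holmes.core.performance_timing import log_function_timing

logging.basicConfig(level=logging.INFO)

@log_function_timing
def investigate_issues_endpoint():
    # Hypothetical stand-in for an API handler wrapped by this commit.
    time.sleep(0.1)
    return {"status": "ok"}

investigate_issues_endpoint()
# -> INFO ... Function "investigate_issues_endpoint()" took ~100ms
```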