From ada949ac4230685b732017c238d76839078726f4 Mon Sep 17 00:00:00 2001 From: Manthan Gupta Date: Fri, 24 Jan 2025 11:25:46 +0530 Subject: [PATCH] update --- evals/reliability/multiple_tool_calls/openai/calculator.py | 6 +++--- evals/reliability/single_tool_calls/openai/calculator.py | 6 +++--- libs/agno/agno/eval/reliability.py | 7 +++++-- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/evals/reliability/multiple_tool_calls/openai/calculator.py b/evals/reliability/multiple_tool_calls/openai/calculator.py index 52a68eeb4..39daba934 100644 --- a/evals/reliability/multiple_tool_calls/openai/calculator.py +++ b/evals/reliability/multiple_tool_calls/openai/calculator.py @@ -2,7 +2,7 @@ from agno.agent import Agent from agno.eval.reliability import ReliabilityEval, ReliabilityResult -from agno.tools.calculator import Calculator +from agno.tools.calculator import CalculatorTools from agno.models.openai import OpenAIChat from agno.run.response import RunResponse @@ -11,7 +11,7 @@ def multiply_and_exponentiate(): agent=Agent( model=OpenAIChat(id="gpt-4o-mini"), - tools=[Calculator(add=True, multiply=True, exponentiate=True)], + tools=[CalculatorTools(add=True, multiply=True, exponentiate=True)], ) response: RunResponse = agent.run("What is 10*5 then to the power of 2? do it step by step") evaluation = ReliabilityEval( @@ -19,7 +19,7 @@ def multiply_and_exponentiate(): expected_tool_calls=["multiply", "exponentiate"], ) result: Optional[ReliabilityResult] = evaluation.run(print_results=True) - assert result.eval_status == "PASSED" + result.assert_passed() if __name__ == "__main__": diff --git a/evals/reliability/single_tool_calls/openai/calculator.py b/evals/reliability/single_tool_calls/openai/calculator.py index a025156d7..b0d71adfc 100644 --- a/evals/reliability/single_tool_calls/openai/calculator.py +++ b/evals/reliability/single_tool_calls/openai/calculator.py @@ -2,7 +2,7 @@ from agno.agent import Agent from agno.eval.reliability import ReliabilityEval, ReliabilityResult -from agno.tools.calculator import Calculator +from agno.tools.calculator import CalculatorTools from agno.models.openai import OpenAIChat from agno.run.response import RunResponse @@ -11,7 +11,7 @@ def factorial(): agent=Agent( model=OpenAIChat(id="gpt-4o-mini"), - tools=[Calculator(factorial=True)], + tools=[CalculatorTools(factorial=True)], ) response: RunResponse = agent.run("What is 10!?") evaluation = ReliabilityEval( @@ -19,7 +19,7 @@ def factorial(): expected_tool_calls=["factorial"], ) result: Optional[ReliabilityResult] = evaluation.run(print_results=True) - assert result.eval_status == "PASSED" + result.assert_passed() if __name__ == "__main__": diff --git a/libs/agno/agno/eval/reliability.py b/libs/agno/agno/eval/reliability.py index 37fb9a5dd..af016f9f4 100644 --- a/libs/agno/agno/eval/reliability.py +++ b/libs/agno/agno/eval/reliability.py @@ -30,6 +30,9 @@ def print_eval(self, console: Optional["Console"] = None): results_table.add_row("Passed Tool Calls", str(self.passed_tool_calls)) console.print(results_table) + def assert_passed(self): + assert self.eval_status == "PASSED" + @dataclass class ReliabilityEval: @@ -96,8 +99,8 @@ def run(self, *, print_summary: bool = False, print_results: bool = False) -> Op if message.tool_calls: if actual_tool_calls is None: actual_tool_calls = message.tool_calls - else: - actual_tool_calls.append(message.tool_calls[0]) # type: ignore + else: + actual_tool_calls.append(message.tool_calls[0]) # type: ignore failed_tool_calls = [] passed_tool_calls = []