update

phidatahq · Jan 24, 2025 · ada949a
1 parent a15bd74
commit ada949a
Show file tree

Hide file tree

Showing 3 changed files with 11 additions and 8 deletions.
diff --git a/evals/reliability/multiple_tool_calls/openai/calculator.py b/evals/reliability/multiple_tool_calls/openai/calculator.py
@@ -2,7 +2,7 @@
 
 from agno.agent import Agent
 from agno.eval.reliability import ReliabilityEval, ReliabilityResult
-from agno.tools.calculator import Calculator
+from agno.tools.calculator import CalculatorTools
 from agno.models.openai import OpenAIChat
 from agno.run.response import RunResponse
 
@@ -11,15 +11,15 @@ def multiply_and_exponentiate():
 
     agent=Agent(
         model=OpenAIChat(id="gpt-4o-mini"),
-        tools=[Calculator(add=True, multiply=True, exponentiate=True)],
+        tools=[CalculatorTools(add=True, multiply=True, exponentiate=True)],
     )
     response: RunResponse = agent.run("What is 10*5 then to the power of 2? do it step by step")
     evaluation = ReliabilityEval(
         agent_response=response,
         expected_tool_calls=["multiply", "exponentiate"],
     )
     result: Optional[ReliabilityResult] = evaluation.run(print_results=True)
-    assert result.eval_status == "PASSED"
+    result.assert_passed()
 
 
 if __name__ == "__main__":

diff --git a/evals/reliability/single_tool_calls/openai/calculator.py b/evals/reliability/single_tool_calls/openai/calculator.py
@@ -2,7 +2,7 @@
 
 from agno.agent import Agent
 from agno.eval.reliability import ReliabilityEval, ReliabilityResult
-from agno.tools.calculator import Calculator
+from agno.tools.calculator import CalculatorTools
 from agno.models.openai import OpenAIChat
 from agno.run.response import RunResponse
 
@@ -11,15 +11,15 @@ def factorial():
 
     agent=Agent(
         model=OpenAIChat(id="gpt-4o-mini"),
-        tools=[Calculator(factorial=True)],
+        tools=[CalculatorTools(factorial=True)],
     )
     response: RunResponse = agent.run("What is 10!?")
     evaluation = ReliabilityEval(
         agent_response=response,
         expected_tool_calls=["factorial"],
     )
     result: Optional[ReliabilityResult] = evaluation.run(print_results=True)
-    assert result.eval_status == "PASSED"
+    result.assert_passed()
 
 
 if __name__ == "__main__":

diff --git a/libs/agno/agno/eval/reliability.py b/libs/agno/agno/eval/reliability.py
@@ -30,6 +30,9 @@ def print_eval(self, console: Optional["Console"] = None):
         results_table.add_row("Passed Tool Calls", str(self.passed_tool_calls))
         console.print(results_table)
 
+    def assert_passed(self):
+        assert self.eval_status == "PASSED"
+
 
 @dataclass
 class ReliabilityEval:
@@ -96,8 +99,8 @@ def run(self, *, print_summary: bool = False, print_results: bool = False) -> Op
                     if message.tool_calls:
                         if actual_tool_calls is None:
                             actual_tool_calls = message.tool_calls
-                    else:
-                        actual_tool_calls.append(message.tool_calls[0])  # type: ignore
+                        else:
+                            actual_tool_calls.append(message.tool_calls[0])  # type: ignore
 
             failed_tool_calls = []
             passed_tool_calls = []