Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
manthanguptaa committed Jan 24, 2025
1 parent a15bd74 commit ada949a
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 8 deletions.
6 changes: 3 additions & 3 deletions evals/reliability/multiple_tool_calls/openai/calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from agno.agent import Agent
from agno.eval.reliability import ReliabilityEval, ReliabilityResult
from agno.tools.calculator import Calculator
from agno.tools.calculator import CalculatorTools
from agno.models.openai import OpenAIChat
from agno.run.response import RunResponse

Expand All @@ -11,15 +11,15 @@ def multiply_and_exponentiate():

agent=Agent(
model=OpenAIChat(id="gpt-4o-mini"),
tools=[Calculator(add=True, multiply=True, exponentiate=True)],
tools=[CalculatorTools(add=True, multiply=True, exponentiate=True)],
)
response: RunResponse = agent.run("What is 10*5 then to the power of 2? do it step by step")
evaluation = ReliabilityEval(
agent_response=response,
expected_tool_calls=["multiply", "exponentiate"],
)
result: Optional[ReliabilityResult] = evaluation.run(print_results=True)
assert result.eval_status == "PASSED"
result.assert_passed()


if __name__ == "__main__":
Expand Down
6 changes: 3 additions & 3 deletions evals/reliability/single_tool_calls/openai/calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from agno.agent import Agent
from agno.eval.reliability import ReliabilityEval, ReliabilityResult
from agno.tools.calculator import Calculator
from agno.tools.calculator import CalculatorTools
from agno.models.openai import OpenAIChat
from agno.run.response import RunResponse

Expand All @@ -11,15 +11,15 @@ def factorial():

agent=Agent(
model=OpenAIChat(id="gpt-4o-mini"),
tools=[Calculator(factorial=True)],
tools=[CalculatorTools(factorial=True)],
)
response: RunResponse = agent.run("What is 10!?")
evaluation = ReliabilityEval(
agent_response=response,
expected_tool_calls=["factorial"],
)
result: Optional[ReliabilityResult] = evaluation.run(print_results=True)
assert result.eval_status == "PASSED"
result.assert_passed()


if __name__ == "__main__":
Expand Down
7 changes: 5 additions & 2 deletions libs/agno/agno/eval/reliability.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ def print_eval(self, console: Optional["Console"] = None):
results_table.add_row("Passed Tool Calls", str(self.passed_tool_calls))
console.print(results_table)

def assert_passed(self):
    """Raise ``AssertionError`` unless this result's status is ``"PASSED"``.

    Convenience check for eval scripts and tests: returns ``None`` when
    ``self.eval_status == "PASSED"`` and raises otherwise.

    Raises:
        AssertionError: if ``self.eval_status`` is anything other than
            ``"PASSED"``. The message includes the actual status.
    """
    # Raise explicitly instead of using a bare `assert`: assert statements
    # are removed when Python runs with -O, which would turn this check
    # into a silent no-op.
    if self.eval_status != "PASSED":
        raise AssertionError(
            f"Reliability eval did not pass: eval_status={self.eval_status!r}"
        )


@dataclass
class ReliabilityEval:
Expand Down Expand Up @@ -96,8 +99,8 @@ def run(self, *, print_summary: bool = False, print_results: bool = False) -> Op
if message.tool_calls:
if actual_tool_calls is None:
actual_tool_calls = message.tool_calls
else:
actual_tool_calls.append(message.tool_calls[0]) # type: ignore
else:
actual_tool_calls.append(message.tool_calls[0]) # type: ignore

failed_tool_calls = []
passed_tool_calls = []
Expand Down

0 comments on commit ada949a

Please sign in to comment.