-
Notifications
You must be signed in to change notification settings - Fork 393
/
Copy pathr.py
90 lines (69 loc) · 2.49 KB
/
r.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
from deepeval.metrics.dag import (
VerdictNode,
TaskNode,
NonBinaryJudgementNode,
BinaryJudgementNode,
)
from deepeval.metrics.dag.graph import DeepAcyclicGraph
from deepeval.test_case import LLMTestCaseParams, LLMTestCase
from deepeval.metrics import DAGMetric
# Multi-way judgement on the output language; each verdict carries its own score.
non_binary = NonBinaryJudgementNode(
    criteria="What is the output language?",
    children=[
        VerdictNode(score=10, verdict="english"),
        VerdictNode(score=0, verdict="French"),
    ],
)

# The two outcomes of the yes/no word-count check below: True hands over to
# the language judgement, False ends with a score of 0.
verdict_node_yes = VerdictNode(child=non_binary, verdict=True)
verdict_node_no = VerdictNode(score=0, verdict=False)

binary = BinaryJudgementNode(
    children=[verdict_node_yes, verdict_node_no],
    evaluation_params=[LLMTestCaseParams.ACTUAL_OUTPUT],
    criteria="does the list of extracted words contain the same number of words in the `actual_output`, ignore formatting?",
)

# Entry task of the graph. `non_binary` is listed here as a direct child in
# addition to being reachable through `verdict_node_yes`.
task_node = TaskNode(
    children=[binary, non_binary],
    output_label="list of extracted words",
    evaluation_params=[LLMTestCaseParams.ACTUAL_OUTPUT],
    instructions="Extract all words from the `actual output`",
)

# NOTE(review): released deepeval versions appear to take
# `dag=DeepAcyclicGraph(root_nodes=[...])` instead of `root_node=` - confirm
# against the installed version before relying on this call.
dag_metric = DAGMetric(
    name="dag",
    root_node=[task_node],
    async_mode=False,
)
# dag_metric.measure(
# test_case=LLMTestCase(input="..", actual_output="Les miserable")
# )
# print(dag_metric.score)
# print(dag_metric.reason)
async def main():
    """Run ``dag_metric`` on one hard-coded test case and print the result.

    Awaits ``dag_metric.a_measure`` with a fixed ``LLMTestCase`` and then
    prints the metric's ``score`` followed by its ``reason``.
    """
    sample_case = LLMTestCase(input="..", actual_output="Les miserable")

    # The measurement is awaited to completion before any attribute is read.
    await dag_metric.a_measure(test_case=sample_case)

    print(dag_metric.score)
    print(dag_metric.reason)
# asyncio is imported here, right before its only use in this script.
import asyncio
# Top-level call: the coroutine is run whenever this module is executed or imported.
asyncio.run(main())
from deepeval.metrics.dag import (
TaskNode,
BinaryJudgementNode,
NonBinaryJudgementNode,
VerdictNode,
)
# Multi-way judgement on heading order; each verdict carries its own score.
correct_order_node = NonBinaryJudgementNode(
    criteria="Are the headings in the correct order: 'intro' => 'body' => 'conclusion'?",
    children=[
        VerdictNode(verdict="Yes", score=10),
        VerdictNode(verdict="Two are out of order", score=4),
        VerdictNode(verdict="All out of order", score=2),
    ],
)

# Yes/no gate on whether all three headings are present. Both outcomes are
# expressed as VerdictNodes: False ends with a score of 0, True continues to
# the ordering judgement through `child=`.
correct_headings_node = BinaryJudgementNode(
    criteria="Does the heading contain all three: 'intro', 'body', and 'conclusion'?",
    children=[
        VerdictNode(verdict=False, score=0),
        VerdictNode(verdict=True, child=correct_order_node),
    ],
)

# Entry task of the graph. The test-case fields to read are passed through
# `evaluation_params`, the keyword TaskNode is given elsewhere in this file.
# NOTE(review): `correct_order_node` is also listed as a direct child here,
# in addition to being reachable via the True verdict - confirm the second
# parent is intended.
extract_headings_node = TaskNode(
    instructions="Extract all headings in `actual_output`",
    evaluation_params=[LLMTestCaseParams.ACTUAL_OUTPUT],
    output_label="Summary headings",
    children=[correct_headings_node, correct_order_node],
)