-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_evals.py
57 lines (43 loc) · 1.59 KB
/
run_evals.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import pickle
import pandas as pd
# from visualize_results import plot_pair_graph, plot_pair_heatmaps
from generate_mft_dataset import (
run_dataset_generation,
check_dataset_formatting,
get_best_examples,
)
from evaluate_models import Evaluations
def run_evals():
    """Run the small end-to-end pipeline on a test dataset.

    Steps, in order:
      1. Generate a 10-example dataset into ``test_name.csv``, run the
         formatting check on it, and select the best 50 percent from
         ``formatted_test_name.csv``.
      2. Evaluate ``formatted_test_name.csv`` with the ``"mock"`` model.
      3. Load ``PREFERENCES.pkl`` from the working directory.

    NOTE(review): ``formatted_test_name.csv`` is presumably written by
    ``check_dataset_formatting`` and ``PREFERENCES.pkl`` by
    ``Evaluations.evals`` -- confirm against those modules.
    """
    name = "test_name"
    raw_csv = f"{name}.csv"
    formatted_csv = f"formatted_{name}.csv"

    # generate data
    run_dataset_generation(output_filename=raw_csv, num_examples=10)
    check_dataset_formatting(input_filename=raw_csv)
    get_best_examples(input_filename=formatted_csv, percent=50)

    # evals (named `evaluator` so the builtin `eval` is not shadowed)
    evaluator = Evaluations(eval_models=["mock"])  # ["gpt-3.5", "gpt-4o-mini"]
    evaluator.evals(input_filename=formatted_csv)

    # visualisations
    # NOTE: pickle must only be loaded from trusted, locally produced files.
    with open("PREFERENCES.pkl", "rb") as f:
        preferences = pickle.load(f)

    # TODO: visualise preferences["pair_preference"][0] with
    # plot_pair_heatmaps / plot_pair_graph (their import is currently
    # commented out at the top of the file), then check inconsistencies
    # and variance.
if __name__ == "__main__":
    # Script entry point: evaluate one model on a fixed dataset, first in
    # pair-preference mode and then in triple-preference mode, printing the
    # pickled results of each run.
    model = "claude-3"
    input_file = "half_final_data_27d_21h.csv"

    # (output filename prefix, keyword flag selecting the evaluation mode)
    runs = (
        ("PAIR_PREFERENCES", {"pp": True}),
        ("Triple_PREFERENCES", {"trp": True}),
    )

    for prefix, mode_flags in runs:
        outfile = f"{prefix}_{model}"
        # A fresh Evaluations instance per run, as in the original script.
        evaluator = Evaluations(eval_models=[model])
        evaluator.evals(input_filename=input_file, outfile=outfile, **mode_flags)

        # NOTE(review): "{outfile}.pkl" is presumably written by evals();
        # only unpickle files that were produced locally and are trusted.
        with open(f"{outfile}.pkl", "rb") as f:
            data = pickle.load(f)
        print(data)

    # To run the generate-then-evaluate pipeline instead, call run_evals().