# build_master_list.py
# Build the master list with all the experiments
from pathlib import Path
import polars as pl
from tqdm import tqdm
from src.utils.logging import read_logs
# Identifiers of the experiment runs that go into the master list.
# Written as a set literal so accidental repeats collapse automatically;
# the final value is a sorted list of unique id strings.
run_ids = sorted(
    {
        "0428", "0429", "0430", "0434", "0435", "0438",
        "0444", "0451", "0453", "0454", "0457", "0459",
        "0467", "0468", "0469", "0470", "0471", "0476",
        "0477", "0478", "0481", "0482", "0483", "0484",
        "0485", "0486", "0487", "0489", "0494", "0495",
        "0496", "0497", "0500", "0501", "0502", "0503",
        "0635", "0636", "0637", "0638", "0665", "0671",
        "0672", "0673", "0674", "0680", "0682", "0683",
        "0686", "0688", "0692", "0693", "0695", "0698",
        "0699", "0703", "0705", "0706", "0707", "0708",
    }
)
# Collect the logs of every selected run, merge them into one table and write
# the result to results/master_list.{csv,parquet}.
base_path = "results/logs/"
# NOTE(review): dest_path is not referenced below -- the outputs are written
# directly under results/. Kept in case other code relies on the name.
dest_path = Path("results/overall")

# Set lookup for the per-entry membership test in the loop.
selected_runs = set(run_ids)

# List the log directory once and sort it: iterdir() yields entries in
# arbitrary order, so sorting keeps the row order of the master list
# reproducible, and a single scan also gives tqdm its total via len().
log_paths = sorted(Path(base_path).iterdir())

overall_list = []
for r_path in tqdm(log_paths):
    # The part of the entry name before the first "-" is treated as the run id.
    r_id = r_path.stem.split("-")[0]
    if r_id not in selected_runs:
        continue
    try:
        df_raw = read_logs(exp_name=None, exp_path=r_path)
        if r_id == "0673":  # This run was made before fixing the model label
            df_raw = df_raw.with_columns(chosen_model=pl.lit("ridgecv"))
    except (pl.exceptions.SchemaError, ValueError):
        # Best effort: report the unreadable log and carry on with the rest.
        print("Failed ", r_path)
        continue
    overall_list.append(df_raw)

# pl.concat cannot combine an empty list; fail with a message that says what
# was actually missing instead of a generic concat error.
if not overall_list:
    raise ValueError(
        f"No readable logs for the selected runs were found in {base_path!r}"
    )

# source_table is the part of base_table before its first "-".
df_overall = pl.concat(overall_list).with_columns(
    source_table=pl.col("base_table").str.split("-").list.first()
)
# Rows whose chosen model is "linear" are excluded from the master list.
df_overall = df_overall.filter(pl.col("chosen_model") != "linear")

df_overall.write_csv("results/master_list.csv")
df_overall.write_parquet("results/master_list.parquet")