forked from anthony-wang/CrabNet
-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathmatbench_rf.py
106 lines (83 loc) · 2.68 KB
/
matbench_rf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import os
import numpy as np
import warnings
import json
from time import time
from datetime import datetime
from sklearn.ensemble import RandomForestRegressor
from crabnet.utils.get_core_count import get_core_count
from crabnet.utils.modelselectionhelper import modelselectionhelper
from crabnet.utils.utils import NumpyEncoder, CONSTANTS, count_gs_param_combinations
# %%
# Shared project constants; only the matbench_* attributes and `eps` are read here.
cons = CONSTANTS()

# Relative path handed to the model-selection helper as the property-data directory.
mat_props_dir = r"data/matbench_cv/"

# Pull the three Matbench lookups off the constants object in one step.
mat_props, mat_props_names, mat_props_pretty = (
    cons.matbench_props,
    cons.matbench_names,
    cons.matbench_names_dict,
)

# `cons.eps` is read and then immediately overridden, so this script only ever
# uses the "magpie" entry.
# NOTE(review): presumably `cons.eps` is the full list of element-property
# sets and the override is a deliberate narrowing; confirm before removing.
elem_props = cons.eps
elem_props = ["magpie"]

# Fixed seed: applied to NumPy's global RNG here and later passed to the helper.
RNG_SEED = 42
np.random.seed(RNG_SEED)
# %%
# Estimators to evaluate, keyed by the same name used in `params1`.
models1 = dict(RandomForestRegressor=RandomForestRegressor())

# Search grid per estimator. With one value per hyper-parameter this is a
# single-point grid: 500 trees and no depth limit.
rf_estimators_range = [500]
params1 = dict(
    RandomForestRegressor=dict(
        n_estimators=rf_estimators_range,
        max_depth=[None],
    ),
)

# Short metric aliases mapped to scikit-learn scoring strings.
scorings = dict(
    r2="r2",
    neg_MAE="neg_mean_absolute_error",
    neg_RMSE="neg_root_mean_squared_error",
)
# %%
def resolve_n_jobs(n_cores):
    """Return a usable worker count derived from the detected core count.

    Keeps the script's heuristic (half of the cores, minus two) but clamps the
    result to a minimum of 1. Unclamped, the formula yields 0 for 4-5 cores and
    a negative number for fewer than 4 cores.

    NOTE(review): the value is presumably forwarded to scikit-learn/joblib,
    where ``n_jobs=0`` is rejected and negative values mean "all cores
    (minus k)" -- confirm against ``modelselectionhelper``.

    Parameters
    ----------
    n_cores : int
        Number of cores reported for this machine.

    Returns
    -------
    int
        ``n_cores // 2 - 2``, but never less than 1.
    """
    return max(1, n_cores // 2 - 2)


if __name__ == "__main__":
    # Run-start timestamp; nothing below reads it, but the name is kept so the
    # script's globals are unchanged.
    start_datetime_matbench_classics = datetime.now().strftime("%Y-%m-%d-%H%M%S.%f")

    # Output locations for metrics and figures, created on demand.
    metrics_dir = "metrics/rf_gridsearch/"
    fig_dir = r"figures/GridSearchCV/matbench/"
    os.makedirs(metrics_dir, exist_ok=True)
    os.makedirs(fig_dir, exist_ok=True)

    # Report the size of the search grid and record it next to the metrics.
    _, cnt_dict = count_gs_param_combinations(params1)
    print(f"Number of parameter combinations for each estimator:\n" f"{cnt_dict}")

    cnt_dict_filename = "parameter_combos_matbench.json"
    cnt_dict_file = os.path.join(metrics_dir, cnt_dict_filename)
    json_content = json.dumps(cnt_dict, cls=NumpyEncoder, indent=4)

    # Writing this summary is best-effort: a failure must not abort the run,
    # but it should be visible. Only I/O errors are tolerated; anything else
    # (including KeyboardInterrupt) propagates.
    try:
        with open(cnt_dict_file, "w") as f:
            f.write(json_content)
    except OSError as err:
        warnings.warn(f"Could not write {cnt_dict_file}: {err}")

    n_cores = get_core_count()
    n_jobs = resolve_n_jobs(n_cores)

    ignore_warnings = True
    if ignore_warnings:
        # `message` is matched as a regex against the start of the warning
        # text, so this silences the specific max-iteration warning only.
        maxiter_msg = (
            "Maximum number of iteration reached "
            "before convergence. Consider increasing max_iter "
            "to improve the fit."
        )
        warnings.filterwarnings("ignore", message=maxiter_msg)

    ti_matbench_classics = time()
    cv_folds = 2

    # Run the search; the helper receives the output directories, so results
    # are presumably written there -- verify in `modelselectionhelper`.
    mshelper1 = modelselectionhelper(
        models1,
        params1,
        elem_props,
        mat_props_dir,
        mat_props,
        metrics_dir,
        fig_dir,
        scoring=scorings,
        n_jobs=n_jobs,
        cv=cv_folds,
        refit="neg_MAE",
        verbose=True,
        random_seed=RNG_SEED,
    )

    dt_matbench_classics = time() - ti_matbench_classics
    print("*********** matbench_classics finished ***********")
    print(f"matbench_classics finished, elapsed time: {dt_matbench_classics:0.4g} s")
    print("*********** matbench_classics finished ***********")