-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_sklearn_model.py
65 lines (50 loc) · 2.1 KB
/
test_sklearn_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
"""
test training a SVR model using sklearn
and 1024-bit molecular fingerprints
with Chen, et al. dataset (src_id=412)
"""
import numpy as np
from rtp.data import assemble_dataset_arrays, split_train_test_data, FeatureSet
from rtp.sklearn_models import train_random_forest, metrics, model_predictions, train_gradient_booster, train_SVR, plots, train_model
def _main():
# extract the dataset
fp_size = 1024
labels, features, rts = assemble_dataset_arrays([412],
"./idpp.db",
FeatureSet.FP,
fp_size)
# split into training/test sets
train_idx, test_idx = split_train_test_data(rts)
train_labels, train_features, train_rts = labels[train_idx], features[train_idx], rts[train_idx]
test_labels, test_features, test_rts = labels[test_idx], features[test_idx], rts[test_idx]
#get some information about the dataset
print(train_features.shape)
print(train_rts.shape)
print(test_features.shape)
print(test_rts.shape)
#create lists for parameter tuning
n_estimators = [10,50,100,200]
max_depth = [None,10,20,30]
min_samples_split = [2, 3, 4, 5, 10, 20]
min_samples_leaf = [1,2,4, 10, 20, 50]
max_features = [1,5,10]
learning_rate = [.005, .05, .025, 1]
c = [.001, .01, .1, 1, 10, 100, 1000]
gamma= ['scale', 'auto', .0001, .001, .01, .1, 1, 10, 100]
#defining filepath where model instance will be saved
filepath = './test_SVR.pkl'
#Using a grid search to find optimal parameters and training the model
svm = train_SVR(train_features,
train_rts,
filepath,
c,
gamma
)
#getting model predictions
svm_pred = model_predictions(svm, test_features)
#evaluating model performance
eval_svm = metrics(svm_pred, test_rts)
#plotting results
plots(test_rts, svm_pred)
if __name__ == "__main__":
_main()