Python 2 and 3 compatibility #10

Open · wants to merge 7 commits into master
50 changes: 26 additions & 24 deletions common_defs.py
@@ -1,4 +1,6 @@
"imports and definitions shared by various defs files"

from __future__ import print_function
"""imports and definitions shared by various defs files"""

import numpy as np

@@ -13,35 +15,35 @@
from hyperopt import hp
from hyperopt.pyll.stochastic import sample
except ImportError:
print "In order to achieve operational capability, this programme requires hyperopt to be installed (pip install hyperopt), unless you make get_params() use something else."
#
print("In order to achieve operational capability, this programme requires hyperopt to be installed (pip install hyperopt), unless you make get_params() use something else.")

#

# handle floats which should be integers
# works with flat params
def handle_integers( params ):

new_params = {}
for k, v in params.items():
for k, v in list(params.items()):
if type( v ) == float and int( v ) == v:
new_params[k] = int( v )
else:
new_params[k] = v

return new_params

###

def train_and_eval_sklearn_classifier( clf, data ):

x_train = data['x_train']
y_train = data['y_train']

x_test = data['x_test']
y_test = data['y_test']
clf.fit( x_train, y_train )
y_test = data['y_test']

clf.fit( x_train, y_train )

try:
p = clf.predict_proba( x_train )[:,1] # sklearn convention
except IndexError:
@@ -51,7 +53,7 @@ def train_and_eval_sklearn_classifier( clf, data ):
auc = AUC( y_train, p )
acc = accuracy( y_train, np.round( p ))

print "\n# training | log loss: {:.2%}, AUC: {:.2%}, accuracy: {:.2%}".format( ll, auc, acc )
print("\n# training | log loss: {:.2%}, AUC: {:.2%}, accuracy: {:.2%}".format( ll, auc, acc ))

#

@@ -64,31 +66,31 @@ def train_and_eval_sklearn_classifier( clf, data ):
auc = AUC( y_test, p )
acc = accuracy( y_test, np.round( p ))

print "# testing | log loss: {:.2%}, AUC: {:.2%}, accuracy: {:.2%}".format( ll, auc, acc )
print("# testing | log loss: {:.2%}, AUC: {:.2%}, accuracy: {:.2%}".format( ll, auc, acc ))

#return { 'loss': 1 - auc, 'log_loss': ll, 'auc': auc }
return { 'loss': ll, 'log_loss': ll, 'auc': auc }

###

# "clf", even though it's a regressor
def train_and_eval_sklearn_regressor( clf, data ):

x_train = data['x_train']
y_train = data['y_train']

x_test = data['x_test']
y_test = data['y_test']
clf.fit( x_train, y_train )
y_test = data['y_test']

clf.fit( x_train, y_train )
p = clf.predict( x_train )

mse = MSE( y_train, p )
rmse = sqrt( mse )
mae = MAE( y_train, p )


print "\n# training | RMSE: {:.4f}, MAE: {:.4f}".format( rmse, mae )
print("\n# training | RMSE: {:.4f}, MAE: {:.4f}".format( rmse, mae ))

#

@@ -98,7 +100,7 @@ def train_and_eval_sklearn_regressor( clf, data ):
rmse = sqrt( mse )
mae = MAE( y_test, p )

print "# testing | RMSE: {:.4f}, MAE: {:.4f}".format( rmse, mae )
print("# testing | RMSE: {:.4f}, MAE: {:.4f}".format( rmse, mae ))

return { 'loss': rmse, 'rmse': rmse, 'mae': mae }
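For context, a minimal standalone sketch of the two idioms this file's changes rely on (the names and values below are invented for illustration, not taken from the repo): the __future__ import makes print() a function under Python 2 as well, and wrapping dict.items() in list() materializes the view object that Python 3 returns.

from __future__ import print_function   # print() behaves identically on Python 2 and 3

params = { 'n_estimators': 100.0, 'max_depth': 3 }

# same pattern as handle_integers() above: items() is a view in Python 3,
# so list() gives a concrete list that iterates the same way on both versions
for k, v in list( params.items() ):
    if type( v ) == float and int( v ) == v:
        print( "{}: {} coerced to int".format( k, int( v ) ) )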

3 changes: 2 additions & 1 deletion defs/gb.py
@@ -1,3 +1,4 @@
from __future__ import print_function
"function (and parameter space) definitions for hyperband"
"binary classification with gradient boosting"

@@ -33,7 +34,7 @@ def get_params():
def try_params( n_iterations, params ):

n_estimators = int( round( n_iterations * trees_per_iteration ))
print "n_estimators:", n_estimators
print("n_estimators:", n_estimators)
pprint( params )

clf = GB( n_estimators = n_estimators, verbose = 0, **params )
81 changes: 41 additions & 40 deletions defs/keras_mlp.py
@@ -1,3 +1,4 @@
from __future__ import print_function
"function (and parameter space) definitions for hyperband"
"binary classification with Keras (multilayer perceptron)"

@@ -16,33 +17,33 @@

#

# TODO: advanced activations - 'leakyrelu', 'prelu', 'elu', 'thresholdedrelu', 'srelu'
# TODO: advanced activations - 'leakyrelu', 'prelu', 'elu', 'thresholdedrelu', 'srelu'


max_layers = 5

space = {
'scaler': hp.choice( 's',
'scaler': hp.choice( 's',
( None, 'StandardScaler', 'RobustScaler', 'MinMaxScaler', 'MaxAbsScaler' )),
'n_layers': hp.quniform( 'l', 1, max_layers, 1 ),
#'layer_size': hp.quniform( 'ls', 5, 100, 1 ),
#'activation': hp.choice( 'a', ( 'relu', 'sigmoid', 'tanh' )),
'init': hp.choice( 'i', ( 'uniform', 'normal', 'glorot_uniform',
#'activation': hp.choice( 'a', ( 'relu', 'sigmoid', 'tanh' ))
'init': hp.choice( 'i', ( 'uniform', 'normal', 'glorot_uniform',
'glorot_normal', 'he_uniform', 'he_normal' )),
'batch_size': hp.choice( 'bs', ( 16, 32, 64, 128, 256 )),
'optimizer': hp.choice( 'o', ( 'rmsprop', 'adagrad', 'adadelta', 'adam', 'adamax' ))
'optimizer': hp.choice( 'o', ( 'rmsprop', 'adagrad', 'adadelta', 'adam', 'adamax' ))
}

# for each hidden layer, we choose size, activation and extras individually
for i in range( 1, max_layers + 1 ):
space[ 'layer_{}_size'.format( i )] = hp.quniform( 'ls{}'.format( i ), 2, 200, 1 )
space[ 'layer_{}_activation'.format( i )] = hp.choice( 'a{}'.format( i ),
space[ 'layer_{}_activation'.format( i )] = hp.choice( 'a{}'.format( i ),
( 'relu', 'sigmoid', 'tanh' ))
space[ 'layer_{}_extras'.format( i )] = hp.choice( 'e{}'.format( i ), (
{ 'name': 'dropout', 'rate': hp.uniform( 'd{}'.format( i ), 0.1, 0.5 )},
space[ 'layer_{}_extras'.format( i )] = hp.choice( 'e{}'.format( i ), (
{ 'name': 'dropout', 'rate': hp.uniform( 'd{}'.format( i ), 0.1, 0.5 )},
{ 'name': 'batchnorm' },
{ 'name': None } ))
{ 'name': None } ))

def get_params():

params = sample( space )
@@ -55,81 +56,81 @@ def get_params():
# print hidden layers config in readable way
def print_layers( params ):
for i in range( 1, params['n_layers'] + 1 ):
print "layer {} | size: {:>3} | activation: {:<7} | extras: {}".format( i,
params['layer_{}_size'.format( i )],
print("layer {} | size: {:>3} | activation: {:<7} | extras: {}".format( i,
params['layer_{}_size'.format( i )],
params['layer_{}_activation'.format( i )],
params['layer_{}_extras'.format( i )]['name'] ),
params['layer_{}_extras'.format( i )]['name'] ), end=' ')
if params['layer_{}_extras'.format( i )]['name'] == 'dropout':
print "- rate: {:.1%}".format( params['layer_{}_extras'.format( i )]['rate'] ),
print
print("- rate: {:.1%}".format( params['layer_{}_extras'.format( i )]['rate'] ), end=' ')
print()

def print_params( params ):
pprint({ k: v for k, v in params.items() if not k.startswith( 'layer_' )})
pprint({ k: v for k, v in list(params.items()) if not k.startswith( 'layer_' )})
print_layers( params )
print
print()

def try_params( n_iterations, params ):
print "iterations:", n_iterations

print("iterations:", n_iterations)
print_params( params )

y_train = data['y_train']
y_test = data['y_test']

if params['scaler']:
scaler = eval( "{}()".format( params['scaler'] ))
x_train_ = scaler.fit_transform( data['x_train'].astype( float ))
x_test_ = scaler.transform( data['x_test'].astype( float ))
else:
x_train_ = data['x_train']
x_test_ = data['x_test']

input_dim = x_train_.shape[1]

model = Sequential()
model.add( Dense( params['layer_1_size'], init = params['init'],
model.add( Dense( params['layer_1_size'], init = params['init'],
activation = params['layer_1_activation'], input_dim = input_dim ))

for i in range( int( params['n_layers'] ) - 1 ):

extras = 'layer_{}_extras'.format( i + 1 )

if params[extras]['name'] == 'dropout':
model.add( Dropout( params[extras]['rate'] ))
elif params[extras]['name'] == 'batchnorm':
model.add( BatchNorm())
model.add( Dense( params['layer_{}_size'.format( i + 2 )], init = params['init'],

model.add( Dense( params['layer_{}_size'.format( i + 2 )], init = params['init'],
activation = params['layer_{}_activation'.format( i + 2 )]))

model.add( Dense( 1, init = params['init'], activation = 'sigmoid' ))

model.compile( optimizer = params['optimizer'], loss = 'binary_crossentropy' )

#print model.summary()

#

validation_data = ( x_test_, y_test )

early_stopping = EarlyStopping( monitor = 'val_loss', patience = 5, verbose = 0 )

history = model.fit( x_train_, y_train,
nb_epoch = int( round( n_iterations )),
batch_size = params['batch_size'],
shuffle = False,
validation_data = validation_data,
callbacks = [ early_stopping ])
batch_size = params['batch_size'],
shuffle = False,
validation_data = validation_data,
callbacks = [ early_stopping ])

#

p = model.predict_proba( x_train_, batch_size = params['batch_size'] )

ll = log_loss( y_train, p )
auc = AUC( y_train, p )
acc = accuracy( y_train, np.round( p ))

print "\n# training | log loss: {:.2%}, AUC: {:.2%}, accuracy: {:.2%}".format( ll, auc, acc )
print("\n# training | log loss: {:.2%}, AUC: {:.2%}, accuracy: {:.2%}".format( ll, auc, acc ))

#

@@ -139,7 +140,7 @@ def try_params( n_iterations, params ):
auc = AUC( y_test, p )
acc = accuracy( y_test, np.round( p ))

print "# testing | log loss: {:.2%}, AUC: {:.2%}, accuracy: {:.2%}".format( ll, auc, acc )
print("# testing | log loss: {:.2%}, AUC: {:.2%}, accuracy: {:.2%}".format( ll, auc, acc ))

return { 'loss': ll, 'log_loss': ll, 'auc': auc, 'early_stop': model.stop_training }
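As a side note, a small self-contained sketch of the print conversion used in print_layers() above: Python 2's trailing comma, which suppressed the newline, becomes end=' ' once print_function is imported (the layer values here are made up for illustration).

from __future__ import print_function

layer = { 'size': 64, 'activation': 'relu', 'extras': { 'name': 'dropout', 'rate': 0.25 } }

# trailing-comma print from Python 2 maps to end=' ' in the function form
print( "layer 1 | size: {:>3} | activation: {:<7} | extras: {}".format(
    layer['size'], layer['activation'], layer['extras']['name'] ), end=' ' )
if layer['extras']['name'] == 'dropout':
    print( "- rate: {:.1%}".format( layer['extras']['rate'] ), end=' ' )
print()   # bare print() ends the line, replacing Python 2's bare "print"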

7 changes: 4 additions & 3 deletions defs/meta.py
@@ -1,12 +1,13 @@
from __future__ import print_function
# meta classifier
from common_defs import *

models = ( 'xgb', 'gb', 'rf', 'xt', 'sgd', 'polylearn_fm', 'polylearn_pn', 'keras_mlp' )

# import all the functions
for m in models:
exec( "from defs.{} import get_params as get_params_{}" ).format( m, m )
exec( "from defs.{} import try_params as try_params_{}" ).format( m, m )
exec(( "from defs.{} import get_params as get_params_{}" ).format( m, m ))
exec(( "from defs.{} import try_params as try_params_{}" ).format( m, m ))

space = { 'model': hp.choice( 'model', models ) }

@@ -20,7 +21,7 @@ def get_params():
def try_params( n_iterations, params ):
params_ = dict( params )
m = params_.pop( 'model' )
print m
print(m)

return eval( "try_params_{}( n_iterations, params_ )".format( m ))
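A brief note on why the exec() change above matters, with a hedged sketch (it assumes the repo's defs/ package is importable, as meta.py does): in Python 2 exec is a statement, so the original exec( "..." ).format( m, m ) parsed as exec applied to the already formatted string; in Python 3 exec() is a function that would run first, and the .format() call would then fail on its None return value. Formatting inside the parentheses works under both.

models = ( 'gb', 'rf' )   # subset, for illustration only

for m in models:
    # build the import statement first, then execute it
    exec( ( "from defs.{} import get_params as get_params_{}" ).format( m, m ) )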

3 changes: 2 additions & 1 deletion defs/polylearn_fm.py
@@ -1,3 +1,4 @@
from __future__ import print_function
"function (and parameter space) definitions for hyperband"
"binary classification with polylearn FM"

@@ -36,7 +37,7 @@ def get_params():
def try_params( n_iterations, params ):

max_iter = int( round( n_iterations * iters_per_iteration ))
print "max_iter:", max_iter
print("max_iter:", max_iter)
pprint( params )

if params['scaler']:
5 changes: 3 additions & 2 deletions defs/polylearn_fm_pn.py
@@ -1,3 +1,4 @@
from __future__ import print_function
"function (and parameter space) definitions for hyperband"
"binary classification with polylearn FM/PN"

@@ -47,7 +48,7 @@ def get_params():
def try_params( n_iterations, params ):

max_iter = int( round( n_iterations * iters_per_iteration ))
print "max_iter:", max_iter
print("max_iter:", max_iter)

if params['scaler']:
scaler = eval( "{}()".format( params['scaler'] ))
@@ -72,7 +73,7 @@ def try_params( n_iterations, params ):
params_.pop( 'classifier' )
params_.update( local_params )

print classifier
print(classifier)
pprint( params_ )

params_.pop( 'scaler' )
3 changes: 2 additions & 1 deletion defs/polylearn_pn.py
@@ -1,3 +1,4 @@
from __future__ import print_function
"function (and parameter space) definitions for hyperband"
"binary classification with polylearn polynomial networks"

@@ -36,7 +37,7 @@ def get_params():
def try_params( n_iterations, params ):

max_iter = int( round( n_iterations * iters_per_iteration ))
print "max_iter:", max_iter
print("max_iter:", max_iter)
pprint( params )

if params['scaler']:
3 changes: 2 additions & 1 deletion defs/rf.py
@@ -1,3 +1,4 @@
from __future__ import print_function
"function (and parameter space) definitions for hyperband"
"binary classification with random forest"

@@ -32,7 +33,7 @@ def get_params():
def try_params( n_iterations, params ):

n_estimators = int( round( n_iterations * trees_per_iteration ))
print "n_estimators:", n_estimators
print("n_estimators:", n_estimators)
pprint( params )

clf = RF( n_estimators = n_estimators, verbose = 0, n_jobs = -1, **params )