diff --git a/h2o-py/h2o/model/metrics/binomial.py b/h2o-py/h2o/model/metrics/binomial.py
index 23130234a84b..260d34e3bc6e 100644
--- a/h2o-py/h2o/model/metrics/binomial.py
+++ b/h2o-py/h2o/model/metrics/binomial.py
@@ -976,3 +976,26 @@ def thresholds_and_metric_scores(self):
         if 'thresholds_and_metric_scores' in self._metric_json:
             return self._metric_json['thresholds_and_metric_scores']
         return None
+
+    def kolmogorov_smirnov(self, thresholds=None):
+        """
+        :param thresholds: thresholds parameter must be a list (e.g. ``[0.01, 0.5, 0.99]``).
+            If None, then the threshold maximizing the KS statistic will be used.
+        :returns: The Kolmogorov-Smirnov statistic for this set of metrics and thresholds.
+
+        :examples:
+
+        >>> from h2o.estimators.gbm import H2OGradientBoostingEstimator
+        >>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv")
+        >>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor()
+        >>> predictors = ["displacement","power","weight","acceleration","year"]
+        >>> response = "economy_20mpg"
+        >>> train, valid = cars.split_frame(ratios = [.8], seed = 1234)
+        >>> cars_gbm = H2OGradientBoostingEstimator(seed = 1234)
+        >>> cars_gbm.train(x = predictors,
+        ...                y = response,
+        ...                training_frame = train,
+        ...                validation_frame = valid)
+        >>> cars_gbm.kolmogorov_smirnov()
+        """
+        return self.metric("ks", thresholds=thresholds)
diff --git a/h2o-py/tests/pyunit_math/pyunit_ks_metric.py b/h2o-py/tests/pyunit_math/pyunit_ks_metric.py
index c8f90f5de3b8..98f35633fd93 100644
--- a/h2o-py/tests/pyunit_math/pyunit_ks_metric.py
+++ b/h2o-py/tests/pyunit_math/pyunit_ks_metric.py
@@ -10,7 +10,8 @@ def kolmogorov_smirnov():
     model = H2OGradientBoostingEstimator(ntrees=1, gainslift_bins=20)
     model.train(x=["Origin", "Distance"], y="IsDepDelayed", training_frame=airlines)
     verify_ks(model, airlines)
-    
+
+    # Test without Thresholds
     model = H2OGradientBoostingEstimator(ntrees=1, gainslift_bins=5)
     model.train(x=["Origin", "Distance"], y="IsDepDelayed", training_frame=airlines)
     ks = model.kolmogorov_smirnov()
@@ -19,6 +20,24 @@ def kolmogorov_smirnov():
     print(ks_verification)
     assert round(ks, 5) != round(ks_verification, 5)
 
+    # Test with specific thresholds
+    model = H2OGradientBoostingEstimator(ntrees=1, gainslift_bins=5)
+    model.train(x=["Origin", "Distance"], y="IsDepDelayed", training_frame=airlines)
+    ks = model.kolmogorov_smirnov(thresholds=[0.01, 0.5, 0.99])
+    print("KS with thresholds [0.01, 0.5, 0.99]:", ks)
+    ks_verification = ks_metric(model, airlines)
+    print("KS verification:", ks_verification)
+    assert round(ks, 5) != round(ks_verification, 5)
+
+    # Test with invalid thresholds: an exception is the expected outcome, so
+    # fail explicitly if the call returns normally instead of passing silently.
+    try:
+        model.kolmogorov_smirnov(thresholds="invalid")
+    except ValueError as e:
+        print("Caught expected exception for invalid thresholds:", e)
+    else:
+        assert False, "kolmogorov_smirnov accepted invalid thresholds"
+
     model = H2OXGBoostEstimator(gainslift_bins=10)
     model.train(x=["Origin", "Distance"], y="IsDepDelayed", training_frame=airlines)
     print(model.gains_lift())