[ENH] Calibration plot (add performance curves) and a new Calibrated Learner widget #3881

Merged Jul 12, 2019 · 21 commits
Changes from 17 commits

Commits
35a6f4b
Calibration plot: Add plots of ca, sens/spec, prec/recall, ppv/npv
janezd Jun 13, 2019
2fa1750
Calibration plot: Add threshold line
janezd Jun 13, 2019
d47b68b
Calibration plot: Refactor computation of metrics
janezd Jun 13, 2019
585feb2
Testing: Keep 2d array of models when splitting Results by models
janezd Jun 13, 2019
7b876e6
Test Learners: Store models when there is just one; properly stack them
janezd Jun 13, 2019
93b7a72
classification: Add ModelWithThreshold
janezd Jun 13, 2019
ff67b49
Calibration plot: Output selected model
janezd Jun 13, 2019
a4424fb
Orange.evaluation.performance_curves: Add module for computation of p…
janezd Jun 16, 2019
6024897
Calibration plot: Use Orange.evaluation.testing.performance_curves to…
janezd Jun 16, 2019
1cfbeec
Calibration plot: Fix selected model output
janezd Jun 17, 2019
f742ff9
OWLearnerWidget: Let default name appear as placeholder. This allows …
janezd Jun 17, 2019
c5d070d
evaluations.testing: Minor fixes in unit tests
janezd Jun 17, 2019
557fa2e
OWTestLearners: Skip inactive signals (e.g. learner widget outputs None)
janezd Jun 17, 2019
1a8b013
Calibrated Learner: Add widget
janezd Jun 17, 2019
6ac1db1
Calibration plot: Add context settings
janezd Jun 17, 2019
2edcb39
OWCalibration Plot: Unit tests and some fixes
janezd Jun 18, 2019
2049afa
Calibration plot: Test missing probabilities and single classes
janezd Jun 19, 2019
04d05f4
Calibration plot: Minor fixes
janezd Jun 24, 2019
6695ee9
Calibrated Learner: Fix report
janezd Jun 28, 2019
65c69e2
Calibrated Learner: Add icon
janezd Jun 28, 2019
864d7b5
Calibration plot: Nicer report
janezd Jun 28, 2019
1 change: 1 addition & 0 deletions Orange/classification/__init__.py
@@ -19,3 +19,4 @@
from .rules import *
from .sgd import *
from .neural_network import *
from .calibration import *
176 changes: 176 additions & 0 deletions Orange/classification/calibration.py
@@ -0,0 +1,176 @@
import numpy as np
from sklearn.isotonic import IsotonicRegression
from sklearn.calibration import _SigmoidCalibration

from Orange.classification import Model, Learner
from Orange.evaluation import TestOnTrainingData
from Orange.evaluation.performance_curves import Curves

__all__ = ["ThresholdClassifier", "ThresholdLearner",
"CalibratedLearner", "CalibratedClassifier"]


class ThresholdClassifier(Model):
"""
A model that wraps a binary model and sets a different threshold.

The target class is the class with index 1. A data instance is classified
to class 1 if the probability of this class equals or exceeds the threshold.

Attributes:
base_model (Orange.classification.Model): base model
threshold (float): decision threshold
"""
def __init__(self, base_model, threshold):
if not base_model.domain.class_var.is_discrete \
or len(base_model.domain.class_var.values) != 2:
raise ValueError("ThresholdClassifier requires a binary class")

super().__init__(base_model.domain, base_model.original_domain)
self.name = f"{base_model.name}, thresh={threshold:.2f}"
self.base_model = base_model
self.threshold = threshold

def __call__(self, data, ret=Model.Value):
probs = self.base_model(data, ret=Model.Probs)
if ret == Model.Probs:
return probs
class_probs = probs[:, 1].ravel()
with np.errstate(invalid="ignore"): # we fix nans below
vals = (class_probs >= self.threshold).astype(float)
vals[np.isnan(class_probs)] = np.nan
if ret == Model.Value:
return vals
else:
return vals, probs


class ThresholdLearner(Learner):
"""
A learner that runs another learner and then finds the optimal threshold
for CA or F1 on the training data.

Attributes:
base_learner (Learner): base learner
threshold_criterion (int):
`ThresholdLearner.OptimizeCA` or `ThresholdLearner.OptimizeF1`
"""
__returns__ = ThresholdClassifier

OptimizeCA, OptimizeF1 = range(2)

def __init__(self, base_learner, threshold_criterion=OptimizeCA):
super().__init__()
self.base_learner = base_learner
self.threshold_criterion = threshold_criterion

def fit_storage(self, data):
"""
Induce a model using the provided `base_learner`, compute probabilities
on training data and then find the optimal decision threshold. In case
of ties, select the threshold that is closest to 0.5.
"""
if not data.domain.class_var.is_discrete \
or len(data.domain.class_var.values) != 2:
raise ValueError("ThresholdLearner requires a binary class")

res = TestOnTrainingData(data, [self.base_learner], store_models=True)
model = res.models[0, 0]
curves = Curves.from_results(res)
curve = [curves.ca, curves.f1][self.threshold_criterion]()
# In case of ties, we want the optimal threshold that is closest to 0.5
best_threshs = curves.probs[curve == np.max(curve)]
threshold = best_threshs[min(np.searchsorted(best_threshs, 0.5),
len(best_threshs) - 1)]
return ThresholdClassifier(model, threshold)


class CalibratedClassifier(Model):
"""
A model that wraps another model and recalibrates probabilities

Attributes:
base_model (Model): base model
calibrators (list of callable):
list of functions that get a vector of probabilities and return
calibrated probabilities
"""
def __init__(self, base_model, calibrators):
if not base_model.domain.class_var.is_discrete:
raise ValueError("CalibratedClassifier requires a discrete target")

super().__init__(base_model.domain, base_model.original_domain)
self.base_model = base_model
self.calibrators = calibrators
self.name = f"{base_model.name}, calibrated"

def __call__(self, data, ret=Model.Value):
probs = self.base_model(data, Model.Probs)
cal_probs = self.calibrated_probs(probs)
if ret == Model.Probs:
return cal_probs
vals = np.argmax(cal_probs, axis=1)
if ret == Model.Value:
return vals
else:
return vals, cal_probs

def calibrated_probs(self, probs):
if self.calibrators:
ps = np.hstack(
tuple(
calibr.predict(cls_probs).reshape(-1, 1)
for calibr, cls_probs in zip(self.calibrators, probs.T)))
else:
ps = probs.copy()
sums = np.sum(ps, axis=1)
zero_sums = sums == 0
with np.errstate(invalid="ignore"): # handled below
ps /= sums[:, None]
if zero_sums.any():
ps[zero_sums] = 1 / ps.shape[1]
return ps


class CalibratedLearner(Learner):
"""
Probability calibration for learning algorithms

This learner wraps another learner so that, after training, it predicts
the probabilities on training data and calibrates them using sigmoid or
isotonic calibration. It then returns a :obj:`CalibratedClassifier`.

Attributes:
base_learner (Learner): base learner
calibration_method (int):
`CalibratedLearner.Sigmoid` or `CalibratedLearner.Isotonic`
"""
__returns__ = CalibratedClassifier

Sigmoid, Isotonic = range(2)

def __init__(self, base_learner, calibration_method=Sigmoid):
super().__init__()
self.base_learner = base_learner
self.calibration_method = calibration_method

def fit_storage(self, data):
"""
Induce a model using the provided `base_learner`, compute probabilities
on training data and use scikit-learn's `_SigmoidCalibration` or
`IsotonicRegression` to prepare calibrators.
"""
res = TestOnTrainingData(data, [self.base_learner], store_models=True)
model = res.models[0, 0]
probabilities = res.probabilities[0]
return self.get_model(model, res.actual, probabilities)

def get_model(self, model, ytrue, probabilities):
probabilities[np.isinf(probabilities)] = 1
# Each class needs its own calibrator: sklearn's `fit` returns the same
# (refitted) estimator, so a single shared instance must not be reused
# across classes.
if self.calibration_method == CalibratedLearner.Sigmoid:
calibrators = [_SigmoidCalibration().fit(cls_probs, ytrue)
for cls_probs in probabilities.T]
else:
calibrators = [IsotonicRegression(out_of_bounds='clip')
.fit(cls_probs, ytrue)
for cls_probs in probabilities.T]
return CalibratedClassifier(model, calibrators)
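
For orientation, a minimal usage sketch of the two new learners (not part of the diff; it assumes Orange's LogisticRegressionLearner as the base learner and the heart_disease data set used in the unit tests below):

from Orange.base import Model
from Orange.data import Table
from Orange.classification import LogisticRegressionLearner
from Orange.classification.calibration import ThresholdLearner, CalibratedLearner

data = Table("heart_disease")  # binary target

# Wrap the base learner and pick the CA-optimal decision threshold
# estimated on the training data
thresh_model = ThresholdLearner(
    base_learner=LogisticRegressionLearner(),
    threshold_criterion=ThresholdLearner.OptimizeCA)(data)
print(thresh_model.threshold)

# Wrap the base learner and recalibrate its probabilities with isotonic regression
cal_model = CalibratedLearner(
    base_learner=LogisticRegressionLearner(),
    calibration_method=CalibratedLearner.Isotonic)(data)
print(cal_model(data, ret=Model.Probs))  # calibrated probabilities, rows sum to 1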
203 changes: 203 additions & 0 deletions Orange/classification/tests/test_calibration.py
@@ -0,0 +1,203 @@
import unittest
from unittest.mock import Mock, patch

import numpy as np

from Orange.base import Model
from Orange.classification.calibration import \
ThresholdLearner, ThresholdClassifier, \
CalibratedLearner, CalibratedClassifier
from Orange.data import Table


class TestThresholdClassifier(unittest.TestCase):
def setUp(self):
probs1 = np.array([0.3, 0.5, 0.2, 0.8, 0.9, 0]).reshape(-1, 1)
self.probs = np.hstack((1 - probs1, probs1))
base_model = Mock(return_value=self.probs)
base_model.domain.class_var.is_discrete = True
base_model.domain.class_var.values = ["a", "b"]
self.model = ThresholdClassifier(base_model, 0.5)
self.data = Mock()

def test_threshold(self):
vals = self.model(self.data)
np.testing.assert_equal(vals, [0, 1, 0, 1, 1, 0])

self.model.threshold = 0.8
vals = self.model(self.data)
np.testing.assert_equal(vals, [0, 0, 0, 1, 1, 0])

self.model.threshold = 0
vals = self.model(self.data)
np.testing.assert_equal(vals, [1] * 6)

def test_return_types(self):
vals = self.model(self.data, ret=Model.Value)
np.testing.assert_equal(vals, [0, 1, 0, 1, 1, 0])

vals = self.model(self.data)
np.testing.assert_equal(vals, [0, 1, 0, 1, 1, 0])

probs = self.model(self.data, ret=Model.Probs)
np.testing.assert_equal(probs, self.probs)

vals, probs = self.model(self.data, ret=Model.ValueProbs)
np.testing.assert_equal(vals, [0, 1, 0, 1, 1, 0])
np.testing.assert_equal(probs, self.probs)

def test_nans(self):
self.probs[1, :] = np.nan
vals, probs = self.model(self.data, ret=Model.ValueProbs)
np.testing.assert_equal(vals, [0, np.nan, 0, 1, 1, 0])
np.testing.assert_equal(probs, self.probs)

def test_non_binary_base(self):
base_model = Mock()
base_model.domain.class_var.is_discrete = True
base_model.domain.class_var.values = ["a"]
self.assertRaises(ValueError, ThresholdClassifier, base_model, 0.5)

base_model.domain.class_var.values = ["a", "b", "c"]
self.assertRaises(ValueError, ThresholdClassifier, base_model, 0.5)

base_model.domain.class_var = Mock()
base_model.domain.class_var.is_discrete = False
self.assertRaises(ValueError, ThresholdClassifier, base_model, 0.5)


class TestThresholdLearner(unittest.TestCase):
@patch("Orange.evaluation.performance_curves.Curves.from_results")
@patch("Orange.classification.calibration.TestOnTrainingData")
def test_fit_storage(self, test_on_training, curves_from_results):
curves_from_results.return_value = curves = Mock()
curves.probs = np.array([0.1, 0.15, 0.3, 0.45, 0.6, 0.8])
curves.ca = lambda: np.array([0.1, 0.7, 0.4, 0.4, 0.3, 0.1])
curves.f1 = lambda: np.array([0.1, 0.2, 0.4, 0.4, 0.3, 0.1])
model = Mock()
model.domain.class_var.is_discrete = True
model.domain.class_var.values = ("a", "b")
data = Table("heart_disease")
learner = Mock()
test_on_training.return_value = res = Mock()
res.models = np.array([[model]])

thresh_learner = ThresholdLearner(
base_learner=learner,
threshold_criterion=ThresholdLearner.OptimizeCA)
thresh_model = thresh_learner(data)
self.assertEqual(thresh_model.threshold, 0.15)
args, kwargs = test_on_training.call_args
self.assertEqual(len(args), 2)
self.assertIs(args[0], data)
self.assertIs(args[1][0], learner)
self.assertEqual(len(args[1]), 1)
self.assertEqual(kwargs, {"store_models": 1})

thresh_learner = ThresholdLearner(
base_learner=learner,
threshold_criterion=ThresholdLearner.OptimizeF1)
thresh_model = thresh_learner(data)
self.assertEqual(thresh_model.threshold, 0.45)

def test_non_binary_class(self):
thresh_learner = ThresholdLearner(
base_learner=Mock(),
threshold_criterion=ThresholdLearner.OptimizeF1)

data = Mock()
data.domain.class_var.is_discrete = True
data.domain.class_var.values = ["a"]
self.assertRaises(ValueError, thresh_learner.fit_storage, data)

data.domain.class_var.values = ["a", "b", "c"]
self.assertRaises(ValueError, thresh_learner.fit_storage, data)

data.domain.class_var = Mock()
data.domain.class_var.is_discrete = False
self.assertRaises(ValueError, thresh_learner.fit_storage, data)


class TestCalibratedClassifier(unittest.TestCase):
def setUp(self):
probs1 = np.array([0.3, 0.5, 0.2, 0.8, 0.9, 0]).reshape(-1, 1)
self.probs = np.hstack((1 - probs1, probs1))
base_model = Mock(return_value=self.probs)
base_model.domain.class_var.is_discrete = True
base_model.domain.class_var.values = ["a", "b"]
self.model = CalibratedClassifier(base_model, None)
self.data = Mock()

def test_call(self):
calprobs = np.arange(self.probs.size).reshape(self.probs.shape)
calprobs = calprobs / np.sum(calprobs, axis=1)[:, None]
calprobs[-1] = [0.7, 0.3]
self.model.calibrated_probs = Mock(return_value=calprobs)

probs = self.model(self.data, ret=Model.Probs)
self.model.calibrated_probs.assert_called_with(self.probs)
np.testing.assert_almost_equal(probs, calprobs)

vals = self.model(self.data, ret=Model.Value)
np.testing.assert_almost_equal(vals, [1, 1, 1, 1, 1, 0])

vals, probs = self.model(self.data, ret=Model.ValueProbs)
np.testing.assert_almost_equal(probs, calprobs)
np.testing.assert_almost_equal(vals, [1, 1, 1, 1, 1, 0])

def test_calibrated_probs(self):
self.model.calibrators = None
calprobs = self.model.calibrated_probs(self.probs)
np.testing.assert_equal(calprobs, self.probs)
self.assertIsNot(calprobs, self.probs)

calibrator = Mock()
calibrator.predict = lambda x: x**2
self.model.calibrators = [calibrator] * 2
calprobs = self.model.calibrated_probs(self.probs)
expprobs = self.probs ** 2 / np.sum(self.probs ** 2, axis=1)[:, None]
np.testing.assert_almost_equal(calprobs, expprobs)

self.probs[1] = 0
self.probs[2] = np.nan
expprobs[1] = 0.5
expprobs[2] = np.nan
calprobs = self.model.calibrated_probs(self.probs)
np.testing.assert_almost_equal(calprobs, expprobs)


class TestCalibratedLearner(unittest.TestCase):
@patch("Orange.classification.calibration._SigmoidCalibration.fit")
@patch("Orange.classification.calibration.TestOnTrainingData")
def test_fit_storage(self, test_on_training, sigmoid_fit):
data = Table("heart_disease")
learner = Mock()

model = Mock()
model.domain.class_var.is_discrete = True
model.domain.class_var.values = ("a", "b")

test_on_training.return_value = res = Mock()
res.models = np.array([[model]])
res.probabilities = np.arange(20, dtype=float).reshape(1, 5, 4)

sigmoid_fit.return_value = Mock()

cal_learner = CalibratedLearner(
base_learner=learner, calibration_method=CalibratedLearner.Sigmoid)
cal_model = cal_learner(data)

self.assertIs(cal_model.base_model, model)
self.assertEqual(cal_model.calibrators, [sigmoid_fit.return_value] * 4)
args, kwargs = test_on_training.call_args
self.assertEqual(len(args), 2)
self.assertIs(args[0], data)
self.assertIs(args[1][0], learner)
self.assertEqual(len(args[1]), 1)
self.assertEqual(kwargs, {"store_models": 1})

for call, cls_probs in zip(sigmoid_fit.call_args_list,
res.probabilities[0].T):
np.testing.assert_equal(call[0][0], cls_probs)
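
As a standalone illustration (not part of the test suite) of the tie-breaking rule exercised by test_fit_storage above: when the criterion curve has several optimal thresholds, ThresholdLearner picks the tied optimum closest to 0.5 via np.searchsorted.

import numpy as np

# Values from test_fit_storage: F1 peaks at 0.4 for thresholds 0.3 and 0.45
probs = np.array([0.1, 0.15, 0.3, 0.45, 0.6, 0.8])
curve = np.array([0.1, 0.2, 0.4, 0.4, 0.3, 0.1])

best_threshs = probs[curve == np.max(curve)]  # array([0.3, 0.45])
threshold = best_threshs[min(np.searchsorted(best_threshs, 0.5),
                             len(best_threshs) - 1)]
print(threshold)  # 0.45, the tied optimum closest to 0.5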