From 35a6f4bbe76c822177a1932a902ca3f286413413 Mon Sep 17 00:00:00 2001 From: janezd Date: Thu, 13 Jun 2019 15:16:21 +0200 Subject: [PATCH 01/21] Calibration plot: Add plots of ca, sens/spec, prec/recall, ppv/npv --- Orange/widgets/evaluate/owcalibrationplot.py | 243 ++++++++++++------- Orange/widgets/evaluate/utils.py | 2 +- 2 files changed, 161 insertions(+), 84 deletions(-) diff --git a/Orange/widgets/evaluate/owcalibrationplot.py b/Orange/widgets/evaluate/owcalibrationplot.py index c757932adea..e7fb3c502e8 100644 --- a/Orange/widgets/evaluate/owcalibrationplot.py +++ b/Orange/widgets/evaluate/owcalibrationplot.py @@ -1,12 +1,8 @@ -""" -Calibration Plot Widget ------------------------ - -""" -from collections import namedtuple +from functools import partial import numpy as np +from AnyQt.QtCore import Qt from AnyQt.QtWidgets import QListWidget import pyqtgraph as pg @@ -21,19 +17,6 @@ from Orange.widgets import report -Curve = namedtuple( - "Curve", - ["x", "y"] -) - -PlotCurve = namedtuple( - "PlotCurve", - ["curve", - "curve_item", - "rug_item"] -) - - class OWCalibrationPlot(widget.OWWidget): name = "Calibration Plot" description = "Calibration plot based on evaluation of classifiers." @@ -50,6 +33,8 @@ class Warning(widget.OWWidget.Warning): target_index = settings.Setting(0) selected_classifiers = settings.Setting([]) + score = settings.Setting(0) + fold_curves = settings.Setting(False) display_rug = settings.Setting(True) graph_name = "plot" @@ -60,41 +45,43 @@ def __init__(self): self.results = None self.classifier_names = [] self.colors = [] - self._curve_data = {} - - box = gui.vBox(self.controlArea, "Plot") - tbox = gui.vBox(box, "Target Class") - tbox.setFlat(True) + box = gui.vBox(self.controlArea, "Target Class") self.target_cb = gui.comboBox( - tbox, self, "target_index", callback=self._replot, - contentsLength=8) + box, self, "target_index", callback=self._replot, contentsLength=8) + gui.checkBox(box, self, "display_rug", "Show rug", + callback=self._on_display_rug_changed) - cbox = gui.vBox(box, "Classifier") - cbox.setFlat(True) + box = gui.vBox(self.controlArea, "Metrics") + combo = gui.comboBox( + box, self, "score", items=(x[0] for x in self.Metrics), + callback=self.score_changed) + gui.checkBox( + box, self, "fold_curves", "Curves for individual folds", + callback=self._replot) + + self.explanation = gui.widgetLabel( + box, wordWrap=True, fixedWidth=combo.sizeHint().width()) + self.explanation.setContentsMargins(8, 8, 0, 0) + font = self.explanation.font() + font.setPointSizeF(0.85 * font.pointSizeF()) + self.explanation.setFont(font) self.classifiers_list_box = gui.listBox( - box, self, "selected_classifiers", "classifier_names", - selectionMode=QListWidget.MultiSelection, + self.controlArea, self, "selected_classifiers", "classifier_names", + box="Classifier", selectionMode=QListWidget.ExtendedSelection, callback=self._replot) - gui.checkBox(box, self, "display_rug", "Show rug", - callback=self._on_display_rug_changed) - self.plotview = pg.GraphicsView(background="w") self.plot = pg.PlotItem(enableMenu=False) self.plot.setMouseEnabled(False, False) self.plot.hideButtons() - axis = self.plot.getAxis("bottom") - axis.setLabel("Predicted Probability") - - axis = self.plot.getAxis("left") - axis.setLabel("Observed Average") - self.plot.setRange(xRange=(0.0, 1.0), yRange=(0.0, 1.0), padding=0.05) self.plotview.setCentralItem(self.plot) + self.mainArea.layout().addWidget(self.plotview) + self._set_explanation() @Inputs.evaluation_results def set_results(self, results): 
@@ -117,7 +104,25 @@ def clear(self): self.target_cb.clear() self.target_index = 0 self.colors = [] - self._curve_data = {} + + def score_changed(self): + self._set_explanation() + self._replot() + + def _set_explanation(self): + explanation = self.Metrics[self.score][2] + if explanation: + self.explanation.setText(explanation) + self.explanation.show() + else: + self.explanation.hide() + + axis = self.plot.getAxis("bottom") + axis.setLabel("Predicted probability" if self.score == 0 + else "Threshold probability to classify as positive") + + axis = self.plot.getAxis("left") + axis.setLabel(self.Metrics[self.score][0]) def _initialize(self, results): N = len(results.predicted) @@ -138,35 +143,16 @@ def _initialize(self, results): self.selected_classifiers = list(range(N)) self.target_cb.addItems(results.data.domain.class_var.values) - def plot_curve(self, clf_idx, target): - if (clf_idx, target) in self._curve_data: - return self._curve_data[clf_idx, target] - - ytrue = self.results.actual == target - probs = self.results.probabilities[clf_idx, :, target] + @staticmethod + def plot_metrics(ytrue, probs, metrics, pen_args): sortind = np.argsort(probs) probs = probs[sortind] ytrue = ytrue[sortind] - if probs.size: - xmin, xmax = probs.min(), probs.max() - x = np.linspace(xmin, xmax, 100) - if xmax != xmin: - f = gaussian_smoother(probs, ytrue, sigma=0.15 * (xmax - xmin)) - observed = f(x) - else: - observed = np.full(100, xmax) - else: - x = np.array([]) - observed = np.array([]) - - curve = Curve(x, observed) - curve_item = pg.PlotDataItem( - x, observed, pen=pg.mkPen(self.colors[clf_idx], width=1), - shadowPen=pg.mkPen(self.colors[clf_idx].lighter(160), width=2), - symbol="+", symbolSize=4, - antialias=True - ) + fn = np.cumsum(ytrue) + metrics(ytrue, probs, fn, pen_args) + def _rug(self, ytrue, probs, _fn, pen_args): + color = pen_args["pen"].color() rh = 0.025 rug_x = np.c_[probs, probs] rug_x_true = rug_x[ytrue].ravel() @@ -177,29 +163,103 @@ def plot_curve(self, clf_idx, target): rug_y_false = np.zeros_like(rug_x_false) rug_y_false[1::2] = rh - rug1 = pg.PlotDataItem( - rug_x_false, rug_y_false, pen=self.colors[clf_idx], - connect="pairs", antialias=True - ) - rug2 = pg.PlotDataItem( - rug_x_true, rug_y_true, pen=self.colors[clf_idx], - connect="pairs", antialias=True - ) - self._curve_data[clf_idx, target] = PlotCurve(curve, curve_item, (rug1, rug2)) - return self._curve_data[clf_idx, target] + self.plot.plot( + rug_x_false, rug_y_false, + pen=color, connect="pairs", antialias=True) + self.plot.plot( + rug_x_true, rug_y_true, + pen=color, connect="pairs", antialias=True) + + def _prob_curve(self, ytrue, probs, _fn, pen_args): + if not probs.size: + return + + xmin, xmax = probs.min(), probs.max() + x = np.linspace(xmin, xmax, 100) + if xmax != xmin: + f = gaussian_smoother(probs, ytrue, sigma=0.15 * (xmax - xmin)) + y = f(x) + else: + y = np.full(100, xmax) + + self.plot.plot(x, y, symbol="+", symbolSize=4, **pen_args) + self.plot.plot([0, 1], [0, 1], antialias=True) + + # For the following methods, at point x=i, we will have i negatives, + # fn[i] is the number of false negatives at that point, hence + # tn = i - fn[i] + # tp = real_pos - fn[i] + # fp = real_neg + tn = real_neg - (i - fn[i]) + + def _ca_curve(self, ytrue, probs, fn, pen_args): + # CA = (tn + tp) / n = ((i - fn[i]) + (real_pos - fn[i])) / n + n = len(probs) + real_pos = np.sum(ytrue) + ca = (real_pos + np.arange(n) - 2 * fn) / n + self.plot.plot(probs, ca, **pen_args) + + def _sens_spec_curve(self, ytrue, probs, fn, 
pen_args): + # sens = tp / p = (real_pos - fn[i]) / real_pos + # spec = tn / n = (i - fn[i]) / real_neg + n = len(probs) + real_pos = np.sum(ytrue) + real_neg = n - real_pos + sens = 1 - fn / real_pos + spec = (np.arange(1, n + 1) - fn) / real_neg + self.plot.plot(probs, sens, **pen_args) + self.plot.plot(probs, spec, **pen_args) + + def _pr_curve(self, ytrue, probs, fn, pen_args): + # precision = tp / pred_pos = (real_pos - fn[i]) / (n - i) + # recall = tp / p = (real_pos - fn[i]) / real_pos + n = len(probs) + real_pos = np.sum(ytrue) + fn = fn[:-1] # prevent falling to zero at the end + prec = (real_pos - fn) / (np.arange(n, 1, -1)) + recall = 1 - fn / real_pos + self.plot.plot(probs[:-1], prec, **pen_args) + self.plot.plot(probs[:-1], recall, **pen_args) + + def _ppv_npv_curve(self, ytrue, probs, fn, pen_args): + # ppv = tp / pred_pos = (real_pos - fn[i]) / (n - i) + # npv = tn / pred_neg = (i - fn[i]) / i + n = len(probs) + real_pos = np.sum(ytrue) + fn = fn[:-1] # prevent falling to zero at the end + ppv = (real_pos - fn) / (np.arange(n, 1, -1)) + npv = 1 - fn / np.arange(1, n) + self.plot.plot(probs[:-1], ppv, **pen_args) + self.plot.plot(probs[:-1], npv, **pen_args) def _setup_plot(self): target = self.target_index - selected = self.selected_classifiers - curves = [self.plot_curve(i, target) for i in selected] + results = self.results + metrics = partial(self.Metrics[self.score][1], self) + plot_folds = self.fold_curves and results.folds is not None + + ytrue = results.actual == target + for clsf in self.selected_classifiers: + probs = results.probabilities[clsf, :, target] + color = self.colors[clsf] + pen_args = dict( + pen=pg.mkPen(color, width=1), + shadowPen=pg.mkPen(color.lighter(160), + width=3 + 5 * plot_folds), + antiAlias=True) + self.plot_metrics(ytrue, probs, metrics, pen_args) - for curve in curves: - self.plot.addItem(curve.curve_item) if self.display_rug: - self.plot.addItem(curve.rug_item[0]) - self.plot.addItem(curve.rug_item[1]) - - self.plot.plot([0, 1], [0, 1], antialias=True) + self.plot_metrics(ytrue, probs, self._rug, pen_args) + + if plot_folds: + pen_args = dict( + pen=pg.mkPen(color, width=1, style=Qt.DashLine), + antiAlias=True) + for fold in range(len(results.folds)): + fold_results = results.get_fold(fold) + fold_ytrue = fold_results.actual == target + fold_probs = fold_results.probabilities[clsf, :, target] + self.plot_metrics(fold_ytrue, fold_probs, metrics, pen_args) def _replot(self): self.plot.clear() @@ -218,6 +278,23 @@ def send_report(self): self.report_plot() self.report_caption(caption) + Metrics = [ + ("Actual probability", _prob_curve, ""), + ("Classification accuracy", _ca_curve, ""), + ("Sensitivity & Specificity", _sens_spec_curve, + "Sensitivity (falling) is the proportion of correctly detected " + "positive instances (TP / P), and specificity (rising) is the " + "proportion of detected negative instances (TP / N)."), + ("Precision & Recall", _pr_curve, + "Precision (rising) is the fraction of retrieved instances " + "that are relevant, TP / (TP + FP), and recall (falling) is " + "the proportion of discovered relevant instances, TP / P."), + ("Pos & Neg predictive value", _ppv_npv_curve, + "Positive predictive value (rising) is the proportion of " + "correct positives, TP / (TP + FP), and negative predictive " + "value the proportion of correct negatives, TN / (TN + FN)."), + ] + def gaussian_smoother(x, y, sigma=1.0): x = np.asarray(x) diff --git a/Orange/widgets/evaluate/utils.py b/Orange/widgets/evaluate/utils.py index 
9e2f579dfae..ebe06032777 100644 --- a/Orange/widgets/evaluate/utils.py +++ b/Orange/widgets/evaluate/utils.py @@ -47,7 +47,7 @@ def results_for_preview(data_name=""): from Orange.classification import \ LogisticRegressionLearner, SVMLearner, NuSVMLearner - data = Table(data_name or "ionosphere") + data = Table(data_name or "heart_disease") results = CrossValidation( data, [LogisticRegressionLearner(penalty="l2"), From 2fa175022b0b23f28dfacea9fa90b6a71aabeba0 Mon Sep 17 00:00:00 2001 From: janezd Date: Thu, 13 Jun 2019 20:10:09 +0200 Subject: [PATCH 02/21] Calibration plot: Add threshold line --- Orange/widgets/evaluate/owcalibrationplot.py | 141 ++++++++++++++----- Orange/widgets/gui.py | 3 + 2 files changed, 109 insertions(+), 35 deletions(-) diff --git a/Orange/widgets/evaluate/owcalibrationplot.py b/Orange/widgets/evaluate/owcalibrationplot.py index e7fb3c502e8..8e5e1e1e96d 100644 --- a/Orange/widgets/evaluate/owcalibrationplot.py +++ b/Orange/widgets/evaluate/owcalibrationplot.py @@ -1,9 +1,10 @@ +from collections import namedtuple from functools import partial import numpy as np -from AnyQt.QtCore import Qt -from AnyQt.QtWidgets import QListWidget +from AnyQt.QtCore import Qt, QSize +from AnyQt.QtWidgets import QListWidget, QSizePolicy import pyqtgraph as pg @@ -16,6 +17,10 @@ from Orange.widgets.widget import Input from Orange.widgets import report +metric_definition = namedtuple( + "metric_definition", + ("name", "function", "short_names", "explanation")) + class OWCalibrationPlot(widget.OWWidget): name = "Calibration Plot" @@ -36,6 +41,7 @@ class Warning(widget.OWWidget.Warning): score = settings.Setting(0) fold_curves = settings.Setting(False) display_rug = settings.Setting(True) + threshold = settings.Setting(0.5) graph_name = "plot" @@ -43,22 +49,32 @@ def __init__(self): super().__init__() self.results = None + self.scores = None self.classifier_names = [] self.colors = [] - box = gui.vBox(self.controlArea, "Target Class") + box = gui.vBox(self.controlArea, box="Settings") self.target_cb = gui.comboBox( - box, self, "target_index", callback=self._replot, contentsLength=8) - gui.checkBox(box, self, "display_rug", "Show rug", - callback=self._on_display_rug_changed) + box, self, "target_index", label="Target:", + orientation=Qt.Horizontal, callback=self._replot, contentsLength=8) + gui.checkBox( + box, self, "display_rug", "Show rug", + callback=self._on_display_rug_changed) + gui.checkBox( + box, self, "fold_curves", "Curves for individual folds", + callback=self._replot) + + self.classifiers_list_box = gui.listBox( + self.controlArea, self, "selected_classifiers", "classifier_names", + box="Classifier", selectionMode=QListWidget.ExtendedSelection, + sizePolicy=(QSizePolicy.Preferred, QSizePolicy.Preferred), + sizeHint=QSize(150, 40), + callback=self._replot) box = gui.vBox(self.controlArea, "Metrics") combo = gui.comboBox( - box, self, "score", items=(x[0] for x in self.Metrics), + box, self, "score", items=(metric.name for metric in self.Metrics), callback=self.score_changed) - gui.checkBox( - box, self, "fold_curves", "Curves for individual folds", - callback=self._replot) self.explanation = gui.widgetLabel( box, wordWrap=True, fixedWidth=combo.sizeHint().width()) @@ -67,16 +83,18 @@ def __init__(self): font.setPointSizeF(0.85 * font.pointSizeF()) self.explanation.setFont(font) - self.classifiers_list_box = gui.listBox( - self.controlArea, self, "selected_classifiers", "classifier_names", - box="Classifier", selectionMode=QListWidget.ExtendedSelection, - 
callback=self._replot) + box = gui.widgetBox(self.controlArea, "Info") + self.info_label = gui.widgetLabel(box) self.plotview = pg.GraphicsView(background="w") self.plot = pg.PlotItem(enableMenu=False) self.plot.setMouseEnabled(False, False) self.plot.hideButtons() + for axis_name in ("bottom", "left"): + axis = self.plot.getAxis(axis_name) + axis.setPen(pg.mkPen(color=0.0)) + self.plot.setRange(xRange=(0.0, 1.0), yRange=(0.0, 1.0), padding=0.05) self.plotview.setCentralItem(self.plot) @@ -110,7 +128,7 @@ def score_changed(self): self._replot() def _set_explanation(self): - explanation = self.Metrics[self.score][2] + explanation = self.Metrics[self.score].explanation if explanation: self.explanation.setText(explanation) self.explanation.show() @@ -122,7 +140,7 @@ def _set_explanation(self): else "Threshold probability to classify as positive") axis = self.plot.getAxis("left") - axis.setLabel(self.Metrics[self.score][0]) + axis.setLabel(self.Metrics[self.score].name) def _initialize(self, results): N = len(results.predicted) @@ -149,7 +167,7 @@ def plot_metrics(ytrue, probs, metrics, pen_args): probs = probs[sortind] ytrue = ytrue[sortind] fn = np.cumsum(ytrue) - metrics(ytrue, probs, fn, pen_args) + return probs, metrics(ytrue, probs, fn, pen_args) def _rug(self, ytrue, probs, _fn, pen_args): color = pen_args["pen"].color() @@ -208,6 +226,7 @@ def _sens_spec_curve(self, ytrue, probs, fn, pen_args): spec = (np.arange(1, n + 1) - fn) / real_neg self.plot.plot(probs, sens, **pen_args) self.plot.plot(probs, spec, **pen_args) + return sens, spec def _pr_curve(self, ytrue, probs, fn, pen_args): # precision = tp / pred_pos = (real_pos - fn[i]) / (n - i) @@ -219,6 +238,7 @@ def _pr_curve(self, ytrue, probs, fn, pen_args): recall = 1 - fn / real_pos self.plot.plot(probs[:-1], prec, **pen_args) self.plot.plot(probs[:-1], recall, **pen_args) + return prec, recall def _ppv_npv_curve(self, ytrue, probs, fn, pen_args): # ppv = tp / pred_pos = (real_pos - fn[i]) / (n - i) @@ -230,12 +250,14 @@ def _ppv_npv_curve(self, ytrue, probs, fn, pen_args): npv = 1 - fn / np.arange(1, n) self.plot.plot(probs[:-1], ppv, **pen_args) self.plot.plot(probs[:-1], npv, **pen_args) + return ppv, npv def _setup_plot(self): target = self.target_index results = self.results - metrics = partial(self.Metrics[self.score][1], self) + metrics = partial(self.Metrics[self.score].function, self) plot_folds = self.fold_curves and results.folds is not None + self.scores = [] ytrue = results.actual == target for clsf in self.selected_classifiers: @@ -246,7 +268,9 @@ def _setup_plot(self): shadowPen=pg.mkPen(color.lighter(160), width=3 + 5 * plot_folds), antiAlias=True) - self.plot_metrics(ytrue, probs, metrics, pen_args) + self.scores.append( + (self.classifier_names[clsf], + self.plot_metrics(ytrue, probs, metrics, pen_args))) if self.display_rug: self.plot_metrics(ytrue, probs, self._rug, pen_args) @@ -265,10 +289,54 @@ def _replot(self): self.plot.clear() if self.results is not None: self._setup_plot() + self.line = pg.InfiniteLine( + pos=self.threshold, movable=True, + pen=pg.mkPen(color="k", style=Qt.DashLine, width=2), + hoverPen=pg.mkPen(color="k", style=Qt.DashLine, width=3), + bounds=(0, 1), + ) + self.line.sigPositionChanged.connect(self.threshold_change) + self.line.sigPositionChangeFinished.connect(self.threshold_change_done) + self.plot.addItem(self.line) + self._update_info() + def _on_display_rug_changed(self): self._replot() + def threshold_change(self): + self.threshold = round(self.line.pos().x(), 2) + 
self.line.setPos(self.threshold) + self._update_info() + + def _update_info(self): + + text = f""" + + + + """ + if self.scores is not None: + short_names = self.Metrics[self.score].short_names + if short_names: + text += f""" + + {"".join(f"" + for n in short_names)} + """ + for name, (probs, curves) in self.scores: + ind = min(np.searchsorted(probs, self.threshold), + len(probs) - 1) + text += f"" + text += "".join(f'' + for curve in curves) + text += "" + text += "
Threshold: p={self.threshold:.2f}
{n}
{name}:/{curve[ind]:.3f}
" + self.info_label.setText(text) + + def threshold_change_done(self): + ... + def send_report(self): if self.results is None: return @@ -278,22 +346,25 @@ def send_report(self): self.report_plot() self.report_caption(caption) - Metrics = [ - ("Actual probability", _prob_curve, ""), - ("Classification accuracy", _ca_curve, ""), - ("Sensitivity & Specificity", _sens_spec_curve, - "Sensitivity (falling) is the proportion of correctly detected " - "positive instances (TP / P), and specificity (rising) is the " - "proportion of detected negative instances (TP / N)."), - ("Precision & Recall", _pr_curve, - "Precision (rising) is the fraction of retrieved instances " - "that are relevant, TP / (TP + FP), and recall (falling) is " - "the proportion of discovered relevant instances, TP / P."), - ("Pos & Neg predictive value", _ppv_npv_curve, - "Positive predictive value (rising) is the proportion of " - "correct positives, TP / (TP + FP), and negative predictive " - "value the proportion of correct negatives, TN / (TN + FN)."), - ] + Metrics = [metric_definition(*args) for args in ( + ("Actual probability", _prob_curve, (), ""), + ("Classification accuracy", _ca_curve, (), ""), + ("Sensitivity and specificity", _sens_spec_curve, ("sens", "spec"), + "
<p>Sensitivity (falling) is the proportion of correctly " + "detected positive instances (TP / P).</p>" + "<p>Specificity (rising) is the proportion of detected " + "negative instances (TP / N).</p>
"), + ("Precision and recall", _pr_curve, ("prec", "recall"), + "
<p>Precision (rising) is the fraction of retrieved instances " + "that are relevant, TP / (TP + FP).</p>" + "<p>Recall (falling) is the proportion of discovered relevant " + "instances, TP / P.</p>
"), + ("Pos and neg predictive value", _ppv_npv_curve, ("PPV", "TPV"), + "
<p>Positive predictive value (rising) is the proportion of " + "correct positives, TP / (TP + FP).</p>" + "<p>Negative predictive value is the proportion of correct " + "negatives, TN / (TN + FN).</p>
"), + )] def gaussian_smoother(x, y, sigma=1.0): diff --git a/Orange/widgets/gui.py b/Orange/widgets/gui.py index 683b8be2f73..b6a8d84552b 100644 --- a/Orange/widgets/gui.py +++ b/Orange/widgets/gui.py @@ -1783,6 +1783,9 @@ def __init__(self, master, enableDragDrop=False, dragDropCallback=None, def sizeHint(self): return self.size_hint + def minimumSizeHint(self): + return self.size_hint + def dragEnterEvent(self, event): super().dragEnterEvent(event) if self.valid_data_callback: From d47b68b2fb55ef1d0743b4027fa7e79cdfba510f Mon Sep 17 00:00:00 2001 From: janezd Date: Thu, 13 Jun 2019 21:51:43 +0200 Subject: [PATCH 03/21] Calibration plot: Refactor computation of metrics --- Orange/widgets/evaluate/owcalibrationplot.py | 196 ++++++++++--------- 1 file changed, 99 insertions(+), 97 deletions(-) diff --git a/Orange/widgets/evaluate/owcalibrationplot.py b/Orange/widgets/evaluate/owcalibrationplot.py index 8e5e1e1e96d..c0d1f10d9cd 100644 --- a/Orange/widgets/evaluate/owcalibrationplot.py +++ b/Orange/widgets/evaluate/owcalibrationplot.py @@ -1,5 +1,4 @@ from collections import namedtuple -from functools import partial import numpy as np @@ -8,7 +7,7 @@ import pyqtgraph as pg -import Orange +from Orange.evaluation import Results from Orange.widgets import widget, gui, settings from Orange.widgets.evaluate.utils import \ check_results_adequacy, results_for_preview @@ -17,10 +16,77 @@ from Orange.widgets.widget import Input from Orange.widgets import report -metric_definition = namedtuple( + +class Data: + def __init__(self, ytrue, probs): + sortind = np.argsort(probs) + self.probs = probs[sortind] + self.ytrue = ytrue[sortind] + self.fn = np.cumsum(self.ytrue) + self.tot = len(probs) + self.p = self.fn[-1] + self.n = self.tot - self.p + + @property + def tn(self): + return np.arange(self.tot) - self.fn + + @property + def tp(self): + return self.p - self.fn + + @property + def fp(self): + return self.n - self.tn + + +MetricDefinition = namedtuple( "metric_definition", ("name", "function", "short_names", "explanation")) +Metrics = [MetricDefinition(*args) for args in ( + ("Actual probability", + None, + (), + ""), + ("Classification accuracy", + lambda d: (d.probs, ((d.tp + d.tn) / d.tot,)), + (), + ""), + ("F1", + lambda d: (d.probs, (2 * d.tp / (2 * d.tp + d.fp + d.fn),)), + (), + ""), + ("Sensitivity and specificity", + lambda d: (d.probs, (d.tp / d.p, d.tn / d.n)), + ("sens", "spec"), + "
<p>Sensitivity (falling) is the proportion of correctly " + "detected positive instances (TP / P).</p>" + "<p>Specificity (rising) is the proportion of detected " + "negative instances (TP / N).</p>
"), + ("Precision and recall", + lambda d: (d.probs[:-1], (d.tp[:-1] / np.arange(d.tot, 1, -1), + d.tp[:-1] / d.p)), + ("prec", "recall"), + "
<p>Precision (rising) is the fraction of retrieved instances " + "that are relevant, TP / (TP + FP).</p>" + "<p>Recall (falling) is the proportion of discovered relevant " + "instances, TP / P.</p>
"), + ("Pos and neg predictive value", + lambda d: (d.probs[:-1], (d.tp[:-1] / np.arange(d.tot, 1, -1), + d.tn[:-1] / np.arange(1, d.tot))), + ("PPV", "TPV"), + "
<p>Positive predictive value (rising) is the proportion of " + "correct positives, TP / (TP + FP).</p>" + "<p>Negative predictive value is the proportion of correct " + "negatives, TN / (TN + FN).</p>
"), + ("True and false positive rate", + lambda d: (d.probs, (d.tp / d.p, d.fp / d.n)), + ("TPR", "FPR"), + "
<p>True and false positive rate are proportions of detected " + "and omitted positive instances</p>
"), +)] + class OWCalibrationPlot(widget.OWWidget): name = "Calibration Plot" @@ -30,7 +96,7 @@ class OWCalibrationPlot(widget.OWWidget): keywords = [] class Inputs: - evaluation_results = Input("Evaluation Results", Orange.evaluation.Results) + evaluation_results = Input("Evaluation Results", Results) class Warning(widget.OWWidget.Warning): empty_input = widget.Msg( @@ -52,6 +118,7 @@ def __init__(self): self.scores = None self.classifier_names = [] self.colors = [] + self.line = None box = gui.vBox(self.controlArea, box="Settings") self.target_cb = gui.comboBox( @@ -73,7 +140,7 @@ def __init__(self): box = gui.vBox(self.controlArea, "Metrics") combo = gui.comboBox( - box, self, "score", items=(metric.name for metric in self.Metrics), + box, self, "score", items=(metric.name for metric in Metrics), callback=self.score_changed) self.explanation = gui.widgetLabel( @@ -94,6 +161,8 @@ def __init__(self): for axis_name in ("bottom", "left"): axis = self.plot.getAxis(axis_name) axis.setPen(pg.mkPen(color=0.0)) + if axis_name != "bottom": # remove if when pyqtgraph is fixed + axis.setStyle(stopAxisAtTick=(True, True)) self.plot.setRange(xRange=(0.0, 1.0), yRange=(0.0, 1.0), padding=0.05) self.plotview.setCentralItem(self.plot) @@ -128,7 +197,7 @@ def score_changed(self): self._replot() def _set_explanation(self): - explanation = self.Metrics[self.score].explanation + explanation = Metrics[self.score].explanation if explanation: self.explanation.setText(explanation) self.explanation.show() @@ -140,7 +209,7 @@ def _set_explanation(self): else "Threshold probability to classify as positive") axis = self.plot.getAxis("left") - axis.setLabel(self.Metrics[self.score].name) + axis.setLabel(Metrics[self.score].name) def _initialize(self, results): N = len(results.predicted) @@ -161,20 +230,12 @@ def _initialize(self, results): self.selected_classifiers = list(range(N)) self.target_cb.addItems(results.data.domain.class_var.values) - @staticmethod - def plot_metrics(ytrue, probs, metrics, pen_args): - sortind = np.argsort(probs) - probs = probs[sortind] - ytrue = ytrue[sortind] - fn = np.cumsum(ytrue) - return probs, metrics(ytrue, probs, fn, pen_args) - - def _rug(self, ytrue, probs, _fn, pen_args): + def _rug(self, data, pen_args): color = pen_args["pen"].color() rh = 0.025 - rug_x = np.c_[probs, probs] - rug_x_true = rug_x[ytrue].ravel() - rug_x_false = rug_x[~ytrue].ravel() + rug_x = np.c_[data.probs, data.probs] + rug_x_true = rug_x[data.ytrue].ravel() + rug_x_false = rug_x[~data.ytrue].ravel() rug_y_true = np.ones_like(rug_x_true) rug_y_true[1::2] = 1 - rh @@ -188,9 +249,17 @@ def _rug(self, ytrue, probs, _fn, pen_args): rug_x_true, rug_y_true, pen=color, connect="pairs", antialias=True) - def _prob_curve(self, ytrue, probs, _fn, pen_args): + def plot_metrics(self, data, metrics, pen_args): + if metrics is None: + return self._prob_curve(data.ytrue, data.probs, pen_args) + x, ys = metrics(data) + for y in ys: + self.plot.plot(x, y, **pen_args) + return x, ys + + def _prob_curve(self, ytrue, probs, pen_args): if not probs.size: - return + return None xmin, xmax = probs.min(), probs.max() x = np.linspace(xmin, xmax, 100) @@ -202,60 +271,12 @@ def _prob_curve(self, ytrue, probs, _fn, pen_args): self.plot.plot(x, y, symbol="+", symbolSize=4, **pen_args) self.plot.plot([0, 1], [0, 1], antialias=True) - - # For the following methods, at point x=i, we will have i negatives, - # fn[i] is the number of false negatives at that point, hence - # tn = i - fn[i] - # tp = real_pos - fn[i] - # fp = real_neg + tn = 
real_neg - (i - fn[i]) - - def _ca_curve(self, ytrue, probs, fn, pen_args): - # CA = (tn + tp) / n = ((i - fn[i]) + (real_pos - fn[i])) / n - n = len(probs) - real_pos = np.sum(ytrue) - ca = (real_pos + np.arange(n) - 2 * fn) / n - self.plot.plot(probs, ca, **pen_args) - - def _sens_spec_curve(self, ytrue, probs, fn, pen_args): - # sens = tp / p = (real_pos - fn[i]) / real_pos - # spec = tn / n = (i - fn[i]) / real_neg - n = len(probs) - real_pos = np.sum(ytrue) - real_neg = n - real_pos - sens = 1 - fn / real_pos - spec = (np.arange(1, n + 1) - fn) / real_neg - self.plot.plot(probs, sens, **pen_args) - self.plot.plot(probs, spec, **pen_args) - return sens, spec - - def _pr_curve(self, ytrue, probs, fn, pen_args): - # precision = tp / pred_pos = (real_pos - fn[i]) / (n - i) - # recall = tp / p = (real_pos - fn[i]) / real_pos - n = len(probs) - real_pos = np.sum(ytrue) - fn = fn[:-1] # prevent falling to zero at the end - prec = (real_pos - fn) / (np.arange(n, 1, -1)) - recall = 1 - fn / real_pos - self.plot.plot(probs[:-1], prec, **pen_args) - self.plot.plot(probs[:-1], recall, **pen_args) - return prec, recall - - def _ppv_npv_curve(self, ytrue, probs, fn, pen_args): - # ppv = tp / pred_pos = (real_pos - fn[i]) / (n - i) - # npv = tn / pred_neg = (i - fn[i]) / i - n = len(probs) - real_pos = np.sum(ytrue) - fn = fn[:-1] # prevent falling to zero at the end - ppv = (real_pos - fn) / (np.arange(n, 1, -1)) - npv = 1 - fn / np.arange(1, n) - self.plot.plot(probs[:-1], ppv, **pen_args) - self.plot.plot(probs[:-1], npv, **pen_args) - return ppv, npv + return x, (y, ) def _setup_plot(self): target = self.target_index results = self.results - metrics = partial(self.Metrics[self.score].function, self) + metrics = Metrics[self.score].function plot_folds = self.fold_curves and results.folds is not None self.scores = [] @@ -266,14 +287,15 @@ def _setup_plot(self): pen_args = dict( pen=pg.mkPen(color, width=1), shadowPen=pg.mkPen(color.lighter(160), - width=3 + 5 * plot_folds), + width=4 + 4 * plot_folds), antiAlias=True) + data = Data(ytrue, probs) self.scores.append( (self.classifier_names[clsf], - self.plot_metrics(ytrue, probs, metrics, pen_args))) + self.plot_metrics(data, metrics, pen_args))) if self.display_rug: - self.plot_metrics(ytrue, probs, self._rug, pen_args) + self._rug(data, pen_args) if plot_folds: pen_args = dict( @@ -283,7 +305,8 @@ def _setup_plot(self): fold_results = results.get_fold(fold) fold_ytrue = fold_results.actual == target fold_probs = fold_results.probabilities[clsf, :, target] - self.plot_metrics(fold_ytrue, fold_probs, metrics, pen_args) + self.plot_metrics(Data(fold_ytrue, fold_probs), + metrics, pen_args) def _replot(self): self.plot.clear() @@ -300,7 +323,6 @@ def _replot(self): self.plot.addItem(self.line) self._update_info() - def _on_display_rug_changed(self): self._replot() @@ -317,7 +339,7 @@ def _update_info(self):
""" if self.scores is not None: - short_names = self.Metrics[self.score].short_names + short_names = Metrics[self.score].short_names if short_names: text += f""" @@ -346,26 +368,6 @@ def send_report(self): self.report_plot() self.report_caption(caption) - Metrics = [metric_definition(*args) for args in ( - ("Actual probability", _prob_curve, (), ""), - ("Classification accuracy", _ca_curve, (), ""), - ("Sensitivity and specificity", _sens_spec_curve, ("sens", "spec"), - "
<p>Sensitivity (falling) is the proportion of correctly " - "detected positive instances (TP / P).</p>" - "<p>Specificity (rising) is the proportion of detected " - "negative instances (TP / N).</p>
"), - ("Precision and recall", _pr_curve, ("prec", "recall"), - "
<p>Precision (rising) is the fraction of retrieved instances " - "that are relevant, TP / (TP + FP).</p>" - "<p>Recall (falling) is the proportion of discovered relevant " - "instances, TP / P.</p>
"), - ("Pos and neg predictive value", _ppv_npv_curve, ("PPV", "TPV"), - "
<p>Positive predictive value (rising) is the proportion of " - "correct positives, TP / (TP + FP).</p>" - "<p>Negative predictive value is the proportion of correct " - "negatives, TN / (TN + FN).</p>
"), - )] - def gaussian_smoother(x, y, sigma=1.0): x = np.asarray(x) From 585feb2077e4ea5a3be79bda64665c03b4e07def Mon Sep 17 00:00:00 2001 From: janezd Date: Thu, 13 Jun 2019 23:34:48 +0200 Subject: [PATCH 04/21] Testing: Keep 2d array of models when splitting Results by models --- Orange/evaluation/testing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Orange/evaluation/testing.py b/Orange/evaluation/testing.py index 92c68d1c13f..9e6d0ca071c 100644 --- a/Orange/evaluation/testing.py +++ b/Orange/evaluation/testing.py @@ -317,7 +317,7 @@ def split_by_model(self): res.probabilities = self.probabilities[(i,), :, :] if self.models is not None: - res.models = self.models[:, i] + res.models = self.models[:, i:i+1] res.failed = [self.failed[i]] yield res From 7b876e64fab994f707f1c761f86dddeb4bae7281 Mon Sep 17 00:00:00 2001 From: janezd Date: Thu, 13 Jun 2019 23:35:49 +0200 Subject: [PATCH 05/21] Test Learners: Store models when there is just one; properly stack them --- Orange/widgets/evaluate/owtestlearners.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Orange/widgets/evaluate/owtestlearners.py b/Orange/widgets/evaluate/owtestlearners.py index 0577b448950..c3da72af0f6 100644 --- a/Orange/widgets/evaluate/owtestlearners.py +++ b/Orange/widgets/evaluate/owtestlearners.py @@ -735,7 +735,8 @@ def __update(self): if self.resampling == OWTestLearners.TestOnTest: test_f = partial( - Orange.evaluation.TestOnTestData(store_data=True), + Orange.evaluation.TestOnTestData( + store_data=True, store_models=True), self.data, self.test_data, learners_c, self.preprocessor ) else: @@ -756,7 +757,8 @@ def __update(self): stratified=self.shuffle_stratified, random_state=rstate) elif self.resampling == OWTestLearners.TestOnTrain: - sampler = Orange.evaluation.TestOnTrainingData() + sampler = Orange.evaluation.TestOnTrainingData( + store_models=True) else: assert False, "self.resampling %s" % self.resampling @@ -916,7 +918,7 @@ def is_empty(res): res.probabilities = np.vstack((x.probabilities, y.probabilities)) if x.models is not None: - res.models = [xm + ym for xm, ym in zip(x.models, y.models)] + res.models = np.hstack((x.models, y.models)) return res From 93b7a72ee89f7843ed89a5d4ab7e91234fd2fa4c Mon Sep 17 00:00:00 2001 From: janezd Date: Thu, 13 Jun 2019 23:36:32 +0200 Subject: [PATCH 06/21] classification: Add ModelWithThreshold --- Orange/classification/__init__.py | 1 + Orange/classification/calibration.py | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) create mode 100644 Orange/classification/calibration.py diff --git a/Orange/classification/__init__.py b/Orange/classification/__init__.py index f0489b4cb74..842518fca31 100644 --- a/Orange/classification/__init__.py +++ b/Orange/classification/__init__.py @@ -19,3 +19,4 @@ from .rules import * from .sgd import * from .neural_network import * +from .calibration import * diff --git a/Orange/classification/calibration.py b/Orange/classification/calibration.py new file mode 100644 index 00000000000..891b8db81b6 --- /dev/null +++ b/Orange/classification/calibration.py @@ -0,0 +1,22 @@ +from Orange.classification import Model + +__all__ = ["ModelWithThreshold"] + + +class ModelWithThreshold(Model): + def __init__(self, wrapped_model, threshold, target_class=1): + super().__init__(wrapped_model.domain, wrapped_model.original_domain) + self.name = f"{wrapped_model.name}, thresh={threshold:.2f}" + self.wrapped_model = wrapped_model + self.threshold = threshold + self.target_class = target_class + + 
def __call__(self, data, ret=Model.Value): + probs = self.wrapped_model(data, ret=Model.Probs) + if ret == Model.Probs: + return probs + vals = probs[:, self.target_class].flatten() > self.threshold + if ret == Model.Value: + return vals + else: + return vals, probs From ff67b4920be230566f7853719e70ebc2b9175adf Mon Sep 17 00:00:00 2001 From: janezd Date: Thu, 13 Jun 2019 23:37:32 +0200 Subject: [PATCH 07/21] Calibration plot: Output selected model --- Orange/widgets/evaluate/owcalibrationplot.py | 50 ++++++++++++++++++-- 1 file changed, 46 insertions(+), 4 deletions(-) diff --git a/Orange/widgets/evaluate/owcalibrationplot.py b/Orange/widgets/evaluate/owcalibrationplot.py index c0d1f10d9cd..3d782030467 100644 --- a/Orange/widgets/evaluate/owcalibrationplot.py +++ b/Orange/widgets/evaluate/owcalibrationplot.py @@ -7,13 +7,14 @@ import pyqtgraph as pg +from Orange.classification import ModelWithThreshold from Orange.evaluation import Results from Orange.widgets import widget, gui, settings from Orange.widgets.evaluate.utils import \ check_results_adequacy, results_for_preview from Orange.widgets.utils import colorpalette, colorbrewer from Orange.widgets.utils.widgetpreview import WidgetPreview -from Orange.widgets.widget import Input +from Orange.widgets.widget import Input, Output, Msg from Orange.widgets import report @@ -98,16 +99,31 @@ class OWCalibrationPlot(widget.OWWidget): class Inputs: evaluation_results = Input("Evaluation Results", Results) + class Outputs: + calibrated_model = Output("Calibrated Model", ModelWithThreshold) + class Warning(widget.OWWidget.Warning): empty_input = widget.Msg( "Empty result on input. Nothing to display.") + class Information(widget.OWWidget.Information): + no_out = "Can't output a model: " + no_output_multiple_folds = Msg( + no_out + "every training data sample produced a different model") + no_output_no_models = Msg( + no_out + "test results do not contain stored models;\n" + "try testing on separate data or on training data") + no_output_multiple_selected = Msg( + no_out + "select a single model - the widget can output only one") + + target_index = settings.Setting(0) selected_classifiers = settings.Setting([]) score = settings.Setting(0) fold_curves = settings.Setting(False) display_rug = settings.Setting(True) threshold = settings.Setting(0.5) + auto_commit = settings.Setting(True) graph_name = "plot" @@ -136,7 +152,7 @@ def __init__(self): box="Classifier", selectionMode=QListWidget.ExtendedSelection, sizePolicy=(QSizePolicy.Preferred, QSizePolicy.Preferred), sizeHint=QSize(150, 40), - callback=self._replot) + callback=self._on_selection_changed) box = gui.vBox(self.controlArea, "Metrics") combo = gui.comboBox( @@ -153,6 +169,9 @@ def __init__(self): box = gui.widgetBox(self.controlArea, "Info") self.info_label = gui.widgetLabel(box) + gui.auto_commit( + self.controlArea, self, "auto_commit", "Apply", commit=self.apply) + self.plotview = pg.GraphicsView(background="w") self.plot = pg.PlotItem(enableMenu=False) self.plot.setMouseEnabled(False, False) @@ -182,6 +201,7 @@ def set_results(self, results): if self.results is not None: self._initialize(results) self._replot() + self.apply() def clear(self): self.plot.clear() @@ -326,13 +346,16 @@ def _replot(self): def _on_display_rug_changed(self): self._replot() + def _on_selection_changed(self): + self._replot() + self.apply() + def threshold_change(self): self.threshold = round(self.line.pos().x(), 2) self.line.setPos(self.threshold) self._update_info() def _update_info(self): - text = f"""
{self.threshold:.2f}
@@ -357,7 +380,26 @@ def _update_info(self): self.info_label.setText(text) def threshold_change_done(self): - ... + self.apply() + + def apply(self): + info = self.Information + wrapped = None + problems = {} + if self.results is not None: + problems = { + info.no_output_multiple_folds: len(self.results.folds) > 1, + info.no_output_no_models: self.results.models is None, + info.no_output_multiple_selected: + len(self.selected_classifiers) != 1} + if not any(problems.values()): + model = self.results.models[0][self.selected_classifiers[0]] + wrapped = ModelWithThreshold(model, self.threshold) + + self.Outputs.calibrated_model.send(wrapped) + for info, shown in problems.items(): + if info.is_shown() != shown: + info(shown=shown) def send_report(self): if self.results is None: From a4424fbb0f264a42e69ee64e2cb20db0cf4ebd9a Mon Sep 17 00:00:00 2001 From: janezd Date: Sun, 16 Jun 2019 22:33:27 +0200 Subject: [PATCH 08/21] Orange.evaluation.performance_curves: Add module for computation of performance curves --- Orange/evaluation/performance_curves.py | 150 ++++++++++++++++++ Orange/evaluation/tests/__init__.py | 0 .../tests/test_performance_curves.py | 125 +++++++++++++++ .../evaluation.performance_curves.rst | 8 + .../source/reference/evaluation.rst | 1 + 5 files changed, 284 insertions(+) create mode 100644 Orange/evaluation/performance_curves.py create mode 100644 Orange/evaluation/tests/__init__.py create mode 100644 Orange/evaluation/tests/test_performance_curves.py create mode 100644 doc/data-mining-library/source/reference/evaluation.performance_curves.rst diff --git a/Orange/evaluation/performance_curves.py b/Orange/evaluation/performance_curves.py new file mode 100644 index 00000000000..c7dee568e53 --- /dev/null +++ b/Orange/evaluation/performance_curves.py @@ -0,0 +1,150 @@ +import numpy as np + + +class Curves: + # names of scores are standard acronyms, pylint: disable=invalid-name + """ + Computation of performance curves (ca, f1, precision, recall and the rest + of the zoo) from test results. + + The class works with binary classes. Attribute `probs` contains ordered + probabilities and all curves represent performance statistics if an + instance is classified as positive if it equals or exceeds the threshold + in `probs`, that is, `sensitivity[i]` is the sensitivity of the classifier + that classifies an instances as positive if the probability of being + positive is at least `probs[i]`. + + Class can be constructed by giving `probs` and `ytrue`, or from test + results (see :obj:`Curves.from_results`). The latter removes instances + with missing class values or predicted probabilities. + + The class treats all results as obtained from a single run instead of + computing separate curves and fancy averaging. 
+ + Arguments: + probs (np.ndarray): vector of predicted probabilities + ytrue (np.ndarray): corresponding true classes + + Attributes: + probs (np.ndarray): ordered vector of predicted probabilities + ytrue (np.ndarray): corresponding true classes + tot (int): total number of data instances + p (int): number of real positive instances + n (int): number of real negative instances + tp (np.ndarray): number of true positives (property computed from `tn`) + fp (np.ndarray): number of false positives (property computed from `tn`) + tn (np.ndarray): number of true negatives (property computed from `tn`) + fn (np.ndarray): number of false negatives (precomputed, not a property) + """ + def __init__(self, ytrue, probs): + sortind = np.argsort(probs) + self.probs = np.hstack((probs[sortind], [1])) + self.ytrue = ytrue[sortind] + self.fn = np.hstack(([0], np.cumsum(self.ytrue))) + self.tot = len(probs) + self.p = self.fn[-1] + self.n = self.tot - self.p + + @classmethod + def from_results(cls, results, target_class=None, model_index=None): + """ + Construct an instance of `Curves` from test results. + + Args: + results (:obj:`Orange.evaluation.testing.Results`): test results + target_class (int): target class index; if the class is binary, + this defaults to `1`, otherwise it must be given + model_index (int): model index; if there is only one model, this + argument can be omitted + + Returns: + curves (:obj:`Curves`) + """ + if model_index is None: + if results.probabilities.shape[0] != 1: + raise ValueError("Argument 'model_index' is required when " + "there are multiple models") + model_index = 0 + if target_class is None: + if results.probabilities.shape[2] != 2: + raise ValueError("Argument 'target_class' is required when the " + "class is not binary") + target_class = 1 + actual = results.actual + probs = results.probabilities[model_index, :, target_class] + nans = np.isnan(actual) + np.isnan(probs) + if nans.any(): + actual = actual[~nans] + probs = probs[~nans] + return cls(actual == target_class, probs) + + @property + def tn(self): + return np.arange(self.tot + 1) - self.fn + + @property + def tp(self): + return self.p - self.fn + + @property + def fp(self): + return self.n - self.tn + + def ca(self): + """Classification accuracy curve""" + return (self.tp + self.tn) / self.tot + + def f1(self): + """F1 curve""" + return 2 * self.tp / (2 * self.tp + self.fp + self.fn) + + def sensitivity(self): + """Sensitivity curve""" + return self.tp / self.p + + def specificity(self): + """Specificity curve""" + return self.tn / self.n + + def precision(self): + """ + Precision curve + + The last element represents precision at threshold 1. Unless such + a probability appears in the data, the precision at this point is + undefined. To avoid this, we copy the previous value to the last. + """ + tp_fp = np.arange(self.tot, -1, -1) + tp_fp[-1] = 1 # avoid division by zero + prec = self.tp / tp_fp + prec[-1] = prec[-2] + return prec + + def recall(self): + """Recall curve""" + return self.sensitivity() + + def ppv(self): + """PPV curve; see the comment at :obj:`precision`""" + return self.precision() + + def npv(self): + """ + NPV curve + + The first value is undefined (no negative instances). To avoid this, + we copy the second value into the first. 
+ """ + tn_fn = np.arange(self.tot + 1) + tn_fn[0] = 1 # avoid division by zero + npv = self.tn / tn_fn + npv[0] = npv[1] + return npv + + def fpr(self): + """FPR curve""" + return self.fp / self.n + + def tpr(self): + """TPR curve""" + return self.sensitivity() diff --git a/Orange/evaluation/tests/__init__.py b/Orange/evaluation/tests/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/Orange/evaluation/tests/test_performance_curves.py b/Orange/evaluation/tests/test_performance_curves.py new file mode 100644 index 00000000000..a73d7165557 --- /dev/null +++ b/Orange/evaluation/tests/test_performance_curves.py @@ -0,0 +1,125 @@ +import unittest +from unittest.mock import patch + +import numpy as np + +from Orange.evaluation.testing import Results +from Orange.evaluation.performance_curves import Curves + + +# Test data and sensitivity/specificity are taken from +# Tom Fawcett: An introduction to ROC analysis, with one true positive instance +# removed, so that the number of positive and negative does not match + +class TestCurves(unittest.TestCase): + def setUp(self): + n, p = (0, 1) + self.data = np.array([ + (p, .8), (n, .7), (p, .6), (p, .55), (p, .54), (n, .53), + (n, .52), (p, .51), (n, .505), (p, .4), (n, .39), (p, .38), + (n, .37), (n, .36), (n, .35), (p, .34), (n, .33), (p, .30), (n, .1) + ]) + + def test_curves(self): + np.random.shuffle(self.data) + ytrue, probs = self.data.T + curves = Curves(ytrue, probs) + + tn = np.array( + [0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 7, 8, 9, 9, 9, 9, 10, 10]) + np.testing.assert_equal(curves.tn, tn) + np.testing.assert_equal(curves.fp, 10 - tn) + np.testing.assert_almost_equal(curves.specificity(), tn / 10) + + tp = np.array( + [9, 9, 8, 8, 7, 7, 7, 7, 6, 6, 5, 5, 4, 4, 4, 3, 2, 1, 1, 0]) + np.testing.assert_equal(curves.tp, tp) + np.testing.assert_equal(curves.fn, 9 - tp) + np.testing.assert_almost_equal(curves.sensitivity(), tp / 9) + + np.testing.assert_almost_equal( + curves.ca(), + np.array([9, 10, 9, 10, 9, 10, 11, 12, 11, 12, 11, 12, 11, 12, + 13, 12, 11, 10, 11, 10]) / 19) + + precision = np.array( + [9 / 19, 9 / 18, 8 / 17, 8 / 16, 7 / 15, 7 / 14, 7 / 13, + 7 / 12, 6 / 11, 6 / 10, 5 / 9, 5 / 8, 4 / 7, 4 / 6, + 4 / 5, 3 / 4, 2 / 3, 1 / 2, 1 / 1, 1]) + np.testing.assert_almost_equal(curves.precision(), precision) + np.testing.assert_almost_equal(curves.recall(), tp / 9) + + np.testing.assert_almost_equal(curves.ppv(), precision) + np.testing.assert_almost_equal( + curves.npv(), + np.array([1, 1 / 1, 1 / 2, 2 / 3, 2 / 4, 3 / 5, 4 / 6, 5 / 7, + 5 / 8, 6 / 9, 6 / 10, 7 / 11, 7 / 12, 8 / 13, 9 / 14, + 9 / 15, 9 / 16, 9 / 17, 10 / 18, 10 / 19])) + + np.testing.assert_almost_equal(curves.tpr(), tp / 9) + np.testing.assert_almost_equal(curves.fpr(), (10 - tn) / 10) + + @patch("Orange.evaluation.performance_curves.Curves.__init__", + return_value=None) + def test_curves_from_results(self, init): + res = Results() + ytrue, probs = self.data.T + res.actual = ytrue.astype(float) + res.probabilities = np.vstack((1 - probs, probs)).T.reshape(1, -1, 2) + Curves.from_results(res) + cytrue, cprobs = init.call_args[0] + np.testing.assert_equal(cytrue, ytrue) + np.testing.assert_equal(cprobs, probs) + + Curves.from_results(res, target_class=0) + cytrue, cprobs = init.call_args[0] + np.testing.assert_equal(cytrue, 1 - ytrue) + np.testing.assert_equal(cprobs, 1 - probs) + + res.actual = ytrue.astype(float) + res.probabilities = np.random.random((2, 19, 2)) + res.probabilities[1] = np.vstack((1 - probs, probs)).T + + 
Curves.from_results(res, model_index=1) + cytrue, cprobs = init.call_args[0] + np.testing.assert_equal(cytrue, ytrue) + np.testing.assert_equal(cprobs, probs) + + self.assertRaises(ValueError, Curves.from_results, res) + + ytrue[ytrue == 0] = 2 * (np.arange(10) % 2) + res.actual = ytrue.astype(float) + res.probabilities = np.random.random((2, 19, 3)) + res.probabilities[1] = np.vstack( + ((1 - probs) / 3, probs, (1 - probs) * 2 / 3)).T + + Curves.from_results(res, model_index=1, target_class=1) + cytrue, cprobs = init.call_args[0] + np.testing.assert_equal(cytrue, ytrue == 1) + np.testing.assert_equal(cprobs, probs) + + Curves.from_results(res, model_index=1, target_class=0) + cytrue, cprobs = init.call_args[0] + np.testing.assert_equal(cytrue, ytrue == 0) + np.testing.assert_equal(cprobs, (1 - probs) / 3) + + Curves.from_results(res, model_index=1, target_class=2) + cytrue, cprobs = init.call_args[0] + np.testing.assert_equal(cytrue, ytrue == 2) + np.testing.assert_equal(cprobs, (1 - probs) * 2 / 3) + + self.assertRaises(ValueError, Curves.from_results, res, model_index=1) + + @patch("Orange.evaluation.performance_curves.Curves.__init__", + return_value=None) + def test_curves_from_results_nans(self, init): + res = Results() + ytrue, probs = self.data.T + ytrue[0] = np.nan + probs[-1] = np.nan + res.actual = ytrue.astype(float) + res.probabilities = np.vstack((1 - probs, probs)).T.reshape(1, -1, 2) + Curves.from_results(res) + cytrue, cprobs = init.call_args[0] + np.testing.assert_equal(cytrue, ytrue[1:-1]) + np.testing.assert_equal(cprobs, probs[1:-1]) diff --git a/doc/data-mining-library/source/reference/evaluation.performance_curves.rst b/doc/data-mining-library/source/reference/evaluation.performance_curves.rst new file mode 100644 index 00000000000..d9eaa515c0f --- /dev/null +++ b/doc/data-mining-library/source/reference/evaluation.performance_curves.rst @@ -0,0 +1,8 @@ +.. py:currentmodule:: Orange.evaluation.performance_curves + +################## +Performance curves +################## + +.. 
autoclass:: Orange.evaluation.performance_curves.Curves + :members: diff --git a/doc/data-mining-library/source/reference/evaluation.rst b/doc/data-mining-library/source/reference/evaluation.rst index 422371a41eb..a07c99ae44f 100644 --- a/doc/data-mining-library/source/reference/evaluation.rst +++ b/doc/data-mining-library/source/reference/evaluation.rst @@ -9,3 +9,4 @@ Evaluation (``evaluation``) evaluation.testing evaluation.cd + evaluation.performance_curves From 60248970fc5d98a387caa5803fd6b2f24581b988 Mon Sep 17 00:00:00 2001 From: janezd Date: Sun, 16 Jun 2019 22:47:36 +0200 Subject: [PATCH 09/21] Calibration plot: Use Orange.evaluation.testing.performance_curves to compute curves --- Orange/widgets/evaluate/owcalibrationplot.py | 65 +++++--------------- 1 file changed, 16 insertions(+), 49 deletions(-) diff --git a/Orange/widgets/evaluate/owcalibrationplot.py b/Orange/widgets/evaluate/owcalibrationplot.py index 3d782030467..3e316b990e8 100644 --- a/Orange/widgets/evaluate/owcalibrationplot.py +++ b/Orange/widgets/evaluate/owcalibrationplot.py @@ -9,6 +9,7 @@ from Orange.classification import ModelWithThreshold from Orange.evaluation import Results +from Orange.evaluation.performance_curves import Curves from Orange.widgets import widget, gui, settings from Orange.widgets.evaluate.utils import \ check_results_adequacy, results_for_preview @@ -18,71 +19,37 @@ from Orange.widgets import report -class Data: - def __init__(self, ytrue, probs): - sortind = np.argsort(probs) - self.probs = probs[sortind] - self.ytrue = ytrue[sortind] - self.fn = np.cumsum(self.ytrue) - self.tot = len(probs) - self.p = self.fn[-1] - self.n = self.tot - self.p - - @property - def tn(self): - return np.arange(self.tot) - self.fn - - @property - def tp(self): - return self.p - self.fn - - @property - def fp(self): - return self.n - self.tn - - MetricDefinition = namedtuple( "metric_definition", - ("name", "function", "short_names", "explanation")) + ("name", "functions", "short_names", "explanation")) Metrics = [MetricDefinition(*args) for args in ( - ("Actual probability", - None, - (), - ""), - ("Classification accuracy", - lambda d: (d.probs, ((d.tp + d.tn) / d.tot,)), - (), - ""), - ("F1", - lambda d: (d.probs, (2 * d.tp / (2 * d.tp + d.fp + d.fn),)), - (), - ""), + ("Calibration curve", None, (), ""), + ("Classification accuracy", (Curves.ca, ), (), ""), + ("F1", (Curves.f1, ), (), ""), ("Sensitivity and specificity", - lambda d: (d.probs, (d.tp / d.p, d.tn / d.n)), + (Curves.sensitivity, Curves.specificity), ("sens", "spec"), "
<p>Sensitivity (falling) is the proportion of correctly " "detected positive instances (TP / P).</p>" "<p>Specificity (rising) is the proportion of detected " "negative instances (TP / N).</p>
"), ("Precision and recall", - lambda d: (d.probs[:-1], (d.tp[:-1] / np.arange(d.tot, 1, -1), - d.tp[:-1] / d.p)), + (Curves.precision, Curves.recall), ("prec", "recall"), "
<p>Precision (rising) is the fraction of retrieved instances " "that are relevant, TP / (TP + FP).</p>" "<p>Recall (falling) is the proportion of discovered relevant " "instances, TP / P.</p>
"), ("Pos and neg predictive value", - lambda d: (d.probs[:-1], (d.tp[:-1] / np.arange(d.tot, 1, -1), - d.tn[:-1] / np.arange(1, d.tot))), + (Curves.ppv, Curves.npv), ("PPV", "TPV"), "
<p>Positive predictive value (rising) is the proportion of " "correct positives, TP / (TP + FP).</p>" "<p>Negative predictive value is the proportion of correct " "negatives, TN / (TN + FN).</p>
"), ("True and false positive rate", - lambda d: (d.probs, (d.tp / d.p, d.fp / d.n)), + (Curves.tpr, Curves.fpr), ("TPR", "FPR"), "
<p>True and false positive rate are proportions of detected " "and omitted positive instances</p>
"), @@ -253,7 +220,7 @@ def _initialize(self, results): def _rug(self, data, pen_args): color = pen_args["pen"].color() rh = 0.025 - rug_x = np.c_[data.probs, data.probs] + rug_x = np.c_[data.probs[:-1], data.probs[:-1]] rug_x_true = rug_x[data.ytrue].ravel() rug_x_false = rug_x[~data.ytrue].ravel() @@ -271,11 +238,11 @@ def _rug(self, data, pen_args): def plot_metrics(self, data, metrics, pen_args): if metrics is None: - return self._prob_curve(data.ytrue, data.probs, pen_args) - x, ys = metrics(data) + return self._prob_curve(data.ytrue, data.probs[:-1], pen_args) + ys = [metric(data) for metric in metrics] for y in ys: - self.plot.plot(x, y, **pen_args) - return x, ys + self.plot.plot(data.probs, y, **pen_args) + return data.probs, ys def _prob_curve(self, ytrue, probs, pen_args): if not probs.size: @@ -296,7 +263,7 @@ def _prob_curve(self, ytrue, probs, pen_args): def _setup_plot(self): target = self.target_index results = self.results - metrics = Metrics[self.score].function + metrics = Metrics[self.score].functions plot_folds = self.fold_curves and results.folds is not None self.scores = [] @@ -309,7 +276,7 @@ def _setup_plot(self): shadowPen=pg.mkPen(color.lighter(160), width=4 + 4 * plot_folds), antiAlias=True) - data = Data(ytrue, probs) + data = Curves(ytrue, probs) self.scores.append( (self.classifier_names[clsf], self.plot_metrics(data, metrics, pen_args))) From 1cfbeece2ec7c853683b9286643257f3ce9a55a0 Mon Sep 17 00:00:00 2001 From: janezd Date: Mon, 17 Jun 2019 17:51:57 +0200 Subject: [PATCH 10/21] Calibration plot: Fix selected model output --- Orange/classification/calibration.py | 174 ++++++++++++++- .../classification/tests/test_calibration.py | 203 ++++++++++++++++++ Orange/widgets/evaluate/owcalibrationplot.py | 90 +++++--- .../source/reference/classification.rst | 18 ++ 4 files changed, 451 insertions(+), 34 deletions(-) create mode 100644 Orange/classification/tests/test_calibration.py diff --git a/Orange/classification/calibration.py b/Orange/classification/calibration.py index 891b8db81b6..46bf2e8f242 100644 --- a/Orange/classification/calibration.py +++ b/Orange/classification/calibration.py @@ -1,22 +1,176 @@ -from Orange.classification import Model +import numpy as np +from sklearn.isotonic import IsotonicRegression +from sklearn.calibration import _SigmoidCalibration -__all__ = ["ModelWithThreshold"] +from Orange.classification import Model, Learner +from Orange.evaluation import TestOnTrainingData +from Orange.evaluation.performance_curves import Curves +__all__ = ["ThresholdClassifier", "ThresholdLearner", + "CalibratedLearner", "CalibratedClassifier"] -class ModelWithThreshold(Model): - def __init__(self, wrapped_model, threshold, target_class=1): - super().__init__(wrapped_model.domain, wrapped_model.original_domain) - self.name = f"{wrapped_model.name}, thresh={threshold:.2f}" - self.wrapped_model = wrapped_model + +class ThresholdClassifier(Model): + """ + A model that wraps a binary model and sets a different threshold. + + The target class is the class with index 1. 
A data instance is classified
+    to class 1 if the probability of this class equals or exceeds the
+    threshold.
+
+    Attributes:
+        base_model (Orange.classification.Model): base model
+        threshold (float): decision threshold
+    """
+    def __init__(self, base_model, threshold):
+        if not base_model.domain.class_var.is_discrete \
+                or len(base_model.domain.class_var.values) != 2:
+            raise ValueError("ThresholdClassifier requires a binary class")
+
+        super().__init__(base_model.domain, base_model.original_domain)
+        self.name = f"{base_model.name}, thresh={threshold:.2f}"
+        self.base_model = base_model
         self.threshold = threshold
-        self.target_class = target_class

     def __call__(self, data, ret=Model.Value):
-        probs = self.wrapped_model(data, ret=Model.Probs)
+        probs = self.base_model(data, ret=Model.Probs)
         if ret == Model.Probs:
             return probs
-        vals = probs[:, self.target_class].flatten() > self.threshold
+        class_probs = probs[:, 1].ravel()
+        with np.errstate(invalid="ignore"):  # we fix NaNs below
+            vals = (class_probs >= self.threshold).astype(float)
+        vals[np.isnan(class_probs)] = np.nan
         if ret == Model.Value:
             return vals
         else:
             return vals, probs
+
+
+class ThresholdLearner(Learner):
+    """
+    A learner that runs another learner and then finds the optimal threshold
+    for CA or F1 on the training data.
+
+    Attributes:
+        base_learner (Learner): base learner
+        threshold_criterion (int):
+            `ThresholdLearner.OptimizeCA` or `ThresholdLearner.OptimizeF1`
+    """
+    __returns__ = ThresholdClassifier
+
+    OptimizeCA, OptimizeF1 = range(2)
+
+    def __init__(self, base_learner, threshold_criterion=OptimizeCA):
+        super().__init__()
+        self.base_learner = base_learner
+        self.threshold_criterion = threshold_criterion
+
+    def fit_storage(self, data):
+        """
+        Induce a model using the provided `base_learner`, compute probabilities
+        on the training data and then find the optimal decision threshold.
+        In case of ties, select the threshold that is closest to 0.5.
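+
+        A hypothetical usage sketch (illustration only, not code from this
+        patch; `NaiveBayesLearner` and the binary "heart_disease" data are
+        just examples)::
+
+            from Orange.data import Table
+            from Orange.classification import NaiveBayesLearner
+
+            data = Table("heart_disease")
+            learner = ThresholdLearner(NaiveBayesLearner(),
+                                       ThresholdLearner.OptimizeF1)
+            model = learner(data)  # ThresholdClassifier with a tuned threshold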
+        """
+        if not data.domain.class_var.is_discrete \
+                or len(data.domain.class_var.values) != 2:
+            raise ValueError("ThresholdLearner requires a binary class")
+
+        res = TestOnTrainingData(data, [self.base_learner], store_models=True)
+        model = res.models[0, 0]
+        curves = Curves.from_results(res)
+        curve = [curves.ca, curves.f1][self.threshold_criterion]()
+        # In case of ties, we want the optimal threshold that is closest to 0.5
+        best_threshs = curves.probs[curve == np.max(curve)]
+        threshold = best_threshs[min(np.searchsorted(best_threshs, 0.5),
+                                     len(best_threshs) - 1)]
+        return ThresholdClassifier(model, threshold)
+
+
+class CalibratedClassifier(Model):
+    """
+    A model that wraps another model and recalibrates probabilities.
+
+    Attributes:
+        base_model (Model): base model
+        calibrators (list of callable):
+            list of functions that get a vector of probabilities and return
+            calibrated probabilities
+    """
+    def __init__(self, base_model, calibrators):
+        if not base_model.domain.class_var.is_discrete:
+            raise ValueError("CalibratedClassifier requires a discrete target")
+
+        super().__init__(base_model.domain, base_model.original_domain)
+        self.base_model = base_model
+        self.calibrators = calibrators
+        self.name = f"{base_model.name}, calibrated"
+
+    def __call__(self, data, ret=Model.Value):
+        probs = self.base_model(data, Model.Probs)
+        cal_probs = self.calibrated_probs(probs)
+        if ret == Model.Probs:
+            return cal_probs
+        vals = np.argmax(cal_probs, axis=1)
+        if ret == Model.Value:
+            return vals
+        else:
+            return vals, cal_probs
+
+    def calibrated_probs(self, probs):
+        if self.calibrators:
+            ps = np.hstack(
+                tuple(
+                    calibr.predict(cls_probs).reshape(-1, 1)
+                    for calibr, cls_probs in zip(self.calibrators, probs.T)))
+        else:
+            ps = probs.copy()
+        sums = np.sum(ps, axis=1)
+        zero_sums = sums == 0
+        with np.errstate(invalid="ignore"):  # handled below
+            ps /= sums[:, None]
+        if zero_sums.any():
+            ps[zero_sums] = 1 / ps.shape[1]
+        return ps
+
+
+class CalibratedLearner(Learner):
+    """
+    Probability calibration for learning algorithms
+
+    This learner wraps another learner so that, after training, it predicts
+    the probabilities on the training data and calibrates them using sigmoid
+    or isotonic calibration. It then returns a :obj:`CalibratedClassifier`.
+
+    Attributes:
+        base_learner (Learner): base learner
+        calibration_method (int):
+            `CalibratedLearner.Sigmoid` or `CalibratedLearner.Isotonic`
+    """
+    __returns__ = CalibratedClassifier
+
+    Sigmoid, Isotonic = range(2)
+
+    def __init__(self, base_learner, calibration_method=Sigmoid):
+        super().__init__()
+        self.base_learner = base_learner
+        self.calibration_method = calibration_method
+
+    def fit_storage(self, data):
+        """
+        Induce a model using the provided `base_learner`, compute probabilities
+        on the training data and use sklearn's `_SigmoidCalibration` or
+        `IsotonicRegression` to prepare calibrators.
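+
+        A hypothetical usage sketch (illustration only, not code from this
+        patch; the learner and data are just examples)::
+
+            from Orange.data import Table
+            from Orange.classification import Model, NaiveBayesLearner
+
+            data = Table("heart_disease")
+            learner = CalibratedLearner(NaiveBayesLearner(),
+                                        CalibratedLearner.Isotonic)
+            model = learner(data)                 # a CalibratedClassifier
+            probs = model(data, ret=Model.Probs)  # calibrated probabilities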
+ """ + res = TestOnTrainingData(data, [self.base_learner], store_models=True) + model = res.models[0, 0] + probabilities = res.probabilities[0] + return self.get_model(model, res.actual, probabilities) + + def get_model(self, model, ytrue, probabilities): + if self.calibration_method == CalibratedLearner.Sigmoid: + fitter = _SigmoidCalibration() + else: + fitter = IsotonicRegression(out_of_bounds='clip') + probabilities[np.isinf(probabilities)] = 1 + calibrators = [fitter.fit(cls_probs, ytrue) + for cls_idx, cls_probs in enumerate(probabilities.T)] + return CalibratedClassifier(model, calibrators) diff --git a/Orange/classification/tests/test_calibration.py b/Orange/classification/tests/test_calibration.py new file mode 100644 index 00000000000..a538a3b1870 --- /dev/null +++ b/Orange/classification/tests/test_calibration.py @@ -0,0 +1,203 @@ +import unittest +from unittest.mock import Mock, patch + +import numpy as np + +from Orange.base import Model +from Orange.classification.calibration import \ + ThresholdLearner, ThresholdClassifier, \ + CalibratedLearner, CalibratedClassifier +from Orange.data import Table + + +class TestThresholdClassifier(unittest.TestCase): + def setUp(self): + probs1 = np.array([0.3, 0.5, 0.2, 0.8, 0.9, 0]).reshape(-1, 1) + self.probs = np.hstack((1 - probs1, probs1)) + base_model = Mock(return_value=self.probs) + base_model.domain.class_var.is_discrete = True + base_model.domain.class_var.values = ["a", "b"] + self.model = ThresholdClassifier(base_model, 0.5) + self.data = Mock() + + def test_threshold(self): + vals = self.model(self.data) + np.testing.assert_equal(vals, [0, 1, 0, 1, 1, 0]) + + self.model.threshold = 0.8 + vals = self.model(self.data) + np.testing.assert_equal(vals, [0, 0, 0, 1, 1, 0]) + + self.model.threshold = 0 + vals = self.model(self.data) + np.testing.assert_equal(vals, [1] * 6) + + def test_return_types(self): + vals = self.model(self.data, ret=Model.Value) + np.testing.assert_equal(vals, [0, 1, 0, 1, 1, 0]) + + vals = self.model(self.data) + np.testing.assert_equal(vals, [0, 1, 0, 1, 1, 0]) + + probs = self.model(self.data, ret=Model.Probs) + np.testing.assert_equal(probs, self.probs) + + vals, probs = self.model(self.data, ret=Model.ValueProbs) + np.testing.assert_equal(vals, [0, 1, 0, 1, 1, 0]) + np.testing.assert_equal(probs, self.probs) + + def test_nans(self): + self.probs[1, :] = np.nan + vals, probs = self.model(self.data, ret=Model.ValueProbs) + np.testing.assert_equal(vals, [0, np.nan, 0, 1, 1, 0]) + np.testing.assert_equal(probs, self.probs) + + def test_non_binary_base(self): + base_model = Mock() + base_model.domain.class_var.is_discrete = True + base_model.domain.class_var.values = ["a"] + self.assertRaises(ValueError, ThresholdClassifier, base_model, 0.5) + + base_model.domain.class_var.values = ["a", "b", "c"] + self.assertRaises(ValueError, ThresholdClassifier, base_model, 0.5) + + base_model.domain.class_var = Mock() + base_model.domain.class_var.is_discrete = False + self.assertRaises(ValueError, ThresholdClassifier, base_model, 0.5) + + +class TestThresholdLearner(unittest.TestCase): + @patch("Orange.evaluation.performance_curves.Curves.from_results") + @patch("Orange.classification.calibration.TestOnTrainingData") + def test_fit_storage(self, test_on_training, curves_from_results): + curves_from_results.return_value = curves = Mock() + curves.probs = np.array([0.1, 0.15, 0.3, 0.45, 0.6, 0.8]) + curves.ca = lambda: np.array([0.1, 0.7, 0.4, 0.4, 0.3, 0.1]) + curves.f1 = lambda: np.array([0.1, 0.2, 0.4, 0.4, 0.3, 0.1]) 
+ model = Mock() + model.domain.class_var.is_discrete = True + model.domain.class_var.values = ("a", "b") + data = Table("heart_disease") + learner = Mock() + test_on_training.return_value = res = Mock() + res.models = np.array([[model]]) + test_on_training.return_value = res + + thresh_learner = ThresholdLearner( + base_learner=learner, + threshold_criterion=ThresholdLearner.OptimizeCA) + thresh_model = thresh_learner(data) + self.assertEqual(thresh_model.threshold, 0.15) + args, kwargs = test_on_training.call_args + self.assertEqual(len(args), 2) + self.assertIs(args[0], data) + self.assertIs(args[1][0], learner) + self.assertEqual(len(args[1]), 1) + self.assertEqual(kwargs, {"store_models": 1}) + + thresh_learner = ThresholdLearner( + base_learner=learner, + threshold_criterion=ThresholdLearner.OptimizeF1) + thresh_model = thresh_learner(data) + self.assertEqual(thresh_model.threshold, 0.45) + + def test_non_binary_class(self): + thresh_learner = ThresholdLearner( + base_learner=Mock(), + threshold_criterion=ThresholdLearner.OptimizeF1) + + data = Mock() + data.domain.class_var.is_discrete = True + data.domain.class_var.values = ["a"] + self.assertRaises(ValueError, thresh_learner.fit_storage, data) + + data.domain.class_var.values = ["a", "b", "c"] + self.assertRaises(ValueError, thresh_learner.fit_storage, data) + + data.domain.class_var = Mock() + data.domain.class_var.is_discrete = False + self.assertRaises(ValueError, thresh_learner.fit_storage, data) + + +class TestCalibratedClassifier(unittest.TestCase): + def setUp(self): + probs1 = np.array([0.3, 0.5, 0.2, 0.8, 0.9, 0]).reshape(-1, 1) + self.probs = np.hstack((1 - probs1, probs1)) + base_model = Mock(return_value=self.probs) + base_model.domain.class_var.is_discrete = True + base_model.domain.class_var.values = ["a", "b"] + self.model = CalibratedClassifier(base_model, None) + self.data = Mock() + + def test_call(self): + calprobs = np.arange(self.probs.size).reshape(self.probs.shape) + calprobs = calprobs / np.sum(calprobs, axis=1)[:, None] + calprobs[-1] = [0.7, 0.3] + self.model.calibrated_probs = Mock(return_value=calprobs) + + probs = self.model(self.data, ret=Model.Probs) + self.model.calibrated_probs.assert_called_with(self.probs) + np.testing.assert_almost_equal(probs, calprobs) + + vals = self.model(self.data, ret=Model.Value) + np.testing.assert_almost_equal(vals, [1, 1, 1, 1, 1, 0]) + + vals, probs = self.model(self.data, ret=Model.ValueProbs) + np.testing.assert_almost_equal(probs, calprobs) + np.testing.assert_almost_equal(vals, [1, 1, 1, 1, 1, 0]) + + def test_calibrated_probs(self): + self.model.calibrators = None + calprobs = self.model.calibrated_probs(self.probs) + np.testing.assert_equal(calprobs, self.probs) + self.assertIsNot(calprobs, self.probs) + + calibrator = Mock() + calibrator.predict = lambda x: x**2 + self.model.calibrators = [calibrator] * 2 + calprobs = self.model.calibrated_probs(self.probs) + expprobs = self.probs ** 2 / np.sum(self.probs ** 2, axis=1)[:, None] + np.testing.assert_almost_equal(calprobs, expprobs) + + self.probs[1] = 0 + self.probs[2] = np.nan + expprobs[1] = 0.5 + expprobs[2] = np.nan + calprobs = self.model.calibrated_probs(self.probs) + np.testing.assert_almost_equal(calprobs, expprobs) + + +class TestCalibratedLearner(unittest.TestCase): + @patch("Orange.classification.calibration._SigmoidCalibration.fit") + @patch("Orange.classification.calibration.TestOnTrainingData") + def test_fit_storage(self, test_on_training, sigmoid_fit): + data = Table("heart_disease") + learner = 
Mock() + + model = Mock() + model.domain.class_var.is_discrete = True + model.domain.class_var.values = ("a", "b") + + test_on_training.return_value = res = Mock() + res.models = np.array([[model]]) + res.probabilities = np.arange(20, dtype=float).reshape(1, 5, 4) + test_on_training.return_value = res + + sigmoid_fit.return_value = Mock() + + cal_learner = CalibratedLearner( + base_learner=learner, calibration_method=CalibratedLearner.Sigmoid) + cal_model = cal_learner(data) + + self.assertIs(cal_model.base_model, model) + self.assertEqual(cal_model.calibrators, [sigmoid_fit.return_value] * 4) + args, kwargs = test_on_training.call_args + self.assertEqual(len(args), 2) + self.assertIs(args[0], data) + self.assertIs(args[1][0], learner) + self.assertEqual(len(args[1]), 1) + self.assertEqual(kwargs, {"store_models": 1}) + + for call, cls_probs in zip(sigmoid_fit.call_args_list, + res.probabilities[0].T): + np.testing.assert_equal(call[0][0], cls_probs) diff --git a/Orange/widgets/evaluate/owcalibrationplot.py b/Orange/widgets/evaluate/owcalibrationplot.py index 3e316b990e8..63c1b0190a2 100644 --- a/Orange/widgets/evaluate/owcalibrationplot.py +++ b/Orange/widgets/evaluate/owcalibrationplot.py @@ -7,7 +7,8 @@ import pyqtgraph as pg -from Orange.classification import ModelWithThreshold +from Orange.base import Model +from Orange.classification import ThresholdClassifier, CalibratedLearner from Orange.evaluation import Results from Orange.evaluation.performance_curves import Curves from Orange.widgets import widget, gui, settings @@ -67,26 +68,27 @@ class Inputs: evaluation_results = Input("Evaluation Results", Results) class Outputs: - calibrated_model = Output("Calibrated Model", ModelWithThreshold) + calibrated_model = Output("Calibrated Model", Model) class Warning(widget.OWWidget.Warning): - empty_input = widget.Msg( - "Empty result on input. Nothing to display.") + empty_input = widget.Msg("Empty result on input. 
Nothing to display.") class Information(widget.OWWidget.Information): no_out = "Can't output a model: " no_output_multiple_folds = Msg( - no_out + "every training data sample produced a different model") + no_out + "each training data sample produces a different model") no_output_no_models = Msg( no_out + "test results do not contain stored models;\n" "try testing on separate data or on training data") no_output_multiple_selected = Msg( no_out + "select a single model - the widget can output only one") + non_binary_class = Msg(no_out + "cannot calibrate non-binary classes") target_index = settings.Setting(0) selected_classifiers = settings.Setting([]) score = settings.Setting(0) + output_calibration = settings.Setting(0) fold_curves = settings.Setting(False) display_rug = settings.Setting(True) threshold = settings.Setting(0.5) @@ -103,10 +105,13 @@ def __init__(self): self.colors = [] self.line = None + self._last_score_value = -1 + box = gui.vBox(self.controlArea, box="Settings") self.target_cb = gui.comboBox( box, self, "target_index", label="Target:", - orientation=Qt.Horizontal, callback=self._replot, contentsLength=8) + orientation=Qt.Horizontal, callback=self.target_index_changed, + contentsLength=8) gui.checkBox( box, self, "display_rug", "Show rug", callback=self._on_display_rug_changed) @@ -133,6 +138,11 @@ def __init__(self): font.setPointSizeF(0.85 * font.pointSizeF()) self.explanation.setFont(font) + gui.radioButtons( + box, self, value="output_calibration", + btnLabels=("Sigmoid calibration", "Isotonic calibration"), + label="Output model calibration", callback=self.apply) + box = gui.widgetBox(self.controlArea, "Info") self.info_label = gui.widgetLabel(box) @@ -159,7 +169,7 @@ def __init__(self): @Inputs.evaluation_results def set_results(self, results): self.clear() - results = check_results_adequacy(results, self.Error) + results = check_results_adequacy(results, self.Error, check_nan=False) if results is not None and not results.actual.size: self.Warning.empty_input() else: @@ -179,9 +189,19 @@ def clear(self): self.target_index = 0 self.colors = [] + def target_index_changed(self): + if len(self.results.domain.class_var.values) == 2: + self.threshold = 1 - self.threshold + self._set_explanation() + self._replot() + self.apply() + def score_changed(self): self._set_explanation() self._replot() + if self._last_score_value != self.score: + self.apply() + self._last_score_value = self.score def _set_explanation(self): explanation = Metrics[self.score].explanation @@ -191,6 +211,11 @@ def _set_explanation(self): else: self.explanation.hide() + if self.score == 0: + self.controls.output_calibration.show() + else: + self.controls.output_calibration.hide() + axis = self.plot.getAxis("bottom") axis.setLabel("Predicted probability" if self.score == 0 else "Threshold probability to classify as positive") @@ -292,22 +317,23 @@ def _setup_plot(self): fold_results = results.get_fold(fold) fold_ytrue = fold_results.actual == target fold_probs = fold_results.probabilities[clsf, :, target] - self.plot_metrics(Data(fold_ytrue, fold_probs), + self.plot_metrics(Curves(fold_ytrue, fold_probs), metrics, pen_args) def _replot(self): self.plot.clear() if self.results is not None: self._setup_plot() - self.line = pg.InfiniteLine( - pos=self.threshold, movable=True, - pen=pg.mkPen(color="k", style=Qt.DashLine, width=2), - hoverPen=pg.mkPen(color="k", style=Qt.DashLine, width=3), - bounds=(0, 1), - ) - self.line.sigPositionChanged.connect(self.threshold_change) - 
self.line.sigPositionChangeFinished.connect(self.threshold_change_done) - self.plot.addItem(self.line) + if self.score != 0: + self.line = pg.InfiniteLine( + pos=self.threshold, movable=True, + pen=pg.mkPen(color="k", style=Qt.DashLine, width=2), + hoverPen=pg.mkPen(color="k", style=Qt.DashLine, width=3), + bounds=(0, 1), + ) + self.line.sigPositionChanged.connect(self.threshold_change) + self.line.sigPositionChangeFinished.connect(self.threshold_change_done) + self.plot.addItem(self.line) self._update_info() def _on_display_rug_changed(self): @@ -336,7 +362,10 @@ def _update_info(self): {"
".join(f"" for n in short_names)} """ - for name, (probs, curves) in self.scores: + for name, probs_curves in self.scores: + if probs_curves is None: + continue + probs, curves = probs_curves ind = min(np.searchsorted(probs, self.threshold), len(probs) - 1) text += f"" @@ -353,15 +382,28 @@ def apply(self): info = self.Information wrapped = None problems = {} - if self.results is not None: + results = self.results + if results is not None: problems = { - info.no_output_multiple_folds: len(self.results.folds) > 1, - info.no_output_no_models: self.results.models is None, + info.no_output_multiple_folds: len(results.folds) > 1, + info.no_output_no_models: results.models is None, info.no_output_multiple_selected: - len(self.selected_classifiers) != 1} + len(self.selected_classifiers) != 1, + info.non_binary_class: + self.score != 0 + and len(results.domain.class_var.values) != 2} if not any(problems.values()): - model = self.results.models[0][self.selected_classifiers[0]] - wrapped = ModelWithThreshold(model, self.threshold) + clsf_idx = self.selected_classifiers[0] + model = results.models[0, clsf_idx] + if self.score == 0: + cal_learner = CalibratedLearner( + None, self.output_calibration) + wrapped = cal_learner.get_model( + model, results.actual, results.probabilities[clsf_idx]) + else: + threshold = [1 - self.threshold, + self.threshold][self.target_index] + wrapped = ThresholdClassifier(model, threshold) self.Outputs.calibrated_model.send(wrapped) for info, shown in problems.items(): diff --git a/doc/data-mining-library/source/reference/classification.rst b/doc/data-mining-library/source/reference/classification.rst index 5095e147f2a..55792fa340f 100644 --- a/doc/data-mining-library/source/reference/classification.rst +++ b/doc/data-mining-library/source/reference/classification.rst @@ -196,3 +196,21 @@ CN2 Rule Induction .. autoclass:: CN2SDUnorderedLearner :members: + + +Calibration and threshold optimization +-------------------------------------- + +.. automodule:: Orange.classification.calibration + +.. autoclass:: ThresholdClassifier + :members: + +.. autoclass:: ThresholdLearner + :members: + +.. autoclass:: CalibratedClassifier + :members: + +.. autoclass:: CalibratedLearner + :members: From f742ff919d4c2985a38cdee5790cb7230962c398 Mon Sep 17 00:00:00 2001 From: janezd Date: Mon, 17 Jun 2019 18:11:10 +0200 Subject: [PATCH 11/21] OWLearnerWidget: Let default name appear as placeholder. This allows derived widget to change the default name without interferring with user-changed settings. 
--- Orange/widgets/tests/base.py | 3 ++- Orange/widgets/utils/owlearnerwidget.py | 13 ++++++------- Orange/widgets/utils/tests/test_owlearnerwidget.py | 1 - 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/Orange/widgets/tests/base.py b/Orange/widgets/tests/base.py index 635dd2e5fd8..1204e1c6ed5 100644 --- a/Orange/widgets/tests/base.py +++ b/Orange/widgets/tests/base.py @@ -672,7 +672,8 @@ def test_output_learner_name(self): new_name = "Learner Name" self.widget.apply_button.button.click() self.assertEqual(self.widget.learner.name, - self.widget.name_line_edit.text()) + self.widget.name_line_edit.text() + or self.widget.name_line_edit.placeholderText()) self.widget.name_line_edit.setText(new_name) self.widget.apply_button.button.click() self.wait_until_stop_blocking() diff --git a/Orange/widgets/utils/owlearnerwidget.py b/Orange/widgets/utils/owlearnerwidget.py index 3c6ee6ea65f..63b2795c78e 100644 --- a/Orange/widgets/utils/owlearnerwidget.py +++ b/Orange/widgets/utils/owlearnerwidget.py @@ -65,7 +65,7 @@ class OWBaseLearner(OWWidget, metaclass=OWBaseLearnerMeta, openclass=True): LEARNER = None supports_sparse = True - learner_name = Setting(None, schema_only=True) + learner_name = Setting("", schema_only=True) want_main_area = False resizing_enabled = False auto_apply = Setting(True) @@ -95,8 +95,6 @@ def __init__(self): self.data = None self.valid_data = False self.learner = None - if self.learner_name is None: - self.learner_name = self.name self.model = None self.preprocessors = None self.outdated_settings = False @@ -149,7 +147,7 @@ def update_learner(self): if self.learner and issubclass(self.LEARNER, Fitter): self.learner.use_default_preprocessors = True if self.learner is not None: - self.learner.name = self.learner_name + self.learner.name = self.learner_name or self.name self.Outputs.learner.send(self.learner) self.outdated_settings = False self.Warning.outdated_learner.clear() @@ -168,7 +166,7 @@ def update_model(self): except BaseException as exc: self.show_fitting_failed(exc) else: - self.model.name = self.learner_name + self.model.name = self.learner_name or self.name self.model.instances = self.data self.Outputs.model.send(self.model) @@ -198,7 +196,7 @@ def settings_changed(self, *args, **kwargs): def _change_name(self, instance, output): if instance: - instance.name = self.learner_name + instance.name = self.learner_name or self.name if self.auto_apply: output.send(instance) @@ -207,7 +205,7 @@ def learner_name_changed(self): self._change_name(self.model, self.Outputs.model) def send_report(self): - self.report_items((("Name", self.learner_name),)) + self.report_items((("Name", self.learner_name or self.name),)) model_parameters = self.get_learner_parameters() if model_parameters: @@ -264,6 +262,7 @@ def add_regression_layout(self, box): def add_learner_name_widget(self): self.name_line_edit = gui.lineEdit( self.controlArea, self, 'learner_name', box='Name', + placeholderText=self.name, tooltip='The name will identify this model in other widgets', orientation=Qt.Horizontal, callback=self.learner_name_changed) diff --git a/Orange/widgets/utils/tests/test_owlearnerwidget.py b/Orange/widgets/utils/tests/test_owlearnerwidget.py index 99f792196b6..9a43365a473 100644 --- a/Orange/widgets/utils/tests/test_owlearnerwidget.py +++ b/Orange/widgets/utils/tests/test_owlearnerwidget.py @@ -105,7 +105,6 @@ class WidgetA(OWBaseLearner): LEARNER = KNNLearner w1 = self.create_widget(WidgetA) - self.assertEqual(w1.learner_name, "A") w1.learner_name = "MyWidget" settings = 
w1.settingsHandler.pack_data(w1) From c5d070df60bddecb81f3c17d8f70a1d6949fbb70 Mon Sep 17 00:00:00 2001 From: janezd Date: Mon, 17 Jun 2019 18:17:15 +0200 Subject: [PATCH 12/21] evaluations.testing: Minor fixes in unit tests --- Orange/evaluation/testing.py | 2 +- Orange/tests/test_evaluation_testing.py | 2 +- .../evaluate/tests/test_owcalibrationplot.py | 13 ------------- 3 files changed, 2 insertions(+), 15 deletions(-) diff --git a/Orange/evaluation/testing.py b/Orange/evaluation/testing.py index 9e6d0ca071c..22a917283e0 100644 --- a/Orange/evaluation/testing.py +++ b/Orange/evaluation/testing.py @@ -317,7 +317,7 @@ def split_by_model(self): res.probabilities = self.probabilities[(i,), :, :] if self.models is not None: - res.models = self.models[:, i:i+1] + res.models = self.models[:, i:i + 1] res.failed = [self.failed[i]] yield res diff --git a/Orange/tests/test_evaluation_testing.py b/Orange/tests/test_evaluation_testing.py index a57910eb971..3bc21d3f2e8 100644 --- a/Orange/tests/test_evaluation_testing.py +++ b/Orange/tests/test_evaluation_testing.py @@ -233,7 +233,7 @@ def test_split_by_model(self): self.assertTrue((result.predicted == res.predicted[i]).all()) self.assertTrue((result.probabilities == res.probabilities[i]).all()) self.assertEqual(len(result.models), 5) - for model in result.models: + for model in result.models[0]: self.assertIsInstance(model, learners[i].__returns__) self.assertSequenceEqual(result.learners, [res.learners[i]]) diff --git a/Orange/widgets/evaluate/tests/test_owcalibrationplot.py b/Orange/widgets/evaluate/tests/test_owcalibrationplot.py index 0575e03e8d1..ac07e8a2fff 100644 --- a/Orange/widgets/evaluate/tests/test_owcalibrationplot.py +++ b/Orange/widgets/evaluate/tests/test_owcalibrationplot.py @@ -1,7 +1,6 @@ import copy import warnings -import numpy as np from sklearn.exceptions import ConvergenceWarning from Orange.data import Table @@ -42,15 +41,3 @@ def test_empty(self): res.predicted = res.predicted[:, 0] res.probabilities = res.probabilities[:, :0, :] self.send_signal(self.widget.Inputs.evaluation_results, res) - - def test_nan_input(self): - res = copy.copy(self.res) - res.actual = res.actual.copy() - res.probabilities = res.probabilities.copy() - - res.actual[0] = np.nan - res.probabilities[:, [0, 3], :] = np.nan - self.send_signal(self.widget.Inputs.evaluation_results, res) - self.assertTrue(self.widget.Error.invalid_results.is_shown()) - self.send_signal(self.widget.Inputs.evaluation_results, None) - self.assertFalse(self.widget.Error.invalid_results.is_shown()) From 557fa2e78c91f339d415278941623ee77865f3eb Mon Sep 17 00:00:00 2001 From: janezd Date: Mon, 17 Jun 2019 18:26:20 +0200 Subject: [PATCH 13/21] OWTestLearners: Skip inactive signals (e.g. 
learner widget outputs None) --- Orange/widgets/evaluate/owtestlearners.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Orange/widgets/evaluate/owtestlearners.py b/Orange/widgets/evaluate/owtestlearners.py index c3da72af0f6..d534bbe6a32 100644 --- a/Orange/widgets/evaluate/owtestlearners.py +++ b/Orange/widgets/evaluate/owtestlearners.py @@ -315,7 +315,7 @@ def set_learner(self, learner, key): # Removed self._invalidate([key]) del self.learners[key] - else: + elif learner is not None: self.learners[key] = InputLearner(learner, None, None) self._invalidate([key]) From 1a8b013561486c7e7c87c814a3ed9f8ceac5d36f Mon Sep 17 00:00:00 2001 From: janezd Date: Mon, 17 Jun 2019 21:43:23 +0200 Subject: [PATCH 14/21] Calibrated Learner: Add widget --- Orange/widgets/model/owcalibratedlearner.py | 111 ++++++++++++ .../model/tests/test_owcalibratedlearner.py | 158 ++++++++++++++++++ 2 files changed, 269 insertions(+) create mode 100644 Orange/widgets/model/owcalibratedlearner.py create mode 100644 Orange/widgets/model/tests/test_owcalibratedlearner.py diff --git a/Orange/widgets/model/owcalibratedlearner.py b/Orange/widgets/model/owcalibratedlearner.py new file mode 100644 index 00000000000..558ac331539 --- /dev/null +++ b/Orange/widgets/model/owcalibratedlearner.py @@ -0,0 +1,111 @@ +from Orange.classification import CalibratedLearner, ThresholdLearner, \ + NaiveBayesLearner +from Orange.data import Table +from Orange.modelling import Learner +from Orange.widgets import gui +from Orange.widgets.widget import Input +from Orange.widgets.settings import Setting +from Orange.widgets.utils.owlearnerwidget import OWBaseLearner +from Orange.widgets.utils.widgetpreview import WidgetPreview + + +class OWCalibratedLearner(OWBaseLearner): + name = "Calibrated Learner" + description = "Wraps another learner with probability calibration and " \ + "decision threshold optimization" + icon = "icons/CalibratedLearner.svg" + priority = 20 + keywords = ["calibration", "threshold"] + + LEARNER = CalibratedLearner + + SigmoidCalibration, IsotonicCalibration, NoCalibration = range(3) + CalibrationOptions = ("Sigmoid calibration", + "Isotonic calibration", + "No calibration") + CalibrationShort = ("Sigmoid", "Isotonic", "") + CalibrationMap = { + SigmoidCalibration: CalibratedLearner.Sigmoid, + IsotonicCalibration: CalibratedLearner.Isotonic} + + OptimizeCA, OptimizeF1, NoThresholdOptimization = range(3) + ThresholdOptions = ("Optimize classification accuracy", + "Optimize F1 score", + "No threshold optimization") + ThresholdShort = ("CA", "F1", "") + ThresholdMap = { + OptimizeCA: ThresholdLearner.OptimizeCA, + OptimizeF1: ThresholdLearner.OptimizeF1} + + learner_name = Setting("", schema_only=True) + calibration = Setting(SigmoidCalibration) + threshold = Setting(OptimizeCA) + + class Inputs(OWBaseLearner.Inputs): + base_learner = Input("Base Learner", Learner) + + def __init__(self): + super().__init__() + self.base_learner = None + + def add_main_layout(self): + gui.radioButtons( + self.controlArea, self, "calibration", self.CalibrationOptions, + box="Probability calibration", + callback=self.calibration_options_changed) + gui.radioButtons( + self.controlArea, self, "threshold", self.ThresholdOptions, + box="Decision threshold optimization", + callback=self.calibration_options_changed) + + @Inputs.base_learner + def set_learner(self, learner): + self.base_learner = learner + self._set_default_name() + self.unconditional_apply() + + def _set_default_name(self): + if self.base_learner is None: + 
self.name = "Calibrated learner" + else: + self.name = " + ".join(part for part in ( + self.base_learner.name.title(), + self.CalibrationShort[self.calibration], + self.ThresholdShort[self.threshold]) if part) + self.controls.learner_name.setPlaceholderText(self.name) + + def calibration_options_changed(self): + self._set_default_name() + self.apply() + + def create_learner(self): + class IdentityWrapper(Learner): + def fit_storage(self, data): + return self.base_learner.fit_storage(data) + + if self.base_learner is None: + return None + learner = self.base_learner + if self.calibration != self.NoCalibration: + learner = CalibratedLearner(learner, + self.CalibrationMap[self.calibration]) + if self.threshold != self.NoThresholdOptimization: + learner = ThresholdLearner(learner, + self.ThresholdMap[self.threshold]) + if self.preprocessors: + if learner is self.base_learner: + learner = IdentityWrapper() + learner.preprocessors = (self.preprocessors, ) + return learner + + def get_learner_parameters(self): + return (("Calibrate probabilities", + self.CalibrationOptions[self.calibrate]), + ("Threshold optimization", + self.ThresholdOptions[self.threshold])) + + +if __name__ == "__main__": # pragma: no cover + WidgetPreview(OWCalibratedLearner).run( + Table("heart_disease"), + set_learner=NaiveBayesLearner()) diff --git a/Orange/widgets/model/tests/test_owcalibratedlearner.py b/Orange/widgets/model/tests/test_owcalibratedlearner.py new file mode 100644 index 00000000000..400d483a592 --- /dev/null +++ b/Orange/widgets/model/tests/test_owcalibratedlearner.py @@ -0,0 +1,158 @@ +from unittest.mock import Mock + +from Orange.classification import ThresholdLearner, CalibratedLearner, \ + NaiveBayesLearner, ThresholdClassifier, CalibratedClassifier +from Orange.classification.base_classification import ModelClassification, \ + LearnerClassification +from Orange.classification.naive_bayes import NaiveBayesModel +from Orange.data import Table +from Orange.widgets.model.owcalibratedlearner import OWCalibratedLearner +from Orange.widgets.tests.base import WidgetTest, WidgetLearnerTestMixin, \ + datasets + + +class TestOWCalibratedLearner(WidgetTest, WidgetLearnerTestMixin): + def setUp(self): + self.widget = self.create_widget( + OWCalibratedLearner, stored_settings={"auto_apply": False}) + self.send_signal(self.widget.Inputs.base_learner, NaiveBayesLearner()) + + self.data = Table("heart_disease") + self.valid_datasets = (self.data,) + self.inadequate_dataset = (Table(datasets.path("testing_dataset_reg")),) + self.learner_class = LearnerClassification + self.model_class = ModelClassification + self.model_name = 'Calibrated classifier' + self.parameters = [] + + def test_output_learner(self): + """Check if learner is on output after apply""" + # Overridden to change the output type in the last test + initial = self.get_output("Learner") + self.assertIsNotNone(initial, "Does not initialize the learner output") + self.widget.apply_button.button.click() + newlearner = self.get_output("Learner") + self.assertIsNot(initial, newlearner, + "Does not send a new learner instance on `Apply`.") + self.assertIsNotNone(newlearner) + self.assertIsInstance( + newlearner, + (CalibratedLearner, ThresholdLearner, NaiveBayesLearner)) + + def test_output_model(self): + """Check if model is on output after sending data and apply""" + # Overridden to change the output type in the last two test + self.assertIsNone(self.get_output(self.widget.Outputs.model)) + self.widget.apply_button.button.click() + 
self.assertIsNone(self.get_output(self.widget.Outputs.model)) + self.send_signal('Data', self.data) + self.widget.apply_button.button.click() + self.wait_until_stop_blocking() + model = self.get_output(self.widget.Outputs.model) + self.assertIsNotNone(model) + self.assertIsInstance( + model, (CalibratedClassifier, ThresholdClassifier, NaiveBayesModel)) + + def test_create_learner(self): + widget = self.widget #: OWCalibratedLearner + self.widget.base_learner = Mock() + + widget.calibration = widget.SigmoidCalibration + widget.threshold = widget.OptimizeF1 + learner = self.widget.create_learner() + self.assertIsInstance(learner, ThresholdLearner) + self.assertEqual(learner.threshold_criterion, learner.OptimizeF1) + cal_learner = learner.base_learner + self.assertIsInstance(cal_learner, CalibratedLearner) + self.assertEqual(cal_learner.calibration_method, cal_learner.Sigmoid) + self.assertIs(cal_learner.base_learner, self.widget.base_learner) + + widget.calibration = widget.IsotonicCalibration + widget.threshold = widget.OptimizeCA + learner = self.widget.create_learner() + self.assertIsInstance(learner, ThresholdLearner) + self.assertEqual(learner.threshold_criterion, learner.OptimizeCA) + cal_learner = learner.base_learner + self.assertIsInstance(cal_learner, CalibratedLearner) + self.assertEqual(cal_learner.calibration_method, cal_learner.Isotonic) + self.assertIs(cal_learner.base_learner, self.widget.base_learner) + + widget.calibration = widget.NoCalibration + widget.threshold = widget.OptimizeCA + learner = self.widget.create_learner() + self.assertIsInstance(learner, ThresholdLearner) + self.assertEqual(learner.threshold_criterion, learner.OptimizeCA) + self.assertIs(learner.base_learner, self.widget.base_learner) + + widget.calibration = widget.IsotonicCalibration + widget.threshold = widget.NoThresholdOptimization + learner = self.widget.create_learner() + self.assertIsInstance(learner, CalibratedLearner) + self.assertEqual(learner.calibration_method, cal_learner.Isotonic) + self.assertIs(learner.base_learner, self.widget.base_learner) + + widget.calibration = widget.NoCalibration + widget.threshold = widget.NoThresholdOptimization + learner = self.widget.create_learner() + self.assertIs(learner, self.widget.base_learner) + + widget.calibration = widget.SigmoidCalibration + widget.threshold = widget.OptimizeF1 + widget.base_learner = None + learner = self.widget.create_learner() + self.assertIsNone(learner) + + def test_preprocessors(self): + widget = self.widget #: OWCalibratedLearner + self.widget.base_learner = Mock() + self.widget.base_learner.preprocessors = () + + widget.calibration = widget.SigmoidCalibration + widget.threshold = widget.OptimizeF1 + widget.preprocessors = Mock() + learner = self.widget.create_learner() + self.assertEqual(learner.preprocessors, (widget.preprocessors, )) + self.assertEqual(learner.base_learner.preprocessors, ()) + self.assertEqual(learner.base_learner.base_learner.preprocessors, ()) + + widget.calibration = widget.NoCalibration + widget.threshold = widget.NoThresholdOptimization + learner = self.widget.create_learner() + self.assertIsNot(learner, self.widget.base_learner) + self.assertFalse( + isinstance(learner, (CalibratedLearner, ThresholdLearner))) + self.assertEqual(learner.preprocessors, (widget.preprocessors, )) + + def test_set_learner_calls_unconditional_apply(self): + widget = self.widget + self.assertIsNotNone(self.get_output(widget.Outputs.learner)) + + widget.auto_apply = False + self.send_signal(widget.Inputs.base_learner, None) + 
self.assertIsNone(self.get_output(widget.Outputs.learner)) + + def test_name_changes(self): + widget = self.widget + widget.auto_apply = True + learner = NaiveBayesLearner() + learner.name = "foo" + self.send_signal(widget.Inputs.base_learner, learner) + + widget.calibration = widget.IsotonicCalibration + widget.threshold = widget.OptimizeCA + widget.controls.calibration.group.buttonClicked[int].emit( + widget.IsotonicCalibration) + + learner = self.get_output(widget.Outputs.learner) + self.assertEqual(learner.name, "Foo + Isotonic + CA") + + widget.calibration = widget.NoCalibration + widget.threshold = widget.OptimizeCA + widget.controls.calibration.group.buttonClicked[int].emit( + widget.NoCalibration) + learner = self.get_output(widget.Outputs.learner) + self.assertEqual(learner.name, "Foo + CA") + + self.send_signal(widget.Inputs.base_learner, None) + self.assertEqual(widget.controls.learner_name.placeholderText(), + "Calibrated learner") From 6ac1db1995e011f08328706779e40ba9c9ca326c Mon Sep 17 00:00:00 2001 From: janezd Date: Mon, 17 Jun 2019 22:19:54 +0200 Subject: [PATCH 15/21] Calibration plot: Add context settings --- Orange/widgets/evaluate/contexthandlers.py | 63 +++++++------------- Orange/widgets/evaluate/owcalibrationplot.py | 12 +++- 2 files changed, 32 insertions(+), 43 deletions(-) diff --git a/Orange/widgets/evaluate/contexthandlers.py b/Orange/widgets/evaluate/contexthandlers.py index d79def2ca60..3ad2796698d 100644 --- a/Orange/widgets/evaluate/contexthandlers.py +++ b/Orange/widgets/evaluate/contexthandlers.py @@ -1,47 +1,30 @@ +from Orange.data import Variable from Orange.widgets import settings -from Orange.widgets.utils import getdeepattr class EvaluationResultsContextHandler(settings.ContextHandler): - def __init__(self, targetAttr, selectedAttr): - super().__init__() - self.targetAttr, self.selectedAttr = targetAttr, selectedAttr + """Context handler for evaluation results""" - #noinspection PyMethodOverriding - def match(self, context, cnames, cvalues): - return (cnames, cvalues) == ( - context.classifierNames, context.classValues) and 2 + def open_context(self, widget, classes, classifier_names): + if isinstance(classes, Variable): + if classes.is_discrete: + classes = classes.values + else: + classes = None + super().open_context(widget, classes, classifier_names) - def fast_save(self, widget, name, value): - context = widget.current_context - if name == self.targetAttr: - context.targetClass = value - elif name == self.selectedAttr: - context.selectedClassifiers = list(value) + def new_context(self, classes, classifier_names): + context = super().new_context() + context.classes = classes + context.classifier_names = classifier_names + return context - def settings_from_widget(self, widget, *args): - super().settings_from_widget(widget, *args) - context = widget.current_context - context.targetClass = getdeepattr(widget, self.targetAttr) - context.selectedClassifiers = list(getdeepattr(self.selectedAttr)) - - def settings_to_widget(self, widget, *args): - super().settings_to_widget(widget, *args) - context = widget.current_context - if context.targetClass is not None: - setattr(widget, self.targetAttr, context.targetClass) - if context.selectedClassifiers is not None: - setattr(widget, self.selectedAttr, context.selectedClassifiers) - - #noinspection PyMethodOverriding - def find_or_create_context(self, widget, results): - cnames = [c.name for c in results.classifiers] - cvalues = results.classValues - context, isNew = super().find_or_create_context( - widget, 
results.classifierNames, results.classValues) - if isNew: - context.classifierNames = results.classifierNames - context.classValues = results.classValues - context.selectedClassifiers = None - context.targetClass = None - return context, isNew + def match(self, context, classes, classifier_names): + if classifier_names != context.classifier_names: + return self.NO_MATCH + elif isinstance(classes, Variable) and classes.is_continuous: + return (self.PERFECT_MATCH if context.classes is None + else self.NO_MATCH) + else: + return (self.PERFECT_MATCH if context.classes == classes + else self.NO_MATCH) diff --git a/Orange/widgets/evaluate/owcalibrationplot.py b/Orange/widgets/evaluate/owcalibrationplot.py index 63c1b0190a2..a38ecd30ed0 100644 --- a/Orange/widgets/evaluate/owcalibrationplot.py +++ b/Orange/widgets/evaluate/owcalibrationplot.py @@ -12,6 +12,8 @@ from Orange.evaluation import Results from Orange.evaluation.performance_curves import Curves from Orange.widgets import widget, gui, settings +from Orange.widgets.evaluate.contexthandlers import \ + EvaluationResultsContextHandler from Orange.widgets.evaluate.utils import \ check_results_adequacy, results_for_preview from Orange.widgets.utils import colorpalette, colorbrewer @@ -84,9 +86,9 @@ class Information(widget.OWWidget.Information): no_out + "select a single model - the widget can output only one") non_binary_class = Msg(no_out + "cannot calibrate non-binary classes") - - target_index = settings.Setting(0) - selected_classifiers = settings.Setting([]) + settingsHandler = EvaluationResultsContextHandler() + target_index = settings.ContextSetting(0) + selected_classifiers = settings.ContextSetting([]) score = settings.Setting(0) output_calibration = settings.Setting(0) fold_curves = settings.Setting(False) @@ -168,6 +170,7 @@ def __init__(self): @Inputs.evaluation_results def set_results(self, results): + self.closeContext() self.clear() results = check_results_adequacy(results, self.Error, check_nan=False) if results is not None and not results.actual.size: @@ -177,6 +180,9 @@ def set_results(self, results): self.results = results if self.results is not None: self._initialize(results) + class_var = self.results.domain.class_var + self.target_index = int(len(class_var.values) == 2) + self.openContext(class_var, self.classifier_names) self._replot() self.apply() From 2edcb391aa185de53efd6da1d6f5f991c56e8f97 Mon Sep 17 00:00:00 2001 From: janezd Date: Tue, 18 Jun 2019 22:52:32 +0200 Subject: [PATCH 16/21] OWCalibration Plot: Unit tests and some fixes --- Orange/evaluation/testing.py | 4 +- Orange/tests/test_evaluation_testing.py | 2 +- Orange/widgets/evaluate/owcalibrationplot.py | 62 +- Orange/widgets/evaluate/tests/base.py | 2 +- .../evaluate/tests/test_owcalibrationplot.py | 543 +++++++++++++++++- 5 files changed, 575 insertions(+), 38 deletions(-) diff --git a/Orange/evaluation/testing.py b/Orange/evaluation/testing.py index 22a917283e0..93c0d563238 100644 --- a/Orange/evaluation/testing.py +++ b/Orange/evaluation/testing.py @@ -171,7 +171,7 @@ def set_or_raise(value, exp_values, msg): "mismatching number of class values") nmethods = set_or_raise( nmethods, [learners is not None and len(learners), - models is not None and len(models), + models is not None and models.shape[1], failed is not None and len(failed), predicted is not None and predicted.shape[0], probabilities is not None and probabilities.shape[0]], @@ -365,7 +365,7 @@ def __new__(cls, "and train_data are omitted") return self - warn("calling Validation's constructor 
with data and learners" + warn("calling Validation's constructor with data and learners " "is deprecated;\nconstruct an instance and call it", DeprecationWarning, stacklevel=2) diff --git a/Orange/tests/test_evaluation_testing.py b/Orange/tests/test_evaluation_testing.py index 3bc21d3f2e8..a5f78cb2972 100644 --- a/Orange/tests/test_evaluation_testing.py +++ b/Orange/tests/test_evaluation_testing.py @@ -756,7 +756,7 @@ def setUp(self): self.row_indices = np.arange(100) self.folds = (range(50), range(10, 60)), (range(50, 100), range(50)) self.learners = [MajorityLearner(), MajorityLearner()] - self.models = [Mock(), Mock()] + self.models = np.array([[Mock(), Mock()]]) self.predicted = np.zeros((2, 100)) self.probabilities = np.zeros((2, 100, 3)) self.failed = [False, True] diff --git a/Orange/widgets/evaluate/owcalibrationplot.py b/Orange/widgets/evaluate/owcalibrationplot.py index a38ecd30ed0..e3b828fd2e2 100644 --- a/Orange/widgets/evaluate/owcalibrationplot.py +++ b/Orange/widgets/evaluate/owcalibrationplot.py @@ -14,8 +14,7 @@ from Orange.widgets import widget, gui, settings from Orange.widgets.evaluate.contexthandlers import \ EvaluationResultsContextHandler -from Orange.widgets.evaluate.utils import \ - check_results_adequacy, results_for_preview +from Orange.widgets.evaluate.utils import results_for_preview from Orange.widgets.utils import colorpalette, colorbrewer from Orange.widgets.utils.widgetpreview import WidgetPreview from Orange.widgets.widget import Input, Output, Msg @@ -72,7 +71,8 @@ class Inputs: class Outputs: calibrated_model = Output("Calibrated Model", Model) - class Warning(widget.OWWidget.Warning): + class Error(widget.OWWidget.Error): + non_discrete_target = Msg("Calibration plot requires a discrete target") empty_input = widget.Msg("Empty result on input. 
Nothing to display.") class Information(widget.OWWidget.Information): @@ -84,7 +84,8 @@ class Information(widget.OWWidget.Information): "try testing on separate data or on training data") no_output_multiple_selected = Msg( no_out + "select a single model - the widget can output only one") - non_binary_class = Msg(no_out + "cannot calibrate non-binary classes") + no_output_non_binary_class = Msg( + no_out + "cannot calibrate non-binary classes") settingsHandler = EvaluationResultsContextHandler() target_index = settings.ContextSetting(0) @@ -145,8 +146,8 @@ def __init__(self): btnLabels=("Sigmoid calibration", "Isotonic calibration"), label="Output model calibration", callback=self.apply) - box = gui.widgetBox(self.controlArea, "Info") - self.info_label = gui.widgetLabel(box) + self.info_box = gui.widgetBox(self.controlArea, "Info") + self.info_label = gui.widgetLabel(self.info_box) gui.auto_commit( self.controlArea, self, "auto_commit", "Apply", commit=self.apply) @@ -159,6 +160,10 @@ def __init__(self): for axis_name in ("bottom", "left"): axis = self.plot.getAxis(axis_name) axis.setPen(pg.mkPen(color=0.0)) + # Remove the condition (that is, allow setting this for bottom + # axis) when pyqtgraph is fixed + # Issue: https://github.com/pyqtgraph/pyqtgraph/issues/930 + # Pull request: https://github.com/pyqtgraph/pyqtgraph/pull/932 if axis_name != "bottom": # remove if when pyqtgraph is fixed axis.setStyle(stopAxisAtTick=(True, True)) @@ -172,11 +177,14 @@ def __init__(self): def set_results(self, results): self.closeContext() self.clear() - results = check_results_adequacy(results, self.Error, check_nan=False) + self.Error.clear() + self.Information.clear() + if results is not None and not results.domain.has_discrete_class: + self.Error.non_discrete_target() + results = None if results is not None and not results.actual.size: - self.Warning.empty_input() - else: - self.Warning.empty_input.clear() + self.Error.empty_input() + results = None self.results = results if self.results is not None: self._initialize(results) @@ -219,8 +227,10 @@ def _set_explanation(self): if self.score == 0: self.controls.output_calibration.show() + self.info_box.hide() else: self.controls.output_calibration.hide() + self.info_box.show() axis = self.plot.getAxis("bottom") axis.setLabel("Predicted probability" if self.score == 0 @@ -230,23 +240,23 @@ def _set_explanation(self): axis.setLabel(Metrics[self.score].name) def _initialize(self, results): - N = len(results.predicted) + n = len(results.predicted) names = getattr(results, "learner_names", None) if names is None: - names = ["#{}".format(i + 1) for i in range(N)] + names = ["#{}".format(i + 1) for i in range(n)] self.classifier_names = names scheme = colorbrewer.colorSchemes["qualitative"]["Dark2"] - if N > len(scheme): + if n > len(scheme): scheme = colorpalette.DefaultRGBColors - self.colors = colorpalette.ColorPaletteGenerator(N, scheme) + self.colors = colorpalette.ColorPaletteGenerator(n, scheme) - for i in range(N): + for i in range(n): item = self.classifiers_list_box.item(i) item.setIcon(colorpalette.ColorPixmap(self.colors[i])) - self.selected_classifiers = list(range(N)) - self.target_cb.addItems(results.data.domain.class_var.values) + self.selected_classifiers = list(range(n)) + self.target_cb.addItems(results.domain.class_var.values) def _rug(self, data, pen_args): color = pen_args["pen"].color() @@ -288,7 +298,6 @@ def _prob_curve(self, ytrue, probs, pen_args): y = np.full(100, xmax) self.plot.plot(x, y, symbol="+", symbolSize=4, **pen_args) - 
self.plot.plot([0, 1], [0, 1], antialias=True) return x, (y, ) def _setup_plot(self): @@ -326,6 +335,9 @@ def _setup_plot(self): self.plot_metrics(Curves(fold_ytrue, fold_probs), metrics, pen_args) + if self.score == 0: + self.plot.plot([0, 1], [0, 1], antialias=True) + def _replot(self): self.plot.clear() if self.results is not None: @@ -379,7 +391,7 @@ def _update_info(self): for curve in curves) text += "" text += "
Threshold: p={n}
{name}:
" - self.info_label.setText(text) + self.info_label.setText(text) def threshold_change_done(self): self.apply() @@ -395,7 +407,7 @@ def apply(self): info.no_output_no_models: results.models is None, info.no_output_multiple_selected: len(self.selected_classifiers) != 1, - info.non_binary_class: + info.no_output_non_binary_class: self.score != 0 and len(results.domain.class_var.values) != 2} if not any(problems.values()): @@ -419,11 +431,19 @@ def apply(self): def send_report(self): if self.results is None: return + self.report_items(( + ("Target class", self.target_cb.currentText()), + ("Output model calibration", + self.score == 0 and self.controls.score.currentText()), + )) caption = report.list_legend(self.classifiers_list_box, self.selected_classifiers) - self.report_items((("Target class", self.target_cb.currentText()),)) self.report_plot() self.report_caption(caption) + self.report_caption(self.controls.score.currentText()) + + if self.score != 0: + self.report_raw(self.info_label.text()) def gaussian_smoother(x, y, sigma=1.0): diff --git a/Orange/widgets/evaluate/tests/base.py b/Orange/widgets/evaluate/tests/base.py index 3100f1e1905..93fafea1e51 100644 --- a/Orange/widgets/evaluate/tests/base.py +++ b/Orange/widgets/evaluate/tests/base.py @@ -17,6 +17,6 @@ def test_many_evaluation_results(self): classification.NaiveBayesLearner(), classification.SGDClassificationLearner() ] - res = evaluation.CrossValidation(data, learners, k=2, store_data=True) + res = evaluation.CrossValidation(k=2, store_data=True)(data, learners) # this is a mixin; pylint: disable=no-member self.send_signal("Evaluation Results", res) diff --git a/Orange/widgets/evaluate/tests/test_owcalibrationplot.py b/Orange/widgets/evaluate/tests/test_owcalibrationplot.py index ac07e8a2fff..21cc067e50e 100644 --- a/Orange/widgets/evaluate/tests/test_owcalibrationplot.py +++ b/Orange/widgets/evaluate/tests/test_owcalibrationplot.py @@ -1,11 +1,18 @@ import copy import warnings +from unittest.mock import Mock, patch + +import numpy as np +from AnyQt.QtCore import QItemSelection +from pyqtgraph import InfiniteLine from sklearn.exceptions import ConvergenceWarning -from Orange.data import Table +from Orange.data import Table, DiscreteVariable, Domain, ContinuousVariable import Orange.evaluation import Orange.classification +from Orange.evaluation import Results +from Orange.evaluation.performance_curves import Curves from Orange.widgets.evaluate.tests.base import EvaluateTest from Orange.widgets.evaluate.owcalibrationplot import OWCalibrationPlot @@ -18,26 +25,536 @@ class TestOWCalibrationPlot(WidgetTest, EvaluateTest): def setUpClass(cls): super().setUpClass() cls.lenses = data = Table(test_filename("datasets/lenses.tab")) - cls.res = Orange.evaluation.TestOnTestData( - train_data=data[::2], test_data=data[1::2], - learners=[Orange.classification.MajorityLearner(), - Orange.classification.KNNLearner()], - store_data=True, - ) + majority = Orange.classification.MajorityLearner() + majority.name = "majority" + knn3 = Orange.classification.KNNLearner(n_neighbors=3) + knn3.name = "knn-3" + knn1 = Orange.classification.KNNLearner(n_neighbors=1) + knn1.name = "knn-1" + cls.lenses_results = Orange.evaluation.TestOnTestData( + store_data=True, store_models=True)( + data=data[::2], test_data=data[1::2], + learners=[majority, knn3, knn1]) + cls.lenses_results.learner_names = ["majority", "knn-3", "knn-1"] def setUp(self): super().setUp() + + n, p = (0, 1) + actual, probs = np.array([ + (p, .8), (n, .7), (p, .6), (p, .55), (p, .54), (n, 
.53), (n, .52), + (p, .51), (n, .505), (p, .4), (n, .39), (p, .38), (n, .37), + (n, .36), (n, .35), (p, .34), (n, .33), (p, .30), (n, .1)]).T + self.curves = Curves(actual, probs) + probs2 = (probs + 0.5) / 2 + 1 + self.curves2 = Curves(actual, probs2) + pred = probs > 0.5 + pred2 = probs2 > 0.5 + probs = np.vstack((1 - probs, probs)).T + probs2 = np.vstack((1 - probs2, probs2)).T + domain = Domain([], DiscreteVariable("y", values=("a", "b"))) + self.results = Results( + domain=domain, + actual=actual, + folds=(Ellipsis, ), + models=np.array([[Mock(), Mock()]]), + row_indices=np.arange(19), + predicted=np.array((pred, pred2)), + probabilities=np.array([probs, probs2])) + self.widget = self.create_widget(OWCalibrationPlot) # type: OWCalibrationPlot warnings.filterwarnings("ignore", ".*", ConvergenceWarning) - def test_basic(self): - self.send_signal(self.widget.Inputs.evaluation_results, self.res) - self.widget.controls.display_rug.click() + def test_initialization(self): + """Test initialization of lists and combos""" + def check_clsfr_names(names): + self.assertEqual(widget.classifier_names, names) + clsf_list = widget.controls.selected_classifiers + self.assertEqual( + [clsf_list.item(i).text() for i in range(clsf_list.count())], + names) + + widget = self.widget + tcomb = widget.controls.target_index + + self.send_signal(widget.Inputs.evaluation_results, self.lenses_results) + check_clsfr_names(["majority", "knn-3", "knn-1"]) + self.assertEqual(widget.selected_classifiers, [0, 1, 2]) + self.assertEqual( + [tcomb.itemText(i) for i in range(tcomb.count())], + self.lenses.domain.class_var.values) + self.assertEqual(widget.target_index, 0) + + self.send_signal(widget.Inputs.evaluation_results, self.results) + check_clsfr_names(["#1", "#2"]) + self.assertEqual(widget.selected_classifiers, [0, 1]) + self.assertEqual( + [tcomb.itemText(i) for i in range(tcomb.count())], ["a", "b"]) + self.assertEqual(widget.target_index, 1) - def test_empty(self): - res = copy.copy(self.res) + self.send_signal(widget.Inputs.evaluation_results, None) + check_clsfr_names([]) + self.assertEqual(widget.selected_classifiers, []) + self.assertEqual(widget.controls.target_index.count(), 0) + + def test_empty_input_error(self): + """Show an error when data is present but empty""" + widget = self.widget + + res = copy.copy(self.results) res.row_indices = res.row_indices[:0] res.actual = res.actual[:0] res.predicted = res.predicted[:, 0] res.probabilities = res.probabilities[:, :0, :] - self.send_signal(self.widget.Inputs.evaluation_results, res) + self.send_signal(widget.Inputs.evaluation_results, self.results) + self.assertFalse(widget.Error.empty_input.is_shown()) + self.assertTrue(bool(widget.plot.items)) + + self.send_signal(widget.Inputs.evaluation_results, res) + self.assertTrue(widget.Error.empty_input.is_shown()) + self.assertIsNone(widget.results) + self.assertFalse(bool(widget.plot.items)) + + self.send_signal(widget.Inputs.evaluation_results, self.results) + self.assertFalse(widget.Error.empty_input.is_shown()) + self.assertTrue(bool(widget.plot.items)) + + def test_regression_input_error(self): + """Show an error for regression data""" + widget = self.widget + + res = copy.copy(self.results) + res.domain = Domain([], ContinuousVariable("y")) + res.row_indices = res.row_indices[:0] + res.actual = res.actual[:0] + res.predicted = res.predicted[:, 0] + res.probabilities = res.probabilities[:, :0, :] + self.send_signal(widget.Inputs.evaluation_results, self.results) + 
self.assertFalse(widget.Error.non_discrete_target.is_shown()) + self.assertTrue(bool(widget.plot.items)) + + self.send_signal(widget.Inputs.evaluation_results, res) + self.assertTrue(widget.Error.non_discrete_target.is_shown()) + self.assertIsNone(widget.results) + self.assertFalse(bool(widget.plot.items)) + + self.send_signal(widget.Inputs.evaluation_results, self.results) + self.assertFalse(widget.Error.non_discrete_target.is_shown()) + self.assertTrue(bool(widget.plot.items)) + + @staticmethod + def _set_combo(combo, val): + combo.setCurrentIndex(val) + combo.activated[int].emit(val) + combo.activated[str].emit(combo.currentText()) + + @staticmethod + def _set_radio_buttons(radios, val): + radios.buttons[val].click() + + @staticmethod + def _set_list_selection(listview, selection): + model = listview.model() + selectionmodel = listview.selectionModel() + itemselection = QItemSelection() + for item in selection: + itemselection.select(model.index(item, 0), model.index(item, 0)) + selectionmodel.select(itemselection, selectionmodel.ClearAndSelect) + + def _set_threshold(self, pos, done): + _, line = self._get_curves() + line.setPos(pos) + if done: + line.sigPositionChangeFinished.emit(line) + else: + line.sigPositionChanged.emit(line) + + def _get_curves(self): + plot_items = self.widget.plot.items[:] + for i, item in enumerate(plot_items): + if isinstance(item, InfiniteLine): + del plot_items[i] + return plot_items, item + return plot_items, None + + @patch("Orange.widgets.evaluate.owcalibrationplot.ThresholdClassifier") + @patch("Orange.widgets.evaluate.owcalibrationplot.CalibratedLearner") + def test_plotting_curves(self, *_): + """Curve coordinates match those computed by `Curves`""" + widget = self.widget + widget.display_rug = False + self.send_signal(widget.Inputs.evaluation_results, self.results) + widget.selected_classifiers = [0] + combo = widget.controls.score + + c = self.curves + combinations = ([c.ca()], + [c.f1()], + [c.sensitivity(), c.specificity()], + [c.precision(), c.recall()], + [c.ppv(), c.npv()], + [c.tpr(), c.fpr()]) + for idx, curves_data in enumerate(combinations, start=1): + self._set_combo(combo, idx) + curves, line = self._get_curves() + self.assertEqual(len(curves), len(curves_data)) + self.assertIsNotNone(line) + for curve in curves: + x, y = curve.getData() + np.testing.assert_almost_equal(x, self.curves.probs) + for i, curve_data in enumerate(curves_data): + if np.max(curve_data - y) < 1e-6: + del curves_data[i] + break + else: + self.fail(f"invalid curve for {combo.currentText()}") + + @patch("Orange.widgets.evaluate.owcalibrationplot.ThresholdClassifier") + @patch("Orange.widgets.evaluate.owcalibrationplot.CalibratedLearner") + def test_multiple_fold_curves(self, *_): + widget = self.widget + widget.display_rug = False + widget.fold_curves = False + self.send_signal(widget.Inputs.evaluation_results, self.results) + self._set_list_selection(widget.controls.selected_classifiers, [0]) + self._set_combo(widget.controls.score, 1) # CA + + self.results.folds = [slice(1, 5), slice(5, 19)] + self.results.models = np.array([[Mock(), Mock()]] * 2) + curves, _ = self._get_curves() + self.assertEqual(len(curves), 1) + + widget.controls.fold_curves.click() + curves, _ = self._get_curves() + self.assertEqual(len(curves), 3) + + widget.controls.fold_curves.click() + curves, _ = self._get_curves() + self.assertEqual(len(curves), 1) + + @patch("Orange.widgets.evaluate.owcalibrationplot.ThresholdClassifier") + 
@patch("Orange.widgets.evaluate.owcalibrationplot.CalibratedLearner") + def test_change_target_class(self, *_): + """Changing target combo changes the curves""" + widget = self.widget + widget.display_rug = False + self.send_signal(widget.Inputs.evaluation_results, self.results) + widget.selected_classifiers = [0] + score_combo = widget.controls.score + target_combo = widget.controls.target_index + + self._set_combo(score_combo, 1) # ca + self._set_combo(target_combo, 1) + (ca, ), _ = self._get_curves() + np.testing.assert_almost_equal(ca.getData()[1], self.curves.ca()) + + self._set_combo(target_combo, 0) + (ca, ), _ = self._get_curves() + curves = Curves(1 - self.curves.ytrue, 1 - self.curves.probs[:-1]) + np.testing.assert_almost_equal(ca.getData()[1], curves.ca()) + + def test_changing_score_explanation(self): + """Changing score hides/shows explanation and options for calibration""" + widget = self.widget + score_combo = widget.controls.score + explanation = widget.explanation + calibrations = widget.controls.output_calibration + + self._set_combo(score_combo, 1) # ca + self.assertTrue(explanation.isHidden()) + self.assertTrue(calibrations.isHidden()) + + self._set_combo(score_combo, 0) # calibration + self.assertTrue(explanation.isHidden()) + self.assertFalse(calibrations.isHidden()) + + self._set_combo(score_combo, 3) # sens/spec + self.assertFalse(explanation.isHidden()) + self.assertTrue(calibrations.isHidden()) + + def test_rug(self): + """Test rug appearance and positions""" + def get_rugs(): + rugs = [None, None] + for item in widget.plot.items: + if item.curve.opts.get("connect", "") == "pairs": + x, y = item.getData() + np.testing.assert_almost_equal(x[::2], x[1::2]) + rugs[int(y[0] == 1)] = x[::2] + return rugs + + widget = self.widget + widget.display_rug = True + model_list = widget.controls.selected_classifiers + self.send_signal(widget.Inputs.evaluation_results, self.results) + + self._set_list_selection(model_list, [0]) + probs = self.curves.probs[:-1] + truex = probs[self.curves.ytrue == 1] + falsex = probs[self.curves.ytrue == 0] + bottom, top = get_rugs() + np.testing.assert_almost_equal(bottom, falsex) + np.testing.assert_almost_equal(top, truex) + + # Switching targets should switch rugs and takes other probabilities + self._set_combo(widget.controls.target_index, 0) + bottom, top = get_rugs() + np.testing.assert_almost_equal(bottom, (1 - truex)[::-1]) + np.testing.assert_almost_equal(top, (1 - falsex)[::-1]) + self._set_combo(widget.controls.target_index, 1) + + # Changing models gives a different rug + self._set_list_selection(model_list, [1]) + probs2 = self.curves2.probs[:-1] + truex2 = probs2[self.curves2.ytrue == 1] + falsex2 = probs2[self.curves2.ytrue == 0] + bottom, top = get_rugs() + np.testing.assert_almost_equal(bottom, falsex2) + np.testing.assert_almost_equal(top, truex2) + + # Two models - two rugs - four rug items + self._set_list_selection(model_list, [0, 1]) + self.assertEqual(sum(item.curve.opts.get("connect", "") == "pairs" + for item in widget.plot.items), 4) + + # No models - no rugs + self._set_list_selection(model_list, []) + self.assertEqual(get_rugs(), [None, None]) + + # Bring the rug back + self._set_list_selection(model_list, [1]) + self.assertIsNotNone(get_rugs()[0]) + + # Disable it with checkbox + widget.controls.display_rug.click() + self.assertEqual(get_rugs(), [None, None]) + + def test_calibration_curve(self): + """Test the correct number of calibration curves""" + widget = self.widget + model_list = 
widget.controls.selected_classifiers + widget.display_rug = False + + self.send_signal(widget.Inputs.evaluation_results, self.results) + self.assertEqual(len(widget.plot.items), 3) # 2 + diagonal + + self._set_list_selection(model_list, [1]) + self.assertEqual(len(widget.plot.items), 2) + + self._set_list_selection(model_list, []) + self.assertEqual(len(widget.plot.items), 1) + + def test_threshold_change_updates_info(self): + """Changing the threshold updates info label""" + widget = self.widget + self.send_signal(widget.Inputs.evaluation_results, self.results) + self._set_combo(widget.controls.score, 1) + + original_text = widget.info_label.text() + self._set_threshold(0.3, False) + self.assertNotEqual(widget.info_label.text(), original_text) + + def test_threshold_rounding(self): + """Threshold is rounded to two decimals""" + widget = self.widget + self.send_signal(widget.Inputs.evaluation_results, self.results) + self._set_combo(widget.controls.score, 1) + self._set_threshold(0.367, False) + self.assertAlmostEqual(widget.threshold, 0.37) + + def test_threshold_flips_on_two_classes(self): + """Threshold changes to 1 - threshold if *binary* class is switched""" + widget = self.widget + self.send_signal(widget.Inputs.evaluation_results, self.results) + self._set_combo(widget.controls.target_index, 0) + self._set_combo(widget.controls.score, 1) # CA + self._set_threshold(0.25, False) + self.assertEqual(widget.threshold, 0.25) + self._set_combo(widget.controls.target_index, 1) + self.assertEqual(widget.threshold, 0.75) + + self.send_signal(widget.Inputs.evaluation_results, self.lenses_results) + self._set_combo(widget.controls.target_index, 0) + self._set_combo(widget.controls.score, 1) # CA + self._set_threshold(0.25, False) + self.assertEqual(widget.threshold, 0.25) + self._set_combo(widget.controls.target_index, 1) + self.assertEqual(widget.threshold, 0.25) + + + @patch("Orange.widgets.evaluate.owcalibrationplot.ThresholdClassifier") + @patch("Orange.widgets.evaluate.owcalibrationplot.CalibratedLearner") + def test_apply_no_output(self, *_): + """Test no output warnings""" + widget = self.widget + model_list = widget.controls.selected_classifiers + + info = widget.Information + infos = (info.no_output_multiple_folds, + info.no_output_no_models, + info.no_output_multiple_selected, + info.no_output_non_binary_class) + multiple_folds, no_models, multiple_selected, non_binary_class = infos + + def test_shown(shown): + for info in infos: + self.assertEqual( + info.is_shown(), info in shown, + f"{info} is unexpectedly " + f"{'' if info.is_shown() else 'not'} shown") + output = self.get_output(widget.Outputs.calibrated_model) + if shown: + self.assertIsNone(output) + else: + self.assertIsNotNone(output) + + self.send_signal(widget.Inputs.evaluation_results, self.results) + self._set_combo(widget.controls.score, 1) # CA + test_shown({multiple_selected}) + + self._set_list_selection(model_list, [0]) + test_shown(()) + self._set_list_selection(model_list, [0, 1]) + + self.results.models = None + self.send_signal(widget.Inputs.evaluation_results, self.results) + test_shown({multiple_selected, no_models}) + + self.send_signal(widget.Inputs.evaluation_results, self.lenses_results) + test_shown({multiple_selected, non_binary_class}) + + self._set_list_selection(model_list, [0]) + test_shown({non_binary_class}) + + self.results.folds = [slice(0, 5), slice(5, 10), slice(10, 19)] + self.results.models = np.array([[Mock(), Mock()]] * 3) + + self.send_signal(widget.Inputs.evaluation_results, self.results) 
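+        # Three folds mean three models per learner in results.models, so
+        # the widget cannot pick a single model to send downstream; expect
+        # the multiple-folds reason, plus multiple-selected while both
+        # classifiers are still selected.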
+ test_shown({multiple_selected, multiple_folds}) + + self._set_list_selection(model_list, [0]) + test_shown({multiple_folds}) + + self._set_combo(widget.controls.score, 0) # calibration + self.send_signal(widget.Inputs.evaluation_results, self.lenses_results) + self._set_list_selection(model_list, [0, 1]) + test_shown({multiple_selected}) + self._set_list_selection(model_list, [0]) + test_shown(()) + + @patch("Orange.widgets.evaluate.owcalibrationplot.ThresholdClassifier") + def test_output_threshold_classifier(self, threshold_classifier): + """Test threshold classifier on output""" + widget = self.widget + model_list = widget.controls.selected_classifiers + models = self.results.models.ravel() + target_combo = widget.controls.target_index + self.send_signal(widget.Inputs.evaluation_results, self.results) + self._set_list_selection(model_list, [0]) + widget.target_index = 1 + + widget.threshold = 0.3 + self._set_combo(widget.controls.score, 1) # CA + model = self.get_output(widget.Outputs.calibrated_model) + threshold_classifier.assert_called_with(models[0], 0.3) + self.assertIs(model, threshold_classifier.return_value) + threshold_classifier.reset_mock() + + widget.auto_commit = True + self._set_threshold(0.4, False) + threshold_classifier.assert_not_called() + + widget.auto_commit = False + self._set_threshold(0.35, True) + threshold_classifier.assert_not_called() + + widget.auto_commit = True + self._set_threshold(0.4, True) + threshold_classifier.assert_called_with(models[0], 0.4) + self.assertIs(model, threshold_classifier.return_value) + threshold_classifier.reset_mock() + + self._set_combo(target_combo, 0) + threshold_classifier.assert_called_with(models[0], 0.4) + self.assertIs(model, threshold_classifier.return_value) + threshold_classifier.reset_mock() + + self._set_combo(target_combo, 1) + threshold_classifier.assert_called_with(models[0], 0.4) + self.assertIs(model, threshold_classifier.return_value) + threshold_classifier.reset_mock() + + self._set_list_selection(model_list, [1]) + threshold_classifier.assert_called_with(models[1], 0.4) + self.assertIs(model, threshold_classifier.return_value) + threshold_classifier.reset_mock() + + @patch("Orange.widgets.evaluate.owcalibrationplot.CalibratedLearner") + def test_output_calibrated_classifier(self, calibrated_learner): + """Test calibrated classifier on output""" + calibrated_instance = calibrated_learner.return_value + get_model = calibrated_instance.get_model + + widget = self.widget + model_list = widget.controls.selected_classifiers + models = self.lenses_results.models.ravel() + results = self.lenses_results + self.send_signal(widget.Inputs.evaluation_results, results) + self._set_combo(widget.controls.score, 0) + + self._set_list_selection(model_list, [1]) + + self._set_radio_buttons(widget.controls.output_calibration, 0) + calibrated_learner.assert_called_with(None, 0) + model, actual, probabilities = get_model.call_args[0] + self.assertIs(model, models[1]) + np.testing.assert_equal(actual, results.actual) + np.testing.assert_equal(probabilities, results.probabilities[1]) + self.assertIs(self.get_output(widget.Outputs.calibrated_model), + get_model.return_value) + calibrated_learner.reset_mock() + get_model.reset_mock() + + self._set_radio_buttons(widget.controls.output_calibration, 1) + calibrated_learner.assert_called_with(None, 1) + model, actual, probabilities = get_model.call_args[0] + self.assertIs(model, models[1]) + np.testing.assert_equal(actual, results.actual) + np.testing.assert_equal(probabilities, 
results.probabilities[1]) + self.assertIs(self.get_output(widget.Outputs.calibrated_model), + get_model.return_value) + calibrated_learner.reset_mock() + get_model.reset_mock() + + self._set_list_selection(model_list, [0]) + self._set_radio_buttons(widget.controls.output_calibration, 1) + calibrated_learner.assert_called_with(None, 1) + model, actual, probabilities = get_model.call_args[0] + self.assertIs(model, models[0]) + np.testing.assert_equal(actual, results.actual) + np.testing.assert_equal(probabilities, results.probabilities[0]) + self.assertIs(self.get_output(widget.Outputs.calibrated_model), + get_model.return_value) + calibrated_learner.reset_mock() + get_model.reset_mock() + + def test_contexts(self): + """Test storing and retrieving context settings""" + widget = self.widget + model_list = widget.controls.selected_classifiers + target_combo = widget.controls.target_index + self.send_signal(widget.Inputs.evaluation_results, self.lenses_results) + self._set_list_selection(model_list, [0, 2]) + self._set_combo(target_combo, 2) + self.send_signal(widget.Inputs.evaluation_results, self.results) + self._set_list_selection(model_list, [0]) + self._set_combo(target_combo, 0) + self.send_signal(widget.Inputs.evaluation_results, self.lenses_results) + self.assertEqual(widget.selected_classifiers, [0, 2]) + self.assertEqual(widget.target_index, 2) + + def test_report(self): + """Test that report does not crash""" + widget = self.widget + self.send_signal(widget.Inputs.evaluation_results, self.lenses_results) + widget.send_report() From 2049afae83b0fcf46a3733ff407572e6583f2b60 Mon Sep 17 00:00:00 2001 From: janezd Date: Wed, 19 Jun 2019 13:19:03 +0200 Subject: [PATCH 17/21] Calibration plot: Test missing probabilities and single classes --- Orange/widgets/evaluate/owcalibrationplot.py | 159 +++++++++++------- .../evaluate/tests/test_owcalibrationplot.py | 142 ++++++++++++---- 2 files changed, 208 insertions(+), 93 deletions(-) diff --git a/Orange/widgets/evaluate/owcalibrationplot.py b/Orange/widgets/evaluate/owcalibrationplot.py index e3b828fd2e2..637db82aa9b 100644 --- a/Orange/widgets/evaluate/owcalibrationplot.py +++ b/Orange/widgets/evaluate/owcalibrationplot.py @@ -74,18 +74,23 @@ class Outputs: class Error(widget.OWWidget.Error): non_discrete_target = Msg("Calibration plot requires a discrete target") empty_input = widget.Msg("Empty result on input. 
Nothing to display.") + nan_classes = \ + widget.Msg("Remove test data instances with unknown classes") + all_target_class = widget.Msg( + "All data instances belong to target class") + no_target_class = widget.Msg( + "No data instances belong to target class") + + class Warning(widget.OWWidget.Warning): + omitted_folds = widget.Msg( + "Test folds where all data belongs to (non)-target are not shown") + omitted_nan_prob_points = widget.Msg( + "Instance for which the model couldn't compute probabilities are" + "skipped") + no_valid_data = widget.Msg("No valid data for model(s) {}") class Information(widget.OWWidget.Information): - no_out = "Can't output a model: " - no_output_multiple_folds = Msg( - no_out + "each training data sample produces a different model") - no_output_no_models = Msg( - no_out + "test results do not contain stored models;\n" - "try testing on separate data or on training data") - no_output_multiple_selected = Msg( - no_out + "select a single model - the widget can output only one") - no_output_non_binary_class = Msg( - no_out + "cannot calibrate non-binary classes") + no_output = Msg("Can't output a model: {}") settingsHandler = EvaluationResultsContextHandler() target_index = settings.ContextSetting(0) @@ -179,19 +184,23 @@ def set_results(self, results): self.clear() self.Error.clear() self.Information.clear() - if results is not None and not results.domain.has_discrete_class: - self.Error.non_discrete_target() - results = None - if results is not None and not results.actual.size: - self.Error.empty_input() - results = None - self.results = results - if self.results is not None: - self._initialize(results) - class_var = self.results.domain.class_var - self.target_index = int(len(class_var.values) == 2) - self.openContext(class_var, self.classifier_names) - self._replot() + + self.results = None + if results is not None: + if not results.domain.has_discrete_class: + self.Error.non_discrete_target() + elif not results.actual.size: + self.Error.empty_input() + elif np.any(np.isnan(results.actual)): + self.Error.nan_classes() + else: + self.results = results + self._initialize(results) + class_var = self.results.domain.class_var + self.target_index = int(len(class_var.values) == 2) + self.openContext(class_var, self.classifier_names) + self._replot() + self.apply() def clear(self): @@ -286,9 +295,6 @@ def plot_metrics(self, data, metrics, pen_args): return data.probs, ys def _prob_curve(self, ytrue, probs, pen_args): - if not probs.size: - return None - xmin, xmax = probs.min(), probs.max() x = np.linspace(xmin, xmax, 100) if xmax != xmin: @@ -307,16 +313,25 @@ def _setup_plot(self): plot_folds = self.fold_curves and results.folds is not None self.scores = [] - ytrue = results.actual == target + if not self._check_class_presence(results.actual == target): + return + + self.Warning.omitted_folds.clear() + self.Warning.omitted_nan_prob_points.clear() + no_valid_models = [] + shadow_width = 4 + 4 * plot_folds for clsf in self.selected_classifiers: - probs = results.probabilities[clsf, :, target] + data = Curves.from_results(results, target, clsf) + if data.tot == 0: # all probabilities are nan + no_valid_models.append(clsf) + continue + if data.tot != results.probabilities.shape[1]: # some are nan + self.Warning.omitted_nan_prob_points() + color = self.colors[clsf] pen_args = dict( - pen=pg.mkPen(color, width=1), - shadowPen=pg.mkPen(color.lighter(160), - width=4 + 4 * plot_folds), - antiAlias=True) - data = Curves(ytrue, probs) + pen=pg.mkPen(color, width=1), 
antiAlias=True, + shadowPen=pg.mkPen(color.lighter(160), width=shadow_width)) self.scores.append( (self.classifier_names[clsf], self.plot_metrics(data, metrics, pen_args))) @@ -330,19 +345,20 @@ def _setup_plot(self): antiAlias=True) for fold in range(len(results.folds)): fold_results = results.get_fold(fold) - fold_ytrue = fold_results.actual == target - fold_probs = fold_results.probabilities[clsf, :, target] - self.plot_metrics(Curves(fold_ytrue, fold_probs), - metrics, pen_args) + fold_curve = Curves.from_results(fold_results, target, clsf) + # Can't check this before: p and n can be 0 because of + # nan probabilities + if fold_curve.p * fold_curve.n == 0: + self.Warning.omitted_folds() + self.plot_metrics(fold_curve, metrics, pen_args) + + if no_valid_models: + self.Warning.no_valid_data( + ", ".join(self.classifier_names[i] for i in no_valid_models)) if self.score == 0: self.plot.plot([0, 1], [0, 1], antialias=True) - - def _replot(self): - self.plot.clear() - if self.results is not None: - self._setup_plot() - if self.score != 0: + else: self.line = pg.InfiniteLine( pos=self.threshold, movable=True, pen=pg.mkPen(color="k", style=Qt.DashLine, width=2), @@ -350,8 +366,25 @@ def _replot(self): bounds=(0, 1), ) self.line.sigPositionChanged.connect(self.threshold_change) - self.line.sigPositionChangeFinished.connect(self.threshold_change_done) + self.line.sigPositionChangeFinished.connect( + self.threshold_change_done) self.plot.addItem(self.line) + + def _check_class_presence(self, ytrue): + self.Error.all_target_class.clear() + self.Error.no_target_class.clear() + if np.max(ytrue) == 0: + self.Error.no_target_class() + return False + if np.min(ytrue) == 1: + self.Error.all_target_class() + return False + return True + + def _replot(self): + self.plot.clear() + if self.results is not None: + self._setup_plot() self._update_info() def _on_display_rug_changed(self): @@ -380,10 +413,7 @@ def _update_info(self): {"".join(f"" for n in short_names)} """ - for name, probs_curves in self.scores: - if probs_curves is None: - continue - probs, curves = probs_curves + for name, (probs, curves) in self.scores: ind = min(np.searchsorted(probs, self.threshold), len(probs) - 1) text += f"" @@ -397,20 +427,28 @@ def threshold_change_done(self): self.apply() def apply(self): - info = self.Information + self.Information.no_output.clear() wrapped = None - problems = {} results = self.results if results is not None: - problems = { - info.no_output_multiple_folds: len(results.folds) > 1, - info.no_output_no_models: results.models is None, - info.no_output_multiple_selected: - len(self.selected_classifiers) != 1, - info.no_output_non_binary_class: - self.score != 0 - and len(results.domain.class_var.values) != 2} - if not any(problems.values()): + problems = [ + msg for condition, msg in ( + (len(results.folds) > 1, + "each training data sample produces a different model"), + (results.models is None, + "test results do not contain stored models - try testing on" + "separate data or on training data"), + (len(self.selected_classifiers) != 1, + "select a single model - the widget can output only one"), + (self.score != 0 and len(results.domain.class_var.values) != 2, + "cannot calibrate non-binary classes")) + if condition] + if len(problems) == 1: + self.Information.no_output(problems[0]) + elif problems: + self.Information.no_output( + "".join(f"\n - {problem}" for problem in problems)) + else: clsf_idx = self.selected_classifiers[0] model = results.models[0, clsf_idx] if self.score == 0: @@ -424,9 +462,6 @@ 
def apply(self): wrapped = ThresholdClassifier(model, threshold) self.Outputs.calibrated_model.send(wrapped) - for info, shown in problems.items(): - if info.is_shown() != shown: - info(shown=shown) def send_report(self): if self.results is None: diff --git a/Orange/widgets/evaluate/tests/test_owcalibrationplot.py b/Orange/widgets/evaluate/tests/test_owcalibrationplot.py index 21cc067e50e..2d28c050fa2 100644 --- a/Orange/widgets/evaluate/tests/test_owcalibrationplot.py +++ b/Orange/widgets/evaluate/tests/test_owcalibrationplot.py @@ -21,22 +21,6 @@ class TestOWCalibrationPlot(WidgetTest, EvaluateTest): - @classmethod - def setUpClass(cls): - super().setUpClass() - cls.lenses = data = Table(test_filename("datasets/lenses.tab")) - majority = Orange.classification.MajorityLearner() - majority.name = "majority" - knn3 = Orange.classification.KNNLearner(n_neighbors=3) - knn3.name = "knn-3" - knn1 = Orange.classification.KNNLearner(n_neighbors=1) - knn1.name = "knn-1" - cls.lenses_results = Orange.evaluation.TestOnTestData( - store_data=True, store_models=True)( - data=data[::2], test_data=data[1::2], - learners=[majority, knn3, knn1]) - cls.lenses_results.learner_names = ["majority", "knn-3", "knn-1"] - def setUp(self): super().setUp() @@ -56,12 +40,25 @@ def setUp(self): self.results = Results( domain=domain, actual=actual, - folds=(Ellipsis, ), + folds=np.array([Ellipsis]), models=np.array([[Mock(), Mock()]]), row_indices=np.arange(19), predicted=np.array((pred, pred2)), probabilities=np.array([probs, probs2])) + self.lenses = data = Table(test_filename("datasets/lenses.tab")) + majority = Orange.classification.MajorityLearner() + majority.name = "majority" + knn3 = Orange.classification.KNNLearner(n_neighbors=3) + knn3.name = "knn-3" + knn1 = Orange.classification.KNNLearner(n_neighbors=1) + knn1.name = "knn-1" + self.lenses_results = Orange.evaluation.TestOnTestData( + store_data=True, store_models=True)( + data=data[::2], test_data=data[1::2], + learners=[majority, knn3, knn1]) + self.lenses_results.learner_names = ["majority", "knn-3", "knn-1"] + self.widget = self.create_widget(OWCalibrationPlot) # type: OWCalibrationPlot warnings.filterwarnings("ignore", ".*", ConvergenceWarning) @@ -389,24 +386,31 @@ def test_apply_no_output(self, *_): widget = self.widget model_list = widget.controls.selected_classifiers - info = widget.Information - infos = (info.no_output_multiple_folds, - info.no_output_no_models, - info.no_output_multiple_selected, - info.no_output_non_binary_class) - multiple_folds, no_models, multiple_selected, non_binary_class = infos + multiple_folds, multiple_selected, no_models, non_binary_class = "abcd" + messages = { + multiple_folds: + "each training data sample produces a different model", + no_models: + "test results do not contain stored models - try testing on" + "separate data or on training data", + multiple_selected: + "select a single model - the widget can output only one", + non_binary_class: + "cannot calibrate non-binary classes"} def test_shown(shown): - for info in infos: - self.assertEqual( - info.is_shown(), info in shown, - f"{info} is unexpectedly " - f"{'' if info.is_shown() else 'not'} shown") + widget_msg = widget.Information.no_output output = self.get_output(widget.Outputs.calibrated_model) - if shown: - self.assertIsNone(output) - else: + if not shown: + self.assertFalse(widget_msg.is_shown()) self.assertIsNotNone(output) + else: + self.assertTrue(widget_msg.is_shown()) + self.assertIsNone(output) + for msg_id in shown: + msg = messages[msg_id] + 
self.assertIn(msg, widget_msg.formatted, + f"{msg} not included in the message") self.send_signal(widget.Inputs.evaluation_results, self.results) self._set_combo(widget.controls.score, 1) # CA @@ -558,3 +562,79 @@ def test_report(self): widget = self.widget self.send_signal(widget.Inputs.evaluation_results, self.lenses_results) widget.send_report() + + @patch("Orange.widgets.evaluate.owcalibrationplot.ThresholdClassifier") + @patch("Orange.widgets.evaluate.owcalibrationplot.CalibratedLearner") + def test_single_class(self, *_): + """Curves are not plotted if all data belongs to (non)-target""" + def check_error(shown): + for error in (errors.no_target_class, errors.all_target_class, + errors.nan_classes): + self.assertEqual(error.is_shown(), error is shown, + f"{error} is unexpectedly" + f"{'' if error.is_shown() else ' not'} shown") + if shown is not None: + self.assertEqual(len(widget.plot.items), 0) + else: + self.assertGreater(len(widget.plot.items), 0) + + widget = self.widget + errors = widget.Error + widget.display_rug = True + combo = widget.controls.score + + original_actual = self.results.actual.copy() + self.send_signal(widget.Inputs.evaluation_results, self.results) + widget.selected_classifiers = [0] + for idx in range(combo.count()): + self._set_combo(combo, idx) + self.results.actual[:] = 0 + self.send_signal(widget.Inputs.evaluation_results, self.results) + check_error(errors.no_target_class) + + self.results.actual[:] = 1 + self.send_signal(widget.Inputs.evaluation_results, self.results) + check_error(errors.all_target_class) + + self.results.actual[:] = original_actual + self.results.actual[3] = np.nan + self.send_signal(widget.Inputs.evaluation_results, self.results) + check_error(errors.nan_classes) + + self.results.actual[:] = original_actual + self.send_signal(widget.Inputs.evaluation_results, self.results) + check_error(None) + + @patch("Orange.widgets.evaluate.owcalibrationplot.ThresholdClassifier") + @patch("Orange.widgets.evaluate.owcalibrationplot.CalibratedLearner") + def test_single_class_folds(self, *_): + """Curves for single-class folds are not plotted""" + widget = self.widget + widget.display_rug = False + widget.fold_curves = False + + results = self.lenses_results + results.folds = [slice(0, 5), slice(5, 19)] + results.models = results.models.repeat(2, axis=0) + results.actual[:3] = 0 + results.probabilities[1, 3:5] = np.nan + # after this, model 1 has just negative instances in fold 0 + self.send_signal(widget.Inputs.evaluation_results, results) + self._set_combo(widget.controls.score, 1) # CA + self.assertFalse(widget.Warning.omitted_folds.is_shown()) + widget.controls.fold_curves.click() + self.assertTrue(widget.Warning.omitted_folds.is_shown()) + + @patch("Orange.widgets.evaluate.owcalibrationplot.ThresholdClassifier") + @patch("Orange.widgets.evaluate.owcalibrationplot.CalibratedLearner") + def test_warn_nan_probabilities(self, *_): + """Warn about omitted points with nan probabiities""" + widget = self.widget + widget.display_rug = False + widget.fold_curves = False + + self.results.probabilities[1, 3] = np.nan + self.send_signal(widget.Inputs.evaluation_results, self.results) + self.assertTrue(widget.Warning.omitted_nan_prob_points.is_shown()) + self._set_list_selection(widget.controls.selected_classifiers, [0, 2]) + self.assertFalse(widget.Warning.omitted_folds.is_shown()) From 04d05f447f14ec69b534b6117caaa9c1b4ce2f98 Mon Sep 17 00:00:00 2001 From: janezd Date: Mon, 24 Jun 2019 21:50:58 +0200 Subject: [PATCH 18/21] Calibration plot: Minor fixes 
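Two of these fixes are easy to miss in the diff: adjacent string literals are
concatenated with no separator, so the "no stored models" message was missing
a space between "on" and "separate", and the info label now shortens long
classifier names with a local elided() helper. A minimal illustration in plain
Python, separate from the widget code:

    msg = ("test results do not contain stored models - try testing on"
           "separate data or on training data")
    assert "testing onseparate" in msg  # the missing space restored below

    def elided(s):
        return s[:17] + "..." if len(s) > 20 else s

    assert elided("knn-3") == "knn-3"  # short names pass through unchanged
    assert elided("a very long classifier name") == "a very long class..."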
--- Orange/widgets/evaluate/owcalibrationplot.py | 9 ++++++--- Orange/widgets/evaluate/tests/test_owcalibrationplot.py | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/Orange/widgets/evaluate/owcalibrationplot.py b/Orange/widgets/evaluate/owcalibrationplot.py index 637db82aa9b..55b1f57c2a9 100644 --- a/Orange/widgets/evaluate/owcalibrationplot.py +++ b/Orange/widgets/evaluate/owcalibrationplot.py @@ -400,6 +400,9 @@ def threshold_change(self): self._update_info() def _update_info(self): + def elided(s): + return s[:17] + "..." if len(s) > 20 else s + text = f"""
{n}
{name}:
@@ -416,7 +419,7 @@ def _update_info(self): for name, (probs, curves) in self.scores: ind = min(np.searchsorted(probs, self.threshold), len(probs) - 1) - text += f"" + text += f"" text += "".join(f'' for curve in curves) text += "" @@ -436,8 +439,8 @@ def apply(self): (len(results.folds) > 1, "each training data sample produces a different model"), (results.models is None, - "test results do not contain stored models - try testing on" - "separate data or on training data"), + "test results do not contain stored models - try testing " + "on separate data or on training data"), (len(self.selected_classifiers) != 1, "select a single model - the widget can output only one"), (self.score != 0 and len(results.domain.class_var.values) != 2, diff --git a/Orange/widgets/evaluate/tests/test_owcalibrationplot.py b/Orange/widgets/evaluate/tests/test_owcalibrationplot.py index 2d28c050fa2..e4f18231686 100644 --- a/Orange/widgets/evaluate/tests/test_owcalibrationplot.py +++ b/Orange/widgets/evaluate/tests/test_owcalibrationplot.py @@ -391,7 +391,7 @@ def test_apply_no_output(self, *_): multiple_folds: "each training data sample produces a different model", no_models: - "test results do not contain stored models - try testing on" + "test results do not contain stored models - try testing on " "separate data or on training data", multiple_selected: "select a single model - the widget can output only one", From 6695ee942204296ee11c1fff150de6148b626414 Mon Sep 17 00:00:00 2001 From: janezd Date: Fri, 28 Jun 2019 13:40:07 +0200 Subject: [PATCH 19/21] Calibrated Learner: Fix report --- Orange/widgets/model/owcalibratedlearner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Orange/widgets/model/owcalibratedlearner.py b/Orange/widgets/model/owcalibratedlearner.py index 558ac331539..0edf3184797 100644 --- a/Orange/widgets/model/owcalibratedlearner.py +++ b/Orange/widgets/model/owcalibratedlearner.py @@ -100,7 +100,7 @@ def fit_storage(self, data): def get_learner_parameters(self): return (("Calibrate probabilities", - self.CalibrationOptions[self.calibrate]), + self.CalibrationOptions[self.calibration]), ("Threshold optimization", self.ThresholdOptions[self.threshold])) From 65c69e2b890ff123f78dedde1089be9e4f81e9de Mon Sep 17 00:00:00 2001 From: janezd Date: Fri, 28 Jun 2019 14:06:37 +0200 Subject: [PATCH 20/21] Calibrated Learner: Add icon --- .../widgets/model/icons/CalibratedLearner.svg | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 Orange/widgets/model/icons/CalibratedLearner.svg diff --git a/Orange/widgets/model/icons/CalibratedLearner.svg b/Orange/widgets/model/icons/CalibratedLearner.svg new file mode 100644 index 00000000000..360a0d188ba --- /dev/null +++ b/Orange/widgets/model/icons/CalibratedLearner.svg @@ -0,0 +1,26 @@ + + + + + + + + + + + + + + + + From 864d7b59fafc7f2322088929a23af0491fe1b2c7 Mon Sep 17 00:00:00 2001 From: janezd Date: Fri, 28 Jun 2019 14:24:06 +0200 Subject: [PATCH 21/21] Calibration plot: Nicer report --- Orange/widgets/evaluate/owcalibrationplot.py | 43 ++++++++++++++------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/Orange/widgets/evaluate/owcalibrationplot.py b/Orange/widgets/evaluate/owcalibrationplot.py index 55b1f57c2a9..562c3d5aa01 100644 --- a/Orange/widgets/evaluate/owcalibrationplot.py +++ b/Orange/widgets/evaluate/owcalibrationplot.py @@ -399,15 +399,29 @@ def threshold_change(self): self.line.setPos(self.threshold) self._update_info() - def _update_info(self): - def elided(s): - return s[:17] + 
"..." if len(s) > 20 else s - - text = f"""
Threshold: p=
{name}:
{elided(name)}:/{curve[ind]:.3f}
- - - - """ + def get_info_text(self, short): + if short: + def elided(s): + return s[:17] + "..." if len(s) > 20 else s + + text = f"""
Threshold: p={self.threshold:.2f}
+ + + + """ + + else: + def elided(s): + return s + + text = f"""
Threshold: p={self.threshold:.2f}
+ + + + + """ + if self.scores is not None: short_names = Metrics[self.score].short_names if short_names: @@ -424,7 +438,10 @@ def elided(s): for curve in curves) text += "" text += "
Threshold:p = {self.threshold:.2f}
+
" - self.info_label.setText(text) + return text + + def _update_info(self): + self.info_label.setText(self.get_info_text(short=True)) def threshold_change_done(self): self.apply() @@ -472,7 +489,9 @@ def send_report(self): self.report_items(( ("Target class", self.target_cb.currentText()), ("Output model calibration", - self.score == 0 and self.controls.score.currentText()), + self.score == 0 + and ("Sigmoid calibration", + "Isotonic calibration")[self.output_calibration]) )) caption = report.list_legend(self.classifiers_list_box, self.selected_classifiers) @@ -481,7 +500,7 @@ def send_report(self): self.report_caption(self.controls.score.currentText()) if self.score != 0: - self.report_raw(self.info_label.text()) + self.report_raw(self.get_info_text(short=False)) def gaussian_smoother(x, y, sigma=1.0):