diff --git a/Orange/widgets/evaluate/owcalibrationplot.py b/Orange/widgets/evaluate/owcalibrationplot.py
index e3b828fd2e2..637db82aa9b 100644
--- a/Orange/widgets/evaluate/owcalibrationplot.py
+++ b/Orange/widgets/evaluate/owcalibrationplot.py
@@ -74,18 +74,23 @@ class Outputs:
class Error(widget.OWWidget.Error):
non_discrete_target = Msg("Calibration plot requires a discrete target")
empty_input = widget.Msg("Empty result on input. Nothing to display.")
+ nan_classes = \
+ widget.Msg("Remove test data instances with unknown classes")
+ all_target_class = widget.Msg(
+ "All data instances belong to target class")
+ no_target_class = widget.Msg(
+ "No data instances belong to target class")
+ class Warning(widget.OWWidget.Warning):
+ omitted_folds = widget.Msg(
+ "Test folds where all data belongs to (non)-target are not shown")
+ omitted_nan_prob_points = widget.Msg(
+ "Instances for which the model couldn't compute probabilities are "
+ "skipped")
+ no_valid_data = widget.Msg("No valid data for model(s) {}")
class Information(widget.OWWidget.Information):
- no_out = "Can't output a model: "
- no_output_multiple_folds = Msg(
- no_out + "each training data sample produces a different model")
- no_output_no_models = Msg(
- no_out + "test results do not contain stored models;\n"
- "try testing on separate data or on training data")
- no_output_multiple_selected = Msg(
- no_out + "select a single model - the widget can output only one")
- no_output_non_binary_class = Msg(
- no_out + "cannot calibrate non-binary classes")
+ no_output = Msg("Can't output a model: {}")
settingsHandler = EvaluationResultsContextHandler()
target_index = settings.ContextSetting(0)
@@ -179,19 +184,23 @@ def set_results(self, results):
- if results is not None and not results.domain.has_discrete_class:
- self.Error.non_discrete_target()
- results = None
- if results is not None and not results.actual.size:
- self.Error.empty_input()
- results = None
- self.results = results
- if self.results is not None:
- self._initialize(results)
- class_var = self.results.domain.class_var
- self.target_index = int(len(class_var.values) == 2)
- self.openContext(class_var, self.classifier_names)
- self._replot()
+ self.results = None
+ if results is not None:
+ if not results.domain.has_discrete_class:
+ self.Error.non_discrete_target()
+ elif not results.actual.size:
+ self.Error.empty_input()
+ elif np.any(np.isnan(results.actual)):
+ self.Error.nan_classes()
+ else:
+ self.results = results
+ self._initialize(results)
+ class_var = self.results.domain.class_var
+ self.target_index = int(len(class_var.values) == 2)
+ self.openContext(class_var, self.classifier_names)
+ self._replot()
def clear(self):
@@ -286,9 +295,6 @@ def plot_metrics(self, data, metrics, pen_args):
return data.probs, ys
def _prob_curve(self, ytrue, probs, pen_args):
- if not probs.size:
- return None
xmin, xmax = probs.min(), probs.max()
x = np.linspace(xmin, xmax, 100)
if xmax != xmin:
@@ -307,16 +313,25 @@ def _setup_plot(self):
plot_folds = self.fold_curves and results.folds is not None
self.scores = []
- ytrue = results.actual == target
+ if not self._check_class_presence(results.actual == target):
+ return
+ self.Warning.omitted_folds.clear()
+ self.Warning.omitted_nan_prob_points.clear()
+ no_valid_models = []
+ shadow_width = 4 + 4 * plot_folds
for clsf in self.selected_classifiers:
- probs = results.probabilities[clsf, :, target]
+ data = Curves.from_results(results, target, clsf)
+ if data.tot == 0: # all probabilities are nan
+ no_valid_models.append(clsf)
+ continue
+ if data.tot != results.probabilities.shape[1]: # some are nan
+ self.Warning.omitted_nan_prob_points()
color = self.colors[clsf]
pen_args = dict(
- pen=pg.mkPen(color, width=1),
- shadowPen=pg.mkPen(color.lighter(160),
- width=4 + 4 * plot_folds),
- antiAlias=True)
- data = Curves(ytrue, probs)
+ pen=pg.mkPen(color, width=1), antiAlias=True,
+ shadowPen=pg.mkPen(color.lighter(160), width=shadow_width))
self.plot_metrics(data, metrics, pen_args)))
@@ -330,19 +345,20 @@ def _setup_plot(self):
for fold in range(len(results.folds)):
fold_results = results.get_fold(fold)
- fold_ytrue = fold_results.actual == target
- fold_probs = fold_results.probabilities[clsf, :, target]
- self.plot_metrics(Curves(fold_ytrue, fold_probs),
- metrics, pen_args)
+ fold_curve = Curves.from_results(fold_results, target, clsf)
+ # Can't check this before: p and n can be 0 because of
+ # nan probabilities
+ if fold_curve.p * fold_curve.n == 0:
+ self.Warning.omitted_folds()
+ self.plot_metrics(fold_curve, metrics, pen_args)
+ if no_valid_models:
+ self.Warning.no_valid_data(
+ ", ".join(self.classifier_names[i] for i in no_valid_models))
if self.score == 0:
self.plot.plot([0, 1], [0, 1], antialias=True)
- def _replot(self):
- self.plot.clear()
- if self.results is not None:
- self._setup_plot()
- if self.score != 0:
+ else:
self.line = pg.InfiniteLine(
pos=self.threshold, movable=True,
pen=pg.mkPen(color="k", style=Qt.DashLine, width=2),
@@ -350,8 +366,25 @@ def _replot(self):
bounds=(0, 1),
- self.line.sigPositionChangeFinished.connect(self.threshold_change_done)
+ self.line.sigPositionChangeFinished.connect(
+ self.threshold_change_done)
+ def _check_class_presence(self, ytrue):
+ self.Error.all_target_class.clear()
+ self.Error.no_target_class.clear()
+ if np.max(ytrue) == 0:
+ self.Error.no_target_class()
+ return False
+ if np.min(ytrue) == 1:
+ self.Error.all_target_class()
+ return False
+ return True
+ def _replot(self):
+ self.plot.clear()
+ if self.results is not None:
+ self._setup_plot()
def _on_display_rug_changed(self):
@@ -380,10 +413,7 @@ def _update_info(self):
| ".join(f"{n} | "
for n in short_names)}
- for name, probs_curves in self.scores:
- if probs_curves is None:
- continue
- probs, curves = probs_curves
+ for name, (probs, curves) in self.scores:
ind = min(np.searchsorted(probs, self.threshold),
len(probs) - 1)
text += f"{name}: | "
@@ -397,20 +427,28 @@ def threshold_change_done(self):
def apply(self):
- info = self.Information
+ self.Information.no_output.clear()
wrapped = None
- problems = {}
results = self.results
if results is not None:
- problems = {
- info.no_output_multiple_folds: len(results.folds) > 1,
- info.no_output_no_models: results.models is None,
- info.no_output_multiple_selected:
- len(self.selected_classifiers) != 1,
- info.no_output_non_binary_class:
- self.score != 0
- and len(results.domain.class_var.values) != 2}
- if not any(problems.values()):
+ problems = [
+ msg for condition, msg in (
+ (len(results.folds) > 1,
+ "each training data sample produces a different model"),
+ (results.models is None,
+ "test results do not contain stored models - try testing on"
+ "separate data or on training data"),
+ (len(self.selected_classifiers) != 1,
+ "select a single model - the widget can output only one"),
+ (self.score != 0 and len(results.domain.class_var.values) != 2,
+ "cannot calibrate non-binary classes"))
+ if condition]
+ if len(problems) == 1:
+ self.Information.no_output(problems[0])
+ elif problems:
+ self.Information.no_output(
+ "".join(f"\n - {problem}" for problem in problems))
+ else:
clsf_idx = self.selected_classifiers[0]
model = results.models[0, clsf_idx]
if self.score == 0:
@@ -424,9 +462,6 @@ def apply(self):
wrapped = ThresholdClassifier(model, threshold)
- for info, shown in problems.items():
- if info.is_shown() != shown:
- info(shown=shown)
def send_report(self):
if self.results is None:
diff --git a/Orange/widgets/evaluate/tests/test_owcalibrationplot.py b/Orange/widgets/evaluate/tests/test_owcalibrationplot.py
index 21cc067e50e..2d28c050fa2 100644
--- a/Orange/widgets/evaluate/tests/test_owcalibrationplot.py
+++ b/Orange/widgets/evaluate/tests/test_owcalibrationplot.py
@@ -21,22 +21,6 @@
class TestOWCalibrationPlot(WidgetTest, EvaluateTest):
- @classmethod
- def setUpClass(cls):
- super().setUpClass()
- cls.lenses = data = Table(test_filename("datasets/lenses.tab"))
- majority = Orange.classification.MajorityLearner()
- majority.name = "majority"
- knn3 = Orange.classification.KNNLearner(n_neighbors=3)
- knn3.name = "knn-3"
- knn1 = Orange.classification.KNNLearner(n_neighbors=1)
- knn1.name = "knn-1"
- cls.lenses_results = Orange.evaluation.TestOnTestData(
- store_data=True, store_models=True)(
- data=data[::2], test_data=data[1::2],
- learners=[majority, knn3, knn1])
- cls.lenses_results.learner_names = ["majority", "knn-3", "knn-1"]
def setUp(self):
@@ -56,12 +40,25 @@ def setUp(self):
self.results = Results(
- folds=(Ellipsis, ),
+ folds=np.array([Ellipsis]),
models=np.array([[Mock(), Mock()]]),
predicted=np.array((pred, pred2)),
probabilities=np.array([probs, probs2]))
+ self.lenses = data = Table(test_filename("datasets/lenses.tab"))
+ majority = Orange.classification.MajorityLearner()
+ majority.name = "majority"
+ knn3 = Orange.classification.KNNLearner(n_neighbors=3)
+ knn3.name = "knn-3"
+ knn1 = Orange.classification.KNNLearner(n_neighbors=1)
+ knn1.name = "knn-1"
+ self.lenses_results = Orange.evaluation.TestOnTestData(
+ store_data=True, store_models=True)(
+ data=data[::2], test_data=data[1::2],
+ learners=[majority, knn3, knn1])
+ self.lenses_results.learner_names = ["majority", "knn-3", "knn-1"]
self.widget = self.create_widget(OWCalibrationPlot) # type: OWCalibrationPlot
warnings.filterwarnings("ignore", ".*", ConvergenceWarning)
@@ -389,24 +386,31 @@ def test_apply_no_output(self, *_):
widget = self.widget
model_list = widget.controls.selected_classifiers
- info = widget.Information
- infos = (info.no_output_multiple_folds,
- info.no_output_no_models,
- info.no_output_multiple_selected,
- info.no_output_non_binary_class)
- multiple_folds, no_models, multiple_selected, non_binary_class = infos
+ multiple_folds, multiple_selected, no_models, non_binary_class = "abcd"
+ messages = {
+ multiple_folds:
+ "each training data sample produces a different model",
+ no_models:
+ "test results do not contain stored models - try testing on"
+ "separate data or on training data",
+ multiple_selected:
+ "select a single model - the widget can output only one",
+ non_binary_class:
+ "cannot calibrate non-binary classes"}
def test_shown(shown):
- for info in infos:
- self.assertEqual(
- info.is_shown(), info in shown,
- f"{info} is unexpectedly "
- f"{'' if info.is_shown() else 'not'} shown")
+ widget_msg = widget.Information.no_output
output = self.get_output(widget.Outputs.calibrated_model)
- if shown:
- self.assertIsNone(output)
- else:
+ if not shown:
+ self.assertFalse(widget_msg.is_shown())
+ else:
+ self.assertTrue(widget_msg.is_shown())
+ self.assertIsNone(output)
+ for msg_id in shown:
+ msg = messages[msg_id]
+ self.assertIn(msg, widget_msg.formatted,
+ f"{msg} not included in the message")
self.send_signal(widget.Inputs.evaluation_results, self.results)
self._set_combo(widget.controls.score, 1) # CA
@@ -558,3 +562,79 @@ def test_report(self):
widget = self.widget
self.send_signal(widget.Inputs.evaluation_results, self.lenses_results)
+ @patch("Orange.widgets.evaluate.owcalibrationplot.ThresholdClassifier")
+ @patch("Orange.widgets.evaluate.owcalibrationplot.CalibratedLearner")
+ def test_single_class(self, *_):
+ """Curves are not plotted if all data belongs to (non)-target"""
+ def check_error(shown):
+ for error in (errors.no_target_class, errors.all_target_class,
+ errors.nan_classes):
+ self.assertEqual(error.is_shown(), error is shown,
+ f"{error} is unexpectedly"
+ f"{'' if error.is_shown() else ' not'} shown")
+ if shown is not None:
+ self.assertEqual(len(widget.plot.items), 0)
+ else:
+ self.assertGreater(len(widget.plot.items), 0)
+ widget = self.widget
+ errors = widget.Error
+ widget.display_rug = True
+ combo = widget.controls.score
+ original_actual = self.results.actual.copy()
+ self.send_signal(widget.Inputs.evaluation_results, self.results)
+ widget.selected_classifiers = [0]
+ for idx in range(combo.count()):
+ self._set_combo(combo, idx)
+ self.results.actual[:] = 0
+ self.send_signal(widget.Inputs.evaluation_results, self.results)
+ check_error(errors.no_target_class)
+ self.results.actual[:] = 1
+ self.send_signal(widget.Inputs.evaluation_results, self.results)
+ check_error(errors.all_target_class)
+ self.results.actual[:] = original_actual
+ self.results.actual[3] = np.nan
+ self.send_signal(widget.Inputs.evaluation_results, self.results)
+ check_error(errors.nan_classes)
+ self.results.actual[:] = original_actual
+ self.send_signal(widget.Inputs.evaluation_results, self.results)
+ check_error(None)
+ @patch("Orange.widgets.evaluate.owcalibrationplot.ThresholdClassifier")
+ @patch("Orange.widgets.evaluate.owcalibrationplot.CalibratedLearner")
+ def test_single_class_folds(self, *_):
+ """Curves for single-class folds are not plotted"""
+ widget = self.widget
+ widget.display_rug = False
+ widget.fold_curves = False
+ results = self.lenses_results
+ results.folds = [slice(0, 5), slice(5, 19)]
+ results.models = results.models.repeat(2, axis=0)
+ results.actual[:3] = 0
+ results.probabilities[1, 3:5] = np.nan
+ # after this, model 1 has just negative instances in fold 0
+ self.send_signal(widget.Inputs.evaluation_results, results)
+ self._set_combo(widget.controls.score, 1) # CA
+ self.assertFalse(widget.Warning.omitted_folds.is_shown())
+ widget.controls.fold_curves.click()
+ self.assertTrue(widget.Warning.omitted_folds.is_shown())
+ @patch("Orange.widgets.evaluate.owcalibrationplot.ThresholdClassifier")
+ @patch("Orange.widgets.evaluate.owcalibrationplot.CalibratedLearner")
+ def test_warn_nan_probabilities(self, *_):
+ """Warn about omitted points with nan probabilities"""
+ widget = self.widget
+ widget.display_rug = False
+ widget.fold_curves = False
+ self.results.probabilities[1, 3] = np.nan
+ self.send_signal(widget.Inputs.evaluation_results, self.results)
+ self.assertTrue(widget.Warning.omitted_nan_prob_points.is_shown())
+ self._set_list_selection(widget.controls.selected_classifiers, [0])
+ self.assertFalse(widget.Warning.omitted_nan_prob_points.is_shown())