diff --git a/Orange/widgets/evaluate/owcalibrationplot.py b/Orange/widgets/evaluate/owcalibrationplot.py
index e3b828fd2e2..176f5152468 100644
--- a/Orange/widgets/evaluate/owcalibrationplot.py
+++ b/Orange/widgets/evaluate/owcalibrationplot.py
@@ -74,18 +74,23 @@ class Outputs:
     class Error(widget.OWWidget.Error):
         non_discrete_target = Msg("Calibration plot requires a discrete target")
         empty_input = widget.Msg("Empty result on input. Nothing to display.")
+        nan_classes = \
+            widget.Msg("Remove test data instances with unknown classes")
+        all_target_class = widget.Msg(
+            "All data instances belong to target class")
+        no_target_class = widget.Msg(
+            "No data instances belong to target class")
+
+    class Warning(widget.OWWidget.Warning):
+        omitted_folds = widget.Msg(
+            "Test folds where all data belongs to (non)-target are not shown")
+        omitted_nan_prob_points = widget.Msg(
+            "Instances for which the model couldn't compute probabilities are"
+            " skipped")
+        no_valid_data = widget.Msg("No valid data for model(s) {}")
 
     class Information(widget.OWWidget.Information):
-        no_out = "Can't output a model: "
-        no_output_multiple_folds = Msg(
-            no_out + "each training data sample produces a different model")
-        no_output_no_models = Msg(
-            no_out + "test results do not contain stored models;\n"
-            "try testing on separate data or on training data")
-        no_output_multiple_selected = Msg(
-            no_out + "select a single model - the widget can output only one")
-        no_output_non_binary_class = Msg(
-            no_out + "cannot calibrate non-binary classes")
+        no_output = Msg("Can't output a model: {}")
 
     settingsHandler = EvaluationResultsContextHandler()
     target_index = settings.ContextSetting(0)
@@ -179,19 +184,23 @@ def set_results(self, results):
         self.clear()
         self.Error.clear()
         self.Information.clear()
-        if results is not None and not results.domain.has_discrete_class:
-            self.Error.non_discrete_target()
-            results = None
-        if results is not None and not results.actual.size:
-            self.Error.empty_input()
-            results = None
-        self.results = results
-        if self.results is not None:
-            self._initialize(results)
-            class_var = self.results.domain.class_var
-            self.target_index = int(len(class_var.values) == 2)
-            self.openContext(class_var, self.classifier_names)
-            self._replot()
+
+        self.results = None
+        if results is not None:
+            if not results.domain.has_discrete_class:
+                self.Error.non_discrete_target()
+            elif not results.actual.size:
+                self.Error.empty_input()
+            elif np.any(np.isnan(results.actual)):
+                self.Error.nan_classes()
+            else:
+                self.results = results
+                self._initialize(results)
+                class_var = self.results.domain.class_var
+                self.target_index = int(len(class_var.values) == 2)
+                self.openContext(class_var, self.classifier_names)
+                self._replot()
+
         self.apply()
 
     def clear(self):
@@ -286,9 +295,6 @@ def plot_metrics(self, data, metrics, pen_args):
         return data.probs, ys
 
     def _prob_curve(self, ytrue, probs, pen_args):
-        if not probs.size:
-            return None
-
         xmin, xmax = probs.min(), probs.max()
         x = np.linspace(xmin, xmax, 100)
         if xmax != xmin:
@@ -307,16 +313,25 @@ def _setup_plot(self):
         plot_folds = self.fold_curves and results.folds is not None
         self.scores = []
 
-        ytrue = results.actual == target
+        if not self._check_class_presence(results.actual == target):
+            return
+
+        self.Warning.omitted_folds.clear()
+        self.Warning.omitted_nan_prob_points.clear()
+        no_valid_models = []
+        shadow_width = 4 + 4 * plot_folds
         for clsf in self.selected_classifiers:
-            probs = results.probabilities[clsf, :, target]
+            data = Curves.from_results(results, target, clsf)
+            if data.tot == 0:  # all probabilities are nan
+                no_valid_models.append(clsf)
+                continue
+            if data.tot != results.probabilities.shape[1]:  # some are nan
+                self.Warning.omitted_nan_prob_points()
+
             color = self.colors[clsf]
             pen_args = dict(
-                pen=pg.mkPen(color, width=1),
-                shadowPen=pg.mkPen(color.lighter(160),
-                                   width=4 + 4 * plot_folds),
-                antiAlias=True)
-            data = Curves(ytrue, probs)
+                pen=pg.mkPen(color, width=1), antiAlias=True,
+                shadowPen=pg.mkPen(color.lighter(160), width=shadow_width))
             self.scores.append(
                 (self.classifier_names[clsf],
                  self.plot_metrics(data, metrics, pen_args)))
@@ -330,19 +345,20 @@ def _setup_plot(self):
                 antiAlias=True)
             for fold in range(len(results.folds)):
                 fold_results = results.get_fold(fold)
-                fold_ytrue = fold_results.actual == target
-                fold_probs = fold_results.probabilities[clsf, :, target]
-                self.plot_metrics(Curves(fold_ytrue, fold_probs),
-                                  metrics, pen_args)
+                fold_curve = Curves.from_results(fold_results, target, clsf)
+                # Can't check this before: p and n can be 0 because of
+                # nan probabilities
+                if fold_curve.p * fold_curve.n == 0:
+                    self.Warning.omitted_folds()
+                self.plot_metrics(fold_curve, metrics, pen_args)
+
+        if no_valid_models:
+            self.Warning.no_valid_data(
+                ", ".join(self.classifier_names[i] for i in no_valid_models))
 
         if self.score == 0:
             self.plot.plot([0, 1], [0, 1], antialias=True)
-
-    def _replot(self):
-        self.plot.clear()
-        if self.results is not None:
-            self._setup_plot()
-        if self.score != 0:
+        else:
             self.line = pg.InfiniteLine(
                 pos=self.threshold, movable=True,
                 pen=pg.mkPen(color="k", style=Qt.DashLine, width=2),
@@ -350,8 +366,25 @@ def _replot(self):
                 bounds=(0, 1),
             )
             self.line.sigPositionChanged.connect(self.threshold_change)
-            self.line.sigPositionChangeFinished.connect(self.threshold_change_done)
+            self.line.sigPositionChangeFinished.connect(
+                self.threshold_change_done)
             self.plot.addItem(self.line)
+
+    def _check_class_presence(self, ytrue):
+        self.Error.all_target_class.clear()
+        self.Error.no_target_class.clear()
+        if np.max(ytrue) == 0:
+            self.Error.no_target_class()
+            return False
+        if np.min(ytrue) == 1:
+            self.Error.all_target_class()
+            return False
+        return True
+
+    def _replot(self):
+        self.plot.clear()
+        if self.results is not None:
+            self._setup_plot()
         self._update_info()
 
     def _on_display_rug_changed(self):
@@ -397,20 +430,28 @@ def threshold_change_done(self):
         self.apply()
 
     def apply(self):
-        info = self.Information
+        self.Information.no_output.clear()
         wrapped = None
-        problems = {}
         results = self.results
         if results is not None:
-            problems = {
-                info.no_output_multiple_folds: len(results.folds) > 1,
-                info.no_output_no_models: results.models is None,
-                info.no_output_multiple_selected:
-                    len(self.selected_classifiers) != 1,
-                info.no_output_non_binary_class:
-                    self.score != 0
-                    and len(results.domain.class_var.values) != 2}
-            if not any(problems.values()):
+            problems = [
+                msg for condition, msg in (
+                    (len(results.folds) > 1,
+                     "each training data sample produces a different model"),
+                    (results.models is None,
+                     "test results do not contain stored models - try testing on "
+                     "separate data or on training data"),
+                    (len(self.selected_classifiers) != 1,
+                     "select a single model - the widget can output only one"),
+                    (self.score != 0 and len(results.domain.class_var.values) != 2,
+                     "cannot calibrate non-binary classes"))
+                if condition]
+            if len(problems) == 1:
+                self.Information.no_output(problems[0])
+            elif problems:
+                self.Information.no_output(
+                    "".join(f"\n - {problem}" for problem in problems))
+            else:
                 clsf_idx = self.selected_classifiers[0]
                 model = results.models[0, clsf_idx]
                 if self.score == 0:
@@ -424,9 +465,6 @@ def apply(self):
                 wrapped = ThresholdClassifier(model, threshold)
 
         self.Outputs.calibrated_model.send(wrapped)
-        for info, shown in problems.items():
-            if info.is_shown() != shown:
-                info(shown=shown)
 
     def send_report(self):
         if self.results is None:
diff --git a/Orange/widgets/evaluate/tests/test_owcalibrationplot.py b/Orange/widgets/evaluate/tests/test_owcalibrationplot.py
index 21cc067e50e..2d28c050fa2 100644
--- a/Orange/widgets/evaluate/tests/test_owcalibrationplot.py
+++ b/Orange/widgets/evaluate/tests/test_owcalibrationplot.py
@@ -21,22 +21,6 @@ class TestOWCalibrationPlot(WidgetTest, EvaluateTest):
-    @classmethod
-    def setUpClass(cls):
-        super().setUpClass()
-        cls.lenses = data = Table(test_filename("datasets/lenses.tab"))
-        majority = Orange.classification.MajorityLearner()
-        majority.name = "majority"
-        knn3 = Orange.classification.KNNLearner(n_neighbors=3)
-        knn3.name = "knn-3"
-        knn1 = Orange.classification.KNNLearner(n_neighbors=1)
-        knn1.name = "knn-1"
-        cls.lenses_results = Orange.evaluation.TestOnTestData(
-            store_data=True, store_models=True)(
-            data=data[::2], test_data=data[1::2],
-            learners=[majority, knn3, knn1])
-        cls.lenses_results.learner_names = ["majority", "knn-3", "knn-1"]
-
     def setUp(self):
         super().setUp()
@@ -56,12 +40,25 @@ def setUp(self):
         self.results = Results(
             domain=domain,
             actual=actual,
-            folds=(Ellipsis, ),
+            folds=np.array([Ellipsis]),
             models=np.array([[Mock(), Mock()]]),
             row_indices=np.arange(19),
             predicted=np.array((pred, pred2)),
             probabilities=np.array([probs, probs2]))
+        self.lenses = data = Table(test_filename("datasets/lenses.tab"))
+        majority = Orange.classification.MajorityLearner()
+        majority.name = "majority"
+        knn3 = Orange.classification.KNNLearner(n_neighbors=3)
+        knn3.name = "knn-3"
+        knn1 = Orange.classification.KNNLearner(n_neighbors=1)
+        knn1.name = "knn-1"
+        self.lenses_results = Orange.evaluation.TestOnTestData(
+            store_data=True, store_models=True)(
+            data=data[::2], test_data=data[1::2],
+            learners=[majority, knn3, knn1])
+        self.lenses_results.learner_names = ["majority", "knn-3", "knn-1"]
+
         self.widget = self.create_widget(OWCalibrationPlot)  # type: OWCalibrationPlot
         warnings.filterwarnings("ignore", ".*", ConvergenceWarning)
@@ -389,24 +386,31 @@ def test_apply_no_output(self, *_):
         widget = self.widget
         model_list = widget.controls.selected_classifiers
-        info = widget.Information
-        infos = (info.no_output_multiple_folds,
-                 info.no_output_no_models,
-                 info.no_output_multiple_selected,
-                 info.no_output_non_binary_class)
-        multiple_folds, no_models, multiple_selected, non_binary_class = infos
+        multiple_folds, multiple_selected, no_models, non_binary_class = "abcd"
+        messages = {
+            multiple_folds:
+                "each training data sample produces a different model",
+            no_models:
+                "test results do not contain stored models - try testing on "
+                "separate data or on training data",
+            multiple_selected:
+                "select a single model - the widget can output only one",
+            non_binary_class:
+                "cannot calibrate non-binary classes"}
 
         def test_shown(shown):
-            for info in infos:
-                self.assertEqual(
-                    info.is_shown(), info in shown,
-                    f"{info} is unexpectedly "
-                    f"{'' if info.is_shown() else 'not'} shown")
+            widget_msg = widget.Information.no_output
             output = self.get_output(widget.Outputs.calibrated_model)
-            if shown:
-                self.assertIsNone(output)
-            else:
+            if not shown:
+                self.assertFalse(widget_msg.is_shown())
                 self.assertIsNotNone(output)
+            else:
+                self.assertTrue(widget_msg.is_shown())
+                self.assertIsNone(output)
+                for msg_id in shown:
+                    msg = messages[msg_id]
+                    self.assertIn(msg, widget_msg.formatted,
+                                  f"{msg} not included in the message")
 
         self.send_signal(widget.Inputs.evaluation_results, self.results)
         self._set_combo(widget.controls.score, 1)  # CA
@@ -558,3 +562,79 @@ def test_report(self):
         widget = self.widget
         self.send_signal(widget.Inputs.evaluation_results, self.lenses_results)
         widget.send_report()
+
+    @patch("Orange.widgets.evaluate.owcalibrationplot.ThresholdClassifier")
+    @patch("Orange.widgets.evaluate.owcalibrationplot.CalibratedLearner")
+    def test_single_class(self, *_):
+        """Curves are not plotted if all data belongs to (non)-target"""
+        def check_error(shown):
+            for error in (errors.no_target_class, errors.all_target_class,
+                          errors.nan_classes):
+                self.assertEqual(error.is_shown(), error is shown,
+                                 f"{error} is unexpectedly"
+                                 f"{'' if error.is_shown() else ' not'} shown")
+            if shown is not None:
+                self.assertEqual(len(widget.plot.items), 0)
+            else:
+                self.assertGreater(len(widget.plot.items), 0)
+
+        widget = self.widget
+        errors = widget.Error
+        widget.display_rug = True
+        combo = widget.controls.score
+
+        original_actual = self.results.actual.copy()
+        self.send_signal(widget.Inputs.evaluation_results, self.results)
+        widget.selected_classifiers = [0]
+        for idx in range(combo.count()):
+            self._set_combo(combo, idx)
+        self.results.actual[:] = 0
+        self.send_signal(widget.Inputs.evaluation_results, self.results)
+        check_error(errors.no_target_class)
+
+        self.results.actual[:] = 1
+        self.send_signal(widget.Inputs.evaluation_results, self.results)
+        check_error(errors.all_target_class)
+
+        self.results.actual[:] = original_actual
+        self.results.actual[3] = np.nan
+        self.send_signal(widget.Inputs.evaluation_results, self.results)
+        check_error(errors.nan_classes)
+
+        self.results.actual[:] = original_actual
+        self.send_signal(widget.Inputs.evaluation_results, self.results)
+        check_error(None)
+
+    @patch("Orange.widgets.evaluate.owcalibrationplot.ThresholdClassifier")
+    @patch("Orange.widgets.evaluate.owcalibrationplot.CalibratedLearner")
+    def test_single_class_folds(self, *_):
+        """Curves for single-class folds are not plotted"""
+        widget = self.widget
+        widget.display_rug = False
+        widget.fold_curves = False
+
+        results = self.lenses_results
+        results.folds = [slice(0, 5), slice(5, 19)]
+        results.models = results.models.repeat(2, axis=0)
+        results.actual[:3] = 0
+        results.probabilities[1, 3:5] = np.nan
+        # after this, model 1 has just negative instances in fold 0
+        self.send_signal(widget.Inputs.evaluation_results, results)
+        self._set_combo(widget.controls.score, 1)  # CA
+        self.assertFalse(widget.Warning.omitted_folds.is_shown())
+        widget.controls.fold_curves.click()
+        self.assertTrue(widget.Warning.omitted_folds.is_shown())
+
+    @patch("Orange.widgets.evaluate.owcalibrationplot.ThresholdClassifier")
+    @patch("Orange.widgets.evaluate.owcalibrationplot.CalibratedLearner")
+    def test_warn_nan_probabilities(self, *_):
+        """Warn about omitted points with nan probabilities"""
+        widget = self.widget
+        widget.display_rug = False
+        widget.fold_curves = False
+
+        self.results.probabilities[1, 3] = np.nan
+        self.send_signal(widget.Inputs.evaluation_results, self.results)
+        self.assertTrue(widget.Warning.omitted_nan_prob_points.is_shown())
+        self._set_list_selection(widget.controls.selected_classifiers, [0, 2])
+        self.assertFalse(widget.Warning.omitted_nan_prob_points.is_shown())