diff --git a/Orange/preprocess/preprocess.py b/Orange/preprocess/preprocess.py
index 0600258babc..b800d14e104 100644
--- a/Orange/preprocess/preprocess.py
+++ b/Orange/preprocess/preprocess.py
@@ -488,6 +488,29 @@ def transform(var):
         return data.transform(domain)
 
 
+class ApplyDomain(Preprocess):
+    """Preprocessor that transforms data into a fixed, precomputed domain.
+
+    Parameters
+    ----------
+    domain
+        Domain passed to ``data.transform`` on every call.
+    name : str
+        Name returned by ``str()``.
+    """
+
+    def __init__(self, domain, name):
+        self._domain = domain
+        self._name = name
+
+    def __call__(self, data):
+        """Return `data` transformed into the stored domain."""
+        return data.transform(self._domain)
+
+    def __str__(self):
+        return self._name
+
+
 class PreprocessorList(Preprocess):
     """
     Store a list of preprocessors and on call apply them to the dataset.
diff --git a/Orange/widgets/data/icons/Transform.svg b/Orange/widgets/data/icons/Transform.svg
new file mode 100644
index 00000000000..60059cdcf58
--- /dev/null
+++ b/Orange/widgets/data/icons/Transform.svg
@@ -0,0 +1,15 @@
+
+
+
+
+
+
+
+
+
+
diff --git a/Orange/widgets/data/owtransform.py b/Orange/widgets/data/owtransform.py
new file mode 100644
index 00000000000..6523eb21ca3
--- /dev/null
+++ b/Orange/widgets/data/owtransform.py
@@ -0,0 +1,124 @@
+from Orange.data import Table
+from Orange.preprocess.preprocess import Preprocess, Discretize
+from Orange.widgets import gui
+from Orange.widgets.utils.sql import check_sql_input
+from Orange.widgets.widget import OWWidget, Input, Output, Msg
+
+
+class OWTransform(OWWidget):
+    """Apply a preprocessor received on input to the input data table."""
+
+    name = "Transform"
+    description = "Transform data table."
+    icon = "icons/Transform.svg"
+    priority = 2110
+    keywords = []
+
+    class Inputs:
+        data = Input("Data", Table, default=True)
+        preprocessor = Input("Preprocessor", Preprocess)
+
+    class Outputs:
+        transformed_data = Output("Transformed Data", Table)
+
+    class Error(OWWidget.Error):
+        pp_error = Msg("An error occurred while transforming data.\n{}")
+
+    resizing_enabled = False
+    want_main_area = False
+
+    def __init__(self):
+        super().__init__()
+        self.data = None
+        self.preprocessor = None
+        self.transformed_data = None
+
+        info_box = gui.widgetBox(self.controlArea, "Info")
+        self.input_label = gui.widgetLabel(info_box, "")
+        self.preprocessor_label = gui.widgetLabel(info_box, "")
+        self.output_label = gui.widgetLabel(info_box, "")
+        self.set_input_label_text()
+        self.set_preprocessor_label_text()
+
+    def set_input_label_text(self):
+        """Show the size of the input data, or that there is none."""
+        text = "No data on input."
+        if self.data is not None:
+            text = "Input data with {:,} instances and {:,} features.".format(
+                len(self.data),
+                len(self.data.domain.attributes))
+        self.input_label.setText(text)
+
+    def set_preprocessor_label_text(self):
+        """Show whether a preprocessor is on input or has been applied."""
+        text = "No preprocessor on input."
+        if self.transformed_data is not None:
+            text = "Preprocessor {} applied.".format(self.preprocessor)
+        elif self.preprocessor is not None:
+            text = "Preprocessor {} on input.".format(self.preprocessor)
+        self.preprocessor_label.setText(text)
+
+    def set_output_label_text(self):
+        """Show the number of output features, or nothing without output."""
+        text = ""
+        # Compare with None, like the preprocessor label does; a table's
+        # truth value may be False for an existing but empty output.
+        if self.transformed_data is not None:
+            text = "Output data includes {:,} features.".format(
+                len(self.transformed_data.domain.attributes))
+        self.output_label.setText(text)
+
+    @Inputs.data
+    @check_sql_input
+    def set_data(self, data):
+        self.data = data
+        self.set_input_label_text()
+
+    @Inputs.preprocessor
+    def set_preprocessor(self, preprocessor):
+        self.preprocessor = preprocessor
+
+    def handleNewSignals(self):
+        self.apply()
+
+    def apply(self):
+        """Run the preprocessor on the data and send the result.
+
+        Sends None when either input is missing or the preprocessor raises;
+        in the latter case the exception is shown as a widget error.
+        """
+        self.clear_messages()
+        self.transformed_data = None
+        if self.data is not None and self.preprocessor is not None:
+            try:
+                self.transformed_data = self.preprocessor(self.data)
+            except Exception as ex:  # pylint: disable=broad-except
+                self.Error.pp_error(ex)
+        self.Outputs.transformed_data.send(self.transformed_data)
+
+        self.set_preprocessor_label_text()
+        self.set_output_label_text()
+
+    def send_report(self):
+        if self.preprocessor is not None:
+            self.report_items("Settings",
+                              (("Preprocessor", self.preprocessor),))
+        if self.data is not None:
+            self.report_data("Data", self.data)
+        if self.transformed_data is not None:
+            self.report_data("Transformed data", self.transformed_data)
+
+
+if __name__ == "__main__":
+    from AnyQt.QtWidgets import QApplication
+
+    app = QApplication([])
+    ow = OWTransform()
+    d = Table("iris")
+    pp = Discretize()
+    ow.set_data(d)
+    ow.set_preprocessor(pp)
+    ow.handleNewSignals()
+    ow.show()
+    app.exec_()
+    ow.saveSettings()
diff --git a/Orange/widgets/data/tests/test_owtransform.py b/Orange/widgets/data/tests/test_owtransform.py
new file mode 100644
index 00000000000..cd24a8c7cbd
--- /dev/null
+++ b/Orange/widgets/data/tests/test_owtransform.py
@@ -0,0 +1,93 @@
+# Test methods with long descriptive names can omit docstrings
+# pylint: disable=missing-docstring
+from Orange.data import Table
+from Orange.preprocess import Discretize
+from Orange.preprocess.preprocess import Preprocess
+from Orange.widgets.data.owtransform import OWTransform
+from Orange.widgets.tests.base import WidgetTest
+from Orange.widgets.unsupervised.owpca import OWPCA
+
+
+class TestOWTransform(WidgetTest):
+    def setUp(self):
+        self.widget = self.create_widget(OWTransform)
+        self.data = Table("iris")
+        self.preprocessor = Discretize()
+
+    def test_output(self):
+        # send data and preprocessor
+        self.send_signal(self.widget.Inputs.data, self.data)
+        self.send_signal(self.widget.Inputs.preprocessor, self.preprocessor)
+        output = self.get_output(self.widget.Outputs.transformed_data)
+        self.assertIsInstance(output, Table)
+        self.assertEqual("Input data with 150 instances and 4 features.",
+                         self.widget.input_label.text())
+        self.assertEqual("Preprocessor Discretize() applied.",
+                         self.widget.preprocessor_label.text())
+        self.assertEqual("Output data includes 4 features.",
+                         self.widget.output_label.text())
+
+        # remove preprocessor
+        self.send_signal(self.widget.Inputs.preprocessor, None)
+        output = self.get_output(self.widget.Outputs.transformed_data)
+        self.assertIsNone(output)
+        self.assertEqual("Input data with 150 instances and 4 features.",
+                         self.widget.input_label.text())
+        self.assertEqual("No preprocessor on input.",
+                         self.widget.preprocessor_label.text())
+        self.assertEqual("", self.widget.output_label.text())
+
+        # send preprocessor
+        self.send_signal(self.widget.Inputs.preprocessor, self.preprocessor)
+        output = self.get_output(self.widget.Outputs.transformed_data)
+        self.assertIsInstance(output, Table)
+        self.assertEqual("Input data with 150 instances and 4 features.",
+                         self.widget.input_label.text())
+        self.assertEqual("Preprocessor Discretize() applied.",
+                         self.widget.preprocessor_label.text())
+        self.assertEqual("Output data includes 4 features.",
+                         self.widget.output_label.text())
+
+        # remove data
+        self.send_signal(self.widget.Inputs.data, None)
+        output = self.get_output(self.widget.Outputs.transformed_data)
+        self.assertIsNone(output)
+        self.assertEqual("No data on input.", self.widget.input_label.text())
+        self.assertEqual("Preprocessor Discretize() on input.",
+                         self.widget.preprocessor_label.text())
+        self.assertEqual("", self.widget.output_label.text())
+
+        # remove preprocessor
+        self.send_signal(self.widget.Inputs.preprocessor, None)
+        self.assertEqual("No data on input.", self.widget.input_label.text())
+        self.assertEqual("No preprocessor on input.",
+                         self.widget.preprocessor_label.text())
+        self.assertEqual("", self.widget.output_label.text())
+
+    def test_input_pca_preprocessor(self):
+        owpca = self.create_widget(OWPCA)
+        self.send_signal(owpca.Inputs.data, self.data, widget=owpca)
+        owpca.components_spin.setValue(2)
+        pp = self.get_output(owpca.Outputs.preprocessor, widget=owpca)
+        self.assertIsInstance(pp, Preprocess)
+
+        self.send_signal(self.widget.Inputs.data, self.data)
+        self.send_signal(self.widget.Inputs.preprocessor, pp)
+        output = self.get_output(self.widget.Outputs.transformed_data)
+        self.assertIsInstance(output, Table)
+        self.assertEqual(output.X.shape, (len(self.data), 2))
+
+    def test_error_transforming(self):
+        self.send_signal(self.widget.Inputs.data, self.data)
+        self.send_signal(self.widget.Inputs.preprocessor, Preprocess())
+        self.assertTrue(self.widget.Error.pp_error.is_shown())
+        output = self.get_output(self.widget.Outputs.transformed_data)
+        self.assertIsNone(output)
+        self.send_signal(self.widget.Inputs.data, None)
+        self.assertFalse(self.widget.Error.pp_error.is_shown())
+
+    def test_send_report(self):
+        self.send_signal(self.widget.Inputs.data, self.data)
+        self.widget.report_button.click()
+        self.send_signal(self.widget.Inputs.data, None)
+        self.widget.report_button.click()
diff --git a/Orange/widgets/unsupervised/owpca.py b/Orange/widgets/unsupervised/owpca.py
index 434c05398ce..4656e481e3e 100644
--- a/Orange/widgets/unsupervised/owpca.py
+++ b/Orange/widgets/unsupervised/owpca.py
@@ -10,6 +10,7 @@
 from Orange.data import Table, Domain, StringVariable, ContinuousVariable
 from Orange.data.sql.table import SqlTable, AUTO_DL_LIMIT
 from Orange.preprocess import Normalize
+from Orange.preprocess.preprocess import Preprocess, ApplyDomain
 from Orange.projection import PCA, TruncatedSVD
 from Orange.widgets import widget, gui, settings
 from Orange.widgets.widget import Input, Output
@@ -44,6 +45,7 @@ class Outputs:
         transformed_data = Output("Transformed data", Table)
         components = Output("Components", Table)
         pca = Output("PCA", PCA, dynamic=False)
+        preprocessor = Output("Preprocessor", Preprocess)
 
     settingsHandler = settings.DomainContextHandler()
 
@@ -290,6 +292,7 @@ def clear_outputs(self):
         self.Outputs.transformed_data.send(None)
         self.Outputs.components.send(None)
         self.Outputs.pca.send(self._pca_projector)
+        self.Outputs.preprocessor.send(None)
 
     def get_model(self):
         if self.rpca is None:
@@ -455,7 +458,7 @@ def _update_axis(self):
         axis.setTicks([[(i, str(i+1)) for i in range(0, p, d)]])
 
     def commit(self):
-        transformed = components = None
+        transformed = components = pp = None
         if self._pca is not None:
             if self._transformed is None:
                 # Compute the full transform (MAX_COMPONENTS components) only once.
@@ -479,10 +482,13 @@ def commit(self):
                                metas=metas)
             components.name = 'components'
 
+            pp = ApplyDomain(domain, "PCA")
+
         self._pca_projector.component = self.ncomponents
         self.Outputs.transformed_data.send(transformed)
         self.Outputs.components.send(components)
         self.Outputs.pca.send(self._pca_projector)
+        self.Outputs.preprocessor.send(pp)
 
     def send_report(self):
         if self.data is None:
diff --git a/Orange/widgets/unsupervised/tests/test_owpca.py b/Orange/widgets/unsupervised/tests/test_owpca.py
index adea6b808c6..6bf4a050e04 100644
--- a/Orange/widgets/unsupervised/tests/test_owpca.py
+++ b/Orange/widgets/unsupervised/tests/test_owpca.py
@@ -4,6 +4,7 @@
 import scipy.sparse as sp
 
 from Orange.data import Table, Domain, ContinuousVariable, TimeVariable
+from Orange.preprocess.preprocess import Preprocess
 from Orange.widgets.tests.base import WidgetTest
 from Orange.widgets.unsupervised.owpca import OWPCA, DECOMPOSITIONS
 
@@ -131,3 +132,16 @@ def test_do_not_mask_features(self):
         self.widget.set_data(data)
         ndata = Table("iris.tab")
         self.assertEqual(data.domain[0], ndata.domain[0])
+
+    def test_output_preprocessor(self):
+        data = Table("iris")
+        self.send_signal(self.widget.Inputs.data, data)
+        pp = self.get_output(self.widget.Outputs.preprocessor)
+        self.assertIsInstance(pp, Preprocess)
+        transformed_data = pp(data[::10])
+        self.assertIsInstance(transformed_data, Table)
+        self.assertEqual(transformed_data.X.shape, (15, 2))
+        output = self.get_output(self.widget.Outputs.transformed_data)
+        np.testing.assert_array_equal(transformed_data.X, output.X[::10])
+        self.assertEqual([a.name for a in transformed_data.domain.attributes],
+                         [m.name for m in output.domain.attributes])