Skip to content

Commit

Permalink
Merge pull request #3346 from VesnaT/owtransform
Browse files Browse the repository at this point in the history
[ENH] Transform: Add new widget
  • Loading branch information
BlazZupan authored Nov 12, 2018
2 parents aea66c6 + 741245f commit 8690622
Show file tree
Hide file tree
Showing 6 changed files with 252 additions and 1 deletion.
12 changes: 12 additions & 0 deletions Orange/preprocess/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,18 @@ def transform(var):
return data.transform(domain)


class ApplyDomain(Preprocess):
    """Preprocessor that transforms data into a fixed, stored domain."""

    def __init__(self, domain, name):
        """Store the target `domain` and the display `name`.

        `domain` is later handed to `data.transform`; `name` is what
        `str()` of this preprocessor returns.
        """
        self._name = name
        self._domain = domain

    def __call__(self, data):
        """Return `data` transformed into the stored domain."""
        target_domain = self._domain
        return data.transform(target_domain)

    def __str__(self):
        """Return the name given at construction."""
        return self._name


class PreprocessorList(Preprocess):
"""
Store a list of preprocessors and on call apply them to the dataset.
Expand Down
15 changes: 15 additions & 0 deletions Orange/widgets/data/icons/Transform.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
112 changes: 112 additions & 0 deletions Orange/widgets/data/owtransform.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
from Orange.data import Table
from Orange.preprocess.preprocess import Preprocess, Discretize
from Orange.widgets import gui
from Orange.widgets.utils.sql import check_sql_input
from Orange.widgets.widget import OWWidget, Input, Output, Msg


class OWTransform(OWWidget):
    """Widget that applies a preprocessor from one input to data from another.

    The result of calling the preprocessor on the data is sent on the
    "Transformed Data" output. When either input is missing, or the
    preprocessor raises, `None` is sent instead.
    """
    name = "Transform"
    description = "Transform data table."
    icon = "icons/Transform.svg"
    priority = 2110
    keywords = []

    class Inputs:
        data = Input("Data", Table, default=True)
        preprocessor = Input("Preprocessor", Preprocess)

    class Outputs:
        transformed_data = Output("Transformed Data", Table)

    class Error(OWWidget.Error):
        pp_error = Msg("An error occurred while transforming data.\n{}")

    resizing_enabled = False
    want_main_area = False

    def __init__(self):
        super().__init__()
        # Current inputs and the most recent result; None means "absent".
        self.data = None
        self.preprocessor = None
        self.transformed_data = None

        info_box = gui.widgetBox(self.controlArea, "Info")
        self.input_label = gui.widgetLabel(info_box, "")
        self.preprocessor_label = gui.widgetLabel(info_box, "")
        self.output_label = gui.widgetLabel(info_box, "")
        # Show the "nothing on input" texts right away; the output label
        # stays empty until `apply` has run.
        self.set_input_label_text()
        self.set_preprocessor_label_text()

    def set_input_label_text(self):
        """Describe the input data (instance and feature counts) in the UI."""
        text = "No data on input."
        if self.data is not None:
            text = "Input data with {:,} instances and {:,} features.".format(
                len(self.data),
                len(self.data.domain.attributes))
        self.input_label.setText(text)

    def set_preprocessor_label_text(self):
        """Describe the preprocessor state: absent, on input, or applied."""
        text = "No preprocessor on input."
        if self.transformed_data is not None:
            text = "Preprocessor {} applied.".format(self.preprocessor)
        elif self.preprocessor is not None:
            text = "Preprocessor {} on input.".format(self.preprocessor)
        self.preprocessor_label.setText(text)

    def set_output_label_text(self):
        """Describe the transformed data, or clear the label if there is none."""
        text = ""
        # Compare against None rather than relying on truthiness: a result
        # that exists but evaluates as falsy must still be described, in
        # agreement with the "applied" text of the preprocessor label.
        if self.transformed_data is not None:
            text = "Output data includes {:,} features.".format(
                len(self.transformed_data.domain.attributes))
        self.output_label.setText(text)

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """Store the new input data and refresh the input label."""
        self.data = data
        self.set_input_label_text()

    @Inputs.preprocessor
    def set_preprocessor(self, preprocessor):
        """Store the new preprocessor; labels are refreshed in `apply`."""
        self.preprocessor = preprocessor

    def handleNewSignals(self):
        """Recompute the output once new input signals have been set."""
        self.apply()

    def apply(self):
        """Apply the preprocessor to the data and send the result.

        Any exception raised by the preprocessor is shown as `pp_error`
        and `None` is sent on the output.
        """
        self.clear_messages()
        self.transformed_data = None
        if self.data is not None and self.preprocessor is not None:
            try:
                self.transformed_data = self.preprocessor(self.data)
            except Exception as ex:  # pylint: disable=broad-except
                # Preprocessors are arbitrary callables; report any failure
                # in the widget instead of letting it propagate.
                self.Error.pp_error(ex)
        self.Outputs.transformed_data.send(self.transformed_data)

        self.set_preprocessor_label_text()
        self.set_output_label_text()

    def send_report(self):
        """Report the preprocessor, the input data and the transformed data."""
        if self.preprocessor is not None:
            self.report_items("Settings",
                              (("Preprocessor", self.preprocessor),))
        if self.data is not None:
            self.report_data("Data", self.data)
        if self.transformed_data is not None:
            self.report_data("Transformed data", self.transformed_data)


if __name__ == "__main__":
    # Manual demo: show the widget with iris data and a Discretize
    # preprocessor already applied.
    from AnyQt.QtWidgets import QApplication

    application = QApplication([])
    widget = OWTransform()
    widget.set_data(Table("iris"))
    widget.set_preprocessor(Discretize())
    # Inputs were set directly rather than through signals, so trigger
    # the recomputation by hand.
    widget.handleNewSignals()
    widget.show()
    application.exec_()
    widget.saveSettings()
92 changes: 92 additions & 0 deletions Orange/widgets/data/tests/test_owtransform.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Test methods with long descriptive names can omit docstrings
# pylint: disable=missing-docstring
from Orange.data import Table
from Orange.preprocess import Discretize
from Orange.preprocess.preprocess import Preprocess
from Orange.widgets.data.owtransform import OWTransform
from Orange.widgets.tests.base import WidgetTest
from Orange.widgets.unsupervised.owpca import OWPCA


class TestOWTransform(WidgetTest):
    # Tests for OWTransform: output and label texts for every combination
    # of inputs, interplay with OWPCA's preprocessor output, error
    # reporting, and report generation.

    def setUp(self):
        self.widget = self.create_widget(OWTransform)
        self.data = Table("iris")
        self.preprocessor = Discretize()

    def test_output(self):
        # send data and preprocessor
        self.send_signal(self.widget.Inputs.data, self.data)
        self.send_signal(self.widget.Inputs.preprocessor, self.preprocessor)
        output = self.get_output(self.widget.Outputs.transformed_data)
        self.assertIsInstance(output, Table)
        self.assertEqual("Input data with 150 instances and 4 features.",
                         self.widget.input_label.text())
        self.assertEqual("Preprocessor Discretize() applied.",
                         self.widget.preprocessor_label.text())
        self.assertEqual("Output data includes 4 features.",
                         self.widget.output_label.text())

        # remove preprocessor
        self.send_signal(self.widget.Inputs.preprocessor, None)
        output = self.get_output(self.widget.Outputs.transformed_data)
        self.assertIsNone(output)
        self.assertEqual("Input data with 150 instances and 4 features.",
                         self.widget.input_label.text())
        self.assertEqual("No preprocessor on input.",
                         self.widget.preprocessor_label.text())
        self.assertEqual("", self.widget.output_label.text())

        # send preprocessor
        self.send_signal(self.widget.Inputs.preprocessor, self.preprocessor)
        output = self.get_output(self.widget.Outputs.transformed_data)
        self.assertIsInstance(output, Table)
        self.assertEqual("Input data with 150 instances and 4 features.",
                         self.widget.input_label.text())
        self.assertEqual("Preprocessor Discretize() applied.",
                         self.widget.preprocessor_label.text())
        self.assertEqual("Output data includes 4 features.",
                         self.widget.output_label.text())

        # remove data
        self.send_signal(self.widget.Inputs.data, None)
        output = self.get_output(self.widget.Outputs.transformed_data)
        self.assertIsNone(output)
        self.assertEqual("No data on input.", self.widget.input_label.text())
        self.assertEqual("Preprocessor Discretize() on input.",
                         self.widget.preprocessor_label.text())
        self.assertEqual("", self.widget.output_label.text())

        # remove preprocessor
        self.send_signal(self.widget.Inputs.preprocessor, None)
        self.assertEqual("No data on input.", self.widget.input_label.text())
        self.assertEqual("No preprocessor on input.",
                         self.widget.preprocessor_label.text())
        self.assertEqual("", self.widget.output_label.text())

    def test_input_pca_preprocessor(self):
        # The preprocessor produced by OWPCA must be usable as input here.
        owpca = self.create_widget(OWPCA)
        self.send_signal(owpca.Inputs.data, self.data, widget=owpca)
        owpca.components_spin.setValue(2)
        pp = self.get_output(owpca.Outputs.preprocessor, widget=owpca)
        # assertIsInstance, not assertIsNotNone: the second argument of
        # assertIsNotNone is only a failure message, so the type would
        # never actually be checked.
        self.assertIsInstance(pp, Preprocess)

        self.send_signal(self.widget.Inputs.data, self.data)
        self.send_signal(self.widget.Inputs.preprocessor, pp)
        output = self.get_output(self.widget.Outputs.transformed_data)
        self.assertIsInstance(output, Table)
        self.assertEqual(output.X.shape, (len(self.data), 2))

    def test_error_transforming(self):
        # A bare Preprocess() is expected to fail when called; the widget
        # must show the error, output None, and clear the error once the
        # data is removed.
        self.send_signal(self.widget.Inputs.data, self.data)
        self.send_signal(self.widget.Inputs.preprocessor, Preprocess())
        self.assertTrue(self.widget.Error.pp_error.is_shown())
        output = self.get_output(self.widget.Outputs.transformed_data)
        self.assertIsNone(output)
        self.send_signal(self.widget.Inputs.data, None)
        self.assertFalse(self.widget.Error.pp_error.is_shown())

    def test_send_report(self):
        # Smoke test: reporting must not raise with or without data.
        self.send_signal(self.widget.Inputs.data, self.data)
        self.widget.report_button.click()
        self.send_signal(self.widget.Inputs.data, None)
        self.widget.report_button.click()
8 changes: 7 additions & 1 deletion Orange/widgets/unsupervised/owpca.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from Orange.data import Table, Domain, StringVariable, ContinuousVariable
from Orange.data.sql.table import SqlTable, AUTO_DL_LIMIT
from Orange.preprocess import Normalize
from Orange.preprocess.preprocess import Preprocess, ApplyDomain
from Orange.projection import PCA, TruncatedSVD
from Orange.widgets import widget, gui, settings
from Orange.widgets.widget import Input, Output
Expand Down Expand Up @@ -44,6 +45,7 @@ class Outputs:
transformed_data = Output("Transformed data", Table)
components = Output("Components", Table)
pca = Output("PCA", PCA, dynamic=False)
preprocessor = Output("Preprocessor", Preprocess)

settingsHandler = settings.DomainContextHandler()

Expand Down Expand Up @@ -290,6 +292,7 @@ def clear_outputs(self):
self.Outputs.transformed_data.send(None)
self.Outputs.components.send(None)
self.Outputs.pca.send(self._pca_projector)
self.Outputs.preprocessor.send(None)

def get_model(self):
if self.rpca is None:
Expand Down Expand Up @@ -455,7 +458,7 @@ def _update_axis(self):
axis.setTicks([[(i, str(i+1)) for i in range(0, p, d)]])

def commit(self):
transformed = components = None
transformed = components = pp = None
if self._pca is not None:
if self._transformed is None:
# Compute the full transform (MAX_COMPONENTS components) only once.
Expand All @@ -479,10 +482,13 @@ def commit(self):
metas=metas)
components.name = 'components'

pp = ApplyDomain(domain, "PCA")

self._pca_projector.component = self.ncomponents
self.Outputs.transformed_data.send(transformed)
self.Outputs.components.send(components)
self.Outputs.pca.send(self._pca_projector)
self.Outputs.preprocessor.send(pp)

def send_report(self):
if self.data is None:
Expand Down
14 changes: 14 additions & 0 deletions Orange/widgets/unsupervised/tests/test_owpca.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import scipy.sparse as sp

from Orange.data import Table, Domain, ContinuousVariable, TimeVariable
from Orange.preprocess.preprocess import Preprocess
from Orange.widgets.tests.base import WidgetTest
from Orange.widgets.unsupervised.owpca import OWPCA, DECOMPOSITIONS

Expand Down Expand Up @@ -131,3 +132,16 @@ def test_do_not_mask_features(self):
self.widget.set_data(data)
ndata = Table("iris.tab")
self.assertEqual(data.domain[0], ndata.domain[0])

def test_output_preprocessor(self):
    # The widget's preprocessor output, applied to a subsample of the
    # input, must reproduce the matching rows of the widget's own
    # transformed output.
    iris = Table("iris")
    self.send_signal(self.widget.Inputs.data, iris)

    preprocessor = self.get_output(self.widget.Outputs.preprocessor)
    self.assertIsInstance(preprocessor, Preprocess)

    # Every tenth row of the input.
    subsample = preprocessor(iris[::10])
    self.assertIsInstance(subsample, Table)
    self.assertEqual(subsample.X.shape, (15, 2))

    widget_output = self.get_output(self.widget.Outputs.transformed_data)
    np.testing.assert_array_equal(subsample.X, widget_output.X[::10])

    subsample_names = [a.name for a in subsample.domain.attributes]
    output_names = [m.name for m in widget_output.domain.attributes]
    self.assertEqual(subsample_names, output_names)

0 comments on commit 8690622

Please sign in to comment.