From 1d80036fd7b1d4091e8bdd814336e0dc26e81d57 Mon Sep 17 00:00:00 2001 From: Marko Toplak Date: Thu, 7 Dec 2023 16:32:33 +0100 Subject: [PATCH 1/3] Fix impute.Model for derived domains The compute_value was missing transformation into the variable space it was working upon. --- Orange/preprocess/impute.py | 10 ++++++---- Orange/tests/test_impute.py | 23 ++++++++++++++++++++++- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/Orange/preprocess/impute.py b/Orange/preprocess/impute.py index c67c4a97434..21005b87247 100644 --- a/Orange/preprocess/impute.py +++ b/Orange/preprocess/impute.py @@ -172,7 +172,7 @@ def copy(self): return FixedValueByType(*self.defaults.values()) -class ReplaceUnknownsModel(Reprable): +class ReplaceUnknownsModel(Transformation): """ Replace unknown values with predicted values using a `Orange.base.Model` @@ -185,15 +185,14 @@ class ReplaceUnknownsModel(Reprable): """ def __init__(self, variable, model): assert model.domain.class_var == variable - self.variable = variable + super().__init__(variable) self.model = model def __call__(self, data): if isinstance(data, Orange.data.Instance): data = Orange.data.Table.from_list(data.domain, [data]) domain = data.domain - column = data.get_column(self.variable, copy=True) - + column = data.transform(self._target_domain).get_column(self.variable, copy=True) mask = np.isnan(column) if not np.any(mask): return column @@ -207,6 +206,9 @@ def __call__(self, data): column[mask] = predicted return column + def transform(self, c): + assert False, "abstract in Transformation, never used here" + def __eq__(self, other): return type(self) is type(other) \ and self.variable == other.variable \ diff --git a/Orange/tests/test_impute.py b/Orange/tests/test_impute.py index 9c6fba5b336..82d217ebe70 100644 --- a/Orange/tests/test_impute.py +++ b/Orange/tests/test_impute.py @@ -9,7 +9,7 @@ from Orange import preprocess from Orange.preprocess import impute, SklImpute from Orange import data -from 
Orange.data import Unknown, Table +from Orange.data import Unknown, Table, Domain from Orange.classification import MajorityLearner, SimpleTreeLearner from Orange.regression import MeanLearner @@ -293,6 +293,27 @@ def test_bad_domain(self): self.assertRaises(ValueError, imputer, data=table, variable=table.domain[0]) + def test_missing_imputed_columns(self): + housing = Table("housing") + + learner = SimpleTreeLearner(min_instances=10, max_depth=10) + method = preprocess.impute.Model(learner) + + ivar = method(housing, housing.domain.attributes[0]) + imputed = housing.transform( + Domain([ivar], + housing.domain.class_var) + ) + removed_imputed = imputed.transform( + Domain([], housing.domain.class_var)) + + r = removed_imputed.transform(imputed.domain) + + no_class = removed_imputed.transform(Domain(removed_imputed.domain.attributes, None)) + model_prediction_for_unknowns = ivar.compute_value.model(no_class[0]) + + np.testing.assert_equal(r.X, model_prediction_for_unknowns) + class TestRandom(unittest.TestCase): def test_replacement(self): From 3752aeaafbd5f8a33966b495d3ac96fbe89e1d35 Mon Sep 17 00:00:00 2001 From: Vesna Tanko Date: Fri, 8 Dec 2023 13:46:59 +0100 Subject: [PATCH 2/3] ReplaceUnknownsModel: Handle removed target imputation --- Orange/preprocess/impute.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Orange/preprocess/impute.py b/Orange/preprocess/impute.py index 21005b87247..5f3bb972e1b 100644 --- a/Orange/preprocess/impute.py +++ b/Orange/preprocess/impute.py @@ -2,6 +2,7 @@ import scipy.sparse as sp import Orange.data +from Orange.data.table import DomainTransformationError from Orange.statistics import distribution, basic_stats from Orange.util import Reprable from .transformation import Transformation, Lookup @@ -202,8 +203,12 @@ def __call__(self, data): data = data.transform( Orange.data.Domain(domain.attributes, None, domain.metas) ) - predicted = self.model(data[mask]) - column[mask] = predicted + try: + 
column[mask] = self.model(data[mask]) + except DomainTransformationError: + # owpredictions showed error when imputing target using a Model + # based imputer (owpredictions removes the target before predicting) + pass return column def transform(self, c): From 0547537b76b0d9e50778423a869dcf284b5ec470 Mon Sep 17 00:00:00 2001 From: Marko Toplak Date: Fri, 8 Dec 2023 15:47:33 +0100 Subject: [PATCH 3/3] satisfy trubar --- i18n/si.jaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/i18n/si.jaml b/i18n/si.jaml index 4ca4743d9a0..a04703c0d76 100644 --- a/i18n/si.jaml +++ b/i18n/si.jaml @@ -2685,6 +2685,9 @@ preprocess/impute.py: def `__call__`: "'{}' has no values": false "'{}' has an unknown distribution": false + class `ReplaceUnknownsModel`: + def `transform`: + abstract in Transformation, never used here: false preprocess/normalize.py: Normalizer: false preprocess/preprocess.py: