From 449bb82c8a2dc2ab2380b709882744453770ce33 Mon Sep 17 00:00:00 2001 From: janezd Date: Wed, 20 Apr 2022 21:24:36 +0200 Subject: [PATCH] Feature Constructor: Vectorize when possible --- Orange/widgets/data/owfeatureconstructor.py | 43 ++++++++++++++++--- .../data/tests/test_owfeatureconstructor.py | 2 +- 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/Orange/widgets/data/owfeatureconstructor.py b/Orange/widgets/data/owfeatureconstructor.py index 5413e144a5d..8fddf6a1723 100644 --- a/Orange/widgets/data/owfeatureconstructor.py +++ b/Orange/widgets/data/owfeatureconstructor.py @@ -108,7 +108,12 @@ class FeatureEditor(QFrame): """.lstrip() - FUNCTIONS = dict(chain([(key, val) for key, val in math.__dict__.items() + # Import functions from math - but if a function with the same name + # exists in np, import it from there + # TODO: We need to check that functions are equivalent + # TODO: Do we really want to import all functions from math? + FUNCTIONS = dict(chain([(key, np.__dict__.get(key, val)) + for key, val in math.__dict__.items() if not key.startswith("_")], [(key, val) for key, val in builtins.__dict__.items() if key in {"str", "float", "int", "len", @@ -1095,9 +1100,12 @@ def make_lambda(expression, args, env=None): __GLOBALS = {name: getattr(builtins, name) for name in __ALLOWED if hasattr(builtins, name)} -__GLOBALS.update({name: getattr(math, name) for name in dir(math) +# TODO: See the above TODOs about checking that np and math functions are equivalent etc. :) +__GLOBALS.update({name: getattr(np, name, getattr(math, name)) + for name in dir(math) if not name.startswith("_")}) +# TODO: These functions are not shown in the widget's combo. Intentionally?
__GLOBALS.update({ "normalvariate": random.normalvariate, "gauss": random.gauss, @@ -1164,7 +1172,30 @@ def __init__(self, expression, args, extra_env=None, cast=None, use_values=False def __call__(self, instance, *_): if isinstance(instance, Orange.data.Table): - return [self(inst) for inst in instance] + y = None + try: + args = [] + for _, var in self.args: + col = instance.get_column_view(var)[0] + if var.is_string: + # Strings can't be handled well with numpy. + # For instance, indexing a string column will return + # the i-th value instead of the i-th character of each value + break + if var.is_discrete and not self.use_values: + col = np.array(var.values)[col] + args.append(col) + else: + y = self.func(*args) + # We allow nans, but infs come from division by zero etc. + # However, np.sqrt(-1) is nan. + # TODO: catch warnings and fall back? + if np.any(np.isinf(y)): + y = None + except Exception as err: # pylint: disable=broad-except + pass + if y is None: + return [self(inst) for inst in instance] else: try: args = [str(instance[var]) if var.is_string @@ -1178,9 +1209,9 @@ def __call__(self, instance, *_): if not self.mask_exceptions: raise return np.nan - if self.cast: - y = self.cast(y) - return y + if self.cast: + y = self.cast(y) + return y def __reduce__(self): return type(self), (self.expression, self.args, diff --git a/Orange/widgets/data/tests/test_owfeatureconstructor.py b/Orange/widgets/data/tests/test_owfeatureconstructor.py index 11147737598..5f01a022395 100644 --- a/Orange/widgets/data/tests/test_owfeatureconstructor.py +++ b/Orange/widgets/data/tests/test_owfeatureconstructor.py @@ -500,7 +500,7 @@ def test_report(self): class TestFeatureEditor(unittest.TestCase): def test_has_functions(self): self.assertIs(FeatureEditor.FUNCTIONS["abs"], abs) - self.assertIs(FeatureEditor.FUNCTIONS["sqrt"], math.sqrt) + self.assertIs(FeatureEditor.FUNCTIONS["sqrt"], np.sqrt) class FeatureConstructorHandlerTests(unittest.TestCase):