Skip to content

Commit

Permalink
Feature Constructor: Vectorize when possible
Browse files Browse the repository at this point in the history
  • Loading branch information
janezd committed Apr 21, 2022
1 parent 909ba5f commit 449bb82
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 7 deletions.
43 changes: 37 additions & 6 deletions Orange/widgets/data/owfeatureconstructor.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,12 @@ class FeatureEditor(QFrame):
""".lstrip()

FUNCTIONS = dict(chain([(key, val) for key, val in math.__dict__.items()
# Import functions from math, but if a function with the same name
# exists in np, take it from there instead
# TODO: We need to check that functions are equivalent
# TODO: Do we really want to import all functions from math?
FUNCTIONS = dict(chain([(key, np.__dict__.get(key, val))
for key, val in math.__dict__.items()
if not key.startswith("_")],
[(key, val) for key, val in builtins.__dict__.items()
if key in {"str", "float", "int", "len",
Expand Down Expand Up @@ -1095,9 +1100,12 @@ def make_lambda(expression, args, env=None):
__GLOBALS = {name: getattr(builtins, name) for name in __ALLOWED
if hasattr(builtins, name)}

__GLOBALS.update({name: getattr(math, name) for name in dir(math)
# TODO: See the TODOs above about checking that np and math functions are equivalent, etc. :)
__GLOBALS.update({name: getattr(np, name, getattr(math, name))
for name in dir(math)
if not name.startswith("_")})

# TODO: These functions are not shown in the widget's combo. Intentionally?
__GLOBALS.update({
"normalvariate": random.normalvariate,
"gauss": random.gauss,
Expand Down Expand Up @@ -1164,7 +1172,30 @@ def __init__(self, expression, args, extra_env=None, cast=None, use_values=False

def __call__(self, instance, *_):
if isinstance(instance, Orange.data.Table):
return [self(inst) for inst in instance]
y = None
try:
args = []
for _, var in self.args:
col = instance.get_column_view(var)[0]
if var.is_string:
# Strings can't be handled well with numpy.
# For instance, indexing a string column will return
# the i-th value instead of the i-th character of each value
break
if var.is_discrete and not self.use_values:
col = np.array(var.values)[col]
args.append(col)
else:
y = self.func(*args)
# NaNs are allowed, but infs come from division by zero etc., so a
# result containing inf is discarded. Note that np.sqrt(-1) is nan, not inf.
# TODO: catch warnings and fallback?
if np.any(np.isinf(y)):
y = None
except Exception as err: # pylint: disable=bare-except
pass
if y is None:
return [self(inst) for inst in instance]
else:
try:
args = [str(instance[var]) if var.is_string
Expand All @@ -1178,9 +1209,9 @@ def __call__(self, instance, *_):
if not self.mask_exceptions:
raise
return np.nan
if self.cast:
y = self.cast(y)
return y
if self.cast:
y = self.cast(y)
return y

def __reduce__(self):
return type(self), (self.expression, self.args,
Expand Down
2 changes: 1 addition & 1 deletion Orange/widgets/data/tests/test_owfeatureconstructor.py
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,7 @@ def test_report(self):
class TestFeatureEditor(unittest.TestCase):
def test_has_functions(self):
self.assertIs(FeatureEditor.FUNCTIONS["abs"], abs)
self.assertIs(FeatureEditor.FUNCTIONS["sqrt"], math.sqrt)
self.assertIs(FeatureEditor.FUNCTIONS["sqrt"], np.sqrt)


class FeatureConstructorHandlerTests(unittest.TestCase):
Expand Down

0 comments on commit 449bb82

Please sign in to comment.