Skip to content

Commit

Permalink
Merge pull request #5975 from ales-erjavec/feature-constructor-opt
Browse files Browse the repository at this point in the history
[ENH] Feature constructor optimization
  • Loading branch information
VesnaT authored May 23, 2022
2 parents 25ba55a + 92b2864 commit 67e9629
Show file tree
Hide file tree
Showing 6 changed files with 141 additions and 54 deletions.
39 changes: 39 additions & 0 deletions Orange/data/variable.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import re
import warnings
from collections.abc import Iterable
from typing import Sequence

from datetime import datetime, timedelta, timezone
from numbers import Number, Real, Integral
Expand Down Expand Up @@ -168,6 +169,44 @@ def __new__(cls, variable, value=Unknown):
self._value = value
return self

@staticmethod
def _as_values_primitive(variable, data) -> Sequence['Value']:
    """
    Build a list of `Value` instances for a primitive `variable`.

    Each item of `data` becomes the float part of one `Value`; the
    instances are created directly with `float.__new__`, bypassing
    `Value.__new__`.

    Parameters
    ----------
    variable : Variable
        Descriptor attached to every created value; must satisfy
        `variable.is_primitive()`.
    data : iterable
        Items accepted by `float`.

    Returns
    -------
    list of Value
    """
    assert variable.is_primitive()
    # Bind to locals; both names are used once per element in the loop.
    _Value = Value
    _float_new = float.__new__
    res = []
    append = res.append
    for v in data:
        val = _float_new(_Value, v)
        val.variable = variable
        # Assign `_value` too, so it is never left unset (the non-primitive
        # builder and `__new__` both assign it).
        # NOTE(review): `None` is assumed to be what `__new__` stores for
        # primitive variables -- confirm.
        val._value = None
        append(val)
    return res

@staticmethod
def _as_values_non_primitive(variable, data) -> Sequence['Value']:
    """
    Build a list of `Value` instances for a non-primitive `variable`.

    The original item is stored in `_value`. The float part is NaN for
    items equal to `variable.Unknown` and the smallest finite float
    (`np.finfo(float).min`) for all others. Instances are created directly
    with `float.__new__`, bypassing `Value.__new__`.

    Parameters
    ----------
    variable : Variable
        Descriptor attached to every created value; must not satisfy
        `variable.is_primitive()`.
    data : sequence
        Items to wrap; must support `len()`.

    Returns
    -------
    list of Value
    """
    assert not variable.is_primitive()
    # Bind to locals; both names are used once per element in the loop.
    _Value = Value
    _float_new = float.__new__
    # Compare all items against the unknown marker in one vectorised step.
    data_arr = np.array(data, dtype=object)
    is_unknown = data_arr == variable.Unknown
    fdata = np.full(len(data), np.finfo(float).min)
    fdata[is_unknown] = np.nan
    # Collect results directly; no placeholder instances are needed.
    res = []
    append = res.append
    for v, fval in zip(data, fdata):
        val = _float_new(_Value, fval)
        val.variable = variable
        val._value = v
        append(val)
    return res

@staticmethod
def _as_values(variable, data):
    """
    Convert `data` to a list of `Value` instances for `variable`.

    Equivalent to, but faster than, `[Value(variable, v) for v in data]`.
    """
    convert = (
        Value._as_values_primitive if variable.is_primitive()
        else Value._as_values_non_primitive
    )
    return convert(variable, data)

def __init__(self, _, __=Unknown):
# __new__ does the job, pylint: disable=super-init-not-called
# The two parameters are ignored; they are accepted so that the call
# signature matches `__new__(cls, variable, value=Unknown)`.
pass
Expand Down
10 changes: 10 additions & 0 deletions Orange/tests/test_value.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,13 @@ def test_hash(self):
self.assertTrue(val == v and hash(val) == hash(v))
val = Value(DiscreteVariable("var", ["red", "green", "blue"]), 1)
self.assertRaises(TypeError, hash, val)

def test_as_values(self):
    """`Value._as_values` wraps continuous and string data in `Value`s."""
    number_var = ContinuousVariable("x")
    converted = Value._as_values(number_var, [0., 1., 2.])  # pylint: disable=protected-access
    first = converted[0]
    self.assertIsInstance(first, Value)
    self.assertEqual(first, 0)

    text_var = StringVariable("s")
    converted = Value._as_values(text_var, ["a", "b", ""])  # pylint: disable=protected-access
    first = converted[0]
    self.assertIsInstance(first, Value)
    self.assertEqual(first, "a")
19 changes: 1 addition & 18 deletions Orange/widgets/data/oweditdomain.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@

from Orange.preprocess.transformation import Transformation, Identity, Lookup
from Orange.widgets import widget, gui, settings
from Orange.widgets.utils import itemmodels
from Orange.widgets.utils import itemmodels, ftry
from Orange.widgets.utils.buttons import FixedSizeButton
from Orange.widgets.utils.itemmodels import signal_blocking
from Orange.widgets.utils.widgetpreview import WidgetPreview
Expand All @@ -50,8 +50,6 @@
MArray = np.ma.MaskedArray
DType = Union[np.dtype, type]

# Generic type variables used in annotations.
# `A` and `B` stand for the wrapped function's result and the fallback value
# in `ftry`'s signature.
A = TypeVar("A") # pylint: disable=invalid-name
B = TypeVar("B") # pylint: disable=invalid-name
# `V` is restricted to `Orange.data.Variable` subtypes, `H` to hashable types.
V = TypeVar("V", bound=Orange.data.Variable) # pylint: disable=invalid-name
H = TypeVar("H", bound=Hashable) # pylint: disable=invalid-name

Expand Down Expand Up @@ -2631,21 +2629,6 @@ def apply_transform_string(var, trs):
return variable


def ftry(
        func: Callable[..., A],
        error: Union[Type[BaseException], Tuple[Type[BaseException], ...]],
        default: B
) -> Callable[..., Union[A, B]]:
    """
    Wrap `func` so that `default` is returned whenever it raises `error`.

    Parameters
    ----------
    func : callable
        The function to wrap.
    error : exception class or tuple of exception classes
        Exception type(s) to intercept; anything usable in an `except`
        clause. Other exceptions propagate unchanged.
    default : object
        Value returned in place of the intercepted exception.

    Returns
    -------
    callable
        A function that forwards all positional and keyword arguments to
        `func` and returns either its result or `default`.
    """
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except error:
            return default
    return wrapper


class DictMissingConst(dict):
"""
`dict` with a constant for `__missing__()` value.
Expand Down
101 changes: 66 additions & 35 deletions Orange/widgets/data/owfeatureconstructor.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

from traceback import format_exception_only
from collections import namedtuple, OrderedDict
from itertools import chain, count
from itertools import chain, count, starmap
from typing import List, Dict, Any

import numpy as np
Expand All @@ -32,15 +32,19 @@
from orangewidget.utils.combobox import ComboBoxSearch

import Orange
from Orange.data import Variable, Table, Value, Instance
from Orange.data.util import get_unique_names
from Orange.widgets import gui
from Orange.widgets.settings import ContextSetting, DomainContextHandler
from Orange.widgets.utils import itemmodels, vartype
from Orange.widgets.utils import (
itemmodels, vartype, ftry, unique_everseen as unique
)
from Orange.widgets.utils.sql import check_sql_input
from Orange.widgets import report
from Orange.widgets.utils.widgetpreview import WidgetPreview
from Orange.widgets.widget import OWWidget, Msg, Input, Output


# Lightweight record with two fields, `name` and `expression`.
FeatureDescriptor = \
namedtuple("FeatureDescriptor", ["name", "expression"])

Expand Down Expand Up @@ -729,11 +733,14 @@ def duplicateFeature(self):

@staticmethod
def check_attrs_values(attr, data):
# Return the name of a variable in `attr` that has a non-missing value in
# `data` greater than or equal to `len(var.values)`, or None if no variable
# has such a value.
# NOTE(review): two implementations appear back to back below (a per-cell
# loop, then a per-column vectorised one). This looks like a rendered diff in
# which the first loop is the version being replaced -- confirm before editing.
for i in range(len(data)):
for var in attr:
if not math.isnan(data[i, var]) \
and int(data[i, var]) >= len(var.values):
return var.name
for var in attr:
col, _ = data.get_column_view(var)
# True where the column holds a value (not NaN).
mask = ~np.isnan(col)
# `mask` is also the output buffer: the comparison is written only where
# `mask` is True, so NaN entries keep their initial False.
grater_or_equal = np.greater_equal(
col, len(var.values), out=mask, where=mask
)
if grater_or_equal.any():
return var.name
return None

def _validate_descriptors(self, desc):
Expand Down Expand Up @@ -1162,25 +1169,59 @@ def __init__(self, expression, args, extra_env=None, cast=None, use_values=False
self.mask_exceptions = True
self.use_values = use_values

def __call__(self, instance, *_):
# NOTE(review): two `__call__` headers and bodies are interleaved in this
# listing. It looks like a rendered diff in which the `instance`-based
# code is the version being replaced by the `table`-based one -- confirm
# before editing.
if isinstance(instance, Orange.data.Table):
return [self(inst) for inst in instance]
def __call__(self, table, *_):
# Dispatch on the argument type: a `Table` is evaluated as a whole,
# anything else is handed to the single-instance path.
if isinstance(table, Table):
return self.__call_table(table)
else:
try:
# Strings are passed as `str`, discrete values as their label from
# `var.values` (unless `use_values` is set), everything else as is.
args = [str(instance[var]) if var.is_string
else var.values[int(instance[var])] if var.is_discrete and not self.use_values
else instance[var]
for _, var in self.args]
y = self.func(*args)
# user's expression can contain arbitrary errors
# this also covers missing attributes
except: # pylint: disable=bare-except
if not self.mask_exceptions:
raise
return np.nan
if self.cast:
y = self.cast(y)
return y
return self.__call_instance(table)

def __call_table(self, table):
    """
    Evaluate the function once per row of `table`.

    One column is extracted for every variable in `self.args`, and
    `self.func` is called with the row-wise tuples of those columns.
    If `self.cast` is set, it is applied to each result.

    When `self.mask_exceptions` is true, a `ValueError` raised while
    extracting the columns yields an all-NaN array of `len(table)`, and
    any `Exception` raised by `self.func` for a row yields NaN for that
    row. Otherwise such errors propagate.

    Returns a list with one result per row, or a NumPy array in the
    masked `ValueError` case.
    """
    try:
        columns = [self.extract_column(table, var) for _, var in self.args]
    except ValueError:
        if not self.mask_exceptions:
            raise
        return np.full(len(table), np.nan)

    # With no argument columns there is nothing to zip, so the function
    # is called with an empty argument tuple for every row.
    rows = zip(*columns) if columns else [()] * len(table)

    func = self.func
    if self.mask_exceptions:
        func = ftry(func, Exception, np.nan)
    results = list(starmap(func, rows))

    cast = self.cast
    if cast is not None:
        results = [cast(value) for value in results]
    return results

def __call_instance(self, instance: Instance):
    """
    Evaluate the function on a single `instance`.

    The instance is wrapped in a one-row table built from its `domain`,
    `x`, `y` and `metas`, and the first element of the table result is
    returned.
    """
    domain = instance.domain
    x_row = np.array([instance.x])
    y_row = np.array([instance.y])
    metas_row = np.array([instance.metas])
    single_row = Table.from_numpy(domain, x_row, y_row, metas_row)
    result = self.__call_table(single_row)
    return result[0]

def extract_column(self, table: Table, var: Variable):
    """
    Return the column of `var` in `table` as a list of call arguments.

    The form of the items depends on the variable:

    - string variables: each entry passed through `var.str_val`;
    - discrete variables when `self.use_values` is false: the entry of
      `var.values` at the stored index, or None where the stored value
      is not finite;
    - time variables, and any other variable when `self.use_values` is
      true: `Value` instances;
    - otherwise: the plain column contents (`data.tolist()`).

    Parameters
    ----------
    table : Table
        Source of the column (via `table.get_column_view`).
    var : Variable
        The variable whose column is extracted.

    Returns
    -------
    list
    """
    data, _ = table.get_column_view(var)
    if var.is_string:
        return [var.str_val(item) for item in data]
    if var.is_discrete and not self.use_values:
        # The extra trailing None is the label used for missing entries.
        labels = np.array([*var.values, None], dtype=object)
        missing_idx = len(labels) - 1
        # Replace non-finite entries *before* converting to int. Casting
        # NaN/inf to an integer is undefined and raises a RuntimeWarning
        # on recent NumPy versions.
        idx = np.where(np.isfinite(data), data, missing_idx).astype(int)
        return labels[idx].tolist()
    # time always needs Values due to str(val) formatting
    if var.is_time or self.use_values:
        return Value._as_values(var, data.tolist())  # pylint: disable=protected-access
    return data.tolist()

def __reduce__(self):
return type(self), (self.expression, self.args,
Expand All @@ -1190,15 +1231,5 @@ def __repr__(self):
return "{0.__name__}{1!r}".format(*self.__reduce__())


def unique(seq):
    """Return the distinct items of `seq` as a list, in first-seen order.

    Items must be hashable.
    """
    # A dict keeps insertion order and keeps the first of any equal keys.
    return list(dict.fromkeys(seq))


# Script entry point: runs OWFeatureConstructor through WidgetPreview with
# the "iris" table as input.
if __name__ == "__main__": # pragma: no cover
WidgetPreview(OWFeatureConstructor).run(Orange.data.Table("iris"))
6 changes: 6 additions & 0 deletions Orange/widgets/data/tests/test_owfeatureconstructor.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,12 @@ def test_missing_variable(self):
self.assertTrue(np.all(np.isnan(r)))
self.assertTrue(np.isnan(f(data2[0])))

def test_time_str(self):
    """`str(T)` on a time variable yields the formatted date for each row."""
    time_var = TimeVariable("T", have_date=True)
    data = Table.from_numpy(Domain([time_var]), [[0], [0]])
    func = FeatureFunc("str(T)", [("T", data.domain[0])])
    result = func(data)
    self.assertEqual(result, ["1970-01-01", "1970-01-01"])

def test_invalid_expression_variable(self):
iris = Table("iris")
f = FeatureFunc("1 / petal_length",
Expand Down
20 changes: 19 additions & 1 deletion Orange/widgets/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import sys
from collections import deque
from typing import (
TypeVar, Callable, Any, Iterable, Optional, Hashable, Type, Union
TypeVar, Callable, Any, Iterable, Optional, Hashable, Type, Union, Tuple
)
from xml.sax.saxutils import escape

Expand Down Expand Up @@ -92,6 +92,8 @@ def qname(type_: type) -> str:

# Generic type variables used in annotations in this module.
_T1 = TypeVar("_T1") # pylint: disable=invalid-name
# Restricted to `enum.Enum` subtypes.
_E = TypeVar("_E", bound=enum.Enum) # pylint: disable=invalid-name
# `_A` and `_B` stand for the wrapped function's result and the fallback
# value in `ftry`'s signature.
_A = TypeVar("_A") # pylint: disable=invalid-name
_B = TypeVar("_B") # pylint: disable=invalid-name


def apply_all(seq, op):
Expand All @@ -101,6 +103,22 @@ def apply_all(seq, op):
deque(map(op, seq), maxlen=0)


def ftry(
        func: Callable[..., _A],
        error: Union[Type[BaseException], Tuple[Type[BaseException], ...]],
        default: _B
) -> Callable[..., Union[_A, _B]]:
    """
    Wrap `func` so that `default` is returned whenever it raises `error`.

    Parameters
    ----------
    func : callable
        The function to wrap.
    error : exception class or tuple of exception classes
        Exception type(s) to intercept; anything usable in an `except`
        clause. Other exceptions propagate unchanged.
    default : object
        Value returned in place of the intercepted exception.

    Returns
    -------
    callable
        A function that forwards all positional and keyword arguments to
        `func` and returns either its result or `default`.
    """
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except error:
            return default
    return wrapper


def unique_everseen(iterable, key=None):
# type: (Iterable[_T1], Optional[Callable[[_T1], Hashable]]) -> Iterable[_T1]
"""
Expand Down

0 comments on commit 67e9629

Please sign in to comment.