diff --git a/Orange/data/variable.py b/Orange/data/variable.py index cec860c36a3..88f979f259d 100644 --- a/Orange/data/variable.py +++ b/Orange/data/variable.py @@ -1,6 +1,7 @@ import re import warnings from collections.abc import Iterable +from typing import Sequence from datetime import datetime, timedelta, timezone from numbers import Number, Real, Integral @@ -168,6 +169,44 @@ def __new__(cls, variable, value=Unknown): self._value = value return self + @staticmethod + def _as_values_primitive(variable, data) -> Sequence['Value']: + assert variable.is_primitive() + _Value = Value + _float_new = float.__new__ + res = [Value(variable, np.nan)] * len(data) + for i, v in enumerate(data): + v = _float_new(_Value, v) + v.variable = variable + res[i] = v + return res + + @staticmethod + def _as_values_non_primitive(variable, data) -> Sequence['Value']: + assert not variable.is_primitive() + _Value = Value + _float_new = float.__new__ + data_arr = np.array(data, dtype=object) + NA = data_arr == variable.Unknown + fdata = np.full(len(data), np.finfo(float).min) + fdata[NA] = np.nan + res = [Value(variable, Variable.Unknown)] * len(data) + for i, (v, fval) in enumerate(zip(data, fdata)): + val = _float_new(_Value, fval) + val.variable = variable + val._value = v + res[i] = val + return res + + @staticmethod + def _as_values(variable, data): + """Equivalent but faster than `[Value(variable, v) for v in data]` + """ + if variable.is_primitive(): + return Value._as_values_primitive(variable, data) + else: + return Value._as_values_non_primitive(variable, data) + def __init__(self, _, __=Unknown): # __new__ does the job, pylint: disable=super-init-not-called pass diff --git a/Orange/tests/test_value.py b/Orange/tests/test_value.py index 30a68c4d086..ddf30cbab97 100644 --- a/Orange/tests/test_value.py +++ b/Orange/tests/test_value.py @@ -64,3 +64,13 @@ def test_hash(self): self.assertTrue(val == v and hash(val) == hash(v)) val = Value(DiscreteVariable("var", ["red", "green", 
"blue"]), 1) self.assertRaises(TypeError, hash, val) + + def test_as_values(self): + x = ContinuousVariable("x") + values = Value._as_values(x, [0., 1., 2.]) # pylint: disable=protected-access + self.assertIsInstance(values[0], Value) + self.assertEqual(values[0], 0) + s = StringVariable("s") + values = Value._as_values(s, ["a", "b", ""]) # pylint: disable=protected-access + self.assertIsInstance(values[0], Value) + self.assertEqual(values[0], "a") diff --git a/Orange/widgets/data/oweditdomain.py b/Orange/widgets/data/oweditdomain.py index fbf0df8f36f..db82a431d95 100644 --- a/Orange/widgets/data/oweditdomain.py +++ b/Orange/widgets/data/oweditdomain.py @@ -40,7 +40,7 @@ from Orange.preprocess.transformation import Transformation, Identity, Lookup from Orange.widgets import widget, gui, settings -from Orange.widgets.utils import itemmodels +from Orange.widgets.utils import itemmodels, ftry from Orange.widgets.utils.buttons import FixedSizeButton from Orange.widgets.utils.itemmodels import signal_blocking from Orange.widgets.utils.widgetpreview import WidgetPreview @@ -50,8 +50,6 @@ MArray = np.ma.MaskedArray DType = Union[np.dtype, type] -A = TypeVar("A") # pylint: disable=invalid-name -B = TypeVar("B") # pylint: disable=invalid-name V = TypeVar("V", bound=Orange.data.Variable) # pylint: disable=invalid-name H = TypeVar("H", bound=Hashable) # pylint: disable=invalid-name @@ -2631,21 +2629,6 @@ def apply_transform_string(var, trs): return variable -def ftry( - func: Callable[..., A], - error: Union[Type[BaseException], Tuple[Type[BaseException]]], - default: B -) -> Callable[..., Union[A, B]]: - """ - Wrap a `func` such that if `errors` occur `default` is returned instead.""" - def wrapper(*args, **kwargs): - try: - return func(*args, **kwargs) - except error: - return default - return wrapper - - class DictMissingConst(dict): """ `dict` with a constant for `__missing__()` value. 
diff --git a/Orange/widgets/data/owfeatureconstructor.py b/Orange/widgets/data/owfeatureconstructor.py index 5413e144a5d..ec84ae2df98 100644 --- a/Orange/widgets/data/owfeatureconstructor.py +++ b/Orange/widgets/data/owfeatureconstructor.py @@ -18,7 +18,7 @@ from traceback import format_exception_only from collections import namedtuple, OrderedDict -from itertools import chain, count +from itertools import chain, count, starmap from typing import List, Dict, Any import numpy as np @@ -32,15 +32,19 @@ from orangewidget.utils.combobox import ComboBoxSearch import Orange +from Orange.data import Variable, Table, Value, Instance from Orange.data.util import get_unique_names from Orange.widgets import gui from Orange.widgets.settings import ContextSetting, DomainContextHandler -from Orange.widgets.utils import itemmodels, vartype +from Orange.widgets.utils import ( + itemmodels, vartype, ftry, unique_everseen as unique +) from Orange.widgets.utils.sql import check_sql_input from Orange.widgets import report from Orange.widgets.utils.widgetpreview import WidgetPreview from Orange.widgets.widget import OWWidget, Msg, Input, Output + FeatureDescriptor = \ namedtuple("FeatureDescriptor", ["name", "expression"]) @@ -729,11 +733,14 @@ def duplicateFeature(self): @staticmethod def check_attrs_values(attr, data): - for i in range(len(data)): - for var in attr: - if not math.isnan(data[i, var]) \ - and int(data[i, var]) >= len(var.values): - return var.name + for var in attr: + col, _ = data.get_column_view(var) + mask = ~np.isnan(col) + grater_or_equal = np.greater_equal( + col, len(var.values), out=mask, where=mask + ) + if grater_or_equal.any(): + return var.name return None def _validate_descriptors(self, desc): @@ -1162,25 +1169,59 @@ def __init__(self, expression, args, extra_env=None, cast=None, use_values=False self.mask_exceptions = True self.use_values = use_values - def __call__(self, instance, *_): - if isinstance(instance, Orange.data.Table): - return [self(inst) 
for inst in instance] + def __call__(self, table, *_): + if isinstance(table, Table): + return self.__call_table(table) else: - try: - args = [str(instance[var]) if var.is_string - else var.values[int(instance[var])] if var.is_discrete and not self.use_values - else instance[var] - for _, var in self.args] - y = self.func(*args) - # user's expression can contain arbitrary errors - # this also covers missing attributes - except: # pylint: disable=bare-except - if not self.mask_exceptions: - raise - return np.nan - if self.cast: - y = self.cast(y) - return y + return self.__call_instance(table) + + def __call_table(self, table): + try: + cols = [self.extract_column(table, var) for _, var in self.args] + except ValueError: + if self.mask_exceptions: + return np.full(len(table), np.nan) + else: + raise + + if not cols: + args = [()] * len(table) + else: + args = zip(*cols) + f = self.func + if self.mask_exceptions: + y = list(starmap(ftry(f, Exception, np.nan), args)) + else: + y = list(starmap(f, args)) + if self.cast is not None: + cast = self.cast + y = [cast(y_) for y_ in y] + return y + + def __call_instance(self, instance: Instance): + table = Table.from_numpy( + instance.domain, + np.array([instance.x]), + np.array([instance.y]), + np.array([instance.metas]), + ) + return self.__call_table(table)[0] + + def extract_column(self, table: Table, var: Variable): + data, _ = table.get_column_view(var) + if var.is_string: + return list(map(var.str_val, data)) + elif var.is_discrete and not self.use_values: + values = np.array([*var.values, None], dtype=object) + idx = data.astype(int) + idx[~np.isfinite(data)] = len(values) - 1 + return values[idx].tolist() + elif var.is_time: # time always needs Values due to str(val) formatting + return Value._as_values(var, data.tolist()) # pylint: disable=protected-access + elif not self.use_values: + return data.tolist() + else: + return Value._as_values(var, data.tolist()) # pylint: disable=protected-access def __reduce__(self): 
return type(self), (self.expression, self.args, @@ -1190,15 +1231,5 @@ def __repr__(self): return "{0.__name__}{1!r}".format(*self.__reduce__()) -def unique(seq): - seen = set() - unique_el = [] - for el in seq: - if el not in seen: - unique_el.append(el) - seen.add(el) - return unique_el - - if __name__ == "__main__": # pragma: no cover WidgetPreview(OWFeatureConstructor).run(Orange.data.Table("iris")) diff --git a/Orange/widgets/data/tests/test_owfeatureconstructor.py b/Orange/widgets/data/tests/test_owfeatureconstructor.py index 11147737598..0f708411f7a 100644 --- a/Orange/widgets/data/tests/test_owfeatureconstructor.py +++ b/Orange/widgets/data/tests/test_owfeatureconstructor.py @@ -305,6 +305,12 @@ def test_missing_variable(self): self.assertTrue(np.all(np.isnan(r))) self.assertTrue(np.isnan(f(data2[0]))) + def test_time_str(self): + data = Table.from_numpy(Domain([TimeVariable("T", have_date=True)]), [[0], [0]]) + f = FeatureFunc("str(T)", [("T", data.domain[0])]) + c = f(data) + self.assertEqual(c, ["1970-01-01", "1970-01-01"]) + def test_invalid_expression_variable(self): iris = Table("iris") f = FeatureFunc("1 / petal_length", diff --git a/Orange/widgets/utils/__init__.py b/Orange/widgets/utils/__init__.py index 8ff68a60994..90bee93ca33 100644 --- a/Orange/widgets/utils/__init__.py +++ b/Orange/widgets/utils/__init__.py @@ -3,7 +3,7 @@ import sys from collections import deque from typing import ( - TypeVar, Callable, Any, Iterable, Optional, Hashable, Type, Union + TypeVar, Callable, Any, Iterable, Optional, Hashable, Type, Union, Tuple ) from xml.sax.saxutils import escape @@ -92,6 +92,8 @@ def qname(type_: type) -> str: _T1 = TypeVar("_T1") # pylint: disable=invalid-name _E = TypeVar("_E", bound=enum.Enum) # pylint: disable=invalid-name +_A = TypeVar("_A") # pylint: disable=invalid-name +_B = TypeVar("_B") # pylint: disable=invalid-name def apply_all(seq, op): @@ -101,6 +103,22 @@ def apply_all(seq, op): deque(map(op, seq), maxlen=0) +def ftry( + func: 
Callable[..., _A], + error: Union[Type[BaseException], Tuple[Type[BaseException]]], + default: _B +) -> Callable[..., Union[_A, _B]]: + """ + Wrap `func` such that if `error` is raised, `default` is returned instead. + """ + def wrapper(*args, **kwargs): + try: + return func(*args, **kwargs) + except error: + return default + return wrapper + + def unique_everseen(iterable, key=None): # type: (Iterable[_T1], Optional[Callable[[_T1], Hashable]]) -> Iterable[_T1] """