diff --git a/Orange/data/table.py b/Orange/data/table.py index a4afe265e45..6fb4efb9122 100644 --- a/Orange/data/table.py +++ b/Orange/data/table.py @@ -50,11 +50,25 @@ def __init__(self): # here instead of as a class variable of a Table so that caching also works # with descendants of Table. self.conversion_cache = None + self.domain_cache = None _thread_local = _ThreadLocal() +def _idcache_save(cachedict, keys, value): + cachedict[tuple(map(id, keys))] = \ + value, [weakref.ref(k) for k in keys] + + +def _idcache_restore(cachedict, keys): + shared, weakrefs = cachedict.get(tuple(map(id, keys)), (None, [])) + for r in weakrefs: + if r() is None: + return None + return shared + + class DomainTransformationError(Exception): pass @@ -179,6 +193,153 @@ def __init__(self, domain): setattr(self, v.name.replace(" ", "_"), v) +class _ArrayConversion: + + def __init__(self, target, src_cols, variables, is_sparse, source_domain): + self.target = target + self.src_cols = src_cols + self.is_sparse = is_sparse + self.subarray_from = self._can_copy_all(src_cols, source_domain) + self.variables = variables + dtype = np.float64 + if any(isinstance(var, StringVariable) for var in self.variables): + dtype = object + self.dtype = dtype + self.row_selection_needed = any(not isinstance(x, Integral) + for x in src_cols) + + def _can_copy_all(self, src_cols, source_domain): + n_src_attrs = len(source_domain.attributes) + if all(isinstance(x, Integral) and 0 <= x < n_src_attrs + for x in src_cols): + return "X" + if all(isinstance(x, Integral) and x < 0 for x in src_cols): + return "metas" + if all(isinstance(x, Integral) and x >= n_src_attrs + for x in src_cols): + return "Y" + + def get_subarray(self, source, row_indices, n_rows): + if not len(self.src_cols): + if self.is_sparse: + return sp.csr_matrix((n_rows, 0), dtype=source.X.dtype) + else: + return np.zeros((n_rows, 0), dtype=source.X.dtype) + + match_density = assure_array_sparse if self.is_sparse else assure_array_dense + n_src_attrs = len(source.domain.attributes) + if self.subarray_from == "X": + arr = match_density(_subarray(source.X, row_indices, self.src_cols)) + elif self.subarray_from == "metas": + arr = match_density(_subarray(source.metas, row_indices, + [-1 - x for x in self.src_cols])) + elif self.subarray_from == "Y": + arr = match_density(_subarray( + source._Y, row_indices, + [x - n_src_attrs for x in self.src_cols])) + else: + assert False + if arr.dtype != self.dtype: + arr = arr.astype(self.dtype) + assert arr.ndim == 2 + return arr + + def get_columns(self, source, row_indices, n_rows, out=None, target_indices=None): + n_src_attrs = len(source.domain.attributes) + + data = [] + sp_col = [] + sp_row = [] + match_density = ( + assure_column_sparse if self.is_sparse else assure_column_dense + ) + + # converting to csc before instead of each column is faster + # do not convert if not required + if any(isinstance(x, int) for x in self.src_cols): + X = csc_matrix(source.X) if self.is_sparse else source.X + Y = csc_matrix(source._Y) if self.is_sparse else source._Y + + if self.row_selection_needed: + if row_indices is ...: + sourceri = source + else: + sourceri = source[row_indices] + + shared_cache = _thread_local.conversion_cache + for i, col in enumerate(self.src_cols): + if col is None: + col_array = match_density( + np.full((n_rows, 1), self.variables[i].Unknown) + ) + elif not isinstance(col, Integral): + if isinstance(col, SharedComputeValue): + shared = _idcache_restore(shared_cache, (col.compute_shared, source)) + if shared is None: + shared = col.compute_shared(sourceri) + _idcache_save(shared_cache, (col.compute_shared, source), shared) + col_array = match_density( + col(sourceri, shared_data=shared)) + else: + col_array = match_density(col(sourceri)) + elif col < 0: + col_array = match_density( + source.metas[row_indices, -1 - col] + ) + elif col < n_src_attrs: + col_array = match_density(X[row_indices, col]) + else: + col_array = match_density( + Y[row_indices, col - n_src_attrs] + ) + + if self.is_sparse: + # col_array should be coo matrix + data.append(col_array.data) + sp_col.append(np.full(len(col_array.data), i)) + sp_row.append(col_array.indices) # row indices should be same + else: + out[target_indices, i] = col_array + + if self.is_sparse: + # creating csr directly would need plenty of manual work which + # would probably slow down the process - conversion coo to csr + # is fast + out = sp.coo_matrix( + (np.hstack(data), (np.hstack(sp_row), np.hstack(sp_col))), + shape=(n_rows, len(self.src_cols)), + dtype=self.dtype + ) + out = out.tocsr() + + return out + + +class _FromTableConversion: + + def __init__(self, source, destination): + conversion = DomainConversion(source, destination) + + self.X = _ArrayConversion("X", conversion.attributes, + destination.attributes, conversion.sparse_X, + source) + self.Y = _ArrayConversion("Y", conversion.class_vars, + destination.class_vars, conversion.sparse_Y, + source) + self.metas = _ArrayConversion("metas", conversion.metas, + destination.metas, conversion.sparse_metas, + source) + + self.subarray = [] + self.columnwise = [] + + for part in [self.X, self.Y, self.metas]: + if part.subarray_from is None: + self.columnwise.append(part) + else: + self.subarray.append(part) + + # noinspection PyPep8Naming class Table(Sequence, Storage): __file__ = None @@ -318,124 +479,16 @@ def from_table(cls, domain, source, row_indices=...): :rtype: Orange.data.Table """ - def valid_refs(weakrefs): - for r in weakrefs: - if r() is None: - return False - return True - - def get_columns(row_indices, src_cols, n_rows, dtype=np.float64, - is_sparse=False, variables=[]): - if not len(src_cols): - if is_sparse: - return sp.csr_matrix((n_rows, 0), dtype=source.X.dtype) - else: - return np.zeros((n_rows, 0), dtype=source.X.dtype) - - # match density for subarrays - match_density = assure_array_sparse if is_sparse else assure_array_dense - n_src_attrs = len(source.domain.attributes) - if all(isinstance(x, Integral) and 0 <= x < n_src_attrs - for x in src_cols): - return match_density(_subarray(source.X, row_indices, src_cols)) - if all(isinstance(x, Integral) and x < 0 for x in src_cols): - arr = match_density(_subarray(source.metas, row_indices, - [-1 - x for x in src_cols])) - if arr.dtype != dtype: - return arr.astype(dtype) - return arr - if all(isinstance(x, Integral) and x >= n_src_attrs - for x in src_cols): - return match_density(_subarray( - source._Y, row_indices, - [x - n_src_attrs for x in src_cols])) - - # initialize arrays & set `match_density` for columns - # F-order enables faster writing to the array while accessing and - # matrix operations work with same speed (e.g. dot) - a = None if is_sparse else np.zeros( - (n_rows, len(src_cols)), order="F", dtype=dtype) - data = [] - sp_col = [] - sp_row = [] - match_density = ( - assure_column_sparse if is_sparse else assure_column_dense - ) - - # converting to csc before instead of each column is faster - # do not convert if not required - if any([isinstance(x, int) for x in src_cols]): - X = csc_matrix(source.X) if is_sparse else source.X - Y = csc_matrix(source._Y) if is_sparse else source._Y - - shared_cache = _thread_local.conversion_cache - for i, col in enumerate(src_cols): - if col is None: - col_array = match_density( - np.full((n_rows, 1), variables[i].Unknown) - ) - elif not isinstance(col, Integral): - if isinstance(col, SharedComputeValue): - shared, weakrefs = shared_cache.get( - (id(col.compute_shared), id(source)), - (None, None) - ) - if shared is None or not valid_refs(weakrefs): - shared, _ = shared_cache[(id(col.compute_shared), id(source))] = \ - col.compute_shared(source), \ - (weakref.ref(col.compute_shared), weakref.ref(source)) - - if row_indices is not ...: - col_array = match_density( - col(source, shared_data=shared)[row_indices]) - else: - col_array = match_density( - col(source, shared_data=shared)) - else: - if row_indices is not ...: - col_array = match_density(col(source)[row_indices]) - else: - col_array = match_density(col(source)) - elif col < 0: - col_array = match_density( - source.metas[row_indices, -1 - col] - ) - elif col < n_src_attrs: - col_array = match_density(X[row_indices, col]) - else: - col_array = match_density( - Y[row_indices, col - n_src_attrs] - ) - - if is_sparse: - # col_array should be coo matrix - data.append(col_array.data) - sp_col.append(np.full(len(col_array.data), i)) - sp_row.append(col_array.indices) # row indices should be same - else: - a[:, i] = col_array - - if is_sparse: - # creating csr directly would need plenty of manual work which - # would probably slow down the process - conversion coo to csr - # is fast - a = sp.coo_matrix( - (np.hstack(data), (np.hstack(sp_row), np.hstack(sp_col))), - shape=(n_rows, len(src_cols)), - dtype=dtype - ) - a = a.tocsr() - - return a + PART = 5000 new_cache = _thread_local.conversion_cache is None try: if new_cache: _thread_local.conversion_cache = {} + _thread_local.domain_cache = {} else: - cached, weakrefs = \ - _thread_local.conversion_cache.get((id(domain), id(source)), (None, None)) - if cached and valid_refs(weakrefs): + cached = _idcache_restore(_thread_local.conversion_cache, (domain, source)) + if cached is not None: return cached if domain is source.domain: table = cls.from_table_rows(source, row_indices) @@ -446,35 +499,81 @@ def get_columns(row_indices, src_cols, n_rows, dtype=np.float64, table = assure_domain_conversion_sparsity(table, source) return table - if isinstance(row_indices, slice): - n_rows = len(range(*row_indices.indices(source.X.shape[0]))) - elif row_indices is ...: + if row_indices is ...: n_rows = len(source) + elif isinstance(row_indices, slice): + row_indices_range = range(*row_indices.indices(source.X.shape[0])) + n_rows = len(row_indices_range) else: n_rows = len(row_indices) self = cls() self.domain = domain - conversion = DomainConversion(source.domain, domain) - self.X = get_columns(row_indices, conversion.attributes, n_rows, - is_sparse=conversion.sparse_X, - variables=domain.attributes) - if self.X.ndim == 1: - self.X = self.X.reshape(-1, len(self.domain.attributes)) - - self.Y = get_columns(row_indices, conversion.class_vars, n_rows, - is_sparse=conversion.sparse_Y, - variables=domain.class_vars) - - dtype = np.float64 - if any(isinstance(var, StringVariable) for var in domain.metas): - dtype = object - self.metas = get_columns(row_indices, conversion.metas, - n_rows, dtype, - is_sparse=conversion.sparse_metas, - variables=domain.metas) - if self.metas.ndim == 1: - self.metas = self.metas.reshape(-1, len(self.domain.metas)) + + table_conversion = \ + _idcache_restore(_thread_local.domain_cache, (domain, source.domain)) + if table_conversion is None: + table_conversion = _FromTableConversion(source.domain, domain) + _idcache_save(_thread_local.domain_cache, (domain, source.domain), + table_conversion) + + # if an array can be a subarray of the input table, this needs to be done + # on the whole table, because this avoids needless copies of contents + + for array_conv in table_conversion.subarray: + out = array_conv.get_subarray(source, row_indices, n_rows) + setattr(self, array_conv.target, out) + + parts = {} + + for array_conv in table_conversion.columnwise: + if array_conv.is_sparse: + parts[array_conv.target] = [] + else: + # F-order enables faster writing to the array while accessing and + # matrix operations work with same speed (e.g. dot) + parts[array_conv.target] = \ + np.zeros((n_rows, len(array_conv.src_cols)), + order="F", dtype=array_conv.dtype) + + if n_rows <= PART: + for array_conv in table_conversion.columnwise: + out = array_conv.get_columns(source, row_indices, n_rows, + parts[array_conv.target], + ...) + setattr(self, array_conv.target, out) + else: + i_done = 0 + + while i_done < n_rows: + target_indices = slice(i_done, min(n_rows, i_done + PART)) + if row_indices is ...: + source_indices = target_indices + elif isinstance(row_indices, slice): + r = row_indices_range[target_indices] + source_indices = slice(r.start, r.stop, r.step) + else: + source_indices = row_indices[target_indices] + part_rows = min(n_rows, i_done+PART) - i_done + + for array_conv in table_conversion.columnwise: + out = array_conv.get_columns(source, source_indices, part_rows, + parts[array_conv.target], + target_indices) + if array_conv.is_sparse: # dense arrays are populated in-place + parts[array_conv.target].append(out) + + i_done += PART + + # clear cache after a part is done + if new_cache: + _thread_local.conversion_cache = {} + + for array_conv in table_conversion.columnwise: + cparts = parts[array_conv.target] + out = cparts if not array_conv.is_sparse else sp.vstack(cparts) + setattr(self, array_conv.target, out) + if source.has_weights(): self.W = source.W[row_indices] else: @@ -485,12 +584,12 @@ def get_columns(row_indices, src_cols, n_rows, dtype=np.float64, else: cls._init_ids(self) self.attributes = getattr(source, 'attributes', {}) - _thread_local.conversion_cache[(id(domain), id(source))] = \ - self, (weakref.ref(domain), weakref.ref(source)) + _idcache_save(_thread_local.conversion_cache, (domain, source), self) return self finally: if new_cache: _thread_local.conversion_cache = None + _thread_local.domain_cache = None def transform(self, domain): """ @@ -1924,6 +2023,13 @@ def _optimize_indices(indices, maxlen): if indices is ...: return slice(None, None, 1) + # a very common case for column selection + if len(indices) == 1 and not isinstance(indices[0], bool): + if indices[0] >= 0: + return slice(indices[0], indices[0] + 1, 1) + else: + return slice(indices[0], indices[0] - 1, -1) + if len(indices) >= 1: indices = np.asarray(indices) if indices.dtype != bool: diff --git a/Orange/data/util.py b/Orange/data/util.py index f1b58874e8f..c0245e58d52 100644 --- a/Orange/data/util.py +++ b/Orange/data/util.py @@ -155,6 +155,9 @@ def assure_column_sparse(a): def assure_column_dense(a): + # quick check and exit for the most common case + if isinstance(a, np.ndarray) and len(a.shape) == 1: + return a a = assure_array_dense(a) # column assignments must be (n, ) return a.reshape(-1) diff --git a/Orange/preprocess/transformation.py b/Orange/preprocess/transformation.py index b09e6810b53..01ea719b5ce 100644 --- a/Orange/preprocess/transformation.py +++ b/Orange/preprocess/transformation.py @@ -17,6 +17,12 @@ def __init__(self, variable): """ self.variable = variable + if self.variable is not None: + if self.variable.is_primitive(): + self.need_domain = Domain([self.variable]) + else: + self.need_domain = Domain([], metas=[self.variable]) + def __call__(self, data): """ Return transformed column from the data by extracting the column view @@ -25,13 +31,10 @@ def __call__(self, data): inst = isinstance(data, Instance) if inst: data = Table.from_list(data.domain, [data]) + data = data.transform(self.need_domain) if self.variable.is_primitive(): - domain = Domain([self.variable]) - data = Table.from_table(domain, data) col = data.X else: - domain = Domain([], metas=[self.variable]) - data = Table.from_table(domain, data) col = data.metas if not sp.issparse(col): col = col.squeeze(axis=1) diff --git a/Orange/tests/test_data_util.py b/Orange/tests/test_data_util.py index efe28f3c40f..55a81a1f48a 100644 --- a/Orange/tests/test_data_util.py +++ b/Orange/tests/test_data_util.py @@ -53,7 +53,7 @@ def test_with_row_indices(self): def test_single_call(self): obj = DummyPlus(Mock(return_value=1)) self.assertEqual(obj.compute_shared.call_count, 0) - data = Orange.data.Table("iris") + data = Orange.data.Table("iris")[45:55] # two classes domain = Orange.data.Domain([at.copy(compute_value=obj) for at in data.domain.attributes], data.domain.class_vars) diff --git a/Orange/tests/test_table.py b/Orange/tests/test_table.py index a0ed922a6af..86ac5dc101f 100644 --- a/Orange/tests/test_table.py +++ b/Orange/tests/test_table.py @@ -1643,16 +1643,13 @@ def test_can_filter_row_with_slice_from_table_rows(self, from_table_rows): new_table, self.table, rows=slice_) from_table_rows.assert_called() - @patch.object(Table, "from_table_rows", wraps=Table.from_table_rows) - def test_can_filter_row_with_slice_from_table(self, from_table_rows): + def test_can_filter_row_with_slice_from_table(self): # calling from_table with a domain copy will use indexing in from_table for slice_ in self.interesting_slices: - from_table_rows.reset_mock() new_table = data.Table.from_table( self.domain.copy(), self.table, row_indices=slice_) self.assert_table_with_filter_matches( new_table, self.table, rows=slice_) - from_table_rows.assert_not_called() def test_can_use_attributes_as_new_columns(self): a, _, _ = column_sizes(self.table) @@ -1795,28 +1792,18 @@ def test_from_table_shared_compute_value(self): ContinuousVariable( name=at.name, compute_value=PreprocessSharedComputeValue( - None, PreprocessShared(iris.domain, None) + i, None, PreprocessShared(iris.domain, None) ) ) - for at in iris.domain.attributes + for i, at in enumerate(iris.domain.attributes) ] ) new_table = Table.from_table(d1, iris) - np.testing.assert_array_equal( - new_table.X[:3], - [[5.1, 5.1, 5.1, 5.1], - [4.9, 4.9, 4.9, 4.9], - [4.7, 4.7, 4.7, 4.7]] - ) + np.testing.assert_array_equal(new_table.X, iris.X.todense() * 2) new_table = Table.from_table(d1, iris, row_indices=[0, 1, 2]) - np.testing.assert_array_equal( - new_table.X[:3], - [[5.1, 5.1, 5.1, 5.1], - [4.9, 4.9, 4.9, 4.9], - [4.7, 4.7, 4.7, 4.7]] - ) + np.testing.assert_array_equal(new_table.X, iris.X.todense()[:3] * 2) def assert_table_with_filter_matches( self, new_table, old_table, @@ -1972,7 +1959,7 @@ def test_optimize_indices(self): # single element self.assertEqual(_optimize_indices([1], 2), slice(1, 2, 1)) - np.testing.assert_equal(_optimize_indices([1], 1), [1]) + self.assertEqual(_optimize_indices([-2], 5), slice(-2, -3, -1)) class TableElementAssignmentTest(TableTests): @@ -2729,7 +2716,7 @@ def test_from_table_sparse_metas_with_strings(self): class ConcurrencyTests(unittest.TestCase): def test_from_table_non_blocking(self): - iris = Table("iris") + iris = Table("iris")[:10] def slow_compute_value(d): sleep(0.1) @@ -2762,9 +2749,10 @@ def __init__(self, domain, callback): self.callback = callback def __call__(self, data_): - self.callback(data_) - data_.transform(self.domain) - return data_.X[:, 0] + if self.callback: + self.callback(data_) + transformed = data_.transform(self.domain) + return transformed.X[:, 0] * 2 class PreprocessShared: @@ -2776,21 +2764,22 @@ def __init__(self, domain, callback): def __call__(self, data_): if self.callback: self.callback(data_) - data_.transform(self.domain) - return True + transformed = data_.transform(self.domain) + return transformed.X * 2 class PreprocessSharedComputeValue(SharedComputeValue): - def __init__(self, callback, shared): + def __init__(self, col, callback, shared): super().__init__(compute_shared=shared) + self.col = col self.callback = callback # pylint: disable=arguments-differ def compute(self, data_, shared_data): if self.callback: self.callback(data_) - return data_.X[:, 0] + return shared_data[:, self.col] def preprocess_domain_single(domain, callback): @@ -2808,8 +2797,8 @@ def preprocess_domain_shared(domain, callback, callback_shared): shared = PreprocessShared(domain, callback_shared) return Domain([ ContinuousVariable(name=at.name, - compute_value=PreprocessSharedComputeValue(callback, shared)) - for at in domain.attributes]) + compute_value=PreprocessSharedComputeValue(i, callback, shared)) + for i, at in enumerate(domain.attributes)]) def preprocess_domain_single_stupid(domain, callback): @@ -2826,21 +2815,23 @@ def preprocess_domain_single_stupid(domain, callback): class EfficientTransformTests(unittest.TestCase): def setUp(self): - self.iris = Table("iris") + self.iris = Table("iris")[:10] def test_simple(self): call_cv = Mock() d1 = preprocess_domain_single(self.iris.domain, call_cv) - self.iris.transform(d1) + t = self.iris.transform(d1) self.assertEqual(4, call_cv.call_count) + np.testing.assert_equal(t.X, self.iris.X * 2) def test_shared(self): call_cv = Mock() call_shared = Mock() d1 = preprocess_domain_shared(self.iris.domain, call_cv, call_shared) - self.iris.transform(d1) + t = self.iris.transform(d1) self.assertEqual(4, call_cv.call_count) self.assertEqual(1, call_shared.call_count) + np.testing.assert_equal(t.X, self.iris.X * 2) def test_simple_simple_shared(self): call_cv = Mock() @@ -2848,9 +2839,10 @@ def test_simple_simple_shared(self): d2 = preprocess_domain_single(d1, call_cv) call_shared = Mock() d3 = preprocess_domain_shared(d2, call_cv, call_shared) - self.iris.transform(d3) + t = self.iris.transform(d3) self.assertEqual(1, call_shared.call_count) self.assertEqual(12, call_cv.call_count) + np.testing.assert_equal(t.X, self.iris.X * 2**3) def test_simple_simple_shared_simple(self): call_cv = Mock() @@ -2859,9 +2851,10 @@ def test_simple_simple_shared_simple(self): call_shared = Mock() d3 = preprocess_domain_shared(d2, call_cv, call_shared) d4 = preprocess_domain_single(d3, call_cv) - self.iris.transform(d4) + t = self.iris.transform(d4) self.assertEqual(1, call_shared.call_count) self.assertEqual(16, call_cv.call_count) + np.testing.assert_equal(t.X, self.iris.X * 2**4) def test_simple_simple_shared_simple_shared_simple(self): call_cv = Mock() @@ -2872,16 +2865,18 @@ def test_simple_simple_shared_simple_shared_simple(self): d4 = preprocess_domain_single(d3, call_cv) d5 = preprocess_domain_shared(d4, call_cv, call_shared) d6 = preprocess_domain_single(d5, call_cv) - self.iris.transform(d6) + t = self.iris.transform(d6) self.assertEqual(2, call_shared.call_count) self.assertEqual(24, call_cv.call_count) + np.testing.assert_equal(t.X, self.iris.X * 2**6) def test_simple_simple_stupid(self): call_cv = Mock() d1 = preprocess_domain_single_stupid(self.iris.domain, call_cv) d2 = preprocess_domain_single_stupid(d1, call_cv) - self.iris.transform(d2) + t = self.iris.transform(d2) self.assertEqual(8, call_cv.call_count) + np.testing.assert_equal(t.X[:, 0], self.iris.X[:, 0] * 4) if __name__ == "__main__": diff --git a/benchmark/bench_transform.py b/benchmark/bench_transform.py index 8aee44dafdf..4ca481a9801 100644 --- a/benchmark/bench_transform.py +++ b/benchmark/bench_transform.py @@ -4,6 +4,8 @@ import scipy.sparse from Orange.data import Table, ContinuousVariable, Domain +from Orange.tests.test_table import preprocess_domain_single, preprocess_domain_shared + from .base import Benchmark, benchmark @@ -12,6 +14,11 @@ def add_unknown_attribute(table): return table.transform(new_domain) +def add_unknown_class(table): + new_domain = Domain(table.domain.attributes, class_vars=[ContinuousVariable("x")]) + return table.transform(new_domain) + + class BenchTransform(Benchmark): def setup_dense(self, rows, cols): @@ -51,3 +58,58 @@ def bench_copy_sparse_square(self): def bench_copy_sparse_wide(self): t = add_unknown_attribute(self.table) self.assertIsInstance(t.X, scipy.sparse.csr_matrix) + + @benchmark(setup=partial(setup_dense, rows=10000, cols=100), number=5) + def bench_subarray_dense_long(self): + # adding a class should link X + add_unknown_class(self.table) + + def setup_dense_transforms(self, rows, cols, transforms): + self.setup_dense(rows, cols) + self.domains = [] # pylint: disable=attribute-defined-outside-init + self.callbacks = [] # pylint: disable=attribute-defined-outside-init + domain = self.table.domain + for t in transforms: + if t == "single": + call_cv = None + domain = preprocess_domain_single(domain, call_cv) + self.callbacks.append((call_cv,)) + elif t == "shared": + call_cv, call_shared = None, None + domain = preprocess_domain_shared(domain, call_cv, call_shared) + self.callbacks.append((call_cv, call_shared)) + else: + raise RuntimeError + self.domains.append(domain) + + @benchmark(setup=partial(setup_dense_transforms, rows=1000, cols=100, + transforms=["single"]), number=5) + def bench_transform_single(self): + t = self.table.transform(self.domains[-1]) + np.testing.assert_almost_equal(t.X, self.table.X * 2) + + @benchmark(setup=partial(setup_dense_transforms, rows=1000, cols=100, + transforms=["single", "single"]), number=5) + def bench_transform_single_single(self): + t = self.table.transform(self.domains[-1]) + np.testing.assert_almost_equal(t.X, self.table.X * 2**2) + + @benchmark(setup=partial(setup_dense_transforms, rows=1000, cols=100, + transforms=["shared"]), number=5) + def bench_transform_shared(self): + t = self.table.transform(self.domains[-1]) + np.testing.assert_almost_equal(t.X, self.table.X * 2) + + @benchmark(setup=partial(setup_dense_transforms, rows=1000, cols=100, + transforms=["single", "single", "shared", "single"]), number=5) + def bench_transform_single_single_shared_single(self): + t = self.table.transform(self.domains[-1]) + np.testing.assert_almost_equal(t.X, self.table.X * 2**4) + + + @benchmark(setup=partial(setup_dense_transforms, rows=1000, cols=100, + transforms=["single", "single", "shared", + "single", "shared", "single"]), number=5) + def bench_transform_single_single_shared_single_shared_single(self): + t = self.table.transform(self.domains[-1]) + np.testing.assert_almost_equal(t.X, self.table.X * 2**6)