Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FIX] Pivot: Display time variable in time format #5212

Merged
merged 1 commit into from
Jan 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 43 additions & 17 deletions Orange/widgets/data/owpivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ class Pivot:
# Tuples of aggregation functions, grouped by the kind of value variable.
# NOTE(review): the Cont/Disc grouping is read off the names; only the uses of
# DiscVarFunctions, TimeVarFunctions and FloatFunctions are visible in this diff.
ContVarFunctions = (Sum, Mean, Min, Max, Mode, Median, Var)
DiscVarFunctions = (Majority,)
TimeVarFunctions = (Mean, Min, Max, Mode, Median)
# Checked with `all(fun in self.FloatFunctions for fun in agg_funs)` when the
# value variable is a TimeVariable, to decide whether the pivot output can be
# kept as plain float / continuous columns.
FloatFunctions = (Count, Count_defined, Sum, Var)

class Tables:
table = None # type: Table
Expand Down Expand Up @@ -303,11 +304,23 @@ def map_values(index, _X):
_X[:, index][_X[:, index] == value] = j
return values

create_time_var = \
isinstance(val_var, TimeVariable) and \
all(fun in self.TimeVarFunctions for fun in agg_funs)
create_cont_var = \
not val_var or val_var.is_continuous and \
(not isinstance(val_var, TimeVariable) or
all(fun in self.FloatFunctions for fun in agg_funs))

vals = np.array(self._col_var.values)[self._col_var_groups.astype(int)]
if not val_var or val_var.is_continuous:
cv = ContinuousVariable
attrs = [[cv(f"{v}", 1) for v in vals]] * 2
attrs.extend([[cv("Total", 1)]] * 2)
if create_time_var:
kwargs = {"have_date": val_var.have_date,
"have_time": val_var.have_time}
attrs = [[TimeVariable(f"{v}", **kwargs) for v in vals]] * 2
attrs.extend([[TimeVariable("Total", **kwargs)]] * 2)
elif create_cont_var:
attrs = [[ContinuousVariable(f"{v}", 1) for v in vals]] * 2
attrs.extend([[ContinuousVariable("Total", 1)]] * 2)
else:
attrs = []
for x in (X, X_h):
Expand Down Expand Up @@ -354,15 +367,19 @@ def __get_pivot_tab_x(self, val_var, agg_funs):
gt = self._group_tables
n_fun = len(agg_funs)
n_rows, n_cols = len(self._row_var_groups), len(self._col_var_groups)
kwargs = {"fill_value": np.nan, "dtype": float} \
if not val_var or val_var.is_continuous \
is_float_type = not val_var or val_var.is_continuous
if isinstance(val_var, TimeVariable):
is_float_type = \
all(fun in self.TimeVarFunctions for fun in agg_funs) or \
all(fun in self.FloatFunctions for fun in agg_funs)
kwargs = {"fill_value": np.nan, "dtype": float} if is_float_type \
else {"fill_value": "", "dtype": object}
X = np.full((n_rows * n_fun, 2 + n_cols), **kwargs)
X_h = np.full((n_fun, 2 + n_cols), **kwargs)
X_v = np.full((n_rows * n_fun, 1), **kwargs)
X_t = np.full((n_fun, 1), **kwargs)
for i, fun in enumerate(agg_funs):
args = (val_var, fun)
args = (val_var, fun, is_float_type)
X[i::n_fun, 2:] = self.__rows_for_function(n_rows, n_cols, *args)
X[i::n_fun, :2] = np.array([[row_val, agg_funs.index(fun)] for
row_val in self._row_var_groups])
Expand All @@ -372,13 +389,14 @@ def __get_pivot_tab_x(self, val_var, agg_funs):
X_t[i] = self.__total_for_function(gt.total, *args)
return X, X_h, X_v, X_t

# Diff hunk without +/- markers: the two `def` lines and the two `return`
# statements appear to be the removed and the added variant of the same
# statement (the later one threads `is_float_type` through).
#
# Selects one column of `group_tab.X` for aggregation `fun` and hands it to
# __check_continuous for optional string conversion.
def __total_for_function(self, group_tab, val_var, fun):
def __total_for_function(self, group_tab, val_var, fun, is_float_type):
# Column index: try the lookup keyed by `fun` alone first, otherwise the one
# keyed by `fun` and then `val_var`.
# NOTE(review): because of the `or`, a stored index of 0 would also fall
# through to the second lookup - confirm 0 can never be stored there.
ref = self._indepen_agg_done.get(fun, None) \
or self._depen_agg_done[fun][val_var]
# Shift the index down by one unless `single_var_grouping` is truthy.
ref -= int(bool(not self.single_var_grouping))
return self.__check_continuous(val_var, group_tab.X[:, ref], fun)
return self.__check_continuous(val_var, group_tab.X[:, ref],
fun, is_float_type)

def __rows_for_function(self, n_rows, n_cols, val_var, fun):
def __rows_for_function(self, n_rows, n_cols, val_var, fun, is_float_type):
ref = self._indepen_agg_done.get(fun, None) \
or self._depen_agg_done[fun][val_var]
column = self._group_tables.table.X[:, ref]
Expand All @@ -387,14 +405,23 @@ def __rows_for_function(self, n_rows, n_cols, val_var, fun):
rows[np.diag_indices_from(rows)] = column
else:
rows = column.reshape(n_rows, n_cols)
return self.__check_continuous(val_var, rows, fun)
return self.__check_continuous(val_var, rows, fun, is_float_type)

# Diff hunk without +/- markers: the two `def` lines appear to be the removed
# and the added signature; the `elif` branch below uses `is_float_type`, so it
# belongs with the second one.
#
# Returns `column` unchanged unless it has to be shown as text:
#   * value variable present and not continuous -> string array, with
#     DiscVarFunctions results mapped back to value names;
#   * TimeVariable and not `is_float_type` -> string array, with
#     TimeVarFunctions results formatted through `val_var.repr_val`.
def __check_continuous(self, val_var, column, fun):
def __check_continuous(self, val_var, column, fun, is_float_type):
if val_var and not val_var.is_continuous:
column = column.astype(str)
if fun in self.DiscVarFunctions:
# Entries hold stringified float indices ("0.0", "1.0", ...); replace each
# with the corresponding name from `val_var.values`.
for j, val in enumerate(val_var.values):
column[column == str(float(j))] = val
elif isinstance(val_var, TimeVariable) and not is_float_type:
# Flatten so that a single index loop covers whatever shape was passed in;
# the original shape is restored on return.
shape = column.shape
column = column.flatten()
# `column_` is the string copy being filled; `column` keeps the numeric
# values needed for the NaN test and for repr_val.
column_ = column.astype(str)
if fun in self.TimeVarFunctions:
for i in range(column.shape[0]):
# NaN entries keep the text produced by astype(str).
if not np.isnan(column[i]):
column_[i] = val_var.repr_val(column[i])
return column_.reshape(shape)
return column

@staticmethod
Expand Down Expand Up @@ -618,10 +645,8 @@ def __set_vertical_headers(self, table):
self.table_model.setItem(i + 1, 1, item)

# Diff hunk without +/- markers: the `attrs = ...` line, the "read faster"
# comment and the `table.X[i, j] if ...` assignment appear to be the removed
# lines; `value = table[i, j]` appears to be their replacement.
#
# Writes every cell of `table` into `self.table_model` as an item built from
# `str(value)`, offset by the leading header rows and columns.
def _set_values(self, table):
attrs = table.domain.attributes
# NOTE(review): `table[0]` is evaluated before the loop starts, so this
# presumably fails on a table with no rows - confirm callers never pass one.
for i, j in product(range(len(table)), range(len(table[0]))):
# data is read faster when reading directly from table.X
value = table.X[i, j] if attrs[j].is_continuous else table[i, j]
value = table[i, j]
item = self._create_value_item(str(value))
self.table_model.setItem(i + self._n_leading_rows,
j + self._n_leading_cols, item)
Expand Down Expand Up @@ -982,9 +1007,10 @@ def get_filtered_data(self):
elif isinstance(at, ContinuousVariable):
f.append(FilterContinuous(at, FilterContinuous.Equal, val))
cond.append(Values(f))
return Values([f for f in cond], conjunction=False)(self.data)
return Values(cond, conjunction=False)(self.data)

# Diff hunk without +/- markers: `def sizeHint(self):` appears to be the
# removed instance-method form and `@staticmethod` / `def sizeHint():` the
# added form. Both return the same fixed QSize(640, 525).
def sizeHint(self):
@staticmethod
def sizeHint():
return QSize(640, 525)

def send_report(self):
Expand Down
49 changes: 49 additions & 0 deletions Orange/widgets/data/tests/test_owpivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,6 +649,55 @@ def test_pivot_disc_val_var(self):
[8, 1, np.nan, np.nan]])
self.assert_table_equal(pivot_tab, Table(Domain(atts), X))

def test_pivot_time_val_var(self):
    """Pivot on a time value variable with several aggregation sets.

    The expected column type follows the aggregations requested:
    time columns for Min / Max, continuous columns for
    Count defined / Sum, and discrete columns holding the formatted
    values when the two groups are mixed.
    """
    domain = Domain([Dv("d1", ("a", "b")), Dv("d2", ("c", "d")),
                     Tv("t1", have_date=1)])
    data = Table(domain, np.array([[0, 1, 1e9], [0, 0, 1e8],
                                   [1, 0, 2e8], [1, 1, np.nan]]))

    def verify(agg_funs, agg_names, value_attrs, expected):
        # One case: build the pivot, the expected domain and values,
        # then compare the two tables.
        pivot = Pivot(data, agg_funs, domain[0], domain[1], domain[2])
        attributes = (domain[0], Dv("Aggregate", agg_names), *value_attrs)
        self.assert_table_equal(
            pivot.pivot_table,
            Table(Domain(attributes), np.array(expected)))

    # Min
    verify([Pivot.Min], ["Min"],
           (Tv("c", have_date=1), Tv("d", have_date=1)),
           [[0, 0, 1e8, 1e9],
            [1, 0, 2e8, np.nan]])

    # Min, Max
    verify([Pivot.Min, Pivot.Max], ["Min", "Max"],
           (Tv("c", have_date=1), Tv("d", have_date=1)),
           [[0, 0, 1e8, 1e9],
            [0, 1, 1e8, 1e9],
            [1, 0, 2e8, np.nan],
            [1, 1, 2e8, np.nan]])

    # Count defined, Sum
    verify([Pivot.Count_defined, Pivot.Sum], ["Count defined", "Sum"],
           (Cv("c"), Cv("d")),
           [[0, 0, 1, 1],
            [0, 1, 1e8, 1e9],
            [1, 0, 1, 0],
            [1, 1, 2e8, 0]])

    # Count defined, Max
    verify([Pivot.Count_defined, Pivot.Max], ["Count defined", "Max"],
           (Dv("c", ["1.0", "1973-03-03", "1976-05-03"]),
            Dv("d", ["0.0", "1.0", "2001-09-09"])),
           [[0, 0, 0, 1],
            [0, 1, 1, 2],
            [1, 0, 0, 0],
            [1, 1, 2, np.nan]])

def test_pivot_attr_combinations(self):
domain = self.table1.domain
for var1, var2, var3 in ((domain[1], domain[3], domain[5]), # d d d
Expand Down