Skip to content

Commit

Permalink
Merge pull request #5212 from VesnaT/pivot_datetime
Browse files (browse the repository at this point in the history)
[FIX] Pivot: Display time variable in time format
  • Loading branch information
ajdapretnar authored Jan 29, 2021
2 parents 67b6068 + 510aa22 commit fc77d66
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 17 deletions.
60 changes: 43 additions & 17 deletions Orange/widgets/data/owpivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ class Pivot:
ContVarFunctions = (Sum, Mean, Min, Max, Mode, Median, Var)
DiscVarFunctions = (Majority,)
TimeVarFunctions = (Mean, Min, Max, Mode, Median)
FloatFunctions = (Count, Count_defined, Sum, Var)

class Tables:
table = None # type: Table
Expand Down Expand Up @@ -303,11 +304,23 @@ def map_values(index, _X):
_X[:, index][_X[:, index] == value] = j
return values

create_time_var = \
isinstance(val_var, TimeVariable) and \
all(fun in self.TimeVarFunctions for fun in agg_funs)
create_cont_var = \
not val_var or val_var.is_continuous and \
(not isinstance(val_var, TimeVariable) or
all(fun in self.FloatFunctions for fun in agg_funs))

vals = np.array(self._col_var.values)[self._col_var_groups.astype(int)]
if not val_var or val_var.is_continuous:
cv = ContinuousVariable
attrs = [[cv(f"{v}", 1) for v in vals]] * 2
attrs.extend([[cv("Total", 1)]] * 2)
if create_time_var:
kwargs = {"have_date": val_var.have_date,
"have_time": val_var.have_time}
attrs = [[TimeVariable(f"{v}", **kwargs) for v in vals]] * 2
attrs.extend([[TimeVariable("Total", **kwargs)]] * 2)
elif create_cont_var:
attrs = [[ContinuousVariable(f"{v}", 1) for v in vals]] * 2
attrs.extend([[ContinuousVariable("Total", 1)]] * 2)
else:
attrs = []
for x in (X, X_h):
Expand Down Expand Up @@ -354,15 +367,19 @@ def __get_pivot_tab_x(self, val_var, agg_funs):
gt = self._group_tables
n_fun = len(agg_funs)
n_rows, n_cols = len(self._row_var_groups), len(self._col_var_groups)
kwargs = {"fill_value": np.nan, "dtype": float} \
if not val_var or val_var.is_continuous \
is_float_type = not val_var or val_var.is_continuous
if isinstance(val_var, TimeVariable):
is_float_type = \
all(fun in self.TimeVarFunctions for fun in agg_funs) or \
all(fun in self.FloatFunctions for fun in agg_funs)
kwargs = {"fill_value": np.nan, "dtype": float} if is_float_type \
else {"fill_value": "", "dtype": object}
X = np.full((n_rows * n_fun, 2 + n_cols), **kwargs)
X_h = np.full((n_fun, 2 + n_cols), **kwargs)
X_v = np.full((n_rows * n_fun, 1), **kwargs)
X_t = np.full((n_fun, 1), **kwargs)
for i, fun in enumerate(agg_funs):
args = (val_var, fun)
args = (val_var, fun, is_float_type)
X[i::n_fun, 2:] = self.__rows_for_function(n_rows, n_cols, *args)
X[i::n_fun, :2] = np.array([[row_val, agg_funs.index(fun)] for
row_val in self._row_var_groups])
Expand All @@ -372,13 +389,14 @@ def __get_pivot_tab_x(self, val_var, agg_funs):
X_t[i] = self.__total_for_function(gt.total, *args)
return X, X_h, X_v, X_t

# Fetch one aggregated column from `group_tab` and pass it through
# `__check_continuous` for type-dependent conversion.
# NOTE(review): this span is a rendered diff hunk without +/- markers, so two
# forms of the signature and of the return statement appear back to back.
# Form without `is_float_type` (presumably the removed side of the diff).
def __total_for_function(self, group_tab, val_var, fun):
# Form with the extra `is_float_type` flag (presumably the added side).
def __total_for_function(self, group_tab, val_var, fun, is_float_type):
# Column index of the aggregate: looked up by function in `_indepen_agg_done`,
# otherwise by function and value variable in `_depen_agg_done`.
# NOTE(review): `or` also falls through when the stored index is 0 -- confirm
# that 0 can never be a valid entry of `_indepen_agg_done`.
ref = self._indepen_agg_done.get(fun, None) \
or self._depen_agg_done[fun][val_var]
# Shift the index down by one unless `single_var_grouping` is truthy.
ref -= int(bool(not self.single_var_grouping))
# Three-argument call matching the signature without `is_float_type`.
return self.__check_continuous(val_var, group_tab.X[:, ref], fun)
# Four-argument call that forwards `is_float_type` unchanged.
return self.__check_continuous(val_var, group_tab.X[:, ref],
fun, is_float_type)

def __rows_for_function(self, n_rows, n_cols, val_var, fun):
def __rows_for_function(self, n_rows, n_cols, val_var, fun, is_float_type):
ref = self._indepen_agg_done.get(fun, None) \
or self._depen_agg_done[fun][val_var]
column = self._group_tables.table.X[:, ref]
Expand All @@ -387,14 +405,23 @@ def __rows_for_function(self, n_rows, n_cols, val_var, fun):
rows[np.diag_indices_from(rows)] = column
else:
rows = column.reshape(n_rows, n_cols)
return self.__check_continuous(val_var, rows, fun)
return self.__check_continuous(val_var, rows, fun, is_float_type)

# Convert an aggregated array to strings when the value variable requires it;
# otherwise return the array unchanged.
# NOTE(review): this span is a rendered diff hunk without +/- markers; the two
# consecutive `def` lines are presumably the removed and the added signature.
def __check_continuous(self, val_var, column, fun):
def __check_continuous(self, val_var, column, fun, is_float_type):
# A value variable is given and it is not continuous: work on a string copy.
if val_var and not val_var.is_continuous:
column = column.astype(str)
# For functions listed in DiscVarFunctions, replace each stringified float
# index (str(float(j)), e.g. "0.0") with the j-th entry of `val_var.values`.
if fun in self.DiscVarFunctions:
for j, val in enumerate(val_var.values):
column[column == str(float(j))] = val
# Time variable whose results are not kept as floats: build a string array of
# the same shape as the input.
elif isinstance(val_var, TimeVariable) and not is_float_type:
# Remember the shape so the flattened result can be restored on return.
shape = column.shape
column = column.flatten()
column_ = column.astype(str)
# Only functions in TimeVarFunctions get their values rendered through
# `val_var.repr_val`; other functions keep the plain str() of the number.
if fun in self.TimeVarFunctions:
for i in range(column.shape[0]):
# NaN entries keep the string produced by astype(str).
if not np.isnan(column[i]):
column_[i] = val_var.repr_val(column[i])
return column_.reshape(shape)
# Reached when neither branch above returned: `column` is either the input
# array or the string copy made in the first branch.
return column

@staticmethod
Expand Down Expand Up @@ -618,10 +645,8 @@ def __set_vertical_headers(self, table):
self.table_model.setItem(i + 1, 1, item)

def _set_values(self, table):
attrs = table.domain.attributes
for i, j in product(range(len(table)), range(len(table[0]))):
# data is read faster when reading directly from table.X
value = table.X[i, j] if attrs[j].is_continuous else table[i, j]
value = table[i, j]
item = self._create_value_item(str(value))
self.table_model.setItem(i + self._n_leading_rows,
j + self._n_leading_cols, item)
Expand Down Expand Up @@ -982,9 +1007,10 @@ def get_filtered_data(self):
elif isinstance(at, ContinuousVariable):
f.append(FilterContinuous(at, FilterContinuous.Equal, val))
cond.append(Values(f))
return Values([f for f in cond], conjunction=False)(self.data)
return Values(cond, conjunction=False)(self.data)

# Return a fixed QSize of 640 x 525.
# NOTE(review): this span is a rendered diff hunk without +/- markers; the
# instance-method form and the `@staticmethod` form below are presumably the
# removed and the added side of the same change.
def sizeHint(self):
@staticmethod
def sizeHint():
# The size is a constant; no instance state is read.
return QSize(640, 525)

def send_report(self):
Expand Down
49 changes: 49 additions & 0 deletions Orange/widgets/data/tests/test_owpivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,6 +649,55 @@ def test_pivot_disc_val_var(self):
[8, 1, np.nan, np.nan]])
self.assert_table_equal(pivot_tab, Table(Domain(atts), X))

def test_pivot_time_val_var(self):
    """Pivot a time value variable under several aggregation selections.

    Four selections are checked against an expected table:

    * ``Min`` and ``Min, Max`` expect time-variable columns,
    * ``Count defined, Sum`` expects continuous columns,
    * ``Count defined, Max`` expects discrete columns whose values are
      the rendered numbers and dates.
    """
    domain = Domain([Dv("d1", ("a", "b")), Dv("d2", ("c", "d")),
                     Tv("t1", have_date=1)])
    data = Table(domain, np.array([[0, 1, 1e9],
                                   [0, 0, 1e8],
                                   [1, 0, 2e8],
                                   [1, 1, np.nan]]))
    row_var, col_var, time_var = domain[0], domain[1], domain[2]

    def assert_pivot(agg_funs, agg_labels, column_attrs, expected):
        # Build the pivot for the chosen aggregations and compare it with
        # a table made of the row variable, the "Aggregate" column and
        # the expected per-column variables.
        pivot = Pivot(data, agg_funs, row_var, col_var, time_var)
        attrs = (row_var, Dv("Aggregate", agg_labels), *column_attrs)
        self.assert_table_equal(pivot.pivot_table,
                                Table(Domain(attrs), expected))

    # Min
    assert_pivot(
        [Pivot.Min], ["Min"],
        (Tv("c", have_date=1), Tv("d", have_date=1)),
        np.array([[0, 0, 1e8, 1e9],
                  [1, 0, 2e8, np.nan]]),
    )

    # Min, Max
    assert_pivot(
        [Pivot.Min, Pivot.Max], ["Min", "Max"],
        (Tv("c", have_date=1), Tv("d", have_date=1)),
        np.array([[0, 0, 1e8, 1e9],
                  [0, 1, 1e8, 1e9],
                  [1, 0, 2e8, np.nan],
                  [1, 1, 2e8, np.nan]]),
    )

    # Count defined, Sum
    assert_pivot(
        [Pivot.Count_defined, Pivot.Sum], ["Count defined", "Sum"],
        (Cv("c"), Cv("d")),
        np.array([[0, 0, 1, 1],
                  [0, 1, 1e8, 1e9],
                  [1, 0, 1, 0],
                  [1, 1, 2e8, 0]]),
    )

    # Count defined, Max
    assert_pivot(
        [Pivot.Count_defined, Pivot.Max], ["Count defined", "Max"],
        (Dv("c", ["1.0", "1973-03-03", "1976-05-03"]),
         Dv("d", ["0.0", "1.0", "2001-09-09"])),
        np.array([[0, 0, 0, 1],
                  [0, 1, 1, 2],
                  [1, 0, 0, 0],
                  [1, 1, 2, np.nan]]),
    )

def test_pivot_attr_combinations(self):
domain = self.table1.domain
for var1, var2, var3 in ((domain[1], domain[3], domain[5]), # d d d
Expand Down

0 comments on commit fc77d66

Please sign in to comment.