Skip to content

Commit

Permalink
Merge pull request #5763 from PrimozGodec/fix-groupby
Browse files Browse the repository at this point in the history
[FIX] Group by: compute mode when all values in group nan
  • Loading branch information
janezd authored Jan 7, 2022
2 parents 06d7f24 + 8929ba8 commit 5d83588
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 4 deletions.
10 changes: 6 additions & 4 deletions Orange/widgets/data/owgroupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import Any, Dict, List, Optional, Set

import pandas as pd
from numpy import nan
from AnyQt.QtCore import (
QAbstractTableModel,
QEvent,
Expand Down Expand Up @@ -58,7 +59,7 @@ def concatenate(x):
"Mean": Aggregation("mean", {ContinuousVariable, TimeVariable}),
"Median": Aggregation("median", {ContinuousVariable, TimeVariable}),
"Mode": Aggregation(
lambda x: pd.Series.mode(x)[0], {ContinuousVariable, TimeVariable}
lambda x: pd.Series.mode(x).get(0, nan), {ContinuousVariable, TimeVariable}
),
"Standard deviation": Aggregation("std", {ContinuousVariable, TimeVariable}),
"Variance": Aggregation("var", {ContinuousVariable, TimeVariable}),
Expand Down Expand Up @@ -405,7 +406,7 @@ def __gb_changed(self) -> None:
self.gb_attrs = [values[row.row()] for row in sorted(rows)]
# everything cached in result should be recomputed on gb change
self.result = Result()
self.commit()
self.commit.deferred()

def __aggregation_changed(self, agg: str) -> None:
"""
Expand All @@ -421,7 +422,7 @@ def __aggregation_changed(self, agg: str) -> None:
else:
self.aggregations[attr].discard(agg)
self.agg_table_model.update_aggregation(attr)
self.commit()
self.commit.deferred()

@Inputs.data
def set_data(self, data: Table) -> None:
Expand Down Expand Up @@ -449,11 +450,12 @@ def set_data(self, data: Table) -> None:
self.agg_table_model.set_domain(data.domain if data else None)
self._set_gb_selection()

self.commit()
self.commit.now()

#########################
# Task connected methods

@gui.deferred
def commit(self) -> None:
self.Error.clear()
self.Warning.clear()
Expand Down
61 changes: 61 additions & 0 deletions Orange/widgets/data/tests/test_owgroupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
from Orange.data import (
Table,
table_to_frame,
Domain,
ContinuousVariable,
)
from Orange.data.tests.test_aggregate import create_sample_data
from Orange.widgets.data.owgroupby import OWGroupBy
Expand Down Expand Up @@ -689,6 +691,65 @@ def test_time_variable(self):
output = self.get_output(self.widget.Outputs.data)
self.assertEqual(2, len(output))

def test_only_nan_in_group(self):
    """
    Check every aggregation of a column whose values are all missing
    within one of the groups.

    The input has two continuous features: A is the group-by attribute
    and B is aggregated. In the group A == 1 both values of B are nan;
    in the group A == 2 both values of B are 1.
    """
    widget = self.widget
    nan = np.nan

    def set_all_aggregations(row, checked):
        # Select the attribute in the aggregation table, then click each
        # checkbox until it reports the wanted state; one click is not
        # assumed to be enough (presumably the boxes can also be
        # partially checked -- TODO confirm).
        self.select_table_rows(widget.agg_table_view, [row])
        for checkbox in widget.agg_checkboxes.values():
            while checkbox.isChecked() != checked:
                checkbox.click()

    domain = Domain([ContinuousVariable("A"), ContinuousVariable("B")])
    values = np.array([[1, nan], [2, 1], [1, nan], [2, 1]])
    self.send_signal(widget.Inputs.data, Table(domain, values))

    # group by feature A
    self._set_selection(widget.gb_attrs_view, [0])
    # turn on every aggregation for B, then turn off every one for A
    set_all_aggregations(1, True)
    set_all_aggregations(0, False)

    column_names = [
        "B - Mean",
        "B - Median",
        "B - Mode",
        "B - Standard deviation",
        "B - Variance",
        "B - Sum",
        "B - Min. value",
        "B - Max. value",
        "B - Span",
        "B - First value",
        "B - Last value",
        "B - Random value",
        "B - Count defined",
        "B - Count",
        "B - Proportion defined",
        "B - Concatenate",
        "A",
    ]
    # One row per group, values in the same order as column_names.
    all_missing_group = [
        nan, nan, nan, nan, nan, 0, nan, nan, nan, nan, nan, nan,
        0, 2, 0, "", 1,
    ]
    all_defined_group = [
        1, 1, 1, 0, 0, 2, 1, 1, 0, 1, 1, 1,
        2, 2, 1, "1.0 1.0", 2,
    ]
    expected = pd.DataFrame(
        [all_missing_group, all_defined_group], columns=column_names
    )

    output_table = self.get_output(widget.Outputs.data)
    actual = table_to_frame(output_table, include_metas=True)
    pd.testing.assert_frame_equal(
        actual,
        expected,
        check_dtype=False,
        check_column_type=False,
        check_categorical=False,
    )


# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()

0 comments on commit 5d83588

Please sign in to comment.