Skip to content

Commit

Permalink
BinDefinition: Separate full and short labels
Browse files Browse the repository at this point in the history
  • Loading branch information
janezd committed Oct 21, 2019
1 parent 04d9602 commit 29f8b43
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 22 deletions.
40 changes: 29 additions & 11 deletions Orange/preprocess/discretize.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ def _split_eq_width(self, min, max):
# Named tuple describing a single binning: the thresholds, their display
# labels (full and shortened), and the bin width with its display label.
class BinDefinition(NamedTuple):
thresholds: np.ndarray # bin thresholds, including the top one
labels: List[str] # friendly-formatted thresholds (full form)
short_labels: List[str] # shorter variants of the labels (e.g. simplified dates)
width: Union[float, None] # bin width, if uniform; otherwise None
width_label: str # friendly-formatted width (e.g. '50' or '2 weeks')

Expand All @@ -190,14 +191,25 @@ class BinDefinition(NamedTuple):
# Name of the class has to be the same to match the namedtuple name
# pylint: disable=function-redefined
class BinDefinition(BinDefinition):
def __new__(cls, thresholds, labels="%g", width=None, width_label=""):
if isinstance(labels, str):
labels = [labels % x for x in thresholds]
elif isinstance(labels, Callable):
labels = [labels(x) for x in thresholds]
def __new__(cls, thresholds, labels="%g",
short_labels=None, width=None, width_label=""):

def get_labels(fmt, default=None):
if fmt is None:
return default
if isinstance(fmt, str):
return [fmt % x for x in thresholds]
elif isinstance(fmt, Callable):
return [fmt(x) for x in thresholds]
else:
return fmt

labels = get_labels(labels)
short_labels = get_labels(short_labels, labels)
if not width_label and width is not None:
width_label = f"{width:g}"
return super().__new__(cls, thresholds, labels, width, width_label)
return super().__new__(
cls, thresholds, labels, short_labels, width, width_label)

@property
def start(self) -> float:
Expand Down Expand Up @@ -299,7 +311,7 @@ def decimal_binnings(
if min_bins <= nbins <= max_bins \
and (not bins or bins[-1].nbins != nbins):
bin_def = BinDefinition(mn_ + width * np.arange(nbins + 1),
label_fmt, width)
label_fmt, None, width)
bins.append(bin_def)
return bins

Expand Down Expand Up @@ -362,12 +374,14 @@ def _time_binnings(mn, mx, min_pts, max_pts):
continue
times = [time.struct_time(t + (0, 0, 0)) for t in times]
thresholds = [calendar.timegm(t) for t in times]
labels = _simplified_labels([time.strftime(fmt, t) for t in times])
labels = [time.strftime(fmt, t) for t in times]
short_labels = _simplified_labels(labels)
if place == 2 and step >= 7:
unit_label = f"{step // 7} week{'s' * (step > 7)}"
else:
unit_label = f"{step} {unit}{'s' * (step > 1)}"
new_bins = BinDefinition(thresholds, labels, None, unit_label)
new_bins = BinDefinition(
thresholds, labels, short_labels, None, unit_label)
if not bins or new_bins.nbins != bins[-1].nbins:
bins.append(new_bins)
return bins
Expand Down Expand Up @@ -417,13 +431,16 @@ def _month_days(year, month,


def _simplified_labels(labels):
labels = labels[:]
to_remove = "42"
while True:
firsts = {f for f, *_ in (lab.split() for lab in labels)}
if len(firsts) > 1:
break
to_remove = firsts.pop()
flen = len(to_remove) + 1
if any(len(lab) == flen for lab in labels):
break
labels = [lab[flen:] for lab in labels]
for i in range(len(labels) - 1, 0, -1):
for k, c, d in zip(count(), labels[i].split(), labels[i - 1].split()):
Expand All @@ -442,8 +459,9 @@ def _unique_time_bins(unique):
fmt = f'{"%y " if times[0][0] >= 1950 else "%Y "} %b %d'
fmt += " %H:%M" * (len({t[2:] for t in times}) > 1)
fmt += ":%S" * bool(np.all(unique % 60 == 0))
return BinDefinition(_unique_thresholds(unique),
[time.strftime(fmt, x) for x in times])
labels = [time.strftime(fmt, x) for x in times]
short_labels = _simplified_labels(labels)
return BinDefinition(_unique_thresholds(unique), labels, short_labels)


def _unique_thresholds(unique):
Expand Down
2 changes: 1 addition & 1 deletion Orange/preprocess/tests/test_discretize.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def setUp(self):
def test_binning(self):
def testbin(start, end):
bins = _time_binnings(create(*start), create(*end), 3, 51)
return [(bin.width_label, bin.labels, bin.thresholds)
return [(bin.width_label, bin.short_labels, bin.thresholds)
for bin in reversed(bins)]

self.assertEqual(
Expand Down
14 changes: 5 additions & 9 deletions Orange/widgets/unsupervised/owsom.py
Original file line number Diff line number Diff line change
Expand Up @@ -839,11 +839,7 @@ def set_color_bins(self):
else:
binning = decimal_binnings(col, min_bins=4)[-1]
self.thresholds = binning.thresholds[1:-1]
self.bin_labels = binning.labels[1:-1]
# Second label may have been truncated; put back the missing part
split0 = binning.labels[0].split()
split1 = binning.labels[1].split()
self.bin_labels[0] = " ".join(split0[:-len(split1)] + split1)
self.bin_labels = (binning.labels[1:-1], binning.short_labels[1:-1])
palette = ContinuousPaletteGenerator(*self.attr_color.colors)
nbins = len(self.thresholds) + 1
self.colors = [palette[i / (nbins - 1)] for i in range(nbins)]
Expand Down Expand Up @@ -881,11 +877,11 @@ def create_legend(self):
self.set_legend_pos()

def _bin_names(self):
labels, short_labels = self.bin_labels
return \
[f"< {self.bin_labels[0]}"] \
+ [f"{x} - {y}"
for x, y in zip(self.bin_labels, self.bin_labels[1:])] \
+ [f"≥ {self.bin_labels[-1]}"]
[f"< {labels[0]}"] \
+ [f"{x} - {y}" for x, y in zip(labels, short_labels[1:])] \
+ [f"≥ {labels[-1]}"]

def set_legend_pos(self):
if self.legend is None:
Expand Down
2 changes: 1 addition & 1 deletion Orange/widgets/visualize/owdistributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -669,7 +669,7 @@ def _set_cont_ticks(self):
axis = self.ploti.getAxis("bottom")
if self.var and self.var.is_time:
binning = self.binnings[self.number_of_bins]
labels = np.array(binning.labels)
labels = np.array(binning.short_labels)
thresholds = np.array(binning.thresholds)
lengths = np.array([len(lab) for lab in labels])
slengths = set(lengths)
Expand Down

0 comments on commit 29f8b43

Please sign in to comment.