Skip to content

Commit

Permalink
cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
noahnovsak committed Nov 23, 2023
1 parent c4f9c21 commit 3e8d853
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 76 deletions.
18 changes: 9 additions & 9 deletions orangecontrib/prototypes/interactions.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np


def get_row_ids(ar):
def hash_rows(ar):
row_ids = ar[:, 0].copy()
# Assuming the data has been discretized into fewer
# than 10000 bins and that `ar` has up to 3 columns,
Expand Down Expand Up @@ -30,7 +30,7 @@ def distribution(ar):
# implementation doesn't release the GIL. The simplest
# solution seems to be generating unique numbers/ids
# based on the contents of each row.
ar = get_row_ids(ar)
ar = hash_rows(ar)

_, counts = np.unique(ar, return_counts=True)
return counts / ar.shape[0]
Expand All @@ -47,9 +47,9 @@ def __init__(self, data):
self.class_entropy = 0
self.information_gain = np.zeros(data.X.shape[1])

self.precompute()
self.preprocess()

def precompute(self):
def preprocess(self):
"""
Precompute the information gain of each attribute to speed up
computation and to provide weights for the heuristic.
Expand All @@ -68,12 +68,12 @@ def precompute(self):
- entropy(np.column_stack((self.data.X[:, attr], self.data.Y)))

def __call__(self, attr1, attr2):
attrs = np.column_stack((self.data.X[:, attr1], self.data.X[:, attr2]))
attrs = self.data.X[:, (attr1, attr2)]
return self.class_entropy \
- self.information_gain[attr1] \
- self.information_gain[attr2] \
+ entropy(attrs) \
- entropy(np.column_stack((attrs, self.data.Y)))
- self.information_gain[attr1] \
- self.information_gain[attr2] \
+ entropy(attrs) \
- entropy(np.column_stack((attrs, self.data.Y)))

def normalize(self, score):
return score / self.class_entropy
22 changes: 12 additions & 10 deletions orangecontrib/prototypes/ranktablemodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ def __init__(self, *args, **kwargs):
# ``rowCount`` returns the smaller of `_rows` and `_max_view_rows`:
# how large the model/view thinks it is

def sortInd(self):
@property
def __sortInd(self):
return self._AbstractSortTableModel__sortInd

def sortColumnData(self, column):
Expand All @@ -49,7 +50,7 @@ def extendSortFrom(self, sorted_rows: int):
data = self.sortColumnData(self.sortColumn())
new_ind = np.arange(sorted_rows, self._rows)
order = 1 if self.sortOrder() == Qt.AscendingOrder else -1
sorter = self.sortInd()[::order]
sorter = self.__sortInd[::order]
new_sorter = np.argsort(data[sorted_rows:])
loc = np.searchsorted(data[:sorted_rows],
data[sorted_rows:][new_sorter],
Expand Down Expand Up @@ -88,18 +89,21 @@ def clear(self):
self.resetSorting()
self.endResetModel()

def append(self, rows: list[list[float]]):
def extend(self, rows: list[list[float]]):
if not isinstance(self._data, np.ndarray):
return self.initialize(rows)
self.initialize(rows)
return

n_rows = len(rows)
if n_rows == 0:
return

n_data = len(self._data)
insert = self._rows < self._max_view_rows

if insert:
self.beginInsertRows(QModelIndex(), self._rows, min(self._max_view_rows, self._rows + n_rows) - 1)
self.beginInsertRows(QModelIndex(), self._rows,
min(self._max_view_rows, self._rows + n_rows) - 1)

if self._rows + n_rows >= n_data:
n_data = min(max(n_data + n_rows, 2 * n_data), self._max_data_rows)
Expand All @@ -126,11 +130,9 @@ class RankModel(ArrayTableModel):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

self.domain = None # type: Domain
self.domain_model = DomainModel(DomainModel.ATTRIBUTES)

def set_domain(self, domain: Domain):
self.domain = domain
self.domain_model.set_domain(domain)
n_attrs = len(domain.attributes)
self._max_data_rows = n_attrs * (n_attrs - 1) // 2
Expand All @@ -143,16 +145,16 @@ def resetSorting(self):

def data(self, index: QModelIndex, role=Qt.DisplayRole):
if not index.isValid():
return
return None

column = index.column()

if column >= self.columnCount() - 2 and role != Qt.EditRole:
# use the domain model for all roles (except Qt.EditRole) in the last two columns
try:
row = self.mapToSourceRows(index.row())
value = self.domain_model.index(int(self._data[row, column]))
return self.domain_model.data(value, role)
except IndexError:
return
return None

return super().data(index, role)
107 changes: 51 additions & 56 deletions orangecontrib/prototypes/widgets/owinteractions.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@
from AnyQt.QtWidgets import QTableView, QHeaderView, \
QStyleOptionViewItem, QApplication, QStyle, QLineEdit

from Orange.data import Table, Variable
from orangecontrib.prototypes.ranktablemodel import RankModel
from orangecontrib.prototypes.interactions import InteractionScorer

from Orange.data import Table, Domain, Variable
from Orange.preprocess import Discretize, Remove
from Orange.widgets import gui
from Orange.widgets.widget import OWWidget, AttributeList, Msg
Expand All @@ -19,31 +22,23 @@
from Orange.widgets.utils.itemmodels import DomainModel
from Orange.widgets.settings import Setting, ContextSetting, DomainContextHandler

from orangecontrib.prototypes.ranktablemodel import RankModel
from orangecontrib.prototypes.interactions import InteractionScorer


class ModelQueue:
"""
Another queueing object, similar to ``queue.Queue``.
The main difference is that ``get()`` returns all its
contents at the same time, instead of one by one.
"""
def __init__(self):
self.lock = Lock()
self.model = []
self.state = None
self.mutex = Lock()
self.queue = []
self.latest_state = None

def put(self, row, state):
with self.lock:
self.model.append(row)
self.state = state
with self.mutex:
self.queue.append(row)
self.latest_state = state

def get(self):
with self.lock:
model, self.model = self.model, []
state, self.state = self.state, None
return model, state
with self.mutex:
queue, self.queue = self.queue, []
state, self.latest_state = self.latest_state, None
return queue, state


def run(compute_score: Callable, row_for_state: Callable,
Expand All @@ -52,8 +47,8 @@ def run(compute_score: Callable, row_for_state: Callable,
"""
Replaces ``run_vizrank``, with some minor adjustments.
- ``ModelQueue`` replaces ``queue.Queue``
- `row_for_state` parameter added
- `scores` parameter removed
- `row_for_state` can be called here, assuming we are not adding `Qt` objects to the model
- `scores` removed
"""
task.set_status("Getting combinations...")
task.set_progress_value(0.1)
Expand Down Expand Up @@ -103,16 +98,16 @@ def reset_flag():

class Heuristic:
RANDOM, INFO_GAIN = 0, 1
type = {RANDOM: "Random Search",
INFO_GAIN: "Information Gain Heuristic"}
mode = {RANDOM: "Random Search",
INFO_GAIN: "Low Information Gain First"}

def __init__(self, weights, type=None):
def __init__(self, weights, mode=RANDOM):
self.n_attributes = len(weights)
self.attributes = np.arange(self.n_attributes)
if type == self.RANDOM:
np.random.shuffle(self.attributes)
if type == self.INFO_GAIN:
if mode == Heuristic.INFO_GAIN:
self.attributes = self.attributes[np.argsort(weights)]
else:
np.random.shuffle(self.attributes)

def generate_states(self):
# prioritize two mid ranked attributes over highest first
Expand Down Expand Up @@ -196,7 +191,6 @@ class OWInteractions(OWWidget, ConcurrentWidgetMixin):
name = "Interactions"
description = "Compute all pairwise attribute interactions."
icon = "icons/Interactions.svg"
category = "Unsupervised"

class Inputs:
data = Input("Data", Table)
Expand All @@ -208,8 +202,8 @@ class Outputs:
selection = ContextSetting([])
feature: Variable
feature = ContextSetting(None)
heuristic_type: int
heuristic_type = Setting(0)
heuristic_mode: int
heuristic_mode = Setting(0)

want_main_area = False
want_control_area = True
Expand All @@ -230,16 +224,16 @@ def __init__(self):
self.saved_state = None
self.progress = 0

self.data = None # type: Table
self.pp_data = None # type: Table
self.original_domain: Domain = ...
self.data: Table = ...
self.n_attrs = 0

self.scorer = None
self.heuristic = None
self.feature_index = None

gui.comboBox(self.controlArea, self, "heuristic_type",
items=Heuristic.type.values(),
gui.comboBox(self.controlArea, self, "heuristic_mode",
items=Heuristic.mode.values(),
callback=self.on_heuristic_combo_changed,)

self.feature_model = DomainModel(order=DomainModel.ATTRIBUTES,
Expand Down Expand Up @@ -279,8 +273,8 @@ def set_data(self, data):
self.closeContext()
self.clear_messages()
self.selection = {}
self.data = data
self.pp_data = None
self.original_domain = data and data.domain
self.data = None
self.n_attrs = 0
if data is not None:
if len(data) < 2:
Expand All @@ -289,20 +283,20 @@ def set_data(self, data):
self.Warning.no_class_var()
else:
remover = Remove(Remove.RemoveConstant)
pp_data = Discretize()(remover(data))
data = Discretize()(remover(data))
if remover.attr_results["removed"]:
self.Information.removed_cons_feat()
if len(pp_data.domain.attributes) < 2:
if len(data.domain.attributes) < 2:
self.Warning.not_enough_vars()
else:
self.pp_data = pp_data
self.n_attrs = len(pp_data.domain.attributes)
self.scorer = InteractionScorer(pp_data)
self.heuristic = Heuristic(self.scorer.information_gain, self.heuristic_type)
self.model.set_domain(pp_data.domain)
self.data = data
self.n_attrs = len(data.domain.attributes)
self.scorer = InteractionScorer(data)
self.heuristic = Heuristic(self.scorer.information_gain, self.heuristic_mode)
self.model.set_domain(data.domain)
self.proxy.scorer = self.scorer
self.feature_model.set_domain(self.pp_data and self.pp_data.domain)
self.openContext(self.pp_data)
self.feature_model.set_domain(self.data and self.data.domain)
self.openContext(self.data)
self.initialize()

def initialize(self):
Expand All @@ -316,17 +310,17 @@ def initialize(self):
self.model.clear()
self.filter.setText("")
self.button.setText("Start")
self.button.setEnabled(self.pp_data is not None)
if self.pp_data is not None:
self.button.setEnabled(self.data is not None)
if self.data is not None:
self.toggle()

def commit(self):
if self.data is None:
if self.original_domain is None:
self.Outputs.features.send(None)
return

self.Outputs.features.send(AttributeList(
[self.data.domain[attr] for attr in self.selection]))
[self.original_domain[attr] for attr in self.selection]))

def toggle(self):
self.keep_running = not self.keep_running
Expand Down Expand Up @@ -373,18 +367,19 @@ def on_filter_changed(self, text):
self.proxy.setFilterFixedString(text)

def on_feature_combo_changed(self):
self.feature_index = self.feature and self.pp_data.domain.index(self.feature)
self.feature_index = self.feature and self.data.domain.index(self.feature)
self.initialize()

def on_heuristic_combo_changed(self):
if self.pp_data is not None:
self.heuristic = Heuristic(self.scorer.information_gain, self.heuristic_type)
if self.data is not None:
self.heuristic = Heuristic(self.scorer.information_gain, self.heuristic_mode)
self.initialize()

def compute_score(self, state):
scores = (self.scorer(*state),
self.scorer.information_gain[state[0]],
self.scorer.information_gain[state[1]])
attr1, attr2 = state
scores = (self.scorer(attr1, attr2),
self.scorer.information_gain[attr1],
self.scorer.information_gain[attr2])
return tuple(self.scorer.normalize(score) for score in scores)

@staticmethod
Expand Down Expand Up @@ -420,7 +415,7 @@ def on_partial_result(self, result):
add_to_model, latest_state = result
if add_to_model:
self.saved_state = latest_state
self.model.append(add_to_model)
self.model.extend(add_to_model)
self.progress = len(self.model)
self.progressBarSet(self.progress * 100 // self.state_count())

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ def test_heuristic(self):
"""Check attribute pairs returned by heuristic"""
scorer = InteractionScorer(self.zoo)
heuristic = Heuristic(scorer.information_gain,
type=Heuristic.INFO_GAIN)
Heuristic.INFO_GAIN)
self.assertListEqual(list(heuristic.get_states(None))[:9],
[(14, 6), (14, 10), (14, 15), (6, 10),
(14, 5), (6, 15), (14, 11), (6, 5), (10, 15)])
Expand Down

0 comments on commit 3e8d853

Please sign in to comment.