cleanup

biolab · Nov 23, 2023 · 3e8d853 · 3e8d853
1 parent c4f9c21
commit 3e8d853
Show file tree

Hide file tree

Showing 4 changed files with 73 additions and 76 deletions.
diff --git a/orangecontrib/prototypes/interactions.py b/orangecontrib/prototypes/interactions.py
@@ -1,7 +1,7 @@
 import numpy as np
 
 
-def get_row_ids(ar):
+def hash_rows(ar):
     row_ids = ar[:, 0].copy()
     # Assuming the data has been discretized into fewer
     # than 10000 bins and that `ar` has up to 3 columns,
@@ -30,7 +30,7 @@ def distribution(ar):
         # implementation doesn't release the GIL. The simplest
         # solution seems to be generating unique numbers/ids
         # based on the contents of each row.
-        ar = get_row_ids(ar)
+        ar = hash_rows(ar)
 
     _, counts = np.unique(ar, return_counts=True)
     return counts / ar.shape[0]
@@ -47,9 +47,9 @@ def __init__(self, data):
         self.class_entropy = 0
         self.information_gain = np.zeros(data.X.shape[1])
 
-        self.precompute()
+        self.preprocess()
 
-    def precompute(self):
+    def preprocess(self):
         """
         Precompute information gain of each attribute to speed up
         computation and to create heuristic.
@@ -68,12 +68,12 @@ def precompute(self):
                                - entropy(np.column_stack((self.data.X[:, attr], self.data.Y)))
 
     def __call__(self, attr1, attr2):
-        attrs = np.column_stack((self.data.X[:, attr1], self.data.X[:, attr2]))
+        attrs = self.data.X[:, (attr1, attr2)]
         return self.class_entropy \
-               - self.information_gain[attr1] \
-               - self.information_gain[attr2] \
-               + entropy(attrs) \
-               - entropy(np.column_stack((attrs, self.data.Y)))
+            - self.information_gain[attr1] \
+            - self.information_gain[attr2] \
+            + entropy(attrs) \
+            - entropy(np.column_stack((attrs, self.data.Y)))
 
     def normalize(self, score):
         return score / self.class_entropy
diff --git a/orangecontrib/prototypes/ranktablemodel.py b/orangecontrib/prototypes/ranktablemodel.py
@@ -39,7 +39,8 @@ def __init__(self, *args, **kwargs):
         # ``rowCount`` returns the lowest of `_rows` and `_max_view_rows`:
         # how large the model/view thinks it is
 
-    def sortInd(self):
+    @property
+    def __sortInd(self):
         return self._AbstractSortTableModel__sortInd
 
     def sortColumnData(self, column):
@@ -49,7 +50,7 @@ def extendSortFrom(self, sorted_rows: int):
         data = self.sortColumnData(self.sortColumn())
         new_ind = np.arange(sorted_rows, self._rows)
         order = 1 if self.sortOrder() == Qt.AscendingOrder else -1
-        sorter = self.sortInd()[::order]
+        sorter = self.__sortInd[::order]
         new_sorter = np.argsort(data[sorted_rows:])
         loc = np.searchsorted(data[:sorted_rows],
                               data[sorted_rows:][new_sorter],
@@ -88,18 +89,21 @@ def clear(self):
         self.resetSorting()
         self.endResetModel()
 
-    def append(self, rows: list[list[float]]):
+    def extend(self, rows: list[list[float]]):
         if not isinstance(self._data, np.ndarray):
-            return self.initialize(rows)
+            self.initialize(rows)
+            return
 
         n_rows = len(rows)
         if n_rows == 0:
             return
+
         n_data = len(self._data)
         insert = self._rows < self._max_view_rows
 
         if insert:
-            self.beginInsertRows(QModelIndex(), self._rows, min(self._max_view_rows, self._rows + n_rows) - 1)
+            self.beginInsertRows(QModelIndex(), self._rows,
+                                 min(self._max_view_rows, self._rows + n_rows) - 1)
 
         if self._rows + n_rows >= n_data:
             n_data = min(max(n_data + n_rows, 2 * n_data), self._max_data_rows)
@@ -126,11 +130,9 @@ class RankModel(ArrayTableModel):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
-        self.domain = None  # type: Domain
         self.domain_model = DomainModel(DomainModel.ATTRIBUTES)
 
     def set_domain(self, domain: Domain):
-        self.domain = domain
         self.domain_model.set_domain(domain)
         n_attrs = len(domain.attributes)
         self._max_data_rows = n_attrs * (n_attrs - 1) // 2
@@ -143,16 +145,16 @@ def resetSorting(self):
 
     def data(self, index: QModelIndex, role=Qt.DisplayRole):
         if not index.isValid():
-            return
+            return None
 
         column = index.column()
-
         if column >= self.columnCount() - 2 and role != Qt.EditRole:
+            # The last two columns hold attribute indices: delegate every role except Qt.EditRole to the domain model
             try:
                 row = self.mapToSourceRows(index.row())
                 value = self.domain_model.index(int(self._data[row, column]))
                 return self.domain_model.data(value, role)
             except IndexError:
-                return
+                return None
 
         return super().data(index, role)
diff --git a/orangecontrib/prototypes/widgets/owinteractions.py b/orangecontrib/prototypes/widgets/owinteractions.py
@@ -9,7 +9,10 @@
 from AnyQt.QtWidgets import QTableView, QHeaderView, \
     QStyleOptionViewItem, QApplication, QStyle, QLineEdit
 
-from Orange.data import Table, Variable
+from orangecontrib.prototypes.ranktablemodel import RankModel
+from orangecontrib.prototypes.interactions import InteractionScorer
+
+from Orange.data import Table, Domain, Variable
 from Orange.preprocess import Discretize, Remove
 from Orange.widgets import gui
 from Orange.widgets.widget import OWWidget, AttributeList, Msg
@@ -19,31 +22,23 @@
 from Orange.widgets.utils.itemmodels import DomainModel
 from Orange.widgets.settings import Setting, ContextSetting, DomainContextHandler
 
-from orangecontrib.prototypes.ranktablemodel import RankModel
-from orangecontrib.prototypes.interactions import InteractionScorer
-
 
 class ModelQueue:
-    """
-    Another queueing object, similar to ``queue.Queue``.
-    The main difference is that ``get()`` returns all its
-    contents at the same time, instead of one by one.
-    """
     def __init__(self):
-        self.lock = Lock()
-        self.model = []
-        self.state = None
+        self.mutex = Lock()
+        self.queue = []
+        self.latest_state = None
 
     def put(self, row, state):
-        with self.lock:
-            self.model.append(row)
-            self.state = state
+        with self.mutex:
+            self.queue.append(row)
+            self.latest_state = state
 
     def get(self):
-        with self.lock:
-            model, self.model = self.model, []
-            state, self.state = self.state, None
-        return model, state
+        with self.mutex:
+            queue, self.queue = self.queue, []
+            state, self.latest_state = self.latest_state, None
+        return queue, state
 
 
 def run(compute_score: Callable, row_for_state: Callable,
@@ -52,8 +47,8 @@ def run(compute_score: Callable, row_for_state: Callable,
     """
     Replaces ``run_vizrank``, with some minor adjustments.
         - ``ModelQueue`` replaces ``queue.Queue``
-        - `row_for_state` parameter added
-        - `scores` parameter removed
+        - `row_for_state` can be called here, assuming we are not adding `Qt` objects to the model
+        - `scores` removed
     """
     task.set_status("Getting combinations...")
     task.set_progress_value(0.1)
@@ -103,16 +98,16 @@ def reset_flag():
 
 class Heuristic:
     RANDOM, INFO_GAIN = 0, 1
-    type = {RANDOM: "Random Search",
-            INFO_GAIN: "Information Gain Heuristic"}
+    mode = {RANDOM: "Random Search",
+            INFO_GAIN: "Low Information Gain First"}
 
-    def __init__(self, weights, type=None):
+    def __init__(self, weights, mode=RANDOM):
         self.n_attributes = len(weights)
         self.attributes = np.arange(self.n_attributes)
-        if type == self.RANDOM:
-            np.random.shuffle(self.attributes)
-        if type == self.INFO_GAIN:
+        if mode == Heuristic.INFO_GAIN:
             self.attributes = self.attributes[np.argsort(weights)]
+        else:
+            np.random.shuffle(self.attributes)
 
     def generate_states(self):
         # prioritize two mid ranked attributes over highest first
@@ -196,7 +191,6 @@ class OWInteractions(OWWidget, ConcurrentWidgetMixin):
     name = "Interactions"
     description = "Compute all pairwise attribute interactions."
     icon = "icons/Interactions.svg"
-    category = "Unsupervised"
 
     class Inputs:
         data = Input("Data", Table)
@@ -208,8 +202,8 @@ class Outputs:
     selection = ContextSetting([])
     feature: Variable
     feature = ContextSetting(None)
-    heuristic_type: int
-    heuristic_type = Setting(0)
+    heuristic_mode: int
+    heuristic_mode = Setting(0)
 
     want_main_area = False
     want_control_area = True
@@ -230,16 +224,16 @@ def __init__(self):
         self.saved_state = None
         self.progress = 0
 
-        self.data = None  # type: Table
-        self.pp_data = None  # type: Table
+        self.original_domain: Domain = ...
+        self.data: Table = ...
         self.n_attrs = 0
 
         self.scorer = None
         self.heuristic = None
         self.feature_index = None
 
-        gui.comboBox(self.controlArea, self, "heuristic_type",
-                     items=Heuristic.type.values(),
+        gui.comboBox(self.controlArea, self, "heuristic_mode",
+                     items=Heuristic.mode.values(),
                      callback=self.on_heuristic_combo_changed,)
 
         self.feature_model = DomainModel(order=DomainModel.ATTRIBUTES,
@@ -279,8 +273,8 @@ def set_data(self, data):
         self.closeContext()
         self.clear_messages()
         self.selection = {}
-        self.data = data
-        self.pp_data = None
+        self.original_domain = data and data.domain
+        self.data = None
         self.n_attrs = 0
         if data is not None:
             if len(data) < 2:
@@ -289,20 +283,20 @@ def set_data(self, data):
                 self.Warning.no_class_var()
             else:
                 remover = Remove(Remove.RemoveConstant)
-                pp_data = Discretize()(remover(data))
+                data = Discretize()(remover(data))
                 if remover.attr_results["removed"]:
                     self.Information.removed_cons_feat()
-                if len(pp_data.domain.attributes) < 2:
+                if len(data.domain.attributes) < 2:
                     self.Warning.not_enough_vars()
                 else:
-                    self.pp_data = pp_data
-                    self.n_attrs = len(pp_data.domain.attributes)
-                    self.scorer = InteractionScorer(pp_data)
-                    self.heuristic = Heuristic(self.scorer.information_gain, self.heuristic_type)
-                    self.model.set_domain(pp_data.domain)
+                    self.data = data
+                    self.n_attrs = len(data.domain.attributes)
+                    self.scorer = InteractionScorer(data)
+                    self.heuristic = Heuristic(self.scorer.information_gain, self.heuristic_mode)
+                    self.model.set_domain(data.domain)
                     self.proxy.scorer = self.scorer
-        self.feature_model.set_domain(self.pp_data and self.pp_data.domain)
-        self.openContext(self.pp_data)
+        self.feature_model.set_domain(self.data and self.data.domain)
+        self.openContext(self.data)
         self.initialize()
 
     def initialize(self):
@@ -316,17 +310,17 @@ def initialize(self):
         self.model.clear()
         self.filter.setText("")
         self.button.setText("Start")
-        self.button.setEnabled(self.pp_data is not None)
-        if self.pp_data is not None:
+        self.button.setEnabled(self.data is not None)
+        if self.data is not None:
             self.toggle()
 
     def commit(self):
-        if self.data is None:
+        if self.original_domain is None:
             self.Outputs.features.send(None)
             return
 
         self.Outputs.features.send(AttributeList(
-            [self.data.domain[attr] for attr in self.selection]))
+            [self.original_domain[attr] for attr in self.selection]))
 
     def toggle(self):
         self.keep_running = not self.keep_running
@@ -373,18 +367,19 @@ def on_filter_changed(self, text):
         self.proxy.setFilterFixedString(text)
 
     def on_feature_combo_changed(self):
-        self.feature_index = self.feature and self.pp_data.domain.index(self.feature)
+        self.feature_index = self.feature and self.data.domain.index(self.feature)
         self.initialize()
 
     def on_heuristic_combo_changed(self):
-        if self.pp_data is not None:
-            self.heuristic = Heuristic(self.scorer.information_gain, self.heuristic_type)
+        if self.data is not None:
+            self.heuristic = Heuristic(self.scorer.information_gain, self.heuristic_mode)
         self.initialize()
 
     def compute_score(self, state):
-        scores = (self.scorer(*state),
-                  self.scorer.information_gain[state[0]],
-                  self.scorer.information_gain[state[1]])
+        attr1, attr2 = state
+        scores = (self.scorer(attr1, attr2),
+                  self.scorer.information_gain[attr1],
+                  self.scorer.information_gain[attr2])
         return tuple(self.scorer.normalize(score) for score in scores)
 
     @staticmethod
@@ -420,7 +415,7 @@ def on_partial_result(self, result):
         add_to_model, latest_state = result
         if add_to_model:
             self.saved_state = latest_state
-            self.model.append(add_to_model)
+            self.model.extend(add_to_model)
             self.progress = len(self.model)
             self.progressBarSet(self.progress * 100 // self.state_count())
 

diff --git a/orangecontrib/prototypes/widgets/tests/test_owinteractions.py b/orangecontrib/prototypes/widgets/tests/test_owinteractions.py
@@ -265,7 +265,7 @@ def test_heuristic(self):
         """Check attribute pairs returned by heuristic"""
         scorer = InteractionScorer(self.zoo)
         heuristic = Heuristic(scorer.information_gain,
-                              type=Heuristic.INFO_GAIN)
+                              Heuristic.INFO_GAIN)
         self.assertListEqual(list(heuristic.get_states(None))[:9],
                              [(14, 6), (14, 10), (14, 15), (6, 10),
                               (14, 5), (6, 15), (14, 11), (6, 5), (10, 15)])