Skip to content

Commit

Permalink
Merge pull request #6930 from janezd/dataset-filter-overrides-domain
Browse files Browse the repository at this point in the history
[ENH] Datasets: Let the filter override domain and language
  • Loading branch information
markotoplak authored Nov 15, 2024
2 parents 0e10bfd + f0aa6a0 commit 87334be
Show file tree
Hide file tree
Showing 3 changed files with 168 additions and 46 deletions.
97 changes: 68 additions & 29 deletions Orange/widgets/data/owdatasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@
GENERAL_DOMAIN = None
ALL_DOMAINS = "" # The setting is Optional[str], so don't use other types here

# Minimum length (in characters) of the filter string at which the filter
# overrides the domain and language selection
FILTER_OVERRIDE_LENGTH = 4


def ensure_local(index_url, file_path, local_cache_path,
force=False, progress_advance=None):
Expand Down Expand Up @@ -178,16 +181,17 @@ def domain(self):
def filterAcceptsRow(self, row, parent):
source = self.sourceModel()
data = source.index(row, 0).data(Qt.UserRole)
return (super().filterAcceptsRow(row, parent)
and (self.__language is None or data.language == self.__language)
and self.__domain in (ALL_DOMAINS, data.domain)
and (data.publication_status == Namespace.PUBLISHED or (
self.__filter is not None
and len(self.__filter) >= 5
and data.title.casefold().startswith(self.__filter)
))
in_filter = (
self.__filter is not None
and len(self.__filter) >= FILTER_OVERRIDE_LENGTH
and self.__filter in data.title.casefold()
)

published_ok = data.publication_status == Namespace.PUBLISHED
domain_ok = self.__domain in (ALL_DOMAINS, data.domain)
language_ok = self.__language in (None, data.language)
return (super().filterAcceptsRow(row, parent)
and (published_ok and domain_ok and language_ok
or in_filter))

class OWDataSets(OWWidget):
name = "Datasets"
Expand Down Expand Up @@ -237,9 +241,10 @@ class Outputs:
data = Output("Data", Orange.data.Table)

#: Selected dataset id
selected_id = Setting(None) # type: Optional[str]
selected_id: Optional[str] = Setting(None)
language = Setting(DEFAULT_LANG)
domain = Setting(GENERAL_DOMAIN)
filter_hint: Optional[str] = Setting(None)
settings_version = 2

#: main area splitter state
Expand Down Expand Up @@ -269,26 +274,40 @@ def __init__(self):
self.filterLineEdit = QLineEdit(
textChanged=self.filter, placeholderText="Search for data set ..."
)
self.filterLineEdit.setToolTip(
"Typing four letters or more overrides domain and language filters")
layout.addWidget(self.filterLineEdit)

self.combo_elements = []

layout.addSpacing(20)
layout.addWidget(QLabel("Show data sets in "))
label = QLabel("Show data sets in ")
layout.addWidget(label)
self.combo_elements.append(label)

lang_combo = self.language_combo = QComboBox()
languages = [self.DEFAULT_LANG, self.ALL_LANGUAGES]
if self.language is not None and self.language not in languages:
languages.insert(1, self.language)
lang_combo.addItems(languages)
lang_combo.setCurrentText(self.language)
if self.language is None:
lang_combo.setCurrentIndex(lang_combo.count() - 1)
else:
lang_combo.setCurrentText(self.language)
lang_combo.activated.connect(self._on_language_changed)
layout.addWidget(lang_combo)
self.combo_elements.append(lang_combo)

layout.addSpacing(20)
layout.addWidget(QLabel("Domain:"))
domain_combo = self.domain_combo = QComboBox()
domain_combo.addItem(self.GENERAL_DOMAIN_LABEL)
domain_combo.activated.connect(self._on_domain_changed)
if self.core_widget:
layout.addSpacing(20)
label = QLabel("Domain:")
layout.addWidget(label)
self.combo_elements.append(label)
layout.addWidget(domain_combo)
self.combo_elements.append(domain_combo)

self.mainArea.layout().addLayout(layout)

Expand Down Expand Up @@ -428,7 +447,7 @@ def update_language_combo(self):
if self.DEFAULT_LANG not in languages:
combo.addItem(self.DEFAULT_LANG)
combo.addItems(languages + [self.ALL_LANGUAGES])
if current_language in languages:
if current_language in languages or current_language == self.ALL_LANGUAGES:
combo.setCurrentText(current_language)
elif self.DEFAULT_LANG in languages:
combo.setCurrentText(self.DEFAULT_LANG)
Expand Down Expand Up @@ -482,23 +501,24 @@ def update_model(self):
# for settings do not use os.path.join (Windows separator is different)
if file_path[-1] == self.selected_id:
current_index = i
# for selected_id, set publication status so that unlisted data load correctly
datainfo.publication_status = Namespace.PUBLISHED
if self.core_widget:
self.domain = datainfo.domain
if self.domain == "sc": # domain from the list of ignored domain
self.domain = ALL_DOMAINS
combo = self.domain_combo
if self.domain == GENERAL_DOMAIN:
combo.setCurrentIndex(0)
elif self.domain == ALL_DOMAINS:
combo.setCurrentIndex(combo.count() - 1)
else:
combo.setCurrentText(self.domain)
self.__update_domain_combo()
self._on_domain_changed()

return model, current_index

def __update_domain_combo(self):
    """Make the domain combo box show the entry matching ``self.domain``."""
    box = self.domain_combo
    selected = self.domain
    if selected == GENERAL_DOMAIN:
        # The general-domain label is the first item added to the combo.
        box.setCurrentIndex(0)
        return
    if selected == ALL_DOMAINS:
        # "All domains" is addressed as the last entry of the combo.
        box.setCurrentIndex(box.count() - 1)
        return
    # Any other domain is selected by its name.
    box.setCurrentText(selected)

def _on_language_changed(self):
combo = self.language_combo
if combo.currentIndex() == combo.count() - 1:
Expand Down Expand Up @@ -542,6 +562,16 @@ def __set_index(self, f):

def set_model(self, model, current_index):
self.view.model().setSourceModel(model)
if current_index != -1:
for hint in (
self.filter_hint,
model.index(current_index, 0).data(Qt.UserRole).title):
if self.view.model().filterAcceptsRow(current_index,
QModelIndex()):
break
self.filterLineEdit.setText(hint)
self.filter()

self.view.selectionModel().selectionChanged.connect(
self.__on_selection
)
Expand Down Expand Up @@ -606,6 +636,18 @@ def selected_dataset(self):

def filter(self):
filter_string = self.filterLineEdit.text().strip()
enable_combos = len(filter_string) < FILTER_OVERRIDE_LENGTH
if enable_combos is not self.domain_combo.isEnabled():
for element in self.combo_elements:
element.setEnabled(enable_combos)
if enable_combos:
self.__update_domain_combo()
self.language_combo.setCurrentText(self.language)
else:
self.domain_combo.setCurrentText(self.ALL_DOMAINS_LABEL)
self.language_combo.setCurrentText(self.ALL_LANGUAGES)

self.filter_hint = filter_string
proxyModel = self.view.model()
if proxyModel:
proxyModel.setFilterFixedString(filter_string)
Expand All @@ -620,13 +662,8 @@ def __on_selection(self):
di = current.data(Qt.UserRole)
text = description_html(di)
self.descriptionlabel.setText(text)
# for settings do not use os.path.join (Windows separator is different)
self.selected_id = di.file_path[-1]
# do not clear a dataset once you select it if it was unlisted
di.publication_status = Namespace.PUBLISHED
else:
self.descriptionlabel.setText("")
self.selected_id = None

def commit(self):
"""
Expand All @@ -639,6 +676,7 @@ def commit(self):
di = self.selected_dataset()
if di is not None:
self.Error.clear()
self.selected_id = di.file_path[-1]

if self.__awaiting_state is not None:
# disconnect from the __commit_complete
Expand Down Expand Up @@ -673,6 +711,7 @@ def commit(self):
self.setBlocking(False)
self.commit_cached(di.file_path)
else:
self.selected_id = None
self.load_and_output(None)

@Slot(object)
Expand Down
97 changes: 91 additions & 6 deletions Orange/widgets/data/tests/test_owdatasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,12 @@ def wait_and_return(_):
self.assertEqual(w.selected_id, "bar.tab")
self.assertEqual(w.domain_combo.currentText(), "edu")

def __titles(self, widget):
    """Return the set of data set titles currently listed in the view.

    Titles are read from the objects stored under ``Qt.UserRole`` in the
    first column of the model attached to ``widget.view``.
    """
    proxy = widget.view.model()
    shown = set()
    for row in range(proxy.rowCount()):
        info = proxy.index(row, 0).data(Qt.UserRole)
        shown.add(info.title)
    return shown

@patch("Orange.widgets.data.owdatasets.list_remote",
Mock(side_effect=requests.exceptions.ConnectionError))
@patch("Orange.widgets.data.owdatasets.list_local",
Expand All @@ -142,18 +148,16 @@ def wait_and_return(_):
@patch("Orange.widgets.data.owdatasets.log", Mock())
def test_filtering_unlisted(self):
def titles():
return {
model.index(row, 0).data(Qt.UserRole).title
for row in range(model.rowCount()) }
return self.__titles(w)

w = self.create_widget(OWDataSets) # type: OWDataSets
model = w.view.model()
self.assertEqual(titles(), {"a published data set", "an unp unp"})

model.setFilterFixedString("an u")
model.setFilterFixedString("unp")
self.assertEqual(titles(), {"an unp unp"})

model.setFilterFixedString("an Un")
model.setFilterFixedString("an U")
self.assertEqual(titles(), {"an unlisted data set", "an unp unp"})

model.setFilterFixedString("")
Expand All @@ -162,6 +166,71 @@ def titles():
model.setFilterFixedString(None)
self.assertEqual(titles(), {"a published data set", "an unp unp"})

# The remote index is mocked with three data sets: one in English, one in
# the "Testing" domain and one in Slovenščina. There are no local files and
# `OWDataSets.commit` is replaced by a mock.
@patch("Orange.widgets.data.owdatasets.list_remote",
       Mock(return_value={('core', 'foo.tab'): {"title": "Foo data set",
                                                "language": "English"},
                          ('core', 'bar.tab'): {"title": "Bar data set",
                                                "domain": "Testing"},
                          ('core', 'bax.tab'): {"title": "Bax data set",
                                                "language": "Slovenščina"}
                          }))
@patch("Orange.widgets.data.owdatasets.list_local",
       Mock(return_value={}))
@patch("Orange.widgets.data.owdatasets.OWDataSets.commit", Mock())
def test_filter_overrides_language_and_domain(self):
    # Check that a sufficiently long filter string overrides the language
    # and domain combos, and that this state survives a settings round trip.
    w = self.create_widget(OWDataSets)  # type: OWDataSets
    self.wait_until_stop_blocking(w)
    # Choose the language and domain through the combos; `activated` is
    # emitted explicitly so that the handlers connected to it run.
    w.language_combo.setCurrentText("Slovenščina")
    w.language_combo.activated.emit(w.language_combo.currentIndex())
    w.domain_combo.setCurrentText(w.GENERAL_DOMAIN_LABEL)
    w.domain_combo.activated.emit(w.domain_combo.currentIndex())

    # Without a filter, only the Slovenian data set passes the combos.
    self.assertEqual(self.__titles(w), {"Bax data set"})

    # "data " is stripped to four characters, which reaches the override
    # length: all titles containing it are listed, and both combos are
    # disabled and switched to their "all" entries.
    w.filterLineEdit.setText("data ")
    self.assertEqual(self.__titles(w), {"Foo data set",
                                        "Bar data set",
                                        "Bax data set"})
    self.assertEqual(w.language_combo.currentText(), w.ALL_LANGUAGES)
    self.assertFalse(w.language_combo.isEnabled())
    self.assertEqual(w.domain_combo.currentText(), w.ALL_DOMAINS_LABEL)
    self.assertFalse(w.domain_combo.isEnabled())

    # A filter shorter than the override length hands control back to the
    # combos, which are re-enabled and show the previous choices again.
    w.filterLineEdit.setText("da")
    self.assertEqual(self.__titles(w), {"Bax data set"})
    self.assertEqual(w.language_combo.currentText(), "Slovenščina")
    self.assertTrue(w.language_combo.isEnabled())
    self.assertEqual(w.domain_combo.currentText(), w.GENERAL_DOMAIN_LABEL)
    self.assertTrue(w.domain_combo.isEnabled())


    # Long filters select by title regardless of language and domain.
    w.filterLineEdit.setText("bar d")
    self.assertEqual(self.__titles(w), {"Bar data set"})

    w.filterLineEdit.setText("bax d")
    self.assertEqual(self.__titles(w), {"Bax data set"})

    # Changing the language while the override filter is active must not
    # change the listed rows.
    w.language_combo.setCurrentText("English")
    w.language_combo.activated.emit(2)
    self.assertEqual(self.__titles(w), {"Bax data set"})

    settings = w.settingsHandler.pack_data(w)

    # Restoring these settings is expected to give a widget that shows the
    # stored language and lists only the English data set.
    # NOTE(review): presumably the stored filter is not re-applied because
    # no data set is selected (commit is mocked) -- confirm.
    w2 = self.create_widget(OWDataSets, stored_settings=settings)
    self.wait_until_stop_blocking(w2)
    self.assertEqual(w2.language_combo.currentText(), "English")
    self.assertEqual(self.__titles(w2), {"Foo data set"})

    # With a stored selection that the English setting would hide, the
    # restored widget is expected to bring back the stored filter text so
    # that the selected data set stays listed, with the combos overridden.
    w.selected_id = "bax.tab"
    settings = w.settingsHandler.pack_data(w)
    w2 = self.create_widget(OWDataSets, stored_settings=settings)
    self.wait_until_stop_blocking(w2)
    self.assertEqual(w2.language_combo.currentText(), w2.ALL_LANGUAGES)
    self.assertFalse(w2.language_combo.isEnabled())
    self.assertEqual(w2.filterLineEdit.text(), "bax d")
    self.assertEqual(self.__titles(w2), {"Bax data set"})


@patch("Orange.widgets.data.owdatasets.list_remote",
Mock(return_value={('core', 'foo.tab'): {"language": "English"},
('core', 'bar.tab'): {"language": "Slovenščina"}}))
Expand All @@ -183,6 +252,22 @@ def test_remember_language(self):
self.wait_until_stop_blocking(w2)
self.assertEqual(w2.language_combo.currentText(), "Klingon")

@patch("Orange.widgets.data.owdatasets.list_remote",
       Mock(return_value={('core', 'foo.tab'): {"language": "English"},
                          ('core', 'bar.tab'): {"language": "Slovenščina"}}))
@patch("Orange.widgets.data.owdatasets.list_local",
       Mock(return_value={}))
def test_remember_all_languages(self):
    # Choosing the "all languages" entry must survive a settings round trip.
    original = self.create_widget(OWDataSets)  # type: OWDataSets
    self.wait_until_stop_blocking(original)

    # Pick the entry and emit `activated` so the connected handler runs.
    lang_combo = original.language_combo
    lang_combo.setCurrentText(original.ALL_LANGUAGES)
    lang_combo.activated.emit(lang_combo.currentIndex())

    # Recreate the widget from the packed settings and check the combo.
    packed = original.settingsHandler.pack_data(original)
    restored = self.create_widget(OWDataSets, stored_settings=packed)
    self.wait_until_stop_blocking(restored)
    self.assertEqual(restored.language_combo.currentText(),
                     restored.ALL_LANGUAGES)

@patch("Orange.widgets.data.owdatasets.list_remote",
Mock(return_value={('core', 'iris.tab'): {}}))
@patch("Orange.widgets.data.owdatasets.list_local",
Expand All @@ -196,8 +281,8 @@ def test_download_iris(self):
# select the only dataset
sel_type = QItemSelectionModel.ClearAndSelect | QItemSelectionModel.Rows
w.view.selectionModel().select(w.view.model().index(0, 0), sel_type)
self.assertEqual(w.selected_id, "iris.tab")
w.commit()
self.assertEqual(w.selected_id, "iris.tab")
iris = self.get_output(w.Outputs.data, w)
self.assertEqual(len(iris), 150)

Expand Down
Loading

0 comments on commit 87334be

Please sign in to comment.