From 3a85c85d37eb9c38a8771a7b3f20eec583264590 Mon Sep 17 00:00:00 2001 From: nikicc Date: Fri, 18 Aug 2017 12:12:36 +0200 Subject: [PATCH 1/4] OWCorpus: Add table input --- orangecontrib/text/widgets/owcorpus.py | 54 +++++++++++++++++++------- 1 file changed, 39 insertions(+), 15 deletions(-) diff --git a/orangecontrib/text/widgets/owcorpus.py b/orangecontrib/text/widgets/owcorpus.py index d86b70983..112c91f44 100644 --- a/orangecontrib/text/widgets/owcorpus.py +++ b/orangecontrib/text/widgets/owcorpus.py @@ -1,11 +1,12 @@ import os +from Orange.data import Table from Orange.data.io import FileFormat from Orange.widgets import gui from Orange.widgets.utils.itemmodels import VariableListModel from Orange.widgets.data.owselectcolumns import VariablesListItemView from Orange.widgets.settings import Setting, ContextSetting, PerfectDomainContextHandler -from Orange.widgets.widget import OWWidget, Msg, Output +from Orange.widgets.widget import OWWidget, Msg, Input, Output from orangecontrib.text.corpus import Corpus, get_sample_corpora_dir from orangecontrib.text.widgets.utils import widgets @@ -17,8 +18,11 @@ class OWCorpus(OWWidget): priority = 100 replaces = ["orangecontrib.text.widgets.owloadcorpus.OWLoadCorpus"] + class Inputs: + data = Input('Data', Table) + class Outputs: - corpus = Output("Corpus", Corpus) + corpus = Output('Corpus', Corpus) want_main_area = False resizing_enabled = True @@ -89,33 +93,53 @@ def __init__(self): # Documentation Data Sets & Report box = gui.hBox(self.controlArea) - gui.button(box, self, "Browse documentation corpora", - callback=lambda: self.file_widget.browse( - get_sample_corpora_dir()), - autoDefault=False, - ) + self.browse_documentation = gui.button( + box, self, "Browse documentation corpora", + callback=lambda: self.file_widget.browse( + get_sample_corpora_dir()), + autoDefault=False, + ) box.layout().addWidget(self.report_button) # load first file self.file_widget.select(0) - def open_file(self, path): + @Inputs.data + def set_data(self, data): + have_data = data is not None + + # Enable/Disable command when data from input + self.file_widget.setEnabled(not have_data) + self.browse_documentation.setEnabled(not have_data) + + if have_data: + self.open_file(data=data) + else: + self.file_widget.reload() + + def open_file(self, path=None, data=None): self.closeContext() self.Error.read_file.clear() self.used_attrs_model[:] = [] self.unused_attrs_model[:] = [] - if path: + if data: + self.corpus = Corpus.from_table(data.domain, data) + elif path: try: self.corpus = Corpus.from_file(path) self.corpus.name = os.path.splitext(os.path.basename(path))[0] - self.update_info() - self.used_attrs = list(self.corpus.text_features) - self.openContext(self.corpus) - self.used_attrs_model.extend(self.used_attrs) - self.unused_attrs_model.extend([f for f in self.corpus.domain.metas - if f.is_string and f not in self.used_attrs_model]) except BaseException as err: self.Error.read_file(path, str(err)) + else: + return + + self.update_info() + self.used_attrs = list(self.corpus.text_features) + self.openContext(self.corpus) + self.used_attrs_model.extend(self.used_attrs) + self.unused_attrs_model.extend( + [f for f in self.corpus.domain.metas + if f.is_string and f not in self.used_attrs_model]) def update_info(self): def describe(corpus): From 5be117e0acfc00dc0aee1b352408162c10b25020 Mon Sep 17 00:00:00 2001 From: nikicc Date: Fri, 18 Aug 2017 14:19:33 +0200 Subject: [PATCH 2/4] OWCorpus: Error when no text features --- orangecontrib/text/widgets/owcorpus.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/orangecontrib/text/widgets/owcorpus.py b/orangecontrib/text/widgets/owcorpus.py index 112c91f44..28d7340f6 100644 --- a/orangecontrib/text/widgets/owcorpus.py +++ b/orangecontrib/text/widgets/owcorpus.py @@ -49,6 +49,7 @@ class Outputs: class Error(OWWidget.Error): read_file = Msg("Can't read file {} ({})") + corpus_without_text_features = Msg("Corpus doesn't have any textual features.") def __init__(self): super().__init__() @@ -119,7 +120,7 @@ def set_data(self, data): def open_file(self, path=None, data=None): self.closeContext() - self.Error.read_file.clear() + self.Error.clear() self.used_attrs_model[:] = [] self.unused_attrs_model[:] = [] if data: @@ -135,6 +136,10 @@ def open_file(self, path=None, data=None): self.update_info() self.used_attrs = list(self.corpus.text_features) + if not self.corpus.text_features: + self.Error.corpus_without_text_features() + self.Outputs.corpus.send(None) + return self.openContext(self.corpus) self.used_attrs_model.extend(self.used_attrs) self.unused_attrs_model.extend( From 8c3ee9c8d1402a17222dee8d6c5b88c056efbb3d Mon Sep 17 00:00:00 2001 From: nikicc Date: Fri, 18 Aug 2017 14:57:42 +0200 Subject: [PATCH 3/4] OWCorpus: Error when none text feats used --- orangecontrib/text/widgets/owcorpus.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/orangecontrib/text/widgets/owcorpus.py b/orangecontrib/text/widgets/owcorpus.py index 28d7340f6..10a76b7b2 100644 --- a/orangecontrib/text/widgets/owcorpus.py +++ b/orangecontrib/text/widgets/owcorpus.py @@ -49,6 +49,7 @@ class Outputs: class Error(OWWidget.Error): read_file = Msg("Can't read file {} ({})") + no_text_features_used = Msg("At least one text feature must be used.") corpus_without_text_features = Msg("Corpus doesn't have any textual features.") def __init__(self): @@ -121,8 +122,8 @@ def set_data(self, data): def open_file(self, path=None, data=None): self.closeContext() self.Error.clear() - self.used_attrs_model[:] = [] self.unused_attrs_model[:] = [] + self.used_attrs_model[:] = [] if data: self.corpus = Corpus.from_table(data.domain, data) elif path: @@ -173,6 +174,7 @@ def describe(corpus): self.info_label.setText(describe(self.corpus)) def update_feature_selection(self): + self.Error.no_text_features_used.clear() # TODO fix VariablesListItemView so it does not emit # duplicated data when reordering inside a single window def remove_duplicates(l): @@ -187,9 +189,14 @@ def remove_duplicates(l): remove_duplicates(self.used_attrs_model)) self.used_attrs = list(self.used_attrs_model) + if len(self.unused_attrs_model) > 0 and not self.corpus.text_features: + self.Error.no_text_features_used() + # prevent sending "empty" corpora dom = self.corpus.domain - empty = not (dom.variables or dom.metas) or len(self.corpus) == 0 + empty = not (dom.variables or dom.metas) \ + or len(self.corpus) == 0 \ + or not self.corpus.text_features self.Outputs.corpus.send(self.corpus if not empty else None) def send_report(self): From 521f178297293c7d9d024ed9ed90ce7e067acb15 Mon Sep 17 00:00:00 2001 From: nikicc Date: Fri, 18 Aug 2017 15:06:42 +0200 Subject: [PATCH 4/4] OWCorpusViewer: Change input to Corpus --- orangecontrib/text/widgets/owcorpusviewer.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/orangecontrib/text/widgets/owcorpusviewer.py b/orangecontrib/text/widgets/owcorpusviewer.py index 814ad0eb7..9df84ef66 100644 --- a/orangecontrib/text/widgets/owcorpusviewer.py +++ b/orangecontrib/text/widgets/owcorpusviewer.py @@ -9,7 +9,6 @@ QAbstractItemView, QHeaderView, QSplitter, QApplication) -from Orange.data import Table from Orange.data.domain import filter_visible from Orange.widgets import gui, widget from Orange.widgets.settings import Setting, ContextSetting, PerfectDomainContextHandler @@ -24,7 +23,7 @@ class OWCorpusViewer(OWWidget): priority = 500 class Inputs: - data = Input("Data", Table) + corpus = Input("Corpus", Corpus) class Outputs: matching_docs = Output("Matching Docs", Corpus, default=True) @@ -123,15 +122,13 @@ def copy_to_clipboard(self): text = self.doc_webview.selectedText() QApplication.clipboard().setText(text) - @Inputs.data - def set_data(self, data=None): + @Inputs.corpus + def set_data(self, corpus=None): self.closeContext() self.reset_widget() - self.corpus = data + self.corpus = corpus self.search_features = [] - if data is not None: - if not isinstance(data, Corpus): - self.corpus = Corpus.from_table(data.domain, data) + if corpus is not None: domain = self.corpus.domain # Enable/disable tokens checkbox if not self.corpus.has_tokens():