Skip to content

Commit

Permalink
Merge pull request #457 from PrimozGodec/remove-extend
Browse files Browse the repository at this point in the history
[FIX] Remove extend function from corpus
  • Loading branch information
ajdapretnar authored Oct 1, 2019
2 parents 70be2b8 + 7e6dc50 commit 8e72b30
Show file tree
Hide file tree
Showing 2 changed files with 0 additions and 41 deletions.
16 changes: 0 additions & 16 deletions orangecontrib/text/corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,22 +112,6 @@ def _infer_text_features(self):
include_feats.append(first)
self.set_text_features(include_feats)

def extend(self, instances):
    """
    Append `instances` to this corpus in place.

    After the parent class has extended the underlying data, the cached
    tokens and POS tags are concatenated when both corpora carry them;
    if either side lacks them, the cache on this corpus is cleared.
    The cached n-grams are always discarded.

    Args:
        instances: the corpus to append; its domain must equal
            this corpus' domain.

    Raises:
        NotImplementedError: if the two domains differ.
    """
    if self.domain != instances.domain:
        raise NotImplementedError(
            'Extending corpora with different domains is not supported.')

    super().extend(instances)

    # Tokens survive only when both corpora were already tokenized.
    if self._tokens is not None and instances._tokens is not None:
        self._tokens = np.append(self._tokens, instances._tokens)
        # Rebuild the dictionary from the merged token array.
        self._dictionary = corpora.Dictionary(self._tokens)
    else:
        self._tokens = None

    # Same rule for POS tags: keep them only if both sides have them.
    if self.pos_tags is not None and instances.pos_tags is not None:
        self.pos_tags = np.append(self.pos_tags, instances.pos_tags)
    else:
        self.pos_tags = None

    # TODO: extend the cached n-grams instead of resetting them.
    self._ngrams_corpus = None

def extend_corpus(self, metadata, Y):
"""
Append documents to corpus.
Expand Down
25 changes: 0 additions & 25 deletions orangecontrib/text/tests/test_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,31 +62,6 @@ def test_corpus_from_init(self):
c2 = Corpus(c.domain, c.X, c.Y, c.metas, c.text_features)
self.assertEqual(c, c2)

@unittest.skipIf(LooseVersion(Orange.__version__) < LooseVersion('3.4.3'),
                 'Not supported in versions of Orange below 3.4.3')
def test_extend(self):
    """Check how `Corpus.extend` handles length, tokens and POS tags."""
    corpus = Corpus.from_file('deerwester')
    head = corpus[:5]
    self.assertEqual(len(head), 5)
    initial_size = len(corpus)

    # Tag only the full corpus; the slice taken earlier has no tokens/tags.
    self.pos_tagger.tag_corpus(corpus)
    self.assertIsNotNone(corpus._tokens)
    self.assertIsNotNone(corpus.pos_tags)
    self.assertIsNone(head._tokens)
    self.assertIsNone(head.pos_tags)

    # Extending with an untagged corpus must clear tokens and tags.
    corpus.extend(head)
    self.assertEqual(len(corpus), initial_size + 5)
    self.assertIsNone(corpus._tokens)
    self.assertIsNone(corpus.pos_tags)

    # When both corpora are tagged, tokens and tags are concatenated.
    self.pos_tagger.tag_corpus(corpus)
    self.pos_tagger.tag_corpus(head)
    corpus.extend(head)
    expected_size = initial_size + 10
    self.assertEqual(len(corpus), expected_size)
    self.assertEqual(len(corpus._tokens), expected_size)
    self.assertEqual(len(corpus.pos_tags), expected_size)

def test_extend_corpus(self):
c = Corpus.from_file('book-excerpts')
n_classes = len(c.domain.class_var.values)
Expand Down

0 comments on commit 8e72b30

Please sign in to comment.