Skip to content

Commit

Permalink
Check for dropped tokens
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewdalpino committed Oct 15, 2024
1 parent 389066a commit a453675
Showing 1 changed file with 6 additions and 1 deletion.
7 changes: 6 additions & 1 deletion tests/test_tokenizers.py
Original file line number Diff line number Diff line change
@@ -13,6 +13,8 @@ def test_tokenize(self):
for i, token in enumerate(tokens):
self.assertEqual(token, expected[i])

+self.assertEqual(tokenizer.dropped, 6)

class TestCanonical(unittest.TestCase):
def test_tokenize(self):
tokenizer = tokenizers.Canonical(tokenizers.Kmer(k=6))
@@ -33,4 +35,7 @@ def test_tokenize(self):
expected = ['CGGT', 'TCAG', 'TAAT']

for i, token in enumerate(tokens):
-self.assertEqual(token, expected[i])
+self.assertEqual(token, expected[i])
+
+self.assertEqual(tokenizer.dropped, 1)

0 comments on commit a453675

Please sign in to comment.