Skip to content

Commit

Permalink
fix bug
Browse files: browse the repository at this point in the history
  • Loading branch information
Taka008 committed Aug 13, 2023
1 parent 327c78b commit fd27790
Showing 1 changed file with 2 additions and 5 deletions.
7 changes: 2 additions & 5 deletions src/kwja/utils/seq2seq_format.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -51,11 +51,8 @@ def tokenize(self, mrph_lines: List[List[str]], tgt_mrphs: Dict[str, Dict[str, s
tokenized: List[str] = [x for x in self.tokenizer.tokenize(mrph) if x != "▁"] + [
special_tokens[idx_in_mrph]
]
if is_partial:
if partial_anno_type == "canon" or (partial_anno_type == "norm" and idx_in_mrph in {0, 2}):
output.extend(tokenized)
else:
output.extend([self.pad_token] * len(tokenized))
if is_partial and partial_anno_type == "":
output.extend([self.pad_token] * len(tokenized))
else:
output.extend(tokenized)
return output
Expand Down

0 comments on commit fd27790

Please sign in to comment.