Skip to content

Commit

Permalink
tweak
Browse files Browse the repository at this point in the history
  • Loading branch information
omukazu committed Aug 6, 2023
1 parent feb70a6 commit 10524c2
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 8 deletions.
3 changes: 2 additions & 1 deletion src/kwja/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ def tasks_callback(value: str) -> str:
"""sort and validate specified tasks"""
values: List[str] = [v for v in value.split(",") if v]
tasks: List[str] = []
for candidate_task in ("typo", "senter", "seq2seq", "char", "word"):
for candidate_task in ("typo", "char", "seq2seq", "word"):
if candidate_task in values:
tasks.append(candidate_task)
values.remove(candidate_task)
Expand All @@ -263,6 +263,7 @@ def tasks_callback(value: str) -> str:
}

if tuple(tasks) not in valid_task_combinations:
print(tuple(tasks))

Check warning on line 266 in src/kwja/cli/cli.py

View check run for this annotation

Codecov / codecov/patch

src/kwja/cli/cli.py#L266

Added line #L266 was not covered by tests
raise typer.BadParameter(
"task combination is invalid. "
f"Please specify one of {', '.join(repr(','.join(ts)) for ts in valid_task_combinations)}."
Expand Down
16 changes: 9 additions & 7 deletions src/kwja/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,6 @@
MASKED = -1024.0
RESOURCE_PATH = resource_files(kwja) / "resource"

# ---------- senter module ----------
SENT_SEGMENTATION_TAGS = ("B", "I")

# ---------- seq2seq module ----------
NEW_LINE_TOKEN: str = "<extra_id_0>" # "<br>"
FULL_SPACE_TOKEN: str = "<extra_id_1>" # "<full_space>"
NO_CANON_TOKEN: str = "<extra_id_2>" # "<no_canon>"

# ---------- word (inference) dataset ----------
SPLIT_INTO_WORDS_MODEL_NAMES = [
Expand All @@ -40,6 +33,9 @@
TOKEN2TYPO_CORR_OP_TAG: Dict[str, str] = {v: k for k, v in TYPO_CORR_OP_TAG2TOKEN.items()}
DUMMY_TOKEN = "<dummy>"

# ---------- char module|sentence segmentation ----------
SENT_SEGMENTATION_TAGS = ("B", "I")


# ---------- char module|word segmentation ----------
WORD_SEGMENTATION_TAGS = ("B", "I")
Expand Down Expand Up @@ -212,6 +208,12 @@
)


# ---------- seq2seq module ----------
NEW_LINE_TOKEN: str = "<extra_id_0>" # "<br>"
FULL_SPACE_TOKEN: str = "<extra_id_1>" # "<full_space>"
NO_CANON_TOKEN: str = "<extra_id_2>" # "<no_canon>"


# ---------- word module ----------
class WordTask(Enum):
READING_PREDICTION = "reading_prediction"
Expand Down

0 comments on commit 10524c2

Please sign in to comment.