Skip to content

Commit

Permalink
Also output the known tags in a dataset after the dataset has been re…
Browse files Browse the repository at this point in the history
…tagged in the srparser
  • Loading branch information
AngledLuffa committed Oct 17, 2024
1 parent 614b936 commit e87f437
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,7 @@ private void train(List<Pair<String, FileFilter>> trainTreebankPath,
log.info("Retagging with tagger model: " + op.testOptions.taggerSerializedFile);
log.info("Known tags in the tagger model: " + tagger.tagSet());
redoTags(binarizedTrees, tagger, nThreads);
log.info("Tags in training set: " + Trees.uniqueTags(binarizedTrees));
retagTimer.done("Retagging");
}

Expand Down
21 changes: 21 additions & 0 deletions src/edu/stanford/nlp/trees/Trees.java
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,27 @@ private static void preTerminals(Tree t, List<Tree> l) {
}
}

/**
 * Collects the label values found in the preterminal yields of all the given trees.
 *
 * @param trees the trees to scan
 * @return a newly allocated set holding every value added while scanning {@code trees};
 *         empty if {@code trees} is empty
 */
public static Set<String> uniqueTags(List<Tree> trees) {
  final Set<String> collected = new HashSet<>();
  // Each tree adds its values into the same accumulator set.
  trees.forEach(t -> uniqueTags(t, collected));
  return collected;
}

/**
 * Collects the label values found in the preterminal yield of a single tree.
 *
 * @param tree the tree to scan
 * @return a newly allocated set holding the values added for {@code tree}
 */
public static Set<String> uniqueTags(Tree tree) {
  // The two-argument overload computes the preterminal yield itself, so it is
  // not computed here as well (the previous version built it and discarded it).
  return uniqueTags(tree, new HashSet<>());
}

/**
 * Adds the {@code value()} of every label in {@code tree.preTerminalYield()} to
 * the supplied set.
 *
 * @param tree the tree whose preterminal yield is scanned
 * @param tags the set to add values to; it is modified in place
 * @return the same {@code tags} instance that was passed in
 */
public static Set<String> uniqueTags(Tree tree, Set<String> tags) {
  for (Label preTerminal : tree.preTerminalYield()) {
    final String tag = preTerminal.value();
    tags.add(tag);
  }
  return tags;
}


/**
* returns the labels of the leaves in a Tree in the order that they're found.
Expand Down

0 comments on commit e87f437

Please sign in to comment.