diff --git a/src/main/java/org/apache/lucene/analysis/ko/KoreanTokenPrepareable.java b/src/main/java/org/apache/lucene/analysis/ko/KoreanTokenPrepareable.java index d43ffd8..ac74049 100644 --- a/src/main/java/org/apache/lucene/analysis/ko/KoreanTokenPrepareable.java +++ b/src/main/java/org/apache/lucene/analysis/ko/KoreanTokenPrepareable.java @@ -7,7 +7,7 @@ import static org.openkoreantext.processor.tokenizer.KoreanTokenizer.KoreanToken; /** - * To prepare korean token sequence + * Prepares a Korean token sequence. */ public interface KoreanTokenPrepareable { /** diff --git a/src/main/java/org/apache/lucene/analysis/ko/OpenKoreanTextAnalyzer.java b/src/main/java/org/apache/lucene/analysis/ko/OpenKoreanTextAnalyzer.java index 500d343..e7e8c37 100644 --- a/src/main/java/org/apache/lucene/analysis/ko/OpenKoreanTextAnalyzer.java +++ b/src/main/java/org/apache/lucene/analysis/ko/OpenKoreanTextAnalyzer.java @@ -8,6 +8,9 @@ import java.util.Arrays; import java.util.List; +/** + * A default Korean analyzer. + */ public class OpenKoreanTextAnalyzer extends StopwordAnalyzerBase { private final static CharArraySet STOP_WORD_SET; diff --git a/src/main/java/org/apache/lucene/analysis/ko/OpenKoreanTextNormalizer.java b/src/main/java/org/apache/lucene/analysis/ko/OpenKoreanTextNormalizer.java index 6f5d4cf..f897f8f 100644 --- a/src/main/java/org/apache/lucene/analysis/ko/OpenKoreanTextNormalizer.java +++ b/src/main/java/org/apache/lucene/analysis/ko/OpenKoreanTextNormalizer.java @@ -6,6 +6,12 @@ import java.io.IOException; import java.io.Reader; +/** + * A character filter for normalizing input text. + * To normalize text, it delegates the input to {@link OpenKoreanTextProcessor}. 
+ * + * e.g. 그랰ㅋㅋ {@literal ->} 그래ㅋㅋ, 재밌닿ㅎㅎㅎ {@literal ->} 재밌다ㅎㅎ + */ public class OpenKoreanTextNormalizer extends BaseCharFilter { private static final int READER_BUFFER_SIZE = 2048; diff --git a/src/main/java/org/apache/lucene/analysis/ko/OpenKoreanTextPhraseExtractor.java b/src/main/java/org/apache/lucene/analysis/ko/OpenKoreanTextPhraseExtractor.java index 9e090ae..8b4dbf1 100644 --- a/src/main/java/org/apache/lucene/analysis/ko/OpenKoreanTextPhraseExtractor.java +++ b/src/main/java/org/apache/lucene/analysis/ko/OpenKoreanTextPhraseExtractor.java @@ -11,6 +11,11 @@ import static org.openkoreantext.processor.phrase_extractor.KoreanPhraseExtractor.KoreanPhrase; import static org.openkoreantext.processor.tokenizer.KoreanTokenizer.KoreanToken; +/** + * Phrase extractor. To extract phrases, it delegates tokens to {@link OpenKoreanTextProcessor}. + * + * @see <a href="https://github.com/open-korean-text/open-korean-text/blob/master/src/main/scala/org/openkoreantext/processor/phrase_extractor/KoreanPhraseExtractor.scala">KoreanPhraseExtractor.scala</a> + */ public class OpenKoreanTextPhraseExtractor extends OpenKoreanTextTokenFilter { public OpenKoreanTextPhraseExtractor(TokenStream input) { diff --git a/src/main/java/org/apache/lucene/analysis/ko/OpenKoreanTextStemmer.java b/src/main/java/org/apache/lucene/analysis/ko/OpenKoreanTextStemmer.java index cec5b23..09a22e1 100644 --- a/src/main/java/org/apache/lucene/analysis/ko/OpenKoreanTextStemmer.java +++ b/src/main/java/org/apache/lucene/analysis/ko/OpenKoreanTextStemmer.java @@ -9,6 +9,11 @@ import static org.openkoreantext.processor.tokenizer.KoreanTokenizer.KoreanToken; +/** + * Stems adjective and verb tokens. 
+ * + * @see <a href="https://github.com/open-korean-text/open-korean-text/blob/master/src/main/scala/org/openkoreantext/processor/stemmer/KoreanStemmer.scala">KoreanStemmer.scala</a> + */ public final class OpenKoreanTextStemmer extends OpenKoreanTextTokenFilter { public OpenKoreanTextStemmer(TokenStream input) { diff --git a/src/main/java/org/apache/lucene/analysis/ko/OpenKoreanTextTokenFilter.java b/src/main/java/org/apache/lucene/analysis/ko/OpenKoreanTextTokenFilter.java index 55c7892..28b6327 100644 --- a/src/main/java/org/apache/lucene/analysis/ko/OpenKoreanTextTokenFilter.java +++ b/src/main/java/org/apache/lucene/analysis/ko/OpenKoreanTextTokenFilter.java @@ -13,6 +13,9 @@ import static org.openkoreantext.processor.tokenizer.KoreanTokenizer.KoreanToken; +/** + * Abstract token filter for processing Korean tokens. + */ public abstract class OpenKoreanTextTokenFilter extends TokenFilter implements KoreanTokenPrepareable { private final CharTermAttribute charTermAttribute = addAttribute(CharTermAttribute.class); diff --git a/src/main/java/org/apache/lucene/analysis/ko/OpenKoreanTextTokenizer.java b/src/main/java/org/apache/lucene/analysis/ko/OpenKoreanTextTokenizer.java index 320d37e..79a9c37 100644 --- a/src/main/java/org/apache/lucene/analysis/ko/OpenKoreanTextTokenizer.java +++ b/src/main/java/org/apache/lucene/analysis/ko/OpenKoreanTextTokenizer.java @@ -15,6 +15,11 @@ import java.util.List; import java.util.Set; +/** + * Provides Korean tokenization. 
+ * + * @see <a href="https://github.com/open-korean-text/open-korean-text/blob/master/src/main/scala/org/openkoreantext/processor/tokenizer/KoreanTokenizer.scala">KoreanTokenizer.scala</a> + */ public class OpenKoreanTextTokenizer extends Tokenizer implements KoreanTokenPrepareable { private static final int READER_BUFFER_SIZE = 1024; diff --git a/src/main/java/org/elasticsearch/index/analysis/OpenKoreanTextAnalyzerProvider.java b/src/main/java/org/elasticsearch/index/analysis/OpenKoreanTextAnalyzerProvider.java index 4d3adb0..ef40fbf 100644 --- a/src/main/java/org/elasticsearch/index/analysis/OpenKoreanTextAnalyzerProvider.java +++ b/src/main/java/org/elasticsearch/index/analysis/OpenKoreanTextAnalyzerProvider.java @@ -6,6 +6,9 @@ import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +/** + * An analyzer provider for open-korean-text. + */ public class OpenKoreanTextAnalyzerProvider extends AbstractIndexAnalyzerProvider { private final OpenKoreanTextAnalyzer analyzer; diff --git a/src/main/java/org/elasticsearch/index/analysis/OpenKoreanTextNormalizerFactory.java b/src/main/java/org/elasticsearch/index/analysis/OpenKoreanTextNormalizerFactory.java index 5043c54..b3ecb5d 100644 --- a/src/main/java/org/elasticsearch/index/analysis/OpenKoreanTextNormalizerFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/OpenKoreanTextNormalizerFactory.java @@ -9,6 +9,9 @@ import java.io.Reader; +/** + * An ES character-filter factory for {@link OpenKoreanTextNormalizer}. 
+ */ public class OpenKoreanTextNormalizerFactory extends AbstractCharFilterFactory implements MultiTermAwareComponent { public OpenKoreanTextNormalizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { diff --git a/src/main/java/org/elasticsearch/index/analysis/OpenKoreanTextPhraseExtractorFactory.java b/src/main/java/org/elasticsearch/index/analysis/OpenKoreanTextPhraseExtractorFactory.java index e33da26..486696c 100644 --- a/src/main/java/org/elasticsearch/index/analysis/OpenKoreanTextPhraseExtractorFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/OpenKoreanTextPhraseExtractorFactory.java @@ -6,6 +6,9 @@ import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +/** + * An ES token filter factory for {@link OpenKoreanTextPhraseExtractor}. + */ public class OpenKoreanTextPhraseExtractorFactory extends AbstractTokenFilterFactory { public OpenKoreanTextPhraseExtractorFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { diff --git a/src/main/java/org/elasticsearch/index/analysis/OpenKoreanTextRedundantFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/OpenKoreanTextRedundantFilterFactory.java index b18ab0b..3344ad4 100644 --- a/src/main/java/org/elasticsearch/index/analysis/OpenKoreanTextRedundantFilterFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/OpenKoreanTextRedundantFilterFactory.java @@ -6,6 +6,9 @@ import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +/** + * An ES token filter factory for {@link OpenKoreanTextRedundantFilter}. 
+ */ public class OpenKoreanTextRedundantFilterFactory extends AbstractTokenFilterFactory { public OpenKoreanTextRedundantFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { diff --git a/src/main/java/org/elasticsearch/index/analysis/OpenKoreanTextStemmerFactory.java b/src/main/java/org/elasticsearch/index/analysis/OpenKoreanTextStemmerFactory.java index 71e643e..a419b6d 100644 --- a/src/main/java/org/elasticsearch/index/analysis/OpenKoreanTextStemmerFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/OpenKoreanTextStemmerFactory.java @@ -6,6 +6,9 @@ import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +/** + * An ES token filter factory for {@link OpenKoreanTextStemmer}. + */ public class OpenKoreanTextStemmerFactory extends AbstractTokenFilterFactory { public OpenKoreanTextStemmerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { diff --git a/src/main/java/org/elasticsearch/index/analysis/OpenKoreanTextTokenizerFactory.java b/src/main/java/org/elasticsearch/index/analysis/OpenKoreanTextTokenizerFactory.java index c868d6a..28747e9 100644 --- a/src/main/java/org/elasticsearch/index/analysis/OpenKoreanTextTokenizerFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/OpenKoreanTextTokenizerFactory.java @@ -7,6 +7,9 @@ import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +/** + * An ES tokenizer factory for {@link OpenKoreanTextTokenizer}. + */ public class OpenKoreanTextTokenizerFactory extends AbstractTokenizerFactory { public OpenKoreanTextTokenizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {