Skip to content

Commit

Permalink
add docs
Browse files Browse the repository at this point in the history
  • Loading branch information
keepcosmos committed Dec 24, 2017
1 parent 56b3219 commit 363163e
Showing 13 changed files with 46 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -7,7 +7,7 @@
import static org.openkoreantext.processor.tokenizer.KoreanTokenizer.KoreanToken;

/**
* To prepare korean token sequence
* Prepares a Korean token sequence.
*/
public interface KoreanTokenPrepareable {
/**
Original file line number Diff line number Diff line change
@@ -8,6 +8,9 @@
import java.util.Arrays;
import java.util.List;

/**
* A default Korean analyzer.
*/
public class OpenKoreanTextAnalyzer extends StopwordAnalyzerBase {

private final static CharArraySet STOP_WORD_SET;
Original file line number Diff line number Diff line change
@@ -6,6 +6,12 @@
import java.io.IOException;
import java.io.Reader;

/**
* A character filter for normalizing input text.
* To normalize text, it delegates the input to {@link OpenKoreanTextProcessor}.
*
* e.g. {@code 그랰ㅋㅋ -> 그래ㅋㅋ}, {@code 재밌닿ㅎㅎㅎ -> 재밌다ㅎㅎ}
*/
public class OpenKoreanTextNormalizer extends BaseCharFilter {
private static final int READER_BUFFER_SIZE = 2048;

Original file line number Diff line number Diff line change
@@ -11,6 +11,11 @@
import static org.openkoreantext.processor.phrase_extractor.KoreanPhraseExtractor.KoreanPhrase;
import static org.openkoreantext.processor.tokenizer.KoreanTokenizer.KoreanToken;

/**
* Phrase extractor. To extract phrases, it delegates tokens to {@link OpenKoreanTextProcessor}.
*
* @see <a href="https://github.com/open-korean-text/open-korean-text/blob/master/src/main/scala/org/openkoreantext/processor/phrase_extractor/KoreanPhraseExtractor.scala">KoreanPhraseExtractor.scala</a>
*/
public class OpenKoreanTextPhraseExtractor extends OpenKoreanTextTokenFilter {

public OpenKoreanTextPhraseExtractor(TokenStream input) {
Original file line number Diff line number Diff line change
@@ -9,6 +9,11 @@

import static org.openkoreantext.processor.tokenizer.KoreanTokenizer.KoreanToken;

/**
* Stems adjective and verb tokens.
*
* @see <a href="https://github.com/open-korean-text/open-korean-text/blob/master/src/main/scala/org/openkoreantext/processor/stemmer/KoreanStemmer.scala">KoreanStemmer.scala</a>
*/
public final class OpenKoreanTextStemmer extends OpenKoreanTextTokenFilter {

public OpenKoreanTextStemmer(TokenStream input) {
Original file line number Diff line number Diff line change
@@ -13,6 +13,9 @@

import static org.openkoreantext.processor.tokenizer.KoreanTokenizer.KoreanToken;

/**
* Abstract token filter for processing Korean tokens.
*/
public abstract class OpenKoreanTextTokenFilter extends TokenFilter implements KoreanTokenPrepareable {

private final CharTermAttribute charTermAttribute = addAttribute(CharTermAttribute.class);
Original file line number Diff line number Diff line change
@@ -15,6 +15,11 @@
import java.util.List;
import java.util.Set;

/**
* Provides Korean tokenization.
*
* @see <a href="https://github.com/open-korean-text/open-korean-text/blob/master/src/main/scala/org/openkoreantext/processor/tokenizer/KoreanTokenizer.scala">KoreanTokenizer.scala</a>
*/
public class OpenKoreanTextTokenizer extends Tokenizer implements KoreanTokenPrepareable {

private static final int READER_BUFFER_SIZE = 1024;
Original file line number Diff line number Diff line change
@@ -6,6 +6,9 @@
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;

/**
* An analyzer provider for open-korean-text.
*/
public class OpenKoreanTextAnalyzerProvider extends AbstractIndexAnalyzerProvider<OpenKoreanTextAnalyzer> {

private final OpenKoreanTextAnalyzer analyzer;
Original file line number Diff line number Diff line change
@@ -9,6 +9,9 @@

import java.io.Reader;

/**
* An Elasticsearch character-filter factory for {@link OpenKoreanTextNormalizer}.
*/
public class OpenKoreanTextNormalizerFactory extends AbstractCharFilterFactory implements MultiTermAwareComponent {

public OpenKoreanTextNormalizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
Original file line number Diff line number Diff line change
@@ -6,6 +6,9 @@
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;

/**
* An Elasticsearch token filter factory for {@link OpenKoreanTextPhraseExtractor}.
*/
public class OpenKoreanTextPhraseExtractorFactory extends AbstractTokenFilterFactory {

public OpenKoreanTextPhraseExtractorFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
Original file line number Diff line number Diff line change
@@ -6,6 +6,9 @@
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;

/**
* An Elasticsearch token filter factory for {@link OpenKoreanTextRedundantFilter}.
*/
public class OpenKoreanTextRedundantFilterFactory extends AbstractTokenFilterFactory {

public OpenKoreanTextRedundantFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
Original file line number Diff line number Diff line change
@@ -6,6 +6,9 @@
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;

/**
* An Elasticsearch token filter factory for {@link OpenKoreanTextStemmer}.
*/
public class OpenKoreanTextStemmerFactory extends AbstractTokenFilterFactory {

public OpenKoreanTextStemmerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
Original file line number Diff line number Diff line change
@@ -7,6 +7,9 @@
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;

/**
* An Elasticsearch tokenizer factory for {@link OpenKoreanTextTokenizer}.
*/
public class OpenKoreanTextTokenizerFactory extends AbstractTokenizerFactory {

public OpenKoreanTextTokenizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {

0 comments on commit 363163e

Please sign in to comment.