diff --git a/docs/.doctrees/environment.pickle b/docs/.doctrees/environment.pickle index 9f55157..4cc4ab1 100644 Binary files a/docs/.doctrees/environment.pickle and b/docs/.doctrees/environment.pickle differ diff --git a/docs/.doctrees/gruut.doctree b/docs/.doctrees/gruut.doctree index 04190c4..9fbc881 100644 Binary files a/docs/.doctrees/gruut.doctree and b/docs/.doctrees/gruut.doctree differ diff --git a/docs/.doctrees/index.doctree b/docs/.doctrees/index.doctree index 2a0e42f..b6756cb 100644 Binary files a/docs/.doctrees/index.doctree and b/docs/.doctrees/index.doctree differ diff --git a/docs/_sources/index.rst.txt b/docs/_sources/index.rst.txt index 7f4b26a..ce43215 100644 --- a/docs/_sources/index.rst.txt +++ b/docs/_sources/index.rst.txt @@ -36,7 +36,51 @@ Output:: . ‖ -Note that "wound" and "read" have different pronunciations in different contexts. +Note that "wound" and "read" have different pronunciations in different (grammatical) contexts. + +A subset of :ref:`SSML ` is also supported: + +.. code-block:: python + + from gruut import sentences + + ssml_text = """ + + Today at 4pm, 2/1/2000. + Un mese fà, 2/1/2000. + """ + + for sent in sentences(ssml_text, ssml=True): + for word in sent: + if word.phonemes: + print(sent.idx, word.lang, word.text, *word.phonemes) + +with the output:: + + 0 en-US Today t ə d ˈeɪ + 0 en-US at ˈæ t + 0 en-US four f ˈɔ ɹ + 0 en-US P p ˈi + 0 en-US M ˈɛ m + 0 en-US , | + 0 en-US February f ˈɛ b j u ˌɛ ɹ i + 0 en-US first f ˈɚ s t + 0 en-US , | + 0 en-US two t ˈu + 0 en-US thousand θ ˈaʊ z ə n d + 0 en-US . ‖ + 1 it Un u n + 1 it mese ˈm e s e + 1 it fà f a + 1 it , | + 1 it due d j u + 1 it gennaio d͡ʒ e n n ˈa j o + 1 it duemila d u e ˈm i l a + 1 it . 
‖ Installation ------------ @@ -60,6 +104,7 @@ Supported Languages * German (``de``) * English (``en``) * Spanish (``es``) +* Persian/Farsi (``fa``) * French (``fr``) * Italian (``it``) * Dutch (``nl``) @@ -177,6 +222,38 @@ which outputs: See ``gruut --help`` for more options. + +.. _ssml_support: + +SSML Support +-------------------------- + +A subset of `the SSML standard `_ is supported: + +* ```` - wrap around SSML text + * ``lang`` - set language for document +* ``

`` - paragraph + * ``lang`` - set language for paragraph +* ```` - sentence (disables automatic sentence breaking) + * ``lang`` - set language for sentence +* ```` / ```` - word (disables automatic tokenization) + * ``lang`` - set language for word + * ``role`` - set word role (see [word roles](#word-roles)) +* ```` - set language inner text +* ```` - set voice of inner text +* ```` - force interpretation of inner text + * ``interpret-as`` one of "spell-out", "date", "number", "time", or "currency" + * ``format`` - way to format text depending on ``interpret-as`` + * number - one of "cardinal", "ordinal", "digits", "year" + * date - string with "d" (cardinal day), "o" (ordinal day), "m" (month), or "y" (year) +* ```` - Pause for given amount of time + * time - seconds ("123s") or milliseconds ("123ms") +* ```` - substitute ``alias`` for inner text +* ```` - supply phonemes for inner text + * ``ph`` - phonemes for each word of inner text, separated by whitespace + * ``alphabet`` - if "ipa", phonemes are intelligently split ("aːˈb" -> "aː", "ˈb") + + .. _database: Database diff --git a/docs/genindex.html b/docs/genindex.html index 7ca5efe..b2a08ea 100644 --- a/docs/genindex.html +++ b/docs/genindex.html @@ -254,16 +254,22 @@

E

  • (gruut.const.Node attribute)
  • +
  • en_get_ordinal() (in module gruut.lang) +
  • en_is_initialism() (in module gruut.lang)
  • +
  • en_parse_time() (in module gruut.lang) +
  • +
  • en_verbalize_time() (in module gruut.lang) +
  • + + - + - -
    • post_process_sentence (gruut.const.TextProcessorSettings attribute)
        @@ -801,6 +857,8 @@

        R

      • text_and_elements() (in module gruut.utils) +
      • +
      • text_spoken (gruut.const.Sentence attribute)
      • text_with_ws (gruut.const.BreakWordNode attribute) @@ -914,7 +974,15 @@

        T

      • (class in gruut.const)
      • +
      • Time (class in gruut.const) +
      • time (gruut.const.BreakNode attribute) +
      • +
      • TIME (gruut.const.InterpretAs attribute) +
      • +
      • time (gruut.const.WordNode attribute) +
      • +
      • trailing_ws (gruut.const.Word attribute)
      • train() (in module gruut.g2p)
      • @@ -925,6 +993,14 @@

        T

        V

        +
        Returns
        @@ -1700,6 +1881,17 @@

        Submodules +
        +gruut.utils.remove_non_word_chars(s)
        +

        Removes non-word characters from a string

        +
        +
        Return type
        +

        str

        +
        +
        +
        +
        gruut.utils.resolve_lang(lang)
        @@ -1768,7 +1960,7 @@

        Submodules
        -process(text, lang=None, ssml=False, pos=True, phonemize=True, post_process=True, add_speak_tag=True)
        +process(text, lang=None, ssml=False, pos=True, phonemize=True, post_process=True, add_speak_tag=True, detect_numbers=True, detect_currency=True, detect_dates=True, detect_times=True, verbalize_numbers=True, verbalize_currency=True, verbalize_dates=True, verbalize_times=True)

        Processes text or SSML

        Parameters
        @@ -1779,7 +1971,15 @@

        Submodulesbool) – False if part of speech tagging should be disabled

      • phonemize (bool) – False if phonemization should be disabled

      • post_process (bool) – False if sentence/graph post-processing should be disabled

      • -
      • add_speak_tag (bool) – False if <speak> should not automatically be added to input text

      • +
      • add_speak_tag (bool) – True if <speak> should be automatically added to input text when ssml=True

      • +
      • detect_numbers (bool) – True if numbers should be annotated in text (interpret_as=”number”)

      • +
      • detect_currency (bool) – True if currency amounts should be annotated in text (interpret_as=”currency”)

      • +
      • detect_dates (bool) – True if dates should be annotated in text (interpret_as=”date”)

      • +
      • detect_times (bool) – True if clock times should be annotated in text (interpret_as=”time”)

      • +
      • verbalize_numbers (bool) – True if annotated numbers should be expanded into words

      • +
      • verbalize_currency (bool) – True if annotated currency amounts should be expanded into words

      • +
      • verbalize_dates (bool) – True if annotated dates should be expanded into words

      • +
      • verbalize_times (bool) – True if annotated clock times should be expanded into words

      • Returns
        @@ -1817,7 +2017,7 @@

        Submodules
        -class gruut.TextProcessorSettings(lang, split_words=<function default_split_words>, join_str=' ', keep_whitespace=True, is_non_word=None, get_whitespace=<function default_get_whitespace>, normalize_whitespace=<function default_normalize_whitespace>, begin_punctuations=None, begin_punctuations_pattern=None, end_punctuations=None, end_punctuations_pattern=None, replacements=<factory>, abbreviations=<factory>, spell_out_words=<factory>, major_breaks=<factory>, major_breaks_pattern=None, minor_breaks=<factory>, minor_breaks_pattern=None, word_breaks=<factory>, word_breaks_pattern=None, is_maybe_number=<function has_digit>, babel_locale=None, num2words_lang=None, default_currency='USD', currencies=<factory>, currency_symbols=<factory>, is_maybe_currency=<function has_digit>, dateparser_lang=None, is_maybe_date=<function has_digit>, default_date_format=InterpretAsFormat.DATE_MDY_ORDINAL, get_parts_of_speech=None, is_initialism=None, split_initialism=None, lookup_phonemes=None, guess_phonemes=None, pre_process_text=None, post_process_sentence=None)
        +class gruut.TextProcessorSettings(lang, split_words=<function default_split_words>, join_str=' ', keep_whitespace=True, is_non_word=None, get_whitespace=<function default_get_whitespace>, normalize_whitespace=<function default_normalize_whitespace>, begin_punctuations=None, begin_punctuations_pattern=None, end_punctuations=None, end_punctuations_pattern=None, replacements=<factory>, abbreviations=<factory>, spell_out_words=<factory>, major_breaks=<factory>, major_breaks_pattern=None, minor_breaks=<factory>, minor_breaks_pattern=None, word_breaks=<factory>, word_breaks_pattern=None, is_maybe_number=<function has_digit>, get_ordinal=None, babel_locale=None, num2words_lang=None, default_currency='USD', currencies=<factory>, currency_symbols=<factory>, is_maybe_currency=<function has_digit>, dateparser_lang=None, is_maybe_date=<function has_digit>, default_date_format=InterpretAsFormat.DATE_MDY_ORDINAL, is_maybe_time=<function has_digit>, parse_time=None, verbalize_time=None, get_parts_of_speech=None, is_initialism=None, split_initialism=None, lookup_phonemes=None, guess_phonemes=None, pre_process_text=None, post_process_sentence=None)

        Bases: object

        Language specific settings for text processing

        @@ -1886,6 +2086,12 @@

        Submodules +
        +get_ordinal: Optional[Callable[[str], Optional[int]]] = None
        +

        Returns integer value of an ordinal string (e.g., 1st -> 1) or None if not an ordinal

        +

        +
        get_parts_of_speech: Optional[gruut.const.GetPartsOfSpeech] = None
        @@ -1948,6 +2154,17 @@

        Submodules +
        +is_maybe_time()
        +

        True if a word may be a clock time (parsing will be attempted)

        +
        +
        Return type
        +

        bool

        +
        +
        +

        +
        is_non_word: Optional[Callable[[str], bool]] = None
        @@ -2019,6 +2236,12 @@

        Submodules +
        +parse_time: Optional[Callable[[str], Optional[gruut.const.Time]]] = None
        +

        Parse word text into a Time object or None

        +

        +
        post_process_sentence: Optional[gruut.const.PostProcessSentence] = None
        @@ -2060,6 +2283,12 @@

        Submodules +
        +verbalize_time: Optional[Callable[[gruut.const.Time], Iterable[str]]] = None
        +

        Convert Time to words

        +

        +
        word_breaks: Set[str]
        @@ -2097,7 +2326,7 @@

        Submodules
        -gruut.sentences(text, lang='en_US', ssml=False, espeak=False, major_breaks=True, minor_breaks=True, punctuations=True, **process_args)
        +gruut.sentences(text, lang='en_US', ssml=False, espeak=False, major_breaks=True, minor_breaks=True, punctuations=True, explicit_lang=True, phonemes=True, break_phonemes=True, pos=True, **process_args)

        Process text and return sentences

        Parameters
        diff --git a/docs/index.html b/docs/index.html index 9fd20df..7d28989 100644 --- a/docs/index.html +++ b/docs/index.html @@ -67,7 +67,49 @@

        gruut . ‖ -

        Note that “wound” and “read” have different pronunciations in different contexts.

        +

        Note that “wound” and “read” have different pronunciations in different (grammatical) contexts.

        +

        A subset of SSML is also supported:

        +
        from gruut import sentences
        +
        +ssml_text = """<?xml version="1.0" encoding="ISO-8859-1"?>
        +<speak version="1.1" xmlns="http://www.w3.org/2001/10/synthesis"
        +    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        +    xsi:schemaLocation="http://www.w3.org/2001/10/synthesis
        +                http://www.w3.org/TR/speech-synthesis11/synthesis.xsd"
        +    xml:lang="en-US">
        +<s>Today at 4pm, 2/1/2000.</s>
        +<s xml:lang="it">Un mese fà, 2/1/2000.</s>
        +</speak>"""
        +
        +for sent in sentences(ssml_text, ssml=True):
        +    for word in sent:
        +        if word.phonemes:
        +            print(sent.idx, word.lang, word.text, *word.phonemes)
        +
        +
        +

        with the output:

        +
        0 en-US Today t ə d ˈeɪ
        +0 en-US at ˈæ t
        +0 en-US four f ˈɔ ɹ
        +0 en-US P p ˈi
        +0 en-US M ˈɛ m
        +0 en-US , |
        +0 en-US February f ˈɛ b j u ˌɛ ɹ i
        +0 en-US first f ˈɚ s t
        +0 en-US , |
        +0 en-US two t ˈu
        +0 en-US thousand θ ˈaʊ z ə n d
        +0 en-US . ‖
        +1 it Un u n
        +1 it mese ˈm e s e
        +1 it fà f a
        +1 it , |
        +1 it due d j u
        +1 it gennaio d͡ʒ e n n ˈa j o
        +1 it duemila d u e ˈm i l a
        +1 it . ‖
        +
        +

        Installation

        To install gruut with U.S. English support only:

        @@ -87,6 +129,7 @@

        Supported Languagesde)

      • English (en)

      • Spanish (es)

      • +
      • Persian/Farsi (fa)

      • French (fr)

      • Italian (it)

      • Dutch (nl)

      • @@ -195,8 +238,76 @@

        Command-Linegruut --help for more options.

        +
        +

        SSML Support

        +

        A subset of the SSML standard is supported:

        +
          +
        • +
          <speak> - wrap around SSML text
            +
          • lang - set language for document

          • +
          +
          +
          +
        • +
        • +
          <p> - paragraph
            +
          • lang - set language for paragraph

          • +
          +
          +
          +
        • +
        • +
          <s> - sentence (disables automatic sentence breaking)
            +
          • lang - set language for sentence

          • +
          +
          +
          +
        • +
        • +
          <w> / <token> - word (disables automatic tokenization)
            +
          • lang - set language for word

          • +
          • role - set word role (see the "word roles" section)

          • +
          +
          +
          +
        • +
        • <lang lang="..."> - set language of inner text

        • +
        • <voice name="..."> - set voice of inner text

        • +
        • +
          <say-as interpret-as=""> - force interpretation of inner text
            +
          • interpret-as - one of “spell-out”, “date”, “number”, “time”, or “currency”

          • +
          • +
            format - way to format text depending on interpret-as
              +
            • number - one of “cardinal”, “ordinal”, “digits”, “year”

            • +
            • date - string with “d” (cardinal day), “o” (ordinal day), “m” (month), or “y” (year)

            • +
            +
            +
            +
          • +
          +
          +
          +
        • +
        • +
          <break time=""> - Pause for given amount of time
            +
          • time - seconds (“123s”) or milliseconds (“123ms”)

          • +
          +
          +
          +
        • +
        • <sub alias=""> - substitute alias for inner text

        • +
        • +
          <phoneme ph="..."> - supply phonemes for inner text
            +
          • ph - phonemes for each word of inner text, separated by whitespace

          • +
          • alphabet - if “ipa”, phonemes are intelligently split (“aːˈb” -> “aː”, “ˈb”)

          • +
          +
          +
          +
        • +
        +
        -

        Database

        +

        Database

        Word pronunciations and other metadata are stored in SQLite databases with the following tables:

        +
      • SSML Support
      • Database
      • G2P Models
      • POS Taggers
      • diff --git a/docs/objects.inv b/docs/objects.inv index 0925e45..7820a6e 100644 Binary files a/docs/objects.inv and b/docs/objects.inv differ diff --git a/docs/searchindex.js b/docs/searchindex.js index 7ce358b..52fa7a6 100644 --- a/docs/searchindex.js +++ b/docs/searchindex.js @@ -1 +1 @@ -Search.setIndex({docnames:["gruut","index","modules"],envversion:{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":4,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":3,"sphinx.domains.rst":2,"sphinx.domains.std":2,sphinx:56},filenames:["gruut.rst","index.rst","modules.rst"],objects:{"":{gruut:[0,0,0,"-"]},"gruut.TextProcessor":{get_settings:[0,2,1,""],post_process_graph:[0,2,1,""],process:[0,2,1,""],sentences:[0,2,1,""],words:[0,2,1,""]},"gruut.TextProcessorSettings":{abbreviations:[0,3,1,""],babel_locale:[0,3,1,""],begin_punctuations:[0,3,1,""],begin_punctuations_pattern:[0,3,1,""],currencies:[0,3,1,""],currency_symbols:[0,3,1,""],dateparser_lang:[0,3,1,""],default_currency:[0,3,1,""],default_date_format:[0,3,1,""],end_punctuations:[0,3,1,""],end_punctuations_pattern:[0,3,1,""],get_parts_of_speech:[0,3,1,""],get_whitespace:[0,2,1,""],guess_phonemes:[0,3,1,""],is_initialism:[0,3,1,""],is_maybe_currency:[0,2,1,""],is_maybe_date:[0,2,1,""],is_maybe_number:[0,2,1,""],is_non_word:[0,3,1,""],join_str:[0,3,1,""],keep_whitespace:[0,3,1,""],lang:[0,3,1,""],lookup_phonemes:[0,3,1,""],major_breaks:[0,3,1,""],major_breaks_pattern:[0,3,1,""],minor_breaks:[0,3,1,""],minor_breaks_pattern:[0,3,1,""],normalize_whitespace:[0,2,1,""],num2words_lang:[0,3,1,""],post_process_sentence:[0,3,1,""],pre_process_text:[0,3,1,""],replacements:[0,3,1,""],spell_out_words:[0,3,1,""],split_initialism:[0,3,1,""],split_words:[0,2,1,""],word_breaks:[0,3,1,""],word_breaks_pattern:[0,3,1,""]},"gruut.const":{BreakNode:[0,1,1,""],BreakType:[0,1,1,""],BreakWordNode:[0,1,1,""],EndElement:[0,1,1,""],GetPartsOfSpe
ech:[0,1,1,""],GraphType:[0,1,1,""],GuessPhonemes:[0,1,1,""],IgnoreNode:[0,1,1,""],InterpretAs:[0,1,1,""],InterpretAsFormat:[0,1,1,""],LookupPhonemes:[0,1,1,""],Node:[0,1,1,""],ParagraphNode:[0,1,1,""],PostProcessSentence:[0,1,1,""],PunctuationWordNode:[0,1,1,""],Sentence:[0,1,1,""],SentenceNode:[0,1,1,""],SpeakNode:[0,1,1,""],TextProcessorSettings:[0,1,1,""],Word:[0,1,1,""],WordNode:[0,1,1,""],WordRole:[0,1,1,""],default_get_whitespace:[0,4,1,""],default_normalize_whitespace:[0,4,1,""],default_split_words:[0,4,1,""],has_digit:[0,4,1,""],maybe_compile_regex:[0,4,1,""]},"gruut.const.BreakNode":{time:[0,3,1,""]},"gruut.const.BreakType":{MAJOR:[0,3,1,""],MINOR:[0,3,1,""]},"gruut.const.BreakWordNode":{break_type:[0,3,1,""],text:[0,3,1,""],text_with_ws:[0,3,1,""]},"gruut.const.EndElement":{element:[0,3,1,""]},"gruut.const.GraphType":{add_edge:[0,2,1,""],add_edges_from:[0,2,1,""],add_node:[0,2,1,""],nodes:[0,3,1,""],out_degree:[0,2,1,""],out_edges:[0,2,1,""],predecessors:[0,2,1,""],remove_edges_from:[0,2,1,""],successors:[0,2,1,""]},"gruut.const.IgnoreNode":{node:[0,3,1,""]},"gruut.const.InterpretAs":{CURRENCY:[0,3,1,""],DATE:[0,3,1,""],NUMBER:[0,3,1,""],SPELL_OUT:[0,3,1,""]},"gruut.const.InterpretAsFormat":{DATE_DMY:[0,3,1,""],DATE_DMY_ORDINAL:[0,3,1,""],DATE_DM_ORDINAL:[0,3,1,""],DATE_MD:[0,3,1,""],DATE_MDY:[0,3,1,""],DATE_MDY_ORDINAL:[0,3,1,""],DATE_MD_ORDINAL:[0,3,1,""],DATE_MY:[0,3,1,""],DATE_Y:[0,3,1,""],DATE_YM:[0,3,1,""],DATE_YMD:[0,3,1,""],DATE_YMD_ORDINAL:[0,3,1,""],NUMBER_CARDINAL:[0,3,1,""],NUMBER_DIGITS:[0,3,1,""],NUMBER_ORDINAL:[0,3,1,""],NUMBER_YEAR:[0,3,1,""]},"gruut.const.Node":{element:[0,3,1,""],implicit:[0,3,1,""],lang:[0,3,1,""],node:[0,3,1,""],voice:[0,3,1,""]},"gruut.const.ParagraphNode":{node:[0,3,1,""]},"gruut.const.PunctuationWordNode":{text:[0,3,1,""],text_with_ws:[0,3,1,""]},"gruut.const.Sentence":{idx:[0,3,1,""],lang:[0,3,1,""],text:[0,3,1,""],text_with_ws:[0,3,1,""],voice:[0,3,1,""],words:[0,3,1,""]},"gruut.const.SentenceNode":{node:[0,3,1,""
]},"gruut.const.SpeakNode":{node:[0,3,1,""]},"gruut.const.TextProcessorSettings":{abbreviations:[0,3,1,""],babel_locale:[0,3,1,""],begin_punctuations:[0,3,1,""],begin_punctuations_pattern:[0,3,1,""],currencies:[0,3,1,""],currency_symbols:[0,3,1,""],dateparser_lang:[0,3,1,""],default_currency:[0,3,1,""],default_date_format:[0,3,1,""],end_punctuations:[0,3,1,""],end_punctuations_pattern:[0,3,1,""],get_parts_of_speech:[0,3,1,""],get_whitespace:[0,2,1,""],guess_phonemes:[0,3,1,""],is_initialism:[0,3,1,""],is_maybe_currency:[0,2,1,""],is_maybe_date:[0,2,1,""],is_maybe_number:[0,2,1,""],is_non_word:[0,3,1,""],join_str:[0,3,1,""],keep_whitespace:[0,3,1,""],lang:[0,3,1,""],lookup_phonemes:[0,3,1,""],major_breaks:[0,3,1,""],major_breaks_pattern:[0,3,1,""],minor_breaks:[0,3,1,""],minor_breaks_pattern:[0,3,1,""],normalize_whitespace:[0,2,1,""],num2words_lang:[0,3,1,""],post_process_sentence:[0,3,1,""],pre_process_text:[0,3,1,""],replacements:[0,3,1,""],spell_out_words:[0,3,1,""],split_initialism:[0,3,1,""],split_words:[0,2,1,""],word_breaks:[0,3,1,""],word_breaks_pattern:[0,3,1,""]},"gruut.const.Word":{idx:[0,3,1,""],is_break:[0,3,1,""],is_major_break:[0,3,1,""],is_minor_break:[0,3,1,""],is_punctuation:[0,3,1,""],is_spoken:[0,3,1,""],lang:[0,3,1,""],phonemes:[0,3,1,""],pos:[0,3,1,""],sent_idx:[0,3,1,""],text:[0,3,1,""],text_with_ws:[0,3,1,""],voice:[0,3,1,""]},"gruut.const.WordNode":{currency_name:[0,3,1,""],currency_symbol:[0,3,1,""],date:[0,3,1,""],format:[0,3,1,""],interpret_as:[0,3,1,""],number:[0,3,1,""],phonemes:[0,3,1,""],pos:[0,3,1,""],role:[0,3,1,""],text:[0,3,1,""],text_with_ws:[0,3,1,""]},"gruut.const.WordRole":{DEFAULT:[0,3,1,""],LETTER:[0,3,1,""]},"gruut.corpus2db":{main:[0,4,1,""]},"gruut.g2p":{GraphemesToPhonemes:[0,1,1,""],do_predict:[0,4,1,""],do_test:[0,4,1,""],do_train:[0,4,1,""],main:[0,4,1,""],train:[0,4,1,""]},"gruut.g2p.GraphemesToPhonemes":{decode_string:[0,2,1,""],encode_string:[0,2,1,""],grapheme2features:[0,2,1,""],word2features:[0,2,1,""]},"gruut.g2
p_phonetisaurus":{PhonetisaurusGraph:[0,1,1,""],do_predict:[0,4,1,""],do_test:[0,4,1,""],main:[0,4,1,""]},"gruut.g2p_phonetisaurus.PhonetisaurusGraph":{g2p:[0,2,1,""],g2p_one:[0,2,1,""],load:[0,2,1,""]},"gruut.lang":{ArabicPreProcessText:[0,1,1,""],DelayedGraphemesToPhonemes:[0,1,1,""],DelayedPartOfSpeechTagger:[0,1,1,""],DelayedSqlitePhonemizer:[0,1,1,""],FarsiPartOfSpeechTagger:[0,1,1,""],en_is_initialism:[0,4,1,""],fa_post_process_sentence:[0,4,1,""],fr_has_silent_consonant:[0,4,1,""],fr_is_vowel:[0,4,1,""],fr_post_process_sentence:[0,4,1,""],get_ar_settings:[0,4,1,""],get_cs_settings:[0,4,1,""],get_de_settings:[0,4,1,""],get_en_us_settings:[0,4,1,""],get_es_settings:[0,4,1,""],get_fa_settings:[0,4,1,""],get_fr_settings:[0,4,1,""],get_it_settings:[0,4,1,""],get_nl_settings:[0,4,1,""],get_pt_settings:[0,4,1,""],get_ru_settings:[0,4,1,""],get_settings:[0,4,1,""],get_sv_settings:[0,4,1,""],get_sw_settings:[0,4,1,""],get_zh_settings:[0,4,1,""]},"gruut.lexicon2db":{main:[0,4,1,""]},"gruut.phonemize":{SqlitePhonemizer:[0,1,1,""]},"gruut.phonemize.SqlitePhonemizer":{DEFAULT_ROLE:[0,3,1,""]},"gruut.pos":{PartOfSpeechTagger:[0,1,1,""],do_predict:[0,4,1,""],do_print_labels:[0,4,1,""],do_test:[0,4,1,""],do_train:[0,4,1,""],main:[0,4,1,""],train_model:[0,4,1,""]},"gruut.pos.PartOfSpeechTagger":{decode_string:[0,2,1,""],encode_string:[0,2,1,""],local_features:[0,2,1,""],sent2features:[0,2,1,""],word2features:[0,2,1,""]},"gruut.text_processor":{TextProcessor:[0,1,1,""]},"gruut.text_processor.TextProcessor":{get_settings:[0,2,1,""],post_process_graph:[0,2,1,""],process:[0,2,1,""],sentences:[0,2,1,""],words:[0,2,1,""]},"gruut.utils":{attrib_no_namespace:[0,4,1,""],find_lang_dir:[0,4,1,""],get_currency_names:[0,4,1,""],grouper:[0,4,1,""],leaves:[0,4,1,""],pairwise:[0,4,1,""],pipeline_split:[0,4,1,""],pipeline_transform:[0,4,1,""],print_graph:[0,4,1,""],resolve_lang:[0,4,1,""],sliding_window:[0,4,1,""],tag_no_namespace:[0,4,1,""],text_and_elements:[0,4,1,""]},gruut:{"const":[0,0,0
,"-"],TextProcessor:[0,1,1,""],TextProcessorSettings:[0,1,1,""],corpus2db:[0,0,0,"-"],g2p:[0,0,0,"-"],g2p_phonetisaurus:[0,0,0,"-"],get_supported_languages:[0,4,1,""],is_language_supported:[0,4,1,""],lang:[0,0,0,"-"],lexicon2db:[0,0,0,"-"],phonemize:[0,0,0,"-"],pos:[0,0,0,"-"],sentences:[0,4,1,""],text_processor:[0,0,0,"-"],utils:[0,0,0,"-"]}},objnames:{"0":["py","module","Python module"],"1":["py","class","Python class"],"2":["py","method","Python method"],"3":["py","attribute","Python attribute"],"4":["py","function","Python function"]},objtypes:{"0":"py:module","1":"py:class","2":"py:method","3":"py:attribute","4":"py:function"},terms:{"0":[0,1],"1":[0,1],"10":1,"100":[0,1],"12":0,"2":[0,1],"2021":0,"25":0,"3":0,"4":[],"5000":0,"6":0,"\u00f0":1,"\u014b":1,"\u0259":[0,1],"\u025a":1,"\u026a":1,"\u0279":1,"\u027e":1,"\u0283":0,"\u028a":1,"\u028c":1,"\u02c8\u0251":1,"\u02c8\u0251\u02d0":1,"\u02c8\u0254":1,"\u02c8\u025b":[0,1],"\u02c8\u026a":1,"\u02c8a":1,"\u02c8a\u026a":[0,1],"\u02c8a\u028a":1,"\u02c8e":1,"\u02c8e\u026a":1,"\u02c8i":1,"\u02c8i\u02d0":1,"\u02c8o\u028a":0,"\u02c8u":1,"\u02c8u\u02d0":1,"break":[0,1],"case":[0,1],"class":[0,1],"const":2,"default":[0,1],"do":1,"e\u026a":0,"enum":0,"final":0,"float":0,"function":[0,1],"i\u02d0":1,"import":[0,1],"int":0,"long":1,"new":0,"null":1,"return":[0,1],"static":0,"true":[0,1],"try":[0,1],A:[0,1],For:1,If:1,In:1,The:[0,1],These:[0,1],To:1,_:0,abbrevi:[0,1],abc:0,accept:0,ad:0,adapt:1,add:[0,1],add_begin:0,add_bo:0,add_digit:0,add_edg:0,add_edges_from:0,add_end:0,add_eo:0,add_length:0,add_nod:0,add_punctu:0,add_speak_tag:0,addit:1,adolfvonkleist:0,after:[0,1],alia:0,alias:0,align:[0,1],all:[0,1],also:0,amount:[0,1],an:[0,1],ani:0,apart:[0,1],app:1,appli:0,ar:[0,1],arab:[0,1],arabicpreprocesstext:0,arg:0,argument:0,around:1,arrai:0,ascii:0,attempt:0,attrib_no_namespac:0,attribut:0,automat:0,avail:1,b:0,babel:0,babel_local:0,base:0,beam:0,beam_scal:0,befor:0,begin:0,begin_punctu:0,begin_punctuations_pattern:0,better:1,b
etween:0,bia:0,bin:[0,1],block:0,bool:0,break_phonem:0,break_typ:0,breaknod:0,breaktyp:0,breakwordnod:0,built:0,c1:0,c2:0,c:0,cach:0,call:0,callabl:0,can:[0,1],cardin:0,chang:1,chars_back:0,chars_backward:0,chars_forward:0,chars_front:0,check:1,chines:0,chunk:0,cli:0,code:0,collect:0,com:0,combin:0,come:0,comma:1,compil:0,conll:1,conllu:[0,1],conllu_path:0,consol:0,conson:0,contain:[0,1],content:2,context:[0,1],control:1,convert:[0,1],corpu:[0,1],corpus2db:[1,2],corpus_path:0,creat:[0,1],credit:[0,1],crf:[0,1],crf_tagger:0,crfsuit:[0,1],cs:1,currenc:[0,1],currency_nam:0,currency_symbol:0,custom:[0,1],czech:[0,1],d:1,data:[0,1],databas:0,date:[0,1],date_dm_ordin:0,date_dmi:0,date_dmy_ordin:0,date_i:0,date_md:0,date_md_ordin:0,date_mdi:0,date_mdy_ordin:0,date_mi:0,date_ym:0,date_ymd:0,date_ymd_ordin:0,dateparser_lang:0,datetim:0,db:[0,1],db_conn:0,db_path:0,de:1,decim:0,decod:0,decode_str:0,decreas:0,default_curr:0,default_date_format:0,default_get_whitespac:0,default_lang:0,default_normalize_whitespac:0,default_rol:0,default_split_word:0,defin:[0,1],delayedgraphemestophonem:0,delayedpartofspeechtagg:0,delayedsqlitephonem:0,depend:[0,1],depth:0,deriv:1,detail:1,dict:[0,1],dictionari:0,differ:1,digit:0,directori:0,disabl:0,disambigu:[0,1],dmy:0,do_predict:0,do_print_label:0,do_test:0,do_train:0,document:0,doe:1,dollar:1,done:1,download:1,dst:0,dt:[],dure:[0,1],dutch:[0,1],e:[0,1],ea430c5fb78b:0,each:[0,1],edg:0,edit:1,element:0,elementtre:0,empti:0,en:[0,1],en_is_initi:0,en_u:0,encod:0,encode_str:0,end:0,end_punctu:0,end_punctuations_pattern:0,endel:0,english:[0,1],ensur:0,entir:0,entri:0,ep:0,eps_phonem:0,es:1,espeak:0,espeak_word:1,etc:[0,1],etre:0,everyth:1,exampl:[0,1],exist:1,expand:[0,1],explicit_lang:0,extend:1,extern:1,extra:1,f:1,fa_post_process_sent:0,factori:0,fals:[0,1],farsi:0,farsipartofspeechtagg:0,fast:0,featur:0,fewer:1,field:1,file:[0,1],fillvalu:0,find:[0,1],find_lang_dir:0,first:[0,1],fix:0,flag:1,follow:[0,1],fork:1,form:0,format:[0,1],found:0,fr:1
,fr_has_silent_conson:0,fr_is_vowel:0,fr_post_process_sent:0,french:[0,1],from:[0,1],fst2npz:0,fst:[0,1],full:0,further:1,g2p:2,g2p_align:1,g2p_arg:0,g2p_model:0,g2p_one:0,g2p_phonetisauru:2,g:[0,1],gather:1,gener:1,genit:0,german:[0,1],get:0,get_ar_set:0,get_cs_set:0,get_currency_nam:0,get_de_set:0,get_en_us_set:0,get_es_set:0,get_fa_set:0,get_fr_set:0,get_it_set:0,get_nl_set:0,get_parts_of_speech:0,get_phonem:1,get_pt_set:0,get_ru_set:0,get_set:0,get_supported_languag:0,get_sv_set:0,get_sw_set:0,get_token:1,get_whitespac:0,get_zh_set:0,getpartsofspeech:0,github:[0,1],graph:0,graph_path:0,graphem:[0,1],grapheme2featur:0,grapheme_separ:0,graphemestophonem:0,graphtyp:0,group_separ:0,grouper:0,gruut_lang_:0,guess:[0,1],guess_phonem:0,guesser:0,guessphonem:0,h:[0,1],ha:1,hansen:1,has_digit:0,have:[0,1],hazm:0,he:1,help:1,http:[0,1],human:1,hundr:1,i:[0,1],id:1,idx:[0,1],ignor:0,ignorenod:0,implicit:0,includ:1,incom:0,indent:0,index:[0,1],initi:[0,1],input:0,insid:0,instal:0,instanc:1,instead:0,instruct:1,integ:1,interfac:1,interpret:0,interpret_a:0,interpreta:0,interpretasformat:0,io:1,ipa:1,is_break:[0,1],is_initi:0,is_language_support:0,is_last:0,is_major_break:[0,1],is_maybe_curr:0,is_maybe_d:0,is_maybe_numb:0,is_minor_break:[0,1],is_non_word:0,is_punctu:[0,1],is_spoken:[0,1],italian:[0,1],item_separ:0,iter:0,its:1,jjr:1,join_str:0,k:1,kaldi:1,keep_whitespac:0,kei:[0,1],keyword:0,known:0,kwarg:0,l:1,label:[0,1],lambda:[],lang:[1,2],lang_alias:1,lang_dir:0,languag:0,last:0,last_char:0,last_phonem:0,lead:0,leaf:0,least:0,leav:0,length:0,letter:0,level:0,lexicon2db:[1,2],lexicon:[0,1],liason:0,like:[0,1],line:0,list:[0,1],load:0,load_g2p_guess:0,load_phoneme_lexicon:0,load_pos_tagg:0,local:0,local_featur:0,locale_str:0,look:[0,1],lookup_phonem:0,lookupphonem:0,lower:1,lowest:1,m:[0,1],mai:0,main:[0,1],major:0,major_break:0,major_breaks_pattern:0,manual:1,map:[0,1],map_lexicon:1,marker:0,match:[0,1],max:0,max_guess:0,max_iter:0,maybe_compile_regex:0,md:0,mdy:0,metadata:
1,method:[0,1],michel:1,min_beam:0,minor:0,minor_break:0,minor_breaks_pattern:0,mishkal:0,mo:0,model:0,model_path:0,model_prefix:0,modul:[1,2],moi:0,more:[0,1],most:1,multipl:[0,1],must:1,mutablemap:0,my:0,n:[0,1],name:[0,1],namespac:0,necessari:0,need:1,networkx:0,next:[0,1],ng:1,nl:1,nn:1,node:0,none:0,normal:[0,1],normalize_whitespac:0,note:1,npz:0,num2word:1,num2words_lang:0,number:[0,1],number_cardin:0,number_digit:0,number_ordin:0,number_year:0,numpi:0,o:0,object:0,occur:0,off:0,om:0,omi:0,onc:1,one:[0,1],onli:1,oper:1,option:[0,1],order:0,ordin:0,org:0,origin:0,other:1,out:0,out_degre:0,out_edg:0,outgo:0,output:[0,1],output_path:0,over:0,overrid:0,own:1,packag:[1,2],page:1,pair:0,pairwis:0,paragraph:0,paragraphnod:0,paramet:0,parent_nod:0,pars:0,part:[0,1],partofspeechtagg:0,pass:[0,1],path:0,pattern:0,per:1,perform:[0,1],period:1,phonem:2,phoneme_join:0,phonemizer_arg:0,phonet:[0,1],phonetisauru:[0,1],phonetisaurusgraph:0,phonolog:1,phrase:0,pip:[0,1],pipeline_split:0,pipeline_transform:0,pleas:1,po:2,point:0,portugues:[0,1],possibl:1,post:0,post_process:0,post_process_graph:0,post_process_sent:0,postprocesssent:0,pre:[0,1],pre_process_text:0,prebuilt:1,predecessor:0,predict:[0,1],prefix:0,preload:0,present:0,primari:1,print:[0,1],print_func:0,print_graph:0,prioriti:1,probabl:0,process:0,process_arg:0,pron_ord:1,pronounc:0,pronunci:[0,1],properli:1,properti:[],protocol:0,pt:1,punctuat:[0,1],punctuationwordnod:0,put:1,py:[0,1],python3:[0,1],python:[0,1],q:1,quot:0,re:[0,1],read:[0,1],regex:0,rel:0,remov:0,remove_edges_from:0,remove_phonem:0,replac:0,repo:1,repres:[0,1],requir:0,resolv:0,resolve_lang:0,respect:1,retain:0,right:0,role:[0,1],root:0,ru:1,run:1,russian:[0,1],s0:0,s1:0,s2:0,s3:0,s:[0,1],sai:[0,1],script:1,search:[0,1],search_dir:0,see:[0,1],sent2featur:0,sent:1,sent_idx:[0,1],sent_nod:0,sentenc:[0,1],sentencenod:0,separ:[0,1],sequenc:0,set:[0,1],settings_arg:0,sh:1,share:0,short_lang:0,should:0,silent:0,simpli:1,singl:0,size:0,slide:0,sliding_windo
w:0,small:0,someth:0,space:0,spanish:[0,1],speak:0,speaknod:0,specif:[0,1],specifi:0,speech:[0,1],spell:0,spell_out:0,spell_out_word:0,split:[0,1],split_func:0,split_initi:0,split_word:0,spoken:0,sqlite3:0,sqlite:[0,1],sqlitephonem:0,src:0,ssml:[0,1],stai:0,state:0,step:1,store:1,str:0,str_or_pattern:0,string:[0,1],sub:[0,1],submodul:2,successor:0,suit:0,support:0,surround:0,sv:1,sw:1,swahili:[0,1],swedish:[0,1],symbol:0,synesthesiam:1,t:[0,1],tag:[0,1],tag_no_namespac:0,tagger:0,tagger_arg:0,tail:0,target:1,templat:0,ten:1,test:[0,1],text:[0,1],text_and_el:0,text_processor:[1,2],text_with_w:[0,1],textprocessor:[0,1],textprocessorset:0,than:1,thi:[0,1],think:1,through:0,time:0,token:[0,1],tokenzi:1,top:0,towardsdatasci:0,trail:0,train:[0,1],train_model:0,transform:0,transform_func:0,translat:1,tree:0,treebank:1,tri:0,tt:[0,1],tupl:0,twenti:0,two:[0,1],txt:1,type:0,u:1,under:0,union:0,univer:0,univers:1,universaldepend:0,unknown:1,unless:0,up:[0,1],updat:1,upo:0,us:[0,1],usd:0,user:[0,1],usual:1,util:2,v:1,valu:0,vbz:[],verbal:0,version:0,voic:[0,1],vowel:0,w:[0,1],wa:1,what:1,when:0,where:[0,1],which:1,whitespac:[0,1],whose:0,wikipedia:1,wiktionari:1,wiktionary2dict:1,window:0,without:0,word2featur:0,word:[0,1],word_break:0,word_breaks_pattern:0,word_phonem:1,word_transform_func:0,wordnod:0,wordrol:0,words_backward:0,words_forward:0,would:[0,1],wound:1,wrapper:0,written:0,xdg_config_hom:0,xml:0,xpo:[0,1],y:0,year:0,yield:0,ym:0,ymd:0,ymo:0,you:1,your:1,z:1,zero:0},titles:["gruut package","gruut","gruut"],titleterms:{"const":0,"new":1,ad:1,command:1,content:0,corpus2db:0,databas:1,espeak:1,g2p:[0,1],g2p_phonetisauru:0,gruut:[0,1,2],indic:1,instal:1,lang:0,languag:1,lexicon2db:0,line:1,model:1,modul:0,packag:0,phonem:[0,1],po:[0,1],submodul:0,support:1,tabl:1,tagger:1,text_processor:0,usag:1,util:0}}) \ No newline at end of file 
+Search.setIndex({docnames:["gruut","index","modules"],envversion:{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":4,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":3,"sphinx.domains.rst":2,"sphinx.domains.std":2,sphinx:56},filenames:["gruut.rst","index.rst","modules.rst"],objects:{"":{gruut:[0,0,0,"-"]},"gruut.TextProcessor":{get_settings:[0,2,1,""],post_process_graph:[0,2,1,""],process:[0,2,1,""],sentences:[0,2,1,""],words:[0,2,1,""]},"gruut.TextProcessorSettings":{abbreviations:[0,3,1,""],babel_locale:[0,3,1,""],begin_punctuations:[0,3,1,""],begin_punctuations_pattern:[0,3,1,""],currencies:[0,3,1,""],currency_symbols:[0,3,1,""],dateparser_lang:[0,3,1,""],default_currency:[0,3,1,""],default_date_format:[0,3,1,""],end_punctuations:[0,3,1,""],end_punctuations_pattern:[0,3,1,""],get_ordinal:[0,3,1,""],get_parts_of_speech:[0,3,1,""],get_whitespace:[0,2,1,""],guess_phonemes:[0,3,1,""],is_initialism:[0,3,1,""],is_maybe_currency:[0,2,1,""],is_maybe_date:[0,2,1,""],is_maybe_number:[0,2,1,""],is_maybe_time:[0,2,1,""],is_non_word:[0,3,1,""],join_str:[0,3,1,""],keep_whitespace:[0,3,1,""],lang:[0,3,1,""],lookup_phonemes:[0,3,1,""],major_breaks:[0,3,1,""],major_breaks_pattern:[0,3,1,""],minor_breaks:[0,3,1,""],minor_breaks_pattern:[0,3,1,""],normalize_whitespace:[0,2,1,""],num2words_lang:[0,3,1,""],parse_time:[0,3,1,""],post_process_sentence:[0,3,1,""],pre_process_text:[0,3,1,""],replacements:[0,3,1,""],spell_out_words:[0,3,1,""],split_initialism:[0,3,1,""],split_words:[0,2,1,""],verbalize_time:[0,3,1,""],word_breaks:[0,3,1,""],word_breaks_pattern:[0,3,1,""]},"gruut.const":{BreakNode:[0,1,1,""],BreakType:[0,1,1,""],BreakWordNode:[0,1,1,""],EndElement:[0,1,1,""],GetPartsOfSpeech:[0,1,1,""],GraphType:[0,1,1,""],GuessPhonemes:[0,1,1,""],IgnoreNode:[0,1,1,""],InterpretAs:[0,1,1,""],InterpretAsFormat:[0,1,1,""],LookupPhonemes:[0,1,1,""],Node:[0,1,1,""],ParagraphNode:[0,1,1,""],
PostProcessSentence:[0,1,1,""],PunctuationWordNode:[0,1,1,""],Sentence:[0,1,1,""],SentenceNode:[0,1,1,""],SpeakNode:[0,1,1,""],TextProcessorSettings:[0,1,1,""],Time:[0,1,1,""],Word:[0,1,1,""],WordNode:[0,1,1,""],WordRole:[0,1,1,""],default_get_whitespace:[0,4,1,""],default_normalize_whitespace:[0,4,1,""],default_split_words:[0,4,1,""],has_digit:[0,4,1,""],maybe_compile_regex:[0,4,1,""]},"gruut.const.BreakNode":{get_milliseconds:[0,2,1,""],time:[0,3,1,""]},"gruut.const.BreakType":{MAJOR:[0,3,1,""],MINOR:[0,3,1,""]},"gruut.const.BreakWordNode":{break_type:[0,3,1,""],text:[0,3,1,""],text_with_ws:[0,3,1,""]},"gruut.const.EndElement":{element:[0,3,1,""]},"gruut.const.GraphType":{add_edge:[0,2,1,""],add_edges_from:[0,2,1,""],add_node:[0,2,1,""],nodes:[0,3,1,""],out_degree:[0,2,1,""],out_edges:[0,2,1,""],predecessors:[0,2,1,""],remove_edges_from:[0,2,1,""],successors:[0,2,1,""]},"gruut.const.IgnoreNode":{node:[0,3,1,""]},"gruut.const.InterpretAs":{CURRENCY:[0,3,1,""],DATE:[0,3,1,""],NUMBER:[0,3,1,""],SPELL_OUT:[0,3,1,""],TIME:[0,3,1,""]},"gruut.const.InterpretAsFormat":{DATE_DMY:[0,3,1,""],DATE_DMY_ORDINAL:[0,3,1,""],DATE_DM_ORDINAL:[0,3,1,""],DATE_MD:[0,3,1,""],DATE_MDY:[0,3,1,""],DATE_MDY_ORDINAL:[0,3,1,""],DATE_MD_ORDINAL:[0,3,1,""],DATE_MY:[0,3,1,""],DATE_Y:[0,3,1,""],DATE_YM:[0,3,1,""],DATE_YMD:[0,3,1,""],DATE_YMD_ORDINAL:[0,3,1,""],NUMBER_CARDINAL:[0,3,1,""],NUMBER_DIGITS:[0,3,1,""],NUMBER_ORDINAL:[0,3,1,""],NUMBER_YEAR:[0,3,1,""]},"gruut.const.Node":{element:[0,3,1,""],implicit:[0,3,1,""],lang:[0,3,1,""],node:[0,3,1,""],voice:[0,3,1,""]},"gruut.const.ParagraphNode":{node:[0,3,1,""]},"gruut.const.PunctuationWordNode":{text:[0,3,1,""],text_with_ws:[0,3,1,""]},"gruut.const.Sentence":{idx:[0,3,1,""],lang:[0,3,1,""],par_idx:[0,3,1,""],pause_after_ms:[0,3,1,""],pause_before_ms:[0,3,1,""],text:[0,3,1,""],text_spoken:[0,3,1,""],text_with_ws:[0,3,1,""],voice:[0,3,1,""],words:[0,3,1,""]},"gruut.const.SentenceNode":{node:[0,3,1,""]},"gruut.const.SpeakNode":{node:[0,3,1,""]},"g
ruut.const.TextProcessorSettings":{abbreviations:[0,3,1,""],babel_locale:[0,3,1,""],begin_punctuations:[0,3,1,""],begin_punctuations_pattern:[0,3,1,""],currencies:[0,3,1,""],currency_symbols:[0,3,1,""],dateparser_lang:[0,3,1,""],default_currency:[0,3,1,""],default_date_format:[0,3,1,""],end_punctuations:[0,3,1,""],end_punctuations_pattern:[0,3,1,""],get_ordinal:[0,3,1,""],get_parts_of_speech:[0,3,1,""],get_whitespace:[0,2,1,""],guess_phonemes:[0,3,1,""],is_initialism:[0,3,1,""],is_maybe_currency:[0,2,1,""],is_maybe_date:[0,2,1,""],is_maybe_number:[0,2,1,""],is_maybe_time:[0,2,1,""],is_non_word:[0,3,1,""],join_str:[0,3,1,""],keep_whitespace:[0,3,1,""],lang:[0,3,1,""],lookup_phonemes:[0,3,1,""],major_breaks:[0,3,1,""],major_breaks_pattern:[0,3,1,""],minor_breaks:[0,3,1,""],minor_breaks_pattern:[0,3,1,""],normalize_whitespace:[0,2,1,""],num2words_lang:[0,3,1,""],parse_time:[0,3,1,""],post_process_sentence:[0,3,1,""],pre_process_text:[0,3,1,""],replacements:[0,3,1,""],spell_out_words:[0,3,1,""],split_initialism:[0,3,1,""],split_words:[0,2,1,""],verbalize_time:[0,3,1,""],word_breaks:[0,3,1,""],word_breaks_pattern:[0,3,1,""]},"gruut.const.Time":{hours:[0,3,1,""],minutes:[0,3,1,""],period:[0,3,1,""]},"gruut.const.Word":{idx:[0,3,1,""],is_break:[0,3,1,""],is_major_break:[0,3,1,""],is_minor_break:[0,3,1,""],is_punctuation:[0,3,1,""],is_spoken:[0,3,1,""],lang:[0,3,1,""],leading_ws:[0,3,1,""],par_idx:[0,3,1,""],pause_after_ms:[0,3,1,""],pause_before_ms:[0,3,1,""],phonemes:[0,3,1,""],pos:[0,3,1,""],sent_idx:[0,3,1,""],text:[0,3,1,""],text_with_ws:[0,3,1,""],trailing_ws:[0,3,1,""],voice:[0,3,1,""]},"gruut.const.WordNode":{currency_name:[0,3,1,""],currency_symbol:[0,3,1,""],date:[0,3,1,""],format:[0,3,1,""],in_lexicon:[0,3,1,""],interpret_as:[0,3,1,""],number:[0,3,1,""],phonemes:[0,3,1,""],pos:[0,3,1,""],role:[0,3,1,""],text:[0,3,1,""],text_with_ws:[0,3,1,""],time:[0,3,1,""]},"gruut.const.WordRole":{DEFAULT:[0,3,1,""],LETTER:[0,3,1,""]},"gruut.corpus2db":{main:[0,4,1,""]},"gruut.
g2p":{GraphemesToPhonemes:[0,1,1,""],do_predict:[0,4,1,""],do_test:[0,4,1,""],do_train:[0,4,1,""],main:[0,4,1,""],train:[0,4,1,""]},"gruut.g2p.GraphemesToPhonemes":{decode_string:[0,2,1,""],encode_string:[0,2,1,""],grapheme2features:[0,2,1,""],word2features:[0,2,1,""]},"gruut.g2p_phonetisaurus":{PhonetisaurusGraph:[0,1,1,""],do_predict:[0,4,1,""],do_test:[0,4,1,""],main:[0,4,1,""]},"gruut.g2p_phonetisaurus.PhonetisaurusGraph":{g2p:[0,2,1,""],g2p_one:[0,2,1,""],load:[0,2,1,""]},"gruut.lang":{ArabicPreProcessText:[0,1,1,""],DelayedGraphemesToPhonemes:[0,1,1,""],DelayedPartOfSpeechTagger:[0,1,1,""],DelayedSqlitePhonemizer:[0,1,1,""],FarsiPartOfSpeechTagger:[0,1,1,""],en_get_ordinal:[0,4,1,""],en_is_initialism:[0,4,1,""],en_parse_time:[0,4,1,""],en_verbalize_time:[0,4,1,""],fa_post_process_sentence:[0,4,1,""],fr_has_silent_consonant:[0,4,1,""],fr_is_vowel:[0,4,1,""],fr_post_process_sentence:[0,4,1,""],get_ar_settings:[0,4,1,""],get_cs_settings:[0,4,1,""],get_de_settings:[0,4,1,""],get_en_us_settings:[0,4,1,""],get_es_settings:[0,4,1,""],get_fa_settings:[0,4,1,""],get_fr_settings:[0,4,1,""],get_it_settings:[0,4,1,""],get_nl_settings:[0,4,1,""],get_pt_settings:[0,4,1,""],get_ru_settings:[0,4,1,""],get_settings:[0,4,1,""],get_sv_settings:[0,4,1,""],get_sw_settings:[0,4,1,""],get_zh_settings:[0,4,1,""]},"gruut.lexicon2db":{main:[0,4,1,""]},"gruut.phonemize":{SqlitePhonemizer:[0,1,1,""]},"gruut.phonemize.SqlitePhonemizer":{DEFAULT_ROLE:[0,3,1,""]},"gruut.pos":{PartOfSpeechTagger:[0,1,1,""],do_predict:[0,4,1,""],do_print_labels:[0,4,1,""],do_test:[0,4,1,""],do_train:[0,4,1,""],main:[0,4,1,""],train_model:[0,4,1,""]},"gruut.pos.PartOfSpeechTagger":{decode_string:[0,2,1,""],encode_string:[0,2,1,""],local_features:[0,2,1,""],sent2features:[0,2,1,""],word2features:[0,2,1,""]},"gruut.text_processor":{TextProcessor:[0,1,1,""]},"gruut.text_processor.TextProcessor":{get_settings:[0,2,1,""],post_process_graph:[0,2,1,""],process:[0,2,1,""],sentences:[0,2,1,""],words:[0,2,1,""]},"gruut.
utils":{attrib_no_namespace:[0,4,1,""],find_lang_dir:[0,4,1,""],get_currency_names:[0,4,1,""],grouper:[0,4,1,""],leaves:[0,4,1,""],pairwise:[0,4,1,""],pipeline_split:[0,4,1,""],pipeline_transform:[0,4,1,""],print_graph:[0,4,1,""],remove_non_word_chars:[0,4,1,""],resolve_lang:[0,4,1,""],sliding_window:[0,4,1,""],tag_no_namespace:[0,4,1,""],text_and_elements:[0,4,1,""]},gruut:{"const":[0,0,0,"-"],TextProcessor:[0,1,1,""],TextProcessorSettings:[0,1,1,""],corpus2db:[0,0,0,"-"],g2p:[0,0,0,"-"],g2p_phonetisaurus:[0,0,0,"-"],get_supported_languages:[0,4,1,""],is_language_supported:[0,4,1,""],lang:[0,0,0,"-"],lexicon2db:[0,0,0,"-"],phonemize:[0,0,0,"-"],pos:[0,0,0,"-"],sentences:[0,4,1,""],text_processor:[0,0,0,"-"],utils:[0,0,0,"-"]}},objnames:{"0":["py","module","Python module"],"1":["py","class","Python class"],"2":["py","method","Python method"],"3":["py","attribute","Python attribute"],"4":["py","function","Python function"]},objtypes:{"0":"py:module","1":"py:class","2":"py:method","3":"py:attribute","4":"py:function"},terms:{"0":[0,1],"01pm":0,"1":[0,1],"10":1,"100":[0,1],"12":0,"123":[0,1],"123m":[0,1],"1st":0,"2":[0,1],"2000":1,"2001":1,"2021":0,"25":0,"3":0,"4":0,"4pm":1,"5000":0,"6":0,"8859":1,"\u00f0":1,"\u014b":1,"\u0259":[0,1],"\u025a":1,"\u026a":1,"\u0279":1,"\u027e":1,"\u0283":0,"\u028a":1,"\u028c":1,"\u0292":1,"\u02c8\u00e6":1,"\u02c8\u0251":1,"\u02c8\u0251\u02d0":1,"\u02c8\u0254":1,"\u02c8\u025a":1,"\u02c8\u025b":[0,1],"\u02c8\u026a":1,"\u02c8a":1,"\u02c8a\u026a":[0,1],"\u02c8a\u028a":1,"\u02c8b":1,"\u02c8e":1,"\u02c8e\u026a":1,"\u02c8i":1,"\u02c8i\u02d0":1,"\u02c8m":1,"\u02c8o\u028a":0,"\u02c8u":1,"\u02c8u\u02d0":1,"\u02cc\u025b":1,"\u03b8":1,"a\u02d0":1,"a\u02d0\u02c8b":1,"break":[0,1],"case":[0,1],"class":[0,1],"const":2,"default":[0,1],"do":1,"e\u026a":0,"enum":0,"f\u00e0":1,"final":0,"float":0,"function":[0,1],"i\u02d0":1,"import":[0,1],"int":0,"long":1,"new":0,"null":1,"return":[0,1],"static":0,"true":[0,1],"try":[0,1],A:[0,1],For:1,If:1,In:1,The:[0,1
],These:[0,1],To:1,_:0,abbrevi:[0,1],abc:0,accept:0,ad:0,adapt:1,add:[0,1],add_begin:0,add_bo:0,add_digit:0,add_edg:0,add_edges_from:0,add_end:0,add_eo:0,add_length:0,add_nod:0,add_punctu:0,add_speak_tag:0,addit:1,adolfvonkleist:0,after:[0,1],alia:[0,1],alias:0,align:[0,1],all:[0,1],alphabet:1,also:[0,1],amount:[0,1],an:[0,1],ani:0,annot:0,apart:[0,1],app:1,appli:0,ar:[0,1],arab:[0,1],arabicpreprocesstext:0,arg:0,argument:0,around:1,arrai:0,ascii:0,attempt:0,attrib_no_namespac:0,attribut:0,automat:[0,1],avail:1,b:[0,1],babel:0,babel_local:0,base:0,beam:0,beam_scal:0,befor:0,begin:0,begin_punctu:0,begin_punctuations_pattern:0,better:1,between:0,bia:0,bin:[0,1],block:0,bool:0,break_phonem:0,break_typ:0,breaknod:0,breaktyp:0,breakwordnod:0,built:0,c1:0,c2:0,c:0,cach:0,call:0,callabl:0,can:[0,1],cardin:[0,1],casing_func:0,chang:1,charact:0,chars_back:0,chars_backward:0,chars_forward:0,chars_front:0,check:1,chines:0,chunk:0,cli:0,clock:0,code:0,collect:0,com:0,combin:0,come:0,comma:1,compil:0,conll:1,conllu:[0,1],conllu_path:0,consol:0,conson:0,contain:[0,1],content:2,context:[0,1],control:1,convert:[0,1],corpu:[0,1],corpus2db:[1,2],corpus_path:0,creat:[0,1],credit:[0,1],crf:[0,1],crf_tagger:0,crfsuit:[0,1],cs:1,currenc:[0,1],currency_nam:0,currency_symbol:0,custom:[0,1],czech:[0,1],d:1,dai:1,data:[0,1],databas:0,date:[0,1],date_dm_ordin:0,date_dmi:0,date_dmy_ordin:0,date_i:0,date_md:0,date_md_ordin:0,date_mdi:0,date_mdy_ordin:0,date_mi:0,date_ym:0,date_ymd:0,date_ymd_ordin:0,dateparser_lang:0,datetim:0,db:[0,1],db_conn:0,db_path:0,de:1,decim:0,decod:0,decode_str:0,decreas:0,default_curr:0,default_date_format:0,default_get_whitespac:0,default_lang:0,default_normalize_whitespac:0,default_rol:0,default_split_word:0,defin:[0,1],delayedgraphemestophonem:0,delayedpartofspeechtagg:0,delayedsqlitephonem:0,depend:[0,1],depth:0,deriv:1,detail:1,detect_curr:0,detect_d:0,detect_numb:0,detect_tim:0,dict:[0,1],dictionari:0,differ:1,digit:[0,1],directori:0,disabl:[0,1],disambigu:[0,1]
,dmy:0,do_predict:0,do_print_label:0,do_test:0,do_train:0,document:[0,1],doe:1,dollar:1,done:1,download:1,dst:0,dt:[],due:1,duemila:1,dure:[0,1],dutch:[0,1],e:[0,1],ea430c5fb78b:0,each:[0,1],edg:0,edit:1,element:0,elementtre:0,empti:0,en:[0,1],en_get_ordin:0,en_is_initi:0,en_parse_tim:0,en_u:0,en_verbalize_tim:0,encod:[0,1],encode_str:0,end:0,end_punctu:0,end_punctuations_pattern:0,endel:0,english:[0,1],ensur:0,entir:0,entri:0,ep:0,eps_phonem:0,es:1,espeak:0,espeak_word:1,etc:[0,1],etre:0,everyth:1,exampl:[0,1],exist:1,expand:[0,1],explicit_lang:0,extend:1,extern:1,extra:1,f:1,fa:1,fa_post_process_sent:0,factori:0,fals:[0,1],farsi:[0,1],farsipartofspeechtagg:0,fast:0,featur:0,februari:1,fewer:1,field:1,file:[0,1],fillvalu:0,find:[0,1],find_lang_dir:0,first:[0,1],fix:0,flag:1,follow:[0,1],forc:1,fork:1,form:0,format:[0,1],found:0,four:1,fr:1,fr_has_silent_conson:0,fr_is_vowel:0,fr_post_process_sent:0,french:[0,1],from:[0,1],fst2npz:0,fst:[0,1],full:0,further:1,g2p:2,g2p_align:1,g2p_arg:0,g2p_model:0,g2p_one:0,g2p_phonetisauru:2,g:[0,1],gather:1,gener:1,genit:0,gennaio:1,german:[0,1],get:0,get_ar_set:0,get_cs_set:0,get_currency_nam:0,get_de_set:0,get_en_us_set:0,get_es_set:0,get_fa_set:0,get_fr_set:0,get_it_set:0,get_millisecond:0,get_nl_set:0,get_ordin:0,get_parts_of_speech:0,get_phonem:1,get_pt_set:0,get_ru_set:0,get_set:0,get_supported_languag:0,get_sv_set:0,get_sw_set:0,get_token:1,get_whitespac:0,get_zh_set:0,getpartsofspeech:0,github:[0,1],given:1,grammat:1,graph:0,graph_path:0,graphem:[0,1],grapheme2featur:0,grapheme_separ:0,graphemestophonem:0,graphtyp:0,group_separ:0,grouper:0,gruut_lang_:0,guess:[0,1],guess_phonem:0,guesser:0,guessphonem:0,h:[0,1],ha:1,hansen:1,has_digit:0,have:[0,1],hazm:0,he:1,help:1,hour:0,http:[0,1],human:1,hundr:1,i:[0,1],id:1,idx:[0,1],ignor:0,ignorenod:0,implicit:0,in_lexicon:0,includ:1,incom:0,indent:0,index:[0,1],initi:[0,1],inner:1,input:0,insid:0,instal:0,instanc:1,instead:0,instruct:1,integ:[0,1],intellig:1,interfac:1,interpret:[
0,1],interpret_a:0,interpreta:0,interpretasformat:0,io:1,ipa:1,is_break:[0,1],is_initi:0,is_language_support:0,is_last:0,is_major_break:[0,1],is_maybe_curr:0,is_maybe_d:0,is_maybe_numb:0,is_maybe_tim:0,is_minor_break:[0,1],is_non_word:0,is_punctu:[0,1],is_spoken:[0,1],iso:1,italian:[0,1],item_separ:0,iter:0,its:1,j:1,jjr:1,join_str:0,k:1,kaldi:1,keep_whitespac:0,kei:[0,1],keyword:0,known:0,kwarg:0,l:1,label:[0,1],lambda:[],lang:[1,2],lang_alias:1,lang_dir:0,languag:0,last:0,last_char:0,last_phonem:0,lead:0,leading_w:0,leaf:0,least:0,leav:0,length:0,letter:0,level:0,lexicon2db:[1,2],lexicon:[0,1],liason:0,like:[0,1],line:0,list:[0,1],load:0,load_g2p_guess:0,load_phoneme_lexicon:0,load_pos_tagg:0,local:0,local_featur:0,locale_str:0,look:[0,1],lookup_phonem:0,lookupphonem:0,lower:1,lowest:1,m:[0,1],mai:0,main:[0,1],major:0,major_break:0,major_breaks_pattern:0,manual:1,map:[0,1],map_lexicon:1,marker:0,match:[0,1],max:0,max_guess:0,max_iter:0,maybe_compile_regex:0,md:0,mdy:0,mese:1,metadata:1,method:[0,1],michel:1,millisecond:[0,1],min_beam:0,minor:0,minor_break:0,minor_breaks_pattern:0,minut:0,mishkal:0,mo:0,model:0,model_path:0,model_prefix:0,modul:[1,2],moi:0,month:1,more:[0,1],most:1,multipl:[0,1],must:1,mutablemap:0,my:0,n:[0,1],name:[0,1],namespac:0,necessari:0,need:1,networkx:0,next:[0,1],ng:1,nl:1,nn:1,node:0,non:0,none:0,normal:[0,1],normalize_whitespac:0,note:1,npz:0,num2word:1,num2words_lang:0,number:[0,1],number_cardin:0,number_digit:0,number_ordin:0,number_year:0,numpi:0,o:[0,1],object:0,occur:0,off:0,om:0,omi:0,onc:1,one:[0,1],onli:[0,1],oper:1,option:[0,1],order:0,ordin:[0,1],org:[0,1],origin:0,other:1,out:[0,1],out_degre:0,out_edg:0,outgo:0,output:[0,1],output_path:0,over:0,overrid:0,own:1,p:[0,1],packag:[1,2],page:1,pair:0,pairwis:0,par_idx:0,paragraph:[0,1],paragraphnod:0,paramet:0,parent_nod:0,pars:0,parse_tim:0,part:[0,1],partofspeechtagg:0,pass:[0,1],path:0,pattern:0,paus:[0,1],pause_after_m:0,pause_before_m:0,per:1,perform:[0,1],period:[0,1],persian
:1,ph:1,phonem:2,phoneme_join:0,phonemizer_arg:0,phonet:[0,1],phonetisauru:[0,1],phonetisaurusgraph:0,phonolog:1,phrase:0,pip:[0,1],pipeline_split:0,pipeline_transform:0,pleas:1,po:2,point:0,portugues:[0,1],possibl:1,post:0,post_process:0,post_process_graph:0,post_process_sent:0,postprocesssent:0,pre:[0,1],pre_process_text:0,prebuilt:1,predecessor:0,predict:[0,1],prefix:0,preload:0,present:0,primari:1,print:[0,1],print_func:0,print_graph:0,prioriti:1,probabl:0,process:0,process_arg:0,pron_ord:1,pronounc:0,pronunci:[0,1],properli:1,properti:[],protocol:[],pt:1,punctuat:[0,1],punctuationwordnod:0,put:1,py:[0,1],python3:[0,1],python:[0,1],q:1,quot:0,re:[0,1],read:[0,1],regex:0,rel:0,remov:0,remove_edges_from:0,remove_non_word_char:0,remove_phonem:0,replac:0,repo:1,repres:[0,1],requir:0,resolv:0,resolve_lang:0,respect:1,retain:0,right:0,role:[0,1],root:0,ru:1,run:1,russian:[0,1],s0:0,s1:0,s2:0,s3:0,s:[0,1],sai:[0,1],schemaloc:1,script:1,search:[0,1],search_dir:0,second:[0,1],see:[0,1],sent2featur:0,sent:1,sent_idx:[0,1],sent_nod:0,sentenc:[0,1],sentencenod:0,separ:[0,1],sequenc:0,set:[0,1],settings_arg:0,sh:1,share:0,short_lang:0,should:0,silent:0,simpli:1,singl:0,size:0,slide:0,sliding_window:0,small:0,someth:0,space:0,spanish:[0,1],speak:[0,1],speaknod:0,specif:[0,1],specifi:0,speech:[0,1],spell:[0,1],spell_out:0,spell_out_word:0,split:[0,1],split_func:0,split_initi:0,split_word:0,spoken:0,sqlite3:0,sqlite:[0,1],sqlitephonem:0,src:0,ssml:0,ssml_text:1,stai:0,standard:1,state:0,step:1,store:1,str:0,str_or_pattern:0,string:[0,1],sub:[0,1],submodul:2,subset:1,substitut:1,successor:0,suit:0,suppli:1,support:0,surround:0,sv:1,sw:1,swahili:[0,1],swedish:[0,1],symbol:0,synesthesiam:1,synthesi:1,synthesis11:1,t:[0,1],tag:[0,1],tag_no_namespac:0,tagger:0,tagger_arg:0,tail:0,target:1,templat:0,ten:1,test:[0,1],text:[0,1],text_and_el:0,text_processor:[1,2],text_spoken:0,text_with_w:[0,1],textprocessor:[0,1],textprocessorset:0,than:1,thi:[0,1],think:1,thousand:1,through:0,time:[0
,1],todai:1,token:[0,1],tokenzi:1,top:0,towardsdatasci:0,tr:1,trail:0,trailing_w:0,train:[0,1],train_model:0,transform:0,transform_func:0,translat:1,tree:0,treebank:1,tri:0,tt:[0,1],tupl:0,twenti:0,two:[0,1],txt:1,type:0,u:1,un:1,under:0,union:0,univer:0,univers:1,universaldepend:0,unknown:1,unless:0,up:[0,1],updat:1,upo:0,us:[0,1],usd:0,user:[0,1],usual:1,util:2,v:1,valu:0,vbz:[],verbal:0,verbalize_curr:0,verbalize_d:0,verbalize_numb:0,verbalize_tim:0,version:[0,1],voic:[0,1],vowel:0,w3:1,w:[0,1],wa:1,wai:1,what:1,when:0,where:[0,1],which:1,whitespac:[0,1],whose:0,wikipedia:1,wiktionari:1,wiktionary2dict:1,window:0,without:0,word2featur:0,word:[0,1],word_break:0,word_breaks_pattern:0,word_phonem:1,word_transform_func:0,wordnod:0,wordrol:0,words_backward:0,words_forward:0,would:[0,1],wound:1,wrap:1,wrapper:0,written:0,www:1,xdg_config_hom:0,xml:[0,1],xmln:1,xmlschema:1,xpo:[0,1],xsd:1,xsi:1,y:[0,1],year:[0,1],yield:0,ym:0,ymd:0,ymo:0,you:1,your:1,z:1,zero:0},titles:["gruut package","gruut","gruut"],titleterms:{"const":0,"new":1,ad:1,command:1,content:0,corpus2db:0,databas:1,espeak:1,g2p:[0,1],g2p_phonetisauru:0,gruut:[0,1,2],indic:1,instal:1,lang:0,languag:1,lexicon2db:0,line:1,model:1,modul:0,packag:0,phonem:[0,1],po:[0,1],ssml:1,submodul:0,support:1,tabl:1,tagger:1,text_processor:0,usag:1,util:0}}) \ No newline at end of file diff --git a/docs/source/index.rst b/docs/source/index.rst index 7f4b26a..ce43215 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -36,7 +36,51 @@ Output:: . ‖ -Note that "wound" and "read" have different pronunciations in different contexts. +Note that "wound" and "read" have different pronunciations in different (grammatical) contexts. + +A subset of :ref:`SSML ` is also supported: + +.. code-block:: python + + from gruut import sentences + + ssml_text = """ + + Today at 4pm, 2/1/2000. + Un mese fà, 2/1/2000. 
+ """ + + for sent in sentences(ssml_text, ssml=True): + for word in sent: + if word.phonemes: + print(sent.idx, word.lang, word.text, *word.phonemes) + +with the output:: + + 0 en-US Today t ə d ˈeɪ + 0 en-US at ˈæ t + 0 en-US four f ˈɔ ɹ + 0 en-US P p ˈi + 0 en-US M ˈɛ m + 0 en-US , | + 0 en-US February f ˈɛ b j u ˌɛ ɹ i + 0 en-US first f ˈɚ s t + 0 en-US , | + 0 en-US two t ˈu + 0 en-US thousand θ ˈaʊ z ə n d + 0 en-US . ‖ + 1 it Un u n + 1 it mese ˈm e s e + 1 it fà f a + 1 it , | + 1 it due d j u + 1 it gennaio d͡ʒ e n n ˈa j o + 1 it duemila d u e ˈm i l a + 1 it . ‖ Installation ------------ @@ -60,6 +104,7 @@ Supported Languages * German (``de``) * English (``en``) * Spanish (``es``) +* Persian/Farsi (``fa``) * French (``fr``) * Italian (``it``) * Dutch (``nl``) @@ -177,6 +222,38 @@ which outputs: See ``gruut --help`` for more options. + +.. _ssml_support: + +SSML Support +-------------------------- + +A subset of `the SSML standard `_ is supported: + +* ```` - wrap around SSML text + * ``lang`` - set language for document +* ``

        `` - paragraph + * ``lang`` - set language for paragraph +* ``<s>`` - sentence (disables automatic sentence breaking) + * ``lang`` - set language for sentence +* ``<w>`` / ``<token>`` - word (disables automatic tokenization) + * ``lang`` - set language for word + * ``role`` - set word role (see [word roles](#word-roles)) +* ``<lang lang="...">`` - set language for inner text +* ``<voice name="...">`` - set voice of inner text +* ``<say-as interpret-as="">`` - force interpretation of inner text + * ``interpret-as`` - one of "spell-out", "date", "number", "time", or "currency" + * ``format`` - way to format text depending on ``interpret-as`` + * number - one of "cardinal", "ordinal", "digits", "year" + * date - string with "d" (cardinal day), "o" (ordinal day), "m" (month), or "y" (year) +* ``<break time="">`` - pause for given amount of time + * time - seconds ("123s") or milliseconds ("123ms") +* ``<sub alias="">`` - substitute ``alias`` for inner text +* ``<phoneme ph="...">`` - supply phonemes for inner text + * ``ph`` - phonemes for each word of inner text, separated by whitespace + * ``alphabet`` - if "ipa", phonemes are intelligently split ("aːˈb" -> "aː", "ˈb") + + .. 
_database: Database diff --git a/scripts/check-code.sh b/scripts/check-code.sh index ca6fdea..2684200 100755 --- a/scripts/check-code.sh +++ b/scripts/check-code.sh @@ -16,7 +16,7 @@ if [[ -z "${no_venv}" ]]; then fi fi -python_files=("${src_dir}/gruut/"*.py "${src_dir}/tests/"*.py) +python_files=("${src_dir}/gruut/"*.py "${src_dir}/tests/"*.py "${src_dir}/setup.py") # Add bin scripts selectively bin_scripts=('clean-metadata' 'fst2npy' 'map_lexicon' 'phonemize_lexicon' 'reorder_lexicon' 'espeak_word') diff --git a/scripts/format-code.sh b/scripts/format-code.sh index 43e7548..feef2b3 100755 --- a/scripts/format-code.sh +++ b/scripts/format-code.sh @@ -16,7 +16,7 @@ if [[ -z "${no_venv}" ]]; then fi fi -python_files=("${src_dir}/gruut/"*.py "${src_dir}/tests/"*.py) +python_files=("${src_dir}/gruut/"*.py "${src_dir}/tests/"*.py "${src_dir}/setup.py") # Add bin scripts selectively bin_scripts=('clean-metadata' 'fst2npy' 'map_lexicon' 'phonemize_lexicon' 'reorder_lexicon' 'espeak_word') diff --git a/setup.py b/setup.py index e8b6446..2cf7945 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,4 @@ """Setup file for gruut""" -import os from collections import defaultdict from pathlib import Path @@ -19,11 +18,11 @@ requirements = [] requirements_path = this_dir / "requirements.txt" if requirements_path.is_file(): - with open(requirements_path, "r") as requirements_file: + with open(requirements_path, "r", encoding="utf-8") as requirements_file: requirements = requirements_file.read().splitlines() version_path = module_dir / "VERSION" -with open(version_path, "r") as version_file: +with open(version_path, "r", encoding="utf-8") as version_file: version = version_file.read().strip() # x.y.z -> x.y.0