Bumped version to 1.2.0 and added support for google cloud wavenet voices!
DonFlymoor · Sep 22, 2020 · 5115092 · 5115092
1 parent c83fd84
commit 5115092
Show file tree

Hide file tree

Showing 4 changed files with 181 additions and 76 deletions.
diff --git a/changelog.txt b/changelog.txt
@@ -17,3 +17,8 @@ version
 - Changed yuoung_grandma to young_grandma
 - REPEAT modifier defaults to 2
 - Added support for ogg sound effects
+
+9/21/2020
+1.2.0
+- Bumped version to 1.2.0
+- Added support for google cloud wavenet voices, much higher quality!
diff --git a/setup.py b/setup.py
@@ -18,7 +18,7 @@
 EMAIL = '[email protected]'
 AUTHOR = 'Don Flymoor'
 REQUIRES_PYTHON = '>=3.6.0'
-VERSION = '1.1.3'
+VERSION = '1.2.0'
 
 # What packages are required for this module to be executed?
 REQUIRED = [

diff --git a/voxtalkz/__init__.py b/voxtalkz/__init__.py
@@ -4,4 +4,4 @@
 
 from .voxtalkz import *
 
-__version__ = '1.1.3'
+__version__ = '1.2.0'
diff --git a/voxtalkz/voxtalkz.py b/voxtalkz/voxtalkz.py
@@ -5,15 +5,20 @@
 
 from gtts import gTTS
 import sys, os.path as path
-import io, time
+import io
+import time
 from pydub import AudioSegment
 import __builtin__
 import datetime
+import requests 
+import json
+import os
+import base64
 
-file = 'null'
+_File = 'null'
 
 def print(args):
-    with open(file,'w') as file:
+    with open(_File,'w') as file:
         file.write(args)
     return __builtin__.print(*args, **kwargs)
 
@@ -36,48 +41,81 @@ class voxTalkz():
     sound effects must be placed into the effects folder as .mp3, .wav, or .ogg files
     '''
 
-    def __init__(self, file, name, debug=False, timeme=False):
-        global file
+    def __init__(self, file, name, debug=False, cloudKey=False, timeme=False):
+        _File = file
         self.homedir = path.expanduser('~')
         self.name = name
         self.debug = debug
-        self.file = file
+        self._File = _File
         self.filename = name+'.mp3'
         self.SoundFile = AudioSegment.empty()
         self.Pause = AudioSegment.empty()
-        self.Crew_Effects = {}
+        self.Actors_Effects = {}
         # List of people
-        self.Crew = {"indian_man":"bn",
-                    "american_woman":"en-us",
-                    "scottish_woman":"ca",
-                    "russian_woman":"sk",
-                    "drawling_man":"cy",
-                    "autistic_woman":"da",
-                    "marika":"de",
-                    "au_woman":"en-au",
-                    "british_woman":"en-gb",
-                    "indian_woman":"en-in",
-                    "spanish_woman":"es-es",
-                    #"spanishenglish_woman":"es-en",
-                    #"indian_man":"et",
-                    "french_woman":"fr",
-                    "hindu_woman":"hi",
-                    "alien_man":"is",
-                    "italian_man":"it",
-                    "scottish_woman":"ja",
-                    "phone_woman":"kn",
-                    "korean_man":"ko",
-                    "dramatized_woman":"mr",
-                    "dutch_woman":"nl",
-                    "silly_man":"pl",
-                    "robot_man":"sq",
-                    "dramitized_girl":"sv",
-                    "bored_teen":"te",
-                    "happy_girl":"th",
-                    "boss_lady":"tl",
-                    "young_grandma":"vi",
-                    "spoiled_girl":"zh-cn",
-                    "american_woman":"en"}
+        if not cloudKey:
+            self.Actors = {"indian_man":"bn",
+                        "american_woman":"en-us",
+                        "scottish_woman":"ca",
+                        "russian_woman":"sk",
+                        "drawling_man":"cy",
+                        "autistic_woman":"da",
+                        "marika":"de",
+                        "au_woman":"en-au",
+                        "british_woman":"en-gb",
+                        "indian_woman":"en-in",
+                        "spanish_woman":"es-es",
+                        #"spanishenglish_woman":"es-en",
+                        #"indian_man":"et",
+                        "french_woman":"fr",
+                        "hindu_woman":"hi",
+                        "alien_man":"is",
+                        "italian_man":"it",
+                        "scottish_woman":"ja",
+                        "phone_woman":"kn",
+                        "korean_man":"ko",
+                        "dramatized_woman":"mr",
+                        "dutch_woman":"nl",
+                        "silly_man":"pl",
+                        "robot_man":"sq",
+                        "dramitized_girl":"sv",
+                        "bored_teen":"te",
+                        "happy_girl":"th",
+                        "boss_lady":"tl",
+                        "young_grandma":"vi",
+                        "spoiled_girl":"zh-cn",
+                        "american_woman":"en"}
+        else:
+            self.Actors = {
+            "australian_woman":["en-AU-Wavenet-A","en-AU"],
+            "australian_man":["en-AU-Wavenet-B","en-AU"],
+            "personal_australian_woman":["en-AU-Wavenet-C","en-AU"],
+            "personal_australian_man":["en-AU-Wavenet-D","en-AU"],
+
+            "indian_woman":["en-IN-Wavenet-A","en-IN"],
+            "indian_man":["en-IN-Wavenet-B","en-IN"],
+            "personal_indian_woman":["en-IN-Wavenet-C","en-IN"],
+            "personal_indian_man":["en-IN-Wavenet-D","en-IN"],
+
+            "british_woman":["en-GB-Wavenet-A","en-GB"],
+            "british_man":["en-GB-Wavenet-B","en-GB"],
+            "personal_british_woman":["en-GB-Wavenet-C","en-GB"],
+            "urgent_british_woman":["en-GB-Wavenet-F","en-GB"],
+            "personal_british_man":["en-GB-Wavenet-D","en-GB"],
+
+            "young_american_man":["en-US-Wavenet-A","en-US"],
+            "middleage_american_man":["en-US-Wavenet-B","en-US"],
+            "american_man":["en-US-Wavenet-B","en-US"],
+            "middleage_american_woman":["en-US-Wavenet-C","en-US"],
+            "american_woman":["en-US-Wavenet-C","en-US"],
+            "middleage_personal_american_man":["en-US-Wavenet-D","en-US"],
+            "middleage_personal_american_woman":["en-US-Wavenet-E","en-US"],
+            "young_personal_american_woman":["en-US-Wavenet-F","en-US"],
+            "distracted_middleage_american_woman":["en-US-Wavenet-G","en-US"],
+            "young_american_woman":["en-US-Wavenet-H","en-US"],
+            "young_personal_american_man":["en-US-Wavenet-I","en-US"],
+            "cocky_american_man":["en-US-Wavenet-J","en-US"]
+            }
+
     def ToSound(self):
         # parse the file
         parsed = self.Parse(self.file)
@@ -147,9 +185,35 @@ def Parse(self,file,FILE=False):
         return List
 
     def help(self):
-        print("Usage: python3 -m voxtalkz [input file, output file] \n\nConverts play-like script to a .mp3 file \nScript file must be written in this manner: \n\n#The first time a unknown name is called, instead of making the person talk, the name will be assigned to a person. \nSusan:american_woman\n#Then the person will \"talk\"\nSusan:Hello, world!\n#Comments are allowed!\n*soundeffect \n\nEffects can be applied by adding an @ symbal the the effect name, like so:\nperson1:hello, world!@VOLUME=8\nA second effect can be applied by using the pipe(\"|\") like so:\nperson1:Hello, World!@FADE|VOLUME=8\n")
+        print("Usage: python3 -m voxtalkz [input file, output file] --flags\n\
+            \n\
+            Converts play-like script to a .mp3 file \n\
+            Script file must be written in this manner: \n\
+            \n\
+            #The first time a unknown name is called, instead of making the person talk, the name will be assigned to a person. \n\
+            Susan:american_woman\n\
+            #Then the person will \"talk\"\n\
+            Susan:Hello, world!\n\
+            #Comments are allowed!\n\
+            *soundeffect \n\
+            \n\
+            Effects can be applied by adding an @ symbal the the effect name, like so:\n\
+            person1:hello, world!@VOLUME=8\nA second effect can be applied by using the pipe(\"|\") like so:\n\
+            person1:Hello, World!@FADE|VOLUME=8\n\
+            ")
+        print('flags:\n\
+                --debug        | the program tells you what it\'s doing\n\
+                --help         | print this help message\n\
+                --cloud=apiKey | use Google Cloud TextToSpeech API, must have API key after it\n\
+                ')
         print("List of all effects:")
-        list = ["@FADE | Fade to nothing","@FADE_IN | Fade in from silent","@OVERLAY | Overlays the sound onto what has already been recorded. Use @OVERLAY=VAR1 to START the overlay at the begining of where you assigned @VAR=1","@REPEAT= | Repeat audio segment however many times you specify. e.g. (american_woman:Hello, world!@REPEAT=10) would produce someone saying \"Hello, world!\" ten times","@VAR=    | Assign a number to a temporary table. Only used with @OVERLAY","@VOLUME= | Set volume change in decibels. A negitive number will reduce the volume","@PITCH=  | Set pitch change. e.g. \"american_woman:Hello, world!@PITCH=0.3\" would make the person sound like a little girl, while \"american_woman:Hello, world!@PITCH=-0.3\" would sound like an old woman"]
+        list = ["@FADE | Fade to nothing",\
+                "@FADE_IN | Fade in from silent",\
+                "@OVERLAY | Overlays the sound onto what has already been recorded. Use @OVERLAY=VAR1 to START the overlay at the begining of where you assigned @VAR=1",\
+                "@REPEAT= | Repeat audio segment however many times you specify. e.g. (american_woman:Hello, world!@REPEAT=10) would produce someone saying \"Hello, world!\" ten times",\
+                "@VAR=    | Assign a number to a temporary table. Only used with @OVERLAY",\
+                "@VOLUME= | Set volume change in decibels. A negitive number will reduce the volume",\
+                "@PITCH=  | Set pitch change. e.g. \"american_woman:Hello, world!@PITCH=0.3\" would make the person sound like a little girl, while \"american_woman:Hello, world!@PITCH=-0.3\" would sound like an old woman"]
         for string in list:
             print("    "+string)
         print("\nList of all actors:")
@@ -233,45 +297,71 @@ def ListToSound(self, Lists):
                             pass
                 # Open sound to a variable
 
-            elif List[0] in self.Crew:
+            elif List[0] in self.Actors:
                 if self.debug:
                     print('Making %s say \'%s\'... '%(List[0],List[1]))
-                utterance = gTTS(text=List[1], lang=self.Crew[List[0]], slow=False, lang_check=False)
-                if self.debug:
-                    print('Done!\n')
-                # Create an empty file-like object
-                File = io.BytesIO()
-                # Write what the person 'said' to the object
-                if self.debug:
-                    print("Recording what %s said..."%(List[0]))
-                while True:
-                    try:
-                        utterance.write_to_fp(File)
-                        break
-                    except Exception as E:
-                        wait = input('Somthing seems to be wrong with the internet (or the file). Please type \'save\' to save file, \'help\' to display what went wrong, or \'continue\' if the internet connection is restored. Anything else will exit.')
-                        if wait == "continue":
-                            pass
-                        elif wait == "save":
-                            self.SoundFile = SoundFile
-                            self.save()
-                        elif wait == "pass":
+
+                # If no cloud API key was supplied, synthesize the line with gTTS
+                if not self.cloudKey:
+                    utterance = gTTS(text=List[1], lang=self.Actors[List[0]], slow=False, lang_check=False)
+                    if self.debug:
+                        print('Done!\n')
+                    # Create an empty file-like object
+                    File = io.BytesIO()
+                    # Write what the person 'said' to the object
+                    if self.debug:
+                        print("Recording what %s said..."%(List[0]))
+                    while True:
+                        try:
+                            utterance.write_to_fp(File)
                             break
-                        elif wait == "help":
-                            print(E)
-                        else:
-                            return False
-                # I think this makes the file readable? Not sure
-                File.seek(0)
-                audio_segment = AudioSegment.from_mp3(File)
+                        except Exception as E:
+                            wait = input('Somthing seems to be wrong with the internet (or the file). Please type \'save\' to save file, \'help\' to display what went wrong, or \'continue\' if the internet connection is restored. Anything else will exit.')
+                            if wait == "continue":
+                                pass
+                            elif wait == "save":
+                                self.SoundFile = SoundFile
+                                self.save()
+                            elif wait == "pass":
+                                break
+                            elif wait == "help":
+                                print(E)
+                            else:
+                                return False
+                else:
+                    try:
+                        url = "https://texttospeech.googleapis.com/v1beta1/text:synthesize"
+
+                        text = List[1]
+
+                        data = {
+                                "input": {"text": text},
+                                "voice": {"name":  self.Actors[List[0]][0], "languageCode": self.Actors[List[0]][1]},
+                                "audioConfig": {"audioEncoding": "MP3"}
+                              };
+
+                        headers = {"content-type": "application/json", "X-Goog-Api-Key": self.cloudKey }
+
+                        r = requests.post(url=url, json=data, headers=headers)
+
+                        content = json.loads(r.content)
+                        audioIO = content['audioContent']
+                        audioDecoded = base64.b64decode(audioIO)
+                        File = io.BytesIO()
+                        with open(File,'wb') as file:
+                            file.write(audioDecoded)
+
+                    # Rewind the in-memory buffer to the start so it can be read from the beginning
+                    File.seek(0)
+                    audio_segment = AudioSegment.from_mp3(File)
                 if self.debug:
                     print('Done!\n')
 
                 try:
                     if effects:
-                        effects = self.Crew_Effects[List[0]] + "|" + effects
+                        effects = self.Actors_Effects[List[0]] + "|" + effects
                     else:
-                        effects = self.Crew_Effects[List[0]]
+                        effects = self.Actors_Effects[List[0]]
                 except:
                     pass
                 #self.SoundFile += self.Pause.read()
@@ -283,14 +373,14 @@ def ListToSound(self, Lists):
                     List[1] = List[1].strip()
 
                     if effects:
-                        self.Crew_Effects.__setitem__(List[0],effects)
+                        self.Actors_Effects.__setitem__(List[0],effects)
                         effects = False
-                    self.Crew.__setitem__(List[0], self.Crew[List[1]])
+                    self.Actors.__setitem__(List[0], self.Actors[List[1]])
                     if self.debug:
                         print('%s is now a %s\n'%(List[0],List[1]))
                 except:
                     print("%s is NOT a type of person! Using american_woman..."%List[1])
-                    self.Crew.__setitem__(List[0], self.Crew['american_woman'])
+                    self.Actors.__setitem__(List[0], self.Actors['american_woman'])
 
             # Apply effects
             if effects != False:
@@ -371,13 +461,23 @@ def say(script, filename, debug=False):
 if __name__ == "__main__":
     args = sys.argv
     debug = False
+    cloudKey = False
     if ("--debug") in args:
         args.remove("--debug")
         debug = True
 
     if ("--help" or "-h") in args:
         voxTalkz('', '').help()
 
+    if ("--cloud") in args:
+        try:
+            cloudKey = args[args.index('--cloud')+1]
+            del args[args.index('--cloud')+1]
+            args.remove("--cloud")
+        except:
+            print('Usage: --cloud [text-to-speach API key]')
+            args.remove("--cloud")
+
     elif len(args) != 3:
         print("Expecting two arguments! Usage: voxtalkz [input file, output file] ")
 
@@ -393,5 +493,5 @@ def say(script, filename, debug=False):
         if script:
             filename = args[2]
             print(f"Outputting to {filename}")
-            voxTalkz(script, filename, debug).ToSound()
+            voxTalkz(script, filename, debug, cloudKey=cloudKey).ToSound()
Original file line number	Diff line number	Diff line change
Expand Up		@@ -4,4 +4,4 @@

		from .voxtalkz import *

		__version__ = '1.1.3'
		__version__ = '1.2.0'