Skip to content

Commit

Permalink
Bumped version to 1.2.0 and added support for google cloud wavenet vo…
Browse files Browse the repository at this point in the history
…ices!
  • Loading branch information
DonFlymoor committed Sep 22, 2020
1 parent c83fd84 commit 5115092
Show file tree
Hide file tree
Showing 4 changed files with 181 additions and 76 deletions.
5 changes: 5 additions & 0 deletions changelog.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,8 @@ version
- Changed yuoung_grandma to young_grandma
- REPEAT modifier defaults to 2
- Added support for ogg sound effects

9/21/2020
1.2.0
- Bumped version to 1.2.0
- Added support for Google Cloud WaveNet voices, which are much higher quality!
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
EMAIL = '[email protected]'
AUTHOR = 'Don Flymoor'
REQUIRES_PYTHON = '>=3.6.0'
VERSION = '1.1.3'
VERSION = '1.2.0'

# What packages are required for this module to be executed?
REQUIRED = [
Expand Down
2 changes: 1 addition & 1 deletion voxtalkz/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@

from .voxtalkz import *

__version__ = '1.1.3'
__version__ = '1.2.0'
248 changes: 174 additions & 74 deletions voxtalkz/voxtalkz.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,20 @@

from gtts import gTTS
import sys, os.path as path
import io, time
import io
import time
from pydub import AudioSegment
import __builtin__
import datetime
import requests
import json
import os
import base64

file = 'null'
_File = 'null'

def print(args):
with open(file,'w') as file:
with open(_File,'w') as file:
file.write(args)
return __builtin__.print(*args, **kwargs)

Expand All @@ -36,48 +41,81 @@ class voxTalkz():
sound effects must be placed into the effects folder as .mp3, .wav, or .ogg files
'''

def __init__(self, file, name, debug=False, timeme=False):
global file
def __init__(self, file, name, debug=False, cloudKey=False, timeme=False):
_File = file
self.homedir = path.expanduser('~')
self.name = name
self.debug = debug
self.file = file
self._File = _File
self.filename = name+'.mp3'
self.SoundFile = AudioSegment.empty()
self.Pause = AudioSegment.empty()
self.Crew_Effects = {}
self.Actors_Effects = {}
# List of people
self.Crew = {"indian_man":"bn",
"american_woman":"en-us",
"scottish_woman":"ca",
"russian_woman":"sk",
"drawling_man":"cy",
"autistic_woman":"da",
"marika":"de",
"au_woman":"en-au",
"british_woman":"en-gb",
"indian_woman":"en-in",
"spanish_woman":"es-es",
#"spanishenglish_woman":"es-en",
#"indian_man":"et",
"french_woman":"fr",
"hindu_woman":"hi",
"alien_man":"is",
"italian_man":"it",
"scottish_woman":"ja",
"phone_woman":"kn",
"korean_man":"ko",
"dramatized_woman":"mr",
"dutch_woman":"nl",
"silly_man":"pl",
"robot_man":"sq",
"dramitized_girl":"sv",
"bored_teen":"te",
"happy_girl":"th",
"boss_lady":"tl",
"young_grandma":"vi",
"spoiled_girl":"zh-cn",
"american_woman":"en"}
if not cloudKey:
self.Actors = {"indian_man":"bn",
"american_woman":"en-us",
"scottish_woman":"ca",
"russian_woman":"sk",
"drawling_man":"cy",
"autistic_woman":"da",
"marika":"de",
"au_woman":"en-au",
"british_woman":"en-gb",
"indian_woman":"en-in",
"spanish_woman":"es-es",
#"spanishenglish_woman":"es-en",
#"indian_man":"et",
"french_woman":"fr",
"hindu_woman":"hi",
"alien_man":"is",
"italian_man":"it",
"scottish_woman":"ja",
"phone_woman":"kn",
"korean_man":"ko",
"dramatized_woman":"mr",
"dutch_woman":"nl",
"silly_man":"pl",
"robot_man":"sq",
"dramitized_girl":"sv",
"bored_teen":"te",
"happy_girl":"th",
"boss_lady":"tl",
"young_grandma":"vi",
"spoiled_girl":"zh-cn",
"american_woman":"en"}
else:
self.Actors = {
"australian_woman":["en-AU-Wavenet-A","en-AU"],
"australian_man":["en-AU-Wavenet-B","en-AU"],
"personal_australian_woman":["en-AU-Wavenet-C","en-AU"],
"personal_australian_man":["en-AU-Wavenet-D","en-AU"],

"indian_woman":["en-IN-Wavenet-A","en-IN"],
"indian_man":["en-IN-Wavenet-B","en-IN"],
"personal_indian_woman":["en-IN-Wavenet-C","en-IN"],
"personal_indian_man":["en-IN-Wavenet-D","en-IN"],

"british_woman":["en-GB-Wavenet-A","en-GB"],
"british_man":["en-GB-Wavenet-B","en-GB"],
"personal_british_woman":["en-GB-Wavenet-C","en-GB"],
"urgent_british_woman":["en-GB-Wavenet-F","en-GB"],
"personal_british_man":["en-GB-Wavenet-D","en-GB"],

"young_american_man":["en-US-Wavenet-A","en-US"],
"middleage_american_man":["en-US-Wavenet-B","en-US"],
"american_man":["en-US-Wavenet-B","en-US"],
"middleage_american_woman":["en-US-Wavenet-C","en-US"],
"american_woman":["en-US-Wavenet-C","en-US"],
"middleage_personal_american_man":["en-US-Wavenet-D","en-US"],
"middleage_personal_american_woman":["en-US-Wavenet-E","en-US"],
"young_personal_american_woman":["en-US-Wavenet-F","en-US"],
"distracted_middleage_american_woman":["en-US-Wavenet-G","en-US"],
"young_american_woman":["en-US-Wavenet-H","en-US"],
"young_personal_american_man":["en-US-Wavenet-I","en-US"],
"cocky_american_man":["en-US-Wavenet-J","en-US"]
}

def ToSound(self):
# parse the file
parsed = self.Parse(self.file)
Expand Down Expand Up @@ -147,9 +185,35 @@ def Parse(self,file,FILE=False):
return List

def help(self):
print("Usage: python3 -m voxtalkz [input file, output file] \n\nConverts play-like script to a .mp3 file \nScript file must be written in this manner: \n\n#The first time a unknown name is called, instead of making the person talk, the name will be assigned to a person. \nSusan:american_woman\n#Then the person will \"talk\"\nSusan:Hello, world!\n#Comments are allowed!\n*soundeffect \n\nEffects can be applied by adding an @ symbal the the effect name, like so:\nperson1:hello, world!@VOLUME=8\nA second effect can be applied by using the pipe(\"|\") like so:\nperson1:Hello, World!@FADE|VOLUME=8\n")
print("Usage: python3 -m voxtalkz [input file, output file] --flags\n\
\n\
Converts play-like script to a .mp3 file \n\
Script file must be written in this manner: \n\
\n\
#The first time a unknown name is called, instead of making the person talk, the name will be assigned to a person. \n\
Susan:american_woman\n\
#Then the person will \"talk\"\n\
Susan:Hello, world!\n\
#Comments are allowed!\n\
*soundeffect \n\
\n\
Effects can be applied by adding an @ symbal the the effect name, like so:\n\
person1:hello, world!@VOLUME=8\nA second effect can be applied by using the pipe(\"|\") like so:\n\
person1:Hello, World!@FADE|VOLUME=8\n\
")
print('flags:\n\
--debug | the program tells you what it\'s doing\n\
--help | print this help message\n\
--cloud=apiKey | use Google Cloud TextToSpeech API, must have API key after it\n\
')
print("List of all effects:")
list = ["@FADE | Fade to nothing","@FADE_IN | Fade in from silent","@OVERLAY | Overlays the sound onto what has already been recorded. Use @OVERLAY=VAR1 to START the overlay at the begining of where you assigned @VAR=1","@REPEAT= | Repeat audio segment however many times you specify. e.g. (american_woman:Hello, world!@REPEAT=10) would produce someone saying \"Hello, world!\" ten times","@VAR= | Assign a number to a temporary table. Only used with @OVERLAY","@VOLUME= | Set volume change in decibels. A negitive number will reduce the volume","@PITCH= | Set pitch change. e.g. \"american_woman:Hello, world!@PITCH=0.3\" would make the person sound like a little girl, while \"american_woman:Hello, world!@PITCH=-0.3\" would sound like an old woman"]
list = ["@FADE | Fade to nothing",\
"@FADE_IN | Fade in from silent",\
"@OVERLAY | Overlays the sound onto what has already been recorded. Use @OVERLAY=VAR1 to START the overlay at the begining of where you assigned @VAR=1",\
"@REPEAT= | Repeat audio segment however many times you specify. e.g. (american_woman:Hello, world!@REPEAT=10) would produce someone saying \"Hello, world!\" ten times",\
"@VAR= | Assign a number to a temporary table. Only used with @OVERLAY",\
"@VOLUME= | Set volume change in decibels. A negitive number will reduce the volume",\
"@PITCH= | Set pitch change. e.g. \"american_woman:Hello, world!@PITCH=0.3\" would make the person sound like a little girl, while \"american_woman:Hello, world!@PITCH=-0.3\" would sound like an old woman"]
for string in list:
print(" "+string)
print("\nList of all actors:")
Expand Down Expand Up @@ -233,45 +297,71 @@ def ListToSound(self, Lists):
pass
# Open sound to a variable

elif List[0] in self.Crew:
elif List[0] in self.Actors:
if self.debug:
print('Making %s say \'%s\'... '%(List[0],List[1]))
utterance = gTTS(text=List[1], lang=self.Crew[List[0]], slow=False, lang_check=False)
if self.debug:
print('Done!\n')
# Create an empty file-like object
File = io.BytesIO()
# Write what the person 'said' to the object
if self.debug:
print("Recording what %s said..."%(List[0]))
while True:
try:
utterance.write_to_fp(File)
break
except Exception as E:
wait = input('Somthing seems to be wrong with the internet (or the file). Please type \'save\' to save file, \'help\' to display what went wrong, or \'continue\' if the internet connection is restored. Anything else will exit.')
if wait == "continue":
pass
elif wait == "save":
self.SoundFile = SoundFile
self.save()
elif wait == "pass":

# If no cloud API key was given, synthesize the speech with gTTS
if not self.cloudKey:
utterance = gTTS(text=List[1], lang=self.Actors[List[0]], slow=False, lang_check=False)
if self.debug:
print('Done!\n')
# Create an empty file-like object
File = io.BytesIO()
# Write what the person 'said' to the object
if self.debug:
print("Recording what %s said..."%(List[0]))
while True:
try:
utterance.write_to_fp(File)
break
elif wait == "help":
print(E)
else:
return False
# I think this makes the file readable? Not sure
File.seek(0)
audio_segment = AudioSegment.from_mp3(File)
except Exception as E:
wait = input('Somthing seems to be wrong with the internet (or the file). Please type \'save\' to save file, \'help\' to display what went wrong, or \'continue\' if the internet connection is restored. Anything else will exit.')
if wait == "continue":
pass
elif wait == "save":
self.SoundFile = SoundFile
self.save()
elif wait == "pass":
break
elif wait == "help":
print(E)
else:
return False
else:
try:
url = "https://texttospeech.googleapis.com/v1beta1/text:synthesize"

text = List[1]

data = {
"input": {"text": text},
"voice": {"name": self.Actors[List[0]][0], "languageCode": self.Actors[List[0]][1]},
"audioConfig": {"audioEncoding": "MP3"}
};

headers = {"content-type": "application/json", "X-Goog-Api-Key": self.cloudKey }

r = requests.post(url=url, json=data, headers=headers)

content = json.loads(r.content)
audioIO = content['audioContent']
audioDecoded = base64.b64decode(audioIO)
File = io.BytesIO()
with open(File,'wb') as file:
file.write(audioDecoded)

# Rewind the in-memory buffer to the start so the MP3 data is read from the beginning
File.seek(0)
audio_segment = AudioSegment.from_mp3(File)
if self.debug:
print('Done!\n')

try:
if effects:
effects = self.Crew_Effects[List[0]] + "|" + effects
effects = self.Actors_Effects[List[0]] + "|" + effects
else:
effects = self.Crew_Effects[List[0]]
effects = self.Actors_Effects[List[0]]
except:
pass
#self.SoundFile += self.Pause.read()
Expand All @@ -283,14 +373,14 @@ def ListToSound(self, Lists):
List[1] = List[1].strip()

if effects:
self.Crew_Effects.__setitem__(List[0],effects)
self.Actors_Effects.__setitem__(List[0],effects)
effects = False
self.Crew.__setitem__(List[0], self.Crew[List[1]])
self.Actors.__setitem__(List[0], self.Actors[List[1]])
if self.debug:
print('%s is now a %s\n'%(List[0],List[1]))
except:
print("%s is NOT a type of person! Using american_woman..."%List[1])
self.Crew.__setitem__(List[0], self.Crew['american_woman'])
self.Actors.__setitem__(List[0], self.Actors['american_woman'])

# Apply effects
if effects != False:
Expand Down Expand Up @@ -371,13 +461,23 @@ def say(script, filename, debug=False):
if __name__ == "__main__":
args = sys.argv
debug = False
cloudKey = False
if ("--debug") in args:
args.remove("--debug")
debug = True

if ("--help" or "-h") in args:
voxTalkz('', '').help()

if ("--cloud") in args:
try:
cloudKey = args[args.index('--cloud')+1]
del args[args.index('--cloud')+1]
args.remove("--cloud")
except:
print('Usage: --cloud [text-to-speach API key]')
args.remove("--cloud")

elif len(args) != 3:
print("Expecting two arguments! Usage: voxtalkz [input file, output file] ")

Expand All @@ -393,5 +493,5 @@ def say(script, filename, debug=False):
if script:
filename = args[2]
print(f"Outputting to {filename}")
voxTalkz(script, filename, debug).ToSound()
voxTalkz(script, filename, debug, cloudKey=cloudKey).ToSound()

0 comments on commit 5115092

Please sign in to comment.