Skip to content

Commit

Permalink
draft page
Browse files Browse the repository at this point in the history
  • Loading branch information
dkounadis committed May 21, 2024
1 parent 27eadf9 commit eef7f06
Show file tree
Hide file tree
Showing 3 changed files with 610 additions and 592 deletions.
113 changes: 65 additions & 48 deletions demo.py
Original file line number Diff line number Diff line change
@@ -1,50 +1,67 @@
import subprocess
import soundfile
import sox

## VOICES mimic3
#
# /scratch/dkounadis/.envs/.tts/lib/python3.8/site-packages/mimic3_tts/

# Two Mimic-3 voices, each paired with the speaking rate used for it in the
# SSML below.
spk1 = 'en_US/vctk_low#p236'
rate1 = 1.24

spk2 = 'en_UK/apope_low'
rate2 = 1.64

# Pitch shift (in semitones) applied by sox to the synthesised audio.
pitch_semitones = -4

# SSML document with one sentence per voice. Only the first sentence is
# wrapped in an explicit volume setting; the second uses a rate setting only.
text = ('<speak>'
        '<prosody volume=\'64\'>'
        f'<prosody rate=\'{rate1}\'>'
        f'<voice name=\'{spk1}\'>'
        '<s>'
        'A an exemplary voice.'
        '</s>'
        '</voice>'
        '</prosody>'
        '</prosody>'
        f'<prosody rate=\'{rate2}\'>'
        f'<voice name=\'{spk2}\'>'
        '<s>'
        '.Another pleasant voice.'
        '</s>'
        '</voice>'
        '</prosody>'
        '</speak>')

# The SSML is kept on disk so it can be inspected after the run.
ssml_file = '_tmp_ssml.txt'
with open(ssml_file, 'w') as f:
    f.write(text)

# mimic3 is fed the SSML on stdin and its stdout is captured as the WAV file.
# The command is passed as an argument list (no shell), and check=True makes a
# failed synthesis raise instead of leaving an empty WAV for the next step.
raw_tts = 'test.wav'
with open(ssml_file, 'rb') as ssml_in, open(raw_tts, 'wb') as wav_out:
    subprocess.run(['mimic3', '--ssml'],
                   stdin=ssml_in,
                   stdout=wav_out,
                   check=True)

# Pitch-shift the synthesised audio with sox and save it next to the raw file.
x, fs = soundfile.read(raw_tts)
tfm = sox.Transformer()
tfm.pitch(pitch_semitones)
x_shift = tfm.build_array(
    input_array=x,
    sample_rate_in=fs)

soundfile.write('test_pitch.wav', x_shift, fs)
import msinference



# Sentence to synthesise, and the Mimic-3 voice used for it.
my_text = "Metamorphosis of cultural heritage to augmented hypermedia for accessibility and inclusion."
_voice = 'en_US/vctk_low#p276'  # https://audeering.github.io/shift/
affect = True  # False = Non-Affective voices
out_wav = f'example_{affect=}.wav'


def _build_ssml(sentence, voice, rate, volume):
    """Return an SSML document speaking *sentence* with one Mimic-3 voice.

    Args:
        sentence: Text placed verbatim inside the ``<s>`` element.
        voice: Mimic-3 voice name, e.g. ``'en_US/vctk_low#p276'``.
        rate: Value of the ``<prosody rate=...>`` attribute.
        volume: Value of the ``<prosody volume=...>`` attribute.

    Returns:
        The complete ``<speak>...</speak>`` document as a string.
    """
    return (
        '<speak>'
        f'<prosody volume=\'{volume}\'>'
        f'<prosody rate=\'{rate}\'>'
        f'<voice name=\'{voice}\'>'
        f'<s>{sentence}</s>'
        '</voice>'
        '</prosody>'
        '</prosody>'
        '</speak>'
    )


def _mimic3_to_wav(ssml, wav_path):
    """Synthesise *ssml* with the ``mimic3`` command and save it to *wav_path*.

    The SSML is first written to ``_tmp_ssml.txt`` (left on disk afterwards),
    then fed to ``mimic3 --ssml`` on stdin while its stdout is captured in
    *wav_path*.

    Raises:
        subprocess.CalledProcessError: If mimic3 exits with a non-zero status.
    """
    with open('_tmp_ssml.txt', 'w') as f:
        f.write(ssml)
    # mimic3 runs as a child process and is waited on, so the WAV is complete
    # before anything reads it. An argument list avoids the shell; check=True
    # surfaces a failed synthesis instead of leaving an empty file behind.
    with open('_tmp_ssml.txt', 'rb') as ssml_in, open(wav_path, 'wb') as wav_out:
        subprocess.run(['mimic3', '--ssml'],
                       stdin=ssml_in,
                       stdout=wav_out,
                       check=True)


if affect:

    # Mimic-3: synthesise a short clip that serves only as speaker reference.

    reference_wav = '_spk.wav'
    rate = 4  # high speed sounds nice when used as speaker-reference audio for 2nd stage (StyleTTS2)
    _ssml = _build_ssml(
        'Sweet dreams are made of this, ... !!! I travel the world and the seven seas.',
        voice=_voice,
        rate=rate,
        volume=24)
    _mimic3_to_wav(_ssml, reference_wav)

    # StyleTTS2: speak my_text in the style computed from the reference clip.

    x = msinference.inference(my_text,
                              msinference.compute_style(reference_wav),
                              alpha=0.3,
                              beta=0.7,
                              diffusion_steps=7,
                              embedding_scale=1)
    soundfile.write(out_wav, x, 24000)

else:

    # Non Affective TTS: Mimic-3 speaks my_text directly into out_wav.

    rate = .84
    # The sentence is wrapped in single quotes, as in the original SSML.
    _ssml = _build_ssml(f'\'{my_text}\'',
                        voice=_voice,
                        rate=rate,
                        volume=94)
    _mimic3_to_wav(_ssml, out_wav)

11 changes: 6 additions & 5 deletions generate_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -679,7 +679,7 @@ def emotion_predictor(

# == markdown table

y = sorted(y, key=lambda d: d['emotion'][0]) # sort wav_files by valence
y = sorted(y, key=lambda d: d['emotion'][1]) # sort wav_files by valence

# SORTING OUTPUT IS LIST - 0-th ELEMENT = LOWEST VALENCE
#_________________________________________________
Expand Down Expand Up @@ -709,16 +709,17 @@ def emotion_predictor(

table = (
f'<html lang="en">\n<body>\n<h1>Available TTS Voices.</h1>'
f'\nYou can use the basic/affective version of every voice in \n'
f'<a href="https://github.com/audeering/shift/blob/main/demo.py">demo.py</a><hr>'
f'\nIn \n'
f'<a href="https://github.com/audeering/shift/blob/main/demo.py">demo.py</a> '
f'you can use the Affective or Non-Affective version of each voice.<hr>'
f'<table><tr><td>' # count
f'</td><td>\n\n voice \n\n</td>'
f'<td>\n\n Basic \n\n</td>'
f'<td>\n\n Non-Affective \n\n</td>'
f'<td>\n\n emotion volatility \n\n</td>'
f'<td>\n\n Affective \n\n</td>'
)

for i, tup in enumerate(reversed(y)): # i is new index
for i, tup in enumerate(y):

_voice, emotion, tgt_wav, affect_wav, fig_file, str_voice = tup.values()
print('\n\n', _voice, '\n\n')
Expand Down
Loading

0 comments on commit eef7f06

Please sign in to comment.