-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcrop.py
42 lines (36 loc) · 1.25 KB
/
crop.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import os
import re
import sys
import yaml
import logging
from utils.audio import Audio
from utils.trimmer import Trimmer
from utils.clean import tokenize
def main(audio_filepath, text_filepath):
    """Crop a long audio file against its transcript and print the result.

    Loads the transcript via ``get_text``, tokenizes it, strips punctuation
    and lowercases it, then passes the cleaned text and the audio to
    ``Trimmer.crop_longaudio``. When both a start and an end come back
    truthy, prints them followed by the transcript tokens found at the
    reported start index and at the adjusted end index.

    Args:
        audio_filepath: Path handed to ``Audio``.
        text_filepath: Path of the YAML transcript read by ``get_text``.
    """
    text = get_text(text_filepath)
    # Raw string: same pattern bytes, but no invalid-escape warnings for
    # ``\.`` and ``\?``.
    token_clean = r'\.|,|;|:|\?|!|\.\.\.'
    tokenized_text = ' '.join(tokenize(text))
    clean_text = re.sub(token_clean, '', tokenized_text).lower()
    audio_file = Audio(audio_filepath)
    trimmer = Trimmer(clean_text, audio_file)
    start, end, start_word_index, end_word_index = trimmer.crop_longaudio()
    # NOTE(review): this is a truthiness test, so a start/end equal to 0
    # would suppress the output -- confirm crop_longaudio never returns 0
    # for a valid boundary.
    if start and end:
        if end_word_index is None:
            # No end index reported: fall back to the last token.
            end_word_index = -1
        else:
            end_word_index += 1
        # NOTE(review): the indices are applied to the token list that still
        # contains punctuation, while the trimmer saw the stripped text --
        # confirm both word sequences line up.
        words = tokenized_text.split()
        print(start, end, words[start_word_index], words[end_word_index])
def get_text(text_file):
    """Return the transcript stored in a YAML file as a single string.

    The parsed document is indexed with the key ``'text'``; item ``[1]`` of
    every entry under that key is concatenated in order, with no separator.

    Args:
        text_file: Path of the YAML file to read.

    Returns:
        The concatenation of ``element[1]`` for each element of
        ``text_dict['text']`` (an empty string when there are no elements).
    """
    # Context manager so the file handle is closed even if parsing fails.
    with open(text_file) as handle:
        # NOTE(review): yaml.load without an explicit Loader is deprecated
        # since PyYAML 5.1 and rejected by PyYAML 6. If these files only hold
        # plain data, prefer yaml.safe_load; the call is left unchanged here
        # so the set of accepted YAML tags does not silently change.
        text_dict = yaml.load(handle)
    return ''.join(element[1] for element in text_dict['text'])
if __name__ == "__main__":
    # Exit with a usage line instead of an IndexError traceback when the two
    # required positional arguments are missing.
    if len(sys.argv) < 3:
        sys.exit("usage: {} <audio_filepath> <text_filepath>".format(
            sys.argv[0]))
    audio_filepath = sys.argv[1]
    text_filepath = sys.argv[2]
    # Emit INFO-and-above records through a stream handler with a
    # timestamped format.
    logging.basicConfig(level=logging.INFO,
                        format="%(asctime)s-%(levelname)s: %(message)s",
                        handlers=[logging.StreamHandler()])
    main(audio_filepath, text_filepath)