Skip to content

Commit

Permalink
Merge pull request #390 from AI4Bharat/translation_bugs
Browse files Browse the repository at this point in the history
Adjust audio speed up
  • Loading branch information
Shruti1229 authored Jul 19, 2023
2 parents 29480d3 + a57dd36 commit 32155a1
Showing 1 changed file with 46 additions and 16 deletions.
62 changes: 46 additions & 16 deletions backend/voiceover/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,10 +318,36 @@ def generate_tts_output(
return voiceover_payload


def equal_sentences(ind, previous_sentence, current_sentence, delete_indices):
if "text" not in current_sentence:
delete_indices.append(ind)
elif (
get_original_duration(
previous_sentence["start_time"], current_sentence["start_time"]
)
== 0
and get_original_duration(
previous_sentence["end_time"], current_sentence["end_time"]
)
== 0
):
delete_indices.append(ind)
else:
pass


def process_translation_payload(translation_obj, target_language):
tts_input = []
empty_sentences = []
delete_indices = []
translation = translation_obj.payload
for ind, text in enumerate(translation["payload"]):
if ind != 0:
equal_sentences(ind, translation["payload"][ind - 1], text, delete_indices)

delete_indices.reverse()
for index in delete_indices:
translation["payload"].pop(index)
for ind, text in enumerate(translation["payload"]):
if not compare_time(text["end_time"], text["start_time"])[0]:
return {
Expand Down Expand Up @@ -557,7 +583,7 @@ def adjust_audio(audio_file, original_time, audio_speed):
# audio_time_difference = original_time - seconds
audio = AudioSegment.from_file(audio_file)
audio_time_difference = (original_time * 1000 - len(audio)) / 1000
if audio_time_difference > 0.11:
if audio_time_difference > 0:
logging.info("Add silence in the audio of %s", str(audio_time_difference))
# duration in millisecond
silence_segment = AudioSegment.silent(duration=audio_time_difference * 1000)
Expand All @@ -567,21 +593,16 @@ def adjust_audio(audio_file, original_time, audio_speed):
combined_audio.export(audio_file, format="flac")
elif audio_time_difference == 0:
logging.info("No time difference")
elif audio_time_difference < 0:
elif audio_time_difference < -0.001:
logging.info("Speed up the audio by %s", str(seconds / original_time))
sound = AudioSegment.from_file(audio_file)
# sound.export("temp_original_" + str(ind) + ".mp3", format="mp3")
# generate a slower audio for example
faster_sound = speedup(sound, seconds / original_time, 100)
final_sound = faster_sound[: original_time * 1000]
final_sound.export(audio_file, format="flac")
audio = AudioFileClip(audio_file)
seconds = audio.duration
logging.info("Seconds of adjusted flac audio %s", str(seconds))
# audio = MP3(audio_file)
# faster_sound.export("temp_" + str(ind) + ".mp3", format="mp3")
# speed_change(sound, 0.5)
# adjust_speed(audio_file, seconds / original_time)
if (seconds / original_time) > 0:
faster_sound = speedup(sound, seconds / original_time, 100)
final_sound = faster_sound[: original_time * 1000]
final_sound.export(audio_file, format="flac")
audio = AudioFileClip(audio_file)
seconds = audio.duration
logging.info("Seconds of adjusted flac audio %s", str(seconds))
else:
pass

Expand Down Expand Up @@ -627,6 +648,7 @@ def integrate_all_audios(file_name, payload, video_duration):
payload["payload"][str(0)]["time_difference"],
-1,
)

first_start_time = payload["payload"]["0"]["start_time"]
difference_between_payloads = get_original_duration(
"00:00:00.000", first_start_time
Expand Down Expand Up @@ -667,6 +689,7 @@ def integrate_all_audios(file_name, payload, video_duration):
file_name + "_" + str(index - 1) + ".flac", format="flac"
)
if index == length_payload - 1:
original_time = payload["payload"][str(index)]["time_difference"]
end_time = payload["payload"][str(index)]["end_time"]
audio_2_decoded = base64.b64decode(
payload["payload"][str(index)]["audio"]["audioContent"]
Expand All @@ -680,6 +703,9 @@ def integrate_all_audios(file_name, payload, video_duration):
file_name + "_" + str(index) + ".flac", "wb"
) as out_f23:
out_f23.write(audio_2_decoded)
adjust_audio(
file_name + "_" + str(index) + ".flac", original_time, -1
)
silence_segment = AudioSegment.silent(
duration=last_segment_difference * 1000
)
Expand All @@ -697,7 +723,11 @@ def integrate_all_audios(file_name, payload, video_duration):
else:
with open(file_name + "_" + str(index) + ".flac", "wb") as out_f23:
out_f23.write(audio_2_decoded)
adjust_audio(
file_name + "_" + str(index) + ".flac", original_time, -1
)
audio_file_paths.append(file_name + "_" + str(index) + ".flac")

else:
logging.info("Index of Audio : #%s", str(index))
original_time = payload["payload"][str(index)]["time_difference"]
Expand All @@ -720,13 +750,13 @@ def integrate_all_audios(file_name, payload, video_duration):
final_clip.write_audiofile(file_name + str(i) + ".wav")
final_paths.append(file_name + str(i) + ".wav")
elif i == batch_size - 1:
audio_batch_paths = audio_file_paths[(i) * 20 : len(audio_file_paths)]
audio_batch_paths = audio_file_paths[(i * 20) : len(audio_file_paths)]
clips = [AudioFileClip(c) for c in audio_batch_paths]
final_clip = concatenate_audioclips(clips)
final_clip.write_audiofile(file_name + str(i) + ".wav")
final_paths.append(file_name + str(i) + ".wav")
else:
audio_batch_paths = audio_file_paths[(i) * 20 : (i + 1) * 20]
audio_batch_paths = audio_file_paths[(i * 20) : ((i + 1) * 20)]
clips = [AudioFileClip(c) for c in audio_batch_paths]
final_clip = concatenate_audioclips(clips)
final_clip.write_audiofile(file_name + str(i) + ".wav")
Expand Down

0 comments on commit 32155a1

Please sign in to comment.