app.py

import os
import wave

import numpy as np
import pyaudio
from faster_whisper import WhisperModel
from scipy.io import wavfile

import voice_service as vs
from rag.AIVoiceAssistant import AIVoiceAssistant
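
# Pipeline: record a short microphone chunk, skip it if it is silent,
# transcribe it with faster-whisper, hand the text to the RAG-backed
# assistant, and speak the response back to the caller.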

DEFAULT_MODEL_SIZE = "medium"
DEFAULT_CHUNK_LENGTH = 10  # seconds of audio captured per chunk

ai_assistant = AIVoiceAssistant()


def is_silence(data, max_amplitude_threshold=3000):
    """Check if audio data contains silence."""
    max_amplitude = np.max(np.abs(data))
    return max_amplitude <= max_amplitude_threshold
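
# With 16-bit input (pyaudio.paInt16) samples span roughly ±32768, so the
# default threshold of 3000 treats anything below about 9% of full scale
# as silence.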


def record_audio_chunk(audio, stream, chunk_length=DEFAULT_CHUNK_LENGTH):
    """Record one chunk of microphone audio to temp_audio_chunk.wav.

    Returns True if the chunk was silent (the file is deleted), False if it
    contains speech and is left on disk for transcription.
    """
    frames = []
    for _ in range(int(16000 / 1024 * chunk_length)):
        data = stream.read(1024)
        frames.append(data)

    temp_file_path = 'temp_audio_chunk.wav'
    with wave.open(temp_file_path, 'wb') as wf:
        wf.setnchannels(1)
        wf.setsampwidth(audio.get_sample_size(pyaudio.paInt16))
        wf.setframerate(16000)
        wf.writeframes(b''.join(frames))

    # Check whether the recorded chunk contains only silence.
    try:
        _, data = wavfile.read(temp_file_path)
        if is_silence(data):
            os.remove(temp_file_path)
            return True
        return False
    except Exception as e:
        print(f"Error while reading audio file: {e}")
        return False


def transcribe_audio(model, file_path):
    # beam_size=7 widens the beam search beyond the library default of 5:
    # somewhat slower, usually slightly more accurate.
    segments, _ = model.transcribe(file_path, beam_size=7)
    return ' '.join(segment.text for segment in segments)


def main():
    model_size = DEFAULT_MODEL_SIZE + ".en"
    model = WhisperModel(model_size, device="cuda", compute_type="float16",
                         num_workers=10)
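
    # NOTE: device="cuda" assumes a CUDA-capable GPU. A common CPU-only
    # fallback (an assumption, not part of the original script) is:
    #   model = WhisperModel(model_size, device="cpu", compute_type="int8")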

    audio = pyaudio.PyAudio()
    stream = audio.open(format=pyaudio.paInt16, channels=1, rate=16000,
                        input=True, frames_per_buffer=1024)
    customer_input_transcription = ""

    try:
        while True:
            chunk_file = "temp_audio_chunk.wav"

            # Record an audio chunk; True means it was silent and discarded.
            print("_")
            if not record_audio_chunk(audio, stream):
                # Transcribe the chunk, then delete the temporary file.
                transcription = transcribe_audio(model, chunk_file)
                os.remove(chunk_file)
                print("Customer: {}".format(transcription))

                # Keep a running transcript of the customer's side.
                customer_input_transcription += "Customer: " + transcription + "\n"

                # Get a response from the AI assistant and speak it aloud.
                output = ai_assistant.interact_with_llm(transcription)
                if output:
                    output = output.lstrip()
                    vs.play_text_to_speech(output)
                    print("AI Assistant: {}".format(output))
    except KeyboardInterrupt:
        print("\nStopping...")
    finally:
        stream.stop_stream()
        stream.close()
        audio.terminate()


if __name__ == "__main__":
    main()
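
# To run: `python app.py` with a working microphone attached. The loop runs
# until interrupted with Ctrl+C.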