Refactor TranscriptionTextArea to use whisper.ThreadedWhisperModel for continuous transcription
locaal-ai · Oct 23, 2024 · 12a4bdd · 12a4bdd
1 parent 909a14e
commit 12a4bdd
Show file tree

Hide file tree

Showing 2 changed files with 12 additions and 180 deletions.
diff --git a/src/audio/textual_transcription_textarea.py b/src/audio/textual_transcription_textarea.py
@@ -1,5 +1,4 @@
 from os import path
-from audio.whisper_transcribe import ContinuousTranscriberProcess
 from queue import Queue
 import time
 from typing import List
@@ -9,10 +8,12 @@
 from textual.message import Message
 from textual.worker import get_current_worker
 import wave
-import platformdirs
 
 from audio.AudioCapture import AudioCapture
 from notes.manager import NoteManager
+from simpler_whisper import whisper
+
+from utils import resource_path
 
 
 class TranscriptionTextArea(TextArea):
@@ -42,8 +43,8 @@ def generate_transcription_content(self):
 
         return content
 
-    def process_transcription(self, transcription: str, is_partial: bool):
-        if not transcription or len(transcription.strip()) == 0:
+    def process_transcription(self, chunk_id: int, transcription: str, is_partial: bool):
+        if not transcription or len(transcription) == 0:
             return
         if is_partial:
             self.partial_transcription = transcription
@@ -60,7 +61,7 @@ def update_transcriptions(self):
         return updated
 
     def send_audio_to_transcriber(self, audio_data: np.ndarray):
-        self.transcriber.process(audio_data)
+        self.transcriber.queue_audio(audio_data)
         if self.wav_file is not None:
             audio_data_int = (audio_data * 32767).astype(np.int16)
             self.wav_file.writeframes(audio_data_int.tobytes())
@@ -89,10 +90,14 @@ def start_transcription(self):
         self.wav_file.setsampwidth(2)
         self.wav_file.setframerate(16000)
 
-        self.transcriber = ContinuousTranscriberProcess(self.process_transcription)
+        self.transcriber = whisper.ThreadedWhisperModel(
+            resource_path.resource_path("data/ggml-small.en-q5_1.bin"),
+            use_gpu=True,
+            max_duration_sec=10,
+        )
         self.audio_capture = AudioCapture(self.send_audio_to_transcriber)
 
-        self.transcriber.start()
+        self.transcriber.start(self.process_transcription)
         self.audio_capture.start_recording()
         self.is_transcribing = True
 

diff --git a/src/audio/whisper_transcribe.py b/src/audio/whisper_transcribe.py