Skip to content

Commit

Permalink
Merge pull request #1 from locaal-ai/roy.threaded_whisper
Browse files Browse the repository at this point in the history
Roy.threaded whisper
  • Loading branch information
royshil authored Oct 25, 2024
2 parents 8441c5e + 96f019d commit 051a64a
Show file tree
Hide file tree
Showing 8 changed files with 640 additions and 78 deletions.
16 changes: 8 additions & 8 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -74,17 +74,18 @@ jobs:
env:
SIMPLER_WHISPER_ACCELERATION: ${{ matrix.acceleration }}
SIMPLER_WHISPER_PLATFORM: ${{ matrix.platform }}
SIMPLER_WHISPER_PYTHON_VERSION: ${{ matrix.python-version }}
run: |
python setup.py build_ext --inplace
python -m build --wheel
- name: Install built wheel Non-Windows
if: startsWith(matrix.os, 'windows') == false
run: |
pip install dist/*.whl
- name: Install built wheel Windows
if: startsWith(matrix.os, 'windows') == true
if: startsWith(matrix.os, 'windows') == true
shell: pwsh
run: |
$wheelFile = Get-ChildItem dist/*.whl | Select-Object -First 1
Expand All @@ -104,15 +105,15 @@ jobs:
run: |
import os
import glob
wheel_file = glob.glob('dist/*.whl')[0]
base_name = os.path.basename(wheel_file)
name_parts = base_name.split('-')
# Insert acceleration and platform before the last part (which is like 'any.whl')
new_name_parts = name_parts[:-1] + ['${{ matrix.acceleration }}', '${{ matrix.platform }}'] + [name_parts[-1]]
new_name = '-'.join(new_name_parts)
new_path = os.path.join('dist', new_name)
os.rename(wheel_file, new_path)
print(f"Renamed {base_name} to {new_name}")
Expand All @@ -125,10 +126,9 @@ jobs:
$wheelName += "-${{ matrix.acceleration }}"
}
echo "WHEEL_NAME=$wheelName" >> $env:GITHUB_ENV
- name: Upload wheel
uses: actions/upload-artifact@v4
with:
name: ${{ env.WHEEL_NAME }}
path: dist/*.whl

4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Find Python
find_package(Python COMPONENTS Interpreter Development NumPy REQUIRED)
find_package(Python ${PYTHON_VERSION} EXACT COMPONENTS Interpreter Development NumPy REQUIRED)

# Fetch pybind11
include(FetchContent)
Expand Down Expand Up @@ -40,4 +40,4 @@ if(WIN32 OR APPLE)
$<TARGET_FILE_DIR:_whisper_cpp>
)
endforeach()
endif()
endif()
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

![Build and Test](https://img.shields.io/github/actions/workflow/status/locaal-ai/simpler-whisper/build.yaml)

A zero-dependency simple Python wrapper for [whisper.cpp](https://github.com/ggerganov/whisper.cpp), providing an easy-to-use interface for speech recognition using the Whisper model.
A zero-dependency simple Python wrapper for [whisper.cpp](https://github.com/ggerganov/whisper.cpp), providing an easy-to-use interface for speech recognition using the Whisper model.

Why is it better than [faster-whisper](https://github.com/SYSTRAN/faster-whisper) and [pywhispercpp](https://github.com/abdeladim-s/pywhispercpp):
- Zero-dependency: Everything is shipped with the built wheel, no Python dependency (on `av` or `ctranslate2` etc.) except for `numpy`.
Expand Down Expand Up @@ -30,7 +30,7 @@ pip install simpler-whisper
import simpler_whisper.whisper
import numpy as np

# Load the model file.
# Load the model file.
# It's on you to download one from https://huggingface.co/ggerganov/whisper.cpp
model = simpler_whisper.whisper.load_model("path/to/model.bin")

Expand Down Expand Up @@ -79,7 +79,7 @@ Simpler Whisper supports various build configurations to optimize for different
### Example: Building for Windows with CUDA acceleration

```powershell
$env:SIMPLER_WHISPER_ACCELERATION=cuda
$env:SIMPLER_WHISPER_ACCELERATION="cuda"
pip install .
```

Expand All @@ -91,4 +91,4 @@ SIMPLER_WHISPER_PLATFORM=arm64 pip install .

## License

This project is licensed under the MIT License - see the LICENSE file for details.
This project is licensed under the MIT License - see the LICENSE file for details.
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "simpler-whisper"
version = "0.1.0"
version = "0.2.0"
authors = [
{name = "Roy Shilkrot", email = "[email protected]"},
]
Expand Down Expand Up @@ -33,4 +33,4 @@ dependencies = [
packages = ["simpler_whisper"]

[tool.setuptools.package-data]
simpler_whisper = ["*.dll", "*.pyd", "*.so", "*.metal"]
simpler_whisper = ["*.dll", "*.pyd", "*.so", "*.metal"]
101 changes: 68 additions & 33 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,73 +6,108 @@
import platform
import sysconfig


class CMakeExtension(Extension):
def __init__(self, name, sourcedir=''):
def __init__(self, name, sourcedir=""):
Extension.__init__(self, name, sources=[])
self.sourcedir = os.path.abspath(sourcedir)


class CMakeBuild(build_ext):
def run(self):
try:
out = subprocess.check_output(['cmake', '--version'])
out = subprocess.check_output(["cmake", "--version"])
except OSError:
raise RuntimeError("CMake must be installed to build the following extensions: " +
", ".join(e.name for e in self.extensions))
raise RuntimeError(
"CMake must be installed to build the following extensions: "
+ ", ".join(e.name for e in self.extensions)
)

for ext in self.extensions:
self.build_extension(ext)

def build_extension(self, ext):
# This is the critical change - we need to get the proper extension suffix
ext_suffix = sysconfig.get_config_var("EXT_SUFFIX")

# Get the full path where the extension should be placed
extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name)))


# Ensure the extension directory exists
os.makedirs(extdir, exist_ok=True)

# Get acceleration and platform from environment variables
acceleration = os.environ.get('SIMPLER_WHISPER_ACCELERATION', 'cpu')
target_platform = os.environ.get('SIMPLER_WHISPER_PLATFORM', platform.machine())

acceleration = os.environ.get("SIMPLER_WHISPER_ACCELERATION", "cpu")
target_platform = os.environ.get("SIMPLER_WHISPER_PLATFORM", platform.machine())
python_version = os.environ.get(
"SIMPLER_WHISPER_PYTHON_VERSION",
f"{sys.version_info.major}.{sys.version_info.minor}",
)

cmake_args = [
f'-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}',
f'-DACCELERATION={acceleration}',
f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}",
f"-DPYTHON_EXTENSION_SUFFIX={ext_suffix}", # Pass the extension suffix to CMake
f"-DACCELERATION={acceleration}",
f"-DPYTHON_VERSION={python_version}",
]

env = os.environ.copy()

# Add platform-specific arguments
if platform.system() == "Darwin": # macOS
cmake_args += [f'-DCMAKE_OSX_ARCHITECTURES={target_platform}']
# add MACOS_ARCH env variable to specify the target platform
cmake_args += [
f"-DCMAKE_OSX_ARCHITECTURES={target_platform}",
"-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON",
"-DCMAKE_INSTALL_RPATH_USE_LINK_PATH=ON",
f"-DCMAKE_INSTALL_NAME_DIR=@rpath",
]
env["MACOS_ARCH"] = target_platform

cfg = 'Debug' if self.debug else 'Release'
build_args = ['--config', cfg]
cfg = "Debug" if self.debug else "Release"
build_args = ["--config", cfg]

if platform.system() == "Windows":
cmake_args += [f'-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{cfg.upper()}={extdir}']
cmake_args += [f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{cfg.upper()}={extdir}"]
if sys.maxsize > 2**32:
cmake_args += ['-A', 'x64']
build_args += ['--', '/m']
cmake_args += ["-A", "x64"]
build_args += ["--", "/m"]
else:
cmake_args += [f'-DCMAKE_BUILD_TYPE={cfg}']
build_args += ['--', '-j2']
cmake_args += [f"-DCMAKE_BUILD_TYPE={cfg}"]
build_args += ["--", "-j2"]

env["CXXFLAGS"] = (
f'{env.get("CXXFLAGS", "")} -DVERSION_INFO=\\"{self.distribution.get_version()}\\"'
)

env['CXXFLAGS'] = f'{env.get("CXXFLAGS", "")} -DVERSION_INFO=\\"{self.distribution.get_version()}\\"'

if not os.path.exists(self.build_temp):
os.makedirs(self.build_temp)

print("CMake args:", cmake_args)
print("Build args:", build_args)

subprocess.check_call(['cmake', ext.sourcedir] + cmake_args, cwd=self.build_temp, env=env)
subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp)
print(f"Extension will be built in: {extdir}")
print(
f"Building for Python {python_version} on {target_platform} with acceleration: {acceleration}"
)

subprocess.check_call(
["cmake", ext.sourcedir] + cmake_args, cwd=self.build_temp, env=env
)
subprocess.check_call(
["cmake", "--build", "."] + build_args, cwd=self.build_temp
)


setup(
name='simpler-whisper',
version='0.1.0',
author='Roy Shilkrot',
author_email='[email protected]',
description='A simple Python wrapper for whisper.cpp',
long_description='',
ext_modules=[CMakeExtension('simpler_whisper._whisper_cpp')],
name="simpler-whisper",
version="0.2.0",
author="Roy Shilkrot",
author_email="[email protected]",
description="A simple Python wrapper for whisper.cpp",
long_description="",
ext_modules=[CMakeExtension("simpler_whisper._whisper_cpp")],
cmdclass=dict(build_ext=CMakeBuild),
zip_safe=False,
)
packages=[
"simpler_whisper"
], # Add this line to ensure the package directory is created
)
108 changes: 108 additions & 0 deletions simpler_whisper/whisper.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np
from typing import Callable, List
from . import _whisper_cpp


Expand All @@ -21,10 +22,117 @@ def __del__(self):
del self.model


class ThreadedWhisperModel:
    """
    Threaded wrapper around the native whisper.cpp model for continuous
    (streaming) audio transcription. Audio chunks are queued for background
    processing and results are delivered asynchronously via a user callback.
    """

    def __init__(
        self,
        model_path: str,
        callback: Callable[[int, str, bool], None],
        use_gpu: bool = False,
        max_duration_sec: float = 10.0,
        sample_rate: int = 16000,
    ):
        """
        Initialize a threaded Whisper model for continuous audio processing.

        Args:
            model_path (str): Path to the Whisper model file
            callback: Function called with (chunk_id, text, is_partial) for
                each transcription result. May be None to discard results.
            use_gpu (bool): Whether to use GPU acceleration
            max_duration_sec (float): Maximum duration in seconds before finalizing a segment
            sample_rate (int): Audio sample rate (default: 16000)
        """
        self.model = _whisper_cpp.ThreadedWhisperModel(
            model_path, use_gpu, max_duration_sec, sample_rate
        )
        self._is_running = False
        self.callback = callback

    def handle_result(self, chunk_id: int, text: str, is_partial: bool):
        # Trampoline handed to the native layer; forwards results to the
        # user-supplied callback when one is set.
        if self.callback is not None:
            self.callback(chunk_id, text, is_partial)

    def start(self, result_check_interval_ms=100):
        """
        Start the processing threads. Results are delivered to the callback
        supplied at construction time. No-op if already running.

        Args:
            result_check_interval_ms (int): How often (in milliseconds) the
                native side checks for finished results.
        """
        if self._is_running:
            return

        self.model.start(self.handle_result, result_check_interval_ms)
        self._is_running = True

    def stop(self):
        """
        Stop processing and clean up resources. No-op if not running.
        Any remaining audio will be processed as a final segment.
        """
        if not self._is_running:
            return

        self.model.stop()
        self._is_running = False

    def queue_audio(self, audio):
        """
        Queue audio for processing.

        Args:
            audio: Audio samples as numpy array or array-like object.
                Will be converted to float32.

        Returns:
            chunk_id (int): Unique identifier for this audio chunk
        """
        # The native layer expects float32 samples; convert unconditionally.
        audio = np.array(audio, dtype=np.float32)
        return self.model.queue_audio(audio)

    def set_max_duration(self, max_duration_sec, sample_rate=16000):
        """
        Change the maximum duration for partial segments.

        Args:
            max_duration_sec (float): New maximum duration in seconds
            sample_rate (int): Audio sample rate (default: 16000)
        """
        self.model.set_max_duration(max_duration_sec, sample_rate)

    def __del__(self):
        # Ensure threads are stopped and resources cleaned up. Use getattr
        # for _is_running because __del__ can run on a partially constructed
        # instance if __init__ raised before that attribute was assigned.
        if hasattr(self, "model"):
            if getattr(self, "_is_running", False):
                self.stop()
            del self.model


def load_model(model_path: str, use_gpu=False) -> WhisperModel:
    """
    Load a synchronous (non-threaded) Whisper model.

    Args:
        model_path (str): Path to the Whisper model file
        use_gpu (bool): Whether to use GPU acceleration

    Returns:
        WhisperModel: A model instance ready for transcription
    """
    model = WhisperModel(model_path, use_gpu)
    return model


def load_threaded_model(
    model_path: str,
    use_gpu=False,
    max_duration_sec=10.0,
    sample_rate=16000,
    callback: Callable[[int, str, bool], None] = None,
) -> ThreadedWhisperModel:
    """
    Load a threaded Whisper model for continuous audio processing.

    Args:
        model_path (str): Path to the Whisper model file
        use_gpu (bool): Whether to use GPU acceleration
        max_duration_sec (float): Maximum duration in seconds before finalizing a segment
        sample_rate (int): Audio sample rate (default: 16000)
        callback: Optional function called with (chunk_id, text, is_partial)
            for each transcription result; None discards results.

    Returns:
        ThreadedWhisperModel: A model instance ready for processing
    """
    # ThreadedWhisperModel.__init__ takes `callback` as its second positional
    # parameter. Pass everything by keyword so `use_gpu` is not mistakenly
    # bound to `callback` (the bug in the previous positional call).
    return ThreadedWhisperModel(
        model_path,
        callback,
        use_gpu=use_gpu,
        max_duration_sec=max_duration_sec,
        sample_rate=sample_rate,
    )


def set_log_callback(callback):
"""
Set a custom logging callback function.
Expand Down
Loading

0 comments on commit 051a64a

Please sign in to comment.