Skip to content

Commit

Permalink
Merge pull request #1 from locaal-ai/roy.threaded_whisper
Browse files Browse the repository at this point in the history
Roy.threaded whisper
  • Loading branch information
royshil authored Oct 25, 2024
2 parents 8441c5e + 96f019d commit 051a64a
Show file tree
Hide file tree
Showing 8 changed files with 640 additions and 78 deletions.
16 changes: 8 additions & 8 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -74,17 +74,18 @@ jobs:
env:
SIMPLER_WHISPER_ACCELERATION: ${{ matrix.acceleration }}
SIMPLER_WHISPER_PLATFORM: ${{ matrix.platform }}
SIMPLER_WHISPER_PYTHON_VERSION: ${{ matrix.python-version }}
run: |
python setup.py build_ext --inplace
python -m build --wheel
- name: Install built wheel Non-Windows
if: startsWith(matrix.os, 'windows') == false
run: |
pip install dist/*.whl
- name: Install built wheel Windows
if: startsWith(matrix.os, 'windows') == true
if: startsWith(matrix.os, 'windows') == true
shell: pwsh
run: |
$wheelFile = Get-ChildItem dist/*.whl | Select-Object -First 1
Expand All @@ -104,15 +105,15 @@ jobs:
run: |
import os
import glob
wheel_file = glob.glob('dist/*.whl')[0]
base_name = os.path.basename(wheel_file)
name_parts = base_name.split('-')
# Insert acceleration and platform before the last part (which is like 'any.whl')
new_name_parts = name_parts[:-1] + ['${{ matrix.acceleration }}', '${{ matrix.platform }}'] + [name_parts[-1]]
new_name = '-'.join(new_name_parts)
new_path = os.path.join('dist', new_name)
os.rename(wheel_file, new_path)
print(f"Renamed {base_name} to {new_name}")
Expand All @@ -125,10 +126,9 @@ jobs:
$wheelName += "-${{ matrix.acceleration }}"
}
echo "WHEEL_NAME=$wheelName" >> $env:GITHUB_ENV
- name: Upload wheel
uses: actions/upload-artifact@v4
with:
name: ${{ env.WHEEL_NAME }}
path: dist/*.whl

4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Find Python
find_package(Python COMPONENTS Interpreter Development NumPy REQUIRED)
find_package(Python ${PYTHON_VERSION} EXACT COMPONENTS Interpreter Development NumPy REQUIRED)

# Fetch pybind11
include(FetchContent)
Expand Down Expand Up @@ -40,4 +40,4 @@ if(WIN32 OR APPLE)
$<TARGET_FILE_DIR:_whisper_cpp>
)
endforeach()
endif()
endif()
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

![Build and Test](https://img.shields.io/github/actions/workflow/status/locaal-ai/simpler-whisper/build.yaml)

A zero-dependency simple Python wrapper for [whisper.cpp](https://github.com/ggerganov/whisper.cpp), providing an easy-to-use interface for speech recognition using the Whisper model.
A zero-dependency simple Python wrapper for [whisper.cpp](https://github.com/ggerganov/whisper.cpp), providing an easy-to-use interface for speech recognition using the Whisper model.

Why is it better than [faster-whisper](https://github.com/SYSTRAN/faster-whisper) and [pywhispercpp](https://github.com/abdeladim-s/pywhispercpp):
- Zero-dependency: Everything is shipped with the built wheel, no Python dependency (on `av` or `ctranslate2` etc.) except for `numpy`.
Expand Down Expand Up @@ -30,7 +30,7 @@ pip install simpler-whisper
import simpler_whisper.whisper
import numpy as np

# Load the model file.
# Load the model file.
# It's on you to download one from https://huggingface.co/ggerganov/whisper.cpp
model = simpler_whisper.whisper.load_model("path/to/model.bin")

Expand Down Expand Up @@ -79,7 +79,7 @@ Simpler Whisper supports various build configurations to optimize for different
### Example: Building for Windows with CUDA acceleration

```powershell
$env:SIMPLER_WHISPER_ACCELERATION=cuda
$env:SIMPLER_WHISPER_ACCELERATION="cuda"
pip install .
```

Expand All @@ -91,4 +91,4 @@ SIMPLER_WHISPER_PLATFORM=arm64 pip install .

## License

This project is licensed under the MIT License - see the LICENSE file for details.
This project is licensed under the MIT License - see the LICENSE file for details.
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "simpler-whisper"
version = "0.1.0"
version = "0.2.0"
authors = [
{name = "Roy Shilkrot", email = "[email protected]"},
]
Expand Down Expand Up @@ -33,4 +33,4 @@ dependencies = [
packages = ["simpler_whisper"]

[tool.setuptools.package-data]
simpler_whisper = ["*.dll", "*.pyd", "*.so", "*.metal"]
simpler_whisper = ["*.dll", "*.pyd", "*.so", "*.metal"]
101 changes: 68 additions & 33 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,73 +6,108 @@
import platform
import sysconfig


class CMakeExtension(Extension):
def __init__(self, name, sourcedir=''):
def __init__(self, name, sourcedir=""):
Extension.__init__(self, name, sources=[])
self.sourcedir = os.path.abspath(sourcedir)


class CMakeBuild(build_ext):
def run(self):
try:
out = subprocess.check_output(['cmake', '--version'])
out = subprocess.check_output(["cmake", "--version"])
except OSError:
raise RuntimeError("CMake must be installed to build the following extensions: " +
", ".join(e.name for e in self.extensions))
raise RuntimeError(
"CMake must be installed to build the following extensions: "
+ ", ".join(e.name for e in self.extensions)
)

for ext in self.extensions:
self.build_extension(ext)

def build_extension(self, ext):
# This is the critical change - we need to get the proper extension suffix
ext_suffix = sysconfig.get_config_var("EXT_SUFFIX")

# Get the full path where the extension should be placed
extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name)))


# Ensure the extension directory exists
os.makedirs(extdir, exist_ok=True)

# Get acceleration and platform from environment variables
acceleration = os.environ.get('SIMPLER_WHISPER_ACCELERATION', 'cpu')
target_platform = os.environ.get('SIMPLER_WHISPER_PLATFORM', platform.machine())

acceleration = os.environ.get("SIMPLER_WHISPER_ACCELERATION", "cpu")
target_platform = os.environ.get("SIMPLER_WHISPER_PLATFORM", platform.machine())
python_version = os.environ.get(
"SIMPLER_WHISPER_PYTHON_VERSION",
f"{sys.version_info.major}.{sys.version_info.minor}",
)

cmake_args = [
f'-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}',
f'-DACCELERATION={acceleration}',
f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}",
f"-DPYTHON_EXTENSION_SUFFIX={ext_suffix}", # Pass the extension suffix to CMake
f"-DACCELERATION={acceleration}",
f"-DPYTHON_VERSION={python_version}",
]

env = os.environ.copy()

# Add platform-specific arguments
if platform.system() == "Darwin": # macOS
cmake_args += [f'-DCMAKE_OSX_ARCHITECTURES={target_platform}']
# add MACOS_ARCH env variable to specify the target platform
cmake_args += [
f"-DCMAKE_OSX_ARCHITECTURES={target_platform}",
"-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON",
"-DCMAKE_INSTALL_RPATH_USE_LINK_PATH=ON",
f"-DCMAKE_INSTALL_NAME_DIR=@rpath",
]
env["MACOS_ARCH"] = target_platform

cfg = 'Debug' if self.debug else 'Release'
build_args = ['--config', cfg]
cfg = "Debug" if self.debug else "Release"
build_args = ["--config", cfg]

if platform.system() == "Windows":
cmake_args += [f'-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{cfg.upper()}={extdir}']
cmake_args += [f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{cfg.upper()}={extdir}"]
if sys.maxsize > 2**32:
cmake_args += ['-A', 'x64']
build_args += ['--', '/m']
cmake_args += ["-A", "x64"]
build_args += ["--", "/m"]
else:
cmake_args += [f'-DCMAKE_BUILD_TYPE={cfg}']
build_args += ['--', '-j2']
cmake_args += [f"-DCMAKE_BUILD_TYPE={cfg}"]
build_args += ["--", "-j2"]

env["CXXFLAGS"] = (
f'{env.get("CXXFLAGS", "")} -DVERSION_INFO=\\"{self.distribution.get_version()}\\"'
)

env['CXXFLAGS'] = f'{env.get("CXXFLAGS", "")} -DVERSION_INFO=\\"{self.distribution.get_version()}\\"'

if not os.path.exists(self.build_temp):
os.makedirs(self.build_temp)

print("CMake args:", cmake_args)
print("Build args:", build_args)

subprocess.check_call(['cmake', ext.sourcedir] + cmake_args, cwd=self.build_temp, env=env)
subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp)
print(f"Extension will be built in: {extdir}")
print(
f"Building for Python {python_version} on {target_platform} with acceleration: {acceleration}"
)

subprocess.check_call(
["cmake", ext.sourcedir] + cmake_args, cwd=self.build_temp, env=env
)
subprocess.check_call(
["cmake", "--build", "."] + build_args, cwd=self.build_temp
)


setup(
name='simpler-whisper',
version='0.1.0',
author='Roy Shilkrot',
author_email='[email protected]',
description='A simple Python wrapper for whisper.cpp',
long_description='',
ext_modules=[CMakeExtension('simpler_whisper._whisper_cpp')],
name="simpler-whisper",
version="0.2.0",
author="Roy Shilkrot",
author_email="[email protected]",
description="A simple Python wrapper for whisper.cpp",
long_description="",
ext_modules=[CMakeExtension("simpler_whisper._whisper_cpp")],
cmdclass=dict(build_ext=CMakeBuild),
zip_safe=False,
)
packages=[
"simpler_whisper"
], # Add this line to ensure the package directory is created
)
108 changes: 108 additions & 0 deletions simpler_whisper/whisper.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np
from typing import Callable, List
from . import _whisper_cpp


Expand All @@ -21,10 +22,117 @@ def __del__(self):
del self.model


class ThreadedWhisperModel:
    """
    Threaded wrapper around the native whisper.cpp model for continuous
    (streaming) audio transcription. Audio chunks are queued for background
    processing and results are delivered asynchronously via a user callback.
    """

    def __init__(
        self,
        model_path: str,
        callback: Callable[[int, str, bool], None],
        use_gpu: bool = False,
        max_duration_sec: float = 10.0,
        sample_rate: int = 16000,
    ):
        """
        Initialize a threaded Whisper model for continuous audio processing.

        Args:
            model_path (str): Path to the Whisper model file
            callback: Function called with (chunk_id, text, is_partial) for
                each transcription result. May be None to discard results.
            use_gpu (bool): Whether to use GPU acceleration
            max_duration_sec (float): Maximum duration in seconds before finalizing a segment
            sample_rate (int): Audio sample rate (default: 16000)
        """
        self.model = _whisper_cpp.ThreadedWhisperModel(
            model_path, use_gpu, max_duration_sec, sample_rate
        )
        self._is_running = False
        self.callback = callback

    def handle_result(self, chunk_id: int, text: str, is_partial: bool):
        # Trampoline handed to the native layer; forwards results to the
        # user-supplied callback when one is set.
        if self.callback is not None:
            self.callback(chunk_id, text, is_partial)

    def start(self, result_check_interval_ms=100):
        """
        Start the processing threads. Results are delivered to the callback
        supplied at construction time. No-op if already running.

        Args:
            result_check_interval_ms (int): How often (in milliseconds) the
                native side checks for finished results.
        """
        if self._is_running:
            return

        self.model.start(self.handle_result, result_check_interval_ms)
        self._is_running = True

    def stop(self):
        """
        Stop processing and clean up resources. No-op if not running.
        Any remaining audio will be processed as a final segment.
        """
        if not self._is_running:
            return

        self.model.stop()
        self._is_running = False

    def queue_audio(self, audio):
        """
        Queue audio for processing.

        Args:
            audio: Audio samples as numpy array or array-like object.
                Will be converted to float32.

        Returns:
            chunk_id (int): Unique identifier for this audio chunk
        """
        # The native layer expects float32 samples; convert unconditionally.
        audio = np.array(audio, dtype=np.float32)
        return self.model.queue_audio(audio)

    def set_max_duration(self, max_duration_sec, sample_rate=16000):
        """
        Change the maximum duration for partial segments.

        Args:
            max_duration_sec (float): New maximum duration in seconds
            sample_rate (int): Audio sample rate (default: 16000)
        """
        self.model.set_max_duration(max_duration_sec, sample_rate)

    def __del__(self):
        # Ensure threads are stopped and resources cleaned up. Use getattr
        # for _is_running because __del__ can run on a partially constructed
        # instance if __init__ raised before that attribute was assigned.
        if hasattr(self, "model"):
            if getattr(self, "_is_running", False):
                self.stop()
            del self.model


def load_model(model_path: str, use_gpu=False) -> WhisperModel:
    """
    Load a synchronous (non-threaded) Whisper model.

    Args:
        model_path (str): Path to the Whisper model file
        use_gpu (bool): Whether to use GPU acceleration

    Returns:
        WhisperModel: A model instance ready for transcription
    """
    model = WhisperModel(model_path, use_gpu)
    return model


def load_threaded_model(
    model_path: str,
    use_gpu=False,
    max_duration_sec=10.0,
    sample_rate=16000,
    callback: Callable[[int, str, bool], None] = None,
) -> ThreadedWhisperModel:
    """
    Load a threaded Whisper model for continuous audio processing.

    Args:
        model_path (str): Path to the Whisper model file
        use_gpu (bool): Whether to use GPU acceleration
        max_duration_sec (float): Maximum duration in seconds before finalizing a segment
        sample_rate (int): Audio sample rate (default: 16000)
        callback: Optional function called with (chunk_id, text, is_partial)
            for each transcription result; None discards results.

    Returns:
        ThreadedWhisperModel: A model instance ready for processing
    """
    # ThreadedWhisperModel.__init__ takes `callback` as its second positional
    # parameter. Pass everything by keyword so `use_gpu` is not mistakenly
    # bound to `callback` (the bug in the previous positional call).
    return ThreadedWhisperModel(
        model_path,
        callback,
        use_gpu=use_gpu,
        max_duration_sec=max_duration_sec,
        sample_rate=sample_rate,
    )


def set_log_callback(callback):
"""
Set a custom logging callback function.
Expand Down
Loading

0 comments on commit 051a64a

Please sign in to comment.