Skip to content

Commit

Permalink
Bump version numbers in pyproject.toml and setup.py
Browse files Browse the repository at this point in the history
  • Loading branch information
royshil committed Oct 25, 2024
1 parent 33e2a3c commit e771682
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 29 deletions.
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "simpler-whisper"
version = "0.1.0"
version = "0.2.0"
authors = [
{name = "Roy Shilkrot", email = "[email protected]"},
]
Expand Down Expand Up @@ -33,4 +33,4 @@ dependencies = [
packages = ["simpler_whisper"]

[tool.setuptools.package-data]
simpler_whisper = ["*.dll", "*.pyd", "*.so", "*.metal"]
simpler_whisper = ["*.dll", "*.pyd", "*.so", "*.metal"]
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def build_extension(self, ext):

setup(
name='simpler-whisper',
version='0.1.0',
version='0.2.0',
author='Roy Shilkrot',
author_email='[email protected]',
description='A simple Python wrapper for whisper.cpp',
Expand Down
27 changes: 17 additions & 10 deletions src/whisper_wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,10 @@ py::function g_py_log_callback;
// C++ callback function that will be passed to whisper_log_set
void cpp_log_callback(ggml_log_level level, const char *text, void *)
{
if (!g_py_log_callback.is_none())
if (!g_py_log_callback.is_none() && text != nullptr && strlen(text) > 0)
{
g_py_log_callback(level, text);
py::gil_scoped_acquire gil;
g_py_log_callback(level, std::string(text));
}
}

Expand Down Expand Up @@ -263,7 +264,7 @@ class ThreadedWhisperModel

while (running)
{
AudioChunk chunk;
AudioChunk all_chunks;
bool has_chunk = false;

// Get next chunk from input queue
Expand All @@ -279,10 +280,13 @@ class ThreadedWhisperModel
break;
}

if (!input_queue.empty())
// take all chunks from the queue and create a single chunk
while (!input_queue.empty())
{
chunk = std::move(input_queue.front());
AudioChunk chunk = std::move(input_queue.front());
input_queue.pop();
all_chunks.data.insert(all_chunks.data.end(), chunk.data.begin(), chunk.data.end());
all_chunks.id = chunk.id;
has_chunk = true;
}
}
Expand All @@ -293,11 +297,11 @@ class ThreadedWhisperModel
{
std::lock_guard<std::mutex> lock(buffer_mutex);
size_t old_size = accumulated_buffer.size();
accumulated_buffer.resize(old_size + chunk.data.size());
std::copy(chunk.data.begin(), chunk.data.end(),
accumulated_buffer.resize(old_size + all_chunks.data.size());
std::copy(all_chunks.data.begin(), all_chunks.data.end(),
accumulated_buffer.begin() + old_size);

current_chunk_id = chunk.id;
current_chunk_id = all_chunks.id;
}

// Process the accumulated audio
Expand Down Expand Up @@ -420,8 +424,11 @@ PYBIND11_MODULE(_whisper_cpp, m)
m.def("set_log_callback", &set_log_callback, "Set the log callback function");

py::enum_<ggml_log_level>(m, "LogLevel")
.value("ERROR", GGML_LOG_LEVEL_ERROR)
.value("WARN", GGML_LOG_LEVEL_WARN)
.value("NONE", GGML_LOG_LEVEL_NONE)
.value("INFO", GGML_LOG_LEVEL_INFO)
.value("WARN", GGML_LOG_LEVEL_WARN)
.value("ERROR", GGML_LOG_LEVEL_ERROR)
.value("DEBUG", GGML_LOG_LEVEL_DEBUG)
.value("CONT", GGML_LOG_LEVEL_CONT)
.export_values();
}
51 changes: 35 additions & 16 deletions test_simpler_whisper.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import argparse
import sys

# Remove the current directory from sys.path to avoid conflicts with the installed package
Expand All @@ -15,13 +16,23 @@
)


log_levels = {LogLevel.ERROR: "ERROR", LogLevel.WARN: "WARN", LogLevel.INFO: "INFO"}


def my_log_callback(level, message):
log_levels = {LogLevel.ERROR: "ERROR", LogLevel.WARN: "WARN", LogLevel.INFO: "INFO"}
print(f"whisper.cpp [{log_levels.get(level, 'UNKNOWN')}] {message.strip()}")
if message is not None and len(message.strip()) > 0:
print(f"whisper.cpp [{log_levels.get(level, 'UNKNOWN')}] {message.strip()}")


# Path to your Whisper model file
model_path = R"ggml-tiny.en-q5_1.bin"
# Parse command-line arguments
parser = argparse.ArgumentParser(description="Test simpler-whisper model.")
parser.add_argument("model_path", type=str, help="Path to the Whisper model file")
parser.add_argument("audio_file", type=str, help="Path to the audio file")
args = parser.parse_args()

model_path = args.model_path
audio_file = args.audio_file


def test_simpler_whisper():
Expand Down Expand Up @@ -70,6 +81,8 @@ def test_simpler_whisper():


def test_threaded_whisper():
set_log_callback(my_log_callback)

def handle_result(chunk_id: int, text: str, is_partial: bool):
print(
f"Chunk {chunk_id} results ({'partial' if is_partial else 'final'}): {text}"
Expand All @@ -86,9 +99,7 @@ def handle_result(chunk_id: int, text: str, is_partial: bool):
# load audio from file with av
import av

container = av.open(
R"local_path_to_audio_file"
)
container = av.open(audio_file)
audio = container.streams.audio[0]
print(audio)
frame_generator = container.decode(audio)
Expand All @@ -98,17 +109,25 @@ def handle_result(chunk_id: int, text: str, is_partial: bool):
model.start()

for i, frame in enumerate(frame_generator):
# Read audio chunk
incoming_audio = frame.to_ndarray().mean(axis=0)
incoming_audio = incoming_audio / 32768.0 # normalize to [-1, 1]
# resample to 16kHz
samples = resampy.resample(incoming_audio, frame.rate, 16000)

# Queue some audio (will get partial results until 10 seconds accumulate)
chunk_id = model.queue_audio(samples)
# sleep for the size of the audio chunk
try:
time.sleep(len(samples) / 16000)
# Read audio chunk
incoming_audio = frame.to_ndarray()
# check if stereo
if incoming_audio.shape[0] == 2:
incoming_audio = incoming_audio.mean(axis=0)
# check if the type is int16 or float32
if incoming_audio.dtype == np.int16:
incoming_audio = incoming_audio / 32768.0 # normalize to [-1, 1]
# resample to 16kHz if needed
if frame.rate != 16000:
samples = resampy.resample(incoming_audio, frame.rate, 16000)
else:
samples = incoming_audio

# Queue some audio (will get partial results until 10 seconds accumulate)
chunk_id = model.queue_audio(samples)
# sleep for the size of the audio chunk
time.sleep(float(len(samples)) / float(16000))
except:
break

Expand Down

0 comments on commit e771682

Please sign in to comment.