-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcaption_anything.py
executable file
·70 lines (65 loc) · 2.55 KB
/
caption_anything.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/env python
# -*- coding: utf-8 -*-
##
## Copyright 2024 Henry Kroll <[email protected]>
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
## MA 02110-1301, USA.
##
## Gtk includes
import gi
gi.require_version('Gtk', '4.0')
gi.require_version('Adw', '1')
from gi.repository import Adw
import sys, os
from transformers.pipelines.audio_utils import ffmpeg_read
from interface import MainWindow
print("Importing FlaxWhisperPipline... Please wait...")
from tqdm_loader import tqdm_generate, init_pipeline
# Sampling rate handed to ffmpeg_read and reported in the pipeline input dict.
sample_rate = 16000
# Task name forwarded to tqdm_generate.
task = "transcribe"
# Forwarded to tqdm_generate as its return_timestamps argument.
return_timestamps = False
# Module-level pipeline handle; assigned by cpWindow.get_pipeline().
pipeline = None
class cpWindow(MainWindow):
    """Window that transcribes audio files and shows the text as captions."""

    def transcribe(self, fname, start_time, end_time):
        """Transcribe the audio file *fname* and record the resulting caption.

        The file is removed after processing, including when decoding or
        transcription raises, so failed clips do not accumulate on disk.

        Args:
            fname: Path of the audio file to transcribe; deleted afterwards.
            start_time: Start time associated with the audio; stored with
                the caption in ``self.text``.
            end_time: End time associated with the audio; stored with the
                caption in ``self.text``.
        """
        with open(fname, "rb") as f:
            inputs = f.read()
        try:
            # ffmpeg_read also converts inputs to the required type numpy.ndarray
            inputs = ffmpeg_read(inputs, sample_rate)
            inputs = {"array": inputs, "sampling_rate": sample_rate}
            # The second return value (runtime) is not used here.
            text_chunk, _ = tqdm_generate(
                inputs, task=task, return_timestamps=return_timestamps
            )
        finally:
            # Always clean up the audio file, even if transcription failed.
            os.remove(fname)

        # Show the captions
        # NOTE(review): "you" is presumably a spurious result the model
        # produces for silent audio -- confirm before changing this filter.
        if text_chunk != "you":
            print(text_chunk)
            self.show_caption(text_chunk)
            self.text.append([start_time, end_time, text_chunk])

    def get_pipeline(self):
        """Initialise the pipeline and mark the window as ready to transcribe.

        Stores the result of ``init_pipeline()`` in the module-level
        ``pipeline`` variable, updates the caption box, and sets
        ``self.allow_transcribing`` to True.
        """
        global pipeline
        pipeline = init_pipeline()
        self.captions_box.set_css_classes(['info'])
        self.captions_box.set_text("Language model ready.")
        self.allow_transcribing = True  # Allow transcribing
        print("Ready")
class MyApp(Adw.Application):
    """Adw application that opens a cpWindow when it is activated."""

    def __init__(self, **kwargs):
        """Forward *kwargs* to the base class and hook up the activate signal."""
        super().__init__(**kwargs)
        self.connect("activate", self.on_activate)

    def on_activate(self, app):
        """Create the main window, present it, and show the loading notice."""
        window = cpWindow(application=app)
        # Keep the window reachable from the application object.
        self.win = window
        window.present()
        window.captions_box.set_text("Loading language model...")
# Create the application with its ID and run it, passing the command-line arguments.
app = MyApp(application_id="com.comptune.rec")
app.run(sys.argv)