-
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: use vad audio result to whisper to transcribe
- Loading branch information
1 parent
19487ac
commit 01dbaeb
Showing
10 changed files
with
388 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -35,6 +35,7 @@ words: | |
- live2dcubismcore | ||
- live2dcubismframework | ||
- Llmmarker | ||
- micvad | ||
- Myriam | ||
- Neko | ||
- nekomeowww | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,7 +9,7 @@ export function useMicVAD(deviceId: MaybeRef<ConstrainDOMString | undefined>, op | |
...getDefaultRealTimeVADOptions('v5'), | ||
positiveSpeechThreshold: 0.2, // default is 0.5 | ||
negativeSpeechThreshold: 0.08, // default is 0.5 - 0.15 | ||
minSpeechFrames: 60, // default is 9 | ||
minSpeechFrames: 5, // default is 9 | ||
// WORKAROUND: temporary workaround for onnxruntime-web, since @ricky0123/vad-web | ||
      // uses hardcoded version of onnxruntime-web@<version> to fetch the already non-existing | ||
// ort-wasm-simd-threaded.mjs file and its WASM binary, we are going to force | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
import type { MessageEvents, MessageGenerate, ProgressMessageEvents } from '../libs/workers/types' | ||
|
||
/**
 * Composable that starts a Whisper worker and mirrors the messages it posts
 * into reactive state.
 *
 * The worker is created through `useWebWorker` as a module worker and is
 * terminated when the owning component unmounts.
 *
 * @param url - Location of the worker script.
 * @returns `transcribe` (posts a {@link MessageGenerate} to the worker) together
 *   with refs for the model status, loading message, per-file loading progress,
 *   the transcribing flag, the last reported `tps`, and the latest result text.
 */
export function useWhisper(url: string) {
  const { post: whisperPost, data: whisperData, terminate } = useWebWorker<MessageEvents>(url, { type: 'module' })

  const status = ref<'loading' | 'ready' | null>(null)
  const loadingMessage = ref('')
  const loadingProgress = ref<ProgressMessageEvents[]>([])
  const transcribing = ref(false)
  const tps = ref<number>(0)
  const result = ref('')

  watch(whisperData, (e) => {
    // Nothing to dispatch on when the worker data is empty; reading
    // `e.status` below would otherwise throw.
    if (e == null) {
      return
    }

    // Statuses without a case (e.g. 'download') are ignored.
    switch (e.status) {
      case 'loading':
        status.value = 'loading'
        loadingMessage.value = e.data
        break

      case 'initiate':
        // Start tracking a new file.
        loadingProgress.value.push(e)
        break

      case 'progress':
        // Merge the new numbers into the tracked entry for the same file.
        loadingProgress.value = loadingProgress.value.map((item) => {
          if (item.file === e.file) {
            return { ...item, ...e }
          }
          return item
        })
        break

      case 'done':
        // Stop tracking the file once it has finished.
        loadingProgress.value = loadingProgress.value.filter(item => item.file !== e.file)
        break

      case 'ready':
        status.value = 'ready'
        break

      case 'start':
        transcribing.value = true
        break

      case 'update':
        tps.value = e.tps
        break

      case 'complete':
        transcribing.value = false
        // Only the first output entry is surfaced; fall back to an empty string.
        result.value = e.output[0] ?? ''
        // eslint-disable-next-line no-console
        console.debug('Whisper result:', result.value)
        break
    }
  })

  onUnmounted(() => {
    terminate()
  })

  return {
    transcribe: (message: MessageGenerate) => whisperPost(message),
    status,
    loadingMessage,
    loadingProgress,
    transcribing,
    tps,
    result,
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
/**
 * Worker message with status `'loading'`.
 *
 * `useWhisper` reacts to it by setting its status to `'loading'` and copying
 * `data` into `loadingMessage`.
 */
export interface EventLoading {
  status: 'loading'
  /** Text that `useWhisper` exposes as the loading message. */
  data: string
}
|
||
/**
 * Worker message with status `'initiate'`.
 *
 * `useWhisper` appends it to `loadingProgress`, where later `'progress'` and
 * `'done'` messages are matched against it by `file`.
 */
export interface EventInitiate {
  status: 'initiate'
  name: string
  /** Key used by `useWhisper` to match follow-up messages to this entry. */
  file: string
  // Not used
  progress?: number
  loaded?: number
  total?: number
}
|
||
/**
 * Worker message with status `'download'`.
 *
 * It is a member of {@link MessageEvents}, but `useWhisper` has no case for it,
 * so it currently has no effect on the composable's state.
 */
export interface EventDownload {
  status: 'download'
  name: string
  file: string
  // Not used
  progress?: number
  loaded?: number
  total?: number
}
|
||
/**
 * Worker message with status `'progress'`.
 *
 * `useWhisper` merges it into the `loadingProgress` entry whose `file` matches.
 */
export interface EventProgress {
  status: 'progress'
  name: string
  /** Key used to find the tracked entry to update. */
  file: string
  // NOTE(review): units are not visible here — presumably a percentage for
  // `progress` and byte counts for `loaded`/`total`; confirm against the worker.
  progress: number
  loaded: number
  total: number
}
|
||
/**
 * Worker message with status `'done'`.
 *
 * `useWhisper` removes the `loadingProgress` entry whose `file` matches.
 */
export interface EventDone {
  status: 'done'
  name: string
  /** Key used to find the tracked entry to remove. */
  file: string
  // Not used
  progress?: number
  loaded?: number
  total?: number
}
|
||
/**
 * Worker message with status `'ready'`; `useWhisper` sets its status to
 * `'ready'` when it arrives.
 */
export interface EventReady {
  status: 'ready'
}
|
||
/**
 * Worker message with status `'start'`; `useWhisper` sets `transcribing` to
 * `true` when it arrives.
 */
export interface EventStart {
  status: 'start'
}
|
||
/**
 * Worker message with status `'update'`.
 *
 * `useWhisper` reads only `tps` from it.
 */
export interface EventUpdate {
  status: 'update'
  // NOTE(review): presumably tokens per second — confirm against the worker.
  tps: number
  output: string
  numTokens: number
}
|
||
/**
 * Worker message with status `'complete'`.
 *
 * `useWhisper` clears `transcribing` and takes the first element of `output`
 * (or an empty string) as the result.
 */
export interface EventComplete {
  status: 'complete'
  /** Output texts; only index 0 is consumed by `useWhisper`. */
  output: string[]
}
|
||
/**
 * Every message shape the worker may post, discriminated by `status`.
 */
export type MessageEvents =
  | EventLoading
  | EventInitiate
  | EventDownload
  | EventProgress
  | EventDone
  | EventReady
  | EventStart
  | EventUpdate
  | EventComplete

/**
 * Message shapes stored in the per-file loading progress list.
 *
 * NOTE(review): `EventDownload` is not included here — confirm that is intended.
 */
export type ProgressMessageEvents =
  | EventInitiate
  | EventProgress
  | EventDone
|
||
/**
 * Request posted to the worker by `useWhisper`'s `transcribe` function.
 */
export interface MessageGenerate {
  type: 'generate'
  data: {
    // NOTE(review): the string encoding of the audio is not visible here
    // (e.g. base64 or a URL) — confirm against the worker implementation.
    audio: string
    language: string
  }
}
Oops, something went wrong.