-
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
50190a3
commit e4a0cc7
Showing
2 changed files
with
87 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
<script setup lang="ts"> | ||
import type { AssistantMessage, Message, SystemMessage } from '@xsai/shared-chat-completion' | ||
import type { Emotion } from '../constants/emotions' | ||
import { MicVAD } from '@ricky0123/vad-web' | ||
import { useLocalStorage } from '@vueuse/core' | ||
import { storeToRefs } from 'pinia' | ||
|
@@ -51,6 +52,9 @@ const audioAnalyser = ref<AnalyserNode>() | |
const mouthOpenSize = ref(0) | ||
const nowSpeaking = ref(false) | ||
const lipSyncStarted = ref(false) | ||
const micVad = ref<MicVAD>() | ||
const { audioInputs } = useDevicesList({ constraints: { audio: true }, requestPermissions: true }) | ||
const selectedAudioDevice = ref<MediaDeviceInfo>() | ||
const nowSpeakingAvatarBorderOpacity = computed<number>(() => { | ||
if (!nowSpeaking.value) | ||
|
@@ -71,6 +75,67 @@ function handleModelChange(event: Event) { | |
openAIModel.value = found | ||
} | ||
// Microphone stream currently feeding `micVad`. MicVAD is handed a
// caller-provided stream, so this component keeps a handle to it and stops
// its tracks itself once the stream is no longer needed.
let activeMicStream: MediaStream | undefined
// Bumped on every activation request so that a slow, superseded activation
// can tell a newer one has started and discard its own resources.
let micVadActivationId = 0

/** Stops every track of `stream` (no-op when `stream` is undefined). */
function stopMediaStream(stream: MediaStream | undefined) {
  stream?.getTracks().forEach(track => track.stop())
}

/**
 * (Re)starts voice activity detection on the given audio input device.
 *
 * Any previously running MicVAD is destroyed and its microphone stream is
 * stopped first. A new stream is then requested for `deviceId`, a MicVAD is
 * created on top of it and started, and `listening` is toggled from the VAD
 * callbacks.
 *
 * If another activation is requested while this one is still awaiting the
 * microphone or the VAD model, this one releases what it acquired and
 * returns without touching `micVad`.
 *
 * @param deviceId - `deviceId` of the audio input to capture from.
 * @throws Re-throws whatever `getUserMedia` or `MicVAD.new` rejects with;
 *   the acquired stream is stopped before re-throwing.
 */
async function handleMicVADActivation(deviceId: string) {
  const activationId = ++micVadActivationId

  if (micVad.value) {
    micVad.value.destroy()
    micVad.value = undefined
    // The destroyed VAD can no longer report the end of an ongoing speech
    // segment, so clear the flag here instead of leaving it stuck on.
    listening.value = false
    console.warn('existing MicVAD destroyed')
  }
  stopMediaStream(activeMicStream)
  activeMicStream = undefined

  const media = await navigator.mediaDevices.getUserMedia({ audio: { deviceId } })
  if (activationId !== micVadActivationId) {
    // A newer activation took over while the permission prompt / device
    // acquisition was pending.
    stopMediaStream(media)
    return
  }

  let vad: MicVAD
  try {
    // Use of MicVAD is inspired by Open-LLM-VTuber
    // Source code reference: https://github.com/t41372/Open-LLM-VTuber/blob/92cbf4349b84a68b0035bc825bc3d1d61fd0f063/static/index.html#L119
    vad = await MicVAD.new({
      stream: media,
      model: 'v5',
      positiveSpeechThreshold: 0.2, // default is 0.5
      negativeSpeechThreshold: 0.08, // default is 0.5 - 0.15
      minSpeechFrames: 60, // default is 9
      onSpeechStart: () => {
        // TODO: interrupt the playback
        // TODO: interrupt any of the ongoing TTS
        // TODO: interrupt any of the ongoing LLM requests
        // TODO: interrupt any of the ongoing animation of Live2D or VRM
        // TODO: once interrupted, we should somehow switch to listen or thinking
        //       emotion / expression?
        listening.value = true
      },
      // A VAD misfire means that the end of speech was detected, but the
      // segment contains too few frames to count as a speech segment.
      // The required frame count is controlled by `minSpeechFrames`.
      onVADMisfire: () => {
        // TODO: do audio buffer send to whisper
        listening.value = false
      },
      onSpeechEnd: () => {
        // TODO: do audio buffer send to whisper
        listening.value = false
      },
      // WORKAROUND: temporary workaround for onnxruntime-web. @ricky0123/vad-web
      // uses a hardcoded onnxruntime-web version to fetch the no longer existing
      // ort-wasm-simd-threaded.mjs file and its WASM binary, so we force
      // onnxruntime-web to load the latest published build from jsdelivr
      // to fetch the correct ort-wasm-simd-threaded.wasm binary.
      // NOTE(review): this URL is not version-pinned, so the runtime that gets
      // loaded can change whenever a new onnxruntime-web release is published.
      onnxWASMBasePath: 'https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/',
    })
  }
  catch (error) {
    // Do not keep the microphone captured when the VAD could not be created.
    stopMediaStream(media)
    throw error
  }

  if (activationId !== micVadActivationId) {
    // Superseded while the VAD model was loading: drop this instance so it
    // does not overwrite (or get orphaned by) the newer activation.
    vad.destroy()
    stopMediaStream(media)
    return
  }

  activeMicStream = media
  micVad.value = vad
  vad.start()
}
/**
 * Change handler for the audio input `<select>`.
 *
 * Looks up the chosen device in `audioInputs` by its `deviceId` and stores
 * it in `selectedAudioDevice` (or clears the selection when no device
 * matches). When a device was found, voice activity detection is
 * (re)activated on it.
 *
 * @param event - The `change` event emitted by the `<select>` element.
 */
async function handleAudioInputChange(event: Event) {
  const { value: chosenId } = event.target as HTMLSelectElement
  const device = audioInputs.value.find(input => input.deviceId === chosenId)

  // `device` is undefined when nothing matched, which clears the selection.
  selectedAudioDevice.value = device
  if (device)
    await handleMicVADActivation(device.deviceId)
}
const audioQueue = useQueue<{ audioBuffer: AudioBuffer, text: string }>({ | ||
handlers: [ | ||
(ctx) => { | ||
|
@@ -250,6 +315,11 @@ watch([openAiApiBaseURL, openAiApiKey], async ([baseUrl, apiKey]) => { | |
supportedModels.value = await models(baseUrl, apiKey) | ||
}) | ||
// Destroy the voice activity detector, if one was created, when the
// component is unmounted.
onUnmounted(() => {
  micVad.value?.destroy()
})
onMounted(async () => { | ||
if (!openAiApiBaseURL.value || !openAiApiKey.value) | ||
return | ||
|
@@ -354,6 +424,22 @@ onUnmounted(() => { | |
</div> | ||
<div flex="~ row" my="2" space-x="2" w-full self-end> | ||
<div flex="~ col" w-full space-y="2"> | ||
<!-- Audio input picker: choosing a device calls handleAudioInputChange. -->
<select
  p="2"
  bg="zinc-100 dark:zinc-700" w-full rounded-lg
  outline-none
  @change="handleAudioInputChange"
>
  <!--
    Placeholder entry. It is marked `selected` so it is what the control
    shows initially; otherwise the browser preselects the first enabled
    option without firing `change`, and that device could never be
    activated when it is the only one in the list.
  -->
  <option value="" disabled selected>
    Select an Audio Input
  </option>
  <option v-if="selectedAudioDevice" :value="selectedAudioDevice.deviceId">
    {{ selectedAudioDevice.label }}
  </option>
  <option v-for="m in audioInputs" :key="m.deviceId" :value="m.deviceId">
    {{ m.label }}
  </option>
</select>
<select | ||
p="2" | ||
bg="zinc-100 dark:zinc-700" w-full rounded-lg | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters