feat: to composable
nekomeowww committed Dec 11, 2024
1 parent e4a0cc7 commit b3d8bf9
Showing 7 changed files with 113 additions and 57 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -20,3 +20,4 @@ pnpm dev

- [pixiv/ChatVRM](https://github.com/pixiv/ChatVRM)
- [josephrocca/ChatVRM-js: A JS conversion/adaptation of parts of the ChatVRM (TypeScript) code for standalone use in OpenCharacters and elsewhere](https://github.com/josephrocca/ChatVRM-js)
- The UI design and style were inspired by [Cookard](https://store.steampowered.com/app/2919650/Cookard/), [UNBEATABLE](https://store.steampowered.com/app/2240620/UNBEATABLE/), and [Sensei! I like you so much!](https://store.steampowered.com/app/2957700/_/), as well as [Ayame by Mercedes Bazan](https://dribbble.com/shots/22157656-Ayame) and [Wish by Mercedes Bazan](https://dribbble.com/shots/24501019-Wish)
2 changes: 2 additions & 0 deletions cspell.config.yaml
@@ -23,6 +23,7 @@ words:
- cubismmotionqueuemanager
- cubismusermodel
- cubismviewmatrix
- defu
- demi
- elevenlabs
- gltf
@@ -43,6 +44,7 @@ words:
- nuxt
- nuxtjs
- ofetch
- onnx
- onnxruntime
- openai
- pinia
2 changes: 2 additions & 0 deletions packages/stage/package.json
@@ -42,9 +42,11 @@
"@unocss/reset": "^0.65.1",
"@vueuse/core": "^12.0.0",
"@vueuse/head": "^2.0.0",
"@vueuse/shared": "^12.0.0",
"@xsai/model": "^0.0.19",
"@xsai/shared-chat-completion": "^0.0.19",
"@xsai/stream-text": "^0.0.19",
"defu": "^6.1.4",
"nprogress": "^0.2.0",
"ofetch": "^1.4.1",
"onnxruntime-web": "^1.20.1",
2 changes: 2 additions & 0 deletions packages/stage/src/auto-imports.d.ts
@@ -206,6 +206,7 @@ declare global {
const useMemoize: typeof import('@vueuse/core')['useMemoize']
const useMemory: typeof import('@vueuse/core')['useMemory']
const useMessageContentQueue: typeof import('./composables/queues')['useMessageContentQueue']
const useMicVAD: typeof import('./composables/micvad')['useMicVAD']
const useModel: typeof import('vue')['useModel']
const useMounted: typeof import('@vueuse/core')['useMounted']
const useMouse: typeof import('@vueuse/core')['useMouse']
@@ -514,6 +515,7 @@ declare module 'vue' {
readonly useMemoize: UnwrapRef<typeof import('@vueuse/core')['useMemoize']>
readonly useMemory: UnwrapRef<typeof import('@vueuse/core')['useMemory']>
readonly useMessageContentQueue: UnwrapRef<typeof import('./composables/queues')['useMessageContentQueue']>
readonly useMicVAD: UnwrapRef<typeof import('./composables/micvad')['useMicVAD']>
readonly useModel: UnwrapRef<typeof import('vue')['useModel']>
readonly useMounted: UnwrapRef<typeof import('@vueuse/core')['useMounted']>
readonly useMouse: UnwrapRef<typeof import('@vueuse/core')['useMouse']>
83 changes: 26 additions & 57 deletions packages/stage/src/components/MainStage.vue
@@ -1,14 +1,14 @@
<script setup lang="ts">
import type { AssistantMessage, Message, SystemMessage } from '@xsai/shared-chat-completion'
import type { Emotion } from '../constants/emotions'
import { MicVAD } from '@ricky0123/vad-web'
import { useLocalStorage } from '@vueuse/core'
import { storeToRefs } from 'pinia'
import { computed, onMounted, ref, watch } from 'vue'
import Avatar from '../assets/live2d/models/hiyori_free_zh/avatar.png'
import { useMarkdown } from '../composables/markdown'
import { useMicVAD } from '../composables/micvad'
import { useQueue } from '../composables/queue'
import { useDelayMessageQueue, useEmotionsMessageQueue, useMessageContentQueue } from '../composables/queues'
import { llmInferenceEndToken } from '../constants'
@@ -52,9 +52,9 @@ const audioAnalyser = ref<AnalyserNode>()
const mouthOpenSize = ref(0)
const nowSpeaking = ref(false)
const lipSyncStarted = ref(false)
const micVad = ref<MicVAD>()
const { audioInputs } = useDevicesList({ constraints: { audio: true }, requestPermissions: true })
const selectedAudioDevice = ref<MediaDeviceInfo>()
const selectedAudioDeviceId = computed(() => selectedAudioDevice.value?.deviceId)
const nowSpeakingAvatarBorderOpacity = computed<number>(() => {
if (!nowSpeaking.value)
@@ -64,6 +64,30 @@ const nowSpeakingAvatarBorderOpacity = computed<number>(() => {
+ (nowSpeakingAvatarBorderOpacityMax - nowSpeakingAvatarBorderOpacityMin) * mouthOpenSize.value) / 100)
})
useMicVAD(selectedAudioDeviceId, {
  onSpeechStart: () => {
    // TODO: interrupt the playback
    // TODO: interrupt any of the ongoing TTS
    // TODO: interrupt any of the ongoing LLM requests
    // TODO: interrupt any of the ongoing animation of Live2D or VRM
    // TODO: once interrupted, we should somehow switch to a listening or thinking
    //       emotion / expression?
    listening.value = true
  },
  // A VAD misfire means a speech end was detected, but the audio segment
  // contains too few frames to count as speech (controlled by the
  // `minSpeechFrames` parameter)
  onVADMisfire: () => {
    // TODO: send the audio buffer to Whisper
    listening.value = false
  },
  onSpeechEnd: () => {
    // TODO: send the audio buffer to Whisper
    listening.value = false
  },
})
function handleModelChange(event: Event) {
const target = event.target as HTMLSelectElement
const found = supportedModels.value.find(m => m.id === target.value)
@@ -75,55 +75,6 @@ function handleModelChange(event: Event) {
openAIModel.value = found
}
async function handleMicVADActivation(deviceId: string) {
if (micVad.value) {
micVad.value.destroy()
micVad.value = undefined
console.warn('existing MicVAD destroyed')
}
const media = await navigator.mediaDevices.getUserMedia({ audio: { deviceId } })
// Use of MicVAD is inspired by Open-LLM-VTuber
// Source code reference: https://github.com/t41372/Open-LLM-VTuber/blob/92cbf4349b84a68b0035bc825bc3d1d61fd0f063/static/index.html#L119
micVad.value = await MicVAD.new({
stream: media,
model: 'v5',
positiveSpeechThreshold: 0.2, // default is 0.5
negativeSpeechThreshold: 0.08, // default is 0.5 - 0.15
minSpeechFrames: 60, // default is 9
onSpeechStart: () => {
// TODO: interrupt the playback
// TODO: interrupt any of the ongoing TTS
// TODO: interrupt any of the ongoing LLM requests
// TODO: interrupt any of the ongoing animation of Live2D or VRM
// TODO: once interrupted, we should somehow switch to listen or thinking
// emotion / expression?
listening.value = true
},
// A VAD misfire means a speech end was detected, but the audio segment
// contains too few frames to count as speech (controlled by the
// `minSpeechFrames` parameter)
onVADMisfire: () => {
// TODO: send the audio buffer to Whisper
listening.value = false
},
onSpeechEnd: () => {
// TODO: send the audio buffer to Whisper
listening.value = false
},
// WORKAROUND (temporary): @ricky0123/vad-web pins a hardcoded onnxruntime-web
// version whose ort-wasm-simd-threaded.mjs file and WASM binary no longer exist,
// so we force onnxruntime-web to load the latest build from jsDelivr, which
// serves the correct ort-wasm-simd-threaded.wasm binary
})
micVad.value.start()
}
async function handleAudioInputChange(event: Event) {
const target = event.target as HTMLSelectElement
const found = audioInputs.value.find(d => d.deviceId === target.value)
@@ -133,7 +108,6 @@ async function handleAudioInputChange(event: Event) {
}
selectedAudioDevice.value = found
await handleMicVADActivation(found.deviceId)
}
const audioQueue = useQueue<{ audioBuffer: AudioBuffer, text: string }>({
@@ -315,11 +289,6 @@ watch([openAiApiBaseURL, openAiApiKey], async ([baseUrl, apiKey]) => {
supportedModels.value = await models(baseUrl, apiKey)
})
onUnmounted(() => {
if (micVad.value)
micVad.value.destroy()
})
onMounted(async () => {
if (!openAiApiBaseURL.value || !openAiApiKey.value)
return
74 changes: 74 additions & 0 deletions packages/stage/src/composables/micvad.ts
@@ -0,0 +1,74 @@
import type { RealTimeVADOptions } from '@ricky0123/vad-web'
import { getDefaultRealTimeVADOptions, MicVAD } from '@ricky0123/vad-web'
import { usePermission } from '@vueuse/core'
import { tryOnMounted } from '@vueuse/shared'
import { defu } from 'defu'

export function useMicVAD(deviceId: MaybeRef<ConstrainDOMString | undefined>, options?: Partial<RealTimeVADOptions> & { auto?: boolean }) {
  const opts = defu<Partial<RealTimeVADOptions> & { auto?: boolean }, Array<Omit<RealTimeVADOptions, 'stream'> & { auto?: boolean }>>(options ?? {}, {
    ...getDefaultRealTimeVADOptions('v5'),
    positiveSpeechThreshold: 0.2, // default is 0.5
    negativeSpeechThreshold: 0.08, // default is 0.5 - 0.15
    minSpeechFrames: 60, // default is 9
    // WORKAROUND (temporary): @ricky0123/vad-web pins a hardcoded onnxruntime-web
    // version whose ort-wasm-simd-threaded.mjs file and WASM binary no longer exist,
    // so we force onnxruntime-web to load the latest build from jsDelivr, which
    // serves the correct ort-wasm-simd-threaded.wasm binary
    onnxWASMBasePath: 'https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/',
    auto: true,
  })

  const micVad = ref<MicVAD>()
  const microphoneAccess = usePermission('microphone')

  async function update() {
    if (micVad.value) {
      micVad.value.destroy()
      micVad.value = undefined
      console.warn('existing MicVAD destroyed')
    }
    if (!microphoneAccess.value)
      return

    const id = unref(deviceId)
    if (!id)
      return

    const media = await navigator.mediaDevices.getUserMedia({ audio: { deviceId: id } })

    // Use of MicVAD is inspired by Open-LLM-VTuber
    // Source code reference: https://github.com/t41372/Open-LLM-VTuber/blob/92cbf4349b84a68b0035bc825bc3d1d61fd0f063/static/index.html#L119
    micVad.value = await MicVAD.new({
      ...opts,
      stream: media,
    })

    if (opts.auto)
      micVad.value.start()
  }

  watch(microphoneAccess, update, { immediate: true })
  watch(toRef(deviceId), update, { immediate: true })
  tryOnMounted(update)
  onUnmounted(() => {
    if (micVad.value) {
      micVad.value.destroy()
      micVad.value = undefined
    }
  })

  return {
    destroy: () => {
      if (micVad.value) {
        micVad.value.destroy()
        micVad.value = undefined
      }
    },
    start: () => {
      if (micVad.value) {
        micVad.value.start()
      }
    },
  }
}
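
For reference, a minimal sketch of how the new composable is meant to be consumed, mirroring the MainStage.vue change above. The component and ref names here are illustrative, not part of this commit; option merging follows defu semantics, so caller-supplied options take precedence over the defaults shown in the composable.

```ts
// Hypothetical consumer component (e.g. inside a <script setup> block).
import { ref } from 'vue'
import { useMicVAD } from './composables/micvad'

// Device id fed into the composable; it re-creates the underlying MicVAD
// whenever this ref or the microphone permission changes.
const selectedAudioDeviceId = ref<string | undefined>(undefined)

const vad = useMicVAD(selectedAudioDeviceId, {
  auto: true, // start listening as soon as permission and a device are available
  onSpeechStart: () => console.log('speech started'),
  onSpeechEnd: () => console.log('speech ended'),
})

// Manual control is still available via the returned handles:
// vad.start()
// vad.destroy()
```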
6 changes: 6 additions & 0 deletions pnpm-lock.yaml

Some generated files are not rendered by default.
