diff --git a/cspell.config.yaml b/cspell.config.yaml
index da369b0..3dda4e1 100644
--- a/cspell.config.yaml
+++ b/cspell.config.yaml
@@ -32,6 +32,7 @@ words:
   - kwaa
   - live2dcubismcore
   - live2dcubismframework
+  - Llmmarker
   - Myriam
   - Neko
   - nekomeowww
diff --git a/package.json b/package.json
index 9023444..0e48929 100644
--- a/package.json
+++ b/package.json
@@ -53,12 +53,6 @@
     "packages/*",
     "docs"
   ],
-  "pnpm": {
-    "override": {
-      "@babel/preset-env": "7.26.0",
-      "workbox-build": "7.3.0"
-    }
-  },
   "simple-git-hooks": {
     "pre-commit": "pnpm lint-staged"
   },
diff --git a/packages/stage/src/auto-imports.d.ts b/packages/stage/src/auto-imports.d.ts
index 769d353..02faa22 100644
--- a/packages/stage/src/auto-imports.d.ts
+++ b/packages/stage/src/auto-imports.d.ts
@@ -196,6 +196,7 @@ declare global {
   const useLLM: typeof import('./stores/llm')['useLLM']
   const useLastChanged: typeof import('@vueuse/core')['useLastChanged']
   const useLink: typeof import('vue-router/auto')['useLink']
+  const useLlmmarkerParser: typeof import('./composables/llmmarkerParser')['useLlmmarkerParser']
   const useLocalStorage: typeof import('@vueuse/core')['useLocalStorage']
   const useMagicKeys: typeof import('@vueuse/core')['useMagicKeys']
   const useManualRefHistory: typeof import('@vueuse/core')['useManualRefHistory']
@@ -503,6 +504,7 @@ declare module 'vue' {
    readonly useLLM: UnwrapRef<typeof import('./stores/llm')['useLLM']>
    readonly useLastChanged: UnwrapRef<typeof import('@vueuse/core')['useLastChanged']>
    readonly useLink: UnwrapRef<typeof import('vue-router/auto')['useLink']>
+    readonly useLlmmarkerParser: UnwrapRef<typeof import('./composables/llmmarkerParser')['useLlmmarkerParser']>
    readonly useLocalStorage: UnwrapRef<typeof import('@vueuse/core')['useLocalStorage']>
    readonly useMagicKeys: UnwrapRef<typeof import('@vueuse/core')['useMagicKeys']>
    readonly useManualRefHistory: UnwrapRef<typeof import('@vueuse/core')['useManualRefHistory']>
diff --git a/packages/stage/src/components/MainStage.vue b/packages/stage/src/components/MainStage.vue
index 5174dba..5cb5e28 100644
--- a/packages/stage/src/components/MainStage.vue
+++ b/packages/stage/src/components/MainStage.vue
@@ -17,7 +17,6 @@ import { useLLM } from '../stores/llm'
 import { useSettings } from '../stores/settings'
 import BasicTextarea from './BasicTextarea.vue'
-// import AudioWaveform from './AudioWaveform.vue'
 import Live2DViewer from './Live2DViewer.vue'
 import Settings from './Settings.vue'
 import ThreeDScene from './ThreeDScene.vue'
@@ -107,7 +106,6 @@ const ttsQueue = useQueue({
         voice: 'Myriam',
         // Beatrice is not 'childish' like the others
         // voice: 'Beatrice',
-        // text: body.text,
         model_id: 'eleven_multilingual_v2',
         voice_settings: {
           stability: 0.4,
@@ -150,13 +148,13 @@ const emotionsQueue = useQueue({
   ],
 })
 
-const emotionMessageContentQueue = useEmotionsMessageQueue(emotionsQueue, messageContentQueue)
+const emotionMessageContentQueue = useEmotionsMessageQueue(emotionsQueue)
 emotionMessageContentQueue.onHandlerEvent('emotion', (emotion) => {
   // eslint-disable-next-line no-console
   console.debug('emotion detected', emotion)
 })
 
-const delaysQueue = useDelayMessageQueue(emotionMessageContentQueue)
+const delaysQueue = useDelayMessageQueue()
 delaysQueue.onHandlerEvent('delay', (delay) => {
   // eslint-disable-next-line no-console
   console.debug('delay detected', delay)
@@ -216,49 +214,31 @@ async function onSendMessage(sendingMessage: string) {
   live2DViewerRef.value?.setMotion(EmotionThinkMotionName)
 
   const res = await stream(openAiApiBaseURL.value, openAiApiKey.value, openAIModel.value.id, messages.value.slice(0, messages.value.length - 1))
+  let fullText = ''
 
-  enum States {
-    Literal = 'literal',
-    Special = 'special',
-  }
-
-  let state = States.Literal
-  let buffer = ''
+  const parser = useLlmmarkerParser({
+    onLiteral: async (literal) => {
+      await messageContentQueue.add(literal)
+      streamingMessage.value.content += literal
+    },
+    onSpecial: async (special) => {
+      await delaysQueue.add(special)
+      await emotionMessageContentQueue.add(special)
+    },
+  })
 
   for await (const textPart of asyncIteratorFromReadableStream(res.textStream, async v => v)) {
-    for (const textSingleChar of textPart) {
-      let newState: States = state
-
-      if (textSingleChar === '<')
-        newState = States.Special
-      else if (textSingleChar === '>')
-        newState = States.Literal
-
-      if (state === States.Literal && newState === States.Special) {
-        streamingMessage.value.content += buffer
-        buffer = ''
-      }
-
-      if (state === States.Special && newState === States.Literal)
-        buffer = '' // Clear buffer when exiting Special state
-
-      if (state === States.Literal && newState === States.Literal) {
-        streamingMessage.value.content += textSingleChar
-        buffer = ''
-      }
-
-      await delaysQueue.add(textSingleChar)
-      state = newState
-      buffer += textSingleChar
-    }
+    fullText += textPart
+    await parser.consume(textPart)
   }
 
-  if (buffer)
-    streamingMessage.value.content += buffer
-
+  await parser.end()
   await delaysQueue.add(llmInferenceEndToken)
 
   messageInput.value = ''
+
+  // eslint-disable-next-line no-console
+  console.debug('Full text:', fullText)
 }
 
 watch([openAiApiBaseURL, openAiApiKey], async ([baseUrl, apiKey]) => {
diff --git a/packages/stage/src/composables/llmmarkerParser.test.ts b/packages/stage/src/composables/llmmarkerParser.test.ts
new file mode 100644
index 0000000..2f07db3
--- /dev/null
+++ b/packages/stage/src/composables/llmmarkerParser.test.ts
@@ -0,0 +1,144 @@
+import { describe, expect, it } from 'vitest'
+import { useLlmmarkerParser } from './llmmarkerParser'
+
+describe('useLlmmarkerParser', async () => {
+  it('should parse pure literals', async () => {
+    const fullText = 'Hello, world!'
+    const collectedLiterals: string[] = []
+    const collectedSpecials: string[] = []
+
+    const parser = useLlmmarkerParser({
+      onLiteral(literal) {
+        collectedLiterals.push(literal)
+      },
+      onSpecial(special) {
+        collectedSpecials.push(special)
+      },
+    })
+
+    for (const char of fullText) {
+      await parser.consume(char)
+    }
+
+    await parser.end()
+
+    expect(collectedLiterals).toEqual('Hello, world!'.split(''))
+    expect(collectedSpecials).toEqual([])
+  })
+
+  it('should parse pure specials', async () => {
+    const fullText = '<|Hello, world!|>'
+    const collectedLiterals: string[] = []
+    const collectedSpecials: string[] = []
+
+    const parser = useLlmmarkerParser({
+      onLiteral(literal) {
+        collectedLiterals.push(literal)
+      },
+      onSpecial(special) {
+        collectedSpecials.push(special)
+      },
+    })
+
+    for (const char of fullText) {
+      await parser.consume(char)
+    }
+
+    await parser.end()
+
+    expect(collectedLiterals).toEqual([])
+    expect(collectedSpecials).toEqual(['<|Hello, world!|>'])
+  })
+
+  it('should not include unfinished special', async () => {
+    const fullText = '<|Hello, world'
+    const collectedLiterals: string[] = []
+    const collectedSpecials: string[] = []
+
+    const parser = useLlmmarkerParser({
+      onLiteral(literal) {
+        collectedLiterals.push(literal)
+      },
+      onSpecial(special) {
+        collectedSpecials.push(special)
+      },
+    })
+
+    for (const char of fullText) {
+      await parser.consume(char)
+    }
+
+    await parser.end()
+
+    expect(collectedLiterals).toEqual([])
+    expect(collectedSpecials).toEqual([])
+  })
+
+  it('should parse with mixed input, ends with special', async () => {
+    const fullText = 'This is sentence 1, <|HELLO|> and this is sentence 2.<|WORLD|>'
+    const collectedLiterals: string[] = []
+    const collectedSpecials: string[] = []
+
+    const parser = useLlmmarkerParser({
+      onLiteral(literal) {
+        collectedLiterals.push(literal)
+      },
+      onSpecial(special) {
+        collectedSpecials.push(special)
+      },
+    })
+
+    for (const char of fullText) {
+      await parser.consume(char)
+    }
+
+    await parser.end()
+
+    expect(collectedLiterals).toEqual([...'This is sentence 1, '.split(''), ...' and this is sentence 2.'.split('')])
+    expect(collectedSpecials).toEqual(['<|HELLO|>', '<|WORLD|>'])
+  })
+
+  it('should parse correctly', async () => {
+    const testCases: { input: string, expectedLiterals: string[], expectedSpecials: string[] }[] = [
+      {
+        input: `<|A|> Wow, hello there!`,
+        expectedLiterals: ' Wow, hello there!'.split(''),
+        expectedSpecials: ['<|A|>'],
+      },
+      {
+        input: `<|A|> Hello!`,
+        expectedLiterals: ' Hello!'.split(''),
+        expectedSpecials: ['<|A|>'],
+      },
+      {
+        input: `<|A|> Hello! <|B|>`,
+        expectedLiterals: ' Hello! '.split(''),
+        expectedSpecials: ['<|A|>', '<|B|>'],
+      },
+    ]
+
+    for (const tc of testCases) {
+      const { input, expectedLiterals, expectedSpecials } = tc
+      const collectedLiterals: string[] = []
+      const collectedSpecials: string[] = []
+
+      const parser = useLlmmarkerParser({
+        onLiteral(literal) {
+          collectedLiterals.push(literal)
+        },
+        onSpecial(special) {
+          collectedSpecials.push(special)
+        },
+      })
+
+      for (const char of input) {
+        await parser.consume(char)
+      }
+
+      await parser.end()
+
+      expect(collectedLiterals).toEqual(expectedLiterals)
+      expect(collectedSpecials).toEqual(expectedSpecials)
+    }
+  })
+})
diff --git a/packages/stage/src/composables/llmmarkerParser.ts b/packages/stage/src/composables/llmmarkerParser.ts
new file mode 100644
index 0000000..1e67333
--- /dev/null
+++ b/packages/stage/src/composables/llmmarkerParser.ts
@@ -0,0 +1,86 @@
+enum States {
+  Literal = 'literal',
+  Special = 'special',
+}
+
+function peek(array: string, index: number, offset: number): string | undefined {
+  if (index + offset < 0 || index + offset >= (array.length - 1))
+    return ''
+
+  return array[index + offset]
+}
+
+export function useLlmmarkerParser(options: {
+  onLiteral?: (literal: string) => void | Promise<void>
+  onSpecial?: (special: string) => void | Promise<void>
+}) {
+  let state = States.Literal
+  let buffer = ''
+
+  return {
+    async consume(textPart: string) {
+      for (let i = 0; i < textPart.length; i++) {
+        let current = textPart[i]
+        let newState: States = state
+
+        // read
+        if (current === '<' && peek(textPart, i, 1) === '|') {
+          current += peek(textPart, i, 1)
+          newState = States.Special
+          i++
+        }
+        else if (current === '|' && peek(textPart, i, 1) === '>') {
+          current += peek(textPart, i, 1)
+          newState = States.Literal
+          i++
+        }
+        else if (current === '<') {
+          newState = States.Special
+        }
+        else if (current === '>') {
+          newState = States.Literal
+        }
+
+        // handle
+        if (state === States.Literal && newState === States.Special) {
+          if (buffer !== '') {
+            await options.onLiteral?.(buffer)
+            buffer = ''
+          }
+        }
+        else if (state === States.Special && newState === States.Literal) {
+          if (buffer !== '') {
+            buffer += current
+            await options.onSpecial?.(buffer)
+            buffer = '' // Clear buffer when exiting Special state
+          }
+        }
+
+        if (state === States.Literal && newState === States.Literal) {
+          await options.onLiteral?.(current)
+          buffer = ''
+        }
+        else if (state === States.Special && newState === States.Literal) {
+          buffer = ''
+        }
+        else {
+          buffer += current
+        }
+
+        state = newState
+      }
+    },
+    async end() {
+      if (buffer !== '') {
+        if (state === States.Literal) {
+          await options.onLiteral?.(buffer)
+        }
+        else {
+          if (buffer.endsWith('|>')) {
+            await options.onSpecial?.(buffer)
+          }
+        }
+      }
+    },
+  }
+}
diff --git a/packages/stage/src/composables/queues.ts b/packages/stage/src/composables/queues.ts
index cbf4271..52dffbb 100644
--- a/packages/stage/src/composables/queues.ts
+++ b/packages/stage/src/composables/queues.ts
@@ -5,56 +5,30 @@ import { llmInferenceEndToken } from '../constants'
 import { EMOTION_VALUES } from '../constants/emotions'
 import { useQueue } from './queue'
 
-export function useEmotionsMessageQueue(emotionsQueue: ReturnType<typeof useQueue<Emotion>>, messageContentQueue: ReturnType<typeof useQueue<string>>) {
+export function useEmotionsMessageQueue(emotionsQueue: ReturnType<typeof useQueue<Emotion>>) {
   function splitEmotion(content: string) {
     for (const emotion of EMOTION_VALUES) {
       // doesn't include the emotion, continue
       if (!content.includes(emotion))
         continue
 
-      // find the emotion and push the content before the emotion to the queue
-      const emotionIndex = content.indexOf(emotion)
-      const beforeEmotion = content.slice(0, emotionIndex)
-      const afterEmotion = content.slice(emotionIndex + emotion.length)
-
       return {
         ok: true,
         emotion: emotion as Emotion,
-        before: beforeEmotion,
-        after: afterEmotion,
       }
     }
 
     return {
       ok: false,
      emotion: '' as Emotion,
-      before: content,
-      after: '',
    }
   }
 
-  const processed = ref('')
-
   return useQueue({
     handlers: [
       async (ctx) => {
-        // inference ended, push the last content to the message queue
-        if (ctx.data.includes(llmInferenceEndToken)) {
-          const content = processed.value.trim()
-          if (content)
-            await messageContentQueue.add(content)
-
-          processed.value = ''
-
-          return
-        }
         // if the message is an emotion, push the last content to the message queue
         if (EMOTION_VALUES.includes(ctx.data as Emotion)) {
-          const content = processed.value.trim()
-          if (content)
-            await messageContentQueue.add(content)
-
-          processed.value = ''
           ctx.emit('emotion', ctx.data as Emotion)
           await emotionsQueue.add(ctx.data as Emotion)
@@ -62,34 +36,12 @@ export function useEmotionsMessageQueue(emotionsQueue: ReturnType<typeof useQueue<Emotion>>) {
+export function useDelayMessageQueue() {
   function splitDelays(content: string) {
-    // doesn't include the emotion, continue
+    // doesn't include the delay, continue
     if (!(/<\|DELAY:\d+\|>/i.test(content))) {
       return {
         ok: false,
         delay: 0,
-        before: content,
-        after: '',
       }
     }
@@ -116,29 +66,21 @@ export function useDelayMessageQueue(useEmotionsMessageQueue: ReturnType
     return new Promise(resolve => setTimeout(resolve, ms))
   }
 
-  const delaysQueueProcessedTemp = ref('')
 
   return useQueue({
     handlers: [
       async (ctx) => {
-        // inference ended, push the last content to the message queue
-        if (ctx.data.includes(llmInferenceEndToken)) {
-          const content = delaysQueueProcessedTemp.value.trim()
-          if (content)
-            await useEmotionsMessageQueue.add(content)
-
-          delaysQueueProcessedTemp.value = ''
-          return
-        }
-
-        {
-          // iterate through the message to find the emotions
-          const { ok, before, delay, after } = splitDelays(ctx.data)
-          if (ok && before) {
-            await useEmotionsMessageQueue.add(before)
-
-            if (delay) {
-              ctx.emit('delay', delay)
-              await sleep(delay * 1000)
-            }
-
-            if (after)
-              await useEmotionsMessageQueue.add(after)
-          }
-          else {
-            // if none of the emotions are found, push the content to the temp queue
-            delaysQueueProcessedTemp.value += ctx.data
-          }
-        }
-        // iterate through the message to find the emotions
-        {
-          const { ok, before, delay, after } = splitDelays(delaysQueueProcessedTemp.value)
-          if (ok && before) {
-            await useEmotionsMessageQueue.add(before)
-
-            if (delay) {
-              ctx.emit('delay', delay)
-              await sleep(delay * 1000)
-            }
-
-            if (after)
-              await useEmotionsMessageQueue.add(after)
-            delaysQueueProcessedTemp.value = ''
-          }
+        const { ok, delay } = splitDelays(ctx.data)
+        if (ok) {
+          ctx.emit('delay', delay)
+          await sleep(delay * 1000)
+        }
       },
     ],
diff --git a/packages/stage/vite.config.ts b/packages/stage/vite.config.ts
index 94d34f2..36d3dfa 100644
--- a/packages/stage/vite.config.ts
+++ b/packages/stage/vite.config.ts
@@ -62,7 +62,7 @@ export default defineConfig({
     // https://github.com/posva/unplugin-vue-router
     VueRouter({
       extensions: ['.vue', '.md'],
-      dts: 'src/typed-router.d.ts',
+      dts: path.resolve(__dirname, 'src/typed-router.d.ts'),
     }),
 
     // https://github.com/JohnCampionJr/vite-plugin-vue-layouts
diff --git a/vitest.workspace.ts b/vitest.workspace.ts
new file mode 100644
index 0000000..b18a56c
--- /dev/null
+++ b/vitest.workspace.ts
@@ -0,0 +1,5 @@
+import { defineWorkspace } from 'vitest/config'
+
+export default defineWorkspace([
+  'packages/*',
+])
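
For reference (not part of the diff): a minimal usage sketch of the new useLlmmarkerParser composable, modeled on the test file above. The import path mirrors the one in auto-imports.d.ts, and the <|EMOTION_HAPPY|> marker is a made-up example, not a value confirmed by this diff.

// Illustrative sketch only — drives the parser the same way llmmarkerParser.test.ts does.
// Assumes this file lives in packages/stage/src; the marker below is hypothetical.
import { useLlmmarkerParser } from './composables/llmmarkerParser'

async function demo() {
  const literals: string[] = []
  const specials: string[] = []

  const parser = useLlmmarkerParser({
    onLiteral(literal) {
      literals.push(literal) // plain text, emitted character by character
    },
    onSpecial(special) {
      specials.push(special) // complete <|...|> markers, emitted whole
    },
  })

  // Feed streamed chunks; here the marker is split across two chunks.
  for (const chunk of ['Hello <|EMOTION', '_HAPPY|> world']) {
    await parser.consume(chunk)
  }
  await parser.end()

  console.log(literals.join('')) // 'Hello  world' (text outside the marker)
  console.log(specials) // ['<|EMOTION_HAPPY|>']
}

demo().catch(console.error)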