-
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix: parser issue, and audio dispatching issue
- Loading branch information
1 parent
26c3794
commit 3735157
Showing
9 changed files
with
266 additions
and
153 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,6 +32,7 @@ words: | |
- kwaa | ||
- live2dcubismcore | ||
- live2dcubismframework | ||
- Llmmarker | ||
- Myriam | ||
- Neko | ||
- nekomeowww | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
import { describe, expect, it } from 'vitest' | ||
import { useLlmmarkerParser } from './llmmarkerParser' | ||
|
||
describe('useLlmmarkerParser', () => {
  /**
   * Feeds every chunk to a fresh parser, signals end of input, and returns
   * what the parser reported through its two callbacks, in call order.
   *
   * Passing a plain string streams it one character at a time (a string
   * iterates by character); passing `[text]` delivers it as a single chunk.
   */
  async function parseChunks(chunks: Iterable<string>) {
    const literals: string[] = []
    const specials: string[] = []

    const parser = useLlmmarkerParser({
      onLiteral(literal) {
        literals.push(literal)
      },
      onSpecial(special) {
        specials.push(special)
      },
    })

    for (const chunk of chunks) {
      await parser.consume(chunk)
    }

    await parser.end()

    return { literals, specials }
  }

  it('should parse pure literals', async () => {
    const { literals, specials } = await parseChunks('Hello, world!')

    expect(literals).toEqual('Hello, world!'.split(''))
    expect(specials).toEqual([])
  })

  it('should parse pure specials', async () => {
    const { literals, specials } = await parseChunks('<|Hello, world!|>')

    expect(literals).toEqual([])
    expect(specials).toEqual(['<|Hello, world!|>'])
  })

  it('should not include unfinished special', async () => {
    const { literals, specials } = await parseChunks('<|Hello, world')

    expect(literals).toEqual([])
    expect(specials).toEqual([])
  })

  it('should parse with mixed input, ends with special', async () => {
    const { literals, specials } = await parseChunks('This is sentence 1, <|HELLO|> and this is sentence 2.<|WORLD|>')

    expect(literals).toEqual([...'This is sentence 1, '.split(''), ...' and this is sentence 2.'.split('')])
    expect(specials).toEqual(['<|HELLO|>', '<|WORLD|>'])
  })

  it('should parse correctly', async () => {
    const testCases: { input: string, expectedLiterals: string[], expectedSpecials: string[] }[] = [
      {
        input: `<|A|> Wow, hello there!`,
        expectedLiterals: ' Wow, hello there!'.split(''),
        expectedSpecials: ['<|A|>'],
      },
      {
        input: `<|A|> Hello!`,
        expectedLiterals: ' Hello!'.split(''),
        expectedSpecials: ['<|A|>'],
      },
      {
        input: `<|A|> Hello! <|B|>`,
        expectedLiterals: ' Hello! '.split(''),
        expectedSpecials: ['<|A|>', '<|B|>'],
      },
    ]

    for (const { input, expectedLiterals, expectedSpecials } of testCases) {
      // Streamed one character per consume() call: the parser has no
      // lookahead, so markers are recognised through the bare `<` / `>` path.
      const streamed = await parseChunks(input)
      expect(streamed.literals).toEqual(expectedLiterals)
      expect(streamed.specials).toEqual(expectedSpecials)

      // Delivered as one chunk: the parser can look ahead, so the
      // two-character `<|` / `|>` detection is exercised as well.
      const singleChunk = await parseChunks([input])
      expect(singleChunk.literals).toEqual(expectedLiterals)
      expect(singleChunk.specials).toEqual(expectedSpecials)
    }
  })
})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
/** Parser modes: plain text, or inside a marker that is still being collected. */
enum States {
  Literal = 'literal',
  Special = 'special',
}

/**
 * Returns the character at `index + offset` of `array`, or `''` when that
 * position falls outside the string (before the start or past the end).
 *
 * @param array - The text to look into.
 * @param index - The current position.
 * @param offset - Distance from `index`; may be negative.
 */
function peek(array: string, index: number, offset: number): string {
  // `charAt` already yields '' for every out-of-range position, which also
  // makes the final character reachable (the bound used to stop one short).
  return array.charAt(index + offset)
}

/**
 * Creates a streaming parser that separates plain text from `<|...|>` markers.
 *
 * - Text outside a marker is reported through `onLiteral` as soon as it is
 *   read: one call per character, except that a `|>` seen outside a marker
 *   within a single chunk is reported as one two-character literal.
 * - A marker is buffered and reported through `onSpecial` as one string,
 *   delimiters included, once its closing delimiter is read.
 * - A bare `<` also opens a marker and a bare `>` also closes one. This is
 *   what makes markers recognisable when a delimiter is split across two
 *   `consume` calls (e.g. input streamed one character at a time), since
 *   lookahead never crosses a chunk boundary.
 *
 * Callbacks are awaited in order before parsing continues.
 *
 * @param options - Optional `onLiteral` / `onSpecial` callbacks.
 * @returns An object with `consume(textPart)` to feed the next chunk and
 *   `end()` to signal the end of the input.
 */
export function useLlmmarkerParser(options: {
  onLiteral?: (literal: string) => void | Promise<void>
  onSpecial?: (special: string) => void | Promise<void>
}) {
  let state = States.Literal
  // Holds the marker collected so far; stays empty while in Literal state.
  let buffer = ''

  return {
    async consume(textPart: string) {
      for (let i = 0; i < textPart.length; i++) {
        let token = textPart.charAt(i)
        let nextState: States = state
        const following = peek(textPart, i, 1)

        // read: classify the token, swallowing the second delimiter character
        // when both are visible in this chunk
        if (token === '<' && following === '|') {
          token += following
          nextState = States.Special
          i++
        }
        else if (token === '|' && following === '>') {
          token += following
          nextState = States.Literal
          i++
        }
        else if (token === '<') {
          nextState = States.Special
        }
        else if (token === '>') {
          nextState = States.Literal
        }

        // handle
        if (nextState === States.Special) {
          // Opening a marker or still inside one: keep collecting.
          buffer += token
        }
        else if (state === States.Special) {
          // Closing delimiter read: emit the whole marker.
          buffer += token
          await options.onSpecial?.(buffer)
          buffer = ''
        }
        else {
          await options.onLiteral?.(token)
        }

        state = nextState
      }
    },
    async end() {
      // The buffer is only ever non-empty while a marker is still open, and an
      // unfinished marker is dropped rather than emitted. Reset both fields so
      // that text consumed after `end()` is not glued onto the stale marker.
      buffer = ''
      state = States.Literal
    },
  }
}
Oops, something went wrong.