Skip to content

Commit

Permalink
feat(elevenlabs): re-add (#56)
Browse files Browse the repository at this point in the history
  • Loading branch information
nbsp authored Oct 18, 2024
1 parent 76d09b2 commit f82aa10
Show file tree
Hide file tree
Showing 15 changed files with 477 additions and 22 deletions.
6 changes: 6 additions & 0 deletions .changeset/breezy-books-behave.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
"@livekit/agents": patch
"@livekit/agents-plugin-elevenlabs": minor
---

re-add ElevenLabs TTS plugin
10 changes: 5 additions & 5 deletions agents/src/tokenize/basic/basic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
//
// SPDX-License-Identifier: Apache-2.0
import * as tokenizer from '../index.js';
import { BufferedSentenceStream } from '../token_stream.js';
import { BufferedSentenceStream, BufferedWordStream } from '../token_stream.js';
import { hyphenator } from './hyphenator.js';
import { splitParagraphs } from './paragraph.js';
import { splitSentences } from './sentence.js';
import { splitWords } from './word';
import { splitWords } from './word.js';

interface TokenizerOptions {
language: string;
Expand Down Expand Up @@ -41,7 +41,7 @@ export class SentenceTokenizer extends tokenizer.SentenceTokenizer {
}
}

export class WordTokenizer extends tokenizer.SentenceTokenizer {
export class WordTokenizer extends tokenizer.WordTokenizer {
#ignorePunctuation: boolean;

constructor(ignorePunctuation = true) {
Expand All @@ -55,8 +55,8 @@ export class WordTokenizer extends tokenizer.SentenceTokenizer {
}

// eslint-disable-next-line @typescript-eslint/no-unused-vars
stream(language?: string): tokenizer.SentenceStream {
return new BufferedSentenceStream(
stream(language?: string): tokenizer.WordStream {
return new BufferedWordStream(
(text: string) => splitWords(text, this.#ignorePunctuation),
1,
1,
Expand Down
2 changes: 1 addition & 1 deletion agents/src/tokenize/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ export {
WordStream,
} from './tokenizer.js';

export { BufferedSentenceStream, BufferedTokenStream } from './token_stream.js';
export { BufferedSentenceStream, BufferedTokenStream, BufferedWordStream } from './token_stream.js';

export { basic };
20 changes: 15 additions & 5 deletions agents/src/tokenize/token_stream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,14 @@ export class BufferedTokenStream implements AsyncIterableIterator<TokenData> {

while (true) {
const tokens = this.#func(this.#inBuf);
if (tokens.length <= 1) break;
if (tokens.length === 0) break;

if (this.#outBuf) this.#outBuf += ' ';

const tok = tokens.pop();
let tokText = tok! as string;
if (typeof tok! !== 'string') {
tokText = tok![0];
const tok = tokens.shift()!;
let tokText = tok as string;
if (tok.length > 1 && typeof tok[1] === 'number') {
tokText = tok[0];
}

this.#outBuf += tokText;
Expand Down Expand Up @@ -130,6 +130,11 @@ export class BufferedSentenceStream extends SentenceStream {
this.#stream.pushText(text);
}

/**
 * Close this sentence stream.
 *
 * Runs the base-class `close()` first, then closes the wrapped `#stream`
 * (the same object that `pushText` and `next` delegate to).
 */
close() {
super.close();
this.#stream.close();
}

next(): Promise<IteratorResult<TokenData>> {
return this.#stream.next();
}
Expand All @@ -147,6 +152,11 @@ export class BufferedWordStream extends WordStream {
this.#stream.pushText(text);
}

/**
 * Close this word stream.
 *
 * Runs the base-class `close()` first, then closes the wrapped `#stream`
 * (the same object that `pushText` and `next` delegate to).
 */
close() {
super.close();
this.#stream.close();
}

next(): Promise<IteratorResult<TokenData>> {
return this.#stream.next();
}
Expand Down
28 changes: 18 additions & 10 deletions agents/src/tokenize/tokenizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,18 @@ export abstract class SentenceStream {
protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');
protected input = new AsyncIterableQueue<string | typeof SentenceStream.FLUSH_SENTINEL>();
protected queue = new AsyncIterableQueue<TokenData>();
protected closed = false;
#closed = false;

/**
 * Read-only accessor for the private `#closed` flag.
 * The flag starts as `false` and is set to `true` by `close()`.
 */
get closed(): boolean {
return this.#closed;
}

/** Push a string of text to the tokenizer */
pushText(text: string) {
if (this.input.closed) {
throw new Error('Input is closed');
}
if (this.closed) {
if (this.#closed) {
throw new Error('Stream is closed');
}
this.input.put(text);
Expand All @@ -46,7 +50,7 @@ export abstract class SentenceStream {
if (this.input.closed) {
throw new Error('Input is closed');
}
if (this.closed) {
if (this.#closed) {
throw new Error('Stream is closed');
}
this.input.put(SentenceStream.FLUSH_SENTINEL);
Expand All @@ -57,7 +61,7 @@ export abstract class SentenceStream {
if (this.input.closed) {
throw new Error('Input is closed');
}
if (this.closed) {
if (this.#closed) {
throw new Error('Stream is closed');
}
this.input.close();
Expand All @@ -71,7 +75,7 @@ export abstract class SentenceStream {
close() {
this.input.close();
this.queue.close();
this.closed = true;
this.#closed = true;
}

[Symbol.asyncIterator](): SentenceStream {
Expand All @@ -92,14 +96,18 @@ export abstract class WordStream {
protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');
protected input = new AsyncIterableQueue<string | typeof WordStream.FLUSH_SENTINEL>();
protected queue = new AsyncIterableQueue<TokenData>();
protected closed = false;
#closed = false;

/**
 * Read-only accessor for the private `#closed` flag.
 * The flag starts as `false` and is set to `true` by `close()`.
 */
get closed(): boolean {
return this.#closed;
}

/** Push a string of text to the tokenizer */
pushText(text: string) {
if (this.input.closed) {
throw new Error('Input is closed');
}
if (this.closed) {
if (this.#closed) {
throw new Error('Stream is closed');
}
this.input.put(text);
Expand All @@ -110,7 +118,7 @@ export abstract class WordStream {
if (this.input.closed) {
throw new Error('Input is closed');
}
if (this.closed) {
if (this.#closed) {
throw new Error('Stream is closed');
}
this.input.put(WordStream.FLUSH_SENTINEL);
Expand All @@ -121,7 +129,7 @@ export abstract class WordStream {
if (this.input.closed) {
throw new Error('Input is closed');
}
if (this.closed) {
if (this.#closed) {
throw new Error('Stream is closed');
}
this.input.close();
Expand All @@ -135,7 +143,7 @@ export abstract class WordStream {
close() {
this.input.close();
this.queue.close();
this.closed = true;
this.#closed = true;
}

[Symbol.asyncIterator](): WordStream {
Expand Down
2 changes: 1 addition & 1 deletion agents/src/tts/tts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ export interface SynthesizedAudio {
/** Synthesized audio frame */
frame: AudioFrame;
/** Current segment of the synthesized audio */
deltaText: string;
deltaText?: string;
}

/**
Expand Down
1 change: 1 addition & 0 deletions examples/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
},
"dependencies": {
"@livekit/agents": "workspace:*",
"@livekit/agents-plugin-elevenlabs": "workspace:*",
"@livekit/agents-plugin-openai": "workspace:*",
"@livekit/rtc-node": "^0.10.2",
"zod": "^3.23.8"
Expand Down
47 changes: 47 additions & 0 deletions examples/src/tts.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
//
// SPDX-License-Identifier: Apache-2.0
import { type JobContext, WorkerOptions, cli, defineAgent } from '@livekit/agents';
import { TTS } from '@livekit/agents-plugin-elevenlabs';
import {
AudioSource,
LocalAudioTrack,
RoomEvent,
TrackPublishOptions,
TrackSource,
} from '@livekit/rtc-node';
import { fileURLToPath } from 'node:url';

/**
 * Example agent that publishes an audio track and speaks two phrases through
 * the ElevenLabs TTS stream each time `RoomEvent.LocalTrackSubscribed` fires.
 *
 * Synthesized frames are pulled from the TTS stream and forwarded to the
 * published audio source for as long as the stream keeps yielding.
 */
export default defineAgent({
  entry: async (ctx: JobContext) => {
    await ctx.connect();

    console.log('starting TTS example agent');

    // NOTE(review): arguments are presumably sample rate (Hz) and channel count — confirm
    // against the rtc-node AudioSource API.
    const audioSource = new AudioSource(22050, 1);
    const micTrack = LocalAudioTrack.createAudioTrack('agent-mic', audioSource);

    const publishOptions = new TrackPublishOptions();
    publishOptions.source = TrackSource.SOURCE_MICROPHONE;

    await ctx.room.localParticipant?.publishTrack(micTrack, publishOptions);
    const ttsStream = new TTS().stream();

    // Queue a phrase and flush immediately so it is synthesized as its own segment.
    const utter = (text: string): void => {
      ttsStream.pushText(text);
      ttsStream.flush();
    };

    // Promise-based delay used to space the two phrases apart.
    const pause = (ms: number): Promise<void> =>
      new Promise<void>((resolve) => setTimeout(resolve, ms));

    ctx.room.on(RoomEvent.LocalTrackSubscribed, async () => {
      console.log('speaking "Hello!"');
      utter('Hello!');

      await pause(2000);

      console.log('speaking "Goodbye!"');
      utter('Goodbye!');
    });

    // Forward every synthesized frame to the published audio source.
    for await (const synthesized of ttsStream) {
      await audioSource.captureFrame(synthesized.frame);
    }
  },
});

// Start the agents CLI, passing this module's own file path (derived from
// `import.meta.url`) as the `agent` option of the worker.
cli.runApp(new WorkerOptions({ agent: fileURLToPath(import.meta.url) }));
20 changes: 20 additions & 0 deletions plugins/elevenlabs/api-extractor.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/**
* Config file for API Extractor. For more info, please visit: https://api-extractor.com
*/
{
"$schema": "https://developer.microsoft.com/json-schemas/api-extractor/v7/api-extractor.schema.json",

/**
* Optionally specifies another JSON config file that this file extends from. This provides a way for
* standard settings to be shared across multiple projects.
*
* If the path starts with "./" or "../", the path is resolved relative to the folder of the file that contains
* the "extends" field. Otherwise, the first path segment is interpreted as an NPM package name, and will be
* resolved using NodeJS require().
*
* SUPPORTED TOKENS: none
* DEFAULT VALUE: ""
*/
"extends": "../../api-extractor-shared.json",
"mainEntryPointFilePath": "./dist/index.d.ts"
}
27 changes: 27 additions & 0 deletions plugins/elevenlabs/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
"name": "@livekit/agents-plugin-elevenlabs",
"version": "0.1.0",
"description": "ElevenLabs plugin for LiveKit Node Agents",
"main": "dist/index.js",
"types": "dist/index.d.ts",
"author": "LiveKit",
"type": "module",
"scripts": {
"build": "tsc",
"clean": "rm -rf dist",
"clean:build": "pnpm clean && pnpm build",
"lint": "eslint -f unix \"src/**/*.{ts,js}\"",
"api:check": "api-extractor run --typescript-compiler-folder ../../node_modules/typescript",
"api:update": "api-extractor run --local --typescript-compiler-folder ../../node_modules/typescript --verbose"
},
"devDependencies": {
"@microsoft/api-extractor": "^7.35.0",
"@types/ws": "^8.5.10",
"typescript": "^5.0.0"
},
"dependencies": {
"@livekit/agents": "workspace:*",
"@livekit/rtc-node": "^0.10.2",
"ws": "^8.16.0"
}
}
5 changes: 5 additions & 0 deletions plugins/elevenlabs/src/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
//
// SPDX-License-Identifier: Apache-2.0

export * from './tts.js';
20 changes: 20 additions & 0 deletions plugins/elevenlabs/src/models.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
//
// SPDX-License-Identifier: Apache-2.0

/**
 * String-literal union of the TTS model names accepted by this plugin's types.
 *
 * NOTE(review): these are presumably ElevenLabs model IDs passed through to the
 * API unchanged — confirm against the ElevenLabs documentation.
 */
export type TTSModels =
| 'eleven_monolingual_v1'
| 'eleven_multilingual_v1'
| 'eleven_multilingual_v2'
| 'eleven_turbo_v2'
| 'eleven_turbo_v2_5';

/**
 * String-literal union of the output encodings this plugin's types accept.
 *
 * Only the PCM variants are enabled; the MP3 variants are kept below as
 * commented-out members because MP3 is not yet supported.
 *
 * NOTE(review): the numeric suffix presumably denotes the sample rate in Hz
 * (and, for MP3, the bitrate) — confirm against the ElevenLabs documentation.
 */
export type TTSEncoding =
// XXX(nbsp): MP3 is not yet supported
// | 'mp3_22050_32'
// | 'mp3_44100_32'
// | 'mp3_44100_64'
// | 'mp3_44100_96'
// | 'mp3_44100_128'
// | 'mp3_44100_192'
'pcm_16000' | 'pcm_22050' | 'pcm_44100';
Loading

0 comments on commit f82aa10

Please sign in to comment.