Skip to content

Commit

Permalink
feat(llm): add ChatContext (#120)
Browse files Browse the repository at this point in the history
  • Loading branch information
nbsp authored Nov 2, 2024
1 parent 9e2506e commit 7bdf719
Show file tree
Hide file tree
Showing 7 changed files with 326 additions and 24 deletions.
7 changes: 7 additions & 0 deletions .changeset/purple-beds-clean.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
"@livekit/agents": patch
"@livekit/agents-plugin-openai": patch
"livekit-agents-examples": patch
---

add ChatContext
136 changes: 136 additions & 0 deletions agents/src/llm/chat_context.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
//
// SPDX-License-Identifier: Apache-2.0
import type { AudioFrame } from '@livekit/rtc-node';
import type { CallableFunctionResult, FunctionContext } from './function_context.js';

/**
 * Role tag carried by every {@link ChatMessage} (see `ChatMessage.role`).
 *
 * This is a numeric enum; the values are spelled out so the mapping stays
 * stable and obvious if members are ever reordered.
 */
export enum ChatRole {
  SYSTEM = 0,
  USER = 1,
  ASSISTANT = 2,
  TOOL = 3,
}

/**
 * An image that can be used as (part of) the content of a chat message.
 *
 * NOTE(review): `image` is typed as `string | AudioFrame`. An audio frame is an
 * unusual carrier for image data — confirm whether a video frame type was intended.
 */
export interface ChatImage {
/** The image itself, given either as a string or as a frame object. */
image: string | AudioFrame;
/** Optional width hint — presumably the width used when the image is sent for inference; TODO confirm. */
inferenceWidth?: number;
/** Optional height hint — presumably the height used when the image is sent for inference; TODO confirm. */
inferenceHeight?: number;
/**
* @internal
* Used by LLM implementations to store a processed version of the image for later use.
*/
cache: { [id: string | number | symbol]: any };
}

/** Audio that can be used as (part of) the content of a chat message. */
export interface ChatAudio {
/** The audio payload: either a single frame or a list of frames. */
frame: AudioFrame | AudioFrame[];
}

/** One piece of message content: plain text, an image, or audio. */
export type ChatContent = string | ChatImage | ChatAudio;

/**
 * Fallback option values for `ChatMessage.create`: they are spread first and
 * then overridden by whatever options the caller supplies.
 */
const defaultCreateChatMessage = {
text: '',
images: [],
role: ChatRole.SYSTEM,
};

/**
 * A single immutable message of a chat history: a role plus optional content
 * and tool-call bookkeeping fields.
 *
 * Instances are normally built through the static factories
 * ({@link ChatMessage.create}, {@link ChatMessage.createToolCalls},
 * {@link ChatMessage.createToolFromFunctionResult}) rather than the constructor.
 */
export class ChatMessage {
  readonly role: ChatRole;
  readonly id?: string;
  readonly name?: string;
  readonly content?: ChatContent | ChatContent[];
  readonly toolCalls?: FunctionContext;
  readonly toolCallId?: string;
  readonly toolException?: Error;

  /** @internal */
  constructor({
    role,
    id,
    name,
    content,
    toolCalls,
    toolCallId,
    toolException,
  }: {
    role: ChatRole;
    id?: string;
    name?: string;
    content?: ChatContent | ChatContent[];
    toolCalls?: FunctionContext;
    toolCallId?: string;
    toolException?: Error;
  }) {
    this.role = role;
    this.id = id;
    this.name = name;
    this.content = content;
    this.toolCalls = toolCalls;
    this.toolCallId = toolCallId;
    this.toolException = toolException;
  }

  /**
   * Builds a `TOOL` message from the outcome of a function call.
   *
   * The content is the call's result when one is present, otherwise the text
   * `Error: <error>`. The error (if any) is also kept in `toolException`.
   *
   * @param func - the finished function call; must carry a result or an error
   * @returns the tool message describing the call's outcome
   * @throws TypeError when neither `result` nor `error` is set
   */
  static createToolFromFunctionResult(func: CallableFunctionResult): ChatMessage {
    // Test for presence rather than truthiness: 0, '' and false are legitimate
    // results and must neither be rejected nor reported as "Error: undefined".
    const hasResult = func.result !== undefined && func.result !== null;
    const hasError = func.error !== undefined && func.error !== null;

    if (!hasResult && !hasError) {
      throw new TypeError('CallableFunctionResult must include result or error');
    }

    return new ChatMessage({
      role: ChatRole.TOOL,
      name: func.name,
      content: hasResult ? func.result : `Error: ${func.error}`,
      toolCallId: func.toolCallId,
      toolException: func.error,
    });
  }

  /**
   * Builds an `ASSISTANT` message that carries a set of tool calls.
   *
   * @param toolCalls - the functions attached to the message
   * @param text - optional text content, empty by default
   * @returns the assistant message
   */
  static createToolCalls(toolCalls: FunctionContext, text = ''): ChatMessage {
    return new ChatMessage({
      role: ChatRole.ASSISTANT,
      toolCalls,
      content: text,
    });
  }

  /**
   * Builds a message from text and/or images.
   *
   * Missing (or explicitly `undefined`) options fall back to
   * `defaultCreateChatMessage`. Without images the content is the plain text;
   * with images it is an array holding the text (when non-empty) followed by
   * the images.
   *
   * @param options - text, images and role of the message; all optional
   * @returns the new message, carrying the requested role
   */
  static create(
    options: Partial<{
      text?: string;
      images: ChatImage[];
      role: ChatRole;
    }>,
  ): ChatMessage {
    const merged = { ...defaultCreateChatMessage, ...options };
    // An explicitly-passed `undefined` survives the spread, so re-apply defaults.
    const text = merged.text ?? defaultCreateChatMessage.text;
    const images = merged.images ?? defaultCreateChatMessage.images;
    const role = merged.role ?? defaultCreateChatMessage.role;

    if (!images.length) {
      // Honour the caller's role for text-only messages as well.
      return new ChatMessage({ role, content: text });
    }

    return new ChatMessage({
      role,
      content: [...(text ? [text] : []), ...images],
    });
  }

  /**
   * Returns a new `ChatMessage` with the same field values.
   *
   * The copy is built through the constructor instead of `structuredClone`:
   * a structured clone drops the prototype (the result would not be a
   * `ChatMessage`) and throws on the functions held by `toolCalls`. A content
   * array is copied; the content items themselves are shared with the original.
   */
  copy(): ChatMessage {
    return new ChatMessage({
      role: this.role,
      id: this.id,
      name: this.name,
      content: Array.isArray(this.content) ? [...this.content] : this.content,
      toolCalls: this.toolCalls,
      toolCallId: this.toolCallId,
      toolException: this.toolException,
    });
  }
}

/** An ordered list of chat messages together with free-form metadata. */
export class ChatContext {
  messages: ChatMessage[] = [];
  metadata: { [id: string]: any } = {};

  /**
   * Creates a message via `ChatMessage.create` and appends it to `messages`.
   *
   * @param msg - text, images and role of the new message; omitted fields use
   *   the defaults applied by `ChatMessage.create`
   * @returns this context, so calls can be chained
   */
  append(msg: { text?: string; images?: ChatImage[]; role?: ChatRole }): ChatContext {
    this.messages.push(ChatMessage.create(msg));
    return this;
  }

  /**
   * Returns an independent `ChatContext` with the same messages and metadata.
   *
   * The context is rebuilt by hand instead of calling `structuredClone(this)`:
   * a structured clone drops prototypes (yielding plain objects without
   * `append`/`copy`) and throws when a message holds functions in `toolCalls`.
   * Each message is re-created as a `ChatMessage` with the same field values;
   * only `metadata` is deep-cloned.
   */
  copy(): ChatContext {
    const ctx = new ChatContext();
    ctx.messages = this.messages.map((msg) => new ChatMessage({ ...msg }));
    ctx.metadata = structuredClone(this.metadata);
    return ctx;
  }
}
8 changes: 8 additions & 0 deletions agents/src/llm/function_context.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,14 @@ export interface CallableFunction<P extends z.ZodTypeAny = any, R = any> {
execute: (args: inferParameters<P>) => PromiseLike<R>;
}

/**
 * The outcome of a function call requested by the LLM: which function was
 * called, the id of the tool call it answers, and either a result or an error.
 */
export interface CallableFunctionResult {
/** Name of the function that was called. */
name: string;
/** Identifier of the tool call this outcome belongs to. */
toolCallId: string;
/** Value produced by the call — presumably set on success; TODO confirm against the caller. */
result?: any;
/** Error raised by the call — presumably set on failure; TODO confirm against the caller. */
error?: any;
}

/** An object containing callable functions and their names */
export type FunctionContext = {
[name: string]: CallableFunction;
Expand Down
12 changes: 10 additions & 2 deletions agents/src/llm/index.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,19 @@
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
//
// SPDX-License-Identifier: Apache-2.0
import {
export {
type CallableFunction,
type CallableFunctionResult,
type FunctionContext,
type inferParameters,
oaiParams,
} from './function_context.js';

export { CallableFunction, FunctionContext, inferParameters, oaiParams };
export {
type ChatImage,
type ChatAudio,
type ChatContent,
ChatRole,
ChatMessage,
ChatContext,
} from './chat_context.js';
8 changes: 6 additions & 2 deletions agents/src/multimodal/multimodal_agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,16 @@ export class MultimodalAgent extends EventEmitter {

constructor({
model,
chatCtx,
fncCtx,
}: {
model: RealtimeModel;
fncCtx?: llm.FunctionContext | undefined;
chatCtx?: llm.ChatContext;
fncCtx?: llm.FunctionContext;
}) {
super();
this.model = model;
this.#chatCtx = chatCtx;
this.#fncCtx = fncCtx;
}

Expand All @@ -83,6 +86,7 @@ export class MultimodalAgent extends EventEmitter {
#logger = log();
#session: RealtimeSession | null = null;
#fncCtx: llm.FunctionContext | undefined = undefined;
#chatCtx: llm.ChatContext | undefined = undefined;

#_started: boolean = false;
#_pendingFunctionCalls: Set<string> = new Set();
Expand Down Expand Up @@ -200,7 +204,7 @@ export class MultimodalAgent extends EventEmitter {
}
}

this.#session = this.model.session({ fncCtx: this.#fncCtx });
this.#session = this.model.session({ fncCtx: this.#fncCtx, chatCtx: this.#chatCtx });
this.#started = true;

// eslint-disable-next-line @typescript-eslint/no-explicit-any
Expand Down
13 changes: 7 additions & 6 deletions examples/src/minimal_assistant.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
//
// SPDX-License-Identifier: Apache-2.0
import { type JobContext, WorkerOptions, cli, defineAgent, multimodal } from '@livekit/agents';
import { type JobContext, WorkerOptions, cli, defineAgent, llm, multimodal } from '@livekit/agents';
import * as openai from '@livekit/agents-plugin-openai';
import { fileURLToPath } from 'node:url';
import { z } from 'zod';
Expand Down Expand Up @@ -52,11 +52,12 @@ export default defineAgent({
.start(ctx.room, participant)
.then((session) => session as openai.realtime.RealtimeSession);

session.conversation.item.create({
type: 'message',
role: 'user',
content: [{ type: 'input_text', text: 'Say "How can I help you today?"' }],
});
session.conversation.item.create(
llm.ChatMessage.create({
role: llm.ChatRole.USER,
text: 'Say "How can I help you today?"',
}),
);
session.response.create();
},
});
Expand Down
Loading

0 comments on commit 7bdf719

Please sign in to comment.