diff --git a/.changeset/purple-beds-clean.md b/.changeset/purple-beds-clean.md
new file mode 100644
index 00000000..e746bf37
--- /dev/null
+++ b/.changeset/purple-beds-clean.md
@@ -0,0 +1,7 @@
+---
+"@livekit/agents": patch
+"@livekit/agents-plugin-openai": patch
+"livekit-agents-examples": patch
+---
+
+add ChatContext for passing conversation history to multimodal sessions
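+
+A minimal usage sketch (the `RealtimeModel` options shown follow the existing
+examples; only the `chatCtx` option is new in this change):
+
+```ts
+import { llm, multimodal } from '@livekit/agents';
+import * as openai from '@livekit/agents-plugin-openai';
+
+// seed the conversation before the session starts; append() returns `this`
+const chatCtx = new llm.ChatContext().append({
+  role: llm.ChatRole.USER,
+  text: 'My name is Ada.',
+  images: [],
+});
+
+const agent = new multimodal.MultimodalAgent({
+  model: new openai.realtime.RealtimeModel({ instructions: 'You are a helpful assistant.' }),
+  chatCtx,
+});
+```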
diff --git a/agents/src/llm/chat_context.ts b/agents/src/llm/chat_context.ts
new file mode 100644
index 00000000..0043e2e6
--- /dev/null
+++ b/agents/src/llm/chat_context.ts
@@ -0,0 +1,136 @@
+// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+import type { AudioFrame, VideoFrame } from '@livekit/rtc-node';
+import type { CallableFunctionResult, FunctionContext } from './function_context.js';
+
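+/** The role of a chat message. */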
+export enum ChatRole {
+ SYSTEM,
+ USER,
+ ASSISTANT,
+ TOOL,
+}
+
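+/** An image attached to a chat message. */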
+export interface ChatImage {
+  image: string | VideoFrame;
+ inferenceWidth?: number;
+ inferenceHeight?: number;
+ /**
+ * @internal
+ * Used by LLM implementations to store a processed version of the image for later use.
+ */
+ cache: { [id: string | number | symbol]: any };
+}
+
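+/** One or more audio frames attached to a chat message. */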
+export interface ChatAudio {
+ frame: AudioFrame | AudioFrame[];
+}
+
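+/** The content of a chat message: text, an image, or audio. */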
+export type ChatContent = string | ChatImage | ChatAudio;
+
+const defaultCreateChatMessage = {
+ text: '',
+ images: [],
+ role: ChatRole.SYSTEM,
+};
+
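+/** A single message within a chat conversation. */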
+export class ChatMessage {
+ readonly role: ChatRole;
+ readonly id?: string;
+ readonly name?: string;
+ readonly content?: ChatContent | ChatContent[];
+ readonly toolCalls?: FunctionContext;
+ readonly toolCallId?: string;
+ readonly toolException?: Error;
+
+ /** @internal */
+ constructor({
+ role,
+ id,
+ name,
+ content,
+ toolCalls,
+ toolCallId,
+ toolException,
+ }: {
+ role: ChatRole;
+ id?: string;
+ name?: string;
+ content?: ChatContent | ChatContent[];
+ toolCalls?: FunctionContext;
+ toolCallId?: string;
+ toolException?: Error;
+ }) {
+ this.role = role;
+ this.id = id;
+ this.name = name;
+ this.content = content;
+ this.toolCalls = toolCalls;
+ this.toolCallId = toolCallId;
+ this.toolException = toolException;
+ }
+
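+  /** Creates a TOOL message from the result (or error) of a function call. */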
+ static createToolFromFunctionResult(func: CallableFunctionResult): ChatMessage {
+ if (!func.result && !func.error) {
+ throw new TypeError('CallableFunctionResult must include result or error');
+ }
+
+ return new ChatMessage({
+ role: ChatRole.TOOL,
+ name: func.name,
+ content: func.result || `Error: ${func.error}`,
+ toolCallId: func.toolCallId,
+ toolException: func.error,
+ });
+ }
+
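+  /** Creates an ASSISTANT message that carries the given tool calls. */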
+  static createToolCalls(toolCalls: FunctionContext, text = ''): ChatMessage {
+ return new ChatMessage({
+ role: ChatRole.ASSISTANT,
+ toolCalls,
+ content: text,
+ });
+ }
+
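+  /** Creates a message from the given text, images, and role. */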
+ static create(
+ options: Partial<{
+ text?: string;
+ images: ChatImage[];
+ role: ChatRole;
+ }>,
+ ): ChatMessage {
+ const { text, images, role } = { ...defaultCreateChatMessage, ...options };
+
+ if (!images.length) {
+ return new ChatMessage({
+        role,
+ content: text,
+ });
+ } else {
+ return new ChatMessage({
+ role,
+ content: [...(text ? [text] : []), ...images],
+ });
+ }
+ }
+
+ /** Returns a structured clone of this message. */
+ copy(): ChatMessage {
+ return structuredClone(this);
+ }
+}
+
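+/** An ordered list of chat messages, plus arbitrary metadata. */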
+export class ChatContext {
+ messages: ChatMessage[] = [];
+ metadata: { [id: string]: any } = {};
+
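+  /** Builds a message from the given options and appends it; returns this context for chaining. */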
+ append(msg: { text?: string; images: ChatImage[]; role: ChatRole }): ChatContext {
+ this.messages.push(ChatMessage.create(msg));
+ return this;
+ }
+
+ /** Returns a structured clone of this context. */
+ copy(): ChatContext {
+ return structuredClone(this);
+ }
+}
diff --git a/agents/src/llm/function_context.ts b/agents/src/llm/function_context.ts
index e6673abb..af193b78 100644
--- a/agents/src/llm/function_context.ts
+++ b/agents/src/llm/function_context.ts
@@ -18,6 +18,14 @@ export interface CallableFunction<P extends z.ZodTypeAny = any, R = any> {
   execute: (args: inferParameters<P>) => PromiseLike<R>;
 }
+/** The result of a function call, returned to the LLM. */
+export interface CallableFunctionResult {
+ name: string;
+ toolCallId: string;
+ result?: any;
+ error?: any;
+}
+
/** An object containing callable functions and their names */
export type FunctionContext = {
[name: string]: CallableFunction;
diff --git a/agents/src/llm/index.ts b/agents/src/llm/index.ts
index 80336ecb..a2672fde 100644
--- a/agents/src/llm/index.ts
+++ b/agents/src/llm/index.ts
@@ -1,11 +1,19 @@
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
//
// SPDX-License-Identifier: Apache-2.0
-import {
+export {
type CallableFunction,
+ type CallableFunctionResult,
type FunctionContext,
type inferParameters,
oaiParams,
} from './function_context.js';
-export { CallableFunction, FunctionContext, inferParameters, oaiParams };
+export {
+ type ChatImage,
+ type ChatAudio,
+ type ChatContent,
+ ChatRole,
+ ChatMessage,
+ ChatContext,
+} from './chat_context.js';
diff --git a/agents/src/multimodal/multimodal_agent.ts b/agents/src/multimodal/multimodal_agent.ts
index 045ef868..d7cf1395 100644
--- a/agents/src/multimodal/multimodal_agent.ts
+++ b/agents/src/multimodal/multimodal_agent.ts
@@ -64,13 +64,16 @@ export class MultimodalAgent extends EventEmitter {
constructor({
model,
+ chatCtx,
fncCtx,
}: {
model: RealtimeModel;
- fncCtx?: llm.FunctionContext | undefined;
+ chatCtx?: llm.ChatContext;
+ fncCtx?: llm.FunctionContext;
}) {
super();
this.model = model;
+ this.#chatCtx = chatCtx;
this.#fncCtx = fncCtx;
}
@@ -83,6 +86,7 @@ export class MultimodalAgent extends EventEmitter {
#logger = log();
#session: RealtimeSession | null = null;
#fncCtx: llm.FunctionContext | undefined = undefined;
+ #chatCtx: llm.ChatContext | undefined = undefined;
#_started: boolean = false;
#_pendingFunctionCalls: Set<string> = new Set();
@@ -200,7 +204,7 @@ export class MultimodalAgent extends EventEmitter {
}
}
- this.#session = this.model.session({ fncCtx: this.#fncCtx });
+ this.#session = this.model.session({ fncCtx: this.#fncCtx, chatCtx: this.#chatCtx });
this.#started = true;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
diff --git a/examples/src/minimal_assistant.ts b/examples/src/minimal_assistant.ts
index 765f118a..fcfca73d 100644
--- a/examples/src/minimal_assistant.ts
+++ b/examples/src/minimal_assistant.ts
@@ -1,7 +1,7 @@
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
//
// SPDX-License-Identifier: Apache-2.0
-import { type JobContext, WorkerOptions, cli, defineAgent, multimodal } from '@livekit/agents';
+import { type JobContext, WorkerOptions, cli, defineAgent, llm, multimodal } from '@livekit/agents';
import * as openai from '@livekit/agents-plugin-openai';
import { fileURLToPath } from 'node:url';
import { z } from 'zod';
@@ -52,11 +52,12 @@ export default defineAgent({
.start(ctx.room, participant)
.then((session) => session as openai.realtime.RealtimeSession);
- session.conversation.item.create({
- type: 'message',
- role: 'user',
- content: [{ type: 'input_text', text: 'Say "How can I help you today?"' }],
- });
+ session.conversation.item.create(
+ llm.ChatMessage.create({
+ role: llm.ChatRole.USER,
+ text: 'Say "How can I help you today?"',
+ }),
+ );
session.response.create();
},
});
diff --git a/plugins/openai/src/realtime/realtime_model.ts b/plugins/openai/src/realtime/realtime_model.ts
index 23b16e5a..af33a185 100644
--- a/plugins/openai/src/realtime/realtime_model.ts
+++ b/plugins/openai/src/realtime/realtime_model.ts
@@ -1,7 +1,15 @@
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
//
// SPDX-License-Identifier: Apache-2.0
-import { AsyncIterableQueue, Future, Queue, llm, log, multimodal } from '@livekit/agents';
+import {
+ AsyncIterableQueue,
+ Future,
+ Queue,
+ llm,
+ log,
+ mergeFrames,
+ multimodal,
+} from '@livekit/agents';
import { AudioFrame } from '@livekit/rtc-node';
import { once } from 'node:events';
import { WebSocket } from 'ws';
@@ -108,6 +116,7 @@ class InputAudioBuffer {
class ConversationItem {
#session: RealtimeSession;
+ #logger = log();
constructor(session: RealtimeSession) {
this.#session = session;
@@ -129,12 +138,127 @@ class ConversationItem {
});
}
- create(item: api_proto.ConversationItemCreateContent, previousItemId?: string): void {
- this.#session.queueMsg({
- type: 'conversation.item.create',
- item,
- previous_item_id: previousItemId,
- });
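+  /** Converts a ChatMessage into a conversation.item.create event and queues it for the realtime API. */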
+  create(message: llm.ChatMessage, previousItemId?: string): void {
+    if (!message.content) {
+      return;
+    }
+
+    // type guard distinguishing ChatAudio from ChatImage content
+    const isChatAudio = (c: llm.ChatAudio | llm.ChatImage): c is llm.ChatAudio =>
+      (c as llm.ChatAudio).frame !== undefined;
+
+ let event: api_proto.ConversationItemCreateEvent;
+
+ if (message.toolCallId) {
+ if (typeof message.content !== 'string') {
+ throw new TypeError('message.content must be a string');
+ }
+
+ event = {
+ type: 'conversation.item.create',
+ previous_item_id: previousItemId,
+ item: {
+ type: 'function_call_output',
+ call_id: message.toolCallId,
+ output: message.content,
+ },
+ };
+ } else {
+ let content = message.content;
+ if (!Array.isArray(content)) {
+ content = [content];
+ }
+
+ if (message.role === llm.ChatRole.USER) {
+ const contents: (api_proto.InputTextContent | api_proto.InputAudioContent)[] = [];
+ for (const c of content) {
+ if (typeof c === 'string') {
+ contents.push({
+ type: 'input_text',
+ text: c,
+ });
+        } else if (isChatAudio(c)) {
+ contents.push({
+ type: 'input_audio',
+ audio: Buffer.from(mergeFrames(c.frame).data.buffer).toString('base64'),
+ });
+ }
+ }
+
+ event = {
+ type: 'conversation.item.create',
+ previous_item_id: previousItemId,
+ item: {
+ type: 'message',
+ role: 'user',
+ content: contents,
+ },
+ };
+ } else if (message.role === llm.ChatRole.ASSISTANT) {
+ const contents: api_proto.TextContent[] = [];
+ for (const c of content) {
+ if (typeof c === 'string') {
+ contents.push({
+ type: 'text',
+ text: c,
+ });
+        } else if (isChatAudio(c)) {
+ this.#logger.warn('audio content in assistant message is not supported');
+ }
+ }
+
+ event = {
+ type: 'conversation.item.create',
+ previous_item_id: previousItemId,
+ item: {
+ type: 'message',
+ role: 'assistant',
+ content: contents,
+ },
+ };
+ } else if (message.role === llm.ChatRole.SYSTEM) {
+ const contents: api_proto.InputTextContent[] = [];
+ for (const c of content) {
+ if (typeof c === 'string') {
+ contents.push({
+ type: 'input_text',
+ text: c,
+ });
+        } else if (isChatAudio(c)) {
+ this.#logger.warn('audio content in system message is not supported');
+ }
+ }
+
+ event = {
+ type: 'conversation.item.create',
+ previous_item_id: previousItemId,
+ item: {
+ type: 'message',
+ role: 'system',
+ content: contents,
+ },
+ };
+ } else {
+ this.#logger
+ .child({ message })
+ .warn('chat message is not supported inside the realtime API');
+ return;
+ }
+ }
+
+ this.#session.queueMsg(event);
}
}
@@ -302,6 +426,7 @@ export class RealtimeModel extends multimodal.RealtimeModel {
session({
fncCtx,
+ chatCtx,
modalities = this.#defaultOpts.modalities,
instructions = this.#defaultOpts.instructions,
voice = this.#defaultOpts.voice,
@@ -313,6 +438,7 @@ export class RealtimeModel extends multimodal.RealtimeModel {
maxResponseOutputTokens = this.#defaultOpts.maxResponseOutputTokens,
}: {
fncCtx?: llm.FunctionContext;
+ chatCtx?: llm.ChatContext;
modalities?: ['text', 'audio'] | ['text'];
instructions?: string;
voice?: api_proto.Voice;
@@ -341,7 +467,10 @@ export class RealtimeModel extends multimodal.RealtimeModel {
entraToken: this.#defaultOpts.entraToken,
};
- const newSession = new RealtimeSession(opts, fncCtx);
+ const newSession = new RealtimeSession(opts, {
+ chatCtx: chatCtx || new llm.ChatContext(),
+ fncCtx,
+ });
this.#sessions.push(newSession);
return newSession;
}
@@ -352,6 +481,7 @@ export class RealtimeModel extends multimodal.RealtimeModel {
}
export class RealtimeSession extends multimodal.RealtimeSession {
+ #chatCtx: llm.ChatContext | undefined = undefined;
#fncCtx: llm.FunctionContext | undefined = undefined;
#opts: ModelOptions;
#pendingResponses: { [id: string]: RealtimeResponse } = {};
@@ -363,10 +493,14 @@ export class RealtimeSession extends multimodal.RealtimeSession {
#closing = true;
#sendQueue = new Queue<api_proto.ClientEvent>();
- constructor(opts: ModelOptions, fncCtx?: llm.FunctionContext | undefined) {
+ constructor(
+ opts: ModelOptions,
+ { fncCtx, chatCtx }: { fncCtx?: llm.FunctionContext; chatCtx?: llm.ChatContext },
+ ) {
super();
this.#opts = opts;
+ this.#chatCtx = chatCtx;
this.#fncCtx = fncCtx;
this.#task = this.#start();
@@ -385,6 +519,10 @@ export class RealtimeSession extends multimodal.RealtimeSession {
});
}
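+  /** The chat context this session was created with, if any. */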
+ get chatCtx(): llm.ChatContext | undefined {
+ return this.#chatCtx;
+ }
+
get fncCtx(): llm.FunctionContext | undefined {
return this.#fncCtx;
}
@@ -869,11 +1007,11 @@ export class RealtimeSession extends multimodal.RealtimeSession {
callId: item.call_id,
});
this.conversation.item.create(
- {
- type: 'function_call_output',
- call_id: item.call_id,
- output: content,
- },
+ llm.ChatMessage.createToolFromFunctionResult({
+ name: item.name,
+ toolCallId: item.call_id,
+ result: content,
+ }),
output.itemId,
);
this.response.create();