import { APIResource } from "../../../resource.js";
import * as RealtimeAPI from "./realtime.js";
import * as Shared from "../../shared.js";
import * as SessionsAPI from "./sessions.js";
import { Session as SessionsAPISession, SessionCreateParams, SessionCreateResponse, Sessions } from "./sessions.js";
export declare class Realtime extends APIResource {
  sessions: SessionsAPI.Sessions;
}
/**
 * Returned when a conversation is created. Emitted right after session creation.
 */
export interface ConversationCreatedEvent {
  /**
   * The conversation resource.
   */
  conversation: ConversationCreatedEvent.Conversation;
  /**
   * The unique ID of the server event.
   */
  event_id: string;
  /**
   * The event type, must be `conversation.created`.
   */
  type: 'conversation.created';
}
export declare namespace ConversationCreatedEvent {
  /**
   * The conversation resource.
   */
  interface Conversation {
    /**
     * The unique ID of the conversation.
     */
    id?: string;
    /**
     * The object type, must be `realtime.conversation`.
     */
    object?: 'realtime.conversation';
  }
}
/**
 * The item to add to the conversation.
 */
export interface ConversationItem {
  /**
   * The unique ID of the item. This can be generated by the client to help manage
   * server-side context, but is not required because the server will generate one
   * if not provided.
   */
  id?: string;
  /**
   * The arguments of the function call (for `function_call` items).
   */
  arguments?: string;
  /**
   * The ID of the function call (for `function_call` and `function_call_output`
   * items). If passed on a `function_call_output` item, the server will check that a
   * `function_call` item with the same ID exists in the conversation history.
   */
  call_id?: string;
  /**
   * The content of the message, applicable for `message` items.
   *
   * - Message items of role `system` support only `input_text` content
   * - Message items of role `user` support `input_text` and `input_audio` content
   * - Message items of role `assistant` support `text` content.
   */
  content?: Array<ConversationItemContent>;
  /**
   * The name of the function being called (for `function_call` items).
   */
  name?: string;
  /**
   * Identifier for the API object being returned - always `realtime.item`.
   */
  object?: 'realtime.item';
  /**
   * The output of the function call (for `function_call_output` items).
   */
  output?: string;
  /**
   * The role of the message sender (`user`, `assistant`, `system`), only applicable
   * for `message` items.
   */
  role?: 'user' | 'assistant' | 'system';
  /**
   * The status of the item (`completed`, `incomplete`). These have no effect on the
   * conversation, but are accepted for consistency with the
   * `conversation.item.created` event.
   */
  status?: 'completed' | 'incomplete';
  /**
   * The type of the item (`message`, `function_call`, `function_call_output`).
   */
  type?: 'message' | 'function_call' | 'function_call_output';
}
export interface ConversationItemContent {
  /**
   * ID of a previous conversation item to reference (for `item_reference` content
   * types in `response.create` events). These can reference both client and server
   * created items.
   */
  id?: string;
  /**
   * Base64-encoded audio bytes, used for `input_audio` content type.
   */
  audio?: string;
  /**
   * The text content, used for `input_text` and `text` content types.
   */
  text?: string;
  /**
   * The transcript of the audio, used for `input_audio` content type.
   */
  transcript?: string;
  /**
   * The content type (`input_text`, `input_audio`, `item_reference`, `text`).
   */
  type?: 'input_text' | 'input_audio' | 'item_reference' | 'text';
}
/**
 * Add a new Item to the Conversation's context, including messages, function
 * calls, and function call responses. This event can be used both to populate a
 * "history" of the conversation and to add new items mid-stream, but has the
 * current limitation that it cannot populate assistant audio messages.
 *
 * If successful, the server will respond with a `conversation.item.created` event;
 * otherwise an `error` event will be sent.
 */
export interface ConversationItemCreateEvent {
  /**
   * The item to add to the conversation.
   */
  item: ConversationItem;
  /**
   * The event type, must be `conversation.item.create`.
   */
  type: 'conversation.item.create';
  /**
   * Optional client-generated ID used to identify this event.
   */
  event_id?: string;
  /**
   * The ID of the preceding item after which the new item will be inserted. If not
   * set, the new item will be appended to the end of the conversation. If set to
   * `root`, the new item will be added to the beginning of the conversation. If set
   * to an existing ID, it allows an item to be inserted mid-conversation. If the ID
   * cannot be found, an error will be returned and the item will not be added.
   */
  previous_item_id?: string;
}
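/*
 * A minimal usage sketch (illustrative, not part of the generated declarations):
 * sending a `conversation.item.create` event over an already-connected WebSocket.
 * The `ws` connection and the JSON-stringify-and-send transport are assumptions,
 * not something this module provides.
 *
 *     const createItem: ConversationItemCreateEvent = {
 *       type: 'conversation.item.create',
 *       item: {
 *         type: 'message',
 *         role: 'user',
 *         content: [{ type: 'input_text', text: 'Hello!' }],
 *       },
 *     };
 *     ws.send(JSON.stringify(createItem)); // ws: assumed WebSocket connection
 */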
/**
 * Returned when a conversation item is created. There are several scenarios that
 * produce this event:
 *
 * - The server is generating a Response, which if successful will produce either
 *   one or two Items, which will be of type `message` (role `assistant`) or type
 *   `function_call`.
 * - The input audio buffer has been committed, either by the client or the server
 *   (in `server_vad` mode). The server will take the content of the input audio
 *   buffer and add it to a new user message Item.
 * - The client has sent a `conversation.item.create` event to add a new Item to
 *   the Conversation.
 */
export interface ConversationItemCreatedEvent {
  /**
   * The unique ID of the server event.
   */
  event_id: string;
  /**
   * The item to add to the conversation.
   */
  item: ConversationItem;
  /**
   * The ID of the preceding item in the Conversation context, which allows the
   * client to understand the order of the conversation.
   */
  previous_item_id: string;
  /**
   * The event type, must be `conversation.item.created`.
   */
  type: 'conversation.item.created';
}
/**
 * Send this event when you want to remove any item from the conversation history.
 * The server will respond with a `conversation.item.deleted` event, unless the
 * item does not exist in the conversation history, in which case the server will
 * respond with an error.
 */
export interface ConversationItemDeleteEvent {
  /**
   * The ID of the item to delete.
   */
  item_id: string;
  /**
   * The event type, must be `conversation.item.delete`.
   */
  type: 'conversation.item.delete';
  /**
   * Optional client-generated ID used to identify this event.
   */
  event_id?: string;
}
/**
 * Returned when an item in the conversation is deleted by the client with a
 * `conversation.item.delete` event. This event is used to synchronize the server's
 * understanding of the conversation history with the client's view.
 */
export interface ConversationItemDeletedEvent {
  /**
   * The unique ID of the server event.
   */
  event_id: string;
  /**
   * The ID of the item that was deleted.
   */
  item_id: string;
  /**
   * The event type, must be `conversation.item.deleted`.
   */
  type: 'conversation.item.deleted';
}
/**
 * This event is the output of audio transcription for user audio written to the
 * user audio buffer. Transcription begins when the input audio buffer is committed
 * by the client or server (in `server_vad` mode). Transcription runs
 * asynchronously with Response creation, so this event may come before or after
 * the Response events.
 *
 * Realtime API models accept audio natively, and thus input transcription is a
 * separate process run on a separate ASR (Automatic Speech Recognition) model,
 * currently always `whisper-1`. Thus the transcript may diverge somewhat from the
 * model's interpretation, and should be treated as a rough guide.
 */
export interface ConversationItemInputAudioTranscriptionCompletedEvent {
  /**
   * The index of the content part containing the audio.
   */
  content_index: number;
  /**
   * The unique ID of the server event.
   */
  event_id: string;
  /**
   * The ID of the user message item containing the audio.
   */
  item_id: string;
  /**
   * The transcribed text.
   */
  transcript: string;
  /**
   * The event type, must be `conversation.item.input_audio_transcription.completed`.
   */
  type: 'conversation.item.input_audio_transcription.completed';
}
/**
 * Returned when input audio transcription is configured, and a transcription
 * request for a user message failed. These events are separate from other `error`
 * events so that the client can identify the related Item.
 */
export interface ConversationItemInputAudioTranscriptionFailedEvent {
  /**
   * The index of the content part containing the audio.
   */
  content_index: number;
  /**
   * Details of the transcription error.
   */
  error: ConversationItemInputAudioTranscriptionFailedEvent.Error;
  /**
   * The unique ID of the server event.
   */
  event_id: string;
  /**
   * The ID of the user message item.
   */
  item_id: string;
  /**
   * The event type, must be `conversation.item.input_audio_transcription.failed`.
   */
  type: 'conversation.item.input_audio_transcription.failed';
}
export declare namespace ConversationItemInputAudioTranscriptionFailedEvent {
  /**
   * Details of the transcription error.
   */
  interface Error {
    /**
     * Error code, if any.
     */
    code?: string;
    /**
     * A human-readable error message.
     */
    message?: string;
    /**
     * Parameter related to the error, if any.
     */
    param?: string;
    /**
     * The type of error.
     */
    type?: string;
  }
}
/**
 * Send this event to truncate a previous assistant message's audio. The server
 * will produce audio faster than realtime, so this event is useful when the user
 * interrupts to truncate audio that has already been sent to the client but not
 * yet played. This will synchronize the server's understanding of the audio with
 * the client's playback.
 *
 * Truncating audio will delete the server-side text transcript to ensure there is
 * no text in the context that hasn't been heard by the user.
 *
 * If successful, the server will respond with a `conversation.item.truncated`
 * event.
 */
export interface ConversationItemTruncateEvent {
  /**
   * Inclusive duration up to which audio is truncated, in milliseconds. If the
   * audio_end_ms is greater than the actual audio duration, the server will respond
   * with an error.
   */
  audio_end_ms: number;
  /**
   * The index of the content part to truncate. Set this to 0.
   */
  content_index: number;
  /**
   * The ID of the assistant message item to truncate. Only assistant message items
   * can be truncated.
   */
  item_id: string;
  /**
   * The event type, must be `conversation.item.truncate`.
   */
  type: 'conversation.item.truncate';
  /**
   * Optional client-generated ID used to identify this event.
   */
  event_id?: string;
}
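/*
 * A minimal interruption-handling sketch (illustrative only): when the user
 * interrupts, truncate the assistant audio at the point the client has actually
 * played. `currentItemId` and `playedMs` are hypothetical client-side
 * bookkeeping; `ws` is an assumed WebSocket connection.
 *
 *     const truncate: ConversationItemTruncateEvent = {
 *       type: 'conversation.item.truncate',
 *       item_id: currentItemId, // assistant message item currently playing
 *       content_index: 0,
 *       audio_end_ms: playedMs, // audio actually heard by the user
 *     };
 *     ws.send(JSON.stringify(truncate));
 */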
/**
 * Returned when an earlier assistant audio message item is truncated by the client
 * with a `conversation.item.truncate` event. This event is used to synchronize the
 * server's understanding of the audio with the client's playback.
 *
 * This action will truncate the audio and remove the server-side text transcript
 * to ensure there is no text in the context that hasn't been heard by the user.
 */
export interface ConversationItemTruncatedEvent {
  /**
   * The duration up to which the audio was truncated, in milliseconds.
   */
  audio_end_ms: number;
  /**
   * The index of the content part that was truncated.
   */
  content_index: number;
  /**
   * The unique ID of the server event.
   */
  event_id: string;
  /**
   * The ID of the assistant message item that was truncated.
   */
  item_id: string;
  /**
   * The event type, must be `conversation.item.truncated`.
   */
  type: 'conversation.item.truncated';
}
/**
 * The item to add to the conversation.
 */
export interface ConversationItemWithReference {
  /**
   * For an item of type (`message` | `function_call` | `function_call_output`) this
   * field allows the client to assign the unique ID of the item. It is not required
   * because the server will generate one if not provided.
   *
   * For an item of type `item_reference`, this field is required and is a reference
   * to any item that has previously existed in the conversation.
   */
  id?: string;
  /**
   * The arguments of the function call (for `function_call` items).
   */
  arguments?: string;
  /**
   * The ID of the function call (for `function_call` and `function_call_output`
   * items). If passed on a `function_call_output` item, the server will check that a
   * `function_call` item with the same ID exists in the conversation history.
   */
  call_id?: string;
  /**
   * The content of the message, applicable for `message` items.
   *
   * - Message items of role `system` support only `input_text` content
   * - Message items of role `user` support `input_text` and `input_audio` content
   * - Message items of role `assistant` support `text` content.
   */
  content?: Array<ConversationItemContent>;
  /**
   * The name of the function being called (for `function_call` items).
   */
  name?: string;
  /**
   * Identifier for the API object being returned - always `realtime.item`.
   */
  object?: 'realtime.item';
  /**
   * The output of the function call (for `function_call_output` items).
   */
  output?: string;
  /**
   * The role of the message sender (`user`, `assistant`, `system`), only applicable
   * for `message` items.
   */
  role?: 'user' | 'assistant' | 'system';
  /**
   * The status of the item (`completed`, `incomplete`). These have no effect on the
   * conversation, but are accepted for consistency with the
   * `conversation.item.created` event.
   */
  status?: 'completed' | 'incomplete';
  /**
   * The type of the item (`message`, `function_call`, `function_call_output`,
   * `item_reference`).
   */
  type?: 'message' | 'function_call' | 'function_call_output' | 'item_reference';
}
/**
 * Returned when an error occurs, which could be a client problem or a server
 * problem. Most errors are recoverable and the session will stay open; we
 * recommend that implementers monitor and log error messages by default.
 */
export interface ErrorEvent {
  /**
   * Details of the error.
   */
  error: ErrorEvent.Error;
  /**
   * The unique ID of the server event.
   */
  event_id: string;
  /**
   * The event type, must be `error`.
   */
  type: 'error';
}
export declare namespace ErrorEvent {
  /**
   * Details of the error.
   */
  interface Error {
    /**
     * A human-readable error message.
     */
    message: string;
    /**
     * The type of error (e.g., "invalid_request_error", "server_error").
     */
    type: string;
    /**
     * Error code, if any.
     */
    code?: string | null;
    /**
     * The event_id of the client event that caused the error, if applicable.
     */
    event_id?: string | null;
    /**
     * Parameter related to the error, if any.
     */
    param?: string | null;
  }
}
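/*
 * A minimal error-monitoring sketch (illustrative only): log every `error`
 * event and, when present, correlate it back to the client event that caused
 * it via `error.event_id`. The `onError` name is hypothetical.
 *
 *     function onError(event: ErrorEvent): void {
 *       const { type, code, message, event_id } = event.error;
 *       console.error(`Realtime ${type}${code ? ` (${code})` : ''}: ${message}`);
 *       if (event_id) console.error(`caused by client event ${event_id}`);
 *     }
 */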
/**
 * Send this event to append audio bytes to the input audio buffer. The audio
 * buffer is temporary storage you can write to and later commit. In Server VAD
 * mode, the audio buffer is used to detect speech and the server will decide when
 * to commit. When Server VAD is disabled, you must commit the audio buffer
 * manually.
 *
 * The client may choose how much audio to place in each event, up to a maximum of
 * 15 MiB; for example, streaming smaller chunks from the client may allow the VAD
 * to be more responsive. Unlike most other client events, the server will not send
 * a confirmation response to this event.
 */
export interface InputAudioBufferAppendEvent {
  /**
   * Base64-encoded audio bytes. This must be in the format specified by the
   * `input_audio_format` field in the session configuration.
   */
  audio: string;
  /**
   * The event type, must be `input_audio_buffer.append`.
   */
  type: 'input_audio_buffer.append';
  /**
   * Optional client-generated ID used to identify this event.
   */
  event_id?: string;
}
/**
 * Send this event to clear the audio bytes in the buffer. The server will respond
 * with an `input_audio_buffer.cleared` event.
 */
export interface InputAudioBufferClearEvent {
  /**
   * The event type, must be `input_audio_buffer.clear`.
   */
  type: 'input_audio_buffer.clear';
  /**
   * Optional client-generated ID used to identify this event.
   */
  event_id?: string;
}
/**
 * Returned when the input audio buffer is cleared by the client with an
 * `input_audio_buffer.clear` event.
 */
export interface InputAudioBufferClearedEvent {
  /**
   * The unique ID of the server event.
   */
  event_id: string;
  /**
   * The event type, must be `input_audio_buffer.cleared`.
   */
  type: 'input_audio_buffer.cleared';
}
/**
 * Send this event to commit the user input audio buffer, which will create a new
 * user message item in the conversation. This event will produce an error if the
 * input audio buffer is empty. When in Server VAD mode, the client does not need
 * to send this event; the server will commit the audio buffer automatically.
 *
 * Committing the input audio buffer will trigger input audio transcription (if
 * enabled in session configuration), but it will not create a response from the
 * model. The server will respond with an `input_audio_buffer.committed` event.
 */
export interface InputAudioBufferCommitEvent {
  /**
   * The event type, must be `input_audio_buffer.commit`.
   */
  type: 'input_audio_buffer.commit';
  /**
   * Optional client-generated ID used to identify this event.
   */
  event_id?: string;
}
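/*
 * A minimal manual-commit sketch (illustrative only, for sessions with Server
 * VAD disabled, assuming a Node.js environment for `Buffer`): append
 * base64-encoded audio, then commit the buffer to create the user message item.
 * `ws` and `pcm16Chunk` are assumed to exist.
 *
 *     const append: InputAudioBufferAppendEvent = {
 *       type: 'input_audio_buffer.append',
 *       audio: Buffer.from(pcm16Chunk).toString('base64'), // pcm16Chunk: raw audio bytes
 *     };
 *     ws.send(JSON.stringify(append));
 *     const commit: InputAudioBufferCommitEvent = { type: 'input_audio_buffer.commit' };
 *     ws.send(JSON.stringify(commit));
 */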
/**
 * Returned when an input audio buffer is committed, either by the client or
 * automatically in server VAD mode. The `item_id` property is the ID of the user
 * message item that will be created; thus a `conversation.item.created` event will
 * also be sent to the client.
 */
export interface InputAudioBufferCommittedEvent {
  /**
   * The unique ID of the server event.
   */
  event_id: string;
  /**
   * The ID of the user message item that will be created.
   */
  item_id: string;
  /**
   * The ID of the preceding item after which the new item will be inserted.
   */
  previous_item_id: string;
  /**
   * The event type, must be `input_audio_buffer.committed`.
   */
  type: 'input_audio_buffer.committed';
}
/**
 * Sent by the server when in `server_vad` mode to indicate that speech has been
 * detected in the audio buffer. This can happen any time audio is added to the
 * buffer (unless speech is already detected). The client may want to use this
 * event to interrupt audio playback or provide visual feedback to the user.
 *
 * The client should expect to receive an `input_audio_buffer.speech_stopped` event
 * when speech stops. The `item_id` property is the ID of the user message item
 * that will be created when speech stops and will also be included in the
 * `input_audio_buffer.speech_stopped` event (unless the client manually commits
 * the audio buffer during VAD activation).
 */
export interface InputAudioBufferSpeechStartedEvent {
  /**
   * Milliseconds from the start of all audio written to the buffer during the
   * session when speech was first detected. This will correspond to the beginning of
   * audio sent to the model, and thus includes the `prefix_padding_ms` configured in
   * the Session.
   */
  audio_start_ms: number;
  /**
   * The unique ID of the server event.
   */
  event_id: string;
  /**
   * The ID of the user message item that will be created when speech stops.
   */
  item_id: string;
  /**
   * The event type, must be `input_audio_buffer.speech_started`.
   */
  type: 'input_audio_buffer.speech_started';
}
/**
 * Returned in `server_vad` mode when the server detects the end of speech in the
 * audio buffer. The server will also send a `conversation.item.created` event
 * with the user message item that is created from the audio buffer.
 */
export interface InputAudioBufferSpeechStoppedEvent {
  /**
   * Milliseconds since the session started when speech stopped. This will correspond
   * to the end of audio sent to the model, and thus includes the
   * `min_silence_duration_ms` configured in the Session.
   */
  audio_end_ms: number;
  /**
   * The unique ID of the server event.
   */
  event_id: string;
  /**
   * The ID of the user message item that will be created.
   */
  item_id: string;
  /**
   * The event type, must be `input_audio_buffer.speech_stopped`.
   */
  type: 'input_audio_buffer.speech_stopped';
}
/**
 * Emitted at the beginning of a Response to indicate the updated rate limits. When
 * a Response is created some tokens will be "reserved" for the output tokens; the
 * rate limits shown here reflect that reservation, which is then adjusted
 * accordingly once the Response is completed.
 */
export interface RateLimitsUpdatedEvent {
  /**
   * The unique ID of the server event.
   */
  event_id: string;
  /**
   * List of rate limit information.
   */
  rate_limits: Array<RateLimitsUpdatedEvent.RateLimit>;
  /**
   * The event type, must be `rate_limits.updated`.
   */
  type: 'rate_limits.updated';
}
export declare namespace RateLimitsUpdatedEvent {
  interface RateLimit {
    /**
     * The maximum allowed value for the rate limit.
     */
    limit?: number;
    /**
     * The name of the rate limit (`requests`, `tokens`).
     */
    name?: 'requests' | 'tokens';
    /**
     * The remaining value before the limit is reached.
     */
    remaining?: number;
    /**
     * Seconds until the rate limit resets.
     */
    reset_seconds?: number;
  }
}
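/*
 * A minimal rate-limit bookkeeping sketch (illustrative only): cache the
 * remaining token budget whenever the server reports it. `remainingTokens` is
 * a hypothetical client-side variable.
 *
 *     let remainingTokens: number | undefined;
 *     function onRateLimits(event: RateLimitsUpdatedEvent): void {
 *       for (const limit of event.rate_limits) {
 *         if (limit.name === 'tokens') remainingTokens = limit.remaining;
 *       }
 *     }
 */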
/**
 * All events that the client can send to the Realtime API
 */
export type RealtimeClientEvent =
  | SessionUpdateEvent
  | InputAudioBufferAppendEvent
  | InputAudioBufferCommitEvent
  | InputAudioBufferClearEvent
  | ConversationItemCreateEvent
  | ConversationItemTruncateEvent
  | ConversationItemDeleteEvent
  | ResponseCreateEvent
  | ResponseCancelEvent;
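/*
 * A minimal typed-send helper sketch (illustrative only): constraining the
 * payload to `RealtimeClientEvent` lets the compiler reject malformed events
 * before they reach the wire. `ws` is an assumed WebSocket connection.
 *
 *     function sendClientEvent(ws: WebSocket, event: RealtimeClientEvent): void {
 *       ws.send(JSON.stringify(event));
 *     }
 *     sendClientEvent(ws, { type: 'input_audio_buffer.clear' });
 */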
/**
 * The response resource.
 */
export interface RealtimeResponse {
  /**
   * The unique ID of the response.
   */
  id?: string;
  /**
   * Which conversation the response is added to, determined by the `conversation`
   * field in the `response.create` event. If `auto`, the response will be added to
   * the default conversation and the value of `conversation_id` will be an id like
   * `conv_1234`. If `none`, the response will not be added to any conversation and
   * the value of `conversation_id` will be `null`. If responses are being triggered
   * by server VAD, the response will be added to the default conversation, thus the
   * `conversation_id` will be an id like `conv_1234`.
   */
  conversation_id?: string;
  /**
   * Maximum number of output tokens for a single assistant response, inclusive of
   * tool calls, that was used in this response.
   */
  max_output_tokens?: number | 'inf';
  /**
   * Set of 16 key-value pairs that can be attached to an object. This can be useful
   * for storing additional information about the object in a structured format, and
   * querying for objects via API or the dashboard.
   *
   * Keys are strings with a maximum length of 64 characters. Values are strings with
   * a maximum length of 512 characters.
   */
  metadata?: Shared.Metadata | null;
  /**
   * The set of modalities the model used to respond. If there are multiple
   * modalities, the model will pick one, for example if `modalities` is
   * `["text", "audio"]`, the model could be responding in either text or audio.
   */
  modalities?: Array<'text' | 'audio'>;
  /**
   * The object type, must be `realtime.response`.
   */
  object?: 'realtime.response';
  /**
   * The list of output items generated by the response.
   */
  output?: Array<ConversationItem>;
  /**
   * The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
   */
  output_audio_format?: 'pcm16' | 'g711_ulaw' | 'g711_alaw';
  /**
   * The final status of the response (`completed`, `cancelled`, `failed`, or
   * `incomplete`).
   */
  status?: 'completed' | 'cancelled' | 'failed' | 'incomplete';
  /**
   * Additional details about the status.
   */
  status_details?: RealtimeResponseStatus;
  /**
   * Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.
   */
  temperature?: number;
  /**
   * Usage statistics for the Response; this will correspond to billing. A Realtime
   * API session will maintain a conversation context and append new Items to the
   * Conversation, thus output from previous turns (text and audio tokens) will
   * become the input for later turns.
   */
  usage?: RealtimeResponseUsage;
  /**
   * The voice the model used to respond. Current voice options are `alloy`, `ash`,
   * `ballad`, `coral`, `echo`, `sage`, `shimmer`, and `verse`.
   */
  voice?: 'alloy' | 'ash' | 'ballad' | 'coral' | 'echo' | 'sage' | 'shimmer' | 'verse';
}
/**
 * Additional details about the status.
 */
export interface RealtimeResponseStatus {
  /**
   * A description of the error that caused the response to fail, populated when the
   * `status` is `failed`.
   */
  error?: RealtimeResponseStatus.Error;
  /**
   * The reason the Response did not complete. For a `cancelled` Response, one of
   * `turn_detected` (the server VAD detected a new start of speech) or
   * `client_cancelled` (the client sent a cancel event). For an `incomplete`
   * Response, one of `max_output_tokens` or `content_filter` (the server-side safety
   * filter activated and cut off the response).
   */
  reason?: 'turn_detected' | 'client_cancelled' | 'max_output_tokens' | 'content_filter';
  /**
   * The type of error that caused the response to fail, corresponding with the
   * `status` field (`completed`, `cancelled`, `incomplete`, `failed`).
   */
  type?: 'completed' | 'cancelled' | 'incomplete' | 'failed';
}
export declare namespace RealtimeResponseStatus {
  /**
   * A description of the error that caused the response to fail, populated when the
   * `status` is `failed`.
   */
  interface Error {
    /**
     * Error code, if any.
     */
    code?: string;
    /**
     * The type of error.
     */
    type?: string;
  }
}
/**
 * Usage statistics for the Response; this will correspond to billing. A Realtime
 * API session will maintain a conversation context and append new Items to the
 * Conversation, thus output from previous turns (text and audio tokens) will
 * become the input for later turns.
 */
export interface RealtimeResponseUsage {
  /**
   * Details about the input tokens used in the Response.
   */
  input_token_details?: RealtimeResponseUsage.InputTokenDetails;
  /**
   * The number of input tokens used in the Response, including text and audio
   * tokens.
   */
  input_tokens?: number;
  /**
   * Details about the output tokens used in the Response.
   */
  output_token_details?: RealtimeResponseUsage.OutputTokenDetails;
  /**
   * The number of output tokens sent in the Response, including text and audio
   * tokens.
   */
  output_tokens?: number;
  /**
   * The total number of tokens in the Response including input and output text and
   * audio tokens.
   */
  total_tokens?: number;
}
export declare namespace RealtimeResponseUsage {
  /**
   * Details about the input tokens used in the Response.
   */
  interface InputTokenDetails {
    /**
     * The number of audio tokens used in the Response.
     */
    audio_tokens?: number;
    /**
     * The number of cached tokens used in the Response.
     */
    cached_tokens?: number;
    /**
     * The number of text tokens used in the Response.
     */
    text_tokens?: number;
  }
  /**
   * Details about the output tokens used in the Response.
   */
  interface OutputTokenDetails {
    /**
     * The number of audio tokens used in the Response.
     */
    audio_tokens?: number;
    /**
     * The number of text tokens used in the Response.
     */
    text_tokens?: number;
  }
}
/**
 * All events that the Realtime API can send back
 */
export type RealtimeServerEvent =
  | ErrorEvent
  | SessionCreatedEvent
  | SessionUpdatedEvent
  | ConversationCreatedEvent
  | InputAudioBufferCommittedEvent
  | InputAudioBufferClearedEvent
  | InputAudioBufferSpeechStartedEvent
  | InputAudioBufferSpeechStoppedEvent
  | ConversationItemCreatedEvent
  | ConversationItemInputAudioTranscriptionCompletedEvent
  | ConversationItemInputAudioTranscriptionFailedEvent
  | ConversationItemTruncatedEvent
  | ConversationItemDeletedEvent
  | ResponseCreatedEvent
  | ResponseDoneEvent
  | ResponseOutputItemAddedEvent
  | ResponseOutputItemDoneEvent
  | ResponseContentPartAddedEvent
  | ResponseContentPartDoneEvent
  | ResponseTextDeltaEvent
  | ResponseTextDoneEvent
  | ResponseAudioTranscriptDeltaEvent
  | ResponseAudioTranscriptDoneEvent
  | ResponseAudioDeltaEvent
  | ResponseAudioDoneEvent
  | ResponseFunctionCallArgumentsDeltaEvent
  | ResponseFunctionCallArgumentsDoneEvent
  | RateLimitsUpdatedEvent;
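/*
 * A minimal dispatch sketch (illustrative only): every server event carries a
 * literal `type`, so a `switch` narrows the union automatically.
 *
 *     let text = '';
 *     function onServerEvent(raw: string): void {
 *       const event = JSON.parse(raw) as RealtimeServerEvent;
 *       switch (event.type) {
 *         case 'response.text.delta':
 *           text += event.delta; // narrowed to ResponseTextDeltaEvent
 *           break;
 *         case 'response.done':
 *           console.log(text, event.response.status); // narrowed to ResponseDoneEvent
 *           break;
 *         // ...handle the remaining event types as needed
 *       }
 *     }
 */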
/**
 * Returned when the model-generated audio is updated.
 */
export interface ResponseAudioDeltaEvent {
  /**
   * The index of the content part in the item's content array.
   */
  content_index: number;
  /**
   * Base64-encoded audio data delta.
   */
  delta: string;
  /**
   * The unique ID of the server event.
   */
  event_id: string;
  /**
   * The ID of the item.
   */
  item_id: string;
  /**
   * The index of the output item in the response.
   */
  output_index: number;
  /**
   * The ID of the response.
   */
  response_id: string;
  /**
   * The event type, must be `response.audio.delta`.
   */
  type: 'response.audio.delta';
}
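/*
 * A minimal audio-delta sketch (illustrative only, assuming a Node.js
 * environment for `Buffer`): each delta is a base64 chunk in the session's
 * `output_audio_format`; decode it and queue it for playback. `playbackQueue`
 * is a hypothetical client-side buffer.
 *
 *     const playbackQueue: Uint8Array[] = [];
 *     function onAudioDelta(event: ResponseAudioDeltaEvent): void {
 *       playbackQueue.push(new Uint8Array(Buffer.from(event.delta, 'base64')));
 *     }
 */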
/**
 * Returned when the model-generated audio is done. Also emitted when a Response is
 * interrupted, incomplete, or cancelled.
 */
export interface ResponseAudioDoneEvent {
  /**
   * The index of the content part in the item's content array.
   */
  content_index: number;
  /**
   * The unique ID of the server event.
   */
  event_id: string;
  /**
   * The ID of the item.
   */
  item_id: string;
  /**
   * The index of the output item in the response.
   */
  output_index: number;
  /**
   * The ID of the response.
   */
  response_id: string;
  /**
   * The event type, must be `response.audio.done`.
   */
  type: 'response.audio.done';
}
/**
 * Returned when the model-generated transcription of audio output is updated.
 */
export interface ResponseAudioTranscriptDeltaEvent {
  /**
   * The index of the content part in the item's content array.
   */
  content_index: number;
  /**
   * The transcript delta.
   */
  delta: string;
  /**
   * The unique ID of the server event.
   */
  event_id: string;
  /**
   * The ID of the item.
   */
  item_id: string;
  /**
   * The index of the output item in the response.
   */
  output_index: number;
  /**
   * The ID of the response.
   */
  response_id: string;
  /**
   * The event type, must be `response.audio_transcript.delta`.
   */
  type: 'response.audio_transcript.delta';
}
/**
 * Returned when the model-generated transcription of audio output is done
 * streaming. Also emitted when a Response is interrupted, incomplete, or
 * cancelled.
 */
export interface ResponseAudioTranscriptDoneEvent {
  /**
   * The index of the content part in the item's content array.
   */
  content_index: number;
  /**
   * The unique ID of the server event.
   */
  event_id: string;
  /**
   * The ID of the item.
   */
  item_id: string;
  /**
   * The index of the output item in the response.
   */
  output_index: number;
  /**
   * The ID of the response.
   */
  response_id: string;
  /**
   * The final transcript of the audio.
   */
  transcript: string;
  /**
   * The event type, must be `response.audio_transcript.done`.
   */
  type: 'response.audio_transcript.done';
}
/**
 * Send this event to cancel an in-progress response. The server will respond with
 * a `response.cancelled` event or an error if there is no response to cancel.
 */
export interface ResponseCancelEvent {
  /**
   * The event type, must be `response.cancel`.
   */
  type: 'response.cancel';
  /**
   * Optional client-generated ID used to identify this event.
   */
  event_id?: string;
  /**
   * A specific response ID to cancel - if not provided, will cancel an in-progress
   * response in the default conversation.
   */
  response_id?: string;
}
/**
 * Returned when a new content part is added to an assistant message item during
 * response generation.
 */
export interface ResponseContentPartAddedEvent {
  /**
   * The index of the content part in the item's content array.
   */
  content_index: number;
  /**
   * The unique ID of the server event.
   */
  event_id: string;
  /**
   * The ID of the item to which the content part was added.
   */
  item_id: string;
  /**
   * The index of the output item in the response.
   */
  output_index: number;
  /**
   * The content part that was added.
   */
  part: ResponseContentPartAddedEvent.Part;
  /**
   * The ID of the response.
   */
  response_id: string;
  /**
   * The event type, must be `response.content_part.added`.
   */
  type: 'response.content_part.added';
}
export declare namespace ResponseContentPartAddedEvent {
  /**
   * The content part that was added.
   */
  interface Part {
    /**
     * Base64-encoded audio data (if type is "audio").
     */
    audio?: string;
    /**
     * The text content (if type is "text").
     */
    text?: string;
    /**
     * The transcript of the audio (if type is "audio").
     */
    transcript?: string;
    /**
     * The content type ("text", "audio").
     */
    type?: 'text' | 'audio';
  }
}
/**
 * Returned when a content part is done streaming in an assistant message item.
 * Also emitted when a Response is interrupted, incomplete, or cancelled.
 */
export interface ResponseContentPartDoneEvent {
  /**
   * The index of the content part in the item's content array.
   */
  content_index: number;
  /**
   * The unique ID of the server event.
   */
  event_id: string;
  /**
   * The ID of the item.
   */
  item_id: string;
  /**
   * The index of the output item in the response.
   */
  output_index: number;
  /**
   * The content part that is done.
   */
  part: ResponseContentPartDoneEvent.Part;
  /**
   * The ID of the response.
   */
  response_id: string;
  /**
   * The event type, must be `response.content_part.done`.
   */
  type: 'response.content_part.done';
}
export declare namespace ResponseContentPartDoneEvent {
  /**
   * The content part that is done.
   */
  interface Part {
    /**
     * Base64-encoded audio data (if type is "audio").
     */
    audio?: string;
    /**
     * The text content (if type is "text").
     */
    text?: string;
    /**
     * The transcript of the audio (if type is "audio").
     */
    transcript?: string;
    /**
     * The content type ("text", "audio").
     */
    type?: 'text' | 'audio';
  }
}
/**
 * This event instructs the server to create a Response, which means triggering
 * model inference. When in Server VAD mode, the server will create Responses
 * automatically.
 *
 * A Response will include at least one Item, and may have two, in which case the
 * second will be a function call. These Items will be appended to the conversation
 * history.
 *
 * The server will respond with a `response.created` event, events for Items and
 * content created, and finally a `response.done` event to indicate the Response is
 * complete.
 *
 * The `response.create` event includes inference configuration like
 * `instructions` and `temperature`. These fields will override the Session's
 * configuration for this Response only.
 */
export interface ResponseCreateEvent {
  /**
   * The event type, must be `response.create`.
   */
  type: 'response.create';
  /**
   * Optional client-generated ID used to identify this event.
   */
  event_id?: string;
  /**
   * Create a new Realtime response with these parameters
   */
  response?: ResponseCreateEvent.Response;
}
export declare namespace ResponseCreateEvent {
  /**
   * Create a new Realtime response with these parameters
   */
  interface Response {
    /**
     * Controls which conversation the response is added to. Currently supports `auto`
     * and `none`, with `auto` as the default value. The `auto` value means that the
     * contents of the response will be added to the default conversation. Set this to
     * `none` to create an out-of-band response which will not add items to the
     * default conversation.
     */
    conversation?: (string & {}) | 'auto' | 'none';
    /**
     * Input items to include in the prompt for the model. Using this field creates a
     * new context for this Response instead of using the default conversation. An
     * empty array `[]` will clear the context for this Response. Note that this can
     * include references to items from the default conversation.
     */
    input?: Array<RealtimeAPI.ConversationItemWithReference>;
    /**
     * The default system instructions (i.e. system message) prepended to model calls.
     * This field allows the client to guide the model on desired responses. The model
     * can be instructed on response content and format (e.g. "be extremely succinct",
     * "act friendly", "here are examples of good responses") and on audio behavior
     * (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The
     * instructions are not guaranteed to be followed by the model, but they provide
     * guidance to the model on the desired behavior.
     *
     * Note that the server sets default instructions which will be used if this field
     * is not set and are visible in the `session.created` event at the start of the
     * session.
     */
    instructions?: string;
    /**
     * Maximum number of output tokens for a single assistant response, inclusive of
     * tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
     * `inf` for the maximum available tokens for a given model. Defaults to `inf`.
     */
    max_response_output_tokens?: number | 'inf';
    /**
     * Set of 16 key-value pairs that can be attached to an object. This can be useful
     * for storing additional information about the object in a structured format, and
     * querying for objects via API or the dashboard.
     *
     * Keys are strings with a maximum length of 64 characters. Values are strings with
     * a maximum length of 512 characters.
     */
    metadata?: Shared.Metadata | null;
    /**
     * The set of modalities the model can respond with. To disable audio, set this to
     * ["text"].
     */
    modalities?: Array<'text' | 'audio'>;
    /**
     * The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
     */
    output_audio_format?: 'pcm16' | 'g711_ulaw' | 'g711_alaw';
    /**
     * Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.
     */
    temperature?: number;
    /**
     * How the model chooses tools. Options are `auto`, `none`, `required`, or specify
     * a function, like `{"type": "function", "function": {"name": "my_function"}}`.
     */
    tool_choice?: string;
    /**
     * Tools (functions) available to the model.
     */
    tools?: Array<Response.Tool>;
    /**
     * The voice the model uses to respond. Voice cannot be changed during the session
     * once the model has responded with audio at least once. Current voice options are
     * `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, and `verse`.
     */
    voice?: 'alloy' | 'ash' | 'ballad' | 'coral' | 'echo' | 'sage' | 'shimmer' | 'verse';
  }
  namespace Response {
    interface Tool {
      /**
       * The description of the function, including guidance on when and how to call it,
       * and guidance about what to tell the user when calling (if anything).
       */
      description?: string;
      /**
       * The name of the function.
       */
      name?: string;
      /**
       * Parameters of the function in JSON Schema.
       */
      parameters?: unknown;
      /**
       * The type of the tool, i.e. `function`.
       */
      type?: 'function';
    }
  }
}
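/*
 * A minimal out-of-band response sketch (illustrative only): request a
 * text-only response that is not added to the default conversation, with one
 * function tool available. The `get_weather` tool is hypothetical; `ws` is an
 * assumed WebSocket connection.
 *
 *     const create: ResponseCreateEvent = {
 *       type: 'response.create',
 *       response: {
 *         conversation: 'none',
 *         modalities: ['text'],
 *         instructions: 'Answer as briefly as possible.',
 *         tools: [{
 *           type: 'function',
 *           name: 'get_weather',
 *           description: 'Look up the current weather for a city.',
 *           parameters: { type: 'object', properties: { city: { type: 'string' } } },
 *         }],
 *       },
 *     };
 *     ws.send(JSON.stringify(create));
 */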
/**
 * Returned when a new Response is created. The first event of response creation,
 * where the response is in an initial state of `in_progress`.
 */
export interface ResponseCreatedEvent {
  /**
   * The unique ID of the server event.
   */
  event_id: string;
  /**
   * The response resource.
   */
  response: RealtimeResponse;
  /**
   * The event type, must be `response.created`.
   */
  type: 'response.created';
}
/**
 * Returned when a Response is done streaming. Always emitted, no matter the final
 * state. The Response object included in the `response.done` event will include
 * all output Items in the Response but will omit the raw audio data.
 */
export interface ResponseDoneEvent {
  /**
   * The unique ID of the server event.
   */
  event_id: string;
  /**
   * The response resource.
   */
  response: RealtimeResponse;
  /**
   * The event type, must be `response.done`.
   */
  type: 'response.done';
}
/**
 * Returned when the model-generated function call arguments are updated.
 */
export interface ResponseFunctionCallArgumentsDeltaEvent {
  /**
   * The ID of the function call.
   */
  call_id: string;
  /**
   * The arguments delta as a JSON string.
   */
  delta: string;
  /**
   * The unique ID of the server event.
   */
  event_id: string;
  /**
   * The ID of the function call item.
   */
  item_id: string;
  /**
   * The index of the output item in the response.
   */
  output_index: number;
  /**
   * The ID of the response.
   */
  response_id: string;
  /**
   * The event type, must be `response.function_call_arguments.delta`.
   */
  type: 'response.function_call_arguments.delta';
}
/**
 * Returned when the model-generated function call arguments are done streaming.
 * Also emitted when a Response is interrupted, incomplete, or cancelled.
 */
export interface ResponseFunctionCallArgumentsDoneEvent {
  /**
   * The final arguments as a JSON string.
   */
  arguments: string;
  /**
   * The ID of the function call.
   */
  call_id: string;
  /**
   * The unique ID of the server event.
   */
  event_id: string;
  /**
   * The ID of the function call item.
   */
  item_id: string;
  /**
   * The index of the output item in the response.
   */
  output_index: number;
  /**
   * The ID of the response.
   */
  response_id: string;
  /**
   * The event type, must be `response.function_call_arguments.done`.
   */
  type: 'response.function_call_arguments.done';
}
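/*
 * A minimal function-call round-trip sketch (illustrative only): when the
 * arguments finish streaming, run the tool, return the result as a
 * `function_call_output` item, then ask for a new response. `ws` and `runTool`
 * are assumptions, not part of this module.
 *
 *     function onFunctionCallDone(event: ResponseFunctionCallArgumentsDoneEvent): void {
 *       const args = JSON.parse(event.arguments);
 *       const result = runTool(args); // hypothetical tool dispatcher
 *       const output: ConversationItemCreateEvent = {
 *         type: 'conversation.item.create',
 *         item: {
 *           type: 'function_call_output',
 *           call_id: event.call_id,
 *           output: JSON.stringify(result),
 *         },
 *       };
 *       ws.send(JSON.stringify(output));
 *       const continueResponse: ResponseCreateEvent = { type: 'response.create' };
 *       ws.send(JSON.stringify(continueResponse));
 *     }
 */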
/**
 * Returned when a new Item is created during Response generation.
 */
export interface ResponseOutputItemAddedEvent {
  /**
   * The unique ID of the server event.
   */
  event_id: string;
  /**
   * The item to add to the conversation.
   */
  item: ConversationItem;
  /**
   * The index of the output item in the Response.
   */
  output_index: number;
  /**
   * The ID of the Response to which the item belongs.
   */
  response_id: string;
  /**
   * The event type, must be `response.output_item.added`.
   */
  type: 'response.output_item.added';
}
/**
 * Returned when an Item is done streaming. Also emitted when a Response is
 * interrupted, incomplete, or cancelled.
 */
export interface ResponseOutputItemDoneEvent {
  /**
   * The unique ID of the server event.
   */
  event_id: string;
  /**
   * The item to add to the conversation.
   */
  item: ConversationItem;
  /**
   * The index of the output item in the Response.
   */
  output_index: number;
  /**
   * The ID of the Response to which the item belongs.
   */
  response_id: string;
  /**
   * The event type, must be `response.output_item.done`.
   */
  type: 'response.output_item.done';
}
/**
 * Returned when the text value of a "text" content part is updated.
 */
export interface ResponseTextDeltaEvent {
  /**
   * The index of the content part in the item's content array.
   */
  content_index: number;
  /**
   * The text delta.
   */
  delta: string;
  /**
   * The unique ID of the server event.
   */
  event_id: string;
  /**
   * The ID of the item.
   */
  item_id: string;
  /**
   * The index of the output item in the response.
   */
  output_index: number;
  /**
   * The ID of the response.
   */
  response_id: string;
  /**
   * The event type, must be `response.text.delta`.
   */
  type: 'response.text.delta';
}
/**
 * Returned when the text value of a "text" content part is done streaming. Also
 * emitted when a Response is interrupted, incomplete, or cancelled.
 */
export interface ResponseTextDoneEvent {
  /**
   * The index of the content part in the item's content array.
   */
  content_index: number;
  /**
   * The unique ID of the server event.
   */
  event_id: string;
  /**
   * The ID of the item.
   */
  item_id: string;
  /**
   * The index of the output item in the response.
   */
  output_index: number;
  /**
   * The ID of the response.
   */
  response_id: string;
  /**
   * The final text content.
   */
  text: string;
  /**
   * The event type, must be `response.text.done`.
   */
  type: 'response.text.done';
}
/**
 * Returned when a Session is created. Emitted automatically when a new connection
 * is established as the first server event. This event will contain the default
 * Session configuration.
 */
export interface SessionCreatedEvent {
  /**
   * The unique ID of the server event.
   */
  event_id: string;
  /**
   * Realtime session object configuration.
   */
  session: SessionsAPI.Session;
  /**
   * The event type, must be `session.created`.
   */
  type: 'session.created';
}
/**
 * Send this event to update the session's default configuration. The client may
 * send this event at any time to update the session configuration, and any field
 * may be updated at any time, except for "voice". The server will respond with a
 * `session.updated` event that shows the full effective configuration. Only fields
 * that are present are updated; thus the correct way to clear a field like
 * "instructions" is to pass an empty string.
 */
export interface SessionUpdateEvent {
  /**
   * Realtime session object configuration.
   */
  session: SessionUpdateEvent.Session;
  /**
   * The event type, must be `session.update`.
   */
  type: 'session.update';
  /**
   * Optional client-generated ID used to identify this event.
   */
  event_id?: string;
}
export declare namespace SessionUpdateEvent {
  /**
   * Realtime session object configuration.
   */
  interface Session {
    /**
     * The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
     * `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
     * (mono), and little-endian byte order.
     */
    input_audio_format?: 'pcm16' | 'g711_ulaw' | 'g711_alaw';
    /**
     * Configuration for input audio transcription, defaults to off and can be set to
     * `null` to turn off once on. Input audio transcription is not native to the
     * model, since the model consumes audio directly. Transcription runs
     * asynchronously through
     * [OpenAI Whisper transcription](https://platform.openai.com/docs/api-reference/audio/createTranscription)
     * and should be treated as rough guidance rather than the representation
     * understood by the model. The client can optionally set the language and prompt
     * for transcription; these fields will be passed to the Whisper API.
     */
    input_audio_transcription?: Session.InputAudioTranscription;
    /**
     * The default system instructions (i.e. system message) prepended to model calls.
     * This field allows the client to guide the model on desired responses. The model
     * can be instructed on response content and format (e.g. "be extremely succinct",
     * "act friendly", "here are examples of good responses") and on audio behavior
     * (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The
     * instructions are not guaranteed to be followed by the model, but they provide
     * guidance to the model on the desired behavior.
     *
     * Note that the server sets default instructions which will be used if this field
     * is not set and are visible in the `session.created` event at the start of the
     * session.
     */
    instructions?: string;
    /**
     * Maximum number of output tokens for a single assistant response, inclusive of
     * tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
     * `inf` for the maximum available tokens for a given model. Defaults to `inf`.
     */
    max_response_output_tokens?: number | 'inf';
    /**
     * The set of modalities the model can respond with. To disable audio, set this to
     * ["text"].
     */
    modalities?: Array<'text' | 'audio'>;
    /**
     * The Realtime model used for this session.
     */
    model?: 'gpt-4o-realtime-preview' | 'gpt-4o-realtime-preview-2024-10-01' | 'gpt-4o-realtime-preview-2024-12-17' | 'gpt-4o-mini-realtime-preview' | 'gpt-4o-mini-realtime-preview-2024-12-17';
    /**
     * The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
     * For `pcm16`, output audio is sampled at a rate of 24kHz.
     */
    output_audio_format?: 'pcm16' | 'g711_ulaw' | 'g711_alaw';
    /**
     * Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.
     */
    temperature?: number;
    /**
     * How the model chooses tools. Options are `auto`, `none`, `required`, or specify
     * a function.
     */
    tool_choice?: string;
    /**
     * Tools (functions) available to the model.
     */
    tools?: Array<Session.Tool>;
    /**
     * Configuration for turn detection. Can be set to `null` to turn off. Server VAD
     * means that the model will detect the start and end of speech based on audio
     * volume and respond at the end of user speech.
     */
    turn_detection?: Session.TurnDetection;
    /**
     * The voice the model uses to respond. Voice cannot be changed during the session
     * once the model has responded with audio at least once. Current voice options are
     * `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, and `verse`.
     */
    voice?: 'alloy' | 'ash' | 'ballad' | 'coral' | 'echo' | 'sage' | 'shimmer' | 'verse';
  }
  namespace Session {
    /**
     * Configuration for input audio transcription, defaults to off and can be set to
     * `null` to turn off once on. Input audio transcription is not native to the
     * model, since the model consumes audio directly. Transcription runs
     * asynchronously through
     * [OpenAI Whisper transcription](https://platform.openai.com/docs/api-reference/audio/createTranscription)
     * and should be treated as rough guidance rather than the representation
     * understood by the model. The client can optionally set the language and prompt
     * for transcription; these fields will be passed to the Whisper API.
     */
    interface InputAudioTranscription {
      /**
       * The language of the input audio. Supplying the input language in
       * [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
       * format will improve accuracy and latency.
       */
      language?: string;
      /**
       * The model to use for transcription; `whisper-1` is the only currently supported
       * model.
       */
      model?: string;
      /**
       * An optional text to guide the model's style or continue a previous audio
       * segment. The
       * [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
       * should match the audio language.
       */
      prompt?: string;
    }
    interface Tool {
      /**
       * The description of the function, including guidance on when and how to call it,
       * and guidance about what to tell the user when calling (if anything).
       */
      description?: string;
      /**
       * The name of the function.
       */
      name?: string;
      /**
       * Parameters of the function in JSON Schema.
       */
      parameters?: unknown;
      /**
       * The type of the tool, i.e. `function`.
       */
      type?: 'function';
    }
    /**
     * Configuration for turn detection. Can be set to `null` to turn off. Server VAD
     * means that the model will detect the start and end of speech based on audio
     * volume and respond at the end of user speech.
     */
    interface TurnDetection {
      /**
       * Whether or not to automatically generate a response when VAD is enabled. `true`
       * by default.
       */
      create_response?: boolean;
      /**
       * Amount of audio to include before the VAD detected speech (in milliseconds).
       * Defaults to 300ms.
       */
      prefix_padding_ms?: number;
      /**
       * Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms.
       * With shorter values the model will respond more quickly, but may jump in on
       * short pauses from the user.
       */
      silence_duration_ms?: number;
      /**
       * Activation threshold for VAD (0.0 to 1.0); this defaults to 0.5. A higher
       * threshold will require louder audio to activate the model, and thus might
       * perform better in noisy environments.
       */
      threshold?: number;
      /**
       * Type of turn detection; only `server_vad` is currently supported.
       */
      type?: string;
    }
  }
}
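/*
 * A minimal configuration sketch (illustrative only): enable server VAD and
 * Whisper transcription in one `session.update`. Omitted fields keep their
 * current values. `ws` is an assumed WebSocket connection.
 *
 *     const update: SessionUpdateEvent = {
 *       type: 'session.update',
 *       session: {
 *         modalities: ['text', 'audio'],
 *         input_audio_format: 'pcm16',
 *         input_audio_transcription: { model: 'whisper-1', language: 'en' },
 *         turn_detection: { type: 'server_vad', threshold: 0.5, silence_duration_ms: 500 },
 *       },
 *     };
 *     ws.send(JSON.stringify(update));
 */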
/**
 * Returned when a session is updated with a `session.update` event, unless there
 * is an error.
 */
export interface SessionUpdatedEvent {
  /**
   * The unique ID of the server event.
   */
  event_id: string;
  /**
   * Realtime session object configuration.
   */
  session: SessionsAPI.Session;
  /**
   * The event type, must be `session.updated`.
   */
  type: 'session.updated';
}
export declare namespace Realtime {
  export {
    Sessions as Sessions,
    type SessionsAPISession as Session,
    type SessionCreateResponse as SessionCreateResponse,
    type SessionCreateParams as SessionCreateParams,
  };
}
//# sourceMappingURL=realtime.d.ts.map