Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions livekit-agents/livekit/agents/llm/realtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ class RealtimeCapabilities:
audio_output: bool
manual_function_calls: bool
per_response_tool_choice: bool
supports_client_content: bool = True
"""Whether the model supports send_client_content for mid-session chat context
updates. When False, chat turns should be sent via send_realtime_input and tool
responses via send_tool_response independently."""


class RealtimeError(Exception):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,12 @@ def __init__(
if not is_given(output_audio_transcription):
output_audio_transcription = types.AudioTranscriptionConfig()

if not is_given(model):
if vertexai:
model = "gemini-live-2.5-flash-native-audio"
else:
model = "gemini-2.5-flash-native-audio-preview-12-2025"

server_turn_detection = True
if (
is_given(realtime_input_config)
Expand All @@ -276,6 +282,10 @@ def __init__(
server_turn_detection = False
modalities = modalities if is_given(modalities) else [types.Modality.AUDIO]

# Gemini 3.1 model variants (detected here by "3.1" appearing in the model
# name) do not support send_client_content mid-session; tool responses and
# chat turns must each go through their own dedicated API method instead.
supports_client_content = "3.1" not in model

super().__init__(
capabilities=llm.RealtimeCapabilities(
message_truncation=False,
Expand All @@ -285,15 +295,10 @@ def __init__(
audio_output=types.Modality.AUDIO in modalities,
manual_function_calls=False,
per_response_tool_choice=False,
supports_client_content=supports_client_content,
)
)

if not is_given(model):
if vertexai:
model = "gemini-live-2.5-flash-native-audio"
else:
model = "gemini-2.5-flash-native-audio-preview-12-2025"

gemini_api_key = api_key if is_given(api_key) else os.environ.get("GOOGLE_API_KEY")
gcp_project = project if is_given(project) else os.environ.get("GOOGLE_CLOUD_PROJECT")
gcp_location: str | None = (
Expand Down Expand Up @@ -579,17 +584,6 @@ async def update_instructions(self, instructions: str) -> None:
)

async def update_chat_ctx(self, chat_ctx: llm.ChatContext) -> None:
if self._opts.model == "gemini-3.1-flash-live-preview":
logger.warning(
"update_chat_ctx is not compatible with 'gemini-3.1-flash-live-preview' and will be ignored."
)
self._chat_ctx = chat_ctx.copy(
exclude_handoff=True,
exclude_instructions=True,
exclude_empty_message=True,
exclude_config_update=True,
)
return
# Check for system/developer messages that will be dropped
system_msg_count = sum(
1 for msg in chat_ctx.messages() if msg.role in ("system", "developer")
Expand Down Expand Up @@ -625,21 +619,30 @@ async def update_chat_ctx(self, chat_ctx: llm.ChatContext) -> None:
append_ctx.items.append(item)

if append_ctx.items:
turns_dict, _ = append_ctx.copy(exclude_function_call=True).to_provider_format(
format="google", inject_dummy_user_message=False
)
# we are not generating, and do not need to inject
turns = [types.Content.model_validate(turn) for turn in turns_dict]
# Tool responses always go through send_tool_response regardless of model
tool_results = get_tool_results_for_realtime(
append_ctx,
vertexai=self._opts.vertexai,
tool_response_scheduling=self._opts.tool_response_scheduling,
)
if turns:
self._send_client_event(types.LiveClientContent(turns=turns, turn_complete=False))
if tool_results:
self._send_client_event(tool_results)

# Chat turns are routed based on model capabilities
turns_dict, _ = append_ctx.copy(exclude_function_call=True).to_provider_format(
format="google", inject_dummy_user_message=False
)
turns = [types.Content.model_validate(turn) for turn in turns_dict]
if turns:
if self._realtime_model.capabilities.supports_client_content:
self._send_client_event(
types.LiveClientContent(turns=turns, turn_complete=False)
)
else:
logger.debug(
"send_client_content not supported mid-session, skipping chat turn update"
)

# since we don't have a view of the history on the server side, we'll assume
# the current state is accurate. this isn't perfect because removals aren't done.
self._chat_ctx = chat_ctx
Expand Down
Loading