diff --git a/agents/package.json b/agents/package.json index c1111739c..0bac15227 100644 --- a/agents/package.json +++ b/agents/package.json @@ -50,7 +50,7 @@ "@bufbuild/protobuf": "^1.10.0", "@ffmpeg-installer/ffmpeg": "^1.1.0", "@livekit/mutex": "^1.1.1", - "@livekit/protocol": "^1.45.3", + "@livekit/protocol": "^1.45.6", "@livekit/typed-emitter": "^3.0.0", "@livekit/throws-transformer": "0.1.8", "@opentelemetry/api": "^1.9.0", diff --git a/agents/src/voice/room_io/_output.ts b/agents/src/voice/room_io/_output.ts index ef8c9ff8a..6d68fa035 100644 --- a/agents/src/voice/room_io/_output.ts +++ b/agents/src/voice/room_io/_output.ts @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: 2025 LiveKit, Inc. // // SPDX-License-Identifier: Apache-2.0 +import { AgentSession as pb } from '@livekit/protocol'; import type { RemoteParticipant } from '@livekit/rtc-node'; import { type AudioFrame, @@ -153,9 +154,6 @@ export class ParticipantTranscriptionOutput extends BaseParticipantTranscription return; } - // When json_format is enabled, serialize each chunk as a protobuf-compatible JSON dict. - // The Python implementation uses `agent_pb.TimedString` + `MessageToDict(preserving_proto_field_name=True)`. - // We emit the same snake_case shape directly (no protobuf runtime dependency on the JS side). // latestText must hold the encoded payload so non-delta flush (FINAL=true) republishes the // same newline-delimited JSON format as the interim chunks. const payload = this.jsonFormat @@ -168,24 +166,15 @@ export class ParticipantTranscriptionOutput extends BaseParticipantTranscription } private encodeJsonChunk(text: string | TimedString): string { - const obj: Record = { - text: isTimedString(text) ? text.text : String(text), - }; - if (isTimedString(text)) { - if (text.startTime !== undefined) { - obj.start_time = text.startTime; - } - if (text.endTime !== undefined) { - obj.end_time = text.endTime; - } - if (text.confidence !== undefined) { - obj.confidence = text.confidence; - } - if (text.startTimeOffset !== undefined) { - obj.start_time_offset = text.startTimeOffset; - } - } - return JSON.stringify(obj) + '\n'; + const isTimed = isTimedString(text); + const message = new pb.TimedString({ + text: isTimed ? text.text : text, + startTime: isTimed ? text.startTime : undefined, + endTime: isTimed ? text.endTime : undefined, + confidence: isTimed ? text.confidence : undefined, + startTimeOffset: isTimed ? text.startTimeOffset : undefined, + }); + return message.toJsonString({ useProtoFieldName: true }) + '\n'; } protected async handleCaptureText(text: string): Promise { diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 3f41f05ea..5061514cd 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -116,8 +116,8 @@ importers: specifier: ^1.1.1 version: 1.1.1 '@livekit/protocol': - specifier: ^1.45.3 - version: 1.45.3 + specifier: ^1.45.6 + version: 1.45.6 '@livekit/throws-transformer': specifier: 0.1.8 version: 0.1.8(typescript@5.4.5) @@ -2190,8 +2190,8 @@ packages: cpu: [x64] os: [win32] - '@livekit/protocol@1.45.3': - resolution: {integrity: sha512-WmMxBTsy4dRBqcrswFwUUlgq3Z0nnhOqKR6tX749Rb/PcB1yBMUtrHxZvcsS6qi3/5+86zHeVG+exmu1sZqfJg==} + '@livekit/protocol@1.45.6': + resolution: {integrity: sha512-YPDmrUiVe1EY/q/2bD+Fp+69DWq6LZgeH+G/KEbz07OIVf8hgAYzfb1FgiOdWLRpSj06+SuTmrOY604fWNuD3w==} '@livekit/rtc-ffi-bindings-darwin-arm64@0.12.52-patch.0': resolution: {integrity: sha512-IKUir6goV8yVRR7E2qrAP0JtH7gUyMkO0TG8G+dopO/fkXAsPpSealgI9fLcBJl0zhKK+eGCr741r6xR+xxsVw==} @@ -6463,7 +6463,7 @@ snapshots: '@livekit/noise-cancellation-win32-x64@0.1.9': optional: true - '@livekit/protocol@1.45.3': + '@livekit/protocol@1.45.6': dependencies: '@bufbuild/protobuf': 1.10.1 @@ -8866,14 +8866,14 @@ snapshots: livekit-server-sdk@2.13.3: dependencies: '@bufbuild/protobuf': 1.10.1 - '@livekit/protocol': 1.45.3 + '@livekit/protocol': 1.45.6 camelcase-keys: 9.1.3 jose: 5.2.4 livekit-server-sdk@2.14.1: dependencies: '@bufbuild/protobuf': 1.10.1 - '@livekit/protocol': 1.45.3 + '@livekit/protocol': 1.45.6 camelcase-keys: 9.1.3 jose: 5.2.4