-
Notifications
You must be signed in to change notification settings - Fork 133
feat: audio support for google models with integration into playground #2213
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 1 commit
Commits
Show all changes
15 commits
Select commit
Hold shift + click to select a range
2dc042a
feat: support Google Gemini audio input
RATCHAW 9484c9d
fix(gateway): audio input routing and token tracking
RATCHAW beecbd3
Merge branch 'main' into feat/google-audio-input
RATCHAW 6a67b37
feat(api): add audio input cost tracking to activity and logs
RATCHAW 7eafe27
fix(ui): add audioInputCost to activity types
RATCHAW 916cf91
Merge branch 'main' into feat/google-audio-input
smakosh 1b7756b
feat(audio): enhance audio support for playground
RATCHAW 1a01806
Merge branch 'main' of https://github.com/theopenco/llmgateway into f…
RATCHAW b52f7a4
feat(audio): add audio support to chat messages
RATCHAW 2e2a144
feat(prompt-input): add audio attachment UI with Music2Icon
RATCHAW e29b066
chore(api): regenerate typed clients with audios field
RATCHAW ad9de7d
chore(db): squash audio columns into single migration
RATCHAW fada058
Merge branch 'main' into feat/google-audio-input
RATCHAW 421ca93
chore(db): align migrations with origin/main
steebchen 085b587
Merge remote-tracking branch 'origin/main' into feat/google-audio-input
steebchen File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,176 @@ | ||
| import fs from "node:fs"; | ||
| import path from "node:path"; | ||
| import { fileURLToPath } from "node:url"; | ||
|
|
||
| import "dotenv/config"; | ||
| import { beforeAll, beforeEach, describe, expect, test } from "vitest"; | ||
|
|
||
| import { | ||
| beforeAllHook, | ||
| beforeEachHook, | ||
| filteredModels, | ||
| getConcurrentTestOptions, | ||
| getTestOptions, | ||
| hasOnlyModels, | ||
| logMode, | ||
| matchesTestModel, | ||
| specifiedModels, | ||
| } from "@/chat-helpers.e2e.js"; | ||
|
|
||
| import { db, tables } from "@llmgateway/db"; | ||
|
|
||
| import { app } from "./app.js"; | ||
|
|
||
| import type { ProviderModelMapping } from "@llmgateway/models"; | ||
|
|
||
// Identifiers of the project and API key rows that audioBeforeAllHook seeds
// for this suite.
const AUDIO_PROJECT_ID = "audio-test-project-id";
const AUDIO_API_KEY_ID = "audio-test-api-key-id";
// Token stored on the seeded API key and sent as the Bearer credential.
const AUDIO_API_KEY_TOKEN = "real-token-audio";

// Derive this module's directory from its URL so the fixture path does not
// depend on the process working directory.
const __dirname = path.dirname(fileURLToPath(import.meta.url));

// Absolute path of the audio fixture sent with every request in this suite.
const FIXTURE_AUDIO_PATH = path.resolve(
  __dirname,
  "test-fixtures",
  "test-audio.wav",
);
|
|
||
/**
 * Synchronously reads the file at FIXTURE_AUDIO_PATH and returns its bytes
 * encoded as a base64 string.
 *
 * @returns The fixture contents, base64-encoded.
 * @throws Whatever `fs.readFileSync` throws when the file cannot be read.
 */
function readFixtureAudioBase64(): string {
  return fs.readFileSync(FIXTURE_AUDIO_PATH, { encoding: "base64" });
}
|
|
||
/**
 * Model/provider pairs that the audio suite runs against.
 *
 * A provider mapping is included when it declares `inputAudioPrice`, is not
 * past its `deactivatedAt` / `deprecatedAt` date, and passes the test-selection
 * flags (`specifiedModels`, `hasOnlyModels`, `provider.test`).
 */
const audioTestCases = filteredModels
  .filter(
    (model) =>
      // In "only" mode, drop models that have no provider marked "only".
      !hasOnlyModels ||
      model.providers.some(
        (provider: ProviderModelMapping) => provider.test === "only",
      ),
  )
  .flatMap((model): { model: string; provider: ProviderModelMapping }[] => {
    const isEligible = (provider: ProviderModelMapping): boolean => {
      // Mappings without an audio input price are not covered by this suite.
      if (provider.inputAudioPrice === undefined) {
        return false;
      }
      if (provider.deactivatedAt && new Date() > provider.deactivatedAt) {
        return false;
      }
      if (provider.deprecatedAt && new Date() > provider.deprecatedAt) {
        return false;
      }

      // An explicit model selection replaces the per-provider "skip" flag.
      const selected = specifiedModels
        ? matchesTestModel(provider.providerId, model.id, provider.region)
        : provider.test !== "skip";
      if (!selected) {
        return false;
      }

      return !hasOnlyModels || provider.test === "only";
    };

    return (model.providers as ProviderModelMapping[])
      .filter(isEligible)
      .map((provider) => ({
        // Regional mappings are addressed by the provider's own model name.
        model: `${provider.providerId}/${provider.region ? provider.modelName : model.id}`,
        provider,
      }));
  });
|
|
||
/**
 * Suite-level setup for the audio tests.
 *
 * Runs the shared `beforeAllHook`, then seeds the rows this suite
 * authenticates with: the audio project (upserted) and its API key
 * (inserted only if no conflicting row exists).
 */
async function audioBeforeAllHook() {
  await beforeAllHook();

  const { project, apiKey } = tables;

  // Upsert the project: on an id conflict, reset mode and organization.
  await db
    .insert(project)
    .values({
      id: AUDIO_PROJECT_ID,
      name: "Audio E2E Project",
      organizationId: "org-id",
      mode: "credits",
    })
    .onConflictDoUpdate({
      target: project.id,
      set: { mode: "credits", organizationId: "org-id" },
    });

  // Insert the API key after the project it points at; an existing row is left
  // untouched.
  await db
    .insert(apiKey)
    .values({
      id: AUDIO_API_KEY_ID,
      token: AUDIO_API_KEY_TOKEN,
      projectId: AUDIO_PROJECT_ID,
      description: "Audio E2E API Key",
      createdBy: "user-id",
    })
    .onConflictDoNothing();
}
|
|
||
// End-to-end coverage for audio input on /v1/chat/completions: every selected
// provider must answer the prompt and report audio token usage and cost.
describe("e2e audio input", getConcurrentTestOptions(), () => {
  beforeAll(audioBeforeAllHook);
  beforeEach(beforeEachHook);

  // NOTE(review): presumably a placeholder so the suite still contains a test
  // when audioTestCases is empty — confirm.
  test("empty", () => {
    expect(true).toBe(true);
  });

  test.each(audioTestCases)(
    "/v1/chat/completions accepts input_audio for $model",
    { ...getTestOptions(), timeout: 120_000 },
    async ({ model, provider }) => {
      const audioBase64 = readFixtureAudioBase64();

      // One user message carrying a text prompt followed by the audio clip.
      const payload = {
        model,
        messages: [
          {
            role: "user",
            content: [
              {
                type: "text",
                text: "What do you hear in this audio? Reply in one short sentence.",
              },
              {
                type: "input_audio",
                input_audio: { data: audioBase64, format: "wav" },
              },
            ],
          },
        ],
      };

      const response = await app.request("/v1/chat/completions", {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          Authorization: `Bearer ${AUDIO_API_KEY_TOKEN}`,
        },
        body: JSON.stringify(payload),
      });

      const body = await response.json();
      if (logMode) {
        // Truncate the dump so large responses do not flood the log.
        console.log(
          "audio chat.completions response",
          model,
          JSON.stringify(body).slice(0, 800),
        );
      }

      expect(response.status).toBe(200);
      expect(body.choices?.[0]?.message?.content).toBeTruthy();

      // The usage block must report a positive number of audio tokens.
      const audioTokenCount = body.usage?.prompt_tokens_details?.audio_tokens;
      expect(typeof audioTokenCount).toBe("number");
      expect(audioTokenCount).toBeGreaterThan(0);

      const reportedAudioCost = body.usage?.cost_details?.audio_input_cost;
      expect(typeof reportedAudioCost).toBe("number");

      // Expected cost is the token count times the audio price, falling back
      // to the regular input price and then to zero.
      const unitPrice = provider.inputAudioPrice ?? provider.inputPrice ?? 0;
      const expectedAudioCost = audioTokenCount * unitPrice;
      expect(reportedAudioCost).toBeCloseTo(expectedAudioCost, 8);
    },
  );
});
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.