Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions apps/api/src/routes/activity.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ const dailyActivitySchema = z.object({
requestCost: z.number(),
dataStorageCost: z.number(),
imageInputCost: z.number(),
audioInputCost: z.number(),
imageOutputCost: z.number(),
videoOutputCost: z.number(),
cachedInputCost: z.number(),
Expand Down Expand Up @@ -252,6 +253,10 @@ activity.openapi(getActivity, async (c) => {
sql<number>`COALESCE(SUM(${apiKeyHourlyStats.imageInputCost}), 0)`.as(
"imageInputCost",
),
audioInputCost:
sql<number>`COALESCE(SUM(${apiKeyHourlyStats.audioInputCost}), 0)`.as(
"audioInputCost",
),
imageOutputCost:
sql<number>`COALESCE(SUM(${apiKeyHourlyStats.imageOutputCost}), 0)`.as(
"imageOutputCost",
Expand Down Expand Up @@ -409,6 +414,7 @@ activity.openapi(getActivity, async (c) => {
const cacheCount = Number(day.cacheCount);
const discountSavings = Number(day.discountSavings);
const imageInputCost = Number(day.imageInputCost);
const audioInputCost = Number(day.audioInputCost);
const imageOutputCost = Number(day.imageOutputCost);
const videoOutputCost = Number(day.videoOutputCost);
const cachedInputCost = Number(day.cachedInputCost);
Expand Down Expand Up @@ -440,6 +446,7 @@ activity.openapi(getActivity, async (c) => {
requestCost,
dataStorageCost,
imageInputCost,
audioInputCost,
imageOutputCost,
videoOutputCost,
cachedInputCost,
Expand Down Expand Up @@ -521,6 +528,10 @@ activity.openapi(getActivity, async (c) => {
sql<number>`COALESCE(SUM(${projectHourlyStats.imageInputCost}), 0)`.as(
"imageInputCost",
),
audioInputCost:
sql<number>`COALESCE(SUM(${projectHourlyStats.audioInputCost}), 0)`.as(
"audioInputCost",
),
imageOutputCost:
sql<number>`COALESCE(SUM(${projectHourlyStats.imageOutputCost}), 0)`.as(
"imageOutputCost",
Expand Down Expand Up @@ -679,6 +690,7 @@ activity.openapi(getActivity, async (c) => {
const requestCost = Number(day.requestCost);
const dataStorageCost = Number(day.dataStorageCost);
const imageInputCost = Number(day.imageInputCost);
const audioInputCost = Number(day.audioInputCost);
const imageOutputCost = Number(day.imageOutputCost);
const videoOutputCost = Number(day.videoOutputCost);
const cachedInputCost = Number(day.cachedInputCost);
Expand Down Expand Up @@ -711,6 +723,7 @@ activity.openapi(getActivity, async (c) => {
requestCost,
dataStorageCost,
imageInputCost,
audioInputCost,
imageOutputCost,
videoOutputCost,
cachedInputCost,
Expand Down
2 changes: 2 additions & 0 deletions apps/api/src/routes/internal-models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ const modelProviderMappingSchema = z.object({
maxOutput: z.number().nullable(),
streaming: z.boolean(),
vision: z.boolean().nullable(),
audio: z.boolean().nullable(),
reasoning: z.boolean().nullable(),
reasoningOutput: z.string().nullable(),
tools: z.boolean().nullable(),
Expand Down Expand Up @@ -219,6 +220,7 @@ internalModels.openapi(getModelsRoute, async (c) => {
return {
...mapping,
discount: effectiveDiscount,
audio: sharedMapping?.audio ?? null,
imageOutputPrice:
sharedMapping?.imageOutputPrice !== undefined
? String(sharedMapping.imageOutputPrice)
Expand Down
2 changes: 2 additions & 0 deletions apps/api/src/routes/logs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,10 @@ const logSchema = z.object({
cacheWriteInputCost: z.number().nullable().optional(),
webSearchCost: z.number().nullable().optional(),
imageInputTokens: z.string().nullable(),
audioInputTokens: z.string().nullable(),
imageOutputTokens: z.string().nullable(),
imageInputCost: z.number().nullable(),
audioInputCost: z.number().nullable(),
imageOutputCost: z.number().nullable(),
videoOutputCost: z.number().nullable(),
videoDownloadCount: z.number().nullable(),
Expand Down
5 changes: 5 additions & 0 deletions apps/code/src/lib/api/v1.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1014,8 +1014,10 @@ export interface paths {
cacheWriteInputCost?: number | null;
webSearchCost?: number | null;
imageInputTokens: string | null;
audioInputTokens: string | null;
imageOutputTokens: string | null;
imageInputCost: number | null;
audioInputCost: number | null;
imageOutputCost: number | null;
videoOutputCost: number | null;
videoDownloadCount: number | null;
Expand Down Expand Up @@ -1264,8 +1266,10 @@ export interface paths {
cacheWriteInputCost?: number | null;
webSearchCost?: number | null;
imageInputTokens: string | null;
audioInputTokens: string | null;
imageOutputTokens: string | null;
imageInputCost: number | null;
audioInputCost: number | null;
imageOutputCost: number | null;
videoOutputCost: number | null;
videoDownloadCount: number | null;
Expand Down Expand Up @@ -1399,6 +1403,7 @@ export interface paths {
requestCost: number;
dataStorageCost: number;
imageInputCost: number;
audioInputCost: number;
imageOutputCost: number;
videoOutputCost: number;
cachedInputCost: number;
Expand Down
17 changes: 17 additions & 0 deletions apps/gateway/src/app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import { cors } from "hono/cors";
import { HTTPException } from "hono/http-exception";
import { z } from "zod";

import { UnsupportedAudioFormatError } from "@llmgateway/actions";
import { redisClient } from "@llmgateway/cache";
import { db } from "@llmgateway/db";
import {
Expand Down Expand Up @@ -112,6 +113,22 @@ app.use("*", async (c, next) => {
});

app.onError((error, c) => {
if (error instanceof UnsupportedAudioFormatError) {
logger.warn("Unsupported audio format", {
message: error.message,
format: error.format,
providerTarget: error.providerTarget,
});
return c.json(
{
error: true,
status: 400,
message: error.message,
},
400,
);
}

if (error instanceof HTTPException) {
const status = error.status;

Expand Down
175 changes: 175 additions & 0 deletions apps/gateway/src/audio.e2e.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
import fs from "node:fs";
import path from "node:path";
import { fileURLToPath } from "node:url";

import "dotenv/config";
import { beforeAll, beforeEach, describe, expect, test } from "vitest";

import {
beforeAllHook,
beforeEachHook,
filteredModels,
getTestOptions,
hasOnlyModels,
logMode,
matchesTestModel,
specifiedModels,
} from "@/chat-helpers.e2e.js";

import { db, tables } from "@llmgateway/db";

import { app } from "./app.js";

import type { ProviderModelMapping } from "@llmgateway/models";

// Identifiers for the dedicated project / API key rows that this suite inserts
// in audioBeforeAllHook. The token is sent as the Bearer credential on every
// request issued by the tests below.
const AUDIO_PROJECT_ID = "audio-test-project-id";
const AUDIO_API_KEY_ID = "audio-test-api-key-id";
const AUDIO_API_KEY_TOKEN = "real-token-audio";

// Directory of this module, derived from import.meta.url.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Location of the WAV fixture, resolved relative to this file:
// <this dir>/test-fixtures/test-audio.wav
const FIXTURE_AUDIO_PATH = path.join(
__dirname,
"test-fixtures",
"test-audio.wav",
);

/**
 * Loads the WAV fixture at FIXTURE_AUDIO_PATH synchronously and returns its
 * bytes as a base64 string, ready to be placed in an `input_audio.data` field.
 *
 * @returns The fixture file contents, base64-encoded.
 * @throws Whatever `fs.readFileSync` throws when the file cannot be read.
 */
function readFixtureAudioBase64(): string {
  // Let Node do the decoding instead of going through an intermediate Buffer.
  return fs.readFileSync(FIXTURE_AUDIO_PATH, { encoding: "base64" });
}

/**
 * Test matrix for the audio-input suite: one `{ model, provider }` entry per
 * provider mapping that should be exercised.
 *
 * A provider mapping is included only when all of the following hold:
 *  - it declares `inputAudioPrice`;
 *  - its `deactivatedAt` / `deprecatedAt` dates, if set, are not in the past;
 *  - when `specifiedModels` is set it satisfies `matchesTestModel`, otherwise
 *    it is not marked `test: "skip"`;
 *  - when `hasOnlyModels` is set it is marked `test: "only"`.
 *
 * The `model` string is `<providerId>/<name>`, where `<name>` is the
 * provider's `modelName` for mappings with a region and the model id otherwise.
 */
const audioTestCases = filteredModels
  .filter(
    (model) =>
      // In "only" mode, drop models that have no provider flagged "only".
      !hasOnlyModels ||
      model.providers.some(
        (provider: ProviderModelMapping) => provider.test === "only",
      ),
  )
  .flatMap((model) =>
    (model.providers as ProviderModelMapping[])
      .filter((provider) => {
        if (provider.inputAudioPrice === undefined) {
          return false;
        }

        const isExpired =
          (provider.deactivatedAt && new Date() > provider.deactivatedAt) ||
          (provider.deprecatedAt && new Date() > provider.deprecatedAt);
        if (isExpired) {
          return false;
        }

        // An explicit model selection takes the place of the "skip" marker.
        const isSelected = specifiedModels
          ? matchesTestModel(provider.providerId, model.id, provider.region)
          : provider.test !== "skip";
        if (!isSelected) {
          return false;
        }

        return !hasOnlyModels || provider.test === "only";
      })
      .map((provider) => {
        const name = provider.region ? provider.modelName : model.id;
        return { model: `${provider.providerId}/${name}`, provider };
      }),
  );

/**
 * Suite-level setup: runs the shared `beforeAllHook`, then makes sure the
 * dedicated audio project and API key rows exist.
 *
 * The project insert is an upsert that resets `mode` and `organizationId` on an
 * id conflict; the API key insert leaves an already-existing row untouched.
 */
async function audioBeforeAllHook() {
  await beforeAllHook();

  // Values shared between the insert payload and the on-conflict update.
  const organizationId = "org-id";
  const mode = "credits" as const;

  await db
    .insert(tables.project)
    .values({
      id: AUDIO_PROJECT_ID,
      name: "Audio E2E Project",
      organizationId,
      mode,
    })
    .onConflictDoUpdate({
      target: tables.project.id,
      set: { mode, organizationId },
    });

  // The key row references the project, so it is inserted only afterwards.
  await db
    .insert(tables.apiKey)
    .values({
      id: AUDIO_API_KEY_ID,
      token: AUDIO_API_KEY_TOKEN,
      projectId: AUDIO_PROJECT_ID,
      description: "Audio E2E API Key",
      createdBy: "user-id",
    })
    .onConflictDoNothing();
}

/**
 * End-to-end checks that /v1/chat/completions accepts an `input_audio` content
 * part for every entry in `audioTestCases`, and that the response reports audio
 * token usage together with a matching audio input cost.
 */
describe("e2e audio input", getTestOptions(), () => {
  beforeAll(audioBeforeAllHook);
  beforeEach(beforeEachHook);

  // Placeholder test; presumably keeps the suite non-empty when
  // audioTestCases has no entries.
  test("empty", () => {
    expect(true).toBe(true);
  });

  test.each(audioTestCases)(
    "/v1/chat/completions accepts input_audio for $model",
    { ...getTestOptions(), timeout: 120_000 },
    async ({ model, provider }) => {
      // One user message: a text prompt followed by the WAV fixture.
      const payload = {
        model,
        messages: [
          {
            role: "user",
            content: [
              {
                type: "text",
                text: "What do you hear in this audio? Reply in one short sentence.",
              },
              {
                type: "input_audio",
                input_audio: { data: readFixtureAudioBase64(), format: "wav" },
              },
            ],
          },
        ],
      };

      const res = await app.request("/v1/chat/completions", {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          Authorization: `Bearer ${AUDIO_API_KEY_TOKEN}`,
        },
        body: JSON.stringify(payload),
      });
      const json = await res.json();

      if (logMode) {
        // Truncated so a verbose provider response does not flood the log.
        console.log(
          "audio chat.completions response",
          model,
          JSON.stringify(json).slice(0, 800),
        );
      }

      expect(res.status).toBe(200);
      expect(json.choices?.[0]?.message?.content).toBeTruthy();

      const usage = json.usage;

      const audioTokens = usage?.prompt_tokens_details?.audio_tokens;
      expect(typeof audioTokens).toBe("number");
      expect(audioTokens).toBeGreaterThan(0);

      const audioInputCost = usage?.cost_details?.audio_input_cost;
      expect(typeof audioInputCost).toBe("number");

      // Reported cost must equal tokens times the per-token audio price,
      // falling back to the regular input price, then to zero.
      const pricePerToken = provider.inputAudioPrice ?? provider.inputPrice ?? 0;
      expect(audioInputCost).toBeCloseTo(audioTokens * pricePerToken, 8);
    },
  );
});
Loading
Loading