Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion src/core/schema/shared.ts
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@ export type TranscriptionProvider = z.infer<typeof transcriptionProviderSchema>;
// Provider name choices
export const providerNameSchema = z.enum([
"fal",
"replicate",
"elevenlabs",
"higgsfield",
"groq",
Expand Down
5 changes: 2 additions & 3 deletions src/definitions/actions/captions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import { writeFileSync } from "node:fs";
import { z } from "zod";
import { captionStyleSchema, filePathSchema } from "../../core/schema/shared";
import type { ActionDefinition, ZodSchema } from "../../core/schema/types";
import { ffmpegProvider } from "../../providers/ffmpeg";
import { transcribe } from "./transcribe";

// Input schema with Zod
Expand Down Expand Up @@ -117,7 +116,7 @@ export async function addCaptions(

// Extract audio first
const audioPath = video.replace(/\.[^.]+$/, "_audio.mp3");
await ffmpegProvider.extractAudio(video, audioPath);
await Bun.$`ffmpeg -y -i ${video} -vn -acodec libmp3lame ${audioPath}`.quiet();

// Transcribe
const result = await transcribe({
Expand Down Expand Up @@ -158,7 +157,7 @@ export async function addCaptions(
console.log(`[captions] burning subtitles...`);

// For now, just copy the video (proper implementation would use subtitles filter)
await ffmpegProvider.convertFormat({ input: video, output });
await Bun.$`ffmpeg -y -i ${video} -c copy ${output}`.quiet();

console.log(`[captions] saved to ${output}`);
return output;
Expand Down
35 changes: 23 additions & 12 deletions src/definitions/actions/grok-edit.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
* Edit videos using xAI's Grok Imagine Video model
*/

import { fal } from "@fal-ai/client";
import { z } from "zod";
import { filePathSchema } from "../../core/schema/shared";
import type { ActionDefinition, ZodSchema } from "../../core/schema/types";
import { falProvider } from "../../providers/fal";
import { ensureUrl, logQueueUpdate } from "./utils";

// Resolution enum matching the API spec
const grokEditResolutionSchema = z
Expand Down Expand Up @@ -57,10 +58,15 @@ export const definition: ActionDefinition<typeof schema> = {

console.log("[action/grok-edit] editing video with Grok Imagine");

const result = await falProvider.grokEditVideo({
prompt,
videoUrl: video,
resolution,
const inputUrl = await ensureUrl(video);
const result = await fal.subscribe("xai/grok-imagine-video/edit-video", {
input: {
prompt,
video_url: inputUrl,
resolution: resolution ?? "auto",
},
logs: true,
onQueueUpdate: logQueueUpdate("grok-edit"),
});

const data = result.data as {
Expand Down Expand Up @@ -100,10 +106,15 @@ export async function grokEditVideo(
): Promise<GrokEditOutput> {
console.log("[grok-edit] editing video");

const result = await falProvider.grokEditVideo({
prompt,
videoUrl,
resolution: options.resolution,
const inputUrl = await ensureUrl(videoUrl);
const result = await fal.subscribe("xai/grok-imagine-video/edit-video", {
input: {
prompt,
video_url: inputUrl,
resolution: options.resolution ?? "auto",
},
logs: true,
onQueueUpdate: logQueueUpdate("grok-edit"),
});

const data = result.data as {
Expand All @@ -116,13 +127,13 @@ export async function grokEditVideo(
};
};

const url = data?.video?.url;
if (!url) {
const resultUrl = data?.video?.url;
if (!resultUrl) {
throw new Error("No video URL in result");
}

return {
videoUrl: url,
videoUrl: resultUrl,
width: data.video?.width,
height: data.video?.height,
duration: data.video?.duration,
Expand Down
57 changes: 25 additions & 32 deletions src/definitions/actions/image.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
* Routes to Fal or Higgsfield based on options
*/

import { fal } from "@fal-ai/client";
import { HiggsfieldClient } from "@higgsfield/client";
import { z } from "zod";
import { imageSizeSchema } from "../../core/schema/shared";
import type { ActionDefinition, ZodSchema } from "../../core/schema/types";
import { falProvider } from "../../providers/fal";
import { higgsfieldProvider } from "../../providers/higgsfield";
import { storageProvider } from "../../providers/storage";
import { logQueueUpdate } from "./utils";

// Input schema with Zod
const imageInputSchema = z.object({
Expand Down Expand Up @@ -69,57 +69,50 @@ export interface ImageGenerationResult {

export async function generateWithFal(
prompt: string,
options: { imageSize?: string; upload?: boolean } = {},
options: { imageSize?: string } = {},
): Promise<ImageGenerationResult> {
console.log("[image] generating with fal");

const result = await falProvider.generateImage({
prompt,
imageSize: options.imageSize,
});
type FalResult = { data: { images?: Array<{ url?: string }> } };
const result = (await fal.subscribe("fal-ai/flux-pro/v1.1" as string, {
input: {
prompt,
image_size: options.imageSize || "landscape_4_3",
},
logs: true,
onQueueUpdate: logQueueUpdate("image"),
})) as FalResult;

const imageUrl = (result.data as { images?: Array<{ url?: string }> })
?.images?.[0]?.url;
const imageUrl = result.data?.images?.[0]?.url;
if (!imageUrl) {
throw new Error("No image URL in result");
}

let uploaded: string | undefined;
if (options.upload) {
const timestamp = Date.now();
const objectKey = `images/fal/${timestamp}.png`;
uploaded = await storageProvider.uploadFromUrl(imageUrl, objectKey);
console.log(`[image] uploaded to ${uploaded}`);
}

return { imageUrl, uploaded };
return { imageUrl };
}

export async function generateWithSoul(
prompt: string,
options: { styleId?: string; upload?: boolean } = {},
options: { styleId?: string } = {},
): Promise<ImageGenerationResult> {
console.log("[image] generating with higgsfield soul");

const result = await higgsfieldProvider.generateSoul({
const client = new HiggsfieldClient({
apiKey: process.env.HIGGSFIELD_API_KEY || process.env.HF_API_KEY,
apiSecret: process.env.HIGGSFIELD_SECRET || process.env.HF_API_SECRET,
});

const jobSet = await client.generate("/v1/text2image/soul", {
prompt,
styleId: options.styleId,
...(options.styleId && { style_id: options.styleId }),
});

const imageUrl = result.jobs?.[0]?.results?.raw?.url;
const imageUrl = jobSet?.jobs?.[0]?.results?.raw?.url;
if (!imageUrl) {
throw new Error("No image URL in result");
}

let uploaded: string | undefined;
if (options.upload) {
const timestamp = Date.now();
const objectKey = `images/soul/${timestamp}.png`;
uploaded = await storageProvider.uploadFromUrl(imageUrl, objectKey);
console.log(`[image] uploaded to ${uploaded}`);
}

return { imageUrl, uploaded };
return { imageUrl };
}

export default definition;
48 changes: 17 additions & 31 deletions src/definitions/actions/music.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
*/

import { writeFile } from "node:fs/promises";
import { fal } from "@fal-ai/client";
import { z } from "zod";
import { audioFormatSchema, filePathSchema } from "../../core/schema/shared";
import type { ActionDefinition, ZodSchema } from "../../core/schema/types";
import { falProvider } from "../../providers/fal";
import { storageProvider } from "../../providers/storage";
import { logQueueUpdate } from "./utils";

// Input schema with Zod
const musicInputSchema = z.object({
Expand Down Expand Up @@ -121,17 +121,21 @@ export async function generateMusic(
if (prompt) console.log(`[music] prompt: ${prompt}`);
if (tags) console.log(`[music] tags: ${tags.join(", ")}`);

const result = await falProvider.textToMusic({
prompt,
tags,
lyricsPrompt: lyrics,
seed,
promptStrength,
balanceStrength,
numSongs,
outputFormat: format,
outputBitRate: bitRate,
bpm,
const result = await fal.subscribe("fal-ai/sonauto/bark", {
input: {
prompt,
tags,
lyrics_prompt: lyrics,
seed,
prompt_strength: promptStrength,
balance_strength: balanceStrength,
num_songs: numSongs,
output_format: format,
output_bit_rate: bitRate,
bpm,
},
logs: true,
onQueueUpdate: logQueueUpdate("music"),
});

const musicResult: MusicResult = {
Expand Down Expand Up @@ -181,24 +185,6 @@ export async function generateMusic(
}
}

// Upload to storage if requested
if (upload) {
const uploadUrls: string[] = [];
for (let i = 0; i < musicResult.audio.length; i++) {
const audio = musicResult.audio[i];
if (!audio) continue;

const objectKey = `music/${Date.now()}-${i + 1}.${format || "wav"}`;
const uploadUrl = await storageProvider.uploadFromUrl(
audio.url,
objectKey,
);
uploadUrls.push(uploadUrl);
console.log(`[music] uploaded to ${uploadUrl}`);
}
musicResult.uploadUrls = uploadUrls;
}

return musicResult;
}

Expand Down
77 changes: 48 additions & 29 deletions src/definitions/actions/qwen-angles.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
* Generates same scene from different camera angles (azimuth/elevation)
*/

import { fal } from "@fal-ai/client";
import { z } from "zod";
import { filePathSchema } from "../../core/schema/shared";
import type { ActionDefinition, ZodSchema } from "../../core/schema/types";
import { falProvider } from "../../providers/fal";
import { ensureUrl, logQueueUpdate } from "./utils";

// Input schema with Zod
const qwenAnglesInputSchema = z.object({
Expand Down Expand Up @@ -123,20 +124,29 @@ export const definition: ActionDefinition<typeof schema> = {

console.log("[action/qwen-angles] adjusting camera angle");

const result = await falProvider.qwenMultipleAngles({
imageUrl: image,
horizontalAngle,
verticalAngle,
zoom,
additionalPrompt: prompt,
loraScale,
guidanceScale,
numInferenceSteps,
negativePrompt,
seed,
outputFormat,
numImages,
});
const imageUrl = await ensureUrl(image);
const result = await fal.subscribe(
"fal-ai/qwen-image-edit-2511-multiple-angles",
{
input: {
image_urls: [imageUrl],
horizontal_angle: horizontalAngle ?? 0,
vertical_angle: verticalAngle ?? 0,
zoom: zoom ?? 5,
additional_prompt: prompt,
lora_scale: loraScale ?? 1,
guidance_scale: guidanceScale ?? 4.5,
num_inference_steps: numInferenceSteps ?? 28,
acceleration: "regular",
negative_prompt: negativePrompt ?? "",
seed,
output_format: outputFormat ?? "png",
num_images: numImages ?? 1,
},
logs: true,
onQueueUpdate: logQueueUpdate("qwen-angles"),
},
);

const data = result.data as {
images?: Array<{ url: string }>;
Expand Down Expand Up @@ -182,20 +192,29 @@ export async function qwenAngles(
): Promise<QwenAnglesOutput> {
console.log("[qwen-angles] adjusting camera angle");

const result = await falProvider.qwenMultipleAngles({
imageUrl,
horizontalAngle: options.horizontalAngle,
verticalAngle: options.verticalAngle,
zoom: options.zoom,
additionalPrompt: options.prompt,
loraScale: options.loraScale,
guidanceScale: options.guidanceScale,
numInferenceSteps: options.numInferenceSteps,
negativePrompt: options.negativePrompt,
seed: options.seed,
outputFormat: options.outputFormat,
numImages: options.numImages,
});
const url = await ensureUrl(imageUrl);
const result = await fal.subscribe(
"fal-ai/qwen-image-edit-2511-multiple-angles",
{
input: {
image_urls: [url],
horizontal_angle: options.horizontalAngle ?? 0,
vertical_angle: options.verticalAngle ?? 0,
zoom: options.zoom ?? 5,
additional_prompt: options.prompt,
lora_scale: options.loraScale ?? 1,
guidance_scale: options.guidanceScale ?? 4.5,
num_inference_steps: options.numInferenceSteps ?? 28,
acceleration: "regular",
negative_prompt: options.negativePrompt ?? "",
seed: options.seed,
output_format: options.outputFormat ?? "png",
num_images: options.numImages ?? 1,
},
logs: true,
onQueueUpdate: logQueueUpdate("qwen-angles"),
},
);

const data = result.data as {
images?: Array<{ url: string }>;
Expand Down
Loading