diff --git a/.gitmodules b/.gitmodules index c263dbe06f80..c63d0607eb31 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "docs/themes/hugo-theme-relearn"] path = docs/themes/hugo-theme-relearn url = https://github.com/McShelby/hugo-theme-relearn.git +[submodule "backend/rust/kokoros/sources/Kokoros"] + path = backend/rust/kokoros/sources/Kokoros + url = https://github.com/lucasjinreal/Kokoros diff --git a/Makefile b/Makefile index 61bceb7e36fe..e0f965df6f82 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ # Disable parallel execution for backend builds -.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/mlx-distributed backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/acestep-cpp backends/fish-speech backends/voxtral backends/opus backends/trl backends/llama-cpp-quantization +.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/mlx-distributed backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine 
backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/acestep-cpp backends/fish-speech backends/voxtral backends/opus backends/trl backends/llama-cpp-quantization backends/kokoros GOCMD=go GOTEST=$(GOCMD) test @@ -587,6 +587,9 @@ BACKEND_MLX_DISTRIBUTED = mlx-distributed|python|./|false|true BACKEND_TRL = trl|python|.|false|true BACKEND_LLAMA_CPP_QUANTIZATION = llama-cpp-quantization|python|.|false|true +# Rust backends +BACKEND_KOKOROS = kokoros|rust|.|false|true + # Helper function to build docker image for a backend # Usage: $(call docker-build-backend,BACKEND_NAME,DOCKERFILE_TYPE,BUILD_CONTEXT,PROGRESS_FLAG,NEEDS_BACKEND_ARG) define docker-build-backend @@ -645,12 +648,13 @@ $(eval $(call generate-docker-build-target,$(BACKEND_ACESTEP_CPP))) $(eval $(call generate-docker-build-target,$(BACKEND_MLX_DISTRIBUTED))) $(eval $(call generate-docker-build-target,$(BACKEND_TRL))) $(eval $(call generate-docker-build-target,$(BACKEND_LLAMA_CPP_QUANTIZATION))) +$(eval $(call generate-docker-build-target,$(BACKEND_KOKOROS))) # Pattern rule for docker-save targets docker-save-%: backend-images docker save local-ai-backend:$* -o backend-images/$*.tar -docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-fish-speech docker-build-faster-qwen3-tts docker-build-qwen-asr docker-build-nemo docker-build-voxcpm docker-build-whisperx docker-build-ace-step docker-build-acestep-cpp docker-build-voxtral docker-build-mlx-distributed docker-build-trl docker-build-llama-cpp-quantization +docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm 
docker-build-vllm-omni docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-fish-speech docker-build-faster-qwen3-tts docker-build-qwen-asr docker-build-nemo docker-build-voxcpm docker-build-whisperx docker-build-ace-step docker-build-acestep-cpp docker-build-voxtral docker-build-mlx-distributed docker-build-trl docker-build-llama-cpp-quantization docker-build-kokoros ######################################################## ### Mock Backend for E2E Tests diff --git a/backend/Dockerfile.rust b/backend/Dockerfile.rust new file mode 100644 index 000000000000..0b88743ed5f5 --- /dev/null +++ b/backend/Dockerfile.rust @@ -0,0 +1,80 @@ +ARG BASE_IMAGE=ubuntu:24.04 + +FROM ${BASE_IMAGE} AS builder +ARG BACKEND=kokoros +ARG BUILD_TYPE +ENV BUILD_TYPE=${BUILD_TYPE} +ARG CUDA_MAJOR_VERSION +ARG CUDA_MINOR_VERSION +ARG SKIP_DRIVERS=false +ENV CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} +ENV CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} +ENV DEBIAN_FRONTEND=noninteractive +ARG TARGETARCH +ARG TARGETVARIANT +ARG UBUNTU_VERSION=2404 + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + build-essential \ + git ccache \ + ca-certificates \ + make cmake wget \ + curl unzip \ + clang \ + pkg-config \ + libssl-dev \ + espeak-ng libespeak-ng-dev \ + libsonic-dev libpcaudio-dev \ + libopus-dev \ + protobuf-compiler && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Cuda +ENV PATH=/usr/local/cuda/bin:${PATH} + +# CuBLAS requirements +RUN < Result<(), Box> { + let proto_path = std::env::var("BACKEND_PROTO_PATH") + .unwrap_or_else(|_| "proto/backend.proto".to_string()); + + let proto_dir = std::path::Path::new(&proto_path) + .parent() + .unwrap_or(std::path::Path::new(".")); + + tonic_build::configure() + .build_server(true) + .build_client(false) + 
.compile_protos(&[&proto_path], &[proto_dir])?; + + Ok(()) +} diff --git a/backend/rust/kokoros/package.sh b/backend/rust/kokoros/package.sh new file mode 100644 index 000000000000..83076352100b --- /dev/null +++ b/backend/rust/kokoros/package.sh @@ -0,0 +1,50 @@ +#!/bin/bash +set -e + +CURDIR=$(dirname "$(realpath $0)") +mkdir -p $CURDIR/package/lib + +# Copy the binary +cp -avf $CURDIR/target/release/kokoros-grpc $CURDIR/package/ + +# Copy the run script +cp -rfv $CURDIR/run.sh $CURDIR/package/ +chmod +x $CURDIR/package/run.sh + +# Copy ONNX Runtime shared libraries from ort build artifacts +ORT_LIB_DIR=$(find $CURDIR/target/release/build -name "libonnxruntime*.so*" -path "*/ort-sys-*/out/*" -exec dirname {} \; 2>/dev/null | head -1) +if [ -n "$ORT_LIB_DIR" ]; then + cp -avfL $ORT_LIB_DIR/libonnxruntime*.so* $CURDIR/package/lib/ 2>/dev/null || true +fi + +# Copy espeak-ng data +if [ -d "/usr/share/espeak-ng-data" ]; then + cp -rf /usr/share/espeak-ng-data $CURDIR/package/ +elif [ -d "/usr/lib/x86_64-linux-gnu/espeak-ng-data" ]; then + cp -rf /usr/lib/x86_64-linux-gnu/espeak-ng-data $CURDIR/package/ +fi + +# Copy ALL dynamic library dependencies of the binary +echo "Bundling dynamic library dependencies..." 
+ldd $CURDIR/target/release/kokoros-grpc | grep "=>" | awk '{print $3}' | while read lib; do + if [ -n "$lib" ] && [ -f "$lib" ]; then + cp -avfL "$lib" $CURDIR/package/lib/ 2>/dev/null || true + fi +done + +# Copy CA certificates for HTTPS (needed for model auto-download) +if [ -d "/etc/ssl/certs" ]; then + mkdir -p $CURDIR/package/etc/ssl + cp -rf /etc/ssl/certs $CURDIR/package/etc/ssl/ +fi + +# Copy the dynamic linker +if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then + cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so +elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then + cp -arfLv /lib/ld-linux-aarch64.so.1 $CURDIR/package/lib/ld.so +fi + +echo "Packaging completed successfully" +ls -liah $CURDIR/package/ +ls -liah $CURDIR/package/lib/ diff --git a/backend/rust/kokoros/run.sh b/backend/rust/kokoros/run.sh new file mode 100755 index 000000000000..bdea9f77a914 --- /dev/null +++ b/backend/rust/kokoros/run.sh @@ -0,0 +1,23 @@ +#!/bin/bash +set -ex + +CURDIR=$(dirname "$(realpath $0)") + +export LD_LIBRARY_PATH=$CURDIR/lib:${LD_LIBRARY_PATH:-} + +# SSL certificates for model auto-download +if [ -d "$CURDIR/etc/ssl/certs" ]; then + export SSL_CERT_DIR=$CURDIR/etc/ssl/certs +fi + +# espeak-ng data directory +if [ -d "$CURDIR/espeak-ng-data" ]; then + export ESPEAK_NG_DATA=$CURDIR/espeak-ng-data +fi + +# Use bundled ld.so if present (portability) +if [ -f $CURDIR/lib/ld.so ]; then + exec $CURDIR/lib/ld.so $CURDIR/kokoros-grpc "$@" +fi + +exec $CURDIR/kokoros-grpc "$@" diff --git a/backend/rust/kokoros/sources/Kokoros b/backend/rust/kokoros/sources/Kokoros new file mode 160000 index 000000000000..7089168f0ca2 --- /dev/null +++ b/backend/rust/kokoros/sources/Kokoros @@ -0,0 +1 @@ +Subproject commit 7089168f0ca2d8e1fcd8e523c9d75d915c6afdff diff --git a/backend/rust/kokoros/src/auth.rs b/backend/rust/kokoros/src/auth.rs new file mode 100644 index 000000000000..b927eaebbcb3 --- /dev/null +++ b/backend/rust/kokoros/src/auth.rs @@ -0,0 +1,26 @@ +use tonic::{Request, 
Status}; + +/// Returns an interceptor function if LOCALAI_GRPC_AUTH_TOKEN is set. +pub fn make_auth_interceptor( +) -> Option) -> Result, Status> + Clone> { + let token = std::env::var("LOCALAI_GRPC_AUTH_TOKEN").ok()?; + if token.is_empty() { + return None; + } + let expected = format!("Bearer {}", token); + Some( + move |req: Request<()>| -> Result, Status> { + let meta = req.metadata(); + match meta.get("authorization") { + Some(val) => { + if val.as_bytes() == expected.as_bytes() { + Ok(req) + } else { + Err(Status::unauthenticated("invalid token")) + } + } + None => Err(Status::unauthenticated("missing authorization")), + } + }, + ) +} diff --git a/backend/rust/kokoros/src/main.rs b/backend/rust/kokoros/src/main.rs new file mode 100644 index 000000000000..c57181f7fc05 --- /dev/null +++ b/backend/rust/kokoros/src/main.rs @@ -0,0 +1,51 @@ +use clap::Parser; +use tonic::transport::Server; + +mod auth; +mod service; + +pub mod backend { + tonic::include_proto!("backend"); +} + +#[derive(Parser, Debug)] +#[command(name = "kokoros-grpc")] +struct Cli { + /// gRPC listen address (host:port) + #[arg(long, default_value = "127.0.0.1:50051")] + addr: String, +} + +#[tokio::main] +async fn main() -> Result<(), Box> { + tracing_subscriber::fmt() + .with_writer(std::io::stderr) + .with_env_filter( + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")), + ) + .init(); + + let cli = Cli::parse(); + let addr = cli.addr.parse()?; + + tracing::info!("Starting kokoros gRPC server on {}", addr); + + let mut builder = Server::builder(); + + if let Some(interceptor) = auth::make_auth_interceptor() { + tracing::info!("Bearer token authentication enabled"); + let svc = backend::backend_server::BackendServer::with_interceptor( + service::KokorosService::default(), + interceptor, + ); + builder.add_service(svc).serve(addr).await?; + } else { + let svc =
backend::backend_server::BackendServer::new(service::KokorosService::default()) + .max_decoding_message_size(50 * 1024 * 1024) + .max_encoding_message_size(50 * 1024 * 1024); + builder.add_service(svc).serve(addr).await?; + } + + Ok(()) +} diff --git a/backend/rust/kokoros/src/service.rs b/backend/rust/kokoros/src/service.rs new file mode 100644 index 000000000000..c3603dcabf9f --- /dev/null +++ b/backend/rust/kokoros/src/service.rs @@ -0,0 +1,439 @@ +use std::sync::{Arc, Mutex}; +use tokio::sync::Mutex as TokioMutex; +use tokio_stream::wrappers::ReceiverStream; +use tonic::{Request, Response, Status}; + +use kokoros::tts::koko::{TTSKoko, TTSOpts}; + +use crate::backend; +use crate::backend::backend_server::Backend; + +pub struct KokorosService { + tts: Arc>>, + language: Arc>, + speed: Arc>, +} + +impl Default for KokorosService { + fn default() -> Self { + Self { + tts: Arc::new(TokioMutex::new(None)), + language: Arc::new(Mutex::new("en-us".to_string())), + speed: Arc::new(Mutex::new(1.0)), + } + } +} + +#[tonic::async_trait] +impl Backend for KokorosService { + async fn health( + &self, + _req: Request, + ) -> Result, Status> { + Ok(Response::new(backend::Reply { + message: b"OK".to_vec(), + ..Default::default() + })) + } + + async fn load_model( + &self, + req: Request, + ) -> Result, Status> { + let opts = req.into_inner(); + + // Model path: join ModelPath + Model, or just Model + let model_path = if !opts.model_path.is_empty() && !opts.model.is_empty() { + format!("{}/{}", opts.model_path, opts.model) + } else if !opts.model.is_empty() { + opts.model.clone() + } else { + "checkpoints/kokoro-v1.0.onnx".to_string() + }; + + // Voices data path from AudioPath, or derive from model dir + let voices_path = if !opts.audio_path.is_empty() { + opts.audio_path.clone() + } else { + let model_dir = std::path::Path::new(&model_path) + .parent() + .map(|p| p.to_string_lossy().to_string()) + .unwrap_or_else(|| ".".to_string()); + format!("{}/voices-v1.0.bin", model_dir) 
+ }; + + // Parse options (key:value pairs) + for opt in &opts.options { + if let Some((key, value)) = opt.split_once(':') { + match key { + "lang_code" => *self.language.lock().unwrap() = value.to_string(), + "speed" => { + if let Ok(s) = value.parse::() { + *self.speed.lock().unwrap() = s; + } + } + _ => {} + } + } + } + + tracing::info!("Loading Kokoros model from: {}", model_path); + tracing::info!("Loading voices from: {}", voices_path); + tracing::info!("Language: {}", self.language.lock().unwrap()); + + let tts = TTSKoko::new(&model_path, &voices_path).await; + *self.tts.lock().await = Some(tts); + + tracing::info!("Kokoros TTS model loaded successfully"); + Ok(Response::new(backend::Result { + success: true, + message: "Kokoros TTS model loaded".into(), + })) + } + + async fn tts( + &self, + req: Request, + ) -> Result, Status> { + let req = req.into_inner(); + let tts_guard = self.tts.lock().await; + let tts = tts_guard + .as_ref() + .ok_or_else(|| Status::failed_precondition("Model not loaded"))?; + + let voice = if req.voice.is_empty() { + "af_heart" + } else { + &req.voice + }; + let lang = req + .language + .unwrap_or_else(|| self.language.lock().unwrap().clone()); + let speed = *self.speed.lock().unwrap(); + + tracing::debug!( + text = req.text, + voice = voice, + lang = lang.as_str(), + dst = req.dst, + "TTS request" + ); + + match tts.tts(TTSOpts { + txt: &req.text, + lan: &lang, + style_name: voice, + save_path: &req.dst, + mono: true, + speed, + initial_silence: None, + }) { + Ok(()) => Ok(Response::new(backend::Result { + success: true, + message: String::new(), + })), + Err(e) => { + tracing::error!("TTS error: {}", e); + Ok(Response::new(backend::Result { + success: false, + message: format!("TTS error: {}", e), + })) + } + } + } + + type TTSStreamStream = ReceiverStream>; + + async fn tts_stream( + &self, + req: Request, + ) -> Result, Status> { + let req = req.into_inner(); + let tts_guard = self.tts.lock().await; + let tts = tts_guard + 
.as_ref() + .ok_or_else(|| Status::failed_precondition("Model not loaded"))? + .clone(); + + let voice = if req.voice.is_empty() { + "af_heart".to_string() + } else { + req.voice + }; + let lang = req + .language + .unwrap_or_else(|| self.language.lock().unwrap().clone()); + let speed = *self.speed.lock().unwrap(); + let text = req.text; + + let (tx, rx) = tokio::sync::mpsc::channel(32); + + // Send sample rate info as first message + let tx_clone = tx.clone(); + let _ = tx_clone + .send(Ok(backend::Reply { + message: br#"{"sample_rate":24000}"#.to_vec(), + ..Default::default() + })) + .await; + + tokio::task::spawn_blocking(move || { + let result = tts.tts_raw_audio_streaming( + &text, + &lang, + &voice, + speed, + None, + None, + None, + None, + |audio_chunk: Vec| -> Result<(), Box> { + // Convert f32 PCM to 16-bit PCM bytes (what LocalAI expects for streaming) + let bytes: Vec = audio_chunk + .iter() + .flat_map(|&s| { + let clamped = s.clamp(-1.0, 1.0); + let i16_val = (clamped * 32767.0) as i16; + i16_val.to_le_bytes() + }) + .collect(); + tx.blocking_send(Ok(backend::Reply { + audio: bytes, + ..Default::default() + })) + .map_err(|e| Box::new(e) as Box) + }, + ); + if let Err(e) = result { + tracing::error!("TTSStream error: {}", e); + } + }); + + Ok(Response::new(ReceiverStream::new(rx))) + } + + async fn status( + &self, + _req: Request, + ) -> Result, Status> { + let tts = self.tts.lock().await; + let state = if tts.is_some() { + backend::status_response::State::Ready as i32 + } else { + backend::status_response::State::Uninitialized as i32 + }; + Ok(Response::new(backend::StatusResponse { + state, + memory: None, + })) + } + + async fn free( + &self, + _req: Request, + ) -> Result, Status> { + *self.tts.lock().await = None; + Ok(Response::new(backend::Result { + success: true, + message: "Model freed".into(), + })) + } + + // --- Unimplemented RPCs --- + + async fn predict( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not 
supported")) + } + + type PredictStreamStream = ReceiverStream>; + + async fn predict_stream( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn embedding( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn generate_image( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn generate_video( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn audio_transcription( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn sound_generation( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn tokenize_string( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn detect( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn stores_set( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn stores_delete( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn stores_get( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn stores_find( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn rerank( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn get_metrics( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn vad( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn audio_encode( + &self, + _: Request, + ) -> Result, 
Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn audio_decode( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn model_metadata( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn start_fine_tune( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + type FineTuneProgressStream = ReceiverStream>; + + async fn fine_tune_progress( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn stop_fine_tune( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn list_checkpoints( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn export_model( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn start_quantization( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + type QuantizationProgressStream = + ReceiverStream>; + + async fn quantization_progress( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn stop_quantization( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } +} diff --git a/gallery/index.yaml b/gallery/index.yaml index 5458c3ecfada..29ac445a119c 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2433,6 +2433,120 @@ - lang_code:a known_usecases: - tts +- name: "kokoros" + url: "github:mudler/LocalAI/gallery/kokoros.yaml@master" + size: "327MB" + urls: + - https://github.com/lucasjinreal/Kokoros + license: apache-2.0 + tags: + - tts + - kokoros + - cpu + - text-to-speech + - rust + description: | + Kokoros is a pure Rust TTS backend using the Kokoro v1.0 ONNX model (82M parameters). 
+ Fast, streaming TTS with high quality. American English with af_heart voice. + overrides: + backend: "kokoros" + name: "kokoros" + description: "Kokoros Rust TTS - American English" + parameters: + voice: "af_heart" + options: + - lang_code:en-us + known_usecases: + - tts + files: + - filename: "kokoro-v1.0.onnx" + uri: "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/kokoro-v1.0.onnx" + - filename: "voices-v1.0.bin" + uri: "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/voices-v1.0.bin" +- name: "kokoros-ja" + url: "github:mudler/LocalAI/gallery/kokoros.yaml@master" + size: "327MB" + urls: + - https://github.com/lucasjinreal/Kokoros + license: apache-2.0 + tags: + - tts + - kokoros + - japanese + - text-to-speech + description: | + Kokoros Rust TTS - Japanese. Uses the Kokoro v1.0 ONNX model with Japanese phonemization. + overrides: + backend: "kokoros" + name: "kokoros-ja" + description: "Kokoros Rust TTS - Japanese" + parameters: + voice: "jf_alpha" + options: + - lang_code:ja + known_usecases: + - tts + files: + - filename: "kokoro-v1.0.onnx" + uri: "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/kokoro-v1.0.onnx" + - filename: "voices-v1.0.bin" + uri: "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/voices-v1.0.bin" +- name: "kokoros-cmn" + url: "github:mudler/LocalAI/gallery/kokoros.yaml@master" + size: "327MB" + urls: + - https://github.com/lucasjinreal/Kokoros + license: apache-2.0 + tags: + - tts + - kokoros + - chinese + - text-to-speech + description: | + Kokoros Rust TTS - Mandarin Chinese. 
+ overrides: + backend: "kokoros" + name: "kokoros-cmn" + description: "Kokoros Rust TTS - Mandarin Chinese" + parameters: + voice: "zf_xiaobei" + options: + - lang_code:cmn + known_usecases: + - tts + files: + - filename: "kokoro-v1.0.onnx" + uri: "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/kokoro-v1.0.onnx" + - filename: "voices-v1.0.bin" + uri: "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/voices-v1.0.bin" +- name: "kokoros-de" + url: "github:mudler/LocalAI/gallery/kokoros.yaml@master" + size: "327MB" + urls: + - https://github.com/lucasjinreal/Kokoros + license: apache-2.0 + tags: + - tts + - kokoros + - german + - text-to-speech + description: | + Kokoros Rust TTS - German. + overrides: + backend: "kokoros" + name: "kokoros-de" + description: "Kokoros Rust TTS - German" + parameters: + voice: "df_greta" + options: + - lang_code:de + known_usecases: + - tts + files: + - filename: "kokoro-v1.0.onnx" + uri: "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/kokoro-v1.0.onnx" + - filename: "voices-v1.0.bin" + uri: "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/voices-v1.0.bin" - name: "kitten-tts" url: "github:mudler/LocalAI/gallery/virtual.yaml@master" urls: diff --git a/gallery/kokoros.yaml b/gallery/kokoros.yaml new file mode 100644 index 000000000000..e7d701194038 --- /dev/null +++ b/gallery/kokoros.yaml @@ -0,0 +1,3 @@ +--- +config_file: | + backend: kokoros