diff --git a/.gitmodules b/.gitmodules index c263dbe06f80..c63d0607eb31 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "docs/themes/hugo-theme-relearn"] path = docs/themes/hugo-theme-relearn url = https://github.com/McShelby/hugo-theme-relearn.git +[submodule "backend/rust/kokoros/sources/Kokoros"] + path = backend/rust/kokoros/sources/Kokoros + url = https://github.com/lucasjinreal/Kokoros diff --git a/Makefile b/Makefile index 61bceb7e36fe..e0f965df6f82 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ # Disable parallel execution for backend builds -.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/mlx-distributed backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/acestep-cpp backends/fish-speech backends/voxtral backends/opus backends/trl backends/llama-cpp-quantization +.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/mlx-distributed backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine 
backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/acestep-cpp backends/fish-speech backends/voxtral backends/opus backends/trl backends/llama-cpp-quantization backends/kokoros GOCMD=go GOTEST=$(GOCMD) test @@ -587,6 +587,9 @@ BACKEND_MLX_DISTRIBUTED = mlx-distributed|python|./|false|true BACKEND_TRL = trl|python|.|false|true BACKEND_LLAMA_CPP_QUANTIZATION = llama-cpp-quantization|python|.|false|true +# Rust backends +BACKEND_KOKOROS = kokoros|rust|.|false|true + # Helper function to build docker image for a backend # Usage: $(call docker-build-backend,BACKEND_NAME,DOCKERFILE_TYPE,BUILD_CONTEXT,PROGRESS_FLAG,NEEDS_BACKEND_ARG) define docker-build-backend @@ -645,12 +648,13 @@ $(eval $(call generate-docker-build-target,$(BACKEND_ACESTEP_CPP))) $(eval $(call generate-docker-build-target,$(BACKEND_MLX_DISTRIBUTED))) $(eval $(call generate-docker-build-target,$(BACKEND_TRL))) $(eval $(call generate-docker-build-target,$(BACKEND_LLAMA_CPP_QUANTIZATION))) +$(eval $(call generate-docker-build-target,$(BACKEND_KOKOROS))) # Pattern rule for docker-save targets docker-save-%: backend-images docker save local-ai-backend:$* -o backend-images/$*.tar -docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-fish-speech docker-build-faster-qwen3-tts docker-build-qwen-asr docker-build-nemo docker-build-voxcpm docker-build-whisperx docker-build-ace-step docker-build-acestep-cpp docker-build-voxtral docker-build-mlx-distributed docker-build-trl docker-build-llama-cpp-quantization +docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm 
docker-build-vllm-omni docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-fish-speech docker-build-faster-qwen3-tts docker-build-qwen-asr docker-build-nemo docker-build-voxcpm docker-build-whisperx docker-build-ace-step docker-build-acestep-cpp docker-build-voxtral docker-build-mlx-distributed docker-build-trl docker-build-llama-cpp-quantization docker-build-kokoros ######################################################## ### Mock Backend for E2E Tests diff --git a/backend/Dockerfile.rust b/backend/Dockerfile.rust new file mode 100644 index 000000000000..0b88743ed5f5 --- /dev/null +++ b/backend/Dockerfile.rust @@ -0,0 +1,80 @@ +ARG BASE_IMAGE=ubuntu:24.04 + +FROM ${BASE_IMAGE} AS builder +ARG BACKEND=kokoros +ARG BUILD_TYPE +ENV BUILD_TYPE=${BUILD_TYPE} +ARG CUDA_MAJOR_VERSION +ARG CUDA_MINOR_VERSION +ARG SKIP_DRIVERS=false +ENV CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} +ENV CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} +ENV DEBIAN_FRONTEND=noninteractive +ARG TARGETARCH +ARG TARGETVARIANT +ARG UBUNTU_VERSION=2404 + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + build-essential \ + git ccache \ + ca-certificates \ + make cmake wget \ + curl unzip \ + clang \ + pkg-config \ + libssl-dev \ + espeak-ng libespeak-ng-dev \ + libsonic-dev libpcaudio-dev \ + libopus-dev \ + protobuf-compiler && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Cuda +ENV PATH=/usr/local/cuda/bin:${PATH} + +# CuBLAS requirements +RUN < Result<(), Box> { + let proto_path = std::env::var("BACKEND_PROTO_PATH") + .unwrap_or_else(|_| "proto/backend.proto".to_string()); + + let proto_dir = std::path::Path::new(&proto_path) + .parent() + .unwrap_or(std::path::Path::new(".")); + + tonic_build::configure() + .build_server(true) + .build_client(false) + 
.compile_protos(&[&proto_path], &[proto_dir])?; + + Ok(()) +} diff --git a/backend/rust/kokoros/package.sh b/backend/rust/kokoros/package.sh new file mode 100644 index 000000000000..83076352100b --- /dev/null +++ b/backend/rust/kokoros/package.sh @@ -0,0 +1,50 @@ +#!/bin/bash +set -e + +CURDIR=$(dirname "$(realpath $0)") +mkdir -p $CURDIR/package/lib + +# Copy the binary +cp -avf $CURDIR/target/release/kokoros-grpc $CURDIR/package/ + +# Copy the run script +cp -rfv $CURDIR/run.sh $CURDIR/package/ +chmod +x $CURDIR/package/run.sh + +# Copy ONNX Runtime shared libraries from ort build artifacts +ORT_LIB_DIR=$(find $CURDIR/target/release/build -name "libonnxruntime*.so*" -path "*/ort-sys-*/out/*" -exec dirname {} \; 2>/dev/null | head -1) +if [ -n "$ORT_LIB_DIR" ]; then + cp -avfL $ORT_LIB_DIR/libonnxruntime*.so* $CURDIR/package/lib/ 2>/dev/null || true +fi + +# Copy espeak-ng data +if [ -d "/usr/share/espeak-ng-data" ]; then + cp -rf /usr/share/espeak-ng-data $CURDIR/package/ +elif [ -d "/usr/lib/x86_64-linux-gnu/espeak-ng-data" ]; then + cp -rf /usr/lib/x86_64-linux-gnu/espeak-ng-data $CURDIR/package/ +fi + +# Copy ALL dynamic library dependencies of the binary +echo "Bundling dynamic library dependencies..." 
+ldd $CURDIR/target/release/kokoros-grpc | grep "=>" | awk '{print $3}' | while read lib; do + if [ -n "$lib" ] && [ -f "$lib" ]; then + cp -avfL "$lib" $CURDIR/package/lib/ 2>/dev/null || true + fi +done + +# Copy CA certificates for HTTPS (needed for model auto-download) +if [ -d "/etc/ssl/certs" ]; then + mkdir -p $CURDIR/package/etc/ssl + cp -rf /etc/ssl/certs $CURDIR/package/etc/ssl/ +fi + +# Copy the dynamic linker +if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then + cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so +elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then + cp -arfLv /lib/ld-linux-aarch64.so.1 $CURDIR/package/lib/ld.so +fi + +echo "Packaging completed successfully" +ls -liah $CURDIR/package/ +ls -liah $CURDIR/package/lib/ diff --git a/backend/rust/kokoros/run.sh b/backend/rust/kokoros/run.sh new file mode 100755 index 000000000000..bdea9f77a914 --- /dev/null +++ b/backend/rust/kokoros/run.sh @@ -0,0 +1,23 @@ +#!/bin/bash +set -ex + +CURDIR=$(dirname "$(realpath $0)") + +export LD_LIBRARY_PATH=$CURDIR/lib:${LD_LIBRARY_PATH:-} + +# SSL certificates for model auto-download +if [ -d "$CURDIR/etc/ssl/certs" ]; then + export SSL_CERT_DIR=$CURDIR/etc/ssl/certs +fi + +# espeak-ng data directory +if [ -d "$CURDIR/espeak-ng-data" ]; then + export ESPEAK_NG_DATA=$CURDIR/espeak-ng-data +fi + +# Use bundled ld.so if present (portability) +if [ -f $CURDIR/lib/ld.so ]; then + exec $CURDIR/lib/ld.so $CURDIR/kokoros-grpc "$@" +fi + +exec $CURDIR/kokoros-grpc "$@" diff --git a/backend/rust/kokoros/sources/Kokoros b/backend/rust/kokoros/sources/Kokoros new file mode 160000 index 000000000000..7089168f0ca2 --- /dev/null +++ b/backend/rust/kokoros/sources/Kokoros @@ -0,0 +1 @@ +Subproject commit 7089168f0ca2d8e1fcd8e523c9d75d915c6afdff diff --git a/backend/rust/kokoros/src/auth.rs b/backend/rust/kokoros/src/auth.rs new file mode 100644 index 000000000000..b927eaebbcb3 --- /dev/null +++ b/backend/rust/kokoros/src/auth.rs @@ -0,0 +1,26 @@ +use tonic::{Request, 
Status}; + +/// Returns an interceptor function if LOCALAI_GRPC_AUTH_TOKEN is set. +pub fn make_auth_interceptor( +) -> Option) -> Result, Status> + Clone> { + let token = std::env::var("LOCALAI_GRPC_AUTH_TOKEN").ok()?; + if token.is_empty() { + return None; + } + let expected = format!("Bearer {}", token); + Some( + move |req: Request<()>| -> Result, Status> { + let meta = req.metadata(); + match meta.get("authorization") { + Some(val) => { + if val.as_bytes() == expected.as_bytes() { + Ok(req) + } else { + Err(Status::unauthenticated("invalid token")) + } + } + None => Err(Status::unauthenticated("missing authorization")), + } + }, + ) +} diff --git a/backend/rust/kokoros/src/main.rs b/backend/rust/kokoros/src/main.rs new file mode 100644 index 000000000000..c57181f7fc05 --- /dev/null +++ b/backend/rust/kokoros/src/main.rs @@ -0,0 +1,51 @@ +use clap::Parser; +use tonic::transport::Server; + +mod auth; +mod service; + +pub mod backend { + tonic::include_proto!("backend"); +} + +#[derive(Parser, Debug)] +#[command(name = "kokoros-grpc")] +struct Cli { + /// gRPC listen address (host:port) + #[arg(long, default_value = "127.0.0.1:50051")] + addr: String, +} + +#[tokio::main] +async fn main() -> Result<(), Box> { + tracing_subscriber::fmt() + .with_writer(std::io::stderr) + .with_env_filter( + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")), + ) + .init(); + + let cli = Cli::parse(); + let addr = cli.addr.parse()?; + + tracing::info!("Starting kokoros gRPC server on {}", addr); + + let mut builder = Server::builder(); + + if let Some(interceptor) = auth::make_auth_interceptor() { + tracing::info!("Bearer token authentication enabled"); + let svc = backend::backend_server::BackendServer::with_interceptor( + service::KokorosService::default(), + interceptor, + ); + builder.add_service(svc).serve(addr).await?; + } else { + let svc =
backend::backend_server::BackendServer::new(service::KokorosService::default()) + .max_decoding_message_size(50 * 1024 * 1024) + .max_encoding_message_size(50 * 1024 * 1024); + builder.add_service(svc).serve(addr).await?; + } + + Ok(()) +} diff --git a/backend/rust/kokoros/src/service.rs b/backend/rust/kokoros/src/service.rs new file mode 100644 index 000000000000..c3603dcabf9f --- /dev/null +++ b/backend/rust/kokoros/src/service.rs @@ -0,0 +1,439 @@ +use std::sync::{Arc, Mutex}; +use tokio::sync::Mutex as TokioMutex; +use tokio_stream::wrappers::ReceiverStream; +use tonic::{Request, Response, Status}; + +use kokoros::tts::koko::{TTSKoko, TTSOpts}; + +use crate::backend; +use crate::backend::backend_server::Backend; + +pub struct KokorosService { + tts: Arc>>, + language: Arc>, + speed: Arc>, +} + +impl Default for KokorosService { + fn default() -> Self { + Self { + tts: Arc::new(TokioMutex::new(None)), + language: Arc::new(Mutex::new("en-us".to_string())), + speed: Arc::new(Mutex::new(1.0)), + } + } +} + +#[tonic::async_trait] +impl Backend for KokorosService { + async fn health( + &self, + _req: Request, + ) -> Result, Status> { + Ok(Response::new(backend::Reply { + message: b"OK".to_vec(), + ..Default::default() + })) + } + + async fn load_model( + &self, + req: Request, + ) -> Result, Status> { + let opts = req.into_inner(); + + // Model path: join ModelPath + Model, or just Model + let model_path = if !opts.model_path.is_empty() && !opts.model.is_empty() { + format!("{}/{}", opts.model_path, opts.model) + } else if !opts.model.is_empty() { + opts.model.clone() + } else { + "checkpoints/kokoro-v1.0.onnx".to_string() + }; + + // Voices data path from AudioPath, or derive from model dir + let voices_path = if !opts.audio_path.is_empty() { + opts.audio_path.clone() + } else { + let model_dir = std::path::Path::new(&model_path) + .parent() + .map(|p| p.to_string_lossy().to_string()) + .unwrap_or_else(|| ".".to_string()); + format!("{}/voices-v1.0.bin", model_dir) 
+ }; + + // Parse options (key:value pairs) + for opt in &opts.options { + if let Some((key, value)) = opt.split_once(':') { + match key { + "lang_code" => *self.language.lock().unwrap() = value.to_string(), + "speed" => { + if let Ok(s) = value.parse::() { + *self.speed.lock().unwrap() = s; + } + } + _ => {} + } + } + } + + tracing::info!("Loading Kokoros model from: {}", model_path); + tracing::info!("Loading voices from: {}", voices_path); + tracing::info!("Language: {}", self.language.lock().unwrap()); + + let tts = TTSKoko::new(&model_path, &voices_path).await; + *self.tts.lock().await = Some(tts); + + tracing::info!("Kokoros TTS model loaded successfully"); + Ok(Response::new(backend::Result { + success: true, + message: "Kokoros TTS model loaded".into(), + })) + } + + async fn tts( + &self, + req: Request, + ) -> Result, Status> { + let req = req.into_inner(); + let tts_guard = self.tts.lock().await; + let tts = tts_guard + .as_ref() + .ok_or_else(|| Status::failed_precondition("Model not loaded"))?; + + let voice = if req.voice.is_empty() { + "af_heart" + } else { + &req.voice + }; + let lang = req + .language + .unwrap_or_else(|| self.language.lock().unwrap().clone()); + let speed = *self.speed.lock().unwrap(); + + tracing::debug!( + text = req.text, + voice = voice, + lang = lang.as_str(), + dst = req.dst, + "TTS request" + ); + + match tts.tts(TTSOpts { + txt: &req.text, + lan: &lang, + style_name: voice, + save_path: &req.dst, + mono: true, + speed, + initial_silence: None, + }) { + Ok(()) => Ok(Response::new(backend::Result { + success: true, + message: String::new(), + })), + Err(e) => { + tracing::error!("TTS error: {}", e); + Ok(Response::new(backend::Result { + success: false, + message: format!("TTS error: {}", e), + })) + } + } + } + + type TTSStreamStream = ReceiverStream>; + + async fn tts_stream( + &self, + req: Request, + ) -> Result, Status> { + let req = req.into_inner(); + let tts_guard = self.tts.lock().await; + let tts = tts_guard + 
.as_ref() + .ok_or_else(|| Status::failed_precondition("Model not loaded"))? + .clone(); + + let voice = if req.voice.is_empty() { + "af_heart".to_string() + } else { + req.voice + }; + let lang = req + .language + .unwrap_or_else(|| self.language.lock().unwrap().clone()); + let speed = *self.speed.lock().unwrap(); + let text = req.text; + + let (tx, rx) = tokio::sync::mpsc::channel(32); + + // Send sample rate info as first message + let tx_clone = tx.clone(); + let _ = tx_clone + .send(Ok(backend::Reply { + message: br#"{"sample_rate":24000}"#.to_vec(), + ..Default::default() + })) + .await; + + tokio::task::spawn_blocking(move || { + let result = tts.tts_raw_audio_streaming( + &text, + &lang, + &voice, + speed, + None, + None, + None, + None, + |audio_chunk: Vec| -> Result<(), Box> { + // Convert f32 PCM to 16-bit PCM bytes (what LocalAI expects for streaming) + let bytes: Vec = audio_chunk + .iter() + .flat_map(|&s| { + let clamped = s.clamp(-1.0, 1.0); + let i16_val = (clamped * 32767.0) as i16; + i16_val.to_le_bytes() + }) + .collect(); + tx.blocking_send(Ok(backend::Reply { + audio: bytes, + ..Default::default() + })) + .map_err(|e| Box::new(e) as Box) + }, + ); + if let Err(e) = result { + tracing::error!("TTSStream error: {}", e); + } + }); + + Ok(Response::new(ReceiverStream::new(rx))) + } + + async fn status( + &self, + _req: Request, + ) -> Result, Status> { + let tts = self.tts.lock().await; + let state = if tts.is_some() { + backend::status_response::State::Ready as i32 + } else { + backend::status_response::State::Uninitialized as i32 + }; + Ok(Response::new(backend::StatusResponse { + state, + memory: None, + })) + } + + async fn free( + &self, + _req: Request, + ) -> Result, Status> { + *self.tts.lock().await = None; + Ok(Response::new(backend::Result { + success: true, + message: "Model freed".into(), + })) + } + + // --- Unimplemented RPCs --- + + async fn predict( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not 
supported")) + } + + type PredictStreamStream = ReceiverStream>; + + async fn predict_stream( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn embedding( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn generate_image( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn generate_video( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn audio_transcription( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn sound_generation( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn tokenize_string( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn detect( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn stores_set( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn stores_delete( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn stores_get( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn stores_find( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn rerank( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn get_metrics( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn vad( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn audio_encode( + &self, + _: Request, + ) -> Result, 
Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn audio_decode( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn model_metadata( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn start_fine_tune( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + type FineTuneProgressStream = ReceiverStream>; + + async fn fine_tune_progress( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn stop_fine_tune( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn list_checkpoints( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn export_model( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn start_quantization( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + type QuantizationProgressStream = + ReceiverStream>; + + async fn quantization_progress( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } + + async fn stop_quantization( + &self, + _: Request, + ) -> Result, Status> { + Err(Status::unimplemented("Not supported")) + } +} diff --git a/gallery/index.yaml b/gallery/index.yaml index 5458c3ecfada..29ac445a119c 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2433,6 +2433,120 @@ - lang_code:a known_usecases: - tts +- name: "kokoros" + url: "github:mudler/LocalAI/gallery/kokoros.yaml@master" + size: "327MB" + urls: + - https://github.com/lucasjinreal/Kokoros + license: apache-2.0 + tags: + - tts + - kokoros + - cpu + - text-to-speech + - rust + description: | + Kokoros is a pure Rust TTS backend using the Kokoro v1.0 ONNX model (82M parameters). 
+ Fast, streaming TTS with high quality. American English with af_heart voice. + overrides: + backend: "kokoros" + name: "kokoros" + description: "Kokoros Rust TTS - American English" + parameters: + voice: "af_heart" + options: + - lang_code:en-us + known_usecases: + - tts + files: + - filename: "kokoro-v1.0.onnx" + uri: "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/kokoro-v1.0.onnx" + - filename: "voices-v1.0.bin" + uri: "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/voices-v1.0.bin" +- name: "kokoros-ja" + url: "github:mudler/LocalAI/gallery/kokoros.yaml@master" + size: "327MB" + urls: + - https://github.com/lucasjinreal/Kokoros + license: apache-2.0 + tags: + - tts + - kokoros + - japanese + - text-to-speech + description: | + Kokoros Rust TTS - Japanese. Uses the Kokoro v1.0 ONNX model with Japanese phonemization. + overrides: + backend: "kokoros" + name: "kokoros-ja" + description: "Kokoros Rust TTS - Japanese" + parameters: + voice: "jf_alpha" + options: + - lang_code:ja + known_usecases: + - tts + files: + - filename: "kokoro-v1.0.onnx" + uri: "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/kokoro-v1.0.onnx" + - filename: "voices-v1.0.bin" + uri: "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/voices-v1.0.bin" +- name: "kokoros-cmn" + url: "github:mudler/LocalAI/gallery/kokoros.yaml@master" + size: "327MB" + urls: + - https://github.com/lucasjinreal/Kokoros + license: apache-2.0 + tags: + - tts + - kokoros + - chinese + - text-to-speech + description: | + Kokoros Rust TTS - Mandarin Chinese. 
+ overrides: + backend: "kokoros" + name: "kokoros-cmn" + description: "Kokoros Rust TTS - Mandarin Chinese" + parameters: + voice: "zf_xiaobei" + options: + - lang_code:cmn + known_usecases: + - tts + files: + - filename: "kokoro-v1.0.onnx" + uri: "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/kokoro-v1.0.onnx" + - filename: "voices-v1.0.bin" + uri: "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/voices-v1.0.bin" +- name: "kokoros-de" + url: "github:mudler/LocalAI/gallery/kokoros.yaml@master" + size: "327MB" + urls: + - https://github.com/lucasjinreal/Kokoros + license: apache-2.0 + tags: + - tts + - kokoros + - german + - text-to-speech + description: | + Kokoros Rust TTS - German. + overrides: + backend: "kokoros" + name: "kokoros-de" + description: "Kokoros Rust TTS - German" + parameters: + voice: "df_greta" + options: + - lang_code:de + known_usecases: + - tts + files: + - filename: "kokoro-v1.0.onnx" + uri: "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/kokoro-v1.0.onnx" + - filename: "voices-v1.0.bin" + uri: "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/voices-v1.0.bin" - name: "kitten-tts" url: "github:mudler/LocalAI/gallery/virtual.yaml@master" urls: diff --git a/gallery/kokoros.yaml b/gallery/kokoros.yaml new file mode 100644 index 000000000000..e7d701194038 --- /dev/null +++ b/gallery/kokoros.yaml @@ -0,0 +1,3 @@ +--- +config_file: | + backend: kokoros