mudler · richiejp · Apr 3, 2026 · mudler · Apr 3, 2026
diff --git a/.gitmodules b/.gitmodules
@@ -1,3 +1,6 @@
 [submodule "docs/themes/hugo-theme-relearn"]
 	path = docs/themes/hugo-theme-relearn
 	url = https://github.com/McShelby/hugo-theme-relearn.git
+[submodule "backend/rust/kokoros/sources/Kokoros"]
+	path = backend/rust/kokoros/sources/Kokoros
+	url = https://github.com/lucasjinreal/Kokoros
diff --git a/Makefile b/Makefile
@@ -1,5 +1,5 @@
 # Disable parallel execution for backend builds
-.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/mlx-distributed backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/acestep-cpp backends/fish-speech backends/voxtral backends/opus backends/trl backends/llama-cpp-quantization
+.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/mlx-distributed backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/acestep-cpp backends/fish-speech backends/voxtral backends/opus backends/trl backends/llama-cpp-quantization backends/kokoros
 
 GOCMD=go
 GOTEST=$(GOCMD) test
@@ -587,6 +587,9 @@ BACKEND_MLX_DISTRIBUTED = mlx-distributed|python|./|false|true
 BACKEND_TRL = trl|python|.|false|true
 BACKEND_LLAMA_CPP_QUANTIZATION = llama-cpp-quantization|python|.|false|true
 
+# Rust backends
+BACKEND_KOKOROS = kokoros|rust|.|false|true
+
 # Helper function to build docker image for a backend
 # Usage: $(call docker-build-backend,BACKEND_NAME,DOCKERFILE_TYPE,BUILD_CONTEXT,PROGRESS_FLAG,NEEDS_BACKEND_ARG)
 define docker-build-backend
@@ -645,12 +648,13 @@ $(eval $(call generate-docker-build-target,$(BACKEND_ACESTEP_CPP)))
 $(eval $(call generate-docker-build-target,$(BACKEND_MLX_DISTRIBUTED)))
 $(eval $(call generate-docker-build-target,$(BACKEND_TRL)))
 $(eval $(call generate-docker-build-target,$(BACKEND_LLAMA_CPP_QUANTIZATION)))
+$(eval $(call generate-docker-build-target,$(BACKEND_KOKOROS)))
 
 # Pattern rule for docker-save targets
 docker-save-%: backend-images
 	docker save local-ai-backend:$* -o backend-images/$*.tar
 
-docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-fish-speech docker-build-faster-qwen3-tts docker-build-qwen-asr docker-build-nemo docker-build-voxcpm docker-build-whisperx docker-build-ace-step docker-build-acestep-cpp docker-build-voxtral docker-build-mlx-distributed docker-build-trl docker-build-llama-cpp-quantization
+docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-fish-speech docker-build-faster-qwen3-tts docker-build-qwen-asr docker-build-nemo docker-build-voxcpm docker-build-whisperx docker-build-ace-step docker-build-acestep-cpp docker-build-voxtral docker-build-mlx-distributed docker-build-trl docker-build-llama-cpp-quantization docker-build-kokoros
 
 ########################################################
 ### Mock Backend for E2E Tests

diff --git a/backend/Dockerfile.rust b/backend/Dockerfile.rust
@@ -0,0 +1,80 @@
+ARG BASE_IMAGE=ubuntu:24.04
+
+FROM ${BASE_IMAGE} AS builder
+ARG BACKEND=kokoros
+ARG BUILD_TYPE
+ENV BUILD_TYPE=${BUILD_TYPE}
+ARG CUDA_MAJOR_VERSION
+ARG CUDA_MINOR_VERSION
+ARG SKIP_DRIVERS=false
+ENV CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION}
+ENV CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION}
+ENV DEBIAN_FRONTEND=noninteractive
+ARG TARGETARCH
+ARG TARGETVARIANT
+ARG UBUNTU_VERSION=2404
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        build-essential \
+        git ccache \
+        ca-certificates \
+        make cmake wget \
+        curl unzip \
+        clang \
+        pkg-config \
+        libssl-dev \
+        espeak-ng libespeak-ng-dev \
+        libsonic-dev libpcaudio-dev \
+        libopus-dev \
+        protobuf-compiler && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Put the CUDA toolchain directory on PATH; it only exists when the CUDA packages below get installed
+ENV PATH=/usr/local/cuda/bin:${PATH}
+
+# CUDA toolkit packages (nvcc, cuBLAS, cuFFT, ...) for cublas/l4t builds unless SKIP_DRIVERS is set. NOTE(review): the script has no `set -e`, so a failing step may not fail this RUN - confirm that is intended
+RUN <<EOT bash
+    if ( [ "${BUILD_TYPE}" = "cublas" ] || [ "${BUILD_TYPE}" = "l4t" ] ) && [ "${SKIP_DRIVERS}" = "false" ]; then
+        apt-get update && \
+        apt-get install -y --no-install-recommends \
+            software-properties-common pciutils
+        if [ "amd64" = "$TARGETARCH" ]; then
+            curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/x86_64/cuda-keyring_1.1-1_all.deb
+        fi
+        if [ "arm64" = "$TARGETARCH" ]; then
+            if [ "${CUDA_MAJOR_VERSION}" = "13" ]; then
+                curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/sbsa/cuda-keyring_1.1-1_all.deb
+            else
+                curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/arm64/cuda-keyring_1.1-1_all.deb
+            fi
+        fi
+        dpkg -i cuda-keyring_1.1-1_all.deb && \
+        rm -f cuda-keyring_1.1-1_all.deb && \
+        apt-get update && \
+        apt-get install -y --no-install-recommends \
+            cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+            libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+            libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+            libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+            libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+            libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}
+        apt-get clean && rm -rf /var/lib/apt/lists/*
+    fi
+EOT
+
+# Install the Rust toolchain via rustup (-y accepts the installer defaults) and expose cargo on PATH
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
+ENV PATH="/root/.cargo/bin:${PATH}"
+
+COPY . /LocalAI
+
+RUN git config --global --add safe.directory /LocalAI
+
+RUN make -C /LocalAI/backend/rust/${BACKEND} build
+
+FROM scratch
+ARG BACKEND=kokoros
+
+COPY --from=builder /LocalAI/backend/rust/${BACKEND}/package/. ./
diff --git a/backend/index.yaml b/backend/index.yaml
@@ -118,7 +118,7 @@
  capabilities:
    nvidia: "cuda12-rfdetr"
    intel: "intel-rfdetr"
    #amd: "rocm-rfdetr"
    nvidia-l4t: "nvidia-l4t-arm64-rfdetr"
    metal: "metal-rfdetr"
    default: "cpu-rfdetr"
@@ -468,6 +468,28 @@
     nvidia-cuda-13: "cuda13-kokoro"
     nvidia-cuda-12: "cuda12-kokoro"
     nvidia-l4t-cuda-12: "nvidia-l4t-arm64-kokoro"
+- &kokoros
+  icon: https://avatars.githubusercontent.com/u/166769057?v=4
+  description: |
+    Kokoros is a pure Rust TTS backend using the Kokoro ONNX model (82M parameters).
+    It provides fast, high-quality text-to-speech with streaming support, built on
+    ONNX Runtime for efficient CPU inference. Supports English, Japanese, Mandarin
+    Chinese, and German.
+  urls:
+    - https://huggingface.co/hexgrad/Kokoro-82M
+    - https://github.com/lucasjinreal/Kokoros
+  tags:
+    - text-to-speech
+    - TTS
+    - Rust
+    - ONNX
+  license: apache-2.0
+  alias: "kokoros"
+  name: "kokoros"
+  capabilities:  # each value is the name of a concrete kokoros entry declared further down in this file
+    default: "cpu-kokoros"
+    nvidia: "cuda12-kokoros"
+    nvidia-cuda-12: "cuda12-kokoros"
 - &coqui
   urls:
     - https://github.com/idiap/coqui-ai-TTS
@@ -1522,7 +1544,7 @@
  capabilities:
    nvidia: "cuda12-rfdetr-development"
    intel: "intel-rfdetr-development"
    #amd: "rocm-rfdetr-development"
    nvidia-l4t: "nvidia-l4t-arm64-rfdetr-development"
    metal: "metal-rfdetr-development"
    default: "cpu-rfdetr-development"
@@ -2042,6 +2064,34 @@
   uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-kokoro"
   mirrors:
     - localai/localai-backends:master-metal-darwin-arm64-kokoro
+## kokoros (Rust)
+- !!merge <<: *kokoros
+  name: "kokoros-development"
+  capabilities:
+    # YAML merge is shallow: this map replaces the anchor's "capabilities" wholesale,
+    # so every capability key of the release entry has to be repeated here.
+    default: "cpu-kokoros-development"
+    nvidia: "cuda12-kokoros-development"
+    nvidia-cuda-12: "cuda12-kokoros-development"
+- !!merge <<: *kokoros
+  name: "cpu-kokoros"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-kokoros"
+  mirrors:
+    - localai/localai-backends:latest-cpu-kokoros
+- !!merge <<: *kokoros
+  name: "cpu-kokoros-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-cpu-kokoros"
+  mirrors:
+    - localai/localai-backends:master-cpu-kokoros
+- !!merge <<: *kokoros
+  name: "cuda12-kokoros"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-kokoros"
+  mirrors:
+    - localai/localai-backends:latest-gpu-nvidia-cuda-12-kokoros
+- !!merge <<: *kokoros
+  name: "cuda12-kokoros-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-kokoros"
+  mirrors:
+    - localai/localai-backends:master-gpu-nvidia-cuda-12-kokoros
 ## faster-whisper
 - !!merge <<: *faster-whisper
   name: "faster-whisper-development"

diff --git a/backend/rust/kokoros/Cargo.toml b/backend/rust/kokoros/Cargo.toml
@@ -0,0 +1,27 @@
+[package]
+name = "kokoros-grpc"
+version = "0.1.0"
+edition = "2021"
+
+[[bin]]
+name = "kokoros-grpc"  # binary name that package.sh and run.sh refer to
+path = "src/main.rs"
+
+[dependencies]
+kokoros = { path = "sources/Kokoros/kokoros" }  # lives inside the Kokoros git submodule (see .gitmodules)
+
+tonic = "0.13"
+prost = "0.13"
+tokio = { version = "1", features = ["full"] }
+tokio-stream = "0.1"
+clap = { version = "4", features = ["derive"] }
+tracing = "0.1"
+tracing-subscriber = { version = "0.3", features = ["env-filter"] }
+
+[build-dependencies]
+tonic-build = "0.13"  # used by build.rs to compile backend.proto
+
+[features]
+default = ["cpu"]
+cpu = ["kokoros/cpu"]  # forwards to the `cpu` feature of the kokoros crate
+cuda = ["kokoros/cuda"]  # forwards to `cuda`; the Makefile selects it when BUILD_TYPE is cublas or l4t
diff --git a/backend/rust/kokoros/Makefile b/backend/rust/kokoros/Makefile
@@ -0,0 +1,33 @@
+# Builds and packages the kokoros gRPC backend.
+CURRENT_DIR=$(abspath ./)
+
+# Enable CUDA feature when building with cublas/l4t
+CARGO_FEATURES :=
+ifneq (,$(filter cublas l4t,$(BUILD_TYPE)))
+CARGO_FEATURES := --features cuda --no-default-features
+endif
+
+# Stage backend.proto next to the crate and compile the release binary;
+# build.rs reads the proto location from BACKEND_PROTO_PATH.
+.PHONY: kokoros-grpc
+kokoros-grpc:
+	mkdir -p $(CURRENT_DIR)/proto
+	cp $(CURRENT_DIR)/../../backend.proto $(CURRENT_DIR)/proto/backend.proto
+	cd $(CURRENT_DIR) && \
+		BACKEND_PROTO_PATH=$(CURRENT_DIR)/proto/backend.proto \
+		cargo build --release $(CARGO_FEATURES)
+
+# package.sh copies target/release/kokoros-grpc, so the binary has to exist first.
+# Declaring the prerequisite keeps that order correct even under `make -j`.
+.PHONY: package
+package: kokoros-grpc
+	bash package.sh
+
+.PHONY: build
+build: kokoros-grpc package
+
+# Remove cargo artifacts, the packaged bundle and the staged proto copy.
+.PHONY: clean
+clean:
+	cargo clean
+	rm -rf package proto
diff --git a/backend/rust/kokoros/build.rs b/backend/rust/kokoros/build.rs
@@ -0,0 +1,32 @@
+//! Build script: generates the gRPC server code for the `backend.proto` definition.
+
+/// Compiles the backend protobuf definition into tonic server stubs.
+///
+/// The proto file is taken from the `BACKEND_PROTO_PATH` environment variable and
+/// falls back to `proto/backend.proto` when the variable cannot be read.
+///
+/// # Errors
+///
+/// Returns any error reported by `tonic_build` while compiling the proto file.
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // Re-run when the override changes; Cargo does not track environment variables
+    // unless the build script asks for it.
+    println!("cargo:rerun-if-env-changed=BACKEND_PROTO_PATH");
+
+    let proto_path = std::env::var("BACKEND_PROTO_PATH")
+        .unwrap_or_else(|_| "proto/backend.proto".to_string());
+
+    // `Path::parent` returns `Some("")` for a bare file name; fall back to the current
+    // directory so the include path handed to the proto compiler is never empty.
+    let proto_dir = std::path::Path::new(&proto_path)
+        .parent()
+        .filter(|dir| !dir.as_os_str().is_empty())
+        .unwrap_or(std::path::Path::new("."));
+
+    tonic_build::configure()
+        .build_server(true)
+        .build_client(false)
+        .compile_protos(&[&proto_path], &[proto_dir])?;
+
+    Ok(())
+}
diff --git a/backend/rust/kokoros/package.sh b/backend/rust/kokoros/package.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+set -e
+
+CURDIR=$(dirname "$(realpath $0)")
+mkdir -p $CURDIR/package/lib
+
+# Copy the binary
+cp -avf $CURDIR/target/release/kokoros-grpc $CURDIR/package/
+
+# Copy the run script
+cp -rfv $CURDIR/run.sh $CURDIR/package/
+chmod +x $CURDIR/package/run.sh
+
+# Copy ONNX Runtime shared libraries from ort build artifacts
+ORT_LIB_DIR=$(find $CURDIR/target/release/build -name "libonnxruntime*.so*" -path "*/ort-sys-*/out/*" -exec dirname {} \; 2>/dev/null | head -1)
+if [ -n "$ORT_LIB_DIR" ]; then
+    cp -avfL $ORT_LIB_DIR/libonnxruntime*.so* $CURDIR/package/lib/ 2>/dev/null || true
+fi
+
+# Copy espeak-ng data
+if [ -d "/usr/share/espeak-ng-data" ]; then
+    cp -rf /usr/share/espeak-ng-data $CURDIR/package/
+elif [ -d "/usr/lib/x86_64-linux-gnu/espeak-ng-data" ]; then
+    cp -rf /usr/lib/x86_64-linux-gnu/espeak-ng-data $CURDIR/package/
+fi
+
+# Copy ALL dynamic library dependencies of the binary
+echo "Bundling dynamic library dependencies..."
+ldd $CURDIR/target/release/kokoros-grpc | grep "=>" | awk '{print $3}' | while read lib; do
+    if [ -n "$lib" ] && [ -f "$lib" ]; then
+        cp -avfL "$lib" $CURDIR/package/lib/ 2>/dev/null || true
+    fi
+done
+
+# Copy CA certificates for HTTPS (needed for model auto-download)
+if [ -d "/etc/ssl/certs" ]; then
+    mkdir -p $CURDIR/package/etc/ssl
+    cp -rf /etc/ssl/certs $CURDIR/package/etc/ssl/
+fi
+
+# Copy the dynamic linker
+if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
+    cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so
+elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
+    cp -arfLv /lib/ld-linux-aarch64.so.1 $CURDIR/package/lib/ld.so
+fi
+
+echo "Packaging completed successfully"
+ls -liah $CURDIR/package/
+ls -liah $CURDIR/package/lib/
diff --git a/backend/rust/kokoros/run.sh b/backend/rust/kokoros/run.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Launcher for the packaged kokoros gRPC backend: points the process at the bundled
+# libraries, certificates and espeak-ng data, then execs the server binary.
+set -ex
+
+# Every use of CURDIR is quoted so the backend also starts from a path containing spaces.
+CURDIR=$(dirname "$(realpath "$0")")
+
+export LD_LIBRARY_PATH="$CURDIR/lib:${LD_LIBRARY_PATH:-}"
+
+# SSL certificates for model auto-download
+if [ -d "$CURDIR/etc/ssl/certs" ]; then
+    export SSL_CERT_DIR="$CURDIR/etc/ssl/certs"
+fi
+
+# espeak-ng data directory
+if [ -d "$CURDIR/espeak-ng-data" ]; then
+    export ESPEAK_NG_DATA="$CURDIR/espeak-ng-data"
+fi
+
+# Use bundled ld.so if present (portability)
+if [ -f "$CURDIR/lib/ld.so" ]; then
+    exec "$CURDIR/lib/ld.so" "$CURDIR/kokoros-grpc" "$@"
+fi
+
+exec "$CURDIR/kokoros-grpc" "$@"
diff --git a/backend/rust/kokoros/sources/Kokoros b/backend/rust/kokoros/sources/Kokoros
diff --git a/backend/rust/kokoros/src/auth.rs b/backend/rust/kokoros/src/auth.rs
@@ -0,0 +1,37 @@
+use tonic::{Request, Status};
+
+/// Builds the gRPC authentication interceptor, if authentication is configured.
+///
+/// Reads `LOCALAI_GRPC_AUTH_TOKEN` and returns `None` when the variable cannot be read
+/// or is empty. Otherwise the returned closure lets a request through only when its
+/// `authorization` metadata equals `Bearer <token>`.
+///
+/// # Errors
+///
+/// The closure returns an `unauthenticated` [`Status`] when the `authorization`
+/// metadata is missing or does not match the expected value.
+pub fn make_auth_interceptor(
+) -> Option<impl Fn(Request<()>) -> Result<Request<()>, Status> + Clone> {
+    let token = std::env::var("LOCALAI_GRPC_AUTH_TOKEN")
+        .ok()
+        .filter(|token| !token.is_empty())?;
+    let expected = format!("Bearer {token}");
+
+    Some(move |req: Request<()>| -> Result<Request<()>, Status> {
+        let Some(presented) = req.metadata().get("authorization") else {
+            return Err(Status::unauthenticated("missing authorization"));
+        };
+        // The token is a secret: avoid `==`, which stops at the first differing byte.
+        if constant_time_eq(presented.as_bytes(), expected.as_bytes()) {
+            Ok(req)
+        } else {
+            Err(Status::unauthenticated("invalid token"))
+        }
+    })
+}
+
+/// Best-effort constant-time equality: for equal-length inputs every byte pair is
+/// visited, so the work done does not depend on where the first mismatch is.
+fn constant_time_eq(a: &[u8], b: &[u8]) -> bool {
+    a.len() == b.len() && a.iter().zip(b).fold(0u8, |diff, (x, y)| diff | (x ^ y)) == 0
+}