From e92748de62c6fa51c300986c548731b3cf1764a4 Mon Sep 17 00:00:00 2001 From: Anish Goyal Date: Thu, 4 Jun 2026 22:56:01 -0400 Subject: [PATCH] feat(tts): desktop Kokoro/Piper TTS v2 engine layer + user-editable Python path Forward-ports the v2 desktop TTS engine work onto current upstream: - DesktopTTSEngineFactory: lazy createKokoroEngine() (gated on kokoroAvailable) + DesktopKokoroTTSEngineV2 wrapper delegating synthesis to KokoroTTSAdapter, with a per-(utterance,voice,speed) audio cache and bounded parallel pre-synthesis (respects the shared TTS-performance setting). - TTSController: persist + restore the selected EngineType across launches (persistedEngineType / saveEngineType). - DesktopTTSService, TTSModule / TTSEngine / TTSState / TTSNotificationUseCase, TTSV2ScreenSpec, TTSStateAdapter, TTSEngineSettingsScreen, AndroidTTSEngineFactory: supporting wiring for the above - the KOKORO enum value and expect/actual factory hook (null on Android), DI and pref-to-state sync, persisted install flag and engine selection, notification/UI labels, and the Settings text field. - AppPreferences: kokoroAvailable, selectedTTSEngine, and a NEW user-editable kokoroPythonPath() override. - KokoroTTSEngine: honour kokoroPythonPath before auto-discovery so users with a pyenv/conda/venv/system interpreter can point Kokoro at it from Settings. Conflicts vs upstream's TTS refactors resolved (kept their debug log levels, preserved the persisted-engine-type hydration). 
--- .../tts_service/v2/AndroidTTSEngineFactory.kt | 6 + .../preferences/prefs/AppPreferences.kt | 26 ++ .../services/tts_service/v2/TTSController.kt | 50 +++- .../services/tts_service/v2/TTSEngine.kt | 6 + .../services/tts_service/v2/TTSModule.kt | 37 ++- .../tts_service/v2/TTSNotificationUseCase.kt | 1 + .../services/tts_service/v2/TTSState.kt | 3 +- .../services/tts_service/DesktopTTSService.kt | 91 +++++-- .../tts_service/kokoro/KokoroTTSEngine.kt | 22 +- .../tts_service/v2/DesktopTTSEngineFactory.kt | 255 +++++++++++++++++- .../presentation/core/ui/TTSV2ScreenSpec.kt | 4 +- .../ui/home/tts/v2/TTSStateAdapter.kt | 4 +- .../ui/home/tts/TTSEngineSettingsScreen.kt | 35 +++ 13 files changed, 497 insertions(+), 43 deletions(-) diff --git a/domain/src/androidMain/kotlin/ireader/domain/services/tts_service/v2/AndroidTTSEngineFactory.kt b/domain/src/androidMain/kotlin/ireader/domain/services/tts_service/v2/AndroidTTSEngineFactory.kt index 72fc6c003..736b8b1e8 100644 --- a/domain/src/androidMain/kotlin/ireader/domain/services/tts_service/v2/AndroidTTSEngineFactory.kt +++ b/domain/src/androidMain/kotlin/ireader/domain/services/tts_service/v2/AndroidTTSEngineFactory.kt @@ -45,6 +45,12 @@ actual object TTSEngineFactory : KoinComponent { null } } + + /** + * Kokoro TTS requires a Python runtime and is desktop-only. + * Android falls back to the native engine; callers should handle null. + */ + actual fun createKokoroEngine(): TTSEngine? 
= null /** * Get cache statistics diff --git a/domain/src/commonMain/kotlin/ireader/domain/preferences/prefs/AppPreferences.kt b/domain/src/commonMain/kotlin/ireader/domain/preferences/prefs/AppPreferences.kt index 918579a21..49fe17f9c 100644 --- a/domain/src/commonMain/kotlin/ireader/domain/preferences/prefs/AppPreferences.kt +++ b/domain/src/commonMain/kotlin/ireader/domain/preferences/prefs/AppPreferences.kt @@ -192,6 +192,32 @@ class AppPreferences( fun selectedKokoroVoice(): Preference { return preferenceStore.getString("selected_kokoro_voice", "af_bella") } + + /** + * Whether Kokoro TTS is installed and verified. Persists across launches so the user + * doesn't re-run Install every time. True after a successful install/verify; false + * only on uninstall or failed verification. + */ + fun kokoroAvailable(): Preference { + return preferenceStore.getBoolean("kokoro_available", false) + } + + /** + * User's preferred TTS engine for reading. Values: "PIPER", "KOKORO", "GRADIO", + * "SIMULATION" (matches DesktopTTSService.TTSEngine and v2 EngineType names). + */ + fun selectedTTSEngine(): Preference { + return preferenceStore.getString("selected_tts_engine", "PIPER") + } + + /** + * Optional override for the Python interpreter Kokoro TTS uses. Empty = auto-discover + * (PATH + common install locations). Set from Settings -> TTS if Python lives somewhere + * non-standard (pyenv, conda, a venv, a system package). Must be Python 3.8-3.12. 
+ */ + fun kokoroPythonPath(): Preference { + return preferenceStore.getString("kokoro_python_path", "") + } /** * Selected AI TTS provider diff --git a/domain/src/commonMain/kotlin/ireader/domain/services/tts_service/v2/TTSController.kt b/domain/src/commonMain/kotlin/ireader/domain/services/tts_service/v2/TTSController.kt index 8778c17ec..3a2cce5e9 100644 --- a/domain/src/commonMain/kotlin/ireader/domain/services/tts_service/v2/TTSController.kt +++ b/domain/src/commonMain/kotlin/ireader/domain/services/tts_service/v2/TTSController.kt @@ -39,7 +39,16 @@ class TTSController( private val nativeEngineFactory: () -> TTSEngine, private val gradioEngineFactory: ((GradioConfig) -> TTSEngine?)? = null, initialGradioConfig: GradioConfig? = null, - private val cacheUseCase: TTSCacheUseCase? = null + private val cacheUseCase: TTSCacheUseCase? = null, + // Produces a Kokoro engine on desktop when the user has installed it; returns null + // on platforms or configurations where Kokoro isn't available. When null, selecting + // `EngineType.KOKORO` falls back to the native engine with a warning. + private val kokoroEngineFactory: (() -> TTSEngine?)? = null, + // Optional persistence hooks so the user's selected engine survives app restarts. + // When provided, the controller loads the saved engine on first `initialize()` and + // writes the new value whenever `setEngine` actually switches. + private val persistedEngineType: (() -> EngineType?)? = null, + private val saveEngineType: ((EngineType) -> Unit)? = null ) { // Mutable Gradio config that can be updated at runtime private var gradioConfig: GradioConfig? = initialGradioConfig @@ -87,8 +96,12 @@ class TTSController( // Mutex to ensure commands are processed sequentially private val commandMutex = Mutex() - // State - single source of truth - private val _state = MutableStateFlow(TTSState()) + // State - single source of truth. 
Seeded from the persisted engine-type pref so + // the UI immediately reflects the user's last choice instead of defaulting to + // NATIVE and requiring a play() to hydrate via initialize(). + private val _state = MutableStateFlow( + TTSState(engineType = persistedEngineType?.invoke() ?: EngineType.NATIVE) + ) val state: StateFlow<TTSState> = _state.asStateFlow() // Events - one-time occurrences @@ -170,7 +183,17 @@ class TTSController( private fun initialize() { Log.debug { "$TAG: initialize()" } - + + // Whenever no engine exists yet (first init, or setEngine() just released it), re-read + // the persisted engineType; _state was already seeded from it at construction. This + // relies on saveEngineType having stored the new type before setEngine() calls us. + if (engine == null) { + val saved = persistedEngineType?.invoke() + if (saved != null && saved != _state.value.engineType) { + _state.update { it.copy(engineType = saved) } + } + } + if (engine == null) { val currentEngineType = _state.value.engineType engine = when (currentEngineType) { @@ -178,6 +201,14 @@ class TTSController( Log.debug { "$TAG: Creating native engine" } nativeEngineFactory() } + EngineType.KOKORO -> { + Log.debug { "$TAG: Creating Kokoro engine" } + kokoroEngineFactory?.invoke() ?: run { + Log.warn { "$TAG: Kokoro unavailable, falling back to native" } + _state.update { it.copy(engineType = EngineType.NATIVE) } + nativeEngineFactory() + } + } EngineType.GRADIO -> { Log.debug { "$TAG: Creating Gradio engine" } val config = gradioConfig @@ -800,17 +831,20 @@ class TTSController( private fun setEngine(type: EngineType) { val currentState = _state.value if (currentState.engineType == type) return - + Log.debug { "$TAG: setEngine($type) - switching from ${currentState.engineType}" } - + // Stop current engine engine?.stop() engine?.release() engine = null - + // Update state with new engine type _state.update { it.copy(engineType = type, isEngineReady = false) } - + + // Persist the selection so it survives relaunches. 
+ saveEngineType?.invoke(type) + // Create new engine will happen on next play() call via initialize() // Or we can initialize immediately initialize() diff --git a/domain/src/commonMain/kotlin/ireader/domain/services/tts_service/v2/TTSEngine.kt b/domain/src/commonMain/kotlin/ireader/domain/services/tts_service/v2/TTSEngine.kt index 6d514b37a..6acd5df1c 100644 --- a/domain/src/commonMain/kotlin/ireader/domain/services/tts_service/v2/TTSEngine.kt +++ b/domain/src/commonMain/kotlin/ireader/domain/services/tts_service/v2/TTSEngine.kt @@ -135,6 +135,12 @@ sealed class EngineEvent { expect object TTSEngineFactory { fun createNativeEngine(): TTSEngine fun createGradioEngine(config: GradioConfig): TTSEngine? + /** + * Create a Kokoro TTS engine. Returns null if Kokoro is unavailable on this platform + * (e.g. Android) or not yet installed. The TTSController falls back to the native + * engine when this returns null. + */ + fun createKokoroEngine(): TTSEngine? } /** diff --git a/domain/src/commonMain/kotlin/ireader/domain/services/tts_service/v2/TTSModule.kt b/domain/src/commonMain/kotlin/ireader/domain/services/tts_service/v2/TTSModule.kt index e59f38d26..f35eecf63 100644 --- a/domain/src/commonMain/kotlin/ireader/domain/services/tts_service/v2/TTSModule.kt +++ b/domain/src/commonMain/kotlin/ireader/domain/services/tts_service/v2/TTSModule.kt @@ -3,6 +3,8 @@ package ireader.domain.services.tts_service.v2 import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.SupervisorJob +import kotlinx.coroutines.flow.collect +import kotlinx.coroutines.launch import org.koin.dsl.module /** @@ -49,13 +51,44 @@ val ttsV2Module = module { // Using single instead of factory to maintain state across the app // No ChapterController - TTS has its own independent state, sync happens via onPop single { - TTSController( + val appPrefs: ireader.domain.preferences.prefs.AppPreferences = get() + // Legacy DesktopTTSService writes 
TTSEngine.{PIPER,KOKORO,GRADIO,SIMULATION}; + // V2 speaks EngineType.{NATIVE,KOKORO,GRADIO}. This mapping bridges both surfaces + // so whichever UI the user picks from, the other reflects it on next read. + fun nameToEngineType(name: String): EngineType? = when (name) { + "NATIVE", "PIPER", "SIMULATION" -> EngineType.NATIVE + "KOKORO" -> EngineType.KOKORO + "GRADIO" -> EngineType.GRADIO + else -> null + } + + val controller = TTSController( contentLoader = get(), nativeEngineFactory = { TTSEngineFactory.createNativeEngine() }, gradioEngineFactory = { config -> TTSEngineFactory.createGradioEngine(config) }, initialGradioConfig = null, // Can be set via SetGradioConfig command - cacheUseCase = getOrNull() // Optional - for offline playback of cached audio + cacheUseCase = getOrNull(), // Optional - for offline playback of cached audio + kokoroEngineFactory = { TTSEngineFactory.createKokoroEngine() }, + persistedEngineType = { nameToEngineType(appPrefs.selectedTTSEngine().get()) }, + saveEngineType = { appPrefs.selectedTTSEngine().set(it.name) } ) + + // Reactively mirror pref changes into V2 state. The legacy TTS Engine Manager + // settings screen writes the pref via DesktopTTSService.setEngine, and the + // reader's TTSSettings drawer writes the same pref via V2's saveEngineType. + // Without this observer, the two surfaces drift: the user picks Kokoro from + // settings but the reader's engine label still says "Native TTS". 
+ val syncScope = CoroutineScope(Dispatchers.Default + SupervisorJob()) + syncScope.launch { + appPrefs.selectedTTSEngine().changes().collect { name -> + val type = nameToEngineType(name) ?: return@collect + if (controller.state.value.engineType != type) { + controller.dispatch(ireader.domain.services.tts_service.v2.TTSCommand.SetEngine(type)) + } + } + } + + controller } // ViewModel adapter for UI layer diff --git a/domain/src/commonMain/kotlin/ireader/domain/services/tts_service/v2/TTSNotificationUseCase.kt b/domain/src/commonMain/kotlin/ireader/domain/services/tts_service/v2/TTSNotificationUseCase.kt index 435bacbcf..05c25ce95 100644 --- a/domain/src/commonMain/kotlin/ireader/domain/services/tts_service/v2/TTSNotificationUseCase.kt +++ b/domain/src/commonMain/kotlin/ireader/domain/services/tts_service/v2/TTSNotificationUseCase.kt @@ -145,6 +145,7 @@ class TTSNotificationUseCase( speed = state.speed, ttsProvider = when (state.engineType) { EngineType.NATIVE -> "Native TTS" + EngineType.KOKORO -> "Kokoro TTS" EngineType.GRADIO -> "Gradio TTS" } ) diff --git a/domain/src/commonMain/kotlin/ireader/domain/services/tts_service/v2/TTSState.kt b/domain/src/commonMain/kotlin/ireader/domain/services/tts_service/v2/TTSState.kt index 32aefbb7c..184a214ab 100644 --- a/domain/src/commonMain/kotlin/ireader/domain/services/tts_service/v2/TTSState.kt +++ b/domain/src/commonMain/kotlin/ireader/domain/services/tts_service/v2/TTSState.kt @@ -100,7 +100,8 @@ enum class PlaybackState { } enum class EngineType { - NATIVE, // Platform native TTS (Android TTS, AVSpeechSynthesizer, etc.) + NATIVE, // Platform native TTS (Android TTS on Android, Piper on desktop) + KOKORO, // Kokoro Python TTS (desktop only; falls back to native on other platforms) GRADIO // Remote Gradio-based TTS (Coqui, etc.) 
} diff --git a/domain/src/desktopMain/kotlin/ireader/domain/services/tts_service/DesktopTTSService.kt b/domain/src/desktopMain/kotlin/ireader/domain/services/tts_service/DesktopTTSService.kt index 7b26fdf46..057533376 100644 --- a/domain/src/desktopMain/kotlin/ireader/domain/services/tts_service/DesktopTTSService.kt +++ b/domain/src/desktopMain/kotlin/ireader/domain/services/tts_service/DesktopTTSService.kt @@ -62,9 +62,15 @@ class DesktopTTSService : KoinComponent { } } - // TTS Engine selection + // TTS Engine selection. In-memory value is loaded from preferences in initialize() + // and written back to preferences in setEngine() so the selection persists across launches. private var currentEngine: TTSEngine = TTSEngine.PIPER - var kokoroAvailable = false + + // Backed by preferences so the install state persists across app launches. + // Once Kokoro has been verified as installed, the user shouldn't have to click Install again. + var kokoroAvailable: Boolean + get() = appPrefs.kokoroAvailable().get() + set(value) { appPrefs.kokoroAvailable().set(value) } lateinit var state: DesktopTTSState private var serviceJob: Job? = null @@ -114,6 +120,10 @@ class DesktopTTSService : KoinComponent { fun initialize() { state = DesktopTTSState() readPrefs() + + // Load persisted engine selection so the user's choice survives a relaunch. + currentEngine = runCatching { TTSEngine.valueOf(appPrefs.selectedTTSEngine().get()) } + .getOrDefault(TTSEngine.PIPER) // Record session start for analytics usageAnalytics.recordSessionStart() @@ -143,30 +153,49 @@ class DesktopTTSService : KoinComponent { // Check if Kokoro is already installed (don't auto-install) try { val kokoroDir = java.io.File(ireader.core.storage.AppDir, "kokoro/kokoro-tts") - - // Only initialize if FULLY installed (repo exists AND dependencies installed) - if (kokoroDir.exists() && kokoroDir.listFiles()?.isNotEmpty() == true) { - Log.info { "Found Kokoro repository, checking if fully installed..." 
} - - // Check if dependencies are installed without triggering installation - // Try to run kokoro --help to verify it's working - val pythonCheck = ProcessBuilder( - "python", "-m", "kokoro", "--help" - ).directory(kokoroDir) - .redirectErrorStream(true) - .start() - - val checkCompleted = pythonCheck.waitFor(5, java.util.concurrent.TimeUnit.SECONDS) - val output = pythonCheck.inputStream.bufferedReader().readText() - - if (checkCompleted && pythonCheck.exitValue() == 0 && output.contains("--voice")) { - kokoroAvailable = true - Log.info { "Kokoro TTS available (fully installed)" } - } else { - Log.info { "Kokoro repository found but not fully installed (user can complete installation from TTS Manager)" } + val persistedAvailable = kokoroAvailable + val repoPresent = kokoroDir.exists() && kokoroDir.listFiles()?.isNotEmpty() == true + + when { + // Pref says it's installed and the repo still exists on disk: trust the pref. + // Skip the expensive 5-second python subprocess check at startup so the + // user isn't forced to click Install again after every launch. + persistedAvailable && repoPresent -> { + Log.info { "Kokoro TTS available (persisted from previous install)" } + } + // Pref said installed but the repo is gone — user nuked ~/.cache. Reset flag. + persistedAvailable && !repoPresent -> { + Log.warn { "Kokoro marked installed but repository missing — resetting flag" } + kokoroAvailable = false + } + // No prior install recorded: do the classic verify-on-launch dance. + repoPresent -> { + Log.info { "Found Kokoro repository, checking if fully installed..." } + + // Try to run kokoro --help to verify it's working. Use `python3` so + // the check honours the same PATH shim mechanism Kokoro's own + // findPythonExecutable() uses (it prefers `python3` over `python`). + // On Arch, /usr/bin/python is the system 3.13 interpreter, and going + // through `python` would bypass any venv shim the user has set up. 
+ val pythonCheck = ProcessBuilder( + appPrefs.kokoroPythonPath().get().ifBlank { "python3" }, "-m", "kokoro", "--help" // honour the user's Kokoro Python override; blank = python3 on PATH + ).directory(kokoroDir) + .redirectErrorStream(true) + .start() + + val checkCompleted = pythonCheck.waitFor(5, java.util.concurrent.TimeUnit.SECONDS).also { finished -> if (!finished) pythonCheck.destroyForcibly() } // kill on timeout so readText() below cannot block on a live process + val output = pythonCheck.inputStream.bufferedReader().readText() + + if (checkCompleted && pythonCheck.exitValue() == 0 && output.contains("--voice")) { + kokoroAvailable = true + Log.info { "Kokoro TTS available (verified at startup, flag persisted)" } + } else { + Log.info { "Kokoro repository found but not fully installed (user can complete installation from TTS Manager)" } + } + } + else -> { + Log.info { "Kokoro not installed (user must install from TTS Manager)" } } - } else { - Log.info { "Kokoro not installed (user must install from TTS Manager)" } } } catch (e: Exception) { Log.debug { "Kokoro check: ${e.message}" } @@ -1944,6 +1973,7 @@ class DesktopTTSService : KoinComponent { * Set TTS engine */ fun setEngine(engine: TTSEngine) { + val previous = currentEngine when (engine) { TTSEngine.PIPER -> { if (synthesizer.isInitialized()) { @@ -1969,7 +1999,7 @@ class DesktopTTSService : KoinComponent { Log.info { "Gradio not available, trying to configure from preferences..." } configureGradioFromPreferences() } - + if (gradioAvailable && gradioPlayer != null) { currentEngine = TTSEngine.GRADIO isSimulationMode = false @@ -1984,6 +2014,11 @@ class DesktopTTSService : KoinComponent { Log.info { "Switched to Simulation mode" } } } + // Persist the selection only if the switch actually took effect (i.e. the engine was + // available). Refusing an unavailable engine leaves the previous selection untouched. + if (currentEngine != previous) { + appPrefs.selectedTTSEngine().set(currentEngine.name) + } } /** @@ -2088,9 +2123,9 @@ class DesktopTTSService : KoinComponent { if (kokoroDir.exists() && kokoroDir.listFiles()?.isNotEmpty() == true) { Log.info { "Found Kokoro repository, checking if fully installed..." 
} - // Check if dependencies are installed + // Check if dependencies are installed (use python3 to honour PATH shim) val pythonCheck = ProcessBuilder( - "python", "-m", "kokoro", "--help" + "python3", "-m", "kokoro", "--help" ).directory(kokoroDir) .redirectErrorStream(true) .start() diff --git a/domain/src/desktopMain/kotlin/ireader/domain/services/tts_service/kokoro/KokoroTTSEngine.kt b/domain/src/desktopMain/kotlin/ireader/domain/services/tts_service/kokoro/KokoroTTSEngine.kt index 1ae983de2..9904b6749 100644 --- a/domain/src/desktopMain/kotlin/ireader/domain/services/tts_service/kokoro/KokoroTTSEngine.kt +++ b/domain/src/desktopMain/kotlin/ireader/domain/services/tts_service/kokoro/KokoroTTSEngine.kt @@ -25,7 +25,12 @@ import java.util.concurrent.TimeUnit */ class KokoroTTSEngine( private val appDataDir: File = AppDir, - private var maxConcurrentProcesses: Int = 2 + private var maxConcurrentProcesses: Int = 2, + /** + * Optional user override for the Python interpreter. Null/blank = auto-discover. + * Sourced from AppPreferences.kokoroPythonPath() at engine-creation time. + */ + private val pythonPathOverride: String? = null, ) { private val kokoroDir = File(appDataDir, "kokoro").apply { mkdirs() } private val modelsDir = File(kokoroDir, "models").apply { mkdirs() } @@ -479,6 +484,21 @@ class KokoroTTSEngine( * Kokoro requires Python 3.8 to 3.12 (not 3.13+) */ private fun findPythonExecutable(): String? { + // User-configured override (Settings -> TTS -> Kokoro Python path). When set and + // runnable, it wins over auto-discovery so users whose interpreter lives somewhere + // non-standard (pyenv, conda, a venv, a system package) can point Kokoro at it. 
+ pythonPathOverride?.takeIf { it.isNotBlank() }?.let { override -> + try { + val p = ProcessBuilder(override, "--version").redirectErrorStream(true).start() + if (p.waitFor(5, TimeUnit.SECONDS) && p.exitValue() == 0) { + Log.info { "Using configured Kokoro Python: $override" } + return override + } + } catch (_: Exception) { + Log.warn { "Configured Kokoro Python '$override' not runnable; falling back to auto-discovery" } + } + } + // First, try specific Python installations in AppData val userProfile = System.getenv("USERPROFILE") ?: System.getProperty("user.home") val pythonBaseDir = File(userProfile, "AppData/Local/Programs/Python") diff --git a/domain/src/desktopMain/kotlin/ireader/domain/services/tts_service/v2/DesktopTTSEngineFactory.kt b/domain/src/desktopMain/kotlin/ireader/domain/services/tts_service/v2/DesktopTTSEngineFactory.kt index 85031d5a2..ff6e4f7c9 100644 --- a/domain/src/desktopMain/kotlin/ireader/domain/services/tts_service/v2/DesktopTTSEngineFactory.kt +++ b/domain/src/desktopMain/kotlin/ireader/domain/services/tts_service/v2/DesktopTTSEngineFactory.kt @@ -3,6 +3,9 @@ package ireader.domain.services.tts_service.v2 import ireader.core.log.Log import ireader.domain.preferences.prefs.AppPreferences import ireader.domain.services.tts_service.* +import ireader.domain.services.tts_service.kokoro.KokoroTTSAdapter +import ireader.domain.services.tts_service.kokoro.KokoroTTSEngine +import ireader.domain.services.tts_service.piper.AudioData import ireader.domain.services.tts_service.piper.PiperSpeechSynthesizer import ireader.domain.services.tts_service.piper.PiperModelManager import ireader.domain.services.tts_service.piper.AudioPlaybackEngine @@ -51,6 +54,27 @@ actual object TTSEngineFactory : KoinComponent { null } } + + /** + * Build a lazily-initialised Kokoro engine if the user has already installed Kokoro + * (tracked via `appPreferences.kokoroAvailable()`). Returns null otherwise so the + * controller falls back to the native engine. 
The concurrent-process count is read + * from `appPreferences.maxConcurrentTTSProcesses()` on each engine creation, matching + * the Piper/Gradio engines' behaviour of respecting the shared TTS-performance setting. + */ + actual fun createKokoroEngine(): TTSEngine? { + if (!appPreferences.kokoroAvailable().get()) return null + val maxProcesses = appPreferences.maxConcurrentTTSProcesses().get() + val kokoroEngine = KokoroTTSEngine( + maxConcurrentProcesses = maxProcesses, + // User-editable override (Settings -> TTS -> Kokoro Python path); blank = auto-discover. + pythonPathOverride = appPreferences.kokoroPythonPath().get().takeIf { it.isNotBlank() }, + ) + val adapter = KokoroTTSAdapter(kokoroEngine, appPreferences).also { + it.loadVoiceFromPreferences() + } + return DesktopKokoroTTSEngineV2(adapter, appPreferences) + } /** * Get cache statistics @@ -508,9 +532,238 @@ class DesktopGradioTTSEngineV2( */ override suspend fun getCachedIndices(texts: List): Set { if (audioCache == null) return emptySet() - + return texts.mapIndexedNotNull { index, text -> if (audioCache.isCached(text, config.id)) index else null }.toSet() } } + +/** + * Desktop Kokoro TTS Engine wrapper for v2 architecture. + * + * Delegates synthesis to [KokoroTTSAdapter] (which spawns Python subprocesses, + * respecting `maxConcurrentTTSProcesses`) and plays the returned audio through + * [AudioPlaybackEngine]. Voice selection is read from preferences on every + * speak() call so UI changes via VoiceSelectionDialog take effect immediately. + * + * Non-blocking: speak() kicks off synthesis+playback in a background coroutine + * and returns as soon as the utterance is queued. Stop/pause are supported via + * job cancellation and AudioPlaybackEngine's own pause/resume. 
+ */ +class DesktopKokoroTTSEngineV2( + private val adapter: KokoroTTSAdapter, + private val appPreferences: AppPreferences +) : TTSEngine { + companion object { + private const val TAG = "DesktopKokoroTTSV2" + private const val PRECACHE_MAX_ENTRIES = 16 + } + + private val _events = MutableSharedFlow(extraBufferCapacity = 10) + private val audioEngine: AudioPlaybackEngine by lazy { AudioPlaybackEngine() } + private val scope = CoroutineScope(Dispatchers.IO + SupervisorJob()) + + private var playbackJob: kotlinx.coroutines.Job? = null + private var speed: Float = 1.0f + private var initialized = false + + // In-memory cache of pre-synthesised audio, keyed by (utteranceId, voice, speed). + // The paragraph-advance path in TTSController precaches up to three upcoming items + // via `precacheNext`; this cache is what makes those hits pay off. Entries persist + // until speed or voice changes (which invalidates them since the bytes no longer + // match) or the cache grows past PRECACHE_MAX_ENTRIES (LRU via LinkedHashMap). + private data class CacheKey(val utteranceId: String, val voice: String, val speed: Float) + private val audioCache = object : LinkedHashMap(PRECACHE_MAX_ENTRIES, 0.75f, true) { + override fun removeEldestEntry(eldest: Map.Entry?): Boolean = + size > PRECACHE_MAX_ENTRIES + } + private val cacheLock = Any() + + // Track in-flight precache jobs so we can cancel on stop/release and skip duplicates. + private val inflightPrecache = java.util.concurrent.ConcurrentHashMap() + + @Volatile + private var currentUtteranceId: String? = null + + @Volatile + private var wasStopped = false + + override val events: Flow = _events + override val name: String = "Kokoro TTS" + + init { + // Kick off a background initialise so isReady() reflects reality after a short delay. + // The adapter's initialize() is idempotent; repeated calls are cheap after the first. 
+ scope.launch { + val result = runCatching { adapter.initialize() } + if (result.isSuccess && result.getOrNull()?.isSuccess == true) { + initialized = true + _events.tryEmit(EngineEvent.Ready) + } else { + Log.warn { "$TAG: initialize failed: ${result.exceptionOrNull()?.message ?: "unknown"}" } + } + } + } + + private fun currentVoice(): String = appPreferences.selectedKokoroVoice().get() + + private fun cacheGet(utteranceId: String): AudioData? { + val key = CacheKey(utteranceId, currentVoice(), speed) + return synchronized(cacheLock) { audioCache[key] } + } + + private fun cachePut(utteranceId: String, data: AudioData) { + val key = CacheKey(utteranceId, currentVoice(), speed) + synchronized(cacheLock) { audioCache[key] = data } + } + + private fun ensureInitialized(): Boolean { + if (initialized) return true + val result = runCatching { kotlinx.coroutines.runBlocking { adapter.initialize() } } + initialized = result.isSuccess && result.getOrNull()?.isSuccess == true + return initialized + } + + override suspend fun speak(text: String, utteranceId: String) { + Log.warn { "$TAG: speak($utteranceId) - text length=${text.length}" } + + // Cancel any existing playback — only one utterance plays at a time. + playbackJob?.cancel() + audioEngine.stop() + + currentUtteranceId = utteranceId + wasStopped = false + + if (!initialized) { + val result = runCatching { adapter.initialize() } + initialized = result.isSuccess && result.getOrNull()?.isSuccess == true + if (!initialized) { + _events.tryEmit(EngineEvent.Error(utteranceId, "Kokoro not initialised")) + return + } + } + + val voice = currentVoice() + playbackJob = scope.launch { + try { + _events.tryEmit(EngineEvent.Started(utteranceId)) + + // Fast path: if the paragraph was precached, play without re-synthesising. 
+ val cached = cacheGet(utteranceId) + val audioData = cached ?: run { + val r = adapter.synthesize(text, voice, speed) + val data = r.getOrElse { error -> + if (currentUtteranceId == utteranceId && !wasStopped) { + _events.tryEmit(EngineEvent.Error(utteranceId, error.message ?: "Kokoro synthesis failed")) + } + return@launch + } + cachePut(utteranceId, data) + data + } + + if (wasStopped || currentUtteranceId != utteranceId) return@launch + audioEngine.play(audioData) + if (currentUtteranceId == utteranceId && !wasStopped) { + _events.tryEmit(EngineEvent.Completed(utteranceId)) + } + } catch (_: kotlinx.coroutines.CancellationException) { + // no-op: cancellation is expected when the next utterance pre-empts us + } catch (e: Exception) { + if (currentUtteranceId == utteranceId && !wasStopped) { + _events.tryEmit(EngineEvent.Error(utteranceId, e.message ?: "Kokoro TTS error")) + } + } + } + } + + /** + * Pre-synthesise upcoming paragraphs in parallel. The underlying [KokoroTTSEngine] holds + * a semaphore keyed on `maxConcurrentProcesses` (the user's TTS Performance setting), so + * launching N coroutines here only spawns up to that many Python subprocesses at once — + * the rest queue up inside the engine. The net effect: the TTS pool is actually used, + * and by the time [speak] fires for the next paragraph its audio is already sitting in + * the in-memory cache. No more dead air between paragraphs. + */ + override fun precacheNext(items: List>) { + if (items.isEmpty()) return + if (!ensureInitialized()) return + val voice = currentVoice() + val currentSpeed = speed + for ((utteranceId, text) in items) { + if (text.isBlank()) continue + // Skip if already cached or another precache job is already running for this id. 
+ val key = CacheKey(utteranceId, voice, currentSpeed) + val alreadyCached = synchronized(cacheLock) { audioCache[key] != null } + if (alreadyCached) continue + if (inflightPrecache.containsKey(utteranceId)) continue + + val job = scope.launch { + try { + val r = adapter.synthesize(text, voice, currentSpeed) + r.onSuccess { data -> + // Voice/speed may have changed while we were synthesising; file the audio under the key it was synthesised with, not the current prefs. + synchronized(cacheLock) { audioCache[key] = data } + }.onFailure { error -> + Log.warn { "$TAG: precache $utteranceId failed: ${error.message}" } + } + } catch (_: kotlinx.coroutines.CancellationException) { + // expected on stop/release + } finally { + inflightPrecache.remove(utteranceId) + } + } + inflightPrecache[utteranceId] = job + } + } + + override suspend fun isTextCached(text: String): Boolean { + // We cache by utteranceId, not by text. Return false so callers don't make + // assumptions; the real cache hit check is inside speak(). + return false + } + + override fun clearState() { + // Called on chapter change. Cancel any precache jobs for the old chapter and + // drop the cache — paragraph utteranceIds like "p_3" collide across chapters. + inflightPrecache.values.forEach { it.cancel() } + inflightPrecache.clear() + synchronized(cacheLock) { audioCache.clear() } + } + + override fun stop() { + wasStopped = true + currentUtteranceId = null + playbackJob?.cancel() + playbackJob = null + audioEngine.stop() + } + + override fun pause() { audioEngine.pause() } + override fun resume() { audioEngine.resume() } + + override fun setSpeed(speed: Float) { + val clamped = speed.coerceIn(0.5f, 2.0f) + if (clamped != this.speed) { + // Cached bytes are at the old speed. Invalidate. + synchronized(cacheLock) { audioCache.clear() } + } + this.speed = clamped + } + + override fun setPitch(pitch: Float) { + // Kokoro exposes no pitch knob — silently ignore so the UI slider still works. 
+ } + + override fun isReady(): Boolean = initialized && adapter.isAvailable() + + override fun release() { + stop() + inflightPrecache.values.forEach { it.cancel() } + inflightPrecache.clear() + synchronized(cacheLock) { audioCache.clear() } + runCatching { adapter.shutdown() } + scope.cancel() + } +} diff --git a/presentation/src/commonMain/kotlin/ireader/presentation/core/ui/TTSV2ScreenSpec.kt b/presentation/src/commonMain/kotlin/ireader/presentation/core/ui/TTSV2ScreenSpec.kt index e95b253bd..32d6925a8 100644 --- a/presentation/src/commonMain/kotlin/ireader/presentation/core/ui/TTSV2ScreenSpec.kt +++ b/presentation/src/commonMain/kotlin/ireader/presentation/core/ui/TTSV2ScreenSpec.kt @@ -912,9 +912,10 @@ class TTSV2ScreenSpec( selectedVoiceModel = selectedPiperModel.takeIf { it.isNotEmpty() }, currentEngine = when (state.engineType) { EngineType.NATIVE -> "Native TTS" + EngineType.KOKORO -> "Kokoro TTS" EngineType.GRADIO -> "Gradio TTS" }, - availableEngines = listOf("Native TTS", "Gradio TTS"), + availableEngines = listOf("Native TTS", "Kokoro TTS", "Gradio TTS"), isTTSReady = state.isEngineReady, paragraphStartTime = paragraphStartTime, sentenceHighlightEnabled = sentenceHighlightEnabled, @@ -1008,6 +1009,7 @@ class TTSV2ScreenSpec( useGradioTTS = useGradioTTS, currentEngineName = when (state.engineType) { EngineType.NATIVE -> "Native TTS" + EngineType.KOKORO -> "Kokoro TTS" EngineType.GRADIO -> "Gradio TTS" }, readTranslatedText = readTranslatedText, diff --git a/presentation/src/commonMain/kotlin/ireader/presentation/ui/home/tts/v2/TTSStateAdapter.kt b/presentation/src/commonMain/kotlin/ireader/presentation/ui/home/tts/v2/TTSStateAdapter.kt index b9c62f306..806804d64 100644 --- a/presentation/src/commonMain/kotlin/ireader/presentation/ui/home/tts/v2/TTSStateAdapter.kt +++ b/presentation/src/commonMain/kotlin/ireader/presentation/ui/home/tts/v2/TTSStateAdapter.kt @@ -104,9 +104,10 @@ fun rememberTTSV2StateAdapter( sleepModeEnabled = 
sleepTimerState?.isEnabled == true, currentEngine = when (engineType) { EngineType.NATIVE -> "Native TTS" + EngineType.KOKORO -> "Kokoro TTS" EngineType.GRADIO -> "Gradio TTS" }, - availableEngines = listOf("Native TTS", "Gradio TTS"), + availableEngines = listOf("Native TTS", "Kokoro TTS", "Gradio TTS"), isTTSReady = isEngineReady, // Sentence highlighting paragraphStartTime = paragraphStartTime, @@ -166,6 +167,7 @@ fun rememberTTSV2Actions( override fun onSelectEngine(engine: String) { when (engine) { "Native TTS" -> adapter.useNativeTTS() + "Kokoro TTS" -> adapter.setEngine(EngineType.KOKORO) "Gradio TTS" -> adapter.setEngine(EngineType.GRADIO) } } diff --git a/presentation/src/desktopMain/kotlin/ireader/presentation/ui/home/tts/TTSEngineSettingsScreen.kt b/presentation/src/desktopMain/kotlin/ireader/presentation/ui/home/tts/TTSEngineSettingsScreen.kt index 52c284ce2..cccc75479 100644 --- a/presentation/src/desktopMain/kotlin/ireader/presentation/ui/home/tts/TTSEngineSettingsScreen.kt +++ b/presentation/src/desktopMain/kotlin/ireader/presentation/ui/home/tts/TTSEngineSettingsScreen.kt @@ -207,6 +207,30 @@ actual fun TTSEngineSettingsScreen( } ) + // Kokoro Python interpreter override (editable; blank = auto-discover). + // Lets users with a non-standard Python (pyenv/conda/venv/system) point + // Kokoro at it without code changes. Read back in createKokoroEngine(). + val kokoroPrefs = koinInject() + var kokoroPythonPath by remember { mutableStateOf(kokoroPrefs.kokoroPythonPath().get()) } + OutlinedTextField( + value = kokoroPythonPath, + onValueChange = { + kokoroPythonPath = it + kokoroPrefs.kokoroPythonPath().set(it.trim()) + }, + label = { Text("Kokoro Python path (optional)") }, + placeholder = { Text("Leave blank to auto-detect (Python 3.8-3.12)") }, + singleLine = true, + modifier = Modifier.fillMaxWidth().padding(top = 8.dp) + ) + Text( + text = "Point Kokoro at a specific Python interpreter (pyenv, conda, a venv, or a " + + "system install). 
Takes effect the next time the Kokoro engine is created.", + style = MaterialTheme.typography.bodySmall, + color = MaterialTheme.colorScheme.onSurfaceVariant, + modifier = Modifier.padding(top = 4.dp, bottom = 4.dp) + ) + // Gradio TTS (Online - Generic) var gradioAvailable by remember { mutableStateOf(ttsService.gradioAvailable) } var showGradioConfig by remember { mutableStateOf(false) } @@ -497,6 +521,17 @@ actual fun TTSVoiceSelectionScreen( onDismiss: () -> Unit ) { val ttsService: DesktopTTSService = koinInject() + // When the active engine is Kokoro, delegate to the Kokoro-aware + // VoiceSelectionDialog — the Piper content below has no concept of Kokoro voices. + // We branch once at entry rather than interleaving inside the Dialog so the Piper + // and Kokoro flows stay clearly separated. + if (ttsService.getCurrentEngine() == DesktopTTSService.TTSEngine.KOKORO) { + ireader.presentation.ui.reader.components.VoiceSelectionDialog( + ttsService = ttsService, + onDismiss = onDismiss + ) + return + } val voiceService: ireader.domain.services.tts_service.PiperVoiceService = koinInject() val voiceDownloader: PiperVoiceDownloader = koinInject() val appPrefs: ireader.domain.preferences.prefs.AppPreferences = koinInject()