livekit · davidzhao · Apr 7, 2026 · Mar 30, 2026 · Mar 31, 2026 · Mar 31, 2026
diff --git a/livekit-plugins/livekit-plugins-mistralai/README.md b/livekit-plugins/livekit-plugins-mistralai/README.md
@@ -1,30 +1,67 @@
-# MistralAI Plugin for LiveKit Agents
+# Mistral AI Plugin for LiveKit Agents
 
-Support for MistralAI services:
-
-- **LLM** — Chat completion with Mistral models
-- **STT** — Speech-to-text with Voxtral
-- **TTS** — Text-to-speech with Voxtral (supports saved voices and zero-shot voice cloning via `ref_audio`)
-
-See [https://docs.livekit.io/agents/integrations/mistral/](https://docs.livekit.io/agents/integrations/mistral/) for more information.
+Support for Mistral AI STT, TTS, and LLM services.
 
 ## Installation
 
 ```bash
 pip install livekit-plugins-mistralai
 ```
 
+For streaming STT (Voxtral Realtime), also install the `silero` plugin.
+
+```bash
+pip install livekit-plugins-silero
+```
+
 ## Pre-requisites
 
-You'll need an API key from MistralAI. It can be set as an environment variable:
+You'll need an API key from Mistral AI. It can be set as an environment variable:
 
 ```bash
 export MISTRAL_API_KEY=your_api_key_here
 ```
 
 ## Usage
 
-### TTS
+### Speech-to-Text (STT)
+
+#### Offline transcription
+
+```python
+from livekit.plugins import mistralai
+
+stt = mistralai.STT()
+
+# With context biasing
+stt = mistralai.STT(
+    model="voxtral-mini-latest",
+    context_bias=["LiveKit", "Voxtral", "Mistral"]
+)
+```
+
+#### Realtime streaming transcription
+
+Voxtral Realtime streams interim transcripts over a WebSocket connection. Since this
+model has no server-side endpointing, the plugin runs an internal Silero VAD to detect
+when the user stops speaking and flush the audio — producing final transcripts and
+driving the end-of-turn pipeline.
+
+```python
+from livekit.plugins import mistralai
+from livekit.plugins.silero import VAD
+
+# Using Silero VAD with default settings (550ms silence threshold)
+stt = mistralai.STT(model="voxtral-mini-transcribe-realtime-2602")
+
+# Using custom VAD settings (e.g. shorter silence threshold for faster responses)
+stt = mistralai.STT(
+    model="voxtral-mini-transcribe-realtime-2602",
+    vad=VAD.load(min_silence_duration=0.3),
+)
+```
+
+### Text-to-Speech (TTS)
 
 ```python
 from livekit.plugins import mistralai
@@ -37,3 +74,18 @@ import base64
 ref_audio_b64 = base64.b64encode(open("sample.mp3", "rb").read()).decode()
 tts = mistralai.TTS(ref_audio=ref_audio_b64)
 ```
+
+### LLM
+
+```python
+from livekit.plugins import mistralai
+
+llm = mistralai.LLM()
+
+# With custom temperature/max. tokens
+llm = mistralai.LLM(
+    model="mistral-large-latest",
+    temperature=0.7,
+    max_completion_tokens=150
+)
+```
diff --git a/livekit-plugins/livekit-plugins-mistralai/livekit/plugins/mistralai/llm.py b/livekit-plugins/livekit-plugins-mistralai/livekit/plugins/mistralai/llm.py
@@ -5,8 +5,6 @@
 from dataclasses import dataclass
 from typing import Any, cast
 
-import httpx
-
 from livekit.agents import APIConnectionError, APIStatusError, APITimeoutError, llm
 from livekit.agents.llm import (
     ChatChunk,
@@ -30,37 +28,51 @@
 
 from .models import ChatModels
 
+DEFAULT_MODEL: ChatModels = "ministral-8b-latest"
+
 
 @dataclass
 class _LLMOptions:
-    model: str
-    temperature: NotGivenOr[float]
-    max_completion_tokens: NotGivenOr[int]
+    model: ChatModels | str
+    max_completion_tokens: int | None
+    temperature: float | None
 
 
-# Mistral LLM Class
 class LLM(llm.LLM):
     def __init__(
         self,
-        model: str | ChatModels = "ministral-8b-latest",
-        api_key: str | None = None,
         client: Mistral | None = None,
-        temperature: NotGivenOr[float] = NOT_GIVEN,
+        api_key: NotGivenOr[str] = NOT_GIVEN,
+        model: NotGivenOr[ChatModels | str] = NOT_GIVEN,
         max_completion_tokens: NotGivenOr[int] = NOT_GIVEN,
-        timeout: httpx.Timeout | None = None,
+        temperature: NotGivenOr[float] = NOT_GIVEN,
     ) -> None:
+        """
+        Create a new instance of the Mistral AI LLM.
+
+        Args:
+            client: Optional pre-configured `Mistral` client instance.
+            api_key: Your Mistral AI API key. If not provided, will use the MISTRAL_API_KEY environment variable.
+            model: The Mistral AI model to use for completions, default is "ministral-8b-latest".
+            max_completion_tokens: The max. number of tokens the LLM can output.
+            temperature: The temperature to use the LLM with.
+        """
+
+        resolved_model = model if is_given(model) else DEFAULT_MODEL
+        resolved_max_completion_tokens = (
+            max_completion_tokens if is_given(max_completion_tokens) else None
+        )
+        resolved_temperature = temperature if is_given(temperature) else None
         super().__init__()
         self._opts = _LLMOptions(
-            model=model,
-            temperature=temperature,
-            max_completion_tokens=max_completion_tokens,
+            model=resolved_model,
+            max_completion_tokens=resolved_max_completion_tokens,
+            temperature=resolved_temperature,
         )
-        mistral_api_key = api_key or os.environ.get("MISTRAL_API_KEY")
+
+        mistral_api_key = api_key if is_given(api_key) else os.environ.get("MISTRAL_API_KEY")
         if not client and not mistral_api_key:
-            raise ValueError(
-                "Mistral API key is required, either as argument or set"
-                " MISTRAL_API_KEY environment variable"
-            )
+            raise ValueError("Mistral AI API key is required. Set MISTRAL_API_KEY or pass api_key")
         self._client = client or Mistral(api_key=mistral_api_key)
 
     @property
@@ -71,6 +83,28 @@ def model(self) -> str:
     def provider(self) -> str:
         return "MistralAI"
 
+    def update_options(
+        self,
+        *,
+        model: NotGivenOr[ChatModels | str] = NOT_GIVEN,
+        max_completion_tokens: NotGivenOr[int] = NOT_GIVEN,
+        temperature: NotGivenOr[float] = NOT_GIVEN,
+    ) -> None:
+        """
+        Update the LLM options.
+
+        Args:
+            model: The model to use for completions
+            max_completion_tokens: The max. number of tokens the LLM can output.
+            temperature: The temperature to use the LLM with.
+        """
+        if is_given(model):
+            self._opts.model = model
+        if is_given(max_completion_tokens):
+            self._opts.max_completion_tokens = max_completion_tokens
+        if is_given(temperature):
+            self._opts.temperature = temperature
+
     def chat(
         self,
         *,
@@ -86,21 +120,16 @@ def chat(
 
         if is_given(extra_kwargs):
             extra.update(extra_kwargs)
-
-        if is_given(self._opts.max_completion_tokens):
-            extra["max_tokens"] = self._opts.max_completion_tokens
-
-        if is_given(self._opts.temperature):
-            extra["temperature"] = self._opts.temperature
-
         if is_given(parallel_tool_calls):
             extra["parallel_tool_calls"] = parallel_tool_calls
-
         if is_given(tool_choice):
             extra["tool_choice"] = tool_choice
-
         if is_given(response_format):
             extra["response_format"] = response_format
+        if self._opts.max_completion_tokens is not None:
+            extra["max_tokens"] = self._opts.max_completion_tokens
+        if self._opts.temperature is not None:
+            extra["temperature"] = self._opts.temperature
 
         return LLMStream(
             self,
@@ -113,7 +142,6 @@ def chat(
         )
 
 
-# Mistral LLM STREAM
 class LLMStream(llm.LLMStream):
     def __init__(
         self,

diff --git a/livekit-plugins/livekit-plugins-mistralai/livekit/plugins/mistralai/models.py b/livekit-plugins/livekit-plugins-mistralai/livekit/plugins/mistralai/models.py
@@ -1,26 +1,31 @@
 from typing import Literal
 
 ChatModels = Literal[
-    "mistral-medium-latest",
     "mistral-large-latest",
-    "mistral-medium-2508",
+    "mistral-large-2512",
     "mistral-large-2411",
+    "mistral-medium-latest",
+    "mistral-medium-2508",
     "mistral-medium-2505",
-    "ministral-3b-2410",
-    "ministral-8b-2410",
-    "mistral-large-2411",
-    "mistral-large-2512",
+    "mistral-small-latest",
+    "mistral-small-2603",
+    "mistral-small-2506",
+    "ministral-14b-latest",
     "ministral-14b-2512",
+    "ministral-8b-latest",
     "ministral-8b-2512",
+    "ministral-3b-latest",
     "ministral-3b-2512",
-    "mistral-small-2407",
 ]
 
 STTModels = Literal[
-    "voxtral-small-2507", "voxtral-mini-2507", "voxtral-mini-latest", "voxtral-small-latest"
+    "voxtral-mini-transcribe-realtime-2602",
+    "voxtral-mini-latest",
+    "voxtral-mini-2602",
+    "voxtral-mini-2507",
 ]
 
-TTSModels = Literal["voxtral-mini-tts-2603", "voxtral-mini-tts-latest"]
+TTSModels = Literal["voxtral-mini-tts-latest", "voxtral-mini-tts-2603"]
 
 TTSVoices = Literal[
     "gb_jane_confident",