Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 62 additions & 10 deletions livekit-plugins/livekit-plugins-mistralai/README.md
Original file line number Diff line number Diff line change
@@ -1,30 +1,67 @@
# MistralAI Plugin for LiveKit Agents
# Mistral AI Plugin for LiveKit Agents

Support for MistralAI services:

- **LLM** — Chat completion with Mistral models
- **STT** — Speech-to-text with Voxtral
- **TTS** — Text-to-speech with Voxtral (supports saved voices and zero-shot voice cloning via `ref_audio`)

See [https://docs.livekit.io/agents/integrations/mistral/](https://docs.livekit.io/agents/integrations/mistral/) for more information.
Support for Mistral AI STT, TTS, and LLM services.

## Installation

```bash
pip install livekit-plugins-mistralai
```

For streaming STT (Voxtral Realtime), also install the `silero` plugin:

```bash
pip install livekit-plugins-silero
```

## Prerequisites

You'll need an API key from MistralAI. It can be set as an environment variable:
You'll need an API key from Mistral AI. It can be set as an environment variable:

```bash
export MISTRAL_API_KEY=your_api_key_here
```

## Usage

### TTS
### Speech-to-Text (STT)

#### Offline transcription

```python
from livekit.plugins import mistralai

stt = mistralai.STT()

# With context biasing
stt = mistralai.STT(
model="voxtral-mini-latest",
context_bias=["LiveKit", "Voxtral", "Mistral"]
)
```

#### Realtime streaming transcription

Voxtral Realtime streams interim transcripts over a WebSocket connection. Since this
model has no server-side endpointing, the plugin runs an internal Silero VAD to detect
when the user stops speaking and flush the audio — producing final transcripts and
driving the end-of-turn pipeline.

```python
from livekit.plugins import mistralai
from livekit.plugins.silero import VAD

# Using Silero VAD with default settings (550ms silence threshold)
stt = mistralai.STT(model="voxtral-mini-transcribe-realtime-2602")

# Using custom VAD settings (e.g. shorter silence threshold for faster responses)
stt = mistralai.STT(
model="voxtral-mini-transcribe-realtime-2602",
vad=VAD.load(min_silence_duration=0.3),
)
```

### Text-to-Speech (TTS)

```python
from livekit.plugins import mistralai
Expand All @@ -37,3 +74,18 @@ import base64
ref_audio_b64 = base64.b64encode(open("sample.mp3", "rb").read()).decode()
tts = mistralai.TTS(ref_audio=ref_audio_b64)
```

### LLM

```python
from livekit.plugins import mistralai

llm = mistralai.LLM()

# With a custom temperature and maximum number of completion tokens
llm = mistralai.LLM(
model="mistral-large-latest",
temperature=0.7,
max_completion_tokens=150
)
```
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
from dataclasses import dataclass
from typing import Any, cast

import httpx

from livekit.agents import APIConnectionError, APIStatusError, APITimeoutError, llm
from livekit.agents.llm import (
ChatChunk,
Expand All @@ -30,37 +28,51 @@

from .models import ChatModels

DEFAULT_MODEL: ChatModels = "ministral-8b-latest"


@dataclass
class _LLMOptions:
model: str
temperature: NotGivenOr[float]
max_completion_tokens: NotGivenOr[int]
model: ChatModels | str
max_completion_tokens: int | None
temperature: float | None


# Mistral LLM Class
class LLM(llm.LLM):
def __init__(
self,
model: str | ChatModels = "ministral-8b-latest",
api_key: str | None = None,
client: Mistral | None = None,
temperature: NotGivenOr[float] = NOT_GIVEN,
api_key: NotGivenOr[str] = NOT_GIVEN,
model: NotGivenOr[ChatModels | str] = NOT_GIVEN,
max_completion_tokens: NotGivenOr[int] = NOT_GIVEN,
timeout: httpx.Timeout | None = None,
temperature: NotGivenOr[float] = NOT_GIVEN,
) -> None:
"""
Create a new instance of MistralAI LLM.

Args:
client: Optional pre-configured MistralAI client instance.
api_key: Your Mistral AI API key. If not provided, will use the MISTRAL_API_KEY environment variable.
model: The Mistral AI model to use for completions, default is "ministral-8b-latest".
max_completion_tokens: The max. number of tokens the LLM can output.
temperature: The temperature to use the LLM with.
"""

resolved_model = model if is_given(model) else DEFAULT_MODEL
resolved_max_completion_tokens = (
max_completion_tokens if is_given(max_completion_tokens) else None
)
resolved_temperature = temperature if is_given(temperature) else None
super().__init__()
self._opts = _LLMOptions(
model=model,
temperature=temperature,
max_completion_tokens=max_completion_tokens,
model=resolved_model,
max_completion_tokens=resolved_max_completion_tokens,
temperature=resolved_temperature,
)
mistral_api_key = api_key or os.environ.get("MISTRAL_API_KEY")

mistral_api_key = api_key if is_given(api_key) else os.environ.get("MISTRAL_API_KEY")
if not client and not mistral_api_key:
raise ValueError(
"Mistral API key is required, either as argument or set"
" MISTRAL_API_KEY environment variable"
)
raise ValueError("Mistral AI API key is required. Set MISTRAL_API_KEY or pass api_key")
self._client = client or Mistral(api_key=mistral_api_key)

@property
Expand All @@ -71,6 +83,28 @@ def model(self) -> str:
def provider(self) -> str:
return "MistralAI"

def update_options(
self,
*,
model: NotGivenOr[ChatModels | str] = NOT_GIVEN,
max_completion_tokens: NotGivenOr[int] = NOT_GIVEN,
temperature: NotGivenOr[float] = NOT_GIVEN,
) -> None:
"""
Update the LLM options stored on this instance.

All arguments are keyword-only. Each option is written to ``self._opts``
only when ``is_given`` reports it as provided; options left at
``NOT_GIVEN`` keep their current value.

Args:
model: The model to use for completions.
max_completion_tokens: The maximum number of tokens the LLM can output.
temperature: The temperature to use the LLM with.
"""
# Each option is checked independently, so callers can change any subset.
if is_given(model):
self._opts.model = model
if is_given(max_completion_tokens):
self._opts.max_completion_tokens = max_completion_tokens
if is_given(temperature):
self._opts.temperature = temperature

def chat(
self,
*,
Expand All @@ -86,21 +120,16 @@ def chat(

if is_given(extra_kwargs):
extra.update(extra_kwargs)

if is_given(self._opts.max_completion_tokens):
extra["max_tokens"] = self._opts.max_completion_tokens

if is_given(self._opts.temperature):
extra["temperature"] = self._opts.temperature

if is_given(parallel_tool_calls):
extra["parallel_tool_calls"] = parallel_tool_calls

if is_given(tool_choice):
extra["tool_choice"] = tool_choice

if is_given(response_format):
extra["response_format"] = response_format
if self._opts.max_completion_tokens is not None:
extra["max_tokens"] = self._opts.max_completion_tokens
if self._opts.temperature is not None:
extra["temperature"] = self._opts.temperature

return LLMStream(
self,
Expand All @@ -113,7 +142,6 @@ def chat(
)


# Mistral LLM STREAM
class LLMStream(llm.LLMStream):
def __init__(
self,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,26 +1,31 @@
from typing import Literal

ChatModels = Literal[
"mistral-medium-latest",
"mistral-large-latest",
"mistral-medium-2508",
"mistral-large-2512",
"mistral-large-2411",
"mistral-medium-latest",
"mistral-medium-2508",
"mistral-medium-2505",
"ministral-3b-2410",
"ministral-8b-2410",
"mistral-large-2411",
"mistral-large-2512",
"mistral-small-latest",
"mistral-small-2603",
"mistral-small-2506",
"ministral-14b-latest",
"ministral-14b-2512",
"ministral-8b-latest",
"ministral-8b-2512",
"ministral-3b-latest",
"ministral-3b-2512",
"mistral-small-2407",
]

STTModels = Literal[
"voxtral-small-2507", "voxtral-mini-2507", "voxtral-mini-latest", "voxtral-small-latest"
"voxtral-mini-transcribe-realtime-2602",
"voxtral-mini-latest",
"voxtral-mini-2602",
"voxtral-mini-2507",
]

TTSModels = Literal["voxtral-mini-tts-2603", "voxtral-mini-tts-latest"]
TTSModels = Literal["voxtral-mini-tts-latest", "voxtral-mini-tts-2603"]

TTSVoices = Literal[
"gb_jane_confident",
Expand Down
Loading
Loading