Skip to content
Draft
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -211,3 +211,43 @@ EVA_RECORD_IDS=

# Logging level (DEBUG | INFO | WARNING | ERROR | CRITICAL)
EVA_LOG_LEVEL=INFO

# ==============================================
# Optional: Turn Detection & VAD Configuration
# ==============================================
# Fine-tune user turn detection and voice activity detection.
# Leave commented to use smart defaults.

# User turn start strategy: vad | transcription | external
# - vad: Start turn when VAD detects speech (default)
# - transcription: Start turn when STT produces transcription
# - external: Delegate to external service (e.g., Deepgram Flux)
# EVA_MODEL__TURN_START_STRATEGY=vad

# User turn start strategy parameters (JSON)
# EVA_MODEL__TURN_START_STRATEGY_PARAMS='{}'

# User turn stop strategy: turn_analyzer | speech_timeout | external
# - turn_analyzer: Use smart turn analyzer to detect natural turn end (default)
# - speech_timeout: Stop after fixed silence duration
# - external: Delegate to external service
# EVA_MODEL__TURN_STOP_STRATEGY=turn_analyzer

# User turn stop strategy parameters (JSON)
# For speech_timeout: {"user_speech_timeout": 0.8}
# For turn_analyzer: automatically uses smart turn detection
# EVA_MODEL__TURN_STOP_STRATEGY_PARAMS='{}'

# Note: For services with built-in turn detection (e.g., Deepgram Flux), set both to 'external':
# EVA_MODEL__TURN_START_STRATEGY=external
# EVA_MODEL__TURN_STOP_STRATEGY=external

# VAD (Voice Activity Detection) analyzer: silero
# EVA_MODEL__VAD=silero

# VAD parameters (JSON)
# - confidence: Minimum confidence threshold (0.0-1.0, default: 0.7)
# - start_secs: Duration to wait before confirming voice start (default: 0.2)
# - stop_secs: Duration to wait before confirming voice stop (default: 0.2)
# - min_volume: Minimum audio volume threshold (0.0-1.0, default: 0.6)
# EVA_MODEL__VAD_PARAMS='{"start_secs": 0.1, "stop_secs": 0.8, "min_volume": 0.6, "confidence": 0.7}'
35 changes: 21 additions & 14 deletions docs/experiment_setup.md
Copy link
Copy Markdown
Collaborator

@fanny-riols fanny-riols Apr 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: This change is from main and was already reviewed, it's not for this PR.

Original file line number Diff line number Diff line change
Expand Up @@ -114,20 +114,25 @@ EVA_MODEL_LIST='[

## ElevenLabs User Simulator

The user simulator is an ElevenLabs Agent with the following configuration:

| Parameter | Value |
|---|---|
| LLM | GPT-4.1 |
| Voice (female) | Natalee Champlin |
| Voice (male) | Eric |
| Input audio | μ-law telephony, 8000 Hz |
| Turn detection silence | 15ms |
| Max conversation duration | 600s |
| Interruptions | Disabled |
| First message | None (agent speaks first) |
| Default personality | Disabled |
| Tools | end_call (user ends the call once task is complete or conversation cannot be advanced) |
We created two ElevenLabs Agents for the user simulator: one with a female voice and one with a male voice. When creating a new agent, select "Blank Agent".
Then apply the following configuration:

| Parameter | Value |
|--------------------------------------|-----------------------------------------------------------------------------------|
| Voice (female) | Natalee Champlin |
| Voice (male) | Eric - Smooth, Trustworthy |
| TTS model family | V3 Conversational |
| Expressive mode | Enabled (no tags selected) |
| Language | English |
| LLM | GPT-5.1 |
| System prompt | {{prompt}} |
| Default personality | Disabled |
| First message | None (remove the default first message, as the agent speaks first) |
| Interruptible | Disabled |
| Advanced > Input audio | μ-law telephony, 8000 Hz |
| Advanced > Take turn after silence | 15ms |
| Advanced > Max conversation duration | 600s |
| Tools > System tools | Enable "End conversation" (Name is `end_call`, and Description is provided below) |

The simulator is prompted with a specific user goal and is instructed to stay on task, communicate all required named entities clearly, and terminate when the goal is accomplished or the task is clearly unlikely to succeed.

Expand All @@ -146,3 +151,5 @@ Call this function when any ONE of the following is true:

Before calling this tool, always say a brief goodbye first.
```

You can then get your `agent-id` from the Widget tab of your agent.
2 changes: 1 addition & 1 deletion src/eva/assistant/pipeline/audio_llm_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,7 @@ def __init__(
super().__init__(**kwargs)
self._audio_collector = audio_collector
params = params or {}
self._api_key = params.get["api_key"]
self._api_key = params["api_key"]
self._model = model
self._system_prompt = system_prompt or self.DEFAULT_SYSTEM_PROMPT
self._sample_rate = sample_rate
Expand Down
135 changes: 135 additions & 0 deletions src/eva/assistant/pipeline/turn_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
"""Factory functions for creating turn strategies and VAD analyzers from configuration.

This module provides functions to instantiate Pipecat turn strategies and VAD analyzers
based on configuration settings from environment variables or config files.
"""

from typing import Any

from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.audio.vad.vad_analyzer import VADAnalyzer, VADParams
from pipecat.turns.user_start import (
BaseUserTurnStartStrategy,
ExternalUserTurnStartStrategy,
TranscriptionUserTurnStartStrategy,
VADUserTurnStartStrategy,
)
from pipecat.turns.user_stop import (
BaseUserTurnStopStrategy,
ExternalUserTurnStopStrategy,
SpeechTimeoutUserTurnStopStrategy,
TurnAnalyzerUserTurnStopStrategy,
)

from eva.utils.logging import get_logger

logger = get_logger(__name__)


def create_vad_analyzer(vad_type: str | None, vad_params: dict[str, Any]) -> VADAnalyzer | None:
    """Build the VAD analyzer selected by configuration.

    Args:
        vad_type: Analyzer name, matched case-insensitively. Only 'silero' is
            recognised; None means no analyzer was configured.
        vad_params: Keyword arguments forwarded to VADParams (confidence,
            start_secs, stop_secs, min_volume). When falsy, no VADParams
            object is built and the analyzer receives params=None.

    Returns:
        A SileroVADAnalyzer instance, or None when vad_type is None.

    Raises:
        ValueError: If vad_type names anything other than 'silero'.
    """
    # Nothing configured: leave the choice of fallback to the caller.
    if vad_type is None:
        return None

    if vad_type.lower() != "silero":
        raise ValueError(
            f"Unsupported VAD type: {vad_type}. Supported types: 'silero'"
        )

    # Only construct VADParams when overrides were actually supplied.
    silero_params = None
    if vad_params:
        silero_params = VADParams(**vad_params)
    return SileroVADAnalyzer(params=silero_params)


def create_turn_start_strategy(
    strategy_type: str | None,
    strategy_params: dict[str, Any] | None,
) -> BaseUserTurnStartStrategy | None:
    """Create a user turn start strategy from configuration.

    Args:
        strategy_type: Strategy name, matched case-insensitively: 'vad',
            'transcription' or 'external'. None means nothing was configured.
        strategy_params: Keyword arguments forwarded to the selected strategy's
            constructor. None is treated the same as an empty dict.

    Returns:
        The turn start strategy instance, or None if strategy_type is None.

    Raises:
        ValueError: If strategy_type is not one of the supported names.
    """
    if strategy_type is None:
        return None

    # `**None` would raise TypeError; treat an absent params mapping as empty,
    # mirroring how create_vad_analyzer tolerates missing parameters.
    params = strategy_params or {}
    strategy_type_lower = strategy_type.lower()

    # None of the start strategies has required constructor parameters.
    if strategy_type_lower == "vad":
        return VADUserTurnStartStrategy(**params)
    if strategy_type_lower == "transcription":
        return TranscriptionUserTurnStartStrategy(**params)
    if strategy_type_lower == "external":
        return ExternalUserTurnStartStrategy(**params)

    raise ValueError(
        f"Unsupported turn start strategy: {strategy_type}. "
        f"Supported types: 'vad', 'transcription', 'external'"
    )


def create_turn_stop_strategy(
    strategy_type: str | None,
    strategy_params: dict[str, Any] | None,
    smart_turn_stop_secs: float | None = None,
) -> BaseUserTurnStopStrategy | None:
    """Create a user turn stop strategy from configuration.

    Args:
        strategy_type: Strategy name, matched case-insensitively:
            'speech_timeout', 'turn_analyzer' or 'external'. None means
            nothing was configured.
        strategy_params: Keyword arguments forwarded to the selected strategy's
            constructor (e.g. user_speech_timeout for 'speech_timeout').
            None is treated the same as an empty dict.
        smart_turn_stop_secs: stop_secs for SmartTurnParams. Only consulted by
            the 'turn_analyzer' strategy; when None, no SmartTurnParams object
            is built and the analyzer receives params=None.

    Returns:
        The turn stop strategy instance, or None if strategy_type is None.

    Raises:
        ValueError: If strategy_type is not one of the supported names.
    """
    if strategy_type is None:
        return None

    # `**None` would raise TypeError; treat an absent params mapping as empty,
    # mirroring how create_vad_analyzer tolerates missing parameters.
    params = strategy_params or {}
    strategy_type_lower = strategy_type.lower()

    if strategy_type_lower == "speech_timeout":
        # Accepts the user_speech_timeout parameter via params.
        return SpeechTimeoutUserTurnStopStrategy(**params)

    if strategy_type_lower == "turn_analyzer":
        # This strategy needs a turn analyzer instance; build the local smart
        # turn analyzer, overriding stop_secs only when one was supplied.
        smart_params = (
            SmartTurnParams(stop_secs=smart_turn_stop_secs)
            if smart_turn_stop_secs is not None
            else None
        )
        turn_analyzer = LocalSmartTurnAnalyzerV3(params=smart_params)
        return TurnAnalyzerUserTurnStopStrategy(turn_analyzer=turn_analyzer, **params)

    if strategy_type_lower == "external":
        # No required constructor parameters.
        return ExternalUserTurnStopStrategy(**params)

    raise ValueError(
        f"Unsupported turn stop strategy: {strategy_type}. "
        f"Supported types: 'speech_timeout', 'turn_analyzer', 'external'"
    )
82 changes: 63 additions & 19 deletions src/eva/assistant/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
)
from pipecat.turns.user_start import VADUserTurnStartStrategy
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
from pipecat.turns.user_turn_strategies import ExternalUserTurnStrategies, UserTurnStrategies
from pipecat.turns.user_turn_strategies import UserTurnStrategies
from pipecat.utils.time import time_now_iso8601

from eva.assistant.agentic.audit_log import AuditLog, current_timestamp_ms
Expand All @@ -61,6 +61,11 @@
create_stt_service,
create_tts_service,
)
from eva.assistant.pipeline.turn_config import (
create_turn_start_strategy,
create_turn_stop_strategy,
create_vad_analyzer,
)
from eva.assistant.services.llm import LiteLLMClient
from eva.assistant.tools.tool_executor import ToolExecutor
from eva.models.agents import AgentConfig
Expand Down Expand Up @@ -326,26 +331,65 @@ async def _realtime_tool_handler(params) -> None:
"smart_turn_stop_secs", 0.8
) # Shorter silence so we don't have to wait 3s if smart turn marks audio as incomplete

if (
isinstance(self.pipeline_config, (PipelineConfig, SpeechToSpeechConfig))
and self.pipeline_config.turn_strategy == "external"
):
logger.info("Using external user turn strategies")
user_turn_strategies = ExternalUserTurnStrategies()
vad_analyzer = None
# Use configurable turn strategies if specified, otherwise fall back to defaults
if isinstance(self.pipeline_config, (PipelineConfig, AudioLLMConfig)):
turn_start_cfg = self.pipeline_config.turn_start_strategy
turn_start_params = self.pipeline_config.turn_start_strategy_params
turn_stop_cfg = self.pipeline_config.turn_stop_strategy
turn_stop_params = self.pipeline_config.turn_stop_strategy_params
vad_cfg = self.pipeline_config.vad
vad_cfg_params = self.pipeline_config.vad_params
else:
logger.info("Using local smart turn analyzer")
user_turn_strategies = UserTurnStrategies(
start=[VADUserTurnStartStrategy()],
stop=[
TurnAnalyzerUserTurnStopStrategy(
turn_analyzer=LocalSmartTurnAnalyzerV3(
params=SmartTurnParams(stop_secs=smart_turn_stop_secs)
)
)
],
turn_start_cfg = None
turn_start_params = {}
turn_stop_cfg = None
turn_stop_params = {}
vad_cfg = None
vad_cfg_params = {}

# Create turn start strategy
turn_start_strategy = create_turn_start_strategy(turn_start_cfg, turn_start_params)
if turn_start_strategy is None:
# Default: VADUserTurnStartStrategy
turn_start_strategy = VADUserTurnStartStrategy()
logger.info("Using default VAD user turn start strategy")
else:
logger.info(f"Using configured turn start strategy: {turn_start_cfg}")

# Create turn stop strategy
turn_stop_strategy = create_turn_stop_strategy(
turn_stop_cfg, turn_stop_params, smart_turn_stop_secs
)
if turn_stop_strategy is None:
# Default: TurnAnalyzerUserTurnStopStrategy with LocalSmartTurnAnalyzerV3
turn_stop_strategy = TurnAnalyzerUserTurnStopStrategy(
turn_analyzer=LocalSmartTurnAnalyzerV3(
params=SmartTurnParams(stop_secs=smart_turn_stop_secs)
)
)
vad_analyzer = SileroVADAnalyzer(params=VADParams(stop_secs=vad_stop_secs))
logger.info("Using default turn analyzer user turn stop strategy")
else:
logger.info(f"Using configured turn stop strategy: {turn_stop_cfg}")

logger.info("Using local smart turn analyzer")
user_turn_strategies = UserTurnStrategies(
start=[turn_start_strategy],
stop=[turn_stop_strategy],
)

# Create VAD analyzer
vad_analyzer = create_vad_analyzer(vad_cfg, vad_cfg_params)
if vad_analyzer is None:
# Default: SileroVADAnalyzer with configured stop_secs
# If vad_cfg_params were provided without vad_cfg, merge them with default stop_secs
vad_params_dict = {"stop_secs": vad_stop_secs}
if vad_cfg_params:
# User provided params without specifying vad type - merge with defaults
vad_params_dict.update(vad_cfg_params)
vad_analyzer = SileroVADAnalyzer(params=VADParams(**vad_params_dict))
logger.info("Using default Silero VAD analyzer")
else:
logger.info(f"Using configured VAD analyzer: {vad_cfg}")
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
context,
user_params=LLMUserAggregatorParams(
Expand Down
Loading