Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/en/guides/01-configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -834,6 +834,7 @@ Reranking model for search result refinement. Supports VikingDB (Volcengine), Co
"api_key": "your-api-key",
"api_base": "https://dashscope.aliyuncs.com/compatible-api/v1/reranks",
"model": "qwen3-vl-rerank",
"timeout": 120,
"threshold": 0.1
}
}
Expand All @@ -850,6 +851,7 @@ Reranking model for search result refinement. Supports VikingDB (Volcengine), Co
| `api_key` | str | API key (for `openai` or `cohere` providers) |
| `api_base` | str | Endpoint URL (for `openai` provider) |
| `model` | str | Model name (for `openai` provider) |
| `timeout` | float | HTTP request timeout in seconds for OpenAI-compatible providers. Increase for slow or cold-starting local rerank servers. Default: `30.0` |
| `threshold` | float | Score threshold between `0.0` and `1.0`; results below this are filtered out. Default: `0.1` |
| `extra_headers` | object | Custom HTTP headers (for OpenAI-compatible providers, optional) |

Expand Down
2 changes: 2 additions & 0 deletions docs/zh/guides/01-configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -805,6 +805,7 @@ AST 提取支持:Python、JavaScript/TypeScript、Rust、Go、Java、C/C++。
"api_key": "your-api-key",
"api_base": "https://dashscope.aliyuncs.com/compatible-api/v1/reranks",
"model": "qwen3-vl-rerank",
"timeout": 120,
"threshold": 0.1
}
}
Expand All @@ -821,6 +822,7 @@ AST 提取支持:Python、JavaScript/TypeScript、Rust、Go、Java、C/C++。
| `api_key` | str | API Key(用于 `openai` 或 `cohere` 提供方) |
| `api_base` | str | 接口地址(用于 `openai` 提供方) |
| `model` | str | 模型名称(用于 `openai` 提供方) |
| `timeout` | float | OpenAI 兼容 provider 的 HTTP 请求超时时间,单位为秒。对于较慢或冷启动的本地 rerank 服务可适当增大。默认:`30.0` |
| `threshold` | float | 分数阈值,范围为 `0.0` 到 `1.0`。低于此值的结果会被过滤。默认:`0.1` |
| `extra_headers` | object | 自定义 HTTP 请求头(OpenAI 兼容 provider 可用,可选) |

Expand Down
7 changes: 6 additions & 1 deletion openviking/models/rerank/openai_rerank.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def __init__(
api_base: str,
model_name: str,
extra_headers: Optional[Dict[str, str]] = None,
timeout: float = 30.0,
) -> None:
"""
Initialize OpenAI-compatible rerank client.
Expand All @@ -40,12 +41,15 @@ def __init__(
api_base: Full endpoint URL for the rerank API
model_name: Model name to use for reranking
extra_headers: Optional extra headers for API requests
timeout: HTTP request timeout in seconds. Defaults to 30. Increase for
local LLM servers that incur model cold-start latency on the first call.
"""
super().__init__()
self.api_key = api_key
self.api_base = api_base
self.model_name = model_name
self.extra_headers = extra_headers or {}
self.timeout = timeout
self.provider = "openai"

def rerank_batch(self, query: str, documents: List[str]) -> Optional[List[float]]:
Expand Down Expand Up @@ -81,7 +85,7 @@ def rerank_batch(self, query: str, documents: List[str]) -> Optional[List[float]
url=self.api_base,
headers=headers,
json=req_body,
timeout=30,
timeout=self.timeout,
)
response.raise_for_status()
result = response.json()
Expand Down Expand Up @@ -139,4 +143,5 @@ def from_config(cls, config) -> Optional["OpenAIRerankClient"]:
api_base=config.api_base,
model_name=config.model or "qwen3-rerank",
extra_headers=config.extra_headers,
timeout=config.timeout,
)
11 changes: 9 additions & 2 deletions openviking_cli/utils/config/rerank_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,15 @@ class RerankConfig(BaseModel):
)

extra_headers: Optional[Dict[str, str]] = Field(
default=None,
description="Extra HTTP headers for OpenAI-compatible providers"
default=None, description="Extra HTTP headers for OpenAI-compatible providers"
)

timeout: float = Field(
default=30.0,
description=(
"HTTP request timeout in seconds for OpenAI-compatible rerank calls. "
"Increase for local LLM servers with model cold-start latency."
),
)

threshold: float = Field(
Expand Down
110 changes: 110 additions & 0 deletions tests/unit/models/rerank/test_openai_rerank_timeout.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd.
# SPDX-License-Identifier: AGPL-3.0
"""Tests for OpenAIRerankClient configurable HTTP timeout support."""

from unittest.mock import Mock, patch

from openviking.models.rerank.openai_rerank import OpenAIRerankClient
from openviking_cli.utils.config.rerank_config import RerankConfig


def test_openai_rerank_client_default_timeout():
    """A client constructed without a timeout argument reports 30.0 seconds."""
    # Only the required constructor arguments are supplied; timeout is omitted
    # on purpose so the constructor's default is what gets checked.
    init_kwargs = {
        "api_key": "test-key",
        "api_base": "https://api.example.com/v1",
        "model_name": "qwen3-rerank",
    }
    rerank_client = OpenAIRerankClient(**init_kwargs)

    assert rerank_client.timeout == 30.0


def test_openai_rerank_client_custom_timeout():
    """A timeout passed to the constructor is kept on the client unchanged."""
    init_kwargs = {
        "api_key": "test-key",
        "api_base": "https://api.example.com/v1",
        "model_name": "qwen3-rerank",
        "timeout": 120.0,
    }
    rerank_client = OpenAIRerankClient(**init_kwargs)

    assert rerank_client.timeout == 120.0


def test_rerank_config_default_timeout():
    """RerankConfig built without a timeout exposes the 30.0 second default."""
    # timeout is deliberately left out of the config fields.
    config_fields = {
        "model": "qwen3-rerank",
        "api_key": "test-key",
        "api_base": "https://api.example.com/v1",
    }
    rerank_cfg = RerankConfig(**config_fields)

    assert rerank_cfg.timeout == 30.0


def test_openai_rerank_from_config_with_custom_timeout():
    """A timeout set on RerankConfig reaches the client built by from_config."""
    config_fields = {
        "model": "qwen3-rerank",
        "api_key": "test-key",
        "api_base": "https://api.example.com/v1",
        "timeout": 120.0,
    }
    rerank_cfg = RerankConfig(**config_fields)

    built_client = OpenAIRerankClient.from_config(rerank_cfg)

    assert built_client.timeout == 120.0


def test_openai_rerank_from_config_default_timeout():
    """from_config yields a 30.0 second timeout when the config sets none."""
    # No timeout key here: the value must come from the config's own default.
    config_fields = {
        "model": "qwen3-rerank",
        "api_key": "test-key",
        "api_base": "https://api.example.com/v1",
    }
    rerank_cfg = RerankConfig(**config_fields)

    built_client = OpenAIRerankClient.from_config(rerank_cfg)

    assert built_client.timeout == 30.0


@patch("openviking.models.rerank.openai_rerank.requests.post")
def test_rerank_batch_uses_configured_timeout(mock_post):
    """rerank_batch forwards the client's custom timeout to requests.post."""
    # Canned HTTP response: one scored entry per submitted document.
    scored_entries = [
        {"index": 0, "relevance_score": 0.9},
        {"index": 1, "relevance_score": 0.8},
    ]
    fake_response = Mock()
    fake_response.status_code = 200
    fake_response.json.return_value = {"results": scored_entries}
    mock_post.return_value = fake_response

    rerank_client = OpenAIRerankClient(
        api_key="test-key",
        api_base="https://api.example.com/v1",
        model_name="qwen3-rerank",
        timeout=120.0,
    )

    rerank_client.rerank_batch(query="test query", documents=["doc1", "doc2"])

    # The patched post must have been invoked, and with the custom timeout.
    assert mock_post.called
    sent_kwargs = mock_post.call_args.kwargs
    assert sent_kwargs["timeout"] == 120.0


@patch("openviking.models.rerank.openai_rerank.requests.post")
def test_rerank_batch_uses_default_timeout(mock_post):
    """rerank_batch sends the 30.0 second default when no timeout was given."""
    # Canned HTTP response carrying a single scored document.
    fake_response = Mock()
    fake_response.status_code = 200
    fake_response.json.return_value = {
        "results": [{"index": 0, "relevance_score": 0.9}],
    }
    mock_post.return_value = fake_response

    # Constructed without a timeout argument so the default applies.
    rerank_client = OpenAIRerankClient(
        api_key="test-key",
        api_base="https://api.example.com/v1",
        model_name="qwen3-rerank",
    )

    rerank_client.rerank_batch(query="test query", documents=["doc1"])

    assert mock_post.called
    sent_kwargs = mock_post.call_args.kwargs
    assert sent_kwargs["timeout"] == 30.0
Loading