From dca8b289d02ac0a25ba03cf93d92bba80d3b3c92 Mon Sep 17 00:00:00 2001 From: Dico Angelo Date: Tue, 23 Jun 2026 01:26:51 -0400 Subject: [PATCH 1/2] feat(rerank): add configurable HTTP timeout for OpenAI-compatible client OpenAIRerankClient hardcoded a 30s HTTP timeout, which is insufficient for local LLM servers (e.g. llama.cpp on ROCm) that incur model cold-start latency on the first request after inactivity, causing ReadTimeout errors. Add a `timeout` field to RerankConfig (default 30.0, backwards-compatible) and thread it through OpenAIRerankClient.__init__, from_config, and the requests.post call in rerank_batch. The timeout can now be set per-environment in ov.conf, e.g. "timeout": 120. Closes #2732 --- openviking/models/rerank/openai_rerank.py | 7 +- openviking_cli/utils/config/rerank_config.py | 8 ++ .../rerank/test_openai_rerank_timeout.py | 110 ++++++++++++++++++ 3 files changed, 124 insertions(+), 1 deletion(-) create mode 100644 tests/unit/models/rerank/test_openai_rerank_timeout.py diff --git a/openviking/models/rerank/openai_rerank.py b/openviking/models/rerank/openai_rerank.py index 50ff96843d..bd0665760b 100644 --- a/openviking/models/rerank/openai_rerank.py +++ b/openviking/models/rerank/openai_rerank.py @@ -31,6 +31,7 @@ def __init__( api_base: str, model_name: str, extra_headers: Optional[Dict[str, str]] = None, + timeout: float = 30.0, ) -> None: """ Initialize OpenAI-compatible rerank client. @@ -40,12 +41,15 @@ def __init__( api_base: Full endpoint URL for the rerank API model_name: Model name to use for reranking extra_headers: Optional extra headers for API requests + timeout: HTTP request timeout in seconds. Defaults to 30. Increase for + local LLM servers that incur model cold-start latency on the first call. """ super().__init__() self.api_key = api_key self.api_base = api_base self.model_name = model_name self.extra_headers = extra_headers or {} + self.timeout = timeout self.provider = "openai" def rerank_batch(self, query: str, documents: List[str]) -> Optional[List[float]]: @@ -81,7 +85,7 @@ def rerank_batch(self, query: str, documents: List[str]) -> Optional[List[float] url=self.api_base, headers=headers, json=req_body, - timeout=30, + timeout=self.timeout, ) response.raise_for_status() result = response.json() @@ -139,4 +143,5 @@ def from_config(cls, config) -> Optional["OpenAIRerankClient"]: api_base=config.api_base, model_name=config.model or "qwen3-rerank", extra_headers=config.extra_headers, + timeout=config.timeout, ) diff --git a/openviking_cli/utils/config/rerank_config.py b/openviking_cli/utils/config/rerank_config.py index 86d5798431..e6b8135637 100644 --- a/openviking_cli/utils/config/rerank_config.py +++ b/openviking_cli/utils/config/rerank_config.py @@ -36,6 +36,14 @@ class RerankConfig(BaseModel): description="Extra HTTP headers for OpenAI-compatible providers" ) + timeout: float = Field( + default=30.0, + description=( + "HTTP request timeout in seconds for OpenAI-compatible rerank calls. " + "Increase for local LLM servers with model cold-start latency." + ), + ) + threshold: float = Field( default=0.1, description="Relevance threshold (score > threshold is relevant)" ) diff --git a/tests/unit/models/rerank/test_openai_rerank_timeout.py b/tests/unit/models/rerank/test_openai_rerank_timeout.py new file mode 100644 index 0000000000..3517e58e87 --- /dev/null +++ b/tests/unit/models/rerank/test_openai_rerank_timeout.py @@ -0,0 +1,110 @@ +# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd. +# SPDX-License-Identifier: AGPL-3.0 +"""Tests for OpenAIRerankClient configurable HTTP timeout support.""" + +from unittest.mock import Mock, patch + +from openviking.models.rerank.openai_rerank import OpenAIRerankClient +from openviking_cli.utils.config.rerank_config import RerankConfig + + +def test_openai_rerank_client_default_timeout(): + """Client defaults to a 30s timeout when none is provided.""" + client = OpenAIRerankClient( + api_key="test-key", + api_base="https://api.example.com/v1", + model_name="qwen3-rerank", + ) + + assert client.timeout == 30.0 + + +def test_openai_rerank_client_custom_timeout(): + """Client stores an explicitly provided timeout.""" + client = OpenAIRerankClient( + api_key="test-key", + api_base="https://api.example.com/v1", + model_name="qwen3-rerank", + timeout=120.0, + ) + + assert client.timeout == 120.0 + + +def test_rerank_config_default_timeout(): + """RerankConfig defaults timeout to 30s for backwards compatibility.""" + config = RerankConfig( + model="qwen3-rerank", + api_key="test-key", + api_base="https://api.example.com/v1", + ) + + assert config.timeout == 30.0 + + +def test_openai_rerank_from_config_with_custom_timeout(): + """from_config threads a custom timeout through to the client.""" + config = RerankConfig( + model="qwen3-rerank", + api_key="test-key", + api_base="https://api.example.com/v1", + timeout=120.0, + ) + + client = OpenAIRerankClient.from_config(config) + + assert client.timeout == 120.0 + + +def test_openai_rerank_from_config_default_timeout(): + """from_config preserves the 30s default when timeout is unset.""" + config = RerankConfig( + model="qwen3-rerank", + api_key="test-key", + api_base="https://api.example.com/v1", + ) + + client = OpenAIRerankClient.from_config(config) + + assert client.timeout == 30.0 + + +@patch("openviking.models.rerank.openai_rerank.requests.post") +def test_rerank_batch_uses_configured_timeout(mock_post): + """rerank_batch passes the configured timeout to requests.post.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "results": [{"index": 0, "relevance_score": 0.9}, {"index": 1, "relevance_score": 0.8}] + } + mock_post.return_value = mock_response + + client = OpenAIRerankClient( + api_key="test-key", + api_base="https://api.example.com/v1", + model_name="qwen3-rerank", + timeout=120.0, + ) + + client.rerank_batch(query="test query", documents=["doc1", "doc2"]) + + assert mock_post.called + assert mock_post.call_args.kwargs["timeout"] == 120.0 + + +@patch("openviking.models.rerank.openai_rerank.requests.post") +def test_rerank_batch_uses_default_timeout(mock_post): + """rerank_batch falls back to the 30s default when no timeout is configured.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {"results": [{"index": 0, "relevance_score": 0.9}]} + mock_post.return_value = mock_response + + client = OpenAIRerankClient( + api_key="test-key", api_base="https://api.example.com/v1", model_name="qwen3-rerank" + ) + + client.rerank_batch(query="test query", documents=["doc1"]) + + assert mock_post.called + assert mock_post.call_args.kwargs["timeout"] == 30.0 From 9d5d2baab677968628eae90e5d1b3887d31f009f Mon Sep 17 00:00:00 2001 From: qin-ctx Date: Tue, 23 Jun 2026 18:17:43 +0800 Subject: [PATCH 2/2] docs: document rerank timeout config --- docs/en/guides/01-configuration.md | 2 ++ docs/zh/guides/01-configuration.md | 2 ++ openviking_cli/utils/config/rerank_config.py | 3 +-- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/en/guides/01-configuration.md b/docs/en/guides/01-configuration.md index 59a83e69de..f8d29c8388 100644 --- a/docs/en/guides/01-configuration.md +++ b/docs/en/guides/01-configuration.md @@ -834,6 +834,7 @@ Reranking model for search result refinement. Supports VikingDB (Volcengine), Co "api_key": "your-api-key", "api_base": "https://dashscope.aliyuncs.com/compatible-api/v1/reranks", "model": "qwen3-vl-rerank", + "timeout": 120, "threshold": 0.1 } } @@ -850,6 +851,7 @@ Reranking model for search result refinement. Supports VikingDB (Volcengine), Co | `api_key` | str | API key (for `openai` or `cohere` providers) | | `api_base` | str | Endpoint URL (for `openai` provider) | | `model` | str | Model name (for `openai` providers) | +| `timeout` | float | HTTP request timeout in seconds for OpenAI-compatible providers. Increase for slow or cold-starting local rerank servers. Default: `30.0` | | `threshold` | float | Score threshold between `0.0` and `1.0`; results below this are filtered out. Default: `0.1` | | `extra_headers` | object | Custom HTTP headers (for OpenAI-compatible providers, optional) | diff --git a/docs/zh/guides/01-configuration.md b/docs/zh/guides/01-configuration.md index 93b32e9ed1..299e7bcf06 100644 --- a/docs/zh/guides/01-configuration.md +++ b/docs/zh/guides/01-configuration.md @@ -805,6 +805,7 @@ AST 提取支持:Python、JavaScript/TypeScript、Rust、Go、Java、C/C++。 "api_key": "your-api-key", "api_base": "https://dashscope.aliyuncs.com/compatible-api/v1/reranks", "model": "qwen3-vl-rerank", + "timeout": 120, "threshold": 0.1 } } @@ -821,6 +822,7 @@ AST 提取支持:Python、JavaScript/TypeScript、Rust、Go、Java、C/C++。 | `api_key` | str | API Key(用于 `openai` 或 `cohere` 提供方) | | `api_base` | str | 接口地址(用于 `openai` 提供方) | | `model` | str | 模型名称(用于 `openai` 提供方) | +| `timeout` | float | OpenAI 兼容 provider 的 HTTP 请求超时时间,单位为秒。对于较慢或冷启动的本地 rerank 服务可适当增大。默认:`30.0` | | `threshold` | float | 分数阈值,范围为 `0.0` 到 `1.0`。低于此值的结果会被过滤。默认:`0.1` | | `extra_headers` | object | 自定义 HTTP 请求头(OpenAI 兼容 provider 可用,可选) | diff --git a/openviking_cli/utils/config/rerank_config.py b/openviking_cli/utils/config/rerank_config.py index e6b8135637..6cc5803ad1 100644 --- a/openviking_cli/utils/config/rerank_config.py +++ b/openviking_cli/utils/config/rerank_config.py @@ -32,8 +32,7 @@ class RerankConfig(BaseModel): ) extra_headers: Optional[Dict[str, str]] = Field( - default=None, - description="Extra HTTP headers for OpenAI-compatible providers" + default=None, description="Extra HTTP headers for OpenAI-compatible providers" ) timeout: float = Field(