volcengine · qin-ctx · Jun 23, 2026 · Jun 23, 2026 · Jun 23, 2026
diff --git a/docs/en/guides/01-configuration.md b/docs/en/guides/01-configuration.md
@@ -834,6 +834,7 @@ Reranking model for search result refinement. Supports VikingDB (Volcengine), Co
     "api_key": "your-api-key",
     "api_base": "https://dashscope.aliyuncs.com/compatible-api/v1/reranks",
     "model": "qwen3-vl-rerank",
+    "timeout": 120,
     "threshold": 0.1
   }
 }
@@ -850,6 +851,7 @@ Reranking model for search result refinement. Supports VikingDB (Volcengine), Co
 | `api_key` | str | API key (for `openai` or `cohere` providers) |
 | `api_base` | str | Endpoint URL (for `openai` provider) |
 | `model` | str | Model name (for `openai` providers) |
+| `timeout` | float | HTTP request timeout in seconds for OpenAI-compatible providers. Must be greater than `0`. Increase for slow or cold-starting local rerank servers. Default: `30.0` |
 | `threshold` | float | Score threshold between `0.0` and `1.0`; results below this are filtered out. Default: `0.1` |
 | `extra_headers` | object | Custom HTTP headers (for OpenAI-compatible providers, optional) |
 

diff --git a/docs/zh/guides/01-configuration.md b/docs/zh/guides/01-configuration.md
@@ -805,6 +805,7 @@ AST 提取支持：Python、JavaScript/TypeScript、Rust、Go、Java、C/C++。
     "api_key": "your-api-key",
     "api_base": "https://dashscope.aliyuncs.com/compatible-api/v1/reranks",
     "model": "qwen3-vl-rerank",
+    "timeout": 120,
     "threshold": 0.1
   }
 }
@@ -821,6 +822,7 @@ AST 提取支持：Python、JavaScript/TypeScript、Rust、Go、Java、C/C++。
 | `api_key` | str | API Key（用于 `openai` 或 `cohere` 提供方） |
 | `api_base` | str | 接口地址（用于 `openai` 提供方） |
 | `model` | str | 模型名称（用于 `openai` 提供方） |
+| `timeout` | float | OpenAI 兼容 provider 的 HTTP 请求超时时间，单位为秒，必须大于 `0`。对于较慢或冷启动的本地 rerank 服务可适当增大。默认：`30.0` |
 | `threshold` | float | 分数阈值，范围为 `0.0` 到 `1.0`。低于此值的结果会被过滤。默认：`0.1` |
 | `extra_headers` | object | 自定义 HTTP 请求头（OpenAI 兼容 provider 可用，可选） |
 

diff --git a/openviking/models/rerank/openai_rerank.py b/openviking/models/rerank/openai_rerank.py
@@ -31,6 +31,7 @@ def __init__(
         api_base: str,
         model_name: str,
         extra_headers: Optional[Dict[str, str]] = None,
+        timeout: float = 30.0,
     ) -> None:
         """
         Initialize OpenAI-compatible rerank client.
@@ -40,12 +41,15 @@ def __init__(
             api_base: Full endpoint URL for the rerank API
             model_name: Model name to use for reranking
             extra_headers: Optional extra headers for API requests
+            timeout: HTTP request timeout in seconds. Defaults to 30. Increase for
+                local LLM servers that incur model cold-start latency on the first call.
         """
         super().__init__()
         self.api_key = api_key
         self.api_base = api_base
         self.model_name = model_name
         self.extra_headers = extra_headers or {}
+        self.timeout = timeout
         self.provider = "openai"
 
     def rerank_batch(self, query: str, documents: List[str]) -> Optional[List[float]]:
@@ -81,7 +85,7 @@ def rerank_batch(self, query: str, documents: List[str]) -> Optional[List[float]
                 url=self.api_base,
                 headers=headers,
                 json=req_body,
-                timeout=30,
+                timeout=self.timeout,
             )
             response.raise_for_status()
             result = response.json()
@@ -139,4 +143,5 @@ def from_config(cls, config) -> Optional["OpenAIRerankClient"]:
             api_base=config.api_base,
             model_name=config.model or "qwen3-rerank",
             extra_headers=config.extra_headers,
+            timeout=config.timeout,
         )
diff --git a/openviking_cli/utils/config/rerank_config.py b/openviking_cli/utils/config/rerank_config.py
@@ -32,8 +32,17 @@ class RerankConfig(BaseModel):
     )
 
     extra_headers: Optional[Dict[str, str]] = Field(
-        default=None,
-        description="Extra HTTP headers for OpenAI-compatible providers"
+        default=None, description="Extra HTTP headers for OpenAI-compatible providers"
+    )
+
+    timeout: float = Field(
+        default=30.0,
+        # Fail at config load: the HTTP layer rejects timeouts <= 0 on every request.
+        gt=0,
+        description=(
+            "HTTP request timeout in seconds for OpenAI-compatible rerank calls. "
+            "Increase for local LLM servers with model cold-start latency."
+        ),
     )
 
     threshold: float = Field(

diff --git a/tests/unit/models/rerank/test_openai_rerank_timeout.py b/tests/unit/models/rerank/test_openai_rerank_timeout.py
@@ -0,0 +1,110 @@
+# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd.
+# SPDX-License-Identifier: AGPL-3.0
+"""Tests for OpenAIRerankClient configurable HTTP timeout support."""
+
+from unittest.mock import Mock, patch
+
+from openviking.models.rerank.openai_rerank import OpenAIRerankClient
+from openviking_cli.utils.config.rerank_config import RerankConfig
+
+
+def test_openai_rerank_client_default_timeout():
+    """Client defaults to a 30s timeout when none is provided."""
+    client = OpenAIRerankClient(
+        api_key="test-key",
+        api_base="https://api.example.com/v1",
+        model_name="qwen3-rerank",
+    )
+
+    assert client.timeout == 30.0
+
+
+def test_openai_rerank_client_custom_timeout():
+    """Client stores an explicitly provided timeout."""
+    client = OpenAIRerankClient(
+        api_key="test-key",
+        api_base="https://api.example.com/v1",
+        model_name="qwen3-rerank",
+        timeout=120.0,
+    )
+
+    assert client.timeout == 120.0
+
+
+def test_rerank_config_default_timeout():
+    """RerankConfig defaults timeout to 30s for backwards compatibility."""
+    config = RerankConfig(
+        model="qwen3-rerank",
+        api_key="test-key",
+        api_base="https://api.example.com/v1",
+    )
+
+    assert config.timeout == 30.0
+
+
+def test_openai_rerank_from_config_with_custom_timeout():
+    """from_config threads a custom timeout through to the client."""
+    config = RerankConfig(
+        model="qwen3-rerank",
+        api_key="test-key",
+        api_base="https://api.example.com/v1",
+        timeout=120.0,
+    )
+
+    client = OpenAIRerankClient.from_config(config)
+
+    assert client.timeout == 120.0
+
+
+def test_openai_rerank_from_config_default_timeout():
+    """from_config preserves the 30s default when timeout is unset."""
+    config = RerankConfig(
+        model="qwen3-rerank",
+        api_key="test-key",
+        api_base="https://api.example.com/v1",
+    )
+
+    client = OpenAIRerankClient.from_config(config)
+
+    assert client.timeout == 30.0
+
+
+@patch("openviking.models.rerank.openai_rerank.requests.post")
+def test_rerank_batch_uses_configured_timeout(mock_post):
+    """rerank_batch passes the configured timeout to requests.post."""
+    mock_response = Mock()
+    mock_response.status_code = 200
+    mock_response.json.return_value = {
+        "results": [{"index": 0, "relevance_score": 0.9}, {"index": 1, "relevance_score": 0.8}]
+    }
+    mock_post.return_value = mock_response
+
+    client = OpenAIRerankClient(
+        api_key="test-key",
+        api_base="https://api.example.com/v1",
+        model_name="qwen3-rerank",
+        timeout=120.0,
+    )
+
+    client.rerank_batch(query="test query", documents=["doc1", "doc2"])
+
+    assert mock_post.called
+    assert mock_post.call_args.kwargs["timeout"] == 120.0
+
+
+@patch("openviking.models.rerank.openai_rerank.requests.post")
+def test_rerank_batch_uses_default_timeout(mock_post):
+    """rerank_batch falls back to the 30s default when no timeout is configured."""
+    mock_response = Mock()
+    mock_response.status_code = 200
+    mock_response.json.return_value = {"results": [{"index": 0, "relevance_score": 0.9}]}
+    mock_post.return_value = mock_response
+
+    client = OpenAIRerankClient(
+        api_key="test-key", api_base="https://api.example.com/v1", model_name="qwen3-rerank"
+    )
+
+    client.rerank_batch(query="test query", documents=["doc1"])
+
+    assert mock_post.called
+    assert mock_post.call_args.kwargs["timeout"] == 30.0