From dca8b289d02ac0a25ba03cf93d92bba80d3b3c92 Mon Sep 17 00:00:00 2001
From: Dico Angelo <dicoangelo@blackamethystcapitalkeyhold.onmicrosoft.com>
Date: Tue, 23 Jun 2026 01:26:51 -0400
Subject: [PATCH 1/2] feat(rerank): add configurable HTTP timeout for
 OpenAI-compatible client

OpenAIRerankClient hardcoded a 30s HTTP timeout, which is insufficient for
local LLM servers (e.g. llama.cpp on ROCm) that incur model cold-start
latency on the first request after inactivity, causing ReadTimeout errors.

Add a `timeout` field to RerankConfig (default 30.0, backwards-compatible)
and thread it through OpenAIRerankClient.__init__, from_config, and the
requests.post call in rerank_batch. The timeout is in seconds and can now be
set per-environment in ov.conf, e.g. "timeout": 120.

Closes #2732
---
 openviking/models/rerank/openai_rerank.py     |   7 +-
 openviking_cli/utils/config/rerank_config.py  |   8 ++
 .../rerank/test_openai_rerank_timeout.py      | 110 ++++++++++++++++++
 3 files changed, 124 insertions(+), 1 deletion(-)
 create mode 100644 tests/unit/models/rerank/test_openai_rerank_timeout.py

diff --git a/openviking/models/rerank/openai_rerank.py b/openviking/models/rerank/openai_rerank.py
index 50ff96843d..bd0665760b 100644
--- a/openviking/models/rerank/openai_rerank.py
+++ b/openviking/models/rerank/openai_rerank.py
@@ -31,6 +31,7 @@ def __init__(
         api_base: str,
         model_name: str,
         extra_headers: Optional[Dict[str, str]] = None,
+        timeout: float = 30.0,
     ) -> None:
         """
         Initialize OpenAI-compatible rerank client.
@@ -40,12 +41,15 @@ def __init__(
             api_base: Full endpoint URL for the rerank API
             model_name: Model name to use for reranking
             extra_headers: Optional extra headers for API requests
+            timeout: HTTP request timeout in seconds. Defaults to 30. Increase for
+                local LLM servers that incur model cold-start latency on the first call.
         """
         super().__init__()
         self.api_key = api_key
         self.api_base = api_base
         self.model_name = model_name
         self.extra_headers = extra_headers or {}
+        self.timeout = timeout
         self.provider = "openai"
 
     def rerank_batch(self, query: str, documents: List[str]) -> Optional[List[float]]:
@@ -81,7 +85,7 @@ def rerank_batch(self, query: str, documents: List[str]) -> Optional[List[float]
                 url=self.api_base,
                 headers=headers,
                 json=req_body,
-                timeout=30,
+                timeout=self.timeout,
             )
             response.raise_for_status()
             result = response.json()
@@ -139,4 +143,5 @@ def from_config(cls, config) -> Optional["OpenAIRerankClient"]:
             api_base=config.api_base,
             model_name=config.model or "qwen3-rerank",
             extra_headers=config.extra_headers,
+            timeout=config.timeout,
         )
diff --git a/openviking_cli/utils/config/rerank_config.py b/openviking_cli/utils/config/rerank_config.py
index 86d5798431..e6b8135637 100644
--- a/openviking_cli/utils/config/rerank_config.py
+++ b/openviking_cli/utils/config/rerank_config.py
@@ -36,6 +36,14 @@ class RerankConfig(BaseModel):
         description="Extra HTTP headers for OpenAI-compatible providers"
     )
 
+    timeout: float = Field(
+        default=30.0,
+        description=(
+            "HTTP request timeout in seconds for OpenAI-compatible rerank calls. "
+            "Increase for local LLM servers with model cold-start latency."
+        ),
+    )
+
     threshold: float = Field(
         default=0.1, description="Relevance threshold (score > threshold is relevant)"
     )
diff --git a/tests/unit/models/rerank/test_openai_rerank_timeout.py b/tests/unit/models/rerank/test_openai_rerank_timeout.py
new file mode 100644
index 0000000000..3517e58e87
--- /dev/null
+++ b/tests/unit/models/rerank/test_openai_rerank_timeout.py
@@ -0,0 +1,110 @@
+# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd.
+# SPDX-License-Identifier: AGPL-3.0
+"""Tests for OpenAIRerankClient configurable HTTP timeout support."""
+
+from unittest.mock import Mock, patch
+
+from openviking.models.rerank.openai_rerank import OpenAIRerankClient
+from openviking_cli.utils.config.rerank_config import RerankConfig
+
+
+def test_openai_rerank_client_default_timeout():
+    """Client defaults to a 30s timeout when none is provided."""
+    client = OpenAIRerankClient(
+        api_key="test-key",
+        api_base="https://api.example.com/v1",
+        model_name="qwen3-rerank",
+    )
+
+    assert client.timeout == 30.0
+
+
+def test_openai_rerank_client_custom_timeout():
+    """Client stores an explicitly provided timeout."""
+    client = OpenAIRerankClient(
+        api_key="test-key",
+        api_base="https://api.example.com/v1",
+        model_name="qwen3-rerank",
+        timeout=120.0,
+    )
+
+    assert client.timeout == 120.0
+
+
+def test_rerank_config_default_timeout():
+    """RerankConfig defaults timeout to 30s for backwards compatibility."""
+    config = RerankConfig(
+        model="qwen3-rerank",
+        api_key="test-key",
+        api_base="https://api.example.com/v1",
+    )
+
+    assert config.timeout == 30.0
+
+
+def test_openai_rerank_from_config_with_custom_timeout():
+    """from_config threads a custom timeout through to the client."""
+    config = RerankConfig(
+        model="qwen3-rerank",
+        api_key="test-key",
+        api_base="https://api.example.com/v1",
+        timeout=120.0,
+    )
+
+    client = OpenAIRerankClient.from_config(config)
+
+    assert client.timeout == 120.0
+
+
+def test_openai_rerank_from_config_default_timeout():
+    """from_config preserves the 30s default when timeout is unset."""
+    config = RerankConfig(
+        model="qwen3-rerank",
+        api_key="test-key",
+        api_base="https://api.example.com/v1",
+    )
+
+    client = OpenAIRerankClient.from_config(config)
+
+    assert client.timeout == 30.0
+
+
+@patch("openviking.models.rerank.openai_rerank.requests.post")
+def test_rerank_batch_uses_configured_timeout(mock_post):
+    """rerank_batch passes the configured timeout to requests.post."""
+    mock_response = Mock()
+    mock_response.status_code = 200
+    mock_response.json.return_value = {
+        "results": [{"index": 0, "relevance_score": 0.9}, {"index": 1, "relevance_score": 0.8}]
+    }
+    mock_post.return_value = mock_response
+
+    client = OpenAIRerankClient(
+        api_key="test-key",
+        api_base="https://api.example.com/v1",
+        model_name="qwen3-rerank",
+        timeout=120.0,
+    )
+
+    client.rerank_batch(query="test query", documents=["doc1", "doc2"])
+
+    assert mock_post.called
+    assert mock_post.call_args.kwargs["timeout"] == 120.0
+
+
+@patch("openviking.models.rerank.openai_rerank.requests.post")
+def test_rerank_batch_uses_default_timeout(mock_post):
+    """rerank_batch falls back to the 30s default when no timeout is configured."""
+    mock_response = Mock()
+    mock_response.status_code = 200
+    mock_response.json.return_value = {"results": [{"index": 0, "relevance_score": 0.9}]}
+    mock_post.return_value = mock_response
+
+    client = OpenAIRerankClient(
+        api_key="test-key", api_base="https://api.example.com/v1", model_name="qwen3-rerank"
+    )
+
+    client.rerank_batch(query="test query", documents=["doc1"])
+
+    assert mock_post.called
+    assert mock_post.call_args.kwargs["timeout"] == 30.0

From 9d5d2baab677968628eae90e5d1b3887d31f009f Mon Sep 17 00:00:00 2001
From: qin-ctx <qinhaojie.exe@bytedance.com>
Date: Tue, 23 Jun 2026 18:17:43 +0800
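Subject: [PATCH 2/2] docs: document rerank timeout config

Document the new rerank `timeout` option in the English and Chinese
configuration guides. Also collapse the `extra_headers` Field definition in
RerankConfig onto a single line (formatting only, no behavior change).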
---
 docs/en/guides/01-configuration.md           | 2 ++
 docs/zh/guides/01-configuration.md           | 2 ++
 openviking_cli/utils/config/rerank_config.py | 3 +--
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/docs/en/guides/01-configuration.md b/docs/en/guides/01-configuration.md
index 59a83e69de..f8d29c8388 100644
--- a/docs/en/guides/01-configuration.md
+++ b/docs/en/guides/01-configuration.md
@@ -834,6 +834,7 @@ Reranking model for search result refinement. Supports VikingDB (Volcengine), Co
     "api_key": "your-api-key",
     "api_base": "https://dashscope.aliyuncs.com/compatible-api/v1/reranks",
     "model": "qwen3-vl-rerank",
+    "timeout": 120,
     "threshold": 0.1
   }
 }
@@ -850,6 +851,7 @@ Reranking model for search result refinement. Supports VikingDB (Volcengine), Co
 | `api_key` | str | API key (for `openai` or `cohere` providers) |
 | `api_base` | str | Endpoint URL (for `openai` provider) |
 | `model` | str | Model name (for `openai` providers) |
+| `timeout` | float | HTTP request timeout in seconds for OpenAI-compatible providers. Increase for slow or cold-starting local rerank servers. Default: `30.0` |
 | `threshold` | float | Score threshold between `0.0` and `1.0`; results below this are filtered out. Default: `0.1` |
 | `extra_headers` | object | Custom HTTP headers (for OpenAI-compatible providers, optional) |
 
diff --git a/docs/zh/guides/01-configuration.md b/docs/zh/guides/01-configuration.md
index 93b32e9ed1..299e7bcf06 100644
--- a/docs/zh/guides/01-configuration.md
+++ b/docs/zh/guides/01-configuration.md
@@ -805,6 +805,7 @@ AST 提取支持：Python、JavaScript/TypeScript、Rust、Go、Java、C/C++。
     "api_key": "your-api-key",
     "api_base": "https://dashscope.aliyuncs.com/compatible-api/v1/reranks",
     "model": "qwen3-vl-rerank",
+    "timeout": 120,
     "threshold": 0.1
   }
 }
@@ -821,6 +822,7 @@ AST 提取支持：Python、JavaScript/TypeScript、Rust、Go、Java、C/C++。
 | `api_key` | str | API Key（用于 `openai` 或 `cohere` 提供方） |
 | `api_base` | str | 接口地址（用于 `openai` 提供方） |
 | `model` | str | 模型名称（用于 `openai` 提供方） |
+| `timeout` | float | OpenAI 兼容 provider 的 HTTP 请求超时时间，单位为秒。对于较慢或冷启动的本地 rerank 服务可适当增大。默认：`30.0` |
 | `threshold` | float | 分数阈值，范围为 `0.0` 到 `1.0`。低于此值的结果会被过滤。默认：`0.1` |
 | `extra_headers` | object | 自定义 HTTP 请求头（OpenAI 兼容 provider 可用，可选） |
 
diff --git a/openviking_cli/utils/config/rerank_config.py b/openviking_cli/utils/config/rerank_config.py
index e6b8135637..6cc5803ad1 100644
--- a/openviking_cli/utils/config/rerank_config.py
+++ b/openviking_cli/utils/config/rerank_config.py
@@ -32,8 +32,7 @@ class RerankConfig(BaseModel):
     )
 
     extra_headers: Optional[Dict[str, str]] = Field(
-        default=None,
-        description="Extra HTTP headers for OpenAI-compatible providers"
+        default=None, description="Extra HTTP headers for OpenAI-compatible providers"
     )
 
     timeout: float = Field(