From c5182f6fa20c3b688e924a755770e66d989028a1 Mon Sep 17 00:00:00 2001 From: Aleksander Pejcic Date: Fri, 12 Jun 2026 13:57:31 +0200 Subject: [PATCH 1/2] feat: add Sference provider support - Updated navigation and documentation to include the Sference provider. - Implemented Sference provider functionality, including inline batch submission and result handling. - Added tests for Sference provider's batch payload and response handling. Co-authored-by: Cursor --- docs/architecture/providers.md | 14 +++ docs/providers.md | 1 + docs/providers/_credentials/sference.md | 3 + docs/providers/_urls/sference.md | 3 + docs/providers/sference.md | 23 ++++ examples/providers/sference_example.py | 51 ++++++++ mkdocs.yml | 1 + src/batchling/providers/sference.py | 151 ++++++++++++++++++++++++ tests/test_provider_contracts.py | 67 ++++++++++- tests/test_provider_registry.py | 47 +++++++- 10 files changed, 355 insertions(+), 6 deletions(-) create mode 100644 docs/providers/_credentials/sference.md create mode 100644 docs/providers/_urls/sference.md create mode 100644 docs/providers/sference.md create mode 100644 examples/providers/sference_example.py create mode 100644 src/batchling/providers/sference.py diff --git a/docs/architecture/providers.md b/docs/architecture/providers.md index 7f81377..0b2e2cd 100644 --- a/docs/architecture/providers.md +++ b/docs/architecture/providers.md @@ -107,6 +107,20 @@ The Xai provider uses a provider-specific batch lifecycle and response envelope: - result retrieval from `/v1/batches/{batch_id}/results` - provider-specific result row key (`custom_id_field_name = "batch_request_id"`) +## sference provider + +The sference provider reuses the OpenAI provider implementation with inline batch +submission to ``POST /v1/batches`` (no ``/v1/files`` upload step): + +- `hostname = "api.sference.com"` +- `is_file_based = False` +- `batchable_endpoints = ("/v1/chat/completions",)` — sference inline batches execute + chat-completion bodies only; 
`/v1/responses` is not batchable via `/v1/batches` +- `supported_completion_windows = ("24h",)` — sference batches currently expose a + `24h` SLA window only +- poll via `GET /v1/batches/{batch_id}`; results via `GET /v1/batches/{batch_id}/results.jsonl` +- inherits OpenAI terminal states and JSONL result decoding; rows are mapped from `result_json` / `error_json` + ## Doubleword provider The Doubleword provider reuses the OpenAI provider implementation and only changes: diff --git a/docs/providers.md b/docs/providers.md index bde12c7..e89f254 100644 --- a/docs/providers.md +++ b/docs/providers.md @@ -10,6 +10,7 @@ The following providers are supported by `batchling`: - [Groq](providers/groq.md) - [Mistral](providers/mistral.md) - [OpenAI](providers/openai.md) +- [Sference](providers/sference.md) - [Together](providers/together.md) - [Vertex](providers/vertex.md) - [XAI](providers/xai.md) diff --git a/docs/providers/_credentials/sference.md b/docs/providers/_credentials/sference.md new file mode 100644 index 0000000..d2266b0 --- /dev/null +++ b/docs/providers/_credentials/sference.md @@ -0,0 +1,3 @@ + +Set `SFERENCE_API_KEY` in `.env` or export it in your shell before running batches. +You can create keys in the sference console. diff --git a/docs/providers/_urls/sference.md b/docs/providers/_urls/sference.md new file mode 100644 index 0000000..f0b4b8c --- /dev/null +++ b/docs/providers/_urls/sference.md @@ -0,0 +1,3 @@ + +- [Batch inference guide](https://sference.com/docs/guides/batches) +- [Responses & streams guide](https://sference.com/docs/guides/responses) diff --git a/docs/providers/sference.md b/docs/providers/sference.md new file mode 100644 index 0000000..f7055c3 --- /dev/null +++ b/docs/providers/sference.md @@ -0,0 +1,23 @@ +# Sference + +`batchling` is compatible with Sference through any [supported framework](../frameworks.md){ data-preview } + +The following endpoints are made batch-compatible by Sference: + +- `/v1/chat/completions` + +!!! 
warning "Check model support and batch pricing" + Before sending batches, review the provider's official pricing page for supported models and batch pricing details. + +The Batch API docs for Sference can be found on the following URL: +--8<-- "docs/providers/_urls/sference.md" + +## Example Usage + +--8<-- "docs/providers/_credentials/sference.md" + +Here's an example showing how to use `batchling` with Sference: + +```py title="sference_example.py" +--8<-- "examples/providers/sference_example.py" +``` diff --git a/examples/providers/sference_example.py b/examples/providers/sference_example.py new file mode 100644 index 0000000..676f61c --- /dev/null +++ b/examples/providers/sference_example.py @@ -0,0 +1,51 @@ +import asyncio +import os + +from dotenv import load_dotenv +from openai import AsyncOpenAI + +from batchling import batchify + +load_dotenv() + + +async def build_tasks() -> list: + """Build sference chat completion requests.""" + client = AsyncOpenAI( + api_key=os.getenv(key="SFERENCE_API_KEY"), + base_url="https://api.sference.com/v1", + ) + questions = [ + "Who is the best French painter? 
Answer in one short sentence.", + "What is the capital of France?", + ] + return [ + client.chat.completions.create( + model="moonshotai/Kimi-K2.6", + messages=[ + { + "role": "user", + "content": question, + } + ], + ) + for question in questions + ] + + +async def main() -> None: + """Run the sference example.""" + tasks = await build_tasks() + responses = await asyncio.gather(*tasks) + for response in responses: + print(f"{response.model} answer:\n{response.choices[0].message.content}\n") + + +async def run_with_batchify() -> None: + """Run `main` inside `batchify` for direct script execution.""" + async with batchify(completion_window="24h"): + await main() + + +if __name__ == "__main__": + asyncio.run(run_with_batchify()) diff --git a/mkdocs.yml b/mkdocs.yml index d2f780e..b685152 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -107,6 +107,7 @@ nav: - Groq: providers/groq.md - Mistral: providers/mistral.md - OpenAI: providers/openai.md + - Sference: providers/sference.md - Together: providers/together.md - Vertex: providers/vertex.md - XAI: providers/xai.md diff --git a/src/batchling/providers/sference.py b/src/batchling/providers/sference.py new file mode 100644 index 0000000..3803e3e --- /dev/null +++ b/src/batchling/providers/sference.py @@ -0,0 +1,151 @@ +import json +import typing as t + +import httpx + +from batchling.providers.base import ( + PendingRequestLike, + PollSnapshot, + ProviderRequestSpec, + ResumeContext, +) +from batchling.providers.openai import OpenAIProvider + + +class SferenceProvider(OpenAIProvider): + """Provider adapter for sference's inline ``POST /v1/batches`` API.""" + + name = "sference" + hostname = "api.sference.com" + is_file_based = False + file_content_endpoint = "/v1/batches/{id}/results.jsonl" + batch_endpoint = "/v1/batches" + output_file_field_name: str = "id" + error_file_field_name: str = "id" + supported_completion_windows: tuple[str, ...] 
= ("24h",) + batchable_endpoints = ("/v1/chat/completions",) + + def matches_url(self, hostname: str) -> bool: + normalized = hostname.lower() + return normalized in {"api.sference.com"} + + def build_jsonl_lines( + self, + *, + requests: t.Sequence[PendingRequestLike], + ) -> list[dict[str, t.Any]]: + return [ + { + "custom_id": request.custom_id, + "body": json.loads(s=request.params["body"].decode(encoding="utf-8")), + } + for request in requests + ] + + async def build_inline_batch_payload( + self, + *, + jsonl_lines: list[dict[str, t.Any]], + completion_window: str, + ) -> dict[str, t.Any]: + return { + "window": completion_window, + "requests": jsonl_lines, + } + + def build_batch_results_path(self, *, file_id: str | None, batch_id: str) -> str: + del file_id + return f"/v1/batches/{batch_id}/results.jsonl" + + def get_progress_from_poll( + self, + *, + payload: dict[str, t.Any], + requests_count: int, + ) -> tuple[int, float]: + if payload.get(self.batch_status_field_name) == "completed": + return requests_count, 100.0 + return 0, 0.0 + + async def get_result_locator_from_poll_response( + self, + *, + payload: dict[str, t.Any], + ) -> str: + return str(object=payload.get("id") or "") + + def build_poll_request_spec( + self, + *, + base_url: str, + api_headers: dict[str, str], + batch_id: str, + ) -> ProviderRequestSpec: + return super().build_poll_request_spec( + base_url=base_url, + api_headers=api_headers, + batch_id=batch_id, + ) + + async def parse_poll_response( + self, + *, + payload: dict[str, t.Any], + requests_count: int, + ) -> PollSnapshot: + return await super().parse_poll_response( + payload=payload, + requests_count=requests_count, + ) + + def build_results_request_spec( + self, + *, + base_url: str, + api_headers: dict[str, str], + file_id: str | None, + batch_id: str, + ) -> ProviderRequestSpec: + return super().build_results_request_spec( + base_url=base_url, + api_headers=api_headers, + file_id=file_id, + batch_id=batch_id, + ) + + def 
decode_results_content( + self, + *, + batch_id: str, + content: str, + ) -> dict[str, httpx.Response]: + return super().decode_results_content(batch_id=batch_id, content=content) + + def from_batch_result(self, result_item: dict[str, t.Any]) -> httpx.Response: + result_json = result_item.get("result_json") + error_json = result_item.get("error_json") + if result_json is not None: + status_code = 200 + body: dict[str, t.Any] | t.Any = result_json + elif error_json is not None: + status_code = 500 + body = error_json + else: + status_code = 500 + body = {"error": result_item.get("status") or "Missing result"} + + content, content_headers = self.encode_body(body=body) + headers = dict(content_headers) + return httpx.Response( + status_code=status_code, + headers=headers, + content=content, + ) + + def build_resume_context( + self, + *, + host: str, + headers: dict[str, str] | None, + ) -> ResumeContext: + return super().build_resume_context(host=host, headers=headers) diff --git a/tests/test_provider_contracts.py b/tests/test_provider_contracts.py index 430cc04..9fdfd6e 100644 --- a/tests/test_provider_contracts.py +++ b/tests/test_provider_contracts.py @@ -11,6 +11,7 @@ from batchling.providers.groq import GroqProvider from batchling.providers.mistral import MistralProvider from batchling.providers.openai import OpenAIProvider +from batchling.providers.sference import SferenceProvider from batchling.providers.together import TogetherProvider from batchling.providers.vertex import VertexProvider from batchling.providers.xai import XaiProvider @@ -28,6 +29,7 @@ DoublewordProvider(), XaiProvider(), VertexProvider(), + SferenceProvider(), ], ) def test_build_poll_request_spec_returns_get(provider: t.Any) -> None: @@ -61,6 +63,7 @@ def test_build_poll_request_spec_returns_get(provider: t.Any) -> None: DoublewordProvider(), XaiProvider(), VertexProvider(), + SferenceProvider(), ], ) def test_build_resume_context_adds_internal_header(provider: t.Any) -> None: @@ -73,9 
+76,13 @@ def test_build_resume_context_adds_internal_header(provider: t.Any) -> None: Provider instance under test. """ host = ( - "us-central1-aiplatform.googleapis.com" if provider.name == "vertex" else "api.openai.com" + "us-central1-aiplatform.googleapis.com" + if provider.name == "vertex" + else "api.openai.com" + ) + context = provider.build_resume_context( + host=host, headers={"Authorization": "Bearer token"} ) - context = provider.build_resume_context(host=host, headers={"Authorization": "Bearer token"}) assert context.base_url.startswith("https://") assert context.api_headers["x-batchling-internal"] == "1" @@ -103,7 +110,9 @@ async def test_parse_poll_response_default_fields_for_openai_provider() -> None: @pytest.mark.asyncio -async def test_parse_poll_response_progress_defaults_to_zero_for_invalid_numbers() -> None: +async def test_parse_poll_response_progress_defaults_to_zero_for_invalid_numbers() -> ( + None +): """ Ensure invalid progress payload values fallback to zero. """ @@ -238,6 +247,58 @@ def test_decode_results_content_maps_custom_ids() -> None: assert isinstance(vertex_results["req-3"], httpx.Response) +@pytest.mark.asyncio +async def test_sference_build_inline_batch_payload_includes_window() -> None: + """ + Ensure inline batch payloads include the sference SLA window field. + """ + provider = SferenceProvider() + payload = await provider.build_inline_batch_payload( + jsonl_lines=[{"custom_id": "req-1", "body": {"model": "demo", "messages": []}}], + completion_window="24h", + ) + assert payload == { + "window": "24h", + "requests": [{"custom_id": "req-1", "body": {"model": "demo", "messages": []}}], + } + + +def test_sference_build_batch_results_path_uses_batch_id() -> None: + """ + Ensure sference downloads results from the inline batch results route. 
+ """ + provider = SferenceProvider() + assert provider.build_batch_results_path(file_id=None, batch_id="batch-123") == ( + "/v1/batches/batch-123/results.jsonl" + ) + + +def test_sference_from_batch_result_decodes_result_json() -> None: + """ + Ensure sference batch rows decode from ``result_json`` / ``error_json``. + """ + provider = SferenceProvider() + success = provider.from_batch_result( + result_item={ + "custom_id": "req-1", + "status": "completed", + "result_json": {"object": "chat.completion", "choices": []}, + "error_json": None, + }, + ) + assert success.status_code == 200 + + failure = provider.from_batch_result( + result_item={ + "custom_id": "req-2", + "status": "failed", + "result_json": None, + "error_json": {"detail": "boom"}, + }, + ) + assert failure.status_code == 500 + + @pytest.mark.asyncio @pytest.mark.parametrize( ("payload", "expected_status"), diff --git a/tests/test_provider_registry.py b/tests/test_provider_registry.py index 4a0d8bf..58e2ec0 100644 --- a/tests/test_provider_registry.py +++ b/tests/test_provider_registry.py @@ -27,7 +27,8 @@ def test_provider_registry_auto_discovers_modules() -> None: if file_path.name not in {"__init__.py", "base.py"} } discovered_modules = { - provider.__class__.__module__.split(sep=".")[-1] for provider in providers_module.PROVIDERS + provider.__class__.__module__.split(sep=".")[-1] + for provider in providers_module.PROVIDERS } assert discovered_modules @@ -45,8 +46,13 @@ def test_provider_registry_contains_only_concrete_provider_instances() -> None: This test asserts registry instance types. 
""" assert providers_module.PROVIDERS - assert all(isinstance(provider, BaseProvider) for provider in providers_module.PROVIDERS) - assert all(provider.__class__ is not BaseProvider for provider in providers_module.PROVIDERS) + assert all( + isinstance(provider, BaseProvider) for provider in providers_module.PROVIDERS + ) + assert all( + provider.__class__ is not BaseProvider + for provider in providers_module.PROVIDERS + ) def test_provider_lookup_still_resolves_openai() -> None: @@ -103,6 +109,41 @@ def test_provider_lookup_resolves_xai() -> None: assert provider.name == "xai" +def test_provider_lookup_does_not_batch_sference_responses() -> None: + """ + Ensure sference responses are not routed through the inline batch API. + + Returns + ------- + None + This test asserts responses are excluded from batchable endpoints. + """ + provider = get_provider_for_batch_request( + hostname="api.sference.com", + path="/v1/responses", + method="POST", + ) + assert provider is None + + +def test_provider_lookup_resolves_sference_chat_completions() -> None: + """ + Ensure hostname lookup resolves the sference provider for chat completions. + + Returns + ------- + None + This test asserts hostname-to-provider mapping. + """ + provider = get_provider_for_batch_request( + hostname="api.sference.com", + path="/v1/chat/completions", + method="POST", + ) + assert provider is not None + assert provider.name == "sference" + + def test_provider_lookup_requires_exact_hostname_match() -> None: """ Ensure provider lookup does not match hostname suffixes or subdomains. From cf17bd27cca2b7c81127bd40b711a2cf0b7ff1a5 Mon Sep 17 00:00:00 2001 From: Aleksander Pejcic Date: Sun, 14 Jun 2026 22:57:53 +0200 Subject: [PATCH 2/2] fix: format provider tests with ruff line-length 100 Match prek ruff-format hook settings so CI passes. 
Co-authored-by: Cursor --- tests/test_provider_contracts.py | 12 +++--------- tests/test_provider_registry.py | 12 +++--------- 2 files changed, 6 insertions(+), 18 deletions(-) diff --git a/tests/test_provider_contracts.py b/tests/test_provider_contracts.py index 9fdfd6e..10c5a20 100644 --- a/tests/test_provider_contracts.py +++ b/tests/test_provider_contracts.py @@ -76,13 +76,9 @@ def test_build_resume_context_adds_internal_header(provider: t.Any) -> None: Provider instance under test. """ host = ( - "us-central1-aiplatform.googleapis.com" - if provider.name == "vertex" - else "api.openai.com" - ) - context = provider.build_resume_context( - host=host, headers={"Authorization": "Bearer token"} + "us-central1-aiplatform.googleapis.com" if provider.name == "vertex" else "api.openai.com" ) + context = provider.build_resume_context(host=host, headers={"Authorization": "Bearer token"}) assert context.base_url.startswith("https://") assert context.api_headers["x-batchling-internal"] == "1" @@ -110,9 +106,7 @@ async def test_parse_poll_response_default_fields_for_openai_provider() -> None: @pytest.mark.asyncio -async def test_parse_poll_response_progress_defaults_to_zero_for_invalid_numbers() -> ( - None -): +async def test_parse_poll_response_progress_defaults_to_zero_for_invalid_numbers() -> None: """ Ensure invalid progress payload values fallback to zero. 
""" diff --git a/tests/test_provider_registry.py b/tests/test_provider_registry.py index 58e2ec0..657d2a0 100644 --- a/tests/test_provider_registry.py +++ b/tests/test_provider_registry.py @@ -27,8 +27,7 @@ def test_provider_registry_auto_discovers_modules() -> None: if file_path.name not in {"__init__.py", "base.py"} } discovered_modules = { - provider.__class__.__module__.split(sep=".")[-1] - for provider in providers_module.PROVIDERS + provider.__class__.__module__.split(sep=".")[-1] for provider in providers_module.PROVIDERS } assert discovered_modules @@ -46,13 +45,8 @@ def test_provider_registry_contains_only_concrete_provider_instances() -> None: This test asserts registry instance types. """ assert providers_module.PROVIDERS - assert all( - isinstance(provider, BaseProvider) for provider in providers_module.PROVIDERS - ) - assert all( - provider.__class__ is not BaseProvider - for provider in providers_module.PROVIDERS - ) + assert all(isinstance(provider, BaseProvider) for provider in providers_module.PROVIDERS) + assert all(provider.__class__ is not BaseProvider for provider in providers_module.PROVIDERS) def test_provider_lookup_still_resolves_openai() -> None: