Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/inference_endpoint/commands/benchmark/execute.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,10 @@ def setup_benchmark(config: BenchmarkConfig, test_mode: TestMode) -> BenchmarkCo

# Tokenizer check (light API call, no download)
model_name = config.model_params.name
tokenizer_name = model_name if _check_tokenizer_exists(model_name) else None
tokenizer_source = config.model_params.tokenizer_name or model_name
tokenizer_name = (
tokenizer_source if _check_tokenizer_exists(tokenizer_source) else None
)
Comment on lines +391 to +394

# Streaming
logger.info(
Expand Down
4 changes: 4 additions & 0 deletions src/inference_endpoint/config/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,10 @@ class ModelParams(BaseModel):
StreamingMode,
cyclopts.Parameter(alias="--streaming", help="Streaming mode: auto/on/off"),
] = StreamingMode.AUTO
tokenizer_name: str | None = Field(
None,
description="HuggingFace tokenizer repo ID. Overrides model name for tokenizer loading.",
)
Comment on lines +202 to +205
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

To stay consistent with the other fields in ModelParams (such as name and max_new_tokens) and to make benchmarks easier to run from the command line, consider adding a CLI alias for tokenizer_name using cyclopts.Parameter. This would let users override the tokenizer repo ID directly with the --tokenizer flag.

Suggested change
tokenizer_name: str | None = Field(
None,
description="HuggingFace tokenizer repo ID. Overrides model name for tokenizer loading.",
)
tokenizer_name: Annotated[
str | None,
cyclopts.Parameter(
alias="--tokenizer",
help="HuggingFace tokenizer repo ID. Overrides model name for tokenizer loading.",
),
] = None



class SubmissionReference(BaseModel):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ model_params:
max_new_tokens: 1024 # Max output tokens
osl_distribution: null # Output sequence length distribution
streaming: 'on' # Streaming mode: auto/on/off | options: auto, on, off
tokenizer_name: null # HuggingFace tokenizer repo ID. Overrides model name for tokenizer loading.
datasets: # Dataset configs
- name: perf
type: performance # Dataset purpose: performance or accuracy | options: performance, accuracy
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ model_params:
max_new_tokens: 1024 # Max output tokens
osl_distribution: null # Output sequence length distribution
streaming: 'off' # Streaming mode: auto/on/off | options: auto, on, off
tokenizer_name: null # HuggingFace tokenizer repo ID. Overrides model name for tokenizer loading.
datasets: # Dataset configs
- name: perf
type: performance # Dataset purpose: performance or accuracy | options: performance, accuracy
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ model_params:
max_new_tokens: 1024 # Max output tokens
osl_distribution: null # Output sequence length distribution
streaming: 'on' # Streaming mode: auto/on/off | options: auto, on, off
tokenizer_name: null # HuggingFace tokenizer repo ID. Overrides model name for tokenizer loading.
datasets: # Dataset configs
- name: perf
type: performance # Dataset purpose: performance or accuracy | options: performance, accuracy
Expand Down
9 changes: 9 additions & 0 deletions tests/unit/config/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def test_defaults(self):
params = ModelParams(name="test")
assert params.temperature is None
assert params.max_new_tokens == 1024
assert params.tokenizer_name is None

@pytest.mark.unit
def test_with_osl_distribution(self):
Expand All @@ -84,6 +85,14 @@ def test_with_osl_distribution(self):
assert params.temperature == 0.5
assert params.osl_distribution.type == OSLDistributionType.NORMAL

@pytest.mark.unit
def test_tokenizer_name_override(self):
    """An explicit tokenizer_name is kept verbatim and does not alter name."""
    served_model = "qwen/qwen3.6-35b-a3b"
    tokenizer_repo = "Qwen/Qwen3.6-35B-A3B"

    model_params = ModelParams(name=served_model, tokenizer_name=tokenizer_repo)

    # The two identifiers differ only in casing here; each must be stored
    # exactly as supplied.
    assert model_params.tokenizer_name == tokenizer_repo
    assert model_params.name == served_model


class TestAPIType:
@pytest.mark.unit
Expand Down
Loading