Skip to content

Commit ffe5168

Browse files
committed
Improve batch server scheduling and prompt handling
1 parent 95454bf commit ffe5168

3 files changed

Lines changed: 6205 additions & 311 deletions

File tree

Lines changed: 254 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,254 @@
1+
{
2+
"server": {
3+
"host": "0.0.0.0",
4+
"port": 8000
5+
},
6+
"model": {
7+
"alias": "gpt-3.5-turbo",
8+
"from_pretrained": {
9+
"repo_id": "lmstudio-community/Qwen3.5-0.8B-GGUF",
10+
"filename": "Qwen3.5-0.8B-Q8_0.gguf"
11+
},
12+
"n_ctx": 32768,
13+
"max_output_tokens": 4096,
14+
"n_seq_max": 64,
15+
"n_batch": 128,
16+
"n_ubatch": 128,
17+
"threads": 2,
18+
"threads_batch": 4,
19+
"kv_unified": true,
20+
"store_logits": false,
21+
"use_mmap": true,
22+
"response_schema": {
23+
"type": "object",
24+
"properties": {
25+
"role": {
26+
"const": "assistant"
27+
},
28+
"reasoning_content": {
29+
"type": "string",
30+
"x-regex": "^(?:<\\|im_start\\|>assistant\\n)?(?:<think>\\n)?(.*?)(?=</think>)"
31+
},
32+
"content": {
33+
"type": "string",
34+
"x-regex": "^(?:<\\|im_start\\|>assistant\\n)?(?:(?:<think>\\n)?.*?</think>\\s*)?(.*?)(?=\\s*<tool_call>\\n|<\\|im_end\\|>$|$)"
35+
},
36+
"tool_calls": {
37+
"type": "array",
38+
"x-regex-iterator": "<tool_call>\\n(.*?)\\n</tool_call>",
39+
"items": {
40+
"type": "object",
41+
"properties": {
42+
"type": {
43+
"const": "function"
44+
},
45+
"function": {
46+
"type": "object",
47+
"properties": {
48+
"name": {
49+
"type": "string",
50+
"x-regex": "^<function=([^>\\n]+)>\\n"
51+
},
52+
"arguments": {
53+
"type": "object",
54+
"x-regex": "^<function=[^>\\n]+>\\n(.*?)\\n</function>$",
55+
"x-regex-key-value": "<parameter=(?P<key>[^>\\n]+)>\\n(?P<value>.*?)\\n</parameter>",
56+
"additionalProperties": true
57+
}
58+
},
59+
"required": [
60+
"name",
61+
"arguments"
62+
]
63+
}
64+
},
65+
"required": [
66+
"type",
67+
"function"
68+
]
69+
}
70+
}
71+
},
72+
"required": [
73+
"role"
74+
]
75+
},
76+
"chat_template": [
77+
"{%- set image_count = namespace(value=0) %}\n",
78+
"{%- set video_count = namespace(value=0) %}\n",
79+
"{%- if enable_thinking is not defined and reasoning_effort is string %}\n",
80+
" {%- set qwen_reasoning_effort = reasoning_effort|lower %}\n",
81+
" {%- if qwen_reasoning_effort in ['none', 'minimal', 'low'] %}\n",
82+
" {%- set enable_thinking = false %}\n",
83+
" {%- elif qwen_reasoning_effort in ['medium', 'high'] %}\n",
84+
" {%- set enable_thinking = true %}\n",
85+
" {%- endif %}\n",
86+
"{%- endif %}\n",
87+
"{%- set forced_tool_name = none %}\n",
88+
"{%- if tool_choice is mapping %}\n",
89+
" {%- if tool_choice.function is defined and tool_choice.function.name is defined %}\n",
90+
" {%- set forced_tool_name = tool_choice.function.name %}\n",
91+
" {%- elif tool_choice.name is defined %}\n",
92+
" {%- set forced_tool_name = tool_choice.name %}\n",
93+
" {%- endif %}\n",
94+
"{%- elif function_call is mapping and function_call.name is defined %}\n",
95+
" {%- set forced_tool_name = function_call.name %}\n",
96+
"{%- endif %}\n",
97+
"{%- macro render_content(content, do_vision_count, is_system_content=false) %}\n",
98+
" {%- if content is string %}\n",
99+
" {{- content }}\n",
100+
" {%- elif content is iterable and content is not mapping %}\n",
101+
" {%- for item in content %}\n",
102+
" {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}\n",
103+
" {%- if is_system_content %}\n",
104+
" {{- raise_exception('System message cannot contain images.') }}\n",
105+
" {%- endif %}\n",
106+
" {%- if do_vision_count %}\n",
107+
" {%- set image_count.value = image_count.value + 1 %}\n",
108+
" {%- endif %}\n",
109+
" {%- if add_vision_id %}\n",
110+
" {{- 'Picture ' ~ image_count.value ~ ': ' }}\n",
111+
" {%- endif %}\n",
112+
" {{- '<|vision_start|><|image_pad|><|vision_end|>' }}\n",
113+
" {%- elif 'video' in item or item.type == 'video' %}\n",
114+
" {%- if is_system_content %}\n",
115+
" {{- raise_exception('System message cannot contain videos.') }}\n",
116+
" {%- endif %}\n",
117+
" {%- if do_vision_count %}\n",
118+
" {%- set video_count.value = video_count.value + 1 %}\n",
119+
" {%- endif %}\n",
120+
" {%- if add_vision_id %}\n",
121+
" {{- 'Video ' ~ video_count.value ~ ': ' }}\n",
122+
" {%- endif %}\n",
123+
" {{- '<|vision_start|><|video_pad|><|vision_end|>' }}\n",
124+
" {%- elif 'text' in item %}\n",
125+
" {{- item.text }}\n",
126+
" {%- else %}\n",
127+
" {{- raise_exception('Unexpected item type in content.') }}\n",
128+
" {%- endif %}\n",
129+
" {%- endfor %}\n",
130+
" {%- elif content is none or content is undefined %}\n",
131+
" {{- '' }}\n",
132+
" {%- else %}\n",
133+
" {{- raise_exception('Unexpected content type.') }}\n",
134+
" {%- endif %}\n",
135+
"{%- endmacro %}\n",
136+
"{%- if not messages %}\n",
137+
" {{- raise_exception('No messages provided.') }}\n",
138+
"{%- endif %}\n",
139+
"{%- if tools and tools is iterable and tools is not mapping %}\n",
140+
" {{- '<|im_start|>system\\n' }}\n",
141+
" {{- \"# Tools\\n\\nYou have access to the following functions:\\n\\n<tools>\" }}\n",
142+
" {%- for tool in tools %}\n",
143+
" {{- \"\\n\" }}\n",
144+
" {{- tool | tojson }}\n",
145+
" {%- endfor %}\n",
146+
" {{- \"\\n</tools>\" }}\n",
147+
" {{- '\\n\\nIf you choose to call a function ONLY reply in the following format with NO suffix:\\n\\n<tool_call>\\n<function=example_function_name>\\n<parameter=example_parameter_1>\\nvalue_1\\n</parameter>\\n<parameter=example_parameter_2>\\nThis is the value for the second parameter\\nthat can span\\nmultiple lines\\n</parameter>\\n</function>\\n</tool_call>\\n\\n<IMPORTANT>\\nReminder:\\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\\n- Required parameters MUST be specified\\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\\n</IMPORTANT>' }}\n",
148+
" {%- if messages[0].role == 'system' %}\n",
149+
" {%- set content = render_content(messages[0].content, false, true)|trim %}\n",
150+
" {%- if content %}\n",
151+
" {{- '\\n\\n' + content }}\n",
152+
" {%- endif %}\n",
153+
" {%- endif %}\n",
154+
" {{- '<|im_end|>\\n' }}\n",
155+
"{%- else %}\n",
156+
" {%- if messages[0].role == 'system' %}\n",
157+
" {%- set content = render_content(messages[0].content, false, true)|trim %}\n",
158+
" {{- '<|im_start|>system\\n' + content + '<|im_end|>\\n' }}\n",
159+
" {%- endif %}\n",
160+
"{%- endif %}\n",
161+
"{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n",
162+
"{%- for message in messages[::-1] %}\n",
163+
" {%- set index = (messages|length - 1) - loop.index0 %}\n",
164+
" {%- if ns.multi_step_tool and message.role == \"user\" %}\n",
165+
" {%- set content = render_content(message.content, false)|trim %}\n",
166+
" {%- if not(content.startswith('<tool_response>') and content.endswith('</tool_response>')) %}\n",
167+
" {%- set ns.multi_step_tool = false %}\n",
168+
" {%- set ns.last_query_index = index %}\n",
169+
" {%- endif %}\n",
170+
" {%- endif %}\n",
171+
"{%- endfor %}\n",
172+
"{%- if ns.multi_step_tool %}\n",
173+
" {{- raise_exception('No user query found in messages.') }}\n",
174+
"{%- endif %}\n",
175+
"{%- for message in messages %}\n",
176+
" {%- set content = render_content(message.content, true)|trim %}\n",
177+
" {%- if message.role == \"system\" %}\n",
178+
" {%- if not loop.first %}\n",
179+
" {{- raise_exception('System message must be at the beginning.') }}\n",
180+
" {%- endif %}\n",
181+
" {%- elif message.role == \"user\" %}\n",
182+
" {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n",
183+
" {%- elif message.role == \"assistant\" %}\n",
184+
" {%- set reasoning_content = '' %}\n",
185+
" {%- if message.reasoning_content is string %}\n",
186+
" {%- set reasoning_content = message.reasoning_content %}\n",
187+
" {%- else %}\n",
188+
" {%- if '</think>' in content %}\n",
189+
" {%- set reasoning_content = content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n",
190+
" {%- set content = content.split('</think>')[-1].lstrip('\\n') %}\n",
191+
" {%- endif %}\n",
192+
" {%- endif %}\n",
193+
" {%- set reasoning_content = reasoning_content|trim %}\n",
194+
" {%- if loop.index0 > ns.last_query_index %}\n",
195+
" {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content + '\\n</think>\\n\\n' + content }}\n",
196+
" {%- else %}\n",
197+
" {{- '<|im_start|>' + message.role + '\\n' + content }}\n",
198+
" {%- endif %}\n",
199+
" {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}\n",
200+
" {%- for tool_call in message.tool_calls %}\n",
201+
" {%- if tool_call.function is defined %}\n",
202+
" {%- set tool_call = tool_call.function %}\n",
203+
" {%- endif %}\n",
204+
" {%- if loop.first %}\n",
205+
" {%- if content|trim %}\n",
206+
" {{- '\\n\\n<tool_call>\\n<function=' + tool_call.name + '>\\n' }}\n",
207+
" {%- else %}\n",
208+
" {{- '<tool_call>\\n<function=' + tool_call.name + '>\\n' }}\n",
209+
" {%- endif %}\n",
210+
" {%- else %}\n",
211+
" {{- '\\n<tool_call>\\n<function=' + tool_call.name + '>\\n' }}\n",
212+
" {%- endif %}\n",
213+
" {%- if tool_call.arguments is defined %}\n",
214+
" {%- set arguments = tool_call.arguments | from_json if tool_call.arguments is string else tool_call.arguments %}\n",
215+
" {%- for args_name, args_value in arguments|items %}\n",
216+
" {{- '<parameter=' + args_name + '>\\n' }}\n",
217+
" {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}\n",
218+
" {{- args_value }}\n",
219+
" {{- '\\n</parameter>\\n' }}\n",
220+
" {%- endfor %}\n",
221+
" {%- endif %}\n",
222+
" {{- '</function>\\n</tool_call>' }}\n",
223+
" {%- endfor %}\n",
224+
" {%- endif %}\n",
225+
" {{- '<|im_end|>\\n' }}\n",
226+
" {%- elif message.role == \"tool\" %}\n",
227+
" {%- if loop.previtem and loop.previtem.role != \"tool\" %}\n",
228+
" {{- '<|im_start|>user' }}\n",
229+
" {%- endif %}\n",
230+
" {{- '\\n<tool_response>\\n' }}\n",
231+
" {{- content }}\n",
232+
" {{- '\\n</tool_response>' }}\n",
233+
" {%- if not loop.last and loop.nextitem.role != \"tool\" %}\n",
234+
" {{- '<|im_end|>\\n' }}\n",
235+
" {%- elif loop.last %}\n",
236+
" {{- '<|im_end|>\\n' }}\n",
237+
" {%- endif %}\n",
238+
" {%- else %}\n",
239+
" {{- raise_exception('Unexpected message role.') }}\n",
240+
" {%- endif %}\n",
241+
"{%- endfor %}\n",
242+
"{%- if add_generation_prompt %}\n",
243+
" {{- '<|im_start|>assistant\\n' }}\n",
244+
" {%- if forced_tool_name %}\n",
245+
" {{- '<tool_call>\\n<function=' + forced_tool_name + '>\\n' }}\n",
246+
" {%- elif enable_thinking is defined and enable_thinking is false %}\n",
247+
" {{- '<think>\\n\\n</think>\\n\\n' }}\n",
248+
" {%- else %}\n",
249+
" {{- '<think>\\n' }}\n",
250+
" {%- endif %}\n",
251+
"{%- endif %}"
252+
]
253+
}
254+
}

0 commit comments

Comments
 (0)