Skip to content
Open
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 48 additions & 4 deletions astrbot/core/provider/sources/openai_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,11 @@ async def _query_stream(
llm_response = LLMResponse("assistant", is_chunk=True)

state = ChatCompletionStreamState()

# Track partial thinking tags across chunks for MiniMax-style reasoning
thinking_buffer = ""
# Compile regex once outside the loop for efficiency
thinking_pattern = re.compile(r"<think>(.*?)</think>", re.DOTALL)

async for chunk in stream:
if not chunk.choices:
Expand Down Expand Up @@ -568,10 +573,49 @@ async def _query_stream(
if delta and delta.content:
# Don't strip streaming chunks to preserve spaces between words
completion_text = self._normalize_content(delta.content, strip=False)
llm_response.result_chain = MessageChain(
chain=[Comp.Plain(completion_text)],
)
_y = True

                        # Handle partial <think>...</think> tags that may span multiple chunks (MiniMax)
# Prepend any leftover thinking content from previous chunk
if thinking_buffer:
completion_text = thinking_buffer + completion_text
thinking_buffer = ""

# Find all thinking blocks in this chunk
# Extract complete thinking blocks
for match in thinking_pattern.finditer(completion_text):
think_content = match.group(1).strip()
if think_content:
if llm_response.reasoning_content:
llm_response.reasoning_content += "\n" + think_content
else:
llm_response.reasoning_content = think_content
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

When reasoning content is successfully extracted from the message body, the _y flag must be set to True. This ensures that the chunk is yielded to the consumer even if the remaining completion_text is empty (e.g., when a chunk contains only the end of a thinking block). Without this, the reasoning content might be delayed or lost.

Suggested change
for match in thinking_pattern.finditer(completion_text):
think_content = match.group(1).strip()
if think_content:
if llm_response.reasoning_content:
llm_response.reasoning_content += "\n" + think_content
else:
llm_response.reasoning_content = think_content
# Extract complete thinking blocks
for match in thinking_pattern.finditer(completion_text):
think_content = match.group(1).strip()
if think_content:
if llm_response.reasoning_content:
llm_response.reasoning_content += "\n" + think_content
else:
llm_response.reasoning_content = think_content
_y = True

_y = True

# Remove all complete thinking blocks from completion_text
completion_text = thinking_pattern.sub("", completion_text)

                        # Handle case where <think> was found but </think> is missing (incomplete block at chunk boundary)
think_start = completion_text.rfind("<think>")
think_end = completion_text.rfind("</think>")

if think_start != -1 and (think_end == -1 or think_end < think_start):
                            # We have an unclosed <think> tag, buffer everything from it onwards
thinking_buffer = completion_text[think_start:]
completion_text = completion_text[:think_start]
elif think_end != -1 and think_end > think_start:
# We closed a thinking block, clear any buffered content
thinking_buffer = ""

# Strip whitespace but preserve structure
# Use lstrip to remove leading newlines from thinking tags, but preserve trailing spaces for chunk concatenation
completion_text = completion_text.lstrip()

# Only yield if there's actual text content remaining
if completion_text:
llm_response.result_chain = MessageChain(
chain=[Comp.Plain(completion_text)],
)
_y = True
if chunk.usage:
llm_response.usage = self._extract_usage(chunk.usage)
elif choice_usage := getattr(choice, "usage", None):
Expand Down