Skip to content
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 47 additions & 4 deletions astrbot/core/provider/sources/openai_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,10 @@ async def _query_stream(
llm_response = LLMResponse("assistant", is_chunk=True)

state = ChatCompletionStreamState()

# Track partial thinking tags across chunks for MiniMax-style reasoning
thinking_buffer = ""
in_thinking_block = False

async for chunk in stream:
Comment on lines +550 to 555
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The variable in_thinking_block is initialized but never used in the subsequent logic. Additionally, for better performance, the regex pattern should be compiled once outside the streaming loop. Also, since the llm_response object is reused across all chunks in the stream, we should clear the result_chain at the start of each iteration to prevent content from previous chunks from leaking into the current one (which can happen if a chunk contains only reasoning content or metadata).

        # Track partial thinking tags across chunks for MiniMax-style reasoning
        thinking_buffer = ""
        thinking_pattern = re.compile(r"<think>(.*?)</think>", re.DOTALL)

        async for chunk in stream:
            llm_response.result_chain = None

if not chunk.choices:
Expand Down Expand Up @@ -568,10 +572,49 @@ async def _query_stream(
if delta and delta.content:
# Don't strip streaming chunks to preserve spaces between words
completion_text = self._normalize_content(delta.content, strip=False)
llm_response.result_chain = MessageChain(
chain=[Comp.Plain(completion_text)],
)
_y = True

# Handle partial <think>...</think> tags that may span multiple chunks (MiniMax)
# Prepend any leftover thinking content from previous chunk
if thinking_buffer:
completion_text = thinking_buffer + completion_text
thinking_buffer = ""

# Find all thinking blocks in this chunk
thinking_pattern = re.compile(r"<think>(.*?)</think>", re.DOTALL)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This line is now redundant as the regex pattern is compiled once outside the loop for efficiency.


# Extract complete thinking blocks
for match in thinking_pattern.finditer(completion_text):
think_content = match.group(1).strip()
if think_content:
if llm_response.reasoning_content:
llm_response.reasoning_content += "\n" + think_content
else:
llm_response.reasoning_content = think_content
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

When reasoning content is successfully extracted from the message body, the _y flag must be set to True. This ensures that the chunk is yielded to the consumer even if the remaining completion_text is empty (e.g., when a chunk contains only the end of a thinking block). Without this, the reasoning content might be delayed or lost.

Suggested change
for match in thinking_pattern.finditer(completion_text):
think_content = match.group(1).strip()
if think_content:
if llm_response.reasoning_content:
llm_response.reasoning_content += "\n" + think_content
else:
llm_response.reasoning_content = think_content
# Extract complete thinking blocks
for match in thinking_pattern.finditer(completion_text):
think_content = match.group(1).strip()
if think_content:
if llm_response.reasoning_content:
llm_response.reasoning_content += "\n" + think_content
else:
llm_response.reasoning_content = think_content
_y = True


# Remove all complete thinking blocks from completion_text
completion_text = thinking_pattern.sub("", completion_text)

# Handle case where <think> was found but </think> is missing (incomplete block at chunk boundary)
think_start = completion_text.rfind("<think>")
think_end = completion_text.rfind("</think>")

if think_start != -1 and (think_end == -1 or think_end < think_start):
# We have an unclosed <think> tag, buffer everything from it onwards
thinking_buffer = completion_text[think_start:]
completion_text = completion_text[:think_start]
elif think_end != -1 and think_end > think_start:
# We closed a thinking block, clear any buffered content
thinking_buffer = ""

# Strip whitespace but preserve structure
completion_text = completion_text.strip()
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Calling strip() on every chunk will remove leading and trailing spaces that are essential for correctly joining words split across chunk boundaries. This negates the strip=False setting used in _normalize_content and will cause words to be merged incorrectly (e.g., "Hello " and "world" becoming "Helloworld").


# Only yield if there's actual text content remaining
if completion_text:
llm_response.result_chain = MessageChain(
chain=[Comp.Plain(completion_text)],
)
_y = True
if chunk.usage:
llm_response.usage = self._extract_usage(chunk.usage)
elif choice_usage := getattr(choice, "usage", None):
Expand Down