Compare commits

...

2 Commits

Author SHA1 Message Date
Soulter
35d1232b62 chore: ruff format 2026-04-04 23:48:24 +08:00
Soulter
1410d9487b feat: support token usage extraction for llama.cpp 2026-04-04 23:47:41 +08:00
2 changed files with 93 additions and 6 deletions

View File

@@ -532,6 +532,7 @@ class ProviderOpenAIOfficial(Provider):
**payloads,
stream=True,
extra_body=extra_body,
stream_options={"include_usage": True},
)
llm_response = LLMResponse("assistant", is_chunk=True)
@@ -539,12 +540,10 @@ class ProviderOpenAIOfficial(Provider):
state = ChatCompletionStreamState()
async for chunk in stream:
if not chunk.choices:
continue
choice = chunk.choices[0]
delta = choice.delta
choice = chunk.choices[0] if chunk.choices else None
delta = choice.delta if choice else None
if dtcs := delta.tool_calls:
if delta and (dtcs := delta.tool_calls):
for idx, tc in enumerate(dtcs):
# siliconflow workaround
if tc.function and tc.function.arguments:
@@ -574,7 +573,7 @@ class ProviderOpenAIOfficial(Provider):
_y = True
if chunk.usage:
llm_response.usage = self._extract_usage(chunk.usage)
elif choice_usage := getattr(choice, "usage", None):
elif choice and (choice_usage := getattr(choice, "usage", None)):
# Workaround for some providers that only return usage in choices[].usage, e.g. MoonshotAI
# See https://github.com/AstrBotDevs/AstrBot/issues/6614
llm_response.usage = self._extract_usage(choice_usage)

View File

@@ -2,6 +2,7 @@ from types import SimpleNamespace
import pytest
from openai.types.chat.chat_completion import ChatCompletion
from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
from PIL import Image as PILImage
from astrbot.core.exceptions import EmptyModelOutputError
@@ -1175,6 +1176,93 @@ async def test_parse_openai_completion_raises_empty_model_output_error():
await provider.terminate()
@pytest.mark.asyncio
async def test_query_stream_extracts_usage_from_empty_choices_chunk(monkeypatch):
    """Check that usage on a trailing empty-``choices`` chunk reaches the final response.

    The fake stream replays three chunks: one carrying the text ``"ok"``, one
    carrying ``finish_reason="stop"``, and a last one whose ``choices`` list is
    empty and which holds only the ``usage`` payload. The last response yielded
    by ``_query_stream`` must keep the text and expose the parsed usage.
    """

    def build_chunk(choices, usage=None):
        # All scripted chunks share one envelope; only choices/usage differ.
        raw = {
            "id": "chatcmpl-stream",
            "object": "chat.completion.chunk",
            "created": 0,
            "model": "gpt-4o-mini",
            "choices": choices,
        }
        if usage is not None:
            raw["usage"] = usage
        return ChatCompletionChunk.model_validate(raw)

    provider = _make_provider()
    try:
        content_chunk = build_chunk(
            [
                {
                    "index": 0,
                    "delta": {
                        "role": "assistant",
                        "content": "ok",
                    },
                    "finish_reason": None,
                }
            ]
        )
        stop_chunk = build_chunk(
            [
                {
                    "index": 0,
                    "delta": {},
                    "finish_reason": "stop",
                }
            ]
        )
        # Usage-only chunk: no choices at all, just the token accounting.
        usage_chunk = build_chunk(
            [],
            usage={
                "prompt_tokens": 2550,
                "completion_tokens": 125,
                "total_tokens": 2675,
                "prompt_tokens_details": {
                    "cached_tokens": 2488,
                },
            },
        )
        scripted_chunks = (content_chunk, stop_chunk, usage_chunk)

        async def replay_chunks():
            for item in scripted_chunks:
                yield item

        async def create_stub(**_request_kwargs):
            # Stands in for the client's `create`; the request arguments are ignored.
            return replay_chunks()

        monkeypatch.setattr(provider.client.chat.completions, "create", create_stub)

        request = {
            "model": "gpt-4o-mini",
            "messages": [{"role": "user", "content": "hello"}],
        }
        collected = []
        async for partial in provider._query_stream(payloads=request, tools=None):
            collected.append(partial)

        last = collected[-1]
        assert last.completion_text == "ok"
        assert last.usage is not None
        # 62 == 2550 - 2488; presumably prompt tokens minus cached tokens
        # (confirm against `_extract_usage`).
        assert last.usage.input_other == 62
        assert last.usage.input_cached == 2488
        assert last.usage.output == 125
    finally:
        await provider.terminate()
@pytest.mark.asyncio
async def test_query_filters_empty_assistant_message_without_tool_calls(monkeypatch):
"""Test that empty assistant messages without tool_calls are filtered out."""