chore: ruff format

feat: support token usage extraction for llama.cpp
2026-07-02 02:30:16 +08:00 · 2026-04-04 23:48:24 +08:00 · 2026-04-04 23:47:41 +08:00
2 changed files with 93 additions and 6 deletions
--- a/astrbot/core/provider/sources/openai_source.py
+++ b/astrbot/core/provider/sources/openai_source.py
@@ -532,6 +532,7 @@ class ProviderOpenAIOfficial(Provider):
            **payloads,
            stream=True,
            extra_body=extra_body,
+            stream_options={"include_usage": True},
        )

        llm_response = LLMResponse("assistant", is_chunk=True)
@@ -539,12 +540,10 @@ class ProviderOpenAIOfficial(Provider):
        state = ChatCompletionStreamState()

        async for chunk in stream:
-            if not chunk.choices:
-                continue
-            choice = chunk.choices[0]
-            delta = choice.delta
+            choice = chunk.choices[0] if chunk.choices else None
+            delta = choice.delta if choice else None

-            if dtcs := delta.tool_calls:
+            if delta and (dtcs := delta.tool_calls):
                for idx, tc in enumerate(dtcs):
                    # siliconflow workaround
                    if tc.function and tc.function.arguments:
@@ -574,7 +573,7 @@ class ProviderOpenAIOfficial(Provider):
                _y = True
            if chunk.usage:
                llm_response.usage = self._extract_usage(chunk.usage)
-            elif choice_usage := getattr(choice, "usage", None):
+            elif choice and (choice_usage := getattr(choice, "usage", None)):
                # Workaround for some providers that only return usage in choices[].usage, e.g. MoonshotAI
                # See https://github.com/AstrBotDevs/AstrBot/issues/6614
                llm_response.usage = self._extract_usage(choice_usage)
--- a/tests/test_openai_source.py
+++ b/tests/test_openai_source.py
@@ -2,6 +2,7 @@ from types import SimpleNamespace

 import pytest
 from openai.types.chat.chat_completion import ChatCompletion
+from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 from PIL import Image as PILImage

 from astrbot.core.exceptions import EmptyModelOutputError
@@ -1175,6 +1176,93 @@ async def test_parse_openai_completion_raises_empty_model_output_error():
        await provider.terminate()


+@pytest.mark.asyncio
+async def test_query_stream_extracts_usage_from_empty_choices_chunk(monkeypatch):
+    provider = _make_provider()
+    try:
+        chunks = [
+            ChatCompletionChunk.model_validate(
+                {
+                    "id": "chatcmpl-stream",
+                    "object": "chat.completion.chunk",
+                    "created": 0,
+                    "model": "gpt-4o-mini",
+                    "choices": [
+                        {
+                            "index": 0,
+                            "delta": {
+                                "role": "assistant",
+                                "content": "ok",
+                            },
+                            "finish_reason": None,
+                        }
+                    ],
+                }
+            ),
+            ChatCompletionChunk.model_validate(
+                {
+                    "id": "chatcmpl-stream",
+                    "object": "chat.completion.chunk",
+                    "created": 0,
+                    "model": "gpt-4o-mini",
+                    "choices": [
+                        {
+                            "index": 0,
+                            "delta": {},
+                            "finish_reason": "stop",
+                        }
+                    ],
+                }
+            ),
+            ChatCompletionChunk.model_validate(
+                {
+                    "id": "chatcmpl-stream",
+                    "object": "chat.completion.chunk",
+                    "created": 0,
+                    "model": "gpt-4o-mini",
+                    "choices": [],
+                    "usage": {
+                        "prompt_tokens": 2550,
+                        "completion_tokens": 125,
+                        "total_tokens": 2675,
+                        "prompt_tokens_details": {
+                            "cached_tokens": 2488,
+                        },
+                    },
+                }
+            ),
+        ]
+
+        async def fake_stream():
+            for chunk in chunks:
+                yield chunk
+
+        async def fake_create(**kwargs):
+            return fake_stream()
+
+        monkeypatch.setattr(provider.client.chat.completions, "create", fake_create)
+
+        responses = [
+            response
+            async for response in provider._query_stream(
+                payloads={
+                    "model": "gpt-4o-mini",
+                    "messages": [{"role": "user", "content": "hello"}],
+                },
+                tools=None,
+            )
+        ]
+
+        final_response = responses[-1]
+        assert final_response.completion_text == "ok"
+        assert final_response.usage is not None
+        assert final_response.usage.input_other == 62
+        assert final_response.usage.input_cached == 2488
+        assert final_response.usage.output == 125
+    finally:
+        await provider.terminate()
+
+
 @pytest.mark.asyncio
 async def test_query_filters_empty_assistant_message_without_tool_calls(monkeypatch):
     """Test that empty assistant messages without tool_calls are filtered out."""
Author	SHA1	Message	Date
Soulter	35d1232b62	chore: ruff format	2026-04-04 23:48:24 +08:00
Soulter	1410d9487b	feat: support token usage extraction for llama.cpp	2026-04-04 23:47:41 +08:00