Compare commits

...

8 Commits

Author SHA1 Message Date
Soulter
7d3a09f3db feat: update stats command to clarify conversation token usage display 2026-04-27 12:59:04 +08:00
Soulter
071f7b5701 feat: add cached input tokens display and update translations for clarity 2026-04-27 12:58:19 +08:00
Soulter
2ce6b1b885 feat: enhance stats command to aggregate conversation token usage 2026-04-27 12:53:46 +08:00
Soulter
8ca8231176 feat: reorder token usage output for improved clarity 2026-04-27 12:04:28 +08:00
Soulter
6ba01a4775 feat: reorder conversation stats output for better readability
Co-authored-by: Copilot <copilot@github.com>
2026-04-27 12:00:12 +08:00
Soulter
f02444146d feat: add /stats command to view conversation token usage
- Add stats() method to ConversationCommands that queries ProviderStat
  records by conversation_id and aggregates token breakdowns
- Register /stats command in main.py
2026-04-27 11:52:10 +08:00
Weilong Liao
415da218f6 fix: update reasoning_content handling to support empty string values (#7830)
* fix: update reasoning_content handling to support empty string values

* fix: add reasoning_content field for DeepSeek v4 models in assistant messages
2026-04-27 11:47:32 +08:00
Weilong Liao
07b37b98de fix: handle empty reasoning content for DeepSeek v4 models (#7823)
Co-authored-by: Copilot <copilot@github.com>
2026-04-27 02:19:40 +08:00
12 changed files with 150 additions and 35 deletions

View File

@@ -1,3 +1,6 @@
from sqlalchemy import case, func, select
from sqlmodel import col
from astrbot.api import sp, star
from astrbot.api.event import AstrMessageEvent, MessageEventResult
from astrbot.core import logger
@@ -7,6 +10,7 @@ from astrbot.core.agent.runners.deerflow.constants import (
DEERFLOW_THREAD_ID_KEY,
)
from astrbot.core.agent.runners.deerflow.deerflow_api_client import DeerFlowAPIClient
from astrbot.core.db.po import ProviderStat
from astrbot.core.utils.active_event_registry import active_event_registry
from .utils.rst_scene import RstScene
@@ -246,3 +250,62 @@ class ConversationCommands:
f"✅ Switched to new conversation: {cid[:4]}"
),
)
async def stats(self, message: AstrMessageEvent) -> None:
    """Reply with the aggregated token usage of the current conversation.

    The current conversation ID is looked up from
    ``message.unified_msg_origin``. ``ProviderStat`` rows for that
    conversation with ``agent_type == "internal"`` are aggregated in a
    single query, and the outcome is written to ``message`` via
    ``set_result``.

    Args:
        message: The incoming event; it receives the reply text.
    """
    conversation_id = await self.context.conversation_manager.get_curr_conversation_id(
        message.unified_msg_origin,
    )
    if not conversation_id:
        # No active conversation for this origin: nothing to aggregate.
        reply = MessageEventResult().message(
            "❌ You are not in a conversation. Use /new to create one."
        )
        message.set_result(reply)
        return

    database = self.context.get_db()

    def _summed(column, label):
        # SUM over zero rows yields NULL; coalesce it to 0.
        return func.coalesce(func.sum(column), 0).label(label)

    # Counts rows whose id is non-NULL; used below to detect "no records".
    matched_rows = func.count(
        case((col(ProviderStat.id).is_not(None), 1)),
    ).label("record_count")

    query = select(
        matched_rows,
        _summed(ProviderStat.token_input_other, "total_input_other"),
        _summed(ProviderStat.token_input_cached, "total_input_cached"),
        _summed(ProviderStat.token_output, "total_output"),
    ).where(
        col(ProviderStat.agent_type) == "internal",
        col(ProviderStat.conversation_id) == conversation_id,
    )

    async with database.get_db() as session:
        row = (await session.execute(query)).one()

    if row.record_count == 0:
        reply = MessageEventResult().message(
            "📊 No stats available for this conversation yet."
        )
        message.set_result(reply)
        return

    input_other = row.total_input_other
    input_cached = row.total_input_cached
    output = row.total_output
    grand_total = input_other + input_cached + output

    # Only the first 8 characters of the conversation ID are shown.
    report_lines = [
        f"📊 Conversation Token usage (ID: {conversation_id[:8]}...)\n",
        f"Total: {grand_total:,}\n",
        f"Input (cached): {input_cached:,}\n",
        f"Input (other): {input_other:,}\n",
        f"Output: {output:,}\n",
    ]
    message.set_result(MessageEventResult().message("".join(report_lines)))

View File

@@ -47,6 +47,11 @@ class Main(star.Star):
"""Create new conversation"""
await self.conversation_c.new_conv(message)
# Entry point for the "stats" command: forwards the event unchanged to
# self.conversation_c.stats and returns nothing itself.
# NOTE(review): the docstring below may be surfaced by the command framework
# as help text — confirm before rewording it.
@filter.command("stats")
async def stats(self, message: AstrMessageEvent) -> None:
"""Show token usage statistics for the current conversation"""
await self.conversation_c.stats(message)
@filter.permission_type(filter.PermissionType.ADMIN)
@filter.command("provider")
async def provider(

View File

@@ -183,10 +183,10 @@ class ToolLoopAgentRunner(BaseAgentRunner[TContext]):
self.stats.end_time = time.time()
parts = []
if llm_resp.reasoning_content or llm_resp.reasoning_signature:
if llm_resp.reasoning_content is not None or llm_resp.reasoning_signature:
parts.append(
ThinkPart(
think=llm_resp.reasoning_content,
think=llm_resp.reasoning_content or "",
encrypted=llm_resp.reasoning_signature,
)
)
@@ -876,10 +876,10 @@ class ToolLoopAgentRunner(BaseAgentRunner[TContext]):
# 将结果添加到上下文中
parts = []
if llm_resp.reasoning_content or llm_resp.reasoning_signature:
if llm_resp.reasoning_content is not None or llm_resp.reasoning_signature:
parts.append(
ThinkPart(
think=llm_resp.reasoning_content,
think=llm_resp.reasoning_content or "",
encrypted=llm_resp.reasoning_signature,
)
)
@@ -1361,10 +1361,10 @@ class ToolLoopAgentRunner(BaseAgentRunner[TContext]):
self.stats.end_time = time.time()
parts = []
if llm_resp.reasoning_content or llm_resp.reasoning_signature:
if llm_resp.reasoning_content is not None or llm_resp.reasoning_signature:
parts.append(
ThinkPart(
think=llm_resp.reasoning_content,
think=llm_resp.reasoning_content or "",
encrypted=llm_resp.reasoning_signature,
)
)

View File

@@ -353,7 +353,7 @@ class LLMResponse:
"""Tool call IDs."""
tools_call_extra_content: dict[str, dict[str, Any]] = field(default_factory=dict)
"""Tool call extra content. tool_call_id -> extra_content dict"""
reasoning_content: str = ""
reasoning_content: str | None = None
"""The reasoning content extracted from the LLM, if any."""
reasoning_signature: str | None = None
"""The signature of the reasoning content, if any."""
@@ -404,8 +404,6 @@ class LLMResponse:
raw_completion (ChatCompletion, optional): 原始响应, OpenAI 格式. Defaults to None.
"""
if reasoning_content is None:
reasoning_content = ""
if tools_call_args is None:
tools_call_args = []
if tools_call_name is None:

View File

@@ -39,7 +39,7 @@ class ProviderAnthropic(Provider):
stop_reason: str | None = None,
) -> None:
has_text_output = bool((llm_response.completion_text or "").strip())
has_reasoning_output = bool(llm_response.reasoning_content.strip())
has_reasoning_output = bool((llm_response.reasoning_content or "").strip())
has_tool_output = bool(llm_response.tools_call_args)
if has_text_output or has_reasoning_output or has_tool_output:
return

View File

@@ -462,7 +462,7 @@ class ProviderGoogleGenAI(Provider):
finish_reason: str | None = None,
) -> None:
has_text_output = bool((llm_response.completion_text or "").strip())
has_reasoning_output = bool(llm_response.reasoning_content.strip())
has_reasoning_output = bool((llm_response.reasoning_content or "").strip())
has_tool_output = bool(llm_response.tools_call_args)
if has_text_output or has_reasoning_output or has_tool_output:
return

View File

@@ -671,9 +671,9 @@ class ProviderOpenAIOfficial(Provider):
reasoning = self._extract_reasoning_content(chunk)
_y = False
llm_response.id = chunk.id
llm_response.reasoning_content = ""
llm_response.reasoning_content = None
llm_response.completion_text = ""
if reasoning:
if reasoning is not None:
llm_response.reasoning_content = reasoning
_y = True
if delta and delta.content:
@@ -701,22 +701,28 @@ class ProviderOpenAIOfficial(Provider):
def _extract_reasoning_content(
self,
completion: ChatCompletion | ChatCompletionChunk,
) -> str:
) -> str | None:
"""Extract reasoning content from OpenAI ChatCompletion if available."""
reasoning_text = ""
def _get_reasoning_attr(obj: Any) -> str | None:
fields_set = getattr(obj, "model_fields_set", None)
if isinstance(fields_set, set) and self.reasoning_key in fields_set:
attr = getattr(obj, self.reasoning_key, "")
return "" if attr is None else str(attr)
attr = getattr(obj, self.reasoning_key, None)
return None if attr is None else str(attr)
if not completion.choices:
return reasoning_text
return None
if isinstance(completion, ChatCompletion):
choice = completion.choices[0]
reasoning_attr = getattr(choice.message, self.reasoning_key, None)
if reasoning_attr:
reasoning_text = str(reasoning_attr)
reasoning_attr = _get_reasoning_attr(choice.message)
elif isinstance(completion, ChatCompletionChunk):
delta = completion.choices[0].delta
reasoning_attr = getattr(delta, self.reasoning_key, None)
if reasoning_attr:
reasoning_text = str(reasoning_attr)
return reasoning_text
reasoning_attr = _get_reasoning_attr(delta)
else:
return None
return reasoning_attr
def _extract_usage(self, usage: CompletionUsage | dict) -> TokenUsage:
ptd = getattr(usage, "prompt_tokens_details", None)
@@ -859,7 +865,9 @@ class ProviderOpenAIOfficial(Provider):
# parse the reasoning content if any
# the priority is higher than the <think> tag extraction
llm_response.reasoning_content = self._extract_reasoning_content(completion)
reasoning_content = self._extract_reasoning_content(completion)
if reasoning_content is not None:
llm_response.reasoning_content = reasoning_content
# parse tool calls if any
if choice.message.tool_calls and tools is not None:
@@ -906,7 +914,7 @@ class ProviderOpenAIOfficial(Provider):
"API 返回的 completion 由于内容安全过滤被拒绝(非 AstrBot)。",
)
has_text_output = bool((llm_response.completion_text or "").strip())
has_reasoning_output = bool(llm_response.reasoning_content.strip())
has_reasoning_output = bool((llm_response.reasoning_content or "").strip())
if (
not has_text_output
and not has_reasoning_output
@@ -982,24 +990,39 @@ class ProviderOpenAIOfficial(Provider):
"""Finally convert the payload. Such as think part conversion, tool inject."""
model = payloads.get("model", "").lower()
is_gemini = "gemini" in model
deepseek_reasoning_models = {"deepseek-v4-pro", "deepseek-v4-flash"}
is_deepseek_v4_reasoning = (
model in deepseek_reasoning_models
or "api.deepseek.com" in self.client.base_url.host
)
for message in payloads.get("messages", []):
if message.get("role") == "assistant" and isinstance(
message.get("content"), list
):
reasoning_content = ""
reasoning_content_present = False
new_content = [] # not including think part
for part in message["content"]:
if part.get("type") == "think":
reasoning_content_present = True
reasoning_content += str(part.get("think"))
else:
new_content.append(part)
# Some providers (Grok, etc.) reject empty content lists.
# When all parts were think blocks, fall back to None.
message["content"] = new_content or None
if reasoning_content:
if reasoning_content_present:
message["reasoning_content"] = reasoning_content
if (
message.get("role") == "assistant"
and is_deepseek_v4_reasoning
and "reasoning_content" not in message
):
# DeepSeek v4 reasoning models require the field on assistant
# history messages, even when the reasoning content is empty.
message["reasoning_content"] = ""
# Gemini 的 function_response 要求 google.protobuf.Struct(即 JSON 对象),
# 纯文本会触发 400 Invalid argument,需要包一层 JSON。
if is_gemini and message.get("role") == "tool":

View File

@@ -293,6 +293,15 @@
/>
</template>
<v-card class="stats-card" elevation="4">
<div
v-if="cachedInputTokens(messageContent(msg).agentStats) > 0"
class="stats-row"
>
<span>{{ tm("stats.cachedTokens") }}</span>
<strong>{{
cachedInputTokens(messageContent(msg).agentStats)
}}</strong>
</div>
<div class="stats-row">
<span>{{ tm("stats.inputTokens") }}</span>
<strong>{{ inputTokens(messageContent(msg).agentStats) }}</strong>
@@ -850,13 +859,17 @@ function formatTime(value: string) {
function inputTokens(stats: any) {
const usage = stats?.token_usage || {};
return (usage.input_other || 0) + (usage.input_cached || 0);
return usage.input_other || 0;
}
// Output token count read from stats.token_usage.output.
// Yields 0 when stats, token_usage, or the value itself is missing or falsy.
function outputTokens(stats: any) {
  const usage = stats?.token_usage;
  return usage?.output || 0;
}
// Cached input token count read from stats.token_usage.input_cached.
// Yields 0 when stats, token_usage, or the value itself is missing or falsy.
function cachedInputTokens(stats: any) {
  const usage = stats?.token_usage;
  return usage?.input_cached || 0;
}
function agentDuration(stats: any) {
const directDuration = readPositiveNumber(stats, [
"duration",

View File

@@ -185,6 +185,15 @@
/>
</template>
<v-card class="stats-card" elevation="4">
<div
v-if="cachedInputTokens(messageContent(msg).agentStats) > 0"
class="stats-row"
>
<span>{{ tm("stats.cachedTokens") }}</span>
<strong>{{
cachedInputTokens(messageContent(msg).agentStats)
}}</strong>
</div>
<div class="stats-row">
<span>{{ tm("stats.inputTokens") }}</span>
<strong>{{
@@ -512,13 +521,17 @@ function formatTime(value: string) {
function inputTokens(stats: any) {
const usage = stats?.token_usage || {};
return (usage.input_other || 0) + (usage.input_cached || 0);
return usage.input_other || 0;
}
// Output token count read from stats.token_usage.output.
// Yields 0 when stats, token_usage, or the value itself is missing or falsy.
function outputTokens(stats: any) {
  const tokenUsage = stats?.token_usage;
  const produced = tokenUsage?.output;
  return produced || 0;
}
// Cached input token count read from stats.token_usage.input_cached.
// Yields 0 when stats, token_usage, or the value itself is missing or falsy.
function cachedInputTokens(stats: any) {
  const tokenUsage = stats?.token_usage;
  const cached = tokenUsage?.input_cached;
  return cached || 0;
}
function agentDuration(stats: any) {
const directDuration = readPositiveNumber(stats, [
"duration",

View File

@@ -137,9 +137,9 @@
},
"stats": {
"tokens": "Tokens",
"inputTokens": "Input Tokens",
"inputTokens": "Input (other)",
"outputTokens": "Output Tokens",
"cachedTokens": "Cached Tokens",
"cachedTokens": "Input (cached)",
"duration": "Duration",
"ttft": "Time to First Token"
},

View File

@@ -137,9 +137,9 @@
},
"stats": {
"tokens": "Токены",
"inputTokens": "Входящие",
"inputTokens": "Входящие (прочие)",
"outputTokens": "Исходящие",
"cachedTokens": "Кэшированные",
"cachedTokens": "Входящие (кэш)",
"duration": "Время",
"ttft": "Время до первого токена"
},

View File

@@ -137,9 +137,9 @@
},
"stats": {
"tokens": "Token",
"inputTokens": "输入 Token",
"inputTokens": "输入(其他)",
"outputTokens": "输出 Token",
"cachedTokens": "缓存 Token",
"cachedTokens": "输入(缓存)",
"duration": "耗时",
"ttft": "首字时间"
},