diff --git a/packages/cli/src/services/BuiltinCommandLoader.ts b/packages/cli/src/services/BuiltinCommandLoader.ts
index dc4c1f8d9..ada1ae0eb 100644
--- a/packages/cli/src/services/BuiltinCommandLoader.ts
+++ b/packages/cli/src/services/BuiltinCommandLoader.ts
@@ -14,6 +14,7 @@ import { authCommand } from '../ui/commands/authCommand.js';
 import { bugCommand } from '../ui/commands/bugCommand.js';
 import { clearCommand } from '../ui/commands/clearCommand.js';
 import { compressCommand } from '../ui/commands/compressCommand.js';
+import { contextCommand } from '../ui/commands/contextCommand.js';
 import { copyCommand } from '../ui/commands/copyCommand.js';
 import { docsCommand } from '../ui/commands/docsCommand.js';
 import { directoryCommand } from '../ui/commands/directoryCommand.js';
@@ -64,6 +65,7 @@ export class BuiltinCommandLoader implements ICommandLoader {
       bugCommand,
       clearCommand,
       compressCommand,
+      contextCommand,
       copyCommand,
       docsCommand,
       directoryCommand,
diff --git a/packages/cli/src/ui/commands/CONTEXT_COMMAND.md b/packages/cli/src/ui/commands/CONTEXT_COMMAND.md
new file mode 100644
index 000000000..de768d4b9
--- /dev/null
+++ b/packages/cli/src/ui/commands/CONTEXT_COMMAND.md
@@ -0,0 +1,293 @@
+# `/context` Command: Context Window Usage Breakdown
+
+## Overview
+
+The `/context` command shows how tokens in the current model's context window are being used. It splits the window into categories so users can see where tokens are going and how much room is left.
+
+## What Makes Up the Context Window
+
+The full prompt sent to the model in one API request consists of the following parts:
+
+```
+┌─────────────────────────────────────────────┐
+│ Context Window (total capacity)             │
+│                                             │
+│ ┌─────────────────────────────────────┐     │
+│ │ System Prompt                       │     │
+│ │ └─ core instructions + rules        │     │
+│ ├─────────────────────────────────────┤     │
+│ │ Tool Declarations                   │     │
+│ │ ├─ Built-in tools                   │     │
+│ │ ├─ MCP tools                        │     │
+│ │ └─ SkillTool ◄─────────────────────┼─── names + descriptions of all skills
+│ ├─────────────────────────────────────┤     │
+│ │ Memory                              │     │
+│ │ └─ QWEN.md + extension configs      │     │
+│ ├─────────────────────────────────────┤     │
+│ │ Messages                            │     │
+│ │ ├─ user messages                    │     │
+│ │ ├─ model replies                    │     │
+│ │ └─ tool calls & results ◄──────────┼─── skill bodies are loaded here
+│ ├─────────────────────────────────────┤     │
+│ │ Free Space                          │     │
+│ ├─────────────────────────────────────┤     │
+│ │ Autocompact Buffer                  │     │
+│ └─────────────────────────────────────┘     │
+└─────────────────────────────────────────────┘
+```
+
+**Invariant**: the categories sum to the context window's total capacity.
+
+## Categories in Detail
+
+### 1. System Prompt
+
+| Property     | Description                                                                                          |
+| ------------ | ---------------------------------------------------------------------------------------------------- |
+| **Source**   | `getCoreSystemPrompt(undefined, modelName)`                                                           |
+| **Includes** | the model's core behavioral instructions, output-format requirements, safety rules, etc.              |
+| **Excludes** | memory content (counted separately)                                                                   |
+| **Computed** | `estimateTokens()` over the system prompt text                                                        |
+| **Changes**  | essentially fixed, unless the `QWEN_SYSTEM_MD` environment variable or `.qwen/system.md` is modified  |
+
+> **Note**: `getCoreSystemPrompt` accepts a `userMemory` parameter; `undefined` is passed here to exclude memory, which is tracked as its own category.
+
+### 2. Built-in Tools
+
+| Property     | Description                                                                                                          |
+| ------------ | --------------------------------------------------------------------------------------------------------------------- |
+| **Source**   | tools in `toolRegistry.getAllTools()` that are neither MCP tools nor the SkillTool                                      |
+| **Includes** | schemas of core tools such as `read_file`, `edit`, `run_shell_command`, `grep_search`, `glob`, and `list_directory`     |
+| **Computed** | `allToolsTokens - skillsTokens - mcpToolsTotalTokens`                                                                   |
+| **Details**  | one row per built-in tool with its name and token cost, sorted by token count descending                                |
+
+> Although **SkillTool** is itself a built-in tool, its content is dynamic (it embeds the full skill listing), so it is shown as the separate **Skills** category and does not appear under Built-in tools.
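+The Built-in tools figure is derived by subtraction. As a rough sketch (mirroring the arithmetic in `contextCommand.ts`; the variable names here are illustrative):
+
+```typescript
+// Cost of every declared tool, serialized the way it is sent to the API.
+const allToolsTokens = estimateTokens(
+  JSON.stringify(toolRegistry.getFunctionDeclarations()),
+);
+
+// SkillTool and MCP tools get their own categories, so the Built-in tools
+// category is whatever remains after subtracting them.
+const builtinToolsTokens = Math.max(
+  0,
+  allToolsTokens - skillsTokens - mcpToolsTotalTokens,
+);
+```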
+### 2b. MCP Tools
+
+| Property        | Description                                                                        |
+| --------------- | ----------------------------------------------------------------------------------- |
+| **Source**      | `DiscoveredMCPTool` instances in `toolRegistry.getAllTools()`                        |
+| **Includes**    | tool schemas provided by external tool servers connected over the MCP protocol       |
+| **Computed**    | sum of `estimateTokens(JSON.stringify(tool.schema))` over the MCP tools              |
+| **Details**     | one row per MCP tool with its name (in `serverName__toolName` format) and token cost |
+| **Conditional** | the category row and details are shown only when MCP tools exist                     |
+
+### 3. Skills ⭐ Progressive Disclosure
+
+Skills use a **two-phase loading** design:
+
+| Phase       | What is loaded                                             | Token attribution     | When                                                       |
+| ----------- | ----------------------------------------------------------- | --------------------- | ---------------------------------------------------------- |
+| **Phase 1** | each skill's name + short description + usage instructions  | **Skills category**   | sent with every API request                                |
+| **Phase 2** | the full SKILL.md body (detailed instructions, templates)   | **Messages category** | injected on demand after the model calls the `skill` tool  |
+
+**The Skills category in `/context` shows the always-resident phase 1 cost.**
+
+#### Phase 1 implementation details
+
+At initialization, SkillTool embeds the listing of all skills in its `description` field:
+
+```
+Execute a skill within the main conversation
+
+<skills_instructions>
+... usage instructions (~600 characters) ...
+</skills_instructions>
+
+<available_skills>
+<skill>
+<name>
+pdf
+</name>
+<description>
+Convert PDF files to text (project)
+</description>
+<level>
+project
+</level>
+</skill>
+<skill>
+<name>
+xlsx
+</name>
+<description>
+Process Excel spreadsheets (user)
+</description>
+<level>
+user
+</level>
+</skill>
+... more skills ...
+</available_skills>
+```
+
+This entire block is part of SkillTool's tool declaration and is sent with every API request.
+
+#### Token accounting
+
+```
+skillsTokens = estimateTokens(JSON.stringify(skillTool.schema))
+```
+
+The estimate is taken directly from SkillTool's complete schema in the ToolRegistry, which guarantees it covers:
+
+- the usage-instruction text (`<skills_instructions>`)
+- the XML listing of all skills (`<available_skills>`)
+- the schema's parameter definitions
+
+#### Phase 2 (on-demand loading)
+
+When the model calls the `skill` tool, `SkillToolInvocation.execute()` loads the full SKILL.md:
+
+```typescript
+const skill = await this.skillManager.loadSkillForRuntime(this.params.skill);
+const llmContent = `Base directory: ${baseDir}\n\n${skill.body}\n`;
+```
+
+The body is injected into the conversation as the tool-call result, so its token cost is attributed to the **Messages** category.
+
+#### Skills detail list
+
+Each skill's detail row shows that skill's approximate phase 1 footprint, sorted by token count descending. Note:
+
+- the per-skill detail tokens sum to **less than the Skills category total**; the difference is the cost of the `skills_instructions` text
+- detail rows cover only the name and description tokens, not the schema's parameter definitions
+
+### 4. Memory Files
+
+| Property     | Description                                                                                                   |
+| ------------ | --------------------------------------------------------------------------------------------------------------- |
+| **Source**   | `config.getUserMemory()`                                                                                         |
+| **Includes** | user-level configuration files such as `QWEN.md`, extension configs, and `output-language`                       |
+| **Location** | appended to the end of the system prompt (via `getCoreSystemPrompt(userMemory, model)`)                          |
+| **Computed** | the memory text is split on its `--- Context from: <path> ---` markers and each file is estimated separately     |
+
+**Memory content format**:
+
+```
+--- Context from: ~/.qwen/QWEN.md ---
+user-defined rules and preferences...
+--- End of Context from: ~/.qwen/QWEN.md ---
+--- Context from: ~/.qwen/extensions/config.md ---
+extension configuration content...
+--- End of Context from: ~/.qwen/extensions/config.md ---
+```
+
+> **Why doesn't System Prompt include Memory?** The System Prompt count is computed with `userMemory = undefined`, and Memory is shown as its own category, so the two categories never overlap. In the actual API request, memory is appended to the end of the system prompt.
+
+### 5. Messages
+
+| Property             | Description                                                                       |
+| -------------------- | ----------------------------------------------------------------------------------- |
+| **Source**           | derived by subtraction: `totalTokens - systemPrompt - allTools - memory`             |
+| **Includes**         | all user messages, model replies, tool-call arguments, and tool results              |
+| **Notably includes** | skill bodies (phase 2 on-demand content), file-read results, shell output, etc.      |
+| **Computed**         | `max(0, apiTotalTokens - estimatedOverhead)`                                         |
+
+> **Note**: Messages is the API-reported `totalTokens` minus the estimates for the other categories, so it absorbs the estimation error. If the overhead is overestimated, Messages is correspondingly underestimated.
+
+### 6. Free Space
+
+| Property     | Description                                                                |
+| ------------ | ---------------------------------------------------------------------------- |
+| **Computed** | `contextWindowSize - totalTokens - autocompactBuffer`                         |
+| **Meaning**  | how many more tokens of conversation fit before auto-compression triggers     |
+
+### 7. Autocompact Buffer
+
+| Property     | Description                                                                                     |
+| ------------ | ------------------------------------------------------------------------------------------------- |
+| **Computed** | `(1 - compressionThreshold) × contextWindowSize`                                                   |
+| **Default**  | `(1 - 0.7) × 131072 = 39322` (about 30% of the context window)                                     |
+| **Meaning**  | auto-compression triggers once usage reaches 70%; the remaining 30% is reserved as a buffer        |
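+A quick worked example of the buffer and free-space arithmetic (using the default values quoted above; the variable names are illustrative):
+
+```typescript
+const contextWindowSize = 131_072;
+const compressionThreshold = 0.7; // compression triggers at 70% usage
+
+// 30% of the window stays reserved so compression can run before overflow.
+const autocompactBuffer = Math.round(
+  (1 - compressionThreshold) * contextWindowSize,
+); // 39322
+
+// With, say, 25.3k tokens already used:
+const totalTokens = 25_300;
+const freeSpace = Math.max(
+  0,
+  contextWindowSize - totalTokens - autocompactBuffer,
+); // 66450, i.e. the ~66.5k shown in the Mode B example below
+```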
+## Two Display Modes
+
+### Mode A: no API data (first use, before any message is sent)
+
+```
+Context Usage
+
+  No API response yet. Send a message to see actual usage.
+
+  Estimated pre-conversation overhead
+  Model: glm-5  Context window: 131.1k tokens
+
+  █ System prompt        4.8k tokens (3.7%)
+  █ System tools         5.2k tokens (4.0%)
+  █ Memory files         845 tokens (0.6%)
+  █ Skills               5.1k tokens (3.9%)
+  ░ Free space           75.8k tokens (57.8%)
+  ░ Autocompact buffer   39.3k tokens (30.0%)
+```
+
+- **No progress bar or total figure**: avoids a misleading comparison between the local estimate and the later API-reported value
+- **No Messages row**: there is no conversation yet
+- Categories are based on the local heuristic (`estimateTokens`) and may deviate ~10% from the API's actual tokenizer
+
+### Mode B: API data available (conversation in progress)
+
+```
+Context Usage
+
+  ██████████░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ glm-5
+  25.3k/131.1k tokens (19.3%)
+
+  Usage by category
+  █ System prompt        4.5k tokens (3.4%)
+  █ System tools         4.9k tokens (3.7%)
+  █ Memory files         790 tokens (0.6%)
+  █ Skills               4.8k tokens (3.7%)
+  █ Messages             10.3k tokens (7.9%)
+  ░ Free space           66.5k tokens (50.7%)
+  ░ Autocompact buffer   39.3k tokens (30.0%)
+```
+
+- **`totalTokens` comes from the API response** (`usageMetadata.promptTokenCount`) and is the most accurate figure
+- **When the local estimate exceeds the API total**: the overhead categories are scaled down proportionally so that the categories still sum to `totalTokens`
+- **Messages** = `totalTokens - scaledOverhead`, covering all conversation content plus any on-demand skill bodies
+
+## Token Estimation Method
+
+Since the model's tokenizer is not directly accessible, a character-based heuristic is used:
+
+```
+tokens ≈ ⌈asciiChars / 4 + nonAsciiChars × 1.5⌉
+```
+
+| Character type                               | Rate             | Rationale                                   |
+| -------------------------------------------- | ---------------- | ------------------------------------------- |
+| ASCII (English, JSON structural chars, etc.) | ~4 chars/token   | average BPE compression rate for English    |
+| non-ASCII (CJK characters, etc.)             | ~1.5 tokens/char | CJK characters typically map to 1-2 tokens  |
+
+**Known limitations**:
+
+- tokenizers differ between models; estimates can be off by ±10-20%
+- JSON structural characters (`{`, `"`, `:`, etc.) tokenize at a different rate than natural language
+- when the estimate overshoots, `overheadScale` corrects it by proportional scaling
+
+## Data Flow
+
+```
+                   ┌──────────────────┐
+                   │ API Response     │
+                   │ promptTokenCount │ ─── totalTokens (ground truth)
+                   └──────────────────┘
+          │
+          ┌──────────────────────────┼──────────────────────────┐
+          │                          │                          │
+          ▼                          ▼                          ▼
+  estimateTokens()           estimateTokens()           estimateTokens()
+          │                          │                          │
+          ▼                          ▼                          ▼
+  systemPromptTokens          allToolsTokens           memoryFilesTokens
+                                     │
+                               ┌─────┴──────┐
+                               │            │
+                               ▼            ▼
+                    systemToolsTokens   skillsTokens
+                   (allTools - skills)  (from SkillTool schema)
+                               │            │
+                               └─────┬──────┘
+                                     │
+                                     ▼
+  rawOverhead = systemPrompt + allTools + memory
+          │
+          ┌───────────┼───────────┐
+          │     overheadScale     │ (= min(1, totalTokens/rawOverhead))
+          ▼                       ▼
+  scaled categories    messages = totalTokens - scaledOverhead
+          │                       │
+          └───────────┬───────────┘
+                      ▼
+              breakdown output
+```
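+To make the flow concrete, here is a worked sketch of the heuristic and the scaling step (the numbers are invented for illustration):
+
+```typescript
+// Heuristic: ceil(asciiChars / 4 + nonAsciiChars * 1.5)
+estimateTokens('hello world'); // 11 ASCII chars -> ceil(11 / 4) = 3
+estimateTokens('你好'); // 2 non-ASCII chars -> ceil(2 * 1.5) = 3
+
+// Typical case: the overhead estimate fits under the API-reported total.
+const rawOverhead = 10_000; // systemPrompt + allTools + memory estimates
+const totalTokens = 12_000; // usageMetadata.promptTokenCount (ground truth)
+const overheadScale = Math.min(1, totalTokens / rawOverhead); // 1 (no scaling)
+const messagesTokens = Math.max(0, totalTokens - rawOverhead * overheadScale); // 2000
+
+// If the estimates overshoot (rawOverhead > totalTokens), overheadScale drops
+// below 1, every overhead category shrinks proportionally, and Messages
+// bottoms out at 0 rather than going negative.
+```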
diff --git a/packages/cli/src/ui/commands/contextCommand.ts b/packages/cli/src/ui/commands/contextCommand.ts
new file mode 100644
index 000000000..e4df88029
--- /dev/null
+++ b/packages/cli/src/ui/commands/contextCommand.ts
@@ -0,0 +1,310 @@
+/**
+ * @license
+ * Copyright 2025 Qwen
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import {
+  type CommandContext,
+  type SlashCommand,
+  CommandKind,
+} from './types.js';
+import {
+  MessageType,
+  type HistoryItemContextUsage,
+  type ContextCategoryBreakdown,
+  type ContextToolDetail,
+  type ContextMemoryDetail,
+  type ContextSkillDetail,
+} from '../types.js';
+import {
+  DiscoveredMCPTool,
+  uiTelemetryService,
+  getCoreSystemPrompt,
+  DEFAULT_TOKEN_LIMIT,
+  ToolNames,
+} from '@qwen-code/qwen-code-core';
+import { t } from '../../i18n/index.js';
+
+/**
+ * Default compression token threshold (triggers compression at 70% usage).
+ * The autocompact buffer is (1 - threshold) * contextWindowSize.
+ */
+const DEFAULT_COMPRESSION_THRESHOLD = 0.7;
+
+/**
+ * Estimate token count for a string using a character-based heuristic.
+ * ASCII chars ≈ 4 chars/token, CJK/non-ASCII chars ≈ 1.5 tokens/char.
+ */
+function estimateTokens(text: string): number {
+  if (!text || text.length === 0) return 0;
+  let asciiChars = 0;
+  let nonAsciiChars = 0;
+  for (let i = 0; i < text.length; i++) {
+    const charCode = text.charCodeAt(i);
+    if (charCode < 128) {
+      asciiChars++;
+    } else {
+      nonAsciiChars++;
+    }
+  }
+  // CJK and other non-ASCII characters typically produce 1.5-2 tokens each
+  return Math.ceil(asciiChars / 4 + nonAsciiChars * 1.5);
+}
+
+/**
+ * Parse concatenated memory content into individual file entries.
+ * Memory content format:
+ * "--- Context from: <path> ---\n<content>\n--- End of Context from: <path> ---"
+ */
+function parseMemoryFiles(memoryContent: string): ContextMemoryDetail[] {
+  if (!memoryContent || memoryContent.trim().length === 0) return [];
+
+  const results: ContextMemoryDetail[] = [];
+  // Use backreference (\1) to ensure start/end path markers match
+  const regex =
+    /--- Context from: (.+?) ---\n([\s\S]*?)--- End of Context from: \1 ---/g;
+  let match: RegExpExecArray | null;
+
+  while ((match = regex.exec(memoryContent)) !== null) {
+    const filePath = match[1]!;
+    const content = match[2]!;
+    results.push({
+      path: filePath,
+      tokens: estimateTokens(content),
+    });
+  }
+
+  // If no structured markers found, treat as a single memory block
+  if (results.length === 0 && memoryContent.trim().length > 0) {
+    results.push({
+      path: t('memory'),
+      tokens: estimateTokens(memoryContent),
+    });
+  }
+
+  return results;
+}
+
+export const contextCommand: SlashCommand = {
+  name: 'context',
+  get description() {
+    return t('Show context window usage breakdown.');
+  },
+  kind: CommandKind.BUILT_IN,
+  action: async (context: CommandContext) => {
+    const { config } = context.services;
+    if (!config) {
+      context.ui.addItem(
+        {
+          type: MessageType.ERROR,
+          text: t('Config not loaded.'),
+        },
+        Date.now(),
+      );
+      return;
+    }
+
+    // --- Gather data ---
+
+    const modelName = config.getModel() || 'unknown';
+    const contentGeneratorConfig = config.getContentGeneratorConfig();
+    const contextWindowSize =
+      contentGeneratorConfig.contextWindowSize ?? DEFAULT_TOKEN_LIMIT;
+
+    // Total prompt token count from API (most accurate)
+    const apiTotalTokens = uiTelemetryService.getLastPromptTokenCount();
+
+    // 1. System prompt tokens (without memory, as memory is counted separately)
+    const systemPromptText = getCoreSystemPrompt(undefined, modelName);
+    const systemPromptTokens = estimateTokens(systemPromptText);
+
+    // 2. Tool declarations tokens (includes ALL tools: built-in, MCP, skill tool)
+    const toolRegistry = config.getToolRegistry();
+    const allTools = toolRegistry ? toolRegistry.getAllTools() : [];
+    const toolDeclarations = toolRegistry
+      ? toolRegistry.getFunctionDeclarations()
+      : [];
+    const toolsJsonStr = JSON.stringify(toolDeclarations);
+    const allToolsTokens = estimateTokens(toolsJsonStr);
+
+    // 3. Per-tool details (for breakdown display)
+    const builtinTools: ContextToolDetail[] = [];
+    const mcpTools: ContextToolDetail[] = [];
+    for (const tool of allTools) {
+      const toolJsonStr = JSON.stringify(tool.schema);
+      const tokens = estimateTokens(toolJsonStr);
+      if (tool instanceof DiscoveredMCPTool) {
+        mcpTools.push({
+          name: `${tool.serverName}__${tool.serverToolName || tool.name}`,
+          tokens,
+        });
+      } else if (tool.name !== ToolNames.SKILL) {
+        // Built-in tool (exclude SkillTool, which is shown under Skills)
+        builtinTools.push({
+          name: tool.name,
+          tokens,
+        });
+      }
+    }
+
+    // 4. Memory files
+    const memoryContent = config.getUserMemory();
+    const memoryFiles = parseMemoryFiles(memoryContent);
+    const memoryFilesTokens = memoryFiles.reduce((sum, f) => sum + f.tokens, 0);
+
+    // 5. Skills (progressive disclosure)
+    // The SkillTool's description embeds all skill name+description listings
+    // plus ~600 chars of instruction text. This is the "always in context"
+    // cost. The full SKILL.md body is only loaded on-demand when the model
+    // invokes the skill tool (and that cost appears in Messages).
+    //
+    // To get an accurate total, we read the SkillTool's actual schema from
+    // the registry rather than reconstructing from a template.
+    const skillTool = allTools.find((tool) => tool.name === ToolNames.SKILL);
+    const skillToolTotalTokens = skillTool
+      ? estimateTokens(JSON.stringify(skillTool.schema))
+      : 0;
+
+    // Per-skill breakdown for detail display (proportional to description length)
+    const skillManager = config.getSkillManager();
+    const skillConfigs = skillManager ? await skillManager.listSkills() : [];
+    const skills: ContextSkillDetail[] = skillConfigs.map((skill) => ({
+      name: skill.name,
+      tokens: estimateTokens(
+        `<skill>\n<name>\n${skill.name}\n</name>\n<description>\n${skill.description} (${skill.level})\n</description>\n<level>\n${skill.level}\n</level>\n</skill>\n`,
+      ),
+    }));
+    // Use the SkillTool's actual schema tokens as the total, not the sum of
+    // individual estimates (which would miss the instruction wrapper text).
+    const skillsTokens = skillToolTotalTokens;
+
+    // 6. Autocompact buffer
+    const compressionThreshold =
+      config.getChatCompression()?.contextPercentageThreshold ??
+      DEFAULT_COMPRESSION_THRESHOLD;
+    const autocompactBuffer =
+      compressionThreshold > 0
+        ? Math.round((1 - compressionThreshold) * contextWindowSize)
+        : 0;
+
+    // 7. Calculate raw overhead (allToolsTokens already includes skills)
+    const rawOverhead = systemPromptTokens + allToolsTokens + memoryFilesTokens;
+
+    // 8. Determine total tokens and build breakdown
+    const isEstimated = apiTotalTokens === 0;
+
+    // Sum of MCP tool tokens for category-level display
+    const mcpToolsTotalTokens = mcpTools.reduce(
+      (sum, tool) => sum + tool.tokens,
+      0,
+    );
+
+    let totalTokens: number;
+    let displaySystemPrompt: number;
+    let displayBuiltinTools: number;
+    let displayMcpTools: number;
+    let displayMemoryFiles: number;
+    let displaySkills: number;
+    let messagesTokens: number;
+    let freeSpace: number;
+    let detailBuiltinTools: ContextToolDetail[];
+    let detailMcpTools: ContextToolDetail[];
+    let detailMemoryFiles: ContextMemoryDetail[];
+    let detailSkills: ContextSkillDetail[];
+
+    if (isEstimated) {
+      // No API data yet: show raw overhead estimates only.
+      // Use 0 as totalTokens so the progress bar stays empty, which
+      // avoids showing an inflated estimate that would "decrease"
+      // once real API data arrives.
+      totalTokens = 0;
+      displaySystemPrompt = systemPromptTokens;
+      // builtinTools category = allTools - skills - mcpTools
+      displayBuiltinTools = Math.max(
+        0,
+        allToolsTokens - skillsTokens - mcpToolsTotalTokens,
+      );
+      displayMcpTools = mcpToolsTotalTokens;
+      displayMemoryFiles = memoryFilesTokens;
+      displaySkills = skillsTokens;
+      messagesTokens = 0;
+      // Free space accounts for the estimated overhead
+      freeSpace = Math.max(
+        0,
+        contextWindowSize - rawOverhead - autocompactBuffer,
+      );
+      detailBuiltinTools = builtinTools;
+      detailMcpTools = mcpTools;
+      detailMemoryFiles = memoryFiles;
+      detailSkills = skills;
+    } else {
+      // API data available: use actual total with proportional scaling
+      totalTokens = apiTotalTokens;
+
+      // When estimates overshoot API total, scale down proportionally
+      // so the breakdown categories add up to totalTokens.
+      const overheadScale =
+        rawOverhead > totalTokens ? totalTokens / rawOverhead : 1;
+
+      displaySystemPrompt = Math.round(systemPromptTokens * overheadScale);
+      const scaledAllTools = Math.round(allToolsTokens * overheadScale);
+      displayMemoryFiles = Math.round(memoryFilesTokens * overheadScale);
+      displaySkills = Math.round(skillsTokens * overheadScale);
+      const scaledMcpTotal = Math.round(mcpToolsTotalTokens * overheadScale);
+      displayMcpTools = scaledMcpTotal;
+      displayBuiltinTools = Math.max(
+        0,
+        scaledAllTools - displaySkills - scaledMcpTotal,
+      );
+
+      const scaledOverhead =
+        displaySystemPrompt + scaledAllTools + displayMemoryFiles;
+      messagesTokens = Math.max(0, totalTokens - scaledOverhead);
+
+      freeSpace = Math.max(
+        0,
+        contextWindowSize - totalTokens - autocompactBuffer,
+      );
+
+      // Scale detail items to match their parent categories
+      const scaleDetail = <T extends { tokens: number }>(items: T[]): T[] =>
+        overheadScale < 1
+          ? items.map((item) => ({
+              ...item,
+              tokens: Math.round(item.tokens * overheadScale),
+            }))
+          : items;
+
+      detailBuiltinTools = scaleDetail(builtinTools);
+      detailMcpTools = scaleDetail(mcpTools);
+      detailMemoryFiles = scaleDetail(memoryFiles);
+      detailSkills = scaleDetail(skills);
+    }
+
+    const breakdown: ContextCategoryBreakdown = {
+      systemPrompt: displaySystemPrompt,
+      builtinTools: displayBuiltinTools,
+      mcpTools: displayMcpTools,
+      memoryFiles: displayMemoryFiles,
+      skills: displaySkills,
+      messages: messagesTokens,
+      freeSpace,
+      autocompactBuffer,
+    };
+
+    const contextUsageItem: HistoryItemContextUsage = {
+      type: MessageType.CONTEXT_USAGE,
+      modelName,
+      totalTokens,
+      contextWindowSize,
+      breakdown,
+      builtinTools: detailBuiltinTools,
+      mcpTools: detailMcpTools,
+      memoryFiles: detailMemoryFiles,
+      skills: detailSkills,
+      isEstimated,
+    };
+
+    context.ui.addItem(contextUsageItem, Date.now());
+  },
+};
diff --git a/packages/cli/src/ui/components/HistoryItemDisplay.tsx b/packages/cli/src/ui/components/HistoryItemDisplay.tsx
index a4fa9ee7c..5eb1e7bc9 100644
--- a/packages/cli/src/ui/components/HistoryItemDisplay.tsx
+++ b/packages/cli/src/ui/components/HistoryItemDisplay.tsx
@@ -33,6 +33,7 @@ import { getMCPServerStatus } from '@qwen-code/qwen-code-core';
 import { SkillsList } from './views/SkillsList.js';
 import { ToolsList } from './views/ToolsList.js';
 import { McpStatus } from './views/McpStatus.js';
+import { ContextUsage } from './views/ContextUsage.js';
 
 interface HistoryItemDisplayProps {
   item: HistoryItem;
@@ -176,6 +177,19 @@ const HistoryItemDisplayComponent: React.FC<HistoryItemDisplayProps> = ({
       {itemForDisplay.type === 'mcp_status' && (
        <McpStatus {...itemForDisplay} />
       )}
+      {itemForDisplay.type === 'context_usage' && (
+        <ContextUsage
+          modelName={itemForDisplay.modelName}
+          totalTokens={itemForDisplay.totalTokens}
+          contextWindowSize={itemForDisplay.contextWindowSize}
+          breakdown={itemForDisplay.breakdown}
+          builtinTools={itemForDisplay.builtinTools}
+          mcpTools={itemForDisplay.mcpTools}
+          memoryFiles={itemForDisplay.memoryFiles}
+          skills={itemForDisplay.skills}
+          isEstimated={itemForDisplay.isEstimated}
+        />
+      )}
     </Box>
   );
 };
+ */ +const ProgressBar: React.FC<{ + usedPercentage: number; + bufferPercentage: number; + width: number; +}> = ({ usedPercentage, bufferPercentage, width }) => { + const usedCount = Math.round((Math.min(usedPercentage, 100) / 100) * width); + const bufferCount = Math.round( + (Math.min(bufferPercentage, 100 - usedPercentage) / 100) * width, + ); + const freeCount = Math.max(0, width - usedCount - bufferCount); + + const usedStr = FILLED.repeat(Math.max(0, usedCount)); + const freeStr = EMPTY.repeat(Math.max(0, freeCount)); + const bufferStr = BUFFER.repeat(Math.max(0, bufferCount)); + + // Used color: accent by default, warning/error at high usage. + let usedColor = theme.text.accent; + if (usedPercentage > 80) { + usedColor = theme.status.error; + } else if (usedPercentage > 60) { + usedColor = theme.status.warning; + } + + return ( + + {usedStr} + {freeStr} + {bufferStr} + + ); +}; + +/** + * A row showing a category with its token count and percentage. + */ +const CategoryRow: React.FC<{ + symbol: string; + label: string; + tokens: number; + contextWindowSize: number; + symbolColor?: string; +}> = ({ symbol, label, tokens, contextWindowSize, symbolColor }) => { + const percentage = ((tokens / contextWindowSize) * 100).toFixed(1); + const tokenStr = `${formatTokens(tokens)} ${t('tokens')} (${percentage}%)`; + + return ( + + + {symbol} + + + {label} + + + {tokenStr} + + + ); +}; + +/** + * A detail row for individual items (MCP tools, memory files, skills). + */ +const DETAIL_NAME_MAX_LEN = 30; + +const DetailRow: React.FC<{ + name: string; + tokens: number; +}> = ({ name, tokens }) => { + const tokenStr = + tokens > 0 ? `${formatTokens(tokens)} ${t('tokens')}` : `0 ${t('tokens')}`; + return ( + + {'\u2514'} + + + {truncateName(name, DETAIL_NAME_MAX_LEN)} + + + + {tokenStr} + + + ); +}; + +export const ContextUsage: React.FC = ({ + modelName, + totalTokens, + contextWindowSize, + breakdown, + builtinTools, + mcpTools, + memoryFiles, + skills, + isEstimated, +}) => { + const percentage = + contextWindowSize > 0 ? (totalTokens / contextWindowSize) * 100 : 0; + + // Sort detail items by token count (descending) for better readability + const sortedBuiltinTools = [...builtinTools].sort( + (a, b) => b.tokens - a.tokens, + ); + const sortedMcpTools = [...mcpTools].sort((a, b) => b.tokens - a.tokens); + const sortedMemoryFiles = [...memoryFiles].sort( + (a, b) => b.tokens - a.tokens, + ); + const sortedSkills = [...skills].sort((a, b) => b.tokens - a.tokens); + + return ( + + {/* Title */} + + {t('Context Usage')} + + + + {isEstimated ? ( + <> + {/* No API data yet — show hint instead of progress bar */} + + + {t('No API response yet. Send a message to see actual usage.')} + + + + {/* Estimated overhead categories */} + + {t('Estimated pre-conversation overhead')} + + + {t('Model')}: {modelName} + {' '} + {t('Context window')}: {formatTokens(contextWindowSize)}{' '} + {t('tokens')} + + + + ) : ( + <> + {/* Model name + context window info */} + + {modelName} + + + {t('Context window')}: {formatTokens(contextWindowSize)}{' '} + {t('tokens')} + + + + {/* Progress bar — three segments: used | free | buffer */} + + 0 + ? 
(breakdown.autocompactBuffer / contextWindowSize) * 100 + : 0 + } + width={CONTENT_WIDTH} + /> + + + {/* Legend — same layout as CategoryRow for alignment */} + + + + + + {/* Breakdown header */} + + {t('Usage by category')} + + + )} + + + + {breakdown.mcpTools > 0 && ( + + )} + + + {/* Only show Messages when we have real API data */} + {!isEstimated && ( + + )} + + {/* Built-in tools detail */} + {sortedBuiltinTools.length > 0 && ( + + + {t('Built-in tools')} + + {sortedBuiltinTools.map((tool) => ( + + ))} + + )} + + {/* MCP Tools detail */} + {sortedMcpTools.length > 0 && ( + + + {t('MCP tools')} + + {sortedMcpTools.map((tool) => ( + + ))} + + )} + + {/* Memory files detail */} + {sortedMemoryFiles.length > 0 && ( + + + {t('Memory files')} + + {sortedMemoryFiles.map((file) => ( + + ))} + + )} + + {/* Skills detail */} + {sortedSkills.length > 0 && ( + + + {t('Skills')} + + {sortedSkills.map((skill) => ( + + ))} + + )} + + ); +}; diff --git a/packages/cli/src/ui/types.ts b/packages/cli/src/ui/types.ts index b111f9ac7..fc452d7f6 100644 --- a/packages/cli/src/ui/types.ts +++ b/packages/cli/src/ui/types.ts @@ -251,6 +251,48 @@ export type HistoryItemMcpStatus = HistoryItemBase & { showTips: boolean; }; +// --- Context Usage types --- + +export interface ContextCategoryBreakdown { + systemPrompt: number; + builtinTools: number; + mcpTools: number; + memoryFiles: number; + skills: number; + messages: number; + freeSpace: number; + autocompactBuffer: number; +} + +export interface ContextToolDetail { + name: string; + tokens: number; +} + +export interface ContextMemoryDetail { + path: string; + tokens: number; +} + +export interface ContextSkillDetail { + name: string; + tokens: number; +} + +export type HistoryItemContextUsage = HistoryItemBase & { + type: 'context_usage'; + modelName: string; + totalTokens: number; + contextWindowSize: number; + breakdown: ContextCategoryBreakdown; + builtinTools: ContextToolDetail[]; + mcpTools: ContextToolDetail[]; + memoryFiles: ContextMemoryDetail[]; + skills: ContextSkillDetail[]; + /** True when totalTokens is estimated (no API call yet) rather than from API response */ + isEstimated?: boolean; +}; + // Using Omit seems to have some issues with typescript's // type inference e.g. historyItem.type === 'tool_group' isn't auto-inferring that // 'tools' in historyItem. @@ -278,7 +320,8 @@ export type HistoryItemWithoutId = | HistoryItemExtensionsList | HistoryItemToolsList | HistoryItemSkillsList - | HistoryItemMcpStatus; + | HistoryItemMcpStatus + | HistoryItemContextUsage; export type HistoryItem = HistoryItemWithoutId & { id: number }; @@ -301,6 +344,7 @@ export enum MessageType { TOOLS_LIST = 'tools_list', SKILLS_LIST = 'skills_list', MCP_STATUS = 'mcp_status', + CONTEXT_USAGE = 'context_usage', } // Simplified message structure for internal feedback diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index c76fd2f8d..c2112fbd3 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -272,6 +272,7 @@ export * from './utils/projectSummary.js'; export * from './utils/quotaErrorDetection.js'; export * from './utils/readManyFiles.js'; export * from './utils/request-tokenizer/supportedImageFormats.js'; +export { TextTokenizer } from './utils/request-tokenizer/textTokenizer.js'; export * from './utils/retry.js'; export * from './utils/ripgrepUtils.js'; export * from './utils/schemaValidator.js';