diff --git a/.qwen/skills/e2e-testing/SKILL.md b/.qwen/skills/e2e-testing/SKILL.md index d34d3537b..105248d26 100644 --- a/.qwen/skills/e2e-testing/SKILL.md +++ b/.qwen/skills/e2e-testing/SKILL.md @@ -156,3 +156,24 @@ tmux kill-session -t test For testing MCP tool behavior end-to-end, read `references/mcp-testing.md`. It covers the setup gotchas (config location, git repo requirement) and includes a reusable zero-dependency test server template in `scripts/mcp-test-server.js`. + +## Token Usage Stats + +Use `scripts/token-stats.py` to summarize token usage across recent API logs: + +```bash +python3 .qwen/skills/e2e-testing/scripts/token-stats.py 20 # last 20 requests +``` + +Shows input, cached, and output tokens per request with cache hit rates. Useful +for verifying prompt caching behavior or investigating unexpected token counts. + +## Tips + +- Use interactive (tmux) mode when the bug involves permission prompts, slash + commands, or keyboard interactions. Headless mode has no TUI — these don't + exist there. +- Use interactive (tmux) mode for hang-related issues. Headless mode produces + no output when the process stalls, giving you nothing to work with. +- Use `--approval-mode default` when testing permission rules. `yolo` bypasses + rule evaluation entirely — it can't test whether a rule matches. 
#!/usr/bin/env python3
"""Display token usage stats from the last X request logs in ~/.qwen/logs."""

import argparse
import json
import sys
from pathlib import Path


def parse_args():
    """Parse CLI args: an optional positional count and an optional --log-dir."""
    p = argparse.ArgumentParser(description="Show token stats from qwen request logs")
    p.add_argument("count", nargs="?", type=int, default=10, help="Number of recent logs to show (default: 10)")
    p.add_argument("--log-dir", default=Path.home() / ".qwen" / "logs", type=Path)
    return p.parse_args()


def load_logs(log_dir: Path, count: int):
    """Yield (parsed_json, filename) for the `count` most recent *.json logs.

    Files are taken in lexicographic filename order, which matches
    chronological order for timestamp-named logs — TODO confirm the log
    filenames are timestamp-prefixed. Malformed or unreadable files are
    skipped silently so one bad log doesn't abort the report.
    """
    # Guard count <= 0 explicitly: files[-0:] is files[0:] in Python, so the
    # naive slice would return ALL files when the user asks for zero.
    if count <= 0:
        return
    files = sorted(log_dir.glob("*.json"))
    for f in files[-count:]:
        try:
            with open(f) as fh:
                yield json.load(fh), f.name
        except (json.JSONDecodeError, OSError):
            continue


def main() -> int:
    """Print a per-request token-usage table plus totals; return exit status."""
    args = parse_args()
    if not args.log_dir.is_dir():
        # Error goes to stderr with a non-zero exit so scripts can detect it.
        print(f"Log directory not found: {args.log_dir}", file=sys.stderr)
        return 1

    rows = []
    total_input = total_cached = total_output = 0

    for data, fname in load_logs(args.log_dir, args.count):
        ts = data.get("timestamp", "?")
        model = data.get("request", {}).get("model", "?")
        usage = data.get("response", {}).get("usage", {})

        input_tok = usage.get("prompt_tokens", 0)
        output_tok = usage.get("completion_tokens", 0)
        cached_tok = usage.get("prompt_tokens_details", {}).get("cached_tokens", 0)
        # Cache hit rate is cached/input; guard the zero-input case.
        cache_rate = (cached_tok / input_tok * 100) if input_tok else 0

        total_input += input_tok
        total_cached += cached_tok
        total_output += output_tok

        rows.append((ts, model, input_tok, cached_tok, output_tok, cache_rate))

    if not rows:
        print("No logs found.")
        return 0

    # Print table
    hdr = f"{'Timestamp':<28} {'Model':<16} {'Input':>8} {'Cached':>8} {'Output':>8} {'Cache%':>7}"
    sep = "-" * len(hdr)
    print(hdr)
    print(sep)
    for ts, model, inp, cached, out, rate in rows:
        print(f"{ts:<28} {model:<16} {inp:>8,} {cached:>8,} {out:>8,} {rate:>6.1f}%")

    # Totals
    print(sep)
    overall_rate = (total_cached / total_input * 100) if total_input else 0
    print(f"{'TOTAL':<28} {'':<16} {total_input:>8,} {total_cached:>8,} {total_output:>8,} {overall_rate:>6.1f}%")
    return 0


if __name__ == "__main__":
    sys.exit(main())
Reasons: - -- **Smoke Test** — Verify the CLI starts correctly and responds to user input, ensuring the PR didn't break anything -- **Visual Verification** — If there are UI changes, screenshots provide the most intuitive review evidence -- **Documentation** — Attach screenshots to the PR comments so reviewers can see the results without building locally - -```bash -# Checkout branch & build -gh pr checkout -npm run build -``` - -#### Scenario Selection Strategy - -Choose appropriate scenarios based on the PR's scope of changes: - -| PR Type | Recommended Scenarios | Description | -| ------------------------------------- | ------------------------------------------------------------ | --------------------------------- | -| **Any PR** (default) | smoke test: send `hi`, verify startup & response | Minimal-cost smoke validation | -| Slash command changes | Corresponding command scenarios (`/about`, `/context`, etc.) | Verify command output correctness | -| Ink component / layout changes | Multiple scenarios + full-flow long screenshot | Verify visual effects | -| Large refactors / dependency upgrades | Run `scenarios/all.ts` fully | Full regression | - -#### Running Screenshots - -```bash -# Write scenario config to integration-tests/terminal-capture/scenarios/ -# See terminal-capture skill for FlowStep API reference - -# Single scenario -npx tsx integration-tests/terminal-capture/run.ts integration-tests/terminal-capture/scenarios/.ts - - -# Check output in screenshots/ directory -``` - -#### Minimal Smoke Test Example - -No need to write a new scenario file — just use the existing `about.ts`. It sends "hi" then runs `/about`, covering startup + input + command response: - -```bash -npx tsx integration-tests/terminal-capture/run.ts integration-tests/terminal-capture/scenarios/about.ts -``` - -### 4. 
Upload Screenshots to PR - -Use Playwright MCP browser to upload screenshots to the PR comments (images hosted at `github.com/user-attachments/assets/`, zero side effects): - -1. Open the PR page with Playwright: `https://github.com//pull/` -2. Click the comment text box and enter a comment title (e.g., `## 📷 Terminal Smoke Test Screenshots`) -3. Click the "Paste, drop, or click to add files" button to trigger the file picker -4. Upload screenshot PNG files via `browser_file_upload` (can upload multiple one by one) -5. Wait for GitHub to process (about 2-3 seconds) — image links auto-insert into the comment box -6. Click the "Comment" button to submit - -> **Prerequisite**: Playwright MCP needs `--user-data-dir` configured to persist GitHub login session. First time use requires manually logging into GitHub in the Playwright browser. - -### 5. Submit Review - -Submit code review comments via `gh pr review`: - -```bash -gh pr review --comment --body "review content" -``` diff --git a/.qwen/skills/terminal-capture/SKILL.md b/.qwen/skills/terminal-capture/SKILL.md index 043f49542..a9ac59f45 100644 --- a/.qwen/skills/terminal-capture/SKILL.md +++ b/.qwen/skills/terminal-capture/SKILL.md @@ -196,7 +196,7 @@ export default [ ## Integration with PR Review -This tool is commonly used for visual verification during PR reviews. For the complete code review + screenshot workflow, see the [pr-review](../pr-review/SKILL.md) skill. +This tool is commonly used for visual verification during PR reviews. 
## Troubleshooting diff --git a/packages/cli/src/ui/components/messages/AskUserQuestionDialog.test.tsx b/packages/cli/src/ui/components/messages/AskUserQuestionDialog.test.tsx index 5d396b176..4e7c195b6 100644 --- a/packages/cli/src/ui/components/messages/AskUserQuestionDialog.test.tsx +++ b/packages/cli/src/ui/components/messages/AskUserQuestionDialog.test.tsx @@ -219,36 +219,6 @@ describe('', () => { }); describe('multiple questions', () => { - it('shows Submit tab for multiple questions', async () => { - const onConfirm = vi.fn(); - const details = createConfirmationDetails({ - questions: [ - createSingleQuestion({ header: 'Q1' }), - createSingleQuestion({ header: 'Q2' }), - ], - }); - - const { stdin, lastFrame, unmount } = renderWithProviders( - , - ); - await wait(); - - // Navigate to submit tab (right arrow twice: Q1 -> Q2 -> Submit) - stdin.write('\u001B[C'); // Right - await wait(); - stdin.write('\u001B[C'); // Right - await wait(); - - const output = lastFrame(); - expect(output).toContain('Submit answers'); - expect(output).toContain('Cancel'); - expect(output).toContain('Your answers'); - unmount(); - }); - it('shows unanswered questions as (not answered) in Submit tab', async () => { const onConfirm = vi.fn(); const details = createConfirmationDetails({