mirror of
https://github.com/QwenLM/qwen-code.git
synced 2026-05-18 23:42:43 +00:00
Merge remote-tracking branch 'origin/main' into feat/git-diff-stats
# Conflicts: # scripts/unused-keys-only-in-locales.json
This commit is contained in:
commit
8b277f66c3
285 changed files with 23848 additions and 5365 deletions
167
scripts/benchmark-api-latency.mjs
Normal file
167
scripts/benchmark-api-latency.mjs
Normal file
|
|
@ -0,0 +1,167 @@
|
|||
#!/usr/bin/env node
|
||||
/**
|
||||
* API Preconnect Latency Benchmark
|
||||
*
|
||||
* Measures the real TCP+TLS connection reuse benefit of preconnect by using
|
||||
* undici (the same library as apiPreconnect.ts) within a single process.
|
||||
*
|
||||
* Unlike the previous curl-based approach, this correctly measures connection
|
||||
* pool reuse: the same dispatcher instance is shared between the preconnect
|
||||
* HEAD request and the subsequent measured request, just like in production.
|
||||
*
|
||||
* Usage:
|
||||
* node scripts/benchmark-api-latency.mjs
|
||||
*
|
||||
* Environment variables:
|
||||
* ITERATIONS=3 Number of cold/warm pairs per endpoint (default: 3)
|
||||
* REQUEST_TIMEOUT_MS=5000 Per-request timeout in ms (default: 5000)
|
||||
* BENCHMARK_URLS Space-separated extra URLs to benchmark
|
||||
*/
|
||||
|
||||
import { createRequire } from 'module';
|
||||
import { performance } from 'perf_hooks';
|
||||
|
||||
// Resolve undici exactly as packages/core would (same version used by
// preconnect). Anchoring createRequire at the core package lets Node's normal
// resolution find the module whether it is installed in
// packages/core/node_modules or hoisted to a parent node_modules directory,
// instead of depending on one hard-coded on-disk location.
const require = createRequire(
  new URL('../packages/core/package.json', import.meta.url),
);
const { Agent } = require('undici');
|
||||
|
||||
/**
 * Parse a positive-integer setting, falling back to a default when the raw
 * value is missing, not numeric, zero, or negative.
 *
 * @param {string | undefined} raw - Raw text (typically an environment variable).
 * @param {number} fallback - Value returned when `raw` is unusable.
 * @returns {number} A positive integer.
 */
function readPositiveInt(raw, fallback) {
  const parsed = Number.parseInt(raw ?? '', 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
}

// Number of cold/warm pairs per endpoint. An invalid value would otherwise
// become NaN/0 and silently skip every measurement.
const ITERATIONS = readPositiveInt(process.env['ITERATIONS'], 3);

// Per-request timeout in milliseconds, passed to AbortSignal.timeout().
const REQUEST_TIMEOUT_MS = readPositiveInt(
  process.env['REQUEST_TIMEOUT_MS'],
  5000,
);
|
||||
|
||||
// Endpoints that are always benchmarked.
const DEFAULT_ENDPOINTS = [
  { url: 'https://api.openai.com', label: 'OpenAI' },
  { url: 'https://api.anthropic.com', label: 'Anthropic' },
  {
    url: 'https://dashscope.aliyuncs.com/compatible-mode/v1',
    label: 'DashScope (openai-compatible)',
  },
];

/**
 * Turn a whitespace-separated list of URLs into endpoint descriptors.
 * Splitting on any whitespace run (not just a single space) keeps tabs and
 * newlines from being glued into a URL when the variable is set from a
 * multi-line shell value.
 *
 * @param {string | undefined} raw - Raw BENCHMARK_URLS value.
 * @returns {{ url: string, label: string }[]} One entry per URL; the URL doubles as its label.
 */
function parseExtraEndpoints(raw) {
  return (raw ?? '')
    .split(/\s+/)
    .filter(Boolean)
    .map((url) => ({ url, label: url }));
}

const extraUrls = parseExtraEndpoints(process.env['BENCHMARK_URLS']);

const ENDPOINTS = [...DEFAULT_ENDPOINTS, ...extraUrls];
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Build a fresh undici Agent with its own connection pool.
 *
 * @returns {Agent} Agent with header/body timeouts disabled (0) and a 60s
 *   keep-alive timeout.
 */
function newDispatcher() {
  const agentOptions = {
    headersTimeout: 0,
    bodyTimeout: 0,
    keepAliveTimeout: 60_000,
  };
  return new Agent(agentOptions);
}
|
||||
|
||||
/**
 * Issue one request through the given dispatcher and time it.
 *
 * The function never rejects: whatever happens, the elapsed time is returned.
 * Note that fetch() resolves for any HTTP status (including non-2xx), so only
 * transport-level failures reach the catch block. Timeouts are an expected
 * outcome and stay quiet; any other failure (DNS, TLS, invalid URL, ...) is
 * reported on stderr so a broken endpoint is not mistaken for a fast one.
 *
 * @param {string} url - Target URL.
 * @param {object} dispatcher - undici dispatcher whose pool should be used.
 * @param {string} [method='HEAD'] - HTTP method.
 * @returns {Promise<number>} Elapsed milliseconds until fetch settled.
 */
async function fetchOnce(url, dispatcher, method = 'HEAD') {
  const start = performance.now();
  let failure = null;

  try {
    await fetch(url, {
      method,
      signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS),
      headers: { 'User-Agent': 'QwenCode-Benchmark/1.0' },
      dispatcher,
    });
  } catch (err) {
    failure = err;
  }

  // Take the reading before logging so the warning never inflates the result.
  const elapsed = performance.now() - start;

  if (failure !== null && failure?.name !== 'TimeoutError') {
    console.warn(
      ` warning: ${method} ${url} failed: ${failure?.message ?? String(failure)}`,
    );
  }

  return elapsed;
}
|
||||
|
||||
/**
 * Cold measurement: brand-new dispatcher, no preconnect.
 *
 * The dispatcher is destroyed once the request has been timed so the socket
 * it opened is actually released instead of lingering in an abandoned pool.
 *
 * @param {string} url - Endpoint to request.
 * @returns {Promise<number>} Elapsed ms of the measured request.
 */
async function measureCold(url) {
  const dispatcher = newDispatcher();
  try {
    return await fetchOnce(url, dispatcher, 'HEAD');
  } finally {
    // Runs after the timing has been captured, so it does not affect the result.
    await dispatcher.destroy();
  }
}
|
||||
|
||||
/**
 * Warm measurement: same dispatcher for preconnect HEAD + measured request.
 *
 * The dispatcher is destroyed afterwards so each warm run releases its
 * connection instead of leaking it.
 *
 * @param {string} url - Endpoint to request.
 * @returns {Promise<number>} Elapsed ms of the measured request only (not the
 *   preconnect time).
 */
async function measureWarm(url) {
  const dispatcher = newDispatcher();
  try {
    // Preconnect: mirrors apiPreconnect.ts behaviour. fetchOnce handles its
    // own failures and never rejects, so no extra catch is required here.
    await fetchOnce(url, dispatcher, 'HEAD');

    // Measured request reuses the warmed connection from the same pool.
    return await fetchOnce(url, dispatcher, 'HEAD');
  } finally {
    await dispatcher.destroy();
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Render a duration for display.
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} The value with one decimal place followed by "ms".
 */
function fmt(ms) {
  const oneDecimal = ms.toFixed(1);
  return `${oneDecimal}ms`;
}
|
||||
|
||||
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param {number[]} arr - Samples to average.
 * @returns {number} The mean, or 0 for an empty list (instead of the NaN that
 *   0 / 0 would produce and that would leak into the printed table).
 */
function avg(arr) {
  if (arr.length === 0) {
    return 0;
  }
  let total = 0;
  for (const value of arr) {
    total += value;
  }
  return total / arr.length;
}
|
||||
|
||||
/**
 * Run ITERATIONS cold/warm measurement pairs against one endpoint, logging
 * each pair as it completes.
 *
 * @param {{ url: string, label: string }} endpoint - Endpoint descriptor.
 * @returns {Promise<{ label: string, url: string, avgCold: number,
 *   avgWarm: number, saved: number, pct: number }>} Averages in ms, the
 *   absolute saving, and the saving as a percentage of the cold average.
 */
async function benchmarkEndpoint({ url, label }) {
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  console.log(`\n ${label}`);
  console.log(` ${url}`);

  const coldTimes = [];
  const warmTimes = [];

  let run = 0;
  while (run < ITERATIONS) {
    const cold = await measureCold(url);
    coldTimes.push(cold);

    // Brief pause so the OS can release the cold connection
    await pause(500);

    const warm = await measureWarm(url);
    warmTimes.push(warm);

    run += 1;
    console.log(` run ${run}: cold=${fmt(cold)} warm=${fmt(warm)}`);

    await pause(500);
  }

  const avgCold = avg(coldTimes);
  const avgWarm = avg(warmTimes);
  const saved = avgCold - avgWarm;

  // Guard the division: a non-positive cold average reports 0% improvement.
  let pct = 0;
  if (avgCold > 0) {
    pct = (saved / avgCold) * 100;
  }

  return { label, url, avgCold, avgWarm, saved, pct };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Banner with the effective settings.
console.log('=== Qwen Code API Preconnect Latency Benchmark ===');
console.log(`Iterations per endpoint : ${ITERATIONS}`);
console.log(`Request timeout : ${REQUEST_TIMEOUT_MS}ms`);
console.log('\nRunning...');

// Endpoints are benchmarked one after another so runs never overlap.
const results = [];
for (const endpoint of ENDPOINTS) {
  results.push(await benchmarkEndpoint(endpoint));
}

// Summary table
console.log('\n\n=== Results ===\n');

const headerCells = [
  'Endpoint'.padEnd(36),
  'Cold (avg)'.padStart(12),
  'Warm (avg)'.padStart(12),
  'Saved'.padStart(10),
  'Improvement'.padStart(13),
];
console.log(headerCells.join(''));
console.log('─'.repeat(83));

for (const { label, avgCold, avgWarm, saved, pct } of results) {
  // Same thresholds as the legend printed below.
  let status = '✗';
  if (pct >= 30) {
    status = '✓';
  } else if (pct >= 10) {
    status = '~';
  }

  const rowCells = [
    label.slice(0, 35).padEnd(36),
    fmt(avgCold).padStart(12),
    fmt(avgWarm).padStart(12),
    fmt(saved).padStart(10),
    `${pct.toFixed(1)}% ${status}`.padStart(13),
  ];
  console.log(rowCells.join(''));
}

console.log('\nLegend: ✓ ≥30% improvement ~ 10–30% ✗ <10%');
|
||||
|
|
@ -13,7 +13,6 @@ import { fileURLToPath } from 'url';
|
|||
import { dirname } from 'path';
|
||||
|
||||
// Get __dirname for ESM modules
|
||||
// @ts-expect-error - import.meta is supported in NodeNext module system at runtime
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
|
||||
interface CheckResult {
|
||||
|
|
@ -23,6 +22,7 @@ interface CheckResult {
|
|||
stats: {
|
||||
totalKeys: number;
|
||||
translatedKeys: number;
|
||||
zhTWTranslatedKeys: number;
|
||||
unusedKeys: string[];
|
||||
unusedKeysOnlyInLocales?: string[]; // 新增:只在 locales 中存在的未使用键
|
||||
};
|
||||
|
|
@ -172,27 +172,31 @@ function checkKeyValueConsistency(
|
|||
}
|
||||
|
||||
/**
|
||||
* Check if en.js and zh.js have matching keys
|
||||
* Check if locale files have matching keys with en.js
|
||||
* @param enTranslations The en.js translations
|
||||
* @param localeTranslations The target locale translations (zh.js or zh-TW.js)
|
||||
* @param localeLabel Label for diagnostics (e.g., "zh.js" or "zh-TW.js")
|
||||
*/
|
||||
function checkKeyMatching(
|
||||
enTranslations: Record<string, string | string[]>,
|
||||
zhTranslations: Record<string, string | string[]>,
|
||||
localeTranslations: Record<string, string | string[]>,
|
||||
localeLabel: string,
|
||||
): string[] {
|
||||
const errors: string[] = [];
|
||||
const enKeys = new Set(Object.keys(enTranslations));
|
||||
const zhKeys = new Set(Object.keys(zhTranslations));
|
||||
const localeKeys = new Set(Object.keys(localeTranslations));
|
||||
|
||||
// Check for keys in en but not in zh
|
||||
// Check for keys in en but not in locale
|
||||
for (const key of enKeys) {
|
||||
if (!zhKeys.has(key)) {
|
||||
errors.push(`Missing translation in zh.js: "${key}"`);
|
||||
if (!localeKeys.has(key)) {
|
||||
errors.push(`Missing translation in ${localeLabel}: "${key}"`);
|
||||
}
|
||||
}
|
||||
|
||||
// Check for keys in zh but not in en
|
||||
for (const key of zhKeys) {
|
||||
// Check for keys in locale but not in en
|
||||
for (const key of localeKeys) {
|
||||
if (!enKeys.has(key)) {
|
||||
errors.push(`Extra key in zh.js (not in en.js): "${key}"`);
|
||||
errors.push(`Extra key in ${localeLabel} (not in en.js): "${key}"`);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -304,10 +308,12 @@ async function checkI18n(): Promise<CheckResult> {
|
|||
|
||||
const enPath = path.join(localesDir, 'en.js');
|
||||
const zhPath = path.join(localesDir, 'zh.js');
|
||||
const zhTWPath = path.join(localesDir, 'zh-TW.js');
|
||||
|
||||
// Load translation files
|
||||
let enTranslations: Record<string, string | string[]>;
|
||||
let zhTranslations: Record<string, string | string[]>;
|
||||
let zhTWTranslations: Record<string, string | string[]>;
|
||||
|
||||
try {
|
||||
enTranslations = await loadTranslationsFile(enPath);
|
||||
|
|
@ -319,7 +325,12 @@ async function checkI18n(): Promise<CheckResult> {
|
|||
success: false,
|
||||
errors,
|
||||
warnings,
|
||||
stats: { totalKeys: 0, translatedKeys: 0, unusedKeys: [] },
|
||||
stats: {
|
||||
totalKeys: 0,
|
||||
translatedKeys: 0,
|
||||
zhTWTranslatedKeys: 0,
|
||||
unusedKeys: [],
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -333,7 +344,31 @@ async function checkI18n(): Promise<CheckResult> {
|
|||
success: false,
|
||||
errors,
|
||||
warnings,
|
||||
stats: { totalKeys: 0, translatedKeys: 0, unusedKeys: [] },
|
||||
stats: {
|
||||
totalKeys: 0,
|
||||
translatedKeys: 0,
|
||||
zhTWTranslatedKeys: 0,
|
||||
unusedKeys: [],
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
try {
|
||||
zhTWTranslations = await loadTranslationsFile(zhTWPath);
|
||||
} catch (error) {
|
||||
errors.push(
|
||||
`Failed to load zh-TW.js: ${error instanceof Error ? error.message : String(error)}`,
|
||||
);
|
||||
return {
|
||||
success: false,
|
||||
errors,
|
||||
warnings,
|
||||
stats: {
|
||||
totalKeys: 0,
|
||||
translatedKeys: 0,
|
||||
zhTWTranslatedKeys: 0,
|
||||
unusedKeys: [],
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -342,9 +377,21 @@ async function checkI18n(): Promise<CheckResult> {
|
|||
errors.push(...consistencyErrors);
|
||||
|
||||
// Check key matching between en and zh
|
||||
const matchingErrors = checkKeyMatching(enTranslations, zhTranslations);
|
||||
const matchingErrors = checkKeyMatching(
|
||||
enTranslations,
|
||||
zhTranslations,
|
||||
'zh.js',
|
||||
);
|
||||
errors.push(...matchingErrors);
|
||||
|
||||
// Check key matching between en and zh-TW
|
||||
const matchingTWErrors = checkKeyMatching(
|
||||
enTranslations,
|
||||
zhTWTranslations,
|
||||
'zh-TW.js',
|
||||
);
|
||||
errors.push(...matchingTWErrors);
|
||||
|
||||
// Extract used keys from source code
|
||||
const usedKeys = await extractUsedKeys(sourceDir);
|
||||
|
||||
|
|
@ -363,7 +410,8 @@ async function checkI18n(): Promise<CheckResult> {
|
|||
}
|
||||
|
||||
const totalKeys = Object.keys(enTranslations).length;
|
||||
const translatedKeys = Object.keys(zhTranslations).length;
|
||||
const zhTranslatedKeys = Object.keys(zhTranslations).length;
|
||||
const zhTWTranslatedKeys = Object.keys(zhTWTranslations).length;
|
||||
|
||||
return {
|
||||
success: errors.length === 0,
|
||||
|
|
@ -371,7 +419,8 @@ async function checkI18n(): Promise<CheckResult> {
|
|||
warnings,
|
||||
stats: {
|
||||
totalKeys,
|
||||
translatedKeys,
|
||||
translatedKeys: zhTranslatedKeys,
|
||||
zhTWTranslatedKeys,
|
||||
unusedKeys,
|
||||
unusedKeysOnlyInLocales,
|
||||
},
|
||||
|
|
|
|||
472
scripts/test-rewind-e2e.sh
Executable file
472
scripts/test-rewind-e2e.sh
Executable file
|
|
@ -0,0 +1,472 @@
|
|||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# test-rewind-e2e.sh — tmux-based E2E verification for the conversation rewind
|
||||
# feature (PR #3441).
|
||||
#
|
||||
# Covers all 5 manual test items from the PR description:
|
||||
# 1. /rewind command → pick turn → UI truncated, input pre-populated
|
||||
# 2. Double-ESC on empty prompt → selector opens → rewind → continue
|
||||
# 3. ESC during streaming → cancels request, does NOT open selector
|
||||
# 4. /rewind with no history → selector does not open
|
||||
# 5. After rewind, model does not reference removed turns
|
||||
#
|
||||
# Prerequisites:
|
||||
# - tmux installed
|
||||
# - CLI already built: npm run build && npm run bundle
|
||||
# - Valid model API credentials in environment
|
||||
#
|
||||
# Usage:
|
||||
# bash scripts/test-rewind-e2e.sh
|
||||
# =============================================================================
|
||||
|
||||
# -u: referencing an unset variable is an error.
# pipefail: a pipeline fails when any stage fails.
# errexit (-e) is not enabled, so a failing command does not abort the script.
set -uo pipefail

# Result counters, updated by pass() and fail().
PASS_COUNT=0
FAIL_COUNT=0

# Seconds per wait call; override with REWIND_TEST_TIMEOUT.
TIMEOUT=${REWIND_TEST_TIMEOUT:-120}

# Paths derived from this script's location.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
BUNDLE="$PROJECT_DIR/dist/cli.js"

# Per-run tmux session name (suffixed with this shell's PID) and scratch dir.
SESSION="test-rewind-$$"
WORKDIR="$(mktemp -d)"

# Colors (escape sequences are expanded later by `echo -e`)
RESET='\033[0m'
BOLD='\033[1m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Exit handler: kill the tmux session if it still exists and delete the
# scratch working directory.
cleanup() {
  if ! tmux kill-session -t "$SESSION" 2>/dev/null; then
    : # no such session; nothing to kill
  fi
  rm -rf "$WORKDIR"
}
trap cleanup EXIT
|
||||
|
||||
# Launch the CLI inside a detached 120x40 tmux session rooted at $WORKDIR and
# wait (up to 60s) for its prompt. Returns non-zero if the session cannot be
# created or the prompt never appears.
start_session() {
  # Fail fast instead of polling for 60s against a session that does not exist.
  tmux new-session -d -s "$SESSION" -x 120 -y 40 \
    "cd '$WORKDIR' && node '$BUNDLE' --approval-mode yolo 2>'$WORKDIR/stderr.log'" ||
    return 1

  # Deliver ESC immediately — without this, tmux holds ESC for up to 500ms
  # thinking it might be the start of an escape sequence, which breaks
  # double-ESC detection and other ESC-dependent interactions.
  # Must be set as a server option (not session) in tmux 2.6+.
  # Issued AFTER new-session: set-option cannot start a tmux server, so when no
  # server is running yet it fails (silently, because of `|| true`) and the
  # option would never be applied.
  tmux set-option -sg escape-time 0 2>/dev/null || true

  wait_for_prompt 60
}
|
||||
|
||||
# Kill the test's tmux session (ignoring "no such session") and pause for one
# second before returning.
kill_session() {
  tmux kill-session -t "$SESSION" 2>/dev/null || :
  sleep 1
}
|
||||
|
||||
# Print the pane contents including the last 200 lines of scrollback
# (used for content assertions). Always succeeds; prints nothing if the
# session is gone.
capture() {
  tmux capture-pane -p -S -200 -t "$SESSION" 2>/dev/null || true
}
|
||||
|
||||
# Print only what is currently visible in the pane (used for prompt
# detection). Always succeeds; prints nothing if the session is gone.
capture_visible() {
  tmux capture-pane -p -t "$SESSION" 2>/dev/null || true
}
|
||||
|
||||
# Type $1 into the session verbatim, then press Enter.
send() {
  local text="$1"

  # -l sends the text literally rather than interpreting it as key names.
  tmux send-keys -l -t "$SESSION" "$text"
  sleep 0.5
  tmux send-keys -t "$SESSION" Enter
}
|
||||
|
||||
# Forward the given key names (e.g. Up, Enter, Escape, C-u) to the session
# without appending Enter.
send_keys() {
  tmux send-keys -t "${SESSION}" "$@"
}
|
||||
|
||||
# Poll the visible pane every 2 seconds until "Type your message" shows up.
# $1: timeout in seconds (defaults to $TIMEOUT).
# Returns 0 once the prompt is seen; on timeout dumps the pane to stderr and
# returns 1.
wait_for_prompt() {
  local timeout="${1:-$TIMEOUT}"
  local elapsed

  for ((elapsed = 0; elapsed < timeout; elapsed += 2)); do
    capture_visible | grep -qF "Type your message" && return 0
    sleep 2
  done

  {
    echo -e "${RED}TIMEOUT waiting for prompt (Type your message)${RESET}"
    echo "--- Visible pane ---"
    capture_visible
    echo "--- End ---"
  } >&2
  return 1
}
|
||||
|
||||
# Wait for the CLI to be truly idle:
#   1. "Type your message" is visible (prompt ready)
#   2. No "esc to cancel" on screen (no btw/side-query running)
#   3. Screen content unchanged for 3 consecutive seconds
# $1: timeout in seconds (defaults to $TIMEOUT).
# Returns 0 when idle; on timeout dumps the visible pane to stderr and
# returns 1.
#
# Stability is checked by comparing the captured text directly. This avoids
# depending on md5sum, which is not installed everywhere (e.g. stock macOS);
# when it was missing the hash came back empty, matched the empty initial
# value, and the screen was wrongly reported as stable.
wait_idle() {
  local timeout="${1:-$TIMEOUT}"
  local elapsed=0
  local last_screen=""
  local stable_count=0
  local screen

  while [ "$elapsed" -lt "$timeout" ]; do
    screen=$(capture_visible)

    # Not idle while the prompt is missing or a side-query is still running:
    # reset the stability tracking and poll again in 2 seconds.
    if ! grep -qF "Type your message" <<<"$screen" ||
      grep -qF "esc to cancel" <<<"$screen"; then
      stable_count=0
      last_screen=""
      sleep 2
      elapsed=$((elapsed + 2))
      continue
    fi

    # Check screen stability. $screen is never empty here (it contains the
    # prompt text), so it cannot match the empty initial $last_screen.
    if [ "$screen" = "$last_screen" ]; then
      stable_count=$((stable_count + 1))
      if [ "$stable_count" -ge 3 ]; then
        return 0
      fi
    else
      last_screen="$screen"
      stable_count=0
    fi

    sleep 1
    elapsed=$((elapsed + 1))
  done

  echo -e "${RED}TIMEOUT waiting for idle${RESET}" >&2
  echo "--- Visible pane ---" >&2
  capture_visible >&2
  echo "--- End ---" >&2
  return 1
}
|
||||
|
||||
# Poll the visible pane every 2 seconds until it contains a fixed string.
# $1: text to look for (matched literally, not as a regex).
# $2: timeout in seconds (defaults to $TIMEOUT).
# Returns 0 when found; on timeout dumps the pane to stderr and returns 1.
wait_for() {
  local text="$1"
  local timeout="${2:-$TIMEOUT}"
  local elapsed

  for ((elapsed = 0; elapsed < timeout; elapsed += 2)); do
    capture_visible | grep -qF "$text" && return 0
    sleep 2
  done

  {
    echo -e "${RED}TIMEOUT waiting for: ${text}${RESET}"
    echo "--- Visible pane ---"
    capture_visible
    echo "--- End ---"
  } >&2
  return 1
}
|
||||
|
||||
# Succeed if the literal string $1 is currently on the visible pane.
# Otherwise report the failure plus a dump of the pane on stderr and return 1.
assert_screen() {
  local text="$1"

  capture_visible | grep -qF "$text" && return 0

  {
    echo -e "${RED}ASSERT FAILED: expected '${text}' on screen${RESET}"
    echo "--- Visible pane ---"
    capture_visible
    echo "--- End ---"
  } >&2
  return 1
}
|
||||
|
||||
# Succeed if the literal string $1 appears anywhere in the full capture
# (visible pane plus scrollback). Otherwise report on stderr and return 1.
assert_scrollback() {
  local text="$1"

  if ! capture | grep -qF "$text"; then
    echo -e "${RED}ASSERT FAILED: expected '${text}' in scrollback${RESET}" >&2
    return 1
  fi
  return 0
}
|
||||
|
||||
# Succeed if the literal string $1 is absent from the visible pane.
# If it is present, report the failure plus a dump of the pane on stderr and
# return 1.
assert_no_screen() {
  local text="$1"

  if ! capture_visible | grep -qF "$text"; then
    return 0
  fi

  {
    echo -e "${RED}ASSERT FAILED: did NOT expect '${text}' on screen${RESET}"
    echo "--- Visible pane ---"
    capture_visible
    echo "--- End ---"
  } >&2
  return 1
}
|
||||
|
||||
# Record a passing test. $1: test name.
pass() {
  local label="$1"
  echo -e "${GREEN}[PASS]${RESET} ${label}"
  PASS_COUNT=$((PASS_COUNT + 1))
}
|
||||
|
||||
# Record a failing test. $1: test name, $2: short reason.
fail() {
  local label="$1"
  local reason="$2"
  echo -e "${RED}[FAIL]${RESET} ${label}: ${reason}"
  FAIL_COUNT=$((FAIL_COUNT + 1))
}
|
||||
|
||||
# Execute one test function and record the outcome.
# Usage: run_test "Test Name" test_function_name
#
# The function runs inside a command substitution (a subshell) with stdout and
# stderr captured together. A zero exit status counts as a pass; otherwise the
# last TIMEOUT / ASSERT FAILED line (or the exit code) becomes the failure
# reason and the first 30 lines of captured output are printed.
run_test() {
  local name="$1"
  local func="$2"
  local output=""
  local status=0

  output=$("$func" 2>&1) || status=$?

  if [ "$status" -ne 0 ]; then
    # Pick the most recent diagnostic emitted by the wait/assert helpers
    local reason
    reason=$(grep -E 'TIMEOUT|ASSERT FAILED' <<<"$output" | tail -1)
    fail "$name" "${reason:-exit code $status}"
    head -30 <<<"$output"
  else
    pass "$name"
  fi

  # Always clean up the session between tests
  kill_session 2>/dev/null || true
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pre-flight checks
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# tmux drives every test, so refuse to start without it.
if ! command -v tmux &>/dev/null; then
  echo -e "${RED}Error: tmux is not installed${RESET}" >&2
  exit 1
fi

# Build the CLI bundle on demand. errexit is not enabled, so a failed build
# must be detected explicitly; otherwise every test would just time out
# against a CLI that cannot start.
if [ ! -f "$BUNDLE" ]; then
  echo -e "${YELLOW}Bundle not found at $BUNDLE, building...${RESET}"
  if ! (cd "$PROJECT_DIR" && npm run build && npm run bundle); then
    echo -e "${RED}Error: build failed; cannot run tests${RESET}" >&2
    exit 1
  fi
  if [ ! -f "$BUNDLE" ]; then
    echo -e "${RED}Error: build finished but $BUNDLE is still missing${RESET}" >&2
    exit 1
  fi
fi

echo -e "${BOLD}=== Rewind Feature E2E Tests (tmux) ===${RESET}"
echo "Session: $SESSION"
echo "Workdir: $WORKDIR"
echo ""
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 1: /rewind command flow
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Test 1: build a three-turn conversation, rewind to the second turn through
# the /rewind command, and check that its text is back on screen while the
# first turn survives.
test_rewind_command() {
  # Bail out right away if the CLI never reached its prompt; otherwise every
  # later wait would run to its full timeout against a dead session.
  start_session || return 1

  # Build 3-turn conversation with unique markers
  send "say exactly ALPHA1 and nothing else"
  wait_idle || return 1

  send "say exactly BETA2 and nothing else"
  wait_idle || return 1

  send "say exactly GAMMA3 and nothing else"
  wait_idle || return 1

  # Open rewind selector via /rewind command
  send "/rewind"
  wait_for "Rewind Conversation" || return 1

  # Navigate up to select BETA2 turn (selector starts at last turn GAMMA3)
  send_keys Up
  sleep 0.5

  # Select the turn
  send_keys Enter
  sleep 1
  wait_for "confirm" 15 || return 1

  # Confirm rewind
  send_keys y
  wait_for "Conversation rewound" || return 1

  # After rewind: pressing Up once from the initial selection (GAMMA3, the last
  # real user turn) lands on BETA2. Rewind targets BETA2, so its text gets
  # pre-populated into the input bar. Slash commands like /rewind are excluded
  # from the turn list by isRealUserTurn().
  assert_screen "say exactly BETA2" || return 1

  # Verify the earlier turn (ALPHA1) is still in conversation
  assert_scrollback "ALPHA1" || return 1
}

run_test "Test 1: /rewind command flow" test_rewind_command
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 2: Double-ESC opens selector
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Test 2: open the rewind selector with double-ESC on an empty prompt, rewind
# the last turn, then confirm the conversation can continue.
test_double_esc() {
  # Bail out right away if the CLI never reached its prompt.
  start_session || return 1

  send "say exactly DELTA4 and nothing else"
  wait_idle || return 1

  send "say exactly EPSILON5 and nothing else"
  wait_idle || return 1

  # Double-ESC to open rewind selector.
  # Complication: a btw side-question (prompt suggestion) may be active after
  # the model responds. If btwItem is non-null, the first ESC cancels the btw
  # (AppContainer.tsx:1896) and never reaches the rewind handler. We send
  # 3 ESCs with proper timing to handle both btw-present and btw-absent cases:
  #   ESC #1: cancels btw (if present), or starts rewind pending (if absent)
  #   sleep 1.5s: >800ms to reset any rewind pending from ESC #1
  #   ESC #2: starts rewind pending (btw now dismissed)
  #   sleep 0.5s, then confirm the "Esc again to rewind" hint is showing;
  #     the hint is expected on the first check, keeping us inside the 800ms
  #     window
  #   ESC #3: triggers rewind selector
  send_keys Escape
  sleep 1.5
  send_keys Escape
  sleep 0.5
  wait_for "Esc again to rewind" 15 || return 1

  # Third ESC within 800ms — should open selector
  send_keys Escape
  wait_for "Rewind Conversation" || return 1

  # Select last turn (pre-selected) & confirm
  send_keys Enter
  sleep 1
  send_keys y
  wait_for "Conversation rewound" || return 1

  # Continue conversation after rewind — verify model still works
  send "say exactly ZETA6 and nothing else"
  wait_idle || return 1
  assert_scrollback "ZETA6" || return 1
}

run_test "Test 2: Double-ESC opens selector" test_double_esc
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 3: ESC during streaming cancels (no rewind)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Test 3: a single ESC while a response is streaming must not open the rewind
# selector, and the CLI must return to idle afterwards.
test_esc_during_streaming() {
  # Bail out right away if the CLI never reached its prompt.
  start_session || return 1

  # Send a prompt that will generate a long response
  send "write a detailed 500 word essay about the history of computing from 1940 to 2000"

  # Give streaming a fixed 4 seconds to start.
  # NOTE(review): this is time-based, not state-based; on a slow backend the
  # ESC may arrive before streaming begins.
  sleep 4

  # Single ESC while streaming — should cancel, NOT open rewind
  send_keys Escape

  # Verify rewind selector did NOT open
  sleep 3
  assert_no_screen "Rewind Conversation" || return 1

  # Should eventually return to idle
  wait_idle || return 1
}

run_test "Test 3: ESC during streaming cancels (no rewind)" test_esc_during_streaming
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 4: /rewind with no prior conversation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Test 4: run /rewind in a fresh session with no conversation. Both outcomes
# are accepted: the selector stays closed, or it opens listing exactly one
# turn. Either way the prompt must be reachable afterwards.
test_rewind_no_history() {
  # Bail out right away if the CLI never reached its prompt.
  start_session || return 1

  # Immediately try /rewind with no conversation history.
  # NOTE(review): whether the "/rewind" text itself counts as a user turn (and
  # therefore lets the selector open with a single entry) depends on the CLI
  # implementation, which is not visible from this script — confirm.
  send "/rewind"
  sleep 3

  # The selector may or may not open depending on implementation.
  # If it opens, it should show exactly "1 turns" (only the /rewind itself).
  if capture_visible | grep -qF "Rewind Conversation"; then
    assert_screen "1 turns" || return 1

    # Close the selector with ESC
    send_keys Escape
    sleep 1
  fi

  # Either way, after dismissing we should be back at the prompt
  wait_for_prompt 10 || return 1
}

run_test "Test 4: /rewind with no prior conversation" test_rewind_no_history
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 5: After rewind, model ignores removed turns
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Test 5: after rewinding away the latest turn, the model must still be able
# to answer from the turn that survived.
test_rewind_context_isolation() {
  # Bail out right away if the CLI never reached its prompt.
  start_session || return 1

  # First turn: give model a unique fact
  send "The secret code for this session is XRAY99. Just confirm you received it by saying OK."
  wait_idle || return 1

  # Second turn: different content
  send "say exactly YANKEEZ and nothing else"
  wait_idle || return 1

  # Rewind to remove the YANKEEZ turn
  send "/rewind"
  wait_for "Rewind Conversation" || return 1

  # Select the most recent turn (YANKEEZ) and confirm
  send_keys Enter
  sleep 1
  send_keys y
  wait_for "Conversation rewound" || return 1

  # Clear pre-populated input (Ctrl-U clears line in most terminals)
  send_keys C-u
  sleep 0.5

  # Ask the model what it remembers. The reply must carry a suffix that never
  # appears next to the code in any prompt: the bare string "XRAY99" is already
  # in the scrollback from the first user message, so asserting on it alone
  # would pass no matter what the model answers.
  send "What was the secret code I told you? Reply with just the code immediately followed by the text -VERIFIED, nothing else."
  wait_idle || return 1

  # Model should reproduce XRAY99 (surviving turn) together with the suffix
  assert_scrollback "XRAY99-VERIFIED" || return 1
}

run_test "Test 5: After rewind, model ignores removed turns" test_rewind_context_isolation
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Summary
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Print the pass/fail totals; exit 1 if anything failed.
echo ""
echo -e "${BOLD}=== Results ===${RESET}"
echo -e "${GREEN}Passed: ${PASS_COUNT}${RESET}"

if [ "$FAIL_COUNT" -gt 0 ]; then
  echo -e "${RED}Failed: ${FAIL_COUNT}${RESET}"
  exit 1
fi

echo -e "Failed: 0"
echo -e "${GREEN}All ${PASS_COUNT} tests passed.${RESET}"
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
{
|
||||
"generatedAt": "2026-04-24T09:44:54.528Z",
|
||||
"generatedAt": "2026-04-27T03:28:33.689Z",
|
||||
"keys": [
|
||||
" Models: Qwen latest models\n",
|
||||
" qwen auth qwen-oauth - Authenticate with Qwen OAuth (discontinued)",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue