qwen-code/scripts/test-rewind-e2e.sh

#!/usr/bin/env bash
# =============================================================================
# test-rewind-e2e.sh — tmux-based E2E verification for the conversation rewind
# feature (PR #3441).
#
# Covers all 5 manual test items from the PR description:
#   1. /rewind command → pick turn → UI truncated, input pre-populated
#   2. Double-ESC on empty prompt → selector opens → rewind → continue
#   3. ESC during streaming → cancels request, does NOT open selector
#   4. /rewind with no history → selector does not open
#   5. After rewind, model does not reference removed turns
#
# Prerequisites:
#   - tmux installed
#   - CLI already built:  npm run build && npm run bundle
#   - Valid model API credentials in environment
#
# Usage:
#   bash scripts/test-rewind-e2e.sh
# =============================================================================

set -uo pipefail

SESSION="test-rewind-$$"
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
BUNDLE="$PROJECT_DIR/dist/cli.js"
WORKDIR="$(mktemp -d)"
PASS_COUNT=0
FAIL_COUNT=0
TIMEOUT=${REWIND_TEST_TIMEOUT:-120}  # seconds per wait_for call

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
BOLD='\033[1m'
RESET='\033[0m'

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

cleanup() {
  tmux kill-session -t "$SESSION" 2>/dev/null || true
  rm -rf "$WORKDIR"
}
trap cleanup EXIT

start_session() {
  # Deliver ESC immediately — without this, tmux holds ESC for up to 500ms
  # thinking it might be the start of an escape sequence, which breaks
  # double-ESC detection and other ESC-dependent interactions.
  # Must be set as a server option (not session) in tmux 2.6+.
  tmux set-option -sg escape-time 0 2>/dev/null || true
  tmux new-session -d -s "$SESSION" -x 120 -y 40 \
    "cd '$WORKDIR' && node '$BUNDLE' --approval-mode yolo 2>'$WORKDIR/stderr.log'"
  wait_for_prompt 60
}

kill_session() {
  tmux kill-session -t "$SESSION" 2>/dev/null || true
  sleep 1
}

# Capture entire pane including scrollback (for content assertions)
capture() {
  tmux capture-pane -t "$SESSION" -p -S -200 2>/dev/null || true
}

# Capture only the visible pane (for prompt detection)
capture_visible() {
  tmux capture-pane -t "$SESSION" -p 2>/dev/null || true
}

send() {
  # Type text using literal mode then press Enter
  tmux send-keys -t "$SESSION" -l "$1"
  sleep 0.5
  tmux send-keys -t "$SESSION" Enter
}

send_keys() {
  tmux send-keys -t "$SESSION" "$@"
}

# Wait for "Type your message" to appear on the visible pane.
wait_for_prompt() {
  local timeout="${1:-$TIMEOUT}"
  local elapsed=0

  while [ $elapsed -lt "$timeout" ]; do
    if capture_visible | grep -qF "Type your message"; then
      return 0
    fi
    sleep 2
    elapsed=$((elapsed + 2))
  done
  echo -e "${RED}TIMEOUT waiting for prompt (Type your message)${RESET}" >&2
  echo "--- Visible pane ---" >&2
  capture_visible >&2
  echo "--- End ---" >&2
  return 1
}

# Wait for the CLI to be truly idle:
#   1. "Type your message" is visible (prompt ready)
#   2. No "esc to cancel" on screen (no btw/side-query running)
#   3. Screen content unchanged for 3 consecutive seconds
wait_idle() {
  local timeout="${1:-$TIMEOUT}"
  local elapsed=0
  local last_hash=""
  local stable_count=0

  while [ $elapsed -lt "$timeout" ]; do
    local screen
    screen=$(capture_visible)

    # Must have prompt visible
    if ! echo "$screen" | grep -qF "Type your message"; then
      stable_count=0
      last_hash=""
      sleep 2
      elapsed=$((elapsed + 2))
      continue
    fi

    # Must not have btw side-query running
    if echo "$screen" | grep -qF "esc to cancel"; then
      stable_count=0
      last_hash=""
      sleep 2
      elapsed=$((elapsed + 2))
      continue
    fi

    # Check screen stability
    local current
    current=$(echo "$screen" | md5sum | cut -d' ' -f1)
    if [ "$current" = "$last_hash" ]; then
      stable_count=$((stable_count + 1))
      if [ $stable_count -ge 3 ]; then
        return 0
      fi
    else
      last_hash="$current"
      stable_count=0
    fi
    sleep 1
    elapsed=$((elapsed + 1))
  done
  echo -e "${RED}TIMEOUT waiting for idle${RESET}" >&2
  echo "--- Visible pane ---" >&2
  capture_visible >&2
  echo "--- End ---" >&2
  return 1
}

# Wait for text to appear on the visible pane
wait_for() {
  local text="$1"
  local timeout="${2:-$TIMEOUT}"
  local elapsed=0
  while [ $elapsed -lt "$timeout" ]; do
    if capture_visible | grep -qF "$text"; then
      return 0
    fi
    sleep 2
    elapsed=$((elapsed + 2))
  done
  echo -e "${RED}TIMEOUT waiting for: ${text}${RESET}" >&2
  echo "--- Visible pane ---" >&2
  capture_visible >&2
  echo "--- End ---" >&2
  return 1
}

# Assert text IS on visible pane
assert_screen() {
  local text="$1"
  if capture_visible | grep -qF "$text"; then
    return 0
  fi
  echo -e "${RED}ASSERT FAILED: expected '${text}' on screen${RESET}" >&2
  echo "--- Visible pane ---" >&2
  capture_visible >&2
  echo "--- End ---" >&2
  return 1
}

# Assert text IS on full capture (including scrollback)
assert_scrollback() {
  local text="$1"
  if capture | grep -qF "$text"; then
    return 0
  fi
  echo -e "${RED}ASSERT FAILED: expected '${text}' in scrollback${RESET}" >&2
  return 1
}

# Assert text is NOT on visible pane
assert_no_screen() {
  local text="$1"
  if capture_visible | grep -qF "$text"; then
    echo -e "${RED}ASSERT FAILED: did NOT expect '${text}' on screen${RESET}" >&2
    echo "--- Visible pane ---" >&2
    capture_visible >&2
    echo "--- End ---" >&2
    return 1
  fi
  return 0
}

pass() {
  echo -e "${GREEN}[PASS]${RESET} $1"
  PASS_COUNT=$((PASS_COUNT + 1))
}

fail() {
  echo -e "${RED}[FAIL]${RESET} $1: $2"
  FAIL_COUNT=$((FAIL_COUNT + 1))
}

# Run a test function, capturing its exit code properly.
# Usage: run_test "Test Name" test_function_name
run_test() {
  local name="$1"
  local func="$2"
  local rc=0
  local errmsg=""

  errmsg=$($func 2>&1) || rc=$?

  if [ $rc -eq 0 ]; then
    pass "$name"
  else
    # Extract last meaningful error line from stderr
    local last_err
    last_err=$(echo "$errmsg" | grep -E 'TIMEOUT|ASSERT FAILED' | tail -1)
    fail "$name" "${last_err:-exit code $rc}"
    echo "$errmsg" | head -30
  fi

  # Always clean up the session between tests
  kill_session 2>/dev/null || true
}

# ---------------------------------------------------------------------------
# Pre-flight checks
# ---------------------------------------------------------------------------

if ! command -v tmux &>/dev/null; then
  echo -e "${RED}Error: tmux is not installed${RESET}" >&2
  exit 1
fi

if [ ! -f "$BUNDLE" ]; then
  echo -e "${YELLOW}Bundle not found at $BUNDLE, building...${RESET}"
  (cd "$PROJECT_DIR" && npm run build && npm run bundle)
fi

echo -e "${BOLD}=== Rewind Feature E2E Tests (tmux) ===${RESET}"
echo "Session: $SESSION"
echo "Workdir: $WORKDIR"
echo ""

# ---------------------------------------------------------------------------
# Test 1: /rewind command flow
# ---------------------------------------------------------------------------

test_rewind_command() {
  start_session

  # Build 3-turn conversation with unique markers
  send "say exactly ALPHA1 and nothing else"
  wait_idle || return 1

  send "say exactly BETA2 and nothing else"
  wait_idle || return 1

  send "say exactly GAMMA3 and nothing else"
  wait_idle || return 1

  # Open rewind selector via /rewind command
  send "/rewind"
  wait_for "Rewind Conversation" || return 1

  # Navigate up to select BETA2 turn (selector starts at last turn GAMMA3)
  send_keys Up
  sleep 0.5

  # Select the turn
  send_keys Enter
  sleep 1
  wait_for "confirm" 15 || return 1

  # Confirm rewind
  send_keys y
  wait_for "Conversation rewound" || return 1

  # After rewind: the input should be pre-populated with the selected turn's
  # text ("say exactly GAMMA3..."). The GAMMA3 *response* turn should be gone
  # from the conversation, but the text appears in the input bar — which is
  # the correct pre-population behavior.
  # Verify pre-population: the input bar should contain GAMMA3 text
  assert_screen "say exactly GAMMA3" || return 1
  # Verify the earlier turns (ALPHA1, BETA2) are still in conversation
  assert_scrollback "ALPHA1" || return 1
}

run_test "Test 1: /rewind command flow" test_rewind_command

# ---------------------------------------------------------------------------
# Test 2: Double-ESC opens selector
# ---------------------------------------------------------------------------

test_double_esc() {
  start_session

  send "say exactly DELTA4 and nothing else"
  wait_idle || return 1

  send "say exactly EPSILON5 and nothing else"
  wait_idle || return 1

  # Double-ESC to open rewind selector.
  # Complication: a btw side-question (prompt suggestion) may be active after
  # the model responds. If btwItem is non-null, the first ESC cancels the btw
  # (AppContainer.tsx:1896) and never reaches the rewind handler. We send
  # 3 ESCs with proper timing to handle both btw-present and btw-absent cases:
  #   ESC #1: cancels btw (if present), or starts rewind pending (if absent)
  #   sleep 1.5s: >800ms to reset any rewind pending from ESC #1
  #   ESC #2: starts rewind pending (btw now dismissed)
  #   sleep 0.3s: within 800ms window
  #   ESC #3: triggers rewind selector
  send_keys Escape
  sleep 1.5
  send_keys Escape
  sleep 0.5
  wait_for "Esc again to rewind" 15 || return 1

  # Third ESC within 800ms — should open selector
  send_keys Escape
  wait_for "Rewind Conversation" || return 1

  # Select last turn (pre-selected) & confirm
  send_keys Enter
  sleep 1
  send_keys y
  wait_for "Conversation rewound" || return 1

  # Continue conversation after rewind — verify model still works
  send "say exactly ZETA6 and nothing else"
  wait_idle || return 1
  assert_scrollback "ZETA6" || return 1
}

run_test "Test 2: Double-ESC opens selector" test_double_esc

# ---------------------------------------------------------------------------
# Test 3: ESC during streaming cancels (no rewind)
# ---------------------------------------------------------------------------

test_esc_during_streaming() {
  start_session

  # Send a prompt that will generate a long response
  send "write a detailed 500 word essay about the history of computing from 1940 to 2000"

  # Wait for streaming to start (prompt disappears)
  sleep 4

  # Single ESC while streaming — should cancel, NOT open rewind
  send_keys Escape

  # Verify rewind selector did NOT open
  sleep 3
  assert_no_screen "Rewind Conversation" || return 1

  # Should eventually return to idle
  wait_idle || return 1
}

run_test "Test 3: ESC during streaming cancels (no rewind)" test_esc_during_streaming

# ---------------------------------------------------------------------------
# Test 4: /rewind with no prior conversation
# ---------------------------------------------------------------------------

test_rewind_no_history() {
  start_session

  # Immediately try /rewind with no conversation history.
  # The /rewind text itself gets recorded as a user turn before the slash
  # command handler runs, so the guard (≥1 user turn) passes and the
  # selector opens showing only the "/rewind" entry — which is not a
  # meaningful rewindable turn. We verify the selector has only 1 turn.
  send "/rewind"
  sleep 3

  # The selector may or may not open depending on implementation.
  # If it opens, it should show exactly "1 turns" (only the /rewind itself).
  if capture_visible | grep -qF "Rewind Conversation"; then
    assert_screen "1 turns" || return 1
    # Close the selector with ESC
    send_keys Escape
    sleep 1
  fi

  # Either way, after dismissing we should be back at the prompt
  wait_for_prompt 10 || return 1
}

run_test "Test 4: /rewind with no prior conversation" test_rewind_no_history

# ---------------------------------------------------------------------------
# Test 5: After rewind, model ignores removed turns
# ---------------------------------------------------------------------------

test_rewind_context_isolation() {
  start_session

  # First turn: give model a unique fact
  send "The secret code for this session is XRAY99. Just confirm you received it by saying OK."
  wait_idle || return 1

  # Second turn: different content
  send "say exactly YANKEEZ and nothing else"
  wait_idle || return 1

  # Rewind to remove the YANKEEZ turn
  send "/rewind"
  wait_for "Rewind Conversation" || return 1

  # Select the most recent turn (YANKEEZ) and confirm
  send_keys Enter
  sleep 1
  send_keys y
  wait_for "Conversation rewound" || return 1

  # Clear pre-populated input (Ctrl-U clears line in most terminals)
  send_keys C-u
  sleep 0.5

  # Ask the model what it remembers
  send "What was the secret code I told you? Reply with just the code, nothing else."
  wait_idle || return 1

  # Model should reference XRAY99 (surviving turn)
  assert_scrollback "XRAY99" || return 1
}

run_test "Test 5: After rewind, model ignores removed turns" test_rewind_context_isolation

# ---------------------------------------------------------------------------
# Summary
# ---------------------------------------------------------------------------

echo ""
echo -e "${BOLD}=== Results ===${RESET}"
echo -e "${GREEN}Passed: ${PASS_COUNT}${RESET}"
if [ "$FAIL_COUNT" -gt 0 ]; then
  echo -e "${RED}Failed: ${FAIL_COUNT}${RESET}"
else
  echo -e "Failed: 0"
fi

if [ "$FAIL_COUNT" -gt 0 ]; then
  exit 1
fi

echo -e "${GREEN}All ${PASS_COUNT} tests passed.${RESET}"