feat(sse): decouple SSE to mcp.pi.ruv.io proxy + Claude Code source research

SSE Proxy Decoupling (ADR-130): - Fix ruvbrain-sse proxy: proper MCP handshake, session creation, drain polling - Fix internal queue endpoints: session_create keeps receiver, drain returns buffered messages - Add response_queues to AppState for SSE proxy communication - Skip sparsifier for >5M edge graphs (was crashing on 16M edges) - Add SSE_DISABLED/MAX_SSE env vars for configurable connection limits - Route SSE to dedicated mcp.pi.ruv.io subdomain (Cloudflare CNAME) - Serve SSE at root / path on proxy (no /sse needed) - Update all references from pi.ruv.io/sse to mcp.pi.ruv.io - Fix Dockerfile consciousness crate build (feature/version mismatches) Claude Code CLI Source Research (ADR-133): - 19 research documents analyzing Claude Code internals (3000+ lines) - Decompiler script + RVF corpus builder for all major versions - Binary RVF containers for v0.2, v1.0, v2.0, v2.1 (300-2068 vectors each) - Call graphs, class hierarchies, state machines from minified source Integration Strategy (ADR-134): - 6-tier integration plan: WASM MCP, agents, hooks, cache, SDK, plugin - Integration guide with architecture diagrams and performance targets Co-Authored-By: claude-flow <ruv@ruv.net>
2026-07-10 01:38:44 +00:00 · 2026-04-02 23:39:56 +00:00 · 2026-04-02 23:39:56 +00:00 · 930fca916f
commit 930fca916f
parent 3569b697c1
103 changed files with 50257 additions and 78 deletions
--- a/scripts/claude-code-decompile.sh
+++ b/scripts/claude-code-decompile.sh
@ -0,0 +1,267 @@
+#!/usr/bin/env bash
+# claude-code-decompile.sh - Extract and analyze Claude Code CLI source
+#
+# Extracts the bundled JavaScript from the Claude Code binary or npm package,
+# applies basic beautification, and splits into logical modules.
+#
+# Usage: ./scripts/claude-code-decompile.sh [output-dir]
+#
+# Output directory defaults to ./claude-code-extracted/
+
+# Strict mode: stop on command failure, unset variables and failed pipeline stages.
+set -euo pipefail
+
+# ANSI colour sequences used by the log helpers (expanded by `echo -e`).
+NC='\033[0m'
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+
+# Destination for all extracted artifacts; the first argument overrides the default.
+OUTPUT_DIR="${1:-./claude-code-extracted}"
+
+# Filled in by find_source: path to cli.js, or path to the native binary.
+CLI_JS=""
+BINARY=""
+
+# Logging helpers. log and warn print to stdout, err prints to stderr.
+log() {
+    echo -e "${GREEN}[+]${NC} $*"
+}
+
+warn() {
+    echo -e "${YELLOW}[!]${NC} $*"
+}
+
+err() {
+    echo -e "${RED}[-]${NC} $*" >&2
+}
+
+# Find the Claude Code source.
+#
+# Search order:
+#   1. cli.js of an npm install (preferred - cleaner JS)
+#   2. a native `claude` binary, or the newest file in the versions directory
+#
+# Globals:
+#   CLI_JS (written) - path to cli.js when an npm package is found
+#   BINARY (written) - path to the binary otherwise
+# Returns:
+#   0 when a source was found, 1 (after printing install hints) otherwise
+find_source() {
+    # `npm root -g` fails when npm is missing; an empty prefix still lets the
+    # local ./node_modules candidate be probed.
+    local npm_root
+    npm_root="$(npm root -g 2>/dev/null || true)"
+
+    # Method 1: NPM package (preferred - cleaner JS)
+    local npm_paths=(
+        "${npm_root}/claude-flow/node_modules/@anthropic-ai/claude-code/cli.js"
+        "${npm_root}/@anthropic-ai/claude-code/cli.js"
+        "./node_modules/@anthropic-ai/claude-code/cli.js"
+    )
+    local p
+    for p in "${npm_paths[@]}"; do
+        if [[ -f "$p" ]]; then
+            CLI_JS="$p"
+            log "Found NPM package: $CLI_JS"
+            return 0
+        fi
+    done
+
+    # Method 2: Bun SEA binary
+    local bin_paths=(
+        "$HOME/.local/bin/claude"
+        "$HOME/.local/share/claude/versions/"
+        "/usr/local/bin/claude"
+    )
+    local candidate newest
+    for p in "${bin_paths[@]}"; do
+        if [[ -f "$p" ]]; then
+            BINARY="$(readlink -f "$p" 2>/dev/null || echo "$p")"
+            log "Found binary: $BINARY"
+            return 0
+        elif [[ -d "$p" ]]; then
+            # Pick the most recently modified regular file in the directory.
+            # A glob with -nt replaces `ls -t | head -1`: with an empty
+            # directory that pipeline failed, and under `set -e -o pipefail`
+            # the failed assignment aborted the script before the remaining
+            # candidates were tried.
+            newest=""
+            for candidate in "$p"*; do
+                [[ -f "$candidate" ]] || continue
+                if [[ -z "$newest" || "$candidate" -nt "$newest" ]]; then
+                    newest="$candidate"
+                fi
+            done
+            if [[ -n "$newest" ]]; then
+                BINARY="$newest"
+                log "Found binary: $BINARY"
+                return 0
+            fi
+        fi
+    done
+
+    err "Could not find Claude Code binary or npm package"
+    echo "Install via: npm install -g @anthropic-ai/claude-code"
+    echo "Or ensure claude is installed: claude --version"
+    return 1
+}
+
+# Dump the printable strings of a Bun SEA binary and keep the JS-looking ones.
+#   $1 - path to the binary
+#   $2 - output directory (receives raw-strings.txt and js-fragments.txt)
+extract_from_binary() {
+    local binary="$1"
+    local output="$2"
+    local raw_file="${output}/raw-strings.txt"
+    local js_file="${output}/js-fragments.txt"
+    local js_pattern='(function\s|class\s|=>\s*\{|export\s|import\s|require\(|async\s|await\s|const\s|let\s|var\s)'
+
+    log "Extracting strings from binary ($(du -h "$binary" | cut -f1))..."
+    strings "$binary" > "$raw_file"
+
+    local total_lines
+    total_lines=$(wc -l < "$raw_file")
+    log "Extracted $total_lines string fragments"
+
+    # grep exits 1 when nothing matches; an empty fragment file is acceptable.
+    log "Filtering JavaScript patterns..."
+    grep -E "$js_pattern" "$raw_file" > "$js_file" 2>/dev/null || true
+
+    local js_lines
+    js_lines=$(wc -l < "$js_file")
+    log "Found $js_lines JS-like fragments"
+}
+
+# Process the cli.js bundle: keep a copy, write a naively beautified version
+# and a metrics report.
+#   $1 - path to the cli.js bundle
+#   $2 - output directory (receives cli.js.original, cli.beautified.js, metrics.txt)
+process_bundle() {
+    local source="$1"
+    local output="$2"
+
+    log "Processing bundle: $(du -h "$source" | cut -f1)"
+
+    # Copy original
+    cp "$source" "${output}/cli.js.original"
+
+    # Basic beautification: add newlines at statement boundaries
+    log "Beautifying (adding newlines at statement boundaries)..."
+    sed 's/;/;\n/g' "$source" | \
+    sed 's/{/{\n/g' | \
+    sed 's/}/}\n/g' > "${output}/cli.beautified.js"
+
+    local beautified_lines
+    beautified_lines=$(wc -l < "${output}/cli.beautified.js")
+    log "Beautified: $beautified_lines lines"
+
+    # Extract metrics
+    log "Computing code metrics..."
+    {
+        echo "=== Claude Code Source Metrics ==="
+        echo "Date: $(date -Iseconds)"
+        echo "Source: $source"
+        echo "Original size: $(wc -c < "$source") bytes"
+        echo "Original lines: $(wc -l < "$source")"
+        echo "Beautified lines: $beautified_lines"
+        echo ""
+        echo "--- Counts ---"
+        echo "Functions: $(grep -oP 'function\s*\w*\s*\(' "$source" | wc -l)"
+        echo "Async functions: $(grep -oP 'async\s+function' "$source" | wc -l)"
+        echo "Arrow functions: $(grep -oP '=>' "$source" | wc -l)"
+        echo "Classes: $(grep -oP 'class \w+' "$source" | wc -l)"
+        echo "Extends: $(grep -oP 'extends \w+' "$source" | wc -l)"
+        echo "For-await loops: $(grep -c 'for await' "$source")"
+        echo "Yield statements: $(grep -c 'yield' "$source")"
+        echo ""
+        echo "--- Node.js Imports ---"
+        # grep exits 1 when nothing matches. Without `|| true` these bare
+        # pipelines abort the script under `set -e -o pipefail` for a bundle
+        # that has no matching imports or no classes.
+        grep -oP 'from"[^"]*"' "$source" | sort -u | grep -P 'from"(node:|assert|child_process|crypto|events|fs|http|https|module|net|os|path|process|stream|tty|url|util|zlib)' || true
+        echo ""
+        echo "--- Class Definitions ---"
+        grep -oP 'class \w+( extends \w+)?' "$source" | sort -u || true
+    } > "${output}/metrics.txt"
+
+    log "Metrics saved to ${output}/metrics.txt"
+}
+
+# Split the source into per-topic fragment files under <output>/modules.
+#   $1 - file to scan (cli.js bundle or js-fragments.txt)
+#   $2 - output directory
+split_modules() {
+    local source="$1"
+    local output="$2"
+    local modules_dir="${output}/modules"
+    mkdir -p "$modules_dir"
+
+    log "Splitting into logical modules..."
+
+    local spec module pattern
+
+    # "<module>:<keyword alternation>". Each hit is captured together with up
+    # to 200 characters of context on either side.
+    local context_specs=(
+        'tools:BashTool|FileReadTool|FileEditTool|FileWriteTool|AgentOutputTool|WebFetch|WebSearch|TodoWrite|NotebookEdit|GlobTool|GrepTool'
+        'permissions:permission|Permission|canUseTool|alwaysAllowRules|denyWrite|sandbox|Sandbox'
+        'mcp:mcp__|McpClient|McpServer|McpError|callTool|listTools|initialize'
+        'streaming:content_block_delta|message_start|message_stop|message_delta|content_block_start|content_block_stop|stream_event|text_delta|input_json_delta'
+        'compaction:compact|compaction|tengu_compact|microcompact|auto_compact|compact_boundary|preCompactTokenCount|postCompactTokenCount'
+        'agent-loop:agentLoop|mainLoop|s\$\(|querySource|toolUseContext|systemPrompt'
+    )
+    for spec in "${context_specs[@]}"; do
+        module="${spec%%:*}"
+        pattern=".{0,200}(${spec#*:}).{0,200}"
+        grep -oP "$pattern" "$source" > "${modules_dir}/${module}.txt" 2>/dev/null || true
+    done
+
+    # "<module>:<regex>". Only the match itself is kept, sorted and de-duplicated
+    # (telemetry events, command descriptors, class hierarchy).
+    local unique_specs=(
+        'telemetry-events:"tengu_[^"]*"'
+        'commands:name:"[a-z][-a-z]*",description:"[^"]*"'
+        'class-hierarchy:class \w+ extends \w+'
+    )
+    for spec in "${unique_specs[@]}"; do
+        module="${spec%%:*}"
+        grep -oP "${spec#*:}" "$source" | sort -u > "${modules_dir}/${module}.txt" 2>/dev/null || true
+    done
+
+    # Report how many lines each module file ended up with.
+    local f name lines
+    for f in "${modules_dir}"/*.txt; do
+        name=$(basename "$f" .txt)
+        lines=$(wc -l < "$f")
+        log "  Module '$name': $lines fragments"
+    done
+}
+
+# Generate RVF files from extracted modules: one text file per module with a
+# YAML-style metadata header followed by the fragments in a code fence.
+#   $1 - output directory (reads $1/modules/*.txt, writes $1/rvf/<module>.rvf)
+generate_rvf() {
+    local modules_dir="$1/modules"
+    local rvf_dir="$1/rvf"
+    mkdir -p "$rvf_dir"
+
+    log "Generating RVF files..."
+
+    # Version = semver inside the first VERSION:"..." literal of the bundle.
+    # The matches are captured first and trimmed in the shell: piping grep
+    # into `head -1` under pipefail can report failure (SIGPIPE) even though a
+    # version was found, which made the `|| echo unknown` fallback append a
+    # second line to the value.
+    local version="unknown"
+    local version_literals first_literal
+    version_literals=$(grep -oP 'VERSION:"[^"]*"' "$modules_dir/../cli.js.original" 2>/dev/null || true)
+    first_literal="${version_literals%%$'\n'*}"
+    if [[ "$first_literal" =~ [0-9]+\.[0-9]+\.[0-9]+ ]]; then
+        version="${BASH_REMATCH[0]}"
+    fi
+
+    local f name rvf_file
+    for f in "${modules_dir}"/*.txt; do
+        # An unmatched glob stays literal; skip it instead of failing on cat.
+        [[ -f "$f" ]] || continue
+        name=$(basename "$f" .txt)
+        rvf_file="${rvf_dir}/${name}.rvf"
+        {
+            echo "---"
+            echo "type: source-extraction"
+            echo "module: ${name}"
+            echo "binary: claude-code"
+            echo "version: ${version}"
+            echo "extraction-method: strings+pattern-match"
+            echo "confidence: medium"
+            echo "fragments: $(wc -l < "$f")"
+            echo "---"
+            echo ""
+            echo "# ${name} - Extracted Fragments"
+            echo ""
+            echo '```javascript'
+            cat "$f"
+            echo '```'
+        } > "$rvf_file"
+        log "  Created ${rvf_file}"
+    done
+}
+
+# Entry point: locate the source, run the matching extraction pipeline and
+# print a summary of what was written.
+main() {
+    log "Claude Code Decompiler"
+    log "======================"
+
+    mkdir -p "$OUTPUT_DIR"
+
+    # Sets CLI_JS or BINARY; a failure here ends the script (set -e).
+    find_source
+
+    if [[ -n "$CLI_JS" ]]; then
+        # npm bundle: full pipeline including metrics and beautification
+        process_bundle "$CLI_JS" "$OUTPUT_DIR"
+        split_modules "$CLI_JS" "$OUTPUT_DIR"
+        generate_rvf "$OUTPUT_DIR"
+    elif [[ -n "$BINARY" ]]; then
+        # native binary: work from the strings dump instead
+        extract_from_binary "$BINARY" "$OUTPUT_DIR"
+        local fragments_file="${OUTPUT_DIR}/js-fragments.txt"
+        if [[ -f "$fragments_file" ]]; then
+            split_modules "$fragments_file" "$OUTPUT_DIR"
+            generate_rvf "$OUTPUT_DIR"
+        fi
+    fi
+
+    local summary_line
+    for summary_line in \
+        "" \
+        "Extraction complete!" \
+        "Output directory: $OUTPUT_DIR" \
+        "" \
+        "Key files:" \
+        "  metrics.txt        - Code metrics and counts" \
+        "  cli.beautified.js  - Beautified bundle (if from NPM)" \
+        "  modules/           - Split by logical module" \
+        "  rvf/               - RVF files with metadata headers"; do
+        log "$summary_line"
+    done
+
+    # Show the head of the metrics report when one was produced.
+    local metrics_report="${OUTPUT_DIR}/metrics.txt"
+    if [[ -f "$metrics_report" ]]; then
+        echo ""
+        head -15 "$metrics_report"
+    fi
+}
+
+main "$@"
--- a/scripts/claude-code-rvf-corpus.sh
+++ b/scripts/claude-code-rvf-corpus.sh
@ -0,0 +1,455 @@
+#!/usr/bin/env bash
+# claude-code-rvf-corpus.sh - Build binary RVF containers for every major
+# Claude Code CLI release.
+#
+# Downloads the latest patch of each major.minor series from npm, extracts
+# the CLI bundle, splits into modules, and creates a binary RVF container
+# with vector embeddings and witness chains.
+#
+# Usage:
+#   ./scripts/claude-code-rvf-corpus.sh [--dry-run] [--series 0.2,1.0,2.0,2.1]
+#
+# Output: docs/research/claude-code-rvsource/versions/<vX.Y.x>/
+#   - claude-code-vX.Y.rvf          Binary RVF container
+#   - claude-code-vX.Y.rvf.manifest.json  Container manifest
+#   - source/                        Extracted JS modules
+#   - README.md                      Version metadata
+
+# Strict mode: stop on command failure, unset variables and failed pipeline stages.
+set -euo pipefail
+
+# Paths are resolved relative to this script so it can be run from anywhere.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
+OUTPUT_BASE="${ROOT_DIR}/docs/research/claude-code-rvsource/versions"
+# Private scratch directory. mktemp creates it atomically under an
+# unpredictable name; a fixed /tmp/<name>-<pid> path can be pre-created by
+# another local user. It is removed again by the EXIT trap.
+TMP_DIR="$(mktemp -d "${TMPDIR:-/tmp}/cc-rvf-corpus.XXXXXX")"
+# Command-line switches (see the argument parser below).
+DRY_RUN=false
+FILTER_SERIES=""
+
+# ANSI colour sequences used by the helpers below (expanded by `echo -e`).
+NC='\033[0m'
+BOLD='\033[1m'
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+CYAN='\033[0;36m'
+
+# Logging helpers. log, info and warn print to stdout, err prints to stderr.
+log() {
+    echo -e "${GREEN}[+]${NC} $*"
+}
+
+info() {
+    echo -e "${CYAN}[*]${NC} $*"
+}
+
+warn() {
+    echo -e "${YELLOW}[!]${NC} $*"
+}
+
+err() {
+    echo -e "${RED}[-]${NC} $*" >&2
+}
+
+# Remove the scratch directory on every exit path.
+cleanup() {
+    # `:?` refuses to run with an empty path; `--` stops option parsing.
+    rm -rf -- "${TMP_DIR:?}"
+}
+trap cleanup EXIT
+
+# Parse arguments
+#   --dry-run         list the versions that would be processed, then exit
+#   --series A,B,...  restrict the build to these major.minor series
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --dry-run)
+            DRY_RUN=true
+            shift
+            ;;
+        --series)
+            # Without this check a missing value dies with a bare
+            # "unbound variable" error from `set -u`.
+            if [[ $# -lt 2 ]]; then
+                err "--series requires a value, e.g. --series 2.0,2.1"
+                exit 1
+            fi
+            FILTER_SERIES="$2"
+            shift 2
+            ;;
+        --help|-h)
+            echo "Usage: $0 [--dry-run] [--series 0.2,1.0,2.0,2.1]"
+            exit 0
+            ;;
+        *)
+            err "Unknown argument: $1"
+            exit 1
+            ;;
+    esac
+done
+
+# Fetch all versions from npm and group by major.minor.
+#
+# Outputs: one line per series on stdout, "<major.minor> <latest version>",
+#          ordered by major then minor. Progress goes to stderr so callers
+#          can capture stdout.
+# Returns: non-zero when npm or node fails.
+get_version_groups() {
+    log "Fetching Claude Code versions from npm..." >&2
+    local versions_json
+    if ! versions_json=$(npm view @anthropic-ai/claude-code versions --json 2>/dev/null); then
+        err "npm view failed for @anthropic-ai/claude-code"
+        return 1
+    fi
+
+    # The JSON is handed to node on stdin and parsed as data. Splicing it into
+    # the script text would execute whatever the registry returned as code.
+    printf '%s' "$versions_json" | node -e '
+const parsed = JSON.parse(require("fs").readFileSync(0, "utf-8"));
+// npm may print a bare string instead of an array for a single version.
+const versions = Array.isArray(parsed) ? parsed : [parsed];
+const groups = {};
+
+// Keep the highest patch number seen for each major.minor key.
+for (const v of versions) {
+    const parts = v.split(".");
+    const key = parts[0] + "." + parts[1];
+    const patch = parseInt(parts[2], 10);
+
+    if (!groups[key] || patch > groups[key].patch) {
+        groups[key] = { version: v, patch, key };
+    }
+}
+
+// Sort by semver
+const sorted = Object.values(groups).sort((a, b) => {
+    const [aMaj, aMin] = a.key.split(".").map(Number);
+    const [bMaj, bMin] = b.key.split(".").map(Number);
+    return aMaj !== bMaj ? aMaj - bMaj : aMin - bMin;
+});
+
+for (const g of sorted) {
+    console.log(g.key + " " + g.version);
+}
+'
+}
+
+# Download and extract a specific version.
+#   $1 - exact package version
+#   $2 - destination directory (receives cli.js and package.json)
+# Globals: TMP_DIR (read) - scratch space for the tarball
+# Returns: 0 when cli.js was extracted, 1 otherwise
+download_version() {
+    local version="$1"
+    local dest_dir="$2"
+
+    mkdir -p "$dest_dir"
+    info "  Downloading @anthropic-ai/claude-code@${version}..."
+
+    # One pack directory per version. With a shared directory, a tarball left
+    # behind by an earlier version that failed extraction was picked up again
+    # and unpacked under the wrong version.
+    local tgz_dir="${TMP_DIR}/tarballs/${version}"
+    rm -rf -- "$tgz_dir"
+    mkdir -p "$tgz_dir"
+
+    # A failed download is detected below through the missing tarball.
+    npm pack "@anthropic-ai/claude-code@${version}" --pack-destination "$tgz_dir" \
+        >/dev/null 2>&1 || true
+
+    # Find the tarball (naming varies between npm versions)
+    local tgz="" candidate
+    for candidate in "$tgz_dir"/anthropic-ai-claude-code-*.tgz; do
+        if [[ -f "$candidate" ]]; then
+            tgz="$candidate"
+            break
+        fi
+    done
+    if [[ -z "$tgz" ]]; then
+        rm -rf -- "$tgz_dir"
+        err "  Failed to download version ${version}"
+        return 1
+    fi
+
+    # Try each member without listing the tarball first; older releases ship
+    # cli.mjs instead of cli.js, so a missing member is not an error.
+    local member
+    for member in cli.js cli.mjs package.json; do
+        tar xf "$tgz" -C "$dest_dir" --strip-components=1 "package/${member}" 2>/dev/null || true
+    done
+
+    # The tarball is no longer needed, whether or not extraction succeeded.
+    rm -rf -- "$tgz_dir"
+
+    # Rename cli.mjs -> cli.js for consistency
+    if [[ -f "${dest_dir}/cli.mjs" ]] && [[ ! -f "${dest_dir}/cli.js" ]]; then
+        mv "${dest_dir}/cli.mjs" "${dest_dir}/cli.js"
+    fi
+
+    if [[ ! -f "${dest_dir}/cli.js" ]]; then
+        warn "  No cli.js or cli.mjs found in ${version}"
+        return 1
+    fi
+
+    local size
+    size=$(du -sh "${dest_dir}/cli.js" 2>/dev/null | cut -f1)
+    info "  Extracted cli.js (${size})"
+    return 0
+}
+
+# Run the Node module splitter on a CLI bundle.
+#   $1 - path to cli.js
+#   $2 - directory that receives the module files and metrics.json
+# Returns the splitter's exit status; its stderr is discarded.
+split_modules() {
+    local cli_path="$1"
+    local source_dir="$2"
+    local splitter="${SCRIPT_DIR}/lib/module-splitter.mjs"
+
+    info "  Splitting into modules..."
+    node "$splitter" "$cli_path" "$source_dir" 2>/dev/null
+}
+
+# Run the Node RVF builder over a directory of extracted modules.
+#   $1 - source directory    $2 - output .rvf path
+#   $3 - exact version       $4 - major.minor series
+# Returns the builder's exit status; its stderr is discarded.
+build_rvf() {
+    local source_dir="$1"
+    local rvf_path="$2"
+    local version="$3"
+    local series="$4"
+
+    local builder_args=(
+        "$source_dir" "$rvf_path"
+        --meta "version=${version}"
+        --meta "series=${series}"
+        --meta "package=@anthropic-ai/claude-code"
+        --meta "corpus=claude-code-rvsource"
+    )
+
+    info "  Building binary RVF container..."
+    node "${SCRIPT_DIR}/lib/rvf-builder.mjs" "${builder_args[@]}" 2>/dev/null
+}
+
+# Generate a README for a version directory.
+#   $1 - version directory (README.md is written here)
+#   $2 - major.minor series
+#   $3 - exact version
+#   $4 - path of the .rvf container (its .manifest.json sidecar is read)
+#
+# Values come from source/metrics.json and the manifest. When a file is
+# missing or cannot be parsed, the placeholders below are kept and the README
+# is still written.
+generate_readme() {
+    local ver_dir="$1"
+    local series="$2"
+    local version="$3"
+    local rvf_file="$4"
+
+    local metrics_file="${ver_dir}/source/metrics.json"
+    local manifest_file="${rvf_file}.manifest.json"
+
+    # Placeholders
+    local bundle_size="unknown"
+    local classes="?"
+    local functions="?"
+    local modules_count="?"
+    local rvf_size="N/A"
+    local rvf_vectors="N/A"
+    local rvf_id="N/A"
+
+    # Each file is read by a single node process that prints tab-separated
+    # fields. The path is passed as an argument, not pasted into the script,
+    # so quotes in the path cannot break it. A node failure is caught here
+    # and no longer aborts the whole corpus build under `set -e`.
+    local fields
+
+    # Read metrics
+    if [[ -f "$metrics_file" ]]; then
+        if fields=$(node -e '
+const m = JSON.parse(require("fs").readFileSync(process.argv[1], "utf-8"));
+console.log([
+    (m.sizeBytes / 1024 / 1024).toFixed(1) + "MB",
+    String(m.classes),
+    String(m.functions),
+    String(Object.keys(m.modules || {}).length),
+].join("\t"));
+' "$metrics_file" 2>/dev/null); then
+            IFS=$'\t' read -r bundle_size classes functions modules_count <<< "$fields"
+        else
+            warn "  Could not read ${metrics_file}; README uses placeholders"
+        fi
+    fi
+
+    # Read manifest
+    if [[ -f "$manifest_file" ]]; then
+        if fields=$(node -e '
+const m = JSON.parse(require("fs").readFileSync(process.argv[1], "utf-8"));
+console.log([
+    (m.fileSizeBytes / 1024).toFixed(1) + "KB",
+    String(m.totalVectors),
+    String(m.fileId),
+].join("\t"));
+' "$manifest_file" 2>/dev/null); then
+            IFS=$'\t' read -r rvf_size rvf_vectors rvf_id <<< "$fields"
+        else
+            warn "  Could not read ${manifest_file}; README uses placeholders"
+        fi
+    fi
+
+    cat > "${ver_dir}/README.md" <<READMEEOF
+# Claude Code v${version} (${series} series)
+
+## Binary RVF Container
+
+| Property | Value |
+|----------|-------|
+| Version | ${version} |
+| Series | ${series} |
+| Bundle size | ${bundle_size} |
+| RVF size | ${rvf_size} |
+| Vectors | ${rvf_vectors} |
+| RVF File ID | \`${rvf_id}\` |
+| Classes | ${classes} |
+| Functions | ${functions} |
+| Modules | ${modules_count} |
+| Extracted | $(date -Iseconds) |
+
+## Files
+
+- \`claude-code-v${series}.rvf\` - Binary RVF container with HNSW index + witness chain
+- \`claude-code-v${series}.rvf.manifest.json\` - Container manifest (vector ID map, metadata)
+- \`source/\` - Extracted JavaScript module fragments
+
+## RVF Container Details
+
+The \`.rvf\` file is a real binary container created with the \`@ruvector/rvf-node\`
+native backend. It contains:
+
+- **128-dimensional fingerprint vectors** for each code fragment
+- **HNSW index** (M=16, ef_construction=200) for fast similarity search
+- **Cosine distance** metric
+- **Witness chain** for provenance verification
+
+To query this container:
+
+\`\`\`typescript
+import { RvfDatabase } from '@ruvector/rvf';
+
+const db = await RvfDatabase.openReadonly('./claude-code-v${series}.rvf');
+const results = await db.query(queryVector, 10);
+await db.close();
+\`\`\`
+READMEEOF
+}
+
+# Write the corpus-level README.md: fixed header, one table row per entry,
+# fixed footer.
+#   $1     - directory that receives README.md
+#   $2...  - pre-formatted markdown table rows
+generate_index() {
+    local base_dir="$1"
+    shift
+    local entries=("$@")
+    local index_file="${base_dir}/README.md"
+
+    # The file is opened once; all three parts are streamed into it in order.
+    {
+        cat <<'INDEXHEADER'
+# Claude Code RVF Corpus
+
+Binary RVF containers for every major Claude Code CLI release, with
+HNSW-indexed vector embeddings and witness chains for provenance.
+
+## Versions
+
+| Series | Version | Bundle | RVF Size | Vectors | File ID |
+|--------|---------|--------|----------|---------|---------|
+INDEXHEADER
+
+        for entry in "${entries[@]}"; do
+            echo "$entry"
+        done
+
+        cat <<'INDEXFOOTER'
+
+## How to Use
+
+```bash
+# Build the corpus
+./scripts/claude-code-rvf-corpus.sh
+
+# Build only specific series
+./scripts/claude-code-rvf-corpus.sh --series 2.0,2.1
+```
+
+## Format
+
+Each version directory contains:
+- A binary `.rvf` container (128-dim cosine-distance HNSW index)
+- A `.manifest.json` sidecar with vector-to-fragment mapping
+- Extracted JavaScript modules in `source/`
+
+Generated by `scripts/claude-code-rvf-corpus.sh` using `@ruvector/rvf-node`.
+INDEXFOOTER
+    } > "$index_file"
+}
+
+# -----------------------------------------------------------------------
+# Main
+# -----------------------------------------------------------------------
+
+# Build the corpus. For the latest patch of every (optionally filtered)
+# major.minor series: download the package, split the bundle into modules,
+# build the RVF container and write a per-version README. Finally rebuild the
+# top-level index from whatever version directories exist under OUTPUT_BASE.
+#
+# Globals: TMP_DIR, OUTPUT_BASE, FILTER_SERIES, DRY_RUN (all read)
+main() {
+    echo -e "${BOLD}Claude Code RVF Corpus Builder${NC}"
+    echo -e "${BOLD}==============================${NC}"
+    echo ""
+
+    mkdir -p "$TMP_DIR" "$OUTPUT_BASE"
+
+    # Get version groups
+    local groups
+    groups=$(get_version_groups)
+
+    if [[ -z "$groups" ]]; then
+        err "No versions found on npm"
+        exit 1
+    fi
+
+    local total_groups
+    total_groups=$(echo "$groups" | wc -l)
+    log "Found ${total_groups} major.minor series"
+
+    local line f
+
+    # Apply filter if specified
+    if [[ -n "$FILTER_SERIES" ]]; then
+        local filtered=""
+        IFS=',' read -ra FILTER_ARRAY <<< "$FILTER_SERIES"
+        while IFS= read -r line; do
+            local series
+            series=$(echo "$line" | awk '{print $1}')
+            for f in "${FILTER_ARRAY[@]}"; do
+                if [[ "$series" == "$f" ]]; then
+                    filtered+="${line}"$'\n'
+                fi
+            done
+        done <<< "$groups"
+        groups="$filtered"
+        # grep -c prints "0" and also exits 1 when nothing matches, so an
+        # `|| echo 0` fallback would produce the two-line value "0\n0".
+        total_groups=$(printf '%s' "$groups" | grep -c '^' || true)
+        log "Filtered to ${total_groups} series: ${FILTER_SERIES}"
+        if [[ "$total_groups" -eq 0 ]]; then
+            warn "No series on npm matches: ${FILTER_SERIES}"
+        fi
+    fi
+
+    if $DRY_RUN; then
+        warn "DRY RUN - would process these versions:"
+        echo "$groups" | while IFS= read -r line; do
+            [[ -z "$line" ]] && continue
+            local series version
+            series=$(echo "$line" | awk '{print $1}')
+            version=$(echo "$line" | awk '{print $2}')
+            echo "  v${series}.x -> ${version}"
+        done
+        exit 0
+    fi
+
+    local processed=0
+    local failed=0
+
+    while IFS= read -r line; do
+        [[ -z "$line" ]] && continue
+
+        local series version
+        series=$(echo "$line" | awk '{print $1}')
+        version=$(echo "$line" | awk '{print $2}')
+
+        echo ""
+        log "Processing v${series}.x (latest: ${version})"
+
+        local ver_dir="${OUTPUT_BASE}/v${series}.x"
+        local source_dir="${ver_dir}/source"
+        local rvf_file="${ver_dir}/claude-code-v${series}.rvf"
+        local extract_dir="${TMP_DIR}/extract-${version}"
+
+        mkdir -p "$ver_dir" "$source_dir"
+
+        # Step 1: Download
+        if ! download_version "$version" "$extract_dir"; then
+            warn "  Skipping ${version} (download failed)"
+            # `|| true`: (( x++ )) returns 1 when x was 0, which would trip set -e
+            ((failed++)) || true
+            continue
+        fi
+
+        local cli_path="${extract_dir}/cli.js"
+        if [[ ! -f "$cli_path" ]]; then
+            warn "  No CLI bundle found for ${version}"
+            ((failed++)) || true
+            continue
+        fi
+
+        # Step 2: Split into modules
+        if ! split_modules "$cli_path" "$source_dir"; then
+            warn "  Module splitting failed for ${version}"
+        fi
+
+        # Step 3: Build binary RVF container
+        if build_rvf "$source_dir" "$rvf_file" "$version" "$series"; then
+            log "  RVF container created: $(basename "$rvf_file")"
+        else
+            warn "  RVF creation failed for ${version}"
+            # Create a fallback TODO note
+            cat > "${ver_dir}/TODO-rvf.md" <<EOF
+# TODO: Create RVF Container
+
+Version: ${version}
+Series: v${series}.x
+Error: RVF binary creation failed
+
+The source modules have been extracted to \`source/\` but the binary
+RVF container could not be created. This typically means the
+\`@ruvector/rvf-node\` native backend is not available.
+
+To create the container manually:
+
+\`\`\`bash
+node scripts/lib/rvf-builder.mjs source/ claude-code-v${series}.rvf \\
+  --meta version=${version} --meta series=${series}
+\`\`\`
+EOF
+        fi
+
+        # Step 4: Generate README
+        generate_readme "$ver_dir" "$series" "$version" "$rvf_file"
+
+        # Clean up extracted tarball content
+        rm -rf "$extract_dir"
+
+        ((processed++)) || true
+        log "  Done (${processed}/${total_groups})"
+    done <<< "$groups"
+
+    # Generate index
+    echo ""
+    log "Generating corpus index..."
+
+    # Rebuild index entries by scanning output dirs
+    local final_entries=()
+    local d
+    for d in "${OUTPUT_BASE}"/v*.x; do
+        [[ -d "$d" ]] || continue
+        local dir_name
+        dir_name=$(basename "$d")
+        local series_name="${dir_name#v}"
+        series_name="${series_name%.x}"
+
+        local manifest="${d}/claude-code-v${series_name}.rvf.manifest.json"
+        if [[ -f "$manifest" ]]; then
+            local row
+            # Path and series are passed as arguments (process.argv) so that
+            # quotes in them cannot break the script text.
+            row=$(node -e '
+const m = JSON.parse(require("fs").readFileSync(process.argv[1], "utf-8"));
+const s = m.source || {};
+const met = s.metrics || {};
+const bundle = (met.bundleSizeBytes / 1024 / 1024).toFixed(1) + "MB";
+const rvfSize = (m.fileSizeBytes / 1024).toFixed(1) + "KB";
+console.log("| " + process.argv[2] + " | " + s.version + " | " + bundle + " | " + rvfSize + " | " + m.totalVectors + " | `" + m.fileId.slice(0, 12) + "...` |");
+' "$manifest" "$series_name" 2>/dev/null || echo "| ${series_name} | ? | ? | ? | ? | ? |")
+            final_entries+=("$row")
+        else
+            final_entries+=("| ${series_name} | ? | ? | N/A | N/A | N/A |")
+        fi
+    done
+
+    # The ${arr[@]+...} form keeps an empty array from tripping `set -u` on
+    # bash releases older than 4.4.
+    generate_index "$OUTPUT_BASE" ${final_entries[@]+"${final_entries[@]}"}
+
+    echo ""
+    echo -e "${BOLD}Corpus build complete.${NC}"
+    log "Output: ${OUTPUT_BASE}/"
+    log "Versions processed: ${processed:-0}"
+    if [[ ${failed:-0} -gt 0 ]]; then
+        warn "Versions failed: ${failed}"
+    fi
+}
+
+main "$@"
--- a/scripts/deploy-dragnes.sh
+++ b/scripts/deploy-dragnes.sh
@ -96,7 +96,7 @@ gcloud run deploy "${SERVICE_NAME}" \
  --set-env-vars="DRAGNES_BRAIN_URL=https://pi.ruv.io" \
  --set-env-vars="DRAGNES_MODEL_VERSION=0.1.0" \
  --update-secrets="OPENAI_API_KEY=OPENROUTER_API_KEY:latest" \
-  --set-env-vars='MCP_SERVERS=[{"name":"pi-brain","url":"https://pi.ruv.io/sse"}]'
+  --set-env-vars='MCP_SERVERS=[{"name":"pi-brain","url":"https://mcp.pi.ruv.io"}]'

 # ---------- CDN for WASM assets -----------------------------------------------

--- a/scripts/lib/module-splitter.mjs
+++ b/scripts/lib/module-splitter.mjs
@ -0,0 +1,211 @@
+#!/usr/bin/env node
+/**
+ * module-splitter.mjs - Split a Claude Code CLI bundle into logical modules.
+ *
+ * Given a path to cli.js / cli.mjs, extracts recognizable subsystems
+ * (tools, MCP, permissions, streaming, agent-loop, compaction, telemetry)
+ * and writes individual .js files plus a metrics.json manifest.
+ *
+ * Usage:
+ *   node scripts/lib/module-splitter.mjs <cli-bundle> <output-dir>
+ */
+
+import { readFileSync, writeFileSync, mkdirSync, statSync } from 'fs';
+import { join, basename } from 'path';
+
+// Module extraction table: module name -> list of keywords.
+// A statement chunk (as produced by splitStatements) that contains any of a
+// module's keywords is assigned to that module. Matching is a plain,
+// case-sensitive substring test.
+// Order matters: modules are tried in declaration order and the first match
+// wins, so a chunk lands in at most one module.
+const MODULE_KEYWORDS = {
+  'tool-dispatch': [
+    'BashTool', 'FileReadTool', 'FileEditTool', 'FileWriteTool',
+    'AgentOutputTool', 'WebFetch', 'WebSearch', 'TodoWrite',
+    'NotebookEdit', 'GlobTool', 'GrepTool',
+  ],
+  'permission-system': [
+    'canUseTool', 'alwaysAllowRules', 'denyWrite',
+    'Permission', 'permission',
+  ],
+  'mcp-client': [
+    'mcp__', 'McpClient', 'McpServer', 'McpError',
+    'callTool', 'listTools',
+  ],
+  'streaming-handler': [
+    'content_block_delta', 'message_start', 'message_stop',
+    'message_delta', 'content_block_start', 'content_block_stop',
+    'stream_event', 'text_delta', 'input_json_delta',
+  ],
+  'context-manager': [
+    'tengu_compact', 'microcompact', 'auto_compact',
+    'compact_boundary', 'preCompactTokenCount',
+    'postCompactTokenCount', 'compaction',
+  ],
+  'agent-loop': [
+    'agentLoop', 'mainLoop', 'querySource',
+    'toolUseContext', 'systemPrompt',
+  ],
+};
+
+// Simple global regex patterns for small, fast extractions.
+// Each key becomes its own output module holding the distinct matches of its
+// pattern over the whole bundle (see extractSimplePatterns).
+// The /g flag makes these objects stateful through lastIndex, which is why
+// extractSimplePatterns resets it before scanning.
+const SIMPLE_PATTERNS = {
+  telemetry: /"tengu_[^"]*"/g,
+  commands: /name:"[a-z][-a-z]*",description:"[^"]*"/g,
+  'class-hierarchy': /class \w+( extends \w+)?/g,
+};
+
+/**
+ * Cut the bundle into chunks of roughly 2 KB for embedding.
+ *
+ * The source is split on every ';' (string literals are NOT taken into
+ * account) and neighbouring pieces are glued back together with ';' until
+ * adding the next piece would push the chunk past the limit. A single piece
+ * larger than the limit becomes a chunk of its own.
+ *
+ * @param {string} source  full bundle text
+ * @returns {string[]} non-empty chunks in source order
+ */
+function splitStatements(source) {
+  const LIMIT = 2048;
+  const chunks = [];
+  let pending = '';
+
+  source.split(';').forEach((piece) => {
+    const overflows = pending.length > 0 && pending.length + piece.length > LIMIT;
+    if (overflows) {
+      // Flush what has been collected and start a new chunk with this piece.
+      chunks.push(pending);
+      pending = piece;
+      return;
+    }
+    pending = pending ? pending + ';' + piece : piece;
+  });
+
+  if (pending.length > 0) {
+    chunks.push(pending);
+  }
+  return chunks;
+}
+
+/**
+ * Sort statement chunks into modules using the MODULE_KEYWORDS table.
+ *
+ * Chunks shorter than 10 characters are ignored. A chunk goes to the first
+ * module (in table order) that has a keyword occurring in it; chunks that
+ * match no module are dropped.
+ *
+ * @param {string[]} statements  chunks from splitStatements
+ * @returns {Object<string, string[]>} module name -> trimmed chunks
+ */
+function classifyStatements(statements) {
+  const table = Object.entries(MODULE_KEYWORDS);
+  const modules = {};
+
+  for (const stmt of statements) {
+    if (stmt.length < 10) continue;
+
+    const owner = table.find(([, keywords]) =>
+      keywords.some((kw) => stmt.includes(kw)));
+    if (!owner) continue;
+
+    const modName = owner[0];
+    modules[modName] = modules[modName] || [];
+    modules[modName].push(stmt.trim());
+  }
+
+  return modules;
+}
+
+/**
+ * Collect the distinct matches of every SIMPLE_PATTERNS regex.
+ *
+ * Matches are trimmed, those of 3 characters or fewer are discarded, and a
+ * module only appears in the result when it has at least one match.
+ *
+ * @param {string} source  full bundle text
+ * @returns {Object<string, string[]>} module name -> unique matches in first-seen order
+ */
+function extractSimplePatterns(source) {
+  const results = {};
+
+  for (const modName of Object.keys(SIMPLE_PATTERNS)) {
+    const regex = SIMPLE_PATTERNS[modName];
+    // Global regexes remember their position; always scan from the start.
+    regex.lastIndex = 0;
+
+    const unique = new Set();
+    for (let hit = regex.exec(source); hit !== null; hit = regex.exec(source)) {
+      const frag = hit[0].trim();
+      if (frag.length > 3) {
+        unique.add(frag);
+      }
+    }
+
+    if (unique.size > 0) {
+      results[modName] = Array.from(unique);
+    }
+  }
+
+  return results;
+}
+
+/**
+ * Gather size and regex-based counts for the bundle.
+ *
+ * @param {string} source    full bundle text
+ * @param {string} filePath  path of the bundle on disk (used for its byte size)
+ * @returns {object} version, sizeBytes, lines and the construct counts
+ */
+function computeMetrics(source, filePath) {
+  // Number of matches of a global regex in the bundle.
+  const countOf = (re) => (source.match(re) || []).length;
+
+  const sizeBytes = statSync(filePath).size;
+  const found = /VERSION[=:]"?(\d+\.\d+\.\d+)/.exec(source);
+
+  return {
+    version: found ? found[1] : 'unknown',
+    sizeBytes,
+    lines: source.split('\n').length,
+    functions: countOf(/function\s*\w*\s*\(/g),
+    asyncFunctions: countOf(/async\s+function/g),
+    arrowFunctions: countOf(/=>/g),
+    classes: countOf(/class \w+/g),
+    extends: countOf(/extends \w+/g),
+  };
+}
+
+/**
+ * CLI entry point: read the bundle, write one .js file per module into the
+ * output directory, write metrics.json, and print the manifest as JSON on
+ * stdout as the final line.
+ */
+function main() {
+  const [bundlePath, outputDir] = process.argv.slice(2);
+  if (!(bundlePath && outputDir)) {
+    console.error('Usage: node module-splitter.mjs <cli-bundle> <output-dir>');
+    process.exit(1);
+  }
+
+  mkdirSync(outputDir, { recursive: true });
+
+  console.log(`Reading bundle: ${bundlePath}`);
+  const source = readFileSync(bundlePath, 'utf-8');
+  const metrics = computeMetrics(source, bundlePath);
+  const sizeMb = (metrics.sizeBytes / 1024 / 1024).toFixed(1);
+  console.log(`  Size: ${sizeMb} MB, ${metrics.classes} classes, ${metrics.functions} functions`);
+
+  const moduleResults = {};
+
+  // Write one module file and record its statistics for the manifest.
+  const emitModule = (modName, fragments, separator) => {
+    const text = fragments.join(separator);
+    writeFileSync(join(outputDir, `${modName}.js`), text, 'utf-8');
+    moduleResults[modName] = {
+      fragments: fragments.length,
+      sizeBytes: Buffer.byteLength(text),
+    };
+    console.log(`  Module "${modName}": ${fragments.length} fragments`);
+  };
+
+  // Phase 1: keyword classification of statement chunks
+  console.log('  Splitting into statements...');
+  const statements = splitStatements(source);
+  console.log(`  ${statements.length} statements`);
+
+  const classified = classifyStatements(statements);
+  Object.entries(classified).forEach(([modName, fragments]) => {
+    emitModule(modName, fragments, '\n\n');
+  });
+
+  // Phase 2: regex extractions (telemetry, commands, classes)
+  console.log('  Extracting simple patterns...');
+  const simple = extractSimplePatterns(source);
+  Object.entries(simple).forEach(([modName, fragments]) => {
+    emitModule(modName, fragments, '\n');
+  });
+
+  // Metrics manifest: bundle metrics plus per-module statistics
+  const manifest = {
+    ...metrics,
+    sourceFile: basename(bundlePath),
+    extractedAt: new Date().toISOString(),
+    modules: moduleResults,
+  };
+  const manifestPath = join(outputDir, 'metrics.json');
+  writeFileSync(manifestPath, JSON.stringify(manifest, null, 2));
+
+  // Single-line JSON summary on stdout for the caller script
+  console.log(JSON.stringify(manifest));
+}
+
+main();
--- a/scripts/lib/rvf-builder.mjs
+++ b/scripts/lib/rvf-builder.mjs
@ -0,0 +1,259 @@
+#!/usr/bin/env node
+/**
+ * rvf-builder.mjs - Create binary RVF containers from extracted source modules.
+ *
+ * Uses the @ruvector/rvf-node native backend to produce real binary .rvf files
+ * with HNSW-indexed vector embeddings and witness chains.
+ *
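+ * Each source fragment is embedded as a deterministic vector derived from its
+ * content hash (a lightweight "fingerprint" embedding). This allows exact-match
+ * lookup and change detection across versions without requiring a full ML
+ * embedding model; because the vector comes from a cryptographic hash, it does
+ * not place similar-but-different fragments near each other.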
+ *
+ * Usage:
+ *   node scripts/lib/rvf-builder.mjs <source-dir> <output.rvf> [--meta key=val ...]
+ *
+ * source-dir  : directory with .js module files + metrics.json
+ * output.rvf  : path for the binary RVF container
+ * --meta      : optional key=value metadata pairs
+ */
+
+import { readFileSync, readdirSync, existsSync, writeFileSync } from 'fs';
+import { join, basename, resolve } from 'path';
+import { createHash } from 'crypto';
+
+// Vector dimension for fingerprint embeddings
+const DIMENSIONS = 128;
+
+/**
+ * Generate a deterministic fingerprint vector from text content.
+ *
+ * Uses SHA-256 → expand to DIMENSIONS floats in [-1, 1].
+ * This is NOT a semantic embedding but a content fingerprint that
+ * allows exact-match deduplication and change detection across versions.
+ */
+function fingerprintVector(text) {
+  const hash = createHash('sha256').update(text).digest();
+  const vec = new Float32Array(DIMENSIONS);
+
+  // Expand 32 bytes of hash into DIMENSIONS floats using a simple
+  // deterministic expansion: for each float, mix two hash bytes.
+  for (let i = 0; i < DIMENSIONS; i++) {
+    const byteA = hash[i % 32];
+    const byteB = hash[(i * 7 + 13) % 32];
+    // Map to [-1, 1]
+    vec[i] = ((byteA * 256 + byteB) / 65535) * 2 - 1;
+  }
+
+  // Normalize to unit length for cosine distance
+  let norm = 0;
+  for (let i = 0; i < DIMENSIONS; i++) norm += vec[i] * vec[i];
+  norm = Math.sqrt(norm);
+  if (norm > 0) {
+    for (let i = 0; i < DIMENSIONS; i++) vec[i] /= norm;
+  }
+
+  return vec;
+}
+
+/**
+ * Load the native rvf-node backend.
+ */
+async function loadRvfNode() {
+  // Try several possible paths for the native module
+  const candidates = [
+    resolve(process.cwd(), 'npm/packages/rvf-node/index.js'),
+    resolve(process.cwd(), 'node_modules/@ruvector/rvf-node/index.js'),
+  ];
+
+  for (const p of candidates) {
+    if (existsSync(p)) {
+      const mod = await import(p);
+      return mod.RvfDatabase ?? mod.default?.RvfDatabase ?? mod;
+    }
+  }
+  throw new Error(
+    'Could not find @ruvector/rvf-node. Tried:\n  ' + candidates.join('\n  ')
+  );
+}
+
+/**
+ * Parse --meta key=value arguments from argv.
+ */
+function parseMeta(argv) {
+  const meta = {};
+  for (let i = 0; i < argv.length; i++) {
+    if (argv[i] === '--meta' && argv[i + 1]) {
+      const [k, ...rest] = argv[i + 1].split('=');
+      meta[k] = rest.join('=');
+      i++;
+    }
+  }
+  return meta;
+}
+
+/**
+ * Main entry point.
+ */
+async function main() {
+  const args = process.argv.slice(2);
+  const sourceDir = args[0];
+  const outputRvf = args[1];
+
+  if (!sourceDir || !outputRvf) {
+    console.error(
+      'Usage: node rvf-builder.mjs <source-dir> <output.rvf> [--meta key=val ...]'
+    );
+    process.exit(1);
+  }
+
+  const meta = parseMeta(args.slice(2));
+
+  // Load native RVF module
+  let RvfDatabase;
+  try {
+    RvfDatabase = await loadRvfNode();
+  } catch (err) {
+    console.error('Failed to load @ruvector/rvf-node:', err.message);
+    process.exit(1);
+  }
+
+  // Read metrics if available
+  const metricsPath = join(sourceDir, 'metrics.json');
+  let metrics = {};
+  if (existsSync(metricsPath)) {
+    metrics = JSON.parse(readFileSync(metricsPath, 'utf-8'));
+  }
+
+  // Collect all .js module files
+  const moduleFiles = readdirSync(sourceDir)
+    .filter((f) => f.endsWith('.js'))
+    .sort();
+
+  if (moduleFiles.length === 0) {
+    console.error(`No .js module files found in ${sourceDir}`);
+    process.exit(1);
+  }
+
+  console.log(
+    `Building RVF container: ${basename(outputRvf)} (${moduleFiles.length} modules, ${DIMENSIONS}d vectors)`
+  );
+
+  // Create the RVF database
+  const db = RvfDatabase.create(outputRvf, {
+    dimension: DIMENSIONS,
+    metric: 'Cosine',
+    profile: 0,
+    compression: 'None',
+    signing: false,
+    m: 16,
+    ef_construction: 200,
+  });
+
+  // Ingest vectors for each module fragment
+  let totalFragments = 0;
+  let vectorId = 1;
+  const idMap = {};
+
+  for (const modFile of moduleFiles) {
+    const modName = basename(modFile, '.js');
+    const content = readFileSync(join(sourceDir, modFile), 'utf-8');
+    const fragments = content.split('\n\n').filter((f) => f.trim().length > 10);
+
+    if (fragments.length === 0) continue;
+
+    // Build a flat vector array and IDs for batch ingest
+    const vectors = new Float32Array(fragments.length * DIMENSIONS);
+    const ids = [];
+
+    for (let i = 0; i < fragments.length; i++) {
+      const vec = fingerprintVector(fragments[i]);
+      vectors.set(vec, i * DIMENSIONS);
+      ids.push(vectorId);
+      idMap[vectorId] = {
+        module: modName,
+        fragmentIndex: i,
+        sizeBytes: Buffer.byteLength(fragments[i]),
+        hash: createHash('sha256').update(fragments[i]).digest('hex').slice(0, 16),
+      };
+      vectorId++;
+    }
+
+    const result = db.ingestBatch(vectors, ids);
+    totalFragments += result.accepted;
+    console.log(
+      `  ${modName}: ${result.accepted} vectors ingested (${fragments.length} fragments)`
+    );
+  }
+
+  // Get final status
+  const status = db.status();
+  const fileId = db.fileId();
+  const segments = db.segments();
+
+  // Write the ID mapping sidecar (extends the default .idmap.json)
+  const sidecarPath = outputRvf + '.manifest.json';
+  const manifest = {
+    format: 'rvf-binary',
+    version: '1.0',
+    fileId,
+    dimensions: DIMENSIONS,
+    metric: 'cosine',
+    totalVectors: status.totalVectors,
+    totalSegments: status.totalSegments,
+    fileSizeBytes: status.fileSize,
+    epoch: status.currentEpoch,
+    segments: segments.map((s) => ({
+      id: s.id,
+      type: s.segType,
+      offset: s.offset,
+      payloadLength: s.payloadLength,
+    })),
+    source: {
+      package: meta.package || '@anthropic-ai/claude-code',
+      version: meta.version || metrics.version || 'unknown',
+      extractedAt: metrics.extractedAt || new Date().toISOString(),
+      metrics: {
+        bundleSizeBytes: metrics.sizeBytes || 0,
+        classes: metrics.classes || 0,
+        functions: metrics.functions || 0,
+        asyncFunctions: metrics.asyncFunctions || 0,
+        arrowFunctions: metrics.arrowFunctions || 0,
+      },
+    },
+    modules: Object.entries(metrics.modules || {}).map(([name, info]) => ({
+      name,
+      ...info,
+    })),
+    idMap,
+    meta,
+    createdAt: new Date().toISOString(),
+  };
+
+  writeFileSync(sidecarPath, JSON.stringify(manifest, null, 2));
+
+  db.close();
+
+  console.log(`\nRVF container created successfully:`);
+  console.log(`  File: ${outputRvf}`);
+  console.log(`  File ID: ${fileId}`);
+  console.log(`  Vectors: ${totalFragments}`);
+  console.log(`  Segments: ${status.totalSegments}`);
+  console.log(`  Size: ${(status.fileSize / 1024).toFixed(1)} KB`);
+  console.log(`  Manifest: ${sidecarPath}`);
+
+  // Output JSON for caller
+  const result = {
+    success: true,
+    path: outputRvf,
+    fileId,
+    vectors: totalFragments,
+    segments: status.totalSegments,
+    sizeBytes: status.fileSize,
+  };
+  console.log(JSON.stringify(result));
+}
+
+main().catch((err) => {
+  console.error('Fatal error:', err);
+  process.exit(1);
+});