ruvector/scripts/claude-code-decompile.sh

#!/usr/bin/env bash
# claude-code-decompile.sh - Extract and analyze Claude Code CLI source
#
# Extracts the bundled JavaScript from the Claude Code binary or npm package,
# applies basic beautification, and splits into logical modules.
#
# Usage: ./scripts/claude-code-decompile.sh [output-dir]
#
# Output directory defaults to ./claude-code-extracted/

# Strict mode: stop on command failure, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Destination for all generated files; the first CLI argument overrides it.
OUTPUT_DIR=${1:-./claude-code-extracted}

# Filled in by find_source with whichever kind of source it locates.
CLI_JS=""
BINARY=""

# ANSI color sequences, stored as literal backslash escapes; the logging
# helpers expand them at print time.
NC='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'

# Print an informational message to stdout with a green "[+]" prefix.
# Only the color variables are escape-expanded (%b); the message itself is
# printed verbatim (%s) so backslashes in paths or code are not interpreted.
# Globals: GREEN, NC (read)
log() { printf '%b[+]%b %s\n' "$GREEN" "$NC" "$*"; }
# Print a warning to stdout with a yellow "[!]" prefix.
# Only the color variables are escape-expanded (%b); the message itself is
# printed verbatim (%s) so backslashes in paths or code are not interpreted.
# Globals: YELLOW, NC (read)
warn() { printf '%b[!]%b %s\n' "$YELLOW" "$NC" "$*"; }
# Print an error message to stderr with a red "[-]" prefix.
# Only the color variables are escape-expanded (%b); the message itself is
# printed verbatim (%s) so backslashes in paths or code are not interpreted.
# Globals: RED, NC (read)
err() { printf '%b[-]%b %s\n' "$RED" "$NC" "$*" >&2; }

# Find the Claude Code source.
#
# Looks for the npm-distributed cli.js first, then for an installed binary.
#
# Globals:
#   HOME   (read)
#   CLI_JS (written) - path to cli.js when an npm package is found
#   BINARY (written) - path to the binary when only a binary is found
# Outputs:
#   progress via log, failure via err, install hints on stdout
# Returns:
#   0 when a source was located, 1 otherwise
find_source() {
    local candidate entry newest npm_root

    # Method 1: NPM package (preferred - cleaner JS)
    # Ask npm once. When npm is missing or fails the root is empty, and
    # prefixing it would produce bogus absolute paths such as
    # "/@anthropic-ai/...", so those candidates are skipped entirely.
    npm_root="$(npm root -g 2>/dev/null || true)"
    local npm_paths=()
    if [[ -n "$npm_root" ]]; then
        npm_paths+=(
            "${npm_root}/claude-flow/node_modules/@anthropic-ai/claude-code/cli.js"
            "${npm_root}/@anthropic-ai/claude-code/cli.js"
        )
    fi
    npm_paths+=("./node_modules/@anthropic-ai/claude-code/cli.js")

    for candidate in "${npm_paths[@]}"; do
        if [[ -f "$candidate" ]]; then
            CLI_JS="$candidate"
            log "Found NPM package: $CLI_JS"
            return 0
        fi
    done

    # Method 2: Bun SEA binary
    local bin_paths=(
        "$HOME/.local/bin/claude"
        "$HOME/.local/share/claude/versions/"
        "/usr/local/bin/claude"
    )
    for candidate in "${bin_paths[@]}"; do
        if [[ -f "$candidate" ]]; then
            BINARY="$(readlink -f "$candidate" 2>/dev/null || echo "$candidate")"
            log "Found binary: $BINARY"
            return 0
        elif [[ -d "$candidate" ]]; then
            # Pick the most recently modified regular file in the directory.
            # Done with a glob and -nt instead of parsing `ls -t`: an empty
            # directory made ls fail, which under errexit+pipefail killed
            # the script before the remaining candidates were tried.
            newest=""
            for entry in "${candidate%/}"/*; do
                [[ -f "$entry" ]] || continue
                if [[ -z "$newest" || "$entry" -nt "$newest" ]]; then
                    newest="$entry"
                fi
            done
            if [[ -n "$newest" ]]; then
                BINARY="$newest"
                log "Found binary: $BINARY"
                return 0
            fi
        fi
    done

    err "Could not find Claude Code binary or npm package"
    echo "Install via: npm install -g @anthropic-ai/claude-code"
    echo "Or ensure claude is installed: claude --version"
    return 1
}

# Pull printable strings out of a binary and keep the JavaScript-looking ones.
#
# Arguments:
#   $1 - path to the binary
#   $2 - output directory (must already exist)
# Outputs:
#   $2/raw-strings.txt  - everything `strings` reports
#   $2/js-fragments.txt - the subset of lines matching common JS keywords
extract_from_binary() {
    local bin_path="$1"
    local out_dir="$2"
    local all_strings="${out_dir}/raw-strings.txt"
    local js_only="${out_dir}/js-fragments.txt"
    local js_pattern='(function\s|class\s|=>\s*\{|export\s|import\s|require\(|async\s|await\s|const\s|let\s|var\s)'
    local raw_count fragment_count

    log "Extracting strings from binary ($(du -h "$bin_path" | cut -f1))..."
    strings "$bin_path" > "$all_strings"

    raw_count=$(wc -l < "$all_strings")
    log "Extracted $raw_count string fragments"

    # Keep only lines that look like JavaScript; finding none is not an error.
    log "Filtering JavaScript patterns..."
    grep -E "$js_pattern" "$all_strings" > "$js_only" 2>/dev/null || true

    fragment_count=$(wc -l < "$js_only")
    log "Found $fragment_count JS-like fragments"
}

# Process the cli.js bundle.
#
# Keeps a pristine copy of the bundle, writes a crudely beautified copy, and
# records size/count metrics.
#
# Arguments:
#   $1 - path to the bundled cli.js
#   $2 - output directory (must already exist)
# Outputs:
#   $2/cli.js.original, $2/cli.beautified.js, $2/metrics.txt
process_bundle() {
    local source="$1"
    local output="$2"

    log "Processing bundle: $(du -h "$source" | cut -f1)"

    # Copy original
    cp "$source" "${output}/cli.js.original"

    # Basic beautification: add newlines at statement boundaries.
    # This is purely textual, so it also splits inside string literals.
    log "Beautifying (adding newlines at statement boundaries)..."
    sed -e 's/;/;\n/g' -e 's/{/{\n/g' -e 's/}/}\n/g' "$source" \
        > "${output}/cli.beautified.js"

    local beautified_lines
    beautified_lines=$(wc -l < "${output}/cli.beautified.js")
    log "Beautified: $beautified_lines lines"

    # Extract metrics
    log "Computing code metrics..."
    {
        echo "=== Claude Code Source Metrics ==="
        echo "Date: $(date -Iseconds)"
        echo "Source: $source"
        echo "Original size: $(wc -c < "$source") bytes"
        echo "Original lines: $(wc -l < "$source")"
        echo "Beautified lines: $beautified_lines"
        echo ""
        echo "--- Counts ---"
        # All counters count occurrences (-o | wc -l), not matching lines:
        # a minified bundle has very few lines, so line counts say nothing.
        echo "Functions: $(grep -oP 'function\s*\w*\s*\(' "$source" | wc -l)"
        echo "Async functions: $(grep -oP 'async\s+function' "$source" | wc -l)"
        echo "Arrow functions: $(grep -oP '=>' "$source" | wc -l)"
        echo "Classes: $(grep -oP 'class \w+' "$source" | wc -l)"
        echo "Extends: $(grep -oP 'extends \w+' "$source" | wc -l)"
        echo "For-await loops: $(grep -o 'for await' "$source" | wc -l)"
        echo "Yield statements: $(grep -o 'yield' "$source" | wc -l)"
        echo ""
        echo "--- Node.js Imports ---"
        # grep exits 1 when nothing matches; under errexit+pipefail that would
        # abort the whole script mid-report, so an empty section is tolerated.
        grep -oP 'from"[^"]*"' "$source" | sort -u | grep -P 'from"(node:|assert|child_process|crypto|events|fs|http|https|module|net|os|path|process|stream|tty|url|util|zlib)' || true
        echo ""
        echo "--- Class Definitions ---"
        grep -oP 'class \w+( extends \w+)?' "$source" | sort -u || true
    } > "${output}/metrics.txt"

    log "Metrics saved to ${output}/metrics.txt"
}

# Carve the source into per-topic fragment files using keyword matching.
#
# Arguments:
#   $1 - file to scan (the bundle, or the JS fragments from a binary)
#   $2 - output directory; results are written to $2/modules/*.txt
# Outputs:
#   one .txt file per topic, plus a per-file fragment count via log
split_modules() {
    local input="$1"
    local out_root="$2"
    local dest="${out_root}/modules"
    mkdir -p "$dest"

    log "Splitting into logical modules..."

    # Context extractions: for each topic, grab every keyword hit together
    # with up to 200 characters on either side. The two arrays are parallel:
    # topic_names[i] is the output file stem for topic_keywords[i].
    local -a topic_names=(
        tools
        permissions
        mcp
        streaming
        compaction
        agent-loop
    )
    local -a topic_keywords=(
        'BashTool|FileReadTool|FileEditTool|FileWriteTool|AgentOutputTool|WebFetch|WebSearch|TodoWrite|NotebookEdit|GlobTool|GrepTool'
        'permission|Permission|canUseTool|alwaysAllowRules|denyWrite|sandbox|Sandbox'
        'mcp__|McpClient|McpServer|McpError|callTool|listTools|initialize'
        'content_block_delta|message_start|message_stop|message_delta|content_block_start|content_block_stop|stream_event|text_delta|input_json_delta'
        'compact|compaction|tengu_compact|microcompact|auto_compact|compact_boundary|preCompactTokenCount|postCompactTokenCount'
        'agentLoop|mainLoop|s\$\(|querySource|toolUseContext|systemPrompt'
    )
    local idx
    for idx in "${!topic_names[@]}"; do
        # No match (or a grep error) just leaves an empty file.
        grep -oP ".{0,200}(${topic_keywords[idx]}).{0,200}" \
            "$input" > "${dest}/${topic_names[idx]}.txt" 2>/dev/null || true
    done

    # Exact-token extractions, de-duplicated.
    # Telemetry event names
    grep -oP '"tengu_[^"]*"' "$input" \
        | sort -u > "${dest}/telemetry-events.txt" 2>/dev/null || true

    # name/description pairs (tool names, commands, etc.)
    grep -oP 'name:"[a-z][-a-z]*",description:"[^"]*"' "$input" \
        | sort -u > "${dest}/commands.txt" 2>/dev/null || true

    # Subclass relationships
    grep -oP 'class \w+ extends \w+' "$input" \
        | sort -u > "${dest}/class-hierarchy.txt" 2>/dev/null || true

    # Report how many lines each module file ended up with.
    local module_file module count
    for module_file in "${dest}"/*.txt; do
        module=${module_file##*/}
        module=${module%.txt}
        count=$(wc -l < "$module_file")
        log "  Module '$module': $count fragments"
    done
}

# Generate RVF files from extracted modules.
#
# Each $1/modules/<name>.txt is wrapped into $1/rvf/<name>.rvf: a metadata
# header between "---" lines followed by the fragments in a fenced code block.
#
# Arguments:
#   $1 - output directory containing modules/ (and, when the source was the
#        npm bundle, cli.js.original, which is used for version detection)
generate_rvf() {
    local modules_dir="$1/modules"
    local rvf_dir="$1/rvf"
    mkdir -p "$rvf_dir"

    log "Generating RVF files..."

    # Version detection: take the first VERSION:"..." occurrence and pull an
    # x.y.z number out of it. This is done without a `grep | head -1 | grep`
    # pipeline: under pipefail, head closing the pipe early can make the
    # pipeline "fail" even though a version was printed, which used to append
    # the "unknown" fallback to a valid version.
    local version="unknown"
    local version_hits first_hit
    version_hits=$(grep -oP 'VERSION:"[^"]*"' "$1/cli.js.original" 2>/dev/null || true)
    first_hit=${version_hits%%$'\n'*}
    if [[ "$first_hit" =~ [0-9]+\.[0-9]+\.[0-9]+ ]]; then
        version="${BASH_REMATCH[0]}"
    fi

    local f name rvf_file
    for f in "${modules_dir}"/*.txt; do
        # With no module files the glob stays literal; skip it instead of
        # writing a bogus "*.rvf".
        [[ -f "$f" ]] || continue
        name=$(basename "$f" .txt)
        rvf_file="${rvf_dir}/${name}.rvf"
        {
            echo "---"
            echo "type: source-extraction"
            echo "module: ${name}"
            echo "binary: claude-code"
            echo "version: ${version}"
            echo "extraction-method: strings+pattern-match"
            echo "confidence: medium"
            echo "fragments: $(wc -l < "$f")"
            echo "---"
            echo ""
            echo "# ${name} - Extracted Fragments"
            echo ""
            echo '```javascript'
            cat "$f"
            echo '```'
        } > "$rvf_file"
        log "  Created ${rvf_file}"
    done
}

# Main: locate the Claude Code source, run the matching extraction pipeline,
# and print a summary.
#
# Globals:
#   OUTPUT_DIR (read)      - directory receiving all generated files
#   CLI_JS, BINARY (read)  - set by find_source; CLI_JS takes precedence
# Returns:
#   1 when no source could be located, 0 otherwise
main() {
    log "Claude Code Decompiler"
    log "======================"

    mkdir -p "$OUTPUT_DIR"

    # Checked explicitly rather than relying on errexit to stop the script.
    find_source || return 1

    if [[ -n "$CLI_JS" ]]; then
        process_bundle "$CLI_JS" "$OUTPUT_DIR"
        split_modules "$CLI_JS" "$OUTPUT_DIR"
        generate_rvf "$OUTPUT_DIR"
    elif [[ -n "$BINARY" ]]; then
        extract_from_binary "$BINARY" "$OUTPUT_DIR"
        # Only continue when fragments were actually found. The file is
        # created even when nothing matched, so test for non-empty (-s)
        # rather than mere existence.
        if [[ -s "${OUTPUT_DIR}/js-fragments.txt" ]]; then
            split_modules "${OUTPUT_DIR}/js-fragments.txt" "$OUTPUT_DIR"
            generate_rvf "$OUTPUT_DIR"
        else
            warn "No JS-like fragments found in binary; skipping module split"
        fi
    fi

    log ""
    log "Extraction complete!"
    log "Output directory: $OUTPUT_DIR"
    log ""
    log "Key files:"
    log "  metrics.txt        - Code metrics and counts"
    log "  cli.beautified.js  - Beautified bundle (if from NPM)"
    log "  modules/           - Split by logical module"
    log "  rvf/               - RVF files with metadata headers"

    # Summary: metrics.txt is only written by process_bundle (npm path).
    if [[ -f "${OUTPUT_DIR}/metrics.txt" ]]; then
        echo ""
        head -15 "${OUTPUT_DIR}/metrics.txt"
    fi
}

# Script entry point. The command-line arguments are forwarded to main, but
# main does not read them: the optional output directory ($1) has already
# been captured into OUTPUT_DIR at file scope.
main "$@"