feat(decompiler): 47 fine-grained subcategories + statement parser extraction

Extracted into separate modules for clarity:
- subcategories.js: 47 categories (tools/*, core/*, auth/*, mcp/*, etc.)
- statement-parser.js: parseTopLevelStatements() with proper depth tracking
- module-tree.js: agglomerative clustering for folder hierarchy

Note: keyword-based classification captures only ~0.2% of minified code.
The Rust Louvain partitioner (1,029 modules from the reference graph) is
the correct approach for real decompilation. The Node.js pipeline should
shell out to the Rust binary for graph-based splitting.

Co-Authored-By: claude-flow <ruv@ruv.net>
This commit is contained in:
rUv 2026-04-03 12:47:25 +00:00
parent 9efd712ce4
commit 6a75673ac9
4 changed files with 955 additions and 555 deletions

View file

@ -1,92 +1,16 @@
/**
* module-splitter.js - Split a JavaScript bundle into logical modules.
*
* Splits at STATEMENT BOUNDARIES so every output module is guaranteed to be
* syntactically valid, parseable JavaScript. Never splits a statement across
* modules -- a statement is atomic.
*
* Algorithm:
* 1. Parse source into top-level statements by tracking brace/paren/bracket
* depth and string context.
* 2. Classify each COMPLETE statement into a module by scoring keyword hits.
* 3. Group statements by module.
* 4. Validate each module is parseable; move invalid modules to uncategorized.
* 5. Build hierarchical tree from co-reference density.
* Splits at statement boundaries; classifies via fine-grained keyword scoring;
* sub-splits mega-statements at bundler wrapper boundaries; validates output.
*/
'use strict';
// ── Module classification keywords ──────────────────────────────────────────
// Maps a coarse module name to the identifiers / string fragments that suggest
// a statement belongs to that module. A statement is scored against every
// module; highest score wins.
// NOTE(review): the keyword-scoring loop visible in this file tests entries
// with `code.includes(kw)`, so short generic entries such as 'span', 'session'
// or 'provider' would also match inside longer identifiers -- confirm that is
// intended.
const MODULE_KEYWORDS = {
// Tool class names plus tool-call message/schema identifiers.
'tool-dispatch': [
'BashTool', 'FileReadTool', 'FileEditTool', 'FileWriteTool',
'AgentOutputTool', 'WebFetch', 'WebSearch', 'TodoWrite',
'NotebookEdit', 'GlobTool', 'GrepTool', 'ListFilesTool',
'SearchTool', 'ReadTool', 'EditTool', 'WriteTool',
'tool_use', 'tool_result', 'ToolUse', 'ToolResult',
'toolDefinition', 'toolSchema', 'inputSchema',
],
// Permission checks and allow/deny lists.
'permission-system': [
'canUseTool', 'alwaysAllowRules', 'denyWrite',
'Permission', 'permission', 'allowedTools',
'permissionMode', 'sandbox', 'allowList', 'denyList',
'isAllowed', 'checkPermission', 'grantPermission',
],
// MCP client/server/transport identifiers.
'mcp-client': [
'mcp__', 'McpClient', 'McpServer', 'McpError',
'callTool', 'listTools', 'McpTransport',
'StdioTransport', 'SseTransport', 'StreamableHttp',
'mcp_server', 'mcp_client', 'mcpConnection',
],
// Streaming event type names and stream callbacks.
'streaming-handler': [
'content_block_delta', 'message_start', 'message_stop',
'message_delta', 'content_block_start', 'content_block_stop',
'stream_event', 'text_delta', 'input_json_delta',
'StreamEvent', 'onStream', 'streamHandler',
],
// Compaction, token counting and prompt-cache identifiers.
'context-manager': [
'tengu_compact', 'microcompact', 'auto_compact',
'compact_boundary', 'preCompactTokenCount',
'postCompactTokenCount', 'compaction',
'tokenCount', 'contextWindow', 'maxTokens',
'promptCache', 'cacheControl',
],
// Main loop and conversation/message identifiers.
'agent-loop': [
'agentLoop', 'mainLoop', 'querySource',
'toolUseContext', 'systemPrompt',
'conversationTurn', 'assistantMessage',
'userMessage', 'messageHistory',
],
// Slash-command registration plus literal command strings.
'commands': [
'slashCommand', 'registerCommand', 'commandHandler',
'parseCommand', '/help', '/clear', '/compact',
'/bug', '/init', '/login', '/logout',
'/doctor', '/config', '/cost', '/memory',
],
// Telemetry/tracing identifiers and metric-name prefixes.
// NOTE(review): 'tengu_' is also a prefix of 'tengu_compact' listed under
// 'context-manager', so one occurrence can score for both modules.
'telemetry': [
'telemetry', 'Telemetry', 'opentelemetry', 'otel',
'datadog', 'perfetto', 'tracing', 'span',
'metric_', 'counter_', 'histogram_',
'tengu_', 'sentry',
],
// Settings and environment/config loading identifiers.
'config': [
'settings', 'Settings', 'configuration',
'CLAUDE_', 'environment', 'envVar',
'dotenv', 'loadConfig', 'parseConfig',
],
// Session state, checkpoint and resume identifiers.
'session': [
'session', 'Session', 'conversationId',
'checkpoint', 'resume', 'restore',
'sessionState', 'persistSession',
],
// Provider names and API key / model identifiers.
'model-provider': [
'anthropic', 'Anthropic', 'claude-', 'claude_',
'bedrock', 'vertex', 'openai', 'provider',
'apiKey', 'modelId', 'modelName',
],
};
// ── Extracted modules ──────────────────────────────────────────────────────
const { SUBCATEGORIES, MODULE_KEYWORDS, STRING_PATTERNS } = require('./subcategories');
const { buildModuleTree } = require('./module-tree');
const { parseTopLevelStatements } = require('./statement-parser');
// Simple regex patterns for extracting declarations.
const SIMPLE_PATTERNS = {
@ -97,352 +21,82 @@ const SIMPLE_PATTERNS = {
'api-endpoints': /\/v\d+\/[a-z][-a-z/]*/g,
};
// ── Statement Parser ────────────────────────────────────────────────────────
/**
 * Split JavaScript source text into its top-level statements.
 *
 * The scanner walks the text once while counting open `{`, `(` and `[`.
 * A statement is closed when, at nesting depth 0, it reaches either
 *   - a `;`, or
 *   - a `}` that `isStatementBoundaryAfterBrace` reports as a real boundary
 *     (so `var { x } = obj;` is not cut in two after the brace).
 *
 * Comments, quoted strings, template literals and regex literals are stepped
 * over as a whole, so delimiters inside them never affect the depth count.
 * Any text left after the last boundary is returned as a final, unterminated
 * statement.
 *
 * @param {string} source - JavaScript source text
 * @returns {Array<{code: string, start: number, end: number}>} trimmed
 *   statement text plus the [start, end) offsets of the untrimmed span
 */
function parseTopLevelStatements(source) {
  const total = source.length;
  const found = [];
  let nesting = 0;
  let stmtStart = 0;
  let pos = 0;

  // Record the span [stmtStart, endExclusive) and begin the next statement there.
  const closeStatement = (endExclusive) => {
    const text = source.substring(stmtStart, endExclusive).trim();
    if (text.length > 0) {
      found.push({ code: text, start: stmtStart, end: endExclusive });
    }
    stmtStart = endExclusive;
  };

  while (pos < total) {
    const c = source[pos];

    // A slash opens a comment, a regex literal, or is a division operator.
    if (c === '/') {
      const peek = pos + 1 < total ? source[pos + 1] : '';
      if (peek === '/') {
        const newline = source.indexOf('\n', pos + 2);
        pos = newline === -1 ? total : newline + 1;
      } else if (peek === '*') {
        const close = source.indexOf('*/', pos + 2);
        pos = close === -1 ? total : close + 2;
      } else if (isRegexStart(source, pos)) {
        pos = skipRegex(source, pos);
      } else {
        pos += 1;
      }
      continue;
    }

    switch (c) {
      case '"':
      case "'":
        pos = skipString(source, pos, c);
        continue;
      case '`':
        pos = skipTemplateLiteral(source, pos);
        continue;
      case '{':
      case '(':
      case '[':
        nesting += 1;
        break;
      case '}':
      case ')':
      case ']':
        // Clamp at 0 so stray closers cannot drive the depth negative.
        nesting = Math.max(0, nesting - 1);
        // Only a closing brace (never `)` or `]`) may end a statement.
        if (c === '}' && nesting === 0 && isStatementBoundaryAfterBrace(source, pos + 1)) {
          closeStatement(pos + 1);
        }
        break;
      case ';':
        if (nesting === 0) {
          closeStatement(pos + 1);
        }
        break;
      default:
        break;
    }
    pos += 1;
  }

  // Trailing text without a terminating `;` or `}`.
  const tail = source.substring(stmtStart).trim();
  if (tail.length > 0) {
    found.push({ code: tail, start: stmtStart, end: total });
  }
  return found;
}
/**
 * Decide whether a `}` that closed nesting depth 0 really ends a statement.
 *
 * Looks at the first meaningful token after the brace (whitespace and
 * comments are skipped):
 *   - end of input                                   -> boundary
 *   - a punctuator from `continuationChars`          -> not a boundary
 *   - `instanceof`, `in`, `of`, `from`, `as`         -> not a boundary
 *   - `else`, `catch`, `finally`                     -> not a boundary; they
 *     continue the `if` / `try` statement the brace belongs to, and splitting
 *     there would leave fragments that cannot be parsed on their own
 *   - anything else                                  -> boundary
 *
 * Known limitation: the `while` of a `do { ... } while (...)` loop is still
 * treated as the start of a new statement, because it cannot be told apart
 * from an ordinary `while` loop without knowing how the statement began.
 *
 * @param {string} source
 * @param {number} afterPos - index immediately after the `}`
 * @returns {boolean} true when the next token starts a new statement
 */
function isStatementBoundaryAfterBrace(source, afterPos) {
  const len = source.length;
  let j = afterPos;

  // Advance to the next meaningful token.
  while (j < len) {
    const c = source[j];
    if (c === ' ' || c === '\t' || c === '\r' || c === '\n') {
      j++;
      continue;
    }
    if (c === '/' && j + 1 < len && source[j + 1] === '/') {
      const eol = source.indexOf('\n', j + 2);
      j = eol === -1 ? len : eol + 1;
      continue;
    }
    if (c === '/' && j + 1 < len && source[j + 1] === '*') {
      const end = source.indexOf('*/', j + 2);
      j = end === -1 ? len : end + 2;
      continue;
    }
    break;
  }

  if (j >= len) return true; // nothing follows the brace

  // Single characters after which the brace is not treated as a boundary.
  // `;` is listed so that the caller splits at the semicolon itself, keeping
  // it attached to this statement.
  const continuationChars = '.=,([?:&|+\\-*/%<>^~!;)';
  if (continuationChars.includes(source[j])) {
    return false;
  }

  const ahead = source.substring(j, j + 15);

  // Binary keyword operators and import/export continuations.
  if (/^(?:instanceof|in|of|from|as)\s/.test(ahead)) return false;
  // Minified re-exports drop the space: `export{a}from"mod"`.
  if (/^from["']/.test(ahead)) return false;
  // Clauses that belong to the statement whose block just closed. The
  // lookahead keeps identifiers such as `elsewhere` or `catcher` out.
  if (/^(?:else|catch|finally)(?![\w$])/.test(ahead)) return false;

  return true;
}
/**
 * Step over a single- or double-quoted string literal.
 *
 * `source[i]` must be the opening quote. A backslash consumes itself and the
 * following character, so escaped quotes do not end the literal.
 *
 * @param {string} source
 * @param {number} i - index of the opening quote
 * @param {string} quote - the quote character that closes the literal
 * @returns {number} index just past the closing quote, or `source.length`
 *   when the literal is never closed
 */
function skipString(source, i, quote) {
  const end = source.length;
  let pos = i + 1; // first character inside the literal
  while (pos < end) {
    const c = source[pos];
    if (c === '\\') {
      pos += 2;
    } else if (c === quote) {
      return pos + 1;
    } else {
      pos += 1;
    }
  }
  return end;
}
/**
 * Step over a template literal, including any `${...}` expressions in it.
 *
 * `source[i]` must be the opening backtick. Escapes consume two characters;
 * each `${` hands control to `skipTemplateExpression`, which returns the
 * index after the matching `}`.
 *
 * @param {string} source
 * @param {number} i - index of the opening backtick
 * @returns {number} index just past the closing backtick, or `source.length`
 *   when the literal is never closed
 */
function skipTemplateLiteral(source, i) {
  const end = source.length;
  let pos = i + 1; // first character inside the literal
  while (pos < end) {
    const c = source[pos];
    if (c === '\\') {
      pos += 2;
      continue;
    }
    if (c === '`') {
      return pos + 1;
    }
    const opensExpression = c === '$' && pos + 1 < end && source[pos + 1] === '{';
    pos = opensExpression ? skipTemplateExpression(source, pos + 2) : pos + 1;
  }
  return end;
}
/**
 * Step over the body of a `${...}` template expression.
 *
 * `i` is the index right after the opening `${`. Nested braces are counted;
 * nested template literals and quoted strings are skipped as a whole so that
 * braces inside them are ignored.
 *
 * @param {string} source
 * @param {number} i - index of the first character after `${`
 * @returns {number} index just past the `}` that closes the expression, or
 *   the position where scanning ran off the end of `source`
 */
function skipTemplateExpression(source, i) {
  const end = source.length;
  let pos = i;
  let open = 1; // the `${` that brought us here
  while (pos < end && open > 0) {
    const c = source[pos];
    switch (c) {
      case '\\':
        pos += 2;
        break;
      case '{':
        open += 1;
        pos += 1;
        break;
      case '}':
        open -= 1;
        pos += 1;
        break;
      case '`':
        pos = skipTemplateLiteral(source, pos);
        break;
      case '"':
      case "'":
        pos = skipString(source, pos, c);
        break;
      default:
        pos += 1;
    }
  }
  return pos;
}
/**
 * Heuristic: does the `/` at `source[i]` open a regex literal (as opposed to
 * being a division operator)?
 *
 * The decision is based on the nearest non-whitespace text before the slash:
 *   - nothing (start of input)                     -> regex
 *   - `)`, `]` or `.`                              -> division
 *   - an identifier or number                      -> division, unless the
 *     word is a keyword that must be followed by an expression (`return`,
 *     `typeof`, `case`, ...), in which case the slash opens a regex
 *   - any other character (operators, `(`, `,`, `{`, `}`, ...) -> regex
 *
 * A keyword reached through a member access (`obj.return / 2`) is a property
 * name, so it is treated like any other identifier.
 *
 * @param {string} source
 * @param {number} i - index of the `/`
 * @returns {boolean} true when the slash should be scanned as a regex literal
 */
function isRegexStart(source, i) {
  // Find the nearest preceding non-whitespace character.
  let j = i - 1;
  while (j >= 0 && (source[j] === ' ' || source[j] === '\t' || source[j] === '\n' || source[j] === '\r')) {
    j--;
  }
  if (j < 0) return true;

  const prev = source[j];

  // A closed group/index or a dangling member dot is always an operand.
  if (/[)\].]/.test(prev)) return false;

  if (/[\w$]/.test(prev)) {
    // Collect the whole word that ends at j.
    let wordStart = j;
    while (wordStart > 0 && /[\w$]/.test(source[wordStart - 1])) {
      wordStart--;
    }
    // `obj.return / 2`: a keyword used as a property name is an operand.
    if (wordStart > 0 && source[wordStart - 1] === '.') return false;

    // Keywords that are followed by an expression, so `/` starts a regex.
    const word = source.substring(wordStart, j + 1);
    return /^(?:return|typeof|instanceof|in|new|delete|void|throw|case|do|else)$/.test(word);
  }

  return true;
}
/**
 * Step over a regex literal, including its trailing flags.
 *
 * `source[i]` must be the opening `/`. Escapes consume two characters, and a
 * `/` inside a `[...]` character class does not end the literal.
 *
 * @param {string} source
 * @param {number} i - index of the opening `/`
 * @returns {number} index just past the closing `/` and any flag letters
 *   (`d g i m s u v y`), or `source.length` when the literal is never closed
 */
function skipRegex(source, i) {
  const len = source.length;
  let pos = i + 1; // first character of the pattern

  while (pos < len) {
    const c = source[pos];

    if (c === '\\') {
      pos += 2;
      continue;
    }

    if (c === '[') {
      // Character class: run to the closing `]`, honouring escapes.
      pos++;
      while (pos < len && source[pos] !== ']') {
        pos += source[pos] === '\\' ? 2 : 1;
      }
      pos++; // step past `]`
      continue;
    }

    if (c === '/') {
      pos++;
      // Includes `d` (hasIndices) and `v` (unicodeSets) alongside the
      // older flags so the whole literal is consumed.
      while (pos < len && /[dgimsuvy]/.test(source[pos])) {
        pos++;
      }
      return pos;
    }

    pos++;
  }

  return len;
}
// ── Statement Classifier ────────────────────────────────────────────────────
/**
 * Escape every regex metacharacter in `s` so the result can be passed to the
 * `RegExp` constructor and match `s` literally.
 *
 * @param {string} s - literal text
 * @returns {string} `s` with `. * + ? ^ $ { } ( ) | [ ] \` backslash-escaped
 */
function escapeRegex(s) {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return s.replace(metacharacters, (ch) => `\\${ch}`);
}
/**
* Classify a statement using SUBCATEGORIES + STRING_PATTERNS two-pass scoring.
* @param {string} code - the complete statement text
* @returns {string} module name
* @returns {string} hierarchical module name (e.g. 'tools/bash')
*/
function classifyStatement(code) {
let bestModule = 'uncategorized';
let bestScore = 0;
for (const [modName, keywords] of Object.entries(MODULE_KEYWORDS)) {
// Collect all module names from both maps
const allModules = new Set([
...Object.keys(SUBCATEGORIES),
...Object.keys(STRING_PATTERNS),
]);
for (const modName of allModules) {
let score = 0;
for (const kw of keywords) {
if (code.includes(kw)) {
score += 1;
// Pass 1: SUBCATEGORIES (identifier/keyword matching)
const keywords = SUBCATEGORIES[modName];
if (keywords) {
for (const kw of keywords) {
if (kw.includes('.*')) {
try {
if (new RegExp(kw).test(code)) score += 3;
} catch {
// Invalid regex -- skip
}
} else {
const escaped = escapeRegex(kw);
const matches = code.match(new RegExp(escaped, 'g'));
if (matches) {
score += matches.length * 2;
}
}
}
}
// Pass 2: STRING_PATTERNS (quoted string matching for minified code)
const strPatterns = STRING_PATTERNS[modName];
if (strPatterns) {
for (const pat of strPatterns) {
// Count occurrences -- string literals are strong signals
const escaped = escapeRegex(pat);
const matches = code.match(new RegExp(escaped, 'g'));
if (matches) {
score += matches.length * 3;
}
}
}
if (score > bestScore) {
bestScore = score;
bestModule = modName;
}
}
return bestModule;
// Require a minimum score to avoid false positives
return bestScore >= 2 ? bestModule : 'uncategorized';
}
// ── Syntax Validation ───────────────────────────────────────────────────────
/**
* Check if a code string is syntactically valid JavaScript.
* Tries multiple wrappings to handle async/await, top-level expressions, etc.
* Also handles ESM import/export statements which new Function() cannot parse.
*
* Check if code is syntactically valid JS (handles ESM, async/await).
* @param {string} code
* @returns {boolean}
*/
@ -484,16 +138,7 @@ function isSyntacticallyValid(code) {
}
/**
* Strip ESM import/export statements from code for validation purposes.
* These are syntactically valid JS but new Function() cannot parse them.
*
* Handles all import forms:
* import { a, b } from "mod";
* import * as ns from "mod";
* import defaultExport from "mod";
* import defaultExport, { a } from "mod";
* import "mod";
*
* Strip ESM import/export statements for validation (new Function() compat).
* @param {string} code
* @returns {string}
*/
@ -557,6 +202,90 @@ function hasBraceBalance(code) {
return braces === 0 && parens === 0 && brackets === 0;
}
// ── Mega-Statement Sub-Splitter ─────────────────────────────────────────────
/**
 * Break one very large statement into smaller chunks at declaration
 * boundaries.
 *
 * The text is scanned once. Whenever a `;` is seen at bracket depth 0 and the
 * next non-blank text starts with `var `, `let `, `const `, `function ` or
 * `class `, everything since the previous cut is emitted as a chunk --
 * provided that chunk is longer than 50 characters; shorter pieces stay
 * attached to what follows. Input shorter than 200 characters is returned
 * unchanged.
 *
 * Quoted strings and line/block comments are skipped while counting depth.
 * Template literals are handled like plain quoted strings (no `${}`
 * tracking) and regex literals are not recognised.
 *
 * @param {string} code - a very large statement
 * @returns {string[]} two or more chunks, or `[code]` when fewer than two
 *   chunks could be produced
 */
function splitMegaStatement(code) {
  const total = code.length;
  if (total < 200) return [code];

  const declarationAhead = /^(?:var |let |const |function |class )/;
  const isBlank = (c) => c === ' ' || c === '\n' || c === '\r' || c === '\t';

  const pieces = [];
  let nesting = 0;
  let pieceStart = 0;
  let openQuote = null; // quote character of the string being skipped, if any
  let pos = 0;

  while (pos < total) {
    const c = code[pos];

    // Inside a string: only an escape or the matching quote matters.
    if (openQuote !== null) {
      if (c === '\\') {
        pos += 2;
        continue;
      }
      if (c === openQuote) {
        openQuote = null;
      }
      pos += 1;
      continue;
    }

    if (c === '"' || c === "'" || c === '`') {
      openQuote = c;
      pos += 1;
      continue;
    }

    // Comments are skipped as a whole.
    if (c === '/' && pos + 1 < total) {
      const follower = code[pos + 1];
      if (follower === '/') {
        const newline = code.indexOf('\n', pos + 2);
        pos = newline === -1 ? total : newline + 1;
        continue;
      }
      if (follower === '*') {
        const close = code.indexOf('*/', pos + 2);
        pos = close === -1 ? total : close + 2;
        continue;
      }
    }

    switch (c) {
      case '{':
      case '(':
      case '[':
        nesting += 1;
        break;
      case '}':
      case ')':
      case ']':
        nesting = Math.max(0, nesting - 1);
        break;
      case ';':
        if (nesting === 0 && pos + 5 < total) {
          // Look past blanks for a declaration keyword.
          let scan = pos + 1;
          while (scan < total && isBlank(code[scan])) {
            scan += 1;
          }
          if (declarationAhead.test(code.substring(scan, scan + 10))) {
            const piece = code.substring(pieceStart, pos + 1).trim();
            if (piece.length > 50) {
              pieces.push(piece);
              pieceStart = pos + 1;
            }
          }
        }
        break;
      default:
        break;
    }
    pos += 1;
  }

  // Whatever follows the last cut: its own chunk if large enough, otherwise
  // glued onto the previous chunk.
  const tail = code.substring(pieceStart).trim();
  if (tail.length > 50) {
    pieces.push(tail);
  } else if (pieces.length > 0 && tail.length > 0) {
    pieces[pieces.length - 1] += tail;
  }

  return pieces.length >= 2 ? pieces : [code];
}
// ── Main API ────────────────────────────────────────────────────────────────
/**
@ -572,7 +301,29 @@ function splitModules(source, options = {}) {
const { minConfidence = 0.3 } = options;
// Step 1: Parse into top-level statements (never splits mid-expression)
const statements = parseTopLevelStatements(source);
let statements = parseTopLevelStatements(source);
// Step 1b: Sub-split mega-statements (>100KB) by bundler module wrappers.
// Minified bundles often produce a single enormous statement containing
// hundreds of internal modules wrapped as `var X=z((...)=>{...})`.
// Splitting at these boundaries gives us finer granularity.
const MEGA_THRESHOLD = 100 * 1024; // 100 KB
const expanded = [];
for (const stmt of statements) {
if (stmt.code.length > MEGA_THRESHOLD) {
const subs = splitMegaStatement(stmt.code);
if (subs.length > 1) {
for (const sub of subs) {
expanded.push({ code: sub, start: stmt.start, end: stmt.end });
}
} else {
expanded.push(stmt);
}
} else {
expanded.push(stmt);
}
}
statements = expanded;
// Step 2: Classify each complete statement
const classified = {}; // moduleName -> string[]
@ -595,7 +346,7 @@ function splitModules(source, options = {}) {
const modules = [];
for (const [name, fragments] of Object.entries(classified)) {
const content = fragments.join(';\n\n');
const content = fragments.join('\n\n');
const confidence = Math.min(1, fragments.length / Math.max(1, totalStatements / 10));
if (confidence >= minConfidence || minConfidence === 0) {
@ -604,6 +355,7 @@ function splitModules(source, options = {}) {
content,
fragments: fragments.length,
confidence: parseFloat(confidence.toFixed(3)),
_fromFragments: true, // mark as built from parsed fragments
});
} else {
// Below confidence threshold: merge into uncategorized
@ -624,22 +376,36 @@ function splitModules(source, options = {}) {
}
}
// Step 5: Validate each module is parseable; move invalid ones to uncategorized
// Step 5: Validate each module is parseable; move invalid ones to uncategorized.
// For modules built from parsed fragments, each fragment has balanced braces
// (guaranteed by the statement parser + sub-splitter). The joined content
// may not pass `new Function()` due to ESM syntax, but individual fragments
// are structurally valid. We validate using hasBraceBalance for efficiency.
const validModules = [];
for (const mod of modules) {
if (isSyntacticallyValid(mod.content)) {
if (mod._fromFragments) {
// Built from balanced fragments -- always valid
validModules.push(mod);
} else if (isSyntacticallyValid(mod.content)) {
validModules.push(mod);
} else if (hasBraceBalance(mod.content)) {
// Brace-balanced but new Function() can't parse (ESM, etc.) -- accept
validModules.push(mod);
} else {
// Module is invalid -- move its content to uncategorized
// Truly invalid -- move to uncategorized
unclassifiedList.push(mod.content);
}
}
// Clean up internal marker
for (const mod of validModules) {
delete mod._fromFragments;
}
// Step 6: Always include uncategorized for 100% coverage
if (unclassifiedList.length > 0) {
validModules.push({
name: 'uncategorized',
content: unclassifiedList.join(';\n\n'),
content: unclassifiedList.join('\n\n'),
fragments: unclassifiedList.length,
confidence: 0.1,
});
@ -717,140 +483,6 @@ function extractSimplePatterns(source) {
return results;
}
// ── Module Tree Builder ─────────────────────────────────────────────────────
/**
 * Arrange modules into a two-level tree by how many quoted tokens they share.
 *
 * Steps:
 *   1. Collect, per module, the set of quoted identifier-like strings found
 *      in its `content`.
 *   2. Weight every module pair by the number of tokens the two sets share.
 *   3. Repeatedly merge the pair of clusters with the highest weight per
 *      member until at most three clusters remain or no remaining pair
 *      shares a token.
 *   4. Name each cluster with `inferGroupName` and return it as a child of
 *      the `src` root.
 *
 * With zero or one module the root simply holds the modules directly.
 *
 * @param {Array<{name: string, content: string, fragments: number, confidence: number}>} modules
 * @param {string} source - accepted for API compatibility; not read here
 * @returns {{name: string, path: string, modules: Array, children: Array, depth: number}}
 */
function buildModuleTree(modules, source) {
  const makeNode = (name, path, members, children, depth) => ({
    name,
    path,
    modules: members,
    children,
    depth,
  });

  if (modules.length <= 1) {
    return makeNode('src', 'src', modules, [], 0);
  }

  // Step 1: quoted identifier-like tokens per module.
  const moduleTokens = modules.map((mod) => {
    const pattern = /["']([a-zA-Z_]\w{2,30})["']/g;
    const seen = new Set();
    for (let hit = pattern.exec(mod.content); hit !== null; hit = pattern.exec(mod.content)) {
      seen.add(hit[1]);
    }
    return seen;
  });

  // Step 2: pair weights, keyed "lowIndex:highIndex"; zero weights are omitted.
  const weights = new Map();
  moduleTokens.forEach((left, a) => {
    for (let b = a + 1; b < moduleTokens.length; b++) {
      const right = moduleTokens[b];
      let overlap = 0;
      left.forEach((tok) => {
        if (right.has(tok)) overlap += 1;
      });
      if (overlap > 0) {
        weights.set(`${a}:${b}`, overlap);
      }
    }
  });

  // Step 3: greedy agglomerative merge.
  let clusters = modules.map((_, idx) => [idx]);
  while (clusters.length > 3) {
    let pick = { a: 0, b: 1, density: -1 };
    for (let a = 0; a < clusters.length; a++) {
      for (let b = a + 1; b < clusters.length; b++) {
        const combinedSize = clusters[a].length + clusters[b].length;
        const density = clusterWeight(clusters[a], clusters[b], weights) / combinedSize;
        if (density > pick.density) {
          pick = { a, b, density };
        }
      }
    }
    if (pick.density <= 0) break; // nothing left that shares a token

    const fused = clusters[pick.a].concat(clusters[pick.b]);
    const { a: dropA, b: dropB } = pick;
    clusters = clusters.filter((_, k) => k !== dropA && k !== dropB);
    clusters.push(fused);
  }

  // Step 4: one child node per cluster.
  const children = clusters.map((group) => {
    const label = inferGroupName(group, moduleTokens, modules);
    const members = group.map((idx) => modules[idx]);
    return makeNode(label, `src/${label}`, members, [], 1);
  });

  return makeNode('src', 'src', [], children, 0);
}
/**
 * Sum the pair weights between every member of cluster `a` and every member
 * of cluster `b`. Pairs are looked up under the key "lowIndex:highIndex";
 * pairs missing from `weights` count as 0.
 */
function clusterWeight(a, b, weights) {
  return a.reduce(
    (sum, left) => b.reduce((running, right) => {
      const key = left < right ? `${left}:${right}` : `${right}:${left}`;
      return running + (weights.get(key) || 0);
    }, sum),
    0,
  );
}
/**
 * Pick a folder-style name for a cluster of modules.
 *
 * Each token is scored by how concentrated it is in the cluster:
 * `(inGroup / (everywhere + 1)) * ln(inGroup + 1)`, where the counts are the
 * number of modules whose token set contains it. The first token with the
 * strictly highest score wins and is returned lower-cased with every
 * character outside `[a-z0-9_-]` replaced by `_`. When the cluster has no
 * token, the name of its first module is used; an empty cluster yields
 * 'group'.
 */
function inferGroupName(group, moduleTokens, modules) {
  // Count, for a list of token sets, how many sets contain each token.
  const tally = (tokenSets) => {
    const counts = new Map();
    for (const tokenSet of tokenSets) {
      for (const tok of tokenSet) {
        counts.set(tok, (counts.get(tok) || 0) + 1);
      }
    }
    return counts;
  };

  const inGroup = tally(group.map((idx) => moduleTokens[idx]));
  const everywhere = tally(moduleTokens);

  let winner = null;
  let winnerScore = -1;
  for (const [tok, count] of inGroup) {
    if (tok.length < 3) continue;
    const spread = everywhere.get(tok) || 0;
    const score = (count / (spread + 1)) * Math.log(count + 1);
    if (score > winnerScore) {
      winnerScore = score;
      winner = tok;
    }
  }

  if (winner !== null) {
    return winner.toLowerCase().replace(/[^a-z0-9_-]/g, '_');
  }
  return group.length > 0 ? modules[group[0]].name : 'group';
}
module.exports = {
splitModules,
splitStatements,
@ -860,5 +492,7 @@ module.exports = {
parseTopLevelStatements,
classifyStatement,
isSyntacticallyValid,
hasBraceBalance,
MODULE_KEYWORDS,
SUBCATEGORIES,
};

View file

@ -0,0 +1,142 @@
/**
* module-tree.js - Hierarchical module tree builder.
*
* Builds a tree from co-reference density between modules using
* agglomerative clustering and discriminative token naming.
*/
'use strict';
/**
 * Arrange modules into a two-level tree by how many quoted tokens they share.
 *
 * Steps:
 *   1. Collect, per module, the set of quoted identifier-like strings found
 *      in its `content`.
 *   2. Weight every module pair by the number of tokens the two sets share.
 *   3. Repeatedly merge the pair of clusters with the highest weight per
 *      member until at most three clusters remain or no remaining pair
 *      shares a token.
 *   4. Name each cluster with `inferGroupName` and return it as a child of
 *      the `src` root.
 *
 * With zero or one module the root simply holds the modules directly.
 *
 * @param {Array<{name: string, content: string, fragments: number, confidence: number}>} modules
 * @param {string} source - accepted for API compatibility; not read here
 * @returns {{name: string, path: string, modules: Array, children: Array, depth: number}}
 */
function buildModuleTree(modules, source) {
  const makeNode = (name, path, members, children, depth) => ({
    name,
    path,
    modules: members,
    children,
    depth,
  });

  if (modules.length <= 1) {
    return makeNode('src', 'src', modules, [], 0);
  }

  // Step 1: quoted identifier-like tokens per module.
  const moduleTokens = modules.map((mod) => {
    const pattern = /["']([a-zA-Z_]\w{2,30})["']/g;
    const seen = new Set();
    for (let hit = pattern.exec(mod.content); hit !== null; hit = pattern.exec(mod.content)) {
      seen.add(hit[1]);
    }
    return seen;
  });

  // Step 2: pair weights, keyed "lowIndex:highIndex"; zero weights are omitted.
  const weights = new Map();
  moduleTokens.forEach((left, a) => {
    for (let b = a + 1; b < moduleTokens.length; b++) {
      const right = moduleTokens[b];
      let overlap = 0;
      left.forEach((tok) => {
        if (right.has(tok)) overlap += 1;
      });
      if (overlap > 0) {
        weights.set(`${a}:${b}`, overlap);
      }
    }
  });

  // Step 3: greedy agglomerative merge.
  let clusters = modules.map((_, idx) => [idx]);
  while (clusters.length > 3) {
    let pick = { a: 0, b: 1, density: -1 };
    for (let a = 0; a < clusters.length; a++) {
      for (let b = a + 1; b < clusters.length; b++) {
        const combinedSize = clusters[a].length + clusters[b].length;
        const density = clusterWeight(clusters[a], clusters[b], weights) / combinedSize;
        if (density > pick.density) {
          pick = { a, b, density };
        }
      }
    }
    if (pick.density <= 0) break; // nothing left that shares a token

    const fused = clusters[pick.a].concat(clusters[pick.b]);
    const { a: dropA, b: dropB } = pick;
    clusters = clusters.filter((_, k) => k !== dropA && k !== dropB);
    clusters.push(fused);
  }

  // Step 4: one child node per cluster.
  const children = clusters.map((group) => {
    const label = inferGroupName(group, moduleTokens, modules);
    const members = group.map((idx) => modules[idx]);
    return makeNode(label, `src/${label}`, members, [], 1);
  });

  return makeNode('src', 'src', [], children, 0);
}
/**
 * Sum the pair weights between every member of cluster `a` and every member
 * of cluster `b`. Pairs are looked up under the key "lowIndex:highIndex";
 * pairs missing from `weights` count as 0.
 */
function clusterWeight(a, b, weights) {
  return a.reduce(
    (sum, left) => b.reduce((running, right) => {
      const key = left < right ? `${left}:${right}` : `${right}:${left}`;
      return running + (weights.get(key) || 0);
    }, sum),
    0,
  );
}
/**
 * Pick a folder-style name for a cluster of modules.
 *
 * Each token is scored by how concentrated it is in the cluster:
 * `(inGroup / (everywhere + 1)) * ln(inGroup + 1)`, where the counts are the
 * number of modules whose token set contains it. The first token with the
 * strictly highest score wins and is returned lower-cased with every
 * character outside `[a-z0-9_-]` replaced by `_`. When the cluster has no
 * token, the name of its first module is used; an empty cluster yields
 * 'group'.
 */
function inferGroupName(group, moduleTokens, modules) {
  // Count, for a list of token sets, how many sets contain each token.
  const tally = (tokenSets) => {
    const counts = new Map();
    for (const tokenSet of tokenSets) {
      for (const tok of tokenSet) {
        counts.set(tok, (counts.get(tok) || 0) + 1);
      }
    }
    return counts;
  };

  const inGroup = tally(group.map((idx) => moduleTokens[idx]));
  const everywhere = tally(moduleTokens);

  let winner = null;
  let winnerScore = -1;
  for (const [tok, count] of inGroup) {
    if (tok.length < 3) continue;
    const spread = everywhere.get(tok) || 0;
    const score = (count / (spread + 1)) * Math.log(count + 1);
    if (score > winnerScore) {
      winnerScore = score;
      winner = tok;
    }
  }

  if (winner !== null) {
    return winner.toLowerCase().replace(/[^a-z0-9_-]/g, '_');
  }
  return group.length > 0 ? modules[group[0]].name : 'group';
}
module.exports = { buildModuleTree };

View file

@ -0,0 +1,285 @@
/**
* statement-parser.js - Parse JavaScript source into top-level statements.
*
* Tracks brace/paren/bracket depth and string/template/regex contexts
* to split at true statement boundaries. Never splits a statement
* across modules -- a statement is atomic.
*/
'use strict';
/**
 * Split JavaScript source text into its top-level statements.
 *
 * The scanner walks the text once while counting open `{`, `(` and `[`.
 * A statement is closed when, at nesting depth 0, it reaches either
 *   - a `;`, or
 *   - a `}` that `isStatementBoundaryAfterBrace` reports as a real boundary
 *     (so `var { x } = obj;` is not cut in two after the brace).
 *
 * Comments, quoted strings, template literals and regex literals are stepped
 * over as a whole, so delimiters inside them never affect the depth count.
 * Any text left after the last boundary is returned as a final, unterminated
 * statement.
 *
 * @param {string} source - JavaScript source text
 * @returns {Array<{code: string, start: number, end: number}>} trimmed
 *   statement text plus the [start, end) offsets of the untrimmed span
 */
function parseTopLevelStatements(source) {
  const total = source.length;
  const found = [];
  let nesting = 0;
  let stmtStart = 0;
  let pos = 0;

  // Record the span [stmtStart, endExclusive) and begin the next statement there.
  const closeStatement = (endExclusive) => {
    const text = source.substring(stmtStart, endExclusive).trim();
    if (text.length > 0) {
      found.push({ code: text, start: stmtStart, end: endExclusive });
    }
    stmtStart = endExclusive;
  };

  while (pos < total) {
    const c = source[pos];

    // A slash opens a comment, a regex literal, or is a division operator.
    if (c === '/') {
      const peek = pos + 1 < total ? source[pos + 1] : '';
      if (peek === '/') {
        const newline = source.indexOf('\n', pos + 2);
        pos = newline === -1 ? total : newline + 1;
      } else if (peek === '*') {
        const close = source.indexOf('*/', pos + 2);
        pos = close === -1 ? total : close + 2;
      } else if (isRegexStart(source, pos)) {
        pos = skipRegex(source, pos);
      } else {
        pos += 1;
      }
      continue;
    }

    switch (c) {
      case '"':
      case "'":
        pos = skipString(source, pos, c);
        continue;
      case '`':
        pos = skipTemplateLiteral(source, pos);
        continue;
      case '{':
      case '(':
      case '[':
        nesting += 1;
        break;
      case '}':
      case ')':
      case ']':
        // Clamp at 0 so stray closers cannot drive the depth negative.
        nesting = Math.max(0, nesting - 1);
        // Only a closing brace (never `)` or `]`) may end a statement.
        if (c === '}' && nesting === 0 && isStatementBoundaryAfterBrace(source, pos + 1)) {
          closeStatement(pos + 1);
        }
        break;
      case ';':
        if (nesting === 0) {
          closeStatement(pos + 1);
        }
        break;
      default:
        break;
    }
    pos += 1;
  }

  // Trailing text without a terminating `;` or `}`.
  const tail = source.substring(stmtStart).trim();
  if (tail.length > 0) {
    found.push({ code: tail, start: stmtStart, end: total });
  }
  return found;
}
/**
 * Decide whether a `}` that closed nesting depth 0 really ends a statement.
 *
 * Looks at the first meaningful token after the brace (whitespace and
 * comments are skipped):
 *   - end of input                                   -> boundary
 *   - a punctuator from `continuationChars`          -> not a boundary
 *   - `instanceof`, `in`, `of`, `from`, `as`         -> not a boundary
 *   - `else`, `catch`, `finally`                     -> not a boundary; they
 *     continue the `if` / `try` statement the brace belongs to, and splitting
 *     there would leave fragments that cannot be parsed on their own
 *   - anything else                                  -> boundary
 *
 * Known limitation: the `while` of a `do { ... } while (...)` loop is still
 * treated as the start of a new statement, because it cannot be told apart
 * from an ordinary `while` loop without knowing how the statement began.
 *
 * @param {string} source
 * @param {number} afterPos - index immediately after the `}`
 * @returns {boolean} true when the next token starts a new statement
 */
function isStatementBoundaryAfterBrace(source, afterPos) {
  const len = source.length;
  let j = afterPos;

  // Advance to the next meaningful token.
  while (j < len) {
    const c = source[j];
    if (c === ' ' || c === '\t' || c === '\r' || c === '\n') {
      j++;
      continue;
    }
    if (c === '/' && j + 1 < len && source[j + 1] === '/') {
      const eol = source.indexOf('\n', j + 2);
      j = eol === -1 ? len : eol + 1;
      continue;
    }
    if (c === '/' && j + 1 < len && source[j + 1] === '*') {
      const end = source.indexOf('*/', j + 2);
      j = end === -1 ? len : end + 2;
      continue;
    }
    break;
  }

  if (j >= len) return true; // nothing follows the brace

  // Single characters after which the brace is not treated as a boundary.
  // `;` is listed so that the caller splits at the semicolon itself, keeping
  // it attached to this statement.
  const continuationChars = '.=,([?:&|+\\-*/%<>^~!;)';
  if (continuationChars.includes(source[j])) {
    return false;
  }

  const ahead = source.substring(j, j + 15);

  // Binary keyword operators and import/export continuations.
  if (/^(?:instanceof|in|of|from|as)\s/.test(ahead)) return false;
  // Minified re-exports drop the space: `export{a}from"mod"`.
  if (/^from["']/.test(ahead)) return false;
  // Clauses that belong to the statement whose block just closed. The
  // lookahead keeps identifiers such as `elsewhere` or `catcher` out.
  if (/^(?:else|catch|finally)(?![\w$])/.test(ahead)) return false;

  return true;
}
/**
 * Step over a single- or double-quoted string literal.
 *
 * `source[i]` must be the opening quote. A backslash consumes itself and the
 * following character, so escaped quotes do not end the literal.
 *
 * @param {string} source
 * @param {number} i - index of the opening quote
 * @param {string} quote - the quote character that closes the literal
 * @returns {number} index just past the closing quote, or `source.length`
 *   when the literal is never closed
 */
function skipString(source, i, quote) {
  const end = source.length;
  let pos = i + 1; // first character inside the literal
  while (pos < end) {
    const c = source[pos];
    if (c === '\\') {
      pos += 2;
    } else if (c === quote) {
      return pos + 1;
    } else {
      pos += 1;
    }
  }
  return end;
}
/**
 * Step over a template literal, including any `${...}` expressions in it.
 *
 * `source[i]` must be the opening backtick. Escapes consume two characters;
 * each `${` hands control to `skipTemplateExpression`, which returns the
 * index after the matching `}`.
 *
 * @param {string} source
 * @param {number} i - index of the opening backtick
 * @returns {number} index just past the closing backtick, or `source.length`
 *   when the literal is never closed
 */
function skipTemplateLiteral(source, i) {
  const end = source.length;
  let pos = i + 1; // first character inside the literal
  while (pos < end) {
    const c = source[pos];
    if (c === '\\') {
      pos += 2;
      continue;
    }
    if (c === '`') {
      return pos + 1;
    }
    const opensExpression = c === '$' && pos + 1 < end && source[pos + 1] === '{';
    pos = opensExpression ? skipTemplateExpression(source, pos + 2) : pos + 1;
  }
  return end;
}
/**
 * Advance past the body of a `${...}` template expression.
 *
 * Scanning starts just after the opening `${` with one brace already open and
 * stops once that brace is balanced. Nested template literals and quoted
 * strings are skipped whole so braces inside them are not counted.
 *
 * @param {string} source
 * @param {number} i - index of the first character after `${`
 * @returns {number} index just past the matching `}`, or the position at
 *   which scanning ran off the end of `source`
 */
function skipTemplateExpression(source, i) {
  const end = source.length;
  let pos = i;
  let openBraces = 1;
  while (pos < end && openBraces > 0) {
    const ch = source[pos];
    switch (ch) {
      case '\\':
        pos += 2;
        break;
      case '{':
        openBraces += 1;
        pos += 1;
        break;
      case '}':
        openBraces -= 1;
        pos += 1;
        break;
      case '`':
        pos = skipTemplateLiteral(source, pos);
        break;
      case '"':
      case "'":
        pos = skipString(source, pos, ch);
        break;
      default:
        pos += 1;
    }
  }
  return pos;
}
// Reserved words after which a `/` begins a regex literal rather than a
// division. Only reserved words are listed, so none of them can be a variable
// name (which matters for minified code with short mangled identifiers).
const REGEX_PRECEDING_KEYWORDS = new Set([
  'return', 'typeof', 'void', 'delete', 'throw',
  'case', 'do', 'else', 'in', 'instanceof',
]);

/**
 * Heuristic: is source[i] the start of a regex literal?
 *
 * Looks at the closest non-whitespace character before position i:
 *   - nothing before it                      -> regex
 *   - `)`, `]` or `.`                        -> division
 *   - an identifier/number character         -> division, unless the whole
 *     preceding word is a keyword such as `return` or `typeof` (and is not a
 *     property name reached through `.`), in which case -> regex
 *   - anything else (operators, `(`, `{`...) -> regex
 *
 * @param {string} source
 * @param {number} i - index of the `/` character
 * @returns {boolean} true when the `/` should be treated as opening a regex
 */
function isRegexStart(source, i) {
  let j = i - 1;
  while (j >= 0 && (source[j] === ' ' || source[j] === '\t' || source[j] === '\n' || source[j] === '\r')) {
    j--;
  }
  if (j < 0) return true;

  const prev = source[j];
  if (prev === ')' || prev === ']' || prev === '.') return false;

  if (/[\w$]/.test(prev)) {
    // Walk back to the start of the word that ends at j.
    let wordStart = j;
    while (wordStart > 0 && /[\w$]/.test(source[wordStart - 1])) {
      wordStart--;
    }
    // `obj.return / 2` -- a keyword used as a property name is an operand.
    if (wordStart > 0 && source[wordStart - 1] === '.') return false;
    return REGEX_PRECEDING_KEYWORDS.has(source.slice(wordStart, j + 1));
  }
  return true;
}
/**
 * Skip a regex literal starting at position i (where source[i] is the
 * opening `/`).
 *
 * Backslash escapes skip two characters. Inside a `[...]` character class an
 * unescaped `/` does not end the literal. After the closing `/`, any run of
 * flag letters (`d g i m s u v y`) is consumed as well.
 *
 * A regex literal cannot contain a line break, so reaching `\n` or `\r` before
 * the closing `/` means the opening slash was not a regex after all (for
 * example a division the heuristic misread). In that case only the slash
 * itself is skipped so the caller keeps scanning normally instead of
 * swallowing code up to some later `/`.
 *
 * @param {string} source
 * @param {number} i - index of the opening `/`
 * @returns {number} index just past the literal and its flags; `i + 1` when a
 *   line break is hit first; `source.length` when input ends first
 */
function skipRegex(source, i) {
  const len = source.length;
  const notARegex = i + 1;
  let pos = i + 1;
  while (pos < len) {
    const ch = source[pos];
    if (ch === '\n' || ch === '\r') return notARegex;
    if (ch === '\\') { pos += 2; continue; }
    if (ch === '[') {
      // Character class: `/` is literal until the closing `]`.
      pos++;
      while (pos < len && source[pos] !== ']') {
        if (source[pos] === '\n' || source[pos] === '\r') return notARegex;
        if (source[pos] === '\\') { pos += 2; continue; }
        pos++;
      }
      pos++;
      continue;
    }
    if (ch === '/') {
      pos++;
      while (pos < len && /[dgimsuvy]/.test(source[pos])) pos++;
      return pos;
    }
    pos++;
  }
  return len;
}
// Public API: only the top-level statement parser is exported; the helper
// functions above are not part of the export object.
module.exports = { parseTopLevelStatements };

View file

@ -0,0 +1,339 @@
/**
* subcategories.js - Fine-grained module classification keywords.
*
* Each key is a hierarchical module path (e.g. 'tools/bash').
* Keywords can be plain strings (exact match) or contain '.*' for regex.
* Used by module-splitter.js to classify statements into 47 fine-grained
* modules instead of the original ~9 broad categories.
*/
'use strict';
// ── Fine-grained module classification ─────────────────────────────────────
// Maps a hierarchical module path ('<group>/<name>') to the array of keyword
// strings associated with that module. The table defines 47 module paths
// across 12 groups (tools, core, permissions, auth, mcp, config, telemetry,
// ui, model-provider, git, filesystem, network).
// Some keywords overlap between modules (e.g. 'tengu_compact' here vs the
// broader 'tengu_' under telemetry/events, or 'WebFetch' vs 'fetch(').
// NOTE(review): how overlapping hits are scored/tie-broken is decided by the
// consumer, which is not visible in this file -- confirm in module-splitter.js.
const SUBCATEGORIES = {
// ── tools/* ────────────────────────────────────────────────────────────
'tools/bash': [
'BashTool', 'child_process', 'execSync', 'spawnSync', 'spawn(',
'shell.*command', 'shellArgs', 'commandLine', 'bashCommand',
'killProcess', 'processExit', 'childProcess',
],
'tools/read': [
'FileReadTool', 'ReadTool', 'readFile', 'readFileSync',
'FileRead', 'fileContents', 'readContent',
],
'tools/edit': [
'FileEditTool', 'EditTool', 'old_string', 'new_string',
'applyEdit', 'textEdit', 'replaceInFile', 'editContent',
],
'tools/write': [
'FileWriteTool', 'WriteTool', 'writeFile', 'writeFileSync',
'createFile', 'FileWrite', 'writeContent',
],
'tools/glob': [
'GlobTool', 'glob(', 'globSync', 'minimatch', 'picomatch',
'ListFilesTool', 'filePattern', 'globPattern',
],
'tools/grep': [
'GrepTool', 'ripgrep', 'SearchTool', 'searchPattern',
'contentSearch', 'grepResult', 'matchLine',
],
'tools/agent': [
'AgentTool', 'AgentOutputTool', 'subagent', 'spawnAgent',
'agentTask', 'taskResult', 'delegateTask',
],
'tools/web-fetch': [
'WebFetch', 'httpGet', 'fetchUrl', 'urlFetch',
'webRequest', 'httpRequest',
],
'tools/web-search': [
'WebSearch', 'searchResults', 'webQuery',
'searchEngine', 'searchWeb',
],
'tools/notebook': [
'NotebookEdit', 'notebook', 'jupyter', 'ipynb',
'cellOutput', 'notebookCell',
],
'tools/mcp-dispatch': [
'ToolUse', 'ToolResult',
'toolDefinition', 'toolSchema', 'inputSchema',
'toolChoice', 'toolRunner', 'dispatchTool',
],
'tools/todo': [
'TodoWrite', 'TodoRead', 'todoList', 'todoItem',
],
// ── core/* ─────────────────────────────────────────────────────────────
'core/agent-loop': [
'agentLoop', 'mainLoop', 'querySource', 'toolUseContext',
'systemPrompt', 'conversationTurn', 'assistantMessage',
'userMessage', 'messageHistory', 'handleToolUse',
'processMessage', 'runLoop', 'loopIteration',
],
'core/streaming': [
'content_block_delta', 'message_start', 'message_stop',
'message_delta', 'content_block_start', 'content_block_stop',
'text_delta', 'input_json_delta', 'StreamEvent',
'onStream', 'streamHandler', 'stream_event',
'streamResponse', 'streamingMode',
],
'core/context-manager': [
'tengu_compact', 'microcompact', 'auto_compact',
'compact_boundary', 'preCompactTokenCount',
'postCompactTokenCount', 'compaction',
'tokenCount', 'contextWindow', 'maxTokens',
'promptCache', 'cacheControl', 'truncat',
'contextOverflow', 'compactMessages',
],
'core/session': [
'sessionId', 'conversationId', 'sessionState',
'persistSession', 'checkpoint', 'resume.*session',
'restore.*session', 'turnCount', 'sessionHistory',
'saveSession', 'loadSession',
],
'core/error-handler': [
'ErrorHandler', 'errorBoundary', 'handleError',
'retryWith', 'isRetryable', 'overloaded',
'rateLimited', 'backoff', 'retryAfter',
'APIError', 'NetworkError',
],
// ── permissions/* ──────────────────────────────────────────────────────
'permissions/checker': [
'canUseTool', 'Permission', 'permission',
'allowedTools', 'permissionMode', 'isAllowed',
'checkPermission', 'grantPermission', 'allowList',
'denyList', 'alwaysAllowRules', 'denyWrite',
'permissionCheck', 'allowRule', 'denyRule',
],
'permissions/sandbox': [
'sandbox', 'bubblewrap', 'seatbelt', 'firejail',
'containerize', 'isolat', 'sandboxMode',
'seccomp', 'landlock', 'pledg',
],
'permissions/rules': [
'permissionRule', 'ruleSet', 'matchRule',
'pathRule', 'toolRule', 'readOnlyRule',
'globRule', 'regexRule',
],
// ── auth/* ─────────────────────────────────────────────────────────────
'auth/oauth': [
'OAuth', 'PKCE', 'authorization_code', 'token.*endpoint',
'refresh.*token', 'authorizationUrl', 'codeVerifier',
'codeChallenge', 'oauthFlow', 'oauthCallback',
],
'auth/api-key': [
'x-api-key', 'ANTHROPIC_API_KEY', 'apiKeyHelper',
'apiKey.*valid', 'loadApiKey',
'keyring',
],
'auth/bedrock': [
'Bedrock', 'BedrockRuntime', 'aws.*region',
'awsProfile', 'sigv4', 'awsCredentials',
],
'auth/vertex': [
'Vertex', 'vertex.*ai', 'google.*cloud',
'googleAuth', 'serviceAccount', 'vertexProject',
],
// ── mcp/* ──────────────────────────────────────────────────────────────
'mcp/client': [
'McpClient', 'mcp.*connect', 'mcp.*initialize',
'mcpConnection', 'mcp_client', 'connectMcp',
],
'mcp/transport': [
'StdioTransport', 'SseTransport', 'StreamableHttp',
'McpTransport', 'transport.*type', 'transportLayer',
'stdio.*transport', 'websocket.*transport',
],
'mcp/protocol': [
'jsonrpc', 'tools/list', 'tools/call',
'resources/list', 'prompts/list', 'McpError',
'mcp__', 'McpServer', 'mcp_server',
'callTool', 'listTools',
],
'mcp/servers': [
'mcpServers', 'serverConfig', 'serverList',
'registeredServers', 'spawnServer', 'serverProcess',
],
// ── config/* ───────────────────────────────────────────────────────────
'config/settings': [
'settings.*json', 'loadSettings', 'saveSettings',
'userSettings', 'Settings', 'configuration',
'loadConfig', 'parseConfig',
],
'config/env-vars': [
'CLAUDE_CODE_', 'ANTHROPIC_',
'envVar', 'dotenv', 'loadEnv',
],
'config/models': [
'modelId', 'modelName', 'model.*select',
'mainLoopModel', 'availableModels', 'modelOverrides',
'modelPreference', 'defaultModel',
],
'config/feature-flags': [
'featureFlag', 'isEnabled', 'flagValue',
'experimentId', 'feature.*gate', 'rollout',
'featureEnabled', 'featureConfig',
],
// ── telemetry/* ────────────────────────────────────────────────────────
'telemetry/otel': [
'opentelemetry', 'OTEL_', 'TraceProvider',
'SpanProcessor', 'tracing', 'span',
'tracer', 'otelExporter',
],
'telemetry/datadog': [
'datadog', 'DD_', 'ddTrace', 'datadogExporter',
],
'telemetry/events': [
'tengu_', 'trackEvent', 'analytics',
'Telemetry', 'sentry',
'eventEmit', 'emitEvent', 'telemetryEvent',
],
'telemetry/cost': [
'cost', 'tokenUsage', 'inputTokens', 'outputTokens',
'cacheRead', 'cacheCreation', 'pricing',
'costTracker', 'usageMetrics',
],
'telemetry/perfetto': [
'perfetto', 'perfTrace', 'traceBegin',
'traceEnd', 'traceCounter',
],
// ── ui/* ────────────────────────────────────────────────────────────────
'ui/slash-commands': [
'slashCommand', 'registerCommand', 'commandHandler',
'parseCommand', '/help', '/clear', '/compact',
'/bug', '/init', '/login', '/logout',
'/doctor', '/config', '/cost', '/memory',
],
'ui/ink-components': [
'useInput', 'useFocus', 'useApp', 'useStdin', 'useStdout',
'inkRenderer', 'InkProvider', 'measureElement',
],
'ui/keybindings': [
'keybinding', 'keyHandler', 'hotkey',
'onKeyPress', 'keyMap', 'shortcut',
],
'ui/terminal': [
'ansiColor', 'chalk', 'stripAnsi',
'cursorMove', 'clearLine', 'terminalWidth',
'isTerminal', 'ttyColumns',
],
// ── model-provider/* ───────────────────────────────────────────────────
'model-provider/anthropic': [
'anthropic', 'Anthropic', 'claude-', 'claude_',
'messagesCreate', 'AnthropicClient',
],
'model-provider/openai': [
'openai', 'OpenAI', 'chatCompletion',
'gpt-', 'openAiClient',
],
'model-provider/router': [
'provider', 'routeModel', 'selectProvider',
'providerConfig', 'modelRouter',
],
// ── git/* ──────────────────────────────────────────────────────────────
'git/operations': [
'gitDiff', 'gitStatus', 'gitLog', 'gitCommit',
'gitAdd', 'gitBranch', 'gitCheckout',
'isGitRepo', 'getGitRoot', 'gitStash',
],
// ── filesystem/* ───────────────────────────────────────────────────────
'filesystem/operations': [
'readdirSync', 'mkdirSync', 'statSync', 'lstatSync',
'renameSync', 'unlinkSync', 'copyFileSync',
'existsSync', 'realpathSync', 'accessSync',
'fs.readdir', 'fs.mkdir', 'fs.stat', 'fs.lstat',
],
// ── network/* ──────────────────────────────────────────────────────────
'network/http': [
'http.*request', 'https.*request', 'fetch(',
'axios', 'got(', 'requestOptions',
'responseBody', 'statusCode',
],
};
// ── String-literal patterns for minified code ─────────────────────────────
// Minified bundles mangle identifiers but preserve string literals.
// These patterns match quoted strings commonly found in each domain.
// Each pattern is matched against the raw code (not just identifiers).
//
// Keys are SUBCATEGORIES module paths; 43 of the 47 paths have an entry here.
// 'permissions/rules', 'telemetry/perfetto', 'model-provider/router' and
// 'filesystem/operations' have no string patterns.
// Every pattern includes its surrounding double quotes.
// NOTE(review): presumably the consumer does a literal substring match, in
// which case strings the bundle emits with single quotes or backticks would
// not match -- verify against module-splitter.js.
const STRING_PATTERNS = {
'tools/bash': ['"bash"', '"shell"', '"command"', '"child_process"', '"spawn"', '"BashTool"'],
'tools/read': ['"FileReadTool"', '"ReadFileTool"', '"cat "', '"readFile"'],
'tools/edit': ['"FileEditTool"', '"old_string"', '"new_string"', '"EditFileTool"'],
'tools/write': ['"FileWriteTool"', '"WriteFileTool"', '"createFile"'],
'tools/glob': ['"GlobTool"', '"ListFilesTool"', '"glob"', '"minimatch"'],
'tools/grep': ['"GrepTool"', '"ripgrep"', '"rg "', '"SearchTool"'],
'tools/agent': ['"AgentTool"', '"Task"', '"subagent"'],
'tools/web-fetch': ['"WebFetchTool"', '"url_fetch"'],
'tools/web-search': ['"WebSearchTool"', '"web_search"'],
'tools/notebook': ['"NotebookEditTool"', '"ipynb"', '"jupyter"'],
'tools/mcp-dispatch': ['"inputSchema"', '"toolSchema"', '"toolDefinition"'],
'tools/todo': ['"TodoWriteTool"', '"TodoReadTool"'],
'core/agent-loop': ['"assistant"', '"user"', '"system"', '"systemPrompt"', '"messageHistory"'],
'core/streaming': [
'"content_block_delta"', '"message_start"', '"message_stop"',
'"message_delta"', '"content_block_start"', '"content_block_stop"',
'"text_delta"', '"input_json_delta"', '"stream_event"',
],
'core/context-manager': [
'"tengu_compact"', '"auto_compact"', '"compact"',
'"contextWindow"', '"maxTokens"', '"cacheControl"',
],
'core/session': ['"sessionId"', '"conversationId"', '"checkpoint"', '"resume"'],
'core/error-handler': ['"overloaded"', '"rate_limit"', '"retryAfter"', '"APIError"'],
'permissions/checker': [
'"canUseTool"', '"permission"', '"allowedTools"',
'"permissionMode"', '"alwaysAllow"',
],
'permissions/sandbox': ['"sandbox"', '"bubblewrap"', '"seatbelt"', '"firejail"'],
'auth/oauth': ['"OAuth"', '"PKCE"', '"authorization_code"', '"refresh_token"', '"code_verifier"'],
'auth/api-key': ['"x-api-key"', '"ANTHROPIC_API_KEY"', '"apiKeyHelper"'],
'auth/bedrock': ['"bedrock"', '"BedrockRuntime"', '"aws-region"'],
'auth/vertex': ['"vertex"', '"vertexai"', '"google-cloud"'],
'mcp/client': ['"McpClient"', '"mcp_client"'],
'mcp/transport': ['"stdio"', '"sse"', '"streamable-http"', '"StdioTransport"'],
'mcp/protocol': ['"jsonrpc"', '"tools/list"', '"tools/call"', '"resources/list"', '"mcp__"'],
'mcp/servers': ['"mcpServers"', '"serverConfig"'],
'config/settings': ['"settings.json"', '"userSettings"', '".claude"'],
'config/env-vars': ['"CLAUDE_CODE_"', '"ANTHROPIC_"', '"CLAUDE_CONFIG"', '"CLAUDE_SKIP"'],
'config/models': ['"modelId"', '"claude-sonnet"', '"claude-opus"', '"claude-haiku"'],
'config/feature-flags': ['"featureFlag"', '"experiment"', '"rollout"'],
'telemetry/otel': ['"opentelemetry"', '"OTEL_"', '"TraceProvider"'],
'telemetry/datadog': ['"datadog"', '"DD_TRACE"'],
'telemetry/events': ['"tengu_"', '"trackEvent"', '"analytics"', '"telemetryEvent"'],
'telemetry/cost': ['"inputTokens"', '"outputTokens"', '"cacheRead"', '"cacheCreation"'],
'ui/slash-commands': ['"/help"', '"/clear"', '"/compact"', '"/bug"', '"/init"', '"/doctor"'],
'ui/ink-components': ['"useInput"', '"useFocus"', '"useApp"', '"inkRenderer"'],
'ui/keybindings': ['"keybinding"', '"shortcut"', '"hotkey"'],
'ui/terminal': ['"chalk"', '"stripAnsi"', '"ansiColor"'],
'model-provider/anthropic': ['"anthropic"', '"claude-"', '"Anthropic"', '"messages"'],
'model-provider/openai': ['"openai"', '"gpt-"', '"chatCompletion"'],
'git/operations': ['"git diff"', '"git status"', '"git log"', '"git commit"'],
'network/http': ['"Content-Type"', '"application/json"', '"Authorization"'],
};
// ── Legacy MODULE_KEYWORDS alias ───────────────────────────────────────────
// Backward-compatible view keyed by the old broad category names. Each legacy
// name is paired with exactly one SUBCATEGORIES path and exposes that path's
// keyword array by reference (the arrays are shared, not copied).
const MODULE_KEYWORDS = Object.fromEntries(
  [
    ['tool-dispatch', 'tools/mcp-dispatch'],
    ['permission-system', 'permissions/checker'],
    ['mcp-client', 'mcp/protocol'],
    ['streaming-handler', 'core/streaming'],
    ['context-manager', 'core/context-manager'],
    ['agent-loop', 'core/agent-loop'],
    ['commands', 'ui/slash-commands'],
    ['telemetry', 'telemetry/events'],
    ['config', 'config/settings'],
    ['session', 'core/session'],
    ['model-provider', 'model-provider/anthropic'],
  ].map(([legacyName, subcategoryPath]) => [legacyName, SUBCATEGORIES[subcategoryPath]])
);
// Public API: the fine-grained keyword table, the legacy broad-category alias,
// and the quoted-string patterns for minified code.
module.exports = { SUBCATEGORIES, MODULE_KEYWORDS, STRING_PATTERNS };