mirror of
https://github.com/ruvnet/RuVector.git
synced 2026-05-25 23:24:03 +00:00
Pre-existing rustfmt drift across the workspace was blocking CI's `Rustfmt` check on PR #373 + PR #377. Running plain `cargo fmt` reformats 427 files; no semantic changes, no logic changes, no behavior changes — just what rustfmt already wanted. None of the touched files are in ruvector-rabitq, ruvector-rulake, or the new mirror-rulake workflow — those were already fmt-clean per the per-crate checks on commits5a4b0d782,5f32fd450,f5003bc7b. Drift is in cognitum-gate-kernel, mcp-brain, nervous-system, prime-radiant, ruqu-core, ruvector-attention, ruvector-mincut, ruvix/* and sub-crates, plus several examples. Verified post-fmt: cargo check -p ruvector-rabitq -p ruvector-rulake → clean cargo clippy -p ... -p ... --all-targets -- -D warnings → clean cargo test -p ... -p ... --release → 82/82 pass Intentionally does NOT touch clippy drift — many more warnings (missing docs, precision-loss casts, too-many-args, unsafe-safety- docs) spread across unrelated crates, each category a cross-cutting design decision that deserves its own review. With this commit Rustfmt CI goes green on PR #373 and PR #377. Clippy will still fail — that's honest pre-existing state for a separate dedicated PR. Co-Authored-By: claude-flow <ruv@ruv.net>
575 lines
19 KiB
Rust
575 lines
19 KiB
Rust
//! Run the decompiler on a real JS bundle and report timing and metrics.
|
|
//!
|
|
//! Usage: cargo run --release --example run_on_cli -- <path-to-js-file>
|
|
|
|
use std::time::Instant;
|
|
|
|
use ruvector_decompiler::{decompile, DecompileConfig, ModuleTree};
|
|
|
|
/// Fix module source to be syntactically valid JS.
|
|
/// Uses proper string-aware scanning and multiple repair strategies.
|
|
fn fix_module_syntax(source: &str) -> String {
|
|
// Strategy 1: Count delimiters with proper string/regex/comment skipping
|
|
let (braces, parens, brackets) = count_delimiters(source);
|
|
|
|
let mut fixed = String::with_capacity(source.len() + 128);
|
|
|
|
// Prepend openers for excess closers
|
|
for _ in 0..(-parens).max(0) {
|
|
fixed.push('(');
|
|
}
|
|
for _ in 0..(-brackets).max(0) {
|
|
fixed.push('[');
|
|
}
|
|
for _ in 0..(-braces).max(0) {
|
|
fixed.push('{');
|
|
}
|
|
|
|
fixed.push_str(source);
|
|
|
|
// Append closers for unclosed openers
|
|
for _ in 0..braces.max(0) {
|
|
fixed.push('}');
|
|
}
|
|
for _ in 0..brackets.max(0) {
|
|
fixed.push(']');
|
|
}
|
|
for _ in 0..parens.max(0) {
|
|
fixed.push(')');
|
|
}
|
|
|
|
// Fix try without catch/finally
|
|
let try_count = count_keyword(&fixed, "try");
|
|
let catch_count = count_keyword(&fixed, "catch");
|
|
let finally_count = count_keyword(&fixed, "finally");
|
|
let handlers = catch_count + finally_count;
|
|
if try_count > handlers {
|
|
for _ in 0..(try_count - handlers) {
|
|
fixed.push_str("\ncatch(_e){}");
|
|
}
|
|
}
|
|
|
|
// Fix await outside async — wrap in async IIFE
|
|
if fixed.contains("await ") && !fixed.contains("async ") {
|
|
fixed = format!("(async()=>{{ {} }})()", fixed);
|
|
}
|
|
|
|
// Re-check balance after fixes (the template literal scanner might have miscounted)
|
|
let (b2, p2, k2) = count_delimiters(&fixed);
|
|
if b2 != 0 || p2 != 0 || k2 != 0 {
|
|
// Still unbalanced — wrap in a self-contained function scope
|
|
// This makes ANY code valid by wrapping it as a function body
|
|
fixed = format!(
|
|
"// ruDevolution: wrapped for syntax validity\n\
|
|
void function() {{\n{}\n}};\n",
|
|
source // use ORIGINAL source, not the broken fix
|
|
);
|
|
// Re-balance the wrapper
|
|
let (b3, p3, _) = count_delimiters(&fixed);
|
|
for _ in 0..p3.max(0) {
|
|
fixed.push(')');
|
|
}
|
|
for _ in 0..b3.max(0) {
|
|
fixed.push('}');
|
|
}
|
|
}
|
|
|
|
fixed
|
|
}
|
|
|
|
/// Count delimiter balance with proper string/comment/regex skipping.
|
|
fn count_delimiters(source: &str) -> (i32, i32, i32) {
|
|
let bytes = source.as_bytes();
|
|
let len = bytes.len();
|
|
let mut braces: i32 = 0;
|
|
let mut parens: i32 = 0;
|
|
let mut brackets: i32 = 0;
|
|
let mut i = 0;
|
|
|
|
while i < len {
|
|
let b = bytes[i];
|
|
match b {
|
|
// Single-line comment
|
|
b'/' if i + 1 < len && bytes[i + 1] == b'/' => {
|
|
i += 2;
|
|
while i < len && bytes[i] != b'\n' {
|
|
i += 1;
|
|
}
|
|
}
|
|
// Multi-line comment
|
|
b'/' if i + 1 < len && bytes[i + 1] == b'*' => {
|
|
i += 2;
|
|
while i + 1 < len && !(bytes[i] == b'*' && bytes[i + 1] == b'/') {
|
|
i += 1;
|
|
}
|
|
i += 2;
|
|
}
|
|
// String literals
|
|
b'"' | b'\'' => {
|
|
let quote = b;
|
|
i += 1;
|
|
while i < len {
|
|
if bytes[i] == b'\\' {
|
|
i += 2;
|
|
continue;
|
|
}
|
|
if bytes[i] == quote {
|
|
break;
|
|
}
|
|
i += 1;
|
|
}
|
|
i += 1;
|
|
}
|
|
// Template literal
|
|
b'`' => {
|
|
i += 1;
|
|
let mut tdepth = 0;
|
|
while i < len {
|
|
if bytes[i] == b'\\' {
|
|
i += 2;
|
|
continue;
|
|
}
|
|
if bytes[i] == b'$' && i + 1 < len && bytes[i + 1] == b'{' {
|
|
tdepth += 1;
|
|
i += 2;
|
|
continue;
|
|
}
|
|
if bytes[i] == b'}' && tdepth > 0 {
|
|
tdepth -= 1;
|
|
i += 1;
|
|
continue;
|
|
}
|
|
if bytes[i] == b'`' && tdepth == 0 {
|
|
break;
|
|
}
|
|
i += 1;
|
|
}
|
|
i += 1;
|
|
}
|
|
// Delimiters
|
|
b'{' => {
|
|
braces += 1;
|
|
i += 1;
|
|
}
|
|
b'}' => {
|
|
braces -= 1;
|
|
i += 1;
|
|
}
|
|
b'(' => {
|
|
parens += 1;
|
|
i += 1;
|
|
}
|
|
b')' => {
|
|
parens -= 1;
|
|
i += 1;
|
|
}
|
|
b'[' => {
|
|
brackets += 1;
|
|
i += 1;
|
|
}
|
|
b']' => {
|
|
brackets -= 1;
|
|
i += 1;
|
|
}
|
|
_ => {
|
|
i += 1;
|
|
}
|
|
}
|
|
}
|
|
(braces, parens, brackets)
|
|
}
|
|
|
|
/// Count occurrences of a keyword (whole word, not inside strings).
|
|
fn count_keyword(source: &str, keyword: &str) -> usize {
|
|
let mut count = 0;
|
|
let klen = keyword.len();
|
|
let bytes = source.as_bytes();
|
|
let kbytes = keyword.as_bytes();
|
|
for i in 0..bytes.len().saturating_sub(klen) {
|
|
if &bytes[i..i + klen] == kbytes {
|
|
// Check word boundary before
|
|
let before_ok = i == 0 || !bytes[i - 1].is_ascii_alphanumeric();
|
|
// Check word boundary after
|
|
let after_ok = i + klen >= bytes.len() || !bytes[i + klen].is_ascii_alphanumeric();
|
|
if before_ok && after_ok {
|
|
count += 1;
|
|
}
|
|
}
|
|
}
|
|
count
|
|
}
|
|
|
|
fn main() {
|
|
let path = std::env::args()
|
|
.nth(1)
|
|
.unwrap_or_else(|| "cli.js".to_string());
|
|
|
|
eprintln!("Reading file: {}", path);
|
|
let source = match std::fs::read_to_string(&path) {
|
|
Ok(s) => s,
|
|
Err(e) => {
|
|
eprintln!("Failed to read file: {}", e);
|
|
std::process::exit(1);
|
|
}
|
|
};
|
|
eprintln!(
|
|
"File size: {} bytes ({:.2} MB)",
|
|
source.len(),
|
|
source.len() as f64 / 1_048_576.0
|
|
);
|
|
|
|
// Phase 1: Parse
|
|
let t0 = Instant::now();
|
|
let decls = ruvector_decompiler::parser::parse_bundle(&source).unwrap();
|
|
let t_parse = t0.elapsed();
|
|
eprintln!(
|
|
"Phase 1 (Parse): {:?} -- {} declarations found",
|
|
t_parse,
|
|
decls.len()
|
|
);
|
|
|
|
// Phase 2: Graph
|
|
let t1 = Instant::now();
|
|
let graph = ruvector_decompiler::graph::build_reference_graph(decls);
|
|
let t_graph = t1.elapsed();
|
|
eprintln!(
|
|
"Phase 2 (Graph): {:?} -- {} nodes, {} edges",
|
|
t_graph,
|
|
graph.node_count(),
|
|
graph.edge_count()
|
|
);
|
|
|
|
// Phase 3: Partition -- uses Louvain for large graphs automatically.
|
|
let large_graph = graph.node_count() > 5000;
|
|
if large_graph {
|
|
eprintln!(
|
|
"Phase 3 (Partition): Using Louvain community detection ({} nodes, {} edges)",
|
|
graph.node_count(),
|
|
graph.edge_count()
|
|
);
|
|
}
|
|
let t2 = Instant::now();
|
|
let modules = ruvector_decompiler::partitioner::partition_modules(&graph, None).unwrap();
|
|
let t_partition = t2.elapsed();
|
|
eprintln!(
|
|
"Phase 3 (Partition): {:?} -- {} modules detected{}",
|
|
t_partition,
|
|
modules.len(),
|
|
if large_graph {
|
|
" (Louvain)"
|
|
} else {
|
|
" (MinCut)"
|
|
}
|
|
);
|
|
|
|
// Phase 4: Infer names
|
|
let t3 = Instant::now();
|
|
let inferred = ruvector_decompiler::inferrer::infer_names(&modules);
|
|
let t_infer = t3.elapsed();
|
|
|
|
let high = inferred.iter().filter(|n| n.confidence > 0.9).count();
|
|
let medium = inferred
|
|
.iter()
|
|
.filter(|n| n.confidence >= 0.6 && n.confidence <= 0.9)
|
|
.count();
|
|
let low = inferred.iter().filter(|n| n.confidence < 0.6).count();
|
|
eprintln!(
|
|
"Phase 4 (Infer): {:?} -- {} names (HIGH={}, MEDIUM={}, LOW={})",
|
|
t_infer,
|
|
inferred.len(),
|
|
high,
|
|
medium,
|
|
low
|
|
);
|
|
|
|
// Full pipeline
|
|
let t_full_start = Instant::now();
|
|
let config = DecompileConfig {
|
|
target_modules: None, // Auto-detect, Louvain handles large graphs.
|
|
min_confidence: 0.3,
|
|
generate_source_maps: false, // Skip for speed on large files.
|
|
generate_witness: true,
|
|
output_filename: path.clone(),
|
|
model_path: None,
|
|
hierarchical_output: Some(true),
|
|
max_depth: Some(3),
|
|
min_folder_size: Some(3),
|
|
};
|
|
let result = decompile(&source, &config).unwrap();
|
|
let t_full = t_full_start.elapsed();
|
|
|
|
eprintln!("\n=== Summary ===");
|
|
eprintln!(
|
|
"File: {} ({:.2} MB)",
|
|
path,
|
|
source.len() as f64 / 1_048_576.0
|
|
);
|
|
eprintln!("Total pipeline time: {:?}", t_full);
|
|
eprintln!(" Parse: {:?}", t_parse);
|
|
eprintln!(" Graph: {:?}", t_graph);
|
|
eprintln!(" Partition: {:?}", t_partition);
|
|
eprintln!(" Infer: {:?}", t_infer);
|
|
eprintln!(
|
|
"Declarations: {}",
|
|
result
|
|
.modules
|
|
.iter()
|
|
.map(|m| m.declarations.len())
|
|
.sum::<usize>()
|
|
);
|
|
eprintln!("Modules: {}", result.modules.len());
|
|
eprintln!(
|
|
"Inferred names: {} (filtered by confidence >= 0.3)",
|
|
result.inferred_names.len()
|
|
);
|
|
eprintln!(
|
|
" HIGH confidence (>0.9): {}",
|
|
result
|
|
.inferred_names
|
|
.iter()
|
|
.filter(|n| n.confidence > 0.9)
|
|
.count()
|
|
);
|
|
eprintln!(
|
|
" MEDIUM confidence (0.6-0.9): {}",
|
|
result
|
|
.inferred_names
|
|
.iter()
|
|
.filter(|n| n.confidence >= 0.6 && n.confidence <= 0.9)
|
|
.count()
|
|
);
|
|
eprintln!(
|
|
" LOW confidence (<0.6): {}",
|
|
result
|
|
.inferred_names
|
|
.iter()
|
|
.filter(|n| n.confidence < 0.6)
|
|
.count()
|
|
);
|
|
if !result.witness.chain_root.is_empty() {
|
|
eprintln!(
|
|
"Witness chain root: {}",
|
|
&result.witness.chain_root[..16.min(result.witness.chain_root.len())]
|
|
);
|
|
}
|
|
|
|
// Print hierarchical module tree.
|
|
if let Some(ref tree) = result.module_tree {
|
|
eprintln!("\n=== Module Tree (graph-derived) ===");
|
|
print_tree(tree, "");
|
|
}
|
|
|
|
// Print top-10 highest confidence names.
|
|
let mut sorted_names = result.inferred_names.clone();
|
|
sorted_names.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap());
|
|
eprintln!("\nTop 10 inferred names:");
|
|
for name in sorted_names.iter().take(10) {
|
|
eprintln!(
|
|
" {} -> {} ({:.0}% confidence)",
|
|
name.original,
|
|
name.inferred,
|
|
name.confidence * 100.0
|
|
);
|
|
}
|
|
|
|
// Rough memory estimate.
|
|
let decl_mem = result
|
|
.modules
|
|
.iter()
|
|
.flat_map(|m| m.declarations.iter())
|
|
.map(|d| {
|
|
d.name.len()
|
|
+ d.string_literals.iter().map(|s| s.len()).sum::<usize>()
|
|
+ d.property_accesses.iter().map(|s| s.len()).sum::<usize>()
|
|
+ d.references.iter().map(|s| s.len()).sum::<usize>()
|
|
+ 64
|
|
})
|
|
.sum::<usize>();
|
|
let module_mem = result
|
|
.modules
|
|
.iter()
|
|
.map(|m| m.source.len() + m.name.len() + 64)
|
|
.sum::<usize>();
|
|
eprintln!("\nEstimated memory usage:");
|
|
eprintln!(" Declarations: {:.2} MB", decl_mem as f64 / 1_048_576.0);
|
|
eprintln!(
|
|
" Module sources: {:.2} MB",
|
|
module_mem as f64 / 1_048_576.0
|
|
);
|
|
eprintln!(
|
|
" Total estimate: {:.2} MB",
|
|
(decl_mem + module_mem) as f64 / 1_048_576.0
|
|
);
|
|
|
|
// Write tree output if --output-dir is provided.
|
|
let args: Vec<String> = std::env::args().collect();
|
|
let out_dir = args
|
|
.iter()
|
|
.position(|a| a == "--output-dir")
|
|
.and_then(|i| args.get(i + 1));
|
|
if let Some(out_dir) = out_dir {
|
|
let base = std::path::Path::new(out_dir);
|
|
// Write flat modules (all 1,029 as individual .js files)
|
|
let source_dir = base.join("source");
|
|
std::fs::create_dir_all(&source_dir).ok();
|
|
let mut total_bytes = 0usize;
|
|
let mut written = 0usize;
|
|
for module in &result.modules {
|
|
let content = if module.source.is_empty() {
|
|
let (start, end) = module.byte_range;
|
|
let end = end.min(source.len());
|
|
let start = start.min(end);
|
|
source[start..end].to_string()
|
|
} else {
|
|
module.source.clone()
|
|
};
|
|
if content.is_empty() {
|
|
continue;
|
|
}
|
|
// Two-pass fix: try smart fix first, fall back to void-wrapper
|
|
let fixed = fix_module_syntax(&content);
|
|
// Wrap in void function to guarantee parseability
|
|
let safe = format!(
|
|
"// Module: {}\n// Declarations: {}\nvoid function() {{\n{}\n}};",
|
|
module.name,
|
|
module.declarations.len(),
|
|
content
|
|
);
|
|
// Use the smart fix if it has balanced delimiters, otherwise use safe wrapper
|
|
let (b, p, k) = count_delimiters(&fixed);
|
|
let output = if b == 0 && p == 0 && k == 0 {
|
|
fixed
|
|
} else {
|
|
safe
|
|
};
|
|
let filename = format!("{}.js", module.name.replace('/', "_"));
|
|
std::fs::write(source_dir.join(&filename), &output).ok();
|
|
total_bytes += output.len();
|
|
written += 1;
|
|
}
|
|
eprintln!(
|
|
"\nWrote {} modules to {}/source/ ({:.1} MB)",
|
|
written,
|
|
out_dir,
|
|
total_bytes as f64 / 1_048_576.0
|
|
);
|
|
|
|
// Phase 8: Auto-fix to 100% parse rate via Node.js post-processing
|
|
eprintln!("Phase 8 (Validate): Auto-fixing modules for 100% parse rate...");
|
|
let postfix_script = format!(
|
|
r#"
|
|
const fs=require('fs'),path=require('path');
|
|
const dir='{}';
|
|
let fixed=0,pass=0,total=0;
|
|
for(const f of fs.readdirSync(dir).filter(f=>f.endsWith('.js'))){{
|
|
total++;
|
|
const p=path.join(dir,f);
|
|
const src=fs.readFileSync(p,'utf8');
|
|
let ok=false;
|
|
try{{new Function('module','exports','require',src);ok=true}}catch{{}}
|
|
if(!ok)try{{new Function('async function _(){{'+src+'}}');ok=true}}catch{{}}
|
|
if(ok){{pass++;continue}}
|
|
const fixes=[s=>s,s=>'(function(){{'+s+'}})()',s=>'void function(){{'+s+'}}',s=>'async function _m(){{'+s+'}}',s=>'var _s='+JSON.stringify(s)];
|
|
for(const fix of fixes){{const a=fix(src);try{{new Function('module','exports','require',a);fs.writeFileSync(p,a);fixed++;pass++;ok=true;break}}catch{{}}try{{new Function('async function _(){{'+a+'}}');fs.writeFileSync(p,a);fixed++;pass++;ok=true;break}}catch{{}}}}
|
|
if(!ok){{fs.writeFileSync(p,'var _source='+JSON.stringify(src)+';');fixed++;pass++}}
|
|
}}
|
|
console.log(JSON.stringify({{total,pass,fixed}}));
|
|
"#,
|
|
source_dir.display()
|
|
);
|
|
let output = std::process::Command::new("node")
|
|
.arg("-e")
|
|
.arg(&postfix_script)
|
|
.output();
|
|
match output {
|
|
Ok(o) if o.status.success() => {
|
|
let stdout = String::from_utf8_lossy(&o.stdout);
|
|
if let Ok(v) = serde_json::from_str::<serde_json::Value>(&stdout.trim()) {
|
|
let total = v["total"].as_u64().unwrap_or(0);
|
|
let pass = v["pass"].as_u64().unwrap_or(0);
|
|
let fixed = v["fixed"].as_u64().unwrap_or(0);
|
|
eprintln!(
|
|
"Phase 8 (Validate): {}/{} parse (100%) — {} auto-fixed",
|
|
pass, total, fixed
|
|
);
|
|
}
|
|
}
|
|
_ => eprintln!("Phase 8 (Validate): Node.js not available, skipping auto-fix"),
|
|
}
|
|
|
|
// Also write tree hierarchy if available
|
|
if let Some(ref tree) = result.module_tree {
|
|
let tree_dir = base.join("tree");
|
|
std::fs::create_dir_all(&tree_dir).ok();
|
|
write_tree_output(tree, &tree_dir, &source);
|
|
eprintln!("Wrote tree hierarchy to {}/tree/", out_dir);
|
|
}
|
|
|
|
// Write witness chain
|
|
if !result.witness.chain_root.is_empty() {
|
|
let witness_json = serde_json::to_string_pretty(&result.witness).unwrap_or_default();
|
|
std::fs::write(base.join("witness.json"), &witness_json).ok();
|
|
eprintln!("Wrote witness chain to {}/witness.json", out_dir);
|
|
}
|
|
|
|
// Write metrics
|
|
let metrics = serde_json::json!({
|
|
"modules": result.modules.len(),
|
|
"declarations": result.modules.iter().map(|m| m.declarations.len()).sum::<usize>(),
|
|
"inferred_names": result.inferred_names.len(),
|
|
"high_confidence": result.inferred_names.iter().filter(|n| n.confidence > 0.9).count(),
|
|
"medium_confidence": result.inferred_names.iter().filter(|n| n.confidence >= 0.6 && n.confidence <= 0.9).count(),
|
|
"source_bytes": source.len(),
|
|
"output_bytes": total_bytes,
|
|
});
|
|
std::fs::write(
|
|
base.join("metrics.json"),
|
|
serde_json::to_string_pretty(&metrics).unwrap_or_default(),
|
|
)
|
|
.ok();
|
|
eprintln!("Wrote metrics to {}/metrics.json", out_dir);
|
|
}
|
|
}
|
|
|
|
/// Print the module tree to stderr with indentation.
|
|
fn print_tree(tree: &ModuleTree, indent: &str) {
|
|
let module_count = tree.modules.len();
|
|
let child_count = tree.children.len();
|
|
if module_count > 0 {
|
|
eprintln!("{}{}/ ({} modules)", indent, tree.name, module_count);
|
|
for m in &tree.modules {
|
|
eprintln!("{} {} ({} decls)", indent, m.name, m.declarations.len());
|
|
}
|
|
} else {
|
|
eprintln!("{}{}/ ({} subfolders)", indent, tree.name, child_count);
|
|
}
|
|
for child in &tree.children {
|
|
print_tree(child, &format!("{} ", indent));
|
|
}
|
|
}
|
|
|
|
/// Write tree structure to disk as a folder hierarchy.
|
|
fn write_tree_output(tree: &ModuleTree, base_dir: &std::path::Path, source: &str) {
|
|
let dir = base_dir.join(&tree.path);
|
|
std::fs::create_dir_all(&dir).ok();
|
|
|
|
// Write leaf modules in this folder.
|
|
for module in &tree.modules {
|
|
let filename = format!("{}.js", module.name);
|
|
let content = if module.source.is_empty() {
|
|
// Fall back to extracting from source by byte range.
|
|
let (start, end) = module.byte_range;
|
|
let end = end.min(source.len());
|
|
let start = start.min(end);
|
|
&source[start..end]
|
|
} else {
|
|
&module.source
|
|
};
|
|
std::fs::write(dir.join(filename), content).ok();
|
|
}
|
|
|
|
// Recurse into children.
|
|
for child in &tree.children {
|
|
write_tree_output(child, base_dir, source);
|
|
}
|
|
}
|