ruvector/crates/ruvector-decompiler/tests/model_decompiler.rs
rUv 7704c94624 feat(decompiler): LLM weight decompiler + API prober (ADR-138)
Model weight decompilation:
- GGUF v2/v3 parser (self-contained, no ruvllm dep)
- Safetensors JSON header parser
- Architecture inference from tensor shapes (GQA, FFN, vocab)
- Tokenizer extraction, quantization detection
- Witness chain for model provenance
- 6 integration tests, behind `model` feature flag

API probing (live tested):
- Probes Claude, OpenAI, Gemini APIs without weight access
- Detects: streaming, tools, system_prompt, vision capabilities
- Measures: latency, tokens/sec, tokenizer type
- Model fingerprinting via self-identification + math tests
- Verified: Gemini 2.0 Flash (556ms, 46 tok/s, all caps detected)

CLI: npx ruvector decompile --model file.gguf
     npx ruvector decompile --api gemini-2.0-flash

78 Rust tests passing.

Co-Authored-By: claude-flow <ruv@ruv.net>
2026-04-03 19:08:30 +00:00

280 lines
10 KiB
Rust

//! Integration tests for LLM model weight decompilation.
//!
//! These tests create minimal valid GGUF and Safetensors files inline
//! to verify the full decompilation pipeline without external models.
#![cfg(feature = "model")]
use std::collections::HashMap;
use std::io::Write;
use std::path::PathBuf;
use ruvector_decompiler::model_decompiler;
use ruvector_decompiler::model_types::*;
// ── Helpers for building test files ──────────────────────────────────────
/// Build a minimal valid GGUF v3 file with LLaMA-like structure.
fn build_test_gguf() -> Vec<u8> {
let mut buf = Vec::new();
// Header: magic, version, tensor_count, metadata_count
buf.extend_from_slice(&0x46554747u32.to_le_bytes()); // GGUF magic
buf.extend_from_slice(&3u32.to_le_bytes()); // version 3
buf.extend_from_slice(&5u64.to_le_bytes()); // 5 tensors
buf.extend_from_slice(&4u64.to_le_bytes()); // 4 metadata entries
// Metadata 1: general.architecture = "llama"
write_kv_string(&mut buf, "general.architecture", "llama");
// Metadata 2: llama.embedding_length = 256
write_kv_u32(&mut buf, "llama.embedding_length", 256);
// Metadata 3: llama.block_count = 2
write_kv_u32(&mut buf, "llama.block_count", 2);
// Metadata 4: llama.attention.head_count = 4
write_kv_u32(&mut buf, "llama.attention.head_count", 4);
// Tensor 1: token_embd.weight [512, 256] F32
write_tensor_info(&mut buf, "token_embd.weight", &[512, 256], 0, 0);
// Tensor 2: blk.0.attn_q.weight [256, 256] F32
write_tensor_info(&mut buf, "blk.0.attn_q.weight", &[256, 256], 0, 524288);
// Tensor 3: blk.0.attn_k.weight [64, 256] F32
write_tensor_info(&mut buf, "blk.0.attn_k.weight", &[64, 256], 0, 786432);
// Tensor 4: blk.0.ffn_up.weight [1024, 256] F32
write_tensor_info(&mut buf, "blk.0.ffn_up.weight", &[1024, 256], 0, 851968);
// Tensor 5: blk.1.attn_q.weight [256, 256] F32
write_tensor_info(&mut buf, "blk.1.attn_q.weight", &[256, 256], 0, 1900544);
buf
}
fn write_kv_string(buf: &mut Vec<u8>, key: &str, value: &str) {
// Key string: length (u64) + bytes
buf.extend_from_slice(&(key.len() as u64).to_le_bytes());
buf.extend_from_slice(key.as_bytes());
// Value type: 8 = String
buf.extend_from_slice(&8u32.to_le_bytes());
// Value string: length (u64) + bytes
buf.extend_from_slice(&(value.len() as u64).to_le_bytes());
buf.extend_from_slice(value.as_bytes());
}
fn write_kv_u32(buf: &mut Vec<u8>, key: &str, value: u32) {
buf.extend_from_slice(&(key.len() as u64).to_le_bytes());
buf.extend_from_slice(key.as_bytes());
// Value type: 4 = U32
buf.extend_from_slice(&4u32.to_le_bytes());
buf.extend_from_slice(&value.to_le_bytes());
}
fn write_tensor_info(buf: &mut Vec<u8>, name: &str, shape: &[u64], dtype: u32, offset: u64) {
// Name string
buf.extend_from_slice(&(name.len() as u64).to_le_bytes());
buf.extend_from_slice(name.as_bytes());
// n_dims
buf.extend_from_slice(&(shape.len() as u32).to_le_bytes());
// Shape dimensions
for dim in shape {
buf.extend_from_slice(&dim.to_le_bytes());
}
// dtype
buf.extend_from_slice(&dtype.to_le_bytes());
// offset
buf.extend_from_slice(&offset.to_le_bytes());
}
fn write_test_file(name: &str, data: &[u8]) -> PathBuf {
let path = std::env::temp_dir().join(name);
let mut f = std::fs::File::create(&path).unwrap();
f.write_all(data).unwrap();
path
}
// ── GGUF tests ───────────────────────────────────────────────────────────
#[test]
fn test_decompile_gguf_basic() {
let data = build_test_gguf();
let path = write_test_file("test_decompile.gguf", &data);
let result = model_decompiler::decompile_gguf(&path).unwrap();
// Format check
match &result.format {
ModelFormat::Gguf { version } => assert_eq!(*version, 3),
_ => panic!("expected GGUF format"),
}
// Architecture
assert_eq!(result.architecture.name, "llama");
assert_eq!(result.architecture.hidden_size, 256);
assert_eq!(result.architecture.num_layers, 2);
assert_eq!(result.architecture.num_heads, 4);
assert_eq!(result.architecture.vocab_size, 512);
// KV heads: K proj is [64, 256], head_dim = 256/4 = 64, so kv_heads = 64/64 = 1
assert_eq!(result.architecture.num_kv_heads, 1);
// FFN size from ffn_up: [1024, 256] -> intermediate = 1024
assert_eq!(result.architecture.intermediate_size, 1024);
// Total params
let expected_params = 512 * 256 + 256 * 256 + 64 * 256 + 1024 * 256 + 256 * 256;
assert_eq!(result.architecture.total_params, expected_params);
// Layers should include embedding, attention, MLP
assert!(!result.layers.is_empty());
assert!(result.layers.iter().any(|l| matches!(l.layer_type, LayerType::Embedding)));
assert!(result.layers.iter().any(|l| matches!(l.layer_type, LayerType::Attention { .. })));
assert!(result.layers.iter().any(|l| matches!(l.layer_type, LayerType::Mlp { .. })));
// Witness chain
assert!(!result.witness.source_hash.is_empty());
assert!(!result.witness.chain_root.is_empty());
assert!(!result.witness.module_witnesses.is_empty());
// Metadata
assert_eq!(
result.metadata.get("general.architecture").map(|s| s.as_str()),
Some("llama")
);
let _ = std::fs::remove_file(&path);
}
#[test]
fn test_decompile_gguf_invalid_magic() {
let mut data = vec![0u8; 24];
// Wrong magic
data[..4].copy_from_slice(&0xDEADBEEFu32.to_le_bytes());
let path = write_test_file("test_bad_magic.gguf", &data);
let result = model_decompiler::decompile_gguf(&path);
assert!(result.is_err());
assert!(format!("{}", result.unwrap_err()).contains("not a GGUF file"));
let _ = std::fs::remove_file(&path);
}
// ── Safetensors tests ────────────────────────────────────────────────────
#[test]
fn test_decompile_safetensors_basic() {
let header = serde_json::json!({
"model.embed_tokens.weight": {
"dtype": "F16",
"shape": [32000, 4096],
"data_offsets": [0, 262144000]
},
"model.layers.0.self_attn.q_proj.weight": {
"dtype": "F16",
"shape": [4096, 4096],
"data_offsets": [262144000, 295698432]
},
"model.layers.0.self_attn.k_proj.weight": {
"dtype": "F16",
"shape": [1024, 4096],
"data_offsets": [295698432, 304087040]
},
"model.layers.0.mlp.gate_proj.weight": {
"dtype": "F16",
"shape": [14336, 4096],
"data_offsets": [304087040, 421527552]
},
"model.layers.1.self_attn.q_proj.weight": {
"dtype": "F16",
"shape": [4096, 4096],
"data_offsets": [421527552, 455081984]
},
"__metadata__": {
"format": "pt",
"architecture": "llama"
}
});
let header_bytes = serde_json::to_vec(&header).unwrap();
let header_len = header_bytes.len() as u64;
let mut file_bytes = Vec::new();
file_bytes.extend_from_slice(&header_len.to_le_bytes());
file_bytes.extend_from_slice(&header_bytes);
let path = write_test_file("test_decompile.safetensors", &file_bytes);
let result = model_decompiler::decompile_safetensors(&path).unwrap();
match &result.format {
ModelFormat::Safetensors => {}
_ => panic!("expected Safetensors format"),
}
assert_eq!(result.architecture.name, "llama");
assert_eq!(result.architecture.hidden_size, 4096);
assert_eq!(result.architecture.num_layers, 2);
assert_eq!(result.architecture.vocab_size, 32000);
assert_eq!(result.architecture.num_heads, 32);
// K proj [1024, 4096], head_dim=128, kv_heads = 1024/128 = 8
assert_eq!(result.architecture.num_kv_heads, 8);
assert_eq!(result.architecture.intermediate_size, 14336);
// No tokenizer in safetensors
assert!(result.tokenizer.is_none());
// Witness chain
assert!(!result.witness.source_hash.is_empty());
let _ = std::fs::remove_file(&path);
}
// ── Auto-detect tests ────────────────────────────────────────────────────
#[test]
fn test_decompile_model_auto_detect_gguf() {
let data = build_test_gguf();
let path = write_test_file("test_auto.gguf", &data);
let result = model_decompiler::decompile_model(&path).unwrap();
assert!(matches!(result.format, ModelFormat::Gguf { .. }));
let _ = std::fs::remove_file(&path);
}
#[test]
fn test_decompile_model_unsupported_ext() {
let path = std::env::temp_dir().join("test_bad.xyz");
std::fs::write(&path, b"garbage").unwrap();
let result = model_decompiler::decompile_model(&path);
assert!(result.is_err());
assert!(format!("{}", result.unwrap_err()).contains("unsupported model format"));
let _ = std::fs::remove_file(&path);
}
// ── Quantization tests ───────────────────────────────────────────────────
#[test]
fn test_quantization_detection() {
// Build a GGUF with Q4_K tensors (type 12)
let mut buf = Vec::new();
buf.extend_from_slice(&0x46554747u32.to_le_bytes());
buf.extend_from_slice(&3u32.to_le_bytes());
buf.extend_from_slice(&2u64.to_le_bytes()); // 2 tensors
buf.extend_from_slice(&1u64.to_le_bytes()); // 1 metadata
write_kv_string(&mut buf, "general.architecture", "llama");
// Tensor 1: token_embd.weight [100, 64] F16 (type 1)
write_tensor_info(&mut buf, "token_embd.weight", &[100, 64], 1, 0);
// Tensor 2: blk.0.attn_q.weight [64, 64] Q4_K (type 12)
write_tensor_info(&mut buf, "blk.0.attn_q.weight", &[64, 64], 12, 12800);
let path = write_test_file("test_quant.gguf", &buf);
let result = model_decompiler::decompile_gguf(&path).unwrap();
let quant = result.quantization.unwrap();
assert_eq!(quant.method, "Q4_K");
assert!(quant.bits_per_weight > 0.0);
assert!(quant.compression_ratio > 1.0);
let _ = std::fs::remove_file(&path);
}