diff --git a/npm/ruvector/.npmignore b/npm/ruvector/.npmignore deleted file mode 100644 index 338974344..000000000 --- a/npm/ruvector/.npmignore +++ /dev/null @@ -1,49 +0,0 @@ -# Source files -src/ -*.ts -!*.d.ts - -# Build config -tsconfig.json -tsconfig.*.json -.tsup/ - -# Development -node_modules/ -.git/ -.github/ -.gitignore -examples/ - -# Test files -*.test.js -*.test.ts -*.spec.js -*.spec.ts -test-*.js -coverage/ - -# Logs and temp files -*.log -*.tmp -.DS_Store -.cache/ -*.tsbuildinfo - -# CI/CD -.travis.yml -.gitlab-ci.yml -azure-pipelines.yml -.circleci/ - -# Documentation (keep README.md) -docs/ -*.md -!README.md - -# Editor -.vscode/ -.idea/ -*.swp -*.swo -*~ diff --git a/npm/ruvector/README.md b/npm/ruvector/README.md deleted file mode 100644 index bbf1f89d3..000000000 --- a/npm/ruvector/README.md +++ /dev/null @@ -1,707 +0,0 @@ -# RuVector - -[![MIT License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT) -[![npm](https://img.shields.io/npm/v/ruvector.svg)](https://www.npmjs.com/package/ruvector) -[![npm downloads](https://img.shields.io/npm/dm/ruvector.svg)](https://www.npmjs.com/package/ruvector) -[![TypeScript](https://img.shields.io/badge/TypeScript-Ready-blue.svg)](https://www.typescriptlang.org/) -[![Node.js](https://img.shields.io/badge/Node.js-16+-green.svg)](https://nodejs.org/) - -**A distributed vector database that learns.** Store embeddings, query with Cypher, scale horizontally, and let the index improve itself through Graph Neural Networks. - -```bash -npx ruvector -``` - -> **All-in-One Package**: The `ruvector` package includes everything β€” vector search, graph queries, GNN layers, AI agent routing, and WASM support. No additional packages needed. - -## Why RuVector? - -Traditional vector databases just store and search. When you ask "find similar items," they return results but never get smarter. They can't handle complex relationships. They don't optimize your AI costs. - -**RuVector is built for the agentic AI era:** - -| Challenge | RuVector Solution | -|-----------|-------------------| -| RAG retrieval quality plateaus | **Self-learning GNN** improves results over time | -| Knowledge graphs need separate DB | **Cypher queries** built-in (Neo4j syntax) | -| LLM costs spiral out of control | **AI Router** sends simple queries to cheaper models | -| Memory usage explodes at scale | **Adaptive compression** (2-32x reduction) | -| Can't run AI in the browser | **Full WASM support** for client-side inference | - -## Quick Start - -### Installation - -```bash -# Install the package -npm install ruvector - -# Or try instantly without installing -npx ruvector - -# With yarn -yarn add ruvector - -# With pnpm -pnpm add ruvector -``` - -### Basic Vector Search - -```javascript -const { VectorDB } = require('ruvector'); - -// Create a vector database (384 = OpenAI ada-002 dimensions) -const db = new VectorDB(384); - -// Insert vectors with metadata -await db.insert('doc1', embedding1, { - title: 'Introduction to AI', - category: 'tech', - date: '2024-01-15' -}); - -// Semantic search -const results = await db.search(queryEmbedding, 10); - -// Filter by metadata -const filtered = await db.search(queryEmbedding, 10, { - category: 'tech', - date: { $gte: '2024-01-01' } -}); -``` - -### RAG (Retrieval-Augmented Generation) - -```javascript -const { VectorDB } = require('ruvector'); -const OpenAI = require('openai'); - -const db = new VectorDB(1536); // text-embedding-3-small dimensions -const openai = new OpenAI(); - -// Index your documents -async function indexDocument(doc) { - const embedding = await openai.embeddings.create({ - model: 'text-embedding-3-small', - input: doc.content - }); - await db.insert(doc.id, embedding.data[0].embedding, { - title: doc.title, - content: doc.content - }); -} - -// RAG query -async function ragQuery(question) { - // 1. Embed the question - const questionEmb = await openai.embeddings.create({ - model: 'text-embedding-3-small', - input: question - }); - - // 2. Retrieve relevant context - const context = await db.search(questionEmb.data[0].embedding, 5); - - // 3. Generate answer with context - const response = await openai.chat.completions.create({ - model: 'gpt-4-turbo', - messages: [{ - role: 'user', - content: `Context:\n${context.map(c => c.metadata.content).join('\n\n')} - -Question: ${question} -Answer based only on the context above:` - }] - }); - - return response.choices[0].message.content; -} -``` - -### Knowledge Graphs (Cypher) - -```javascript -const { GraphDB } = require('ruvector'); - -const graph = new GraphDB(); - -// Create entities and relationships -graph.execute(` - CREATE (alice:Person {name: 'Alice', role: 'Engineer'}) - CREATE (bob:Person {name: 'Bob', role: 'Manager'}) - CREATE (techcorp:Company {name: 'TechCorp', industry: 'AI'}) - CREATE (alice)-[:WORKS_AT {since: 2022}]->(techcorp) - CREATE (bob)-[:WORKS_AT {since: 2020}]->(techcorp) - CREATE (alice)-[:REPORTS_TO]->(bob) -`); - -// Query relationships -const team = graph.execute(` - MATCH (p:Person)-[:WORKS_AT]->(c:Company {name: 'TechCorp'}) - RETURN p.name, p.role -`); - -// Find paths -const chain = graph.execute(` - MATCH path = (a:Person {name: 'Alice'})-[:REPORTS_TO*1..3]->(manager) - RETURN path -`); - -// Combine with vector search -const similarPeople = graph.execute(` - MATCH (p:Person) - WHERE vector.similarity(p.embedding, $queryEmbedding) > 0.8 - RETURN p ORDER BY vector.similarity(p.embedding, $queryEmbedding) DESC - LIMIT 10 -`); -``` - -### GNN-Enhanced Search (Self-Learning) - -```javascript -const { GNNLayer, VectorDB } = require('ruvector'); - -// Create GNN layer for query enhancement -const gnn = new GNNLayer(384, 512, 4); // input_dim, output_dim, num_heads - -// The GNN learns from your search patterns -async function enhancedSearch(query) { - // Get initial results - const neighbors = await db.search(query, 20); - - // Compute attention weights based on user clicks/relevance - const weights = computeRelevanceWeights(neighbors); - - // GNN enhances the query using graph structure - const enhancedQuery = gnn.forward(query, - neighbors.map(n => n.embedding), - weights - ); - - // Re-rank with enhanced understanding - return db.search(enhancedQuery, 10); -} - -// Train on user feedback -gnn.train({ - queries: historicalQueries, - clicks: userClickData, - relevance: expertLabels -}, { epochs: 100 }); -``` - -### AI Agent Routing (Tiny Dancer) - -Route queries to the optimal LLM based on complexity β€” save 60-80% on API costs: - -```javascript -const { Router } = require('ruvector'); - -const router = new Router({ - confidenceThreshold: 0.85, - maxUncertainty: 0.15, - enableCircuitBreaker: true -}); - -// Define your model candidates -const models = [ - { id: 'gpt-4-turbo', embedding: gpt4Emb, cost: 0.03, quality: 0.95 }, - { id: 'gpt-3.5-turbo', embedding: gpt35Emb, cost: 0.002, quality: 0.80 }, - { id: 'claude-3-haiku', embedding: haikuEmb, cost: 0.001, quality: 0.75 }, - { id: 'llama-3-8b', embedding: llamaEmb, cost: 0.0005, quality: 0.70 } -]; - -async function smartComplete(prompt) { - const promptEmb = await embed(prompt); - - // Router decides optimal model - const decision = router.route(promptEmb, models); - - console.log(`Routing to ${decision.candidateId} (confidence: ${decision.confidence})`); - // Output: "Routing to gpt-3.5-turbo (confidence: 0.92)" - - // Call the selected model - return callModel(decision.candidateId, prompt); -} -``` - -### Compression (2-32x Memory Savings) - -```javascript -const { compress, decompress, CompressionTier } = require('ruvector'); - -// Automatic tier selection -const auto = compress(embedding, 0.3); // 30% quality threshold - -// Explicit tiers -const f16 = compress(embedding, CompressionTier.F16); // 2x compression -const pq8 = compress(embedding, CompressionTier.PQ8); // 8x compression -const pq4 = compress(embedding, CompressionTier.PQ4); // 16x compression -const binary = compress(embedding, CompressionTier.Binary); // 32x compression - -// Adaptive tiering based on access frequency -db.enableAdaptiveCompression({ - hotThreshold: 0.8, // Keep hot data in f32 - warmThreshold: 0.4, // Compress to f16 - coldThreshold: 0.1, // Compress to PQ8 - archiveThreshold: 0.01 // Compress to binary -}); -``` - -## CLI Usage - -```bash -# Show system info and backend status -npx ruvector info - -# Initialize a new index -npx ruvector init my-index --dimension 384 --type hnsw - -# Insert vectors from JSON/JSONL -npx ruvector insert my-index vectors.json -npx ruvector insert my-index vectors.jsonl --format jsonl - -# Search with a query -npx ruvector search my-index --query "[0.1, 0.2, ...]" -k 10 -npx ruvector search my-index --text "machine learning" -k 10 # Auto-embed - -# Show index statistics -npx ruvector stats my-index - -# Run performance benchmarks -npx ruvector benchmark --dimension 384 --num-vectors 10000 - -# Export/import -npx ruvector export my-index backup.bin -npx ruvector import backup.bin restored-index -``` - -## Self-Learning Hooks (Claude Code Integration) - -RuVector includes a self-learning intelligence layer that improves AI agent decisions over time. These hooks integrate with Claude Code and other AI development tools. - -### Setup - -```bash -# Initialize hooks in your project -npx ruvector hooks init -``` - -### Hook Commands - -```bash -# Session Management -ruvector hooks session-start # Start session tracking -ruvector hooks session-end # End session with export - -# Pre/Post Edit Hooks -ruvector hooks pre-edit # Get agent suggestions before editing -ruvector hooks post-edit --success # Record edit outcomes - -# Pre/Post Command Hooks -ruvector hooks pre-command "cargo test" # Analyze command before running -ruvector hooks post-command "cargo test" --success # Record command outcomes - -# Intelligence -ruvector hooks stats # Show learning statistics -ruvector hooks route # Get agent routing suggestion -ruvector hooks suggest-context # Get context suggestions - -# Memory -ruvector hooks remember -t # Store in vector memory -ruvector hooks recall # Semantic search memory -``` - -### How It Works - -The intelligence system uses: -- **Q-Learning**: Learns optimal agent routing from past successes/failures -- **Vector Memory**: Semantic storage with cosine similarity search -- **File Sequences**: Predicts related files based on edit patterns -- **Error Patterns**: Remembers fixes for common errors - -### Example Output - -``` -🧠 Intelligence Analysis: - πŸ“ ruvector-core/lib.rs - πŸ€– Recommended: rust-developer (80% confidence) - β†’ learned from past success - πŸ“š Similar: 3 past edits - πŸ“Ž Related: mod.rs, tests.rs - πŸ’¬ ⚑ Core lib: run cargo test --lib after changes -``` - -### Claude Code Integration - -Add to your `.claude/settings.json`: - -```json -{ - "hooks": { - "PreToolUse": [{ "command": "ruvector hooks pre-edit $file" }], - "PostToolUse": [{ "command": "ruvector hooks post-edit $file --success" }], - "SessionStart": [{ "command": "ruvector hooks session-start" }], - "Stop": [{ "command": "ruvector hooks session-end" }] - } -} -``` - -### Learning Data - -| Storage | Contents | -|---------|----------| -| `.ruvector/intelligence.json` | Q-table, memories, trajectories | -| Patterns | State-action values for agent routing | -| Memories | Vector embeddings for semantic recall | -| Trajectories | Learning history for continuous improvement | - -## Integrations - -### LangChain - -```javascript -const { RuVectorStore } = require('ruvector/langchain'); -const { OpenAIEmbeddings } = require('@langchain/openai'); - -const vectorStore = new RuVectorStore( - new OpenAIEmbeddings(), - { dimension: 1536 } -); - -await vectorStore.addDocuments(documents); -const results = await vectorStore.similaritySearch("query", 5); -``` - -### LlamaIndex - -```javascript -const { RuVectorIndex } = require('ruvector/llamaindex'); - -const index = new RuVectorIndex({ - dimension: 384, - enableGNN: true -}); - -await index.insert(documents); -const queryEngine = index.asQueryEngine(); -const response = await queryEngine.query("What is machine learning?"); -``` - -### OpenAI / Anthropic - -```javascript -const { createEmbedder } = require('ruvector'); - -// OpenAI -const openaiEmbed = createEmbedder('openai', { - model: 'text-embedding-3-small' -}); - -// Anthropic (via Voyage) -const anthropicEmbed = createEmbedder('voyage', { - model: 'voyage-2' -}); - -// Cohere -const cohereEmbed = createEmbedder('cohere', { - model: 'embed-english-v3.0' -}); -``` - -## Benchmarks - -| Operation | Dimensions | Time | Throughput | -|-----------|------------|------|------------| -| **HNSW Search (k=10)** | 384 | 61Β΅s | 16,400 QPS | -| **HNSW Search (k=100)** | 384 | 164Β΅s | 6,100 QPS | -| **Cosine Similarity** | 1536 | 143ns | 7M ops/sec | -| **Dot Product** | 384 | 33ns | 30M ops/sec | -| **Insert** | 384 | 20Β΅s | 50,000/sec | -| **GNN Forward** | 384β†’512 | 89Β΅s | 11,200/sec | -| **Compression (PQ8)** | 384 | 12Β΅s | 83,000/sec | - -Run your own benchmarks: -```bash -npx ruvector benchmark --dimension 384 --num-vectors 100000 -``` - -## Comparison - -| Feature | RuVector | Pinecone | Qdrant | ChromaDB | Milvus | Weaviate | -|---------|----------|----------|--------|----------|--------|----------| -| **Latency (p50)** | **61Β΅s** | ~2ms | ~1ms | ~50ms | ~5ms | ~3ms | -| **Graph Queries** | βœ… Cypher | ❌ | ❌ | ❌ | ❌ | βœ… GraphQL | -| **Self-Learning** | βœ… GNN | ❌ | ❌ | ❌ | ❌ | ❌ | -| **AI Routing** | βœ… | ❌ | ❌ | ❌ | ❌ | ❌ | -| **Browser/WASM** | βœ… | ❌ | ❌ | ❌ | ❌ | ❌ | -| **Compression** | 2-32x | ❌ | βœ… | ❌ | βœ… | βœ… | -| **Hybrid Search** | βœ… | βœ… | βœ… | ❌ | βœ… | βœ… | -| **Multi-tenancy** | βœ… | βœ… | βœ… | βœ… | βœ… | βœ… | -| **Open Source** | βœ… MIT | ❌ | βœ… Apache | βœ… Apache | βœ… Apache | βœ… BSD | -| **Pricing** | Free | $70+/mo | Free | Free | Free | Free | - -## npm Packages - -| Package | Description | -|---------|-------------| -| [`ruvector`](https://www.npmjs.com/package/ruvector) | **All-in-one package (recommended)** | -| [`@ruvector/wasm`](https://www.npmjs.com/package/@ruvector/wasm) | Browser/WASM bindings | -| [`@ruvector/graph`](https://www.npmjs.com/package/@ruvector/graph) | Graph database with Cypher | -| [`@ruvector/gnn`](https://www.npmjs.com/package/@ruvector/gnn) | Graph Neural Network layers | -| [`@ruvector/tiny-dancer`](https://www.npmjs.com/package/@ruvector/tiny-dancer) | AI agent routing (FastGRNN) | -| [`@ruvector/router`](https://www.npmjs.com/package/@ruvector/router) | Semantic routing engine | - -```bash -# Install all-in-one (recommended) -npm install ruvector - -# Or install specific packages -npm install @ruvector/graph @ruvector/gnn -``` - -## API Reference - -### VectorDB - -```typescript -class VectorDB { - constructor(dimension: number, options?: VectorDBOptions); - - // CRUD operations - insert(id: string, values: number[], metadata?: object): Promise; - insertBatch(vectors: Vector[], options?: BatchOptions): Promise; - get(id: string): Promise; - update(id: string, values?: number[], metadata?: object): Promise; - delete(id: string): Promise; - - // Search - search(query: number[], k?: number, filter?: Filter): Promise; - hybridSearch(query: number[], text: string, k?: number): Promise; - - // Persistence - save(path: string): Promise; - static load(path: string): Promise; - - // Management - stats(): Promise; - optimize(): Promise; - clear(): Promise; -} -``` - -### GraphDB - -```typescript -class GraphDB { - constructor(options?: GraphDBOptions); - - // Cypher execution - execute(cypher: string, params?: object): QueryResult; - - // Direct API - createNode(label: string, properties: object): string; - createRelationship(from: string, to: string, type: string, props?: object): void; - createHyperedge(nodeIds: string[], type: string, props?: object): string; - - // Traversal - shortestPath(from: string, to: string): Path | null; - neighbors(nodeId: string, depth?: number): Node[]; -} -``` - -### GNNLayer - -```typescript -class GNNLayer { - constructor(inputDim: number, outputDim: number, numHeads: number); - - // Inference - forward(query: number[], neighbors: number[][], weights: number[]): number[]; - - // Training - train(data: TrainingData, config?: TrainingConfig): TrainingMetrics; - save(path: string): void; - static load(path: string): GNNLayer; -} -``` - -### Router - -```typescript -class Router { - constructor(config?: RouterConfig); - - // Routing - route(query: number[], candidates: Candidate[]): RoutingDecision; - routeBatch(queries: number[][], candidates: Candidate[]): RoutingDecision[]; - - // Management - reloadModel(): void; - circuitBreakerStatus(): 'closed' | 'open' | 'half-open'; - resetCircuitBreaker(): void; -} -``` - -## Use Cases - -### Agentic AI / Multi-Agent Systems - -```javascript -// Route tasks to specialized agents -const agents = [ - { id: 'researcher', embedding: researchEmb, capabilities: ['search', 'summarize'] }, - { id: 'coder', embedding: codeEmb, capabilities: ['code', 'debug'] }, - { id: 'analyst', embedding: analysisEmb, capabilities: ['data', 'visualize'] } -]; - -const taskEmb = await embed("Write a Python script to analyze sales data"); -const decision = router.route(taskEmb, agents); -// Routes to 'coder' agent with high confidence -``` - -### Recommendation Systems - -```javascript -const recommendations = graph.execute(` - MATCH (user:User {id: $userId})-[:VIEWED]->(item:Product) - MATCH (item)-[:SIMILAR_TO]->(rec:Product) - WHERE NOT (user)-[:VIEWED]->(rec) - AND vector.similarity(rec.embedding, $userPreference) > 0.7 - RETURN rec - ORDER BY vector.similarity(rec.embedding, $userPreference) DESC - LIMIT 10 -`); -``` - -### Semantic Caching - -```javascript -const cache = new VectorDB(1536); - -async function cachedLLMCall(prompt) { - const promptEmb = await embed(prompt); - - // Check semantic cache - const cached = await cache.search(promptEmb, 1); - if (cached[0]?.score > 0.95) { - return cached[0].metadata.response; // Cache hit - } - - // Cache miss - call LLM - const response = await llm.complete(prompt); - await cache.insert(generateId(), promptEmb, { prompt, response }); - - return response; -} -``` - -### Document Q&A with Sources - -```javascript -async function qaWithSources(question) { - const results = await db.search(await embed(question), 5); - - const answer = await llm.complete({ - prompt: `Answer based on these sources:\n${results.map(r => - `[${r.id}] ${r.metadata.content}` - ).join('\n')}\n\nQuestion: ${question}`, - }); - - return { - answer, - sources: results.map(r => ({ - id: r.id, - title: r.metadata.title, - relevance: r.score - })) - }; -} -``` - -## Architecture - -``` -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ ruvector β”‚ -β”‚ (All-in-One npm Package) β”‚ -β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ -β”‚ VectorDB β”‚ GraphDB β”‚ GNNLayer β”‚ Router β”‚ -β”‚ (Search) β”‚ (Cypher) β”‚ (ML) β”‚ (AI Routing) β”‚ -β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ -β”‚ Rust Core Engine β”‚ -β”‚ β€’ HNSW Index β€’ Cypher Parser β€’ Attention β€’ FastGRNN β”‚ -β”‚ β€’ SIMD Ops β€’ Hyperedges β€’ Training β€’ Uncertainty β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ - β”‚ - β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” - β”‚ β”‚ β”‚ - β”Œβ”€β”€β”€β”€β–Όβ”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β–Όβ”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β–Όβ”€β”€β”€β”€β” - β”‚ Native β”‚ β”‚ WASM β”‚ β”‚ FFI β”‚ - β”‚(napi-rs)β”‚ β”‚(wasm32) β”‚ β”‚ (C) β”‚ - β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ - β”‚ β”‚ β”‚ - β”Œβ”€β”€β”€β”€β–Όβ”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β–Όβ”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β–Όβ”€β”€β”€β”€β” - β”‚ Node.js β”‚ β”‚ Browser β”‚ β”‚ Python β”‚ - β”‚ Bun β”‚ β”‚ Deno β”‚ β”‚ Go β”‚ - β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ -``` - -## Platform Support - -| Platform | Backend | Installation | -|----------|---------|--------------| -| **Node.js 16+** | Native (napi-rs) | `npm install ruvector` | -| **Node.js (fallback)** | WASM | Automatic if native fails | -| **Bun** | Native | `bun add ruvector` | -| **Deno** | WASM | `import from "npm:ruvector"` | -| **Browser** | WASM | `npm install @ruvector/wasm` | -| **Cloudflare Workers** | WASM | `npm install @ruvector/wasm` | -| **Vercel Edge** | WASM | `npm install @ruvector/wasm` | - -## Documentation - -- [Getting Started Guide](https://github.com/ruvnet/ruvector/blob/main/docs/guide/GETTING_STARTED.md) -- [Cypher Reference](https://github.com/ruvnet/ruvector/blob/main/docs/api/CYPHER_REFERENCE.md) -- [GNN Architecture](https://github.com/ruvnet/ruvector/blob/main/docs/gnn-layer-implementation.md) -- [Performance Tuning](https://github.com/ruvnet/ruvector/blob/main/docs/optimization/PERFORMANCE_TUNING_GUIDE.md) -- [API Reference](https://github.com/ruvnet/ruvector/tree/main/docs/api) - -## Contributing - -```bash -# Clone repository -git clone https://github.com/ruvnet/ruvector.git -cd ruvector - -# Install dependencies -npm install - -# Run tests -npm test - -# Build -npm run build - -# Benchmarks -npm run bench -``` - -See [CONTRIBUTING.md](https://github.com/ruvnet/ruvector/blob/main/docs/development/CONTRIBUTING.md) for guidelines. - -## License - -MIT License β€” free for commercial and personal use. - ---- - -
- -**Built by [rUv](https://ruv.io)** β€’ [GitHub](https://github.com/ruvnet/ruvector) β€’ [npm](https://npmjs.com/package/ruvector) - -*Vector search that gets smarter over time.* - -**[⭐ Star on GitHub](https://github.com/ruvnet/ruvector)** if RuVector helps your project! - -
diff --git a/npm/ruvector/bin/ruvector.js b/npm/ruvector/bin/ruvector.js deleted file mode 100755 index efbb2989b..000000000 --- a/npm/ruvector/bin/ruvector.js +++ /dev/null @@ -1,1150 +0,0 @@ -#!/usr/bin/env node - -/** - * rUvector CLI - * - * Beautiful command-line interface for vector database operations - * Includes: Vector Search, Graph/Cypher, GNN, Compression, and more - */ - -const { Command } = require('commander'); -const chalk = require('chalk'); -const ora = require('ora'); -const Table = require('cli-table3'); -const fs = require('fs').promises; -const path = require('path'); - -// Lazy load backends to improve startup time -let vectorBackend = null; -let graphBackend = null; -let gnnBackend = null; - -function getVectorBackend() { - if (!vectorBackend) { - try { - const { VectorIndex, getBackendInfo, Utils } = require('../dist/index.js'); - vectorBackend = { VectorIndex, getBackendInfo, Utils }; - } catch (e) { - console.error(chalk.red('Vector backend not available:', e.message)); - process.exit(1); - } - } - return vectorBackend; -} - -function getGraphBackend() { - if (!graphBackend) { - try { - graphBackend = require('@ruvector/graph-node'); - } catch (e) { - try { - graphBackend = require('@ruvector/graph-wasm'); - } catch (e2) { - return null; - } - } - } - return graphBackend; -} - -function getGnnBackend() { - if (!gnnBackend) { - try { - gnnBackend = require('@ruvector/gnn-node'); - } catch (e) { - try { - gnnBackend = require('@ruvector/gnn-wasm'); - } catch (e2) { - return null; - } - } - } - return gnnBackend; -} - -const program = new Command(); - -// Utility to format numbers -function formatNumber(num) { - if (num >= 1_000_000) { - return `${(num / 1_000_000).toFixed(2)}M`; - } else if (num >= 1_000) { - return `${(num / 1_000).toFixed(2)}K`; - } - return num.toString(); -} - -// Utility to format bytes -function formatBytes(bytes) { - if (bytes >= 1_073_741_824) { - return `${(bytes / 1_073_741_824).toFixed(2)} GB`; - } else if (bytes >= 1_048_576) { - return `${(bytes / 1_048_576).toFixed(2)} MB`; - } else if (bytes >= 1_024) { - return `${(bytes / 1_024).toFixed(2)} KB`; - } - return `${bytes} B`; -} - -// Utility to format duration -function formatDuration(ms) { - if (ms >= 1000) { - return `${(ms / 1000).toFixed(2)}s`; - } - return `${ms.toFixed(2)}ms`; -} - -// Doctor command - diagnose installation -program - .command('doctor') - .description('Diagnose installation and check all dependencies') - .action(async () => { - console.log(chalk.bold.cyan('\n🩺 rUvector Doctor - Diagnosing Installation\n')); - - const checks = []; - - // Check Node.js version - const nodeVersion = process.version; - const nodeMajor = parseInt(nodeVersion.slice(1).split('.')[0]); - checks.push({ - name: 'Node.js Version', - status: nodeMajor >= 16 ? 'pass' : 'fail', - message: nodeVersion, - hint: nodeMajor < 16 ? 'Requires Node.js >= 16.0.0' : null - }); - - // Check core package - let coreVersion = null; - try { - const core = require('@ruvector/core'); - coreVersion = typeof core.version === 'function' ? core.version() : (core.version || 'installed'); - checks.push({ - name: '@ruvector/core', - status: 'pass', - message: coreVersion - }); - } catch (e) { - checks.push({ - name: '@ruvector/core', - status: 'warn', - message: 'Not installed (using fallback)', - hint: 'npm install @ruvector/core' - }); - } - - // Check graph package (Node.js native) - try { - const graph = require('@ruvector/graph-node'); - checks.push({ - name: '@ruvector/graph-node', - status: 'pass', - message: graph.version || 'installed' - }); - } catch (e) { - // Check WASM fallback - try { - const graphWasm = require('@ruvector/graph-wasm'); - checks.push({ - name: '@ruvector/graph-wasm', - status: 'pass', - message: graphWasm.version || 'installed (WASM)' - }); - } catch (e2) { - checks.push({ - name: 'Graph Module', - status: 'warn', - message: 'Not installed', - hint: 'npm install @ruvector/graph-node' - }); - } - } - - // Check GNN package (Node.js native) - try { - const gnn = require('@ruvector/gnn-node'); - checks.push({ - name: '@ruvector/gnn-node', - status: 'pass', - message: gnn.version || 'installed' - }); - } catch (e) { - // Check WASM fallback - try { - const gnnWasm = require('@ruvector/gnn-wasm'); - checks.push({ - name: '@ruvector/gnn-wasm', - status: 'pass', - message: gnnWasm.version || 'installed (WASM)' - }); - } catch (e2) { - checks.push({ - name: 'GNN Module', - status: 'warn', - message: 'Not installed', - hint: 'npm install @ruvector/gnn-node' - }); - } - } - - // Check dist files - const distPath = require('path').join(__dirname, '..', 'dist', 'index.js'); - try { - require('fs').accessSync(distPath); - checks.push({ - name: 'Built dist files', - status: 'pass', - message: 'Found' - }); - } catch (e) { - checks.push({ - name: 'Built dist files', - status: 'fail', - message: 'Not found', - hint: 'Run npm run build in the ruvector package' - }); - } - - // Display results - const table = new Table({ - head: ['Check', 'Status', 'Details'], - colWidths: [25, 10, 40] - }); - - let hasErrors = false; - let hasWarnings = false; - - checks.forEach(check => { - let statusIcon; - if (check.status === 'pass') { - statusIcon = chalk.green('βœ“ Pass'); - } else if (check.status === 'warn') { - statusIcon = chalk.yellow('β—‹ Warn'); - hasWarnings = true; - } else { - statusIcon = chalk.red('βœ— Fail'); - hasErrors = true; - } - - table.push([ - check.name, - statusIcon, - check.message + (check.hint ? chalk.gray(` (${check.hint})`) : '') - ]); - }); - - console.log(table.toString()); - console.log(); - - // Summary - if (hasErrors) { - console.log(chalk.red('βœ— Some required checks failed. Please fix the issues above.')); - } else if (hasWarnings) { - console.log(chalk.yellow('β—‹ All required checks passed, but some optional modules are missing.')); - console.log(chalk.cyan(' Install optional modules for full functionality.')); - } else { - console.log(chalk.green('βœ“ All checks passed! rUvector is ready to use.')); - } - - // Show available features - console.log(chalk.bold.cyan('\nπŸ“¦ Available Commands:\n')); - console.log(chalk.white(' Core: ') + chalk.green('info, init, stats, insert, search, benchmark')); - if (getGraphBackend()) { - console.log(chalk.white(' Graph: ') + chalk.green('graph query, graph create-node')); - } else { - console.log(chalk.white(' Graph: ') + chalk.gray('(install @ruvector/graph-node)')); - } - if (getGnnBackend()) { - console.log(chalk.white(' GNN: ') + chalk.green('gnn layer, gnn compress')); - } else { - console.log(chalk.white(' GNN: ') + chalk.gray('(install @ruvector/gnn-node)')); - } - console.log(); - }); - -// Info command -program - .command('info') - .description('Show backend information and available modules') - .action(() => { - const { getBackendInfo } = getVectorBackend(); - const info = getBackendInfo(); - - console.log(chalk.bold.cyan('\nπŸš€ rUvector - All-in-One Vector Database\n')); - - const table = new Table({ - chars: { 'mid': '', 'left-mid': '', 'mid-mid': '', 'right-mid': '' } - }); - - table.push( - ['Backend Type', chalk.green(info.type === 'native' ? '⚑ Native' : '🌐 WASM')], - ['Version', info.version], - ['Features', info.features.join(', ')] - ); - - console.log(table.toString()); - console.log(); - - // Show available modules - console.log(chalk.bold.cyan('πŸ“¦ Available Modules:\n')); - const modulesTable = new Table({ - head: ['Module', 'Status', 'Description'], - colWidths: [20, 12, 45] - }); - - // Check vector - modulesTable.push(['Vector Search', chalk.green('βœ“ Ready'), 'HNSW index, similarity search']); - - // Check graph - const graphAvailable = getGraphBackend() !== null; - modulesTable.push([ - 'Graph/Cypher', - graphAvailable ? chalk.green('βœ“ Ready') : chalk.yellow('β—‹ Optional'), - 'Neo4j-compatible queries, hyperedges' - ]); - - // Check GNN - const gnnAvailable = getGnnBackend() !== null; - modulesTable.push([ - 'GNN Layers', - gnnAvailable ? chalk.green('βœ“ Ready') : chalk.yellow('β—‹ Optional'), - 'Neural network on graph topology' - ]); - - // Built-in features - modulesTable.push(['Compression', chalk.green('βœ“ Built-in'), 'f32β†’f16β†’PQ8β†’PQ4β†’Binary (2-32x)']); - modulesTable.push(['WASM/Browser', chalk.green('βœ“ Built-in'), 'Client-side vector search']); - - console.log(modulesTable.toString()); - console.log(); - - if (!graphAvailable || !gnnAvailable) { - console.log(chalk.cyan('πŸ’‘ Install optional modules:')); - if (!graphAvailable) { - console.log(chalk.white(' npm install @ruvector/graph-node')); - } - if (!gnnAvailable) { - console.log(chalk.white(' npm install @ruvector/gnn-node')); - } - console.log(); - } - }); - -// Init command -program - .command('init ') - .description('Initialize a new vector index') - .option('-d, --dimension ', 'Vector dimension', '384') - .option('-m, --metric ', 'Distance metric (cosine|euclidean|dot)', 'cosine') - .option('-t, --type ', 'Index type (flat|hnsw)', 'hnsw') - .option('--hnsw-m ', 'HNSW M parameter', '16') - .option('--hnsw-ef ', 'HNSW ef_construction parameter', '200') - .action(async (indexPath, options) => { - const spinner = ora('Initializing vector index...').start(); - - try { - const { VectorIndex } = getVectorBackend(); - const index = new VectorIndex({ - dimension: parseInt(options.dimension), - metric: options.metric, - indexType: options.type, - hnswConfig: options.type === 'hnsw' ? { - m: parseInt(options.hnswM), - efConstruction: parseInt(options.hnswEf) - } : undefined - }); - - await index.save(indexPath); - - spinner.succeed(chalk.green('Index initialized successfully!')); - - console.log(chalk.cyan('\nConfiguration:')); - console.log(` Path: ${chalk.white(indexPath)}`); - console.log(` Dimension: ${chalk.white(options.dimension)}`); - console.log(` Metric: ${chalk.white(options.metric)}`); - console.log(` Type: ${chalk.white(options.type)}`); - - if (options.type === 'hnsw') { - console.log(chalk.cyan('\nHNSW Parameters:')); - console.log(` M: ${chalk.white(options.hnswM)}`); - console.log(` ef_construction: ${chalk.white(options.hnswEf)}`); - } - - console.log(); - } catch (error) { - spinner.fail(chalk.red('Failed to initialize index')); - console.error(chalk.red(error.message)); - process.exit(1); - } - }); - -// Stats command -program - .command('stats ') - .description('Show index statistics') - .action(async (indexPath) => { - const spinner = ora('Loading index...').start(); - - try { - const { VectorIndex } = getVectorBackend(); - const index = await VectorIndex.load(indexPath); - const stats = await index.stats(); - - spinner.succeed(chalk.green('Index loaded')); - - console.log(chalk.bold.cyan('\nπŸ“Š Index Statistics\n')); - - const table = new Table({ - chars: { 'mid': '', 'left-mid': '', 'mid-mid': '', 'right-mid': '' } - }); - - table.push( - ['Vectors', chalk.white(formatNumber(stats.vectorCount))], - ['Dimension', chalk.white(stats.dimension)], - ['Index Type', chalk.white(stats.indexType)], - ['Memory Usage', chalk.white(stats.memoryUsage ? formatBytes(stats.memoryUsage) : 'N/A')] - ); - - console.log(table.toString()); - console.log(); - } catch (error) { - spinner.fail(chalk.red('Failed to load index')); - console.error(chalk.red(error.message)); - process.exit(1); - } - }); - -// Insert command -program - .command('insert ') - .description('Insert vectors from JSON file') - .option('-b, --batch-size ', 'Batch size', '1000') - .action(async (indexPath, vectorsFile, options) => { - let spinner = ora('Loading index...').start(); - - try { - const { VectorIndex } = getVectorBackend(); - const index = await VectorIndex.load(indexPath); - spinner.succeed(); - - spinner = ora('Loading vectors...').start(); - const data = await fs.readFile(vectorsFile, 'utf-8'); - const vectors = JSON.parse(data); - spinner.succeed(chalk.green(`Loaded ${vectors.length} vectors`)); - - const startTime = Date.now(); - spinner = ora('Inserting vectors...').start(); - - let lastProgress = 0; - await index.insertBatch(vectors, { - batchSize: parseInt(options.batchSize), - progressCallback: (progress) => { - const percent = Math.floor(progress * 100); - if (percent > lastProgress) { - spinner.text = `Inserting vectors... ${percent}%`; - lastProgress = percent; - } - } - }); - - const duration = Date.now() - startTime; - const throughput = vectors.length / (duration / 1000); - - spinner.succeed(chalk.green('Vectors inserted!')); - - console.log(chalk.cyan('\nPerformance:')); - console.log(` Duration: ${chalk.white(formatDuration(duration))}`); - console.log(` Throughput: ${chalk.white(formatNumber(throughput))} vectors/sec`); - - spinner = ora('Saving index...').start(); - await index.save(indexPath); - spinner.succeed(chalk.green('Index saved')); - - console.log(); - } catch (error) { - spinner.fail(chalk.red('Operation failed')); - console.error(chalk.red(error.message)); - process.exit(1); - } - }); - -// Search command -program - .command('search ') - .description('Search for similar vectors') - .requiredOption('-q, --query ', 'Query vector as JSON array') - .option('-k, --top-k ', 'Number of results', '10') - .option('--ef ', 'HNSW ef parameter') - .action(async (indexPath, options) => { - const spinner = ora('Loading index...').start(); - - try { - const { VectorIndex } = getVectorBackend(); - const index = await VectorIndex.load(indexPath); - spinner.succeed(); - - const query = JSON.parse(options.query); - - spinner.text = 'Searching...'; - spinner.start(); - - const startTime = Date.now(); - const results = await index.search(query, { - k: parseInt(options.topK), - ef: options.ef ? parseInt(options.ef) : undefined - }); - const duration = Date.now() - startTime; - - spinner.succeed(chalk.green(`Found ${results.length} results in ${formatDuration(duration)}`)); - - console.log(chalk.bold.cyan('\nπŸ” Search Results\n')); - - const table = new Table({ - head: ['Rank', 'ID', 'Score', 'Metadata'], - colWidths: [6, 20, 12, 40] - }); - - results.forEach((result, i) => { - table.push([ - chalk.yellow(`#${i + 1}`), - result.id, - chalk.green(result.score.toFixed(4)), - result.metadata ? JSON.stringify(result.metadata).substring(0, 37) + '...' : '' - ]); - }); - - console.log(table.toString()); - console.log(); - } catch (error) { - spinner.fail(chalk.red('Search failed')); - console.error(chalk.red(error.message)); - process.exit(1); - } - }); - -// Benchmark command -program - .command('benchmark') - .description('Run performance benchmarks') - .option('-d, --dimension ', 'Vector dimension', '384') - .option('-n, --num-vectors ', 'Number of vectors', '10000') - .option('-q, --num-queries ', 'Number of queries', '100') - .action(async (options) => { - const { VectorIndex, Utils } = getVectorBackend(); - const dimension = parseInt(options.dimension); - const numVectors = parseInt(options.numVectors); - const numQueries = parseInt(options.numQueries); - - console.log(chalk.bold.cyan('\n⚑ Performance Benchmark\n')); - console.log(chalk.cyan('Configuration:')); - console.log(` Dimension: ${chalk.white(dimension)}`); - console.log(` Vectors: ${chalk.white(formatNumber(numVectors))}`); - console.log(` Queries: ${chalk.white(formatNumber(numQueries))}`); - console.log(); - - const results = []; - - try { - // Create index - let spinner = ora('Creating index...').start(); - const index = new VectorIndex({ - dimension, - metric: 'cosine', - indexType: 'hnsw' - }); - spinner.succeed(); - - // Generate vectors - spinner = ora('Generating vectors...').start(); - const vectors = []; - for (let i = 0; i < numVectors; i++) { - vectors.push({ - id: `vec_${i}`, - values: Utils.randomVector(dimension) - }); - } - spinner.succeed(); - - // Insert benchmark - spinner = ora('Benchmarking inserts...').start(); - const insertStart = Date.now(); - await index.insertBatch(vectors, { batchSize: 1000 }); - const insertDuration = Date.now() - insertStart; - const insertThroughput = numVectors / (insertDuration / 1000); - spinner.succeed(); - - results.push({ - operation: 'Insert', - duration: insertDuration, - throughput: insertThroughput - }); - - // Search benchmark - spinner = ora('Benchmarking searches...').start(); - const queries = []; - for (let i = 0; i < numQueries; i++) { - queries.push(Utils.randomVector(dimension)); - } - - const searchStart = Date.now(); - for (const query of queries) { - await index.search(query, { k: 10 }); - } - const searchDuration = Date.now() - searchStart; - const searchThroughput = numQueries / (searchDuration / 1000); - spinner.succeed(); - - results.push({ - operation: 'Search', - duration: searchDuration, - throughput: searchThroughput - }); - - // Display results - console.log(chalk.bold.cyan('\nπŸ“ˆ Results\n')); - - const table = new Table({ - head: ['Operation', 'Total Time', 'Throughput'], - colWidths: [15, 20, 25] - }); - - results.forEach(result => { - table.push([ - chalk.white(result.operation), - chalk.yellow(formatDuration(result.duration)), - chalk.green(`${formatNumber(result.throughput)} ops/sec`) - ]); - }); - - console.log(table.toString()); - console.log(); - - // Backend info - const { getBackendInfo } = getVectorBackend(); - const info = getBackendInfo(); - console.log(chalk.cyan(`Backend: ${chalk.white(info.type)}`)); - console.log(); - - } catch (error) { - console.error(chalk.red('Benchmark failed:'), error.message); - process.exit(1); - } - }); - -// ============================================================================ -// GRAPH COMMANDS -// ============================================================================ - -const graphCmd = program - .command('graph') - .description('Graph database commands (Cypher queries, nodes, edges)'); - -graphCmd - .command('query ') - .description('Execute a Cypher query') - .option('-f, --format ', 'Output format (table|json)', 'table') - .action(async (cypher, options) => { - const graph = getGraphBackend(); - if (!graph) { - console.error(chalk.red('Graph module not installed. Run: npm install @ruvector/graph-node')); - process.exit(1); - } - - const spinner = ora('Executing Cypher query...').start(); - try { - const db = new graph.GraphDB(); - const results = await db.query(cypher); - spinner.succeed(chalk.green(`Query returned ${results.length} results`)); - - if (options.format === 'json') { - console.log(JSON.stringify(results, null, 2)); - } else { - if (results.length > 0) { - const table = new Table({ - head: Object.keys(results[0]).map(k => chalk.cyan(k)) - }); - results.forEach(row => { - table.push(Object.values(row).map(v => - typeof v === 'object' ? JSON.stringify(v) : String(v) - )); - }); - console.log(table.toString()); - } - } - } catch (error) { - spinner.fail(chalk.red('Query failed')); - console.error(chalk.red(error.message)); - process.exit(1); - } - }); - -graphCmd - .command('create-node') - .description('Create a new node') - .requiredOption('-l, --label