fix: Resolve database locking and package loading issues

This commit addresses two critical bugs identified in the comprehensive review:

1. Database Locking Bug (Rust):
   - Problem: Multiple VectorDB instances couldn't share the same database file
   - Root cause: redb::Database uses exclusive file locking
   - Solution: Implemented global connection pool in storage.rs using
     Lazy<Mutex<HashMap<PathBuf, Arc<Database>>>>
   - Multiple VectorDB instances now share Arc<Database> for same path
   - Location: crates/ruvector-core/src/storage.rs

2. Package Name Mismatch (NPM):
   - Problem: ruvector-core was using non-existent scoped package names
   - Fixed platformMap to use correct unscoped names:
     * @ruvector/core-linux-x64 → ruvector-core-linux-x64-gnu
     * @ruvector/core-linux-arm64 → ruvector-core-linux-arm64-gnu
     * @ruvector/core-darwin-x64 → ruvector-core-darwin-x64
     * @ruvector/core-darwin-arm64 → ruvector-core-darwin-arm64
     * @ruvector/core-win32-x64 → ruvector-core-win32-x64-msvc
   - Updated error messages to reference correct package names
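   - Location: npm/packages/core/index.js (the ruvector wrapper's require()
     calls were likewise switched to the unscoped 'ruvector-core' and
     'ruvector-wasm' names)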

Version Updates:
- ruvector-core: 0.1.1 → 0.1.2
- ruvector: 0.1.4 → 0.1.6

Published Packages:
- ruvector-core@0.1.2 (npm)
- ruvector@0.1.6 (npm)

Breaking Changes: None
Backwards Compatible: Yes

Test Coverage:
- Added test_multiple_instances_same_path() to verify connection pooling
- Library builds successfully with storage feature enabled
- CLI commands now work correctly with updated package resolution

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
rUv 2025-11-21 21:00:23 +00:00
parent fdc6626b9c
commit 44ca725139
5 changed files with 101 additions and 21 deletions

View file

@ -14,31 +14,66 @@ use bincode::config;
#[cfg(feature = "storage")]
use serde_json;
#[cfg(feature = "storage")]
use std::path::Path;
use std::path::{Path, PathBuf};
#[cfg(feature = "storage")]
use std::sync::Arc;
#[cfg(feature = "storage")]
use std::collections::HashMap;
#[cfg(feature = "storage")]
use parking_lot::Mutex;
#[cfg(feature = "storage")]
use once_cell::sync::Lazy;
#[cfg(feature = "storage")]
const VECTORS_TABLE: TableDefinition<&str, &[u8]> = TableDefinition::new("vectors");
const METADATA_TABLE: TableDefinition<&str, &str> = TableDefinition::new("metadata");
// Global database connection pool to allow multiple VectorDB instances
// to share the same underlying database file.
//
// Keyed by the path computed in `VectorStorage::new` (canonicalized when
// possible, otherwise the path as given). Each value is a shared handle that
// `VectorStorage::new` clones for repeated opens of the same key.
// NOTE(review): nothing in this section removes entries, so a pooled
// `Database` appears to stay open for the life of the process — confirm
// that is intended.
static DB_POOL: Lazy<Mutex<HashMap<PathBuf, Arc<Database>>>> = Lazy::new(|| {
Mutex::new(HashMap::new())
});
/// Storage backend for vector database
///
/// Wraps a database handle together with the vector dimensionality passed
/// to `new`.
pub struct VectorStorage {
db: Database,
// Shared handle: `new` clones this `Arc` out of `DB_POOL` when the same path
// has already been opened, so several instances can use one `Database`.
db: Arc<Database>,
// Dimensionality supplied by the caller of `new`; stored as given.
dimensions: usize,
}
impl VectorStorage {
/// Create or open a vector storage at the given path
///
/// This method uses a global connection pool to allow multiple VectorDB
/// instances to share the same underlying database file, fixing the
/// "Database already open. Cannot acquire lock" error.
pub fn new<P: AsRef<Path>>(path: P, dimensions: usize) -> Result<Self> {
let db = Database::create(path)?;
let path_buf = path.as_ref().canonicalize()
.unwrap_or_else(|_| path.as_ref().to_path_buf());
// Initialize tables
let write_txn = db.begin_write()?;
{
let _ = write_txn.open_table(VECTORS_TABLE)?;
let _ = write_txn.open_table(METADATA_TABLE)?;
}
write_txn.commit()?;
// Check if we already have a Database instance for this path
let db = {
let mut pool = DB_POOL.lock();
if let Some(existing_db) = pool.get(&path_buf) {
// Reuse existing database connection
Arc::clone(existing_db)
} else {
// Create new database and add to pool
let new_db = Arc::new(Database::create(&path_buf)?);
// Initialize tables
let write_txn = new_db.begin_write()?;
{
let _ = write_txn.open_table(VECTORS_TABLE)?;
let _ = write_txn.open_table(METADATA_TABLE)?;
}
write_txn.commit()?;
pool.insert(path_buf, Arc::clone(&new_db));
new_db
}
};
Ok(Self { db, dimensions })
}
@ -269,4 +304,49 @@ mod tests {
Ok(())
}
#[test]
fn test_multiple_instances_same_path() -> Result<()> {
    // Regression test for the database locking bug: two VectorStorage handles
    // opened on one file must coexist and observe each other's writes.
    let tmp = tempdir().unwrap();
    let shared_path = tmp.path().join("shared.db");

    // First handle writes one record.
    let first = VectorStorage::new(&shared_path, 3)?;
    let entry_one = VectorEntry {
        id: Some("test1".to_string()),
        vector: vec![1.0, 2.0, 3.0],
        metadata: None,
    };
    first.insert(&entry_one)?;

    // Opening the SAME path again must succeed rather than fail on the lock.
    let second = VectorStorage::new(&shared_path, 3)?;

    // The record written through the first handle is visible through both.
    for handle in [&first, &second] {
        assert_eq!(handle.len()?, 1);
    }

    // Second handle writes another record.
    let entry_two = VectorEntry {
        id: Some("test2".to_string()),
        vector: vec![4.0, 5.0, 6.0],
        metadata: None,
    };
    second.insert(&entry_two)?;

    // Both handles now report both records.
    for handle in [&first, &second] {
        assert_eq!(handle.len()?, 2);
    }

    // Each record can be fetched through the handle that wrote it.
    assert!(first.get("test1")?.is_some());
    assert!(second.get("test2")?.is_some());

    Ok(())
}
}

View file

@ -3,15 +3,15 @@ const { platform, arch } = process;
// Platform mapping: process.platform -> process.arch -> npm package name.
// Presumably this is the package the loader below reports as
// `platformPackage` in its "Please install" hint — confirm against the
// lookup code, which is not visible here.
// Only linux and darwin (x64, arm64) and win32 (x64) have entries.
const platformMap = {
'linux': {
'x64': '@ruvector/core-linux-x64',
'arm64': '@ruvector/core-linux-arm64'
'x64': 'ruvector-core-linux-x64-gnu',
'arm64': 'ruvector-core-linux-arm64-gnu'
},
'darwin': {
'x64': '@ruvector/core-darwin-x64',
'arm64': '@ruvector/core-darwin-arm64'
'x64': 'ruvector-core-darwin-x64',
'arm64': 'ruvector-core-darwin-arm64'
},
'win32': {
'x64': '@ruvector/core-win32-x64'
'x64': 'ruvector-core-win32-x64-msvc'
}
};
@ -35,7 +35,7 @@ function loadNativeModule() {
throw new Error(
`Native module not found for ${platform}-${arch}\n` +
`Please install: npm install ${platformPackage}\n` +
`Or reinstall @ruvector/core to get optional dependencies`
`Or reinstall ruvector-core to get optional dependencies`
);
}
throw error;

View file

@ -1,6 +1,6 @@
{
"name": "ruvector-core",
"version": "0.1.1",
"version": "0.1.2",
"description": "High-performance vector database with HNSW indexing - 50k+ inserts/sec, built in Rust for AI/ML similarity search and semantic search applications",
"main": "index.js",
"types": "index.d.ts",

View file

@ -1,6 +1,6 @@
{
"name": "ruvector",
"version": "0.1.4",
"version": "0.1.6",
"description": "High-performance vector database for Node.js with automatic native/WASM fallback",
"main": "dist/index.js",
"types": "dist/index.d.ts",
@ -43,7 +43,7 @@
"directory": "npm/packages/ruvector"
},
"dependencies": {
"ruvector-core": "^0.1.1",
"ruvector-core": "^0.1.2",
"commander": "^11.1.0",
"chalk": "^4.1.2",
"ora": "^5.4.1"

View file

@ -13,7 +13,7 @@ let implementationType: 'native' | 'wasm' = 'wasm';
try {
// Try to load native module first
implementation = require('@ruvector/core');
implementation = require('ruvector-core');
implementationType = 'native';
// Verify it's actually working
@ -28,7 +28,7 @@ try {
}
try {
implementation = require('@ruvector/wasm');
implementation = require('ruvector-wasm');
implementationType = 'wasm';
} catch (wasmError: any) {
throw new Error(