mirror of
https://github.com/ruvnet/RuVector.git
synced 2026-05-23 21:25:02 +00:00
fix(rvf): add string ID ↔ numeric label mapping to NodeBackend
NodeBackend.ingestBatch() passed string IDs directly to the N-API layer
via Number(e.id), which returns NaN for non-numeric strings (UUIDs, hex
hashes, etc.). The native Rust HNSW silently drops entries with NaN IDs,
causing silent data loss with no error signal.
Fix: Add a bidirectional string↔numeric mapping layer to NodeBackend,
following the same pattern used by HNSWLibBackend in AgentDB:
- resolveLabel(): allocates sequential i64 labels for string IDs
- query(): maps numeric labels back to original string IDs
- delete(): resolves string IDs to labels before calling native layer
- Mappings persisted to {path}.idmap.json sidecar file
Also fixes query() returning numeric labels instead of original string
IDs, and delete() silently failing for non-numeric IDs.
Bumps @ruvector/rvf from 0.1.9 → 0.2.0 (breaking fix).
Closes ruvnet/agentic-flow#114
Co-Authored-By: claude-flow <ruv@ruv.net>
This commit is contained in:
parent
3d038d6d26
commit
0649333772
6 changed files with 474 additions and 8 deletions
15
npm/packages/rvf/dist/backend.d.ts
vendored
15
npm/packages/rvf/dist/backend.d.ts
vendored
|
|
@ -46,6 +46,10 @@ export interface RvfBackend {
|
|||
export declare class NodeBackend implements RvfBackend {
|
||||
private native;
|
||||
private handle;
|
||||
private idToLabel;
|
||||
private labelToId;
|
||||
private nextLabel;
|
||||
private storePath;
|
||||
private loadNative;
|
||||
private ensureHandle;
|
||||
create(path: string, options: RvfOptions): Promise<void>;
|
||||
|
|
@ -68,6 +72,17 @@ export declare class NodeBackend implements RvfBackend {
|
|||
extractEbpf(): Promise<RvfEbpfData | null>;
|
||||
segments(): Promise<RvfSegmentInfo[]>;
|
||||
dimension(): Promise<number>;
|
||||
/**
|
||||
* Get or allocate a numeric label for a string ID.
|
||||
* If the ID was already seen, returns the existing label.
|
||||
*/
|
||||
private resolveLabel;
|
||||
/** Path to the sidecar mappings file. */
|
||||
private mappingsPath;
|
||||
/** Persist the string↔label mapping to a sidecar JSON file. */
|
||||
private saveMappings;
|
||||
/** Load the string↔label mapping from the sidecar JSON file if it exists. */
|
||||
private loadMappings;
|
||||
}
|
||||
/**
|
||||
* Backend that delegates to the `@ruvector/rvf-wasm` WASM build.
|
||||
|
|
|
|||
106
npm/packages/rvf/dist/backend.js
vendored
106
npm/packages/rvf/dist/backend.js
vendored
|
|
@ -52,6 +52,11 @@ class NodeBackend {
|
|||
this.native = null;
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
this.handle = null;
|
||||
// String ID <-> Numeric Label mappings (N-API layer requires i64 labels)
|
||||
this.idToLabel = new Map();
|
||||
this.labelToId = new Map();
|
||||
this.nextLabel = 1; // RVF uses 1-based labels
|
||||
this.storePath = '';
|
||||
}
|
||||
async loadNative() {
|
||||
if (this.native)
|
||||
|
|
@ -76,6 +81,10 @@ class NodeBackend {
|
|||
await this.loadNative();
|
||||
try {
|
||||
this.handle = await this.native.create(path, mapOptionsToNative(options));
|
||||
this.storePath = path;
|
||||
this.idToLabel.clear();
|
||||
this.labelToId.clear();
|
||||
this.nextLabel = 1;
|
||||
}
|
||||
catch (err) {
|
||||
throw errors_1.RvfError.fromNative(err);
|
||||
|
|
@ -85,6 +94,8 @@ class NodeBackend {
|
|||
await this.loadNative();
|
||||
try {
|
||||
this.handle = await this.native.open(path);
|
||||
this.storePath = path;
|
||||
await this.loadMappings();
|
||||
}
|
||||
catch (err) {
|
||||
throw errors_1.RvfError.fromNative(err);
|
||||
|
|
@ -94,6 +105,8 @@ class NodeBackend {
|
|||
await this.loadNative();
|
||||
try {
|
||||
this.handle = await this.native.openReadonly(path);
|
||||
this.storePath = path;
|
||||
await this.loadMappings();
|
||||
}
|
||||
catch (err) {
|
||||
throw errors_1.RvfError.fromNative(err);
|
||||
|
|
@ -115,8 +128,14 @@ class NodeBackend {
|
|||
const f32 = v instanceof Float32Array ? v : new Float32Array(v);
|
||||
flat.set(f32, i * dim);
|
||||
}
|
||||
const ids = entries.map((e) => Number(e.id));
|
||||
// Map string IDs to numeric labels for the N-API layer.
|
||||
// The native Rust HNSW expects i64 labels — non-numeric strings cause
|
||||
// silent data loss (NaN → dropped). We maintain a bidirectional
|
||||
// string↔label mapping and persist it as a sidecar JSON file.
|
||||
const ids = entries.map((e) => this.resolveLabel(e.id));
|
||||
const result = this.handle.ingestBatch(flat, ids);
|
||||
// Persist mappings after every ingest so they survive crashes.
|
||||
await this.saveMappings();
|
||||
return {
|
||||
accepted: Number(result.accepted),
|
||||
rejected: Number(result.rejected),
|
||||
|
|
@ -132,8 +151,9 @@ class NodeBackend {
|
|||
try {
|
||||
const nativeOpts = options ? mapQueryOptionsToNative(options) : undefined;
|
||||
const results = this.handle.query(vector, k, nativeOpts);
|
||||
// Map numeric labels back to original string IDs.
|
||||
return results.map((r) => ({
|
||||
id: String(r.id),
|
||||
id: this.labelToId.get(Number(r.id)) ?? String(r.id),
|
||||
distance: r.distance,
|
||||
}));
|
||||
}
|
||||
|
|
@ -144,8 +164,23 @@ class NodeBackend {
|
|||
async delete(ids) {
|
||||
this.ensureHandle();
|
||||
try {
|
||||
const numIds = ids.map((id) => Number(id));
|
||||
// Resolve string IDs to numeric labels for the N-API layer.
|
||||
const numIds = ids
|
||||
.map((id) => this.idToLabel.get(id))
|
||||
.filter((label) => label !== undefined);
|
||||
if (numIds.length === 0) {
|
||||
return { deleted: 0, epoch: 0 };
|
||||
}
|
||||
const result = this.handle.delete(numIds);
|
||||
// Remove deleted entries from the mapping.
|
||||
for (const id of ids) {
|
||||
const label = this.idToLabel.get(id);
|
||||
if (label !== undefined) {
|
||||
this.idToLabel.delete(id);
|
||||
this.labelToId.delete(label);
|
||||
}
|
||||
}
|
||||
await this.saveMappings();
|
||||
return { deleted: Number(result.deleted), epoch: result.epoch };
|
||||
}
|
||||
catch (err) {
|
||||
|
|
@ -191,6 +226,7 @@ class NodeBackend {
|
|||
if (!this.handle)
|
||||
return;
|
||||
try {
|
||||
await this.saveMappings();
|
||||
this.handle.close();
|
||||
}
|
||||
catch (err) {
|
||||
|
|
@ -198,6 +234,10 @@ class NodeBackend {
|
|||
}
|
||||
finally {
|
||||
this.handle = null;
|
||||
this.idToLabel.clear();
|
||||
this.labelToId.clear();
|
||||
this.nextLabel = 1;
|
||||
this.storePath = '';
|
||||
}
|
||||
}
|
||||
async fileId() {
|
||||
|
|
@ -235,6 +275,12 @@ class NodeBackend {
|
|||
const child = new NodeBackend();
|
||||
child.native = this.native;
|
||||
child.handle = childHandle;
|
||||
child.storePath = childPath;
|
||||
// Copy parent mappings to child (COW semantics)
|
||||
child.idToLabel = new Map(this.idToLabel);
|
||||
child.labelToId = new Map(this.labelToId);
|
||||
child.nextLabel = this.nextLabel;
|
||||
await child.saveMappings();
|
||||
return child;
|
||||
}
|
||||
catch (err) {
|
||||
|
|
@ -313,6 +359,60 @@ class NodeBackend {
|
|||
throw errors_1.RvfError.fromNative(err);
|
||||
}
|
||||
}
|
||||
// ─── String ID ↔ Numeric Label mapping helpers ───
|
||||
/**
|
||||
* Get or allocate a numeric label for a string ID.
|
||||
* If the ID was already seen, returns the existing label.
|
||||
*/
|
||||
resolveLabel(id) {
|
||||
let label = this.idToLabel.get(id);
|
||||
if (label !== undefined)
|
||||
return label;
|
||||
label = this.nextLabel++;
|
||||
this.idToLabel.set(id, label);
|
||||
this.labelToId.set(label, id);
|
||||
return label;
|
||||
}
|
||||
/** Path to the sidecar mappings file. */
|
||||
mappingsPath() {
|
||||
return this.storePath ? this.storePath + '.idmap.json' : '';
|
||||
}
|
||||
/** Persist the string↔label mapping to a sidecar JSON file. */
|
||||
async saveMappings() {
|
||||
const mp = this.mappingsPath();
|
||||
if (!mp)
|
||||
return;
|
||||
try {
|
||||
const fs = await Promise.resolve().then(() => __importStar(require('fs')));
|
||||
const data = JSON.stringify({
|
||||
idToLabel: Object.fromEntries(this.idToLabel),
|
||||
labelToId: Object.fromEntries(Array.from(this.labelToId.entries()).map(([k, v]) => [String(k), v])),
|
||||
nextLabel: this.nextLabel,
|
||||
});
|
||||
fs.writeFileSync(mp, data, 'utf-8');
|
||||
}
|
||||
catch {
|
||||
// Non-fatal: mapping persistence is best-effort (e.g. read-only FS).
|
||||
}
|
||||
}
|
||||
/** Load the string↔label mapping from the sidecar JSON file if it exists. */
|
||||
async loadMappings() {
|
||||
const mp = this.mappingsPath();
|
||||
if (!mp)
|
||||
return;
|
||||
try {
|
||||
const fs = await Promise.resolve().then(() => __importStar(require('fs')));
|
||||
if (!fs.existsSync(mp))
|
||||
return;
|
||||
const raw = JSON.parse(fs.readFileSync(mp, 'utf-8'));
|
||||
this.idToLabel = new Map(Object.entries(raw.idToLabel ?? {}).map(([k, v]) => [k, Number(v)]));
|
||||
this.labelToId = new Map(Object.entries(raw.labelToId ?? {}).map(([k, v]) => [Number(k), v]));
|
||||
this.nextLabel = raw.nextLabel ?? this.idToLabel.size + 1;
|
||||
}
|
||||
catch {
|
||||
// Non-fatal: start with empty mappings.
|
||||
}
|
||||
}
|
||||
}
|
||||
exports.NodeBackend = NodeBackend;
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
|
|||
2
npm/packages/rvf/dist/backend.js.map
vendored
2
npm/packages/rvf/dist/backend.js.map
vendored
File diff suppressed because one or more lines are too long
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "@ruvector/rvf",
|
||||
"version": "0.1.9",
|
||||
"version": "0.2.0",
|
||||
"description": "RuVector Format — unified TypeScript SDK for vector intelligence",
|
||||
"main": "dist/index.js",
|
||||
"module": "dist/index.js",
|
||||
|
|
|
|||
|
|
@ -79,6 +79,12 @@ export class NodeBackend implements RvfBackend {
|
|||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
private handle: any = null;
|
||||
|
||||
// String ID <-> Numeric Label mappings (N-API layer requires i64 labels)
|
||||
private idToLabel: Map<string, number> = new Map();
|
||||
private labelToId: Map<number, string> = new Map();
|
||||
private nextLabel: number = 1; // RVF uses 1-based labels
|
||||
private storePath: string = '';
|
||||
|
||||
private async loadNative(): Promise<void> {
|
||||
if (this.native) return;
|
||||
try {
|
||||
|
|
@ -105,6 +111,10 @@ export class NodeBackend implements RvfBackend {
|
|||
await this.loadNative();
|
||||
try {
|
||||
this.handle = await this.native.create(path, mapOptionsToNative(options));
|
||||
this.storePath = path;
|
||||
this.idToLabel.clear();
|
||||
this.labelToId.clear();
|
||||
this.nextLabel = 1;
|
||||
} catch (err) {
|
||||
throw RvfError.fromNative(err);
|
||||
}
|
||||
|
|
@ -114,6 +124,8 @@ export class NodeBackend implements RvfBackend {
|
|||
await this.loadNative();
|
||||
try {
|
||||
this.handle = await this.native.open(path);
|
||||
this.storePath = path;
|
||||
await this.loadMappings();
|
||||
} catch (err) {
|
||||
throw RvfError.fromNative(err);
|
||||
}
|
||||
|
|
@ -123,6 +135,8 @@ export class NodeBackend implements RvfBackend {
|
|||
await this.loadNative();
|
||||
try {
|
||||
this.handle = await this.native.openReadonly(path);
|
||||
this.storePath = path;
|
||||
await this.loadMappings();
|
||||
} catch (err) {
|
||||
throw RvfError.fromNative(err);
|
||||
}
|
||||
|
|
@ -143,8 +157,14 @@ export class NodeBackend implements RvfBackend {
|
|||
const f32 = v instanceof Float32Array ? v : new Float32Array(v);
|
||||
flat.set(f32, i * dim);
|
||||
}
|
||||
const ids = entries.map((e) => Number(e.id));
|
||||
// Map string IDs to numeric labels for the N-API layer.
|
||||
// The native Rust HNSW expects i64 labels — non-numeric strings cause
|
||||
// silent data loss (NaN → dropped). We maintain a bidirectional
|
||||
// string↔label mapping and persist it as a sidecar JSON file.
|
||||
const ids = entries.map((e) => this.resolveLabel(e.id));
|
||||
const result = this.handle.ingestBatch(flat, ids);
|
||||
// Persist mappings after every ingest so they survive crashes.
|
||||
await this.saveMappings();
|
||||
return {
|
||||
accepted: Number(result.accepted),
|
||||
rejected: Number(result.rejected),
|
||||
|
|
@ -164,8 +184,9 @@ export class NodeBackend implements RvfBackend {
|
|||
try {
|
||||
const nativeOpts = options ? mapQueryOptionsToNative(options) : undefined;
|
||||
const results = this.handle.query(vector, k, nativeOpts);
|
||||
// Map numeric labels back to original string IDs.
|
||||
return (results as Array<{ id: number; distance: number }>).map((r) => ({
|
||||
id: String(r.id),
|
||||
id: this.labelToId.get(Number(r.id)) ?? String(r.id),
|
||||
distance: r.distance,
|
||||
}));
|
||||
} catch (err) {
|
||||
|
|
@ -176,8 +197,23 @@ export class NodeBackend implements RvfBackend {
|
|||
async delete(ids: string[]): Promise<RvfDeleteResult> {
|
||||
this.ensureHandle();
|
||||
try {
|
||||
const numIds = ids.map((id) => Number(id));
|
||||
// Resolve string IDs to numeric labels for the N-API layer.
|
||||
const numIds = ids
|
||||
.map((id) => this.idToLabel.get(id))
|
||||
.filter((label): label is number => label !== undefined);
|
||||
if (numIds.length === 0) {
|
||||
return { deleted: 0, epoch: 0 };
|
||||
}
|
||||
const result = this.handle.delete(numIds);
|
||||
// Remove deleted entries from the mapping.
|
||||
for (const id of ids) {
|
||||
const label = this.idToLabel.get(id);
|
||||
if (label !== undefined) {
|
||||
this.idToLabel.delete(id);
|
||||
this.labelToId.delete(label);
|
||||
}
|
||||
}
|
||||
await this.saveMappings();
|
||||
return { deleted: Number(result.deleted), epoch: result.epoch };
|
||||
} catch (err) {
|
||||
throw RvfError.fromNative(err);
|
||||
|
|
@ -222,11 +258,16 @@ export class NodeBackend implements RvfBackend {
|
|||
/**
 * Close the open store, if any.
 *
 * Saves the ID mappings, closes the native handle, and then resets all
 * per-store state whether or not closing succeeded. An error raised while
 * closing is converted with `RvfError.fromNative` and rethrown.
 */
async close(): Promise<void> {
  if (!this.handle) {
    return;
  }
  try {
    await this.saveMappings();
    this.handle.close();
  } catch (err) {
    throw RvfError.fromNative(err);
  } finally {
    // Runs on success and on failure: forget everything tied to this store.
    this.storePath = '';
    this.nextLabel = 1;
    this.labelToId.clear();
    this.idToLabel.clear();
    this.handle = null;
  }
}
|
||||
|
||||
|
|
@ -265,6 +306,12 @@ export class NodeBackend implements RvfBackend {
|
|||
const child = new NodeBackend();
|
||||
child.native = this.native;
|
||||
child.handle = childHandle;
|
||||
child.storePath = childPath;
|
||||
// Copy parent mappings to child (COW semantics)
|
||||
child.idToLabel = new Map(this.idToLabel);
|
||||
child.labelToId = new Map(this.labelToId);
|
||||
child.nextLabel = this.nextLabel;
|
||||
await child.saveMappings();
|
||||
return child;
|
||||
} catch (err) {
|
||||
throw RvfError.fromNative(err);
|
||||
|
|
@ -348,6 +395,65 @@ export class NodeBackend implements RvfBackend {
|
|||
throw RvfError.fromNative(err);
|
||||
}
|
||||
}
|
||||
|
||||
// ─── String ID ↔ Numeric Label mapping helpers ───
|
||||
|
||||
/**
 * Return the numeric label for a string ID.
 *
 * A known ID yields the label recorded for it earlier. An unseen ID is given
 * the current value of `nextLabel` (which is then incremented) and the pair
 * is recorded in both lookup maps.
 */
private resolveLabel(id: string): number {
  const existing = this.idToLabel.get(id);
  if (existing !== undefined) {
    return existing;
  }
  const allocated = this.nextLabel;
  this.nextLabel += 1;
  this.idToLabel.set(id, allocated);
  this.labelToId.set(allocated, id);
  return allocated;
}
|
||||
|
||||
/**
 * Path of the sidecar mappings file: the store path with `.idmap.json`
 * appended, or an empty string when no store path is set.
 */
private mappingsPath(): string {
  if (!this.storePath) {
    return '';
  }
  return this.storePath + '.idmap.json';
}
|
||||
|
||||
/**
 * Persist the string↔label mapping to the sidecar JSON file.
 *
 * The payload is written to a temporary file beside the sidecar and then
 * renamed over it. Writing directly onto the sidecar could leave truncated
 * JSON behind if the process died mid-write; loadMappings() swallows parse
 * errors, so a truncated sidecar would silently discard every mapping.
 *
 * Persistence is best-effort: any failure (e.g. read-only FS) is swallowed
 * and the in-memory maps remain in use for the current session.
 */
private async saveMappings(): Promise<void> {
  const mp = this.mappingsPath();
  if (!mp) return;
  // Include the pid so two processes saving the same store do not share a temp file.
  const tmp = `${mp}.${process.pid}.tmp`;
  let fs: typeof import('fs') | undefined;
  try {
    fs = await import('fs');
    const data = JSON.stringify({
      idToLabel: Object.fromEntries(this.idToLabel),
      // Numeric map keys become string property names, as JSON requires.
      labelToId: Object.fromEntries(this.labelToId),
      nextLabel: this.nextLabel,
    });
    fs.writeFileSync(tmp, data, 'utf-8');
    // Replace the previous sidecar only after the new content is fully written.
    fs.renameSync(tmp, mp);
  } catch {
    // Non-fatal: mapping persistence is best-effort (e.g. read-only FS).
    // Remove a partially written temp file so it does not accumulate.
    try {
      fs?.unlinkSync(tmp);
    } catch {
      // Temp file was never created or is already gone.
    }
  }
}
|
||||
|
||||
/**
 * Load the string↔label mapping from the sidecar JSON file.
 *
 * The in-memory state is always replaced as a unit:
 * - sidecar present and parseable: maps and counter come from the file;
 * - sidecar missing or unreadable: maps are emptied and the counter is reset
 *   to 1, so mappings left over from a previously opened store are never
 *   applied to this one.
 *
 * Entries whose label is not a safe integer (or whose ID is not a string) are
 * ignored. `nextLabel` is raised to at least one past the largest loaded
 * label, so labels allocated later cannot collide with loaded ones even when
 * the stored counter is missing or stale.
 *
 * NOTE(review): when the sidecar is unreadable the store is still opened with
 * empty mappings; labels allocated afterwards start at 1 and may overlap
 * labels already present in the native store — confirm this is acceptable.
 */
private async loadMappings(): Promise<void> {
  const mp = this.mappingsPath();
  if (!mp) return;

  // Build into locals first so a failure halfway cannot leave mixed state.
  const idToLabel = new Map<string, number>();
  const labelToId = new Map<number, string>();
  let nextLabel = 1;

  try {
    const fs = await import('fs');
    if (fs.existsSync(mp)) {
      const raw = JSON.parse(fs.readFileSync(mp, 'utf-8'));
      let maxLabel = 0;
      for (const [id, value] of Object.entries(raw?.idToLabel ?? {})) {
        const label = Number(value);
        if (!Number.isSafeInteger(label)) continue;
        idToLabel.set(id, label);
        maxLabel = Math.max(maxLabel, label);
      }
      for (const [key, id] of Object.entries(raw?.labelToId ?? {})) {
        const label = Number(key);
        if (!Number.isSafeInteger(label) || typeof id !== 'string') continue;
        labelToId.set(label, id);
        maxLabel = Math.max(maxLabel, label);
      }
      const stored = Number(raw?.nextLabel);
      nextLabel = Math.max(Number.isSafeInteger(stored) ? stored : 1, maxLabel + 1);
    }
  } catch {
    // Non-fatal: start with empty mappings.
    idToLabel.clear();
    labelToId.clear();
    nextLabel = 1;
  }

  this.idToLabel = idToLabel;
  this.labelToId = labelToId;
  this.nextLabel = nextLabel;
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
|
|||
245
npm/packages/rvf/tests/test-id-mapping.js
Normal file
245
npm/packages/rvf/tests/test-id-mapping.js
Normal file
|
|
@ -0,0 +1,245 @@
|
|||
'use strict';
|
||||
/**
|
||||
* Tests for NodeBackend string ID ↔ numeric label mapping (issue #114 fix).
|
||||
*
|
||||
* These tests exercise the mapping logic directly without requiring the
|
||||
* native @ruvector/rvf-node addon, by using a lightweight mock.
|
||||
*/
|
||||
|
||||
const assert = require('assert');
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const os = require('os');
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mock the native module so tests work without the N-API addon
|
||||
// ---------------------------------------------------------------------------
|
||||
/**
 * In-memory stand-in for the native store handle used by these tests.
 * Vectors are kept in a Map keyed by numeric label; distances are Euclidean.
 */
class MockNativeHandle {
  /** label → Float32Array */
  vectors = new Map();

  /**
   * Store one slice of `flat` per label. The slice width is
   * `flat.length / ids.length`; labels that are NaN or undefined are skipped
   * and counted as rejected.
   */
  ingestBatch(flat, ids) {
    const width = flat.length / ids.length;
    let accepted = 0;
    ids.forEach((label, index) => {
      // Mimic native behavior: NaN labels are silently ignored
      if (label === undefined || Number.isNaN(label)) return;
      this.vectors.set(label, flat.slice(index * width, (index + 1) * width));
      accepted += 1;
    });
    return { accepted, rejected: ids.length - accepted, epoch: 1 };
  }

  /** Return the `k` stored entries closest to `vector`, nearest first. */
  query(vector, k) {
    const scored = Array.from(this.vectors, ([id, stored]) => {
      let sumOfSquares = 0;
      for (let i = 0; i < vector.length; i++) {
        sumOfSquares += (vector[i] - stored[i]) ** 2;
      }
      return { id, distance: Math.sqrt(sumOfSquares) };
    });
    scored.sort((a, b) => a.distance - b.distance);
    return scored.slice(0, k);
  }

  /** Remove the given labels and report how many were actually present. */
  delete(numIds) {
    const removed = numIds.filter((id) => this.vectors.delete(id));
    return { deleted: removed.length, epoch: 1 };
  }

  /** Fixed status record; only `total_vectors` reflects the stored data. */
  status() {
    return {
      total_vectors: this.vectors.size,
      total_segments: 1,
      file_size: 0,
      current_epoch: 0,
      profile_id: 0,
      compaction_state: 'idle',
      dead_space_ratio: 0,
      read_only: false,
    };
  }

  /** Drop every stored vector. */
  close() {
    this.vectors.clear();
  }

  /** The mock always reports a dimension of 4. */
  dimension() {
    return 4;
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// We test NodeBackend by patching its loadNative to use our mock.
|
||||
// ---------------------------------------------------------------------------
|
||||
const { NodeBackend } = require('../dist/backend');
|
||||
|
||||
/**
 * Build a NodeBackend wired to the mock handle instead of the native addon.
 *
 * The backend's internal fields are overwritten directly: a fake `native`
 * factory, a fresh mock `handle`, a store path of `test.rvf` inside `tmpDir`,
 * empty ID maps, and a label counter starting at 1.
 *
 * @returns the patched backend together with the store path it was given
 */
async function createTestBackend(tmpDir) {
  const storePath = path.join(tmpDir, 'test.rvf');
  const backend = new NodeBackend();
  // Patch internals: skip native loading, use mock handle
  Object.assign(backend, {
    native: {
      create: () => new MockNativeHandle(),
      open: () => new MockNativeHandle(),
    },
    handle: new MockNativeHandle(),
    storePath,
    idToLabel: new Map(),
    labelToId: new Map(),
    nextLabel: 1,
  });
  return { backend, storePath };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
let passed = 0, failed = 0;
|
||||
|
||||
/**
 * Run one named test case inside its own temporary directory.
 *
 * `fn` receives the directory path. A resolved call prints a PASS line and
 * increments `passed`; a thrown error prints a FAIL line with the error's
 * message and increments `failed`. The directory is removed afterwards, and
 * a failure to remove it is ignored.
 */
async function test(name, fn) {
  const workDir = fs.mkdtempSync(path.join(os.tmpdir(), 'rvf-id-test-'));
  try {
    await fn(workDir);
    console.log(` PASS ${name}`);
    passed += 1;
  } catch (error) {
    console.log(` FAIL ${name}: ${error.message}`);
    failed += 1;
  } finally {
    // Cleanup
    try {
      fs.rmSync(workDir, { recursive: true, force: true });
    } catch {
      // Leftover temp directories are harmless for the test outcome.
    }
  }
}
|
||||
|
||||
// Test driver: runs every case in sequence, prints a summary, and exits with
// status 1 when at least one case failed (0 otherwise).
(async () => {
  console.log('--- NodeBackend ID Mapping (Issue #114) ---');

  await test('string IDs are mapped to numeric labels', async (tmp) => {
    const { backend } = await createTestBackend(tmp);
    const vector = new Float32Array([1, 2, 3, 4]);
    const entries = ['chunk_0', 'uuid-abc-123', 'da003664_2b0f6ff3747e'].map((id) => ({
      id,
      vector,
    }));
    await backend.ingestBatch(entries);
    const handle = backend['handle'];
    // All 3 should have been accepted (no NaN labels)
    assert.strictEqual(handle.vectors.size, 3, `Expected 3 vectors, got ${handle.vectors.size}`);
  });

  await test('numeric labels are sequential starting at 1', async (tmp) => {
    const { backend } = await createTestBackend(tmp);
    const vector = new Float32Array([1, 2, 3, 4]);
    await backend.ingestBatch(['alpha', 'beta', 'gamma'].map((id) => ({ id, vector })));
    const labels = backend['idToLabel'];
    assert.strictEqual(labels.get('alpha'), 1);
    assert.strictEqual(labels.get('beta'), 2);
    assert.strictEqual(labels.get('gamma'), 3);
    assert.strictEqual(backend['nextLabel'], 4);
  });

  await test('duplicate IDs reuse the same label', async (tmp) => {
    const { backend } = await createTestBackend(tmp);
    const vector = new Float32Array([1, 2, 3, 4]);
    // Ingest the same ID in two separate batches.
    for (let round = 0; round < 2; round++) {
      await backend.ingestBatch([{ id: 'dup', vector }]);
    }
    assert.strictEqual(backend['idToLabel'].get('dup'), 1);
    assert.strictEqual(backend['nextLabel'], 2); // Only 1 unique label allocated
  });

  await test('query returns original string IDs, not numeric labels', async (tmp) => {
    const { backend } = await createTestBackend(tmp);
    await backend.ingestBatch([
      { id: 'doc_hello', vector: new Float32Array([1, 0, 0, 0]) },
      { id: 'doc_world', vector: new Float32Array([0, 1, 0, 0]) },
    ]);
    const hits = await backend.query(new Float32Array([1, 0, 0, 0]), 2);
    const ids = hits.map((hit) => hit.id);
    assert.ok(ids.includes('doc_hello'), `Expected doc_hello in results, got ${ids}`);
    assert.ok(ids.includes('doc_world'), `Expected doc_world in results, got ${ids}`);
  });

  await test('delete resolves string IDs to labels', async (tmp) => {
    const { backend } = await createTestBackend(tmp);
    await backend.ingestBatch([
      { id: 'to_keep', vector: new Float32Array([1, 0, 0, 0]) },
      { id: 'to_delete', vector: new Float32Array([0, 1, 0, 0]) },
    ]);
    const outcome = await backend.delete(['to_delete']);
    assert.strictEqual(outcome.deleted, 1);
    assert.strictEqual(backend['handle'].vectors.size, 1);
    // Mapping should be cleaned up
    const labels = backend['idToLabel'];
    assert.strictEqual(labels.has('to_delete'), false);
    assert.strictEqual(labels.has('to_keep'), true);
  });

  await test('delete of unknown ID returns 0', async (tmp) => {
    const { backend } = await createTestBackend(tmp);
    await backend.ingestBatch([{ id: 'exists', vector: new Float32Array([1, 0, 0, 0]) }]);
    const outcome = await backend.delete(['nonexistent']);
    assert.strictEqual(outcome.deleted, 0);
  });

  await test('mappings are persisted to sidecar JSON file', async (tmp) => {
    const { backend, storePath } = await createTestBackend(tmp);
    // Create an empty placeholder at the store path before ingesting.
    fs.writeFileSync(storePath, '');
    await backend.ingestBatch([
      { id: 'persist_a', vector: new Float32Array([1, 0, 0, 0]) },
      { id: 'persist_b', vector: new Float32Array([0, 1, 0, 0]) },
    ]);
    const mapFile = storePath + '.idmap.json';
    assert.ok(fs.existsSync(mapFile), `Mapping file not created at ${mapFile}`);
    const saved = JSON.parse(fs.readFileSync(mapFile, 'utf-8'));
    assert.strictEqual(saved.idToLabel['persist_a'], 1);
    assert.strictEqual(saved.idToLabel['persist_b'], 2);
    assert.strictEqual(saved.labelToId['1'], 'persist_a');
    assert.strictEqual(saved.labelToId['2'], 'persist_b');
    assert.strictEqual(saved.nextLabel, 3);
  });

  await test('mappings are restored from sidecar JSON on open', async (tmp) => {
    const storePath = path.join(tmp, 'restore.rvf');
    fs.writeFileSync(storePath, '');
    // Write a sidecar mapping file manually
    const sidecar = JSON.stringify({
      idToLabel: { 'restored_x': 10, 'restored_y': 20 },
      labelToId: { '10': 'restored_x', '20': 'restored_y' },
      nextLabel: 21,
    });
    fs.writeFileSync(storePath + '.idmap.json', sidecar);

    const backend = new NodeBackend();
    backend['native'] = { open: () => new MockNativeHandle() };
    backend['handle'] = new MockNativeHandle();
    backend['storePath'] = storePath;
    // Call loadMappings directly rather than going through open().
    await backend['loadMappings']();

    const byId = backend['idToLabel'];
    const byLabel = backend['labelToId'];
    assert.strictEqual(byId.get('restored_x'), 10);
    assert.strictEqual(byId.get('restored_y'), 20);
    assert.strictEqual(byLabel.get(10), 'restored_x');
    assert.strictEqual(byLabel.get(20), 'restored_y');
    assert.strictEqual(backend['nextLabel'], 21);
  });

  await test('purely numeric string IDs still work correctly', async (tmp) => {
    const { backend } = await createTestBackend(tmp);
    await backend.ingestBatch([
      { id: '42', vector: new Float32Array([1, 0, 0, 0]) },
      { id: '99', vector: new Float32Array([0, 1, 0, 0]) },
    ]);
    // They get mapped labels, not passed through as raw numbers
    const labels = backend['idToLabel'];
    assert.strictEqual(labels.get('42'), 1);
    assert.strictEqual(labels.get('99'), 2);
    const hits = await backend.query(new Float32Array([1, 0, 0, 0]), 2);
    const ids = hits.map((hit) => hit.id);
    assert.ok(ids.includes('42'), `Expected '42' in results`);
    assert.ok(ids.includes('99'), `Expected '99' in results`);
  });

  await test('mixed numeric and string IDs coexist', async (tmp) => {
    const { backend } = await createTestBackend(tmp);
    await backend.ingestBatch([
      { id: '1', vector: new Float32Array([1, 0, 0, 0]) },
      { id: 'uuid-abc', vector: new Float32Array([0, 1, 0, 0]) },
      { id: '999', vector: new Float32Array([0, 0, 1, 0]) },
      { id: 'chunk_42', vector: new Float32Array([0, 0, 0, 1]) },
    ]);
    assert.strictEqual(backend['handle'].vectors.size, 4);
    const hits = await backend.query(new Float32Array([1, 0, 0, 0]), 4);
    const ids = new Set(hits.map((hit) => hit.id));
    assert.ok(ids.has('1'));
    assert.ok(ids.has('uuid-abc'));
    assert.ok(ids.has('999'));
    assert.ok(ids.has('chunk_42'));
  });

  // Print results
  const rule = '='.repeat(60);
  console.log(`\n${rule}`);
  console.log(`Results: ${passed} passed, ${failed} failed, ${passed + failed} total`);
  console.log(rule);
  process.exit(failed > 0 ? 1 : 0);
})();
|
||||
Loading…
Add table
Add a link
Reference in a new issue