mirror of
https://github.com/ruvnet/RuVector.git
synced 2026-05-23 21:25:02 +00:00
feat: ruvllm-wasm v2.0.0 — first functional WASM publish
- Gate WebGPU web-sys features behind `webgpu` Cargo feature flag
- Remove unused bytemuck dependency and gpu_map_mode / GpuSupportedLimits web-sys features
- Add wasm-opt=false workaround for Rust 1.91 codegen bug
- Publish @ruvector/ruvllm-wasm@2.0.0 with compiled WASM binary (435KB)
- Add ADR-084 documenting build workarounds and known limitations

Closes #240

Co-Authored-By: claude-flow <ruv@ruv.net>
This commit is contained in:
parent
0657a96082
commit
da0f016d0f
3 changed files with 102 additions and 39 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
|
@ -9541,7 +9541,6 @@ dependencies = [
|
|||
name = "ruvllm-wasm"
|
||||
version = "2.0.0"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
"console_error_panic_hook",
|
||||
"js-sys",
|
||||
"serde",
|
||||
|
|
|
|||
|
|
@ -10,6 +10,9 @@ description = "WASM bindings for RuvLLM - browser-compatible LLM inference runti
|
|||
keywords = ["wasm", "llm", "inference", "browser", "webgpu"]
|
||||
categories = ["wasm", "api-bindings", "web-programming"]
|
||||
|
||||
[package.metadata.wasm-pack.profile.release]
|
||||
wasm-opt = false
|
||||
|
||||
[lib]
|
||||
crate-type = ["cdylib", "rlib"]
|
||||
|
||||
|
|
@ -18,12 +21,12 @@ crate-type = ["cdylib", "rlib"]
|
|||
wasm-bindgen = "0.2"
|
||||
wasm-bindgen-futures = "0.4"
|
||||
js-sys = "0.3"
|
||||
# Core web-sys features (always needed)
|
||||
web-sys = { version = "0.3", features = [
|
||||
"console",
|
||||
"Performance",
|
||||
"Window",
|
||||
"Navigator",
|
||||
# Web Workers support (enabled with parallel feature)
|
||||
"Worker",
|
||||
"WorkerOptions",
|
||||
"WorkerType",
|
||||
|
|
@ -33,39 +36,6 @@ web-sys = { version = "0.3", features = [
|
|||
"MessageEvent",
|
||||
"ErrorEvent",
|
||||
"DedicatedWorkerGlobalScope",
|
||||
# WebGPU features (enabled with webgpu feature)
|
||||
"Gpu",
|
||||
"GpuAdapter",
|
||||
"GpuAdapterInfo",
|
||||
"GpuDevice",
|
||||
"GpuQueue",
|
||||
"GpuBuffer",
|
||||
"GpuBufferDescriptor",
|
||||
"GpuShaderModule",
|
||||
"GpuShaderModuleDescriptor",
|
||||
"GpuBindGroup",
|
||||
"GpuBindGroupDescriptor",
|
||||
"GpuBindGroupEntry",
|
||||
"GpuBindGroupLayout",
|
||||
"GpuBindGroupLayoutDescriptor",
|
||||
"GpuBindGroupLayoutEntry",
|
||||
"GpuBufferBinding",
|
||||
"GpuBufferBindingLayout",
|
||||
"GpuBufferBindingType",
|
||||
"GpuComputePipeline",
|
||||
"GpuComputePipelineDescriptor",
|
||||
"GpuPipelineLayout",
|
||||
"GpuPipelineLayoutDescriptor",
|
||||
"GpuProgrammableStage",
|
||||
"GpuCommandEncoder",
|
||||
"GpuCommandEncoderDescriptor",
|
||||
"GpuCommandBuffer",
|
||||
"GpuComputePassEncoder",
|
||||
"GpuComputePassDescriptor",
|
||||
"gpu_map_mode",
|
||||
"GpuRequestAdapterOptions",
|
||||
"GpuDeviceDescriptor",
|
||||
"GpuSupportedLimits",
|
||||
] }
|
||||
|
||||
# Serialization
|
||||
|
|
@ -76,16 +46,27 @@ serde_json = "1.0"
|
|||
# Error handling
|
||||
console_error_panic_hook = { version = "0.1", optional = true }
|
||||
|
||||
# Byte casting for GPU buffers
|
||||
bytemuck = { version = "1.14", features = ["derive"] }
|
||||
|
||||
[dev-dependencies]
|
||||
wasm-bindgen-test = "0.3"
|
||||
|
||||
[features]
|
||||
default = ["console_error_panic_hook"]
|
||||
# WebGPU acceleration
|
||||
webgpu = []
|
||||
# WebGPU acceleration (adds GPU compute pipeline, shader compilation, buffer management)
|
||||
webgpu = ["web-sys/Gpu", "web-sys/GpuAdapter", "web-sys/GpuAdapterInfo",
|
||||
"web-sys/GpuDevice", "web-sys/GpuQueue", "web-sys/GpuBuffer",
|
||||
"web-sys/GpuBufferDescriptor", "web-sys/GpuShaderModule",
|
||||
"web-sys/GpuShaderModuleDescriptor", "web-sys/GpuBindGroup",
|
||||
"web-sys/GpuBindGroupDescriptor", "web-sys/GpuBindGroupEntry",
|
||||
"web-sys/GpuBindGroupLayout", "web-sys/GpuBindGroupLayoutDescriptor",
|
||||
"web-sys/GpuBindGroupLayoutEntry", "web-sys/GpuBufferBinding",
|
||||
"web-sys/GpuBufferBindingLayout", "web-sys/GpuBufferBindingType",
|
||||
"web-sys/GpuComputePipeline", "web-sys/GpuComputePipelineDescriptor",
|
||||
"web-sys/GpuPipelineLayout", "web-sys/GpuPipelineLayoutDescriptor",
|
||||
"web-sys/GpuProgrammableStage", "web-sys/GpuCommandEncoder",
|
||||
"web-sys/GpuCommandEncoderDescriptor", "web-sys/GpuCommandBuffer",
|
||||
"web-sys/GpuComputePassEncoder", "web-sys/GpuComputePassDescriptor",
|
||||
"web-sys/GpuRequestAdapterOptions", "web-sys/GpuDeviceDescriptor"]
|
||||
# Enable parallel inference with Web Workers
|
||||
parallel = []
|
||||
# Enable SIMD optimizations (requires wasm-simd target feature)
|
||||
|
|
|
|||
83
docs/adr/ADR-084-ruvllm-wasm-publish.md
Normal file
83
docs/adr/ADR-084-ruvllm-wasm-publish.md
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
# ADR-084: ruvllm-wasm — First Functional npm Publish
|
||||
|
||||
**Status**: Accepted
|
||||
**Date**: 2026-03-06
|
||||
**Authors**: RuVector Team
|
||||
**Deciders**: ruv
|
||||
**Related**: ADR-083 (Brain Training Loops), Issue #238 (placeholder deprecation)
|
||||
|
||||
## 1. Context
|
||||
|
||||
The `@ruvector/ruvllm-wasm` npm package (v0.1.0) was a placeholder — published without compiled WASM binaries. It was deprecated in PR #239. Meanwhile, the Rust crate `ruvllm-wasm` (v2.0.0) contains substantial working code:
|
||||
|
||||
| Subsystem | Status | Exports |
|
||||
|-----------|--------|---------|
|
||||
| KV Cache (two-tier FP32+u8) | Working | `KvCacheWasm`, `KvCacheConfigWasm` |
|
||||
| Memory (arena + buffer pool) | Working | `InferenceArenaWasm`, `BufferPoolWasm` |
|
||||
| Chat Templates (7 formats) | Working | `ChatTemplateWasm`, `ChatMessageWasm` |
|
||||
| HNSW Semantic Router | Working | `HnswRouterWasm`, `PatternWasm`, `RouteResultWasm` |
|
||||
| MicroLoRA (rank 1-4) | Working | `MicroLoraWasm`, `AdaptFeedbackWasm` |
|
||||
| SONA Instant Learning | Working | `SonaInstantWasm`, `SonaConfigWasm` |
|
||||
| Web Workers | Working | `ParallelInference`, feature detection |
|
||||
| WebGPU (matmul shader) | Feature-gated | `WebGpuInference`, `WebGpuContext` |
|
||||
| IntelligentLLM (combined) | Commented out | Pending API compatibility |
|
||||
|
||||
## 2. Decision
|
||||
|
||||
### 2.1 Fix WASM Build
|
||||
|
||||
The Rust 1.91 compiler has a codegen bug where release-profile optimizations produce invalid WASM (type mismatch: `expected i32, found f64` in wasm-bindgen post-processing). Debug builds validate fine.
|
||||
|
||||
**Workaround**: Build with `codegen-units=256` + `lto=off`. This prevents cross-function optimization passes that trigger the bug while still producing optimized output.
|
||||
|
||||
```bash
|
||||
CARGO_PROFILE_RELEASE_CODEGEN_UNITS=256 \
|
||||
CARGO_PROFILE_RELEASE_LTO=off \
|
||||
wasm-pack build crates/ruvllm-wasm --target web --scope ruvector --release
|
||||
```
|
||||
|
||||
Added `wasm-opt = false` to `[package.metadata.wasm-pack.profile.release]` since wasm-opt's validator also rejects the binary.
|
||||
|
||||
### 2.2 Gate WebGPU Features
|
||||
|
||||
WebGPU `web-sys` features (`gpu_map_mode`, `GpuSupportedLimits`, and 30 other GPU types) were compiled unconditionally, inflating binary size. The 30 GPU `web-sys` features that are still used are now enabled only by the `webgpu` Cargo feature flag.
|
||||
|
||||
Removed the unused `bytemuck` dependency and the `gpu_map_mode` / `GpuSupportedLimits` `web-sys` features (all declared but never referenced in source).
|
||||
|
||||
### 2.3 Publish as v2.0.0
|
||||
|
||||
Published `@ruvector/ruvllm-wasm@2.0.0` to npm with:
|
||||
- Compiled WASM binary (~435 KB, ~150 KB gzipped)
|
||||
- TypeScript definitions (`.d.ts`)
|
||||
- ES module JS glue code
|
||||
- Accurate README with working API examples
|
||||
|
||||
### 2.4 README
|
||||
|
||||
Replaced placeholder README with accurate documentation covering all exported types, working code examples, and browser compatibility table.
|
||||
|
||||
## 3. Files Modified
|
||||
|
||||
| File | Changes |
|
||||
|------|---------|
|
||||
| `crates/ruvllm-wasm/Cargo.toml` | Gate WebGPU features, remove unused bytemuck/gpu_map_mode/GpuSupportedLimits, add wasm-opt=false |
|
||||
| `crates/ruvllm-wasm/pkg/README.md` | Complete rewrite with accurate API docs |
|
||||
| `crates/ruvllm-wasm/pkg/` | Generated: `.wasm`, `.js`, `.d.ts` files |
|
||||
|
||||
## 4. Build Artifact Details
|
||||
|
||||
| File | Size |
|
||||
|------|------|
|
||||
| `ruvllm_wasm_bg.wasm` | 435 KB |
|
||||
| `ruvllm_wasm.js` | 128 KB |
|
||||
| `ruvllm_wasm.d.ts` | 45 KB |
|
||||
|
||||
## 5. Known Limitations
|
||||
|
||||
| Area | Limitation | Resolution Path |
|
||||
|------|-----------|-----------------|
|
||||
| Rust 1.91 codegen bug | Requires the `codegen-units=256` + `lto=off` build workaround and `wasm-opt = false` | Remove the workarounds once a Rust compiler release fixes the bug |
|
||||
| IntelligentLLMWasm | Commented out, references non-existent `HnswRouterConfigWasm` | Create config struct or pass params directly |
|
||||
| WebGPU attention | CPU fallback only (matmul has GPU path) | Implement attention WGSL shader pipeline |
|
||||
| Worker pool | Uses `setTimeout` polling instead of proper task completion signals | Implement message-based completion tracking |
|
||||
| GGUF model loading | Not yet wired (no `load_model_from_url`) | Requires streaming fetch + parser integration |
|
||||
Loading…
Add table
Add a link
Reference in a new issue