mirror of
https://github.com/ruvnet/RuVector.git
synced 2026-05-23 12:55:26 +00:00
ubuntu-latest (→ubuntu-24.04) runners are consistently exhausted on the free plan. Build Native Modules uses ubuntu-22.04 explicitly and always gets runners immediately. Switching clippy-fmt, Workspace CI, regression-guard, supply-chain, and WASM Dedup Check to the same pool. Co-Authored-By: claude-flow <ruv@ruv.net>
394 lines
18 KiB
YAML
394 lines
18 KiB
YAML
name: regression-guard

# Regression guards. This workflow was introduced for the
# fix/critical-issues-may-2026 batch (issues #437, #438, #458, #462, #463,
# #430); the jobs further down also guard #256, #267, #359, #464 and #411.
# Every job maps to one previously fixed bug and fails CI if that bug's
# signature reappears, so the fix cannot be silently undone.

on:
  push:
    branches:
      - main
  pull_request:
  workflow_dispatch:

# Every job only reads the checked-out tree; no write scopes are needed.
permissions:
  contents: read
|
|
|
|
jobs:
|
|
# Issue #437: parking_lot::RwLock is non-reentrant. Two .write() (or .read())
# in the same expression deadlocks. Forbid the exact textual pattern.
reentrant-rwlock-double-write:
  runs-on: ubuntu-22.04
  steps:
    - uses: actions/checkout@v4
    - name: Forbid reentrant parking_lot lock acquisition in a single statement
      run: |
        set -e
        # parking_lot::RwLock is non-reentrant. Dangerous patterns on the SAME
        # lock prefix:
        #   * .write() then .write() — pure deadlock (issue #437)
        #   * .write() then .read()  — read blocks behind write guard
        #   * .read()  then .write() — write blocks behind read guard
        # `.read()` then `.read()` on the same lock is allowed (multi-reader),
        # and any combination on DIFFERENT locks is safe. The captured prefix
        # `(\S+)` plus the `\1` backreference flags only same-lock cases.
        #   Pattern 1: .write() …\1.(write|read)()
        #   Pattern 2: .read()  …\1.write()
        # grep -P (PCRE) supports backreferences, but GNU grep rejects more
        # than one -e pattern under -P ("the -P option only supports a single
        # pattern", exit status 2). An `if grep …` treats that error as "no
        # match", which would make this guard pass unconditionally. So run one
        # grep per pattern and treat any status other than 0 (match) or
        # 1 (no match) as a hard failure.
        found=0
        for pattern in \
          '(\S+)\.write\(\)[^;]*\1\.(write|read)\(\)' \
          '(\S+)\.read\(\)[^;]*\1\.write\(\)'
        do
          status=0
          grep -rnP --include='*.rs' -e "$pattern" -- crates/ || status=$?
          case "$status" in
            0) found=1 ;;
            1) ;;
            *)
              echo "::error::grep exited with status $status while scanning for reentrant lock acquisition; the guard could not run."
              exit "$status"
              ;;
          esac
        done
        if [ "$found" -ne 0 ]; then
          echo "::error::Found reentrant parking_lot lock acquisition on a single statement (regression of issue #437). Bind the guard once: 'let mut g = x.write(); g.field = …;'"
          exit 1
        fi
|
|
|
|
# Issue #458: NTFS is case-insensitive, so two tracked paths that differ only
# by letter case cannot both exist in a Windows clone. Fail CI when the tree
# contains such a pair.
case-insensitive-collisions:
  runs-on: ubuntu-22.04
  steps:
    - uses: actions/checkout@v4
    - name: Detect case-only filename collisions
      run: |
        set -e
        # Lower-case every tracked path; any path that then appears more than
        # once is a case-only collision.
        collisions=$(git ls-files | tr '[:upper:]' '[:lower:]' | sort | uniq -d || true)
        if [ -z "$collisions" ]; then
          exit 0
        fi
        echo "::error::Case-only filename collisions found. Windows clones will silently drop one file from each pair:"
        echo "$collisions"
        exit 1
|
|
|
|
# Issue #438: the AVX-512 intrinsics have to stay behind a feature gate, so
# ruvector-core must compile on stable Rust 1.77+ with simd-avx512 disabled.
ruvector-core-no-avx512-builds-on-stable:
  runs-on: ubuntu-22.04
  steps:
    - uses: actions/checkout@v4
    - uses: dtolnay/rust-toolchain@stable
      with:
        # Pinned release used for both checks below.
        toolchain: "1.89.0"
    # Explicit feature list that leaves simd-avx512 out.
    - name: cargo check ruvector-core without simd-avx512
      run: cargo check -p ruvector-core --no-default-features --features simd,storage,hnsw,api-embeddings,parallel
    # Default feature set.
    - name: cargo check ruvector-core with simd-avx512 (default)
      run: cargo check -p ruvector-core
|
|
|
|
# Issue #430: HNSW recall@1 has to stay above 95% on the regression test that
# originally exposed the result-heap inversion.
hnsw-recall-at-1:
  runs-on: ubuntu-22.04
  steps:
    - uses: actions/checkout@v4
    - uses: dtolnay/rust-toolchain@stable
    - uses: Swatinem/rust-cache@v2
    - name: ruvector-router-core unit tests (release)
      run: |
        # `cargo test` takes a single TESTNAME filter per invocation, so each
        # guard test gets its own run, in this order:
        #   * the first three are the recall / search guards;
        #   * test_pruning_keeps_closest_not_newest — #430 (bug C):
        #     adjacency-list pruning must keep the CLOSEST m neighbours, not
        #     the most recently inserted ones;
        #   * test_index_rebuilt_from_storage_on_open — #430 (storage):
        #     VectorDB::new must rebuild the HNSW from persisted vectors so
        #     search returns results after reopen.
        for guard_test in \
          test_recall_at_1_with_biased_insertion_order \
          test_k_exceeds_ef_search_default \
          test_vector_db_basic_operations \
          test_pruning_keeps_closest_not_newest \
          test_index_rebuilt_from_storage_on_open
        do
          cargo test -p ruvector-router-core --release --lib "$guard_test"
        done
|
|
|
|
# Issue #430 (bug B): the HNSW insert beam width must be `ef_construction`
# itself, never `ef_construction.min(m * 2)`. With the default m=16 the clamp
# silently caps the beam at 32 and recall collapses at scale. This job is a
# purely textual ban on that expression.
hnsw-insert-beam-no-m2-clamp:
  runs-on: ubuntu-22.04
  steps:
    - uses: actions/checkout@v4
    - name: Forbid ef_construction.min(m * 2) clamp in HNSW insert beam
      run: |
        set -e
        index_rs=crates/ruvector-router-core/src/index.rs
        clamp_re='ef_construction\s*\.\s*min\s*\(\s*self\.config\.m\s*\*\s*2\s*\)'
        # grep prints the offending lines (with line numbers) when it matches.
        if ! grep -nE "$clamp_re" "$index_rs"; then
          exit 0
        fi
        echo "::error::Insert beam clamped to ef_construction.min(m*2) — this silently becomes m*2 (regression of issue #430 bug B). Use self.config.ef_construction directly."
        exit 1
|
|
|
|
# Issue #430 (bug C): adjacency-list pruning must be distance-based. The
# historical FIFO pruner never called `calculate_distance` in the overflow
# gate. As a cheap structural guard this job requires that index.rs still
# mentions both `calculate_distance` and the `> self.config.m * 2` overflow
# check. Both checks are file-wide text searches; they do not verify where
# in the file the two appear relative to each other. The behavioural
# counterpart is `test_pruning_keeps_closest_not_newest`, run by the
# hnsw-recall-at-1 job.
hnsw-distance-based-neighbor-pruning:
  runs-on: ubuntu-22.04
  steps:
    - uses: actions/checkout@v4
    - name: Require calculate_distance() inside HNSW overflow gate
      run: |
        set -e
        index_rs=crates/ruvector-router-core/src/index.rs
        # 1) The distance helper must still be referenced somewhere in the file.
        grep -E 'calculate_distance' "$index_rs" >/dev/null || {
          echo "::error::index.rs no longer references calculate_distance (regression of issue #430 bug C). Adjacency-list pruning must score candidates by distance."
          exit 1
        }
        # 2) The overflow gate itself must still exist.
        grep -E '> self\.config\.m \* 2' "$index_rs" >/dev/null || {
          echo "::error::HNSW overflow gate '> self.config.m * 2' removed — refusing to ship without the m*2/m prune semantics (#430)."
          exit 1
        }
|
|
|
|
# Issue #430 (storage): VectorDB::new has to rebuild the in-memory HNSW from
# persisted storage. The original bug created a fresh, empty HnswIndex on
# every open, so search returned 0 results after a restart.
vector-db-rebuilds-index-on-open:
  runs-on: ubuntu-22.04
  steps:
    - uses: actions/checkout@v4
    - name: Require storage.get_all_ids() rebuild path in VectorDB::new
      run: |
        set -e
        vector_db_rs=crates/ruvector-router-core/src/vector_db.rs
        # Textual check: the rebuild path is recognised by its
        # storage.get_all_ids call. Matching lines are echoed to the log.
        if grep -nE 'storage\.get_all_ids' "$vector_db_rs"; then
          exit 0
        fi
        echo "::error::VectorDB::new no longer rebuilds the HNSW from storage (regression of issue #430). Reintroduce the storage.get_all_ids() + index.insert_batch() path."
        exit 1
|
|
|
|
# Issue #462 / #376: published tarballs must contain dist/. Run `npm pack`
# (which now triggers our prepack hooks) and assert the entry points exist
# inside the resulting tarball.
npm-publish-pipeline:
  runs-on: ubuntu-22.04
  strategy:
    fail-fast: false
    matrix:
      pkg:
        - npm/packages/pi-brain
        - npm/packages/ruvector
        - npm/packages/rvf-wasm
  steps:
    - uses: actions/checkout@v4
    - uses: actions/setup-node@v4
      with:
        node-version: '20'
    - name: copy ${{ matrix.pkg }} to isolated dir + npm install + pack
      run: |
        # The package lives inside an npm workspace at npm/package.json
        # whose other workspace members declare cross-platform native
        # binaries (router-darwin-arm64, etc.). Installing from the
        # package dir still walks the workspace and chokes on EBADPLATFORM
        # for the wrong-host binaries. Copy the package to a workspace-free
        # temp dir so npm only resolves the package's own declared deps.
        mkdir -p /tmp/pkgcopy
        cp -r "${{ matrix.pkg }}/." /tmp/pkgcopy/
        cd /tmp/pkgcopy
        # Detach from the parent workspace.
        rm -f package-lock.json
        npm install --no-audit --no-fund --legacy-peer-deps --no-workspaces --no-optional
        mkdir -p /tmp/pack
        npm pack --pack-destination /tmp/pack
        # Log the first entries of the tarball for debugging.
        tar -tzf /tmp/pack/*.tgz | head -30
    - name: assert dist/ entry points exist in tarball
      working-directory: ${{ matrix.pkg }}
      run: |
        tarball=$(ls /tmp/pack/*.tgz | head -1)
        listing=$(tar -tzf "$tarball")
        # The node snippet prints every file named by main / module / types
        # and every string leaf of `exports`, one per line, with a leading
        # './' stripped.
        for required in $(node -e "
          const p = require('./package.json');
          const files = new Set();
          if (p.main) files.add(p.main);
          if (p.module) files.add(p.module);
          if (p.types) files.add(p.types);
          if (p.exports) {
            const walk = (n) => {
              if (typeof n === 'string') files.add(n);
              else if (n && typeof n === 'object') Object.values(n).forEach(walk);
            };
            walk(p.exports);
          }
          console.log([...files].map(f => f.replace(/^\\.\\//,'')).join('\\n'));
        "); do
          # The tarball prefixes everything with 'package/'. Match the whole
          # line as a fixed string (-x -F): entry-point paths contain regex
          # metacharacters such as '.', which as a pattern could be satisfied
          # by a differently named file.
          if ! echo "$listing" | grep -qxF "package/${required}"; then
            echo "::error::Required entry point missing from tarball: $required"
            echo "Tarball contents:"
            echo "$listing"
            exit 1
          fi
        done
|
|
|
|
# Issues #463 / #422: the hooks_route_enhanced handler in particular must not
# shell out through execSync('npx ruvector …'). Other handlers in
# mcp-server.js do shell out to subprocess-only commands (security-scan,
# git-churn, verify) and are tracked separately — this guard only keeps the
# #463 regression closed.
no-npx-execSync-in-route-enhanced:
  runs-on: ubuntu-22.04
  steps:
    - uses: actions/checkout@v4
    - name: Forbid execSync('npx ruvector …') inside hooks_route_enhanced case
      run: |
        set -e
        # awk isolates the hooks_route_enhanced case body: printing starts at
        # its case label and stops at the next, different case label. The
        # extracted text is then searched with grep.
        case_body=$(awk '
          /case .hooks_route_enhanced.:/ { inside = 1 }
          inside && /case .[a-z_]+.:/ && !/hooks_route_enhanced/ { inside = 0 }
          inside
        ' npm/packages/ruvector/bin/mcp-server.js)
        if ! grep -E 'execSync\([^)]*npx[[:space:]]+ruvector' <<< "$case_body"; then
          exit 0
        fi
        echo "::error::hooks_route_enhanced MUST NOT shell out via 'npx ruvector' (regression of issue #463/#422). Use intel.route() in-process instead."
        exit 1
|
|
|
|
# Issue #256: MCP tool handlers must sanitize user-controlled input before it
# is interpolated into a shell command. The risky shape is `${args.X}` — the
# raw MCP request argument. Local variables (filesArg, threshold, etc.) are
# typically pre-processed by the handler and are deliberately not matched.
# To catch #256-class regressions without a flood of false positives, only
# exec/spawn calls that contain `${args.…}` on a line with no
# sanitizeShellArg(...) are flagged.
shell-injection-in-mcp-server:
  runs-on: ubuntu-22.04
  steps:
    - uses: actions/checkout@v4
    - name: Forbid unsanitized ${args.X} in exec*/spawn* calls
      run: |
        set -e
        # Stage 1: lines where an exec*/spawn* call interpolates ${args.…}.
        # Stage 2: drop lines that mention sanitizeShellArg( anywhere.
        offenders=$(
          grep -nE '(execSync|execFile|execFileSync|exec|spawnSync|spawn)\([^)]*\$\{args\.' \
            npm/packages/*/bin/*.js 2>/dev/null \
            | grep -v 'sanitizeShellArg(' || true
        )
        if [ -n "$offenders" ]; then
          echo "$offenders"
          echo "::error::Unsanitized \${args.X} interpolation in an exec/spawn call (regression of issue #256). Wrap with sanitizeShellArg(args.X) or use the array form spawn('cmd', [args])."
          exit 1
        fi
|
|
|
|
# Issue #267: crates whose names contain "wasm" compile to
# wasm32-unknown-unknown and can't use std::time::SystemTime / Instant —
# they panic at runtime.
no-systemtime-in-wasm-crates:
  runs-on: ubuntu-22.04
  steps:
    - uses: actions/checkout@v4
    - name: Reject SystemTime/Instant in wasm32-targeted crates
      run: |
        set -e
        fail=0
        # A single glob: `crates/*wasm*` already covers every `crates/*-wasm`
        # directory. Listing both visits each `-wasm` crate twice and emits
        # duplicate error annotations.
        for crate in crates/*wasm*; do
          # Also skips the unexpanded literal glob when nothing matches.
          [ -d "$crate/src" ] || continue
          # Whitelist crates with a time_compat shim — they explicitly
          # provide a wasm-safe alternative.
          [ -f "$crate/src/time_compat.rs" ] && continue
          hits=$(grep -rnE '\b(SystemTime::now|Instant::now)\b' "$crate/src" 2>/dev/null || true)
          [ -z "$hits" ] && continue
          # A hit counts as gated when one of the 4 lines directly above it
          # carries a cfg(not(target_arch … wasm32 …)) attribute.
          ungated=$(echo "$hits" | while IFS=: read -r f line _; do
            pre=$(awk -v L="$line" 'NR>=L-4 && NR<L' "$f")
            if ! echo "$pre" | grep -q 'cfg(not(target_arch.*wasm32'; then
              echo "$f:$line"
            fi
          done)
          if [ -n "$ungated" ]; then
            echo "::error file=$crate::WASM crate uses SystemTime/Instant without cfg-gate (regression of issue #267):"
            echo "$ungated"
            fail=1
          fi
        done
        # Report every offending crate before failing.
        exit $fail
|
|
|
|
# Issue #359: devcontainer-only absolute paths break any clone made outside
# the devcontainer. Block them in settings and workflow files. .claude/hooks
# and .claude/intelligence are left out because they hold per-developer
# helpers (not committed-by-default). Markdown docs and JS example/test files
# are left out as well — they are illustrative.
no-hardcoded-workspaces-paths:
  runs-on: ubuntu-22.04
  steps:
    - uses: actions/checkout@v4
    - name: Forbid hardcoded devcontainer path in checked-in config
      run: |
        set -e
        # Only load-bearing config files are searched — not this workflow
        # file itself, nor docs/examples/tests. The needle is assembled with
        # printf so the literal path never appears in this file.
        needle=$(printf '/workspaces/%s' 'ruvector')
        offenders=$(
          grep -rl --exclude='regression-guard.yml' "$needle" \
            .github/workflows/ \
            .claude/settings.json \
            .claude/settings.local.json \
            scripts/publish/ \
            2>/dev/null || true
        )
        if [ -z "$offenders" ]; then
          exit 0
        fi
        echo "::error::Hardcoded devcontainer path in checked-in config (regression of issue #359). Use \$GITHUB_WORKSPACE, \$PWD, or a relative path."
        echo "$offenders"
        exit 1
|
|
|
|
# Issue #464: the per-collection hydration counters added in 97c07520d are
# the only means of diagnosing silent record loss during Firestore hydration.
# Should a refactor drop those log lines, the diagnostic disappears exactly
# when it is needed. Require all four "Hydrate <collection>:" lines to stay.
brain-hydration-counters-present:
  runs-on: ubuntu-22.04
  steps:
    - uses: actions/checkout@v4
    - name: Assert hydration counter log lines exist
      run: |
        set -e
        store_rs=crates/mcp-brain-server/src/store.rs
        # Collect every collection whose counter line is absent so that all
        # of them are reported in one run.
        missing=()
        for collection in brain_memories brain_contributors brain_page_status brain_nodes; do
          grep -q "Hydrate ${collection}: considered=" "$store_rs" && continue
          missing+=("Hydrate ${collection}: considered=…")
        done
        if [ "${#missing[@]}" -eq 0 ]; then
          exit 0
        fi
        echo "::error file=$store_rs::Per-collection hydration counter log lines are missing (regression of issue #464). The next deploy can't diagnose silent record loss without them:"
        printf ' %s\n' "${missing[@]}"
        exit 1
|
|
|
|
# Issue #411: npm wrapper packages declared optionalDependencies pinned to
# versions of native binaries that were never published on the registry.
# Resolve every optionalDependency declared by every package in this repo
# against the live npm registry and fail if any are missing. Soft-skip on
# network errors so transient registry hiccups don't false-fail.
optional-deps-resolvable-on-npm:
  runs-on: ubuntu-22.04
  steps:
    - uses: actions/checkout@v4
    - uses: actions/setup-node@v4
      with:
        node-version: '20'
    - name: Resolve every optionalDependency@version on npm
      run: |
        set -e
        fail=0
        # Collect (pkg, name, version) tuples from every package.json that
        # ships an optionalDependencies block.
        while IFS= read -r pkgjson; do
          # The path is handed to node as an argument (process.argv[1] under
          # `node -e`) rather than spliced into the JS source, so a quote or
          # backslash in a path cannot break the snippet.
          entries=$(node -e '
            const p = require(process.argv[1]);
            const od = p.optionalDependencies || {};
            for (const [n, v] of Object.entries(od)) {
              console.log(n + " " + v);
            }
          ' "${PWD}/$pkgjson")
          [ -z "$entries" ] && continue
          while IFS= read -r line; do
            [ -z "$line" ] && continue
            name=$(echo "$line" | awk '{print $1}')
            # Keep range operators (^, ~) intact — `npm view <pkg>@^2.3.0`
            # resolves to the highest published 2.x.y. Stripping them turns
            # a range into an exact pin and false-fails on common patterns.
            ver=$(echo "$line" | awk '{print $2}' | tr -d '" ')
            # Skip workspace: protocol and other non-semver specs.
            case "$ver" in workspace:*|file:*|*://*) continue ;; esac
            out=$(npm view "${name}@${ver}" version 2>&1) || true
            if echo "$out" | grep -qE '^npm (error|ERR!)'; then
              # Distinguish "not in registry" from transient network error.
              if echo "$out" | grep -qE 'E404|is not in this registry'; then
                echo "::error file=$pkgjson::optionalDependency ${name}@${ver} is not published on npm (regression of issue #411)."
                fail=1
              else
                echo "::warning file=$pkgjson::Could not resolve ${name}@${ver} (transient?): $out"
              fi
            elif [ -z "$out" ]; then
              # No npm error and no output: on npm versions that print
              # nothing when the package exists but no published version
              # satisfies the spec, this is exactly the #411 failure mode.
              # It must fail, not be downgraded to a "transient" warning.
              echo "::error file=$pkgjson::optionalDependency ${name}@${ver} is not published on npm (regression of issue #411)."
              fail=1
            fi
          done <<< "$entries"
        done < <(find npm/packages -name package.json -not -path '*/node_modules/*')
        exit $fail
|