ruvector/.github/workflows/regression-guard.yml
ruvnet 0edc4b985f ci: switch all CI workflows from ubuntu-latest to ubuntu-22.04
ubuntu-latest (→ubuntu-24.04) runners are consistently exhausted on the
free plan. Build Native Modules uses ubuntu-22.04 explicitly and always
gets runners immediately. Switching clippy-fmt, Workspace CI,
regression-guard, supply-chain, and WASM Dedup Check to the same pool.

Co-Authored-By: claude-flow <ruv@ruv.net>
2026-05-22 09:29:02 -04:00

394 lines
18 KiB
YAML

name: regression-guard
# Regression tripwires for the fix/critical-issues-may-2026 batch (issues
# #437, #438, #458, #462, #463, #430). Every job maps to one fix and fails CI
# if that fix is silently undone.
on:
  push:
    branches:
      - main
  pull_request:
  workflow_dispatch:
permissions:
  contents: read
jobs:
# Issue #437: parking_lot::RwLock is non-reentrant. Two .write() (or a
# .write()/.read() mix) on the same lock in one statement deadlocks. Forbid
# the exact textual pattern.
  reentrant-rwlock-double-write:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - name: Forbid reentrant parking_lot lock acquisition in a single statement
        run: |
          set -e
          # parking_lot::RwLock is non-reentrant. Dangerous patterns on the SAME
          # lock prefix:
          #   * .write() then .write() — pure deadlock (issue #437)
          #   * .write() then .read()  — read blocks behind write guard
          #   * .read()  then .write() — write blocks behind read guard
          # `.read()` then `.read()` on the same lock is allowed (multi-reader),
          # and any combination on DIFFERENT locks is safe. The captured prefix
          # `(\S+)` is reused via the `\1` backreference so only same-lock cases
          # are flagged.
          #
          # Each pattern gets its own grep invocation: GNU grep -P (PCRE)
          # accepts only ONE pattern, and passing two `-e` patterns makes it
          # exit with status 2 — which an `if grep …` would read as "no match"
          # and let every regression through.
          found=0
          for pattern in \
            '(\S+)\.write\(\)[^;]*\1\.(write|read)\(\)' \
            '(\S+)\.read\(\)[^;]*\1\.write\(\)'
          do
            rc=0
            grep -rnP --include='*.rs' -e "$pattern" -- crates/ || rc=$?
            case "$rc" in
              0) found=1 ;;
              1) ;;  # no match for this pattern
              *)
                # grep itself failed; never treat that as a clean scan.
                echo "::error::grep exited with status $rc while scanning for reentrant lock acquisition; the guard could not run."
                exit "$rc"
                ;;
            esac
          done
          if [ "$found" -eq 1 ]; then
            echo "::error::Found reentrant parking_lot lock acquisition on a single statement (regression of issue #437). Bind the guard once: 'let mut g = x.write(); g.field = …;'"
            exit 1
          fi
# Issue #458: NTFS is case-insensitive, so two tracked paths that differ only
# by letter case break Windows clones. Fail CI when such a pair exists.
  case-insensitive-collisions:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - name: Detect case-only filename collisions
        run: |
          set -e
          # Lower-case every tracked path; any line that then appears more
          # than once is a case-only collision.
          collisions=$(git ls-files | tr '[:upper:]' '[:lower:]' | sort | uniq -d || true)
          if [ -z "$collisions" ]; then
            exit 0
          fi
          echo "::error::Case-only filename collisions found. Windows clones will silently drop one file from each pair:"
          echo "$collisions"
          exit 1
# Issue #438: AVX-512 intrinsics must stay feature-gated, so ruvector-core has
# to build on stable Rust 1.77+ with the simd-avx512 feature switched off.
  ruvector-core-no-avx512-builds-on-stable:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
        with:
          # NOTE(review): the job header says 1.77+, but the toolchain
          # requested here is 1.89.0 — confirm which MSRV is intended.
          toolchain: '1.89.0'
      - name: cargo check ruvector-core without simd-avx512
        run: cargo check -p ruvector-core --no-default-features --features simd,storage,hnsw,api-embeddings,parallel
      - name: cargo check ruvector-core with simd-avx512 (default)
        run: cargo check -p ruvector-core
# Issue #430: HNSW recall@1 must stay above 95% on the regression test that
# historically exposed the result-heap inversion.
  hnsw-recall-at-1:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
      - name: ruvector-router-core unit tests (release)
        run: |
          # cargo test takes a single TESTNAME filter per invocation, so each
          # guard test gets its own run. Beyond the recall/basic-operation
          # tests, the list covers:
          #   * test_pruning_keeps_closest_not_newest — issue #430 (bug C):
          #     adjacency-list pruning must keep the CLOSEST m neighbours, not
          #     the most recently inserted ones.
          #   * test_index_rebuilt_from_storage_on_open — issue #430 (storage):
          #     VectorDB::new must rebuild the HNSW from persisted vectors so
          #     search returns results after reopen.
          for guard_test in \
            test_recall_at_1_with_biased_insertion_order \
            test_k_exceeds_ef_search_default \
            test_vector_db_basic_operations \
            test_pruning_keeps_closest_not_newest \
            test_index_rebuilt_from_storage_on_open
          do
            cargo test -p ruvector-router-core --release --lib "$guard_test"
          done
# Issue #430 (bug B): the HNSW insert beam has to be `ef_construction` itself.
# Writing `ef_construction.min(m * 2)` silently caps the beam at 32 with the
# default m=16 and collapses recall at scale, so that expression is banned
# textually.
  hnsw-insert-beam-no-m2-clamp:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - name: Forbid ef_construction.min(m * 2) clamp in HNSW insert beam
        run: |
          set -e
          index_rs=crates/ruvector-router-core/src/index.rs
          clamp_re='ef_construction\s*\.\s*min\s*\(\s*self\.config\.m\s*\*\s*2\s*\)'
          # A match means the clamp is back; grep prints the offending lines.
          if grep -nE "$clamp_re" "$index_rs"; then
            echo "::error::Insert beam clamped to ef_construction.min(m*2) — this silently becomes m*2 (regression of issue #430 bug B). Use self.config.ef_construction directly."
            exit 1
          fi
# Issue #430 (bug C): adjacency-list pruning must be distance-based. The
# historical FIFO pruner never called `calculate_distance` in the overflow
# gate. This job is a cheap structural guard that complements the behavioural
# `test_pruning_keeps_closest_not_newest` test run by the `hnsw-recall-at-1`
# job.
  hnsw-distance-based-neighbor-pruning:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - name: Require calculate_distance() inside HNSW overflow gate
        run: |
          set -e
          index_rs=crates/ruvector-router-core/src/index.rs
          # Both greps are file-wide: they only prove that index.rs still
          # mentions calculate_distance and still contains the
          # `> self.config.m * 2` overflow check, not that one sits inside
          # the other.
          grep -nE 'calculate_distance' "$index_rs" >/dev/null || {
            echo "::error::index.rs no longer references calculate_distance (regression of issue #430 bug C). Adjacency-list pruning must score candidates by distance."
            exit 1
          }
          # And the overflow gate itself must still exist.
          grep -nE '> self\.config\.m \* 2' "$index_rs" >/dev/null || {
            echo "::error::HNSW overflow gate '> self.config.m * 2' removed — refusing to ship without the m*2/m prune semantics (#430)."
            exit 1
          }
# Issue #430 (storage): VectorDB::new has to repopulate the in-memory HNSW
# from persisted storage. Historically every open created a fresh, empty
# HnswIndex, so search returned 0 results after a restart.
  vector-db-rebuilds-index-on-open:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - name: Require storage.get_all_ids() rebuild path in VectorDB::new
        run: |
          set -e
          vector_db_rs=crates/ruvector-router-core/src/vector_db.rs
          # The matching lines are printed on success; no match fails the job.
          grep -nE 'storage\.get_all_ids' "$vector_db_rs" || {
            echo "::error::VectorDB::new no longer rebuilds the HNSW from storage (regression of issue #430). Reintroduce the storage.get_all_ids() + index.insert_batch() path."
            exit 1
          }
# Issue #462 / #376: published tarballs must contain dist/. Run `npm pack`
# (which now triggers our prepack hooks) and assert the entry points exist
# inside the resulting tarball.
  npm-publish-pipeline:
    runs-on: ubuntu-22.04
    strategy:
      fail-fast: false
      matrix:
        pkg:
          - npm/packages/pi-brain
          - npm/packages/ruvector
          - npm/packages/rvf-wasm
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          node-version: '20'
      - name: copy ${{ matrix.pkg }} to isolated dir + npm install + pack
        run: |
          # The package lives inside an npm workspace at npm/package.json
          # whose other workspace members declare cross-platform native
          # binaries (router-darwin-arm64, etc.). Installing from the
          # package dir still walks the workspace and chokes on EBADPLATFORM
          # for the wrong-host binaries. Copy the package to a workspace-free
          # temp dir so npm only resolves the package's own declared deps.
          mkdir -p /tmp/pkgcopy
          cp -r ${{ matrix.pkg }}/. /tmp/pkgcopy/
          cd /tmp/pkgcopy
          # Detach from the parent workspace.
          rm -f package-lock.json
          npm install --no-audit --no-fund --legacy-peer-deps --no-workspaces --no-optional
          mkdir -p /tmp/pack
          npm pack --pack-destination /tmp/pack
          tar -tzf /tmp/pack/*.tgz | head -30
      - name: assert dist/ entry points exist in tarball
        working-directory: ${{ matrix.pkg }}
        run: |
          set -e
          tarball=$(ls /tmp/pack/*.tgz | head -1)
          listing=$(tar -tzf "$tarball")
          # Collect main/module/types plus every string leaf of `exports`,
          # with a leading './' stripped. This is a plain assignment (not a
          # command substitution inside the `for` word list) so that a node
          # failure — e.g. an unreadable package.json — aborts the step under
          # `set -e` instead of yielding an empty list and a vacuous pass.
          entry_points=$(node -e "
            const p = require('./package.json');
            const files = new Set();
            if (p.main) files.add(p.main);
            if (p.module) files.add(p.module);
            if (p.types) files.add(p.types);
            if (p.exports) {
              const walk = (n) => {
                if (typeof n === 'string') files.add(n);
                else if (n && typeof n === 'object') Object.values(n).forEach(walk);
              };
              walk(p.exports);
            }
            console.log([...files].map(f => f.replace(/^\\.\\//,'')).join('\\n'));
          ")
          for required in $entry_points; do
            # The tarball prefixes everything with 'package/'.
            if ! echo "$listing" | grep -qE "^package/${required}\$"; then
              echo "::error::Required entry point missing from tarball: $required"
              echo "Tarball contents:"
              echo "$listing"
              exit 1
            fi
          done
# Issues #463 / #422: the hooks_route_enhanced handler in particular must not
# shell out through execSync('npx ruvector …'). Other handlers in
# mcp-server.js that spawn subprocess-only commands (security-scan, git-churn,
# verify) are tracked separately; this guard only locks #463 shut.
  no-npx-execSync-in-route-enhanced:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - name: Forbid execSync('npx ruvector …') inside hooks_route_enhanced case
        run: |
          set -e
          # awk copies the lines from the hooks_route_enhanced case label up
          # to (not including) the next case label; grep then inspects only
          # that slice.
          case_body=$(awk '
            /case .hooks_route_enhanced.:/ { in_case = 1 }
            in_case && /case .[a-z_]+.:/ && !/hooks_route_enhanced/ { in_case = 0 }
            in_case
          ' npm/packages/ruvector/bin/mcp-server.js)
          if echo "$case_body" | grep -E 'execSync\([^)]*npx[[:space:]]+ruvector'; then
            echo "::error::hooks_route_enhanced MUST NOT shell out via 'npx ruvector' (regression of issue #463/#422). Use intel.route() in-process instead."
            exit 1
          fi
# Issue #256: MCP tool handlers have to sanitize user-controlled input before
# it is interpolated into a shell command. The risky shape is `${args.X}`,
# the raw MCP request argument; locals such as filesArg or threshold are
# normally pre-processed by the handler and are not matched. To avoid a flood
# of false positives, only lines that put `${args.…}` inside an exec*/spawn*
# call without sanitizeShellArg(...) on the same line are flagged.
  shell-injection-in-mcp-server:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - name: Forbid unsanitized ${args.X} in exec*/spawn* calls
        run: |
          set -e
          # First grep finds exec/spawn calls that interpolate ${args.…};
          # the second drops lines that mention sanitizeShellArg(.
          offenders=$(grep -nE '(execSync|execFile|execFileSync|exec|spawnSync|spawn)\([^)]*\$\{args\.' \
            npm/packages/*/bin/*.js 2>/dev/null | grep -v 'sanitizeShellArg(' || true)
          if [ -n "$offenders" ]; then
            echo "$offenders"
            echo "::error::Unsanitized \${args.X} interpolation in an exec/spawn call (regression of issue #256). Wrap with sanitizeShellArg(args.X) or use the array form spawn('cmd', [args])."
            exit 1
          fi
# Issue #267: crates whose names contain "wasm" compile to
# wasm32-unknown-unknown and can't use std::time::SystemTime / Instant —
# they panic at runtime.
  no-systemtime-in-wasm-crates:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - name: Reject SystemTime/Instant in wasm32-targeted crates
        run: |
          set -e
          fail=0
          # `crates/*wasm*` already covers every `crates/*-wasm` directory;
          # listing both globs would scan (and annotate) those crates twice.
          for crate in crates/*wasm*; do
            [ -d "$crate/src" ] || continue
            # Whitelist crates with a time_compat shim — they explicitly
            # provide a wasm-safe alternative.
            [ -f "$crate/src/time_compat.rs" ] && continue
            hits=$(grep -rnE '\b(SystemTime::now|Instant::now)\b' "$crate/src" 2>/dev/null || true)
            [ -z "$hits" ] && continue
            # A hit counts as gated when one of the 4 lines directly above it
            # carries a cfg(not(target_arch … wasm32 …)) attribute.
            ungated=$(echo "$hits" | while IFS=: read -r f line _; do
              pre=$(awk -v L="$line" 'NR>=L-4 && NR<L' "$f")
              if ! echo "$pre" | grep -q 'cfg(not(target_arch.*wasm32'; then
                echo "$f:$line"
              fi
            done)
            if [ -n "$ungated" ]; then
              echo "::error file=$crate::WASM crate uses SystemTime/Instant without cfg-gate (regression of issue #267):"
              echo "$ungated"
              fail=1
            fi
          done
          exit $fail
# Issue #359: devcontainer-only absolute paths break clones made outside the
# devcontainer, so they are blocked in settings and workflow files.
# .claude/hooks and .claude/intelligence are left out because they are
# per-developer helpers (not committed by default); Markdown docs and JS
# example/test files are left out because they are illustrative.
  no-hardcoded-workspaces-paths:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - name: Forbid hardcoded devcontainer path in checked-in config
        run: |
          set -e
          # Only load-bearing config is searched; this workflow file itself is
          # excluded, and the needle is assembled with printf so the literal
          # path never appears here.
          needle=$(printf '/workspaces/%s' 'ruvector')
          offenders=$(grep -rln --exclude='regression-guard.yml' -- "$needle" \
            .github/workflows/ .claude/settings.json .claude/settings.local.json \
            scripts/publish/ \
            2>/dev/null || true)
          if [ -z "$offenders" ]; then
            exit 0
          fi
          echo "::error::Hardcoded devcontainer path in checked-in config (regression of issue #359). Use \$GITHUB_WORKSPACE, \$PWD, or a relative path."
          echo "$offenders"
          exit 1
# Issue #464: the per-collection hydration counters introduced in 97c07520d
# are the only means of diagnosing silent record loss during Firestore
# hydration. If a refactor drops the log lines, the diagnostic is gone exactly
# when it is needed, so all four "Hydrate <collection>:" lines must remain.
  brain-hydration-counters-present:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - name: Assert hydration counter log lines exist
        run: |
          set -e
          store_rs=crates/mcp-brain-server/src/store.rs
          absent=()
          for name in brain_memories brain_contributors brain_page_status brain_nodes; do
            # Record every collection whose counter log line cannot be found.
            grep -q "Hydrate ${name}: considered=" "$store_rs" \
              || absent+=("Hydrate ${name}: considered=…")
          done
          if [ "${#absent[@]}" -eq 0 ]; then
            exit 0
          fi
          echo "::error file=$store_rs::Per-collection hydration counter log lines are missing (regression of issue #464). The next deploy can't diagnose silent record loss without them:"
          printf ' %s\n' "${absent[@]}"
          exit 1
# Issue #411: npm wrapper packages declared optionalDependencies pinned to
# versions of native binaries that were never published on the registry.
# Resolve every optionalDependency declared by every package in this repo
# against the live npm registry and fail if any are missing. Soft-skip on
# network errors so transient registry hiccups don't false-fail.
  optional-deps-resolvable-on-npm:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          node-version: '20'
      - name: Resolve every optionalDependency@version on npm
        run: |
          set -e
          fail=0
          # Collect (pkg, name, version) tuples from every package.json that
          # ships an optionalDependencies block.
          while IFS= read -r pkgjson; do
            # The path is handed to node as an argument rather than spliced
            # into the script text, so quotes or backslashes in a path cannot
            # break the JavaScript.
            entries=$(node -e '
              const path = require("path");
              const p = require(path.resolve(process.argv[1]));
              const od = p.optionalDependencies || {};
              for (const [n, v] of Object.entries(od)) {
                console.log(n + " " + v);
              }
            ' "$pkgjson")
            [ -z "$entries" ] && continue
            while IFS= read -r line; do
              [ -z "$line" ] && continue
              name=$(echo "$line" | awk '{print $1}')
              # Keep range operators (^, ~) intact — `npm view <pkg>@^2.3.0`
              # resolves to the highest published 2.x.y. Stripping them turns
              # a range into an exact pin and false-fails on common patterns.
              ver=$(echo "$line" | awk '{print $2}' | tr -d '" ')
              # Skip workspace: protocol and other non-semver specs.
              case "$ver" in workspace:*|file:*|*://*) continue ;; esac
              status=0
              out=$(npm view "${name}@${ver}" version 2>&1) || status=$?
              if echo "$out" | grep -qE '^npm (error|ERR!)' || [ -z "$out" ]; then
                # Distinguish "not in registry" from transient network error.
                if echo "$out" | grep -qE 'E404|is not in this registry'; then
                  echo "::error file=$pkgjson::optionalDependency ${name}@${ver} is not published on npm (regression of issue #411)."
                  fail=1
                elif [ "$status" -eq 0 ]; then
                  # npm succeeded but printed nothing: the registry answered
                  # and no published version satisfies the spec. That is the
                  # issue #411 failure mode, not a network hiccup.
                  echo "::error file=$pkgjson::optionalDependency ${name}@${ver} matches no published version on npm (regression of issue #411)."
                  fail=1
                else
                  echo "::warning file=$pkgjson::Could not resolve ${name}@${ver} (transient?): $out"
                fi
              fi
            done <<< "$entries"
          done < <(find npm/packages -name package.json -not -path '*/node_modules/*')
          exit $fail