# ruvector/.github/workflows/regression-guard.yml

name: regression-guard

# Guards against the classes of regressions resolved in the
# fix/critical-issues-may-2026 batch (issues #437, #438, #458, #462,
# #463, #430), plus the additional guards defined below for issues #256,
# #267, #359, #411, and #464. Each job here corresponds to one fix and
# exists to prevent the regression from being silently re-introduced.

# Triggers: pushes to main, pull requests (no branch filter), and manual
# runs via workflow_dispatch.
on:
  push:
    branches: [main]
  pull_request:
  workflow_dispatch:

# Restrict the workflow token to read-only access to repository contents.
permissions:
  contents: read

jobs:
  # Issue #437: parking_lot::RwLock is non-reentrant. Two .write() calls (or a
  # .write() mixed with a .read()) on the same lock in one expression
  # deadlock. Forbid the exact textual pattern.
  reentrant-rwlock-double-write:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - name: Forbid reentrant parking_lot lock acquisition in a single statement
        run: |
          set -e
          # parking_lot::RwLock is non-reentrant. Dangerous patterns on the SAME
          # lock prefix:
          #   * .write() then .write() — pure deadlock (issue #437)
          #   * .write() then .read()  — read blocks behind write guard
          #   * .read()  then .write() — write blocks behind read guard
          # `.read()` then `.read()` on the same lock is allowed (multi-reader),
          # and any combination on DIFFERENT locks is safe. The captured prefix
          # `(\S+)` is back-referenced so only same-lock cases are flagged.
          #
          # Both cases live in ONE pattern joined by `|`: GNU grep rejects -P
          # combined with several -e patterns ("the -P option only supports a
          # single pattern") and exits 2, which an `if grep …` would silently
          # treat as "no match".
          #   Alternative 1: (\S+).write() … \1.(write|read)()
          #   Alternative 2: (\S+).read()  … \2.write()
          status=0
          grep -rnP --include='*.rs' \
              -e '(\S+)\.write\(\)[^;]*\1\.(?:write|read)\(\)|(\S+)\.read\(\)[^;]*\2\.write\(\)' \
              -- crates/ || status=$?
          case "$status" in
            0)
              echo "::error::Found reentrant parking_lot lock acquisition on a single statement (regression of issue #437). Bind the guard once: 'let mut g = x.write(); g.field = …;'"
              exit 1
              ;;
            1)
              # grep exit status 1 means "no lines matched" — the tree is clean.
              ;;
            *)
              # Any other status is a grep failure (bad pattern, unreadable
              # path). Fail loudly instead of reporting a clean tree.
              echo "::error::grep exited with status $status while scanning crates/ — the issue #437 guard did not run."
              exit 1
              ;;
          esac

  # Issue #458: NTFS is case-insensitive, so tracked paths that differ only
  # by letter case break Windows clones. Fail CI when any such pair exists.
  case-insensitive-collisions:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - name: Detect case-only filename collisions
        run: |
          set -e
          # Lower-case every tracked path; any path that then occurs more than
          # once collides with another one on a case-insensitive filesystem.
          collisions=$(git ls-files | tr '[:upper:]' '[:lower:]' | sort | uniq -d || true)
          if [ -z "$collisions" ]; then
            exit 0
          fi
          echo "::error::Case-only filename collisions found. Windows clones will silently drop one file from each pair:"
          echo "$collisions"
          exit 1

  # Issue #438: AVX-512 intrinsics must be gated. ruvector-core must build on
  # stable Rust 1.77+ without the simd-avx512 feature.
  # NOTE(review): only the pinned 1.89.0 toolchain is exercised here, so the
  # "1.77+" promise above is not checked by this job — confirm whether an
  # older toolchain should be added.
  ruvector-core-no-avx512-builds-on-stable:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
        with:
          toolchain: '1.89.0'
      # Explicit feature list that leaves simd-avx512 out.
      - name: cargo check ruvector-core without simd-avx512
        run: >-
          cargo check -p ruvector-core
          --no-default-features
          --features simd,storage,hnsw,api-embeddings,parallel
      # Default feature set.
      - name: cargo check ruvector-core with simd-avx512 (default)
        run: |
          cargo check -p ruvector-core

  # Issue #430: HNSW recall@1 must stay above 95% on the regression test that
  # historically exposed the result-heap inversion.
  hnsw-recall-at-1:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
      - name: ruvector-router-core unit tests (release)
        run: |
          set -euo pipefail
          # `cargo test <filter>` exits 0 even when the filter matches no test,
          # so a renamed or deleted guard test would pass vacuously. List the
          # lib tests once and require each guard test to be present before
          # running it.
          available=$(cargo test -p ruvector-router-core --release --lib -- --list)
          # Guard tests, in order:
          #   * recall@1 with biased insertion order (the original #430 repro)
          #   * k larger than the default ef_search
          #   * basic VectorDB operations
          #   * #430 (bug C): adjacency-list pruning must keep the CLOSEST m
          #     neighbours, not the most recently inserted ones
          #   * #430 (storage): VectorDB::new must rebuild the HNSW from
          #     persisted vectors so search returns results after reopen
          guards=(
            test_recall_at_1_with_biased_insertion_order
            test_k_exceeds_ef_search_default
            test_vector_db_basic_operations
            test_pruning_keeps_closest_not_newest
            test_index_rebuilt_from_storage_on_open
          )
          for t in "${guards[@]}"; do
            # --list prints one "path::to::name: test" line per test.
            if ! grep -qE "(^|::)${t}: test\$" <<< "$available"; then
              echo "::error::Guard test '${t}' was not found in ruvector-router-core (renamed or removed?). The issue #430 regression guard would pass without running anything."
              exit 1
            fi
            # cargo test only accepts one TESTNAME filter per invocation —
            # run each guard test separately.
            cargo test -p ruvector-router-core --release --lib "$t"
          done

  # Issue #430 (bug B): the HNSW insert beam must use `ef_construction`, not
  # `ef_construction.min(m * 2)`. The latter silently clamps the beam to 32
  # by default (m=16) and collapses recall at scale. This guard textually
  # forbids the regression.
  hnsw-insert-beam-no-m2-clamp:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - name: Forbid ef_construction.min(m * 2) clamp in HNSW insert beam
        run: |
          set -e
          src=crates/ruvector-router-core/src/index.rs
          # A missing file makes grep exit 2, which the `if grep` below would
          # treat as "pattern absent". Fail loudly so a moved file cannot turn
          # this guard into a no-op.
          if [ ! -f "$src" ]; then
            echo "::error file=$src::$src not found — the issue #430 bug B guard has nothing to scan. Update this job if the file moved."
            exit 1
          fi
          if grep -nE 'ef_construction\s*\.\s*min\s*\(\s*self\.config\.m\s*\*\s*2\s*\)' \
                  "$src" ; then
            echo "::error::Insert beam clamped to ef_construction.min(m*2) — this silently becomes m*2 (regression of issue #430 bug B). Use self.config.ef_construction directly."
            exit 1
          fi

  # Issue #430 (bug C): adjacency-list pruning must be distance-based. The
  # historical FIFO pruner did not call `calculate_distance` anywhere inside
  # the overflow gate. This job is a cheap structural guard that complements
  # the behavioural `test_pruning_keeps_closest_not_newest` test run by the
  # hnsw-recall-at-1 job.
  hnsw-distance-based-neighbor-pruning:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - name: Require calculate_distance() inside HNSW overflow gate
        run: |
          set -e
          index_rs=crates/ruvector-router-core/src/index.rs
          # Presence check only: both greps look at the whole file, so they
          # show that the helper and the overflow gate still exist, not that
          # one is called from inside the other.
          grep -qE 'calculate_distance' "$index_rs" || {
            echo "::error::index.rs no longer references calculate_distance (regression of issue #430 bug C). Adjacency-list pruning must score candidates by distance."
            exit 1
          }
          # The overflow gate itself must still be there as well.
          grep -qE '> self\.config\.m \* 2' "$index_rs" || {
            echo "::error::HNSW overflow gate '> self.config.m * 2' removed — refusing to ship without the m*2/m prune semantics (#430)."
            exit 1
          }

  # Issue #430 (storage): VectorDB::new has to rebuild the in-memory HNSW from
  # persisted storage. Historically a fresh, empty HnswIndex was created on
  # every open, so search returned 0 results after a restart.
  vector-db-rebuilds-index-on-open:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - name: Require storage.get_all_ids() rebuild path in VectorDB::new
        run: |
          set -e
          # A match (printed with its line number) means the rebuild path is
          # still referenced in vector_db.rs; anything else fails the job.
          grep -nE 'storage\.get_all_ids' crates/ruvector-router-core/src/vector_db.rs && exit 0
          echo "::error::VectorDB::new no longer rebuilds the HNSW from storage (regression of issue #430). Reintroduce the storage.get_all_ids() + index.insert_batch() path."
          exit 1

  # Issue #462 / #376: published tarballs must contain dist/. Run `npm pack`
  # (which now triggers our prepack hooks) and assert the entry points exist
  # inside the resulting tarball.
  npm-publish-pipeline:
    runs-on: ubuntu-22.04
    strategy:
      fail-fast: false
      matrix:
        pkg:
          - npm/packages/pi-brain
          - npm/packages/ruvector
          - npm/packages/rvf-wasm
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          node-version: '20'
      - name: copy ${{ matrix.pkg }} to isolated dir + npm install + pack
        run: |
          # The package lives inside an npm workspace at npm/package.json
          # whose other workspace members declare cross-platform native
          # binaries (router-darwin-arm64, etc.). Installing from the
          # package dir still walks the workspace and chokes on EBADPLATFORM
          # for the wrong-host binaries. Copy the package to a workspace-free
          # temp dir so npm only resolves the package's own declared deps.
          mkdir -p /tmp/pkgcopy
          cp -r ${{ matrix.pkg }}/. /tmp/pkgcopy/
          cd /tmp/pkgcopy
          # Detach from the parent workspace.
          rm -f package-lock.json
          npm install --no-audit --no-fund --legacy-peer-deps --no-workspaces --no-optional
          mkdir -p /tmp/pack
          npm pack --pack-destination /tmp/pack
          tar -tzf /tmp/pack/*.tgz | head -30
      - name: assert dist/ entry points exist in tarball
        working-directory: ${{ matrix.pkg }}
        run: |
          set -euo pipefail
          tarball=$(ls /tmp/pack/*.tgz | head -1)
          listing=$(tar -tzf "$tarball")
          # Collect main/module/types plus every string reachable through
          # `exports`. Capturing into a variable (rather than a `for … in
          # $(…)` word list) lets a failing node invocation abort the step.
          # Wildcard subpath patterns ("./dist/*.js") are not literal files
          # and are skipped.
          required_files=$(node -e "
            const p = require('./package.json');
            const files = new Set();
            if (p.main) files.add(p.main);
            if (p.module) files.add(p.module);
            if (p.types) files.add(p.types);
            if (p.exports) {
              const walk = (n) => {
                if (typeof n === 'string') files.add(n);
                else if (n && typeof n === 'object') Object.values(n).forEach(walk);
              };
              walk(p.exports);
            }
            const literal = [...files].filter(f => f.indexOf('*') === -1);
            console.log(literal.map(f => f.replace(/^\\.\\//,'')).join('\\n'));
          ")
          if [ -z "$required_files" ]; then
            echo "::warning::package.json declares no literal main/module/types/exports entry points — nothing to verify in the tarball."
            exit 0
          fi
          # Read line by line so paths are neither word-split nor glob-expanded.
          while IFS= read -r required; do
            [ -n "$required" ] || continue
            # The tarball prefixes everything with 'package/'. -x -F compares
            # the whole line literally, so '.' and other regex metacharacters
            # in a path cannot match a different file.
            if ! grep -qxF "package/${required}" <<< "$listing"; then
              echo "::error::Required entry point missing from tarball: $required"
              echo "Tarball contents:"
              echo "$listing"
              exit 1
            fi
          done <<< "$required_files"

  # Issues #463 / #422: hooks_route_enhanced specifically must not shell out
  # via execSync('npx ruvector …'). Other handlers in mcp-server.js shell out
  # to subprocess-only commands (security-scan, git-churn, verify) and are
  # tracked separately — this guard locks the #463 regression shut.
  no-npx-execSync-in-route-enhanced:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - name: Forbid execSync('npx ruvector …') inside hooks_route_enhanced case
        run: |
          set -e
          # Extract the hooks_route_enhanced case body (case label → next case)
          # and grep within it. awk for case-body extraction, then grep.
          body=$(awk '
            /case .hooks_route_enhanced.:/ { flag=1 }
            flag && /case .[a-z_]+.:/ && !/hooks_route_enhanced/ { flag=0 }
            flag { print }
          ' npm/packages/ruvector/bin/mcp-server.js)
          # An empty extraction means the case label was not found, so the
          # grep below would inspect nothing. Surface that instead of passing
          # silently.
          if [ -z "$body" ]; then
            echo "::warning file=npm/packages/ruvector/bin/mcp-server.js::No 'case hooks_route_enhanced:' block found — the issue #463 guard checked nothing. Update this job if the handler was renamed or moved."
          fi
          if echo "$body" | grep -E 'execSync\([^)]*npx[[:space:]]+ruvector'; then
            echo "::error::hooks_route_enhanced MUST NOT shell out via 'npx ruvector' (regression of issue #463/#422). Use intel.route() in-process instead."
            exit 1
          fi

  # Issue #256: MCP tool handlers must sanitize user-controlled input before
  # it is interpolated into a shell command. The risky pattern is `${args.X}`
  # (the raw MCP request argument); local variables (filesArg, threshold,
  # etc.) are typically pre-processed by the handler and are not matched
  # here. To catch #256-class regressions without drowning in false
  # positives, only lines that interpolate `${args.…}` into an exec*/spawn*
  # call and do not mention sanitizeShellArg(...) are flagged.
  shell-injection-in-mcp-server:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - name: Forbid unsanitized ${args.X} in exec*/spawn* calls
        run: |
          set -e
          # First grep: exec/spawn calls that interpolate ${args.…}.
          # Second grep: drop every line that mentions sanitizeShellArg(.
          offenders=$(grep -nE '(execSync|execFile|execFileSync|exec|spawnSync|spawn)\([^)]*\$\{args\.' \
              npm/packages/*/bin/*.js 2>/dev/null | grep -v 'sanitizeShellArg(' || true)
          if [ -n "$offenders" ]; then
            echo "$offenders"
            echo "::error::Unsanitized \${args.X} interpolation in an exec/spawn call (regression of issue #256). Wrap with sanitizeShellArg(args.X) or use the array form spawn('cmd', [args])."
            exit 1
          fi

  # Issue #267: crates whose names contain "wasm" compile to
  # wasm32-unknown-unknown and can't use std::time::SystemTime / Instant —
  # they panic at runtime.
  no-systemtime-in-wasm-crates:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - name: Reject SystemTime/Instant in wasm32-targeted crates
        run: |
          set -e
          fail=0
          # `crates/*wasm*` already covers every `crates/*-wasm` directory;
          # listing both globs scanned and reported those crates twice.
          for crate in crates/*wasm*; do
            [ -d "$crate/src" ] || continue
            # Whitelist crates with a time_compat shim — they explicitly
            # provide a wasm-safe alternative.
            [ -f "$crate/src/time_compat.rs" ] && continue
            hits=$(grep -rnE '\b(SystemTime::now|Instant::now)\b' "$crate/src" 2>/dev/null || true)
            [ -z "$hits" ] && continue
            # A hit counts as gated when one of the four lines directly above
            # it carries a cfg(not(target_arch … wasm32 …)) attribute.
            ungated=$(echo "$hits" | while IFS=: read -r f line _; do
              pre=$(awk -v L="$line" 'NR>=L-4 && NR<L' "$f")
              if ! echo "$pre" | grep -q 'cfg(not(target_arch.*wasm32'; then
                echo "$f:$line"
              fi
            done)
            if [ -n "$ungated" ]; then
              echo "::error file=$crate::WASM crate uses SystemTime/Instant without cfg-gate (regression of issue #267):"
              echo "$ungated"
              fail=1
            fi
          done
          # Report every offending crate before failing the job.
          exit $fail

  # Issue #359: hardcoded devcontainer-only paths break clones outside the
  # devcontainer. Block them in the workflow directory, the two .claude
  # settings files and scripts/publish/. .claude/hooks and
  # .claude/intelligence are excluded because they're user-customised helpers
  # configured per-developer (not committed-by-default). Markdown docs and
  # JS example/test files are excluded — they're illustrative.
  no-hardcoded-workspaces-paths:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - name: Forbid hardcoded devcontainer path in checked-in config
        run: |
          set -e
          # The search string is assembled with printf so the literal path
          # does not appear in this workflow file; the file is additionally
          # excluded from the scan by name.
          needle=$(printf '/workspaces/%s' 'ruvector')
          offenders=$(grep -rln "$needle" \
              --exclude='regression-guard.yml' \
              .github/workflows/ .claude/settings.json .claude/settings.local.json \
              scripts/publish/ \
              2>/dev/null || true)
          if [ -z "$offenders" ]; then
            exit 0
          fi
          echo "::error::Hardcoded devcontainer path in checked-in config (regression of issue #359). Use \$GITHUB_WORKSPACE, \$PWD, or a relative path."
          echo "$offenders"
          exit 1

  # Issue #464: the per-collection hydration counters added in 97c07520d are
  # the only way to diagnose silent record loss during Firestore hydration.
  # Should a future refactor drop the log lines, the diagnostic is gone when
  # it is needed most. Assert all four "Hydrate <collection>:" lines remain.
  brain-hydration-counters-present:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - name: Assert hydration counter log lines exist
        run: |
          set -e
          store=crates/mcp-brain-server/src/store.rs
          absent=()
          # One expected log-line prefix per collection; gather all that are
          # missing so the report lists every gap at once.
          for name in brain_memories brain_contributors brain_page_status brain_nodes; do
            grep -q "Hydrate ${name}: considered=" "$store" \
              || absent+=("Hydrate ${name}: considered=…")
          done
          if [ "${#absent[@]}" -eq 0 ]; then
            exit 0
          fi
          echo "::error file=$store::Per-collection hydration counter log lines are missing (regression of issue #464). The next deploy can't diagnose silent record loss without them:"
          printf '  %s\n' "${absent[@]}"
          exit 1

  # Issue #411: npm wrapper packages declared optionalDependencies pinned to
  # versions of native binaries that were never published on the registry.
  # Resolve every optionalDependency declared by every package in this repo
  # against the live npm registry and fail if any are missing. Soft-skip on
  # network errors so transient registry hiccups don't false-fail.
  optional-deps-resolvable-on-npm:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          node-version: '20'
      - name: Resolve every optionalDependency@version on npm
        run: |
          set -e
          fail=0
          # stderr of each `npm view` goes here so that an empty stdout can be
          # told apart from an error message.
          errlog=$(mktemp)
          # Collect (pkg, name, version) tuples from every package.json that
          # ships an optionalDependencies block.
          while IFS= read -r pkgjson; do
            # The path is passed as an argument rather than spliced into the
            # JS source, so quotes or backslashes in it cannot break the script.
            entries=$(node -e '
              const path = require("path");
              const p = require(path.resolve(process.argv[1]));
              const od = p.optionalDependencies || {};
              for (const [n, v] of Object.entries(od)) {
                console.log(n + " " + v);
              }
            ' "$pkgjson")
            [ -z "$entries" ] && continue
            while IFS= read -r line; do
              [ -z "$line" ] && continue
              # Keep range operators (^, ~) intact — `npm view <pkg>@^2.3.0`
              # resolves to the highest published 2.x.y. Stripping them turns
              # a range into an exact pin and false-fails on common patterns.
              # `read` leaves the whole remainder in $ver, so ranges with
              # spaces (">=1.0.0 <2.0.0") are preserved too.
              read -r name ver <<< "$line"
              # Skip workspace: protocol and other non-semver specs.
              case "$ver" in workspace:*|file:*|*://*) continue ;; esac
              rc=0
              # stdin comes from /dev/null so npm cannot consume the
              # remaining entries of the enclosing read loop.
              out=$(npm view "${name}@${ver}" version 2>"$errlog" </dev/null) || rc=$?
              err=$(tr '\n' ' ' < "$errlog")
              if [ "$rc" -eq 0 ] && [ -n "$out" ]; then
                # At least one published version satisfies the spec.
                continue
              fi
              if [ "$rc" -eq 0 ]; then
                # The registry answered but nothing satisfies the spec: the
                # package exists, the requested version does not. This is the
                # issue #411 scenario and must fail, not warn.
                echo "::error file=$pkgjson::optionalDependency ${name}@${ver} is not published on npm (regression of issue #411)."
                fail=1
              elif echo "$err" | grep -qE 'E404|is not in this registry'; then
                # Distinguish "not in registry" from transient network error.
                echo "::error file=$pkgjson::optionalDependency ${name}@${ver} is not published on npm (regression of issue #411)."
                fail=1
              else
                echo "::warning file=$pkgjson::Could not resolve ${name}@${ver} (transient?): $err"
              fi
            done <<< "$entries"
          done < <(find npm/packages -name package.json -not -path '*/node_modules/*')
          rm -f "$errlog"
          exit $fail