Ruview/scripts/check_fix_markers.py
ruv eda45a6857 ci: fix-marker regression guard (witness-style)
Adds a fast per-PR gate that asserts previously-shipped fixes are still
present in the tree — the CI analogue of the ruflo witness fix-marker
system, but self-contained (no plugin dependency, reviewable as plain
JSON). Complements the heavier checks (firmware build, deterministic
pipeline proof, release witness bundle) by catching the silent-revert
class of regression that build+test wouldn't.

  - scripts/fix-markers.json   manifest: 11 markers (RuView#396, #521,
    #517, #505, #354, #263, #266/#321, #265, #232/#375/#385/#386/#390,
    ADR-028 proof + witness bundle). Each has files / require (literal
    substring or /regex/) / optional forbid / rationale / ref.
  - scripts/check_fix_markers.py  stdlib-only checker. Exit 0 clean /
    1 regression / 2 bad manifest. Modes: --list, --json, --only ID.
  - .github/workflows/fix-regression-guard.yml  runs on PR + push to
    main/master; gates on the checker and writes the result table into
    the run summary + an artifact.

If a fix is intentionally removed, update scripts/fix-markers.json in the
same PR with a rationale — the diff becomes the audit trail.

Co-Authored-By: claude-flow <ruv@ruv.net>
2026-05-11 10:48:14 -04:00

190 lines
6.7 KiB
Python

#!/usr/bin/env python3
"""Fix-marker regression guard for RuView.
Reads ``scripts/fix-markers.json`` and asserts that every previously-shipped
fix is still present in the codebase:
* every file listed in a marker must exist;
* every ``require`` pattern must appear in at least one of the marker's files
(a missing pattern means the fix was probably reverted);
* no ``forbid`` pattern may appear in any of the marker's files
(a re-appearing anti-pattern means the bug was re-introduced).
A pattern is a literal substring by default. Wrap it in ``/.../`` to treat it
as a (multiline, case-sensitive) regular expression, e.g. ``"/fall_thresh\\s*=\\s*2\\.0/"``.
This is a stdlib-only script — no dependencies, runs anywhere Python 3.8+ does.
Usage::
python scripts/check_fix_markers.py # check everything (CI)
python scripts/check_fix_markers.py --list # list all markers
python scripts/check_fix_markers.py --json # machine-readable result
python scripts/check_fix_markers.py --only RuView#396 RuView#521
Exit codes: 0 = all markers OK, 1 = one or more regressions, 2 = bad manifest.
"""
from __future__ import annotations
import argparse
import json
import re
import sys
from pathlib import Path
# The manifest is looked up under <root>/scripts/, where <root> is two levels
# above this file's resolved path.
REPO_ROOT = Path(__file__).resolve().parents[1]
MANIFEST_PATH = REPO_ROOT.joinpath("scripts", "fix-markers.json")

# Try to switch stdout to UTF-8 (Windows consoles default to cp1252). This is
# a no-op where stdout is already UTF-8. Every symbol printed below is plain
# ASCII, so output stays correct even when reconfigure() is unavailable.
try:  # pragma: no cover - environment-dependent
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
except Exception:
    # Deliberate best-effort: any failure here just keeps the default encoding.
    pass
# ANSI colours are emitted only when stdout is a TTY, so redirected output and
# CI logs stay plain text while local runs remain easy to scan.
_TTY = sys.stdout.isatty()


def _c(code: str, s: str) -> str:
    """Wrap *s* in the ANSI SGR sequence *code*; return *s* unchanged off-TTY."""
    if not _TTY:
        return s
    return f"\033[{code}m{s}\033[0m"


def GREEN(s: str) -> str:
    """Render *s* with SGR code 32."""
    return _c("32", s)


def RED(s: str) -> str:
    """Render *s* with SGR code 31."""
    return _c("31", s)


def YELLOW(s: str) -> str:
    """Render *s* with SGR code 33."""
    return _c("33", s)


def DIM(s: str) -> str:
    """Render *s* with SGR code 2."""
    return _c("2", s)


def BOLD(s: str) -> str:
    """Render *s* with SGR code 1."""
    return _c("1", s)


# ASCII-only status symbols used in the human-readable report.
OK_MARK = "PASS"
BAD_MARK = "FAIL"
ARROW = "->"
class ManifestError(Exception):
    """Raised when the fix-marker manifest cannot be loaded or fails validation."""
def load_manifest() -> dict:
    """Load and validate the fix-marker manifest at ``MANIFEST_PATH``.

    Returns:
        The parsed manifest: a dict whose ``"markers"`` value is a list of
        marker objects, each carrying a unique, non-empty string ``"id"``.

    Raises:
        ManifestError: if the file is missing or unreadable, is not valid
            UTF-8 JSON, lacks a top-level ``markers`` array, contains a
            marker that is not an object or has no usable ``id``, or repeats
            a marker id.
    """
    # EAFP: read directly and translate every I/O failure into ManifestError
    # so the caller's "bad manifest" path handles it, instead of letting a
    # raw OSError/UnicodeDecodeError escape as a traceback.
    try:
        raw = MANIFEST_PATH.read_text(encoding="utf-8")
    except FileNotFoundError:
        raise ManifestError(f"manifest not found: {MANIFEST_PATH}") from None
    except (OSError, UnicodeDecodeError) as e:
        raise ManifestError(f"cannot read manifest {MANIFEST_PATH}: {e}") from e

    try:
        data = json.loads(raw)
    except json.JSONDecodeError as e:
        raise ManifestError(f"manifest is not valid JSON: {e}") from e

    if not isinstance(data, dict) or not isinstance(data.get("markers"), list):
        raise ManifestError("manifest must be an object with a 'markers' array")

    # Validate each marker's shape here so later code can index m["id"]
    # safely; collect duplicates in a single pass.
    seen: set[str] = set()
    dupes: set[str] = set()
    for index, marker in enumerate(data["markers"]):
        if not isinstance(marker, dict):
            raise ManifestError(
                f"marker #{index} must be an object, got {type(marker).__name__}"
            )
        marker_id = marker.get("id")
        if not isinstance(marker_id, str) or not marker_id:
            raise ManifestError(f"marker #{index} must have a non-empty string 'id'")
        if marker_id in seen:
            dupes.add(marker_id)
        seen.add(marker_id)
    if dupes:
        raise ManifestError(f"duplicate marker ids: {sorted(dupes)}")
    return data
def _pattern_found(text: str, pattern: str) -> bool:
    """Return True if *pattern* occurs in *text*.

    A pattern wrapped in slashes (``/.../``) with a non-empty body is treated
    as a regular expression and searched with ``re.MULTILINE``. Anything else
    is a literal substring test.

    Raises:
        re.error: if the regex body does not compile.
    """
    # Require a non-empty regex body (len > 2). A bare "//" would otherwise
    # compile to the empty regex, which matches every text, so it is treated
    # as the literal two-character substring instead.
    if len(pattern) > 2 and pattern.startswith("/") and pattern.endswith("/"):
        return re.search(pattern[1:-1], text, re.MULTILINE) is not None
    return pattern in text
def _as_list(value) -> list:
    """Normalize a manifest field: ``None`` -> ``[]``, bare string -> ``[value]``."""
    if value is None:
        return []
    if isinstance(value, str):
        # A bare string would otherwise be iterated character by character.
        return [value]
    return value


def check_marker(marker: dict) -> tuple[bool, list[str]]:
    """Return ``(ok, problems)`` for a single marker.

    Args:
        marker: one manifest entry. Reads ``files`` (paths relative to
            ``REPO_ROOT``), ``require`` and ``forbid`` (patterns as understood
            by ``_pattern_found``). Each may be a list, a single string, or
            absent/``null``.

    Returns:
        ``ok`` is True only when ``problems`` is empty. ``problems`` holds one
        human-readable message per missing/unreadable file, absent required
        pattern, forbidden pattern found in a file, or uncompilable regex.
    """
    problems: list[str] = []
    files = _as_list(marker.get("files"))
    require = _as_list(marker.get("require"))
    forbid = _as_list(marker.get("forbid"))
    if not files:
        problems.append("marker lists no files")
        return False, problems

    # Read every listed file once; missing or unreadable files are reported
    # but do not stop the remaining checks.
    contents: dict[str, str] = {}
    for rel in files:
        p = REPO_ROOT / rel
        if not p.exists():
            problems.append(f"missing file: {rel}")
            continue
        try:
            contents[rel] = p.read_text(encoding="utf-8", errors="replace")
        except OSError as e:
            problems.append(f"cannot read {rel}: {e}")

    # A required pattern may live in any of the marker's files, so search the
    # concatenation of everything that could be read.
    haystack = "\n".join(contents.values())
    for pat in require:
        try:
            present = _pattern_found(haystack, pat)
        except re.error as e:
            # Report a broken regex as a problem instead of aborting the run.
            problems.append(f"invalid regex {pat!r}: {e}")
            continue
        if not present:
            problems.append(f"required marker absent (fix likely reverted): {pat!r}")

    # A forbidden pattern is reported once per file it appears in.
    for pat in forbid:
        try:
            offenders = [rel for rel, text in contents.items() if _pattern_found(text, pat)]
        except re.error as e:
            problems.append(f"invalid regex {pat!r}: {e}")
            continue
        problems.extend(
            f"forbidden pattern re-appeared in {rel} (bug re-introduced?): {pat!r}"
            for rel in offenders
        )
    return not problems, problems
def cmd_list(manifest: dict) -> int:
    """Print every tracked marker (id, title, optional ref, files).

    Args:
        manifest: a manifest dict as returned by ``load_manifest``.

    Returns:
        Always 0, used as the process exit code for ``--list``.
    """
    markers = manifest["markers"]
    print(BOLD(f"{len(markers)} fix markers tracked:\n"))
    for m in markers:
        # Pad the plain id first and colour it afterwards. Padding the
        # already-coloured string counts the ANSI escape bytes toward the
        # width and misaligns the title column on a TTY.
        padded_id = f"{m['id']:<28}"
        print(f" {BOLD(padded_id)} {m.get('title', '')}")
        if m.get("ref"):
            print(DIM(f" {m['ref']}"))
        for f in m.get("files", []):
            print(DIM(f" - {f}"))
    return 0
def main(argv: list[str]) -> int:
    """Run the fix-marker guard and return the process exit code.

    Args:
        argv: command-line arguments, excluding the program name.

    Returns:
        0 when every checked marker passes (or after ``--list``), 1 when at
        least one marker regressed, 2 when the manifest is bad or ``--only``
        names an unknown marker id.

    Raises:
        SystemExit: raised by argparse for ``--help`` or invalid arguments.
    """
    ap = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    ap.add_argument("--list", action="store_true", help="list all markers and exit")
    ap.add_argument("--json", action="store_true", help="emit a JSON result object")
    ap.add_argument("--only", nargs="+", metavar="ID", help="only check the given marker ids")
    args = ap.parse_args(argv)

    try:
        manifest = load_manifest()
    except ManifestError as e:
        print(RED(f"[manifest error] {e}"), file=sys.stderr)
        return 2

    if args.list:
        return cmd_list(manifest)

    markers = manifest["markers"]
    if args.only:
        wanted = set(args.only)
        markers = [m for m in markers if m["id"] in wanted]
        # Any requested id that matched nothing is a usage error, not a pass.
        unknown = wanted - {m["id"] for m in markers}
        if unknown:
            print(RED(f"[error] unknown marker id(s): {sorted(unknown)}"), file=sys.stderr)
            return 2

    results = []
    failed = 0
    for m in markers:
        ok, problems = check_marker(m)
        results.append({"id": m["id"], "title": m.get("title", ""), "ok": ok, "problems": problems})
        if not ok:
            failed += 1

    if args.json:
        print(json.dumps({"ok": failed == 0, "checked": len(markers), "failed": failed, "markers": results}, indent=2))
        return 0 if failed == 0 else 1

    print(BOLD(f"Fix-marker regression guard - {len(markers)} marker(s)\n"))
    for r in results:
        # Pad the plain id before colouring so PASS and FAIL rows line up on
        # a TTY. ANSI escape bytes must not count toward the column width.
        padded_id = f"{r['id']:<28}"
        if r["ok"]:
            print(f" {GREEN('[' + OK_MARK + ']')} {padded_id} {DIM(r['title'])}")
        else:
            print(f" {RED('[' + BAD_MARK + ']')} {BOLD(padded_id)} {r['title']}")
            for p in r["problems"]:
                print(f" {RED(ARROW)} {p}")
    print()

    if failed:
        print(RED(BOLD(f"{failed}/{len(markers)} marker(s) regressed.")))
        print(DIM(" A reverted fix is a regression. Restore the marker, or - if the change is"))
        print(DIM(" intentional - update scripts/fix-markers.json in the same PR with a rationale."))
        return 1
    print(GREEN(BOLD(f"All {len(markers)} fix markers present.")))
    return 0
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main(sys.argv[1:]))