# Source snapshot metadata (scrape artifact; commented out so the module stays importable):
#   Pulse/scripts/release_control/canonical_completion_guard.py
#   555 lines, 20 KiB, Python
#!/usr/bin/env python3
"""Canonical subsystem completion guard.
Blocks commits when staged runtime changes touch a canonical subsystem but the
matching subsystem contract file and required verification artifact are not
staged in the same commit.
"""
from __future__ import annotations
import argparse
import json
from pathlib import Path
import subprocess
import sys
from typing import Dict, List, Sequence, Set
from control_plane import DEFAULT_CONTROL_PLANE
from repo_file_io import load_repo_json
from subsystem_contracts import load_contract_index, referenced_contracts_for_path
# Repository root: this script sits two directory levels below it
# (scripts/release_control/<this file>).
REPO_ROOT = Path(__file__).resolve().parents[2]
# Canonical subsystem registry path, taken from the shared control-plane config.
SUBSYSTEM_REGISTRY = DEFAULT_CONTROL_PLANE["registry_path"]
# Fields every subsystem-level verification policy must declare
# (checked by validate_verification_policy).
REQUIRED_VERIFICATION_FIELDS: tuple[str, ...] = (
"allow_same_subsystem_tests",
"test_prefixes",
"exact_files",
"require_explicit_path_policy_coverage",
)
# Per-path policies require the same fields minus the coverage flag.
REQUIRED_PATH_POLICY_FIELDS: tuple[str, ...] = (
"allow_same_subsystem_tests",
"test_prefixes",
"exact_files",
)
# Markdown headings whose staged edits count as "substantive" contract changes;
# edits under other headings (e.g. Contract Metadata) do not satisfy the guard.
SUBSTANTIVE_CONTRACT_SECTIONS: tuple[str, ...] = (
"## Purpose",
"## Canonical Files",
"## Shared Boundaries",
"## Extension Points",
"## Forbidden Paths",
"## Completion Obligations",
"## Current State",
)
def validate_verification_policy(
    subsystem_id: str,
    policy: dict,
    *,
    context: str,
    required_fields: tuple[str, ...] = REQUIRED_VERIFICATION_FIELDS,
) -> None:
    """Ensure *policy* is a dict carrying every field in *required_fields*.

    Raises ValueError naming the subsystem and context for the first problem
    found (non-dict policy, or the first missing field in declaration order).
    """
    if not isinstance(policy, dict):
        raise ValueError(f"subsystem {subsystem_id} {context} missing verification policy")
    absent = [name for name in required_fields if name not in policy]
    if absent:
        raise ValueError(f"subsystem {subsystem_id} {context} missing {absent[0]}")
def load_subsystem_rules(*, staged: bool = False) -> List[dict]:
    """Load subsystem rules from the registry JSON and validate their shape.

    Each rule must carry a full verification policy; each entry of its
    ``path_policies`` list must carry the per-path fields plus ``id``,
    ``match_prefixes`` and ``match_files``. Raises ValueError on the first
    violation found.
    """
    payload = load_repo_json(SUBSYSTEM_REGISTRY, staged=staged)
    rules = list(payload.get("subsystems", []))
    for rule in rules:
        sid = str(rule.get("id"))
        verification = rule.get("verification")
        validate_verification_policy(sid, verification, context="verification policy")
        path_policies = verification.get("path_policies", [])
        if not isinstance(path_policies, list):
            raise ValueError(f"subsystem {sid} verification policy path_policies must be a list")
        for index, policy in enumerate(path_policies):
            validate_verification_policy(
                sid,
                policy,
                context=f"path policy #{index}",
                required_fields=REQUIRED_PATH_POLICY_FIELDS,
            )
            if "id" not in policy:
                raise ValueError(f"subsystem {sid} path policy #{index} missing id")
            # Match fields are checked in this order so error messages stay stable.
            for field in ("match_prefixes", "match_files"):
                if field not in policy:
                    raise ValueError(
                        f"subsystem {sid} path policy {policy['id']} missing {field}"
                    )
    return rules
# Path prefixes whose changes are never treated as runtime edits: the
# control-plane profile tree, repo tooling, git hooks, and these guard
# scripts themselves.
IGNORED_PREFIXES: tuple[str, ...] = (
DEFAULT_CONTROL_PLANE["profile_root_rel"] + "/",
"internal/repoctl/",
".husky/",
"scripts/release_control/",
)
def git_staged_files() -> List[str]:
    """Return repo-relative paths staged in git (added/copied/modified/renamed/deleted)."""
    completed = subprocess.run(
        ["git", "diff", "--cached", "--name-only", "--diff-filter=ACMRD"],
        cwd=REPO_ROOT,
        check=True,
        capture_output=True,
        text=True,
    )
    files: List[str] = []
    for raw in completed.stdout.splitlines():
        candidate = raw.strip()
        if candidate:
            files.append(candidate)
    return files
def stdin_files(stdin: Sequence[str]) -> List[str]:
    """Strip whitespace and drop blank entries from a newline-delimited file list."""
    stripped = (entry.strip() for entry in stdin)
    return [entry for entry in stripped if entry]
def is_test_or_fixture(path: str) -> bool:
    """Return True when *path* is a test or fixture file rather than runtime code.

    Recognizes the scripts test tree, Go ``_test.go`` files, JS/TS
    ``__tests__`` directories, and ``.test``/``.spec`` TS/TSX suffixes.
    """
    if path.startswith("scripts/tests/"):
        return True
    if "/__tests__/" in path:
        return True
    # str.endswith accepts a tuple of suffixes, replacing the chain of
    # five separate endswith checks in one call.
    return path.endswith(
        ("_test.go", ".test.ts", ".test.tsx", ".spec.ts", ".spec.tsx")
    )
def is_ignored_runtime_file(path: str) -> bool:
    """True when *path* lives under one of the deliberately ignored prefixes."""
    for prefix in IGNORED_PREFIXES:
        if path.startswith(prefix):
            return True
    return False
def subsystem_matches_path(rule: dict, path: str) -> bool:
    """True when *path* is owned by *rule*, via exact file or owned prefix.

    A path equal to a prefix minus its trailing slash also counts, so a
    directory name matches its own prefix entry.
    """
    if path in tuple(rule.get("owned_files", [])):
        return True
    for prefix in rule.get("owned_prefixes", []):
        if path == prefix.rstrip("/") or path.startswith(prefix):
            return True
    return False
def path_policy_matches(policy: dict, path: str) -> bool:
    """True when *path* falls under one of the policy's match prefixes or files."""
    if path in set(policy.get("match_files", [])):
        return True
    return any(
        path == prefix.rstrip("/") or path.startswith(prefix)
        for prefix in policy.get("match_prefixes", [])
    )
def normalize_verification_requirement(
    source: dict,
    *,
    requirement_id: str,
    label: str,
    touched_runtime_files: Sequence[str],
) -> dict:
    """Build a canonical requirement record from a policy (or verification) dict.

    Path and prefix lists are deduplicated and sorted; the same-subsystem
    flag is coerced to a plain bool.
    """
    requirement: dict = {"id": requirement_id, "label": label}
    requirement["touched_runtime_files"] = sorted(set(touched_runtime_files))
    requirement["allow_same_subsystem_tests"] = bool(
        source.get("allow_same_subsystem_tests", False)
    )
    requirement["test_prefixes"] = sorted(set(source.get("test_prefixes", [])))
    requirement["exact_files"] = sorted(set(source.get("exact_files", [])))
    return requirement
def build_verification_requirements(rule: dict, touched_runtime_files: Sequence[str]) -> List[dict]:
    """Group touched runtime files under the first matching path policy.

    Files claimed by no policy either fall back to the subsystem's default
    verification requirement or, when explicit coverage is required, become
    a coverage-gap requirement; either fallback is inserted at the front of
    the returned list.
    """
    verification = dict(rule.get("verification", {}))
    policies = list(verification.get("path_policies", []))
    strict_coverage = bool(
        verification.get("require_explicit_path_policy_coverage", False)
    )
    requirements: List[dict] = []
    grouped: Dict[str, dict] = {}
    uncovered: List[str] = []
    for path in touched_runtime_files:
        claimed = None
        for policy in policies:
            if path_policy_matches(policy, path):
                claimed = policy
                break
        if claimed is None:
            uncovered.append(path)
            continue
        policy_id = str(claimed["id"])
        if policy_id not in grouped:
            entry = normalize_verification_requirement(
                claimed,
                requirement_id=policy_id,
                label=str(claimed.get("label", policy_id)),
                touched_runtime_files=[],
            )
            grouped[policy_id] = entry
            requirements.append(entry)
        grouped[policy_id]["touched_runtime_files"].append(path)
    if uncovered:
        if strict_coverage:
            requirements.insert(
                0,
                {
                    "id": "missing-path-policy-coverage",
                    "label": "registry path policy coverage",
                    "touched_runtime_files": list(uncovered),
                    "allow_same_subsystem_tests": False,
                    "test_prefixes": [],
                    "exact_files": [],
                    "path_policy_gap": True,
                },
            )
        else:
            requirements.insert(
                0,
                normalize_verification_requirement(
                    verification,
                    requirement_id="default",
                    label="default subsystem verification",
                    touched_runtime_files=uncovered,
                ),
            )
    return requirements
def infer_impacted_subsystems(
    staged_files: Sequence[str],
    *,
    use_staged_registry: bool = False,
) -> Dict[str, dict]:
    """Map subsystem id -> impact record for every staged runtime file it owns.

    Test/fixture and explicitly ignored paths are excluded before matching.
    Each record carries the owning contract, the touched runtime files, the
    raw verification policy, and the derived verification requirements.
    """
    rules = load_subsystem_rules(staged=use_staged_registry)
    rules_by_id = {str(rule["id"]): rule for rule in rules}
    runtime_paths = [
        path
        for path in staged_files
        if not is_ignored_runtime_file(path) and not is_test_or_fixture(path)
    ]
    impacted: Dict[str, dict] = {}
    for path in runtime_paths:
        for rule in rules:
            if not subsystem_matches_path(rule, path):
                continue
            subsystem_id = str(rule["id"])
            record = impacted.get(subsystem_id)
            if record is None:
                record = {
                    "id": subsystem_id,
                    "contract": str(rule["contract"]),
                    "touched_runtime_files": [],
                    "verification": dict(rule.get("verification", {})),
                }
                impacted[subsystem_id] = record
            record["touched_runtime_files"].append(path)
    for subsystem_id, record in impacted.items():
        record["verification_requirements"] = build_verification_requirements(
            rules_by_id[subsystem_id],
            record["touched_runtime_files"],
        )
    return impacted
def required_contract_updates(
    staged_files: Sequence[str],
    impacted: Dict[str, dict] | None = None,
    *,
    use_staged_contract_index: bool = False,
) -> Dict[str, dict]:
    """Map contract path -> reason record for every contract this commit must stage.

    Contracts of subsystems whose runtime files changed appear with reason
    "owner". Contracts that merely reference a touched path (per the contract
    index) appear with reason "dependent-reference"; owner entries are never
    downgraded. The mapping is returned sorted by contract path.
    """
    impacted_subsystems = impacted if impacted is not None else infer_impacted_subsystems(staged_files)
    required: Dict[str, dict] = {}
    contract_index = load_contract_index(staged=use_staged_contract_index)
    # Every impacted subsystem always requires its own contract.
    for subsystem_id, data in impacted_subsystems.items():
        required[data["contract"]] = {
            "subsystem": subsystem_id,
            "contract": data["contract"],
            "reason": "owner",
            "touched_runtime_files": sorted(set(data["touched_runtime_files"])),
            "matched_references": [],
        }
    # Deduplicated union of runtime files across all impacted subsystems.
    touched_runtime_files = sorted(
        {
            path
            for data in impacted_subsystems.values()
            for path in data.get("touched_runtime_files", [])
        }
    )
    for path in touched_runtime_files:
        for contract in referenced_contracts_for_path(path, contract_index):
            contract_path = str(contract["contract"])
            entry = required.setdefault(
                contract_path,
                {
                    "subsystem": str(contract["subsystem_id"]),
                    "contract": contract_path,
                    "reason": "dependent-reference",
                    "touched_runtime_files": [],
                    "matched_references": [],
                },
            )
            # Owner entries already demand the contract; skip reference bookkeeping.
            if entry["reason"] == "owner":
                continue
            if path not in entry["touched_runtime_files"]:
                entry["touched_runtime_files"].append(path)
            for reference in contract.get("matched_references", []):
                # Human-readable descriptor, deduplicated by exact string.
                descriptor = f"{reference['heading']}: {reference['path']}"
                if descriptor not in entry["matched_references"]:
                    entry["matched_references"].append(descriptor)
                detail = {
                    "heading": str(reference.get("heading", "")),
                    "path": str(reference.get("path", "")),
                }
                # Line numbers are optional in the index; copy only when present.
                if isinstance(reference.get("line"), int):
                    detail["line"] = int(reference["line"])
                if isinstance(reference.get("heading_line"), int):
                    detail["heading_line"] = int(reference["heading_line"])
                detail_list = entry.setdefault("matched_reference_details", [])
                if detail not in detail_list:
                    detail_list.append(detail)
    # Normalize ordering so guard output stays deterministic across runs.
    for data in required.values():
        data["touched_runtime_files"] = sorted(set(data["touched_runtime_files"]))
        data["matched_references"] = sorted(set(data["matched_references"]), key=str.casefold)
        details = data.get("matched_reference_details")
        if isinstance(details, list) and details:
            data["matched_reference_details"] = sorted(
                details,
                key=lambda item: (
                    int(item.get("line", 0) or 0),
                    str(item.get("heading", "")).casefold(),
                    str(item.get("path", "")).casefold(),
                ),
            )
    return dict(sorted(required.items()))
def staged_verification_files_for_requirement(rule: dict, requirement: dict, staged_files: Sequence[str]) -> List[str]:
    """Return staged files that satisfy *requirement*, sorted and deduplicated.

    A staged path counts when it is one of the requirement's exact proof
    files, or it is a test/fixture file that either starts with an accepted
    test prefix or (when allowed) belongs to the same subsystem per *rule*.
    """
    exact = set(requirement.get("exact_files", []))
    prefixes = tuple(requirement.get("test_prefixes", []))
    same_subsystem_ok = bool(requirement.get("allow_same_subsystem_tests", False))
    accepted: Set[str] = set()
    for path in staged_files:
        if path in exact:
            accepted.add(path)
            continue
        if not is_test_or_fixture(path):
            continue
        # str.startswith with a tuple is False for an empty tuple, matching
        # the original any()-over-prefixes behavior.
        if path.startswith(prefixes):
            accepted.add(path)
        elif same_subsystem_ok and subsystem_matches_path(rule, path):
            accepted.add(path)
    return sorted(accepted)
def staged_contract_patch(path: str) -> str:
    """Return the staged (cached) unified diff for *path* with wide context."""
    command = ["git", "diff", "--cached", "--unified=1000", "--no-color", "--", path]
    completed = subprocess.run(
        command,
        cwd=REPO_ROOT,
        check=True,
        capture_output=True,
        text=True,
    )
    return completed.stdout
def contract_patch_has_substantive_change(patch_text: str) -> bool:
    """True when the patch adds/removes content inside a substantive section.

    Tracks the current ``##`` heading while scanning the unified diff and
    reports a change only for added/removed non-blank lines (excluding code
    fences) under a heading listed in SUBSTANTIVE_CONTRACT_SECTIONS.
    """
    header_prefixes = ("diff --git ", "index ", "--- ", "+++ ", "@@ ")
    section = ""
    for raw in patch_text.splitlines():
        if not raw or raw.startswith(header_prefixes):
            continue
        marker = raw[0]
        if marker not in " +-":
            continue
        body = raw[1:].strip()
        # Headings update the section even on context/removed lines.
        if body.startswith("## "):
            section = body
            continue
        if marker == " ":
            continue
        if section not in SUBSTANTIVE_CONTRACT_SECTIONS:
            continue
        if body and body not in {"```", "```json"}:
            return True
    return False
def staged_contract_has_substantive_change(path: str) -> bool:
    """True when the staged diff for *path* edits a substantive contract section."""
    patch = staged_contract_patch(path)
    return contract_patch_has_substantive_change(patch)
def format_missing_requirements(
    missing_contracts: Dict[str, dict],
    insufficient_contract_updates: Dict[str, dict],
    missing_verification: Dict[str, dict],
) -> str:
    """Render the human-readable BLOCKED report printed to stderr.

    Lists, in order: contracts missing from the stage, staged contracts
    without a substantive section change, and subsystems missing verification
    artifacts, followed by a summary of the guard's rules.
    """
    lines = [
        "BLOCKED: canonical subsystem changes require matching contract and verification updates.",
        "",
        "Stage the required subsystem file(s) in the same commit:",
    ]
    # Contracts that should be staged but are absent entirely.
    for contract_path, data in sorted(missing_contracts.items()):
        if data.get("reason") == "dependent-reference":
            lines.append(
                f"- dependent subsystem {data['subsystem']}: missing contract {contract_path}"
            )
            lines.append(
                "  touched runtime files are canonical references in that dependent subsystem contract"
            )
        else:
            lines.append(f"- subsystem {data['subsystem']}: missing contract {contract_path}")
        for path in sorted(data["touched_runtime_files"]):
            lines.append(f"  touched by {path}")
        if data.get("reason") == "dependent-reference":
            for reference in data.get("matched_references", []):
                lines.append(f"  referenced by {reference}")
    # Contracts that are staged but only change non-substantive sections.
    for contract_path, data in sorted(insufficient_contract_updates.items()):
        lines.append(
            f"- subsystem {data['subsystem']}: contract {contract_path} is staged but does not include a substantive section update"
        )
        lines.append(
            "  update one of: Purpose, Canonical Files, Shared Boundaries, Extension Points, Forbidden Paths, Completion Obligations, or Current State"
        )
        for path in sorted(data["touched_runtime_files"]):
            lines.append(f"  touched by {path}")
        if data.get("reason") == "dependent-reference":
            for reference in data.get("matched_references", []):
                lines.append(f"  referenced by {reference}")
    # Subsystems with unsatisfied verification requirements.
    for subsystem_id, data in sorted(missing_verification.items()):
        for requirement in data["missing_requirements"]:
            if requirement.get("path_policy_gap"):
                lines.append(
                    f"- subsystem {subsystem_id}: missing registry path policy coverage for touched runtime files"
                )
            else:
                lines.append(
                    f"- subsystem {subsystem_id}: missing verification artifact for {requirement['label']}"
                )
            for path in sorted(requirement["touched_runtime_files"]):
                lines.append(f"  touched by {path}")
            # Coverage gaps get remediation hints instead of proof-file lists.
            if requirement.get("path_policy_gap"):
                lines.append(
                    "  update "
                    f"{DEFAULT_CONTROL_PLANE['registry_rel']} so each touched path matches an explicit path policy"
                )
                lines.append(
                    f"  default subsystem verification is forbidden for governed {DEFAULT_CONTROL_PLANE['active_profile_id']} subsystems"
                )
                continue
            exact_files = sorted(requirement.get("exact_files", []))
            test_prefixes = sorted(requirement.get("test_prefixes", []))
            allow_same_subsystem_tests = bool(requirement.get("allow_same_subsystem_tests", False))
            if exact_files:
                lines.append("  acceptable exact proof files include:")
                for path in exact_files:
                    lines.append(f"    {path}")
            if test_prefixes:
                lines.append("  acceptable staged test prefixes include:")
                for prefix in test_prefixes:
                    lines.append(f"    {prefix}")
            if allow_same_subsystem_tests:
                lines.append("  same-subsystem test/spec files are also accepted")
    # Closing summary of the rules the guard enforces.
    lines.extend(
        [
            "",
            "Rule:",
            "If a canonical subsystem changes, its contract under",
            f"`{DEFAULT_CONTROL_PLANE['subsystems_dir_rel']}/` must be updated in the same commit.",
            "If a touched runtime path is also named in another subsystem contract's",
            "`Canonical Files`, `Shared Boundaries`, or `Extension Points`, that dependent contract must be updated too.",
            "A staged contract file only counts if its staged diff changes a substantive contract section,",
            "not just `Contract Metadata` or cosmetic noise.",
            "Each touched runtime path must also satisfy the first matching",
            "verification policy from that subsystem's registry entry.",
        ]
    )
    return "\n".join(lines)
def check_staged_contracts(staged_files: Sequence[str]) -> int:
    """Run the guard over *staged_files*; return 0 when compliant, 1 when blocked.

    On failure the human-readable block report is printed to stderr.
    """
    staged_set: Set[str] = set(staged_files)
    impacted = infer_impacted_subsystems(staged_files, use_staged_registry=True)
    required_contracts = required_contract_updates(
        staged_files,
        impacted,
        use_staged_contract_index=True,
    )
    missing_contracts = {
        contract_path: data
        for contract_path, data in required_contracts.items()
        if contract_path not in staged_set
    }
    insufficient_contract_updates = {
        contract_path: data
        for contract_path, data in required_contracts.items()
        if contract_path in staged_set
        and not staged_contract_has_substantive_change(contract_path)
    }
    # BUG FIX: staged_verification_files_for_requirement consults the rule's
    # owned_prefixes/owned_files for allow_same_subsystem_tests matching, but
    # the impacted record (`data`) carries neither key, so same-subsystem
    # tests could never satisfy a requirement. Look the real registry rule up
    # by id (staged registry, matching infer_impacted_subsystems above) and
    # fall back to the record only if the rule is somehow absent.
    rules_by_id = {str(rule["id"]): rule for rule in load_subsystem_rules(staged=True)}
    missing_verification: Dict[str, dict] = {}
    for subsystem_id, data in impacted.items():
        rule = rules_by_id.get(subsystem_id, data)
        missing_requirements = [
            requirement
            for requirement in data.get("verification_requirements", [])
            if not staged_verification_files_for_requirement(rule, requirement, staged_files)
        ]
        if missing_requirements:
            missing_verification[subsystem_id] = {
                **data,
                "missing_requirements": missing_requirements,
            }
    if not missing_contracts and not insufficient_contract_updates and not missing_verification:
        return 0
    print(
        format_missing_requirements(
            missing_contracts,
            insufficient_contract_updates,
            missing_verification,
        ),
        file=sys.stderr,
    )
    return 1
def parse_args(argv: Sequence[str]) -> argparse.Namespace:
    """Parse the guard's command-line options from *argv*."""
    description = (
        "Enforce canonical subsystem contract and proof-of-change updates for "
        "staged files or an explicit changed-file list."
    )
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument(
        "--files-from-stdin",
        action="store_true",
        help="Read newline-delimited changed files from standard input instead of git staged files.",
    )
    return parser.parse_args(list(argv))
def main(argv: Sequence[str] | None = None) -> int:
    """CLI entry point: pick the changed-file source and run the guard."""
    options = parse_args(list(argv or ()))
    if options.files_from_stdin:
        changed = stdin_files(sys.stdin)
    else:
        changed = git_staged_files()
    return check_staged_contracts(changed)
if __name__ == "__main__":
    # Script entry point: exit with the guard's status (0 = pass, 1 = blocked).
    raise SystemExit(main(sys.argv[1:]))