# Source snapshot metadata (scrape artifact; commented out so the module stays importable):
#   Pulse/scripts/release_control/canonical_completion_guard.py
#   555 lines, 20 KiB, Python
#!/usr/bin/env python3
"""Canonical subsystem completion guard.
Blocks commits when staged runtime changes touch a canonical subsystem but the
matching subsystem contract file and required verification artifact are not
staged in the same commit.
"""
from __future__ import annotations
import argparse
import json
from pathlib import Path
import subprocess
import sys
from typing import Dict, List, Sequence, Set
from control_plane import DEFAULT_CONTROL_PLANE
from repo_file_io import load_repo_json
from subsystem_contracts import load_contract_index, referenced_contracts_for_path
# Repository root: this script sits two directory levels below it
# (scripts/release_control/<this file>).
REPO_ROOT = Path(__file__).resolve().parents[2]
# Canonical subsystem registry path, taken from the shared control-plane config.
SUBSYSTEM_REGISTRY = DEFAULT_CONTROL_PLANE["registry_path"]
# Fields every subsystem-level verification policy must declare
# (checked by validate_verification_policy).
REQUIRED_VERIFICATION_FIELDS: tuple[str, ...] = (
"allow_same_subsystem_tests",
"test_prefixes",
"exact_files",
"require_explicit_path_policy_coverage",
)
# Per-path policies require the same fields minus the coverage flag.
REQUIRED_PATH_POLICY_FIELDS: tuple[str, ...] = (
"allow_same_subsystem_tests",
"test_prefixes",
"exact_files",
)
# Markdown headings whose staged edits count as "substantive" contract changes;
# edits under other headings (e.g. Contract Metadata) do not satisfy the guard.
SUBSTANTIVE_CONTRACT_SECTIONS: tuple[str, ...] = (
"## Purpose",
"## Canonical Files",
"## Shared Boundaries",
"## Extension Points",
"## Forbidden Paths",
"## Completion Obligations",
"## Current State",
)
def validate_verification_policy(
    subsystem_id: str,
    policy: dict,
    *,
    context: str,
    required_fields: tuple[str, ...] = REQUIRED_VERIFICATION_FIELDS,
) -> None:
    """Ensure *policy* is a dict carrying every field in *required_fields*.

    Raises ValueError naming the subsystem and context for the first problem
    found (non-dict policy, or the first missing field in declaration order).
    """
    if not isinstance(policy, dict):
        raise ValueError(f"subsystem {subsystem_id} {context} missing verification policy")
    absent = [name for name in required_fields if name not in policy]
    if absent:
        raise ValueError(f"subsystem {subsystem_id} {context} missing {absent[0]}")
def load_subsystem_rules(*, staged: bool = False) -> List[dict]:
    """Load subsystem rules from the registry JSON and validate their shape.

    Each rule must carry a full verification policy; each entry of its
    ``path_policies`` list must carry the per-path fields plus ``id``,
    ``match_prefixes`` and ``match_files``. Raises ValueError on the first
    violation found.
    """
    payload = load_repo_json(SUBSYSTEM_REGISTRY, staged=staged)
    rules = list(payload.get("subsystems", []))
    for rule in rules:
        sid = str(rule.get("id"))
        verification = rule.get("verification")
        validate_verification_policy(sid, verification, context="verification policy")
        path_policies = verification.get("path_policies", [])
        if not isinstance(path_policies, list):
            raise ValueError(f"subsystem {sid} verification policy path_policies must be a list")
        for index, policy in enumerate(path_policies):
            validate_verification_policy(
                sid,
                policy,
                context=f"path policy #{index}",
                required_fields=REQUIRED_PATH_POLICY_FIELDS,
            )
            if "id" not in policy:
                raise ValueError(f"subsystem {sid} path policy #{index} missing id")
            # Match fields are checked in this order so error messages stay stable.
            for field in ("match_prefixes", "match_files"):
                if field not in policy:
                    raise ValueError(
                        f"subsystem {sid} path policy {policy['id']} missing {field}"
                    )
    return rules
# Path prefixes whose changes are never treated as runtime edits: the
# control-plane profile tree, repo tooling, git hooks, and these guard
# scripts themselves.
IGNORED_PREFIXES: tuple[str, ...] = (
DEFAULT_CONTROL_PLANE["profile_root_rel"] + "/",
"internal/repoctl/",
".husky/",
"scripts/release_control/",
)
def git_staged_files() -> List[str]:
    """Return repo-relative paths staged in git (added/copied/modified/renamed/deleted)."""
    completed = subprocess.run(
        ["git", "diff", "--cached", "--name-only", "--diff-filter=ACMRD"],
        cwd=REPO_ROOT,
        check=True,
        capture_output=True,
        text=True,
    )
    files: List[str] = []
    for raw in completed.stdout.splitlines():
        candidate = raw.strip()
        if candidate:
            files.append(candidate)
    return files
def stdin_files(stdin: Sequence[str]) -> List[str]:
    """Strip whitespace and drop blank entries from a newline-delimited file list."""
    stripped = (entry.strip() for entry in stdin)
    return [entry for entry in stripped if entry]
def is_test_or_fixture(path: str) -> bool:
    """Return True when *path* is a test or fixture file rather than runtime code.

    Recognizes the scripts test tree, Go ``_test.go`` files, JS/TS
    ``__tests__`` directories, and ``.test``/``.spec`` TS/TSX suffixes.
    """
    if path.startswith("scripts/tests/"):
        return True
    if "/__tests__/" in path:
        return True
    # str.endswith accepts a tuple of suffixes, replacing the chain of
    # five separate endswith checks in one call.
    return path.endswith(
        ("_test.go", ".test.ts", ".test.tsx", ".spec.ts", ".spec.tsx")
    )
def is_ignored_runtime_file(path: str) -> bool:
    """True when *path* lives under one of the deliberately ignored prefixes."""
    for prefix in IGNORED_PREFIXES:
        if path.startswith(prefix):
            return True
    return False
def subsystem_matches_path(rule: dict, path: str) -> bool:
    """True when *path* is owned by *rule*, via exact file or owned prefix.

    A path equal to a prefix minus its trailing slash also counts, so a
    directory name matches its own prefix entry.
    """
    if path in tuple(rule.get("owned_files", [])):
        return True
    for prefix in rule.get("owned_prefixes", []):
        if path == prefix.rstrip("/") or path.startswith(prefix):
            return True
    return False
def path_policy_matches(policy: dict, path: str) -> bool:
    """True when *path* falls under one of the policy's match prefixes or files."""
    if path in set(policy.get("match_files", [])):
        return True
    return any(
        path == prefix.rstrip("/") or path.startswith(prefix)
        for prefix in policy.get("match_prefixes", [])
    )
def normalize_verification_requirement(
    source: dict,
    *,
    requirement_id: str,
    label: str,
    touched_runtime_files: Sequence[str],
) -> dict:
    """Build a canonical requirement record from a policy (or verification) dict.

    Path and prefix lists are deduplicated and sorted; the same-subsystem
    flag is coerced to a plain bool.
    """
    requirement: dict = {"id": requirement_id, "label": label}
    requirement["touched_runtime_files"] = sorted(set(touched_runtime_files))
    requirement["allow_same_subsystem_tests"] = bool(
        source.get("allow_same_subsystem_tests", False)
    )
    requirement["test_prefixes"] = sorted(set(source.get("test_prefixes", [])))
    requirement["exact_files"] = sorted(set(source.get("exact_files", [])))
    return requirement
def build_verification_requirements(rule: dict, touched_runtime_files: Sequence[str]) -> List[dict]:
    """Group touched runtime files under the first matching path policy.

    Files claimed by no policy either fall back to the subsystem's default
    verification requirement or, when explicit coverage is required, become
    a coverage-gap requirement; either fallback is inserted at the front of
    the returned list.
    """
    verification = dict(rule.get("verification", {}))
    policies = list(verification.get("path_policies", []))
    strict_coverage = bool(
        verification.get("require_explicit_path_policy_coverage", False)
    )
    requirements: List[dict] = []
    grouped: Dict[str, dict] = {}
    uncovered: List[str] = []
    for path in touched_runtime_files:
        claimed = None
        for policy in policies:
            if path_policy_matches(policy, path):
                claimed = policy
                break
        if claimed is None:
            uncovered.append(path)
            continue
        policy_id = str(claimed["id"])
        if policy_id not in grouped:
            entry = normalize_verification_requirement(
                claimed,
                requirement_id=policy_id,
                label=str(claimed.get("label", policy_id)),
                touched_runtime_files=[],
            )
            grouped[policy_id] = entry
            requirements.append(entry)
        grouped[policy_id]["touched_runtime_files"].append(path)
    if uncovered:
        if strict_coverage:
            requirements.insert(
                0,
                {
                    "id": "missing-path-policy-coverage",
                    "label": "registry path policy coverage",
                    "touched_runtime_files": list(uncovered),
                    "allow_same_subsystem_tests": False,
                    "test_prefixes": [],
                    "exact_files": [],
                    "path_policy_gap": True,
                },
            )
        else:
            requirements.insert(
                0,
                normalize_verification_requirement(
                    verification,
                    requirement_id="default",
                    label="default subsystem verification",
                    touched_runtime_files=uncovered,
                ),
            )
    return requirements
def infer_impacted_subsystems(
    staged_files: Sequence[str],
    *,
    use_staged_registry: bool = False,
) -> Dict[str, dict]:
    """Map subsystem id -> impact record for every staged runtime file it owns.

    Test/fixture and explicitly ignored paths are excluded before matching.
    Each record carries the owning contract, the touched runtime files, the
    raw verification policy, and the derived verification requirements.
    """
    rules = load_subsystem_rules(staged=use_staged_registry)
    rules_by_id = {str(rule["id"]): rule for rule in rules}
    runtime_paths = [
        path
        for path in staged_files
        if not is_ignored_runtime_file(path) and not is_test_or_fixture(path)
    ]
    impacted: Dict[str, dict] = {}
    for path in runtime_paths:
        for rule in rules:
            if not subsystem_matches_path(rule, path):
                continue
            subsystem_id = str(rule["id"])
            record = impacted.get(subsystem_id)
            if record is None:
                record = {
                    "id": subsystem_id,
                    "contract": str(rule["contract"]),
                    "touched_runtime_files": [],
                    "verification": dict(rule.get("verification", {})),
                }
                impacted[subsystem_id] = record
            record["touched_runtime_files"].append(path)
    for subsystem_id, record in impacted.items():
        record["verification_requirements"] = build_verification_requirements(
            rules_by_id[subsystem_id],
            record["touched_runtime_files"],
        )
    return impacted
def required_contract_updates(
    staged_files: Sequence[str],
    impacted: Dict[str, dict] | None = None,
    *,
    use_staged_contract_index: bool = False,
) -> Dict[str, dict]:
    """Map contract path -> reason record for every contract this commit must stage.

    Contracts of subsystems whose runtime files changed appear with reason
    "owner". Contracts that merely reference a touched path (per the contract
    index) appear with reason "dependent-reference"; owner entries are never
    downgraded. The mapping is returned sorted by contract path.
    """
    impacted_subsystems = impacted if impacted is not None else infer_impacted_subsystems(staged_files)
    required: Dict[str, dict] = {}
    contract_index = load_contract_index(staged=use_staged_contract_index)
    # Every impacted subsystem always requires its own contract.
    for subsystem_id, data in impacted_subsystems.items():
        required[data["contract"]] = {
            "subsystem": subsystem_id,
            "contract": data["contract"],
            "reason": "owner",
            "touched_runtime_files": sorted(set(data["touched_runtime_files"])),
            "matched_references": [],
        }
    # Deduplicated union of runtime files across all impacted subsystems.
    touched_runtime_files = sorted(
        {
            path
            for data in impacted_subsystems.values()
            for path in data.get("touched_runtime_files", [])
        }
    )
    for path in touched_runtime_files:
        for contract in referenced_contracts_for_path(path, contract_index):
            contract_path = str(contract["contract"])
            entry = required.setdefault(
                contract_path,
                {
                    "subsystem": str(contract["subsystem_id"]),
                    "contract": contract_path,
                    "reason": "dependent-reference",
                    "touched_runtime_files": [],
                    "matched_references": [],
                },
            )
            # Owner entries already demand the contract; skip reference bookkeeping.
            if entry["reason"] == "owner":
                continue
            if path not in entry["touched_runtime_files"]:
                entry["touched_runtime_files"].append(path)
            for reference in contract.get("matched_references", []):
                # Human-readable descriptor, deduplicated by exact string.
                descriptor = f"{reference['heading']}: {reference['path']}"
                if descriptor not in entry["matched_references"]:
                    entry["matched_references"].append(descriptor)
                detail = {
                    "heading": str(reference.get("heading", "")),
                    "path": str(reference.get("path", "")),
                }
                # Line numbers are optional in the index; copy only when present.
                if isinstance(reference.get("line"), int):
                    detail["line"] = int(reference["line"])
                if isinstance(reference.get("heading_line"), int):
                    detail["heading_line"] = int(reference["heading_line"])
                detail_list = entry.setdefault("matched_reference_details", [])
                if detail not in detail_list:
                    detail_list.append(detail)
    # Normalize ordering so guard output stays deterministic across runs.
    for data in required.values():
        data["touched_runtime_files"] = sorted(set(data["touched_runtime_files"]))
        data["matched_references"] = sorted(set(data["matched_references"]), key=str.casefold)
        details = data.get("matched_reference_details")
        if isinstance(details, list) and details:
            data["matched_reference_details"] = sorted(
                details,
                key=lambda item: (
                    int(item.get("line", 0) or 0),
                    str(item.get("heading", "")).casefold(),
                    str(item.get("path", "")).casefold(),
                ),
            )
    return dict(sorted(required.items()))
def staged_verification_files_for_requirement(rule: dict, requirement: dict, staged_files: Sequence[str]) -> List[str]:
    """Return staged files that satisfy *requirement*, sorted and deduplicated.

    A staged path counts when it is one of the requirement's exact proof
    files, or it is a test/fixture file that either starts with an accepted
    test prefix or (when allowed) belongs to the same subsystem per *rule*.
    """
    exact = set(requirement.get("exact_files", []))
    prefixes = tuple(requirement.get("test_prefixes", []))
    same_subsystem_ok = bool(requirement.get("allow_same_subsystem_tests", False))
    accepted: Set[str] = set()
    for path in staged_files:
        if path in exact:
            accepted.add(path)
            continue
        if not is_test_or_fixture(path):
            continue
        # str.startswith with a tuple is False for an empty tuple, matching
        # the original any()-over-prefixes behavior.
        if path.startswith(prefixes):
            accepted.add(path)
        elif same_subsystem_ok and subsystem_matches_path(rule, path):
            accepted.add(path)
    return sorted(accepted)
def staged_contract_patch(path: str) -> str:
    """Return the staged (cached) unified diff for *path* with wide context."""
    command = ["git", "diff", "--cached", "--unified=1000", "--no-color", "--", path]
    completed = subprocess.run(
        command,
        cwd=REPO_ROOT,
        check=True,
        capture_output=True,
        text=True,
    )
    return completed.stdout
def contract_patch_has_substantive_change(patch_text: str) -> bool:
    """True when the patch adds/removes content inside a substantive section.

    Tracks the current ``##`` heading while scanning the unified diff and
    reports a change only for added/removed non-blank lines (excluding code
    fences) under a heading listed in SUBSTANTIVE_CONTRACT_SECTIONS.
    """
    header_prefixes = ("diff --git ", "index ", "--- ", "+++ ", "@@ ")
    section = ""
    for raw in patch_text.splitlines():
        if not raw or raw.startswith(header_prefixes):
            continue
        marker = raw[0]
        if marker not in " +-":
            continue
        body = raw[1:].strip()
        # Headings update the section even on context/removed lines.
        if body.startswith("## "):
            section = body
            continue
        if marker == " ":
            continue
        if section not in SUBSTANTIVE_CONTRACT_SECTIONS:
            continue
        if body and body not in {"```", "```json"}:
            return True
    return False
def staged_contract_has_substantive_change(path: str) -> bool:
    """True when the staged diff for *path* edits a substantive contract section."""
    patch = staged_contract_patch(path)
    return contract_patch_has_substantive_change(patch)
def format_missing_requirements(
    missing_contracts: Dict[str, dict],
    insufficient_contract_updates: Dict[str, dict],
    missing_verification: Dict[str, dict],
) -> str:
    """Render the human-readable BLOCKED report printed to stderr.

    Lists, in order: contracts missing from the stage, staged contracts
    without a substantive section change, and subsystems missing verification
    artifacts, followed by a summary of the guard's rules.
    """
    lines = [
        "BLOCKED: canonical subsystem changes require matching contract and verification updates.",
        "",
        "Stage the required subsystem file(s) in the same commit:",
    ]
    # Contracts that should be staged but are absent entirely.
    for contract_path, data in sorted(missing_contracts.items()):
        if data.get("reason") == "dependent-reference":
            lines.append(
                f"- dependent subsystem {data['subsystem']}: missing contract {contract_path}"
            )
            lines.append(
                "  touched runtime files are canonical references in that dependent subsystem contract"
            )
        else:
            lines.append(f"- subsystem {data['subsystem']}: missing contract {contract_path}")
        for path in sorted(data["touched_runtime_files"]):
            lines.append(f"  touched by {path}")
        if data.get("reason") == "dependent-reference":
            for reference in data.get("matched_references", []):
                lines.append(f"  referenced by {reference}")
    # Contracts that are staged but only change non-substantive sections.
    for contract_path, data in sorted(insufficient_contract_updates.items()):
        lines.append(
            f"- subsystem {data['subsystem']}: contract {contract_path} is staged but does not include a substantive section update"
        )
        lines.append(
            "  update one of: Purpose, Canonical Files, Shared Boundaries, Extension Points, Forbidden Paths, Completion Obligations, or Current State"
        )
        for path in sorted(data["touched_runtime_files"]):
            lines.append(f"  touched by {path}")
        if data.get("reason") == "dependent-reference":
            for reference in data.get("matched_references", []):
                lines.append(f"  referenced by {reference}")
    # Subsystems with unsatisfied verification requirements.
    for subsystem_id, data in sorted(missing_verification.items()):
        for requirement in data["missing_requirements"]:
            if requirement.get("path_policy_gap"):
                lines.append(
                    f"- subsystem {subsystem_id}: missing registry path policy coverage for touched runtime files"
                )
            else:
                lines.append(
                    f"- subsystem {subsystem_id}: missing verification artifact for {requirement['label']}"
                )
            for path in sorted(requirement["touched_runtime_files"]):
                lines.append(f"  touched by {path}")
            # Coverage gaps get remediation hints instead of proof-file lists.
            if requirement.get("path_policy_gap"):
                lines.append(
                    "  update "
                    f"{DEFAULT_CONTROL_PLANE['registry_rel']} so each touched path matches an explicit path policy"
                )
                lines.append(
                    f"  default subsystem verification is forbidden for governed {DEFAULT_CONTROL_PLANE['active_profile_id']} subsystems"
                )
                continue
            exact_files = sorted(requirement.get("exact_files", []))
            test_prefixes = sorted(requirement.get("test_prefixes", []))
            allow_same_subsystem_tests = bool(requirement.get("allow_same_subsystem_tests", False))
            if exact_files:
                lines.append("  acceptable exact proof files include:")
                for path in exact_files:
                    lines.append(f"    {path}")
            if test_prefixes:
                lines.append("  acceptable staged test prefixes include:")
                for prefix in test_prefixes:
                    lines.append(f"    {prefix}")
            if allow_same_subsystem_tests:
                lines.append("  same-subsystem test/spec files are also accepted")
    # Closing summary of the rules the guard enforces.
    lines.extend(
        [
            "",
            "Rule:",
            "If a canonical subsystem changes, its contract under",
            f"`{DEFAULT_CONTROL_PLANE['subsystems_dir_rel']}/` must be updated in the same commit.",
            "If a touched runtime path is also named in another subsystem contract's",
            "`Canonical Files`, `Shared Boundaries`, or `Extension Points`, that dependent contract must be updated too.",
            "A staged contract file only counts if its staged diff changes a substantive contract section,",
            "not just `Contract Metadata` or cosmetic noise.",
            "Each touched runtime path must also satisfy the first matching",
            "verification policy from that subsystem's registry entry.",
        ]
    )
    return "\n".join(lines)
def check_staged_contracts(staged_files: Sequence[str]) -> int:
    """Run the guard over *staged_files*; return 0 when compliant, 1 when blocked.

    On failure the human-readable block report is printed to stderr.
    """
    staged_set: Set[str] = set(staged_files)
    impacted = infer_impacted_subsystems(staged_files, use_staged_registry=True)
    required_contracts = required_contract_updates(
        staged_files,
        impacted,
        use_staged_contract_index=True,
    )
    missing_contracts = {
        contract_path: data
        for contract_path, data in required_contracts.items()
        if contract_path not in staged_set
    }
    insufficient_contract_updates = {
        contract_path: data
        for contract_path, data in required_contracts.items()
        if contract_path in staged_set
        and not staged_contract_has_substantive_change(contract_path)
    }
    # BUG FIX: staged_verification_files_for_requirement consults the rule's
    # owned_prefixes/owned_files for allow_same_subsystem_tests matching, but
    # the impacted record (`data`) carries neither key, so same-subsystem
    # tests could never satisfy a requirement. Look the real registry rule up
    # by id (staged registry, matching infer_impacted_subsystems above) and
    # fall back to the record only if the rule is somehow absent.
    rules_by_id = {str(rule["id"]): rule for rule in load_subsystem_rules(staged=True)}
    missing_verification: Dict[str, dict] = {}
    for subsystem_id, data in impacted.items():
        rule = rules_by_id.get(subsystem_id, data)
        missing_requirements = [
            requirement
            for requirement in data.get("verification_requirements", [])
            if not staged_verification_files_for_requirement(rule, requirement, staged_files)
        ]
        if missing_requirements:
            missing_verification[subsystem_id] = {
                **data,
                "missing_requirements": missing_requirements,
            }
    if not missing_contracts and not insufficient_contract_updates and not missing_verification:
        return 0
    print(
        format_missing_requirements(
            missing_contracts,
            insufficient_contract_updates,
            missing_verification,
        ),
        file=sys.stderr,
    )
    return 1
def parse_args(argv: Sequence[str]) -> argparse.Namespace:
    """Parse the guard's command-line options from *argv*."""
    description = (
        "Enforce canonical subsystem contract and proof-of-change updates for "
        "staged files or an explicit changed-file list."
    )
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument(
        "--files-from-stdin",
        action="store_true",
        help="Read newline-delimited changed files from standard input instead of git staged files.",
    )
    return parser.parse_args(list(argv))
def main(argv: Sequence[str] | None = None) -> int:
    """CLI entry point: pick the changed-file source and run the guard."""
    options = parse_args(list(argv or ()))
    if options.files_from_stdin:
        changed = stdin_files(sys.stdin)
    else:
        changed = git_staged_files()
    return check_staged_contracts(changed)
if __name__ == "__main__":
    # Script entry point: exit with the guard's status (0 = pass, 1 = blocked).
    raise SystemExit(main(sys.argv[1:]))