Pulse/scripts/release_control/subsystem_contracts.py
2026-04-09 09:35:28 +01:00

259 lines
8.3 KiB
Python

#!/usr/bin/env python3
"""Shared parsing helpers for active release profile subsystem contracts."""
from __future__ import annotations
import json
import os
from pathlib import Path
import re
import subprocess
from typing import Any
from control_plane import DEFAULT_CONTROL_PLANE
# Repository root: the directory two levels above the directory that contains
# this file (``parents[2]`` of the resolved file path).
REPO_ROOT = Path(__file__).resolve().parents[2]
# Import-time snapshot of REPO_ROOT. git_env() compares the two values and
# drops GIT_INDEX_FILE when they differ.
# NOTE(review): presumably REPO_ROOT is rebound by callers/tests — confirm.
DEFAULT_REPO_ROOT = REPO_ROOT
# Directory holding the subsystem contract files, taken from the control-plane
# defaults. It is used below with ``relative_to(REPO_ROOT)`` and ``glob``.
CONTRACTS_DIR = DEFAULT_CONTROL_PLANE["subsystems_dir_path"]
# Path of the contract template; load_contract_index() skips the entry whose
# key equals this value.
TEMPLATE_REL = DEFAULT_CONTROL_PLANE["subsystem_contract_template_rel"]
# Level-2 headings whose positions (and duplicates) parse_contract_text() tracks.
REQUIRED_SECTIONS = [
"## Contract Metadata",
"## Purpose",
"## Canonical Files",
"## Shared Boundaries",
"## Extension Points",
"## Forbidden Paths",
"## Completion Obligations",
"## Current State",
]
# Subset of the headings above; not referenced by any function in this part of
# the file.
# NOTE(review): presumably the sections expected to hold numbered lists — confirm
# against callers.
LIST_SECTIONS = {
"## Canonical Files",
"## Shared Boundaries",
"## Extension Points",
"## Forbidden Paths",
"## Completion Obligations",
}
# File suffixes that make looks_like_repo_path() accept a token even when it
# contains no "/".
PATH_SUFFIXES = (
".go",
".json",
".md",
".mod",
".mjs",
".py",
".sh",
".sum",
".ts",
".tsx",
".yaml",
".yml",
)
def git_env() -> dict[str, str]:
    """Return a copy of the process environment for running git.

    When ``REPO_ROOT`` no longer equals ``DEFAULT_REPO_ROOT`` the
    ``GIT_INDEX_FILE`` variable is removed from the copy.
    NOTE(review): presumably so an inherited index file is not applied to a
    different repository root — confirm.
    """
    environment = dict(os.environ)
    root_was_rebound = REPO_ROOT != DEFAULT_REPO_ROOT
    if root_was_rebound:
        environment.pop("GIT_INDEX_FILE", None)
    return environment
def git(*args: str, text: bool) -> subprocess.CompletedProcess:
    """Run ``git`` with *args* inside ``REPO_ROOT`` and return the completed process.

    Args:
        *args: Arguments placed after ``git`` on the command line.
        text: Passed to ``subprocess.run``; ``True`` yields ``str`` output,
            ``False`` yields ``bytes``.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status
            (``check=True``).
    """
    command = ["git"]
    command.extend(args)
    run_options = {
        "cwd": REPO_ROOT,
        "check": True,
        "capture_output": True,
        "text": text,
        "env": git_env(),
    }
    return subprocess.run(command, **run_options)
def tracked_contract_paths(*, staged: bool = False) -> list[str]:
    """Return the sorted ``.md`` contract paths as POSIX-style strings.

    When *staged* is true, ``git ls-files`` is asked for entries under the
    contracts directory and its ``.md`` entries are returned if there are any.
    In every other case (not staged, or git listed none) the contracts
    directory is globbed on disk and paths relative to ``REPO_ROOT`` are
    returned.
    """
    if staged:
        contracts_rel = CONTRACTS_DIR.relative_to(REPO_ROOT).as_posix()
        listing = git("ls-files", "-z", "--", contracts_rel, text=False)
        indexed: list[str] = []
        # ``-z`` output is NUL-separated; empty chunks are skipped.
        for raw_entry in listing.stdout.split(b"\x00"):
            if not raw_entry:
                continue
            name = raw_entry.decode("utf-8")
            if name.endswith(".md"):
                indexed.append(name)
        indexed.sort()
        if indexed:
            return indexed
    # Governance contract files are filesystem-only (gitignored) — fall back to disk
    on_disk = [
        candidate.relative_to(REPO_ROOT).as_posix()
        for candidate in CONTRACTS_DIR.glob("*.md")
    ]
    on_disk.sort()
    return on_disk
def staged_contract_text(rel: str) -> str:
    """Return the output of ``git show :<rel>`` as text.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status.
    """
    return git("show", f":{rel}", text=True).stdout
def tracked_contract_files(*, staged: bool = False) -> dict[str, str]:
    """Map every contract path to its text.

    For each path from ``tracked_contract_paths(staged=staged)``: when *staged*
    is true the text is read through ``staged_contract_text``; if that git call
    fails, or when *staged* is false, the file is read from disk under
    ``REPO_ROOT`` as UTF-8.
    """
    contents: dict[str, str] = {}
    for rel_path in tracked_contract_paths(staged=staged):
        text: str | None = None
        if staged:
            try:
                text = staged_contract_text(rel_path)
            except subprocess.CalledProcessError:
                # git could not show this path; use the on-disk file instead.
                text = None
        if text is None:
            text = (REPO_ROOT / rel_path).read_text(encoding="utf-8")
        contents[rel_path] = text
    return contents
def section_body(lines: list[str], heading: str) -> list[str]:
    """Return the lines between *heading* and the next ``## `` line.

    The first line equal to *heading* opens the section; the section runs up
    to (not including) the next line that starts with ``"## "``, or to the end
    of *lines* when there is none.

    Raises:
        StopIteration: If no line equals *heading*.
    """
    first = next(pos for pos, text in enumerate(lines) if text == heading) + 1
    stop = next(
        (pos for pos in range(first, len(lines)) if lines[pos].startswith("## ")),
        len(lines),
    )
    return lines[first:stop]
# Numbers accepted as list markers ("1." .. "99."), built once at import time
# instead of once per scanned line.
_LIST_ITEM_NUMBERS = frozenset(str(number) for number in range(1, 100))


def section_list_items(body_lines: list[str]) -> list[tuple[int, str]]:
    """Extract numbered list items from a section body.

    A line is an item when, after stripping surrounding whitespace, it starts
    with a number from 1 to 99 (no leading zeros) followed by ``". "``.

    Args:
        body_lines: Lines of one section, as returned by ``section_body``.

    Returns:
        ``(index, text)`` pairs, where ``index`` is the position of the line in
        *body_lines* and ``text`` is everything after the ``"N. "`` marker.
    """
    items: list[tuple[int, str]] = []
    for index, line in enumerate(body_lines):
        stripped = line.strip()
        if not stripped:
            continue
        # Text before the first ". " must be exactly one of the accepted
        # numbers; this is equivalent to testing each "N. " prefix in turn.
        number, separator, remainder = stripped.partition(". ")
        if separator and number in _LIST_ITEM_NUMBERS:
            items.append((index, remainder))
    return items
def looks_like_repo_path(token: str) -> bool:
    """Tell whether *token* looks like a repository path.

    After stripping whitespace, a token qualifies when it contains ``"/"`` or
    ends with one of ``PATH_SUFFIXES``.
    """
    trimmed = token.strip()
    if "/" in trimmed:
        return True
    return trimmed.endswith(PATH_SUFFIXES)
def parse_contract_metadata(body_lines: list[str]) -> tuple[dict[str, Any] | None, list[str]]:
    """Parse the JSON fenced block of a "Contract Metadata" section.

    Blank lines are ignored. The remaining lines must open with a ```json
    fence, close with a ``` fence, and contain a JSON object in between.

    Args:
        body_lines: Lines of the metadata section body.

    Returns:
        ``(metadata, errors)``. On success ``metadata`` is the decoded object
        and ``errors`` is empty; on failure ``metadata`` is ``None`` and
        ``errors`` holds a single message describing the first problem found.
    """
    content_lines = [raw for raw in body_lines if raw.strip()]
    # Opening fence + at least one payload line + closing fence.
    if len(content_lines) < 3:
        return None, ["contract metadata section must contain a JSON fenced block"]

    opening, *inner, closing = content_lines
    if opening.strip() != "```json":
        return None, ["contract metadata section must start with ```json"]
    if closing.strip() != "```":
        return None, ["contract metadata section must end with ```"]

    raw_json = "\n".join(inner).strip()
    if not raw_json:
        return None, ["contract metadata JSON block must not be empty"]

    try:
        decoded = json.loads(raw_json)
    except json.JSONDecodeError as exc:
        return None, [f"contract metadata JSON is invalid: {exc}"]

    if not isinstance(decoded, dict):
        return None, ["contract metadata JSON must be an object"]
    return decoded, []
def parse_contract_text(rel: str, content: str) -> tuple[dict[str, Any], list[str]]:
    """Parse one subsystem contract document.

    Args:
        rel: Path of the contract, used only as a prefix in error messages.
        content: Full text of the contract.

    Returns:
        ``(parsed, errors)`` where ``parsed`` has the keys:

        * ``title`` - the first line, stripped (``""`` for empty content);
        * ``metadata`` - decoded metadata object, or ``None`` when the section
          is missing or invalid;
        * ``path_references`` - one dict per backticked, path-like token found
          in the numbered items of "Canonical Files", "Shared Boundaries" and
          "Extension Points", with ``heading``, ``path`` and 1-based ``line``
          and ``heading_line`` numbers;
        * ``heading_positions`` - 0-based index of each required heading that
          is present (the last occurrence when a heading is duplicated);
        * ``lines`` - the content split into lines.

        ``errors`` lists problems with the title line, duplicated required
        headings and the metadata block.
    """
    errors: list[str] = []
    # "line"/"heading_line" values are ints, hence Any rather than str.
    path_references: list[dict[str, Any]] = []
    lines = content.splitlines()
    if not lines or not lines[0].startswith("# "):
        errors.append(f"{rel} must start with a level-1 heading")

    heading_positions: dict[str, int] = {}
    # section_body() always parses the FIRST occurrence of a heading, so line
    # numbers must be derived from the first occurrence as well; otherwise a
    # duplicated heading makes every reported line point into the wrong section.
    first_positions: dict[str, int] = {}
    for index, line in enumerate(lines):
        if line in REQUIRED_SECTIONS:
            if line in heading_positions:
                errors.append(f"{rel} duplicates required section {line!r}")
            heading_positions[line] = index
            first_positions.setdefault(line, index)

    metadata: dict[str, Any] | None = None
    if "## Contract Metadata" in heading_positions:
        metadata, metadata_errors = parse_contract_metadata(section_body(lines, "## Contract Metadata"))
        errors.extend(f"{rel} {error}" for error in metadata_errors)

    for heading in ("## Canonical Files", "## Shared Boundaries", "## Extension Points"):
        if heading not in first_positions:
            continue
        # 1-based line number of the heading; numerically also the 0-based
        # index of the first body line.
        heading_line = first_positions[heading] + 1
        body_start = first_positions[heading] + 1
        body = section_body(lines, heading)
        for item_index, item in section_list_items(body):
            line_number = body_start + item_index + 1
            for token in re.findall(r"`([^`]+)`", item):
                if looks_like_repo_path(token):
                    path_references.append(
                        {
                            "heading": heading,
                            "path": token,
                            "line": line_number,
                            "heading_line": heading_line,
                        }
                    )

    return {
        "title": lines[0].strip() if lines else "",
        "metadata": metadata,
        "path_references": path_references,
        "heading_positions": heading_positions,
        "lines": lines,
    }, errors
def load_contract_index(
    contract_texts: dict[str, str] | None = None,
    *,
    staged: bool = False,
) -> dict[str, dict[str, Any]]:
    """Build an index of parsed contracts keyed by ``subsystem_id``.

    Args:
        contract_texts: Mapping of contract path to text. When ``None`` the
            contracts are loaded with ``tracked_contract_files(staged=staged)``.
            An explicitly passed empty mapping is honoured and yields an empty
            index.
        staged: Forwarded to ``tracked_contract_files`` when loading.

    Returns:
        ``{subsystem_id: entry}`` where each entry carries ``subsystem_id``,
        ``contract`` (the path), ``metadata``, ``path_references`` and
        ``title``. The template file, non-``.md`` paths, and contracts whose
        metadata is not an object or lacks a non-blank ``subsystem_id`` are
        skipped. Parse errors are ignored here. A later contract with the same
        ``subsystem_id`` replaces an earlier one.
    """
    # ``is None`` rather than truthiness: an empty dict means "no contracts",
    # not "argument omitted", and must not trigger a git/disk load.
    if contract_texts is None:
        contract_texts = tracked_contract_files(staged=staged)

    contract_index: dict[str, dict[str, Any]] = {}
    for rel, content in contract_texts.items():
        if rel == TEMPLATE_REL or not rel.endswith(".md"):
            continue
        parsed, _ = parse_contract_text(rel, content)
        metadata = parsed.get("metadata")
        if not isinstance(metadata, dict):
            continue
        subsystem_id = str(metadata.get("subsystem_id", "")).strip()
        if not subsystem_id:
            continue
        contract_index[subsystem_id] = {
            "subsystem_id": subsystem_id,
            "contract": rel,
            "metadata": metadata,
            "path_references": list(parsed.get("path_references", [])),
            "title": parsed.get("title", ""),
        }
    return contract_index
def contract_reference_matches_path(reference_path: str, path: str) -> bool:
    """Tell whether a contract reference covers *path*.

    A reference ending in ``"/"`` matches every path that starts with it; any
    other reference matches only the identical path.
    """
    is_directory_reference = reference_path.endswith("/")
    return path.startswith(reference_path) if is_directory_reference else path == reference_path
def referenced_contracts_for_path(
    path: str,
    contract_index: dict[str, dict[str, Any]] | None = None,
) -> list[dict[str, Any]]:
    """Return the contracts that reference *path*.

    Args:
        path: Path to look up.
        contract_index: Index as produced by ``load_contract_index``. When
            ``None`` the index is loaded with ``load_contract_index()``. An
            explicitly passed empty index is honoured and yields no matches.

    Returns:
        A copy of each matching index entry with an extra
        ``matched_references`` list holding the references that matched,
        sorted by case-folded ``subsystem_id``.
    """
    # ``is None`` rather than truthiness: an empty index is a valid argument
    # and must not cause the contracts to be reloaded.
    if contract_index is None:
        contract_index = load_contract_index()

    matches: list[dict[str, Any]] = []
    for contract in contract_index.values():
        referenced_paths = [
            reference
            for reference in contract.get("path_references", [])
            if contract_reference_matches_path(str(reference.get("path", "")), path)
        ]
        if referenced_paths:
            matches.append({**contract, "matched_references": referenced_paths})
    return sorted(matches, key=lambda contract: str(contract.get("subsystem_id", "")).casefold())