mirror of
https://github.com/rcourtman/Pulse.git
synced 2026-05-07 00:37:36 +00:00
259 lines
8.3 KiB
Python
259 lines
8.3 KiB
Python
#!/usr/bin/env python3
|
|
"""Shared parsing helpers for active release profile subsystem contracts."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
from pathlib import Path
|
|
import re
|
|
import subprocess
|
|
from typing import Any
|
|
|
|
from control_plane import DEFAULT_CONTROL_PLANE
|
|
|
|
# Repository root: two directory levels above the directory that holds this file.
REPO_ROOT = Path(__file__).resolve().parents[2]
# Snapshot of the initial root; git_env() compares REPO_ROOT against it.
DEFAULT_REPO_ROOT = REPO_ROOT

# Contract locations, both read from the control-plane configuration.
CONTRACTS_DIR = DEFAULT_CONTROL_PLANE["subsystems_dir_path"]
TEMPLATE_REL = DEFAULT_CONTROL_PLANE["subsystem_contract_template_rel"]

# Level-2 headings whose positions parse_contract_text() records.
REQUIRED_SECTIONS = [
    "## Contract Metadata",
    "## Purpose",
    "## Canonical Files",
    "## Shared Boundaries",
    "## Extension Points",
    "## Forbidden Paths",
    "## Completion Obligations",
    "## Current State",
]

# NOTE(review): presumably the sections whose body must be a numbered list;
# not referenced elsewhere in this part of the file -- confirm against callers.
LIST_SECTIONS = {
    "## Canonical Files",
    "## Shared Boundaries",
    "## Extension Points",
    "## Forbidden Paths",
    "## Completion Obligations",
}

# File extensions that make a back-ticked token count as a repository path
# even when it contains no "/" (see looks_like_repo_path).
PATH_SUFFIXES = (
    ".go", ".json", ".md", ".mod", ".mjs", ".py",
    ".sh", ".sum", ".ts", ".tsx", ".yaml", ".yml",
)
|
|
|
|
|
|
def git_env() -> dict[str, str]:
    """Return a private copy of the process environment for git subprocesses.

    ``GIT_INDEX_FILE`` is dropped from the copy whenever ``REPO_ROOT`` differs
    from ``DEFAULT_REPO_ROOT``. ``os.environ`` itself is never modified.
    """
    environment = dict(os.environ)
    if REPO_ROOT == DEFAULT_REPO_ROOT:
        return environment
    # The root was redirected: do not pass an inherited index file along.
    environment.pop("GIT_INDEX_FILE", None)
    return environment
|
|
|
|
|
|
def git(*args: str, text: bool) -> subprocess.CompletedProcess:
    """Run ``git <args>`` from ``REPO_ROOT`` and return the completed process.

    Args:
        *args: Arguments appended after the ``git`` executable name.
        text: Forwarded to ``subprocess.run``; ``True`` yields ``str`` output,
            ``False`` yields ``bytes``.

    Returns:
        The ``CompletedProcess`` with stdout and stderr captured.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status
            (the call uses ``check=True``).
    """
    command = ["git", *args]
    run_options = {
        "cwd": REPO_ROOT,
        "check": True,
        "capture_output": True,
        "text": text,
        "env": git_env(),
    }
    return subprocess.run(command, **run_options)
|
|
|
|
|
|
def tracked_contract_paths(*, staged: bool = False) -> list[str]:
    """Return the sorted repo-relative paths of the contract markdown files.

    Args:
        staged: When true, first ask ``git ls-files -z`` for entries under the
            contracts directory and return the ``.md`` ones if there are any.

    Returns:
        Sorted POSIX-style paths. When ``staged`` is false, or git lists no
        ``.md`` entry, the paths come from globbing ``*.md`` directly inside
        ``CONTRACTS_DIR``.

    Raises:
        subprocess.CalledProcessError: If the ``git ls-files`` call fails.
    """
    if staged:
        contracts_rel = CONTRACTS_DIR.relative_to(REPO_ROOT).as_posix()
        listing = git("ls-files", "-z", "--", contracts_rel, text=False)
        # -z output is NUL-separated; the trailing separator leaves an empty chunk.
        names = [raw.decode("utf-8") for raw in listing.stdout.split(b"\x00") if raw]
        indexed = sorted(name for name in names if name.endswith(".md"))
        if indexed:
            return indexed
    # Governance contract files are filesystem-only (gitignored) -- fall back to disk.
    on_disk = [found.relative_to(REPO_ROOT).as_posix() for found in CONTRACTS_DIR.glob("*.md")]
    on_disk.sort()
    return on_disk
|
|
|
|
|
|
def staged_contract_text(rel: str) -> str:
    """Return the text ``git show :<rel>`` prints for the path *rel*.

    Raises:
        subprocess.CalledProcessError: If ``git show`` exits non-zero.
    """
    index_spec = f":{rel}"
    return git("show", index_spec, text=True).stdout
|
|
|
|
|
|
def tracked_contract_files(*, staged: bool = False) -> dict[str, str]:
    """Map each contract path to its text.

    Args:
        staged: When true, prefer the content returned by
            ``staged_contract_text``; a path for which that git call fails is
            read from the working tree instead.

    Returns:
        ``{repo-relative path: file text}`` for every path reported by
        ``tracked_contract_paths(staged=staged)``.
    """
    contents: dict[str, str] = {}
    for rel in tracked_contract_paths(staged=staged):
        text = None
        if staged:
            try:
                text = staged_contract_text(rel)
            except subprocess.CalledProcessError:
                # Deliberate best effort: fall through to the on-disk copy.
                text = None
        if text is None:
            text = (REPO_ROOT / rel).read_text(encoding="utf-8")
        contents[rel] = text
    return contents
|
|
|
|
|
|
def section_body(lines: list[str], heading: str) -> list[str]:
    """Return the lines between *heading* and the next ``## `` heading.

    The first line equal to *heading* opens the section; the body runs up to
    (but excluding) the next line that starts with ``"## "``, or to the end of
    *lines* when no such line follows.

    Raises:
        StopIteration: If no line equals *heading*.
    """
    first = next(pos for pos, text in enumerate(lines) if text == heading) + 1
    for offset, text in enumerate(lines[first:]):
        if text.startswith("## "):
            return lines[first : first + offset]
    return lines[first:]
|
|
|
|
|
|
# Numbered-list marker: "1." through "99." (no leading zero) followed by one space.
_LIST_ITEM_MARKER = re.compile(r"[1-9][0-9]?\. ")


def section_list_items(body_lines: list[str]) -> list[tuple[int, str]]:
    """Extract the numbered list items from a section body.

    A line counts as an item when, after stripping surrounding whitespace, it
    starts with a marker ``1.`` to ``99.`` followed by a space.

    Args:
        body_lines: Lines of one section, as returned by ``section_body``.

    Returns:
        ``(index, text)`` pairs in input order, where ``index`` is the
        position of the line within *body_lines* and ``text`` is the stripped
        line with the marker and the single space after it removed.
    """
    items: list[tuple[int, str]] = []
    for index, line in enumerate(body_lines):
        stripped = line.strip()
        # One precompiled pattern replaces rebuilding and scanning 99 marker
        # strings for every line.
        marker = _LIST_ITEM_MARKER.match(stripped)
        if marker:
            items.append((index, stripped[marker.end() :]))
    return items
|
|
|
|
|
|
def looks_like_repo_path(token: str) -> bool:
    """Tell whether *token* resembles a path inside the repository.

    After stripping surrounding whitespace, a token qualifies when it contains
    a ``/`` or ends with one of the extensions in ``PATH_SUFFIXES``.
    """
    candidate = token.strip()
    if "/" in candidate:
        return True
    return candidate.endswith(PATH_SUFFIXES)
|
|
|
|
|
|
def parse_contract_metadata(body_lines: list[str]) -> tuple[dict[str, Any] | None, list[str]]:
    """Parse the fenced JSON object of a ``## Contract Metadata`` section.

    Blank lines are ignored. The first remaining line must be the ```json
    opening fence, the last one the closing ``` fence, and everything in
    between must decode to a JSON object.

    Args:
        body_lines: Lines of the metadata section body.

    Returns:
        ``(payload, [])`` on success, otherwise ``(None, [message])`` where
        the single message describes the first problem found.
    """
    non_blank = [line for line in body_lines if line.strip()]
    if len(non_blank) < 3:
        return None, ["contract metadata section must contain a JSON fenced block"]

    opener, *inner, closer = non_blank
    if opener.strip() != "```json":
        return None, ["contract metadata section must start with ```json"]
    if closer.strip() != "```":
        return None, ["contract metadata section must end with ```"]

    # Inner lines are joined unmodified so decoder error positions stay meaningful.
    raw_json = "\n".join(inner).strip()
    if not raw_json:
        return None, ["contract metadata JSON block must not be empty"]

    try:
        decoded = json.loads(raw_json)
    except json.JSONDecodeError as exc:
        return None, [f"contract metadata JSON is invalid: {exc}"]

    if not isinstance(decoded, dict):
        return None, ["contract metadata JSON must be an object"]
    return decoded, []
|
|
|
|
|
|
def parse_contract_text(rel: str, content: str) -> tuple[dict[str, Any], list[str]]:
    """Parse one subsystem contract document.

    Args:
        rel: Path of the contract, used only as a prefix in error messages.
        content: Full markdown text of the contract.

    Returns:
        ``(parsed, errors)``. ``parsed`` holds ``title`` (stripped first line,
        or ``""`` for empty content), ``metadata`` (the decoded JSON object or
        ``None``), ``path_references`` (one dict per path-like back-ticked
        token found in the numbered items of the Canonical Files, Shared
        Boundaries and Extension Points sections), ``heading_positions``
        (0-based line index per required heading present) and ``lines``.
        ``errors`` lists the problems detected while parsing.
    """
    lines = content.splitlines()
    errors: list[str] = []
    if not (lines and lines[0].startswith("# ")):
        errors.append(f"{rel} must start with a level-1 heading")

    heading_positions: dict[str, int] = {}
    for position, text in enumerate(lines):
        if text not in REQUIRED_SECTIONS:
            continue
        if text in heading_positions:
            errors.append(f"{rel} duplicates required section {text!r}")
        # NOTE(review): a duplicated heading keeps its LAST index here, while
        # section_body() reads from the FIRST occurrence, so reference line
        # numbers can be off for documents already flagged as duplicated.
        heading_positions[text] = position

    metadata: dict[str, Any] | None = None
    if "## Contract Metadata" in heading_positions:
        metadata, metadata_errors = parse_contract_metadata(section_body(lines, "## Contract Metadata"))
        errors.extend(f"{rel} {error}" for error in metadata_errors)

    path_references: list[dict[str, str]] = []
    for heading in ("## Canonical Files", "## Shared Boundaries", "## Extension Points"):
        position = heading_positions.get(heading)
        if position is None:
            continue
        # 1-based line of the heading; the body starts on the following line.
        heading_line = position + 1
        for item_index, item in section_list_items(section_body(lines, heading)):
            line_number = heading_line + item_index + 1
            path_references.extend(
                {
                    "heading": heading,
                    "path": token,
                    "line": line_number,
                    "heading_line": heading_line,
                }
                for token in re.findall(r"`([^`]+)`", item)
                if looks_like_repo_path(token)
            )

    parsed = {
        "title": lines[0].strip() if lines else "",
        "metadata": metadata,
        "path_references": path_references,
        "heading_positions": heading_positions,
        "lines": lines,
    }
    return parsed, errors
|
|
|
|
|
|
def load_contract_index(
    contract_texts: dict[str, str] | None = None,
    *,
    staged: bool = False,
) -> dict[str, dict[str, Any]]:
    """Build an index of subsystem contracts keyed by ``subsystem_id``.

    Args:
        contract_texts: Mapping of contract path to markdown text. Only
            ``None`` triggers loading via ``tracked_contract_files``; an
            explicitly supplied empty mapping yields an empty index.
        staged: Forwarded to ``tracked_contract_files`` when the texts have
            to be loaded.

    Returns:
        ``{subsystem_id: entry}`` where each entry carries ``subsystem_id``,
        ``contract`` (the path), ``metadata``, ``path_references`` and
        ``title``. The template file, non-``.md`` paths, and contracts without
        usable metadata or a non-blank ``subsystem_id`` are skipped. Parse
        errors are ignored here.
    """
    # Compare against None rather than truthiness so that a caller passing {}
    # does not unexpectedly fall through to git/disk loading.
    if contract_texts is None:
        contract_texts = tracked_contract_files(staged=staged)

    contract_index: dict[str, dict[str, Any]] = {}
    for rel, content in contract_texts.items():
        if rel == TEMPLATE_REL or not rel.endswith(".md"):
            continue
        parsed, _ = parse_contract_text(rel, content)
        metadata = parsed.get("metadata")
        if not isinstance(metadata, dict):
            continue
        subsystem_id = str(metadata.get("subsystem_id", "")).strip()
        if not subsystem_id:
            continue
        contract_index[subsystem_id] = {
            "subsystem_id": subsystem_id,
            "contract": rel,
            "metadata": metadata,
            "path_references": list(parsed.get("path_references", [])),
            "title": parsed.get("title", ""),
        }
    return contract_index
|
|
|
|
|
|
def contract_reference_matches_path(reference_path: str, path: str) -> bool:
    """Tell whether a contract reference covers *path*.

    A reference ending in ``/`` is treated as a directory prefix and matches
    every path that starts with it; any other reference must equal *path*.
    """
    is_directory_reference = reference_path.endswith("/")
    return path.startswith(reference_path) if is_directory_reference else path == reference_path
|
|
|
|
|
|
def referenced_contracts_for_path(
    path: str,
    contract_index: dict[str, dict[str, Any]] | None = None,
) -> list[dict[str, Any]]:
    """Return the contracts whose path references cover *path*.

    Args:
        path: Repo-relative path to look up.
        contract_index: Index as produced by ``load_contract_index``. Only
            ``None`` triggers a fresh load; an explicitly supplied empty index
            is used as-is and yields no matches.

    Returns:
        Copies of the matching index entries, each extended with a
        ``matched_references`` list, sorted case-insensitively by
        ``subsystem_id``.
    """
    # Compare against None rather than truthiness so that an empty index is
    # not reloaded from git/disk on every call.
    if contract_index is None:
        contract_index = load_contract_index()

    matches: list[dict[str, Any]] = []
    for contract in contract_index.values():
        referenced_paths = [
            reference
            for reference in contract.get("path_references", [])
            if contract_reference_matches_path(str(reference.get("path", "")), path)
        ]
        if referenced_paths:
            matches.append({**contract, "matched_references": referenced_paths})
    return sorted(matches, key=lambda contract: str(contract.get("subsystem_id", "")).casefold())
|