Fail closed dry-run action execution

This commit is contained in:
rcourtman 2026-05-05 09:22:04 +01:00
parent 53a928ee2d
commit d91c2afedb
12 changed files with 323 additions and 7 deletions

View file

@ -155,6 +155,12 @@ Returns the deterministic pre-execution plan for a capability advertised on a un
This endpoint is API-first and plan-only: it resolves the resource from the unified registry, verifies the requested capability and parameter schema, returns approval policy, blast radius, stale-plan hashes, and preflight checks, and does not approve or execute anything.
`POST /api/actions/{id}/decision`
Records an explicit `approved` or `rejected` decision for a persisted `pending_approval` action. It does not execute the action.
`POST /api/actions/{id}/execute`
Starts execution only for an approved action or an approval-free executable plan, records `executing` before dispatch, and records the terminal result afterward. Dry-run-only plans are rejected and cannot be executed through this endpoint.
CLI adapter:
```bash
PULSE_API_TOKEN=your-token pulse actions capabilities \
@ -170,6 +176,17 @@ PULSE_API_TOKEN=your-token pulse actions plan \
--reason "Recover after confirmed outage" \
--requested-by agent:oncall-helper
PULSE_API_TOKEN=your-token pulse actions decide \
--api-url http://localhost:7655 \
--action-id act_... \
--outcome approved \
--reason "Inside maintenance window"
PULSE_API_TOKEN=your-token pulse actions execute \
--api-url http://localhost:7655 \
--action-id act_... \
--reason "Execute approved recovery"
PULSE_API_TOKEN=your-token pulse actions audit \
--api-url http://localhost:7655 \
--resource-id vm:42 \

View file

@ -137,7 +137,9 @@ implicit command execution or define a parallel execution handoff. When a
planned resource capability is actually executed from an agent-lifecycle
surface, that handoff must route through `POST /api/actions/{id}/execute` so
the API-owned action audit records `executing` before dispatch and the
terminal execution result afterward. Dry-run-only plans remain planning evidence
only; lifecycle surfaces must not present them as executable, dispatch them
through agent-local command paths, or bypass the API fail-closed execution gate.
The node setup modal boundary must keep guided setup and manual credential
submission separate. For new PVE/PBS setup, Agent Install and Direct Connection

View file

@ -309,11 +309,13 @@ the canonical monitored-system blocked payload.
requires a later explicit execution contract.
Action execution is API-owned as the next explicit contract:
`POST /api/actions/{id}/execute` may only start execution for an approved
action or an approval-free executable plan, must atomically persist the
`executing` lifecycle state before invoking a registered executor, and must
atomically persist the terminal `completed` or `failed` result afterward.
Dry-run-only plans are not executable plans and must fail closed before any
`executing` mutation. If no API executor is registered, the endpoint must
fail closed without mutating the approved audit record or appending execution
lifecycle events.
Approval must never imply execution, and local UI, CLI, MCP, agent, or
storage/recovery adapters must not bypass this endpoint with a parallel
execution transport.

View file

@ -112,7 +112,10 @@ without executing the underlying capability. Any storage/recovery execution
handoff for the approved action must route through
`POST /api/actions/{id}/execute` so the API-owned action audit records
`executing` before dispatch and the terminal result afterward instead of
creating storage-local action transport. Dry-run-only plans remain planning
evidence only; storage and recovery surfaces must not present them as
executable, dispatch them through provider-local restore/remediation paths, or
bypass the API fail-closed execution gate.
1. Add or change recovery-point persistence, rollups, or series derivation through `internal/recovery/`
2. Add or change recovery page UX through `frontend-modern/src/components/Recovery/` and keep canonical route/query/filter state ownership in `frontend-modern/src/features/recovery/useRecoverySurfaceState.ts`

View file

@ -478,7 +478,9 @@ the current audit record and append the lifecycle event atomically without
creating execution results or accepting stale second decisions.
Action execution now follows that same resource-owned state machine:
`BeginActionExecution` may transition only an approved action, or an
approval-free allowed plan whose approval policy is not `ApprovalDryRun`, into `executing`;
dry-run-only plans may remain audited planning evidence, but must fail closed
before any `executing` lifecycle mutation. `CompleteActionExecution` may
transition only an executing record into `completed` or `failed` with an
explicit `ExecutionResult`. `RecordActionExecutionStart` and
`RecordActionExecutionResult` must perform the audit update and lifecycle

View file

@ -2,6 +2,7 @@ package actionplanner
import (
"errors"
"strings"
"testing"
"time"
@ -97,6 +98,48 @@ func TestPlannerBuildsDeterministicGovernedPlan(t *testing.T) {
}
}
func TestPlannerBuildsDryRunOnlyPlanWithoutExecutionApproval(t *testing.T) {
now := time.Date(2026, 5, 5, 9, 0, 0, 0, time.UTC)
resource := unified.Resource{
ID: "vm:42",
Type: unified.ResourceTypeVM,
Name: "web-42",
Status: unified.StatusOnline,
Capabilities: []unified.ResourceCapability{
{
Name: "restart",
Type: unified.CapabilityTypeCommon,
Description: "Restart the VM",
MinimumApprovalLevel: unified.ApprovalDryRun,
},
},
}
req := unified.ActionRequest{
RequestID: "agent-run-dry-run",
ResourceID: "vm:42",
CapabilityName: "restart",
Reason: "Validate restart path without execution",
RequestedBy: "agent:oncall-helper",
}
plan, err := (Planner{Now: func() time.Time { return now }}).Plan(req, resource)
if err != nil {
t.Fatalf("Plan() error = %v", err)
}
if plan.ApprovalPolicy != unified.ApprovalDryRun {
t.Fatalf("ApprovalPolicy = %q, want %q", plan.ApprovalPolicy, unified.ApprovalDryRun)
}
if plan.RequiresApproval {
t.Fatalf("RequiresApproval = true, want false because dry-run-only plans cannot be executed")
}
if plan.Preflight == nil || !strings.Contains(strings.Join(plan.Preflight.SafetyChecks, " "), "dry-run-only") {
t.Fatalf("dry-run-only safety checks missing: %#v", plan.Preflight)
}
if !strings.Contains(plan.Message, "dry-run only") {
t.Fatalf("plan message = %q", plan.Message)
}
}
func TestPlannerRejectsUndeclaredParams(t *testing.T) {
resource := unified.Resource{
ID: "vm:42",

View file

@ -426,6 +426,8 @@ func writeActionExecutionApplyError(w http.ResponseWriter, err error) {
writeErrorResponse(w, http.StatusConflict, "action_not_executing", "Action is not executing", nil)
case errors.Is(err, unified.ErrActionPlanExpired):
writeErrorResponse(w, http.StatusConflict, "action_plan_expired", "Action plan has expired", nil)
case errors.Is(err, unified.ErrActionDryRunOnly):
writeErrorResponse(w, http.StatusConflict, "action_dry_run_only", "Action plan is dry-run only and cannot be executed", nil)
default:
writeErrorResponse(w, http.StatusInternalServerError, "action_execution_failed", sanitizeErrorForClient(err, "Action execution failed"), nil)
}
@ -437,7 +439,8 @@ func writeActionExecutionPersistError(w http.ResponseWriter, err error) {
errors.Is(err, unified.ErrActionAlreadyExecuting),
errors.Is(err, unified.ErrActionExecutionFinal),
errors.Is(err, unified.ErrActionNotExecuting),
errors.Is(err, unified.ErrActionPlanExpired):
errors.Is(err, unified.ErrActionPlanExpired),
errors.Is(err, unified.ErrActionDryRunOnly):
writeActionExecutionApplyError(w, err)
default:
writeErrorResponse(w, http.StatusInternalServerError, "action_execution_persist_failed", sanitizeErrorForClient(err, "Failed to persist action execution"), nil)

View file

@ -518,6 +518,75 @@ func TestHandleExecuteActionWithoutExecutorLeavesApprovedAuditUnchanged(t *testi
}
}
// TestHandleExecuteActionRejectsDryRunOnlyPlan stores a planned audit record
// whose plan uses the ApprovalDryRun policy, posts to the execute handler, and
// checks that the handler answers 409 with the action_dry_run_only code, never
// calls the registered executor, and leaves the audit record and lifecycle
// event list untouched.
func TestHandleExecuteActionRejectsDryRunOnlyPlan(t *testing.T) {
	const actionID = "act_dry_run_only"

	now := time.Now().UTC().Truncate(time.Second)
	plannedAt := now.Add(-time.Minute)

	h := NewResourceHandlers(&config.Config{DataPath: t.TempDir()})
	// A succeeding executor is registered on purpose: any call to it is a
	// failure of the gate under test.
	executor := &stubActionExecutor{result: &unified.ExecutionResult{Success: true, Output: "should not run"}}
	h.SetActionExecutor(executor)

	store, err := h.getStore("default")
	if err != nil {
		t.Fatalf("get store: %v", err)
	}

	request := unified.ActionRequest{
		RequestID:      "req-dry-run-only",
		ResourceID:     "vm:42",
		CapabilityName: "restart",
		Reason:         "Validate restart path without execution",
		RequestedBy:    "agent:oncall-helper",
	}
	plan := unified.ActionPlan{
		ActionID:        actionID,
		RequestID:       "req-dry-run-only",
		Allowed:         true,
		ApprovalPolicy:  unified.ApprovalDryRun,
		PlannedAt:       plannedAt,
		ExpiresAt:       now.Add(5 * time.Minute),
		ResourceVersion: "resource:sha256:test",
		PolicyVersion:   "policy:sha256:test",
		PlanHash:        "sha256:test",
	}
	record := unified.ActionAuditRecord{
		ID:        actionID,
		CreatedAt: plannedAt,
		UpdatedAt: now,
		State:     unified.ActionStatePlanned,
		Request:   request,
		Plan:      plan,
	}
	if err := store.RecordActionAudit(record); err != nil {
		t.Fatalf("RecordActionAudit: %v", err)
	}

	httpReq := httptest.NewRequest(http.MethodPost, "/api/actions/"+actionID+"/execute", bytes.NewBufferString(`{}`))
	httpReq.SetPathValue("id", actionID)
	httpReq = httpReq.WithContext(auth.WithUser(httpReq.Context(), "operator@example.com"))
	httpRec := httptest.NewRecorder()

	h.HandleExecuteAction(httpRec, httpReq)

	body := httpRec.Body.String()
	if httpRec.Code != http.StatusConflict {
		t.Fatalf("execute status = %d, body=%s", httpRec.Code, body)
	}
	if !strings.Contains(body, `"code":"action_dry_run_only"`) {
		t.Fatalf("execute body = %s", body)
	}
	if executor.calls != 0 {
		t.Fatalf("dry-run-only plan should not call executor, calls=%d received=%#v", executor.calls, executor.received)
	}

	got, ok, err := store.GetActionAudit(actionID)
	if err != nil {
		t.Fatalf("GetActionAudit: %v", err)
	}
	// ok is tested first so the record fields are only read when it exists.
	if unchanged := ok && got.State == unified.ActionStatePlanned && got.Result == nil; !unchanged {
		t.Fatalf("dry-run-only audit changed = %#v, ok=%v", got, ok)
	}

	events, err := store.GetActionLifecycleEvents(actionID, time.Time{}, 10)
	if err != nil {
		t.Fatalf("GetActionLifecycleEvents: %v", err)
	}
	if len(events) != 0 {
		t.Fatalf("dry-run-only execution must not append lifecycle events: %#v", events)
	}
}
func TestPersistActionPlanAuditFillsMissingLifecycleState(t *testing.T) {
now := time.Date(2026, 5, 3, 10, 0, 0, 0, time.UTC)
store := unified.NewMemoryStore()

View file

@ -12019,6 +12019,109 @@ func TestContract_ActionExecutionJSONSnapshot(t *testing.T) {
assertJSONSnapshot(t, got, want)
}
// TestContract_ActionDryRunOnlyExecutionErrorJSONSnapshot pins the error
// payload returned by the execute handler for a plan with the ApprovalDryRun
// policy: HTTP 409 with code action_dry_run_only and a fixed message. It also
// checks that the stored audit record stays planned with no result and that no
// lifecycle events are appended.
func TestContract_ActionDryRunOnlyExecutionErrorJSONSnapshot(t *testing.T) {
	const actionID = "act_dry_run_contract"

	now := time.Now().UTC().Truncate(time.Second)
	plannedAt := now.Add(-time.Minute)

	h := NewResourceHandlers(&config.Config{DataPath: t.TempDir()})
	store, err := h.getStore("default")
	if err != nil {
		t.Fatalf("get store: %v", err)
	}

	preflight := &unifiedresources.ActionPreflight{
		Target:          "vm:42",
		CurrentState:    "web-42 is warning",
		IntendedChange:  "Dry-run only restart inspection",
		DryRunAvailable: true,
		DryRunSummary:   "Provider advertised dry-run only; no execution is allowed.",
		SafetyChecks: []string{
			"Dry-run-only plans are not executable.",
		},
		VerificationSteps: []string{
			"Review /api/audit/actions/act_dry_run_contract/events for lifecycle evidence.",
		},
		GeneratedAt: plannedAt,
	}
	record := unifiedresources.ActionAuditRecord{
		ID:        actionID,
		CreatedAt: plannedAt,
		UpdatedAt: plannedAt,
		State:     unifiedresources.ActionStatePlanned,
		Request: unifiedresources.ActionRequest{
			RequestID:      "agent-run-dry-run",
			ResourceID:     "vm:42",
			CapabilityName: "restart",
			Reason:         "Inspect possible restart remediation",
			RequestedBy:    "agent:oncall-helper",
		},
		Plan: unifiedresources.ActionPlan{
			ActionID:          actionID,
			RequestID:         "agent-run-dry-run",
			Allowed:           true,
			RequiresApproval:  false,
			ApprovalPolicy:    unifiedresources.ApprovalDryRun,
			RollbackAvailable: false,
			PlannedAt:         plannedAt,
			ExpiresAt:         now.Add(4 * time.Minute),
			ResourceVersion:   "resource:sha256:dry-run-contract",
			PolicyVersion:     "policy:sha256:dry-run-contract",
			PlanHash:          "sha256:dry-run-contract",
			Preflight:         preflight,
		},
	}
	if err := store.RecordActionAudit(record); err != nil {
		t.Fatalf("record action audit: %v", err)
	}

	httpRec := httptest.NewRecorder()
	httpReq := httptest.NewRequest(http.MethodPost, "/api/actions/"+actionID+"/execute", bytes.NewBufferString(`{}`))
	httpReq.SetPathValue("id", actionID)
	h.HandleExecuteAction(httpRec, httpReq)

	if httpRec.Code != http.StatusConflict {
		t.Fatalf("status = %d, want %d; body=%s", httpRec.Code, http.StatusConflict, httpRec.Body.String())
	}

	var apiErr APIError
	if err := json.Unmarshal(httpRec.Body.Bytes(), &apiErr); err != nil {
		t.Fatalf("decode dry-run execution error: %v", err)
	}

	// Project only the contract-relevant fields into a fixed shape so the
	// snapshot ignores anything else the error envelope may carry.
	var contract struct {
		Status int `json:"status"`
		Error  struct {
			Code    string `json:"code"`
			Message string `json:"message"`
		} `json:"error"`
	}
	contract.Status = httpRec.Code
	contract.Error.Code = apiErr.Code
	contract.Error.Message = apiErr.ErrorMessage

	got, err := json.Marshal(contract)
	if err != nil {
		t.Fatalf("marshal dry-run execution error contract: %v", err)
	}
	const want = `{
"status":409,
"error":{
"code":"action_dry_run_only",
"message":"Action plan is dry-run only and cannot be executed"
}
}`
	assertJSONSnapshot(t, got, want)

	gotAudit, ok, err := store.GetActionAudit(actionID)
	if err != nil {
		t.Fatalf("GetActionAudit: %v", err)
	}
	// ok is tested first so the record fields are only read when it exists.
	if untouched := ok && gotAudit.State == unifiedresources.ActionStatePlanned && gotAudit.Result == nil; !untouched {
		t.Fatalf("dry-run audit mutated: ok=%v state=%q result=%#v", ok, gotAudit.State, gotAudit.Result)
	}

	events, err := store.GetActionLifecycleEvents(actionID, time.Time{}, 10)
	if err != nil {
		t.Fatalf("GetActionLifecycleEvents: %v", err)
	}
	if n := len(events); n != 0 {
		t.Fatalf("dry-run execution should not append lifecycle events, got %d", n)
	}
}
func TestContract_ResourceTimelineEndpointsIncludeRelatedChanges(t *testing.T) {
now := time.Date(2026, 4, 25, 22, 15, 0, 0, time.UTC)
h := NewResourceHandlers(&config.Config{DataPath: t.TempDir()})

View file

@ -133,6 +133,7 @@ var (
ErrActionAlreadyExecuting = errors.New("action is already executing")
ErrActionExecutionFinal = errors.New("action execution is already final")
ErrActionPlanExpired = errors.New("action plan expired")
ErrActionDryRunOnly = errors.New("action plan is dry-run only")
ErrInvalidApprovalOutcome = errors.New("invalid approval outcome")
)
@ -297,6 +298,9 @@ func ValidateActionExecutionStart(record ActionAuditRecord, now time.Time) error
if !record.Plan.ExpiresAt.IsZero() && !now.Before(record.Plan.ExpiresAt) {
return ErrActionPlanExpired
}
if record.Plan.ApprovalPolicy == ApprovalDryRun {
return ErrActionDryRunOnly
}
switch record.State {
case ActionStateApproved:
return nil

View file

@ -263,6 +263,13 @@ func TestBeginActionExecutionRejectsUnsafeStates(t *testing.T) {
if _, _, err := BeginActionExecution(expired, "operator@example.com", now); !errors.Is(err, ErrActionPlanExpired) {
t.Fatalf("expired error = %v, want %v", err, ErrActionPlanExpired)
}
dryRunOnly := base
dryRunOnly.State = ActionStatePlanned
dryRunOnly.Plan.RequiresApproval = false
dryRunOnly.Plan.ApprovalPolicy = ApprovalDryRun
if _, _, err := BeginActionExecution(dryRunOnly, "operator@example.com", now); !errors.Is(err, ErrActionDryRunOnly) {
t.Fatalf("dry-run-only error = %v, want %v", err, ErrActionDryRunOnly)
}
}
func TestCompleteActionExecutionRecordsResult(t *testing.T) {

View file

@ -1520,6 +1520,67 @@ func TestRecordActionExecutionStartAndResult_UpdatesAuditAndAppendsLifecycle(t *
}
}
// TestRecordActionExecutionStartRejectsDryRunOnlyPlan persists a planned audit
// record whose plan uses the ApprovalDryRun policy, then hands the store a copy
// already flipped to the executing state. It expects ErrActionDryRunOnly and
// checks that the stored record is still planned with no result and that no
// lifecycle events were appended.
func TestRecordActionExecutionStartRejectsDryRunOnlyPlan(t *testing.T) {
	const actionID = "act_dry_run_only"

	store := newTestStore(t)
	now := time.Date(2026, 5, 4, 15, 0, 0, 0, time.UTC)
	plannedAt := now.Add(-time.Minute)

	record := ActionAuditRecord{
		ID:        actionID,
		CreatedAt: plannedAt,
		UpdatedAt: plannedAt,
		State:     ActionStatePlanned,
		Request: ActionRequest{
			RequestID:      "req-dry-run",
			ResourceID:     "vm:404",
			CapabilityName: "restart",
			Reason:         "dry-run validation",
			RequestedBy:    "agent:test",
		},
		Plan: ActionPlan{
			ActionID:        actionID,
			RequestID:       "req-dry-run",
			Allowed:         true,
			ApprovalPolicy:  ApprovalDryRun,
			PlannedAt:       plannedAt,
			ExpiresAt:       now.Add(5 * time.Minute),
			ResourceVersion: "resource:sha256:test",
			PolicyVersion:   "policy:sha256:test",
			PlanHash:        "sha256:test",
		},
	}
	if err := store.RecordActionAudit(record); err != nil {
		t.Fatalf("RecordActionAudit: %v", err)
	}

	// Pass a record that already claims the executing state, so the store
	// cannot rely on the caller having validated the transition.
	forcedExecuting := record
	forcedExecuting.UpdatedAt = now
	forcedExecuting.State = ActionStateExecuting
	startEvent := ActionLifecycleEvent{
		ActionID:  record.ID,
		Timestamp: now,
		State:     ActionStateExecuting,
		Actor:     "agent:test",
		Message:   "should not execute",
	}
	if startErr := store.RecordActionExecutionStart(forcedExecuting, startEvent); !errors.Is(startErr, ErrActionDryRunOnly) {
		t.Fatalf("RecordActionExecutionStart error = %v, want %v", startErr, ErrActionDryRunOnly)
	}

	got, ok, err := store.GetActionAudit(record.ID)
	if err != nil {
		t.Fatalf("GetActionAudit: %v", err)
	}
	// ok is tested first so the record fields are only read when it exists.
	if unchanged := ok && got.State == ActionStatePlanned && got.Result == nil; !unchanged {
		t.Fatalf("dry-run-only audit changed = %#v, ok=%v", got, ok)
	}

	events, err := store.GetActionLifecycleEvents(record.ID, time.Time{}, 10)
	if err != nil {
		t.Fatalf("GetActionLifecycleEvents: %v", err)
	}
	if len(events) != 0 {
		t.Fatalf("dry-run-only execution should not append events: %#v", events)
	}
}
func TestRecordActionAudit_NormalizesGovernedPlan(t *testing.T) {
store := newTestStore(t)
now := time.Date(2026, 4, 25, 22, 40, 0, 0, time.UTC)