diff --git a/cmd/pulse-control-plane/main.go b/cmd/pulse-control-plane/main.go index 25e0bcb27..3b85024e5 100644 --- a/cmd/pulse-control-plane/main.go +++ b/cmd/pulse-control-plane/main.go @@ -337,6 +337,22 @@ func printCloudAuditReport(report *cloudcp.CloudAuditReport) { tenant.Age.Round(time.Second), ) } + fmt.Printf("proof_account_stale_count=%d\n", len(report.StaleProofAccounts)) + for _, account := range report.StaleProofAccounts { + fmt.Printf("proof_account_stale=%s kind=%s age=%s\n", + account.AccountID, + account.Kind, + account.Age.Round(time.Second), + ) + } + fmt.Printf("hosted_paid_orphan_entitlement_count=%d\n", len(report.OrphanPaidHostedEntitlements)) + for _, entitlement := range report.OrphanPaidHostedEntitlements { + fmt.Printf("hosted_paid_orphan_entitlement=%s tenant_id=%s kind=%s\n", + entitlement.EntitlementID, + entitlement.TenantID, + entitlement.Kind, + ) + } for _, container := range report.ManagedRuntimeContainers { if container.State == "running" && (container.HealthStatus == "" || container.HealthStatus == "none" || container.HealthStatus == "healthy") { continue diff --git a/docs/release-control/v6/internal/subsystems/cloud-paid.md b/docs/release-control/v6/internal/subsystems/cloud-paid.md index 19a784285..d227f5378 100644 --- a/docs/release-control/v6/internal/subsystems/cloud-paid.md +++ b/docs/release-control/v6/internal/subsystems/cloud-paid.md @@ -145,6 +145,9 @@ cloud-specific enforcement rules. filesystem, tenant data, Docker runtime store, and Docker build-cache thresholds are part of the Cloud paid readiness contract rather than an operator-only cleanup script. + The same cloud audit contract must fail on stale proof/canary account rows + and paid hosted entitlements whose tenant rows are missing, because either + residue can recreate or mask hosted runtime state after a cleanup. 10. 
`internal/cloudcp/tenant_runtime_rollout.go` shared with `deployment-installability`: hosted tenant runtime rollout is both a Pulse Cloud runtime contract boundary and a deployment-installability release-rollout boundary. Hosted tenant runtime reconciliation must treat a registered tenant with preserved tenant data but no live Docker runtime as a recoverable managed diff --git a/docs/release-control/v6/internal/subsystems/deployment-installability.md b/docs/release-control/v6/internal/subsystems/deployment-installability.md index 7f1327cd3..2238b6562 100644 --- a/docs/release-control/v6/internal/subsystems/deployment-installability.md +++ b/docs/release-control/v6/internal/subsystems/deployment-installability.md @@ -669,8 +669,9 @@ That deployment boundary also owns hosted storage admission: production control-plane deployments must mount host root and Docker runtime storage read-only for inspection, expose explicit root/data/Docker/build-cache thresholds, and provide `pulse-control-plane cloud audit` as the operator proof -for tenant counts, unhealthy managed containers, disk pressure, and stale -proof tenants before GA or rollout evidence is accepted. +for tenant counts, unhealthy managed containers, disk pressure, stale proof +tenants/accounts, and orphan paid hosted entitlements before GA or rollout +evidence is accepted. 
// ProofAccountAuditItem identifies one account listed in
// CloudAuditReport.StaleProofAccounts.
type ProofAccountAuditItem struct {
	// AccountID is the account ID with surrounding whitespace trimmed.
	AccountID string
	Kind      registry.AccountKind
	// CreatedAt is the account creation time converted to UTC.
	CreatedAt time.Time
	// Age is the audit time minus CreatedAt.
	Age time.Duration
}

// HostedEntitlementAuditItem identifies one hosted entitlement listed in
// CloudAuditReport.OrphanPaidHostedEntitlements.
type HostedEntitlementAuditItem struct {
	// EntitlementID and TenantID are stored with surrounding whitespace trimmed.
	EntitlementID string
	TenantID      string
	Kind          registry.HostedEntitlementKind
	// IssuedAt is the entitlement issue time converted to UTC.
	IssuedAt time.Time
}

// CloudAuditReport is the result returned by AuditCloud.
type CloudAuditReport struct {
	OK       bool
	Failures []string
	Storage  *StorageGuardrailReport
	// TenantCounts holds the number of non-nil registry tenants per state.
	TenantCounts map[registry.TenantState]int
	// TenantTotal is the length of the tenant list returned by the registry.
	TenantTotal int
	// RegistryUnhealthyActive counts active tenants whose HealthCheckOK is false.
	RegistryUnhealthyActive int
	DockerManagedTotal      int
	DockerManagedRunning    int
	DockerManagedUnhealthy  int
	DockerUnavailable       string
	StaleProofTenants       []ProofTenantAuditItem
	// StaleProofAccounts lists accounts matching the proof matchers that are
	// older than the configured maximum age; a non-empty list adds a failure.
	StaleProofAccounts []ProofAccountAuditItem
	// OrphanPaidHostedEntitlements lists paid hosted entitlements whose tenant
	// ID is absent from the registry tenant list; a non-empty list adds a failure.
	OrphanPaidHostedEntitlements []HostedEntitlementAuditItem
	ManagedRuntimeContainers     []cpDocker.RuntimeContainerSummary
}
tenant := range tenants { if tenant == nil { continue } + tenantIDs[strings.TrimSpace(tenant.ID)] = struct{}{} report.TenantCounts[tenant.State]++ if tenant.State == registry.TenantStateActive && !tenant.HealthCheckOK { report.RegistryUnhealthyActive++ @@ -72,6 +90,28 @@ func AuditCloud(ctx context.Context, cfg *CPConfig) (*CloudAuditReport, error) { report.addFailure(fmt.Sprintf("%d proof/canary tenants are older than %s", len(report.StaleProofTenants), cfg.ProofTenantMaxAge)) } + entitlements, err := reg.ListHostedEntitlements() + if err != nil { + return nil, fmt.Errorf("list hosted entitlements: %w", err) + } + report.OrphanPaidHostedEntitlements = findOrphanPaidHostedEntitlements(entitlements, tenantIDs) + if len(report.OrphanPaidHostedEntitlements) > 0 { + report.addFailure(fmt.Sprintf("%d paid hosted entitlements reference missing tenants", len(report.OrphanPaidHostedEntitlements))) + } + + accounts, err := reg.ListAccounts() + if err != nil { + return nil, fmt.Errorf("list accounts: %w", err) + } + stripeAccounts, err := reg.ListStripeAccounts() + if err != nil { + return nil, fmt.Errorf("list stripe accounts: %w", err) + } + report.StaleProofAccounts = findStaleProofAccounts(accounts, stripeAccounts, cfg.ProofTenantMatchers, cfg.ProofTenantMaxAge, time.Now().UTC()) + if len(report.StaleProofAccounts) > 0 { + report.addFailure(fmt.Sprintf("%d proof/canary accounts are older than %s", len(report.StaleProofAccounts), cfg.ProofTenantMaxAge)) + } + dockerMgr, err := cpDocker.NewManager(cpDocker.ManagerConfig{ Image: cfg.PulseImage, Network: cfg.DockerNetwork, @@ -176,6 +216,77 @@ func findStaleProofTenants(tenants []*registry.Tenant, matchers []string, maxAge return items } +func findOrphanPaidHostedEntitlements(entitlements []*registry.HostedEntitlement, tenantIDs map[string]struct{}) []HostedEntitlementAuditItem { + items := make([]HostedEntitlementAuditItem, 0) + for _, entitlement := range entitlements { + if entitlement == nil || entitlement.Kind != 
registry.HostedEntitlementKindPaid { + continue + } + tenantID := strings.TrimSpace(entitlement.TenantID) + if tenantID == "" { + continue + } + if _, ok := tenantIDs[tenantID]; ok { + continue + } + items = append(items, HostedEntitlementAuditItem{ + EntitlementID: strings.TrimSpace(entitlement.ID), + TenantID: tenantID, + Kind: entitlement.Kind, + IssuedAt: entitlement.IssuedAt.UTC(), + }) + } + sort.Slice(items, func(i, j int) bool { + if items[i].IssuedAt.Equal(items[j].IssuedAt) { + return items[i].EntitlementID < items[j].EntitlementID + } + return items[i].IssuedAt.Before(items[j].IssuedAt) + }) + return items +} + +func findStaleProofAccounts(accounts []*registry.Account, stripeAccounts []*registry.StripeAccount, matchers []string, maxAge time.Duration, now time.Time) []ProofAccountAuditItem { + if maxAge <= 0 { + return nil + } + stripeByAccount := make(map[string][]*registry.StripeAccount, len(stripeAccounts)) + for _, stripeAccount := range stripeAccounts { + if stripeAccount == nil { + continue + } + accountID := strings.TrimSpace(stripeAccount.AccountID) + if accountID == "" { + continue + } + stripeByAccount[accountID] = append(stripeByAccount[accountID], stripeAccount) + } + + cutoff := now.Add(-maxAge) + items := make([]ProofAccountAuditItem, 0) + for _, account := range accounts { + if account == nil || account.CreatedAt.IsZero() || account.CreatedAt.After(cutoff) { + continue + } + if !matchesProofAccount(account, stripeByAccount[account.ID], matchers) { + continue + } + createdAt := account.CreatedAt.UTC() + items = append(items, ProofAccountAuditItem{ + AccountID: strings.TrimSpace(account.ID), + Kind: account.Kind, + CreatedAt: createdAt, + Age: now.Sub(createdAt), + }) + } + sort.Slice(items, func(i, j int) bool { + if items[i].CreatedAt.Equal(items[j].CreatedAt) { + return items[i].AccountID < items[j].AccountID + } + return items[i].CreatedAt.Before(items[j].CreatedAt) + }) + return items +} + func matchesProofTenant(tenant *registry.Tenant, 
matchers []string) bool { if tenant == nil { return false @@ -202,3 +313,37 @@ func matchesProofTenant(tenant *registry.Tenant, matchers []string) bool { } return false } + +func matchesProofAccount(account *registry.Account, stripeAccounts []*registry.StripeAccount, matchers []string) bool { + if account == nil { + return false + } + parts := []string{ + account.ID, + string(account.Kind), + account.DisplayName, + } + for _, stripeAccount := range stripeAccounts { + if stripeAccount == nil { + continue + } + parts = append(parts, + stripeAccount.StripeCustomerID, + stripeAccount.StripeSubscriptionID, + stripeAccount.StripeSubItemWorkspacesID, + stripeAccount.PlanVersion, + stripeAccount.SubscriptionState, + ) + } + haystack := strings.ToLower(strings.Join(parts, " ")) + for _, matcher := range matchers { + matcher = strings.ToLower(strings.TrimSpace(matcher)) + if matcher == "" { + continue + } + if strings.Contains(haystack, matcher) { + return true + } + } + return false +} diff --git a/internal/cloudcp/config.go b/internal/cloudcp/config.go index 2cafd075a..e5355a618 100644 --- a/internal/cloudcp/config.go +++ b/internal/cloudcp/config.go @@ -175,7 +175,7 @@ func LoadConfig() (*CPConfig, error) { StorageMinDockerAvailableBytes: storageMinDockerAvailable, StorageMaxDockerBuildCacheBytes: storageMaxDockerBuildCache, ProofTenantMaxAge: proofTenantMaxAge, - ProofTenantMatchers: parseCSVEnv("CP_PROOF_TENANT_MATCHERS", "proof,canary,rehearsal"), + ProofTenantMatchers: parseCSVEnv("CP_PROOF_TENANT_MATCHERS", "proof,canary,rehearsal,msp_prod,ownerseed,owner_seed"), StripeWebhookSecret: strings.TrimSpace(os.Getenv("STRIPE_WEBHOOK_SECRET")), StripeAPIKey: strings.TrimSpace(os.Getenv("STRIPE_API_KEY")), PublicCloudSignupEnabled: envOrDefaultBool("CP_PUBLIC_CLOUD_SIGNUP_ENABLED", false), diff --git a/internal/cloudcp/config_test.go b/internal/cloudcp/config_test.go index 56f9ecc16..01bf841ba 100644 --- a/internal/cloudcp/config_test.go +++ b/internal/cloudcp/config_test.go 
@@ -165,6 +165,9 @@ func TestLoadConfig_EnablesStorageGuardrailsByDefaultInProduction(t *testing.T) if cfg.StorageDockerPath != "/var/lib/docker" { t.Fatalf("StorageDockerPath = %q, want /var/lib/docker", cfg.StorageDockerPath) } + if got := strings.Join(cfg.ProofTenantMatchers, ","); got != "proof,canary,rehearsal,msp_prod,ownerseed,owner_seed" { + t.Fatalf("ProofTenantMatchers = %q", got) + } } func TestLoadConfig_InvalidStorageByteSize(t *testing.T) { diff --git a/internal/cloudcp/registry/registry.go b/internal/cloudcp/registry/registry.go index 25def93f6..e4efc20a9 100644 --- a/internal/cloudcp/registry/registry.go +++ b/internal/cloudcp/registry/registry.go @@ -1251,6 +1251,34 @@ func (r *TenantRegistry) GetHostedEntitlementByTrialRequestID(requestID string) return loadHostedEntitlement(row) } +// ListHostedEntitlements returns all hosted entitlement authority rows. +func (r *TenantRegistry) ListHostedEntitlements() ([]*HostedEntitlement, error) { + rows, err := r.db.Query(` + SELECT id, kind, tenant_id, trial_request_id, org_id, email, return_url, instance_token, instance_host, + trial_started_at, refresh_token, activation_token, issued_at, activation_issued_at, last_refreshed_at, redeemed_at, revoked_at + FROM hosted_entitlements + ORDER BY issued_at DESC`) + if err != nil { + return nil, fmt.Errorf("list hosted entitlements: %w", err) + } + defer rows.Close() + + var out []*HostedEntitlement + for rows.Next() { + rec, scanErr := loadHostedEntitlement(rows) + if scanErr != nil { + return nil, scanErr + } + if rec != nil { + out = append(out, rec) + } + } + if err := rows.Err(); err != nil { + return nil, fmt.Errorf("iterate hosted entitlements: %w", err) + } + return out, nil +} + // MarkHostedEntitlementRefreshed records the last successful hosted entitlement refresh time. 
func (r *TenantRegistry) MarkHostedEntitlementRefreshed(id string, refreshedAt time.Time) error { id = strings.TrimSpace(id) diff --git a/internal/cloudcp/registry/registry_test.go b/internal/cloudcp/registry/registry_test.go index 55b3c97e4..452fa43c5 100644 --- a/internal/cloudcp/registry/registry_test.go +++ b/internal/cloudcp/registry/registry_test.go @@ -1102,6 +1102,17 @@ func TestHostedEntitlementLookupAndIssue(t *testing.T) { if stored != "etr_paid_three" { t.Fatalf("stored token after revoke = %q, want %q", stored, "etr_paid_three") } + + listed, err := reg.ListHostedEntitlements() + if err != nil { + t.Fatalf("ListHostedEntitlements: %v", err) + } + if len(listed) != 1 { + t.Fatalf("len(ListHostedEntitlements) = %d, want 1", len(listed)) + } + if listed[0].ID != paidHostedEntitlementID(tenant.ID) || listed[0].RefreshToken != "etr_paid_three" { + t.Fatalf("listed entitlement = %#v, want current paid entitlement", listed[0]) + } } func TestTenantRegistryCanonicalizesTenantPlanVersion(t *testing.T) { diff --git a/internal/cloudcp/storage_admission_test.go b/internal/cloudcp/storage_admission_test.go index ca2d4052d..be29c6460 100644 --- a/internal/cloudcp/storage_admission_test.go +++ b/internal/cloudcp/storage_admission_test.go @@ -119,3 +119,45 @@ func TestFindStaleProofTenantsUsesConfiguredMatchersAndAge(t *testing.T) { t.Fatalf("stale[0].TenantID = %q, want t-OLDPROOF", stale[0].TenantID) } } + +func TestFindOrphanPaidHostedEntitlementsFlagsMissingTenants(t *testing.T) { + issuedAt := time.Date(2026, 4, 24, 12, 0, 0, 0, time.UTC) + entitlements := []*registry.HostedEntitlement{ + {ID: "paid:t-ACTIVE", Kind: registry.HostedEntitlementKindPaid, TenantID: "t-ACTIVE", IssuedAt: issuedAt}, + {ID: "paid:t-MISSING", Kind: registry.HostedEntitlementKindPaid, TenantID: "t-MISSING", IssuedAt: issuedAt.Add(-time.Hour)}, + {ID: "trial:req", Kind: registry.HostedEntitlementKindTrial, TenantID: "", IssuedAt: issuedAt}, + } + + orphaned := 
findOrphanPaidHostedEntitlements(entitlements, map[string]struct{}{ + "t-ACTIVE": {}, + }) + if len(orphaned) != 1 { + t.Fatalf("len(orphaned) = %d, want 1 (%v)", len(orphaned), orphaned) + } + if orphaned[0].EntitlementID != "paid:t-MISSING" { + t.Fatalf("orphaned[0].EntitlementID = %q, want paid:t-MISSING", orphaned[0].EntitlementID) + } +} + +func TestFindStaleProofAccountsUsesAccountAndStripeMatchers(t *testing.T) { + now := time.Date(2026, 4, 24, 12, 0, 0, 0, time.UTC) + old := now.Add(-48 * time.Hour) + fresh := now.Add(-1 * time.Hour) + accounts := []*registry.Account{ + {ID: "a_rehearsal_old", Kind: registry.AccountKindMSP, DisplayName: "Production Rehearsal", CreatedAt: old}, + {ID: "a_customer", Kind: registry.AccountKindIndividual, DisplayName: "Customer", CreatedAt: old}, + {ID: "a_stripe_old", Kind: registry.AccountKindMSP, DisplayName: "Pulse", CreatedAt: old}, + {ID: "a_rehearsal_fresh", Kind: registry.AccountKindMSP, DisplayName: "Canary", CreatedAt: fresh}, + } + stripeAccounts := []*registry.StripeAccount{ + {AccountID: "a_stripe_old", StripeCustomerID: "cus_msp_rehearsal_123", PlanVersion: "msp_starter"}, + } + + stale := findStaleProofAccounts(accounts, stripeAccounts, []string{"canary", "rehearsal"}, 24*time.Hour, now) + if len(stale) != 2 { + t.Fatalf("len(stale) = %d, want 2 (%v)", len(stale), stale) + } + if stale[0].AccountID != "a_rehearsal_old" || stale[1].AccountID != "a_stripe_old" { + t.Fatalf("stale account order = %#v, want rehearsal then stripe", stale) + } +} diff --git a/internal/cloudcp/tenant_runtime_rollout_test.go b/internal/cloudcp/tenant_runtime_rollout_test.go index 6915f275d..627161cf5 100644 --- a/internal/cloudcp/tenant_runtime_rollout_test.go +++ b/internal/cloudcp/tenant_runtime_rollout_test.go @@ -408,7 +408,8 @@ func TestTenantRuntimeRollout_AdmissionFailureStopsMissingRuntimeRestore(t *test tenant := ®istry.Tenant{ID: "t-ADMIT02", ContainerID: "removed-container"} reg := &fakeTenantRuntimeRolloutRegistry{tenant: 
tenant} docker := newFakeTenantRuntimeRolloutDocker() - service := newTestTenantRuntimeRolloutService(reg, docker, &fakeTenantRuntimeRolloutSynchronizer{}, newFakeTenantRuntimeRolloutClock()) + sync := &fakeTenantRuntimeRolloutSynchronizer{} + service := newTestTenantRuntimeRolloutService(reg, docker, sync, newFakeTenantRuntimeRolloutClock()) service.admissionCheck = func(context.Context) error { return errors.New("storage pressure") } @@ -423,6 +424,9 @@ func TestTenantRuntimeRollout_AdmissionFailureStopsMissingRuntimeRestore(t *test if len(docker.createCalls) != 0 { t.Fatalf("create call count = %d, want 0", len(docker.createCalls)) } + if len(sync.restores) != 0 { + t.Fatalf("restore count = %d, want 0", len(sync.restores)) + } } func TestTenantRuntimeContractReconcilePlan_AllTenantsClassifiesNoopRolloutAndSkip(t *testing.T) {