mirror of
https://github.com/rcourtman/Pulse.git
synced 2026-04-28 03:20:11 +00:00
Tighten Pulse Cloud residue audit
This commit is contained in:
parent
ca9f2ceb85
commit
5fd6456302
10 changed files with 269 additions and 16 deletions
|
|
@ -337,6 +337,22 @@ func printCloudAuditReport(report *cloudcp.CloudAuditReport) {
|
|||
tenant.Age.Round(time.Second),
|
||||
)
|
||||
}
|
||||
fmt.Printf("proof_account_stale_count=%d\n", len(report.StaleProofAccounts))
|
||||
for _, account := range report.StaleProofAccounts {
|
||||
fmt.Printf("proof_account_stale=%s kind=%s age=%s\n",
|
||||
account.AccountID,
|
||||
account.Kind,
|
||||
account.Age.Round(time.Second),
|
||||
)
|
||||
}
|
||||
fmt.Printf("hosted_paid_orphan_entitlement_count=%d\n", len(report.OrphanPaidHostedEntitlements))
|
||||
for _, entitlement := range report.OrphanPaidHostedEntitlements {
|
||||
fmt.Printf("hosted_paid_orphan_entitlement=%s tenant_id=%s kind=%s\n",
|
||||
entitlement.EntitlementID,
|
||||
entitlement.TenantID,
|
||||
entitlement.Kind,
|
||||
)
|
||||
}
|
||||
for _, container := range report.ManagedRuntimeContainers {
|
||||
if container.State == "running" && (container.HealthStatus == "" || container.HealthStatus == "none" || container.HealthStatus == "healthy") {
|
||||
continue
|
||||
|
|
|
|||
|
|
@ -145,6 +145,9 @@ cloud-specific enforcement rules.
|
|||
filesystem, tenant data, Docker runtime store, and Docker build-cache
|
||||
thresholds are part of the Cloud paid readiness contract rather than an
|
||||
operator-only cleanup script.
|
||||
The same cloud audit contract must fail on stale proof/canary account rows
|
||||
and paid hosted entitlements whose tenant rows are missing, because either
|
||||
residue can recreate or mask hosted runtime state after a cleanup.
|
||||
10. `internal/cloudcp/tenant_runtime_rollout.go` shared with `deployment-installability`: hosted tenant runtime rollout is both a Pulse Cloud runtime contract boundary and a deployment-installability release-rollout boundary.
|
||||
Hosted tenant runtime reconciliation must treat a registered tenant with
|
||||
preserved tenant data but no live Docker runtime as a recoverable managed
|
||||
|
|
|
|||
|
|
@ -669,8 +669,9 @@ That deployment boundary also owns hosted storage admission: production
|
|||
control-plane deployments must mount host root and Docker runtime storage
|
||||
read-only for inspection, expose explicit root/data/Docker/build-cache
|
||||
thresholds, and provide `pulse-control-plane cloud audit` as the operator proof
|
||||
for tenant counts, unhealthy managed containers, disk pressure, and stale
|
||||
proof tenants before GA or rollout evidence is accepted.
|
||||
for tenant counts, unhealthy managed containers, disk pressure, stale proof
|
||||
tenants/accounts, and orphan paid hosted entitlements before GA or rollout
|
||||
evidence is accepted.
|
||||
That same verification contract also applies before Playwright attaches: if a
|
||||
managed hot-dev session is already running when the verify lock is active, the
|
||||
integration launcher must restart that session instead of silently attaching to
|
||||
|
|
|
|||
|
|
@ -20,19 +20,35 @@ type ProofTenantAuditItem struct {
|
|||
Age time.Duration
|
||||
}
|
||||
|
||||
type ProofAccountAuditItem struct {
|
||||
AccountID string
|
||||
Kind registry.AccountKind
|
||||
CreatedAt time.Time
|
||||
Age time.Duration
|
||||
}
|
||||
|
||||
type HostedEntitlementAuditItem struct {
|
||||
EntitlementID string
|
||||
TenantID string
|
||||
Kind registry.HostedEntitlementKind
|
||||
IssuedAt time.Time
|
||||
}
|
||||
|
||||
type CloudAuditReport struct {
|
||||
OK bool
|
||||
Failures []string
|
||||
Storage *StorageGuardrailReport
|
||||
TenantCounts map[registry.TenantState]int
|
||||
TenantTotal int
|
||||
RegistryUnhealthyActive int
|
||||
DockerManagedTotal int
|
||||
DockerManagedRunning int
|
||||
DockerManagedUnhealthy int
|
||||
DockerUnavailable string
|
||||
StaleProofTenants []ProofTenantAuditItem
|
||||
ManagedRuntimeContainers []cpDocker.RuntimeContainerSummary
|
||||
OK bool
|
||||
Failures []string
|
||||
Storage *StorageGuardrailReport
|
||||
TenantCounts map[registry.TenantState]int
|
||||
TenantTotal int
|
||||
RegistryUnhealthyActive int
|
||||
DockerManagedTotal int
|
||||
DockerManagedRunning int
|
||||
DockerManagedUnhealthy int
|
||||
DockerUnavailable string
|
||||
StaleProofTenants []ProofTenantAuditItem
|
||||
StaleProofAccounts []ProofAccountAuditItem
|
||||
OrphanPaidHostedEntitlements []HostedEntitlementAuditItem
|
||||
ManagedRuntimeContainers []cpDocker.RuntimeContainerSummary
|
||||
}
|
||||
|
||||
func AuditCloud(ctx context.Context, cfg *CPConfig) (*CloudAuditReport, error) {
|
||||
|
|
@ -54,11 +70,13 @@ func AuditCloud(ctx context.Context, cfg *CPConfig) (*CloudAuditReport, error) {
|
|||
if err != nil {
|
||||
return nil, fmt.Errorf("list tenants: %w", err)
|
||||
}
|
||||
tenantIDs := make(map[string]struct{}, len(tenants))
|
||||
report.TenantTotal = len(tenants)
|
||||
for _, tenant := range tenants {
|
||||
if tenant == nil {
|
||||
continue
|
||||
}
|
||||
tenantIDs[strings.TrimSpace(tenant.ID)] = struct{}{}
|
||||
report.TenantCounts[tenant.State]++
|
||||
if tenant.State == registry.TenantStateActive && !tenant.HealthCheckOK {
|
||||
report.RegistryUnhealthyActive++
|
||||
|
|
@ -72,6 +90,28 @@ func AuditCloud(ctx context.Context, cfg *CPConfig) (*CloudAuditReport, error) {
|
|||
report.addFailure(fmt.Sprintf("%d proof/canary tenants are older than %s", len(report.StaleProofTenants), cfg.ProofTenantMaxAge))
|
||||
}
|
||||
|
||||
entitlements, err := reg.ListHostedEntitlements()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("list hosted entitlements: %w", err)
|
||||
}
|
||||
report.OrphanPaidHostedEntitlements = findOrphanPaidHostedEntitlements(entitlements, tenantIDs)
|
||||
if len(report.OrphanPaidHostedEntitlements) > 0 {
|
||||
report.addFailure(fmt.Sprintf("%d paid hosted entitlements reference missing tenants", len(report.OrphanPaidHostedEntitlements)))
|
||||
}
|
||||
|
||||
accounts, err := reg.ListAccounts()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("list accounts: %w", err)
|
||||
}
|
||||
stripeAccounts, err := reg.ListStripeAccounts()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("list stripe accounts: %w", err)
|
||||
}
|
||||
report.StaleProofAccounts = findStaleProofAccounts(accounts, stripeAccounts, cfg.ProofTenantMatchers, cfg.ProofTenantMaxAge, time.Now().UTC())
|
||||
if len(report.StaleProofAccounts) > 0 {
|
||||
report.addFailure(fmt.Sprintf("%d proof/canary accounts are older than %s", len(report.StaleProofAccounts), cfg.ProofTenantMaxAge))
|
||||
}
|
||||
|
||||
dockerMgr, err := cpDocker.NewManager(cpDocker.ManagerConfig{
|
||||
Image: cfg.PulseImage,
|
||||
Network: cfg.DockerNetwork,
|
||||
|
|
@ -176,6 +216,77 @@ func findStaleProofTenants(tenants []*registry.Tenant, matchers []string, maxAge
|
|||
return items
|
||||
}
|
||||
|
||||
func findOrphanPaidHostedEntitlements(entitlements []*registry.HostedEntitlement, tenantIDs map[string]struct{}) []HostedEntitlementAuditItem {
|
||||
items := make([]HostedEntitlementAuditItem, 0)
|
||||
for _, entitlement := range entitlements {
|
||||
if entitlement == nil || entitlement.Kind != registry.HostedEntitlementKindPaid {
|
||||
continue
|
||||
}
|
||||
tenantID := strings.TrimSpace(entitlement.TenantID)
|
||||
if tenantID == "" {
|
||||
continue
|
||||
}
|
||||
if _, ok := tenantIDs[tenantID]; ok {
|
||||
continue
|
||||
}
|
||||
items = append(items, HostedEntitlementAuditItem{
|
||||
EntitlementID: strings.TrimSpace(entitlement.ID),
|
||||
TenantID: tenantID,
|
||||
Kind: entitlement.Kind,
|
||||
IssuedAt: entitlement.IssuedAt.UTC(),
|
||||
})
|
||||
}
|
||||
sort.Slice(items, func(i, j int) bool {
|
||||
if items[i].IssuedAt.Equal(items[j].IssuedAt) {
|
||||
return items[i].EntitlementID < items[j].EntitlementID
|
||||
}
|
||||
return items[i].IssuedAt.Before(items[j].IssuedAt)
|
||||
})
|
||||
return items
|
||||
}
|
||||
|
||||
func findStaleProofAccounts(accounts []*registry.Account, stripeAccounts []*registry.StripeAccount, matchers []string, maxAge time.Duration, now time.Time) []ProofAccountAuditItem {
|
||||
if maxAge <= 0 {
|
||||
return nil
|
||||
}
|
||||
stripeByAccount := make(map[string][]*registry.StripeAccount, len(stripeAccounts))
|
||||
for _, stripeAccount := range stripeAccounts {
|
||||
if stripeAccount == nil {
|
||||
continue
|
||||
}
|
||||
accountID := strings.TrimSpace(stripeAccount.AccountID)
|
||||
if accountID == "" {
|
||||
continue
|
||||
}
|
||||
stripeByAccount[accountID] = append(stripeByAccount[accountID], stripeAccount)
|
||||
}
|
||||
|
||||
cutoff := now.Add(-maxAge)
|
||||
items := make([]ProofAccountAuditItem, 0)
|
||||
for _, account := range accounts {
|
||||
if account == nil || account.CreatedAt.IsZero() || account.CreatedAt.After(cutoff) {
|
||||
continue
|
||||
}
|
||||
if !matchesProofAccount(account, stripeByAccount[account.ID], matchers) {
|
||||
continue
|
||||
}
|
||||
createdAt := account.CreatedAt.UTC()
|
||||
items = append(items, ProofAccountAuditItem{
|
||||
AccountID: strings.TrimSpace(account.ID),
|
||||
Kind: account.Kind,
|
||||
CreatedAt: createdAt,
|
||||
Age: now.Sub(createdAt),
|
||||
})
|
||||
}
|
||||
sort.Slice(items, func(i, j int) bool {
|
||||
if items[i].CreatedAt.Equal(items[j].CreatedAt) {
|
||||
return items[i].AccountID < items[j].AccountID
|
||||
}
|
||||
return items[i].CreatedAt.Before(items[j].CreatedAt)
|
||||
})
|
||||
return items
|
||||
}
|
||||
|
||||
func matchesProofTenant(tenant *registry.Tenant, matchers []string) bool {
|
||||
if tenant == nil {
|
||||
return false
|
||||
|
|
@ -202,3 +313,37 @@ func matchesProofTenant(tenant *registry.Tenant, matchers []string) bool {
|
|||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func matchesProofAccount(account *registry.Account, stripeAccounts []*registry.StripeAccount, matchers []string) bool {
|
||||
if account == nil {
|
||||
return false
|
||||
}
|
||||
parts := []string{
|
||||
account.ID,
|
||||
string(account.Kind),
|
||||
account.DisplayName,
|
||||
}
|
||||
for _, stripeAccount := range stripeAccounts {
|
||||
if stripeAccount == nil {
|
||||
continue
|
||||
}
|
||||
parts = append(parts,
|
||||
stripeAccount.StripeCustomerID,
|
||||
stripeAccount.StripeSubscriptionID,
|
||||
stripeAccount.StripeSubItemWorkspacesID,
|
||||
stripeAccount.PlanVersion,
|
||||
stripeAccount.SubscriptionState,
|
||||
)
|
||||
}
|
||||
haystack := strings.ToLower(strings.Join(parts, " "))
|
||||
for _, matcher := range matchers {
|
||||
matcher = strings.ToLower(strings.TrimSpace(matcher))
|
||||
if matcher == "" {
|
||||
continue
|
||||
}
|
||||
if strings.Contains(haystack, matcher) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
|
|
|||
|
|
@ -175,7 +175,7 @@ func LoadConfig() (*CPConfig, error) {
|
|||
StorageMinDockerAvailableBytes: storageMinDockerAvailable,
|
||||
StorageMaxDockerBuildCacheBytes: storageMaxDockerBuildCache,
|
||||
ProofTenantMaxAge: proofTenantMaxAge,
|
||||
ProofTenantMatchers: parseCSVEnv("CP_PROOF_TENANT_MATCHERS", "proof,canary,rehearsal"),
|
||||
ProofTenantMatchers: parseCSVEnv("CP_PROOF_TENANT_MATCHERS", "proof,canary,rehearsal,msp_prod,ownerseed,owner_seed"),
|
||||
StripeWebhookSecret: strings.TrimSpace(os.Getenv("STRIPE_WEBHOOK_SECRET")),
|
||||
StripeAPIKey: strings.TrimSpace(os.Getenv("STRIPE_API_KEY")),
|
||||
PublicCloudSignupEnabled: envOrDefaultBool("CP_PUBLIC_CLOUD_SIGNUP_ENABLED", false),
|
||||
|
|
|
|||
|
|
@ -165,6 +165,9 @@ func TestLoadConfig_EnablesStorageGuardrailsByDefaultInProduction(t *testing.T)
|
|||
if cfg.StorageDockerPath != "/var/lib/docker" {
|
||||
t.Fatalf("StorageDockerPath = %q, want /var/lib/docker", cfg.StorageDockerPath)
|
||||
}
|
||||
if got := strings.Join(cfg.ProofTenantMatchers, ","); got != "proof,canary,rehearsal,msp_prod,ownerseed,owner_seed" {
|
||||
t.Fatalf("ProofTenantMatchers = %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadConfig_InvalidStorageByteSize(t *testing.T) {
|
||||
|
|
|
|||
|
|
@ -1251,6 +1251,34 @@ func (r *TenantRegistry) GetHostedEntitlementByTrialRequestID(requestID string)
|
|||
return loadHostedEntitlement(row)
|
||||
}
|
||||
|
||||
// ListHostedEntitlements returns all hosted entitlement authority rows.
|
||||
func (r *TenantRegistry) ListHostedEntitlements() ([]*HostedEntitlement, error) {
|
||||
rows, err := r.db.Query(`
|
||||
SELECT id, kind, tenant_id, trial_request_id, org_id, email, return_url, instance_token, instance_host,
|
||||
trial_started_at, refresh_token, activation_token, issued_at, activation_issued_at, last_refreshed_at, redeemed_at, revoked_at
|
||||
FROM hosted_entitlements
|
||||
ORDER BY issued_at DESC`)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("list hosted entitlements: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var out []*HostedEntitlement
|
||||
for rows.Next() {
|
||||
rec, scanErr := loadHostedEntitlement(rows)
|
||||
if scanErr != nil {
|
||||
return nil, scanErr
|
||||
}
|
||||
if rec != nil {
|
||||
out = append(out, rec)
|
||||
}
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
return nil, fmt.Errorf("iterate hosted entitlements: %w", err)
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// MarkHostedEntitlementRefreshed records the last successful hosted entitlement refresh time.
|
||||
func (r *TenantRegistry) MarkHostedEntitlementRefreshed(id string, refreshedAt time.Time) error {
|
||||
id = strings.TrimSpace(id)
|
||||
|
|
|
|||
|
|
@ -1102,6 +1102,17 @@ func TestHostedEntitlementLookupAndIssue(t *testing.T) {
|
|||
if stored != "etr_paid_three" {
|
||||
t.Fatalf("stored token after revoke = %q, want %q", stored, "etr_paid_three")
|
||||
}
|
||||
|
||||
listed, err := reg.ListHostedEntitlements()
|
||||
if err != nil {
|
||||
t.Fatalf("ListHostedEntitlements: %v", err)
|
||||
}
|
||||
if len(listed) != 1 {
|
||||
t.Fatalf("len(ListHostedEntitlements) = %d, want 1", len(listed))
|
||||
}
|
||||
if listed[0].ID != paidHostedEntitlementID(tenant.ID) || listed[0].RefreshToken != "etr_paid_three" {
|
||||
t.Fatalf("listed entitlement = %#v, want current paid entitlement", listed[0])
|
||||
}
|
||||
}
|
||||
|
||||
func TestTenantRegistryCanonicalizesTenantPlanVersion(t *testing.T) {
|
||||
|
|
|
|||
|
|
@ -119,3 +119,45 @@ func TestFindStaleProofTenantsUsesConfiguredMatchersAndAge(t *testing.T) {
|
|||
t.Fatalf("stale[0].TenantID = %q, want t-OLDPROOF", stale[0].TenantID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindOrphanPaidHostedEntitlementsFlagsMissingTenants(t *testing.T) {
|
||||
issuedAt := time.Date(2026, 4, 24, 12, 0, 0, 0, time.UTC)
|
||||
entitlements := []*registry.HostedEntitlement{
|
||||
{ID: "paid:t-ACTIVE", Kind: registry.HostedEntitlementKindPaid, TenantID: "t-ACTIVE", IssuedAt: issuedAt},
|
||||
{ID: "paid:t-MISSING", Kind: registry.HostedEntitlementKindPaid, TenantID: "t-MISSING", IssuedAt: issuedAt.Add(-time.Hour)},
|
||||
{ID: "trial:req", Kind: registry.HostedEntitlementKindTrial, TenantID: "", IssuedAt: issuedAt},
|
||||
}
|
||||
|
||||
orphaned := findOrphanPaidHostedEntitlements(entitlements, map[string]struct{}{
|
||||
"t-ACTIVE": {},
|
||||
})
|
||||
if len(orphaned) != 1 {
|
||||
t.Fatalf("len(orphaned) = %d, want 1 (%v)", len(orphaned), orphaned)
|
||||
}
|
||||
if orphaned[0].EntitlementID != "paid:t-MISSING" {
|
||||
t.Fatalf("orphaned[0].EntitlementID = %q, want paid:t-MISSING", orphaned[0].EntitlementID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindStaleProofAccountsUsesAccountAndStripeMatchers(t *testing.T) {
|
||||
now := time.Date(2026, 4, 24, 12, 0, 0, 0, time.UTC)
|
||||
old := now.Add(-48 * time.Hour)
|
||||
fresh := now.Add(-1 * time.Hour)
|
||||
accounts := []*registry.Account{
|
||||
{ID: "a_rehearsal_old", Kind: registry.AccountKindMSP, DisplayName: "Production Rehearsal", CreatedAt: old},
|
||||
{ID: "a_customer", Kind: registry.AccountKindIndividual, DisplayName: "Customer", CreatedAt: old},
|
||||
{ID: "a_stripe_old", Kind: registry.AccountKindMSP, DisplayName: "Pulse", CreatedAt: old},
|
||||
{ID: "a_rehearsal_fresh", Kind: registry.AccountKindMSP, DisplayName: "Canary", CreatedAt: fresh},
|
||||
}
|
||||
stripeAccounts := []*registry.StripeAccount{
|
||||
{AccountID: "a_stripe_old", StripeCustomerID: "cus_msp_rehearsal_123", PlanVersion: "msp_starter"},
|
||||
}
|
||||
|
||||
stale := findStaleProofAccounts(accounts, stripeAccounts, []string{"canary", "rehearsal"}, 24*time.Hour, now)
|
||||
if len(stale) != 2 {
|
||||
t.Fatalf("len(stale) = %d, want 2 (%v)", len(stale), stale)
|
||||
}
|
||||
if stale[0].AccountID != "a_rehearsal_old" || stale[1].AccountID != "a_stripe_old" {
|
||||
t.Fatalf("stale account order = %#v, want rehearsal then stripe", stale)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -408,7 +408,8 @@ func TestTenantRuntimeRollout_AdmissionFailureStopsMissingRuntimeRestore(t *test
|
|||
tenant := &registry.Tenant{ID: "t-ADMIT02", ContainerID: "removed-container"}
|
||||
reg := &fakeTenantRuntimeRolloutRegistry{tenant: tenant}
|
||||
docker := newFakeTenantRuntimeRolloutDocker()
|
||||
service := newTestTenantRuntimeRolloutService(reg, docker, &fakeTenantRuntimeRolloutSynchronizer{}, newFakeTenantRuntimeRolloutClock())
|
||||
sync := &fakeTenantRuntimeRolloutSynchronizer{}
|
||||
service := newTestTenantRuntimeRolloutService(reg, docker, sync, newFakeTenantRuntimeRolloutClock())
|
||||
service.admissionCheck = func(context.Context) error {
|
||||
return errors.New("storage pressure")
|
||||
}
|
||||
|
|
@ -423,6 +424,9 @@ func TestTenantRuntimeRollout_AdmissionFailureStopsMissingRuntimeRestore(t *test
|
|||
if len(docker.createCalls) != 0 {
|
||||
t.Fatalf("create call count = %d, want 0", len(docker.createCalls))
|
||||
}
|
||||
if len(sync.restores) != 0 {
|
||||
t.Fatalf("restore count = %d, want 0", len(sync.restores))
|
||||
}
|
||||
}
|
||||
|
||||
func TestTenantRuntimeContractReconcilePlan_AllTenantsClassifiesNoopRolloutAndSkip(t *testing.T) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue