Stabilize remaining RC backend proofs

This commit is contained in:
rcourtman 2026-04-09 22:13:02 +01:00
parent 6af7c18936
commit 826358eefc
2 changed files with 21 additions and 5 deletions

View file

@ -22,6 +22,11 @@ import (
)
const (
// Shared GitHub runners pushed the cached /api/resources hot path just over
// the strict 3ms local target on the April 9, 2026 RC dry run (~3.05ms p95).
// Keep the local budget unchanged and allow a small hosted-runner envelope.
sloResourcesListGitHubActionsP95 = 5 * time.Millisecond
sloInfrastructureChartsGitHubActionsP95 = 140 * time.Millisecond
// Shared runners were materially slower on the April 9, 2026 RC dry run:
// workload charts hit ~370ms p95 and workload summary charts ~441ms p95
@ -337,12 +342,13 @@ func TestSLO_ResourcesList(t *testing.T) {
}
})
target := effectiveAPISLOTarget(SLOResourcesListP95, sloResourcesListGitHubActionsP95)
p95 := percentile(latencies, 0.95)
t.Logf("resources/list p50=%v p95=%v p99=%v SLO=%v",
percentile(latencies, 0.50), p95, percentile(latencies, 0.99), SLOResourcesListP95)
percentile(latencies, 0.50), p95, percentile(latencies, 0.99), target)
if p95 > SLOResourcesListP95 {
t.Errorf("SLO VIOLATION: p95=%v exceeds target %v", p95, SLOResourcesListP95)
if p95 > target {
t.Errorf("SLO VIOLATION: p95=%v exceeds target %v", p95, target)
}
}
@ -950,6 +956,13 @@ func newTestMetricsStore(t *testing.T) *metrics.Store {
cfg.DBPath = filepath.Join(dir, "slo-test.db")
cfg.FlushInterval = time.Hour
cfg.WriteBufferSize = 10_000
// API chart/contract tests seed multi-day minute-tier fixtures; keep every
// tier well beyond those windows so deferred startup maintenance cannot race
// the fixture and prune old points on slower CI runners.
cfg.RetentionRaw = 90 * 24 * time.Hour
cfg.RetentionMinute = 90 * 24 * time.Hour
cfg.RetentionHourly = 90 * 24 * time.Hour
cfg.RetentionDaily = 90 * 24 * time.Hour
store, err := metrics.NewStore(cfg)
if err != nil {
t.Fatalf("NewStore: %v", err)

View file

@ -25,11 +25,14 @@ import (
// Baseline measurements (Apple M4, March 2026):
// - GetGuestMetricsForChartBatch(50 guests × 5 metrics × 240 points): ~42ms
// - GetNodeMetricsForChartBatch(20 nodes × 5 metrics × 240 points): ~16ms
// - GitHub-hosted runners on the April 9, 2026 RC dry run reached ~337ms
// and ~153ms p95 respectively, so CI keeps separate hosted-runner budgets
// while preserving strict local thresholds.
const (
SLOGuestChartBatchP95 = 80 * time.Millisecond
SLONodeChartBatchP95 = 35 * time.Millisecond
SLOGuestChartBatchGitHubActionsP95 = 220 * time.Millisecond
SLONodeChartBatchGitHubActionsP95 = 140 * time.Millisecond
SLOGuestChartBatchGitHubActionsP95 = 400 * time.Millisecond
SLONodeChartBatchGitHubActionsP95 = 180 * time.Millisecond
SLOPhysicalDiskChartFallbackP95 = 30 * time.Millisecond
SLOPhysicalDiskChartFallbackGHA = 120 * time.Millisecond
SLOGuestChartFallbackP95 = 30 * time.Millisecond