diff --git a/internal/ai/findings_update_safety.go b/internal/ai/findings_update_safety.go index 5e43401aa..7269c6bd5 100644 --- a/internal/ai/findings_update_safety.go +++ b/internal/ai/findings_update_safety.go @@ -105,13 +105,13 @@ func (w *UpdateSafetyWatcher) Observe(hosts []models.DockerHost, now time.Time) continue } // Digest changed -- transition to state B. - snap.priorDigest = snap.digest - snap.changeDigest = c.ImageDigest - snap.baseRestarts = snap.restartCount + snap.priorDigest = snap.digest + snap.changeDigest = c.ImageDigest + snap.baseRestarts = snap.restartCount snap.lastEmittedRestarts = snap.restartCount - snap.detectedAt = now - snap.digest = c.ImageDigest - snap.restartCount = c.RestartCount + snap.detectedAt = now + snap.digest = c.ImageDigest + snap.restartCount = c.RestartCount severity := FindingSeverityInfo if c.RestartCount > snap.baseRestarts { @@ -123,7 +123,28 @@ func (w *UpdateSafetyWatcher) Observe(hosts []models.DockerHost, now time.Time) } // State B: change already detected, verifying stability. - snap.digest = c.ImageDigest + if c.ImageDigest != snap.digest { + // Another image change landed before the prior verification + // window completed. Treat it as a fresh update and restart the + // window instead of resolving against stale evidence. 
+ snap.priorDigest = snap.digest + snap.changeDigest = c.ImageDigest + snap.baseRestarts = snap.restartCount + snap.lastEmittedRestarts = snap.restartCount + snap.detectedAt = now + snap.digest = c.ImageDigest + snap.restartCount = c.RestartCount + + severity := FindingSeverityInfo + if c.RestartCount > snap.baseRestarts { + severity = FindingSeverityWarning + snap.lastEmittedRestarts = c.RestartCount + } + emit = append(emit, buildUpdateSafetyFinding(key, host, c, snap, severity, now, now)) + continue + } + + snap.digest = c.ImageDigest snap.restartCount = c.RestartCount restartsAfterChange := c.RestartCount - snap.baseRestarts @@ -138,10 +159,10 @@ func (w *UpdateSafetyWatcher) Observe(hosts []models.DockerHost, now time.Time) // Stable for the full window -- emit resolve sentinel and reset. dedupKey := UpdateSafetyFindingPrefix + ":" + key resolve = append(resolve, resolveSentinel{DedupKey: dedupKey, Reason: updateSafetyResolveReason}) - snap.detectedAt = time.Time{} - snap.priorDigest = "" - snap.changeDigest = "" - snap.baseRestarts = 0 + snap.detectedAt = time.Time{} + snap.priorDigest = "" + snap.changeDigest = "" + snap.baseRestarts = 0 snap.lastEmittedRestarts = 0 } // Otherwise: still in window, no new restarts -- do nothing. diff --git a/internal/ai/findings_update_safety_test.go b/internal/ai/findings_update_safety_test.go index 56885c71c..9cf93bfa6 100644 --- a/internal/ai/findings_update_safety_test.go +++ b/internal/ai/findings_update_safety_test.go @@ -138,6 +138,43 @@ func TestUpdateSafety_StableWindowEmitsResolveSentinel(t *testing.T) { } } +// TestUpdateSafety_SecondDigestChangeResetsVerificationWindow verifies that a +// second image update during the verification window restarts the window and +// updates the existing finding evidence rather than resolving the old update. 
+func TestUpdateSafety_SecondDigestChangeResetsVerificationWindow(t *testing.T) {
+	watcher := newUpdateSafetyWatcher()
+	start := time.Now()
+	firstChange, secondChange := start.Add(5*time.Second), start.Add(20*time.Second)
+	report := func(digest string) []models.DockerHost { return []models.DockerHost{makeHost("h1", "c1", digest, 0)} }
+
+	// Seed the watcher with an initial digest, then report the first image change.
+	watcher.Observe(report("sha256:aaa"), start)
+	if emit, resolve := watcher.Observe(report("sha256:bbb"), firstChange); len(emit) != 1 || len(resolve) != 0 {
+		t.Fatalf("first update: want 1 emit and 0 resolves, got emit=%d resolve=%d", len(emit), len(resolve))
+	}
+
+	// A second change inside the window must emit again, describing bbb -> ccc.
+	emit, resolve := watcher.Observe(report("sha256:ccc"), secondChange)
+	if len(emit) != 1 || len(resolve) != 0 {
+		t.Fatalf("second update: want 1 emit and 0 resolves, got emit=%d resolve=%d", len(emit), len(resolve))
+	}
+	if got := emit[0].Evidence; got != "prior_digest=sha256:bbb new_digest=sha256:ccc restart_count=0" {
+		t.Fatalf("second update evidence = %q", got)
+	}
+
+	// The first window's deadline has passed but the restarted one is still open.
+	emit, resolve = watcher.Observe(report("sha256:ccc"), firstChange.Add(updateSafetyVerifyWindow+time.Second))
+	if len(emit) != 0 || len(resolve) != 0 {
+		t.Fatalf("old window elapsed: want silent, got emit=%d resolve=%d", len(emit), len(resolve))
+	}
+
+	// Only once the restarted window has elapsed is a resolve expected.
+	emit, resolve = watcher.Observe(report("sha256:ccc"), secondChange.Add(updateSafetyVerifyWindow+time.Second))
+	if len(emit) != 0 || len(resolve) != 1 {
+		t.Fatalf("second window elapsed: want 0 emit and 1 resolve, got emit=%d resolve=%d", len(emit), len(resolve))
+	}
+}
+
 // TestUpdateSafety_EmptyDigestEmitsNothing verifies that containers with an
 // empty ImageDigest (agent not yet reporting one) are silently skipped.
 func TestUpdateSafety_EmptyDigestEmitsNothing(t *testing.T) {