Pulse/internal/dockeragent/container_update.go
2026-03-27 23:50:41 +00:00

322 lines
13 KiB
Go

package dockeragent
import (
"context"
"fmt"
"io"
"strings"
"time"
"github.com/moby/moby/api/types/network"
agentsdocker "github.com/rcourtman/pulse-go-rewrite/pkg/agents/docker"
)
// ContainerUpdateResult describes how a single container update attempt ended.
// It is produced by updateContainerWithProgress and serialises to JSON using
// the field tags below.
type ContainerUpdateResult struct {
	// Success is true only when the whole update sequence completed.
	Success bool `json:"success"`
	// ContainerID is the ID of the container the result refers to: the
	// requested ID initially, replaced by the new container's ID once one
	// has been created.
	ContainerID string `json:"containerId"`
	// OldContainerID is the ID of the container that was asked to be updated.
	OldContainerID string `json:"oldContainerId,omitempty"`
	// NewContainerID is the ID of the replacement container, when one was created.
	NewContainerID string `json:"newContainerId,omitempty"`
	// ContainerName is the container name with the leading "/" removed.
	ContainerName string `json:"containerName"`
	// OldImageDigest is the image reference reported by inspecting the original container.
	OldImageDigest string `json:"oldImageDigest,omitempty"`
	// NewImageDigest is the image reference reported by inspecting the replacement container.
	NewImageDigest string `json:"newImageDigest,omitempty"`
	// BackupCreated reports whether the original container was renamed to a backup name.
	BackupCreated bool `json:"backupCreated"`
	// BackupContainer is the name given to the renamed original container.
	BackupContainer string `json:"backupContainer,omitempty"`
	// Error holds a human-readable failure description; empty on success.
	Error string `json:"error,omitempty"`
}
// handleUpdateContainerCommand processes an update_container command received
// from Pulse.
//
// It validates that the payload carries a non-empty "containerId" string,
// acknowledges the command, runs the update while relaying each step as an
// in-progress acknowledgement, and finally reports completion or failure.
// When the update succeeded and produced a container with a different ID, the
// final acknowledgement carries an old-to-new ID mapping so the server can
// migrate persisted metadata (custom URLs, notes, tags).
//
// Every failure is logged and reported through acknowledgements; the function
// always returns nil.
func (a *Agent) handleUpdateContainerCommand(ctx context.Context, target TargetConfig, command agentsdocker.Command) error {
	// A failed type assertion yields the empty string, so one check covers
	// both a missing/non-string value and an explicitly empty one.
	id, _ := command.Payload["containerId"].(string)
	if id == "" {
		a.logger.Error().Msg("Update command missing containerId in payload")
		ackErr := a.sendCommandAck(ctx, target, command.ID, agentsdocker.CommandStatusFailed, "Missing containerId in payload")
		if ackErr != nil {
			a.logger.Error().Err(ackErr).Msg("Failed to send failure acknowledgement")
		}
		return nil
	}

	a.logger.Info().
		Str("commandID", command.ID).
		Str("containerId", id).
		Msg("Received update_container command from Pulse")

	// Tell Pulse the work is starting; without that acknowledgement the
	// update is not attempted.
	ackErr := a.sendCommandAck(ctx, target, command.ID, agentsdocker.CommandStatusAcknowledged, "Starting container update")
	if ackErr != nil {
		a.logger.Error().Err(ackErr).Msg("Failed to send acknowledgement to Pulse")
		return nil
	}

	// notify forwards each step description to Pulse as an in-progress
	// update. A failed send is only logged and never aborts the update.
	notify := func(step string) {
		sendErr := a.sendCommandAck(ctx, target, command.ID, agentsdocker.CommandStatusInProgress, step)
		if sendErr != nil {
			a.logger.Warn().Err(sendErr).Str("step", step).Msg("Failed to send progress update")
		}
	}

	outcome := a.updateContainerWithProgress(ctx, id, notify)

	// Start from the failure report and upgrade it when the update succeeded.
	status, message := agentsdocker.CommandStatusFailed, outcome.Error
	if outcome.Success {
		status = agentsdocker.CommandStatusCompleted
		message = fmt.Sprintf("Container %s updated successfully", outcome.ContainerName)
	}

	// The ID mapping is only meaningful when both IDs are known and differ.
	idChanged := outcome.OldContainerID != "" &&
		outcome.NewContainerID != "" &&
		outcome.OldContainerID != outcome.NewContainerID

	var sendErr error
	if outcome.Success && idChanged {
		sendErr = a.sendCommandAckWithPayload(ctx, target, command.ID, status, message, map[string]any{
			"oldContainerId": outcome.OldContainerID,
			"newContainerId": outcome.NewContainerID,
		})
	} else {
		sendErr = a.sendCommandAck(ctx, target, command.ID, status, message)
	}
	if sendErr != nil {
		a.logger.Error().Err(sendErr).Msg("Failed to send completion acknowledgement to Pulse")
	}
	return nil
}
// updateContainerWithProgress recreates a container on the latest version of
// its image and reports each step through progressFn.
//
// The sequence is:
//  1. Inspect the current container to capture its configuration.
//  2. Pull the image named in that configuration and drain the pull stream.
//  3. Stop the container and rename it to "<name>_pulse_backup_<timestamp>".
//  4. Create a replacement under the original name with the same Config and
//     HostConfig, attached to one of the original networks, then connect it
//     to the remaining networks.
//  5. Start the replacement, wait five seconds, and verify that it is running
//     and not reporting an "unhealthy" health status.
//  6. On success, schedule removal of the backup after five minutes. On any
//     failure after the rename, remove the replacement (if created), restore
//     the original name and restart the original container.
//
// Containers whose name contains "_pulse_backup_" are rejected.
//
// progressFn may be nil. The returned result has Success set only when every
// step completed; otherwise Error describes the first failure.
func (a *Agent) updateContainerWithProgress(ctx context.Context, containerID string, progressFn func(step string)) ContainerUpdateResult {
	result := ContainerUpdateResult{
		ContainerID:    containerID,
		OldContainerID: containerID,
	}

	// reportProgress tolerates a nil progressFn.
	reportProgress := func(step string) {
		if progressFn != nil {
			progressFn(step)
		}
	}

	// shortDigest truncates a digest for logging without panicking on values
	// shorter than the truncation length (including the empty string).
	shortDigest := func(digest string) string {
		const maxLen = 12
		if len(digest) > maxLen {
			return digest[:maxLen]
		}
		return digest
	}

	// 1. Inspect the current container to get its full configuration.
	inspect, err := a.docker.ContainerInspect(ctx, containerID)
	if err != nil {
		result.Error = fmt.Sprintf("Failed to inspect container: %v", err)
		a.logger.Error().Err(err).Str("containerId", containerID).Msg("Failed to inspect container for update")
		return result
	}
	result.ContainerName = strings.TrimPrefix(inspect.Name, "/")
	result.OldImageDigest = inspect.Image

	// Reject updates for backup containers (created during previous updates).
	if strings.Contains(result.ContainerName, "_pulse_backup_") {
		result.Error = "Cannot update backup containers - these are temporary and should be cleaned up"
		a.logger.Warn().
			Str("container", result.ContainerName).
			Msg("Rejecting update request for backup container")
		return result
	}

	a.logger.Info().
		Str("container", result.ContainerName).
		Str("image", inspect.Config.Image).
		Msg("Starting container update")

	// 2. Pull the latest image.
	imageName := inspect.Config.Image
	reportProgress(fmt.Sprintf("Pulling image %s...", imageName))
	a.logger.Info().Str("image", imageName).Msg("Pulling latest image")
	pullResp, err := a.docker.ImagePull(ctx, imageName, imagePullOptions{})
	if err != nil {
		result.Error = fmt.Sprintf("Failed to pull image %s: %v", imageName, err)
		a.logger.Error().Err(err).Str("image", imageName).Msg("Failed to pull latest image")
		return result
	}
	// The pull only completes once the response stream has been read to the
	// end. If reading fails, stop here, before the running container is
	// touched, rather than recreating it on a stale image.
	_, copyErr := io.Copy(io.Discard, pullResp)
	pullResp.Close()
	if copyErr != nil {
		result.Error = fmt.Sprintf("Failed to pull image %s: %v", imageName, copyErr)
		a.logger.Error().Err(copyErr).Str("image", imageName).Msg("Failed to read image pull response")
		return result
	}
	a.logger.Info().Str("image", imageName).Msg("Successfully pulled latest image")

	// 3. Stop the current container.
	reportProgress(fmt.Sprintf("Stopping container %s...", result.ContainerName))
	stopTimeout := 30 // seconds
	if err := a.docker.ContainerStop(ctx, containerID, containerStopOptions{Timeout: &stopTimeout}); err != nil {
		result.Error = fmt.Sprintf("Failed to stop container: %v", err)
		a.logger.Error().Err(err).Str("container", result.ContainerName).Msg("Failed to stop container")
		return result
	}
	a.logger.Info().Str("container", result.ContainerName).Msg("Container stopped")

	// 4. Rename the old container so it can serve as a backup.
	backupName := result.ContainerName + "_pulse_backup_" + nowFn().Format("20060102_150405")
	if err := a.docker.ContainerRename(ctx, containerID, backupName); err != nil {
		result.Error = fmt.Sprintf("Failed to rename container for backup: %v", err)
		a.logger.Error().Err(err).Str("container", result.ContainerName).Msg("Failed to rename container for backup")
		// The container still has its original name; just bring it back up.
		if startErr := a.docker.ContainerStart(ctx, containerID, containerStartOptions{}); startErr != nil {
			a.logger.Error().Err(startErr).Str("container", result.ContainerName).Msg("Failed to restart original container after rename failure")
		}
		return result
	}
	result.BackupCreated = true
	result.BackupContainer = backupName
	a.logger.Info().Str("backup", backupName).Msg("Container renamed for backup")

	// restoreOriginal rolls the swap back: it removes the replacement
	// container when newID is non-empty, gives the backup its original name
	// again and restarts it. Each step is best-effort, but failures are logged
	// because they can leave the service down.
	restoreOriginal := func(newID string) {
		if newID != "" {
			if rmErr := a.docker.ContainerRemove(ctx, newID, containerRemoveOptions{Force: true}); rmErr != nil {
				a.logger.Error().Err(rmErr).Str("newContainerId", newID).Msg("Rollback: failed to remove new container")
			}
		}
		if renameErr := a.docker.ContainerRename(ctx, backupName, result.ContainerName); renameErr != nil {
			a.logger.Error().Err(renameErr).Str("backup", backupName).Msg("Rollback: failed to restore original container name")
		}
		if startErr := a.docker.ContainerStart(ctx, containerID, containerStartOptions{}); startErr != nil {
			a.logger.Error().Err(startErr).Str("container", result.ContainerName).Msg("Rollback: failed to restart original container")
		}
	}

	reportProgress(fmt.Sprintf("Creating new container %s...", result.ContainerName))

	// 5. Prepare the network configuration. Only one network is attached at
	// creation time; the rest are connected afterwards. The chosen name is
	// remembered because map iteration order differs between loops, so "the
	// first entry" cannot be relied on to identify it later.
	var networkingConfig *network.NetworkingConfig
	primaryNetwork := ""
	for netName, netConfig := range inspect.NetworkSettings.Networks {
		networkingConfig = &network.NetworkingConfig{
			EndpointsConfig: map[string]*network.EndpointSettings{
				netName: {
					Aliases:    netConfig.Aliases,
					IPAMConfig: netConfig.IPAMConfig,
					Links:      netConfig.Links,
					NetworkID:  netConfig.NetworkID,
					MacAddress: netConfig.MacAddress,
					DriverOpts: netConfig.DriverOpts,
				},
			},
		}
		primaryNetwork = netName
		break // Only set one network during creation.
	}

	// 6. Create a new container with the same configuration.
	createResp, err := a.docker.ContainerCreate(
		ctx,
		inspect.Config,
		inspect.HostConfig,
		networkingConfig,
		nil, // Platform
		result.ContainerName,
	)
	if err != nil {
		result.Error = fmt.Sprintf("Failed to create new container: %v", err)
		a.logger.Error().Err(err).Str("container", result.ContainerName).Msg("Failed to create new container")
		restoreOriginal("")
		return result
	}
	newContainerID := createResp.ID
	result.NewContainerID = newContainerID
	// After a successful update, the resulting "current" container ID is the new container.
	result.ContainerID = newContainerID
	a.logger.Info().Str("newContainerId", newContainerID).Msg("New container created")

	// 7. Connect every network except the one attached during creation.
	for netName, netConfig := range inspect.NetworkSettings.Networks {
		if netName == primaryNetwork {
			continue
		}
		endpointConfig := &network.EndpointSettings{
			Aliases:    netConfig.Aliases,
			IPAMConfig: netConfig.IPAMConfig,
			Links:      netConfig.Links,
			MacAddress: netConfig.MacAddress,
			DriverOpts: netConfig.DriverOpts,
		}
		if err := a.docker.NetworkConnect(ctx, netName, newContainerID, endpointConfig); err != nil {
			a.logger.Warn().Err(err).Str("network", netName).Msg("Failed to connect to network, continuing anyway")
		}
	}

	// 8. Start the new container.
	reportProgress(fmt.Sprintf("Starting container %s...", result.ContainerName))
	if err := a.docker.ContainerStart(ctx, newContainerID, containerStartOptions{}); err != nil {
		result.Error = fmt.Sprintf("Failed to start new container: %v", err)
		a.logger.Error().Err(err).Str("container", result.ContainerName).Msg("Failed to start new container")
		restoreOriginal(newContainerID)
		return result
	}
	a.logger.Info().Str("container", result.ContainerName).Msg("New container started, verifying stability...")

	// 9. Verify container stability: wait a few seconds so an immediate crash
	// is visible in the container state.
	reportProgress("Verifying container stability...")
	sleepFn(5 * time.Second)
	verifyInspect, err := a.docker.ContainerInspect(ctx, newContainerID)
	if err != nil {
		result.Error = fmt.Sprintf("Failed to inspect new container during verification: %v", err)
		a.logger.Error().Err(err).Str("container", result.ContainerName).Msg("Failed to verify container stability")
		restoreOriginal(newContainerID)
		return result
	}
	if !verifyInspect.State.Running {
		result.Error = fmt.Sprintf("New container crashed immediately (exit code %d): %s", verifyInspect.State.ExitCode, verifyInspect.State.Error)
		a.logger.Error().Str("container", result.ContainerName).Int("exitCode", verifyInspect.State.ExitCode).Msg("New container crashed, rolling back")
		restoreOriginal(newContainerID)
		return result
	}
	// Health is only checked when the container reports a health state.
	if verifyInspect.State.Health != nil && verifyInspect.State.Health.Status == "unhealthy" {
		result.Error = "New container reported unhealthy status"
		a.logger.Error().Str("container", result.ContainerName).Msg("New container unhealthy, rolling back")
		restoreOriginal(newContainerID)
		return result
	}
	result.NewImageDigest = verifyInspect.Image

	// 10. Schedule cleanup of the backup container after a delay, leaving
	// time to confirm the new container is working. A background context is
	// used because the cleanup runs after this call has returned.
	go func() {
		sleepFn(5 * time.Minute)
		cleanupCtx := context.Background()
		if err := a.docker.ContainerRemove(cleanupCtx, backupName, containerRemoveOptions{Force: true}); err != nil {
			a.logger.Warn().Err(err).Str("backup", backupName).Msg("Failed to cleanup backup container")
		} else {
			a.logger.Info().Str("backup", backupName).Msg("Backup container cleaned up")
		}
	}()

	result.Success = true
	a.logger.Info().
		Str("container", result.ContainerName).
		Str("oldDigest", shortDigest(result.OldImageDigest)).
		Str("newDigest", shortDigest(result.NewImageDigest)).
		Msg("Container update completed successfully")
	return result
}