Pulse/internal/servicediscovery/store.go
2026-03-29 13:53:46 +01:00

1103 lines
31 KiB
Go

package servicediscovery
import (
"encoding/json"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"sync"
"time"
"github.com/rcourtman/pulse-go-rewrite/internal/crypto"
"github.com/rcourtman/pulse-go-rewrite/internal/securityutil"
"github.com/rs/zerolog/log"
)
// CryptoManager is the interface used for encryption/decryption of persisted
// discovery payloads. Declared locally so the store depends only on the two
// operations it uses.
type CryptoManager interface {
// Encrypt returns the ciphertext for the given plaintext.
Encrypt(plaintext []byte) ([]byte, error)
// Decrypt returns the plaintext for the given ciphertext.
Decrypt(ciphertext []byte) ([]byte, error)
}
// Store provides encrypted per-resource storage for discovery data.
// Discoveries are persisted as encrypted files under dataDir and served
// through an in-memory cache with a TTL; container fingerprints are held
// fully in memory and persisted as JSON files under fingerprintDir.
type Store struct {
mu sync.RWMutex // guards cache, cacheTime, and discovery file access
dataDir string // discovery directory (…/discovery)
crypto CryptoManager // encrypts/decrypts persisted discoveries; nil disables encryption
cache map[string]*ResourceDiscovery // In-memory cache
cacheTime map[string]time.Time // Cache timestamps
cacheTTL time.Duration // max cache-entry age before re-reading from disk
// Fingerprint storage (in-memory with file persistence)
fingerprintDir string
fingerprints map[string]*ContainerFingerprint // resourceID -> fingerprint
fingerprintMu sync.RWMutex // guards fingerprints and lastFingerprintScan
lastFingerprintScan time.Time
}
// cloneResourceDiscovery returns a deep copy of src so callers can mutate the
// result without affecting cached or persisted state. Nil input yields nil,
// and nil slices/maps stay nil in the copy.
func cloneResourceDiscovery(src *ResourceDiscovery) *ResourceDiscovery {
	if src == nil {
		return nil
	}
	out := *src
	dupStrings := func(in []string) []string {
		if in == nil {
			return nil
		}
		d := make([]string, len(in))
		copy(d, in)
		return d
	}
	dupMap := func(in map[string]string) map[string]string {
		if in == nil {
			return nil
		}
		d := make(map[string]string, len(in))
		for key, val := range in {
			d[key] = val
		}
		return d
	}
	if src.Facts != nil {
		out.Facts = make([]DiscoveryFact, len(src.Facts))
		copy(out.Facts, src.Facts)
	}
	out.ConfigPaths = dupStrings(src.ConfigPaths)
	out.DataPaths = dupStrings(src.DataPaths)
	out.LogPaths = dupStrings(src.LogPaths)
	if src.Ports != nil {
		out.Ports = make([]PortInfo, len(src.Ports))
		copy(out.Ports, src.Ports)
	}
	if src.DockerMounts != nil {
		out.DockerMounts = make([]DockerBindMount, len(src.DockerMounts))
		copy(out.DockerMounts, src.DockerMounts)
	}
	out.UserSecrets = dupMap(src.UserSecrets)
	out.RawCommandOutput = dupMap(src.RawCommandOutput)
	return &out
}
// cloneContainerFingerprint returns a deep copy of src, preserving nil for
// both a nil input and nil slice fields.
func cloneContainerFingerprint(src *ContainerFingerprint) *ContainerFingerprint {
	if src == nil {
		return nil
	}
	out := *src
	dup := func(in []string) []string {
		if in == nil {
			return nil
		}
		d := make([]string, len(in))
		copy(d, in)
		return d
	}
	out.Ports = dup(src.Ports)
	out.MountPaths = dup(src.MountPaths)
	out.EnvKeys = dup(src.EnvKeys)
	return &out
}
// canonicalStoredResourceType maps a persisted resource type to its canonical
// form via the package-wide normalizer.
func canonicalStoredResourceType(stored ResourceType) ResourceType {
	return NormalizeResourceType(stored)
}
// normalizeResourceID keeps persisted resource IDs strict/canonical for v6.
// It is currently the identity function: v6 writes IDs canonically, so no
// transformation is needed on load.
func normalizeResourceID(raw string) string {
	return raw
}
// canonicalStoredResourceID returns the canonical form of a persisted
// resource ID.
func canonicalStoredResourceID(stored string) string {
	return normalizeResourceID(stored)
}
// normalizeDiscovery canonicalizes persisted fields on load: the resource
// type and ID are normalized, a blank TargetID is derived from the composite
// ID, and agent discoveries backfill AgentID from TargetID.
func normalizeDiscovery(d *ResourceDiscovery) {
	if d == nil {
		return
	}
	d.ResourceType = canonicalStoredResourceType(d.ResourceType)
	d.ID = canonicalStoredResourceID(d.ID)
	if strings.TrimSpace(d.TargetID) == "" {
		if _, target, _, parseErr := ParseResourceID(d.ID); parseErr == nil {
			d.TargetID = strings.TrimSpace(target)
		}
	}
	if d.ResourceType == ResourceTypeAgent && strings.TrimSpace(d.AgentID) == "" {
		d.AgentID = d.TargetID
	}
}
// canonicalizeFingerprint normalizes a fingerprint's resource ID and, when
// TargetID is blank, derives it from the composite resource ID.
func canonicalizeFingerprint(fp *ContainerFingerprint) {
	if fp == nil {
		return
	}
	fp.ResourceID = canonicalStoredResourceID(fp.ResourceID)
	if strings.TrimSpace(fp.TargetID) != "" {
		return
	}
	if _, target, _, parseErr := ParseResourceID(fp.ResourceID); parseErr == nil {
		fp.TargetID = strings.TrimSpace(target)
	}
}
// unmarshalStoredDiscovery decodes persisted discovery JSON into discovery,
// which must be non-nil.
func unmarshalStoredDiscovery(data []byte, discovery *ResourceDiscovery) error {
	if discovery != nil {
		return json.Unmarshal(data, discovery)
	}
	return fmt.Errorf("discovery output is required")
}
// unmarshalStoredFingerprint decodes persisted fingerprint JSON into fp, which
// must be non-nil.
func unmarshalStoredFingerprint(data []byte, fp *ContainerFingerprint) error {
	if fp != nil {
		return json.Unmarshal(data, fp)
	}
	return fmt.Errorf("fingerprint output is required")
}
// For testing - allows injecting a mock crypto manager.
var newCryptoManagerAt = crypto.NewCryptoManagerAt
// For testing - allows injecting a mock marshaler.
var marshalDiscovery = json.Marshal
// File read limits for persisted discovery data. Enforced by
// readRegularFileWithLimit when loading files from disk.
// These are variables (not constants) so tests can temporarily override them.
var maxDiscoveryFileReadBytes int64 = 16 * 1024 * 1024 // 16 MiB
var maxFingerprintFileReadBytes int64 = 1 * 1024 * 1024 // 1 MiB
// NewStore creates a new discovery store with automatic encryption.
// Encrypted discovery payloads live under <dataDir>/discovery and fingerprint
// JSON files under <dataDir>/discovery/fingerprints; both directories are
// created with 0700 permissions.
func NewStore(dataDir string) (*Store, error) {
	base := strings.TrimSpace(dataDir)
	if base == "" {
		return nil, fmt.Errorf("discovery data directory is required")
	}
	base = filepath.Clean(base)
	discoveryDir := filepath.Join(base, "discovery")
	fingerprintDir := filepath.Join(discoveryDir, "fingerprints")
	if err := os.MkdirAll(discoveryDir, 0700); err != nil {
		return nil, fmt.Errorf("failed to create discovery directory: %w", err)
	}
	if err := os.MkdirAll(fingerprintDir, 0700); err != nil {
		return nil, fmt.Errorf("failed to create fingerprint directory: %w", err)
	}
	// Encryption uses the same key material as other Pulse secrets.
	cryptoMgr, err := newCryptoManagerAt(base)
	if err != nil {
		return nil, fmt.Errorf("failed to initialize crypto for discovery store: %w", err)
	}
	s := &Store{
		dataDir:        discoveryDir,
		fingerprintDir: fingerprintDir,
		crypto:         cryptoMgr,
		cache:          make(map[string]*ResourceDiscovery),
		cacheTime:      make(map[string]time.Time),
		cacheTTL:       5 * time.Minute,
		fingerprints:   make(map[string]*ContainerFingerprint),
	}
	// Warm the fingerprint cache; load failures are logged, not fatal.
	s.loadFingerprints()
	return s, nil
}
// discoveryPathForLeaf joins leaf onto the discovery directory, delegating to
// securityutil.JoinStorageLeaf which rejects unsafe (traversal) leaf names.
func (s *Store) discoveryPathForLeaf(leaf string) (string, error) {
return securityutil.JoinStorageLeaf(s.dataDir, leaf)
}
// getFilePath returns the file path for a resource ID.
// The leaf is a hash of the ID, so arbitrary IDs cannot influence the path.
// A join failure on a hashed leaf indicates a programming error rather than
// bad input, hence the panic.
func (s *Store) getFilePath(id string) string {
path, err := s.discoveryPathForLeaf(securityutil.HashedStorageName(id) + ".enc")
if err != nil {
panic(fmt.Sprintf("invalid hashed discovery storage leaf for %q: %v", id, err))
}
return path
}
// findLegacyDiscoveryPath scans the discovery directory for a file that was
// written under a pre-hashed naming scheme but stores the given ID. It returns
// ("", nil) when no such file exists; unreadable candidates are logged and
// skipped.
func (s *Store) findLegacyDiscoveryPath(id string) (string, error) {
	entries, err := os.ReadDir(s.dataDir)
	if err != nil {
		if os.IsNotExist(err) {
			return "", nil
		}
		return "", fmt.Errorf("failed to scan discovery directory: %w", err)
	}
	canonical := securityutil.HashedStorageName(id) + ".enc"
	for _, entry := range entries {
		name := entry.Name()
		// Only loose .enc files are candidates; the canonical file is not legacy.
		if entry.IsDir() || !strings.HasSuffix(name, ".enc") || name == canonical {
			continue
		}
		candidate, joinErr := s.discoveryPathForLeaf(name)
		if joinErr != nil {
			log.Warn().Err(joinErr).Str("file", name).Msg("skipping invalid legacy discovery candidate")
			continue
		}
		storedID, readErr := s.readDiscoveryIDFromPath(candidate)
		if readErr != nil {
			log.Warn().Err(readErr).Str("file", name).Msg("failed to inspect legacy discovery candidate")
			continue
		}
		if storedID == id {
			return candidate, nil
		}
	}
	return "", nil
}
// readRegularFileWithLimit reads a file with a strict size cap and rejects non-regular files.
func readRegularFileWithLimit(path string, maxBytes int64) ([]byte, error) {
info, err := os.Lstat(path)
if err != nil {
return nil, err
}
if !info.Mode().IsRegular() {
return nil, fmt.Errorf("not a regular file")
}
if maxBytes > 0 && info.Size() > maxBytes {
return nil, fmt.Errorf("file exceeds max size (%d bytes)", maxBytes)
}
f, err := os.Open(path)
if err != nil {
return nil, err
}
defer f.Close()
openedInfo, err := f.Stat()
if err != nil {
return nil, err
}
if !openedInfo.Mode().IsRegular() {
return nil, fmt.Errorf("not a regular file")
}
if maxBytes > 0 && openedInfo.Size() > maxBytes {
return nil, fmt.Errorf("file exceeds max size (%d bytes)", maxBytes)
}
if maxBytes <= 0 {
return io.ReadAll(f)
}
data, err := io.ReadAll(io.LimitReader(f, maxBytes+1))
if err != nil {
return nil, err
}
if int64(len(data)) > maxBytes {
return nil, fmt.Errorf("file exceeds max size (%d bytes)", maxBytes)
}
return data, nil
}
// persistDiscoveryBytes atomically writes data to filePath using a temp file
// plus rename, so readers never observe a partial write.
func (s *Store) persistDiscoveryBytes(filePath string, data []byte) error {
	tmp := filePath + ".tmp"
	if err := os.WriteFile(tmp, data, 0600); err != nil {
		return fmt.Errorf("failed to write discovery file: %w", err)
	}
	renameErr := os.Rename(tmp, filePath)
	if renameErr == nil {
		return nil
	}
	// Best-effort cleanup of the orphaned temp file.
	if rmErr := os.Remove(tmp); rmErr != nil && !os.IsNotExist(rmErr) {
		log.Warn().Err(rmErr).Str("tmp_path", tmp).Msg("Failed to remove temp discovery file after rename failure")
	}
	return fmt.Errorf("failed to finalize discovery file: %w", renameErr)
}
// loadDiscoveryFileData reads a discovery file and decrypts it when a crypto
// manager is configured. The boolean result reports whether the payload
// appears to be legacy plaintext (decryption failed), in which case the raw
// bytes are returned and the caller should re-encrypt them.
func (s *Store) loadDiscoveryFileData(filePath string, maxBytes int64) ([]byte, bool, error) {
	raw, err := readRegularFileWithLimit(filePath, maxBytes)
	if err != nil {
		return nil, false, err
	}
	if s.crypto == nil {
		return raw, false, nil
	}
	decrypted, decErr := s.crypto.Decrypt(raw)
	if decErr != nil {
		// Undecryptable data is treated as a legacy plaintext file to migrate.
		return raw, true, nil
	}
	return decrypted, false, nil
}
// maybeRewritePlaintextDiscovery re-encrypts a legacy plaintext discovery file
// in place. It is a no-op unless migratedPlaintext is set and a crypto manager
// is configured.
func (s *Store) maybeRewritePlaintextDiscovery(filePath string, data []byte, migratedPlaintext bool) error {
	if !migratedPlaintext || s.crypto == nil {
		return nil
	}
	encrypted, encErr := s.crypto.Encrypt(data)
	if encErr != nil {
		return fmt.Errorf("failed to encrypt migrated discovery: %w", encErr)
	}
	return s.persistDiscoveryBytes(filePath, encrypted)
}
// marshalDiscoveryForStorage serializes a discovery to JSON and, when a crypto
// manager is configured, encrypts the payload for on-disk storage.
func (s *Store) marshalDiscoveryForStorage(discovery *ResourceDiscovery) ([]byte, error) {
	payload, err := marshalDiscovery(discovery)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal discovery: %w", err)
	}
	if s.crypto == nil {
		return payload, nil
	}
	encrypted, encErr := s.crypto.Encrypt(payload)
	if encErr != nil {
		return nil, fmt.Errorf("failed to encrypt discovery: %w", encErr)
	}
	return encrypted, nil
}
// Save persists a discovery to encrypted storage and refreshes the cache.
// The caller's struct has UpdatedAt set (and DiscoveredAt backfilled when
// zero); everything is then persisted from a defensive copy so later caller
// mutations cannot leak into cached state. Any legacy-named file for the same
// resource is removed best-effort. Returns an error for a nil discovery or a
// blank ID.
func (s *Store) Save(d *ResourceDiscovery) error {
	// Guard against nil before touching d.ID; the previous version panicked
	// on Save(nil).
	if d == nil {
		return fmt.Errorf("discovery is required")
	}
	s.mu.Lock()
	defer s.mu.Unlock()
	if d.ID == "" {
		return fmt.Errorf("discovery ID is required")
	}
	// Update timestamp; DiscoveredAt is preserved as first-seen when already set.
	d.UpdatedAt = time.Now()
	if d.DiscoveredAt.IsZero() {
		d.DiscoveredAt = d.UpdatedAt
	}
	// Persist/cache a defensive copy so callers cannot mutate shared state after Save.
	toSave := cloneResourceDiscovery(d)
	normalizeDiscovery(toSave)
	data, err := s.marshalDiscoveryForStorage(toSave)
	if err != nil {
		return err
	}
	filePath := s.getFilePath(toSave.ID)
	if err := s.persistDiscoveryBytes(filePath, data); err != nil {
		return err
	}
	// Best-effort removal of any legacy-named file for the same resource.
	if legacyPath, err := s.findLegacyDiscoveryPath(toSave.ID); err == nil && legacyPath != "" && legacyPath != filePath {
		_ = os.Remove(legacyPath)
	}
	// Update cache
	s.cache[toSave.ID] = toSave
	s.cacheTime[toSave.ID] = time.Now()
	log.Debug().Str("id", toSave.ID).Str("service", toSave.ServiceType).Msg("discovery saved")
	return nil
}
// Get retrieves a discovery from storage, returning (nil, nil) when none
// exists. A fresh cache entry (within cacheTTL) is served without touching
// disk; otherwise the canonical file — or, failing that, a legacy-named file —
// is read, legacy plaintext is re-encrypted, legacy files are migrated to the
// canonical name, and the cache is refreshed. Callers always receive a
// defensive copy.
func (s *Store) Get(id string) (*ResourceDiscovery, error) {
s.mu.RLock()
// Check cache first
if cached, ok := s.cache[id]; ok {
if cacheTime, hasTime := s.cacheTime[id]; hasTime {
if time.Since(cacheTime) < s.cacheTTL {
s.mu.RUnlock()
return cloneResourceDiscovery(cached), nil
}
}
}
s.mu.RUnlock()
// Cache miss or expired: take the write lock for the remainder, since the
// read path may rewrite files (plaintext migration, legacy rename) and the
// cache itself.
s.mu.Lock()
defer s.mu.Unlock()
filePath := s.getFilePath(id)
activePath := filePath
data, migratedPlaintext, err := s.loadDiscoveryFileData(filePath, maxDiscoveryFileReadBytes)
if err != nil {
if os.IsNotExist(err) {
// Canonical file absent; fall back to scanning for a legacy-named file.
activePath, err = s.findLegacyDiscoveryPath(id)
if err != nil {
return nil, err
}
if activePath == "" {
return nil, nil // Not found is not an error
}
data, migratedPlaintext, err = s.loadDiscoveryFileData(activePath, maxDiscoveryFileReadBytes)
}
if err != nil {
return nil, fmt.Errorf("failed to read discovery file: %w", err)
}
}
var discovery ResourceDiscovery
if err := unmarshalStoredDiscovery(data, &discovery); err != nil {
return nil, fmt.Errorf("failed to unmarshal discovery: %w", err)
}
// Re-encrypt in place if the payload was stored as legacy plaintext.
if err := s.maybeRewritePlaintextDiscovery(activePath, data, migratedPlaintext); err != nil {
return nil, err
}
// Canonicalize stored fields loaded from disk.
normalizeDiscovery(&discovery)
// Migrate a legacy-named file to the canonical hashed filename, then drop
// the old file best-effort.
if activePath != filePath {
rewritten, err := s.marshalDiscoveryForStorage(&discovery)
if err != nil {
return nil, err
}
if err := s.persistDiscoveryBytes(filePath, rewritten); err != nil {
return nil, err
}
_ = os.Remove(activePath)
}
// Update cache
s.cache[id] = cloneResourceDiscovery(&discovery)
s.cacheTime[id] = time.Now()
return cloneResourceDiscovery(&discovery), nil
}
// GetByResource retrieves a discovery by resource type, target ID, and
// resource ID, composing them into the canonical storage key.
func (s *Store) GetByResource(resourceType ResourceType, targetID, resourceID string) (*ResourceDiscovery, error) {
	return s.Get(MakeResourceID(resourceType, targetID, resourceID))
}
// Delete removes a discovery from storage and the cache.
// Both the canonical file and any legacy-named file are removed; a file that
// is already absent is not an error. Cache entries are dropped unconditionally
// so a stale copy can never outlive the file — the previous version skipped
// cache eviction when the file had already been removed out-of-band, letting
// Get serve deleted data until the cache TTL expired.
func (s *Store) Delete(id string) error {
	s.mu.Lock()
	defer s.mu.Unlock()
	// Evict the cache first: whatever happens to the files below, the cache
	// must not retain an entry for a deleted resource.
	delete(s.cache, id)
	delete(s.cacheTime, id)
	filePath := s.getFilePath(id)
	removed := false
	if err := os.Remove(filePath); err != nil {
		if !os.IsNotExist(err) {
			return fmt.Errorf("failed to delete discovery file: %w", err)
		}
	} else {
		removed = true
	}
	if legacyPath, err := s.findLegacyDiscoveryPath(id); err == nil && legacyPath != "" && legacyPath != filePath {
		if err := os.Remove(legacyPath); err != nil {
			if !os.IsNotExist(err) {
				return fmt.Errorf("failed to delete legacy discovery file: %w", err)
			}
		} else {
			removed = true
		}
	}
	if removed {
		log.Debug().Str("id", id).Msg("discovery deleted")
	}
	return nil
}
// List returns all discoveries currently stored on disk.
// Unreadable or undecodable files are logged and skipped rather than failing
// the whole listing. Legacy plaintext payloads are re-encrypted in place as a
// side effect, and entries that decode to the same canonical ID are
// deduplicated, keeping the most recently updated one.
func (s *Store) List() ([]*ResourceDiscovery, error) {
s.mu.RLock()
defer s.mu.RUnlock()
entries, err := os.ReadDir(s.dataDir)
if err != nil {
if os.IsNotExist(err) {
return []*ResourceDiscovery{}, nil
}
return nil, fmt.Errorf("failed to list discovery directory: %w", err)
}
var discoveries []*ResourceDiscovery
for _, entry := range entries {
// Skip tmp files first to avoid reading partial writes.
if strings.HasSuffix(entry.Name(), ".tmp") {
continue
}
if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".enc") {
continue
}
filePath, err := s.discoveryPathForLeaf(entry.Name())
if err != nil {
log.Warn().Err(err).Str("file", entry.Name()).Msg("skipping invalid discovery file leaf")
continue
}
data, migratedPlaintext, err := s.loadDiscoveryFileData(filePath, maxDiscoveryFileReadBytes)
if err != nil {
log.Warn().Err(err).Str("file", entry.Name()).Msg("failed to read discovery file")
continue
}
var discovery ResourceDiscovery
if err := unmarshalStoredDiscovery(data, &discovery); err != nil {
log.Warn().Err(err).Str("file", entry.Name()).Msg("failed to unmarshal discovery")
continue
}
// Re-encrypt legacy plaintext payloads in place; skip the entry on failure.
if err := s.maybeRewritePlaintextDiscovery(filePath, data, migratedPlaintext); err != nil {
log.Warn().Err(err).Str("file", entry.Name()).Msg("failed to rewrite migrated discovery")
continue
}
// Canonicalize stored fields loaded from disk.
normalizeDiscovery(&discovery)
discoveries = append(discoveries, &discovery)
}
// Deduplicate by canonical ID in case multiple files decode to the same resource.
seen := make(map[string]int, len(discoveries))
deduped := make([]*ResourceDiscovery, 0, len(discoveries))
for _, d := range discoveries {
if idx, exists := seen[d.ID]; exists {
// Keep the more recently updated entry
if d.UpdatedAt.After(deduped[idx].UpdatedAt) {
deduped[idx] = d
}
} else {
seen[d.ID] = len(deduped)
deduped = append(deduped, d)
}
}
return deduped, nil
}
// ListByType returns discoveries for a specific resource type.
func (s *Store) ListByType(resourceType ResourceType) ([]*ResourceDiscovery, error) {
	all, err := s.List()
	if err != nil {
		return nil, fmt.Errorf("list discoveries for type %s: %w", resourceType, err)
	}
	var matches []*ResourceDiscovery
	for _, discovery := range all {
		if discovery.ResourceType != resourceType {
			continue
		}
		matches = append(matches, discovery)
	}
	return matches, nil
}
// ListByTarget returns discoveries whose trimmed TargetID equals targetID.
// Note that the incoming targetID is compared as-is (not trimmed).
func (s *Store) ListByTarget(targetID string) ([]*ResourceDiscovery, error) {
	all, err := s.List()
	if err != nil {
		return nil, fmt.Errorf("list discoveries for target %s: %w", targetID, err)
	}
	var matches []*ResourceDiscovery
	for _, discovery := range all {
		if strings.TrimSpace(discovery.TargetID) != targetID {
			continue
		}
		matches = append(matches, discovery)
	}
	return matches, nil
}
// UpdateNotes updates just the user notes and secrets for a discovery.
// A nil secrets map leaves the existing secrets untouched; a non-nil map
// replaces them wholesale. Returns an error if the discovery does not exist.
func (s *Store) UpdateNotes(id string, notes string, secrets map[string]string) error {
	discovery, err := s.Get(id)
	if err != nil {
		return fmt.Errorf("get discovery %s for note update: %w", id, err)
	}
	if discovery == nil {
		return fmt.Errorf("discovery not found: %s", id)
	}
	discovery.UserNotes = notes
	if secrets != nil {
		discovery.UserSecrets = secrets
	}
	if saveErr := s.Save(discovery); saveErr != nil {
		return fmt.Errorf("save updated discovery %s notes: %w", id, saveErr)
	}
	return nil
}
// GetMultiple retrieves multiple discoveries by ID. IDs that fail to load are
// logged and skipped; IDs with no stored discovery are silently omitted, so
// the result may be shorter than ids.
func (s *Store) GetMultiple(ids []string) ([]*ResourceDiscovery, error) {
	var results []*ResourceDiscovery
	for _, id := range ids {
		discovery, err := s.Get(id)
		if err != nil {
			log.Warn().Err(err).Str("id", id).Msg("failed to get discovery")
			continue
		}
		if discovery == nil {
			continue
		}
		results = append(results, discovery)
	}
	return results, nil
}
// ClearCache drops every cached discovery, forcing subsequent reads to hit disk.
func (s *Store) ClearCache() {
	s.mu.Lock()
	s.cache = make(map[string]*ResourceDiscovery)
	s.cacheTime = make(map[string]time.Time)
	s.mu.Unlock()
}
// Exists reports whether a discovery is stored for the given ID, checking the
// in-memory cache (ignoring TTL), the canonical file path, and finally any
// legacy-named file. Stat and scan failures are logged, not returned.
func (s *Store) Exists(id string) bool {
	s.mu.RLock()
	_, cached := s.cache[id]
	s.mu.RUnlock()
	if cached {
		return true
	}
	filePath := s.getFilePath(id)
	if _, statErr := os.Stat(filePath); statErr == nil {
		return true
	} else if !os.IsNotExist(statErr) {
		log.Warn().Err(statErr).Str("id", id).Str("file", filePath).Msg("Failed to stat discovery file")
	}
	legacyPath, findErr := s.findLegacyDiscoveryPath(id)
	if findErr != nil {
		log.Warn().Err(findErr).Str("id", id).Msg("Failed to scan for legacy discovery file")
	}
	return legacyPath != ""
}
// GetAge returns how long ago the discovery was last updated, or -1 if it is
// missing or unreadable.
func (s *Store) GetAge(id string) time.Duration {
	discovery, err := s.Get(id)
	if err != nil {
		return -1
	}
	if discovery == nil {
		return -1
	}
	return time.Since(discovery.UpdatedAt)
}
// NeedsRefresh reports whether a discovery is missing or older than maxAge.
func (s *Store) NeedsRefresh(id string, maxAge time.Duration) bool {
	age := s.GetAge(id)
	// A negative age means no discovery exists yet, so one is needed.
	return age < 0 || age > maxAge
}
// --- Fingerprint Storage Methods ---
// getFingerprintFilePath returns the file path for a fingerprint.
// The leaf is a hash of the resource ID; a join failure on a hashed leaf
// indicates a programming error rather than bad input, hence the panic.
func (s *Store) getFingerprintFilePath(resourceID string) string {
path, err := securityutil.JoinStorageLeaf(s.fingerprintDir, securityutil.HashedStorageName(resourceID)+".json")
if err != nil {
panic(fmt.Sprintf("invalid hashed fingerprint storage leaf for %q: %v", resourceID, err))
}
return path
}
// findLegacyFingerprintPath scans the fingerprint directory for a file written
// under a pre-hashed naming scheme whose stored ResourceID matches resourceID.
// Returns ("", nil) when none exists; unreadable candidates are logged and
// skipped.
func (s *Store) findLegacyFingerprintPath(resourceID string) (string, error) {
	entries, err := os.ReadDir(s.fingerprintDir)
	if err != nil {
		if os.IsNotExist(err) {
			return "", nil
		}
		return "", fmt.Errorf("failed to scan fingerprint directory: %w", err)
	}
	canonical := securityutil.HashedStorageName(resourceID) + ".json"
	for _, entry := range entries {
		name := entry.Name()
		// Only loose .json files are candidates; the canonical file is not legacy.
		if entry.IsDir() || !strings.HasSuffix(name, ".json") || name == canonical {
			continue
		}
		candidate, joinErr := securityutil.JoinStorageLeaf(s.fingerprintDir, name)
		if joinErr != nil {
			log.Warn().Err(joinErr).Str("file", name).Msg("skipping invalid legacy fingerprint candidate")
			continue
		}
		raw, readErr := readRegularFileWithLimit(candidate, maxFingerprintFileReadBytes)
		if readErr != nil {
			log.Warn().Err(readErr).Str("file", name).Msg("failed to inspect legacy fingerprint candidate")
			continue
		}
		var fp ContainerFingerprint
		if parseErr := unmarshalStoredFingerprint(raw, &fp); parseErr != nil {
			log.Warn().Err(parseErr).Str("file", name).Msg("failed to parse legacy fingerprint candidate")
			continue
		}
		canonicalizeFingerprint(&fp)
		if fp.ResourceID == resourceID {
			return candidate, nil
		}
	}
	return "", nil
}
// loadFingerprints loads all fingerprint JSON files from disk into memory.
// A missing directory is not an error; individual file failures are logged
// and skipped so one corrupt file cannot block startup.
func (s *Store) loadFingerprints() {
	s.fingerprintMu.Lock()
	defer s.fingerprintMu.Unlock()
	entries, err := os.ReadDir(s.fingerprintDir)
	if err != nil {
		if !os.IsNotExist(err) {
			log.Warn().Err(err).Msg("failed to read fingerprint directory")
		}
		return
	}
	for _, entry := range entries {
		name := entry.Name()
		if entry.IsDir() || !strings.HasSuffix(name, ".json") {
			continue
		}
		filePath, joinErr := securityutil.JoinStorageLeaf(s.fingerprintDir, name)
		if joinErr != nil {
			log.Warn().Err(joinErr).Str("file", name).Msg("skipping invalid fingerprint file leaf")
			continue
		}
		raw, readErr := readRegularFileWithLimit(filePath, maxFingerprintFileReadBytes)
		if readErr != nil {
			log.Warn().Err(readErr).Str("file", name).Msg("failed to read fingerprint file")
			continue
		}
		var fp ContainerFingerprint
		if parseErr := unmarshalStoredFingerprint(raw, &fp); parseErr != nil {
			log.Warn().Err(parseErr).Str("file", name).Msg("failed to unmarshal fingerprint")
			continue
		}
		canonicalizeFingerprint(&fp)
		s.fingerprints[fp.ResourceID] = &fp
	}
	log.Debug().Int("count", len(s.fingerprints)).Msg("loaded fingerprints from disk")
}
// SaveFingerprint stores a container fingerprint in memory and persists it to
// disk as JSON (unlike discoveries, fingerprints are not encrypted), using an
// atomic temp-file-plus-rename write. A canonicalized defensive copy is stored
// so the caller's struct stays independent.
func (s *Store) SaveFingerprint(fp *ContainerFingerprint) error {
	if fp == nil || fp.ResourceID == "" {
		return fmt.Errorf("fingerprint or resource ID is required")
	}
	s.fingerprintMu.Lock()
	defer s.fingerprintMu.Unlock()
	stored := cloneContainerFingerprint(fp)
	canonicalizeFingerprint(stored)
	s.fingerprints[stored.ResourceID] = stored
	payload, err := json.Marshal(stored)
	if err != nil {
		return fmt.Errorf("failed to marshal fingerprint: %w", err)
	}
	filePath := s.getFingerprintFilePath(stored.ResourceID)
	tmpPath := filePath + ".tmp"
	if err := os.WriteFile(tmpPath, payload, 0600); err != nil {
		return fmt.Errorf("failed to write fingerprint file: %w", err)
	}
	if renameErr := os.Rename(tmpPath, filePath); renameErr != nil {
		// Best-effort cleanup of the orphaned temp file.
		if cleanupErr := os.Remove(tmpPath); cleanupErr != nil && !os.IsNotExist(cleanupErr) {
			log.Warn().Err(cleanupErr).Str("tmp_path", tmpPath).Msg("Failed to remove temp fingerprint file after rename failure")
		}
		return fmt.Errorf("failed to finalize fingerprint file: %w", renameErr)
	}
	// Best-effort removal of any legacy-named file for the same resource.
	if legacyPath, legacyErr := s.findLegacyFingerprintPath(stored.ResourceID); legacyErr == nil && legacyPath != "" && legacyPath != filePath {
		_ = os.Remove(legacyPath)
	}
	return nil
}
// GetFingerprint retrieves the last known fingerprint for a resource, or
// (nil, nil) when none is stored. The result is a defensive copy.
func (s *Store) GetFingerprint(resourceID string) (*ContainerFingerprint, error) {
	s.fingerprintMu.RLock()
	defer s.fingerprintMu.RUnlock()
	if fp, ok := s.fingerprints[resourceID]; ok {
		return cloneContainerFingerprint(fp), nil
	}
	return nil, nil // Not found is not an error
}
// GetAllFingerprints returns a defensive copy of every stored fingerprint,
// keyed by resource ID.
func (s *Store) GetAllFingerprints() map[string]*ContainerFingerprint {
	s.fingerprintMu.RLock()
	defer s.fingerprintMu.RUnlock()
	snapshot := make(map[string]*ContainerFingerprint, len(s.fingerprints))
	for id, fp := range s.fingerprints {
		snapshot[id] = cloneContainerFingerprint(fp)
	}
	return snapshot
}
// GetChangedResources returns resource IDs whose stored fingerprint hash
// differs from the hash recorded on the persisted discovery, plus IDs that
// have a fingerprint but no discovery at all. The fingerprint map is
// snapshotted first so discoveries are loaded without holding the
// fingerprint lock.
func (s *Store) GetChangedResources() ([]string, error) {
	s.fingerprintMu.RLock()
	snapshot := make(map[string]*ContainerFingerprint, len(s.fingerprints))
	for id, fp := range s.fingerprints {
		snapshot[id] = cloneContainerFingerprint(fp)
	}
	s.fingerprintMu.RUnlock()
	var changed []string
	for resourceID, fp := range snapshot {
		// Fingerprint keys are already full resource IDs (type:scope:id), so
		// they double as discovery IDs.
		discovery, err := s.Get(resourceID)
		if err != nil {
			log.Warn().Err(err).Str("resource_id", resourceID).Msg("Failed to load discovery while checking fingerprint changes")
			continue
		}
		// Missing discovery, or a hash mismatch, both mean (re)discovery is needed.
		if discovery == nil || discovery.Fingerprint != fp.Hash {
			changed = append(changed, resourceID)
		}
	}
	return changed, nil
}
// GetStaleResources returns IDs of discoveries not refreshed within maxAge.
// Staleness is judged on UpdatedAt (last successful discovery), falling back
// to DiscoveredAt (first-seen); entries with neither timestamp count as stale.
func (s *Store) GetStaleResources(maxAge time.Duration) ([]string, error) {
	discoveries, err := s.List()
	if err != nil {
		return nil, fmt.Errorf("list discoveries for stale scan: %w", err)
	}
	cutoff := time.Now().Add(-maxAge)
	var stale []string
	for _, d := range discoveries {
		if d == nil {
			continue
		}
		lastSeen := d.UpdatedAt
		if lastSeen.IsZero() {
			lastSeen = d.DiscoveredAt
		}
		if lastSeen.IsZero() || lastSeen.Before(cutoff) {
			stale = append(stale, d.ID)
		}
	}
	return stale, nil
}
// SetLastFingerprintScan records when the most recent fingerprint scan ran.
func (s *Store) SetLastFingerprintScan(t time.Time) {
	s.fingerprintMu.Lock()
	s.lastFingerprintScan = t
	s.fingerprintMu.Unlock()
}
// GetLastFingerprintScan reports when the most recent fingerprint scan ran.
func (s *Store) GetLastFingerprintScan() time.Time {
	s.fingerprintMu.RLock()
	last := s.lastFingerprintScan
	s.fingerprintMu.RUnlock()
	return last
}
// GetFingerprintCount returns the number of fingerprints held in memory.
func (s *Store) GetFingerprintCount() int {
	s.fingerprintMu.RLock()
	n := len(s.fingerprints)
	s.fingerprintMu.RUnlock()
	return n
}
// CleanupOrphanedFingerprints removes fingerprints for resources that no
// longer exist. currentResourceIDs is the set of live resource IDs (e.g.
// "docker:host1:nginx", "system-container:node1:101"). Returns the number of
// fingerprints removed; file-deletion failures are logged but still counted.
func (s *Store) CleanupOrphanedFingerprints(currentResourceIDs map[string]bool) int {
	s.fingerprintMu.Lock()
	defer s.fingerprintMu.Unlock()
	removed := 0
	for id := range s.fingerprints {
		if currentResourceIDs[id] {
			continue
		}
		// Remove from memory (deleting during range is safe in Go).
		delete(s.fingerprints, id)
		// Remove both the canonical and any legacy file from disk.
		var fileErr error
		canonicalPath := s.getFingerprintFilePath(id)
		if err := os.Remove(canonicalPath); err != nil && !os.IsNotExist(err) {
			fileErr = err
		}
		if legacyPath, err := s.findLegacyFingerprintPath(id); err == nil && legacyPath != "" && legacyPath != canonicalPath {
			if err := os.Remove(legacyPath); err != nil && !os.IsNotExist(err) {
				fileErr = err
			}
		}
		if fileErr != nil {
			log.Warn().Err(fileErr).Str("id", id).Msg("failed to remove orphaned fingerprint file")
		} else {
			log.Debug().Str("id", id).Msg("removed orphaned fingerprint")
		}
		removed++
	}
	return removed
}
// CleanupOrphanedDiscoveries removes discovery files — and their cache
// entries — for resources that no longer exist. currentResourceIDs is the set
// of live resource IDs. Returns the number of discoveries removed.
func (s *Store) CleanupOrphanedDiscoveries(currentResourceIDs map[string]bool) int {
	// Hold the store lock so cleanup cannot race Save/Get/Delete on the same
	// files, and so the cache entry for a removed discovery is dropped
	// atomically with its file. Without the cache eviction, Get could keep
	// serving a deleted resource until the cache TTL expired.
	s.mu.Lock()
	defer s.mu.Unlock()
	// List all discovery files
	entries, err := os.ReadDir(s.dataDir)
	if err != nil {
		log.Warn().Err(err).Msg("failed to read discovery directory for cleanup")
		return 0
	}
	removed := 0
	for _, entry := range entries {
		if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".enc") {
			continue
		}
		resourceID, err := s.readDiscoveryIDFromFile(entry.Name())
		if err != nil {
			log.Warn().
				Err(err).
				Str("file", entry.Name()).
				Msg("Skipping orphan cleanup for discovery file with unreadable ID")
			continue
		}
		if currentResourceIDs[resourceID] {
			continue
		}
		filePath, err := s.discoveryPathForLeaf(entry.Name())
		if err != nil {
			log.Warn().Err(err).Str("file", entry.Name()).Msg("skipping invalid orphaned discovery file leaf")
			continue
		}
		if err := os.Remove(filePath); err != nil {
			log.Warn().Err(err).Str("file", entry.Name()).Msg("failed to remove orphaned discovery file")
			continue
		}
		// Evict any cached copy alongside the file.
		delete(s.cache, resourceID)
		delete(s.cacheTime, resourceID)
		log.Debug().Str("id", resourceID).Msg("removed orphaned discovery")
		removed++
	}
	return removed
}
// filenameToResourceID converts a legacy discovery filename back to a resource ID.
// Legacy filenames encoded "type:host:resource" as "type_host_resource"
// (underscores in place of colons and slashes); this reverses the mapping done
// by the old getFilePath. Unparseable names are returned unchanged.
func filenameToResourceID(filename string) string {
	pieces := strings.SplitN(filename, "_", 3)
	if len(pieces) != 3 {
		// Not in the expected type_host_resource shape; hand it back untouched.
		return filename
	}
	resourceType, host, resource := pieces[0], pieces[1], pieces[2]
	// Kubernetes resources were stored as namespace_name; restore the
	// namespace/name form (only the first underscore was a slash).
	if resourceType == "k8s" {
		resource = strings.Replace(resource, "_", "/", 1)
	}
	return resourceType + ":" + host + ":" + resource
}
// readDiscoveryIDFromPath extracts just the "id" field from a stored discovery
// file, re-encrypting legacy plaintext files in place as a side effect.
// An empty or whitespace-only ID is reported as an error.
func (s *Store) readDiscoveryIDFromPath(filePath string) (string, error) {
	data, migratedPlaintext, err := s.loadDiscoveryFileData(filePath, maxDiscoveryFileReadBytes)
	if err != nil {
		return "", fmt.Errorf("failed to read discovery file: %w", err)
	}
	// Decode only the ID; the rest of the payload is irrelevant here.
	var header struct {
		ID string `json:"id"`
	}
	if err := json.Unmarshal(data, &header); err != nil {
		return "", fmt.Errorf("failed to unmarshal discovery ID: %w", err)
	}
	if err := s.maybeRewritePlaintextDiscovery(filePath, data, migratedPlaintext); err != nil {
		return "", err
	}
	if strings.TrimSpace(header.ID) == "" {
		return "", fmt.Errorf("discovery ID is empty")
	}
	return header.ID, nil
}
// readDiscoveryIDFromFile resolves a discovery-directory leaf name and reads
// the stored discovery ID from it.
func (s *Store) readDiscoveryIDFromFile(filename string) (string, error) {
	path, err := s.discoveryPathForLeaf(filename)
	if err != nil {
		return "", err
	}
	return s.readDiscoveryIDFromPath(path)
}
// ListDiscoveryIDs returns all discovery IDs currently stored on disk.
// Unreadable files are logged and skipped; a missing or unreadable directory
// yields nil.
func (s *Store) ListDiscoveryIDs() []string {
	entries, err := os.ReadDir(s.dataDir)
	if err != nil {
		return nil
	}
	var ids []string
	for _, entry := range entries {
		name := entry.Name()
		if entry.IsDir() || !strings.HasSuffix(name, ".enc") {
			continue
		}
		id, readErr := s.readDiscoveryIDFromFile(name)
		if readErr != nil {
			log.Warn().
				Err(readErr).
				Str("file", name).
				Msg("Skipping discovery ID listing for unreadable discovery file")
			continue
		}
		ids = append(ids, id)
	}
	return ids
}