mirror of
https://github.com/rcourtman/Pulse.git
synced 2026-05-07 00:37:36 +00:00
1068 lines
30 KiB
Go
1068 lines
30 KiB
Go
package monitoring
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"sort"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/rcourtman/pulse-go-rewrite/internal/config"
|
|
"github.com/rcourtman/pulse-go-rewrite/internal/unifiedresources"
|
|
"github.com/rcourtman/pulse-go-rewrite/internal/vmware"
|
|
"github.com/rs/zerolog/log"
|
|
)
|
|
|
|
// defaultVMwarePollInterval is the fallback poll cadence used when the poller
// is constructed without an explicit positive interval.
const defaultVMwarePollInterval = 60 * time.Second
|
|
|
|
// VMwareConnectionPollError captures the last runtime poll error for one
// configured VMware connection.
type VMwareConnectionPollError struct {
	// At is when the failing attempt happened; nil when unknown.
	At *time.Time `json:"at,omitempty"`
	// Message is the trimmed error text from the failed poll.
	Message string `json:"message,omitempty"`
	// Category is the vmware.ConnectionError category when the error
	// carried one (see classifyVMwarePollError); empty otherwise.
	Category string `json:"category,omitempty"`
}
|
|
|
|
// VMwareConnectionPollStatus summarizes the runtime polling state for one
// configured VMware connection.
type VMwareConnectionPollStatus struct {
	// IntervalSeconds is the effective poll cadence in whole seconds.
	IntervalSeconds int `json:"intervalSeconds"`
	// LastAttemptAt / LastSuccessAt track the most recent poll attempt and
	// the most recent successful one; nil before any activity.
	LastAttemptAt *time.Time `json:"lastAttemptAt,omitempty"`
	LastSuccessAt *time.Time `json:"lastSuccessAt,omitempty"`
	// ConsecutiveFailures counts failed polls since the last success.
	ConsecutiveFailures int `json:"consecutiveFailures,omitempty"`
	// LastError describes the most recent failure; cleared on success.
	LastError *VMwareConnectionPollError `json:"lastError,omitempty"`
}
|
|
|
|
// VMwareConnectionObservedSummary summarizes the canonical resources the most
// recent successful poll contributed for one configured VMware connection.
type VMwareConnectionObservedSummary struct {
	// CollectedAt is when the underlying inventory snapshot was taken.
	CollectedAt *time.Time `json:"collectedAt,omitempty"`
	// Hosts/VMs/Datastores are counts taken from the snapshot slices.
	Hosts      int `json:"hosts"`
	VMs        int `json:"vms"`
	Datastores int `json:"datastores"`
	// VIRelease is the trimmed vCenter release string from the snapshot.
	VIRelease string `json:"viRelease,omitempty"`
	// Degraded is true when optional enrichment reads reported issues even
	// though the base inventory refresh succeeded.
	Degraded bool `json:"degraded,omitempty"`
	// IssueCount is the total number of enrichment issues observed; Issues
	// holds an aggregated, truncated summary of them (top three).
	IssueCount int                             `json:"issueCount,omitempty"`
	Issues     []VMwareConnectionObservedIssue `json:"issues,omitempty"`
}
|
|
|
|
// VMwareConnectionObservedIssue summarizes one class of optional VMware reads
// that degraded an otherwise successful inventory refresh.
type VMwareConnectionObservedIssue struct {
	// Stage/Category/Message identify the issue class (trimmed).
	Stage    string `json:"stage,omitempty"`
	Category string `json:"category,omitempty"`
	Message  string `json:"message,omitempty"`
	// Occurrences counts how many raw issues collapsed into this class.
	Occurrences int `json:"occurrences,omitempty"`
}
|
|
|
|
// VMwareConnectionSummary merges poll health with the most recent discovered
// platform contribution for one configured VMware connection.
type VMwareConnectionSummary struct {
	// Poll is the runtime poll health; always populated by ConnectionSummaries.
	Poll *VMwareConnectionPollStatus `json:"poll,omitempty"`
	// Observed is the last successful poll's contribution, when known.
	Observed *VMwareConnectionObservedSummary `json:"observed,omitempty"`
}
|
|
|
|
// vmwareConnectionRuntimeStatus is the poller's internal, mutex-guarded record
// of poll health for one connection. All fields are accessed only while
// VMwarePoller.mu is held.
type vmwareConnectionRuntimeStatus struct {
	lastAttemptAt time.Time
	lastSuccessAt time.Time
	lastError     *VMwareConnectionPollError
	// consecutiveFailures resets to zero on any success.
	consecutiveFailures int
	observed            *VMwareConnectionObservedSummary
	// observedIssueKey is a canonical fingerprint of the last observed
	// enrichment issues, used to detect issue-set changes between polls.
	observedIssueKey string
}
|
|
|
|
// vmwareObservedTransition describes a loggable change in a connection's
// degraded-enrichment state (entered, changed, or recovered).
type vmwareObservedTransition struct {
	// action is the log action label (e.g. "refresh_partial").
	action  string
	message string
	// issueCount is the issue total in the new observed state.
	issueCount int
}
|
|
|
|
// vmwarePollerProvider is the poller's seam around one vCenter connection:
// it refreshes inventory and exposes the resulting records, timeline changes,
// and raw snapshot. Close releases the underlying client.
type vmwarePollerProvider interface {
	Refresh(ctx context.Context) error
	Records() []unifiedresources.IngestRecord
	ActivityChanges() []unifiedresources.ResourceChange
	Snapshot() *vmware.InventorySnapshot
	Close()
}
|
|
|
|
// VMwarePoller manages periodic polling of configured VMware vCenter connections.
// All map fields below are guarded by mu.
type VMwarePoller struct {
	// multiTenant supplies per-org persistence for loading VMware configs.
	multiTenant *config.MultiTenantPersistence

	mu sync.Mutex
	// providersByOrg is keyed by orgID, then connection ID.
	providersByOrg map[string]map[string]vmwarePollerProvider
	// configsByOrg is keyed by orgID, then connection ID.
	configsByOrg map[string]map[string]config.VMwareVCenterInstance
	// statusByOrg is keyed by orgID, then connection ID.
	statusByOrg map[string]map[string]*vmwareConnectionRuntimeStatus
	// cachedRecordsByOrg is keyed by orgID, then connection ID.
	cachedRecordsByOrg map[string]map[string][]unifiedresources.IngestRecord
	// cachedChangesByOrg is keyed by orgID, then connection ID.
	cachedChangesByOrg map[string]map[string][]unifiedresources.ResourceChange
	// newProvider builds a provider for one connection; overridable in tests.
	newProvider func(config.VMwareVCenterInstance) (vmwarePollerProvider, error)
	// cancel stops the polling goroutine; nil when not running.
	cancel context.CancelFunc
	// stopped is closed when the current polling goroutine exits.
	stopped chan struct{}
	// interval is the poll cadence; explicitInterval records whether it was
	// supplied by the caller rather than defaulted.
	interval         time.Duration
	explicitInterval bool
}
|
|
|
|
// vmwarePollerProviderEntry pairs a provider with its owning org and
// connection IDs so polling can run outside the poller's mutex.
type vmwarePollerProviderEntry struct {
	orgID        string
	connectionID string
	provider     vmwarePollerProvider
}
|
|
|
|
// NewVMwarePoller builds a new VMware poller with the provided interval.
// A non-positive interval falls back to defaultVMwarePollInterval;
// explicitInterval remembers whether the caller supplied one.
func NewVMwarePoller(multiTenant *config.MultiTenantPersistence, interval time.Duration) *VMwarePoller {
	explicitInterval := interval > 0
	if interval <= 0 {
		interval = defaultVMwarePollInterval
	}

	// Start with an already-closed stopped channel so Stop on a poller that
	// never started returns immediately.
	stopped := make(chan struct{})
	close(stopped)

	poller := &VMwarePoller{
		multiTenant:        multiTenant,
		providersByOrg:     make(map[string]map[string]vmwarePollerProvider),
		configsByOrg:       make(map[string]map[string]config.VMwareVCenterInstance),
		statusByOrg:        make(map[string]map[string]*vmwareConnectionRuntimeStatus),
		cachedRecordsByOrg: make(map[string]map[string][]unifiedresources.IngestRecord),
		cachedChangesByOrg: make(map[string]map[string][]unifiedresources.ResourceChange),
		stopped:            stopped,
		interval:           interval,
		explicitInterval:   explicitInterval,
	}
	// Default provider factory; tests may override newProvider.
	poller.newProvider = poller.defaultProvider
	return poller
}
|
|
|
|
// Start begins periodic VMware polling if the feature flag is enabled.
// It is idempotent: a second Start while already running is a no-op.
// The polling goroutine runs until ctx is cancelled or Stop is called.
func (p *VMwarePoller) Start(ctx context.Context) {
	if p == nil || !vmware.IsFeatureEnabled() {
		return
	}
	if ctx == nil {
		ctx = context.Background()
	}

	p.mu.Lock()
	// Non-nil cancel means a polling goroutine is already active.
	if p.cancel != nil {
		p.mu.Unlock()
		return
	}

	runCtx, cancel := context.WithCancel(ctx)
	p.cancel = cancel
	// Fresh stopped channel per run; captured locally so the goroutine can
	// tell whether it is still the current generation on exit.
	p.stopped = make(chan struct{})
	stopped := p.stopped
	p.mu.Unlock()

	go func() {
		defer close(stopped)
		defer func() {
			p.mu.Lock()
			// Only clear cancel if no newer Start replaced this run.
			if p.stopped == stopped {
				p.cancel = nil
			}
			p.mu.Unlock()
		}()

		// Immediate first cycle before entering the timer loop.
		p.syncConnections()
		p.pollAll(runCtx)
		for {
			wait := p.nextWaitDuration(time.Now())
			timer := time.NewTimer(wait)
			select {
			case <-runCtx.Done():
				// Drain the timer channel if it already fired so the
				// Timer can be collected cleanly.
				if !timer.Stop() {
					select {
					case <-timer.C:
					default:
					}
				}
				return
			case <-timer.C:
				p.syncConnections()
				p.pollAll(runCtx)
			}
		}
	}()
}
|
|
|
|
// Stop requests poller shutdown and waits up to five seconds for exit.
// Safe to call on a nil or never-started poller (the initial stopped
// channel is pre-closed by NewVMwarePoller).
func (p *VMwarePoller) Stop() {
	if p == nil {
		return
	}

	p.mu.Lock()
	cancel := p.cancel
	stopped := p.stopped
	p.mu.Unlock()

	if cancel != nil {
		cancel()
	}
	if stopped == nil {
		return
	}

	timer := time.NewTimer(5 * time.Second)
	defer timer.Stop()

	// Wait for the polling goroutine to close its stopped channel, but never
	// block the caller for more than the timeout.
	select {
	case <-stopped:
	case <-timer.C:
		log.Warn().
			Str("component", "vmware_poller").
			Str("action", "stop").
			Dur("timeout", 5*time.Second).
			Msg("VMware poller stop timed out waiting for shutdown")
	}
}
|
|
|
|
// syncConnections reconciles the poller's provider/config/status maps with the
// currently persisted VMware connection settings across all organizations:
// new or changed enabled connections get (re)built providers, and connections
// that disappeared or were disabled are closed and evicted. All persistence
// reads happen before the mutex is taken; map mutation happens under it.
func (p *VMwarePoller) syncConnections() {
	if p == nil {
		return
	}
	if p.multiTenant == nil {
		log.Warn().
			Str("component", "vmware_poller").
			Str("action", "sync_connections").
			Msg("VMware poller cannot sync connections because multi-tenant persistence is nil")
		return
	}

	orgs, err := p.multiTenant.ListOrganizations()
	if err != nil {
		log.Warn().
			Str("component", "vmware_poller").
			Str("action", "list_organizations").
			Err(err).
			Msg("VMware poller failed to list organizations")
		return
	}

	// Phase 1 (no lock): build the desired state — enabled connections per org.
	active := make(map[string]map[string]config.VMwareVCenterInstance, len(orgs))
	for _, org := range orgs {
		if org == nil {
			continue
		}
		orgID := strings.TrimSpace(org.ID)
		if orgID == "" {
			continue
		}
		persistence, err := p.multiTenant.GetPersistence(orgID)
		if err != nil || persistence == nil {
			log.Warn().
				Str("component", "vmware_poller").
				Str("action", "get_persistence").
				Str("org_id", orgID).
				Err(err).
				Msg("VMware poller failed to open tenant persistence")
			continue
		}

		instances, err := persistence.LoadVMwareConfig()
		if err != nil {
			log.Warn().
				Str("component", "vmware_poller").
				Str("action", "load_vmware_config").
				Str("org_id", orgID).
				Err(err).
				Msg("VMware poller failed to load tenant VMware config")
			continue
		}

		activeByConnection := make(map[string]config.VMwareVCenterInstance)
		for _, instance := range instances {
			instance.ApplyDefaults()
			if !instance.Enabled {
				continue
			}
			connectionID := strings.TrimSpace(instance.ID)
			if connectionID == "" {
				continue
			}
			activeByConnection[connectionID] = instance
		}
		active[orgID] = activeByConnection
	}

	p.mu.Lock()
	defer p.mu.Unlock()

	// Phase 2 (locked): create/replace providers for active connections.
	for orgID, connectionMap := range active {
		if p.providersByOrg[orgID] == nil {
			p.providersByOrg[orgID] = make(map[string]vmwarePollerProvider)
		}
		if p.configsByOrg[orgID] == nil {
			p.configsByOrg[orgID] = make(map[string]config.VMwareVCenterInstance)
		}
		if p.cachedRecordsByOrg[orgID] == nil {
			p.cachedRecordsByOrg[orgID] = make(map[string][]unifiedresources.IngestRecord)
		}
		if p.cachedChangesByOrg[orgID] == nil {
			p.cachedChangesByOrg[orgID] = make(map[string][]unifiedresources.ResourceChange)
		}
		if p.statusByOrg[orgID] == nil {
			p.statusByOrg[orgID] = make(map[string]*vmwareConnectionRuntimeStatus)
		}

		for connectionID, instance := range connectionMap {
			existingConfig, exists := p.configsByOrg[orgID][connectionID]
			// Unchanged config with a live provider: nothing to do.
			if exists && existingConfig == instance && p.providersByOrg[orgID][connectionID] != nil {
				continue
			}

			// Config changed (or provider was never built): close the old
			// provider before replacing it.
			if provider := p.providersByOrg[orgID][connectionID]; provider != nil {
				provider.Close()
			}

			provider, err := p.newProvider(instance)
			if err != nil {
				log.Warn().
					Str("component", "vmware_poller").
					Str("action", "build_provider").
					Str("org_id", orgID).
					Str("connection_id", connectionID).
					Err(err).
					Msg("VMware poller failed to build provider")
				delete(p.providersByOrg[orgID], connectionID)
				delete(p.cachedRecordsByOrg[orgID], connectionID)
				delete(p.cachedChangesByOrg[orgID], connectionID)
				// Record the config anyway so an identical config does not
				// keep re-triggering the close-and-rebuild path above needlessly;
				// a nil provider still forces a rebuild attempt next sync.
				p.configsByOrg[orgID][connectionID] = instance
				continue
			}

			p.providersByOrg[orgID][connectionID] = provider
			p.configsByOrg[orgID][connectionID] = instance
		}
	}

	// Phase 3 (locked): evict providers whose connections are no longer active.
	for orgID, providers := range p.providersByOrg {
		activeConnections := active[orgID]
		for connectionID, provider := range providers {
			if _, ok := activeConnections[connectionID]; ok {
				continue
			}
			if provider != nil {
				provider.Close()
			}
			delete(providers, connectionID)
			if p.cachedRecordsByOrg[orgID] != nil {
				delete(p.cachedRecordsByOrg[orgID], connectionID)
			}
			if p.cachedChangesByOrg[orgID] != nil {
				delete(p.cachedChangesByOrg[orgID], connectionID)
			}
			if p.configsByOrg[orgID] != nil {
				delete(p.configsByOrg[orgID], connectionID)
			}
			if p.statusByOrg[orgID] != nil {
				delete(p.statusByOrg[orgID], connectionID)
			}
		}

		// Drop empty org entries entirely so the maps do not grow unbounded.
		if len(providers) == 0 {
			delete(p.providersByOrg, orgID)
			delete(p.configsByOrg, orgID)
			delete(p.statusByOrg, orgID)
			delete(p.cachedRecordsByOrg, orgID)
			delete(p.cachedChangesByOrg, orgID)
		}
	}
}
|
|
|
|
func (p *VMwarePoller) pollAll(ctx context.Context) {
|
|
for _, entry := range p.providerEntries() {
|
|
p.pollConnection(ctx, entry)
|
|
}
|
|
}
|
|
|
|
// pollConnection refreshes one connection with a 20-second timeout, records
// success or failure in the runtime status maps, caches the resulting records
// and timeline changes, and logs any degraded-enrichment transition. The
// mutex is held only around map updates, never across the network refresh.
func (p *VMwarePoller) pollConnection(ctx context.Context, entry vmwarePollerProviderEntry) {
	if entry.provider == nil {
		return
	}

	pollCtx, cancel := context.WithTimeout(ctx, 20*time.Second)
	defer cancel()

	at := time.Now().UTC()
	if err := entry.provider.Refresh(pollCtx); err != nil {
		p.mu.Lock()
		p.recordConnectionFailureLocked(entry.orgID, entry.connectionID, err, at)
		p.mu.Unlock()
		log.Warn().
			Str("component", "vmware_poller").
			Str("action", "refresh").
			Str("org_id", entry.orgID).
			Str("connection_id", entry.connectionID).
			Err(err).
			Msg("VMware poller refresh failed")
		return
	}

	// Clone outside the lock so callers never share backing storage with the
	// provider's internal state.
	records := cloneIngestRecords(entry.provider.Records())
	changes := cloneResourceChanges(entry.provider.ActivityChanges())
	snapshot := entry.provider.Snapshot()

	p.mu.Lock()
	transition := p.recordConnectionSuccessLocked(entry.orgID, entry.connectionID, at, snapshot)
	if p.cachedRecordsByOrg[entry.orgID] == nil {
		p.cachedRecordsByOrg[entry.orgID] = make(map[string][]unifiedresources.IngestRecord)
	}
	if p.cachedChangesByOrg[entry.orgID] == nil {
		p.cachedChangesByOrg[entry.orgID] = make(map[string][]unifiedresources.ResourceChange)
	}
	p.cachedRecordsByOrg[entry.orgID][entry.connectionID] = records
	p.cachedChangesByOrg[entry.orgID][entry.connectionID] = changes
	p.mu.Unlock()

	// Log degraded-enrichment transitions; partial refreshes warn, recovery
	// and other transitions log at info level.
	if transition != nil {
		event := log.Info()
		if strings.HasPrefix(transition.action, "refresh_partial") {
			event = log.Warn()
		}
		event.
			Str("component", "vmware_poller").
			Str("action", transition.action).
			Str("org_id", entry.orgID).
			Str("connection_id", entry.connectionID).
			Int("issue_count", transition.issueCount).
			Msg(transition.message)
	}
}
|
|
|
|
func (p *VMwarePoller) providerEntries() []vmwarePollerProviderEntry {
|
|
if p == nil {
|
|
return nil
|
|
}
|
|
|
|
p.mu.Lock()
|
|
defer p.mu.Unlock()
|
|
|
|
var entries []vmwarePollerProviderEntry
|
|
for orgID, providers := range p.providersByOrg {
|
|
for connectionID, provider := range providers {
|
|
if provider == nil {
|
|
continue
|
|
}
|
|
entries = append(entries, vmwarePollerProviderEntry{
|
|
orgID: orgID,
|
|
connectionID: connectionID,
|
|
provider: provider,
|
|
})
|
|
}
|
|
}
|
|
sort.Slice(entries, func(i, j int) bool {
|
|
if entries[i].orgID == entries[j].orgID {
|
|
return entries[i].connectionID < entries[j].connectionID
|
|
}
|
|
return entries[i].orgID < entries[j].orgID
|
|
})
|
|
return entries
|
|
}
|
|
|
|
// defaultProvider is the production provider factory: it dials a vCenter
// client from the instance settings and wraps it in the API provider with
// trimmed connection metadata. Used as the initial newProvider value.
func (p *VMwarePoller) defaultProvider(instance config.VMwareVCenterInstance) (vmwarePollerProvider, error) {
	client, err := vmware.NewClient(vmware.ClientConfig{
		Host:               instance.Host,
		Port:               instance.Port,
		Username:           instance.Username,
		Password:           instance.Password,
		InsecureSkipVerify: instance.InsecureSkipVerify,
		// Matches the per-poll context timeout used in pollConnection.
		Timeout: 20 * time.Second,
	})
	if err != nil {
		return nil, err
	}
	return vmware.NewAPIProvider(vmware.ProviderMetadata{
		ConnectionID:   strings.TrimSpace(instance.ID),
		ConnectionName: strings.TrimSpace(instance.Name),
		VCenterHost:    strings.TrimSpace(instance.Host),
	}, client), nil
}
|
|
|
|
// nextWaitDuration returns how long the poll loop should sleep before the
// next cycle. The now parameter is currently unused — presumably reserved for
// future schedule-relative or jittered waits; TODO confirm before removing.
func (p *VMwarePoller) nextWaitDuration(now time.Time) time.Duration {
	if p == nil || p.interval <= 0 {
		return defaultVMwarePollInterval
	}
	return p.interval
}
|
|
|
|
// ConnectionSummaries returns per-connection runtime poll health and discovered
// contribution summaries for the supplied VMware settings records. Every
// instance with a non-empty ID gets at least a Poll entry carrying the
// effective interval; runtime fields are filled in only when the poller has
// status for that connection. Returns nil when instances is empty.
func (p *VMwarePoller) ConnectionSummaries(orgID string, instances []config.VMwareVCenterInstance) map[string]VMwareConnectionSummary {
	if len(instances) == 0 {
		return nil
	}

	orgID = normalizeVMwareOrgID(orgID)
	summaries := make(map[string]VMwareConnectionSummary, len(instances))
	intervalSeconds := defaultVMwareConnectionPollIntervalSeconds()
	if p != nil {
		intervalSeconds = p.connectionPollIntervalSeconds()
	}

	for i := range instances {
		// Work on a copy so ApplyDefaults never mutates the caller's slice.
		instance := instances[i]
		instance.ApplyDefaults()
		connID := strings.TrimSpace(instance.ID)
		if connID == "" {
			continue
		}

		summary := VMwareConnectionSummary{
			Poll: &VMwareConnectionPollStatus{
				IntervalSeconds: int(intervalSeconds),
			},
		}

		if p != nil {
			// Take a deep-copied snapshot under the lock, then populate the
			// summary outside it.
			p.mu.Lock()
			status := p.cloneRuntimeStatusLocked(orgID, connID)
			p.mu.Unlock()
			if status != nil {
				summary.Poll.LastAttemptAt = cloneTimePointer(status.LastAttemptAt)
				summary.Poll.LastSuccessAt = cloneTimePointer(status.LastSuccessAt)
				summary.Poll.ConsecutiveFailures = status.ConsecutiveFailures
				summary.Poll.LastError = cloneVMwareConnectionPollError(status.LastError)
				summary.Observed = cloneVMwareObservedSummary(status.Observed)
			}
		}

		summaries[connID] = summary
	}

	return summaries
}
|
|
|
|
func (p *VMwarePoller) connectionPollIntervalSeconds() int {
|
|
if p == nil {
|
|
return defaultVMwareConnectionPollIntervalSeconds()
|
|
}
|
|
if p.interval < time.Second {
|
|
return defaultVMwareConnectionPollIntervalSeconds()
|
|
}
|
|
return int(p.interval / time.Second)
|
|
}
|
|
|
|
func defaultVMwareConnectionPollIntervalSeconds() int {
|
|
return int(defaultVMwarePollInterval / time.Second)
|
|
}
|
|
|
|
// GetCurrentRecords returns the latest known VMware records for the default org.
|
|
func (p *VMwarePoller) GetCurrentRecords() []unifiedresources.IngestRecord {
|
|
return p.GetCurrentRecordsForOrg("default")
|
|
}
|
|
|
|
// SupplementalRecords implements the monitor supplemental-record provider contract.
// It delegates to the org-scoped record accessor; the Monitor argument is unused.
func (p *VMwarePoller) SupplementalRecords(_ *Monitor, orgID string) []unifiedresources.IngestRecord {
	return p.GetCurrentRecordsForOrg(orgID)
}
|
|
|
|
// SupplementalChanges implements the monitor supplemental-change provider contract.
// It delegates to the org-scoped change accessor; the Monitor argument is unused.
func (p *VMwarePoller) SupplementalChanges(_ *Monitor, orgID string) []unifiedresources.ResourceChange {
	return p.GetCurrentChangesForOrg(orgID)
}
|
|
|
|
// SnapshotOwnedSources declares that VMware records are ingested through the
// supplemental resource path rather than legacy monitor snapshot slices.
func (p *VMwarePoller) SnapshotOwnedSources() []unifiedresources.DataSource {
	return []unifiedresources.DataSource{unifiedresources.SourceVMware}
}
|
|
|
|
// SnapshotOwnedSourcesForOrg is the tenant-aware variant of SnapshotOwnedSources.
// The answer is the same for every org, so the orgID parameter is ignored.
func (p *VMwarePoller) SnapshotOwnedSourcesForOrg(string) []unifiedresources.DataSource {
	return []unifiedresources.DataSource{unifiedresources.SourceVMware}
}
|
|
|
|
// SupplementalInventoryReadyAt reports when the current org-scoped VMware
// inventory has reached a settled initial baseline that has to be reflected in
// the canonical monitor store before billing can consume monitored-system
// counts. It returns ready immediately when the poller is nil or the org has
// no enabled connections; otherwise it is ready only once every configured
// connection has had at least one poll attempt, and the watermark is the
// latest of those attempt times.
func (p *VMwarePoller) SupplementalInventoryReadyAt(_ *Monitor, orgID string) (time.Time, bool) {
	if p == nil {
		return time.Time{}, true
	}

	orgID = normalizeVMwareOrgID(orgID)
	configs := p.activeConnectionConfigsForOrg(orgID)
	if len(configs) == 0 {
		return time.Time{}, true
	}

	p.mu.Lock()
	defer p.mu.Unlock()

	var watermark time.Time
	statusByConn := p.statusByOrg[orgID]
	for connID := range configs {
		status := statusByConn[connID]
		// Any connection that has never been attempted means the baseline
		// is not yet settled.
		if status == nil || status.lastAttemptAt.IsZero() {
			return time.Time{}, false
		}
		if status.lastAttemptAt.After(watermark) {
			watermark = status.lastAttemptAt
		}
	}

	return watermark, true
}
|
|
|
|
// GetCurrentRecordsForOrg returns the latest known VMware records for the specified org.
|
|
func (p *VMwarePoller) GetCurrentRecordsForOrg(orgID string) []unifiedresources.IngestRecord {
|
|
if p == nil {
|
|
return nil
|
|
}
|
|
|
|
orgID = strings.TrimSpace(orgID)
|
|
if orgID == "" {
|
|
orgID = "default"
|
|
}
|
|
|
|
p.mu.Lock()
|
|
defer p.mu.Unlock()
|
|
|
|
recordsByConnection := p.cachedRecordsByOrg[orgID]
|
|
if len(recordsByConnection) == 0 {
|
|
return nil
|
|
}
|
|
|
|
connectionIDs := make([]string, 0, len(recordsByConnection))
|
|
for connectionID := range recordsByConnection {
|
|
connectionIDs = append(connectionIDs, connectionID)
|
|
}
|
|
sort.Strings(connectionIDs)
|
|
|
|
total := 0
|
|
for _, connectionID := range connectionIDs {
|
|
total += len(recordsByConnection[connectionID])
|
|
}
|
|
if total == 0 {
|
|
return nil
|
|
}
|
|
|
|
records := make([]unifiedresources.IngestRecord, 0, total)
|
|
for _, connectionID := range connectionIDs {
|
|
records = append(records, cloneIngestRecords(recordsByConnection[connectionID])...)
|
|
}
|
|
return records
|
|
}
|
|
|
|
// activeConnectionConfigsForOrg returns the enabled VMware connection configs
// for one org, keyed by connection ID. It prefers the poller's in-memory
// cache; when the cache is empty it falls back to reading tenant persistence
// directly (best-effort: persistence errors return whatever the cache had).
func (p *VMwarePoller) activeConnectionConfigsForOrg(orgID string) map[string]config.VMwareVCenterInstance {
	orgID = normalizeVMwareOrgID(orgID)
	if p == nil {
		return nil
	}

	p.mu.Lock()
	cached := cloneVMwareConnectionConfigMap(p.configsByOrg[orgID])
	p.mu.Unlock()
	if len(cached) > 0 || p.multiTenant == nil {
		return cached
	}

	// Cache miss: read straight from persistence without holding the lock.
	persistence, err := p.multiTenant.GetPersistence(orgID)
	if err != nil || persistence == nil {
		return cached
	}
	instances, err := persistence.LoadVMwareConfig()
	if err != nil {
		return cached
	}

	active := make(map[string]config.VMwareVCenterInstance)
	for i := range instances {
		// Copy before ApplyDefaults so the loaded slice is not mutated.
		instance := instances[i]
		instance.ApplyDefaults()
		if !instance.Enabled {
			continue
		}
		connID := strings.TrimSpace(instance.ID)
		if connID == "" {
			continue
		}
		active[connID] = instance
	}
	return active
}
|
|
|
|
func cloneVMwareConnectionConfigMap(
|
|
in map[string]config.VMwareVCenterInstance,
|
|
) map[string]config.VMwareVCenterInstance {
|
|
if len(in) == 0 {
|
|
return nil
|
|
}
|
|
out := make(map[string]config.VMwareVCenterInstance, len(in))
|
|
for connID, instance := range in {
|
|
out[connID] = instance
|
|
}
|
|
return out
|
|
}
|
|
|
|
// GetCurrentChangesForOrg returns the latest known VMware timeline changes for the specified org.
|
|
func (p *VMwarePoller) GetCurrentChangesForOrg(orgID string) []unifiedresources.ResourceChange {
|
|
if p == nil {
|
|
return nil
|
|
}
|
|
|
|
orgID = strings.TrimSpace(orgID)
|
|
if orgID == "" {
|
|
orgID = "default"
|
|
}
|
|
|
|
p.mu.Lock()
|
|
defer p.mu.Unlock()
|
|
|
|
changesByConnection := p.cachedChangesByOrg[orgID]
|
|
if len(changesByConnection) == 0 {
|
|
return nil
|
|
}
|
|
|
|
connectionIDs := make([]string, 0, len(changesByConnection))
|
|
for connectionID := range changesByConnection {
|
|
connectionIDs = append(connectionIDs, connectionID)
|
|
}
|
|
sort.Strings(connectionIDs)
|
|
|
|
total := 0
|
|
for _, connectionID := range connectionIDs {
|
|
total += len(changesByConnection[connectionID])
|
|
}
|
|
if total == 0 {
|
|
return nil
|
|
}
|
|
|
|
changes := make([]unifiedresources.ResourceChange, 0, total)
|
|
for _, connectionID := range connectionIDs {
|
|
changes = append(changes, cloneResourceChanges(changesByConnection[connectionID])...)
|
|
}
|
|
return changes
|
|
}
|
|
|
|
func cloneResourceChanges(in []unifiedresources.ResourceChange) []unifiedresources.ResourceChange {
|
|
if in == nil {
|
|
return nil
|
|
}
|
|
out := make([]unifiedresources.ResourceChange, len(in))
|
|
for i := range in {
|
|
out[i] = in[i]
|
|
if in[i].OccurredAt != nil {
|
|
occurredAt := in[i].OccurredAt.UTC()
|
|
out[i].OccurredAt = &occurredAt
|
|
}
|
|
if in[i].RelatedResources != nil {
|
|
out[i].RelatedResources = append([]string(nil), in[i].RelatedResources...)
|
|
}
|
|
if in[i].Metadata != nil {
|
|
out[i].Metadata = make(map[string]any, len(in[i].Metadata))
|
|
for key, value := range in[i].Metadata {
|
|
out[i].Metadata[key] = value
|
|
}
|
|
}
|
|
}
|
|
return out
|
|
}
|
|
|
|
// cloneRuntimeStatusLocked returns a deep-copied snapshot of one connection's
// runtime status, or nil when none exists. Caller must hold p.mu.
func (p *VMwarePoller) cloneRuntimeStatusLocked(orgID string, connID string) *vmwareConnectionRuntimeSnapshot {
	if p == nil {
		return nil
	}
	byConn := p.statusByOrg[orgID]
	if byConn == nil {
		return nil
	}
	status := byConn[connID]
	if status == nil {
		return nil
	}
	// Every field is copied or cloned so the snapshot can be read safely
	// after the lock is released.
	return &vmwareConnectionRuntimeSnapshot{
		LastAttemptAt:       timePointerIfSet(status.lastAttemptAt),
		LastSuccessAt:       timePointerIfSet(status.lastSuccessAt),
		ConsecutiveFailures: status.consecutiveFailures,
		LastError:           cloneVMwareConnectionPollError(status.lastError),
		Observed:            cloneVMwareObservedSummary(status.observed),
	}
}
|
|
|
|
// vmwareConnectionRuntimeSnapshot is a lock-free, deep-copied view of one
// connection's runtime status, produced by cloneRuntimeStatusLocked for
// consumption outside the poller's mutex.
type vmwareConnectionRuntimeSnapshot struct {
	LastAttemptAt       *time.Time
	LastSuccessAt       *time.Time
	ConsecutiveFailures int
	LastError           *VMwareConnectionPollError
	Observed            *VMwareConnectionObservedSummary
}
|
|
|
|
func (p *VMwarePoller) ensureConnectionRuntimeStatusLocked(orgID, connID string) *vmwareConnectionRuntimeStatus {
|
|
if p.statusByOrg[orgID] == nil {
|
|
p.statusByOrg[orgID] = make(map[string]*vmwareConnectionRuntimeStatus)
|
|
}
|
|
if p.statusByOrg[orgID][connID] == nil {
|
|
p.statusByOrg[orgID][connID] = &vmwareConnectionRuntimeStatus{}
|
|
}
|
|
return p.statusByOrg[orgID][connID]
|
|
}
|
|
|
|
// recordConnectionSuccessLocked marks one connection's poll as successful at
// the given time, clearing failure state. When a snapshot is supplied it also
// refreshes the observed summary and returns any degraded-enrichment
// transition worth logging (nil otherwise). Caller must hold p.mu.
func (p *VMwarePoller) recordConnectionSuccessLocked(orgID, connID string, at time.Time, snapshot *vmware.InventorySnapshot) *vmwareObservedTransition {
	status := p.ensureConnectionRuntimeStatusLocked(orgID, connID)
	status.lastAttemptAt = at
	status.lastSuccessAt = at
	status.lastError = nil
	status.consecutiveFailures = 0
	if snapshot != nil {
		nextObserved := buildVMwareObservedSummary(snapshot)
		nextIssueKey := summarizeVMwareObservedIssueKey(snapshot.EnrichmentIssues)
		// Classify before overwriting so previous vs next can be compared.
		transition := classifyVMwareObservedTransition(status.observed, status.observedIssueKey, nextObserved, nextIssueKey)
		status.observed = nextObserved
		status.observedIssueKey = nextIssueKey
		return transition
	}
	return nil
}
|
|
|
|
// recordConnectionFailureLocked records one failed poll attempt: bumps the
// consecutive-failure counter and stores a categorized last error. A nil err
// is a no-op. Previously observed contribution data is preserved.
// Caller must hold p.mu.
func (p *VMwarePoller) recordConnectionFailureLocked(orgID, connID string, err error, at time.Time) {
	if err == nil {
		return
	}
	status := p.ensureConnectionRuntimeStatusLocked(orgID, connID)
	status.lastAttemptAt = at
	status.consecutiveFailures++
	status.lastError = &VMwareConnectionPollError{
		At:       timePointerIfSet(at),
		Message:  strings.TrimSpace(err.Error()),
		Category: classifyVMwarePollError(err),
	}
}
|
|
|
|
// RecordConnectionTestSuccess updates one saved VMware connection summary after
// a manual row-level test using the same canonical runtime health owner as the
// live poller. A nil summary still records the successful attempt; when a
// summary is given, the observed contribution is replaced (with no issue data,
// since a row test does not surface enrichment issues).
func (p *VMwarePoller) RecordConnectionTestSuccess(orgID, connID string, summary *vmware.InventorySummary, at time.Time) {
	if p == nil {
		return
	}
	connID = strings.TrimSpace(connID)
	if connID == "" {
		return
	}
	orgID = normalizeVMwareOrgID(orgID)

	p.mu.Lock()
	defer p.mu.Unlock()
	status := p.ensureConnectionRuntimeStatusLocked(orgID, connID)
	status.lastAttemptAt = at
	status.lastSuccessAt = at
	status.lastError = nil
	status.consecutiveFailures = 0
	if summary != nil {
		status.observed = &VMwareConnectionObservedSummary{
			CollectedAt: timePointerIfSet(at),
			Hosts:       summary.Hosts,
			VMs:         summary.VMs,
			Datastores:  summary.Datastores,
			VIRelease:   strings.TrimSpace(summary.VIRelease),
		}
		// Clear the issue fingerprint so the next real poll classifies
		// transitions against a clean baseline.
		status.observedIssueKey = ""
	}
}
|
|
|
|
// RecordConnectionTestFailure updates one saved VMware connection summary after
// a manual row-level test failure while preserving previously observed
// contribution summary. Nil poller, nil error, or an empty connection ID are
// all no-ops.
func (p *VMwarePoller) RecordConnectionTestFailure(orgID, connID string, err error, at time.Time) {
	if p == nil || err == nil {
		return
	}
	connID = strings.TrimSpace(connID)
	if connID == "" {
		return
	}
	orgID = normalizeVMwareOrgID(orgID)

	p.mu.Lock()
	defer p.mu.Unlock()
	p.recordConnectionFailureLocked(orgID, connID, err, at)
}
|
|
|
|
// buildVMwareObservedSummary converts an inventory snapshot into the observed
// contribution summary: resource counts, release string, and — when the
// snapshot carries enrichment issues — the degraded flag plus an aggregated
// (top-three) issue list. Returns nil for a nil snapshot.
func buildVMwareObservedSummary(snapshot *vmware.InventorySnapshot) *VMwareConnectionObservedSummary {
	if snapshot == nil {
		return nil
	}
	collectedAt := snapshot.CollectedAt
	summary := &VMwareConnectionObservedSummary{
		CollectedAt: timePointerIfSet(collectedAt),
		Hosts:       len(snapshot.Hosts),
		VMs:         len(snapshot.VMs),
		Datastores:  len(snapshot.Datastores),
		VIRelease:   strings.TrimSpace(snapshot.VIRelease),
	}
	if len(snapshot.EnrichmentIssues) > 0 {
		summary.Degraded = true
		// IssueCount is the raw total; Issues is aggregated and truncated.
		summary.IssueCount = len(snapshot.EnrichmentIssues)
		summary.Issues = summarizeVMwareObservedIssues(snapshot.EnrichmentIssues)
	}
	return summary
}
|
|
|
|
func classifyVMwareObservedTransition(previous *VMwareConnectionObservedSummary, previousIssueKey string, next *VMwareConnectionObservedSummary, nextIssueKey string) *vmwareObservedTransition {
|
|
prevDegraded := previous != nil && previous.Degraded
|
|
nextDegraded := next != nil && next.Degraded
|
|
|
|
switch {
|
|
case !prevDegraded && nextDegraded:
|
|
return &vmwareObservedTransition{
|
|
action: "refresh_partial",
|
|
message: "VMware poller refreshed base inventory with degraded optional enrichment",
|
|
issueCount: next.IssueCount,
|
|
}
|
|
case prevDegraded && nextDegraded && previousIssueKey != nextIssueKey:
|
|
return &vmwareObservedTransition{
|
|
action: "refresh_partial_changed",
|
|
message: "VMware poller degraded optional enrichment changed",
|
|
issueCount: next.IssueCount,
|
|
}
|
|
case prevDegraded && !nextDegraded:
|
|
return &vmwareObservedTransition{
|
|
action: "refresh_recovered",
|
|
message: "VMware poller optional enrichment recovered",
|
|
}
|
|
default:
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// classifyVMwarePollError extracts the category from a vmware.ConnectionError
// anywhere in the error chain (via errors.As). Returns "" for nil errors or
// errors that carry no connection category.
func classifyVMwarePollError(err error) string {
	if err == nil {
		return ""
	}
	var connectionErr *vmware.ConnectionError
	if errors.As(err, &connectionErr) && connectionErr != nil {
		return strings.TrimSpace(connectionErr.Category)
	}
	return ""
}
|
|
|
|
// normalizeVMwareOrgID trims the supplied org ID and substitutes "default"
// when the result is empty.
func normalizeVMwareOrgID(orgID string) string {
	if trimmed := strings.TrimSpace(orgID); trimmed != "" {
		return trimmed
	}
	return "default"
}
|
|
|
|
func cloneVMwareConnectionPollError(value *VMwareConnectionPollError) *VMwareConnectionPollError {
|
|
if value == nil {
|
|
return nil
|
|
}
|
|
return &VMwareConnectionPollError{
|
|
At: cloneTimePointer(value.At),
|
|
Message: strings.TrimSpace(value.Message),
|
|
Category: strings.TrimSpace(value.Category),
|
|
}
|
|
}
|
|
|
|
// cloneVMwareObservedSummary deep-copies an observed summary, trimming the
// release string and cloning the timestamp and issue slice so the copy shares
// no storage with the original. Nil stays nil.
func cloneVMwareObservedSummary(value *VMwareConnectionObservedSummary) *VMwareConnectionObservedSummary {
	if value == nil {
		return nil
	}
	return &VMwareConnectionObservedSummary{
		CollectedAt: cloneTimePointer(value.CollectedAt),
		Hosts:       value.Hosts,
		VMs:         value.VMs,
		Datastores:  value.Datastores,
		VIRelease:   strings.TrimSpace(value.VIRelease),
		Degraded:    value.Degraded,
		IssueCount:  value.IssueCount,
		Issues:      cloneVMwareObservedIssues(value.Issues),
	}
}
|
|
|
|
func summarizeVMwareObservedIssues(issues []vmware.InventoryEnrichmentIssue) []VMwareConnectionObservedIssue {
|
|
if len(issues) == 0 {
|
|
return nil
|
|
}
|
|
|
|
type aggregate struct {
|
|
issue VMwareConnectionObservedIssue
|
|
}
|
|
|
|
byKey := make(map[string]*aggregate, len(issues))
|
|
for _, issue := range issues {
|
|
key := strings.ToLower(strings.TrimSpace(issue.Stage)) + "\x00" +
|
|
strings.ToLower(strings.TrimSpace(issue.Category)) + "\x00" +
|
|
strings.ToLower(strings.TrimSpace(issue.Message))
|
|
entry := byKey[key]
|
|
if entry == nil {
|
|
entry = &aggregate{
|
|
issue: VMwareConnectionObservedIssue{
|
|
Stage: strings.TrimSpace(issue.Stage),
|
|
Category: strings.TrimSpace(issue.Category),
|
|
Message: strings.TrimSpace(issue.Message),
|
|
},
|
|
}
|
|
byKey[key] = entry
|
|
}
|
|
entry.issue.Occurrences++
|
|
}
|
|
|
|
summary := make([]VMwareConnectionObservedIssue, 0, len(byKey))
|
|
for _, entry := range byKey {
|
|
summary = append(summary, entry.issue)
|
|
}
|
|
sort.Slice(summary, func(i, j int) bool {
|
|
if summary[i].Occurrences != summary[j].Occurrences {
|
|
return summary[i].Occurrences > summary[j].Occurrences
|
|
}
|
|
if summary[i].Stage != summary[j].Stage {
|
|
return summary[i].Stage < summary[j].Stage
|
|
}
|
|
if summary[i].Category != summary[j].Category {
|
|
return summary[i].Category < summary[j].Category
|
|
}
|
|
return summary[i].Message < summary[j].Message
|
|
})
|
|
if len(summary) > 3 {
|
|
summary = summary[:3]
|
|
}
|
|
return summary
|
|
}
|
|
|
|
func summarizeVMwareObservedIssueKey(issues []vmware.InventoryEnrichmentIssue) string {
|
|
if len(issues) == 0 {
|
|
return ""
|
|
}
|
|
|
|
counts := make(map[string]int, len(issues))
|
|
for _, issue := range issues {
|
|
key := strings.ToLower(strings.TrimSpace(issue.Stage)) + "\x00" +
|
|
strings.ToLower(strings.TrimSpace(issue.Category)) + "\x00" +
|
|
strings.ToLower(strings.TrimSpace(issue.Message))
|
|
counts[key]++
|
|
}
|
|
|
|
parts := make([]string, 0, len(counts))
|
|
for key, count := range counts {
|
|
parts = append(parts, key+"\x00"+strconv.Itoa(count))
|
|
}
|
|
sort.Strings(parts)
|
|
return strings.Join(parts, "\n")
|
|
}
|
|
|
|
func cloneVMwareObservedIssues(in []VMwareConnectionObservedIssue) []VMwareConnectionObservedIssue {
|
|
if in == nil {
|
|
return nil
|
|
}
|
|
out := make([]VMwareConnectionObservedIssue, len(in))
|
|
copy(out, in)
|
|
for i := range out {
|
|
out[i].Stage = strings.TrimSpace(out[i].Stage)
|
|
out[i].Category = strings.TrimSpace(out[i].Category)
|
|
out[i].Message = strings.TrimSpace(out[i].Message)
|
|
}
|
|
return out
|
|
}
|