mirror of
https://github.com/safing/portbase
synced 2025-09-01 18:19:57 +00:00
Merge pull request #149 from safing/feature/host-metrics
Add host metrics
This commit is contained in:
commit
31a7c99f74
12 changed files with 433 additions and 18 deletions
30
log/input.go
30
log/input.go
|
@ -8,6 +8,12 @@ import (
|
|||
"time"
|
||||
)
|
||||
|
||||
var (
|
||||
warnLogLines = new(uint64)
|
||||
errLogLines = new(uint64)
|
||||
critLogLines = new(uint64)
|
||||
)
|
||||
|
||||
func log(level Severity, msg string, tracer *ContextTracer) {
|
||||
|
||||
if !started.IsSet() {
|
||||
|
@ -146,6 +152,7 @@ func Infof(format string, things ...interface{}) {
|
|||
|
||||
// Warning is used to log (potentially) bad events, but nothing broke (even a little) and there is no need to panic yet.
|
||||
func Warning(msg string) {
|
||||
atomic.AddUint64(warnLogLines, 1)
|
||||
if fastcheck(WarningLevel) {
|
||||
log(WarningLevel, msg, nil)
|
||||
}
|
||||
|
@ -153,6 +160,7 @@ func Warning(msg string) {
|
|||
|
||||
// Warningf is used to log (potentially) bad events, but nothing broke (even a little) and there is no need to panic yet.
|
||||
func Warningf(format string, things ...interface{}) {
|
||||
atomic.AddUint64(warnLogLines, 1)
|
||||
if fastcheck(WarningLevel) {
|
||||
log(WarningLevel, fmt.Sprintf(format, things...), nil)
|
||||
}
|
||||
|
@ -160,6 +168,7 @@ func Warningf(format string, things ...interface{}) {
|
|||
|
||||
// Error is used to log errors that break or impair functionality. The task/process may have to be aborted and tried again later. The system is still operational. Maybe User/Admin should be informed.
|
||||
func Error(msg string) {
|
||||
atomic.AddUint64(errLogLines, 1)
|
||||
if fastcheck(ErrorLevel) {
|
||||
log(ErrorLevel, msg, nil)
|
||||
}
|
||||
|
@ -167,6 +176,7 @@ func Error(msg string) {
|
|||
|
||||
// Errorf is used to log errors that break or impair functionality. The task/process may have to be aborted and tried again later. The system is still operational.
|
||||
func Errorf(format string, things ...interface{}) {
|
||||
atomic.AddUint64(errLogLines, 1)
|
||||
if fastcheck(ErrorLevel) {
|
||||
log(ErrorLevel, fmt.Sprintf(format, things...), nil)
|
||||
}
|
||||
|
@ -174,6 +184,7 @@ func Errorf(format string, things ...interface{}) {
|
|||
|
||||
// Critical is used to log events that completely break the system. Operation connot continue. User/Admin must be informed.
|
||||
func Critical(msg string) {
|
||||
atomic.AddUint64(critLogLines, 1)
|
||||
if fastcheck(CriticalLevel) {
|
||||
log(CriticalLevel, msg, nil)
|
||||
}
|
||||
|
@ -181,7 +192,26 @@ func Critical(msg string) {
|
|||
|
||||
// Criticalf is used to log events that completely break the system. Operation connot continue. User/Admin must be informed.
|
||||
func Criticalf(format string, things ...interface{}) {
|
||||
atomic.AddUint64(critLogLines, 1)
|
||||
if fastcheck(CriticalLevel) {
|
||||
log(CriticalLevel, fmt.Sprintf(format, things...), nil)
|
||||
}
|
||||
}
|
||||
|
||||
// TotalWarningLogLines returns the total amount of warning log lines since
|
||||
// start of the program.
|
||||
func TotalWarningLogLines() uint64 {
|
||||
return atomic.LoadUint64(warnLogLines)
|
||||
}
|
||||
|
||||
// TotalErrorLogLines returns the total amount of error log lines since start
|
||||
// of the program.
|
||||
func TotalErrorLogLines() uint64 {
|
||||
return atomic.LoadUint64(errLogLines)
|
||||
}
|
||||
|
||||
// TotalCriticalLogLines returns the total amount of critical log lines since
|
||||
// start of the program.
|
||||
func TotalCriticalLogLines() uint64 {
|
||||
return atomic.LoadUint64(critLogLines)
|
||||
}
|
||||
|
|
|
@ -120,7 +120,7 @@ func writeMetricsTo(ctx context.Context, url string) error {
|
|||
|
||||
func metricsWriter(ctx context.Context) error {
|
||||
pushURL := pushOption()
|
||||
ticker := time.NewTicker(10 * time.Second)
|
||||
ticker := time.NewTicker(1 * time.Minute)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
|
|
|
@ -2,11 +2,13 @@ package metrics
|
|||
|
||||
import (
|
||||
"flag"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/safing/portbase/config"
|
||||
)
|
||||
|
||||
// Configuration Keys
|
||||
// Configuration Keys.
|
||||
var (
|
||||
CfgOptionInstanceKey = "core/metrics/instance"
|
||||
instanceOption config.StringOption
|
||||
|
@ -16,13 +18,22 @@ var (
|
|||
pushOption config.StringOption
|
||||
cfgOptionPushOrder = 0
|
||||
|
||||
pushFlag string
|
||||
instanceFlag string
|
||||
pushFlag string
|
||||
instanceFlag string
|
||||
defaultInstance string
|
||||
)
|
||||
|
||||
func init() {
|
||||
hostname, err := os.Hostname()
|
||||
if err == nil {
|
||||
hostname = strings.ReplaceAll(hostname, "-", "")
|
||||
if prometheusFormat.MatchString(hostname) {
|
||||
defaultInstance = hostname
|
||||
}
|
||||
}
|
||||
|
||||
flag.StringVar(&pushFlag, "push-metrics", "", "set default URL to push prometheus metrics to")
|
||||
flag.StringVar(&instanceFlag, "metrics-instance", "", "set the default global instance label")
|
||||
flag.StringVar(&instanceFlag, "metrics-instance", defaultInstance, "set the default global instance label")
|
||||
}
|
||||
|
||||
func prepConfig() error {
|
||||
|
|
|
@ -7,16 +7,18 @@ import (
|
|||
"sort"
|
||||
"strings"
|
||||
|
||||
vm "github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/safing/portbase/api"
|
||||
"github.com/safing/portbase/config"
|
||||
|
||||
vm "github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
// PrometheusFormatRequirement is required format defined by prometheus for
|
||||
// metric and label names.
|
||||
const prometheusBaseFormt = "[a-zA-Z_][a-zA-Z0-9_]*"
|
||||
const PrometheusFormatRequirement = "^" + prometheusBaseFormt + "$"
|
||||
const (
|
||||
prometheusBaseFormt = "[a-zA-Z_][a-zA-Z0-9_]*"
|
||||
PrometheusFormatRequirement = "^" + prometheusBaseFormt + "$"
|
||||
)
|
||||
|
||||
var prometheusFormat = regexp.MustCompile(PrometheusFormatRequirement)
|
||||
|
||||
|
|
57
metrics/metric_counter_fetching.go
Normal file
57
metrics/metric_counter_fetching.go
Normal file
|
@ -0,0 +1,57 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
vm "github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
// FetchingCounter is a counter metric that fetches the values via a function call.
|
||||
type FetchingCounter struct {
|
||||
*metricBase
|
||||
counter *vm.Counter
|
||||
fetchCnt func() uint64
|
||||
}
|
||||
|
||||
// NewFetchingCounter registers a new fetching counter metric.
|
||||
func NewFetchingCounter(id string, labels map[string]string, fn func() uint64, opts *Options) (*FetchingCounter, error) {
|
||||
// Check if a fetch function is provided.
|
||||
if fn == nil {
|
||||
return nil, fmt.Errorf("%w: no fetch function provided", ErrInvalidOptions)
|
||||
}
|
||||
|
||||
// Ensure that there are options.
|
||||
if opts == nil {
|
||||
opts = &Options{}
|
||||
}
|
||||
|
||||
// Make base.
|
||||
base, err := newMetricBase(id, labels, *opts)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Create metric struct.
|
||||
m := &FetchingCounter{
|
||||
metricBase: base,
|
||||
fetchCnt: fn,
|
||||
}
|
||||
|
||||
// Create metric in set
|
||||
m.counter = m.set.NewCounter(m.LabeledID())
|
||||
|
||||
// Register metric.
|
||||
err = register(m)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// WritePrometheus writes the metric in the prometheus format to the given writer.
|
||||
func (fc *FetchingCounter) WritePrometheus(w io.Writer) {
|
||||
fc.counter.Set(fc.fetchCnt())
|
||||
fc.metricBase.set.WritePrometheus(w)
|
||||
}
|
252
metrics/metrics_host.go
Normal file
252
metrics/metrics_host.go
Normal file
|
@ -0,0 +1,252 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/shirou/gopsutil/disk"
|
||||
"github.com/shirou/gopsutil/load"
|
||||
"github.com/shirou/gopsutil/mem"
|
||||
|
||||
"github.com/safing/portbase/api"
|
||||
"github.com/safing/portbase/dataroot"
|
||||
"github.com/safing/portbase/log"
|
||||
)
|
||||
|
||||
const hostStatTTL = 1 * time.Second
|
||||
|
||||
func registeHostMetrics() (err error) {
|
||||
// Register load average metrics.
|
||||
_, err = NewGauge("host/load/avg/1", nil, getFloat64HostStat(LoadAvg1), &Options{Name: "Host Load Avg 1min", Permission: api.PermitUser})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = NewGauge("host/load/avg/5", nil, getFloat64HostStat(LoadAvg5), &Options{Name: "Host Load Avg 5min", Permission: api.PermitUser})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = NewGauge("host/load/avg/15", nil, getFloat64HostStat(LoadAvg15), &Options{Name: "Host Load Avg 15min", Permission: api.PermitUser})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Register memory usage metrics.
|
||||
_, err = NewGauge("host/mem/total", nil, getUint64HostStat(MemTotal), &Options{Name: "Host Memory Total", Permission: api.PermitUser})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = NewGauge("host/mem/used", nil, getUint64HostStat(MemUsed), &Options{Name: "Host Memory Used", Permission: api.PermitUser})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = NewGauge("host/mem/available", nil, getUint64HostStat(MemAvailable), &Options{Name: "Host Memory Available", Permission: api.PermitUser})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = NewGauge("host/mem/used/percent", nil, getFloat64HostStat(MemUsedPercent), &Options{Name: "Host Memory Used in Percent", Permission: api.PermitUser})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Register disk usage metrics.
|
||||
_, err = NewGauge("host/disk/total", nil, getUint64HostStat(DiskTotal), &Options{Name: "Host Disk Total", Permission: api.PermitUser})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = NewGauge("host/disk/used", nil, getUint64HostStat(DiskUsed), &Options{Name: "Host Disk Used", Permission: api.PermitUser})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = NewGauge("host/disk/free", nil, getUint64HostStat(DiskFree), &Options{Name: "Host Disk Free", Permission: api.PermitUser})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = NewGauge("host/disk/used/percent", nil, getFloat64HostStat(DiskUsedPercent), &Options{Name: "Host Disk Used in Percent", Permission: api.PermitUser})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func getUint64HostStat(getStat func() (uint64, bool)) func() float64 {
|
||||
return func() float64 {
|
||||
val, _ := getStat()
|
||||
return float64(val)
|
||||
}
|
||||
}
|
||||
|
||||
func getFloat64HostStat(getStat func() (float64, bool)) func() float64 {
|
||||
return func() float64 {
|
||||
val, _ := getStat()
|
||||
return val
|
||||
}
|
||||
}
|
||||
|
||||
var (
|
||||
loadAvg *load.AvgStat
|
||||
loadAvgExpires time.Time
|
||||
loadAvgLock sync.Mutex
|
||||
)
|
||||
|
||||
func getLoadAvg() *load.AvgStat {
|
||||
loadAvgLock.Lock()
|
||||
defer loadAvgLock.Unlock()
|
||||
|
||||
// Return cache if still valid.
|
||||
if time.Now().Before(loadAvgExpires) {
|
||||
return loadAvg
|
||||
}
|
||||
|
||||
// Refresh.
|
||||
var err error
|
||||
loadAvg, err = load.Avg()
|
||||
if err != nil {
|
||||
log.Warningf("metrics: failed to get load avg: %s", err)
|
||||
loadAvg = nil
|
||||
}
|
||||
loadAvgExpires = time.Now().Add(hostStatTTL)
|
||||
|
||||
return loadAvg
|
||||
}
|
||||
|
||||
func LoadAvg1() (loadAvg float64, ok bool) {
|
||||
if stat := getLoadAvg(); stat != nil {
|
||||
return stat.Load1 / float64(runtime.NumCPU()), true
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
|
||||
func LoadAvg5() (loadAvg float64, ok bool) {
|
||||
if stat := getLoadAvg(); stat != nil {
|
||||
return stat.Load5 / float64(runtime.NumCPU()), true
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
|
||||
func LoadAvg15() (loadAvg float64, ok bool) {
|
||||
if stat := getLoadAvg(); stat != nil {
|
||||
return stat.Load15 / float64(runtime.NumCPU()), true
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
|
||||
var (
|
||||
memStat *mem.VirtualMemoryStat
|
||||
memStatExpires time.Time
|
||||
memStatLock sync.Mutex
|
||||
)
|
||||
|
||||
func getMemStat() *mem.VirtualMemoryStat {
|
||||
memStatLock.Lock()
|
||||
defer memStatLock.Unlock()
|
||||
|
||||
// Return cache if still valid.
|
||||
if time.Now().Before(memStatExpires) {
|
||||
return memStat
|
||||
}
|
||||
|
||||
// Refresh.
|
||||
var err error
|
||||
memStat, err = mem.VirtualMemory()
|
||||
if err != nil {
|
||||
log.Warningf("metrics: failed to get load avg: %s", err)
|
||||
memStat = nil
|
||||
}
|
||||
memStatExpires = time.Now().Add(hostStatTTL)
|
||||
|
||||
return memStat
|
||||
}
|
||||
|
||||
func MemTotal() (total uint64, ok bool) {
|
||||
if stat := getMemStat(); stat != nil {
|
||||
return stat.Total, true
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
|
||||
func MemUsed() (used uint64, ok bool) {
|
||||
if stat := getMemStat(); stat != nil {
|
||||
return stat.Used, true
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
|
||||
func MemAvailable() (available uint64, ok bool) {
|
||||
if stat := getMemStat(); stat != nil {
|
||||
return stat.Available, true
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
|
||||
func MemUsedPercent() (usedPercent float64, ok bool) {
|
||||
if stat := getMemStat(); stat != nil {
|
||||
return stat.UsedPercent, true
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
|
||||
var (
|
||||
diskStat *disk.UsageStat
|
||||
diskStatExpires time.Time
|
||||
diskStatLock sync.Mutex
|
||||
)
|
||||
|
||||
func getDiskStat() *disk.UsageStat {
|
||||
diskStatLock.Lock()
|
||||
defer diskStatLock.Unlock()
|
||||
|
||||
// Return cache if still valid.
|
||||
if time.Now().Before(diskStatExpires) {
|
||||
return diskStat
|
||||
}
|
||||
|
||||
// Check if we have a data root.
|
||||
dataRoot := dataroot.Root()
|
||||
if dataRoot == nil {
|
||||
log.Warning("metrics: cannot get disk stats without data root")
|
||||
diskStat = nil
|
||||
diskStatExpires = time.Now().Add(hostStatTTL)
|
||||
return diskStat
|
||||
}
|
||||
|
||||
// Refresh.
|
||||
var err error
|
||||
diskStat, err = disk.Usage(dataRoot.Path)
|
||||
if err != nil {
|
||||
log.Warningf("metrics: failed to get load avg: %s", err)
|
||||
diskStat = nil
|
||||
}
|
||||
diskStatExpires = time.Now().Add(hostStatTTL)
|
||||
|
||||
return diskStat
|
||||
}
|
||||
|
||||
func DiskTotal() (total uint64, ok bool) {
|
||||
if stat := getDiskStat(); stat != nil {
|
||||
return stat.Total, true
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
|
||||
func DiskUsed() (used uint64, ok bool) {
|
||||
if stat := getDiskStat(); stat != nil {
|
||||
return stat.Used, true
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
|
||||
func DiskFree() (free uint64, ok bool) {
|
||||
if stat := getDiskStat(); stat != nil {
|
||||
return stat.Free, true
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
|
||||
func DiskUsedPercent() (usedPercent float64, ok bool) {
|
||||
if stat := getDiskStat(); stat != nil {
|
||||
return stat.UsedPercent, true
|
||||
}
|
||||
return 0, false
|
||||
}
|
49
metrics/metrics_logs.go
Normal file
49
metrics/metrics_logs.go
Normal file
|
@ -0,0 +1,49 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"github.com/safing/portbase/api"
|
||||
"github.com/safing/portbase/log"
|
||||
)
|
||||
|
||||
func registeLogMetrics() (err error) {
|
||||
_, err = NewFetchingCounter(
|
||||
"logs/warning/total",
|
||||
nil,
|
||||
log.TotalWarningLogLines,
|
||||
&Options{
|
||||
Name: "Total Warning Log Lines",
|
||||
Permission: api.PermitUser,
|
||||
},
|
||||
)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
_, err = NewFetchingCounter(
|
||||
"logs/error/total",
|
||||
nil,
|
||||
log.TotalErrorLogLines,
|
||||
&Options{
|
||||
Name: "Total Error Log Lines",
|
||||
Permission: api.PermitUser,
|
||||
},
|
||||
)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
_, err = NewFetchingCounter(
|
||||
"logs/critical/total",
|
||||
nil,
|
||||
log.TotalCriticalLogLines,
|
||||
&Options{
|
||||
Name: "Total Critical Log Lines",
|
||||
Permission: api.PermitUser,
|
||||
},
|
||||
)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
|
@ -4,6 +4,7 @@ import (
|
|||
"io"
|
||||
|
||||
vm "github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/safing/portbase/api"
|
||||
"github.com/safing/portbase/config"
|
||||
)
|
|
@ -29,10 +29,13 @@ var (
|
|||
|
||||
// ErrAlreadySet is returned when a value is already set and cannot be changed.
|
||||
ErrAlreadySet = errors.New("already set")
|
||||
|
||||
// ErrInvalidOptions is returned when invalid options where provided.
|
||||
ErrInvalidOptions = errors.New("invalid options")
|
||||
)
|
||||
|
||||
func init() {
|
||||
module = modules.Register("metrics", prep, start, stop, "database", "api")
|
||||
module = modules.Register("metrics", prep, start, stop, "config", "database", "api")
|
||||
}
|
||||
|
||||
func prep() error {
|
||||
|
@ -55,6 +58,14 @@ func start() error {
|
|||
return err
|
||||
}
|
||||
|
||||
if err := registeHostMetrics(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := registeLogMetrics(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := registerAPI(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -6,10 +6,11 @@ import (
|
|||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/tevino/abool"
|
||||
|
||||
"github.com/safing/portbase/database"
|
||||
"github.com/safing/portbase/database/record"
|
||||
"github.com/safing/portbase/log"
|
||||
"github.com/tevino/abool"
|
||||
)
|
||||
|
||||
var (
|
||||
|
@ -96,6 +97,7 @@ func storePersistentMetrics() {
|
|||
|
||||
// Create new storage.
|
||||
newStorage := &metricsStorage{
|
||||
// TODO: This timestamp should be taken from previous save, if possible.
|
||||
Start: time.Now(),
|
||||
Counters: make(map[string]uint64),
|
||||
}
|
||||
|
@ -134,18 +136,18 @@ func getMetricsStorage(key string) (*metricsStorage, error) {
|
|||
// unwrap
|
||||
if r.IsWrapped() {
|
||||
// only allocate a new struct, if we need it
|
||||
new := &metricsStorage{}
|
||||
err = record.Unwrap(r, new)
|
||||
newStorage := &metricsStorage{}
|
||||
err = record.Unwrap(r, newStorage)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return new, nil
|
||||
return newStorage, nil
|
||||
}
|
||||
|
||||
// or adjust type
|
||||
new, ok := r.(*metricsStorage)
|
||||
newStorage, ok := r.(*metricsStorage)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("record not of type *metricsStorage, but %T", r)
|
||||
}
|
||||
return new, nil
|
||||
return newStorage, nil
|
||||
}
|
||||
|
|
|
@ -106,9 +106,9 @@ func Start() error {
|
|||
|
||||
// complete startup
|
||||
if moduleMgmtEnabled.IsSet() {
|
||||
log.Info("modules: initiated subsystems manager")
|
||||
log.Info("modules: started enabled modules")
|
||||
} else {
|
||||
log.Infof("modules: started %d modules", len(modules))
|
||||
log.Infof("modules: started all %d modules", len(modules))
|
||||
}
|
||||
|
||||
go taskQueueHandler()
|
||||
|
|
Loading…
Add table
Reference in a new issue