Merge pull request #149 from safing/feature/host-metrics

Add host metrics
This commit is contained in:
Daniel 2021-12-22 11:24:45 +01:00 committed by GitHub
commit 31a7c99f74
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 433 additions and 18 deletions

View file

@ -8,6 +8,12 @@ import (
"time"
)
var (
warnLogLines = new(uint64)
errLogLines = new(uint64)
critLogLines = new(uint64)
)
func log(level Severity, msg string, tracer *ContextTracer) {
if !started.IsSet() {
@ -146,6 +152,7 @@ func Infof(format string, things ...interface{}) {
// Warning is used to log (potentially) bad events, but nothing broke (even a little) and there is no need to panic yet.
func Warning(msg string) {
atomic.AddUint64(warnLogLines, 1)
if fastcheck(WarningLevel) {
log(WarningLevel, msg, nil)
}
@ -153,6 +160,7 @@ func Warning(msg string) {
// Warningf is used to log (potentially) bad events, but nothing broke (even a little) and there is no need to panic yet.
func Warningf(format string, things ...interface{}) {
atomic.AddUint64(warnLogLines, 1)
if fastcheck(WarningLevel) {
log(WarningLevel, fmt.Sprintf(format, things...), nil)
}
@ -160,6 +168,7 @@ func Warningf(format string, things ...interface{}) {
// Error is used to log errors that break or impair functionality. The task/process may have to be aborted and tried again later. The system is still operational. Maybe User/Admin should be informed.
func Error(msg string) {
atomic.AddUint64(errLogLines, 1)
if fastcheck(ErrorLevel) {
log(ErrorLevel, msg, nil)
}
@ -167,6 +176,7 @@ func Error(msg string) {
// Errorf is used to log errors that break or impair functionality. The task/process may have to be aborted and tried again later. The system is still operational.
func Errorf(format string, things ...interface{}) {
atomic.AddUint64(errLogLines, 1)
if fastcheck(ErrorLevel) {
log(ErrorLevel, fmt.Sprintf(format, things...), nil)
}
@ -174,6 +184,7 @@ func Errorf(format string, things ...interface{}) {
// Critical is used to log events that completely break the system. Operation connot continue. User/Admin must be informed.
func Critical(msg string) {
atomic.AddUint64(critLogLines, 1)
if fastcheck(CriticalLevel) {
log(CriticalLevel, msg, nil)
}
@ -181,7 +192,26 @@ func Critical(msg string) {
// Criticalf is used to log events that completely break the system. Operation connot continue. User/Admin must be informed.
func Criticalf(format string, things ...interface{}) {
atomic.AddUint64(critLogLines, 1)
if fastcheck(CriticalLevel) {
log(CriticalLevel, fmt.Sprintf(format, things...), nil)
}
}
// TotalWarningLogLines returns the total amount of warning log lines since
// start of the program.
func TotalWarningLogLines() uint64 {
return atomic.LoadUint64(warnLogLines)
}
// TotalErrorLogLines returns the total amount of error log lines since start
// of the program.
func TotalErrorLogLines() uint64 {
return atomic.LoadUint64(errLogLines)
}
// TotalCriticalLogLines returns the total amount of critical log lines since
// start of the program.
func TotalCriticalLogLines() uint64 {
return atomic.LoadUint64(critLogLines)
}

View file

@ -120,7 +120,7 @@ func writeMetricsTo(ctx context.Context, url string) error {
func metricsWriter(ctx context.Context) error {
pushURL := pushOption()
ticker := time.NewTicker(10 * time.Second)
ticker := time.NewTicker(1 * time.Minute)
defer ticker.Stop()
for {

View file

@ -2,11 +2,13 @@ package metrics
import (
"flag"
"os"
"strings"
"github.com/safing/portbase/config"
)
// Configuration Keys
// Configuration Keys.
var (
CfgOptionInstanceKey = "core/metrics/instance"
instanceOption config.StringOption
@ -16,13 +18,22 @@ var (
pushOption config.StringOption
cfgOptionPushOrder = 0
pushFlag string
instanceFlag string
pushFlag string
instanceFlag string
defaultInstance string
)
func init() {
hostname, err := os.Hostname()
if err == nil {
hostname = strings.ReplaceAll(hostname, "-", "")
if prometheusFormat.MatchString(hostname) {
defaultInstance = hostname
}
}
flag.StringVar(&pushFlag, "push-metrics", "", "set default URL to push prometheus metrics to")
flag.StringVar(&instanceFlag, "metrics-instance", "", "set the default global instance label")
flag.StringVar(&instanceFlag, "metrics-instance", defaultInstance, "set the default global instance label")
}
func prepConfig() error {

View file

@ -7,16 +7,18 @@ import (
"sort"
"strings"
vm "github.com/VictoriaMetrics/metrics"
"github.com/safing/portbase/api"
"github.com/safing/portbase/config"
vm "github.com/VictoriaMetrics/metrics"
)
// PrometheusFormatRequirement is required format defined by prometheus for
// metric and label names.
const prometheusBaseFormt = "[a-zA-Z_][a-zA-Z0-9_]*"
const PrometheusFormatRequirement = "^" + prometheusBaseFormt + "$"
const (
prometheusBaseFormt = "[a-zA-Z_][a-zA-Z0-9_]*"
PrometheusFormatRequirement = "^" + prometheusBaseFormt + "$"
)
var prometheusFormat = regexp.MustCompile(PrometheusFormatRequirement)

View file

@ -0,0 +1,57 @@
package metrics
import (
"fmt"
"io"
vm "github.com/VictoriaMetrics/metrics"
)
// FetchingCounter is a counter metric that fetches the values via a function call.
type FetchingCounter struct {
*metricBase
counter *vm.Counter
fetchCnt func() uint64
}
// NewFetchingCounter registers a new fetching counter metric.
func NewFetchingCounter(id string, labels map[string]string, fn func() uint64, opts *Options) (*FetchingCounter, error) {
// Check if a fetch function is provided.
if fn == nil {
return nil, fmt.Errorf("%w: no fetch function provided", ErrInvalidOptions)
}
// Ensure that there are options.
if opts == nil {
opts = &Options{}
}
// Make base.
base, err := newMetricBase(id, labels, *opts)
if err != nil {
return nil, err
}
// Create metric struct.
m := &FetchingCounter{
metricBase: base,
fetchCnt: fn,
}
// Create metric in set
m.counter = m.set.NewCounter(m.LabeledID())
// Register metric.
err = register(m)
if err != nil {
return nil, err
}
return m, nil
}
// WritePrometheus writes the metric in the prometheus format to the given writer.
func (fc *FetchingCounter) WritePrometheus(w io.Writer) {
fc.counter.Set(fc.fetchCnt())
fc.metricBase.set.WritePrometheus(w)
}

252
metrics/metrics_host.go Normal file
View file

@ -0,0 +1,252 @@
package metrics
import (
"runtime"
"sync"
"time"
"github.com/shirou/gopsutil/disk"
"github.com/shirou/gopsutil/load"
"github.com/shirou/gopsutil/mem"
"github.com/safing/portbase/api"
"github.com/safing/portbase/dataroot"
"github.com/safing/portbase/log"
)
const hostStatTTL = 1 * time.Second
func registeHostMetrics() (err error) {
// Register load average metrics.
_, err = NewGauge("host/load/avg/1", nil, getFloat64HostStat(LoadAvg1), &Options{Name: "Host Load Avg 1min", Permission: api.PermitUser})
if err != nil {
return err
}
_, err = NewGauge("host/load/avg/5", nil, getFloat64HostStat(LoadAvg5), &Options{Name: "Host Load Avg 5min", Permission: api.PermitUser})
if err != nil {
return err
}
_, err = NewGauge("host/load/avg/15", nil, getFloat64HostStat(LoadAvg15), &Options{Name: "Host Load Avg 15min", Permission: api.PermitUser})
if err != nil {
return err
}
// Register memory usage metrics.
_, err = NewGauge("host/mem/total", nil, getUint64HostStat(MemTotal), &Options{Name: "Host Memory Total", Permission: api.PermitUser})
if err != nil {
return err
}
_, err = NewGauge("host/mem/used", nil, getUint64HostStat(MemUsed), &Options{Name: "Host Memory Used", Permission: api.PermitUser})
if err != nil {
return err
}
_, err = NewGauge("host/mem/available", nil, getUint64HostStat(MemAvailable), &Options{Name: "Host Memory Available", Permission: api.PermitUser})
if err != nil {
return err
}
_, err = NewGauge("host/mem/used/percent", nil, getFloat64HostStat(MemUsedPercent), &Options{Name: "Host Memory Used in Percent", Permission: api.PermitUser})
if err != nil {
return err
}
// Register disk usage metrics.
_, err = NewGauge("host/disk/total", nil, getUint64HostStat(DiskTotal), &Options{Name: "Host Disk Total", Permission: api.PermitUser})
if err != nil {
return err
}
_, err = NewGauge("host/disk/used", nil, getUint64HostStat(DiskUsed), &Options{Name: "Host Disk Used", Permission: api.PermitUser})
if err != nil {
return err
}
_, err = NewGauge("host/disk/free", nil, getUint64HostStat(DiskFree), &Options{Name: "Host Disk Free", Permission: api.PermitUser})
if err != nil {
return err
}
_, err = NewGauge("host/disk/used/percent", nil, getFloat64HostStat(DiskUsedPercent), &Options{Name: "Host Disk Used in Percent", Permission: api.PermitUser})
if err != nil {
return err
}
return nil
}
func getUint64HostStat(getStat func() (uint64, bool)) func() float64 {
return func() float64 {
val, _ := getStat()
return float64(val)
}
}
func getFloat64HostStat(getStat func() (float64, bool)) func() float64 {
return func() float64 {
val, _ := getStat()
return val
}
}
var (
loadAvg *load.AvgStat
loadAvgExpires time.Time
loadAvgLock sync.Mutex
)
func getLoadAvg() *load.AvgStat {
loadAvgLock.Lock()
defer loadAvgLock.Unlock()
// Return cache if still valid.
if time.Now().Before(loadAvgExpires) {
return loadAvg
}
// Refresh.
var err error
loadAvg, err = load.Avg()
if err != nil {
log.Warningf("metrics: failed to get load avg: %s", err)
loadAvg = nil
}
loadAvgExpires = time.Now().Add(hostStatTTL)
return loadAvg
}
func LoadAvg1() (loadAvg float64, ok bool) {
if stat := getLoadAvg(); stat != nil {
return stat.Load1 / float64(runtime.NumCPU()), true
}
return 0, false
}
func LoadAvg5() (loadAvg float64, ok bool) {
if stat := getLoadAvg(); stat != nil {
return stat.Load5 / float64(runtime.NumCPU()), true
}
return 0, false
}
func LoadAvg15() (loadAvg float64, ok bool) {
if stat := getLoadAvg(); stat != nil {
return stat.Load15 / float64(runtime.NumCPU()), true
}
return 0, false
}
var (
memStat *mem.VirtualMemoryStat
memStatExpires time.Time
memStatLock sync.Mutex
)
func getMemStat() *mem.VirtualMemoryStat {
memStatLock.Lock()
defer memStatLock.Unlock()
// Return cache if still valid.
if time.Now().Before(memStatExpires) {
return memStat
}
// Refresh.
var err error
memStat, err = mem.VirtualMemory()
if err != nil {
log.Warningf("metrics: failed to get load avg: %s", err)
memStat = nil
}
memStatExpires = time.Now().Add(hostStatTTL)
return memStat
}
func MemTotal() (total uint64, ok bool) {
if stat := getMemStat(); stat != nil {
return stat.Total, true
}
return 0, false
}
func MemUsed() (used uint64, ok bool) {
if stat := getMemStat(); stat != nil {
return stat.Used, true
}
return 0, false
}
func MemAvailable() (available uint64, ok bool) {
if stat := getMemStat(); stat != nil {
return stat.Available, true
}
return 0, false
}
func MemUsedPercent() (usedPercent float64, ok bool) {
if stat := getMemStat(); stat != nil {
return stat.UsedPercent, true
}
return 0, false
}
var (
diskStat *disk.UsageStat
diskStatExpires time.Time
diskStatLock sync.Mutex
)
func getDiskStat() *disk.UsageStat {
diskStatLock.Lock()
defer diskStatLock.Unlock()
// Return cache if still valid.
if time.Now().Before(diskStatExpires) {
return diskStat
}
// Check if we have a data root.
dataRoot := dataroot.Root()
if dataRoot == nil {
log.Warning("metrics: cannot get disk stats without data root")
diskStat = nil
diskStatExpires = time.Now().Add(hostStatTTL)
return diskStat
}
// Refresh.
var err error
diskStat, err = disk.Usage(dataRoot.Path)
if err != nil {
log.Warningf("metrics: failed to get load avg: %s", err)
diskStat = nil
}
diskStatExpires = time.Now().Add(hostStatTTL)
return diskStat
}
func DiskTotal() (total uint64, ok bool) {
if stat := getDiskStat(); stat != nil {
return stat.Total, true
}
return 0, false
}
func DiskUsed() (used uint64, ok bool) {
if stat := getDiskStat(); stat != nil {
return stat.Used, true
}
return 0, false
}
func DiskFree() (free uint64, ok bool) {
if stat := getDiskStat(); stat != nil {
return stat.Free, true
}
return 0, false
}
func DiskUsedPercent() (usedPercent float64, ok bool) {
if stat := getDiskStat(); stat != nil {
return stat.UsedPercent, true
}
return 0, false
}

49
metrics/metrics_logs.go Normal file
View file

@ -0,0 +1,49 @@
package metrics
import (
"github.com/safing/portbase/api"
"github.com/safing/portbase/log"
)
func registeLogMetrics() (err error) {
_, err = NewFetchingCounter(
"logs/warning/total",
nil,
log.TotalWarningLogLines,
&Options{
Name: "Total Warning Log Lines",
Permission: api.PermitUser,
},
)
if err != nil {
return err
}
_, err = NewFetchingCounter(
"logs/error/total",
nil,
log.TotalErrorLogLines,
&Options{
Name: "Total Error Log Lines",
Permission: api.PermitUser,
},
)
if err != nil {
return err
}
_, err = NewFetchingCounter(
"logs/critical/total",
nil,
log.TotalCriticalLogLines,
&Options{
Name: "Total Critical Log Lines",
Permission: api.PermitUser,
},
)
if err != nil {
return err
}
return nil
}

View file

@ -4,6 +4,7 @@ import (
"io"
vm "github.com/VictoriaMetrics/metrics"
"github.com/safing/portbase/api"
"github.com/safing/portbase/config"
)

View file

@ -29,10 +29,13 @@ var (
// ErrAlreadySet is returned when a value is already set and cannot be changed.
ErrAlreadySet = errors.New("already set")
// ErrInvalidOptions is returned when invalid options where provided.
ErrInvalidOptions = errors.New("invalid options")
)
func init() {
module = modules.Register("metrics", prep, start, stop, "database", "api")
module = modules.Register("metrics", prep, start, stop, "config", "database", "api")
}
func prep() error {
@ -55,6 +58,14 @@ func start() error {
return err
}
if err := registeHostMetrics(); err != nil {
return err
}
if err := registeLogMetrics(); err != nil {
return err
}
if err := registerAPI(); err != nil {
return err
}

View file

@ -6,10 +6,11 @@ import (
"sync"
"time"
"github.com/tevino/abool"
"github.com/safing/portbase/database"
"github.com/safing/portbase/database/record"
"github.com/safing/portbase/log"
"github.com/tevino/abool"
)
var (
@ -96,6 +97,7 @@ func storePersistentMetrics() {
// Create new storage.
newStorage := &metricsStorage{
// TODO: This timestamp should be taken from previous save, if possible.
Start: time.Now(),
Counters: make(map[string]uint64),
}
@ -134,18 +136,18 @@ func getMetricsStorage(key string) (*metricsStorage, error) {
// unwrap
if r.IsWrapped() {
// only allocate a new struct, if we need it
new := &metricsStorage{}
err = record.Unwrap(r, new)
newStorage := &metricsStorage{}
err = record.Unwrap(r, newStorage)
if err != nil {
return nil, err
}
return new, nil
return newStorage, nil
}
// or adjust type
new, ok := r.(*metricsStorage)
newStorage, ok := r.(*metricsStorage)
if !ok {
return nil, fmt.Errorf("record not of type *metricsStorage, but %T", r)
}
return new, nil
return newStorage, nil
}

View file

@ -106,9 +106,9 @@ func Start() error {
// complete startup
if moduleMgmtEnabled.IsSet() {
log.Info("modules: initiated subsystems manager")
log.Info("modules: started enabled modules")
} else {
log.Infof("modules: started %d modules", len(modules))
log.Infof("modules: started all %d modules", len(modules))
}
go taskQueueHandler()