safing-portmaster/service/resolver/failing.go
Daniel Hååvi 80664d1a27
Restructure modules ()
* Move portbase into monorepo

* Add new simple module mgr

* [WIP] Switch to new simple module mgr

* Add StateMgr and more worker variants

* [WIP] Switch more modules

* [WIP] Switch more modules

* [WIP] Switch more modules

* [WIP] switch all SPN modules

* [WIP] switch all service modules

* [WIP] Convert all workers to the new module system

* [WIP] add new task system to module manager

* [WIP] Add second take for scheduling workers

* [WIP] Add FIXME for bugs in new scheduler

* [WIP] Add minor improvements to scheduler

* [WIP] Add new worker scheduler

* [WIP] Fix more bugs related to new module system

* [WIP] Fix start handling of the new module system

* [WIP] Improve startup process

* [WIP] Fix minor issues

* [WIP] Fix missing subsystem in settings

* [WIP] Initialize managers in constructor

* [WIP] Move module event initialization to constructors

* [WIP] Fix setting for enabling and disabling the SPN module

* [WIP] Move API registration into module construction

* [WIP] Update states mgr for all modules

* [WIP] Add CmdLine operation support

* Add state helper methods to module group and instance

* Add notification and module status handling to status package

* Fix starting issues

* Remove pilot widget and update security lock to new status data

* Remove debug logs

* Improve http server shutdown

* Add workaround for cleanly shutting down firewall+netquery

* Improve logging

* Add syncing states with notifications for new module system

* Improve starting, stopping, shutdown; resolve FIXMEs/TODOs

* [WIP] Fix most unit tests

* Review new module system and fix minor issues

* Push shutdown and restart events again via API

* Set sleep mode via interface

* Update example/template module

* [WIP] Fix spn/cabin unit test

* Remove deprecated UI elements

* Make log output more similar for the logging transition phase

* Switch spn hub and observer cmds to new module system

* Fix log sources

* Make worker mgr less error prone

* Fix tests and minor issues

* Fix observation hub

* Improve shutdown and restart handling

* Split up big connection.go source file

* Move varint and dsd packages to structures repo

* Improve expansion test

* Fix linter warnings

* Fix interception module on windows

* Fix linter errors

---------

Co-authored-by: Vladimir Stoilov <vladimir@safing.io>
2024-08-09 18:15:48 +03:00


package resolver

import (
	"time"

	"github.com/safing/portmaster/base/log"
	"github.com/safing/portmaster/service/mgr"
	"github.com/safing/portmaster/service/netenv"
)

var (
	// FailThreshold is the number of errors a resolver must experience in order to be regarded as failed.
	FailThreshold = 5

	// FailObserveDuration is the duration in which failures are counted in order to mark a resolver as failed.
	FailObserveDuration = time.Duration(FailThreshold) * 10 * time.Second
)

// IsFailing returns whether this resolver is currently failing.
func (brc *BasicResolverConn) IsFailing() bool {
	return brc.failing.IsSet()
}

// ReportFailure reports that an error occurred with this resolver.
func (brc *BasicResolverConn) ReportFailure() {
	// Don't mark the resolver as failed if we are offline.
	if !netenv.Online() {
		return
	}

	// Ignore the report when we are already failing.
	if brc.IsFailing() {
		return
	}

	brc.failLock.Lock()
	defer brc.failLock.Unlock()

	// Check if we are within the observation period.
	if time.Since(brc.failingStarted) > FailObserveDuration {
		brc.fails = 1
		brc.failingStarted = time.Now()
		return
	}

	// Increase the fail counter and check if the resolver needs to be marked as failing.
	brc.fails++
	if brc.fails > FailThreshold {
		brc.failing.Set()
	}

	// Report to netenv that a configured server failed.
	if brc.resolver.Info.Source == ServerSourceConfigured {
		netenv.ConnectedToDNS.UnSet()
	}
}

// ResetFailure resets the failure status.
func (brc *BasicResolverConn) ResetFailure() {
	if brc.failing.SetToIf(true, false) {
		brc.failLock.Lock()
		defer brc.failLock.Unlock()

		brc.fails = 0
		brc.failingStarted = time.Time{}
	}

	// Report to netenv that a configured server succeeded.
	if brc.resolver.Info.Source == ServerSourceConfigured {
		netenv.ConnectedToDNS.Set()
	}
}

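// checkFailingResolvers is intended to run as a periodic worker: it schedules
// its own next run via the failing resolver worker manager, then re-tests all
// resolvers currently marked as failing and resets their failure state once
// they respond correctly again.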
func checkFailingResolvers(wc *mgr.WorkerCtx) error {
	var resolvers []*Resolver

	// Set the next execution time.
	module.failingResolverWorkerMgr.Delay(time.Duration(nameserverRetryRate()) * time.Second)

	// Make a copy of the resolver list.
	func() {
		resolversLock.Lock()
		defer resolversLock.Unlock()

		resolvers = make([]*Resolver, len(globalResolvers))
		copy(resolvers, globalResolvers)
	}()

	// Start logging.
	ctx, tracer := log.AddTracer(wc.Ctx())
	tracer.Debugf("resolver: checking failed resolvers")
	defer tracer.Submit()

	// Go through all resolvers and check if they are reachable again.
	for i, resolver := range resolvers {
		// Skip resolvers that are not failing.
		if !resolver.Conn.IsFailing() {
			continue
		}
		tracer.Tracef("resolver: testing failed resolver [%d/%d] %s", i+1, len(resolvers), resolver)

		// Test if we can resolve via this resolver.
		ips, _, err := testConnectivity(ctx, netenv.DNSTestDomain, resolver)
		switch {
		case err != nil:
			tracer.Debugf("resolver: failed resolver %s is still failing: %s", resolver, err)
		case len(ips) == 0 || !ips[0].Equal(netenv.DNSTestExpectedIP):
			tracer.Debugf("resolver: failed resolver %s received unexpected A records: %s", resolver, ips)
		default:
			// The resolver test was successful.
			tracer.Infof("resolver: check successful, resolver %s is available again", resolver)
			resolver.Conn.ResetFailure()
		}

		// Check if the context was canceled.
		if ctx.Err() != nil {
			return ctx.Err()
		}
	}

	return nil
}
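
For context, a minimal usage sketch of the failure API above (not part of failing.go): it assumes it lives in the same resolver package and additionally imports "context"; probeResolver is a hypothetical helper name, and it reuses the testConnectivity helper already called by checkFailingResolvers.

// probeResolver is a hypothetical sketch, not part of the package: it skips
// resolvers already marked as failing, reports errors towards FailThreshold,
// and resets the failure state after a successful test.
func probeResolver(ctx context.Context, resolver *Resolver) error {
	if resolver.Conn.IsFailing() {
		return nil // already marked as failing; checkFailingResolvers will retest it
	}
	_, _, err := testConnectivity(ctx, netenv.DNSTestDomain, resolver)
	if err != nil {
		// Counts towards FailThreshold within FailObserveDuration.
		resolver.Conn.ReportFailure()
		return err
	}
	resolver.Conn.ResetFailure()
	return nil
}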