diff --git a/nameserver/nameserver.go b/nameserver/nameserver.go index 83cf8f15..ce1b7a62 100644 --- a/nameserver/nameserver.go +++ b/nameserver/nameserver.go @@ -106,12 +106,6 @@ func handleRequestAsWorker(w dns.ResponseWriter, query *dns.Msg) { } func handleRequest(ctx context.Context, w dns.ResponseWriter, query *dns.Msg) error { //nolint:gocognit // TODO - // return with server failure if offline - if netenv.GetOnlineStatus() == netenv.StatusOffline { - returnServerFailure(w, query) - return nil - } - // only process first question, that's how everyone does it. question := query.Question[0] q := &resolver.Query{ @@ -119,6 +113,14 @@ func handleRequest(ctx context.Context, w dns.ResponseWriter, query *dns.Msg) er QType: dns.Type(question.Qtype), } + // return with server failure if offline + if netenv.GetOnlineStatus() == netenv.StatusOffline && + !netenv.IsOnlineStatusTestDomain(q.FQDN) { + log.Tracer(ctx).Debugf("resolver: not resolving %s, device is offline", q.FQDN) + returnServerFailure(w, query) + return nil + } + // check class if question.Qclass != dns.ClassINET { // we only serve IN records, return nxdomain diff --git a/netenv/online-status.go b/netenv/online-status.go index 16a6c50d..62b75f6c 100644 --- a/netenv/online-status.go +++ b/netenv/online-status.go @@ -106,6 +106,8 @@ var ( captivePortalURL string captivePortalLock sync.Mutex + + waitForever = make(chan time.Time) ) func init() { @@ -200,12 +202,14 @@ func triggerOnlineStatusInvestigation() { } func monitorOnlineStatus(ctx context.Context) error { + triggerOnlineStatusInvestigation() for { // wait for trigger select { case <-ctx.Done(): return nil case <-onlineStatusInvestigationTrigger: + case <-getDynamicStatusTrigger(): } // enable waiting @@ -221,6 +225,21 @@ func monitorOnlineStatus(ctx context.Context) error { } } +func getDynamicStatusTrigger() <-chan time.Time { + switch GetOnlineStatus() { + case StatusOffline: + return time.After(10 * time.Second) + case StatusLimited, StatusPortal: 
+ return time.After(1 * time.Minute) + case StatusSemiOnline: + return time.After(5 * time.Minute) + case StatusOnline: + return waitForever + default: // unknown status + return time.After(5 * time.Minute) + } +} + func checkOnlineStatus(ctx context.Context) { // TODO: implement more methods /*status, err := getConnectivityStateFromDbus() diff --git a/resolver/resolve.go b/resolver/resolve.go index 4dad1b3a..5b0ac937 100644 --- a/resolver/resolve.go +++ b/resolver/resolve.go @@ -7,6 +7,8 @@ import ( "sync" "time" + "github.com/safing/portmaster/netenv" + "github.com/miekg/dns" "github.com/safing/portbase/database" @@ -24,6 +26,10 @@ var ( ErrLocalhost = errors.New("query for localhost") // ErrTimeout is returned when a query times out ErrTimeout = errors.New("query timed out") + // ErrOffline is returned when no network connection is detected + ErrOffline = errors.New("device is offline") + // ErrFailure is returned when the type of failure is unclear + ErrFailure = errors.New("query failed") // detailed errors @@ -213,13 +219,24 @@ func deduplicateRequest(ctx context.Context, q *Query) (finishRequest func()) { } } -func resolveAndCache(ctx context.Context, q *Query) (rrCache *RRCache, err error) { +func resolveAndCache(ctx context.Context, q *Query) (rrCache *RRCache, err error) { //nolint:gocognit // get resolvers resolvers := GetResolversInScope(ctx, q) if len(resolvers) == 0 { return nil, ErrNoCompliance } + // check if we are online + if netenv.GetOnlineStatus() == netenv.StatusOffline { + if netenv.IsOnlineStatusTestDomain(q.FQDN) { + log.Tracer(ctx).Debugf("resolver: permitting online status test domain %s to resolve even though offline", q.FQDN) + } else { + log.Tracer(ctx).Debugf("resolver: not resolving %s, device is offline", q.FQDN) + // we are offline and this is not an online check query + return nil, ErrOffline + } + } + // start resolving var i int @@ -246,6 +263,11 @@ resolveLoop: case errors.Is(err, ErrBlocked): // some resolvers might also block 
return nil, err + case netenv.GetOnlineStatus() == netenv.StatusOffline && + !netenv.IsOnlineStatusTestDomain(q.FQDN): + log.Tracer(ctx).Debugf("resolver: not resolving %s, device is offline", q.FQDN) + // we are offline and this is not an online check query + return nil, ErrOffline } } else { // no error diff --git a/resolver/resolver-tcp.go b/resolver/resolver-tcp.go index f4300f39..44dbd8a6 100644 --- a/resolver/resolver-tcp.go +++ b/resolver/resolver-tcp.go @@ -93,11 +93,9 @@ func (tr *TCPResolver) client(workerCtx context.Context) error { //nolint:gocogn var cancelConnCtx func() var recycleConn bool var shuttingDown bool + var failCnt int var incoming = make(chan *dns.Msg, 100) - // enable client restarting after crash - defer tr.clientStarted.UnSet() - connMgmt: for { // cleanup old connection @@ -111,7 +109,7 @@ connMgmt: } // check if we are shutting down or failing - if shuttingDown || tr.IsFailing() { + if shuttingDown || failCnt >= FailThreshold || tr.IsFailing() { // reply to all waiting queries tr.Lock() for id, inFlight := range tr.inFlightQueries { @@ -181,7 +179,12 @@ connMgmt: c, err := tr.dnsClient.Dial(tr.resolver.ServerAddress) if err != nil { tr.ReportFailure() + failCnt++ + if tr.IsFailing() { + shuttingDown = true + } log.Debugf("resolver: failed to connect to %s (%s)", tr.resolver.Name, tr.resolver.ServerAddress) + netenv.ReportFailedConnection() continue connMgmt } tr.dnsConnection = c @@ -208,6 +211,10 @@ connMgmt: if connClosing.SetToIf(false, true) { cancelConnCtx() tr.ReportFailure() + failCnt++ + if tr.IsFailing() { + shuttingDown = true + } log.Warningf("resolver: read error from %s (%s): %s", tr.resolver.Name, tr.dnsConnection.RemoteAddr(), err) } return nil @@ -244,6 +251,10 @@ connMgmt: if connClosing.SetToIf(false, true) { cancelConnCtx() tr.ReportFailure() + failCnt++ + if tr.IsFailing() { + shuttingDown = true + } log.Warningf("resolver: write error to %s (%s): %s", tr.resolver.Name, tr.dnsConnection.RemoteAddr(), err) } 
continue connMgmt @@ -263,6 +274,7 @@ connMgmt: if ok { select { case inFlight.Response <- msg: + failCnt = 0 // reset fail counter // responded! default: // save to cache, if enabled @@ -351,6 +363,11 @@ func (tr *TCPResolver) Query(ctx context.Context, q *Query) (*RRCache, error) { return nil, ErrTimeout } + if reply == nil { + // Resolver is shutting down, could be server failure or we are offline + return nil, ErrFailure + } + if tr.resolver.IsBlockedUpstream(reply) { return nil, &BlockedUpstreamError{tr.resolver.GetName()} }