Fix netenv resolver interaction

Also fix endless loop in tcp resolver when network is down
Daniel 2020-06-26 22:50:35 +02:00
parent 111e324d26
commit 881a757667
4 changed files with 71 additions and 11 deletions

View file

@@ -106,12 +106,6 @@ func handleRequestAsWorker(w dns.ResponseWriter, query *dns.Msg) {
}
func handleRequest(ctx context.Context, w dns.ResponseWriter, query *dns.Msg) error { //nolint:gocognit // TODO
// return with server failure if offline
if netenv.GetOnlineStatus() == netenv.StatusOffline {
returnServerFailure(w, query)
return nil
}
// only process first question, that's how everyone does it.
question := query.Question[0]
q := &resolver.Query{
@@ -119,6 +113,14 @@ func handleRequest(ctx context.Context, w dns.ResponseWriter, query *dns.Msg) er
QType: dns.Type(question.Qtype),
}
// return with server failure if offline
if netenv.GetOnlineStatus() == netenv.StatusOffline &&
!netenv.IsOnlineStatusTestDomain(q.FQDN) {
log.Tracer(ctx).Debugf("resolver: not resolving %s, device is offline", q.FQDN)
returnServerFailure(w, query)
return nil
}
// check class
if question.Qclass != dns.ClassINET {
// we only serve IN records, return nxdomain
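
The point of the reordering above: the offline check now runs after the Query struct is built, so the FQDN is available and netenv's own connectivity-test domains can still resolve while the device is considered offline. A minimal sketch of the combined condition follows; the helper name shouldRefuseQuery is ours for illustration only, the real handler inlines the check.

package main

import (
	"fmt"

	"github.com/safing/portmaster/netenv"
)

// shouldRefuseQuery mirrors the reordered check: refuse only when the device
// is offline AND the queried domain is not one of the domains netenv itself
// uses to probe connectivity. Those probes must go through, otherwise the
// online status could never recover from StatusOffline.
func shouldRefuseQuery(fqdn string) bool {
	return netenv.GetOnlineStatus() == netenv.StatusOffline &&
		!netenv.IsOnlineStatusTestDomain(fqdn)
}

func main() {
	// "example.com." stands in for an ordinary query; which domains count as
	// test domains is decided by netenv, not by this sketch.
	fmt.Println(shouldRefuseQuery("example.com."))
}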

View file

@@ -106,6 +106,8 @@ var (
captivePortalURL string
captivePortalLock sync.Mutex
waitForever = make(chan time.Time)
)
func init() {
@@ -200,12 +202,14 @@ func triggerOnlineStatusInvestigation() {
}
func monitorOnlineStatus(ctx context.Context) error {
triggerOnlineStatusInvestigation()
for {
// wait for trigger
select {
case <-ctx.Done():
return nil
case <-onlineStatusInvestigationTrigger:
case <-getDynamicStatusTrigger():
}
// enable waiting
@@ -221,6 +225,21 @@ func monitorOnlineStatus(ctx context.Context) error {
}
}
func getDynamicStatusTrigger() <-chan time.Time {
switch GetOnlineStatus() {
case StatusOffline:
return time.After(10 * time.Second)
case StatusLimited, StatusPortal:
return time.After(1 * time.Minute)
case StatusSemiOnline:
return time.After(5 * time.Minute)
case StatusOnline:
return waitForever
default: // unknown status
return time.After(5 * time.Minute)
}
}
func checkOnlineStatus(ctx context.Context) {
// TODO: implement more methods
/*status, err := getConnectivityStateFromDbus()
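
The waitForever channel above is never written to or closed, so a select case receiving from it never fires; that is how periodic re-checks are effectively disabled while the status is StatusOnline, leaving only the explicit investigation trigger. A standalone sketch of the pattern, with the project's status constants replaced by plain strings:

package main

import (
	"fmt"
	"time"
)

// waitForever is never sent to or closed, so receiving from it blocks forever.
// A select case on it is therefore effectively disabled.
var waitForever = make(chan time.Time)

// nextCheck returns a timer channel using the intervals from the diff:
// re-check quickly while offline, slowly while limited, never while online.
func nextCheck(status string) <-chan time.Time {
	switch status {
	case "offline":
		return time.After(10 * time.Second)
	case "limited", "portal":
		return time.After(1 * time.Minute)
	case "semi-online":
		return time.After(5 * time.Minute)
	case "online":
		return waitForever // never fires
	default:
		return time.After(5 * time.Minute)
	}
}

func main() {
	trigger := make(chan struct{}, 1)
	trigger <- struct{}{} // simulate an explicit investigation trigger

	select {
	case <-trigger:
		fmt.Println("woken by the explicit trigger")
	case <-nextCheck("online"):
		fmt.Println("woken by the timer (cannot happen while online)")
	}
}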

View file

@@ -7,6 +7,8 @@ import (
"sync"
"time"
"github.com/safing/portmaster/netenv"
"github.com/miekg/dns"
"github.com/safing/portbase/database"
@@ -24,6 +26,10 @@ var (
ErrLocalhost = errors.New("query for localhost")
// ErrTimeout is returned when a query times out
ErrTimeout = errors.New("query timed out")
// ErrOffline is returned when no network connection is detected
ErrOffline = errors.New("device is offline")
// ErrFailure is returned when the type of failure is unclear
ErrFailure = errors.New("query failed")
// detailed errors
@@ -213,13 +219,24 @@ func deduplicateRequest(ctx context.Context, q *Query) (finishRequest func()) {
}
}
func resolveAndCache(ctx context.Context, q *Query) (rrCache *RRCache, err error) {
func resolveAndCache(ctx context.Context, q *Query) (rrCache *RRCache, err error) { //nolint:gocognit
// get resolvers
resolvers := GetResolversInScope(ctx, q)
if len(resolvers) == 0 {
return nil, ErrNoCompliance
}
// check if we are online
if netenv.GetOnlineStatus() == netenv.StatusOffline {
if netenv.IsOnlineStatusTestDomain(q.FQDN) {
log.Tracer(ctx).Debugf("resolver: permitting online status test domain %s to resolve even though offline", q.FQDN)
} else {
log.Tracer(ctx).Debugf("resolver: not resolving %s, device is offline", q.FQDN)
// we are offline and this is not an online check query
return nil, ErrOffline
}
}
// start resolving
var i int
@@ -246,6 +263,11 @@ resolveLoop:
case errors.Is(err, ErrBlocked):
// some resolvers might also block
return nil, err
case netenv.GetOnlineStatus() == netenv.StatusOffline &&
!netenv.IsOnlineStatusTestDomain(q.FQDN):
log.Tracer(ctx).Debugf("resolver: not resolving %s, device is offline", q.FQDN)
// we are offline and this is not an online check query
return nil, ErrOffline
}
} else {
// no error
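
ErrOffline and ErrFailure are plain sentinel errors, so callers outside the package can tell them apart with errors.Is. A sketch of such a caller; the function and messages are ours, and the import path is assumed to be github.com/safing/portmaster/resolver.

package main

import (
	"errors"
	"fmt"

	"github.com/safing/portmaster/resolver"
)

// describeResolveError is illustrative only: it shows how the new sentinel
// errors can be distinguished from other resolver errors by a caller.
func describeResolveError(err error) string {
	switch {
	case errors.Is(err, resolver.ErrOffline):
		return "device is offline, retry once connectivity returns"
	case errors.Is(err, resolver.ErrFailure):
		return "query failed for an unclear reason"
	default:
		return fmt.Sprintf("other resolver error: %v", err)
	}
}

func main() {
	fmt.Println(describeResolveError(resolver.ErrOffline))
}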

View file

@@ -93,11 +93,9 @@ func (tr *TCPResolver) client(workerCtx context.Context) error { //nolint:gocogn
var cancelConnCtx func()
var recycleConn bool
var shuttingDown bool
var failCnt int
var incoming = make(chan *dns.Msg, 100)
// enable client restarting after crash
defer tr.clientStarted.UnSet()
connMgmt:
for {
// cleanup old connection
@@ -111,7 +109,7 @@ connMgmt:
}
// check if we are shutting down or failing
if shuttingDown || tr.IsFailing() {
if shuttingDown || failCnt >= FailThreshold || tr.IsFailing() {
// reply to all waiting queries
tr.Lock()
for id, inFlight := range tr.inFlightQueries {
@@ -181,7 +179,12 @@ connMgmt:
c, err := tr.dnsClient.Dial(tr.resolver.ServerAddress)
if err != nil {
tr.ReportFailure()
failCnt++
if tr.IsFailing() {
shuttingDown = true
}
log.Debugf("resolver: failed to connect to %s (%s)", tr.resolver.Name, tr.resolver.ServerAddress)
netenv.ReportFailedConnection()
continue connMgmt
}
tr.dnsConnection = c
@@ -208,6 +211,10 @@ connMgmt:
if connClosing.SetToIf(false, true) {
cancelConnCtx()
tr.ReportFailure()
failCnt++
if tr.IsFailing() {
shuttingDown = true
}
log.Warningf("resolver: read error from %s (%s): %s", tr.resolver.Name, tr.dnsConnection.RemoteAddr(), err)
}
return nil
@@ -244,6 +251,10 @@ connMgmt:
if connClosing.SetToIf(false, true) {
cancelConnCtx()
tr.ReportFailure()
failCnt++
if tr.IsFailing() {
shuttingDown = true
}
log.Warningf("resolver: write error to %s (%s): %s", tr.resolver.Name, tr.dnsConnection.RemoteAddr(), err)
}
continue connMgmt
@@ -263,6 +274,7 @@ connMgmt:
if ok {
select {
case inFlight.Response <- msg:
failCnt = 0 // reset fail counter
// responded!
default:
// save to cache, if enabled
@@ -351,6 +363,11 @@ func (tr *TCPResolver) Query(ctx context.Context, q *Query) (*RRCache, error) {
return nil, ErrTimeout
}
if reply == nil {
// Resolver is shutting down, could be server failure or we are offline
return nil, ErrFailure
}
if tr.resolver.IsBlockedUpstream(reply) {
return nil, &BlockedUpstreamError{tr.resolver.GetName()}
}
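
The failCnt counter is what breaks the endless loop named in the commit message: every failed dial or read/write error increments it, a delivered response resets it, and once it reaches FailThreshold the client shuts down instead of reconnecting forever while the network is down. A simplified, self-contained sketch of that shape; connect, clientLoop and the threshold value are stand-ins, not the actual TCPResolver code.

package main

import (
	"errors"
	"fmt"
	"time"
)

// failThreshold plays the role of FailThreshold in the diff: after this many
// consecutive connection failures the client gives up instead of spinning.
const failThreshold = 3

// connect stands in for dnsClient.Dial; while the network is down it always fails.
func connect() error { return errors.New("network is unreachable") }

func clientLoop() error {
	var failCnt int
	for {
		if failCnt >= failThreshold {
			// Shut down; in the real code, waiting queries are then answered
			// with a failure (ErrFailure) instead of hanging until timeout.
			return fmt.Errorf("giving up after %d failed connection attempts", failCnt)
		}
		if err := connect(); err != nil {
			failCnt++
			time.Sleep(10 * time.Millisecond) // small pause between attempts (sketch only)
			continue
		}
		failCnt = 0 // in the diff, the counter is reset when a response is delivered
		// ... serve queries over the connection ...
		return nil
	}
}

func main() {
	fmt.Println(clientLoop())
}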