Improve and fix module startup and shutdown procedures as well as error reporting

This commit is contained in:
Daniel 2019-10-09 16:35:33 +02:00
parent b0204f95ff
commit 2282c6bb71
5 changed files with 106 additions and 35 deletions

View file

@ -2,11 +2,16 @@ package modules
import ( import (
"fmt" "fmt"
"os"
"runtime/debug" "runtime/debug"
"sync"
"time"
) )
var ( var (
errorReportingChannel chan *ModuleError errorReportingChannel chan *ModuleError
reportToStdErr bool
reportingLock sync.RWMutex
) )
// ModuleError wraps a panic, error or message into an error that can be reported. // ModuleError wraps a panic, error or message into an error that can be reported.
@ -64,12 +69,43 @@ func (me *ModuleError) Error() string {
// Report reports the error through the configured reporting channel. // Report reports the error through the configured reporting channel.
func (me *ModuleError) Report() { func (me *ModuleError) Report() {
reportingLock.RLock()
defer reportingLock.RUnlock()
if errorReportingChannel != nil { if errorReportingChannel != nil {
select { select {
case errorReportingChannel <- me: case errorReportingChannel <- me:
default: default:
} }
} }
if reportToStdErr {
// default to writing to stderr
fmt.Fprintf(
os.Stderr,
`===== Error Report =====
Message: %s
Timestamp: %s
ModuleName: %s
TaskName: %s
TaskType: %s
Severity: %s
PanicValue: %s
StackTrace:
%s
===== End of Report =====
`,
me.Message,
time.Now(),
me.ModuleName,
me.TaskName,
me.TaskType,
me.Severity,
me.PanicValue,
me.StackTrace,
)
}
} }
// IsPanic returns whether the given error is a wrapped panic by the modules package and additionally returns it, if true. // IsPanic returns whether the given error is a wrapped panic by the modules package and additionally returns it, if true.
@ -84,7 +120,16 @@ func IsPanic(err error) (bool, *ModuleError) {
// SetErrorReportingChannel sets the channel to report module errors through. By default only panics are reported, all other errors need to be manually wrapped into a *ModuleError and reported. // SetErrorReportingChannel sets the channel to report module errors through. By default only panics are reported, all other errors need to be manually wrapped into a *ModuleError and reported.
func SetErrorReportingChannel(reportingChannel chan *ModuleError) { func SetErrorReportingChannel(reportingChannel chan *ModuleError) {
if errorReportingChannel == nil { reportingLock.Lock()
errorReportingChannel = reportingChannel defer reportingLock.Unlock()
}
errorReportingChannel = reportingChannel
}
// SetStdErrReporting controls error reporting to stderr.
func SetStdErrReporting(on bool) {
reportingLock.Lock()
defer reportingLock.Unlock()
reportToStdErr = on
} }

View file

@ -13,7 +13,7 @@ import (
) )
var ( var (
modulesLock sync.Mutex modulesLock sync.RWMutex
modules = make(map[string]*Module) modules = make(map[string]*Module)
// ErrCleanExit is returned by Start() when the program is interrupted before starting. This can happen for example, when using the "--help" flag. // ErrCleanExit is returned by Start() when the program is interrupted before starting. This can happen for example, when using the "--help" flag.
@ -46,6 +46,10 @@ type Module struct {
microTaskCnt *int32 microTaskCnt *int32
waitGroup sync.WaitGroup waitGroup sync.WaitGroup
// events
eventHooks map[string][]*eventHook
eventHooksLock sync.RWMutex
// dependency mgmt // dependency mgmt
depNames []string depNames []string
depModules []*Module depModules []*Module
@ -69,9 +73,9 @@ func (m *Module) shutdown() error {
// start shutdown function // start shutdown function
m.waitGroup.Add(1) m.waitGroup.Add(1)
stopFnError := make(chan error) stopFnError := make(chan error, 1)
go func() { go func() {
stopFnError <- m.runModuleCtrlFn("stop module", m.stop) stopFnError <- m.runCtrlFn("stop module", m.stop)
m.waitGroup.Done() m.waitGroup.Done()
}() }()
@ -84,16 +88,8 @@ func (m *Module) shutdown() error {
// wait for results // wait for results
select { select {
case err := <-stopFnError:
return err
case <-done: case <-done:
select { case <-time.After(30 * time.Second):
case err := <-stopFnError:
return err
default:
return nil
}
case <-time.After(3 * time.Second):
log.Warningf( log.Warningf(
"%s: timed out while waiting for workers/tasks to finish: workers=%d tasks=%d microtasks=%d, continuing shutdown...", "%s: timed out while waiting for workers/tasks to finish: workers=%d tasks=%d microtasks=%d, continuing shutdown...",
m.Name, m.Name,
@ -102,11 +98,18 @@ func (m *Module) shutdown() error {
atomic.LoadInt32(m.microTaskCnt), atomic.LoadInt32(m.microTaskCnt),
) )
} }
return nil
}
func dummyAction() error { // collect error
return nil select {
case err := <-stopFnError:
return err
default:
log.Warningf(
"%s: timed out while waiting for stop function to finish, continuing shutdown...",
m.Name,
)
return nil
}
} }
// Register registers a new module. The control functions `prep`, `start` and `stop` are technically optional. `stop` is called _after_ all added module workers finished. // Register registers a new module. The control functions `prep`, `start` and `stop` are technically optional. `stop` is called _after_ all added module workers finished.
@ -141,20 +144,10 @@ func initNewModule(name string, prep, start, stop func() error, dependencies ...
prep: prep, prep: prep,
start: start, start: start,
stop: stop, stop: stop,
eventHooks: make(map[string][]*eventHook),
depNames: dependencies, depNames: dependencies,
} }
// replace nil arguments with dummy action
if newModule.prep == nil {
newModule.prep = dummyAction
}
if newModule.start == nil {
newModule.start = dummyAction
}
if newModule.stop == nil {
newModule.stop = dummyAction
}
return newModule return newModule
} }

View file

@ -4,6 +4,7 @@ import (
"fmt" "fmt"
"os" "os"
"runtime" "runtime"
"time"
"github.com/safing/portbase/log" "github.com/safing/portbase/log"
"github.com/tevino/abool" "github.com/tevino/abool"
@ -26,8 +27,8 @@ func WaitForStartCompletion() <-chan struct{} {
// Start starts all modules in the correct order. In case of an error, it will automatically shutdown again. // Start starts all modules in the correct order. In case of an error, it will automatically shutdown again.
func Start() error { func Start() error {
modulesLock.Lock() modulesLock.RLock()
defer modulesLock.Unlock() defer modulesLock.RUnlock()
// start microtask scheduler // start microtask scheduler
go microTaskScheduler() go microTaskScheduler()
@ -106,7 +107,11 @@ func prepareModules() error {
go func() { go func() {
reports <- &report{ reports <- &report{
module: execM, module: execM,
err: execM.runModuleCtrlFn("prep module", execM.prep), err: execM.runCtrlFnWithTimeout(
"prep module",
10*time.Second,
execM.prep,
),
} }
}() }()
} }
@ -154,7 +159,11 @@ func startModules() error {
go func() { go func() {
reports <- &report{ reports <- &report{
module: execM, module: execM,
err: execM.runModuleCtrlFn("start module", execM.start), err: execM.runCtrlFnWithTimeout(
"start module",
60*time.Second,
execM.start,
),
} }
}() }()
} }

View file

@ -12,6 +12,8 @@ import (
var ( var (
shutdownSignal = make(chan struct{}) shutdownSignal = make(chan struct{})
shutdownSignalClosed = abool.NewBool(false) shutdownSignalClosed = abool.NewBool(false)
shutdownCompleteSignal = make(chan struct{})
) )
// ShuttingDown returns a channel read on the global shutdown signal. // ShuttingDown returns a channel read on the global shutdown signal.
@ -45,6 +47,7 @@ func Shutdown() error {
} }
log.Shutdown() log.Shutdown()
close(shutdownCompleteSignal)
return err return err
} }

View file

@ -3,6 +3,7 @@ package modules
import ( import (
"context" "context"
"errors" "errors"
"fmt"
"sync/atomic" "sync/atomic"
"time" "time"
@ -101,7 +102,27 @@ func (m *Module) runWorker(name string, fn func(context.Context) error) (err err
return return
} }
func (m *Module) runModuleCtrlFn(name string, fn func() error) (err error) { func (m *Module) runCtrlFnWithTimeout(name string, timeout time.Duration, fn func() error) error {
stopFnError := make(chan error)
go func() {
stopFnError <- m.runCtrlFn(name, fn)
}()
// wait for results
select {
case err := <-stopFnError:
return err
case <-time.After(timeout):
return fmt.Errorf("timed out (%s)", timeout)
}
}
func (m *Module) runCtrlFn(name string, fn func() error) (err error) {
if fn == nil {
return
}
defer func() { defer func() {
// recover from panic // recover from panic
panicVal := recover() panicVal := recover()