github.com/safing/portbase@v0.19.5/modules/worker.go (about)

     1  package modules
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"sync/atomic"
     8  	"time"
     9  
    10  	"github.com/safing/portbase/log"
    11  )
    12  
    13  // Default Worker Configuration.
    14  const (
    15  	DefaultBackoffDuration = 2 * time.Second
    16  )
    17  
    18  var (
    19  	// ErrRestartNow may be returned (wrapped) by service workers to request an immediate restart.
    20  	ErrRestartNow = errors.New("requested restart")
    21  	errNoModule   = errors.New("missing module (is nil!)")
    22  )
    23  
    24  // StartWorker directly starts a generic worker that does not fit to be a Task or MicroTask, such as long running (and possibly mostly idle) sessions. A call to StartWorker starts a new goroutine and returns immediately.
    25  func (m *Module) StartWorker(name string, fn func(context.Context) error) {
    26  	go func() {
    27  		err := m.RunWorker(name, fn)
    28  		switch {
    29  		case err == nil:
    30  			return
    31  		case errors.Is(err, context.Canceled):
    32  			log.Debugf("%s: worker %s was canceled: %s", m.Name, name, err)
    33  		default:
    34  			log.Errorf("%s: worker %s failed: %s", m.Name, name, err)
    35  		}
    36  	}()
    37  }
    38  
    39  // RunWorker directly runs a generic worker that does not fit to be a Task or MicroTask, such as long running (and possibly mostly idle) sessions. A call to RunWorker blocks until the worker is finished.
    40  func (m *Module) RunWorker(name string, fn func(context.Context) error) error {
    41  	if m == nil {
    42  		log.Errorf(`modules: cannot start worker "%s" with nil module`, name)
    43  		return errNoModule
    44  	}
    45  
    46  	atomic.AddInt32(m.workerCnt, 1)
    47  	defer func() {
    48  		atomic.AddInt32(m.workerCnt, -1)
    49  		m.checkIfStopComplete()
    50  	}()
    51  
    52  	return m.runWorker(name, fn)
    53  }
    54  
    55  // StartServiceWorker starts a generic worker, which is automatically restarted in case of an error. A call to StartServiceWorker runs the service-worker in a new goroutine and returns immediately. `backoffDuration` specifies how to long to wait before restarts, multiplied by the number of failed attempts. Pass `0` for the default backoff duration. For custom error remediation functionality, build your own error handling procedure using calls to RunWorker.
    56  // Returning nil error or context.Canceled will stop the service worker.
    57  func (m *Module) StartServiceWorker(name string, backoffDuration time.Duration, fn func(context.Context) error) {
    58  	if m == nil {
    59  		log.Errorf(`modules: cannot start service worker "%s" with nil module`, name)
    60  		return
    61  	}
    62  
    63  	go m.runServiceWorker(name, backoffDuration, fn)
    64  }
    65  
    66  func (m *Module) runServiceWorker(name string, backoffDuration time.Duration, fn func(context.Context) error) {
    67  	atomic.AddInt32(m.workerCnt, 1)
    68  	defer func() {
    69  		atomic.AddInt32(m.workerCnt, -1)
    70  		m.checkIfStopComplete()
    71  	}()
    72  
    73  	if backoffDuration == 0 {
    74  		backoffDuration = DefaultBackoffDuration
    75  	}
    76  	failCnt := 0
    77  	lastFail := time.Now()
    78  
    79  	for {
    80  		if m.IsStopping() {
    81  			return
    82  		}
    83  
    84  		err := m.runWorker(name, fn)
    85  		switch {
    86  		case err == nil:
    87  			// No error means that the worker is finished.
    88  			return
    89  
    90  		case errors.Is(err, context.Canceled):
    91  			// A canceled context also means that the worker is finished.
    92  			return
    93  
    94  		case errors.Is(err, ErrRestartNow):
    95  			// Worker requested a restart - silently continue with loop.
    96  
    97  		default:
    98  			// Any other errors triggers a restart with backoff.
    99  
   100  			// Reset fail counter if running without error for some time.
   101  			if time.Now().Add(-5 * time.Minute).After(lastFail) {
   102  				failCnt = 0
   103  			}
   104  			// Increase fail counter and set last failed time.
   105  			failCnt++
   106  			lastFail = time.Now()
   107  			// Log error and back off for some time.
   108  			sleepFor := time.Duration(failCnt) * backoffDuration
   109  			log.Errorf("%s: service-worker %s failed (%d): %s - restarting in %s", m.Name, name, failCnt, err, sleepFor)
   110  			select {
   111  			case <-time.After(sleepFor):
   112  			case <-m.Ctx.Done():
   113  				return
   114  			}
   115  		}
   116  	}
   117  }
   118  
   119  func (m *Module) runWorker(name string, fn func(context.Context) error) (err error) {
   120  	defer func() {
   121  		// recover from panic
   122  		panicVal := recover()
   123  		if panicVal != nil {
   124  			me := m.NewPanicError(name, "worker", panicVal)
   125  			me.Report()
   126  			err = me
   127  		}
   128  	}()
   129  
   130  	// run
   131  	// TODO: get cancel func for worker context and cancel when worker is done.
   132  	// This ensure that when the worker passes its context to another (async) function, it will also be shutdown when the worker finished or dies.
   133  	err = fn(m.Ctx)
   134  	return
   135  }
   136  
   137  func (m *Module) runCtrlFnWithTimeout(name string, timeout time.Duration, fn func() error) error {
   138  	stopFnError := m.startCtrlFn(name, fn)
   139  
   140  	// wait for results
   141  	select {
   142  	case err := <-stopFnError:
   143  		return err
   144  	case <-time.After(timeout):
   145  		return fmt.Errorf("timed out (%s)", timeout)
   146  	}
   147  }
   148  
   149  func (m *Module) startCtrlFn(name string, fn func() error) chan error {
   150  	ctrlFnError := make(chan error, 1)
   151  
   152  	// If no function is given, still act as if it was run.
   153  	if fn == nil {
   154  		// Signal finish.
   155  		m.ctrlFuncRunning.UnSet()
   156  		m.checkIfStopComplete()
   157  
   158  		// Report nil error and return.
   159  		ctrlFnError <- nil
   160  		return ctrlFnError
   161  	}
   162  
   163  	// Signal that a control function is running.
   164  	m.ctrlFuncRunning.Set()
   165  
   166  	// Start control function in goroutine.
   167  	go func() {
   168  		// Recover from panic and reset control function signal.
   169  		defer func() {
   170  			// recover from panic
   171  			panicVal := recover()
   172  			if panicVal != nil {
   173  				me := m.NewPanicError(name, "module-control", panicVal)
   174  				me.Report()
   175  				ctrlFnError <- fmt.Errorf("panic: %s", panicVal)
   176  			}
   177  
   178  			// Signal finish.
   179  			m.ctrlFuncRunning.UnSet()
   180  			m.checkIfStopComplete()
   181  		}()
   182  
   183  		// Run control function and report error.
   184  		err := fn()
   185  		ctrlFnError <- err
   186  	}()
   187  
   188  	return ctrlFnError
   189  }