github.com/safing/portbase@v0.19.5/modules/worker.go

package modules

import (
	"context"
	"errors"
	"fmt"
	"sync/atomic"
	"time"

	"github.com/safing/portbase/log"
)

// Default Worker Configuration.
const (
	DefaultBackoffDuration = 2 * time.Second
)

var (
	// ErrRestartNow may be returned (wrapped) by service workers to request an immediate restart.
	ErrRestartNow = errors.New("requested restart")
	errNoModule   = errors.New("missing module (is nil!)")
)

// StartWorker directly starts a generic worker that does not fit to be a Task
// or MicroTask, such as long-running (and possibly mostly idle) sessions.
// A call to StartWorker starts a new goroutine and returns immediately.
func (m *Module) StartWorker(name string, fn func(context.Context) error) {
	go func() {
		err := m.RunWorker(name, fn)
		switch {
		case err == nil:
			return
		case errors.Is(err, context.Canceled):
			log.Debugf("%s: worker %s was canceled: %s", m.Name, name, err)
		default:
			log.Errorf("%s: worker %s failed: %s", m.Name, name, err)
		}
	}()
}

// RunWorker directly runs a generic worker that does not fit to be a Task or
// MicroTask, such as long-running (and possibly mostly idle) sessions.
// A call to RunWorker blocks until the worker is finished.
func (m *Module) RunWorker(name string, fn func(context.Context) error) error {
	if m == nil {
		log.Errorf(`modules: cannot start worker "%s" with nil module`, name)
		return errNoModule
	}

	atomic.AddInt32(m.workerCnt, 1)
	defer func() {
		atomic.AddInt32(m.workerCnt, -1)
		m.checkIfStopComplete()
	}()

	return m.runWorker(name, fn)
}

// StartServiceWorker starts a generic worker, which is automatically restarted
// in case of an error. A call to StartServiceWorker runs the service-worker in
// a new goroutine and returns immediately. `backoffDuration` specifies how
// long to wait before restarts, multiplied by the number of failed attempts.
// Pass `0` for the default backoff duration. For custom error remediation
// functionality, build your own error handling procedure using calls to
// RunWorker.
// Returning a nil error or context.Canceled will stop the service worker.
func (m *Module) StartServiceWorker(name string, backoffDuration time.Duration, fn func(context.Context) error) {
	if m == nil {
		log.Errorf(`modules: cannot start service worker "%s" with nil module`, name)
		return
	}

	go m.runServiceWorker(name, backoffDuration, fn)
}

func (m *Module) runServiceWorker(name string, backoffDuration time.Duration, fn func(context.Context) error) {
	atomic.AddInt32(m.workerCnt, 1)
	defer func() {
		atomic.AddInt32(m.workerCnt, -1)
		m.checkIfStopComplete()
	}()

	if backoffDuration == 0 {
		backoffDuration = DefaultBackoffDuration
	}
	failCnt := 0
	lastFail := time.Now()

	for {
		if m.IsStopping() {
			return
		}

		err := m.runWorker(name, fn)
		switch {
		case err == nil:
			// No error means that the worker is finished.
			return

		case errors.Is(err, context.Canceled):
			// A canceled context also means that the worker is finished.
			return

		case errors.Is(err, ErrRestartNow):
			// Worker requested a restart - silently continue with the loop.

		default:
			// Any other error triggers a restart with backoff.

			// Reset fail counter if running without error for some time.
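			// (Here, "some time" means the last failure was over five minutes ago.)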
			if time.Now().Add(-5 * time.Minute).After(lastFail) {
				failCnt = 0
			}
			// Increase fail counter and set last failed time.
			failCnt++
			lastFail = time.Now()
			// Log error and back off for some time.
			sleepFor := time.Duration(failCnt) * backoffDuration
			log.Errorf("%s: service-worker %s failed (%d): %s - restarting in %s", m.Name, name, failCnt, err, sleepFor)
			select {
			case <-time.After(sleepFor):
			case <-m.Ctx.Done():
				return
			}
		}
	}
}

func (m *Module) runWorker(name string, fn func(context.Context) error) (err error) {
	defer func() {
		// Recover from panic.
		panicVal := recover()
		if panicVal != nil {
			me := m.NewPanicError(name, "worker", panicVal)
			me.Report()
			err = me
		}
	}()

	// Run the worker function.
	// TODO: Get a cancel func for the worker context and cancel it when the
	// worker is done. This ensures that when the worker passes its context to
	// another (async) function, that function is also shut down when the
	// worker finishes or dies.
	err = fn(m.Ctx)
	return
}

func (m *Module) runCtrlFnWithTimeout(name string, timeout time.Duration, fn func() error) error {
	stopFnError := m.startCtrlFn(name, fn)

	// Wait for the result.
	select {
	case err := <-stopFnError:
		return err
	case <-time.After(timeout):
		return fmt.Errorf("timed out (%s)", timeout)
	}
}

func (m *Module) startCtrlFn(name string, fn func() error) chan error {
	ctrlFnError := make(chan error, 1)

	// If no function is given, still act as if it was run.
	if fn == nil {
		// Signal finish.
		m.ctrlFuncRunning.UnSet()
		m.checkIfStopComplete()

		// Report nil error and return.
		ctrlFnError <- nil
		return ctrlFnError
	}

	// Signal that a control function is running.
	m.ctrlFuncRunning.Set()

	// Start control function in goroutine.
	go func() {
		// Recover from panic and reset control function signal.
		defer func() {
			// Recover from panic.
			panicVal := recover()
			if panicVal != nil {
				me := m.NewPanicError(name, "module-control", panicVal)
				me.Report()
				ctrlFnError <- fmt.Errorf("panic: %s", panicVal)
			}

			// Signal finish.
			m.ctrlFuncRunning.UnSet()
			m.checkIfStopComplete()
		}()

		// Run control function and report error.
		err := fn()
		ctrlFnError <- err
	}()

	return ctrlFnError
}
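// Usage sketch (illustrative, not part of the original file): a long-lived
// service worker that is restarted with the default backoff whenever it
// returns an unexpected error. The `module` value and the `keepSessionAlive`
// helper are hypothetical stand-ins for application code.
//
//	module.StartServiceWorker("session-keeper", 0, func(ctx context.Context) error {
//		for {
//			select {
//			case <-ctx.Done():
//				// Returning context.Canceled stops the service worker for good.
//				return ctx.Err()
//			case <-time.After(time.Minute):
//				if err := keepSessionAlive(ctx); err != nil {
//					// Any other error triggers a restart with backoff.
//					return err
//				}
//			}
//		}
//	})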
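// Sketch (also illustrative): RunWorker blocks until the worker returns, while
// StartWorker runs the same function in a new goroutine and only logs the
// outcome. The `module` value is again a hypothetical, already-prepared *Module.
//
//	err := module.RunWorker("one-shot-job", func(ctx context.Context) error {
//		// Do the blocking work here, honoring ctx for cancellation.
//		return nil
//	})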