// Source: go.charczuk.com@v0.0.0-20240327042549-bc490516bd1a/sdk/supervisor/service.go

/*

Copyright (c) 2024 - Present. Will Charczuk. All rights reserved.
Use of this source code is governed by a MIT license that can be found in the LICENSE file at the root of the repository.

*/

package supervisor

import (
	"context"
	"fmt"
	"io"
	"sync"
	"syscall"
	"time"

	"github.com/rjeczalik/notify"
)

// Service is a specific program to start.
//
// A Service describes one sub-process and how it is supervised: what to run,
// which signal asks it to shut down, which paths trigger a restart when they
// change, and which policy decides whether it is restarted after it exits.
type Service struct {
	// Background, when set, is called with the context given to Start and its
	// result is the context handed to the subprocess provider.
	Background func(context.Context) context.Context

	// Command, Args, Env and WorkDir are not read directly in this file; the
	// Service itself is passed to SubprocessProvider.Exec, which presumably
	// uses them to build the process (confirm against the provider).
	Command string
	Args    []string

	// ShutdownSignal is the signal sent to the sub-process on Stop and Restart.
	// When it is not greater than zero, syscall.SIGINT is used.
	ShutdownSignal syscall.Signal

	Env     []string
	WorkDir string

	// WatchedPaths are registered with the file event provider on Start; an
	// event on any of them requests a (debounced) restart.
	WatchedPaths []string

	// WatchedPathChangeDebounce is the minimum time between restarts caused by
	// file events. When it is not greater than zero, 500ms is used.
	WatchedPathChangeDebounce time.Duration

	// Stdin and Stdout are not read directly in this file; presumably they are
	// wired to the sub-process by the subprocess provider.
	Stdin  io.Reader
	Stdout io.Writer

	// Stderr additionally receives the supervisor's own status messages; when
	// it is nil those messages are dropped.
	Stderr io.Writer

	// RestartPolicy is consulted after the sub-process exits to decide whether
	// to restart it and how long to wait first. When nil, only explicit
	// restarts (Restart or a file event) start the process again.
	RestartPolicy RestartPolicy

	// OnStart handlers run once the sub-process has been started by Start.
	OnStart []func(context.Context)

	// OnRestart handlers run after the sub-process handle has been
	// re-initialized for a restart, before the new process is started.
	OnRestart []func(context.Context)

	// OnExit handlers run when the supervising goroutine finishes.
	OnExit []func(context.Context)

	// SubprocessProvider creates the sub-process handle; used for testing.
	// When nil, an ExecSubprocessProvider is used.
	SubprocessProvider SubprocessProvider

	// FileEventProvider registers file watches; used for testing.
	// When nil, a NotifyProvider is used.
	FileEventProvider FileEventProvider

	// internal fields
	// no peeking!

	// subprocessMu guards subprocess.
	subprocessMu sync.Mutex
	subprocess   Subprocess

	// fsevents receives file events for WatchedPaths; it is only created when
	// WatchedPaths is non-empty.
	fsevents chan notify.EventInfo

	// fileEventDebouncedAtMu guards fileEventDebouncedAt, the time of the last
	// file event that was allowed to trigger a restart.
	fileEventDebouncedAtMu sync.Mutex
	fileEventDebouncedAt   time.Time

	// history records when the service was started and every exit since.
	history ServiceHistory

	// crashed, when set, is called with the final error if the supervising
	// goroutine ends with an error while the service is not stopping.
	// It is not assigned in this file.
	crashed func(error)

	// finalizer, when set, is called when the supervising goroutine ends.
	// It is not assigned in this file.
	finalizer func()

	// stopping and restarting are set by Stop and Restart while holding
	// subprocessMu and cleared whenever the handle is (re-)initialized.
	// NOTE(review): the supervising goroutine reads them without holding
	// subprocessMu.
	stopping   bool
	restarting bool

	// done is closed when the supervising goroutine ends; it also stops the
	// goroutine that listens for file events.
	done chan struct{}
}

// Start starts the service.
58 func (s *Service) Start(ctx context.Context) error { 59 if s.safeSubprocessIsSet() { 60 return nil 61 } 62 if err := s.safeInitializeExecHandle(ctx); err != nil { 63 return err 64 } 65 66 s.history.StartedAt = time.Now() 67 s.done = make(chan struct{}) 68 69 if len(s.WatchedPaths) > 0 { 70 s.fsevents = make(chan notify.EventInfo, 1) 71 notifyProvider := s.fileEventProviderOrDefault() 72 for _, watchedPath := range s.WatchedPaths { 73 if err := notifyProvider.Notify(watchedPath, s.fsevents); err != nil { 74 return err 75 } 76 } 77 } 78 if err := s.subprocess.Start(); err != nil { 79 return err 80 } 81 for _, handler := range s.OnStart { 82 handler(ctx) 83 } 84 85 if len(s.WatchedPaths) > 0 { 86 go func() { 87 var e notify.EventInfo 88 var restartErr error 89 for { 90 select { 91 case <-s.done: 92 return 93 case e = <-s.fsevents: 94 s.errPrintf("restarting on filesystem changes") 95 restartErr = s.safeDebouncedSignalOnWatchedEvent(e) 96 if restartErr != nil { 97 s.errPrintf("restarting on filesystem changes; error on terminate signal; %v", restartErr) 98 } 99 } 100 } 101 }() 102 } 103 104 // fork the goroutine which will handle the process itself, including restarts and termination. 105 go func() { 106 // finalErr is the error that will be passed to the crashed handler. 107 var finalErr error 108 109 defer func() { 110 // do not call the crashed handler if we're 111 // specifically being told to stop! 112 if !s.stopping && finalErr != nil && s.crashed != nil { 113 s.crashed(finalErr) 114 } 115 if s.finalizer != nil { 116 s.finalizer() 117 } 118 for _, handler := range s.OnExit { 119 handler(ctx) 120 } 121 close(s.done) 122 }() 123 var waitErr, startErr error 124 var delay time.Duration 125 for { 126 s.errPrintf("started with pid: %v", s.subprocess.Pid()) 127 128 // wait for the sub-process to exit 129 // 130 // waitErr will be set if the process was terminated with a signal! 
131 waitErr = s.subprocess.Wait() 132 133 // add the event to history but factor that the restart 134 // may have been by a file change that we _do not_ want to 135 // record as a failure! 136 s.addHistoryEvent(waitErr) 137 138 // we should only consider restarting if we are _not_ stopping 139 if s.maybeShouldRestart(ctx) { 140 141 // we may need to delay the restart 142 if delay = s.maybeShouldDelayRestart(ctx); delay > 0 { 143 s.errPrintf("delaying %v to restart", delay.Round(time.Millisecond)) 144 select { 145 case <-time.After(delay): 146 case <-ctx.Done(): 147 return 148 } 149 } 150 151 if s.stopping { 152 s.errPrintf("exiting on shutdown") 153 finalErr = waitErr 154 return 155 } else if s.restarting { 156 s.errPrintf("starting after restart") 157 } else if waitErr != nil { 158 s.errPrintf("starting after process exit error: %v", waitErr) 159 } else { 160 s.errPrintf("starting after process exit") 161 } 162 163 // re-initialize the sub-process 164 s.safeInitializeExecHandle(ctx) 165 166 // call the on restart handlers (before we actually start!) 167 // but _after_ we've re-initialized the sub-process 168 for _, handler := range s.OnRestart { 169 handler(ctx) 170 } 171 172 if startErr = s.subprocess.Start(); startErr != nil { 173 s.errPrintf("failed to restart") 174 finalErr = startErr 175 return 176 } 177 178 } else { 179 if s.stopping { 180 s.errPrintf("exiting on shutdown") 181 } else { 182 s.errPrintf("exiting based on exhausting restart policy") 183 } 184 finalErr = waitErr 185 return 186 } 187 } 188 }() 189 return nil 190 } 191 192 // Stop stops the service. 193 func (s *Service) Stop() error { 194 s.subprocessMu.Lock() 195 defer s.subprocessMu.Unlock() 196 if s.subprocess != nil { 197 s.stopping = true 198 notify.Stop(s.fsevents) 199 return s.signalTerminate() 200 } 201 return nil 202 } 203 204 // Restart tells the service to quit with the shutdown signal restarting the serivce. 
205 func (s *Service) Restart() (err error) { 206 s.subprocessMu.Lock() 207 defer s.subprocessMu.Unlock() 208 s.restarting = true 209 err = s.signalTerminate() 210 return 211 } 212 213 // 214 // internal methods 215 // 216 217 func (s *Service) watchedPathChangeDebounceOrDefault() time.Duration { 218 if s.WatchedPathChangeDebounce > 0 { 219 return s.WatchedPathChangeDebounce 220 } 221 return 500 * time.Millisecond 222 } 223 224 func (s *Service) safeDebouncedSignalOnWatchedEvent(e notify.EventInfo) error { 225 s.fileEventDebouncedAtMu.Lock() 226 defer s.fileEventDebouncedAtMu.Unlock() 227 228 if s.fileEventDebouncedAt.IsZero() || time.Since(s.fileEventDebouncedAt) > s.watchedPathChangeDebounceOrDefault() { 229 s.fileEventDebouncedAt = time.Now() 230 return s.Restart() 231 } 232 return nil 233 } 234 235 func (s *Service) signalTerminate() (err error) { 236 if s.subprocess == nil { 237 return 238 } 239 var signal syscall.Signal 240 if s.ShutdownSignal > 0 { 241 signal = s.ShutdownSignal 242 } else { 243 signal = syscall.SIGINT 244 } 245 s.errPrintf("being sent terminate signal: %v", signal) 246 err = s.subprocess.Signal(signal) 247 return 248 } 249 250 func (s *Service) maybeShouldRestart(ctx context.Context) bool { 251 // we _never_ restart if we're stopping. 252 if s.stopping { 253 return false 254 } 255 256 // we _always_ restart if it's because of an explicit 257 // restart or a filesystem change. 258 if s.restarting { 259 return true 260 } 261 262 // return the result of the restart policy. 
263 return s.RestartPolicy != nil && s.RestartPolicy.ShouldRestart(ctx, &s.history) 264 } 265 266 func (s *Service) maybeShouldDelayRestart(ctx context.Context) time.Duration { 267 if s.RestartPolicy != nil { 268 return s.RestartPolicy.Delay(ctx, &s.history) 269 } 270 return 0 271 } 272 273 func (s *Service) safeSubprocessIsSet() (set bool) { 274 s.subprocessMu.Lock() 275 set = s.subprocess != nil 276 s.subprocessMu.Unlock() 277 return 278 } 279 280 func (s *Service) safeInitializeExecHandle(ctx context.Context) error { 281 s.subprocessMu.Lock() 282 defer s.subprocessMu.Unlock() 283 s.stopping = false 284 s.restarting = false 285 if s.Background != nil { 286 ctx = s.Background(ctx) 287 } 288 sub, err := s.subprocessProviderOrDefault().Exec(ctx, s) 289 if err != nil { 290 return err 291 } 292 s.subprocess = sub 293 return nil 294 } 295 296 func (s *Service) fileEventProviderOrDefault() FileEventProvider { 297 if s.FileEventProvider != nil { 298 return s.FileEventProvider 299 } 300 return new(NotifyProvider) 301 } 302 303 func (s *Service) subprocessProviderOrDefault() SubprocessProvider { 304 if s.SubprocessProvider != nil { 305 return s.SubprocessProvider 306 } 307 return new(ExecSubprocessProvider) 308 } 309 310 func (s *Service) addHistoryEvent(err error) { 311 now := time.Now() 312 313 // elide the error on restart as we do _not_ consider 314 // signal errors from restarts as real failures 315 // for restart policies. 316 if s.restarting { 317 s.history.Exits = append(s.history.Exits, Exit{ 318 Timestamp: now, 319 }) 320 return 321 } 322 323 s.history.Exits = append(s.history.Exits, Exit{ 324 Timestamp: now, 325 Error: err, 326 }) 327 } 328 329 func (s *Service) errPrintf(format string, args ...any) { 330 if s.Stderr != nil { 331 fmt.Fprintf(s.Stderr, "[supervisor] process %s\n", fmt.Sprintf(format, args...)) 332 } 333 }