go.charczuk.com@v0.0.0-20240327042549-bc490516bd1a/sdk/supervisor/service.go (about)

     1  /*
     2  
     3  Copyright (c) 2024 - Present. Will Charczuk. All rights reserved.
     4  Use of this source code is governed by a MIT license that can be found in the LICENSE file at the root of the repository.
     5  
     6  */
     7  
     8  package supervisor
     9  
    10  import (
    11  	"context"
    12  	"fmt"
    13  	"io"
    14  	"sync"
    15  	"syscall"
    16  	"time"
    17  
    18  	"github.com/rjeczalik/notify"
    19  )
    20  
// Service is a specific program to start, together with the rules for
// restarting it and the hooks to run around its lifecycle.
//
// Configuration fields, as used by the methods in this file:
//   - Background, if set, is applied to the context before the subprocess
//     handle is created.
//   - Command, Args, Env, WorkDir, Stdin and Stdout are not read directly in
//     this file; presumably the SubprocessProvider consumes them when it
//     builds the subprocess (TODO confirm against the provider).
//   - ShutdownSignal is the signal sent to terminate the subprocess; when it
//     is not positive, SIGINT is sent instead.
//   - WatchedPaths are registered with the FileEventProvider on Start; file
//     events on them trigger a debounced restart.
//   - WatchedPathChangeDebounce is the minimum time between two restarts
//     triggered by file events; when it is not positive, 500ms is used.
//   - Stderr, if set, receives the supervisor's own status messages.
//   - RestartPolicy decides whether an exited subprocess is restarted and
//     after what delay; with a nil policy only explicit restarts happen.
//   - OnStart, OnRestart and OnExit are called, in order, after the first
//     start, before each restart, and when supervision ends.
//   - SubprocessProvider and FileEventProvider replace the default providers
//     when set.
//
// A Service holds mutexes and must not be copied after first use.
type Service struct {
	Background                func(context.Context) context.Context
	Command                   string
	Args                      []string
	ShutdownSignal            syscall.Signal
	Env                       []string
	WorkDir                   string
	WatchedPaths              []string
	WatchedPathChangeDebounce time.Duration
	Stdin                     io.Reader
	Stdout                    io.Writer
	Stderr                    io.Writer
	RestartPolicy             RestartPolicy
	OnStart                   []func(context.Context)
	OnRestart                 []func(context.Context)
	OnExit                    []func(context.Context)
	SubprocessProvider        SubprocessProvider // used for testing
	FileEventProvider         FileEventProvider  // used for testing

	// internal fields
	// no peeking!

	subprocessMu           sync.Mutex
	subprocess             Subprocess
	fsevents               chan notify.EventInfo
	fileEventDebouncedAtMu sync.Mutex
	fileEventDebouncedAt   time.Time
	history                ServiceHistory
	crashed                func(error)
	finalizer              func()
	stopping               bool
	restarting             bool
	done                   chan struct{}
}
    56  
    57  // Start starts the service.
    58  func (s *Service) Start(ctx context.Context) error {
    59  	if s.safeSubprocessIsSet() {
    60  		return nil
    61  	}
    62  	if err := s.safeInitializeExecHandle(ctx); err != nil {
    63  		return err
    64  	}
    65  
    66  	s.history.StartedAt = time.Now()
    67  	s.done = make(chan struct{})
    68  
    69  	if len(s.WatchedPaths) > 0 {
    70  		s.fsevents = make(chan notify.EventInfo, 1)
    71  		notifyProvider := s.fileEventProviderOrDefault()
    72  		for _, watchedPath := range s.WatchedPaths {
    73  			if err := notifyProvider.Notify(watchedPath, s.fsevents); err != nil {
    74  				return err
    75  			}
    76  		}
    77  	}
    78  	if err := s.subprocess.Start(); err != nil {
    79  		return err
    80  	}
    81  	for _, handler := range s.OnStart {
    82  		handler(ctx)
    83  	}
    84  
    85  	if len(s.WatchedPaths) > 0 {
    86  		go func() {
    87  			var e notify.EventInfo
    88  			var restartErr error
    89  			for {
    90  				select {
    91  				case <-s.done:
    92  					return
    93  				case e = <-s.fsevents:
    94  					s.errPrintf("restarting on filesystem changes")
    95  					restartErr = s.safeDebouncedSignalOnWatchedEvent(e)
    96  					if restartErr != nil {
    97  						s.errPrintf("restarting on filesystem changes; error on terminate signal; %v", restartErr)
    98  					}
    99  				}
   100  			}
   101  		}()
   102  	}
   103  
   104  	// fork the goroutine which will handle the process itself, including restarts and termination.
   105  	go func() {
   106  		// finalErr is the error that will be passed to the crashed handler.
   107  		var finalErr error
   108  
   109  		defer func() {
   110  			// do not call the crashed handler if we're
   111  			// specifically being told to stop!
   112  			if !s.stopping && finalErr != nil && s.crashed != nil {
   113  				s.crashed(finalErr)
   114  			}
   115  			if s.finalizer != nil {
   116  				s.finalizer()
   117  			}
   118  			for _, handler := range s.OnExit {
   119  				handler(ctx)
   120  			}
   121  			close(s.done)
   122  		}()
   123  		var waitErr, startErr error
   124  		var delay time.Duration
   125  		for {
   126  			s.errPrintf("started with pid: %v", s.subprocess.Pid())
   127  
   128  			// wait for the sub-process to exit
   129  			//
   130  			// waitErr will be set if the process was terminated with a signal!
   131  			waitErr = s.subprocess.Wait()
   132  
   133  			// add the event to history but factor that the restart
   134  			// may have been by a file change that we _do not_ want to
   135  			// record as a failure!
   136  			s.addHistoryEvent(waitErr)
   137  
   138  			// we should only consider restarting if we are _not_ stopping
   139  			if s.maybeShouldRestart(ctx) {
   140  
   141  				// we may need to delay the restart
   142  				if delay = s.maybeShouldDelayRestart(ctx); delay > 0 {
   143  					s.errPrintf("delaying %v to restart", delay.Round(time.Millisecond))
   144  					select {
   145  					case <-time.After(delay):
   146  					case <-ctx.Done():
   147  						return
   148  					}
   149  				}
   150  
   151  				if s.stopping {
   152  					s.errPrintf("exiting on shutdown")
   153  					finalErr = waitErr
   154  					return
   155  				} else if s.restarting {
   156  					s.errPrintf("starting after restart")
   157  				} else if waitErr != nil {
   158  					s.errPrintf("starting after process exit error: %v", waitErr)
   159  				} else {
   160  					s.errPrintf("starting after process exit")
   161  				}
   162  
   163  				// re-initialize the sub-process
   164  				s.safeInitializeExecHandle(ctx)
   165  
   166  				// call the on restart handlers (before we actually start!)
   167  				// but _after_ we've re-initialized the sub-process
   168  				for _, handler := range s.OnRestart {
   169  					handler(ctx)
   170  				}
   171  
   172  				if startErr = s.subprocess.Start(); startErr != nil {
   173  					s.errPrintf("failed to restart")
   174  					finalErr = startErr
   175  					return
   176  				}
   177  
   178  			} else {
   179  				if s.stopping {
   180  					s.errPrintf("exiting on shutdown")
   181  				} else {
   182  					s.errPrintf("exiting based on exhausting restart policy")
   183  				}
   184  				finalErr = waitErr
   185  				return
   186  			}
   187  		}
   188  	}()
   189  	return nil
   190  }
   191  
   192  // Stop stops the service.
   193  func (s *Service) Stop() error {
   194  	s.subprocessMu.Lock()
   195  	defer s.subprocessMu.Unlock()
   196  	if s.subprocess != nil {
   197  		s.stopping = true
   198  		notify.Stop(s.fsevents)
   199  		return s.signalTerminate()
   200  	}
   201  	return nil
   202  }
   203  
   204  // Restart tells the service to quit with the shutdown signal restarting the serivce.
   205  func (s *Service) Restart() (err error) {
   206  	s.subprocessMu.Lock()
   207  	defer s.subprocessMu.Unlock()
   208  	s.restarting = true
   209  	err = s.signalTerminate()
   210  	return
   211  }
   212  
   213  //
   214  // internal methods
   215  //
   216  
   217  func (s *Service) watchedPathChangeDebounceOrDefault() time.Duration {
   218  	if s.WatchedPathChangeDebounce > 0 {
   219  		return s.WatchedPathChangeDebounce
   220  	}
   221  	return 500 * time.Millisecond
   222  }
   223  
   224  func (s *Service) safeDebouncedSignalOnWatchedEvent(e notify.EventInfo) error {
   225  	s.fileEventDebouncedAtMu.Lock()
   226  	defer s.fileEventDebouncedAtMu.Unlock()
   227  
   228  	if s.fileEventDebouncedAt.IsZero() || time.Since(s.fileEventDebouncedAt) > s.watchedPathChangeDebounceOrDefault() {
   229  		s.fileEventDebouncedAt = time.Now()
   230  		return s.Restart()
   231  	}
   232  	return nil
   233  }
   234  
   235  func (s *Service) signalTerminate() (err error) {
   236  	if s.subprocess == nil {
   237  		return
   238  	}
   239  	var signal syscall.Signal
   240  	if s.ShutdownSignal > 0 {
   241  		signal = s.ShutdownSignal
   242  	} else {
   243  		signal = syscall.SIGINT
   244  	}
   245  	s.errPrintf("being sent terminate signal: %v", signal)
   246  	err = s.subprocess.Signal(signal)
   247  	return
   248  }
   249  
   250  func (s *Service) maybeShouldRestart(ctx context.Context) bool {
   251  	// we _never_ restart if we're stopping.
   252  	if s.stopping {
   253  		return false
   254  	}
   255  
   256  	// we _always_ restart if it's because of an explicit
   257  	// restart or a filesystem change.
   258  	if s.restarting {
   259  		return true
   260  	}
   261  
   262  	// return the result of the restart policy.
   263  	return s.RestartPolicy != nil && s.RestartPolicy.ShouldRestart(ctx, &s.history)
   264  }
   265  
   266  func (s *Service) maybeShouldDelayRestart(ctx context.Context) time.Duration {
   267  	if s.RestartPolicy != nil {
   268  		return s.RestartPolicy.Delay(ctx, &s.history)
   269  	}
   270  	return 0
   271  }
   272  
   273  func (s *Service) safeSubprocessIsSet() (set bool) {
   274  	s.subprocessMu.Lock()
   275  	set = s.subprocess != nil
   276  	s.subprocessMu.Unlock()
   277  	return
   278  }
   279  
   280  func (s *Service) safeInitializeExecHandle(ctx context.Context) error {
   281  	s.subprocessMu.Lock()
   282  	defer s.subprocessMu.Unlock()
   283  	s.stopping = false
   284  	s.restarting = false
   285  	if s.Background != nil {
   286  		ctx = s.Background(ctx)
   287  	}
   288  	sub, err := s.subprocessProviderOrDefault().Exec(ctx, s)
   289  	if err != nil {
   290  		return err
   291  	}
   292  	s.subprocess = sub
   293  	return nil
   294  }
   295  
   296  func (s *Service) fileEventProviderOrDefault() FileEventProvider {
   297  	if s.FileEventProvider != nil {
   298  		return s.FileEventProvider
   299  	}
   300  	return new(NotifyProvider)
   301  }
   302  
   303  func (s *Service) subprocessProviderOrDefault() SubprocessProvider {
   304  	if s.SubprocessProvider != nil {
   305  		return s.SubprocessProvider
   306  	}
   307  	return new(ExecSubprocessProvider)
   308  }
   309  
   310  func (s *Service) addHistoryEvent(err error) {
   311  	now := time.Now()
   312  
   313  	// elide the error on restart as we do _not_ consider
   314  	// signal errors from restarts as real failures
   315  	// for restart policies.
   316  	if s.restarting {
   317  		s.history.Exits = append(s.history.Exits, Exit{
   318  			Timestamp: now,
   319  		})
   320  		return
   321  	}
   322  
   323  	s.history.Exits = append(s.history.Exits, Exit{
   324  		Timestamp: now,
   325  		Error:     err,
   326  	})
   327  }
   328  
   329  func (s *Service) errPrintf(format string, args ...any) {
   330  	if s.Stderr != nil {
   331  		fmt.Fprintf(s.Stderr, "[supervisor] process %s\n", fmt.Sprintf(format, args...))
   332  	}
   333  }