github.com/blend/go-sdk@v1.20220411.3/cron/job_scheduler.go (about)

     1  /*
     2  
     3  Copyright (c) 2022 - Present. Blend Labs, Inc. All rights reserved
     4  Use of this source code is governed by a MIT license that can be found in the LICENSE file.
     5  
     6  */
     7  
     8  package cron
     9  
    10  import (
    11  	"context"
    12  	"fmt"
    13  	"sync"
    14  	"time"
    15  
    16  	"github.com/blend/go-sdk/async"
    17  	"github.com/blend/go-sdk/ex"
    18  	"github.com/blend/go-sdk/logger"
    19  	"github.com/blend/go-sdk/ref"
    20  	"github.com/blend/go-sdk/stringutil"
    21  )
    22  
    23  // NewJobScheduler returns a job scheduler for a given job.
    24  func NewJobScheduler(job Job, options ...JobSchedulerOption) *JobScheduler {
    25  	js := &JobScheduler{
    26  		Latch:       async.NewLatch(),
    27  		BaseContext: context.Background(),
    28  		Job:         job,
    29  	}
    30  	if typed, ok := job.(ScheduleProvider); ok {
    31  		js.JobSchedule = typed.Schedule()
    32  	}
    33  	for _, option := range options {
    34  		option(js)
    35  	}
    36  	return js
    37  }
    38  
// JobScheduler is a job instance.
//
// It wraps a single Job and tracks the scheduling state around it: the
// start/stop latch, the next scheduled runtime, and the current and most
// recent invocations.
type JobScheduler struct {
	// Latch signals the started / stopping / stopped state of the run loop.
	Latch *async.Latch

	// Job is the wrapped job; it supplies the name and the work to execute.
	Job Job
	// JobConfig is the config returned by Config() when the job is not a
	// ConfigProvider. Its Disabled field is also written by Enable / Disable
	// and, when set, takes precedence in Disabled().
	JobConfig JobConfig
	// JobSchedule yields the next runtime for the run loop. When nil (or when
	// it yields a zero time) the job is not scheduled automatically.
	JobSchedule Schedule
	// JobLifecycle holds the lifecycle hooks returned by Lifecycle() when the
	// job is not a LifecycleProvider.
	JobLifecycle JobLifecycle

	// BaseContext is the root context returned by Background(); when nil,
	// Background() falls back to `context.Background()`.
	BaseContext context.Context

	// Tracer is optional; when set, each invocation is wrapped in a trace.
	Tracer Tracer
	// Log is optional; when set it receives job events and errors.
	Log logger.Log

	// NextRuntime is the time the run loop will next fire the job.
	// A zero value means nothing is scheduled.
	NextRuntime time.Time

	// currentLock guards current.
	currentLock sync.Mutex
	// current is the in-flight invocation, or nil when the job is idle.
	current *JobInvocation
	// lastLock guards last.
	lastLock sync.Mutex
	// last is the most recently completed invocation, or nil if none.
	last *JobInvocation
}
    60  
    61  // Name returns the job name.
    62  func (js *JobScheduler) Name() string {
    63  	return js.Job.Name()
    64  }
    65  
    66  // Background returns the job scheduler base context.
    67  //
    68  // It should be used as the root context for _any_ operations.
    69  func (js *JobScheduler) Background() context.Context {
    70  	if js.BaseContext != nil {
    71  		return js.BaseContext
    72  	}
    73  	return context.Background()
    74  }
    75  
    76  // Config returns the job config provided by a job or an empty config.
    77  func (js *JobScheduler) Config() JobConfig {
    78  	if typed, ok := js.Job.(ConfigProvider); ok {
    79  		return typed.Config()
    80  	}
    81  	return js.JobConfig
    82  }
    83  
    84  // Lifecycle returns job lifecycle steps or an empty set.
    85  func (js *JobScheduler) Lifecycle() JobLifecycle {
    86  	if typed, ok := js.Job.(LifecycleProvider); ok {
    87  		return typed.Lifecycle()
    88  	}
    89  	return js.JobLifecycle
    90  }
    91  
    92  // Description returns the description.
    93  func (js *JobScheduler) Description() string {
    94  	return js.Config().Description
    95  }
    96  
    97  // Disabled returns if the job is disabled or not.
    98  func (js *JobScheduler) Disabled() bool {
    99  	if js.JobConfig.Disabled != nil {
   100  		return *js.JobConfig.Disabled
   101  	}
   102  	return js.Config().DisabledOrDefault()
   103  }
   104  
   105  // Labels returns the job labels, including
   106  // automatically added ones like `name`.
   107  func (js *JobScheduler) Labels() map[string]string {
   108  	output := map[string]string{
   109  		"name":      stringutil.Slugify(js.Name()),
   110  		"scheduler": string(js.State()),
   111  		"active":    fmt.Sprint(!js.IsIdle()),
   112  		"enabled":   fmt.Sprint(!js.Disabled()),
   113  	}
   114  	if js.Last() != nil {
   115  		output["last"] = stringutil.Slugify(string(js.Last().Status))
   116  	}
   117  	for key, value := range js.Config().Labels {
   118  		output[key] = value
   119  	}
   120  	return output
   121  }
   122  
   123  // State returns the job scheduler state.
   124  func (js *JobScheduler) State() JobSchedulerState {
   125  	if js.Latch.IsStarted() {
   126  		return JobSchedulerStateRunning
   127  	}
   128  	if js.Latch.IsStopped() {
   129  		return JobSchedulerStateStopped
   130  	}
   131  	return JobSchedulerStateUnknown
   132  }
   133  
   134  // Start starts the scheduler.
   135  // This call blocks.
   136  func (js *JobScheduler) Start() error {
   137  	if !js.Latch.CanStart() {
   138  		return async.ErrCannotStart
   139  	}
   140  	js.Latch.Starting()
   141  	js.RunLoop()
   142  	return nil
   143  }
   144  
   145  // Stop stops the scheduler.
   146  func (js *JobScheduler) Stop() error {
   147  	if !js.Latch.CanStop() {
   148  		return async.ErrCannotStop
   149  	}
   150  
   151  	ctx := js.withBaseContext(js.Background())
   152  	js.Latch.Stopping()
   153  
   154  	if current := js.Current(); current != nil {
   155  		gracePeriod := js.Config().ShutdownGracePeriodOrDefault()
   156  		if gracePeriod > 0 {
   157  			var cancel func()
   158  			ctx, cancel = js.withTimeoutOrCancel(ctx, gracePeriod)
   159  			defer cancel()
   160  			js.waitCurrentComplete(ctx)
   161  		}
   162  	}
   163  	if current := js.Current(); current != nil && current.Status == JobInvocationStatusRunning {
   164  		current.Cancel()
   165  	}
   166  
   167  	<-js.Latch.NotifyStopped()
   168  	js.Latch.Reset()
   169  	js.NextRuntime = Zero
   170  	return nil
   171  }
   172  
   173  // OnLoad triggers the on load even on the job lifecycle handler.
   174  func (js *JobScheduler) OnLoad(ctx context.Context) error {
   175  	ctx = js.withBaseContext(ctx)
   176  	if js.Lifecycle().OnLoad != nil {
   177  		if err := js.Lifecycle().OnLoad(ctx); err != nil {
   178  			return err
   179  		}
   180  	}
   181  	return nil
   182  }
   183  
   184  // OnUnload triggers the on unload even on the job lifecycle handler.
   185  func (js *JobScheduler) OnUnload(ctx context.Context) error {
   186  	ctx = js.withBaseContext(ctx)
   187  	if js.Lifecycle().OnUnload != nil {
   188  		return js.Lifecycle().OnUnload(ctx)
   189  	}
   190  	return nil
   191  }
   192  
   193  // NotifyStarted notifies the job scheduler has started.
   194  func (js *JobScheduler) NotifyStarted() <-chan struct{} {
   195  	return js.Latch.NotifyStarted()
   196  }
   197  
   198  // NotifyStopped notifies the job scheduler has stopped.
   199  func (js *JobScheduler) NotifyStopped() <-chan struct{} {
   200  	return js.Latch.NotifyStopped()
   201  }
   202  
   203  // Enable sets the job as enabled.
   204  func (js *JobScheduler) Enable() {
   205  	ctx := js.withBaseContext(js.Background())
   206  	js.JobConfig.Disabled = ref.Bool(false)
   207  	if lifecycle := js.Lifecycle(); lifecycle.OnEnabled != nil {
   208  		lifecycle.OnEnabled(ctx)
   209  	}
   210  	if js.Log != nil && !js.Config().SkipLoggerTrigger {
   211  		js.Log.TriggerContext(ctx, NewEvent(FlagEnabled, js.Name()))
   212  	}
   213  }
   214  
   215  // Disable sets the job as disabled.
   216  func (js *JobScheduler) Disable() {
   217  	ctx := js.withBaseContext(js.Background())
   218  	js.JobConfig.Disabled = ref.Bool(true)
   219  	if lifecycle := js.Lifecycle(); lifecycle.OnDisabled != nil {
   220  		lifecycle.OnDisabled(ctx)
   221  	}
   222  	if js.Log != nil && !js.Config().SkipLoggerTrigger {
   223  		js.Log.TriggerContext(ctx, NewEvent(FlagDisabled, js.Name()))
   224  	}
   225  }
   226  
   227  // Cancel stops all running invocations.
   228  func (js *JobScheduler) Cancel() error {
   229  	ctx := js.withBaseContext(js.Background())
   230  
   231  	if js.Current() == nil {
   232  		logger.MaybeDebugfContext(ctx, js.Log, "cannot cancel; job is not runnning")
   233  		return nil
   234  	}
   235  	gracePeriod := js.Config().ShutdownGracePeriodOrDefault()
   236  	if gracePeriod > 0 {
   237  		ctx, cancel := js.withTimeoutOrCancel(ctx, gracePeriod)
   238  		defer cancel()
   239  		js.waitCurrentComplete(ctx)
   240  	}
   241  	if current := js.Current(); current != nil && current.Status == JobInvocationStatusRunning {
   242  		current.Cancel()
   243  	} else {
   244  		logger.MaybeDebugfContext(ctx, js.Log, "cannot cancel; job is not runnning")
   245  	}
   246  	return nil
   247  }
   248  
   249  // RunLoop is the main scheduler loop.
   250  // This call blocks.
   251  // It alarms on the next runtime and forks a new routine to run the job.
   252  // It can be aborted with the scheduler's async.Latch, or calling `.Stop()`.
   253  // If this function exits for any reason, it will mark the scheduler as stopped.
   254  func (js *JobScheduler) RunLoop() {
   255  	js.Latch.Started()
   256  	defer func() {
   257  		js.Latch.Stopped()
   258  		js.Latch.Reset()
   259  	}()
   260  
   261  	if js.JobSchedule != nil {
   262  		js.NextRuntime = js.JobSchedule.Next(js.NextRuntime)
   263  	}
   264  
   265  	// if the schedule returns a zero timestamp
   266  	// it should be interpretted as *not* to automatically
   267  	// schedule the job to be run.
   268  	// The run loop will return and the job scheduler will be interpretted as stopped.
   269  	if js.NextRuntime.IsZero() {
   270  		return
   271  	}
   272  
   273  	for {
   274  		if js.NextRuntime.IsZero() {
   275  			return
   276  		}
   277  
   278  		runAt := time.After(js.NextRuntime.UTC().Sub(Now()))
   279  		select {
   280  		case <-runAt:
   281  			if js.CanBeScheduled() {
   282  				if _, _, err := js.RunAsyncContext(js.Background()); err != nil {
   283  					_ = js.error(js.Background(), err)
   284  				}
   285  			}
   286  
   287  			// set up the next runtime.
   288  			if js.JobSchedule != nil {
   289  				js.NextRuntime = js.JobSchedule.Next(js.NextRuntime)
   290  			} else {
   291  				js.NextRuntime = Zero
   292  			}
   293  
   294  		case <-js.Latch.NotifyStopping():
   295  			// note: we bail hard here
   296  			// because the job executions in flight are
   297  			// handled by the context cancellation.
   298  			return
   299  		}
   300  	}
   301  }
   302  
   303  // RunAsync starts a job invocation with the BaseContext the root context.
   304  func (js *JobScheduler) RunAsync() (*JobInvocation, <-chan struct{}, error) {
   305  	return js.RunAsyncContext(js.Background())
   306  }
   307  
   308  // RunAsyncContext starts a job invocation with a given context.
   309  func (js *JobScheduler) RunAsyncContext(ctx context.Context) (*JobInvocation, <-chan struct{}, error) {
   310  	if !js.IsIdle() {
   311  		return nil, nil, ex.New(ErrJobAlreadyRunning, ex.OptMessagef("job: %s", js.Name()))
   312  	}
   313  
   314  	ctx = js.withBaseContext(ctx)
   315  	ctx, ji := js.withInvocationContext(ctx)
   316  	done := make(chan struct{})
   317  	js.SetCurrent(ji)
   318  
   319  	var err error
   320  	var tracer TraceFinisher
   321  	go func() {
   322  		defer func() {
   323  			switch {
   324  			case err != nil && IsJobCanceled(err):
   325  				js.onJobCompleteCanceled(ctx) // the job was canceled, either manually or by a timeout
   326  			case err != nil:
   327  				js.onJobCompleteError(ctx, err) // the job completed with an error
   328  			default:
   329  				js.onJobCompleteSuccess(ctx) // the job completed without error
   330  			}
   331  
   332  			if tracer != nil {
   333  				tracer.Finish(ctx, err) // call the trace finisher if one was started
   334  			}
   335  			ji.Cancel() // if the job was created with a timeout, end the timeout
   336  
   337  			close(done)              // signal callers the job is done
   338  			js.assignCurrentToLast() // rotate in the current to the last result
   339  		}()
   340  
   341  		if js.Tracer != nil {
   342  			ctx, tracer = js.Tracer.Start(ctx, js.Name())
   343  		}
   344  		js.onJobBegin(ctx) // signal the job is starting
   345  
   346  		select {
   347  		case <-ctx.Done(): // if the timeout or cancel is triggered
   348  			err = ErrJobCanceled // set the error to a known error
   349  			return
   350  		case err = <-js.safeBackgroundExec(ctx): // run the job in a background routine and catch pancis
   351  			return
   352  		}
   353  	}()
   354  	return ji, done, nil
   355  }
   356  
   357  // Run forces the job to run.
   358  // This call will block.
   359  func (js *JobScheduler) Run() {
   360  	_, done, err := js.RunAsync()
   361  	if err != nil {
   362  		return
   363  	}
   364  	<-done
   365  }
   366  
   367  // RunContext runs a job with a given context as the root context.
   368  func (js *JobScheduler) RunContext(ctx context.Context) {
   369  	_, done, err := js.RunAsyncContext(ctx)
   370  	if err != nil {
   371  		return
   372  	}
   373  	<-done
   374  }
   375  
   376  //
   377  // exported utility methods
   378  //
   379  
   380  // CanBeScheduled returns if a job will be triggered automatically
   381  // and isn't already in flight and set to be serial.
   382  func (js *JobScheduler) CanBeScheduled() bool {
   383  	return !js.Disabled() && js.IsIdle()
   384  }
   385  
   386  // IsIdle returns if the job is not currently running.
   387  func (js *JobScheduler) IsIdle() (isIdle bool) {
   388  	isIdle = js.Current() == nil
   389  	return
   390  }
   391  
   392  //
   393  // utility functions
   394  //
   395  
   396  // Current returns the current job invocation.
   397  func (js *JobScheduler) Current() (current *JobInvocation) {
   398  	js.currentLock.Lock()
   399  	if js.current != nil {
   400  		current = js.current.Clone()
   401  	}
   402  	js.currentLock.Unlock()
   403  	return
   404  }
   405  
   406  // SetCurrent sets the current invocation, it is useful for tests etc.
   407  func (js *JobScheduler) SetCurrent(ji *JobInvocation) {
   408  	js.currentLock.Lock()
   409  	js.current = ji
   410  	js.currentLock.Unlock()
   411  }
   412  
   413  // Last returns the last job invocation.
   414  func (js *JobScheduler) Last() (last *JobInvocation) {
   415  	js.lastLock.Lock()
   416  	if js.last != nil {
   417  		last = js.last
   418  	}
   419  	js.lastLock.Unlock()
   420  	return
   421  }
   422  
   423  // SetLast sets the last invocation, it is useful for tests etc.
   424  func (js *JobScheduler) SetLast(ji *JobInvocation) {
   425  	js.lastLock.Lock()
   426  	js.last = ji
   427  	js.lastLock.Unlock()
   428  }
   429  
   430  func (js *JobScheduler) assignCurrentToLast() {
   431  	js.lastLock.Lock()
   432  	js.currentLock.Lock()
   433  	js.last = js.current
   434  	js.current = nil
   435  	js.currentLock.Unlock()
   436  	js.lastLock.Unlock()
   437  }
   438  
   439  func (js *JobScheduler) waitCurrentComplete(ctx context.Context) {
   440  	deadlinePoll := time.NewTicker(100 * time.Millisecond)
   441  	defer deadlinePoll.Stop()
   442  	for {
   443  		if js.Current().Status != JobInvocationStatusRunning {
   444  			return
   445  		}
   446  		select {
   447  		case <-ctx.Done(): // once the timeout triggers
   448  			return
   449  		case <-deadlinePoll.C:
   450  			// tick over the loop to check if the current job is complete
   451  			continue
   452  		}
   453  	}
   454  }
   455  
   456  func (js *JobScheduler) safeBackgroundExec(ctx context.Context) chan error {
   457  	errors := make(chan error, 2)
   458  	go func() {
   459  		defer func() {
   460  			if r := recover(); r != nil {
   461  				errors <- ex.New(r)
   462  			}
   463  		}()
   464  		errors <- js.Job.Execute(ctx)
   465  	}()
   466  	return errors
   467  }
   468  
   469  func (js *JobScheduler) withBaseContext(ctx context.Context) context.Context {
   470  	if typed, ok := js.Job.(BackgroundProvider); ok {
   471  		ctx = typed.Background(ctx)
   472  	}
   473  	ctx = logger.WithPathAppend(ctx, js.Name())
   474  	ctx = WithJobScheduler(ctx, js)
   475  	return ctx
   476  }
   477  
   478  func (js *JobScheduler) withTimeoutOrCancel(ctx context.Context, timeout time.Duration) (context.Context, context.CancelFunc) {
   479  	if timeout > 0 {
   480  		return context.WithTimeout(ctx, timeout)
   481  	}
   482  	return context.WithCancel(ctx)
   483  }
   484  
   485  func (js *JobScheduler) withInvocationContext(ctx context.Context) (context.Context, *JobInvocation) {
   486  	ji := NewJobInvocation(js.Name())
   487  	ji.Parameters = MergeJobParameterValues(js.Config().ParameterValues, GetJobParameterValues(ctx))
   488  	ctx = logger.WithPathAppend(ctx, ji.ID)
   489  	ctx, ji.Cancel = js.withTimeoutOrCancel(ctx, js.Config().TimeoutOrDefault())
   490  	ctx = WithJobInvocation(ctx, ji)
   491  	ctx = WithJobParameterValues(ctx, ji.Parameters)
   492  	return ctx, ji
   493  }
   494  
   495  // job lifecycle hooks
   496  
   497  func (js *JobScheduler) onJobBegin(ctx context.Context) {
   498  	js.currentLock.Lock()
   499  	js.current.Started = time.Now().UTC()
   500  	js.current.Status = JobInvocationStatusRunning
   501  	id := js.current.ID
   502  	js.currentLock.Unlock()
   503  
   504  	if lifecycle := js.Lifecycle(); lifecycle.OnBegin != nil {
   505  		lifecycle.OnBegin(ctx)
   506  	}
   507  	if js.Log != nil && !js.Config().SkipLoggerTrigger {
   508  		js.logTrigger(ctx, NewEvent(FlagBegin, js.Name(), OptEventJobInvocation(id)))
   509  	}
   510  }
   511  
   512  func (js *JobScheduler) onJobCompleteCanceled(ctx context.Context) {
   513  	js.currentLock.Lock()
   514  	js.current.Complete = time.Now().UTC()
   515  	js.current.Status = JobInvocationStatusCanceled
   516  	id := js.current.ID
   517  	elapsed := js.current.Elapsed()
   518  	js.currentLock.Unlock()
   519  
   520  	lifecycle := js.Lifecycle()
   521  	if lifecycle.OnCancellation != nil {
   522  		lifecycle.OnCancellation(ctx)
   523  	}
   524  	if js.Log != nil && !js.Config().SkipLoggerTrigger {
   525  		js.logTrigger(ctx, NewEvent(FlagCanceled, js.Name(), OptEventJobInvocation(id), OptEventElapsed(elapsed)))
   526  		js.logTrigger(ctx, NewEvent(FlagComplete, js.Name(), OptEventJobInvocation(id), OptEventElapsed(elapsed)))
   527  	}
   528  	if lifecycle.OnComplete != nil {
   529  		lifecycle.OnComplete(ctx)
   530  	}
   531  }
   532  
   533  func (js *JobScheduler) onJobCompleteSuccess(ctx context.Context) {
   534  	js.currentLock.Lock()
   535  	js.current.Complete = time.Now().UTC()
   536  	js.current.Status = JobInvocationStatusSuccess
   537  	id := js.current.ID
   538  	elapsed := js.current.Elapsed()
   539  	js.currentLock.Unlock()
   540  
   541  	lifecycle := js.Lifecycle()
   542  	if lifecycle.OnSuccess != nil {
   543  		lifecycle.OnSuccess(ctx)
   544  	}
   545  	if js.Log != nil && !js.Config().SkipLoggerTrigger {
   546  		js.logTrigger(ctx, NewEvent(FlagSuccess, js.Name(), OptEventJobInvocation(id), OptEventElapsed(elapsed)))
   547  		js.logTrigger(ctx, NewEvent(FlagComplete, js.Name(), OptEventJobInvocation(id), OptEventElapsed(elapsed)))
   548  	}
   549  	if last := js.Last(); last != nil && last.Status == JobInvocationStatusErrored {
   550  		if lifecycle.OnFixed != nil {
   551  			lifecycle.OnFixed(ctx)
   552  		}
   553  		if js.Log != nil && !js.Config().SkipLoggerTrigger {
   554  			js.logTrigger(ctx, NewEvent(FlagFixed, js.Name(), OptEventJobInvocation(id), OptEventElapsed(elapsed)))
   555  		}
   556  	}
   557  	if lifecycle.OnComplete != nil {
   558  		lifecycle.OnComplete(ctx)
   559  	}
   560  }
   561  
   562  func (js *JobScheduler) onJobCompleteError(ctx context.Context, err error) {
   563  	js.currentLock.Lock()
   564  	js.current.Complete = time.Now().UTC()
   565  	js.current.Status = JobInvocationStatusErrored
   566  	js.current.Err = err
   567  	id := js.current.ID
   568  	elapsed := js.current.Elapsed()
   569  	js.currentLock.Unlock()
   570  
   571  	//
   572  	// error
   573  	//
   574  
   575  	// always log the error
   576  	_ = js.error(ctx, err)
   577  	lifecycle := js.Lifecycle()
   578  	if lifecycle.OnError != nil {
   579  		lifecycle.OnError(ctx)
   580  	}
   581  	if js.Log != nil && !js.Config().SkipLoggerTrigger {
   582  		js.logTrigger(ctx, NewEvent(FlagErrored, js.Name(),
   583  			OptEventJobInvocation(id),
   584  			OptEventErr(err),
   585  			OptEventElapsed(elapsed),
   586  		))
   587  		js.logTrigger(ctx, NewEvent(FlagComplete, js.Name(), OptEventJobInvocation(id), OptEventElapsed(elapsed)))
   588  	}
   589  
   590  	//
   591  	// broken; assumes that last is set, and last was a success
   592  	//
   593  
   594  	if last := js.Last(); last != nil && last.Status != JobInvocationStatusErrored {
   595  		if lifecycle.OnBroken != nil {
   596  			lifecycle.OnBroken(ctx)
   597  		}
   598  		if js.Log != nil && !js.Config().SkipLoggerTrigger {
   599  			js.logTrigger(ctx, NewEvent(FlagBroken, js.Name(),
   600  				OptEventJobInvocation(id),
   601  				OptEventErr(err),
   602  				OptEventElapsed(elapsed)),
   603  			)
   604  		}
   605  	}
   606  	if lifecycle.OnComplete != nil {
   607  		lifecycle.OnComplete(ctx)
   608  	}
   609  }
   610  
   611  //
   612  // logging helpers
   613  //
   614  
   615  func (js *JobScheduler) logTrigger(ctx context.Context, e logger.Event) {
   616  	if !logger.IsLoggerSet(js.Log) {
   617  		return
   618  	}
   619  	js.Log.TriggerContext(ctx, e)
   620  }
   621  
   622  func (js *JobScheduler) debugf(ctx context.Context, format string, args ...interface{}) {
   623  	if !logger.IsLoggerSet(js.Log) {
   624  		return
   625  	}
   626  	js.Log.DebugfContext(ctx, format, args...)
   627  }
   628  
   629  func (js *JobScheduler) error(ctx context.Context, err error) error {
   630  	if !logger.IsLoggerSet(js.Log) {
   631  		return err
   632  	}
   633  	js.Log.ErrorContext(ctx, err)
   634  	return err
   635  }