
     1  /*
     3  Copyright (c) 2024 - Present. Blend Labs, Inc. All rights reserved
     4  Use of this source code is governed by a MIT license that can be found in the LICENSE file.
     6  */
     8  package cron
    10  import (
    11  	"context"
    12  	"fmt"
    13  	"sync"
    14  	"time"
    16  	""
    17  	""
    18  	""
    19  	""
    20  	""
    21  )
    23  // NewJobScheduler returns a job scheduler for a given job.
    24  func NewJobScheduler(job Job, options ...JobSchedulerOption) *JobScheduler {
    25  	js := &JobScheduler{
    26  		Latch:       async.NewLatch(),
    27  		BaseContext: context.Background(),
    28  		Job:         job,
    29  	}
    30  	if typed, ok := job.(ScheduleProvider); ok {
    31  		js.JobSchedule = typed.Schedule()
    32  	}
    33  	for _, option := range options {
    34  		option(js)
    35  	}
    36  	return js
    37  }
// JobScheduler is a job instance.
//
// It wraps a single Job together with the state needed to schedule it, run
// it, and report on its current and most recent invocations.
type JobScheduler struct {
	// Latch tracks the started / stopping / stopped state of the scheduler.
	Latch *async.Latch

	// Job is the wrapped job. JobConfig and JobLifecycle are the values
	// returned by Config and Lifecycle when Job does not provide its own
	// (Enable and Disable also write JobConfig.Disabled). JobSchedule drives
	// the run loop; NewJobScheduler seeds it from Job when Job implements
	// ScheduleProvider.
	Job          Job
	JobConfig    JobConfig
	JobSchedule  Schedule
	JobLifecycle JobLifecycle

	// BaseContext is what Background returns when it is non-nil.
	BaseContext context.Context

	// Tracer, when non-nil, is started for each invocation.
	// Log, when set, receives the scheduler's events and errors.
	Tracer Tracer
	Log    logger.Log

	// NextRuntime is the time the run loop will next fire; Stop resets it
	// to Zero.
	NextRuntime time.Time

	// currentLock guards current (the in-flight invocation, nil when idle);
	// lastLock guards last (the most recently completed invocation).
	currentLock sync.Mutex
	current     *JobInvocation
	lastLock    sync.Mutex
	last        *JobInvocation
}
    61  // Name returns the job name.
    62  func (js *JobScheduler) Name() string {
    63  	return js.Job.Name()
    64  }
    66  // Background returns the job scheduler base context.
    67  //
    68  // It should be used as the root context for _any_ operations.
    69  func (js *JobScheduler) Background() context.Context {
    70  	if js.BaseContext != nil {
    71  		return js.BaseContext
    72  	}
    73  	return context.Background()
    74  }
    76  // Config returns the job config provided by a job or an empty config.
    77  func (js *JobScheduler) Config() JobConfig {
    78  	if typed, ok := js.Job.(ConfigProvider); ok {
    79  		return typed.Config()
    80  	}
    81  	return js.JobConfig
    82  }
    84  // Lifecycle returns job lifecycle steps or an empty set.
    85  func (js *JobScheduler) Lifecycle() JobLifecycle {
    86  	if typed, ok := js.Job.(LifecycleProvider); ok {
    87  		return typed.Lifecycle()
    88  	}
    89  	return js.JobLifecycle
    90  }
    92  // Description returns the description.
    93  func (js *JobScheduler) Description() string {
    94  	return js.Config().Description
    95  }
    97  // Disabled returns if the job is disabled or not.
    98  func (js *JobScheduler) Disabled() bool {
    99  	if js.JobConfig.Disabled != nil {
   100  		return *js.JobConfig.Disabled
   101  	}
   102  	return js.Config().DisabledOrDefault()
   103  }
   105  // Labels returns the job labels, including
   106  // automatically added ones like `name`.
   107  func (js *JobScheduler) Labels() map[string]string {
   108  	output := map[string]string{
   109  		"name":      stringutil.Slugify(js.Name()),
   110  		"scheduler": string(js.State()),
   111  		"active":    fmt.Sprint(!js.IsIdle()),
   112  		"enabled":   fmt.Sprint(!js.Disabled()),
   113  	}
   114  	if js.Last() != nil {
   115  		output["last"] = stringutil.Slugify(string(js.Last().Status))
   116  	}
   117  	for key, value := range js.Config().Labels {
   118  		output[key] = value
   119  	}
   120  	return output
   121  }
   123  // State returns the job scheduler state.
   124  func (js *JobScheduler) State() JobSchedulerState {
   125  	if js.Latch.IsStarted() {
   126  		return JobSchedulerStateRunning
   127  	}
   128  	if js.Latch.IsStopped() {
   129  		return JobSchedulerStateStopped
   130  	}
   131  	return JobSchedulerStateUnknown
   132  }
   134  // Start starts the scheduler.
   135  // This call blocks.
   136  func (js *JobScheduler) Start() error {
   137  	if !js.Latch.CanStart() {
   138  		return async.ErrCannotStart
   139  	}
   140  	js.Latch.Starting()
   141  	js.RunLoop()
   142  	return nil
   143  }
   145  // Stop stops the scheduler.
   146  func (js *JobScheduler) Stop() error {
   147  	if !js.Latch.CanStop() {
   148  		return async.ErrCannotStop
   149  	}
   151  	ctx := js.withBaseContext(js.Background())
   152  	js.Latch.Stopping()
   154  	if current := js.Current(); current != nil {
   155  		gracePeriod := js.Config().ShutdownGracePeriodOrDefault()
   156  		if gracePeriod > 0 {
   157  			var cancel func()
   158  			ctx, cancel = js.withTimeoutOrCancel(ctx, gracePeriod)
   159  			defer cancel()
   160  			js.waitCurrentComplete(ctx)
   161  		}
   162  	}
   163  	if current := js.Current(); current != nil && current.Status == JobInvocationStatusRunning {
   164  		current.Cancel()
   165  	}
   167  	<-js.Latch.NotifyStopped()
   168  	js.Latch.Reset()
   169  	js.NextRuntime = Zero
   170  	return nil
   171  }
   173  // OnLoad triggers the on load even on the job lifecycle handler.
   174  func (js *JobScheduler) OnLoad(ctx context.Context) error {
   175  	ctx = js.withBaseContext(ctx)
   176  	if js.Lifecycle().OnLoad != nil {
   177  		if err := js.Lifecycle().OnLoad(ctx); err != nil {
   178  			return err
   179  		}
   180  	}
   181  	return nil
   182  }
   184  // OnUnload triggers the on unload even on the job lifecycle handler.
   185  func (js *JobScheduler) OnUnload(ctx context.Context) error {
   186  	ctx = js.withBaseContext(ctx)
   187  	if js.Lifecycle().OnUnload != nil {
   188  		return js.Lifecycle().OnUnload(ctx)
   189  	}
   190  	return nil
   191  }
   193  // NotifyStarted notifies the job scheduler has started.
   194  func (js *JobScheduler) NotifyStarted() <-chan struct{} {
   195  	return js.Latch.NotifyStarted()
   196  }
   198  // NotifyStopped notifies the job scheduler has stopped.
   199  func (js *JobScheduler) NotifyStopped() <-chan struct{} {
   200  	return js.Latch.NotifyStopped()
   201  }
   203  // Enable sets the job as enabled.
   204  func (js *JobScheduler) Enable() {
   205  	ctx := js.withBaseContext(js.Background())
   206  	js.JobConfig.Disabled = ref.Bool(false)
   207  	if lifecycle := js.Lifecycle(); lifecycle.OnEnabled != nil {
   208  		lifecycle.OnEnabled(ctx)
   209  	}
   210  	if js.Log != nil && !js.Config().SkipLoggerTrigger {
   211  		js.Log.TriggerContext(ctx, NewEvent(FlagEnabled, js.Name()))
   212  	}
   213  }
   215  // Disable sets the job as disabled.
   216  func (js *JobScheduler) Disable() {
   217  	ctx := js.withBaseContext(js.Background())
   218  	js.JobConfig.Disabled = ref.Bool(true)
   219  	if lifecycle := js.Lifecycle(); lifecycle.OnDisabled != nil {
   220  		lifecycle.OnDisabled(ctx)
   221  	}
   222  	if js.Log != nil && !js.Config().SkipLoggerTrigger {
   223  		js.Log.TriggerContext(ctx, NewEvent(FlagDisabled, js.Name()))
   224  	}
   225  }
   227  // Cancel stops all running invocations.
   228  func (js *JobScheduler) Cancel() error {
   229  	ctx := js.withBaseContext(js.Background())
   231  	if js.Current() == nil {
   232  		logger.MaybeDebugfContext(ctx, js.Log, "cannot cancel; job is not runnning")
   233  		return nil
   234  	}
   235  	gracePeriod := js.Config().ShutdownGracePeriodOrDefault()
   236  	if gracePeriod > 0 {
   237  		ctx, cancel := js.withTimeoutOrCancel(ctx, gracePeriod)
   238  		defer cancel()
   239  		js.waitCurrentComplete(ctx)
   240  	}
   241  	if current := js.Current(); current != nil && current.Status == JobInvocationStatusRunning {
   242  		current.Cancel()
   243  	} else {
   244  		logger.MaybeDebugfContext(ctx, js.Log, "cannot cancel; job is not runnning")
   245  	}
   246  	return nil
   247  }
   249  // RunLoop is the main scheduler loop.
   250  // This call blocks.
   251  // It alarms on the next runtime and forks a new routine to run the job.
   252  // It can be aborted with the scheduler's async.Latch, or calling `.Stop()`.
   253  // If this function exits for any reason, it will mark the scheduler as stopped.
   254  func (js *JobScheduler) RunLoop() {
   255  	js.Latch.Started()
   256  	defer func() {
   257  		js.Latch.Stopped()
   258  		js.Latch.Reset()
   259  	}()
   261  	if js.JobSchedule != nil {
   262  		js.NextRuntime = js.JobSchedule.Next(js.NextRuntime)
   263  	}
   265  	// if the schedule returns a zero timestamp
   266  	// it should be interpretted as *not* to automatically
   267  	// schedule the job to be run.
   268  	// The run loop will return and the job scheduler will be interpretted as stopped.
   269  	if js.NextRuntime.IsZero() {
   270  		return
   271  	}
   273  	for {
   274  		if js.NextRuntime.IsZero() {
   275  			return
   276  		}
   278  		runAt := time.After(js.NextRuntime.UTC().Sub(Now()))
   279  		select {
   280  		case <-runAt:
   281  			if js.CanBeScheduled() {
   282  				if _, _, err := js.RunAsyncContext(js.Background()); err != nil {
   283  					_ = js.error(js.Background(), err)
   284  				}
   285  			}
   287  			// set up the next runtime.
   288  			if js.JobSchedule != nil {
   289  				js.NextRuntime = js.JobSchedule.Next(js.NextRuntime)
   290  			} else {
   291  				js.NextRuntime = Zero
   292  			}
   294  		case <-js.Latch.NotifyStopping():
   295  			// note: we bail hard here
   296  			// because the job executions in flight are
   297  			// handled by the context cancellation.
   298  			return
   299  		}
   300  	}
   301  }
   303  // RunAsync starts a job invocation with the BaseContext the root context.
   304  func (js *JobScheduler) RunAsync() (*JobInvocation, <-chan struct{}, error) {
   305  	return js.RunAsyncContext(js.Background())
   306  }
   308  // RunAsyncContext starts a job invocation with a given context.
   309  func (js *JobScheduler) RunAsyncContext(ctx context.Context) (*JobInvocation, <-chan struct{}, error) {
   310  	if !js.IsIdle() {
   311  		return nil, nil, ex.New(ErrJobAlreadyRunning, ex.OptMessagef("job: %s", js.Name()))
   312  	}
   314  	ctx = js.withBaseContext(ctx)
   315  	ctx, ji := js.withInvocationContext(ctx)
   316  	done := make(chan struct{})
   317  	js.SetCurrent(ji)
   319  	var err error
   320  	var tracer TraceFinisher
   321  	go func() {
   322  		defer func() {
   323  			switch {
   324  			case err != nil && IsJobCanceled(err):
   325  				js.onJobCompleteCanceled(ctx) // the job was canceled, either manually or by a timeout
   326  			case err != nil:
   327  				js.onJobCompleteError(ctx, err) // the job completed with an error
   328  			default:
   329  				js.onJobCompleteSuccess(ctx) // the job completed without error
   330  			}
   332  			if tracer != nil {
   333  				tracer.Finish(ctx, err) // call the trace finisher if one was started
   334  			}
   335  			ji.Cancel() // if the job was created with a timeout, end the timeout
   337  			close(done)              // signal callers the job is done
   338  			js.assignCurrentToLast() // rotate in the current to the last result
   339  		}()
   341  		if js.Tracer != nil {
   342  			ctx, tracer = js.Tracer.Start(ctx, js.Name())
   343  		}
   344  		js.onJobBegin(ctx) // signal the job is starting
   346  		select {
   347  		case <-ctx.Done(): // if the timeout or cancel is triggered
   348  			err = ErrJobCanceled // set the error to a known error
   349  			return
   350  		case err = <-js.safeBackgroundExec(ctx): // run the job in a background routine and catch pancis
   351  			return
   352  		}
   353  	}()
   354  	return ji, done, nil
   355  }
   357  // Run forces the job to run.
   358  // This call will block.
   359  func (js *JobScheduler) Run() {
   360  	_, done, err := js.RunAsync()
   361  	if err != nil {
   362  		return
   363  	}
   364  	<-done
   365  }
   367  // RunContext runs a job with a given context as the root context.
   368  func (js *JobScheduler) RunContext(ctx context.Context) {
   369  	_, done, err := js.RunAsyncContext(ctx)
   370  	if err != nil {
   371  		return
   372  	}
   373  	<-done
   374  }
   376  //
   377  // exported utility methods
   378  //
   380  // CanBeScheduled returns if a job will be triggered automatically
   381  // and isn't already in flight and set to be serial.
   382  func (js *JobScheduler) CanBeScheduled() bool {
   383  	return !js.Disabled() && js.IsIdle()
   384  }
   386  // IsIdle returns if the job is not currently running.
   387  func (js *JobScheduler) IsIdle() (isIdle bool) {
   388  	isIdle = js.Current() == nil
   389  	return
   390  }
   392  //
   393  // utility functions
   394  //
   396  // Current returns the current job invocation.
   397  func (js *JobScheduler) Current() (current *JobInvocation) {
   398  	js.currentLock.Lock()
   399  	if js.current != nil {
   400  		current = js.current.Clone()
   401  	}
   402  	js.currentLock.Unlock()
   403  	return
   404  }
   406  // SetCurrent sets the current invocation, it is useful for tests etc.
   407  func (js *JobScheduler) SetCurrent(ji *JobInvocation) {
   408  	js.currentLock.Lock()
   409  	js.current = ji
   410  	js.currentLock.Unlock()
   411  }
   413  // Last returns the last job invocation.
   414  func (js *JobScheduler) Last() (last *JobInvocation) {
   415  	js.lastLock.Lock()
   416  	if js.last != nil {
   417  		last = js.last
   418  	}
   419  	js.lastLock.Unlock()
   420  	return
   421  }
   423  // SetLast sets the last invocation, it is useful for tests etc.
   424  func (js *JobScheduler) SetLast(ji *JobInvocation) {
   425  	js.lastLock.Lock()
   426  	js.last = ji
   427  	js.lastLock.Unlock()
   428  }
   430  func (js *JobScheduler) assignCurrentToLast() {
   431  	js.lastLock.Lock()
   432  	js.currentLock.Lock()
   433  	js.last = js.current
   434  	js.current = nil
   435  	js.currentLock.Unlock()
   436  	js.lastLock.Unlock()
   437  }
   439  func (js *JobScheduler) waitCurrentComplete(ctx context.Context) {
   440  	deadlinePoll := time.NewTicker(100 * time.Millisecond)
   441  	defer deadlinePoll.Stop()
   442  	for {
   443  		if js.Current().Status != JobInvocationStatusRunning {
   444  			return
   445  		}
   446  		select {
   447  		case <-ctx.Done(): // once the timeout triggers
   448  			return
   449  		case <-deadlinePoll.C:
   450  			// tick over the loop to check if the current job is complete
   451  			continue
   452  		}
   453  	}
   454  }
   456  func (js *JobScheduler) safeBackgroundExec(ctx context.Context) chan error {
   457  	errors := make(chan error, 2)
   458  	go func() {
   459  		defer func() {
   460  			if r := recover(); r != nil {
   461  				errors <- ex.New(r)
   462  			}
   463  		}()
   464  		errors <- js.Job.Execute(ctx)
   465  	}()
   466  	return errors
   467  }
   469  func (js *JobScheduler) withBaseContext(ctx context.Context) context.Context {
   470  	if typed, ok := js.Job.(BackgroundProvider); ok {
   471  		ctx = typed.Background(ctx)
   472  	}
   473  	ctx = logger.WithPathAppend(ctx, js.Name())
   474  	ctx = WithJobScheduler(ctx, js)
   475  	return ctx
   476  }
   478  func (js *JobScheduler) withTimeoutOrCancel(ctx context.Context, timeout time.Duration) (context.Context, context.CancelFunc) {
   479  	if timeout > 0 {
   480  		return context.WithTimeout(ctx, timeout)
   481  	}
   482  	return context.WithCancel(ctx)
   483  }
   485  func (js *JobScheduler) withInvocationContext(ctx context.Context) (context.Context, *JobInvocation) {
   486  	ji := NewJobInvocation(js.Name())
   487  	ji.Parameters = MergeJobParameterValues(js.Config().ParameterValues, GetJobParameterValues(ctx))
   488  	ctx = logger.WithPathAppend(ctx, ji.ID)
   489  	ctx, ji.Cancel = js.withTimeoutOrCancel(ctx, js.Config().TimeoutOrDefault())
   490  	ctx = WithJobInvocation(ctx, ji)
   491  	ctx = WithJobParameterValues(ctx, ji.Parameters)
   492  	return ctx, ji
   493  }
   495  // job lifecycle hooks
   497  func (js *JobScheduler) onJobBegin(ctx context.Context) {
   498  	js.currentLock.Lock()
   499  	js.current.Started = time.Now().UTC()
   500  	js.current.Status = JobInvocationStatusRunning
   501  	id := js.current.ID
   502  	js.currentLock.Unlock()
   504  	if lifecycle := js.Lifecycle(); lifecycle.OnBegin != nil {
   505  		lifecycle.OnBegin(ctx)
   506  	}
   507  	if js.Log != nil && !js.Config().SkipLoggerTrigger {
   508  		js.logTrigger(ctx, NewEvent(FlagBegin, js.Name(), OptEventJobInvocation(id)))
   509  	}
   510  }
   512  func (js *JobScheduler) onJobCompleteCanceled(ctx context.Context) {
   513  	js.currentLock.Lock()
   514  	js.current.Complete = time.Now().UTC()
   515  	js.current.Status = JobInvocationStatusCanceled
   516  	id := js.current.ID
   517  	elapsed := js.current.Elapsed()
   518  	js.currentLock.Unlock()
   520  	lifecycle := js.Lifecycle()
   521  	if lifecycle.OnCancellation != nil {
   522  		lifecycle.OnCancellation(ctx)
   523  	}
   524  	if js.Log != nil && !js.Config().SkipLoggerTrigger {
   525  		js.logTrigger(ctx, NewEvent(FlagCanceled, js.Name(), OptEventJobInvocation(id), OptEventElapsed(elapsed)))
   526  		js.logTrigger(ctx, NewEvent(FlagComplete, js.Name(), OptEventJobInvocation(id), OptEventElapsed(elapsed)))
   527  	}
   528  	if lifecycle.OnComplete != nil {
   529  		lifecycle.OnComplete(ctx)
   530  	}
   531  }
   533  func (js *JobScheduler) onJobCompleteSuccess(ctx context.Context) {
   534  	js.currentLock.Lock()
   535  	js.current.Complete = time.Now().UTC()
   536  	js.current.Status = JobInvocationStatusSuccess
   537  	id := js.current.ID
   538  	elapsed := js.current.Elapsed()
   539  	js.currentLock.Unlock()
   541  	lifecycle := js.Lifecycle()
   542  	if lifecycle.OnSuccess != nil {
   543  		lifecycle.OnSuccess(ctx)
   544  	}
   545  	if js.Log != nil && !js.Config().SkipLoggerTrigger {
   546  		js.logTrigger(ctx, NewEvent(FlagSuccess, js.Name(), OptEventJobInvocation(id), OptEventElapsed(elapsed)))
   547  		js.logTrigger(ctx, NewEvent(FlagComplete, js.Name(), OptEventJobInvocation(id), OptEventElapsed(elapsed)))
   548  	}
   549  	if last := js.Last(); last != nil && last.Status == JobInvocationStatusErrored {
   550  		if lifecycle.OnFixed != nil {
   551  			lifecycle.OnFixed(ctx)
   552  		}
   553  		if js.Log != nil && !js.Config().SkipLoggerTrigger {
   554  			js.logTrigger(ctx, NewEvent(FlagFixed, js.Name(), OptEventJobInvocation(id), OptEventElapsed(elapsed)))
   555  		}
   556  	}
   557  	if lifecycle.OnComplete != nil {
   558  		lifecycle.OnComplete(ctx)
   559  	}
   560  }
   562  func (js *JobScheduler) onJobCompleteError(ctx context.Context, err error) {
   563  	js.currentLock.Lock()
   564  	js.current.Complete = time.Now().UTC()
   565  	js.current.Status = JobInvocationStatusErrored
   566  	js.current.Err = err
   567  	id := js.current.ID
   568  	elapsed := js.current.Elapsed()
   569  	js.currentLock.Unlock()
   571  	//
   572  	// error
   573  	//
   575  	// always log the error
   576  	_ = js.error(ctx, err)
   577  	lifecycle := js.Lifecycle()
   578  	if lifecycle.OnError != nil {
   579  		lifecycle.OnError(ctx)
   580  	}
   581  	if js.Log != nil && !js.Config().SkipLoggerTrigger {
   582  		js.logTrigger(ctx, NewEvent(FlagErrored, js.Name(),
   583  			OptEventJobInvocation(id),
   584  			OptEventErr(err),
   585  			OptEventElapsed(elapsed),
   586  		))
   587  		js.logTrigger(ctx, NewEvent(FlagComplete, js.Name(), OptEventJobInvocation(id), OptEventElapsed(elapsed)))
   588  	}
   590  	//
   591  	// broken; assumes that last is set, and last was a success
   592  	//
   594  	if last := js.Last(); last != nil && last.Status != JobInvocationStatusErrored {
   595  		if lifecycle.OnBroken != nil {
   596  			lifecycle.OnBroken(ctx)
   597  		}
   598  		if js.Log != nil && !js.Config().SkipLoggerTrigger {
   599  			js.logTrigger(ctx, NewEvent(FlagBroken, js.Name(),
   600  				OptEventJobInvocation(id),
   601  				OptEventErr(err),
   602  				OptEventElapsed(elapsed)),
   603  			)
   604  		}
   605  	}
   606  	if lifecycle.OnComplete != nil {
   607  		lifecycle.OnComplete(ctx)
   608  	}
   609  }
   611  //
   612  // logging helpers
   613  //
   615  func (js *JobScheduler) logTrigger(ctx context.Context, e logger.Event) {
   616  	if !logger.IsLoggerSet(js.Log) {
   617  		return
   618  	}
   619  	js.Log.TriggerContext(ctx, e)
   620  }
   622  func (js *JobScheduler) debugf(ctx context.Context, format string, args ...interface{}) {
   623  	if !logger.IsLoggerSet(js.Log) {
   624  		return
   625  	}
   626  	js.Log.DebugfContext(ctx, format, args...)
   627  }
   629  func (js *JobScheduler) error(ctx context.Context, err error) error {
   630  	if !logger.IsLoggerSet(js.Log) {
   631  		return err
   632  	}
   633  	js.Log.ErrorContext(ctx, err)
   634  	return err
   635  }