github.com/blend/go-sdk@v1.20220411.3/cron/job_scheduler.go (about) 1 /* 2 3 Copyright (c) 2022 - Present. Blend Labs, Inc. All rights reserved 4 Use of this source code is governed by a MIT license that can be found in the LICENSE file. 5 6 */ 7 8 package cron 9 10 import ( 11 "context" 12 "fmt" 13 "sync" 14 "time" 15 16 "github.com/blend/go-sdk/async" 17 "github.com/blend/go-sdk/ex" 18 "github.com/blend/go-sdk/logger" 19 "github.com/blend/go-sdk/ref" 20 "github.com/blend/go-sdk/stringutil" 21 ) 22 23 // NewJobScheduler returns a job scheduler for a given job. 24 func NewJobScheduler(job Job, options ...JobSchedulerOption) *JobScheduler { 25 js := &JobScheduler{ 26 Latch: async.NewLatch(), 27 BaseContext: context.Background(), 28 Job: job, 29 } 30 if typed, ok := job.(ScheduleProvider); ok { 31 js.JobSchedule = typed.Schedule() 32 } 33 for _, option := range options { 34 option(js) 35 } 36 return js 37 } 38 39 // JobScheduler is a job instance. 40 type JobScheduler struct { 41 Latch *async.Latch 42 43 Job Job 44 JobConfig JobConfig 45 JobSchedule Schedule 46 JobLifecycle JobLifecycle 47 48 BaseContext context.Context 49 50 Tracer Tracer 51 Log logger.Log 52 53 NextRuntime time.Time 54 55 currentLock sync.Mutex 56 current *JobInvocation 57 lastLock sync.Mutex 58 last *JobInvocation 59 } 60 61 // Name returns the job name. 62 func (js *JobScheduler) Name() string { 63 return js.Job.Name() 64 } 65 66 // Background returns the job scheduler base context. 67 // 68 // It should be used as the root context for _any_ operations. 69 func (js *JobScheduler) Background() context.Context { 70 if js.BaseContext != nil { 71 return js.BaseContext 72 } 73 return context.Background() 74 } 75 76 // Config returns the job config provided by a job or an empty config. 77 func (js *JobScheduler) Config() JobConfig { 78 if typed, ok := js.Job.(ConfigProvider); ok { 79 return typed.Config() 80 } 81 return js.JobConfig 82 } 83 84 // Lifecycle returns job lifecycle steps or an empty set. 85 func (js *JobScheduler) Lifecycle() JobLifecycle { 86 if typed, ok := js.Job.(LifecycleProvider); ok { 87 return typed.Lifecycle() 88 } 89 return js.JobLifecycle 90 } 91 92 // Description returns the description. 93 func (js *JobScheduler) Description() string { 94 return js.Config().Description 95 } 96 97 // Disabled returns if the job is disabled or not. 98 func (js *JobScheduler) Disabled() bool { 99 if js.JobConfig.Disabled != nil { 100 return *js.JobConfig.Disabled 101 } 102 return js.Config().DisabledOrDefault() 103 } 104 105 // Labels returns the job labels, including 106 // automatically added ones like `name`. 107 func (js *JobScheduler) Labels() map[string]string { 108 output := map[string]string{ 109 "name": stringutil.Slugify(js.Name()), 110 "scheduler": string(js.State()), 111 "active": fmt.Sprint(!js.IsIdle()), 112 "enabled": fmt.Sprint(!js.Disabled()), 113 } 114 if js.Last() != nil { 115 output["last"] = stringutil.Slugify(string(js.Last().Status)) 116 } 117 for key, value := range js.Config().Labels { 118 output[key] = value 119 } 120 return output 121 } 122 123 // State returns the job scheduler state. 124 func (js *JobScheduler) State() JobSchedulerState { 125 if js.Latch.IsStarted() { 126 return JobSchedulerStateRunning 127 } 128 if js.Latch.IsStopped() { 129 return JobSchedulerStateStopped 130 } 131 return JobSchedulerStateUnknown 132 } 133 134 // Start starts the scheduler. 135 // This call blocks. 136 func (js *JobScheduler) Start() error { 137 if !js.Latch.CanStart() { 138 return async.ErrCannotStart 139 } 140 js.Latch.Starting() 141 js.RunLoop() 142 return nil 143 } 144 145 // Stop stops the scheduler. 146 func (js *JobScheduler) Stop() error { 147 if !js.Latch.CanStop() { 148 return async.ErrCannotStop 149 } 150 151 ctx := js.withBaseContext(js.Background()) 152 js.Latch.Stopping() 153 154 if current := js.Current(); current != nil { 155 gracePeriod := js.Config().ShutdownGracePeriodOrDefault() 156 if gracePeriod > 0 { 157 var cancel func() 158 ctx, cancel = js.withTimeoutOrCancel(ctx, gracePeriod) 159 defer cancel() 160 js.waitCurrentComplete(ctx) 161 } 162 } 163 if current := js.Current(); current != nil && current.Status == JobInvocationStatusRunning { 164 current.Cancel() 165 } 166 167 <-js.Latch.NotifyStopped() 168 js.Latch.Reset() 169 js.NextRuntime = Zero 170 return nil 171 } 172 173 // OnLoad triggers the on load even on the job lifecycle handler. 174 func (js *JobScheduler) OnLoad(ctx context.Context) error { 175 ctx = js.withBaseContext(ctx) 176 if js.Lifecycle().OnLoad != nil { 177 if err := js.Lifecycle().OnLoad(ctx); err != nil { 178 return err 179 } 180 } 181 return nil 182 } 183 184 // OnUnload triggers the on unload even on the job lifecycle handler. 185 func (js *JobScheduler) OnUnload(ctx context.Context) error { 186 ctx = js.withBaseContext(ctx) 187 if js.Lifecycle().OnUnload != nil { 188 return js.Lifecycle().OnUnload(ctx) 189 } 190 return nil 191 } 192 193 // NotifyStarted notifies the job scheduler has started. 194 func (js *JobScheduler) NotifyStarted() <-chan struct{} { 195 return js.Latch.NotifyStarted() 196 } 197 198 // NotifyStopped notifies the job scheduler has stopped. 199 func (js *JobScheduler) NotifyStopped() <-chan struct{} { 200 return js.Latch.NotifyStopped() 201 } 202 203 // Enable sets the job as enabled. 204 func (js *JobScheduler) Enable() { 205 ctx := js.withBaseContext(js.Background()) 206 js.JobConfig.Disabled = ref.Bool(false) 207 if lifecycle := js.Lifecycle(); lifecycle.OnEnabled != nil { 208 lifecycle.OnEnabled(ctx) 209 } 210 if js.Log != nil && !js.Config().SkipLoggerTrigger { 211 js.Log.TriggerContext(ctx, NewEvent(FlagEnabled, js.Name())) 212 } 213 } 214 215 // Disable sets the job as disabled. 216 func (js *JobScheduler) Disable() { 217 ctx := js.withBaseContext(js.Background()) 218 js.JobConfig.Disabled = ref.Bool(true) 219 if lifecycle := js.Lifecycle(); lifecycle.OnDisabled != nil { 220 lifecycle.OnDisabled(ctx) 221 } 222 if js.Log != nil && !js.Config().SkipLoggerTrigger { 223 js.Log.TriggerContext(ctx, NewEvent(FlagDisabled, js.Name())) 224 } 225 } 226 227 // Cancel stops all running invocations. 228 func (js *JobScheduler) Cancel() error { 229 ctx := js.withBaseContext(js.Background()) 230 231 if js.Current() == nil { 232 logger.MaybeDebugfContext(ctx, js.Log, "cannot cancel; job is not runnning") 233 return nil 234 } 235 gracePeriod := js.Config().ShutdownGracePeriodOrDefault() 236 if gracePeriod > 0 { 237 ctx, cancel := js.withTimeoutOrCancel(ctx, gracePeriod) 238 defer cancel() 239 js.waitCurrentComplete(ctx) 240 } 241 if current := js.Current(); current != nil && current.Status == JobInvocationStatusRunning { 242 current.Cancel() 243 } else { 244 logger.MaybeDebugfContext(ctx, js.Log, "cannot cancel; job is not runnning") 245 } 246 return nil 247 } 248 249 // RunLoop is the main scheduler loop. 250 // This call blocks. 251 // It alarms on the next runtime and forks a new routine to run the job. 252 // It can be aborted with the scheduler's async.Latch, or calling `.Stop()`. 253 // If this function exits for any reason, it will mark the scheduler as stopped. 254 func (js *JobScheduler) RunLoop() { 255 js.Latch.Started() 256 defer func() { 257 js.Latch.Stopped() 258 js.Latch.Reset() 259 }() 260 261 if js.JobSchedule != nil { 262 js.NextRuntime = js.JobSchedule.Next(js.NextRuntime) 263 } 264 265 // if the schedule returns a zero timestamp 266 // it should be interpretted as *not* to automatically 267 // schedule the job to be run. 268 // The run loop will return and the job scheduler will be interpretted as stopped. 269 if js.NextRuntime.IsZero() { 270 return 271 } 272 273 for { 274 if js.NextRuntime.IsZero() { 275 return 276 } 277 278 runAt := time.After(js.NextRuntime.UTC().Sub(Now())) 279 select { 280 case <-runAt: 281 if js.CanBeScheduled() { 282 if _, _, err := js.RunAsyncContext(js.Background()); err != nil { 283 _ = js.error(js.Background(), err) 284 } 285 } 286 287 // set up the next runtime. 288 if js.JobSchedule != nil { 289 js.NextRuntime = js.JobSchedule.Next(js.NextRuntime) 290 } else { 291 js.NextRuntime = Zero 292 } 293 294 case <-js.Latch.NotifyStopping(): 295 // note: we bail hard here 296 // because the job executions in flight are 297 // handled by the context cancellation. 298 return 299 } 300 } 301 } 302 303 // RunAsync starts a job invocation with the BaseContext the root context. 304 func (js *JobScheduler) RunAsync() (*JobInvocation, <-chan struct{}, error) { 305 return js.RunAsyncContext(js.Background()) 306 } 307 308 // RunAsyncContext starts a job invocation with a given context. 309 func (js *JobScheduler) RunAsyncContext(ctx context.Context) (*JobInvocation, <-chan struct{}, error) { 310 if !js.IsIdle() { 311 return nil, nil, ex.New(ErrJobAlreadyRunning, ex.OptMessagef("job: %s", js.Name())) 312 } 313 314 ctx = js.withBaseContext(ctx) 315 ctx, ji := js.withInvocationContext(ctx) 316 done := make(chan struct{}) 317 js.SetCurrent(ji) 318 319 var err error 320 var tracer TraceFinisher 321 go func() { 322 defer func() { 323 switch { 324 case err != nil && IsJobCanceled(err): 325 js.onJobCompleteCanceled(ctx) // the job was canceled, either manually or by a timeout 326 case err != nil: 327 js.onJobCompleteError(ctx, err) // the job completed with an error 328 default: 329 js.onJobCompleteSuccess(ctx) // the job completed without error 330 } 331 332 if tracer != nil { 333 tracer.Finish(ctx, err) // call the trace finisher if one was started 334 } 335 ji.Cancel() // if the job was created with a timeout, end the timeout 336 337 close(done) // signal callers the job is done 338 js.assignCurrentToLast() // rotate in the current to the last result 339 }() 340 341 if js.Tracer != nil { 342 ctx, tracer = js.Tracer.Start(ctx, js.Name()) 343 } 344 js.onJobBegin(ctx) // signal the job is starting 345 346 select { 347 case <-ctx.Done(): // if the timeout or cancel is triggered 348 err = ErrJobCanceled // set the error to a known error 349 return 350 case err = <-js.safeBackgroundExec(ctx): // run the job in a background routine and catch pancis 351 return 352 } 353 }() 354 return ji, done, nil 355 } 356 357 // Run forces the job to run. 358 // This call will block. 359 func (js *JobScheduler) Run() { 360 _, done, err := js.RunAsync() 361 if err != nil { 362 return 363 } 364 <-done 365 } 366 367 // RunContext runs a job with a given context as the root context. 368 func (js *JobScheduler) RunContext(ctx context.Context) { 369 _, done, err := js.RunAsyncContext(ctx) 370 if err != nil { 371 return 372 } 373 <-done 374 } 375 376 // 377 // exported utility methods 378 // 379 380 // CanBeScheduled returns if a job will be triggered automatically 381 // and isn't already in flight and set to be serial. 382 func (js *JobScheduler) CanBeScheduled() bool { 383 return !js.Disabled() && js.IsIdle() 384 } 385 386 // IsIdle returns if the job is not currently running. 387 func (js *JobScheduler) IsIdle() (isIdle bool) { 388 isIdle = js.Current() == nil 389 return 390 } 391 392 // 393 // utility functions 394 // 395 396 // Current returns the current job invocation. 397 func (js *JobScheduler) Current() (current *JobInvocation) { 398 js.currentLock.Lock() 399 if js.current != nil { 400 current = js.current.Clone() 401 } 402 js.currentLock.Unlock() 403 return 404 } 405 406 // SetCurrent sets the current invocation, it is useful for tests etc. 407 func (js *JobScheduler) SetCurrent(ji *JobInvocation) { 408 js.currentLock.Lock() 409 js.current = ji 410 js.currentLock.Unlock() 411 } 412 413 // Last returns the last job invocation. 414 func (js *JobScheduler) Last() (last *JobInvocation) { 415 js.lastLock.Lock() 416 if js.last != nil { 417 last = js.last 418 } 419 js.lastLock.Unlock() 420 return 421 } 422 423 // SetLast sets the last invocation, it is useful for tests etc. 424 func (js *JobScheduler) SetLast(ji *JobInvocation) { 425 js.lastLock.Lock() 426 js.last = ji 427 js.lastLock.Unlock() 428 } 429 430 func (js *JobScheduler) assignCurrentToLast() { 431 js.lastLock.Lock() 432 js.currentLock.Lock() 433 js.last = js.current 434 js.current = nil 435 js.currentLock.Unlock() 436 js.lastLock.Unlock() 437 } 438 439 func (js *JobScheduler) waitCurrentComplete(ctx context.Context) { 440 deadlinePoll := time.NewTicker(100 * time.Millisecond) 441 defer deadlinePoll.Stop() 442 for { 443 if js.Current().Status != JobInvocationStatusRunning { 444 return 445 } 446 select { 447 case <-ctx.Done(): // once the timeout triggers 448 return 449 case <-deadlinePoll.C: 450 // tick over the loop to check if the current job is complete 451 continue 452 } 453 } 454 } 455 456 func (js *JobScheduler) safeBackgroundExec(ctx context.Context) chan error { 457 errors := make(chan error, 2) 458 go func() { 459 defer func() { 460 if r := recover(); r != nil { 461 errors <- ex.New(r) 462 } 463 }() 464 errors <- js.Job.Execute(ctx) 465 }() 466 return errors 467 } 468 469 func (js *JobScheduler) withBaseContext(ctx context.Context) context.Context { 470 if typed, ok := js.Job.(BackgroundProvider); ok { 471 ctx = typed.Background(ctx) 472 } 473 ctx = logger.WithPathAppend(ctx, js.Name()) 474 ctx = WithJobScheduler(ctx, js) 475 return ctx 476 } 477 478 func (js *JobScheduler) withTimeoutOrCancel(ctx context.Context, timeout time.Duration) (context.Context, context.CancelFunc) { 479 if timeout > 0 { 480 return context.WithTimeout(ctx, timeout) 481 } 482 return context.WithCancel(ctx) 483 } 484 485 func (js *JobScheduler) withInvocationContext(ctx context.Context) (context.Context, *JobInvocation) { 486 ji := NewJobInvocation(js.Name()) 487 ji.Parameters = MergeJobParameterValues(js.Config().ParameterValues, GetJobParameterValues(ctx)) 488 ctx = logger.WithPathAppend(ctx, ji.ID) 489 ctx, ji.Cancel = js.withTimeoutOrCancel(ctx, js.Config().TimeoutOrDefault()) 490 ctx = WithJobInvocation(ctx, ji) 491 ctx = WithJobParameterValues(ctx, ji.Parameters) 492 return ctx, ji 493 } 494 495 // job lifecycle hooks 496 497 func (js *JobScheduler) onJobBegin(ctx context.Context) { 498 js.currentLock.Lock() 499 js.current.Started = time.Now().UTC() 500 js.current.Status = JobInvocationStatusRunning 501 id := js.current.ID 502 js.currentLock.Unlock() 503 504 if lifecycle := js.Lifecycle(); lifecycle.OnBegin != nil { 505 lifecycle.OnBegin(ctx) 506 } 507 if js.Log != nil && !js.Config().SkipLoggerTrigger { 508 js.logTrigger(ctx, NewEvent(FlagBegin, js.Name(), OptEventJobInvocation(id))) 509 } 510 } 511 512 func (js *JobScheduler) onJobCompleteCanceled(ctx context.Context) { 513 js.currentLock.Lock() 514 js.current.Complete = time.Now().UTC() 515 js.current.Status = JobInvocationStatusCanceled 516 id := js.current.ID 517 elapsed := js.current.Elapsed() 518 js.currentLock.Unlock() 519 520 lifecycle := js.Lifecycle() 521 if lifecycle.OnCancellation != nil { 522 lifecycle.OnCancellation(ctx) 523 } 524 if js.Log != nil && !js.Config().SkipLoggerTrigger { 525 js.logTrigger(ctx, NewEvent(FlagCanceled, js.Name(), OptEventJobInvocation(id), OptEventElapsed(elapsed))) 526 js.logTrigger(ctx, NewEvent(FlagComplete, js.Name(), OptEventJobInvocation(id), OptEventElapsed(elapsed))) 527 } 528 if lifecycle.OnComplete != nil { 529 lifecycle.OnComplete(ctx) 530 } 531 } 532 533 func (js *JobScheduler) onJobCompleteSuccess(ctx context.Context) { 534 js.currentLock.Lock() 535 js.current.Complete = time.Now().UTC() 536 js.current.Status = JobInvocationStatusSuccess 537 id := js.current.ID 538 elapsed := js.current.Elapsed() 539 js.currentLock.Unlock() 540 541 lifecycle := js.Lifecycle() 542 if lifecycle.OnSuccess != nil { 543 lifecycle.OnSuccess(ctx) 544 } 545 if js.Log != nil && !js.Config().SkipLoggerTrigger { 546 js.logTrigger(ctx, NewEvent(FlagSuccess, js.Name(), OptEventJobInvocation(id), OptEventElapsed(elapsed))) 547 js.logTrigger(ctx, NewEvent(FlagComplete, js.Name(), OptEventJobInvocation(id), OptEventElapsed(elapsed))) 548 } 549 if last := js.Last(); last != nil && last.Status == JobInvocationStatusErrored { 550 if lifecycle.OnFixed != nil { 551 lifecycle.OnFixed(ctx) 552 } 553 if js.Log != nil && !js.Config().SkipLoggerTrigger { 554 js.logTrigger(ctx, NewEvent(FlagFixed, js.Name(), OptEventJobInvocation(id), OptEventElapsed(elapsed))) 555 } 556 } 557 if lifecycle.OnComplete != nil { 558 lifecycle.OnComplete(ctx) 559 } 560 } 561 562 func (js *JobScheduler) onJobCompleteError(ctx context.Context, err error) { 563 js.currentLock.Lock() 564 js.current.Complete = time.Now().UTC() 565 js.current.Status = JobInvocationStatusErrored 566 js.current.Err = err 567 id := js.current.ID 568 elapsed := js.current.Elapsed() 569 js.currentLock.Unlock() 570 571 // 572 // error 573 // 574 575 // always log the error 576 _ = js.error(ctx, err) 577 lifecycle := js.Lifecycle() 578 if lifecycle.OnError != nil { 579 lifecycle.OnError(ctx) 580 } 581 if js.Log != nil && !js.Config().SkipLoggerTrigger { 582 js.logTrigger(ctx, NewEvent(FlagErrored, js.Name(), 583 OptEventJobInvocation(id), 584 OptEventErr(err), 585 OptEventElapsed(elapsed), 586 )) 587 js.logTrigger(ctx, NewEvent(FlagComplete, js.Name(), OptEventJobInvocation(id), OptEventElapsed(elapsed))) 588 } 589 590 // 591 // broken; assumes that last is set, and last was a success 592 // 593 594 if last := js.Last(); last != nil && last.Status != JobInvocationStatusErrored { 595 if lifecycle.OnBroken != nil { 596 lifecycle.OnBroken(ctx) 597 } 598 if js.Log != nil && !js.Config().SkipLoggerTrigger { 599 js.logTrigger(ctx, NewEvent(FlagBroken, js.Name(), 600 OptEventJobInvocation(id), 601 OptEventErr(err), 602 OptEventElapsed(elapsed)), 603 ) 604 } 605 } 606 if lifecycle.OnComplete != nil { 607 lifecycle.OnComplete(ctx) 608 } 609 } 610 611 // 612 // logging helpers 613 // 614 615 func (js *JobScheduler) logTrigger(ctx context.Context, e logger.Event) { 616 if !logger.IsLoggerSet(js.Log) { 617 return 618 } 619 js.Log.TriggerContext(ctx, e) 620 } 621 622 func (js *JobScheduler) debugf(ctx context.Context, format string, args ...interface{}) { 623 if !logger.IsLoggerSet(js.Log) { 624 return 625 } 626 js.Log.DebugfContext(ctx, format, args...) 627 } 628 629 func (js *JobScheduler) error(ctx context.Context, err error) error { 630 if !logger.IsLoggerSet(js.Log) { 631 return err 632 } 633 js.Log.ErrorContext(ctx, err) 634 return err 635 }