github.com/pf-qiu/concourse/v6@v6.7.3-0.20201207032516-1f455d73275f/atc/scheduler/runner.go (about) 1 package scheduler 2 3 import ( 4 "context" 5 "fmt" 6 "sync" 7 "time" 8 9 "code.cloudfoundry.org/lager" 10 "github.com/pf-qiu/concourse/v6/atc/db" 11 "github.com/pf-qiu/concourse/v6/atc/metric" 12 "github.com/pf-qiu/concourse/v6/atc/util" 13 "github.com/pf-qiu/concourse/v6/tracing" 14 "go.opentelemetry.io/otel/label" 15 ) 16 17 //go:generate counterfeiter . BuildScheduler 18 19 type BuildScheduler interface { 20 Schedule( 21 ctx context.Context, 22 logger lager.Logger, 23 job db.SchedulerJob, 24 ) (bool, error) 25 } 26 27 type Runner struct { 28 logger lager.Logger 29 jobFactory db.JobFactory 30 scheduler BuildScheduler 31 32 guardJobScheduling chan struct{} 33 running *sync.Map 34 } 35 36 func NewRunner(logger lager.Logger, jobFactory db.JobFactory, scheduler BuildScheduler, maxJobs uint64) *Runner { 37 return &Runner{ 38 logger: logger, 39 jobFactory: jobFactory, 40 scheduler: scheduler, 41 42 guardJobScheduling: make(chan struct{}, maxJobs), 43 running: &sync.Map{}, 44 } 45 } 46 47 func (s *Runner) Run(ctx context.Context) error { 48 sLog := s.logger.Session("run") 49 50 sLog.Debug("start") 51 defer sLog.Debug("done") 52 spanCtx, span := tracing.StartSpan(ctx, "scheduler.Run", nil) 53 defer span.End() 54 55 jobs, err := s.jobFactory.JobsToSchedule() 56 if err != nil { 57 return fmt.Errorf("find jobs to schedule: %w", err) 58 } 59 60 for _, j := range jobs { 61 if _, exists := s.running.LoadOrStore(j.ID(), true); exists { 62 // already scheduling this job 63 continue 64 } 65 66 s.guardJobScheduling <- struct{}{} 67 68 jLog := sLog.Session("job", lager.Data{"job": j.Name()}) 69 70 go func(job db.SchedulerJob) { 71 defer func() { 72 err := util.DumpPanic(recover(), "scheduling job %d", job.ID()) 73 if err != nil { 74 jLog.Error("panic-in-scheduler-run", err) 75 } 76 }() 77 78 defer func() { 79 <-s.guardJobScheduling 80 s.running.Delete(job.ID()) 81 }() 82 83 schedulingLock, acquired, err := job.AcquireSchedulingLock(sLog) 84 if err != nil { 85 jLog.Error("failed-to-acquire-lock", err) 86 return 87 } 88 89 if !acquired { 90 return 91 } 92 93 defer schedulingLock.Release() 94 95 err = s.scheduleJob(spanCtx, sLog, job) 96 if err != nil { 97 jLog.Error("failed-to-schedule-job", err) 98 } 99 }(j) 100 } 101 102 return nil 103 } 104 105 func (s *Runner) scheduleJob(ctx context.Context, logger lager.Logger, job db.SchedulerJob) error { 106 metric.Metrics.JobsScheduling.Inc() 107 defer metric.Metrics.JobsScheduling.Dec() 108 defer metric.Metrics.JobsScheduled.Inc() 109 110 logger = logger.Session("schedule-job", lager.Data{"job": job.Name()}) 111 spanCtx, span := tracing.StartSpan(ctx, "schedule-job", tracing.Attrs{ 112 "team": job.TeamName(), 113 "pipeline": job.PipelineName(), 114 "job": job.Name(), 115 }) 116 defer span.End() 117 118 logger.Debug("schedule") 119 120 // Grabs out the requested time that triggered off the job schedule in 121 // order to set the last scheduled to the exact time of this triggering 122 // request 123 requestedTime := job.ScheduleRequestedTime() 124 125 found, err := job.Reload() 126 if err != nil { 127 return fmt.Errorf("reload job: %w", err) 128 } 129 130 if !found { 131 logger.Debug("could-not-find-job-to-reload") 132 return nil 133 } 134 135 jStart := time.Now() 136 137 needsRetry, err := s.scheduler.Schedule( 138 spanCtx, 139 logger, 140 job, 141 ) 142 if err != nil { 143 return fmt.Errorf("schedule job: %w", err) 144 } 145 146 span.SetAttributes(label.Bool("needs-retry", needsRetry)) 147 if !needsRetry { 148 err = job.UpdateLastScheduled(requestedTime) 149 if err != nil { 150 logger.Error("failed-to-update-last-scheduled", err, lager.Data{"job": job.Name()}) 151 return fmt.Errorf("update last scheduled: %w", err) 152 } 153 } 154 155 metric.SchedulingJobDuration{ 156 PipelineName: job.PipelineName(), 157 JobName: job.Name(), 158 JobID: job.ID(), 159 Duration: time.Since(jStart), 160 }.Emit(logger) 161 162 return nil 163 }