github.com/pf-qiu/concourse/v6@v6.7.3-0.20201207032516-1f455d73275f/atc/scheduler/runner.go (about)

     1  package scheduler
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"sync"
     7  	"time"
     8  
     9  	"code.cloudfoundry.org/lager"
    10  	"github.com/pf-qiu/concourse/v6/atc/db"
    11  	"github.com/pf-qiu/concourse/v6/atc/metric"
    12  	"github.com/pf-qiu/concourse/v6/atc/util"
    13  	"github.com/pf-qiu/concourse/v6/tracing"
    14  	"go.opentelemetry.io/otel/label"
    15  )
    16  
    17  //go:generate counterfeiter . BuildScheduler
    18  
    19  type BuildScheduler interface {
    20  	Schedule(
    21  		ctx context.Context,
    22  		logger lager.Logger,
    23  		job db.SchedulerJob,
    24  	) (bool, error)
    25  }
    26  
    27  type Runner struct {
    28  	logger     lager.Logger
    29  	jobFactory db.JobFactory
    30  	scheduler  BuildScheduler
    31  
    32  	guardJobScheduling chan struct{}
    33  	running            *sync.Map
    34  }
    35  
    36  func NewRunner(logger lager.Logger, jobFactory db.JobFactory, scheduler BuildScheduler, maxJobs uint64) *Runner {
    37  	return &Runner{
    38  		logger:     logger,
    39  		jobFactory: jobFactory,
    40  		scheduler:  scheduler,
    41  
    42  		guardJobScheduling: make(chan struct{}, maxJobs),
    43  		running:            &sync.Map{},
    44  	}
    45  }
    46  
    47  func (s *Runner) Run(ctx context.Context) error {
    48  	sLog := s.logger.Session("run")
    49  
    50  	sLog.Debug("start")
    51  	defer sLog.Debug("done")
    52  	spanCtx, span := tracing.StartSpan(ctx, "scheduler.Run", nil)
    53  	defer span.End()
    54  
    55  	jobs, err := s.jobFactory.JobsToSchedule()
    56  	if err != nil {
    57  		return fmt.Errorf("find jobs to schedule: %w", err)
    58  	}
    59  
    60  	for _, j := range jobs {
    61  		if _, exists := s.running.LoadOrStore(j.ID(), true); exists {
    62  			// already scheduling this job
    63  			continue
    64  		}
    65  
    66  		s.guardJobScheduling <- struct{}{}
    67  
    68  		jLog := sLog.Session("job", lager.Data{"job": j.Name()})
    69  
    70  		go func(job db.SchedulerJob) {
    71  			defer func() {
    72  				err := util.DumpPanic(recover(), "scheduling job %d", job.ID())
    73  				if err != nil {
    74  					jLog.Error("panic-in-scheduler-run", err)
    75  				}
    76  			}()
    77  
    78  			defer func() {
    79  				<-s.guardJobScheduling
    80  				s.running.Delete(job.ID())
    81  			}()
    82  
    83  			schedulingLock, acquired, err := job.AcquireSchedulingLock(sLog)
    84  			if err != nil {
    85  				jLog.Error("failed-to-acquire-lock", err)
    86  				return
    87  			}
    88  
    89  			if !acquired {
    90  				return
    91  			}
    92  
    93  			defer schedulingLock.Release()
    94  
    95  			err = s.scheduleJob(spanCtx, sLog, job)
    96  			if err != nil {
    97  				jLog.Error("failed-to-schedule-job", err)
    98  			}
    99  		}(j)
   100  	}
   101  
   102  	return nil
   103  }
   104  
   105  func (s *Runner) scheduleJob(ctx context.Context, logger lager.Logger, job db.SchedulerJob) error {
   106  	metric.Metrics.JobsScheduling.Inc()
   107  	defer metric.Metrics.JobsScheduling.Dec()
   108  	defer metric.Metrics.JobsScheduled.Inc()
   109  
   110  	logger = logger.Session("schedule-job", lager.Data{"job": job.Name()})
   111  	spanCtx, span := tracing.StartSpan(ctx, "schedule-job", tracing.Attrs{
   112  		"team":     job.TeamName(),
   113  		"pipeline": job.PipelineName(),
   114  		"job":      job.Name(),
   115  	})
   116  	defer span.End()
   117  
   118  	logger.Debug("schedule")
   119  
   120  	// Grabs out the requested time that triggered off the job schedule in
   121  	// order to set the last scheduled to the exact time of this triggering
   122  	// request
   123  	requestedTime := job.ScheduleRequestedTime()
   124  
   125  	found, err := job.Reload()
   126  	if err != nil {
   127  		return fmt.Errorf("reload job: %w", err)
   128  	}
   129  
   130  	if !found {
   131  		logger.Debug("could-not-find-job-to-reload")
   132  		return nil
   133  	}
   134  
   135  	jStart := time.Now()
   136  
   137  	needsRetry, err := s.scheduler.Schedule(
   138  		spanCtx,
   139  		logger,
   140  		job,
   141  	)
   142  	if err != nil {
   143  		return fmt.Errorf("schedule job: %w", err)
   144  	}
   145  
   146  	span.SetAttributes(label.Bool("needs-retry", needsRetry))
   147  	if !needsRetry {
   148  		err = job.UpdateLastScheduled(requestedTime)
   149  		if err != nil {
   150  			logger.Error("failed-to-update-last-scheduled", err, lager.Data{"job": job.Name()})
   151  			return fmt.Errorf("update last scheduled: %w", err)
   152  		}
   153  	}
   154  
   155  	metric.SchedulingJobDuration{
   156  		PipelineName: job.PipelineName(),
   157  		JobName:      job.Name(),
   158  		JobID:        job.ID(),
   159  		Duration:     time.Since(jStart),
   160  	}.Emit(logger)
   161  
   162  	return nil
   163  }