zotregistry.dev/zot@v1.4.4-0.20240314164342-eec277e14d20/pkg/scheduler/scheduler.go

package scheduler

import (
	"container/heap"
	"context"
	"math"
	"runtime"
	"sync"
	"sync/atomic"
	"time"

	"zotregistry.dev/zot/pkg/api/config"
	"zotregistry.dev/zot/pkg/extensions/monitoring"
	"zotregistry.dev/zot/pkg/log"
)

// Task is a single unit of work executed by one of the scheduler's workers.
type Task interface {
	DoWork(ctx context.Context) error
	Name() string
	String() string
}
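
// A minimal Task implementation might look like the following sketch; the type
// and field names below are hypothetical and only illustrate the interface:
//
//	type sleepTask struct {
//		name string
//		wait time.Duration
//	}
//
//	func (t *sleepTask) DoWork(ctx context.Context) error {
//		select {
//		case <-ctx.Done():
//			return ctx.Err()
//		case <-time.After(t.wait):
//			return nil
//		}
//	}
//
//	func (t *sleepTask) Name() string   { return "sleep" }
//	func (t *sleepTask) String() string { return t.name }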

type generatorsPriorityQueue []*generator

func (pq generatorsPriorityQueue) Len() int {
	return len(pq)
}

// Less orders generators by descending ranking, so the queue behaves as a max-heap:
// the generator with the highest ranking sits at index 0.
func (pq generatorsPriorityQueue) Less(i, j int) bool {
	return pq[i].getRanking() > pq[j].getRanking()
}

func (pq generatorsPriorityQueue) Swap(i, j int) {
	pq[i], pq[j] = pq[j], pq[i]
	pq[i].index = i
	pq[j].index = j
}

func (pq *generatorsPriorityQueue) Push(x any) {
	n := len(*pq)

	item, ok := x.(*generator)
	if !ok {
		return
	}

	item.index = n
	*pq = append(*pq, item)
}

func (pq *generatorsPriorityQueue) Pop() any {
	old := *pq
	n := len(old)
	item := old[n-1]
	old[n-1] = nil
	item.index = -1
	*pq = old[0 : n-1]

	return item
}

const (
	rateLimiterScheduler = 400
	rateLimit            = 50 * time.Millisecond
	NumWorkersMultiplier = 4
	sendMetricsInterval  = 5 * time.Second
)

// Scheduler dispatches tasks, either submitted directly or produced by registered
// generators, to a pool of workers in priority order.
type Scheduler struct {
	tasksQLow         chan Task
	tasksQMedium      chan Task
	tasksQHigh        chan Task
	tasksDoWork       int
	tasksLock         *sync.Mutex
	generators        generatorsPriorityQueue
	waitingGenerators []*generator
	doneGenerators    []*generator
	generatorsLock    *sync.Mutex
	log               log.Logger
	RateLimit         time.Duration
	NumWorkers        int
	workerChan        chan Task
	metricsChan       chan struct{}
	workerWg          *sync.WaitGroup
	isShuttingDown    atomic.Bool
	metricServer      monitoring.MetricServer
	cancelFunc        context.CancelFunc
}

// NewScheduler creates a Scheduler with per-priority task queues and a worker pool
// sized from the configuration.
func NewScheduler(cfg *config.Config, ms monitoring.MetricServer, logC log.Logger) *Scheduler { //nolint: varnamelen
	chLow := make(chan Task, rateLimiterScheduler)
	chMedium := make(chan Task, rateLimiterScheduler)
	chHigh := make(chan Task, rateLimiterScheduler)
	generatorPQ := make(generatorsPriorityQueue, 0)
	numWorkers := getNumWorkers(cfg)
	sublogger := logC.With().Str("component", "scheduler").Logger()

	heap.Init(&generatorPQ)
	// force pushing this metric (for zot minimal, metrics are enabled on first scraping)
	monitoring.SetSchedulerNumWorkers(ms, numWorkers)

	return &Scheduler{
		tasksQLow:      chLow,
		tasksQMedium:   chMedium,
		tasksQHigh:     chHigh,
		tasksDoWork:    0, // number of tasks that are in working state
		tasksLock:      new(sync.Mutex),
		generators:     generatorPQ,
		generatorsLock: new(sync.Mutex),
		log:            log.Logger{Logger: sublogger},
		metricServer:   ms,
		// RateLimit defaults to the rateLimit constant; it may be overridden before RunScheduler() is called
		RateLimit:   rateLimit,
		NumWorkers:  numWorkers,
		workerChan:  make(chan Task, numWorkers),
		metricsChan: make(chan struct{}, 1),
		workerWg:    new(sync.WaitGroup),
	}
}
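
// A typical wiring of the scheduler might look like the sketch below; cfg,
// metricServer, logger, task and taskGen are assumed to be provided by the caller
// and are not shown here:
//
//	sch := NewScheduler(cfg, metricServer, logger)
//	sch.RunScheduler()
//
//	// submit one-shot work at a chosen priority
//	sch.SubmitTask(task, MediumPriority)
//
//	// or register a generator that is re-run every hour
//	sch.SubmitGenerator(taskGen, time.Hour, LowPriority)
//
//	// on shutdown, wait for in-flight tasks to finish
//	sch.Shutdown()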

// poolWorker starts NumWorkers goroutines; each one executes tasks received on
// workerChan until the channel is closed.
func (scheduler *Scheduler) poolWorker(ctx context.Context) {
	for i := 0; i < scheduler.NumWorkers; i++ {
		go func(workerID int) {
			defer scheduler.workerWg.Done()

			var workStart time.Time

			var workDuration time.Duration

			for task := range scheduler.workerChan {
				// leave the line below here (for zot minimal, metrics can be enabled on first scraping)
				metricsEnabled := scheduler.metricServer.IsEnabled()
				scheduler.log.Debug().Int("worker", workerID).Str("task", task.String()).Msg("starting task")

				if metricsEnabled {
					scheduler.tasksLock.Lock()
					scheduler.tasksDoWork++
					scheduler.tasksLock.Unlock()
					workStart = time.Now()
				}

				if err := task.DoWork(ctx); err != nil {
					scheduler.log.Error().Int("worker", workerID).Str("task", task.String()).Err(err).
						Msg("failed to execute task")
				}

				if metricsEnabled {
					scheduler.tasksLock.Lock()
					scheduler.tasksDoWork--
					scheduler.tasksLock.Unlock()
					workDuration = time.Since(workStart)
					monitoring.ObserveWorkersTasksDuration(scheduler.metricServer, task.Name(), workDuration)
				}

				scheduler.log.Debug().Int("worker", workerID).Str("task", task.String()).Msg("finished task")
			}
		}(i + 1)
	}
}

// metricsWorker periodically reports generator, task queue and worker metrics
// until the scheduler shuts down.
func (scheduler *Scheduler) metricsWorker() {
	ticker := time.NewTicker(sendMetricsInterval)
	defer ticker.Stop()

	for {
		if scheduler.inShutdown() {
			return
		}
		select {
		case <-scheduler.metricsChan:
			return
		case <-ticker.C:
			genMap := make(map[string]map[string]uint64)
			tasksMap := make(map[string]int)
			// initialize counters for every priority/state combination
			for _, p := range []Priority{LowPriority, MediumPriority, HighPriority} {
				priority := p.String()
				genMap[priority] = make(map[string]uint64)

				for _, s := range []State{Ready, Waiting, Done} {
					genMap[priority][s.String()] = 0
				}
			}

			scheduler.generatorsLock.Lock()
			generators := append(append(scheduler.generators, scheduler.waitingGenerators...),
				scheduler.doneGenerators...)

			for _, gen := range generators {
				p := gen.priority.String()
				s := gen.getState().String()
				genMap[p][s]++
			}

			// task queue sizes by priority
			tasksMap[LowPriority.String()] = len(scheduler.tasksQLow)
			tasksMap[MediumPriority.String()] = len(scheduler.tasksQMedium)
			tasksMap[HighPriority.String()] = len(scheduler.tasksQHigh)
			scheduler.generatorsLock.Unlock()

			monitoring.SetSchedulerGenerators(scheduler.metricServer, genMap)
			monitoring.SetSchedulerTasksQueue(scheduler.metricServer, tasksMap)
			workersMap := make(map[string]int)

			scheduler.tasksLock.Lock()
			workersMap["idle"] = scheduler.NumWorkers - scheduler.tasksDoWork
			workersMap["working"] = scheduler.tasksDoWork
			scheduler.tasksLock.Unlock()
			monitoring.SetSchedulerWorkers(scheduler.metricServer, workersMap)
		}
	}
}

// Shutdown stops the scheduler.
// It waits for all tasks currently being run to finish their work before exiting.
func (scheduler *Scheduler) Shutdown() {
	defer scheduler.workerWg.Wait()

	if !scheduler.inShutdown() {
		scheduler.shutdown()
	}
}

func (scheduler *Scheduler) inShutdown() bool {
	return scheduler.isShuttingDown.Load()
}

func (scheduler *Scheduler) shutdown() {
	scheduler.isShuttingDown.Store(true)

	scheduler.cancelFunc()
	close(scheduler.metricsChan)
}

// RunScheduler starts the worker pool, the metrics worker and the dispatch loop
// that feeds tasks to the workers.
func (scheduler *Scheduler) RunScheduler() {
	// This context is passed to all task generators.
	// Calling scheduler.Shutdown() cancels it and waits for all tasks
	// to finish their work gracefully.
	ctx, cancel := context.WithCancel(context.Background())
	scheduler.cancelFunc = cancel

	throttle := time.NewTicker(scheduler.RateLimit).C

	numWorkers := scheduler.NumWorkers

	// wait for all workers to finish their work before exiting from Shutdown()
	scheduler.workerWg.Add(numWorkers)

	// start worker pool
	go scheduler.poolWorker(ctx)

	// periodically send metrics
	go scheduler.metricsWorker()

	go func() {
		// close the workers channel when either ctx is canceled or scheduler.Shutdown() is called
		defer close(scheduler.workerChan)

		for {
			select {
			case <-ctx.Done():
				if !scheduler.inShutdown() {
					scheduler.shutdown()
				}

				scheduler.log.Debug().Msg("received stop signal, gracefully shutting down...")

				return
			default:
				// we don't want to block on sending a task to workerChan.
				if len(scheduler.workerChan) == scheduler.NumWorkers {
					<-throttle

					continue
				}

				task := scheduler.getTask()

				if task == nil {
					<-throttle

					continue
				}

				// push tasks into the worker pool until workerChan is full.
				scheduler.workerChan <- task
			}
		}
	}()
}

func (scheduler *Scheduler) pushReadyGenerators() {
	// iterate through the waiting generators list and resubmit those which have become ready to run
	for {
		modified := false

		for i, gen := range scheduler.waitingGenerators {
			if gen.getState() == Ready {
				gen.done = false
				heap.Push(&scheduler.generators, gen)
				scheduler.waitingGenerators = append(scheduler.waitingGenerators[:i], scheduler.waitingGenerators[i+1:]...)
				modified = true

				scheduler.log.Debug().Str("generator", gen.taskGenerator.Name()).
					Msg("waiting generator is ready, pushing to ready generators")

				break
			}
		}

		if !modified {
			break
		}
	}
}

func (scheduler *Scheduler) generateTasks() {
	scheduler.generatorsLock.Lock()
	defer scheduler.generatorsLock.Unlock()

	// resubmit ready generators (which were in a waiting state) to the generators priority queue
	scheduler.pushReadyGenerators()

	// nothing to do if there is no generator in the queue
	if scheduler.generators.Len() == 0 {
		return
	}

	var gen *generator

	// check if the generator with the highest ranking is ready to run
	if scheduler.generators[0].getState() == Ready {
		// we are not popping it as we will generate multiple tasks until it is done
		// we are going to pop after all tasks are generated
		gen = scheduler.generators[0]

		// trigger a generator reorder, as generating a task may impact the order
		// equivalent of pop/remove followed by push, but more efficient
		heap.Fix(&scheduler.generators, 0)
	} else {
		// the generator is not ready; pop it and park it in the matching list
		gen, _ = heap.Pop(&scheduler.generators).(*generator)
		if gen.getState() == Waiting {
			scheduler.waitingGenerators = append(scheduler.waitingGenerators, gen)
		} else if gen.getState() == Done {
			scheduler.doneGenerators = append(scheduler.doneGenerators, gen)
		}

		return
	}

	// run the generator to generate a new task, which will be added to the queue matching its priority
	gen.generate(scheduler)
}

func (scheduler *Scheduler) getTask() Task {
	// first, generate a task with the highest possible priority
	scheduler.generateTasks()

	// then, return a queued task, draining the queues in priority order (high, medium, low)
	select {
	case t := <-scheduler.tasksQHigh:
		return t
	default:
	}

	select {
	case t := <-scheduler.tasksQMedium:
		return t
	default:
	}

	select {
	case t := <-scheduler.tasksQLow:
		return t
	default:
	}

	return nil
}

func (scheduler *Scheduler) getTasksChannelByPriority(priority Priority) chan Task {
	switch priority {
	case LowPriority:
		return scheduler.tasksQLow
	case MediumPriority:
		return scheduler.tasksQMedium
	case HighPriority:
		return scheduler.tasksQHigh
	}

	return nil
}

// SubmitTask queues a one-shot task at the given priority.
// The call never blocks: if the queue for that priority is full, the task is dropped.
func (scheduler *Scheduler) SubmitTask(task Task, priority Priority) {
	// get the channel matching the given priority
	tasksQ := scheduler.getTasksChannelByPriority(priority)
	if tasksQ == nil {
		return
	}

	// check if the scheduler is still running in order to add the task to the channel
	if scheduler.inShutdown() {
		return
	}

	select {
	case tasksQ <- task:
		scheduler.log.Info().Msg("adding a new task")
	default:
		// the queue for this priority is full, the task is dropped
		if scheduler.inShutdown() {
			return
		}
	}
}

// Priority is the importance level at which tasks and generators are scheduled.
type Priority int

const (
	LowPriority Priority = iota
	MediumPriority
	HighPriority
)

// State describes the lifecycle state of a generator.
type State int

const (
	Ready State = iota
	Waiting
	Done
)

// TaskGenerator produces tasks for the scheduler: Next() returns tasks until
// IsDone() reports true, IsReady() gates scheduling, and Reset() prepares the
// generator for its next run.
type TaskGenerator interface {
	Next() (Task, error)
	IsDone() bool
	IsReady() bool
	Name() string
	Reset()
}
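
// A minimal TaskGenerator might look like the following sketch; the type names and
// fields (including gcTask, which stands for any Task implementation) are
// hypothetical. Note that IsDone() should report true only once Next() has nothing
// left to produce, since generate() discards the task returned by Next() whenever
// IsDone() is already true.
//
//	type repoWalkGenerator struct {
//		repos []string
//		next  int
//		done  bool
//	}
//
//	func (g *repoWalkGenerator) Next() (Task, error) {
//		if g.next >= len(g.repos) {
//			g.done = true
//
//			return nil, nil
//		}
//
//		t := &gcTask{repo: g.repos[g.next]}
//		g.next++
//
//		return t, nil
//	}
//
//	func (g *repoWalkGenerator) IsDone() bool  { return g.done }
//	func (g *repoWalkGenerator) IsReady() bool { return true }
//	func (g *repoWalkGenerator) Name() string  { return "repo-walk" }
//	func (g *repoWalkGenerator) Reset()        { g.next, g.done = 0, false }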

// generator wraps a TaskGenerator with its scheduling state
// (priority, interval, last run and heap index).
type generator struct {
	interval      time.Duration
	lastRun       time.Time
	done          bool
	priority      Priority
	taskGenerator TaskGenerator
	remainingTask Task
	index         int
	taskCount     int64
}

func (gen *generator) generate(sch *Scheduler) {
	// get the channel, by priority, where the newly generated task should be added
	taskQ := sch.getTasksChannelByPriority(gen.priority)

	task := gen.remainingTask

	// in case there is no task already generated, generate a new task
	if gen.remainingTask == nil {
		nextTask, err := gen.taskGenerator.Next()
		if err != nil {
			sch.log.Error().Err(err).Str("generator", gen.taskGenerator.Name()).
				Msg("failed to execute generator")

			return
		}

		// check if the generator is done
		if gen.taskGenerator.IsDone() {
			gen.done = true
			gen.lastRun = time.Now()
			gen.taskCount = 0
			gen.taskGenerator.Reset()

			sch.log.Debug().Str("generator", gen.taskGenerator.Name()).
				Msg("generator is done")

			return
		}

		task = nextTask
	}

	// keep track of the generated task count to use it for generator ordering
	gen.taskCount++

	// check if it's possible to add a new task to the channel
	// if not, keep the generated task and retry adding it next time
	select {
	case taskQ <- task:
		gen.remainingTask = nil

		return
	default:
		gen.remainingTask = task
	}
}

// getState returns the state of a generator.
// If the generator is not periodic, it can be either done or ready to generate a new task.
// If the generator is periodic, it can be waiting (it finished its work and waits for its
// interval to pass) or ready to generate a new task.
func (gen *generator) getState() State {
	if gen.interval == time.Duration(0) {
		if gen.done && gen.remainingTask == nil {
			return Done
		}
	} else {
		if gen.done && time.Since(gen.lastRun) < gen.interval && gen.remainingTask == nil {
			return Waiting
		}
	}

	if !gen.taskGenerator.IsReady() {
		return Waiting
	}

	return Ready
}

func (gen *generator) getRanking() float64 {
	// take into account the priority, but also how many tasks of
	// a specific generator were executed in the current generator run
	return math.Pow(10, float64(gen.priority)) / (1 + float64(gen.taskCount)) //nolint:gomnd
}
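
// For example, a HighPriority generator that has not produced any task yet ranks
// 10^2 / (1 + 0) = 100, while a LowPriority one ranks 10^0 / (1 + 0) = 1; after the
// high-priority generator has produced 199 tasks in its current run, its ranking
// drops to 100 / 200 = 0.5, letting lower-priority generators take their turn.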

// SubmitGenerator registers a task generator which, once it reports done, is re-run
// after the given interval; a zero interval registers a one-shot generator.
func (scheduler *Scheduler) SubmitGenerator(taskGenerator TaskGenerator, interval time.Duration, priority Priority) {
	newGenerator := &generator{
		interval:      interval,
		done:          false,
		priority:      priority,
		taskGenerator: taskGenerator,
		taskCount:     0,
		remainingTask: nil,
	}

	scheduler.generatorsLock.Lock()
	defer scheduler.generatorsLock.Unlock()

	// add the generator to the generators priority queue
	heap.Push(&scheduler.generators, newGenerator)
	// force pushing this metric (for zot minimal, metrics are enabled on first scraping)
	monitoring.IncSchedulerGenerators(scheduler.metricServer)
}
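
// For instance (an illustrative sketch; taskGen stands for any TaskGenerator
// implementation, such as the hypothetical repoWalkGenerator above):
//
//	// re-run the generator every 30 minutes after it reports IsDone()
//	sch.SubmitGenerator(taskGen, 30*time.Minute, HighPriority)
//
//	// a zero interval registers a one-shot generator that is never re-run
//	sch.SubmitGenerator(taskGen, 0, LowPriority)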

func getNumWorkers(cfg *config.Config) int {
	if cfg.Scheduler != nil && cfg.Scheduler.NumWorkers != 0 {
		return cfg.Scheduler.NumWorkers
	}

	return runtime.NumCPU() * NumWorkersMultiplier
}

func (p Priority) String() string {
	var priority string

	switch p {
	case LowPriority:
		priority = "low"
	case MediumPriority:
		priority = "medium"
	case HighPriority:
		priority = "high"
	default:
		priority = "invalid"
	}

	return priority
}

func (s State) String() string {
	var status string

	switch s {
	case Ready:
		status = "ready"
	case Waiting:
		status = "waiting"
	case Done:
		status = "done"
	default:
		status = "invalid"
	}

	return status
}