zotregistry.io/zot@v1.4.4-0.20231124084042-02a8ed785457/pkg/scheduler/scheduler.go

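// Package scheduler provides a priority-based task scheduler: tasks are
// submitted directly or produced by task generators, queued per priority, and
// executed by a pool of worker goroutines that always prefers higher-priority work.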
package scheduler

import (
	"container/heap"
	"context"
	"runtime"
	"sync"
	"sync/atomic"
	"time"

	"zotregistry.io/zot/pkg/api/config"
	"zotregistry.io/zot/pkg/log"
)

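// Task is a unit of work executed by the scheduler's worker pool.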
type Task interface {
	DoWork(ctx context.Context) error
}

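// generatorsPriorityQueue implements heap.Interface so that the generator with
// the highest priority sits at the top of the heap.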
type generatorsPriorityQueue []*generator

func (pq generatorsPriorityQueue) Len() int {
	return len(pq)
}

func (pq generatorsPriorityQueue) Less(i, j int) bool {
	return pq[i].priority > pq[j].priority
}

func (pq generatorsPriorityQueue) Swap(i, j int) {
	pq[i], pq[j] = pq[j], pq[i]
	pq[i].index = i
	pq[j].index = j
}

func (pq *generatorsPriorityQueue) Push(x any) {
	n := len(*pq)

	item, ok := x.(*generator)
	if !ok {
		return
	}

	item.index = n
	*pq = append(*pq, item)
}

func (pq *generatorsPriorityQueue) Pop() any {
	old := *pq
	n := len(old)
	item := old[n-1]
	old[n-1] = nil
	item.index = -1
	*pq = old[0 : n-1]

	return item
}

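// rateLimiterScheduler is the buffer size of each priority task queue,
// rateLimit is the dispatcher's tick interval (and the default RateLimit),
// and numWorkersMultiplier scales runtime.NumCPU() into the default worker count.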
const (
	rateLimiterScheduler = 400
	rateLimit            = 5 * time.Second
	numWorkersMultiplier = 4
)

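// Scheduler dispatches submitted tasks and generator-produced tasks to a pool
// of worker goroutines, draining the high, medium, and low priority queues in
// that order.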
type Scheduler struct {
	tasksQLow         chan Task
	tasksQMedium      chan Task
	tasksQHigh        chan Task
	generators        generatorsPriorityQueue
	waitingGenerators []*generator
	generatorsLock    *sync.Mutex
	log               log.Logger
	RateLimit         time.Duration
	NumWorkers        int
	workerChan        chan Task
	workerWg          *sync.WaitGroup
	isShuttingDown    atomic.Bool
}

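// NewScheduler builds a Scheduler with one buffered task queue per priority and
// a worker count taken from the configuration (or derived from the number of
// CPUs when none is configured).
//
// A minimal usage sketch, where someTask is a hypothetical Task implementation:
//
//	sch := NewScheduler(cfg, logger)
//	sch.RunScheduler(ctx)
//	sch.SubmitTask(&someTask{}, HighPriority)
//	sch.Shutdown()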
func NewScheduler(cfg *config.Config, logC log.Logger) *Scheduler {
	chLow := make(chan Task, rateLimiterScheduler)
	chMedium := make(chan Task, rateLimiterScheduler)
	chHigh := make(chan Task, rateLimiterScheduler)
	generatorPQ := make(generatorsPriorityQueue, 0)
	numWorkers := getNumWorkers(cfg)
	sublogger := logC.With().Str("component", "scheduler").Logger()

	heap.Init(&generatorPQ)

	return &Scheduler{
		tasksQLow:      chLow,
		tasksQMedium:   chMedium,
		tasksQHigh:     chHigh,
		generators:     generatorPQ,
		generatorsLock: new(sync.Mutex),
		log:            log.Logger{Logger: sublogger},
		// default value
		RateLimit:  rateLimit,
		NumWorkers: numWorkers,
		workerChan: make(chan Task, numWorkers),
		workerWg:   new(sync.WaitGroup),
	}
}

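// poolWorker starts NumWorkers goroutines, each consuming tasks from workerChan
// and running them until the channel is closed.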
func (scheduler *Scheduler) poolWorker(ctx context.Context) {
	for i := 0; i < scheduler.NumWorkers; i++ {
		go func(workerID int) {
			defer scheduler.workerWg.Done()

			for task := range scheduler.workerChan {
				scheduler.log.Debug().Int("worker", workerID).Msg("scheduler: starting task")

				if err := task.DoWork(ctx); err != nil {
					scheduler.log.Error().Int("worker", workerID).Err(err).Msg("scheduler: error while executing task")
				}

				scheduler.log.Debug().Int("worker", workerID).Msg("scheduler: finished task")
			}
		}(i + 1)
	}
}

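// Shutdown stops the scheduler and waits for all workers to finish their
// current tasks.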
func (scheduler *Scheduler) Shutdown() {
	if !scheduler.inShutdown() {
		scheduler.shutdown()
	}

	scheduler.workerWg.Wait()
}

func (scheduler *Scheduler) inShutdown() bool {
	return scheduler.isShuttingDown.Load()
}

func (scheduler *Scheduler) shutdown() {
	close(scheduler.workerChan)
	scheduler.isShuttingDown.Store(true)
}

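// RunScheduler starts the worker pool and a dispatcher goroutine which, on
// every rate-limit tick, collects up to NumWorkers tasks (asking ready
// generators for new ones) and hands them to the workers. It returns
// immediately; cancelling ctx shuts the scheduler down gracefully.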
func (scheduler *Scheduler) RunScheduler(ctx context.Context) {
	throttle := time.NewTicker(rateLimit).C

	numWorkers := scheduler.NumWorkers

	// wait for all workers to finish their work before exiting from Shutdown()
	scheduler.workerWg.Add(numWorkers)

	// start the worker pool
	go scheduler.poolWorker(ctx)

	go func() {
		for {
			select {
			case <-ctx.Done():
				if !scheduler.inShutdown() {
					scheduler.shutdown()
				}

				scheduler.log.Debug().Msg("scheduler: received stop signal, gracefully shutting down...")

				return
			default:
				i := 0
				for i < numWorkers {
					task := scheduler.getTask()

					if task != nil {
						// push the task into the worker pool
						if !scheduler.inShutdown() {
							scheduler.log.Debug().Msg("scheduler: pushing task into worker pool")
							scheduler.workerChan <- task
						}
					}
					i++
				}
			}

			<-throttle
		}
	}()
}

func (scheduler *Scheduler) pushReadyGenerators() {
	// iterate through the waiting generators list and resubmit those that have become ready to run
	for {
		modified := false

		for i, gen := range scheduler.waitingGenerators {
			if gen.getState() == ready {
				gen.done = false
				heap.Push(&scheduler.generators, gen)
				scheduler.waitingGenerators = append(scheduler.waitingGenerators[:i], scheduler.waitingGenerators[i+1:]...)
				modified = true

				scheduler.log.Debug().Msg("scheduler: waiting generator is ready, pushing to ready generators")

				break
			}
		}

		if !modified {
			break
		}
	}
}

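// generateTasks re-queues waiting generators that have become ready, then lets
// the highest-priority generator produce one task if it is ready; otherwise
// that generator is popped and either parked in waitingGenerators or dropped
// once done.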
func (scheduler *Scheduler) generateTasks() {
	scheduler.generatorsLock.Lock()
	defer scheduler.generatorsLock.Unlock()

	// resubmit ready generators (which were in a waiting state) to the generators priority queue
	scheduler.pushReadyGenerators()

	// get the highest priority generator from the queue
	if scheduler.generators.Len() == 0 {
		return
	}

	var gen *generator

	// check if the generator with the highest priority is ready to run
	if scheduler.generators[0].getState() == ready {
		gen = scheduler.generators[0]
	} else {
		gen, _ = heap.Pop(&scheduler.generators).(*generator)
		if gen.getState() == waiting {
			scheduler.waitingGenerators = append(scheduler.waitingGenerators, gen)
		}

		return
	}

	// run the generator to produce a new task, which will be added to the channel matching its priority
	gen.generate(scheduler)
}

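// getTask generates at most one new task, then returns a queued task, checking
// the high, medium, and low priority queues in that order; it returns nil when
// all queues are empty.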
func (scheduler *Scheduler) getTask() Task {
	// first, generate a task with the highest possible priority
	scheduler.generateTasks()

	// then, return a task with the highest possible priority
	select {
	case t := <-scheduler.tasksQHigh:
		return t
	default:
	}

	select {
	case t := <-scheduler.tasksQMedium:
		return t
	default:
	}

	select {
	case t := <-scheduler.tasksQLow:
		return t
	default:
	}

	return nil
}

func (scheduler *Scheduler) getTasksChannelByPriority(priority Priority) chan Task {
	switch priority {
	case LowPriority:
		return scheduler.tasksQLow
	case MediumPriority:
		return scheduler.tasksQMedium
	case HighPriority:
		return scheduler.tasksQHigh
	}

	return nil
}

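// SubmitTask queues a task on the channel matching its priority. The task is
// dropped if the priority is unknown, the scheduler is shutting down, or the
// queue is full.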
func (scheduler *Scheduler) SubmitTask(task Task, priority Priority) {
	// get the channel the task should be added to, based on its priority
	tasksQ := scheduler.getTasksChannelByPriority(priority)
	if tasksQ == nil {
		return
	}

	// check if the scheduler is still running before adding the task to the channel
	if scheduler.inShutdown() {
		return
	}

	select {
	case tasksQ <- task:
		scheduler.log.Info().Msg("scheduler: adding a new task")
	default:
		if scheduler.inShutdown() {
			return
		}
	}
}

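// Priority defines the order in which queued tasks are picked up by the workers.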
type Priority int

const (
	LowPriority Priority = iota
	MediumPriority
	HighPriority
)

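// state describes the lifecycle of a generator: ready to produce a task,
// waiting (for its interval to elapse or for the underlying TaskGenerator to
// become ready), or done.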
type state int

const (
	ready state = iota
	waiting
	done
)

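// TaskGenerator produces tasks one at a time through Next. IsDone reports
// whether the current run has finished, IsReady reports whether the generator
// can produce a task right now, and Reset is called after a finished run so a
// periodic generator can start over.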
type TaskGenerator interface {
	Next() (Task, error)
	IsDone() bool
	IsReady() bool
	Reset()
}

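// generator wraps a TaskGenerator with scheduling metadata: its priority, the
// interval between periodic runs (zero for one-shot generators), the time of
// its last run, a task that could not yet be queued, and its index in the
// priority queue heap.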
type generator struct {
	interval      time.Duration
	lastRun       time.Time
	done          bool
	priority      Priority
	taskGenerator TaskGenerator
	remainingTask Task
	index         int
}

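// generate runs the wrapped TaskGenerator once and tries to push the resulting
// task onto the queue for its priority; a task that cannot be queued is kept in
// remainingTask and retried later, while a generator reporting IsDone is marked
// done and reset.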
func (gen *generator) generate(sch *Scheduler) {
	// get the channel, by priority, that the newly generated task should be added to
	taskQ := sch.getTasksChannelByPriority(gen.priority)

	task := gen.remainingTask

	// in case there is no task already generated, generate a new task
	if gen.remainingTask == nil {
		nextTask, err := gen.taskGenerator.Next()
		if err != nil {
			sch.log.Error().Err(err).Msg("scheduler: error while executing generator")

			return
		}

		task = nextTask

		// check if the generator is done
		if gen.taskGenerator.IsDone() {
			gen.done = true
			gen.lastRun = time.Now()
			gen.taskGenerator.Reset()

			return
		}
	}

	// check if it's possible to add the new task to the channel;
	// if not, keep the generated task and retry adding it next time
	select {
	case taskQ <- task:
		gen.remainingTask = nil

		return
	default:
		gen.remainingTask = task
	}
}

// getState returns the state of a generator.
// if the generator is not periodic, then it can be done or ready to generate a new task.
// if the generator is periodic, then it can be waiting (it finished its work and waits for its interval to pass)
// or ready to generate a new task.
func (gen *generator) getState() state {
	if gen.interval == time.Duration(0) {
		if gen.done && gen.remainingTask == nil {
			return done
		}
	} else {
		if gen.done && time.Since(gen.lastRun) < gen.interval && gen.remainingTask == nil {
			return waiting
		}
	}

	if !gen.taskGenerator.IsReady() {
		return waiting
	}

	return ready
}

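// SubmitGenerator registers a task generator with the given priority. A zero
// interval makes the generator one-shot; otherwise it is re-run every time the
// interval elapses.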
func (scheduler *Scheduler) SubmitGenerator(taskGenerator TaskGenerator, interval time.Duration, priority Priority) {
	newGenerator := &generator{
		interval:      interval,
		done:          false,
		priority:      priority,
		taskGenerator: taskGenerator,
		remainingTask: nil,
	}

	scheduler.generatorsLock.Lock()
	defer scheduler.generatorsLock.Unlock()

	// add the generator to the generators priority queue
	heap.Push(&scheduler.generators, newGenerator)
}

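// getNumWorkers returns the configured number of workers, defaulting to
// runtime.NumCPU() * numWorkersMultiplier when none is set.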
func getNumWorkers(cfg *config.Config) int {
	if cfg.Scheduler != nil && cfg.Scheduler.NumWorkers != 0 {
		return cfg.Scheduler.NumWorkers
	}

	return runtime.NumCPU() * numWorkersMultiplier
}