zotregistry.dev/zot@v1.4.4-0.20240314164342-eec277e14d20/pkg/scheduler/scheduler.go (about) 1 package scheduler 2 3 import ( 4 "container/heap" 5 "context" 6 "math" 7 "runtime" 8 "sync" 9 "sync/atomic" 10 "time" 11 12 "zotregistry.dev/zot/pkg/api/config" 13 "zotregistry.dev/zot/pkg/extensions/monitoring" 14 "zotregistry.dev/zot/pkg/log" 15 ) 16 17 type Task interface { 18 DoWork(ctx context.Context) error 19 Name() string 20 String() string 21 } 22 23 type generatorsPriorityQueue []*generator 24 25 func (pq generatorsPriorityQueue) Len() int { 26 return len(pq) 27 } 28 29 func (pq generatorsPriorityQueue) Less(i, j int) bool { 30 return pq[i].getRanking() > pq[j].getRanking() 31 } 32 33 func (pq generatorsPriorityQueue) Swap(i, j int) { 34 pq[i], pq[j] = pq[j], pq[i] 35 pq[i].index = i 36 pq[j].index = j 37 } 38 39 func (pq *generatorsPriorityQueue) Push(x any) { 40 n := len(*pq) 41 42 item, ok := x.(*generator) 43 if !ok { 44 return 45 } 46 47 item.index = n 48 *pq = append(*pq, item) 49 } 50 51 func (pq *generatorsPriorityQueue) Pop() any { 52 old := *pq 53 n := len(old) 54 item := old[n-1] 55 old[n-1] = nil 56 item.index = -1 57 *pq = old[0 : n-1] 58 59 return item 60 } 61 62 const ( 63 rateLimiterScheduler = 400 64 rateLimit = 50 * time.Millisecond 65 NumWorkersMultiplier = 4 66 sendMetricsInterval = 5 * time.Second 67 ) 68 69 type Scheduler struct { 70 tasksQLow chan Task 71 tasksQMedium chan Task 72 tasksQHigh chan Task 73 tasksDoWork int 74 tasksLock *sync.Mutex 75 generators generatorsPriorityQueue 76 waitingGenerators []*generator 77 doneGenerators []*generator 78 generatorsLock *sync.Mutex 79 log log.Logger 80 RateLimit time.Duration 81 NumWorkers int 82 workerChan chan Task 83 metricsChan chan struct{} 84 workerWg *sync.WaitGroup 85 isShuttingDown atomic.Bool 86 metricServer monitoring.MetricServer 87 cancelFunc context.CancelFunc 88 } 89 90 func NewScheduler(cfg *config.Config, ms monitoring.MetricServer, logC log.Logger) *Scheduler { //nolint: varnamelen 91 chLow := make(chan Task, rateLimiterScheduler) 92 chMedium := make(chan Task, rateLimiterScheduler) 93 chHigh := make(chan Task, rateLimiterScheduler) 94 generatorPQ := make(generatorsPriorityQueue, 0) 95 numWorkers := getNumWorkers(cfg) 96 sublogger := logC.With().Str("component", "scheduler").Logger() 97 98 heap.Init(&generatorPQ) 99 // force pushing this metric (for zot minimal metrics are enabled on first scraping) 100 monitoring.SetSchedulerNumWorkers(ms, numWorkers) 101 102 return &Scheduler{ 103 tasksQLow: chLow, 104 tasksQMedium: chMedium, 105 tasksQHigh: chHigh, 106 tasksDoWork: 0, // number of tasks that are in working state 107 tasksLock: new(sync.Mutex), 108 generators: generatorPQ, 109 generatorsLock: new(sync.Mutex), 110 log: log.Logger{Logger: sublogger}, 111 // default value 112 metricServer: ms, 113 RateLimit: rateLimit, 114 NumWorkers: numWorkers, 115 workerChan: make(chan Task, numWorkers), 116 metricsChan: make(chan struct{}, 1), 117 workerWg: new(sync.WaitGroup), 118 } 119 } 120 121 func (scheduler *Scheduler) poolWorker(ctx context.Context) { 122 for i := 0; i < scheduler.NumWorkers; i++ { 123 go func(workerID int) { 124 defer scheduler.workerWg.Done() 125 126 var workStart time.Time 127 128 var workDuration time.Duration 129 130 for task := range scheduler.workerChan { 131 // leave below line here (for zot minimal metrics can be enabled on first scraping) 132 metricsEnabled := scheduler.metricServer.IsEnabled() 133 scheduler.log.Debug().Int("worker", workerID).Str("task", task.String()).Msg("starting task") 134 135 if metricsEnabled { 136 scheduler.tasksLock.Lock() 137 scheduler.tasksDoWork++ 138 scheduler.tasksLock.Unlock() 139 workStart = time.Now() 140 } 141 142 if err := task.DoWork(ctx); err != nil { 143 scheduler.log.Error().Int("worker", workerID).Str("task", task.String()).Err(err). 144 Msg("failed to execute task") 145 } 146 147 if metricsEnabled { 148 scheduler.tasksLock.Lock() 149 scheduler.tasksDoWork-- 150 scheduler.tasksLock.Unlock() 151 workDuration = time.Since(workStart) 152 monitoring.ObserveWorkersTasksDuration(scheduler.metricServer, task.Name(), workDuration) 153 } 154 155 scheduler.log.Debug().Int("worker", workerID).Str("task", task.String()).Msg("finished task") 156 } 157 }(i + 1) 158 } 159 } 160 161 func (scheduler *Scheduler) metricsWorker() { 162 ticker := time.NewTicker(sendMetricsInterval) 163 164 for { 165 if scheduler.inShutdown() { 166 return 167 } 168 select { 169 case <-scheduler.metricsChan: 170 ticker.Stop() 171 172 return 173 case <-ticker.C: 174 genMap := make(map[string]map[string]uint64) 175 tasksMap := make(map[string]int) 176 // initialize map 177 for _, p := range []Priority{LowPriority, MediumPriority, HighPriority} { 178 priority := p.String() 179 genMap[priority] = make(map[string]uint64) 180 181 for _, s := range []State{Ready, Waiting, Done} { 182 genMap[priority][s.String()] = 0 183 } 184 } 185 186 scheduler.generatorsLock.Lock() 187 generators := append(append(scheduler.generators, scheduler.waitingGenerators...), 188 scheduler.doneGenerators...) 189 190 for _, gen := range generators { 191 p := gen.priority.String() 192 s := gen.getState().String() 193 genMap[p][s]++ 194 } 195 196 // tasks queue size by priority 197 tasksMap[LowPriority.String()] = len(scheduler.tasksQLow) 198 tasksMap[MediumPriority.String()] = len(scheduler.tasksQMedium) 199 tasksMap[HighPriority.String()] = len(scheduler.tasksQHigh) 200 scheduler.generatorsLock.Unlock() 201 202 monitoring.SetSchedulerGenerators(scheduler.metricServer, genMap) 203 monitoring.SetSchedulerTasksQueue(scheduler.metricServer, tasksMap) 204 workersMap := make(map[string]int) 205 206 scheduler.tasksLock.Lock() 207 workersMap["idle"] = scheduler.NumWorkers - scheduler.tasksDoWork 208 workersMap["working"] = scheduler.tasksDoWork 209 scheduler.tasksLock.Unlock() 210 monitoring.SetSchedulerWorkers(scheduler.metricServer, workersMap) 211 } 212 } 213 } 214 215 /* 216 Scheduler can be stopped by calling Shutdown(). 217 it will wait for all tasks being run to finish their work before exiting. 218 */ 219 func (scheduler *Scheduler) Shutdown() { 220 defer scheduler.workerWg.Wait() 221 222 if !scheduler.inShutdown() { 223 scheduler.shutdown() 224 } 225 } 226 227 func (scheduler *Scheduler) inShutdown() bool { 228 return scheduler.isShuttingDown.Load() 229 } 230 231 func (scheduler *Scheduler) shutdown() { 232 scheduler.isShuttingDown.Store(true) 233 234 scheduler.cancelFunc() 235 close(scheduler.metricsChan) 236 } 237 238 func (scheduler *Scheduler) RunScheduler() { 239 /*This context is passed to all task generators 240 calling scheduler.Shutdown() will cancel this context and will wait for all tasks 241 to finish their work gracefully.*/ 242 ctx, cancel := context.WithCancel(context.Background()) 243 scheduler.cancelFunc = cancel 244 245 throttle := time.NewTicker(scheduler.RateLimit).C 246 247 numWorkers := scheduler.NumWorkers 248 249 // wait all workers to finish their work before exiting from Shutdown() 250 scheduler.workerWg.Add(numWorkers) 251 252 // start worker pool 253 go scheduler.poolWorker(ctx) 254 255 // periodically send metrics 256 go scheduler.metricsWorker() 257 258 go func() { 259 // will close workers chan when either ctx is canceled or scheduler.Shutdown() 260 defer close(scheduler.workerChan) 261 262 for { 263 select { 264 case <-ctx.Done(): 265 if !scheduler.inShutdown() { 266 scheduler.shutdown() 267 } 268 269 scheduler.log.Debug().Msg("received stop signal, gracefully shutting down...") 270 271 return 272 default: 273 // we don't want to block on sending task in workerChan. 274 if len(scheduler.workerChan) == scheduler.NumWorkers { 275 <-throttle 276 277 continue 278 } 279 280 task := scheduler.getTask() 281 282 if task == nil { 283 <-throttle 284 285 continue 286 } 287 288 // push tasks into worker pool until workerChan is full. 289 scheduler.workerChan <- task 290 } 291 } 292 }() 293 } 294 295 func (scheduler *Scheduler) pushReadyGenerators() { 296 // iterate through waiting generators list and resubmit those which become ready to run 297 for { 298 modified := false 299 300 for i, gen := range scheduler.waitingGenerators { 301 if gen.getState() == Ready { 302 gen.done = false 303 heap.Push(&scheduler.generators, gen) 304 scheduler.waitingGenerators = append(scheduler.waitingGenerators[:i], scheduler.waitingGenerators[i+1:]...) 305 modified = true 306 307 scheduler.log.Debug().Str("generator", gen.taskGenerator.Name()). 308 Msg("waiting generator is ready, pushing to ready generators") 309 310 break 311 } 312 } 313 314 if !modified { 315 break 316 } 317 } 318 } 319 320 func (scheduler *Scheduler) generateTasks() { 321 scheduler.generatorsLock.Lock() 322 defer scheduler.generatorsLock.Unlock() 323 324 // resubmit ready generators(which were in a waiting state) to generators priority queue 325 scheduler.pushReadyGenerators() 326 327 // get the highest priority generator from queue 328 if scheduler.generators.Len() == 0 { 329 return 330 } 331 332 var gen *generator 333 334 // check if the generator with highest priority is ready to run 335 if scheduler.generators[0].getState() == Ready { 336 // we are not popping it as we will generate multiple tasks until it is done 337 // we are going to pop after all tasks are generated 338 gen = scheduler.generators[0] 339 340 // trigger a generator reorder, as generating a task may impact the order 341 // equivalent of pop/remove followed by push, but more efficient 342 heap.Fix(&scheduler.generators, 0) 343 } else { 344 gen, _ = heap.Pop(&scheduler.generators).(*generator) 345 if gen.getState() == Waiting { 346 scheduler.waitingGenerators = append(scheduler.waitingGenerators, gen) 347 } else if gen.getState() == Done { 348 scheduler.doneGenerators = append(scheduler.doneGenerators, gen) 349 } 350 351 return 352 } 353 354 // run generator to generate a new task which will be added to a channel by priority 355 gen.generate(scheduler) 356 } 357 358 func (scheduler *Scheduler) getTask() Task { 359 // first, generate a task with highest possible priority 360 scheduler.generateTasks() 361 362 // then, return a task with highest possible priority 363 select { 364 case t := <-scheduler.tasksQHigh: 365 return t 366 default: 367 } 368 369 select { 370 case t := <-scheduler.tasksQMedium: 371 return t 372 default: 373 } 374 375 select { 376 case t := <-scheduler.tasksQLow: 377 return t 378 default: 379 } 380 381 return nil 382 } 383 384 func (scheduler *Scheduler) getTasksChannelByPriority(priority Priority) chan Task { 385 switch priority { 386 case LowPriority: 387 return scheduler.tasksQLow 388 case MediumPriority: 389 return scheduler.tasksQMedium 390 case HighPriority: 391 return scheduler.tasksQHigh 392 } 393 394 return nil 395 } 396 397 func (scheduler *Scheduler) SubmitTask(task Task, priority Priority) { 398 // get by priority the channel where the task should be added to 399 tasksQ := scheduler.getTasksChannelByPriority(priority) 400 if tasksQ == nil { 401 return 402 } 403 404 // check if the scheduler is still running in order to add the task to the channel 405 if scheduler.inShutdown() { 406 return 407 } 408 409 select { 410 case tasksQ <- task: 411 scheduler.log.Info().Msg("adding a new task") 412 default: 413 if scheduler.inShutdown() { 414 return 415 } 416 } 417 } 418 419 type Priority int 420 421 const ( 422 LowPriority Priority = iota 423 MediumPriority 424 HighPriority 425 ) 426 427 type State int 428 429 const ( 430 Ready State = iota 431 Waiting 432 Done 433 ) 434 435 type TaskGenerator interface { 436 Next() (Task, error) 437 IsDone() bool 438 IsReady() bool 439 Name() string 440 Reset() 441 } 442 443 type generator struct { 444 interval time.Duration 445 lastRun time.Time 446 done bool 447 priority Priority 448 taskGenerator TaskGenerator 449 remainingTask Task 450 index int 451 taskCount int64 452 } 453 454 func (gen *generator) generate(sch *Scheduler) { 455 // get by priority the channel where the new generated task should be added to 456 taskQ := sch.getTasksChannelByPriority(gen.priority) 457 458 task := gen.remainingTask 459 460 // in case there is no task already generated, generate a new task 461 if gen.remainingTask == nil { 462 nextTask, err := gen.taskGenerator.Next() 463 if err != nil { 464 sch.log.Error().Err(err).Str("generator", gen.taskGenerator.Name()). 465 Msg("failed to execute generator") 466 467 return 468 } 469 470 // check if the generator is done 471 if gen.taskGenerator.IsDone() { 472 gen.done = true 473 gen.lastRun = time.Now() 474 gen.taskCount = 0 475 gen.taskGenerator.Reset() 476 477 sch.log.Debug().Str("generator", gen.taskGenerator.Name()). 478 Msg("generator is done") 479 480 return 481 } 482 483 task = nextTask 484 } 485 486 // keep track of generated task count to use it for generator ordering 487 gen.taskCount++ 488 489 // check if it's possible to add a new task to the channel 490 // if not, keep the generated task and retry to add it next time 491 select { 492 case taskQ <- task: 493 gen.remainingTask = nil 494 495 return 496 default: 497 gen.remainingTask = task 498 } 499 } 500 501 // getState() returns the state of a generator. 502 // if the generator is not periodic then it can be done or ready to generate a new task. 503 // if the generator is periodic then it can be waiting (finished its work and wait for its interval to pass) 504 // or ready to generate a new task. 505 func (gen *generator) getState() State { 506 if gen.interval == time.Duration(0) { 507 if gen.done && gen.remainingTask == nil { 508 return Done 509 } 510 } else { 511 if gen.done && time.Since(gen.lastRun) < gen.interval && gen.remainingTask == nil { 512 return Waiting 513 } 514 } 515 516 if !gen.taskGenerator.IsReady() { 517 return Waiting 518 } 519 520 return Ready 521 } 522 523 func (gen *generator) getRanking() float64 { 524 // take into account the priority, but also how many tasks of 525 // a specific generator were executed in the current generator run 526 return math.Pow(10, float64(gen.priority)) / (1 + float64(gen.taskCount)) //nolint:gomnd 527 } 528 529 func (scheduler *Scheduler) SubmitGenerator(taskGenerator TaskGenerator, interval time.Duration, priority Priority) { 530 newGenerator := &generator{ 531 interval: interval, 532 done: false, 533 priority: priority, 534 taskGenerator: taskGenerator, 535 taskCount: 0, 536 remainingTask: nil, 537 } 538 539 scheduler.generatorsLock.Lock() 540 defer scheduler.generatorsLock.Unlock() 541 542 // add generator to the generators priority queue 543 heap.Push(&scheduler.generators, newGenerator) 544 // force pushing this metric (for zot minimal metrics are enabled on first scraping) 545 monitoring.IncSchedulerGenerators(scheduler.metricServer) 546 } 547 548 func getNumWorkers(cfg *config.Config) int { 549 if cfg.Scheduler != nil && cfg.Scheduler.NumWorkers != 0 { 550 return cfg.Scheduler.NumWorkers 551 } 552 553 return runtime.NumCPU() * NumWorkersMultiplier 554 } 555 556 func (p Priority) String() string { 557 var priority string 558 559 switch p { 560 case LowPriority: 561 priority = "low" 562 case MediumPriority: 563 priority = "medium" 564 case HighPriority: 565 priority = "high" 566 default: 567 priority = "invalid" 568 } 569 570 return priority 571 } 572 573 func (s State) String() string { 574 var status string 575 576 switch s { 577 case Ready: 578 status = "ready" 579 case Waiting: 580 status = "waiting" 581 case Done: 582 status = "done" 583 default: 584 status = "invalid" 585 } 586 587 return status 588 }