github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/swarmkit/manager/scheduler/scheduler.go

package scheduler

import (
	"context"
	"time"

	"github.com/docker/swarmkit/api"
	"github.com/docker/swarmkit/api/genericresource"
	"github.com/docker/swarmkit/log"
	"github.com/docker/swarmkit/manager/state"
	"github.com/docker/swarmkit/manager/state/store"
	"github.com/docker/swarmkit/protobuf/ptypes"
)

const (
	// monitorFailures is the lookback period for counting failures of
	// a task to determine if a node is faulty for a particular service.
	monitorFailures = 5 * time.Minute

	// maxFailures is the number of failures within monitorFailures that
	// triggers downweighting of a node in the sorting function.
	maxFailures = 5
)

type schedulingDecision struct {
	old *api.Task
	new *api.Task
}

// Scheduler assigns tasks to nodes.
type Scheduler struct {
	store           *store.MemoryStore
	unassignedTasks map[string]*api.Task
	// pendingPreassignedTasks already have NodeID, need resource validation
	pendingPreassignedTasks map[string]*api.Task
	// preassignedTasks tracks tasks that were preassigned, including those
	// past the pending state.
	preassignedTasks map[string]struct{}
	nodeSet          nodeSet
	allTasks         map[string]*api.Task
	pipeline         *Pipeline

	// stopChan signals to the state machine to stop running
	stopChan chan struct{}
	// doneChan is closed when the state machine terminates
	doneChan chan struct{}
}

// New creates a new scheduler.
func New(store *store.MemoryStore) *Scheduler {
	return &Scheduler{
		store:                   store,
		unassignedTasks:         make(map[string]*api.Task),
		pendingPreassignedTasks: make(map[string]*api.Task),
		preassignedTasks:        make(map[string]struct{}),
		allTasks:                make(map[string]*api.Task),
		stopChan:                make(chan struct{}),
		doneChan:                make(chan struct{}),
		pipeline:                NewPipeline(),
	}
}

func (s *Scheduler) setupTasksList(tx store.ReadTx) error {
	tasks, err := store.FindTasks(tx, store.All)
	if err != nil {
		return err
	}

	tasksByNode := make(map[string]map[string]*api.Task)
	for _, t := range tasks {
		// Ignore all tasks that have not reached PENDING
		// state and tasks that no longer consume resources.
		if t.Status.State < api.TaskStatePending || t.Status.State > api.TaskStateRunning {
			continue
		}

		// Also ignore tasks that have not yet been assigned but desired state
		// is beyond TaskStateCompleted. This can happen if you update, delete
		// or scale down a service before its tasks were assigned.
		if t.Status.State == api.TaskStatePending && t.DesiredState > api.TaskStateCompleted {
			continue
		}

		s.allTasks[t.ID] = t
		if t.NodeID == "" {
			s.enqueue(t)
			continue
		}
		// preassigned tasks need their resource requirements validated on the corresponding node
		if t.Status.State == api.TaskStatePending {
			s.preassignedTasks[t.ID] = struct{}{}
			s.pendingPreassignedTasks[t.ID] = t
			continue
		}

		if tasksByNode[t.NodeID] == nil {
			tasksByNode[t.NodeID] = make(map[string]*api.Task)
		}
		tasksByNode[t.NodeID][t.ID] = t
	}

	return s.buildNodeSet(tx, tasksByNode)
}

// Run is the scheduler event loop.
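//
// A typical caller constructs the scheduler around an existing
// *store.MemoryStore and runs the event loop in its own goroutine. A minimal
// sketch (memoryStore and ctx are assumed to be supplied by the caller):
//
//	s := New(memoryStore)
//	go func() {
//		_ = s.Run(ctx)
//	}()
//	// ... later, during shutdown:
//	s.Stop()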
func (s *Scheduler) Run(ctx context.Context) error {
	defer close(s.doneChan)

	updates, cancel, err := store.ViewAndWatch(s.store, s.setupTasksList)
	if err != nil {
		log.G(ctx).WithError(err).Errorf("snapshot store update failed")
		return err
	}
	defer cancel()

	// Validate resources for preassigned tasks before handling other tasks:
	// preassigned tasks (such as those of global services) should start
	// before other tasks.
	s.processPreassignedTasks(ctx)

	// Queue all unassigned tasks before processing changes.
	s.tick(ctx)

	const (
		// commitDebounceGap is the amount of time to wait between
		// commit events to debounce them.
		commitDebounceGap = 50 * time.Millisecond
		// maxLatency is a time limit on the debouncing.
		maxLatency = time.Second
	)
	var (
		debouncingStarted     time.Time
		commitDebounceTimer   *time.Timer
		commitDebounceTimeout <-chan time.Time
	)

	tickRequired := false

	schedule := func() {
		if len(s.pendingPreassignedTasks) > 0 {
			s.processPreassignedTasks(ctx)
		}
		if tickRequired {
			s.tick(ctx)
			tickRequired = false
		}
	}

	// Watch for changes.
	for {
		select {
		case event := <-updates:
			switch v := event.(type) {
			case api.EventCreateTask:
				if s.createTask(ctx, v.Task) {
					tickRequired = true
				}
			case api.EventUpdateTask:
				if s.updateTask(ctx, v.Task) {
					tickRequired = true
				}
			case api.EventDeleteTask:
				if s.deleteTask(v.Task) {
					// deleting tasks may free up node resources; pending tasks should be re-evaluated.
					tickRequired = true
				}
			case api.EventCreateNode:
				s.createOrUpdateNode(v.Node)
				tickRequired = true
			case api.EventUpdateNode:
				s.createOrUpdateNode(v.Node)
				tickRequired = true
			case api.EventDeleteNode:
				s.nodeSet.remove(v.Node.ID)
			case state.EventCommit:
				if commitDebounceTimer != nil {
					if time.Since(debouncingStarted) > maxLatency {
						commitDebounceTimer.Stop()
						commitDebounceTimer = nil
						commitDebounceTimeout = nil
						schedule()
					} else {
						commitDebounceTimer.Reset(commitDebounceGap)
					}
				} else {
					commitDebounceTimer = time.NewTimer(commitDebounceGap)
					commitDebounceTimeout = commitDebounceTimer.C
					debouncingStarted = time.Now()
				}
			}
		case <-commitDebounceTimeout:
			schedule()
			commitDebounceTimer = nil
			commitDebounceTimeout = nil
		case <-s.stopChan:
			return nil
		}
	}
}

// Stop causes the scheduler event loop to stop running.
func (s *Scheduler) Stop() {
	close(s.stopChan)
	<-s.doneChan
}

// enqueue queues a task for scheduling.
func (s *Scheduler) enqueue(t *api.Task) {
	s.unassignedTasks[t.ID] = t
}
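
// createTask handles a task creation event. It returns true if the event
// requires a new scheduling pass, that is, if the task was added to the
// unassigned queue.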
func (s *Scheduler) createTask(ctx context.Context, t *api.Task) bool {
	// Ignore all tasks that have not reached PENDING
	// state, and tasks that no longer consume resources.
	if t.Status.State < api.TaskStatePending || t.Status.State > api.TaskStateRunning {
		return false
	}

	s.allTasks[t.ID] = t
	if t.NodeID == "" {
		// unassigned task
		s.enqueue(t)
		return true
	}

	if t.Status.State == api.TaskStatePending {
		s.preassignedTasks[t.ID] = struct{}{}
		s.pendingPreassignedTasks[t.ID] = t
		// preassigned tasks do not contribute to running tasks count
		return false
	}

	nodeInfo, err := s.nodeSet.nodeInfo(t.NodeID)
	if err == nil && nodeInfo.addTask(t) {
		s.nodeSet.updateNode(nodeInfo)
	}

	return false
}

func (s *Scheduler) updateTask(ctx context.Context, t *api.Task) bool {
	// Ignore all tasks that have not reached PENDING
	// state.
	if t.Status.State < api.TaskStatePending {
		return false
	}

	oldTask := s.allTasks[t.ID]

	// Ignore all tasks that have not reached Pending
	// state, and tasks that no longer consume resources.
	if t.Status.State > api.TaskStateRunning {
		if oldTask == nil {
			return false
		}

		if t.Status.State != oldTask.Status.State &&
			(t.Status.State == api.TaskStateFailed || t.Status.State == api.TaskStateRejected) {
			// Keep track of task failures, so other nodes can be preferred
			// for scheduling this service if it looks like the service is
			// failing in a loop on this node. However, skip this for
			// preassigned tasks, because the scheduler does not choose
			// which nodes those run on.
			if _, wasPreassigned := s.preassignedTasks[t.ID]; !wasPreassigned {
				nodeInfo, err := s.nodeSet.nodeInfo(t.NodeID)
				if err == nil {
					nodeInfo.taskFailed(ctx, t)
					s.nodeSet.updateNode(nodeInfo)
				}
			}
		}

		s.deleteTask(oldTask)

		return true
	}

	if t.NodeID == "" {
		// unassigned task
		if oldTask != nil {
			s.deleteTask(oldTask)
		}
		s.allTasks[t.ID] = t
		s.enqueue(t)
		return true
	}

	if t.Status.State == api.TaskStatePending {
		if oldTask != nil {
			s.deleteTask(oldTask)
		}
		s.preassignedTasks[t.ID] = struct{}{}
		s.allTasks[t.ID] = t
		s.pendingPreassignedTasks[t.ID] = t
		// preassigned tasks do not contribute to running tasks count
		return false
	}

	s.allTasks[t.ID] = t
	nodeInfo, err := s.nodeSet.nodeInfo(t.NodeID)
	if err == nil && nodeInfo.addTask(t) {
		s.nodeSet.updateNode(nodeInfo)
	}

	return false
}

func (s *Scheduler) deleteTask(t *api.Task) bool {
	delete(s.allTasks, t.ID)
	delete(s.preassignedTasks, t.ID)
	delete(s.pendingPreassignedTasks, t.ID)
	nodeInfo, err := s.nodeSet.nodeInfo(t.NodeID)
	if err == nil && nodeInfo.removeTask(t) {
		s.nodeSet.updateNode(nodeInfo)
		return true
	}
	return false
}

func (s *Scheduler) createOrUpdateNode(n *api.Node) {
	nodeInfo, nodeInfoErr := s.nodeSet.nodeInfo(n.ID)
	var resources *api.Resources
	if n.Description != nil && n.Description.Resources != nil {
		resources = n.Description.Resources.Copy()
		// reconcile resources by looping over all tasks in this node
		if nodeInfoErr == nil {
			for _, task := range nodeInfo.Tasks {
				reservations := taskReservations(task.Spec)

				resources.MemoryBytes -= reservations.MemoryBytes
				resources.NanoCPUs -= reservations.NanoCPUs

				genericresource.ConsumeNodeResources(&resources.Generic,
					task.AssignedGenericResources)
			}
		}
	} else {
		resources = &api.Resources{}
	}

	if nodeInfoErr != nil {
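		// The node is not tracked in the node set yet; start tracking it
		// with no tasks assigned.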
		nodeInfo = newNodeInfo(n, nil, *resources)
	} else {
		nodeInfo.Node = n
		nodeInfo.AvailableResources = resources
	}
	s.nodeSet.addOrUpdateNode(nodeInfo)
}

func (s *Scheduler) processPreassignedTasks(ctx context.Context) {
	schedulingDecisions := make(map[string]schedulingDecision, len(s.pendingPreassignedTasks))
	for _, t := range s.pendingPreassignedTasks {
		newT := s.taskFitNode(ctx, t, t.NodeID)
		if newT == nil {
			continue
		}
		schedulingDecisions[t.ID] = schedulingDecision{old: t, new: newT}
	}

	successful, failed := s.applySchedulingDecisions(ctx, schedulingDecisions)

	for _, decision := range successful {
		if decision.new.Status.State == api.TaskStateAssigned {
			delete(s.pendingPreassignedTasks, decision.old.ID)
		}
	}
	for _, decision := range failed {
		s.allTasks[decision.old.ID] = decision.old
		nodeInfo, err := s.nodeSet.nodeInfo(decision.new.NodeID)
		if err == nil && nodeInfo.removeTask(decision.new) {
			s.nodeSet.updateNode(nodeInfo)
		}
	}
}

// tick attempts to schedule the queue.
func (s *Scheduler) tick(ctx context.Context) {
	type commonSpecKey struct {
		serviceID   string
		specVersion api.Version
	}
	tasksByCommonSpec := make(map[commonSpecKey]map[string]*api.Task)
	var oneOffTasks []*api.Task
	schedulingDecisions := make(map[string]schedulingDecision, len(s.unassignedTasks))

	for taskID, t := range s.unassignedTasks {
		if t == nil || t.NodeID != "" {
			// task deleted or already assigned
			delete(s.unassignedTasks, taskID)
			continue
		}

		// Group tasks with common specs
		if t.SpecVersion != nil {
			taskGroupKey := commonSpecKey{
				serviceID:   t.ServiceID,
				specVersion: *t.SpecVersion,
			}

			if tasksByCommonSpec[taskGroupKey] == nil {
				tasksByCommonSpec[taskGroupKey] = make(map[string]*api.Task)
			}
			tasksByCommonSpec[taskGroupKey][taskID] = t
		} else {
			// This task doesn't have a spec version. We have to
			// schedule it as a one-off.
			oneOffTasks = append(oneOffTasks, t)
		}
		delete(s.unassignedTasks, taskID)
	}

	for _, taskGroup := range tasksByCommonSpec {
		s.scheduleTaskGroup(ctx, taskGroup, schedulingDecisions)
	}
	for _, t := range oneOffTasks {
		s.scheduleTaskGroup(ctx, map[string]*api.Task{t.ID: t}, schedulingDecisions)
	}

	_, failed := s.applySchedulingDecisions(ctx, schedulingDecisions)
	for _, decision := range failed {
		s.allTasks[decision.old.ID] = decision.old

		nodeInfo, err := s.nodeSet.nodeInfo(decision.new.NodeID)
		if err == nil && nodeInfo.removeTask(decision.new) {
			s.nodeSet.updateNode(nodeInfo)
		}

		// enqueue task for next scheduling attempt
		s.enqueue(decision.old)
	}
}

func (s *Scheduler) applySchedulingDecisions(ctx context.Context, schedulingDecisions map[string]schedulingDecision) (successful, failed []schedulingDecision) {
	if len(schedulingDecisions) == 0 {
		return
	}

	successful = make([]schedulingDecision, 0, len(schedulingDecisions))

	// Apply changes to master store
	err := s.store.Batch(func(batch *store.Batch) error {
		for len(schedulingDecisions) > 0 {
			err := batch.Update(func(tx store.Tx) error {
				// Update exactly one task inside this Update
				// callback.
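				// Each callback handles at most one task; the enclosing
				// loop keeps calling batch.Update until every decision
				// has been drained from schedulingDecisions.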
				for taskID, decision := range schedulingDecisions {
					delete(schedulingDecisions, taskID)

					t := store.GetTask(tx, taskID)
					if t == nil {
						// Task no longer exists
						s.deleteTask(decision.new)
						continue
					}

					if t.Status.State == decision.new.Status.State &&
						t.Status.Message == decision.new.Status.Message &&
						t.Status.Err == decision.new.Status.Err {
						// No changes, ignore
						continue
					}

					if t.Status.State >= api.TaskStateAssigned {
						nodeInfo, err := s.nodeSet.nodeInfo(decision.new.NodeID)
						if err != nil {
							failed = append(failed, decision)
							continue
						}
						node := store.GetNode(tx, decision.new.NodeID)
						if node == nil || node.Meta.Version != nodeInfo.Meta.Version {
							// node is out of date
							failed = append(failed, decision)
							continue
						}
					}

					if err := store.UpdateTask(tx, decision.new); err != nil {
						log.G(ctx).Debugf("scheduler failed to update task %s; will retry", taskID)
						failed = append(failed, decision)
						continue
					}
					successful = append(successful, decision)
					return nil
				}
				return nil
			})
			if err != nil {
				return err
			}
		}
		return nil
	})

	if err != nil {
		log.G(ctx).WithError(err).Error("scheduler tick transaction failed")
		failed = append(failed, successful...)
		successful = nil
	}
	return
}

// taskFitNode checks if a node has enough resources to accommodate a task.
func (s *Scheduler) taskFitNode(ctx context.Context, t *api.Task, nodeID string) *api.Task {
	nodeInfo, err := s.nodeSet.nodeInfo(nodeID)
	if err != nil {
		// node does not exist in set (it may have been deleted)
		return nil
	}
	newT := *t
	s.pipeline.SetTask(t)
	if !s.pipeline.Process(&nodeInfo) {
		// this node cannot accommodate this task
		newT.Status.Timestamp = ptypes.MustTimestampProto(time.Now())
		newT.Status.Err = s.pipeline.Explain()
		s.allTasks[t.ID] = &newT

		return &newT
	}
	newT.Status = api.TaskStatus{
		State:     api.TaskStateAssigned,
		Timestamp: ptypes.MustTimestampProto(time.Now()),
		Message:   "scheduler confirmed task can run on preassigned node",
	}
	s.allTasks[t.ID] = &newT

	if nodeInfo.addTask(&newT) {
		s.nodeSet.updateNode(nodeInfo)
	}
	return &newT
}

// scheduleTaskGroup schedules a batch of tasks that are part of the same
// service and share the same version of the spec.
func (s *Scheduler) scheduleTaskGroup(ctx context.Context, taskGroup map[string]*api.Task, schedulingDecisions map[string]schedulingDecision) {
	// Pick a task at random from taskGroup to use for constraint
	// evaluation. It doesn't matter which one we pick because all the
	// tasks in the group are equal in terms of the fields the constraint
	// filters consider.
	var t *api.Task
	for _, t = range taskGroup {
		break
	}

	s.pipeline.SetTask(t)

	now := time.Now()

	nodeLess := func(a *NodeInfo, b *NodeInfo) bool {
		// If either node has at least maxFailures recent failures,
		// that's the deciding factor.
		recentFailuresA := a.countRecentFailures(now, t)
		recentFailuresB := b.countRecentFailures(now, t)

		if recentFailuresA >= maxFailures || recentFailuresB >= maxFailures {
			if recentFailuresA > recentFailuresB {
				return false
			}
			if recentFailuresB > recentFailuresA {
				return true
			}
		}

		tasksByServiceA := a.ActiveTasksCountByService[t.ServiceID]
		tasksByServiceB := b.ActiveTasksCountByService[t.ServiceID]

		if tasksByServiceA < tasksByServiceB {
			return true
		}
		if tasksByServiceA > tasksByServiceB {
			return false
		}

		// Total number of tasks breaks ties.
		return a.ActiveTasksCount < b.ActiveTasksCount
	}

	var prefs []*api.PlacementPreference
	if t.Spec.Placement != nil {
		prefs = t.Spec.Placement.Preferences
	}

	tree := s.nodeSet.tree(t.ServiceID, prefs, len(taskGroup), s.pipeline.Process, nodeLess)

	s.scheduleNTasksOnSubtree(ctx, len(taskGroup), taskGroup, &tree, schedulingDecisions, nodeLess)
	if len(taskGroup) != 0 {
		s.noSuitableNode(ctx, taskGroup, schedulingDecisions)
	}
}

func (s *Scheduler) scheduleNTasksOnSubtree(ctx context.Context, n int, taskGroup map[string]*api.Task, tree *decisionTree, schedulingDecisions map[string]schedulingDecision, nodeLess func(a *NodeInfo, b *NodeInfo) bool) int {
	if tree.next == nil {
		nodes := tree.orderedNodes(s.pipeline.Process, nodeLess)
		if len(nodes) == 0 {
			return 0
		}

		return s.scheduleNTasksOnNodes(ctx, n, taskGroup, nodes, schedulingDecisions, nodeLess)
	}

	// Walk the tree and figure out how the tasks should be split at each
	// level.
	tasksScheduled := 0
	tasksInUsableBranches := tree.tasks
	var noRoom map[*decisionTree]struct{}

	// Try to make branches even until either all branches are
	// full, or all tasks have been scheduled.
	for tasksScheduled != n && len(noRoom) != len(tree.next) {
		desiredTasksPerBranch := (tasksInUsableBranches + n - tasksScheduled) / (len(tree.next) - len(noRoom))
		remainder := (tasksInUsableBranches + n - tasksScheduled) % (len(tree.next) - len(noRoom))

		for _, subtree := range tree.next {
			if noRoom != nil {
				if _, ok := noRoom[subtree]; ok {
					continue
				}
			}
			subtreeTasks := subtree.tasks
			if subtreeTasks < desiredTasksPerBranch || (subtreeTasks == desiredTasksPerBranch && remainder > 0) {
				tasksToAssign := desiredTasksPerBranch - subtreeTasks
				if remainder > 0 {
					tasksToAssign++
				}
				res := s.scheduleNTasksOnSubtree(ctx, tasksToAssign, taskGroup, subtree, schedulingDecisions, nodeLess)
				if res < tasksToAssign {
					if noRoom == nil {
						noRoom = make(map[*decisionTree]struct{})
					}
					noRoom[subtree] = struct{}{}
					tasksInUsableBranches -= subtreeTasks
				} else if remainder > 0 {
					remainder--
				}
				tasksScheduled += res
			}
		}
	}

	return tasksScheduled
}
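
// scheduleNTasksOnNodes assigns up to n tasks from taskGroup to the given
// nodes, which are expected to be ordered by preference (see nodeLess). The
// resulting assignments are recorded in schedulingDecisions, and the number
// of tasks scheduled is returned.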
func (s *Scheduler) scheduleNTasksOnNodes(ctx context.Context, n int, taskGroup map[string]*api.Task, nodes []NodeInfo, schedulingDecisions map[string]schedulingDecision, nodeLess func(a *NodeInfo, b *NodeInfo) bool) int {
	tasksScheduled := 0
	failedConstraints := make(map[int]bool) // key is index in nodes slice
	nodeIter := 0
	nodeCount := len(nodes)
	for taskID, t := range taskGroup {
		// Skip tasks which were already scheduled because they ended
		// up in two groups at once.
		if _, exists := schedulingDecisions[taskID]; exists {
			continue
		}

		node := &nodes[nodeIter%nodeCount]

		log.G(ctx).WithField("task.id", t.ID).Debugf("assigning to node %s", node.ID)
		newT := *t
		newT.NodeID = node.ID
		newT.Status = api.TaskStatus{
			State:     api.TaskStateAssigned,
			Timestamp: ptypes.MustTimestampProto(time.Now()),
			Message:   "scheduler assigned task to node",
		}
		s.allTasks[t.ID] = &newT

		nodeInfo, err := s.nodeSet.nodeInfo(node.ID)
		if err == nil && nodeInfo.addTask(&newT) {
			s.nodeSet.updateNode(nodeInfo)
			nodes[nodeIter%nodeCount] = nodeInfo
		}

		schedulingDecisions[taskID] = schedulingDecision{old: t, new: &newT}
		delete(taskGroup, taskID)
		tasksScheduled++
		if tasksScheduled == n {
			return tasksScheduled
		}

		if nodeIter+1 < nodeCount {
			// First pass fills the nodes until they have the same
			// number of tasks from this service.
			nextNode := nodes[(nodeIter+1)%nodeCount]
			if nodeLess(&nextNode, &nodeInfo) {
				nodeIter++
			}
		} else {
			// In later passes, we just assign one task at a time
			// to each node that still meets the constraints.
			nodeIter++
		}

		origNodeIter := nodeIter
		for failedConstraints[nodeIter%nodeCount] || !s.pipeline.Process(&nodes[nodeIter%nodeCount]) {
			failedConstraints[nodeIter%nodeCount] = true
			nodeIter++
			if nodeIter-origNodeIter == nodeCount {
				// None of the nodes meet the constraints anymore.
				return tasksScheduled
			}
		}
	}

	return tasksScheduled
}

// noSuitableNode checks unassigned tasks and makes sure they have an existing
// service in the store before updating the task status and adding them back
// to schedulingDecisions, unassignedTasks, and allTasks.
func (s *Scheduler) noSuitableNode(ctx context.Context, taskGroup map[string]*api.Task, schedulingDecisions map[string]schedulingDecision) {
	explanation := s.pipeline.Explain()
	for _, t := range taskGroup {
		var service *api.Service
		s.store.View(func(tx store.ReadTx) {
			service = store.GetService(tx, t.ServiceID)
		})
		if service == nil {
			log.G(ctx).WithField("task.id", t.ID).Debug("removing task from the scheduler")
			continue
		}

		log.G(ctx).WithField("task.id", t.ID).Debug("no suitable node available for task")

		newT := *t
		newT.Status.Timestamp = ptypes.MustTimestampProto(time.Now())
		sv := service.SpecVersion
		tv := newT.SpecVersion
		if sv != nil && tv != nil && sv.Index > tv.Index {
			log.G(ctx).WithField("task.id", t.ID).Debug(
				"task belongs to old revision of service",
			)
			if t.Status.State == api.TaskStatePending && t.DesiredState >= api.TaskStateShutdown {
				log.G(ctx).WithField("task.id", t.ID).Debug(
					"task is desired shutdown, scheduler will go ahead and do so",
				)
				newT.Status.State = api.TaskStateShutdown
				newT.Status.Err = ""
			}
		} else {
			if explanation != "" {
				newT.Status.Err = "no suitable node (" + explanation + ")"
			} else {
				newT.Status.Err = "no suitable node"
			}

			// re-enqueue a task that should still be attempted
			s.enqueue(&newT)
		}

		s.allTasks[t.ID] = &newT
		schedulingDecisions[t.ID] = schedulingDecision{old: t, new: &newT}
	}
}
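
// buildNodeSet initializes the scheduler's node set from the nodes currently
// in the store, seeding each node with the tasks already assigned to it.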
func (s *Scheduler) buildNodeSet(tx store.ReadTx, tasksByNode map[string]map[string]*api.Task) error {
	nodes, err := store.FindNodes(tx, store.All)
	if err != nil {
		return err
	}

	s.nodeSet.alloc(len(nodes))

	for _, n := range nodes {
		var resources api.Resources
		if n.Description != nil && n.Description.Resources != nil {
			resources = *n.Description.Resources
		}
		s.nodeSet.addOrUpdateNode(newNodeInfo(n, tasksByNode[n.ID], resources))
	}

	return nil
}