// github.com/adityamillind98/nomad@v0.11.8/nomad/plan_apply.go

package nomad

import (
	"context"
	"fmt"
	"runtime"
	"time"

	metrics "github.com/armon/go-metrics"
	log "github.com/hashicorp/go-hclog"
	memdb "github.com/hashicorp/go-memdb"
	multierror "github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/raft"
)

// planner is used to manage the submitted allocation plans that are waiting
// to be assessed by the leader
type planner struct {
	*Server
	log log.Logger

	// planQueue is used to manage the submitted allocation
	// plans that are waiting to be assessed by the leader
	planQueue *PlanQueue
}

// newPlanner returns a new planner to be used for managing allocation plans.
func newPlanner(s *Server) (*planner, error) {
	// Create a plan queue
	planQueue, err := NewPlanQueue()
	if err != nil {
		return nil, err
	}

	return &planner{
		Server:    s,
		log:       s.logger.Named("planner"),
		planQueue: planQueue,
	}, nil
}

// planApply is a long lived goroutine that reads plan allocations from
// the plan queue, determines if they can be applied safely and applies
// them via Raft.
//
// Naively, we could simply dequeue a plan, verify it, apply it and then
// respond. However, plan application is bounded by the Raft apply time and
// subject to some latency. This creates a stall condition, where we are
// not evaluating, but simply waiting for a transaction to apply.
//
// To avoid this, we overlap verification with apply. This means once
// we've verified plan N we attempt to apply it. However, while waiting
// for apply, we begin to verify plan N+1 under the assumption that plan
// N has succeeded.
//
// In this sense, we track two parallel versions of the world. One is
// the pessimistic one driven by the Raft log which is replicated. The
// other is optimistic and assumes our transactions will succeed. In the
// happy path, this lets us do productive work during the latency of
// apply.
//
// In the unhappy path (Raft transaction fails), effectively we only
// wasted work during a time we would have been waiting anyway. However,
// in anticipation of this case we cannot respond to the plan until
// the Raft log is updated. This means our schedulers will stall,
// but there are many of those and only a single plan verifier.
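//
// For illustration, with Raft apply latency L:
//
//	t:    verify plan N against a snapshot, then submit it to Raft
//	t+ε:  begin verifying plan N+1 against an optimistic snapshot that
//	      assumes plan N committed
//	t+L:  plan N's committed index arrives via planIndexCh; respond to N
//
// so the verification of plan N+1 overlaps the Raft round trip for plan N
// instead of waiting it out.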
func (p *planner) planApply() {
	// planIndexCh is used to track an outstanding application and receive
	// its committed index while snap holds an optimistic state which
	// includes that plan application.
	var planIndexCh chan uint64
	var snap *state.StateSnapshot

	// prevPlanResultIndex is the index at which the last PlanResult was
	// committed. Since only the last plan is optimistically applied to the
	// snapshot, it's possible the current snapshot's and plan's indexes
	// are less than the index the previous plan result was committed at.
	// prevPlanResultIndex also guards against the previous plan committing
	// during Dequeue, thus causing the snapshot containing the optimistic
	// commit to be discarded and potentially evaluating the current plan
	// against an index older than the previous plan was committed at.
	var prevPlanResultIndex uint64

	// Set up a worker pool with half the cores, with at least 1
	poolSize := runtime.NumCPU() / 2
	if poolSize == 0 {
		poolSize = 1
	}
	pool := NewEvaluatePool(poolSize, workerPoolBufferSize)
	defer pool.Shutdown()

	for {
		// Pull the next pending plan, exit if we are no longer leader
		pending, err := p.planQueue.Dequeue(0)
		if err != nil {
			return
		}

		// If the last plan has completed, get a new snapshot
		select {
		case idx := <-planIndexCh:
			// Previous plan committed. Discard the snapshot and ensure
			// future snapshots include this plan. idx may be 0 if the
			// plan failed to apply, so use max(prev, idx)
			prevPlanResultIndex = max(prevPlanResultIndex, idx)
			planIndexCh = nil
			snap = nil
		default:
		}

		if snap != nil {
			// If the snapshot doesn't contain both the previous plan
			// result's index and the current plan's snapshot index,
			// discard it and get a new one below.
			minIndex := max(prevPlanResultIndex, pending.plan.SnapshotIndex)
			if idx, err := snap.LatestIndex(); err != nil || idx < minIndex {
				snap = nil
			}
		}

		// Snapshot the state so that we have a consistent view of the world
		// if no snapshot is available.
		//  - planIndexCh will be nil if the previous plan result applied
		//    during Dequeue
		//  - snap will be nil if its index < max(prevIndex, curIndex)
		if planIndexCh == nil || snap == nil {
			snap, err = p.snapshotMinIndex(prevPlanResultIndex, pending.plan.SnapshotIndex)
			if err != nil {
				p.logger.Error("failed to snapshot state", "error", err)
				pending.respond(nil, err)
				continue
			}
		}

		// Evaluate the plan
		result, err := evaluatePlan(pool, snap, pending.plan, p.logger)
		if err != nil {
			p.logger.Error("failed to evaluate plan", "error", err)
			pending.respond(nil, err)
			continue
		}

		// Fast-path the response if there is nothing to do
		if result.IsNoOp() {
			pending.respond(result, nil)
			continue
		}

		// Ensure any parallel apply is complete before starting the next one.
		// This also limits how out of date our snapshot can be.
		if planIndexCh != nil {
			idx := <-planIndexCh
			prevPlanResultIndex = max(prevPlanResultIndex, idx)
			snap, err = p.snapshotMinIndex(prevPlanResultIndex, pending.plan.SnapshotIndex)
			if err != nil {
				p.logger.Error("failed to update snapshot state", "error", err)
				pending.respond(nil, err)
				continue
			}
		}

		// Dispatch the Raft transaction for the plan
		future, err := p.applyPlan(pending.plan, result, snap)
		if err != nil {
			p.logger.Error("failed to submit plan", "error", err)
			pending.respond(nil, err)
			continue
		}

		// Respond to the plan asynchronously; receive the plan's committed
		// index via the channel.
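		// The 1-element buffer lets asyncPlanWait deliver that index without
		// blocking, even while this loop has moved on to the next plan.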
		planIndexCh = make(chan uint64, 1)
		go p.asyncPlanWait(planIndexCh, future, result, pending)
	}
}

// snapshotMinIndex wraps SnapshotMinIndex with a 5s timeout and converts
// timeout errors to a more descriptive error message. The returned snapshot
// is guaranteed to include both the previous plan result and all objects
// referenced by the plan; otherwise an error is returned.
func (p *planner) snapshotMinIndex(prevPlanResultIndex, planSnapshotIndex uint64) (*state.StateSnapshot, error) {
	defer metrics.MeasureSince([]string{"nomad", "plan", "wait_for_index"}, time.Now())

	// The minimum index the snapshot must include is the max of the previous
	// plan result's and the current plan's snapshot index.
	minIndex := max(prevPlanResultIndex, planSnapshotIndex)

	const timeout = 5 * time.Second
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	snap, err := p.fsm.State().SnapshotMinIndex(ctx, minIndex)
	cancel()
	if err == context.DeadlineExceeded {
		return nil, fmt.Errorf("timed out after %s waiting for index=%d (previous plan result index=%d; plan snapshot index=%d)",
			timeout, minIndex, prevPlanResultIndex, planSnapshotIndex)
	}

	return snap, err
}

// applyPlan is used to apply the plan result and to return the alloc index
func (p *planner) applyPlan(plan *structs.Plan, result *structs.PlanResult, snap *state.StateSnapshot) (raft.ApplyFuture, error) {
	// Set up the update request
	req := structs.ApplyPlanResultsRequest{
		AllocUpdateRequest: structs.AllocUpdateRequest{
			Job: plan.Job,
		},
		Deployment:        result.Deployment,
		DeploymentUpdates: result.DeploymentUpdates,
		EvalID:            plan.EvalID,
	}

	preemptedJobIDs := make(map[structs.NamespacedID]struct{})
	now := time.Now().UTC().UnixNano()

	if ServersMeetMinimumVersion(p.Members(), MinVersionPlanNormalization, true) {
		// Initialize the allocs request using the new optimized log entry
		// format. The capacities are the minimum number of updates; there
		// could be more if there are multiple updates per node.
		req.AllocsStopped = make([]*structs.AllocationDiff, 0, len(result.NodeUpdate))
		req.AllocsUpdated = make([]*structs.Allocation, 0, len(result.NodeAllocation))
		req.AllocsPreempted = make([]*structs.AllocationDiff, 0, len(result.NodePreemptions))

		for _, updateList := range result.NodeUpdate {
			for _, stoppedAlloc := range updateList {
				req.AllocsStopped = append(req.AllocsStopped, normalizeStoppedAlloc(stoppedAlloc, now))
			}
		}

		for _, allocList := range result.NodeAllocation {
			req.AllocsUpdated = append(req.AllocsUpdated, allocList...)
		}

		// Set the time the alloc was applied for the first time. This can be
		// used to approximate the scheduling time.
		updateAllocTimestamps(req.AllocsUpdated, now)

		for _, preemptions := range result.NodePreemptions {
			for _, preemptedAlloc := range preemptions {
				req.AllocsPreempted = append(req.AllocsPreempted, normalizePreemptedAlloc(preemptedAlloc, now))

				// Gather job IDs to create follow-up evals
				appendNamespacedJobID(preemptedJobIDs, preemptedAlloc)
			}
		}
	} else {
		// COMPAT 0.11: This branch is deprecated and will only be used to support
		// application of older log entries. Expected to be removed in a future version.

		// This is the minimum number of updates; there could be more if
		// there are multiple updates per node.
		minUpdates := len(result.NodeUpdate)
		minUpdates += len(result.NodeAllocation)

		// Initialize using the older log entry format for Alloc and NodePreemptions
		req.Alloc = make([]*structs.Allocation, 0, minUpdates)
		req.NodePreemptions = make([]*structs.Allocation, 0, len(result.NodePreemptions))

		for _, updateList := range result.NodeUpdate {
			req.Alloc = append(req.Alloc, updateList...)
		}
		for _, allocList := range result.NodeAllocation {
			req.Alloc = append(req.Alloc, allocList...)
		}

		for _, preemptions := range result.NodePreemptions {
			req.NodePreemptions = append(req.NodePreemptions, preemptions...)
		}

		// Set the time the alloc was applied for the first time. This can be
		// used to approximate the scheduling time.
		updateAllocTimestamps(req.Alloc, now)

		// Set the modify time for preempted allocs, if any, and gather job
		// IDs to create follow-up evals.
		for _, alloc := range req.NodePreemptions {
			alloc.ModifyTime = now
			appendNamespacedJobID(preemptedJobIDs, alloc)
		}
	}

	var evals []*structs.Evaluation
	for preemptedJobID := range preemptedJobIDs {
		job, _ := p.State().JobByID(nil, preemptedJobID.Namespace, preemptedJobID.ID)
		if job != nil {
			eval := &structs.Evaluation{
				ID:          uuid.Generate(),
				Namespace:   job.Namespace,
				TriggeredBy: structs.EvalTriggerPreemption,
				JobID:       job.ID,
				Type:        job.Type,
				Priority:    job.Priority,
				Status:      structs.EvalStatusPending,
				CreateTime:  now,
				ModifyTime:  now,
			}
			evals = append(evals, eval)
		}
	}
	req.PreemptionEvals = evals

	// Dispatch the Raft transaction
	future, err := p.raftApplyFuture(structs.ApplyPlanResultsRequestType, &req)
	if err != nil {
		return nil, err
	}

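	// The index used for the optimistic apply below, AppliedIndex()+1, is a
	// best-effort guess at where this entry will commit; the authoritative
	// index reaches planApply via planIndexCh once the future completes, and
	// the optimistic snapshot is discarded at that point.
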
	// Optimistically apply to our state view
	if snap != nil {
		nextIdx := p.raft.AppliedIndex() + 1
		if err := snap.UpsertPlanResults(nextIdx, &req); err != nil {
			return future, err
		}
	}
	return future, nil
}

// normalizePreemptedAlloc removes redundant fields from a preempted
// allocation and returns an AllocationDiff. Since a preempted allocation is
// always an existing allocation, the struct returned by this method contains
// only the differential, which can be applied to the existing allocation to
// yield the updated struct.
func normalizePreemptedAlloc(preemptedAlloc *structs.Allocation, now int64) *structs.AllocationDiff {
	return &structs.AllocationDiff{
		ID:                    preemptedAlloc.ID,
		PreemptedByAllocation: preemptedAlloc.PreemptedByAllocation,
		ModifyTime:            now,
	}
}

// normalizeStoppedAlloc removes redundant fields from a stopped allocation
// and returns an AllocationDiff. Since a stopped allocation is always an
// existing allocation, the struct returned by this method contains only the
// differential, which can be applied to the existing allocation to yield the
// updated struct.
func normalizeStoppedAlloc(stoppedAlloc *structs.Allocation, now int64) *structs.AllocationDiff {
	return &structs.AllocationDiff{
		ID:                 stoppedAlloc.ID,
		DesiredDescription: stoppedAlloc.DesiredDescription,
		ClientStatus:       stoppedAlloc.ClientStatus,
		ModifyTime:         now,
		FollowupEvalID:     stoppedAlloc.FollowupEvalID,
	}
}

// appendNamespacedJobID appends the namespaced job ID for the alloc to the
// jobIDs set.
func appendNamespacedJobID(jobIDs map[structs.NamespacedID]struct{}, alloc *structs.Allocation) {
	id := structs.NamespacedID{Namespace: alloc.Namespace, ID: alloc.JobID}
	if _, ok := jobIDs[id]; !ok {
		jobIDs[id] = struct{}{}
	}
}

// updateAllocTimestamps sets ModifyTime to the provided timestamp for the
// allocations, and also sets CreateTime for allocations that do not have one
// yet.
func updateAllocTimestamps(allocations []*structs.Allocation, timestamp int64) {
	for _, alloc := range allocations {
		if alloc.CreateTime == 0 {
			alloc.CreateTime = timestamp
		}
		alloc.ModifyTime = timestamp
	}
}

// asyncPlanWait is used to apply and respond to a plan asynchronously. On
// successful commit the plan's index will be sent on the chan. On error the
// chan will be closed.
func (p *planner) asyncPlanWait(indexCh chan<- uint64, future raft.ApplyFuture,
	result *structs.PlanResult, pending *pendingPlan) {
	defer metrics.MeasureSince([]string{"nomad", "plan", "apply"}, time.Now())

	// Wait for the plan to apply
	if err := future.Error(); err != nil {
		p.logger.Error("failed to apply plan", "error", err)
		pending.respond(nil, err)

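		// Closing indexCh (rather than sending on it) makes the pending
		// receive in planApply yield zero, which max(prev, idx) ignores.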
		// Close indexCh on error
		close(indexCh)
		return
	}

	// Respond to the plan
	index := future.Index()
	result.AllocIndex = index

	// If this is a partial plan application, we need to ensure the scheduler
	// at least has visibility into any placements it made to avoid double
	// placement. The RefreshIndex computed by evaluatePlan may be stale due
	// to evaluation against an optimistic copy of the state.
	if result.RefreshIndex != 0 {
		result.RefreshIndex = maxUint64(result.RefreshIndex, result.AllocIndex)
	}
	pending.respond(result, nil)
	indexCh <- index
}

// evaluatePlan is used to determine what portions of a plan can be applied,
// if any. It returns the plan application, which may be partial, or an
// error.
func evaluatePlan(pool *EvaluatePool, snap *state.StateSnapshot, plan *structs.Plan, logger log.Logger) (*structs.PlanResult, error) {
	defer metrics.MeasureSince([]string{"nomad", "plan", "evaluate"}, time.Now())

	// Denormalize the stopped allocations without the job
	err := snap.DenormalizeAllocationsMap(plan.NodeUpdate)
	if err != nil {
		return nil, err
	}
	// Denormalize the preempted allocations without the job
	err = snap.DenormalizeAllocationsMap(plan.NodePreemptions)
	if err != nil {
		return nil, err
	}

	// Check if the plan exceeds quota
	overQuota, err := evaluatePlanQuota(snap, plan)
	if err != nil {
		return nil, err
	}

	// Reject the plan and force the scheduler to refresh
	if overQuota {
		index, err := refreshIndex(snap)
		if err != nil {
			return nil, err
		}

		logger.Debug("plan for evaluation exceeds quota limit. Forcing state refresh", "eval_id", plan.EvalID, "refresh_index", index)
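		// A PlanResult carrying only a RefreshIndex applies nothing; it
		// directs the scheduler's worker to refresh its state to at least
		// that index and replan.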
		return &structs.PlanResult{RefreshIndex: index}, nil
	}

	return evaluatePlanPlacements(pool, snap, plan, logger)
}

// evaluatePlanPlacements is used to determine what portions of a plan can be
// applied, if any, looking for node overcommitment. It returns the plan
// application, which may be partial, or an error.
func evaluatePlanPlacements(pool *EvaluatePool, snap *state.StateSnapshot, plan *structs.Plan, logger log.Logger) (*structs.PlanResult, error) {
	// Create a result holder for the plan
	result := &structs.PlanResult{
		NodeUpdate:        make(map[string][]*structs.Allocation),
		NodeAllocation:    make(map[string][]*structs.Allocation),
		Deployment:        plan.Deployment.Copy(),
		DeploymentUpdates: plan.DeploymentUpdates,
		NodePreemptions:   make(map[string][]*structs.Allocation),
	}

	// Collect all the node IDs
	nodeIDs := make(map[string]struct{})
	nodeIDList := make([]string, 0, len(plan.NodeUpdate)+len(plan.NodeAllocation))
	for nodeID := range plan.NodeUpdate {
		if _, ok := nodeIDs[nodeID]; !ok {
			nodeIDs[nodeID] = struct{}{}
			nodeIDList = append(nodeIDList, nodeID)
		}
	}
	for nodeID := range plan.NodeAllocation {
		if _, ok := nodeIDs[nodeID]; !ok {
			nodeIDs[nodeID] = struct{}{}
			nodeIDList = append(nodeIDList, nodeID)
		}
	}

	// Set up a multierror to handle the potentially many errors, since we
	// are processing in parallel.
	var mErr multierror.Error
	partialCommit := false

	// handleResult is used to process the result of evaluateNodePlan
	handleResult := func(nodeID string, fit bool, reason string, err error) (cancel bool) {
		// An error evaluating the node's plan aborts further processing
		if err != nil {
			mErr.Errors = append(mErr.Errors, err)
			return true
		}
		if !fit {
			// Log the reason why the node's allocations could not be made
			if reason != "" {
				logger.Debug("plan for node rejected", "node_id", nodeID, "reason", reason, "eval_id", plan.EvalID)
			}
			// Set that this is a partial commit
			partialCommit = true

			// If we require all-at-once scheduling, there is no point in
			// continuing the evaluation, as we've already failed.
			if plan.AllAtOnce {
				result.NodeUpdate = nil
				result.NodeAllocation = nil
				result.DeploymentUpdates = nil
				result.Deployment = nil
				result.NodePreemptions = nil
				return true
			}

			// Skip this node, since it cannot be used.
			return
		}

		// Add this to the plan result
		if nodeUpdate := plan.NodeUpdate[nodeID]; len(nodeUpdate) > 0 {
			result.NodeUpdate[nodeID] = nodeUpdate
		}
		if nodeAlloc := plan.NodeAllocation[nodeID]; len(nodeAlloc) > 0 {
			result.NodeAllocation[nodeID] = nodeAlloc
		}

		if nodePreemptions := plan.NodePreemptions[nodeID]; nodePreemptions != nil {
			// Do a pass over the preempted allocs in the plan to check
			// whether the alloc is already in a terminal state
			var filteredNodePreemptions []*structs.Allocation
			for _, preemptedAlloc := range nodePreemptions {
				alloc, err := snap.AllocByID(nil, preemptedAlloc.ID)
				if err != nil {
					mErr.Errors = append(mErr.Errors, err)
					continue
				}
				if alloc != nil && !alloc.TerminalStatus() {
					filteredNodePreemptions = append(filteredNodePreemptions, preemptedAlloc)
				}
			}

			result.NodePreemptions[nodeID] = filteredNodePreemptions
		}

		return
	}

	// Get the pool channels
	req := pool.RequestCh()
	resp := pool.ResultCh()
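	// outstanding counts in-flight evaluations; didCancel records that
	// handleResult requested cancellation so the drain loop below discards
	// any remaining results instead of processing them.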
	outstanding := 0
	didCancel := false

	// Evaluate each node in the plan, handling results as they are ready to
	// avoid blocking.
OUTER:
	for len(nodeIDList) > 0 {
		nodeID := nodeIDList[0]
		select {
		case req <- evaluateRequest{snap, plan, nodeID}:
			outstanding++
			nodeIDList = nodeIDList[1:]
		case r := <-resp:
			outstanding--

			// Handle a result that allows us to cancel evaluation,
			// which may save time processing additional entries.
			if cancel := handleResult(r.nodeID, r.fit, r.reason, r.err); cancel {
				didCancel = true
				break OUTER
			}
		}
	}

	// Drain the remaining results
	for outstanding > 0 {
		r := <-resp
		if !didCancel {
			if cancel := handleResult(r.nodeID, r.fit, r.reason, r.err); cancel {
				didCancel = true
			}
		}
		outstanding--
	}

	// If the plan resulted in a partial commit, we need to determine a
	// minimum refresh index to force the scheduler to work on a more
	// up-to-date state to avoid the failures.
	if partialCommit {
		index, err := refreshIndex(snap)
		if err != nil {
			mErr.Errors = append(mErr.Errors, err)
		}
		result.RefreshIndex = index

		if result.RefreshIndex == 0 {
			err := fmt.Errorf("partialCommit with RefreshIndex of 0")
			mErr.Errors = append(mErr.Errors, err)
		}

		// If there was a partial commit and we are operating within a
		// deployment, correct for any canary that may have been desired to
		// be placed but wasn't actually placed
		correctDeploymentCanaries(result)
	}
	return result, mErr.ErrorOrNil()
}

// correctDeploymentCanaries ensures that the deployment object doesn't list
// any canaries as placed if they didn't actually get placed. This could
// happen if the plan had a partial commit.
func correctDeploymentCanaries(result *structs.PlanResult) {
	// Hot path
	if result.Deployment == nil || !result.Deployment.HasPlacedCanaries() {
		return
	}

	// Build a set of all the allocation IDs that were placed
	placedAllocs := make(map[string]struct{}, len(result.NodeAllocation))
	for _, placed := range result.NodeAllocation {
		for _, alloc := range placed {
			placedAllocs[alloc.ID] = struct{}{}
		}
	}

	// Go through all the canaries and ensure that the result list only
	// contains those that have been placed
	for _, group := range result.Deployment.TaskGroups {
		canaries := group.PlacedCanaries
		if len(canaries) == 0 {
			continue
		}

		// Prune the canaries in place to avoid allocating an extra slice
		i := 0
		for _, canaryID := range canaries {
			if _, ok := placedAllocs[canaryID]; ok {
				canaries[i] = canaryID
				i++
			}
		}

		group.PlacedCanaries = canaries[:i]
	}
}

// evaluateNodePlan is used to evaluate the plan for a single node, returning
// whether the plan fits (and the reason when it does not) or an error.
func evaluateNodePlan(snap *state.StateSnapshot, plan *structs.Plan, nodeID string) (bool, string, error) {
	// If this is an evict-only plan, it always 'fits' since we are removing
	// things.
	if len(plan.NodeAllocation[nodeID]) == 0 {
		return true, "", nil
	}

	// Get the node itself
	ws := memdb.NewWatchSet()
	node, err := snap.NodeByID(ws, nodeID)
	if err != nil {
		return false, "", fmt.Errorf("failed to get node '%s': %v", nodeID, err)
	}

	// If the node does not exist or is not ready for scheduling, it is not fit
	// XXX: There is a potential race between when we do this check and when
	// the Raft commit happens.
	if node == nil {
		return false, "node does not exist", nil
	} else if node.Status != structs.NodeStatusReady {
		return false, "node is not ready for placements", nil
	} else if node.SchedulingEligibility == structs.NodeSchedulingIneligible {
		return false, "node is not eligible for scheduling", nil
	} else if node.Drain {
		// Deprecate in favor of scheduling eligibility and remove post-0.8
		return false, "node is draining", nil
	}

	// Get the existing allocations that are non-terminal
	existingAlloc, err := snap.AllocsByNodeTerminal(ws, nodeID, false)
	if err != nil {
		return false, "", fmt.Errorf("failed to get existing allocations for '%s': %v", nodeID, err)
	}

	// Determine the proposed allocations by first removing allocations that
	// are planned evictions and then adding the new allocations.
	var remove []*structs.Allocation
	if update := plan.NodeUpdate[nodeID]; len(update) > 0 {
		remove = append(remove, update...)
	}

	// Remove any preempted allocs
	if preempted := plan.NodePreemptions[nodeID]; len(preempted) > 0 {
		remove = append(remove, preempted...)
	}

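	// Allocations being updated appear both in the existing set and in
	// NodeAllocation; remove the existing versions first so each allocation
	// is counted once, with its updated resources, in the proposed set.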
	if updated := plan.NodeAllocation[nodeID]; len(updated) > 0 {
		remove = append(remove, updated...)
	}
	proposed := structs.RemoveAllocs(existingAlloc, remove)
	proposed = append(proposed, plan.NodeAllocation[nodeID]...)

	// Check if these allocations fit
	fit, reason, _, err := structs.AllocsFit(node, proposed, nil, true)
	return fit, reason, err
}

func max(a, b uint64) uint64 {
	if a > b {
		return a
	}
	return b
}