github.com/kardianos/nomad@v0.1.3-0.20151022182107-b13df73ee850/scheduler/generic_sched.go

package scheduler

import (
	"fmt"
	"log"

	"github.com/hashicorp/nomad/nomad/structs"
)

const (
	// maxServiceScheduleAttempts is used to limit the number of times
	// we will attempt to schedule if we continue to hit conflicts for services.
	maxServiceScheduleAttempts = 5

	// maxBatchScheduleAttempts is used to limit the number of times
	// we will attempt to schedule if we continue to hit conflicts for batch.
	maxBatchScheduleAttempts = 2

	// allocNotNeeded is the status used when a job no longer requires an allocation
	allocNotNeeded = "alloc not needed due to job update"

	// allocMigrating is the status used when we must migrate an allocation
	allocMigrating = "alloc is being migrated"

	// allocUpdating is the status used when a job requires an update
	allocUpdating = "alloc is being updated due to job update"

	// allocInPlace is the status used when speculating on an in-place update
	allocInPlace = "alloc updating in-place"
)

// SetStatusError is used to set the status of the evaluation to the given error
type SetStatusError struct {
	Err        error
	EvalStatus string
}

func (s *SetStatusError) Error() string {
	return s.Err.Error()
}

// GenericScheduler is used for 'service' and 'batch' type jobs. This scheduler is
// designed for long-lived services, and as such spends more time attempting
// to make a high quality placement. This is the primary scheduler for
// most workloads. It also supports a 'batch' mode to optimize for fast decision
// making at the cost of quality.
type GenericScheduler struct {
	logger  *log.Logger
	state   State
	planner Planner
	batch   bool

	eval  *structs.Evaluation
	job   *structs.Job
	plan  *structs.Plan
	ctx   *EvalContext
	stack *GenericStack

	limitReached bool
	nextEval     *structs.Evaluation
}

// NewServiceScheduler is a factory function to instantiate a new service scheduler
func NewServiceScheduler(logger *log.Logger, state State, planner Planner) Scheduler {
	s := &GenericScheduler{
		logger:  logger,
		state:   state,
		planner: planner,
		batch:   false,
	}
	return s
}

// NewBatchScheduler is a factory function to instantiate a new batch scheduler
func NewBatchScheduler(logger *log.Logger, state State, planner Planner) Scheduler {
	s := &GenericScheduler{
		logger:  logger,
		state:   state,
		planner: planner,
		batch:   true,
	}
	return s
}

// Process is used to handle a single evaluation
func (s *GenericScheduler) Process(eval *structs.Evaluation) error {
	// Store the evaluation
	s.eval = eval

	// Verify the evaluation trigger reason is understood
	switch eval.TriggeredBy {
	case structs.EvalTriggerJobRegister, structs.EvalTriggerNodeUpdate,
		structs.EvalTriggerJobDeregister, structs.EvalTriggerRollingUpdate:
	default:
		desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason",
			eval.TriggeredBy)
		return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusFailed, desc)
	}

	// Retry up to the maxScheduleAttempts
	limit := maxServiceScheduleAttempts
	if s.batch {
		limit = maxBatchScheduleAttempts
	}
	if err := retryMax(limit, s.process); err != nil {
		if statusErr, ok := err.(*SetStatusError); ok {
			return setStatus(s.logger, s.planner, s.eval, s.nextEval, statusErr.EvalStatus, err.Error())
		}
		return err
	}

	// Update the status to complete
	return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusComplete, "")
}

// process is wrapped in retryMax to iteratively run the handler until we have no
// further work or we've made the maximum number of attempts.
func (s *GenericScheduler) process() (bool, error) {
	// Lookup the Job by ID
	var err error
	s.job, err = s.state.JobByID(s.eval.JobID)
	if err != nil {
		return false, fmt.Errorf("failed to get job '%s': %v",
			s.eval.JobID, err)
	}

	// Create a plan
	s.plan = s.eval.MakePlan(s.job)

	// Create an evaluation context
	s.ctx = NewEvalContext(s.state, s.plan, s.logger)

	// Construct the placement stack
	s.stack = NewGenericStack(s.batch, s.ctx)
	if s.job != nil {
		s.stack.SetJob(s.job)
	}

	// Compute the target job allocations
	if err := s.computeJobAllocs(); err != nil {
		s.logger.Printf("[ERR] sched: %#v: %v", s.eval, err)
		return false, err
	}

	// If the plan is a no-op, we can bail
	if s.plan.IsNoOp() {
		return true, nil
	}

	// If the limit of placements was reached we need to create an evaluation
	// to pick up from here after the stagger period.
	if s.limitReached && s.nextEval == nil {
		s.nextEval = s.eval.NextRollingEval(s.job.Update.Stagger)
		if err := s.planner.CreateEval(s.nextEval); err != nil {
			s.logger.Printf("[ERR] sched: %#v failed to make next eval for rolling update: %v", s.eval, err)
			return false, err
		}
		s.logger.Printf("[DEBUG] sched: %#v: rolling update limit reached, next eval '%s' created", s.eval, s.nextEval.ID)
	}

	// Submit the plan
	result, newState, err := s.planner.SubmitPlan(s.plan)
	if err != nil {
		return false, err
	}

	// If we got a state refresh, try again since we have stale data
	if newState != nil {
		s.logger.Printf("[DEBUG] sched: %#v: refresh forced", s.eval)
		s.state = newState
		return false, nil
	}

	// Try again if the plan was not fully committed, potential conflict
	fullCommit, expected, actual := result.FullCommit(s.plan)
	if !fullCommit {
		s.logger.Printf("[DEBUG] sched: %#v: attempted %d placements, %d placed",
			s.eval, expected, actual)
		return false, nil
	}

	// Success!
	return true, nil
}
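// Note: process is driven by the retryMax helper defined elsewhere in this
// package. As a rough sketch of the contract it implies (a callback returning
// done/err, retried up to max attempts, failing the eval once the budget is
// exhausted), it could look something like the following; the actual
// implementation lives in the package's scheduling utilities and may differ:
//
//	func retryMax(max int, cb func() (bool, error)) error {
//		attempts := 0
//		for attempts < max {
//			done, err := cb()
//			if err != nil {
//				return err
//			}
//			if done {
//				return nil
//			}
//			attempts++
//		}
//		return &SetStatusError{
//			Err:        fmt.Errorf("maximum attempts reached (%d)", max),
//			EvalStatus: structs.EvalStatusFailed,
//		}
//	}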
// computeJobAllocs is used to reconcile differences between the job,
// existing allocations and node status to update the allocations.
func (s *GenericScheduler) computeJobAllocs() error {
	// Materialize all the task groups, job could be missing if deregistered
	var groups map[string]*structs.TaskGroup
	if s.job != nil {
		groups = materializeTaskGroups(s.job)
	}

	// Lookup the allocations by JobID
	allocs, err := s.state.AllocsByJob(s.eval.JobID)
	if err != nil {
		return fmt.Errorf("failed to get allocs for job '%s': %v",
			s.eval.JobID, err)
	}

	// Filter out the allocations in a terminal state
	allocs = structs.FilterTerminalAllocs(allocs)

	// Determine the tainted nodes containing job allocs
	tainted, err := taintedNodes(s.state, allocs)
	if err != nil {
		return fmt.Errorf("failed to get tainted nodes for job '%s': %v",
			s.eval.JobID, err)
	}

	// Diff the required and existing allocations
	diff := diffAllocs(s.job, tainted, groups, allocs)
	s.logger.Printf("[DEBUG] sched: %#v: %#v", s.eval, diff)

	// Add all the allocs to stop
	for _, e := range diff.stop {
		s.plan.AppendUpdate(e.Alloc, structs.AllocDesiredStatusStop, allocNotNeeded)
	}

	// Attempt to do the upgrades in place
	diff.update = inplaceUpdate(s.ctx, s.eval, s.job, s.stack, diff.update)

	// Check if a rolling upgrade strategy is being used
	limit := len(diff.update) + len(diff.migrate)
	if s.job != nil && s.job.Update.Rolling() {
		limit = s.job.Update.MaxParallel
	}

	// Treat migrations as an eviction and a new placement.
	s.limitReached = evictAndPlace(s.ctx, diff, diff.migrate, allocMigrating, &limit)

	// Treat non in-place updates as an eviction and new placement.
	// Combine the result with the migration pass above rather than overwriting
	// it, so a limit hit while migrating still triggers the follow-up eval.
	updateLimitReached := evictAndPlace(s.ctx, diff, diff.update, allocUpdating, &limit)
	s.limitReached = s.limitReached || updateLimitReached

	// Nothing remaining to do if placement is not required
	if len(diff.place) == 0 {
		return nil
	}

	// Compute the placements
	return s.computePlacements(diff.place)
}
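// Note: evictAndPlace (defined in this package's scheduling utilities) is
// what enforces the rolling-update limit above. As a hedged sketch of its
// likely shape, assuming it stops each allocation, queues it for
// re-placement, and decrements the shared limit while reporting whether the
// limit was exhausted (Context, diffResult, and ctx.Plan() are assumed names):
//
//	func evictAndPlace(ctx Context, diff *diffResult, allocs []allocTuple, desc string, limit *int) bool {
//		n := len(allocs)
//		for i := 0; i < n && i < *limit; i++ {
//			a := allocs[i]
//			ctx.Plan().AppendUpdate(a.Alloc, structs.AllocDesiredStatusStop, desc)
//			diff.place = append(diff.place, a)
//		}
//		if n <= *limit {
//			*limit -= n
//			return false
//		}
//		*limit = 0
//		return true
//	}
//
// The exact implementation may differ; this only illustrates why limit is
// passed by pointer and why the boolean result feeds s.limitReached.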
// computePlacements computes placements for allocations
func (s *GenericScheduler) computePlacements(place []allocTuple) error {
	// Get the base nodes
	nodes, err := readyNodesInDCs(s.state, s.job.Datacenters)
	if err != nil {
		return err
	}

	// Update the set of placement nodes
	s.stack.SetNodes(nodes)

	// Track the failed task groups so that we can coalesce
	// the failures together to avoid creating many failed allocs.
	failedTG := make(map[*structs.TaskGroup]*structs.Allocation)

	for _, missing := range place {
		// Check if this task group has already failed
		if alloc, ok := failedTG[missing.TaskGroup]; ok {
			alloc.Metrics.CoalescedFailures += 1
			continue
		}

		// Attempt to match the task group
		option, size := s.stack.Select(missing.TaskGroup)

		// Create an allocation for this
		alloc := &structs.Allocation{
			ID:        structs.GenerateUUID(),
			EvalID:    s.eval.ID,
			Name:      missing.Name,
			JobID:     s.job.ID,
			Job:       s.job,
			TaskGroup: missing.TaskGroup.Name,
			Resources: size,
			Metrics:   s.ctx.Metrics(),
		}

		// Set fields based on if we found an allocation option
		if option != nil {
			alloc.NodeID = option.Node.ID
			alloc.TaskResources = option.TaskResources
			alloc.DesiredStatus = structs.AllocDesiredStatusRun
			alloc.ClientStatus = structs.AllocClientStatusPending
			s.plan.AppendAlloc(alloc)
		} else {
			alloc.DesiredStatus = structs.AllocDesiredStatusFailed
			alloc.DesiredDescription = "failed to find a node for placement"
			alloc.ClientStatus = structs.AllocClientStatusFailed
			s.plan.AppendFailed(alloc)
			failedTG[missing.TaskGroup] = alloc
		}
	}
	return nil
}
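// Illustrative usage (not part of this file): a caller that already has State
// and Planner implementations plus a pending evaluation would wire the
// scheduler up roughly like this; logger, state, planner, and eval are
// assumed to exist in the caller's scope:
//
//	sched := NewServiceScheduler(logger, state, planner)
//	if err := sched.Process(eval); err != nil {
//		logger.Printf("[ERR] sched: failed to process eval %s: %v", eval.ID, err)
//	}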