github.com/hspak/nomad@v0.7.2-0.20180309000617-bc4ae22a39a5/scheduler/reconcile_util.go

package scheduler

import (
	"fmt"
	"sort"
	"strings"
	"time"

	"github.com/hashicorp/nomad/nomad/structs"
)

// placementResult is an allocation that must be placed. It potentially has a
// previous allocation attached to it that should be stopped only if the
// paired placement is complete. This gives an atomic place/stop behavior that
// prevents an impossible resource ask, issued as part of a rolling update,
// from wiping the job out.
type placementResult interface {
	// TaskGroup returns the task group the placement is for
	TaskGroup() *structs.TaskGroup

	// Name returns the name of the desired allocation
	Name() string

	// Canary returns whether the placement should be a canary
	Canary() bool

	// PreviousAllocation returns the previous allocation
	PreviousAllocation() *structs.Allocation

	// IsRescheduling returns whether the placement was rescheduling a failed allocation
	IsRescheduling() bool

	// StopPreviousAlloc returns whether the previous allocation should be
	// stopped and, if so, the status description.
	StopPreviousAlloc() (bool, string)
}

// allocStopResult contains the information required to stop a single allocation
type allocStopResult struct {
	alloc             *structs.Allocation
	clientStatus      string
	statusDescription string
}

// allocPlaceResult contains the information required to place a single
// allocation
type allocPlaceResult struct {
	name          string
	canary        bool
	taskGroup     *structs.TaskGroup
	previousAlloc *structs.Allocation
	reschedule    bool
}

func (a allocPlaceResult) TaskGroup() *structs.TaskGroup           { return a.taskGroup }
func (a allocPlaceResult) Name() string                            { return a.name }
func (a allocPlaceResult) Canary() bool                            { return a.canary }
func (a allocPlaceResult) PreviousAllocation() *structs.Allocation { return a.previousAlloc }
func (a allocPlaceResult) IsRescheduling() bool                    { return a.reschedule }
func (a allocPlaceResult) StopPreviousAlloc() (bool, string)       { return false, "" }

// allocDestructiveResult contains the information required to do a destructive
// update. Destructive changes should be applied atomically, as in the old alloc
// is only stopped if the new one can be placed.
type allocDestructiveResult struct {
	placeName             string
	placeTaskGroup        *structs.TaskGroup
	stopAlloc             *structs.Allocation
	stopStatusDescription string
}

func (a allocDestructiveResult) TaskGroup() *structs.TaskGroup           { return a.placeTaskGroup }
func (a allocDestructiveResult) Name() string                            { return a.placeName }
func (a allocDestructiveResult) Canary() bool                            { return false }
func (a allocDestructiveResult) PreviousAllocation() *structs.Allocation { return a.stopAlloc }
func (a allocDestructiveResult) IsRescheduling() bool                    { return false }
func (a allocDestructiveResult) StopPreviousAlloc() (bool, string) {
	return true, a.stopStatusDescription
}
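
// The sketch below is illustrative only and is not part of the reconciler: it
// shows how a caller could honor the atomic place/stop contract above by only
// queueing a stop for the previous allocation when StopPreviousAlloc reports
// that one is required. The helper name collectStops is hypothetical.
func collectStops(placements []placementResult) []allocStopResult {
	var stops []allocStopResult
	for _, p := range placements {
		if stop, desc := p.StopPreviousAlloc(); stop {
			// The previous alloc is only stopped alongside its paired
			// placement, never on its own.
			stops = append(stops, allocStopResult{
				alloc:             p.PreviousAllocation(),
				statusDescription: desc,
			})
		}
	}
	return stops
}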

// allocMatrix is a mapping of task groups to their allocation set.
type allocMatrix map[string]allocSet

// newAllocMatrix takes a job and the existing allocations for the job and
// creates an allocMatrix
func newAllocMatrix(job *structs.Job, allocs []*structs.Allocation) allocMatrix {
	m := allocMatrix(make(map[string]allocSet))
	for _, a := range allocs {
		s, ok := m[a.TaskGroup]
		if !ok {
			s = make(map[string]*structs.Allocation)
			m[a.TaskGroup] = s
		}
		s[a.ID] = a
	}

	if job != nil {
		for _, tg := range job.TaskGroups {
			if _, ok := m[tg.Name]; !ok {
				m[tg.Name] = make(map[string]*structs.Allocation)
			}
		}
	}
	return m
}

// allocSet is a set of allocations with a series of helper functions defined
// that help reconcile state.
type allocSet map[string]*structs.Allocation

// GoString provides a human-readable view of the set
func (a allocSet) GoString() string {
	if len(a) == 0 {
		return "[]"
	}

	start := fmt.Sprintf("len(%d) [\n", len(a))
	var s []string
	for k, v := range a {
		s = append(s, fmt.Sprintf("%q: %v", k, v.Name))
	}
	return start + strings.Join(s, "\n") + "]"
}

// nameSet returns the set of allocation names
func (a allocSet) nameSet() map[string]struct{} {
	names := make(map[string]struct{}, len(a))
	for _, alloc := range a {
		names[alloc.Name] = struct{}{}
	}
	return names
}

// nameOrder returns the allocations sorted by their name index
func (a allocSet) nameOrder() []*structs.Allocation {
	allocs := make([]*structs.Allocation, 0, len(a))
	for _, alloc := range a {
		allocs = append(allocs, alloc)
	}
	sort.Slice(allocs, func(i, j int) bool {
		return allocs[i].Index() < allocs[j].Index()
	})
	return allocs
}

// difference returns a new allocSet that has all the existing items except those
// contained within the other allocation sets
func (a allocSet) difference(others ...allocSet) allocSet {
	diff := make(map[string]*structs.Allocation)
OUTER:
	for k, v := range a {
		for _, other := range others {
			if _, ok := other[k]; ok {
				continue OUTER
			}
		}
		diff[k] = v
	}
	return diff
}

// union returns a new allocSet that has the union of the two allocSets.
// Conflicts prefer the last passed allocSet containing the value
func (a allocSet) union(others ...allocSet) allocSet {
	union := make(map[string]*structs.Allocation, len(a))
	order := []allocSet{a}
	order = append(order, others...)

	for _, set := range order {
		for k, v := range set {
			union[k] = v
		}
	}

	return union
}

// fromKeys returns an alloc set matching the passed keys
func (a allocSet) fromKeys(keys ...[]string) allocSet {
	from := make(map[string]*structs.Allocation)
	for _, set := range keys {
		for _, k := range set {
			if alloc, ok := a[k]; ok {
				from[k] = alloc
			}
		}
	}
	return from
}
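
// The sketch below is illustrative only: it groups a job's allocations by task
// group with newAllocMatrix and uses the difference helper to compute, per
// group, which allocations remain after removing a set of stopped ones. The
// helper name remainingByGroup is hypothetical.
func remainingByGroup(job *structs.Job, allocs, stopped []*structs.Allocation) allocMatrix {
	stops := make(allocSet)
	for _, alloc := range stopped {
		stops[alloc.ID] = alloc
	}

	result := make(allocMatrix)
	for group, set := range newAllocMatrix(job, allocs) {
		// difference drops anything present in stops; union would instead
		// merge sets, preferring the last set passed on key conflicts.
		result[group] = set.difference(stops)
	}
	return result
}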

// filterByTainted takes a set of tainted nodes and filters the allocation set
// into three groups:
// 1. Those that exist on untainted nodes
// 2. Those that exist on nodes that are draining
// 3. Those that exist on lost nodes
func (a allocSet) filterByTainted(nodes map[string]*structs.Node) (untainted, migrate, lost allocSet) {
	untainted = make(map[string]*structs.Allocation)
	migrate = make(map[string]*structs.Allocation)
	lost = make(map[string]*structs.Allocation)
	for _, alloc := range a {
		n, ok := nodes[alloc.NodeID]
		if !ok {
			untainted[alloc.ID] = alloc
			continue
		}

		// If the job is batch and finished successfully, the fact that the
		// node is tainted does not mean it should be migrated or marked as
		// lost as the work was already successfully finished. However, for
		// service/system jobs, tasks should never complete. The check on the
		// batch type defends against client bugs.
		if alloc.Job.Type == structs.JobTypeBatch && alloc.RanSuccessfully() {
			untainted[alloc.ID] = alloc
			continue
		}
		if !alloc.TerminalStatus() {
			if n == nil || n.TerminalStatus() {
				lost[alloc.ID] = alloc
			} else {
				migrate[alloc.ID] = alloc
			}
		} else {
			untainted[alloc.ID] = alloc
		}
	}
	return
}
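
// The sketch below is illustrative only: it shows the shape of the node map
// filterByTainted expects. Only tainted nodes are included; a nil entry (or a
// node in a terminal status) marks its non-terminal allocations as lost, while
// a draining node entry marks them for migration. The allocations are assumed
// to carry their Job, and the helper name splitByTaint is hypothetical.
func splitByTaint(allocs allocSet, drainingNode *structs.Node, downNodeID string) {
	tainted := map[string]*structs.Node{
		drainingNode.ID: drainingNode, // draining: allocs here should migrate
		downNodeID:      nil,          // missing from state: allocs here are lost
	}

	untainted, migrate, lost := allocs.filterByTainted(tainted)
	fmt.Printf("untainted=%d migrate=%d lost=%d\n", len(untainted), len(migrate), len(lost))
}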

// filterByRescheduleable filters the allocation set to return the set of allocations that are either
// terminal or running, and a set of allocations that must be rescheduled
func (a allocSet) filterByRescheduleable(isBatch bool, reschedulePolicy *structs.ReschedulePolicy) (untainted, reschedule allocSet) {
	untainted = make(map[string]*structs.Allocation)
	reschedule = make(map[string]*structs.Allocation)

	now := time.Now()
	for _, alloc := range a {
		if isBatch {
			// Allocs from batch jobs should be filtered when the desired status
			// is terminal and the client did not finish, or when the client
			// status is failed, so that they will be replaced. If they are
			// complete but not failed, they shouldn't be replaced.
			switch alloc.DesiredStatus {
			case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict:
				if alloc.RanSuccessfully() {
					untainted[alloc.ID] = alloc
				}
				continue
			default:
			}
			if alloc.NextAllocation == "" {
				if alloc.ShouldReschedule(reschedulePolicy, now) {
					reschedule[alloc.ID] = alloc
				} else {
					untainted[alloc.ID] = alloc
				}
			}
		} else {
			// Ignore allocs that have already been rescheduled
			if alloc.NextAllocation == "" {
				// Ignore allocs whose desired state is stop/evict;
				// everything else is either rescheduleable or untainted
				if alloc.ShouldReschedule(reschedulePolicy, now) {
					reschedule[alloc.ID] = alloc
				} else if alloc.DesiredStatus != structs.AllocDesiredStatusStop && alloc.DesiredStatus != structs.AllocDesiredStatusEvict {
					untainted[alloc.ID] = alloc
				}
			}
		}
	}

	return
}

// filterByTerminal filters out terminal allocs
func filterByTerminal(untainted allocSet) (nonTerminal allocSet) {
	nonTerminal = make(map[string]*structs.Allocation)
	for id, alloc := range untainted {
		if !alloc.TerminalStatus() {
			nonTerminal[id] = alloc
		}
	}
	return
}

// filterByDeployment filters allocations into two sets, those that match the
// given deployment ID and those that don't
func (a allocSet) filterByDeployment(id string) (match, nonmatch allocSet) {
	match = make(map[string]*structs.Allocation)
	nonmatch = make(map[string]*structs.Allocation)
	for _, alloc := range a {
		if alloc.DeploymentID == id {
			match[alloc.ID] = alloc
		} else {
			nonmatch[alloc.ID] = alloc
		}
	}
	return
}

// allocNameIndex is used to select allocation names for placement or removal
// given an existing set of placed allocations.
type allocNameIndex struct {
	job, taskGroup string
	count          int
	b              structs.Bitmap
}

// newAllocNameIndex returns an allocNameIndex for use in selecting names of
// allocations to create or stop. It takes the job and task group name, desired
// count and any existing allocations as input.
func newAllocNameIndex(job, taskGroup string, count int, in allocSet) *allocNameIndex {
	return &allocNameIndex{
		count:     count,
		b:         bitmapFrom(in, uint(count)),
		job:       job,
		taskGroup: taskGroup,
	}
}

// bitmapFrom creates a bitmap from the given allocation set and a minimum size
// that may be given. The size of the bitmap is the larger of the passed minimum
// and the maximum alloc index of the passed input (byte aligned).
func bitmapFrom(input allocSet, minSize uint) structs.Bitmap {
	var max uint
	for _, a := range input {
		if num := a.Index(); num > max {
			max = num
		}
	}

	if l := uint(len(input)); minSize < l {
		minSize = l
	}

	if max < minSize {
		max = minSize
	} else if max%8 == 0 {
		// This may be possible if the job was scaled down. We want to make sure
		// that the max index is not byte-aligned otherwise we will overflow
		// the bitmap.
		max++
	}

	if max == 0 {
		max = 8
	}

	// byteAlign the count
	if remainder := max % 8; remainder != 0 {
		max = max + 8 - remainder
	}

	bitmap, err := structs.NewBitmap(max)
	if err != nil {
		panic(err)
	}

	for _, a := range input {
		bitmap.Set(a.Index())
	}

	return bitmap
}
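
// The sketch below is illustrative only: it builds a tiny allocSet whose names
// carry the indexes 0 and 10 and shows how bitmapFrom sizes the bitmap (byte
// aligned, at least the requested minimum). The job/group names and the helper
// name bitmapSketch are hypothetical.
func bitmapSketch() {
	set := make(allocSet)
	for _, idx := range []uint{0, 10} {
		name := structs.AllocName("example", "cache", idx)
		set[name] = &structs.Allocation{ID: name, Name: name}
	}

	// Max index is 10 and the minimum is 4, so the bitmap is sized to the
	// next byte boundary above 10, i.e. 16 bits.
	b := bitmapFrom(set, 4)
	fmt.Println(b.Size(), b.Check(10), b.Check(3))
}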

// Highest removes and returns the highest n used names. The returned set
// can be less than n if there aren't n names set in the index
func (a *allocNameIndex) Highest(n uint) map[string]struct{} {
	h := make(map[string]struct{}, n)
	for i := a.b.Size(); i > uint(0) && uint(len(h)) < n; i-- {
		// Use this to avoid wrapping around b/c of the unsigned int
		idx := i - 1
		if a.b.Check(idx) {
			a.b.Unset(idx)
			h[structs.AllocName(a.job, a.taskGroup, idx)] = struct{}{}
		}
	}

	return h
}

// Set sets the indexes from the passed alloc set as used
func (a *allocNameIndex) Set(set allocSet) {
	for _, alloc := range set {
		a.b.Set(alloc.Index())
	}
}

// Unset unsets all indexes of the passed alloc set as being used
func (a *allocNameIndex) Unset(as allocSet) {
	for _, alloc := range as {
		a.b.Unset(alloc.Index())
	}
}

// UnsetIndex unsets the index as having its name used
func (a *allocNameIndex) UnsetIndex(idx uint) {
	a.b.Unset(idx)
}

// NextCanaries returns the next n names for use as canaries and sets them as
// used. The existing canaries and destructive updates are also passed in.
func (a *allocNameIndex) NextCanaries(n uint, existing, destructive allocSet) []string {
	next := make([]string, 0, n)

	// Create a name index
	existingNames := existing.nameSet()

	// First select indexes from the allocations that are undergoing destructive
	// updates. This way we avoid duplicate names as they will get replaced.
	dmap := bitmapFrom(destructive, uint(a.count))
	remainder := n
	for _, idx := range dmap.IndexesInRange(true, uint(0), uint(a.count)-1) {
		name := structs.AllocName(a.job, a.taskGroup, uint(idx))
		if _, used := existingNames[name]; !used {
			next = append(next, name)
			a.b.Set(uint(idx))

			// If we have enough, return
			remainder = n - uint(len(next))
			if remainder == 0 {
				return next
			}
		}
	}

	// Get the set of unset names that can be used
	for _, idx := range a.b.IndexesInRange(false, uint(0), uint(a.count)-1) {
		name := structs.AllocName(a.job, a.taskGroup, uint(idx))
		if _, used := existingNames[name]; !used {
			next = append(next, name)
			a.b.Set(uint(idx))

			// If we have enough, return
			remainder = n - uint(len(next))
			if remainder == 0 {
				return next
			}
		}
	}

	// We have exhausted the preferred and free set, now just pick overlapping
	// indexes
	var i uint
	for i = 0; i < remainder; i++ {
		name := structs.AllocName(a.job, a.taskGroup, i)
		if _, used := existingNames[name]; !used {
			next = append(next, name)
			a.b.Set(i)

			// If we have enough, return
			remainder = n - uint(len(next))
			if remainder == 0 {
				return next
			}
		}
	}

	return next
}
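
// The sketch below is illustrative only: it builds a name index for a group
// scaled to 5, selects two canary names against an empty set of existing
// canaries and destructive updates, and then asks for the two highest used
// names as stop candidates. The job/group names and the helper name
// canarySketch are hypothetical.
func canarySketch(existing allocSet) {
	idx := newAllocNameIndex("example", "cache", 5, existing)

	// Pick two canary names; no canaries exist yet and nothing is being
	// destructively updated.
	canaries := idx.NextCanaries(2, nil, nil)

	// Ask for the two highest in-use names, e.g. to choose allocations to stop
	// when scaling down.
	toStop := idx.Highest(2)
	fmt.Println(canaries, toStop)
}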

// Next returns the next n names for use as new placements and sets them as
// used.
func (a *allocNameIndex) Next(n uint) []string {
	next := make([]string, 0, n)

	// Get the set of unset names that can be used
	remainder := n
	for _, idx := range a.b.IndexesInRange(false, uint(0), uint(a.count)-1) {
		next = append(next, structs.AllocName(a.job, a.taskGroup, uint(idx)))
		a.b.Set(uint(idx))

		// If we have enough, return
		remainder = n - uint(len(next))
		if remainder == 0 {
			return next
		}
	}

	// We have exhausted the free set, now just pick overlapping indexes
	var i uint
	for i = 0; i < remainder; i++ {
		next = append(next, structs.AllocName(a.job, a.taskGroup, i))
		a.b.Set(i)
	}

	return next
}
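
// The sketch below is illustrative only: with a desired count of 3 and one
// existing allocation at index 1, Next(2) fills the free holes and returns the
// names for indexes 0 and 2. The job/group names and the helper name
// nextSketch are hypothetical.
func nextSketch() {
	existingName := structs.AllocName("example", "cache", 1)
	existing := allocSet{
		existingName: &structs.Allocation{ID: existingName, Name: existingName},
	}

	idx := newAllocNameIndex("example", "cache", 3, existing)
	fmt.Println(idx.Next(2)) // expected: the names for indexes 0 and 2
}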