github.com/diptanu/nomad@v0.5.7-0.20170516172507-d72e86cbe3d9/nomad/blocked_evals.go (about) 1 package nomad 2 3 import ( 4 "sync" 5 "time" 6 7 "github.com/armon/go-metrics" 8 "github.com/hashicorp/consul/lib" 9 "github.com/hashicorp/nomad/nomad/structs" 10 ) 11 12 const ( 13 // unblockBuffer is the buffer size for the unblock channel. The buffer 14 // should be large to ensure that the FSM doesn't block when calling Unblock 15 // as this would apply back-pressure on Raft. 16 unblockBuffer = 8096 17 ) 18 19 // BlockedEvals is used to track evaluations that shouldn't be queued until a 20 // certain class of nodes becomes available. An evaluation is put into the 21 // blocked state when it is run through the scheduler and produced failed 22 // allocations. It is unblocked when the capacity of a node that could run the 23 // failed allocation becomes available. 24 type BlockedEvals struct { 25 evalBroker *EvalBroker 26 enabled bool 27 stats *BlockedStats 28 l sync.RWMutex 29 30 // captured is the set of evaluations that are captured by computed node 31 // classes. 32 captured map[string]wrappedEval 33 34 // escaped is the set of evaluations that have escaped computed node 35 // classes. 36 escaped map[string]wrappedEval 37 38 // unblockCh is used to buffer unblocking of evaluations. 39 capacityChangeCh chan *capacityUpdate 40 41 // jobs is the map of blocked job and is used to ensure that only one 42 // blocked eval exists for each job. The value is the blocked evaluation ID. 43 jobs map[string]string 44 45 // unblockIndexes maps computed node classes to the index in which they were 46 // unblocked. This is used to check if an evaluation could have been 47 // unblocked between the time they were in the scheduler and the time they 48 // are being blocked. 49 unblockIndexes map[string]uint64 50 51 // duplicates is the set of evaluations for jobs that had pre-existing 52 // blocked evaluations. These should be marked as cancelled since only one 53 // blocked eval is neeeded per job. 54 duplicates []*structs.Evaluation 55 56 // duplicateCh is used to signal that a duplicate eval was added to the 57 // duplicate set. It can be used to unblock waiting callers looking for 58 // duplicates. 59 duplicateCh chan struct{} 60 61 // stopCh is used to stop any created goroutines. 62 stopCh chan struct{} 63 } 64 65 // capacityUpdate stores unblock data. 66 type capacityUpdate struct { 67 computedClass string 68 index uint64 69 } 70 71 // wrappedEval captures both the evaluation and the optional token 72 type wrappedEval struct { 73 eval *structs.Evaluation 74 token string 75 } 76 77 // BlockedStats returns all the stats about the blocked eval tracker. 78 type BlockedStats struct { 79 // TotalEscaped is the total number of blocked evaluations that have escaped 80 // computed node classes. 81 TotalEscaped int 82 83 // TotalBlocked is the total number of blocked evaluations. 84 TotalBlocked int 85 } 86 87 // NewBlockedEvals creates a new blocked eval tracker that will enqueue 88 // unblocked evals into the passed broker. 89 func NewBlockedEvals(evalBroker *EvalBroker) *BlockedEvals { 90 return &BlockedEvals{ 91 evalBroker: evalBroker, 92 captured: make(map[string]wrappedEval), 93 escaped: make(map[string]wrappedEval), 94 jobs: make(map[string]string), 95 unblockIndexes: make(map[string]uint64), 96 capacityChangeCh: make(chan *capacityUpdate, unblockBuffer), 97 duplicateCh: make(chan struct{}, 1), 98 stopCh: make(chan struct{}), 99 stats: new(BlockedStats), 100 } 101 } 102 103 // Enabled is used to check if the broker is enabled. 104 func (b *BlockedEvals) Enabled() bool { 105 b.l.RLock() 106 defer b.l.RUnlock() 107 return b.enabled 108 } 109 110 // SetEnabled is used to control if the blocked eval tracker is enabled. The 111 // tracker should only be enabled on the active leader. 112 func (b *BlockedEvals) SetEnabled(enabled bool) { 113 b.l.Lock() 114 if b.enabled == enabled { 115 // No-op 116 b.l.Unlock() 117 return 118 } else if enabled { 119 go b.watchCapacity() 120 } else { 121 close(b.stopCh) 122 } 123 b.enabled = enabled 124 b.l.Unlock() 125 if !enabled { 126 b.Flush() 127 } 128 } 129 130 // Block tracks the passed evaluation and enqueues it into the eval broker when 131 // a suitable node calls unblock. 132 func (b *BlockedEvals) Block(eval *structs.Evaluation) { 133 b.processBlock(eval, "") 134 } 135 136 // Reblock tracks the passed evaluation and enqueues it into the eval broker when 137 // a suitable node calls unblock. Reblock should be used over Block when the 138 // blocking is occurring by an outstanding evaluation. The token is the 139 // evaluation's token. 140 func (b *BlockedEvals) Reblock(eval *structs.Evaluation, token string) { 141 b.processBlock(eval, token) 142 } 143 144 // processBlock is the implementation of blocking an evaluation. It supports 145 // taking an optional evaluation token to use when reblocking an evaluation that 146 // may be outstanding. 147 func (b *BlockedEvals) processBlock(eval *structs.Evaluation, token string) { 148 b.l.Lock() 149 defer b.l.Unlock() 150 151 // Do nothing if not enabled 152 if !b.enabled { 153 return 154 } 155 156 // Check if the job already has a blocked evaluation. If it does add it to 157 // the list of duplicates. We omly ever want one blocked evaluation per job, 158 // otherwise we would create unnecessary work for the scheduler as multiple 159 // evals for the same job would be run, all producing the same outcome. 160 if _, existing := b.jobs[eval.JobID]; existing { 161 b.duplicates = append(b.duplicates, eval) 162 163 // Unblock any waiter. 164 select { 165 case b.duplicateCh <- struct{}{}: 166 default: 167 } 168 169 return 170 } 171 172 // Check if the eval missed an unblock while it was in the scheduler at an 173 // older index. The scheduler could have been invoked with a snapshot of 174 // state that was prior to additional capacity being added or allocations 175 // becoming terminal. 176 if b.missedUnblock(eval) { 177 // Just re-enqueue the eval immediately. We pass the token so that the 178 // eval_broker can properly handle the case in which the evaluation is 179 // still outstanding. 180 b.evalBroker.EnqueueAll(map[*structs.Evaluation]string{eval: token}) 181 return 182 } 183 184 // Mark the job as tracked. 185 b.stats.TotalBlocked++ 186 b.jobs[eval.JobID] = eval.ID 187 188 // Wrap the evaluation, capturing its token. 189 wrapped := wrappedEval{ 190 eval: eval, 191 token: token, 192 } 193 194 // If the eval has escaped, meaning computed node classes could not capture 195 // the constraints of the job, we store the eval separately as we have to 196 // unblock it whenever node capacity changes. This is because we don't know 197 // what node class is feasible for the jobs constraints. 198 if eval.EscapedComputedClass { 199 b.escaped[eval.ID] = wrapped 200 b.stats.TotalEscaped++ 201 return 202 } 203 204 // Add the eval to the set of blocked evals whose jobs constraints are 205 // captured by computed node class. 206 b.captured[eval.ID] = wrapped 207 } 208 209 // missedUnblock returns whether an evaluation missed an unblock while it was in 210 // the scheduler. Since the scheduler can operate at an index in the past, the 211 // evaluation may have been processed missing data that would allow it to 212 // complete. This method returns if that is the case and should be called with 213 // the lock held. 214 func (b *BlockedEvals) missedUnblock(eval *structs.Evaluation) bool { 215 var max uint64 = 0 216 for class, index := range b.unblockIndexes { 217 // Calculate the max unblock index 218 if max < index { 219 max = index 220 } 221 222 elig, ok := eval.ClassEligibility[class] 223 if !ok && eval.SnapshotIndex < index { 224 // The evaluation was processed and did not encounter this class 225 // because it was added after it was processed. Thus for correctness 226 // we need to unblock it. 227 return true 228 } 229 230 // The evaluation could use the computed node class and the eval was 231 // processed before the last unblock. 232 if elig && eval.SnapshotIndex < index { 233 return true 234 } 235 } 236 237 // If the evaluation has escaped, and the map contains an index older than 238 // the evaluations, it should be unblocked. 239 if eval.EscapedComputedClass && eval.SnapshotIndex < max { 240 return true 241 } 242 243 // The evaluation is ahead of all recent unblocks. 244 return false 245 } 246 247 // Untrack causes any blocked evaluation for the passed job to be no longer 248 // tracked. Untrack is called when there is a successful evaluation for the job 249 // and a blocked evaluation is no longer needed. 250 func (b *BlockedEvals) Untrack(jobID string) { 251 b.l.Lock() 252 defer b.l.Unlock() 253 254 // Do nothing if not enabled 255 if !b.enabled { 256 return 257 } 258 259 // Get the evaluation ID to cancel 260 evalID, ok := b.jobs[jobID] 261 if !ok { 262 // No blocked evaluation so exit 263 return 264 } 265 266 // Attempt to delete the evaluation 267 if w, ok := b.captured[evalID]; ok { 268 delete(b.jobs, w.eval.JobID) 269 delete(b.captured, evalID) 270 b.stats.TotalBlocked-- 271 } 272 273 if w, ok := b.escaped[evalID]; ok { 274 delete(b.jobs, w.eval.JobID) 275 delete(b.escaped, evalID) 276 b.stats.TotalEscaped-- 277 b.stats.TotalBlocked-- 278 } 279 } 280 281 // Unblock causes any evaluation that could potentially make progress on a 282 // capacity change on the passed computed node class to be enqueued into the 283 // eval broker. 284 func (b *BlockedEvals) Unblock(computedClass string, index uint64) { 285 b.l.Lock() 286 287 // Do nothing if not enabled 288 if !b.enabled { 289 b.l.Unlock() 290 return 291 } 292 293 // Store the index in which the unblock happened. We use this on subsequent 294 // block calls in case the evaluation was in the scheduler when a trigger 295 // occurred. 296 b.unblockIndexes[computedClass] = index 297 b.l.Unlock() 298 299 b.capacityChangeCh <- &capacityUpdate{ 300 computedClass: computedClass, 301 index: index, 302 } 303 } 304 305 // watchCapacity is a long lived function that watches for capacity changes in 306 // nodes and unblocks the correct set of evals. 307 func (b *BlockedEvals) watchCapacity() { 308 for { 309 select { 310 case <-b.stopCh: 311 return 312 case update := <-b.capacityChangeCh: 313 b.unblock(update.computedClass, update.index) 314 } 315 } 316 } 317 318 // unblock unblocks all blocked evals that could run on the passed computed node 319 // class. 320 func (b *BlockedEvals) unblock(computedClass string, index uint64) { 321 b.l.Lock() 322 defer b.l.Unlock() 323 324 // Protect against the case of a flush. 325 if !b.enabled { 326 return 327 } 328 329 // Every eval that has escaped computed node class has to be unblocked 330 // because any node could potentially be feasible. 331 numEscaped := len(b.escaped) 332 unblocked := make(map[*structs.Evaluation]string, lib.MaxInt(numEscaped, 4)) 333 if numEscaped != 0 { 334 for id, wrapped := range b.escaped { 335 unblocked[wrapped.eval] = wrapped.token 336 delete(b.escaped, id) 337 delete(b.jobs, wrapped.eval.JobID) 338 } 339 } 340 341 // We unblock any eval that is explicitly eligible for the computed class 342 // and also any eval that is not eligible or uneligible. This signifies that 343 // when the evaluation was originally run through the scheduler, that it 344 // never saw a node with the given computed class and thus needs to be 345 // unblocked for correctness. 346 for id, wrapped := range b.captured { 347 if elig, ok := wrapped.eval.ClassEligibility[computedClass]; ok && !elig { 348 // Can skip because the eval has explicitly marked the node class 349 // as ineligible. 350 continue 351 } 352 353 // The computed node class has never been seen by the eval so we unblock 354 // it. 355 unblocked[wrapped.eval] = wrapped.token 356 delete(b.jobs, wrapped.eval.JobID) 357 delete(b.captured, id) 358 } 359 360 if l := len(unblocked); l != 0 { 361 // Update the counters 362 b.stats.TotalEscaped = 0 363 b.stats.TotalBlocked -= l 364 365 // Enqueue all the unblocked evals into the broker. 366 b.evalBroker.EnqueueAll(unblocked) 367 } 368 } 369 370 // UnblockFailed unblocks all blocked evaluation that were due to scheduler 371 // failure. 372 func (b *BlockedEvals) UnblockFailed() { 373 b.l.Lock() 374 defer b.l.Unlock() 375 376 // Do nothing if not enabled 377 if !b.enabled { 378 return 379 } 380 381 unblocked := make(map[*structs.Evaluation]string, 4) 382 for id, wrapped := range b.captured { 383 if wrapped.eval.TriggeredBy == structs.EvalTriggerMaxPlans { 384 unblocked[wrapped.eval] = wrapped.token 385 delete(b.captured, id) 386 delete(b.jobs, wrapped.eval.JobID) 387 } 388 } 389 390 for id, wrapped := range b.escaped { 391 if wrapped.eval.TriggeredBy == structs.EvalTriggerMaxPlans { 392 unblocked[wrapped.eval] = wrapped.token 393 delete(b.escaped, id) 394 delete(b.jobs, wrapped.eval.JobID) 395 b.stats.TotalEscaped -= 1 396 } 397 } 398 399 if l := len(unblocked); l > 0 { 400 b.stats.TotalBlocked -= l 401 b.evalBroker.EnqueueAll(unblocked) 402 } 403 } 404 405 // GetDuplicates returns all the duplicate evaluations and blocks until the 406 // passed timeout. 407 func (b *BlockedEvals) GetDuplicates(timeout time.Duration) []*structs.Evaluation { 408 var timeoutTimer *time.Timer 409 var timeoutCh <-chan time.Time 410 SCAN: 411 b.l.Lock() 412 if len(b.duplicates) != 0 { 413 dups := b.duplicates 414 b.duplicates = nil 415 b.l.Unlock() 416 return dups 417 } 418 b.l.Unlock() 419 420 // Create the timer 421 if timeoutTimer == nil && timeout != 0 { 422 timeoutTimer = time.NewTimer(timeout) 423 timeoutCh = timeoutTimer.C 424 defer timeoutTimer.Stop() 425 } 426 427 select { 428 case <-b.stopCh: 429 return nil 430 case <-timeoutCh: 431 return nil 432 case <-b.duplicateCh: 433 goto SCAN 434 } 435 } 436 437 // Flush is used to clear the state of blocked evaluations. 438 func (b *BlockedEvals) Flush() { 439 b.l.Lock() 440 defer b.l.Unlock() 441 442 // Reset the blocked eval tracker. 443 b.stats.TotalEscaped = 0 444 b.stats.TotalBlocked = 0 445 b.captured = make(map[string]wrappedEval) 446 b.escaped = make(map[string]wrappedEval) 447 b.jobs = make(map[string]string) 448 b.duplicates = nil 449 b.capacityChangeCh = make(chan *capacityUpdate, unblockBuffer) 450 b.stopCh = make(chan struct{}) 451 b.duplicateCh = make(chan struct{}, 1) 452 } 453 454 // Stats is used to query the state of the blocked eval tracker. 455 func (b *BlockedEvals) Stats() *BlockedStats { 456 // Allocate a new stats struct 457 stats := new(BlockedStats) 458 459 b.l.RLock() 460 defer b.l.RUnlock() 461 462 // Copy all the stats 463 stats.TotalEscaped = b.stats.TotalEscaped 464 stats.TotalBlocked = b.stats.TotalBlocked 465 return stats 466 } 467 468 // EmitStats is used to export metrics about the blocked eval tracker while enabled 469 func (b *BlockedEvals) EmitStats(period time.Duration, stopCh chan struct{}) { 470 for { 471 select { 472 case <-time.After(period): 473 stats := b.Stats() 474 metrics.SetGauge([]string{"nomad", "blocked_evals", "total_blocked"}, float32(stats.TotalBlocked)) 475 metrics.SetGauge([]string{"nomad", "blocked_evals", "total_escaped"}, float32(stats.TotalEscaped)) 476 case <-stopCh: 477 return 478 } 479 } 480 }