github.com/maier/nomad@v0.4.1-0.20161110003312-a9e3d0b8549d/nomad/blocked_evals.go (about) 1 package nomad 2 3 import ( 4 "sync" 5 "time" 6 7 "github.com/armon/go-metrics" 8 "github.com/hashicorp/consul/lib" 9 "github.com/hashicorp/nomad/nomad/structs" 10 ) 11 12 const ( 13 // unblockBuffer is the buffer size for the unblock channel. The buffer 14 // should be large to ensure that the FSM doesn't block when calling Unblock 15 // as this would apply back-pressure on Raft. 16 unblockBuffer = 8096 17 ) 18 19 // BlockedEvals is used to track evaluations that shouldn't be queued until a 20 // certain class of nodes becomes available. An evaluation is put into the 21 // blocked state when it is run through the scheduler and produced failed 22 // allocations. It is unblocked when the capacity of a node that could run the 23 // failed allocation becomes available. 24 type BlockedEvals struct { 25 evalBroker *EvalBroker 26 enabled bool 27 stats *BlockedStats 28 l sync.RWMutex 29 30 // captured is the set of evaluations that are captured by computed node 31 // classes. 32 captured map[string]wrappedEval 33 34 // escaped is the set of evaluations that have escaped computed node 35 // classes. 36 escaped map[string]wrappedEval 37 38 // unblockCh is used to buffer unblocking of evaluations. 39 capacityChangeCh chan *capacityUpdate 40 41 // jobs is the map of blocked job and is used to ensure that only one 42 // blocked eval exists for each job. 43 jobs map[string]struct{} 44 45 // unblockIndexes maps computed node classes to the index in which they were 46 // unblocked. This is used to check if an evaluation could have been 47 // unblocked between the time they were in the scheduler and the time they 48 // are being blocked. 49 unblockIndexes map[string]uint64 50 51 // duplicates is the set of evaluations for jobs that had pre-existing 52 // blocked evaluations. These should be marked as cancelled since only one 53 // blocked eval is neeeded per job. 54 duplicates []*structs.Evaluation 55 56 // duplicateCh is used to signal that a duplicate eval was added to the 57 // duplicate set. It can be used to unblock waiting callers looking for 58 // duplicates. 59 duplicateCh chan struct{} 60 61 // stopCh is used to stop any created goroutines. 62 stopCh chan struct{} 63 } 64 65 // capacityUpdate stores unblock data. 66 type capacityUpdate struct { 67 computedClass string 68 index uint64 69 } 70 71 // wrappedEval captures both the evaluation and the optional token 72 type wrappedEval struct { 73 eval *structs.Evaluation 74 token string 75 } 76 77 // BlockedStats returns all the stats about the blocked eval tracker. 78 type BlockedStats struct { 79 // TotalEscaped is the total number of blocked evaluations that have escaped 80 // computed node classes. 81 TotalEscaped int 82 83 // TotalBlocked is the total number of blocked evaluations. 84 TotalBlocked int 85 } 86 87 // NewBlockedEvals creates a new blocked eval tracker that will enqueue 88 // unblocked evals into the passed broker. 89 func NewBlockedEvals(evalBroker *EvalBroker) *BlockedEvals { 90 return &BlockedEvals{ 91 evalBroker: evalBroker, 92 captured: make(map[string]wrappedEval), 93 escaped: make(map[string]wrappedEval), 94 jobs: make(map[string]struct{}), 95 unblockIndexes: make(map[string]uint64), 96 capacityChangeCh: make(chan *capacityUpdate, unblockBuffer), 97 duplicateCh: make(chan struct{}, 1), 98 stopCh: make(chan struct{}), 99 stats: new(BlockedStats), 100 } 101 } 102 103 // Enabled is used to check if the broker is enabled. 104 func (b *BlockedEvals) Enabled() bool { 105 b.l.RLock() 106 defer b.l.RUnlock() 107 return b.enabled 108 } 109 110 // SetEnabled is used to control if the blocked eval tracker is enabled. The 111 // tracker should only be enabled on the active leader. 112 func (b *BlockedEvals) SetEnabled(enabled bool) { 113 b.l.Lock() 114 if b.enabled == enabled { 115 // No-op 116 b.l.Unlock() 117 return 118 } else if enabled { 119 go b.watchCapacity() 120 } else { 121 close(b.stopCh) 122 } 123 b.enabled = enabled 124 b.l.Unlock() 125 if !enabled { 126 b.Flush() 127 } 128 } 129 130 // Block tracks the passed evaluation and enqueues it into the eval broker when 131 // a suitable node calls unblock. 132 func (b *BlockedEvals) Block(eval *structs.Evaluation) { 133 b.processBlock(eval, "") 134 } 135 136 // Reblock tracks the passed evaluation and enqueues it into the eval broker when 137 // a suitable node calls unblock. Reblock should be used over Block when the 138 // blocking is occurring by an outstanding evaluation. The token is the 139 // evaluation's token. 140 func (b *BlockedEvals) Reblock(eval *structs.Evaluation, token string) { 141 b.processBlock(eval, token) 142 } 143 144 // processBlock is the implementation of blocking an evaluation. It supports 145 // taking an optional evaluation token to use when reblocking an evaluation that 146 // may be outstanding. 147 func (b *BlockedEvals) processBlock(eval *structs.Evaluation, token string) { 148 b.l.Lock() 149 defer b.l.Unlock() 150 151 // Do nothing if not enabled 152 if !b.enabled { 153 return 154 } 155 156 // Check if the job already has a blocked evaluation. If it does add it to 157 // the list of duplicates. We omly ever want one blocked evaluation per job, 158 // otherwise we would create unnecessary work for the scheduler as multiple 159 // evals for the same job would be run, all producing the same outcome. 160 if _, existing := b.jobs[eval.JobID]; existing { 161 b.duplicates = append(b.duplicates, eval) 162 163 // Unblock any waiter. 164 select { 165 case b.duplicateCh <- struct{}{}: 166 default: 167 } 168 169 return 170 } 171 172 // Check if the eval missed an unblock while it was in the scheduler at an 173 // older index. The scheduler could have been invoked with a snapshot of 174 // state that was prior to additional capacity being added or allocations 175 // becoming terminal. 176 if b.missedUnblock(eval) { 177 // Just re-enqueue the eval immediately. We pass the token so that the 178 // eval_broker can properly handle the case in which the evaluation is 179 // still outstanding. 180 b.evalBroker.EnqueueAll(map[*structs.Evaluation]string{eval: token}) 181 return 182 } 183 184 // Mark the job as tracked. 185 b.stats.TotalBlocked++ 186 b.jobs[eval.JobID] = struct{}{} 187 188 // Wrap the evaluation, capturing its token. 189 wrapped := wrappedEval{ 190 eval: eval, 191 token: token, 192 } 193 194 // If the eval has escaped, meaning computed node classes could not capture 195 // the constraints of the job, we store the eval separately as we have to 196 // unblock it whenever node capacity changes. This is because we don't know 197 // what node class is feasible for the jobs constraints. 198 if eval.EscapedComputedClass { 199 b.escaped[eval.ID] = wrapped 200 b.stats.TotalEscaped++ 201 return 202 } 203 204 // Add the eval to the set of blocked evals whose jobs constraints are 205 // captured by computed node class. 206 b.captured[eval.ID] = wrapped 207 } 208 209 // missedUnblock returns whether an evaluation missed an unblock while it was in 210 // the scheduler. Since the scheduler can operate at an index in the past, the 211 // evaluation may have been processed missing data that would allow it to 212 // complete. This method returns if that is the case and should be called with 213 // the lock held. 214 func (b *BlockedEvals) missedUnblock(eval *structs.Evaluation) bool { 215 var max uint64 = 0 216 for class, index := range b.unblockIndexes { 217 // Calculate the max unblock index 218 if max < index { 219 max = index 220 } 221 222 elig, ok := eval.ClassEligibility[class] 223 if !ok && eval.SnapshotIndex < index { 224 // The evaluation was processed and did not encounter this class 225 // because it was added after it was processed. Thus for correctness 226 // we need to unblock it. 227 return true 228 } 229 230 // The evaluation could use the computed node class and the eval was 231 // processed before the last unblock. 232 if elig && eval.SnapshotIndex < index { 233 return true 234 } 235 } 236 237 // If the evaluation has escaped, and the map contains an index older than 238 // the evaluations, it should be unblocked. 239 if eval.EscapedComputedClass && eval.SnapshotIndex < max { 240 return true 241 } 242 243 // The evaluation is ahead of all recent unblocks. 244 return false 245 } 246 247 // Unblock causes any evaluation that could potentially make progress on a 248 // capacity change on the passed computed node class to be enqueued into the 249 // eval broker. 250 func (b *BlockedEvals) Unblock(computedClass string, index uint64) { 251 b.l.Lock() 252 253 // Do nothing if not enabled 254 if !b.enabled { 255 b.l.Unlock() 256 return 257 } 258 259 // Store the index in which the unblock happened. We use this on subsequent 260 // block calls in case the evaluation was in the scheduler when a trigger 261 // occurred. 262 b.unblockIndexes[computedClass] = index 263 b.l.Unlock() 264 265 b.capacityChangeCh <- &capacityUpdate{ 266 computedClass: computedClass, 267 index: index, 268 } 269 } 270 271 // watchCapacity is a long lived function that watches for capacity changes in 272 // nodes and unblocks the correct set of evals. 273 func (b *BlockedEvals) watchCapacity() { 274 for { 275 select { 276 case <-b.stopCh: 277 return 278 case update := <-b.capacityChangeCh: 279 b.unblock(update.computedClass, update.index) 280 } 281 } 282 } 283 284 // unblock unblocks all blocked evals that could run on the passed computed node 285 // class. 286 func (b *BlockedEvals) unblock(computedClass string, index uint64) { 287 b.l.Lock() 288 defer b.l.Unlock() 289 290 // Protect against the case of a flush. 291 if !b.enabled { 292 return 293 } 294 295 // Every eval that has escaped computed node class has to be unblocked 296 // because any node could potentially be feasible. 297 numEscaped := len(b.escaped) 298 unblocked := make(map[*structs.Evaluation]string, lib.MaxInt(numEscaped, 4)) 299 if numEscaped != 0 { 300 for id, wrapped := range b.escaped { 301 unblocked[wrapped.eval] = wrapped.token 302 delete(b.escaped, id) 303 delete(b.jobs, wrapped.eval.JobID) 304 } 305 } 306 307 // We unblock any eval that is explicitly eligible for the computed class 308 // and also any eval that is not eligible or uneligible. This signifies that 309 // when the evaluation was originally run through the scheduler, that it 310 // never saw a node with the given computed class and thus needs to be 311 // unblocked for correctness. 312 for id, wrapped := range b.captured { 313 if elig, ok := wrapped.eval.ClassEligibility[computedClass]; ok && !elig { 314 // Can skip because the eval has explicitly marked the node class 315 // as ineligible. 316 continue 317 } 318 319 // The computed node class has never been seen by the eval so we unblock 320 // it. 321 unblocked[wrapped.eval] = wrapped.token 322 delete(b.jobs, wrapped.eval.JobID) 323 delete(b.captured, id) 324 } 325 326 if l := len(unblocked); l != 0 { 327 // Update the counters 328 b.stats.TotalEscaped = 0 329 b.stats.TotalBlocked -= l 330 331 // Enqueue all the unblocked evals into the broker. 332 b.evalBroker.EnqueueAll(unblocked) 333 } 334 } 335 336 // UnblockFailed unblocks all blocked evaluation that were due to scheduler 337 // failure. 338 func (b *BlockedEvals) UnblockFailed() { 339 b.l.Lock() 340 defer b.l.Unlock() 341 342 // Do nothing if not enabled 343 if !b.enabled { 344 return 345 } 346 347 unblocked := make(map[*structs.Evaluation]string, 4) 348 for id, wrapped := range b.captured { 349 if wrapped.eval.TriggeredBy == structs.EvalTriggerMaxPlans { 350 unblocked[wrapped.eval] = wrapped.token 351 delete(b.captured, id) 352 delete(b.jobs, wrapped.eval.JobID) 353 } 354 } 355 356 for id, wrapped := range b.escaped { 357 if wrapped.eval.TriggeredBy == structs.EvalTriggerMaxPlans { 358 unblocked[wrapped.eval] = wrapped.token 359 delete(b.escaped, id) 360 delete(b.jobs, wrapped.eval.JobID) 361 b.stats.TotalEscaped -= 1 362 } 363 } 364 365 if l := len(unblocked); l > 0 { 366 b.stats.TotalBlocked -= l 367 b.evalBroker.EnqueueAll(unblocked) 368 } 369 } 370 371 // GetDuplicates returns all the duplicate evaluations and blocks until the 372 // passed timeout. 373 func (b *BlockedEvals) GetDuplicates(timeout time.Duration) []*structs.Evaluation { 374 var timeoutTimer *time.Timer 375 var timeoutCh <-chan time.Time 376 SCAN: 377 b.l.Lock() 378 if len(b.duplicates) != 0 { 379 dups := b.duplicates 380 b.duplicates = nil 381 b.l.Unlock() 382 return dups 383 } 384 b.l.Unlock() 385 386 // Create the timer 387 if timeoutTimer == nil && timeout != 0 { 388 timeoutTimer = time.NewTimer(timeout) 389 timeoutCh = timeoutTimer.C 390 defer timeoutTimer.Stop() 391 } 392 393 select { 394 case <-b.stopCh: 395 return nil 396 case <-timeoutCh: 397 return nil 398 case <-b.duplicateCh: 399 goto SCAN 400 } 401 } 402 403 // Flush is used to clear the state of blocked evaluations. 404 func (b *BlockedEvals) Flush() { 405 b.l.Lock() 406 defer b.l.Unlock() 407 408 // Reset the blocked eval tracker. 409 b.stats.TotalEscaped = 0 410 b.stats.TotalBlocked = 0 411 b.captured = make(map[string]wrappedEval) 412 b.escaped = make(map[string]wrappedEval) 413 b.jobs = make(map[string]struct{}) 414 b.duplicates = nil 415 b.capacityChangeCh = make(chan *capacityUpdate, unblockBuffer) 416 b.stopCh = make(chan struct{}) 417 b.duplicateCh = make(chan struct{}, 1) 418 } 419 420 // Stats is used to query the state of the blocked eval tracker. 421 func (b *BlockedEvals) Stats() *BlockedStats { 422 // Allocate a new stats struct 423 stats := new(BlockedStats) 424 425 b.l.RLock() 426 defer b.l.RUnlock() 427 428 // Copy all the stats 429 stats.TotalEscaped = b.stats.TotalEscaped 430 stats.TotalBlocked = b.stats.TotalBlocked 431 return stats 432 } 433 434 // EmitStats is used to export metrics about the blocked eval tracker while enabled 435 func (b *BlockedEvals) EmitStats(period time.Duration, stopCh chan struct{}) { 436 for { 437 select { 438 case <-time.After(period): 439 stats := b.Stats() 440 metrics.SetGauge([]string{"nomad", "blocked_evals", "total_blocked"}, float32(stats.TotalBlocked)) 441 metrics.SetGauge([]string{"nomad", "blocked_evals", "total_escaped"}, float32(stats.TotalEscaped)) 442 case <-stopCh: 443 return 444 } 445 } 446 }