github.com/dkerwin/nomad@v0.3.3-0.20160525181927-74554135514b/nomad/blocked_evals.go (about) 1 package nomad 2 3 import ( 4 "sync" 5 "time" 6 7 "github.com/armon/go-metrics" 8 "github.com/hashicorp/nomad/nomad/structs" 9 ) 10 11 const ( 12 // unblockBuffer is the buffer size for the unblock channel. The buffer 13 // should be large to ensure that the FSM doesn't block when calling Unblock 14 // as this would apply back-pressure on Raft. 15 unblockBuffer = 8096 16 ) 17 18 // BlockedEvals is used to track evaluations that shouldn't be queued until a 19 // certain class of nodes becomes available. An evaluation is put into the 20 // blocked state when it is run through the scheduler and produced failed 21 // allocations. It is unblocked when the capacity of a node that could run the 22 // failed allocation becomes available. 23 type BlockedEvals struct { 24 evalBroker *EvalBroker 25 enabled bool 26 stats *BlockedStats 27 l sync.RWMutex 28 29 // captured is the set of evaluations that are captured by computed node 30 // classes. 31 captured map[string]*structs.Evaluation 32 33 // escaped is the set of evaluations that have escaped computed node 34 // classes. 35 escaped map[string]*structs.Evaluation 36 37 // unblockCh is used to buffer unblocking of evaluations. 38 capacityChangeCh chan *capacityUpdate 39 40 // jobs is the map of blocked job and is used to ensure that only one 41 // blocked eval exists for each job. 42 jobs map[string]struct{} 43 44 // unblockIndexes maps computed node classes to the index in which they were 45 // unblocked. This is used to check if an evaluation could have been 46 // unblocked between the time they were in the scheduler and the time they 47 // are being blocked. 48 unblockIndexes map[string]uint64 49 50 // duplicates is the set of evaluations for jobs that had pre-existing 51 // blocked evaluations. These should be marked as cancelled since only one 52 // blocked eval is neeeded per job. 53 duplicates []*structs.Evaluation 54 55 // duplicateCh is used to signal that a duplicate eval was added to the 56 // duplicate set. It can be used to unblock waiting callers looking for 57 // duplicates. 58 duplicateCh chan struct{} 59 60 // stopCh is used to stop any created goroutines. 61 stopCh chan struct{} 62 } 63 64 // capacityUpdate stores unblock data. 65 type capacityUpdate struct { 66 computedClass string 67 index uint64 68 } 69 70 // BlockedStats returns all the stats about the blocked eval tracker. 71 type BlockedStats struct { 72 // TotalEscaped is the total number of blocked evaluations that have escaped 73 // computed node classes. 74 TotalEscaped int 75 76 // TotalBlocked is the total number of blocked evaluations. 77 TotalBlocked int 78 } 79 80 // NewBlockedEvals creates a new blocked eval tracker that will enqueue 81 // unblocked evals into the passed broker. 82 func NewBlockedEvals(evalBroker *EvalBroker) *BlockedEvals { 83 return &BlockedEvals{ 84 evalBroker: evalBroker, 85 captured: make(map[string]*structs.Evaluation), 86 escaped: make(map[string]*structs.Evaluation), 87 jobs: make(map[string]struct{}), 88 unblockIndexes: make(map[string]uint64), 89 capacityChangeCh: make(chan *capacityUpdate, unblockBuffer), 90 duplicateCh: make(chan struct{}, 1), 91 stopCh: make(chan struct{}), 92 stats: new(BlockedStats), 93 } 94 } 95 96 // Enabled is used to check if the broker is enabled. 97 func (b *BlockedEvals) Enabled() bool { 98 b.l.RLock() 99 defer b.l.RUnlock() 100 return b.enabled 101 } 102 103 // SetEnabled is used to control if the broker is enabled. The broker 104 // should only be enabled on the active leader. 105 func (b *BlockedEvals) SetEnabled(enabled bool) { 106 b.l.Lock() 107 if b.enabled == enabled { 108 // No-op 109 return 110 } else if enabled { 111 go b.watchCapacity() 112 } else { 113 close(b.stopCh) 114 } 115 b.enabled = enabled 116 b.l.Unlock() 117 if !enabled { 118 b.Flush() 119 } 120 } 121 122 // Block tracks the passed evaluation and enqueues it into the eval broker when 123 // a suitable node calls unblock. 124 func (b *BlockedEvals) Block(eval *structs.Evaluation) { 125 b.l.Lock() 126 defer b.l.Unlock() 127 128 // Do nothing if not enabled 129 if !b.enabled { 130 return 131 } 132 133 // Check if the job already has a blocked evaluation. If it does add it to 134 // the list of duplicates. We omly ever want one blocked evaluation per job, 135 // otherwise we would create unnecessary work for the scheduler as multiple 136 // evals for the same job would be run, all producing the same outcome. 137 if _, existing := b.jobs[eval.JobID]; existing { 138 b.duplicates = append(b.duplicates, eval) 139 140 // Unblock any waiter. 141 select { 142 case b.duplicateCh <- struct{}{}: 143 default: 144 } 145 146 return 147 } 148 149 // Check if the eval missed an unblock while it was in the scheduler at an 150 // older index. The scheduler could have been invoked with a snapshot of 151 // state that was prior to additional capacity being added or allocations 152 // becoming terminal. 153 if b.missedUnblock(eval) { 154 // Just re-enqueue the eval immediately 155 b.evalBroker.Enqueue(eval) 156 return 157 } 158 159 // Mark the job as tracked. 160 b.stats.TotalBlocked++ 161 b.jobs[eval.JobID] = struct{}{} 162 163 // If the eval has escaped, meaning computed node classes could not capture 164 // the constraints of the job, we store the eval separately as we have to 165 // unblock it whenever node capacity changes. This is because we don't know 166 // what node class is feasible for the jobs constraints. 167 if eval.EscapedComputedClass { 168 b.escaped[eval.ID] = eval 169 b.stats.TotalEscaped++ 170 return 171 } 172 173 // Add the eval to the set of blocked evals whose jobs constraints are 174 // captured by computed node class. 175 b.captured[eval.ID] = eval 176 } 177 178 // missedUnblock returns whether an evaluation missed an unblock while it was in 179 // the scheduler. Since the scheduler can operate at an index in the past, the 180 // evaluation may have been processed missing data that would allow it to 181 // complete. This method returns if that is the case and should be called with 182 // the lock held. 183 func (b *BlockedEvals) missedUnblock(eval *structs.Evaluation) bool { 184 var max uint64 = 0 185 for class, index := range b.unblockIndexes { 186 // Calculate the max unblock index 187 if max < index { 188 max = index 189 } 190 191 elig, ok := eval.ClassEligibility[class] 192 if !ok { 193 // The evaluation was processed and did not encounter this class. 194 // Thus for correctness we need to unblock it. 195 return true 196 } 197 198 // The evaluation could use the computed node class and the eval was 199 // processed before the last unblock. 200 if elig && eval.SnapshotIndex < index { 201 return true 202 } 203 } 204 205 // If the evaluation has escaped, and the map contains an index older than 206 // the evaluations, it should be unblocked. 207 if eval.EscapedComputedClass && eval.SnapshotIndex < max { 208 return true 209 } 210 211 // The evaluation is ahead of all recent unblocks. 212 return false 213 } 214 215 // Unblock causes any evaluation that could potentially make progress on a 216 // capacity change on the passed computed node class to be enqueued into the 217 // eval broker. 218 func (b *BlockedEvals) Unblock(computedClass string, index uint64) { 219 b.l.Lock() 220 221 // Do nothing if not enabled 222 if !b.enabled { 223 b.l.Unlock() 224 return 225 } 226 227 // Store the index in which the unblock happened. We use this on subsequent 228 // block calls in case the evaluation was in the scheduler when a trigger 229 // occured. 230 b.unblockIndexes[computedClass] = index 231 b.l.Unlock() 232 233 b.capacityChangeCh <- &capacityUpdate{ 234 computedClass: computedClass, 235 index: index, 236 } 237 } 238 239 // watchCapacity is a long lived function that watches for capacity changes in 240 // nodes and unblocks the correct set of evals. 241 func (b *BlockedEvals) watchCapacity() { 242 for { 243 select { 244 case <-b.stopCh: 245 return 246 case update := <-b.capacityChangeCh: 247 b.unblock(update.computedClass, update.index) 248 } 249 } 250 } 251 252 // unblock unblocks all blocked evals that could run on the passed computed node 253 // class. 254 func (b *BlockedEvals) unblock(computedClass string, index uint64) { 255 b.l.Lock() 256 defer b.l.Unlock() 257 258 // Protect against the case of a flush. 259 if !b.enabled { 260 return 261 } 262 263 // Every eval that has escaped computed node class has to be unblocked 264 // because any node could potentially be feasible. 265 var unblocked []*structs.Evaluation 266 if l := len(b.escaped); l != 0 { 267 unblocked = make([]*structs.Evaluation, 0, l) 268 for id, eval := range b.escaped { 269 unblocked = append(unblocked, eval) 270 delete(b.escaped, id) 271 delete(b.jobs, eval.JobID) 272 } 273 } 274 275 // We unblock any eval that is explicitly eligible for the computed class 276 // and also any eval that is not eligible or uneligible. This signifies that 277 // when the evaluation was originally run through the scheduler, that it 278 // never saw a node with the given computed class and thus needs to be 279 // unblocked for correctness. 280 for id, eval := range b.captured { 281 if elig, ok := eval.ClassEligibility[computedClass]; ok && !elig { 282 // Can skip because the eval has explicitly marked the node class 283 // as ineligible. 284 continue 285 } 286 287 // The computed node class has never been seen by the eval so we unblock 288 // it. 289 unblocked = append(unblocked, eval) 290 delete(b.jobs, eval.JobID) 291 delete(b.captured, id) 292 } 293 294 if l := len(unblocked); l != 0 { 295 // Update the counters 296 b.stats.TotalEscaped = 0 297 b.stats.TotalBlocked -= l 298 299 // Enqueue all the unblocked evals into the broker. 300 b.evalBroker.EnqueueAll(unblocked) 301 } 302 } 303 304 // UnblockFailed unblocks all blocked evaluation that were due to scheduler 305 // failure. 306 func (b *BlockedEvals) UnblockFailed() { 307 b.l.Lock() 308 defer b.l.Unlock() 309 310 // Do nothing if not enabled 311 if !b.enabled { 312 return 313 } 314 315 var unblock []*structs.Evaluation 316 for id, eval := range b.captured { 317 if eval.TriggeredBy == structs.EvalTriggerMaxPlans { 318 unblock = append(unblock, eval) 319 delete(b.captured, id) 320 } 321 } 322 323 for id, eval := range b.escaped { 324 if eval.TriggeredBy == structs.EvalTriggerMaxPlans { 325 unblock = append(unblock, eval) 326 delete(b.escaped, id) 327 } 328 } 329 330 b.evalBroker.EnqueueAll(unblock) 331 } 332 333 // GetDuplicates returns all the duplicate evaluations and blocks until the 334 // passed timeout. 335 func (b *BlockedEvals) GetDuplicates(timeout time.Duration) []*structs.Evaluation { 336 var timeoutTimer *time.Timer 337 var timeoutCh <-chan time.Time 338 SCAN: 339 b.l.Lock() 340 if len(b.duplicates) != 0 { 341 dups := b.duplicates 342 b.duplicates = nil 343 b.l.Unlock() 344 return dups 345 } 346 b.l.Unlock() 347 348 // Create the timer 349 if timeoutTimer == nil && timeout != 0 { 350 timeoutTimer = time.NewTimer(timeout) 351 timeoutCh = timeoutTimer.C 352 defer timeoutTimer.Stop() 353 } 354 355 select { 356 case <-b.stopCh: 357 return nil 358 case <-timeoutCh: 359 return nil 360 case <-b.duplicateCh: 361 goto SCAN 362 } 363 } 364 365 // Flush is used to clear the state of blocked evaluations. 366 func (b *BlockedEvals) Flush() { 367 b.l.Lock() 368 defer b.l.Unlock() 369 370 // Reset the blocked eval tracker. 371 b.stats.TotalEscaped = 0 372 b.stats.TotalBlocked = 0 373 b.captured = make(map[string]*structs.Evaluation) 374 b.escaped = make(map[string]*structs.Evaluation) 375 b.jobs = make(map[string]struct{}) 376 b.duplicates = nil 377 b.capacityChangeCh = make(chan *capacityUpdate, unblockBuffer) 378 b.stopCh = make(chan struct{}) 379 b.duplicateCh = make(chan struct{}, 1) 380 } 381 382 // Stats is used to query the state of the blocked eval tracker. 383 func (b *BlockedEvals) Stats() *BlockedStats { 384 // Allocate a new stats struct 385 stats := new(BlockedStats) 386 387 b.l.RLock() 388 defer b.l.RUnlock() 389 390 // Copy all the stats 391 stats.TotalEscaped = b.stats.TotalEscaped 392 stats.TotalBlocked = b.stats.TotalBlocked 393 return stats 394 } 395 396 // EmitStats is used to export metrics about the blocked eval tracker while enabled 397 func (b *BlockedEvals) EmitStats(period time.Duration, stopCh chan struct{}) { 398 for { 399 select { 400 case <-time.After(period): 401 stats := b.Stats() 402 metrics.SetGauge([]string{"nomad", "blocked_evals", "total_blocked"}, float32(stats.TotalBlocked)) 403 metrics.SetGauge([]string{"nomad", "blocked_evals", "total_escaped"}, float32(stats.TotalEscaped)) 404 case <-stopCh: 405 return 406 } 407 } 408 }