github.com/ryanslade/nomad@v0.2.4-0.20160128061903-fc95782f2089/nomad/periodic.go (about) 1 package nomad 2 3 import ( 4 "container/heap" 5 "fmt" 6 "log" 7 "strconv" 8 "strings" 9 "sync" 10 "time" 11 12 "github.com/hashicorp/nomad/nomad/structs" 13 ) 14 15 // PeriodicDispatch is used to track and launch periodic jobs. It maintains the 16 // set of periodic jobs and creates derived jobs and evaluations per 17 // instantiation which is determined by the periodic spec. 18 type PeriodicDispatch struct { 19 dispatcher JobEvalDispatcher 20 enabled bool 21 running bool 22 23 tracked map[string]*structs.Job 24 heap *periodicHeap 25 26 updateCh chan struct{} 27 stopCh chan struct{} 28 waitCh chan struct{} 29 logger *log.Logger 30 l sync.RWMutex 31 } 32 33 // JobEvalDispatcher is an interface to submit jobs and have evaluations created 34 // for them. 35 type JobEvalDispatcher interface { 36 // DispatchJob takes a job a new, untracked job and creates an evaluation 37 // for it and returns the eval. 38 DispatchJob(job *structs.Job) (*structs.Evaluation, error) 39 40 // RunningChildren returns whether the passed job has any running children. 41 RunningChildren(job *structs.Job) (bool, error) 42 } 43 44 // DispatchJob creates an evaluation for the passed job and commits both the 45 // evaluation and the job to the raft log. It returns the eval. 46 func (s *Server) DispatchJob(job *structs.Job) (*structs.Evaluation, error) { 47 // Commit this update via Raft 48 req := structs.JobRegisterRequest{Job: job} 49 _, index, err := s.raftApply(structs.JobRegisterRequestType, req) 50 if err != nil { 51 return nil, err 52 } 53 54 // Create a new evaluation 55 eval := &structs.Evaluation{ 56 ID: structs.GenerateUUID(), 57 Priority: job.Priority, 58 Type: job.Type, 59 TriggeredBy: structs.EvalTriggerPeriodicJob, 60 JobID: job.ID, 61 JobModifyIndex: index, 62 Status: structs.EvalStatusPending, 63 } 64 update := &structs.EvalUpdateRequest{ 65 Evals: []*structs.Evaluation{eval}, 66 } 67 68 // Commit this evaluation via Raft 69 // XXX: There is a risk of partial failure where the JobRegister succeeds 70 // but that the EvalUpdate does not. 71 _, evalIndex, err := s.raftApply(structs.EvalUpdateRequestType, update) 72 if err != nil { 73 return nil, err 74 } 75 76 // Update its indexes. 77 eval.CreateIndex = evalIndex 78 eval.ModifyIndex = evalIndex 79 return eval, nil 80 } 81 82 // RunningChildren checks whether the passed job has any running children. 83 func (s *Server) RunningChildren(job *structs.Job) (bool, error) { 84 state := s.fsm.State() 85 prefix := fmt.Sprintf("%s%s", job.ID, structs.PeriodicLaunchSuffix) 86 iter, err := state.JobsByIDPrefix(prefix) 87 if err != nil { 88 return false, err 89 } 90 91 var child *structs.Job 92 for i := iter.Next(); i != nil; i = iter.Next() { 93 child = i.(*structs.Job) 94 95 // Ensure the job is actually a child. 96 if child.ParentID != job.ID { 97 continue 98 } 99 100 // Get the childs evaluations. 101 evals, err := state.EvalsByJob(child.ID) 102 if err != nil { 103 return false, err 104 } 105 106 // Check if any of the evals are active or have running allocations. 107 for _, eval := range evals { 108 if !eval.TerminalStatus() { 109 return true, nil 110 } 111 112 allocs, err := state.AllocsByEval(eval.ID) 113 if err != nil { 114 return false, err 115 } 116 117 for _, alloc := range allocs { 118 if !alloc.TerminalStatus() { 119 return true, nil 120 } 121 } 122 } 123 } 124 125 // There are no evals or allocations that aren't terminal. 126 return false, nil 127 } 128 129 // NewPeriodicDispatch returns a periodic dispatcher that is used to track and 130 // launch periodic jobs. 131 func NewPeriodicDispatch(logger *log.Logger, dispatcher JobEvalDispatcher) *PeriodicDispatch { 132 return &PeriodicDispatch{ 133 dispatcher: dispatcher, 134 tracked: make(map[string]*structs.Job), 135 heap: NewPeriodicHeap(), 136 updateCh: make(chan struct{}, 1), 137 stopCh: make(chan struct{}), 138 waitCh: make(chan struct{}), 139 logger: logger, 140 } 141 } 142 143 // SetEnabled is used to control if the periodic dispatcher is enabled. It 144 // should only be enabled on the active leader. Disabling an active dispatcher 145 // will stop any launched go routine and flush the dispatcher. 146 func (p *PeriodicDispatch) SetEnabled(enabled bool) { 147 p.l.Lock() 148 p.enabled = enabled 149 p.l.Unlock() 150 if !enabled { 151 if p.running { 152 close(p.stopCh) 153 <-p.waitCh 154 p.running = false 155 } 156 p.Flush() 157 } 158 } 159 160 // Start begins the goroutine that creates derived jobs and evals. 161 func (p *PeriodicDispatch) Start() { 162 p.l.Lock() 163 p.running = true 164 p.l.Unlock() 165 go p.run() 166 } 167 168 // Tracked returns the set of tracked job IDs. 169 func (p *PeriodicDispatch) Tracked() []*structs.Job { 170 p.l.RLock() 171 defer p.l.RUnlock() 172 tracked := make([]*structs.Job, len(p.tracked)) 173 i := 0 174 for _, job := range p.tracked { 175 tracked[i] = job 176 i++ 177 } 178 return tracked 179 } 180 181 // Add begins tracking of a periodic job. If it is already tracked, it acts as 182 // an update to the jobs periodic spec. 183 func (p *PeriodicDispatch) Add(job *structs.Job) error { 184 p.l.Lock() 185 defer p.l.Unlock() 186 187 // Do nothing if not enabled 188 if !p.enabled { 189 return nil 190 } 191 192 // If we were tracking a job and it has been disabled or made non-periodic remove it. 193 disabled := !job.IsPeriodic() || !job.Periodic.Enabled 194 _, tracked := p.tracked[job.ID] 195 if disabled { 196 if tracked { 197 p.removeLocked(job.ID) 198 } 199 200 // If the job is disabled and we aren't tracking it, do nothing. 201 return nil 202 } 203 204 // Add or update the job. 205 p.tracked[job.ID] = job 206 next := job.Periodic.Next(time.Now()) 207 if tracked { 208 if err := p.heap.Update(job, next); err != nil { 209 return fmt.Errorf("failed to update job %v launch time: %v", job.ID, err) 210 } 211 p.logger.Printf("[DEBUG] nomad.periodic: updated periodic job %q", job.ID) 212 } else { 213 if err := p.heap.Push(job, next); err != nil { 214 return fmt.Errorf("failed to add job %v: %v", job.ID, err) 215 } 216 p.logger.Printf("[DEBUG] nomad.periodic: registered periodic job %q", job.ID) 217 } 218 219 // Signal an update. 220 if p.running { 221 select { 222 case p.updateCh <- struct{}{}: 223 default: 224 } 225 } 226 227 return nil 228 } 229 230 // Remove stops tracking the passed job. If the job is not tracked, it is a 231 // no-op. 232 func (p *PeriodicDispatch) Remove(jobID string) error { 233 p.l.Lock() 234 defer p.l.Unlock() 235 return p.removeLocked(jobID) 236 } 237 238 // Remove stops tracking the passed job. If the job is not tracked, it is a 239 // no-op. It assumes this is called while a lock is held. 240 func (p *PeriodicDispatch) removeLocked(jobID string) error { 241 // Do nothing if not enabled 242 if !p.enabled { 243 return nil 244 } 245 246 job, tracked := p.tracked[jobID] 247 if !tracked { 248 return nil 249 } 250 251 delete(p.tracked, jobID) 252 if err := p.heap.Remove(job); err != nil { 253 return fmt.Errorf("failed to remove tracked job %v: %v", jobID, err) 254 } 255 256 // Signal an update. 257 if p.running { 258 select { 259 case p.updateCh <- struct{}{}: 260 default: 261 } 262 } 263 264 p.logger.Printf("[DEBUG] nomad.periodic: deregistered periodic job %q", jobID) 265 return nil 266 } 267 268 // ForceRun causes the periodic job to be evaluated immediately and returns the 269 // subsequent eval. 270 func (p *PeriodicDispatch) ForceRun(jobID string) (*structs.Evaluation, error) { 271 p.l.Lock() 272 273 // Do nothing if not enabled 274 if !p.enabled { 275 return nil, fmt.Errorf("periodic dispatch disabled") 276 } 277 278 job, tracked := p.tracked[jobID] 279 if !tracked { 280 return nil, fmt.Errorf("can't force run non-tracked job %v", jobID) 281 } 282 283 p.l.Unlock() 284 return p.createEval(job, time.Now()) 285 } 286 287 // shouldRun returns whether the long lived run function should run. 288 func (p *PeriodicDispatch) shouldRun() bool { 289 p.l.RLock() 290 defer p.l.RUnlock() 291 return p.enabled && p.running 292 } 293 294 // run is a long-lived function that waits till a job's periodic spec is met and 295 // then creates an evaluation to run the job. 296 func (p *PeriodicDispatch) run() { 297 defer close(p.waitCh) 298 var launchCh <-chan time.Time 299 for p.shouldRun() { 300 job, launch := p.nextLaunch() 301 if launch.IsZero() { 302 launchCh = nil 303 } else { 304 launchDur := launch.Sub(time.Now()) 305 launchCh = time.After(launchDur) 306 p.logger.Printf("[DEBUG] nomad.periodic: launching job %q in %s", job.ID, launchDur) 307 } 308 309 select { 310 case <-p.stopCh: 311 return 312 case <-p.updateCh: 313 continue 314 case <-launchCh: 315 p.dispatch(job, launch) 316 } 317 } 318 } 319 320 // dispatch creates an evaluation for the job and updates its next launchtime 321 // based on the passed launch time. 322 func (p *PeriodicDispatch) dispatch(job *structs.Job, launchTime time.Time) { 323 p.l.Lock() 324 325 nextLaunch := job.Periodic.Next(launchTime) 326 if err := p.heap.Update(job, nextLaunch); err != nil { 327 p.logger.Printf("[ERR] nomad.periodic: failed to update next launch of periodic job %q: %v", job.ID, err) 328 } 329 330 // If the job prohibits overlapping and there are running children, we skip 331 // the launch. 332 if job.Periodic.ProhibitOverlap { 333 running, err := p.dispatcher.RunningChildren(job) 334 if err != nil { 335 msg := fmt.Sprintf("[ERR] nomad.periodic: failed to determine if"+ 336 " periodic job %q has running children: %v", job.ID, err) 337 p.logger.Println(msg) 338 p.l.Unlock() 339 return 340 } 341 342 if running { 343 msg := fmt.Sprintf("[DEBUG] nomad.periodic: skipping launch of"+ 344 " periodic job %q because job prohibits overlap", job.ID) 345 p.logger.Println(msg) 346 p.l.Unlock() 347 return 348 } 349 } 350 351 p.logger.Printf("[DEBUG] nomad.periodic: launching job %v at %v", job.ID, launchTime) 352 p.l.Unlock() 353 p.createEval(job, launchTime) 354 } 355 356 // nextLaunch returns the next job to launch and when it should be launched. If 357 // the next job can't be determined, an error is returned. If the dispatcher is 358 // stopped, a nil job will be returned. 359 func (p *PeriodicDispatch) nextLaunch() (*structs.Job, time.Time) { 360 // If there is nothing wait for an update. 361 p.l.RLock() 362 defer p.l.RUnlock() 363 if p.heap.Length() == 0 { 364 return nil, time.Time{} 365 } 366 367 nextJob := p.heap.Peek() 368 if nextJob == nil { 369 return nil, time.Time{} 370 } 371 372 return nextJob.job, nextJob.next 373 } 374 375 // createEval instantiates a job based on the passed periodic job and submits an 376 // evaluation for it. This should not be called with the lock held. 377 func (p *PeriodicDispatch) createEval(periodicJob *structs.Job, time time.Time) (*structs.Evaluation, error) { 378 derived, err := p.deriveJob(periodicJob, time) 379 if err != nil { 380 return nil, err 381 } 382 383 eval, err := p.dispatcher.DispatchJob(derived) 384 if err != nil { 385 p.logger.Printf("[ERR] nomad.periodic: failed to dispatch job %q: %v", periodicJob.ID, err) 386 return nil, err 387 } 388 389 return eval, nil 390 } 391 392 // deriveJob instantiates a new job based on the passed periodic job and the 393 // launch time. 394 func (p *PeriodicDispatch) deriveJob(periodicJob *structs.Job, time time.Time) ( 395 derived *structs.Job, err error) { 396 397 // Have to recover in case the job copy panics. 398 defer func() { 399 if r := recover(); r != nil { 400 p.logger.Printf("[ERR] nomad.periodic: deriving job from"+ 401 " periodic job %v failed; deregistering from periodic runner: %v", 402 periodicJob.ID, r) 403 p.Remove(periodicJob.ID) 404 derived = nil 405 err = fmt.Errorf("Failed to create a copy of the periodic job %v: %v", periodicJob.ID, r) 406 } 407 }() 408 409 // Create a copy of the periodic job, give it a derived ID/Name and make it 410 // non-periodic. 411 derived = periodicJob.Copy() 412 derived.ParentID = periodicJob.ID 413 derived.ID = p.derivedJobID(periodicJob, time) 414 derived.Name = derived.ID 415 derived.Periodic = nil 416 derived.GC = true 417 return 418 } 419 420 // deriveJobID returns a job ID based on the parent periodic job and the launch 421 // time. 422 func (p *PeriodicDispatch) derivedJobID(periodicJob *structs.Job, time time.Time) string { 423 return fmt.Sprintf("%s%s%d", periodicJob.ID, structs.PeriodicLaunchSuffix, time.Unix()) 424 } 425 426 // LaunchTime returns the launch time of the job. This is only valid for 427 // jobs created by PeriodicDispatch and will otherwise return an error. 428 func (p *PeriodicDispatch) LaunchTime(jobID string) (time.Time, error) { 429 index := strings.LastIndex(jobID, structs.PeriodicLaunchSuffix) 430 if index == -1 { 431 return time.Time{}, fmt.Errorf("couldn't parse launch time from eval: %v", jobID) 432 } 433 434 launch, err := strconv.Atoi(jobID[index+len(structs.PeriodicLaunchSuffix):]) 435 if err != nil { 436 return time.Time{}, fmt.Errorf("couldn't parse launch time from eval: %v", jobID) 437 } 438 439 return time.Unix(int64(launch), 0), nil 440 } 441 442 // Flush clears the state of the PeriodicDispatcher 443 func (p *PeriodicDispatch) Flush() { 444 p.l.Lock() 445 defer p.l.Unlock() 446 p.stopCh = make(chan struct{}) 447 p.updateCh = make(chan struct{}, 1) 448 p.waitCh = make(chan struct{}) 449 p.tracked = make(map[string]*structs.Job) 450 p.heap = NewPeriodicHeap() 451 } 452 453 // periodicHeap wraps a heap and gives operations other than Push/Pop. 454 type periodicHeap struct { 455 index map[string]*periodicJob 456 heap periodicHeapImp 457 } 458 459 type periodicJob struct { 460 job *structs.Job 461 next time.Time 462 index int 463 } 464 465 func NewPeriodicHeap() *periodicHeap { 466 return &periodicHeap{ 467 index: make(map[string]*periodicJob), 468 heap: make(periodicHeapImp, 0), 469 } 470 } 471 472 func (p *periodicHeap) Push(job *structs.Job, next time.Time) error { 473 if _, ok := p.index[job.ID]; ok { 474 return fmt.Errorf("job %v already exists", job.ID) 475 } 476 477 pJob := &periodicJob{job, next, 0} 478 p.index[job.ID] = pJob 479 heap.Push(&p.heap, pJob) 480 return nil 481 } 482 483 func (p *periodicHeap) Pop() *periodicJob { 484 if len(p.heap) == 0 { 485 return nil 486 } 487 488 pJob := heap.Pop(&p.heap).(*periodicJob) 489 delete(p.index, pJob.job.ID) 490 return pJob 491 } 492 493 func (p *periodicHeap) Peek() *periodicJob { 494 if len(p.heap) == 0 { 495 return nil 496 } 497 498 return p.heap[0] 499 } 500 501 func (p *periodicHeap) Contains(job *structs.Job) bool { 502 _, ok := p.index[job.ID] 503 return ok 504 } 505 506 func (p *periodicHeap) Update(job *structs.Job, next time.Time) error { 507 if pJob, ok := p.index[job.ID]; ok { 508 // Need to update the job as well because its spec can change. 509 pJob.job = job 510 pJob.next = next 511 heap.Fix(&p.heap, pJob.index) 512 return nil 513 } 514 515 return fmt.Errorf("heap doesn't contain job %v", job.ID) 516 } 517 518 func (p *periodicHeap) Remove(job *structs.Job) error { 519 if pJob, ok := p.index[job.ID]; ok { 520 heap.Remove(&p.heap, pJob.index) 521 delete(p.index, job.ID) 522 return nil 523 } 524 525 return fmt.Errorf("heap doesn't contain job %v", job.ID) 526 } 527 528 func (p *periodicHeap) Length() int { 529 return len(p.heap) 530 } 531 532 type periodicHeapImp []*periodicJob 533 534 func (h periodicHeapImp) Len() int { return len(h) } 535 536 func (h periodicHeapImp) Less(i, j int) bool { 537 // Two zero times should return false. 538 // Otherwise, zero is "greater" than any other time. 539 // (To sort it at the end of the list.) 540 // Sort such that zero times are at the end of the list. 541 iZero, jZero := h[i].next.IsZero(), h[j].next.IsZero() 542 if iZero && jZero { 543 return false 544 } else if iZero { 545 return false 546 } else if jZero { 547 return true 548 } 549 550 return h[i].next.Before(h[j].next) 551 } 552 553 func (h periodicHeapImp) Swap(i, j int) { 554 h[i], h[j] = h[j], h[i] 555 h[i].index = i 556 h[j].index = j 557 } 558 559 func (h *periodicHeapImp) Push(x interface{}) { 560 n := len(*h) 561 job := x.(*periodicJob) 562 job.index = n 563 *h = append(*h, job) 564 } 565 566 func (h *periodicHeapImp) Pop() interface{} { 567 old := *h 568 n := len(old) 569 job := old[n-1] 570 job.index = -1 // for safety 571 *h = old[0 : n-1] 572 return job 573 }