github.com/diptanu/nomad@v0.5.7-0.20170516172507-d72e86cbe3d9/nomad/periodic.go (about) 1 package nomad 2 3 import ( 4 "container/heap" 5 "fmt" 6 "log" 7 "strconv" 8 "strings" 9 "sync" 10 "time" 11 12 memdb "github.com/hashicorp/go-memdb" 13 "github.com/hashicorp/nomad/nomad/structs" 14 ) 15 16 // PeriodicDispatch is used to track and launch periodic jobs. It maintains the 17 // set of periodic jobs and creates derived jobs and evaluations per 18 // instantiation which is determined by the periodic spec. 19 type PeriodicDispatch struct { 20 dispatcher JobEvalDispatcher 21 enabled bool 22 running bool 23 24 tracked map[string]*structs.Job 25 heap *periodicHeap 26 27 updateCh chan struct{} 28 stopCh chan struct{} 29 waitCh chan struct{} 30 logger *log.Logger 31 l sync.RWMutex 32 } 33 34 // JobEvalDispatcher is an interface to submit jobs and have evaluations created 35 // for them. 36 type JobEvalDispatcher interface { 37 // DispatchJob takes a job a new, untracked job and creates an evaluation 38 // for it and returns the eval. 39 DispatchJob(job *structs.Job) (*structs.Evaluation, error) 40 41 // RunningChildren returns whether the passed job has any running children. 42 RunningChildren(job *structs.Job) (bool, error) 43 } 44 45 // DispatchJob creates an evaluation for the passed job and commits both the 46 // evaluation and the job to the raft log. It returns the eval. 47 func (s *Server) DispatchJob(job *structs.Job) (*structs.Evaluation, error) { 48 // Commit this update via Raft 49 req := structs.JobRegisterRequest{Job: job} 50 _, index, err := s.raftApply(structs.JobRegisterRequestType, req) 51 if err != nil { 52 return nil, err 53 } 54 55 // Create a new evaluation 56 eval := &structs.Evaluation{ 57 ID: structs.GenerateUUID(), 58 Priority: job.Priority, 59 Type: job.Type, 60 TriggeredBy: structs.EvalTriggerPeriodicJob, 61 JobID: job.ID, 62 JobModifyIndex: index, 63 Status: structs.EvalStatusPending, 64 } 65 update := &structs.EvalUpdateRequest{ 66 Evals: []*structs.Evaluation{eval}, 67 } 68 69 // Commit this evaluation via Raft 70 // XXX: There is a risk of partial failure where the JobRegister succeeds 71 // but that the EvalUpdate does not. 72 _, evalIndex, err := s.raftApply(structs.EvalUpdateRequestType, update) 73 if err != nil { 74 return nil, err 75 } 76 77 // Update its indexes. 78 eval.CreateIndex = evalIndex 79 eval.ModifyIndex = evalIndex 80 return eval, nil 81 } 82 83 // RunningChildren checks whether the passed job has any running children. 84 func (s *Server) RunningChildren(job *structs.Job) (bool, error) { 85 state, err := s.fsm.State().Snapshot() 86 if err != nil { 87 return false, err 88 } 89 90 ws := memdb.NewWatchSet() 91 prefix := fmt.Sprintf("%s%s", job.ID, structs.PeriodicLaunchSuffix) 92 iter, err := state.JobsByIDPrefix(ws, prefix) 93 if err != nil { 94 return false, err 95 } 96 97 var child *structs.Job 98 for i := iter.Next(); i != nil; i = iter.Next() { 99 child = i.(*structs.Job) 100 101 // Ensure the job is actually a child. 102 if child.ParentID != job.ID { 103 continue 104 } 105 106 // Get the childs evaluations. 107 evals, err := state.EvalsByJob(ws, child.ID) 108 if err != nil { 109 return false, err 110 } 111 112 // Check if any of the evals are active or have running allocations. 113 for _, eval := range evals { 114 if !eval.TerminalStatus() { 115 return true, nil 116 } 117 118 allocs, err := state.AllocsByEval(ws, eval.ID) 119 if err != nil { 120 return false, err 121 } 122 123 for _, alloc := range allocs { 124 if !alloc.TerminalStatus() { 125 return true, nil 126 } 127 } 128 } 129 } 130 131 // There are no evals or allocations that aren't terminal. 132 return false, nil 133 } 134 135 // NewPeriodicDispatch returns a periodic dispatcher that is used to track and 136 // launch periodic jobs. 137 func NewPeriodicDispatch(logger *log.Logger, dispatcher JobEvalDispatcher) *PeriodicDispatch { 138 return &PeriodicDispatch{ 139 dispatcher: dispatcher, 140 tracked: make(map[string]*structs.Job), 141 heap: NewPeriodicHeap(), 142 updateCh: make(chan struct{}, 1), 143 stopCh: make(chan struct{}), 144 waitCh: make(chan struct{}), 145 logger: logger, 146 } 147 } 148 149 // SetEnabled is used to control if the periodic dispatcher is enabled. It 150 // should only be enabled on the active leader. Disabling an active dispatcher 151 // will stop any launched go routine and flush the dispatcher. 152 func (p *PeriodicDispatch) SetEnabled(enabled bool) { 153 p.l.Lock() 154 p.enabled = enabled 155 p.l.Unlock() 156 if !enabled { 157 if p.running { 158 close(p.stopCh) 159 <-p.waitCh 160 p.running = false 161 } 162 p.Flush() 163 } 164 } 165 166 // Start begins the goroutine that creates derived jobs and evals. 167 func (p *PeriodicDispatch) Start() { 168 p.l.Lock() 169 p.running = true 170 p.l.Unlock() 171 go p.run() 172 } 173 174 // Tracked returns the set of tracked job IDs. 175 func (p *PeriodicDispatch) Tracked() []*structs.Job { 176 p.l.RLock() 177 defer p.l.RUnlock() 178 tracked := make([]*structs.Job, len(p.tracked)) 179 i := 0 180 for _, job := range p.tracked { 181 tracked[i] = job 182 i++ 183 } 184 return tracked 185 } 186 187 // Add begins tracking of a periodic job. If it is already tracked, it acts as 188 // an update to the jobs periodic spec. 189 func (p *PeriodicDispatch) Add(job *structs.Job) error { 190 p.l.Lock() 191 defer p.l.Unlock() 192 193 // Do nothing if not enabled 194 if !p.enabled { 195 return nil 196 } 197 198 // If we were tracking a job and it has been disabled or made non-periodic remove it. 199 disabled := !job.IsPeriodic() || !job.Periodic.Enabled 200 _, tracked := p.tracked[job.ID] 201 if disabled { 202 if tracked { 203 p.removeLocked(job.ID) 204 } 205 206 // If the job is disabled and we aren't tracking it, do nothing. 207 return nil 208 } 209 210 // Check if the job is also a parameterized job. If it is, then we do not want to 211 // treat it as a periodic job but only its dispatched children. 212 if job.IsParameterized() { 213 return nil 214 } 215 216 // Add or update the job. 217 p.tracked[job.ID] = job 218 next := job.Periodic.Next(time.Now().In(job.Periodic.GetLocation())) 219 if tracked { 220 if err := p.heap.Update(job, next); err != nil { 221 return fmt.Errorf("failed to update job %v launch time: %v", job.ID, err) 222 } 223 p.logger.Printf("[DEBUG] nomad.periodic: updated periodic job %q", job.ID) 224 } else { 225 if err := p.heap.Push(job, next); err != nil { 226 return fmt.Errorf("failed to add job %v: %v", job.ID, err) 227 } 228 p.logger.Printf("[DEBUG] nomad.periodic: registered periodic job %q", job.ID) 229 } 230 231 // Signal an update. 232 if p.running { 233 select { 234 case p.updateCh <- struct{}{}: 235 default: 236 } 237 } 238 239 return nil 240 } 241 242 // Remove stops tracking the passed job. If the job is not tracked, it is a 243 // no-op. 244 func (p *PeriodicDispatch) Remove(jobID string) error { 245 p.l.Lock() 246 defer p.l.Unlock() 247 return p.removeLocked(jobID) 248 } 249 250 // Remove stops tracking the passed job. If the job is not tracked, it is a 251 // no-op. It assumes this is called while a lock is held. 252 func (p *PeriodicDispatch) removeLocked(jobID string) error { 253 // Do nothing if not enabled 254 if !p.enabled { 255 return nil 256 } 257 258 job, tracked := p.tracked[jobID] 259 if !tracked { 260 return nil 261 } 262 263 delete(p.tracked, jobID) 264 if err := p.heap.Remove(job); err != nil { 265 return fmt.Errorf("failed to remove tracked job %v: %v", jobID, err) 266 } 267 268 // Signal an update. 269 if p.running { 270 select { 271 case p.updateCh <- struct{}{}: 272 default: 273 } 274 } 275 276 p.logger.Printf("[DEBUG] nomad.periodic: deregistered periodic job %q", jobID) 277 return nil 278 } 279 280 // ForceRun causes the periodic job to be evaluated immediately and returns the 281 // subsequent eval. 282 func (p *PeriodicDispatch) ForceRun(jobID string) (*structs.Evaluation, error) { 283 p.l.Lock() 284 285 // Do nothing if not enabled 286 if !p.enabled { 287 p.l.Unlock() 288 return nil, fmt.Errorf("periodic dispatch disabled") 289 } 290 291 job, tracked := p.tracked[jobID] 292 if !tracked { 293 p.l.Unlock() 294 return nil, fmt.Errorf("can't force run non-tracked job %v", jobID) 295 } 296 297 p.l.Unlock() 298 return p.createEval(job, time.Now().In(job.Periodic.GetLocation())) 299 } 300 301 // shouldRun returns whether the long lived run function should run. 302 func (p *PeriodicDispatch) shouldRun() bool { 303 p.l.RLock() 304 defer p.l.RUnlock() 305 return p.enabled && p.running 306 } 307 308 // run is a long-lived function that waits till a job's periodic spec is met and 309 // then creates an evaluation to run the job. 310 func (p *PeriodicDispatch) run() { 311 defer close(p.waitCh) 312 var launchCh <-chan time.Time 313 for p.shouldRun() { 314 job, launch := p.nextLaunch() 315 if launch.IsZero() { 316 launchCh = nil 317 } else { 318 launchDur := launch.Sub(time.Now().In(job.Periodic.GetLocation())) 319 launchCh = time.After(launchDur) 320 p.logger.Printf("[DEBUG] nomad.periodic: launching job %q in %s", job.ID, launchDur) 321 } 322 323 select { 324 case <-p.stopCh: 325 return 326 case <-p.updateCh: 327 continue 328 case <-launchCh: 329 p.dispatch(job, launch) 330 } 331 } 332 } 333 334 // dispatch creates an evaluation for the job and updates its next launchtime 335 // based on the passed launch time. 336 func (p *PeriodicDispatch) dispatch(job *structs.Job, launchTime time.Time) { 337 p.l.Lock() 338 339 nextLaunch := job.Periodic.Next(launchTime) 340 if err := p.heap.Update(job, nextLaunch); err != nil { 341 p.logger.Printf("[ERR] nomad.periodic: failed to update next launch of periodic job %q: %v", job.ID, err) 342 } 343 344 // If the job prohibits overlapping and there are running children, we skip 345 // the launch. 346 if job.Periodic.ProhibitOverlap { 347 running, err := p.dispatcher.RunningChildren(job) 348 if err != nil { 349 msg := fmt.Sprintf("[ERR] nomad.periodic: failed to determine if"+ 350 " periodic job %q has running children: %v", job.ID, err) 351 p.logger.Println(msg) 352 p.l.Unlock() 353 return 354 } 355 356 if running { 357 msg := fmt.Sprintf("[DEBUG] nomad.periodic: skipping launch of"+ 358 " periodic job %q because job prohibits overlap", job.ID) 359 p.logger.Println(msg) 360 p.l.Unlock() 361 return 362 } 363 } 364 365 p.logger.Printf("[DEBUG] nomad.periodic: launching job %v at %v", job.ID, launchTime) 366 p.l.Unlock() 367 p.createEval(job, launchTime) 368 } 369 370 // nextLaunch returns the next job to launch and when it should be launched. If 371 // the next job can't be determined, an error is returned. If the dispatcher is 372 // stopped, a nil job will be returned. 373 func (p *PeriodicDispatch) nextLaunch() (*structs.Job, time.Time) { 374 // If there is nothing wait for an update. 375 p.l.RLock() 376 defer p.l.RUnlock() 377 if p.heap.Length() == 0 { 378 return nil, time.Time{} 379 } 380 381 nextJob := p.heap.Peek() 382 if nextJob == nil { 383 return nil, time.Time{} 384 } 385 386 return nextJob.job, nextJob.next 387 } 388 389 // createEval instantiates a job based on the passed periodic job and submits an 390 // evaluation for it. This should not be called with the lock held. 391 func (p *PeriodicDispatch) createEval(periodicJob *structs.Job, time time.Time) (*structs.Evaluation, error) { 392 derived, err := p.deriveJob(periodicJob, time) 393 if err != nil { 394 return nil, err 395 } 396 397 eval, err := p.dispatcher.DispatchJob(derived) 398 if err != nil { 399 p.logger.Printf("[ERR] nomad.periodic: failed to dispatch job %q: %v", periodicJob.ID, err) 400 return nil, err 401 } 402 403 return eval, nil 404 } 405 406 // deriveJob instantiates a new job based on the passed periodic job and the 407 // launch time. 408 func (p *PeriodicDispatch) deriveJob(periodicJob *structs.Job, time time.Time) ( 409 derived *structs.Job, err error) { 410 411 // Have to recover in case the job copy panics. 412 defer func() { 413 if r := recover(); r != nil { 414 p.logger.Printf("[ERR] nomad.periodic: deriving job from"+ 415 " periodic job %v failed; deregistering from periodic runner: %v", 416 periodicJob.ID, r) 417 p.Remove(periodicJob.ID) 418 derived = nil 419 err = fmt.Errorf("Failed to create a copy of the periodic job %v: %v", periodicJob.ID, r) 420 } 421 }() 422 423 // Create a copy of the periodic job, give it a derived ID/Name and make it 424 // non-periodic. 425 derived = periodicJob.Copy() 426 derived.ParentID = periodicJob.ID 427 derived.ID = p.derivedJobID(periodicJob, time) 428 derived.Name = derived.ID 429 derived.Periodic = nil 430 return 431 } 432 433 // deriveJobID returns a job ID based on the parent periodic job and the launch 434 // time. 435 func (p *PeriodicDispatch) derivedJobID(periodicJob *structs.Job, time time.Time) string { 436 return fmt.Sprintf("%s%s%d", periodicJob.ID, structs.PeriodicLaunchSuffix, time.Unix()) 437 } 438 439 // LaunchTime returns the launch time of the job. This is only valid for 440 // jobs created by PeriodicDispatch and will otherwise return an error. 441 func (p *PeriodicDispatch) LaunchTime(jobID string) (time.Time, error) { 442 index := strings.LastIndex(jobID, structs.PeriodicLaunchSuffix) 443 if index == -1 { 444 return time.Time{}, fmt.Errorf("couldn't parse launch time from eval: %v", jobID) 445 } 446 447 launch, err := strconv.Atoi(jobID[index+len(structs.PeriodicLaunchSuffix):]) 448 if err != nil { 449 return time.Time{}, fmt.Errorf("couldn't parse launch time from eval: %v", jobID) 450 } 451 452 return time.Unix(int64(launch), 0), nil 453 } 454 455 // Flush clears the state of the PeriodicDispatcher 456 func (p *PeriodicDispatch) Flush() { 457 p.l.Lock() 458 defer p.l.Unlock() 459 p.stopCh = make(chan struct{}) 460 p.updateCh = make(chan struct{}, 1) 461 p.waitCh = make(chan struct{}) 462 p.tracked = make(map[string]*structs.Job) 463 p.heap = NewPeriodicHeap() 464 } 465 466 // periodicHeap wraps a heap and gives operations other than Push/Pop. 467 type periodicHeap struct { 468 index map[string]*periodicJob 469 heap periodicHeapImp 470 } 471 472 type periodicJob struct { 473 job *structs.Job 474 next time.Time 475 index int 476 } 477 478 func NewPeriodicHeap() *periodicHeap { 479 return &periodicHeap{ 480 index: make(map[string]*periodicJob), 481 heap: make(periodicHeapImp, 0), 482 } 483 } 484 485 func (p *periodicHeap) Push(job *structs.Job, next time.Time) error { 486 if _, ok := p.index[job.ID]; ok { 487 return fmt.Errorf("job %v already exists", job.ID) 488 } 489 490 pJob := &periodicJob{job, next, 0} 491 p.index[job.ID] = pJob 492 heap.Push(&p.heap, pJob) 493 return nil 494 } 495 496 func (p *periodicHeap) Pop() *periodicJob { 497 if len(p.heap) == 0 { 498 return nil 499 } 500 501 pJob := heap.Pop(&p.heap).(*periodicJob) 502 delete(p.index, pJob.job.ID) 503 return pJob 504 } 505 506 func (p *periodicHeap) Peek() *periodicJob { 507 if len(p.heap) == 0 { 508 return nil 509 } 510 511 return p.heap[0] 512 } 513 514 func (p *periodicHeap) Contains(job *structs.Job) bool { 515 _, ok := p.index[job.ID] 516 return ok 517 } 518 519 func (p *periodicHeap) Update(job *structs.Job, next time.Time) error { 520 if pJob, ok := p.index[job.ID]; ok { 521 // Need to update the job as well because its spec can change. 522 pJob.job = job 523 pJob.next = next 524 heap.Fix(&p.heap, pJob.index) 525 return nil 526 } 527 528 return fmt.Errorf("heap doesn't contain job %v", job.ID) 529 } 530 531 func (p *periodicHeap) Remove(job *structs.Job) error { 532 if pJob, ok := p.index[job.ID]; ok { 533 heap.Remove(&p.heap, pJob.index) 534 delete(p.index, job.ID) 535 return nil 536 } 537 538 return fmt.Errorf("heap doesn't contain job %v", job.ID) 539 } 540 541 func (p *periodicHeap) Length() int { 542 return len(p.heap) 543 } 544 545 type periodicHeapImp []*periodicJob 546 547 func (h periodicHeapImp) Len() int { return len(h) } 548 549 func (h periodicHeapImp) Less(i, j int) bool { 550 // Two zero times should return false. 551 // Otherwise, zero is "greater" than any other time. 552 // (To sort it at the end of the list.) 553 // Sort such that zero times are at the end of the list. 554 iZero, jZero := h[i].next.IsZero(), h[j].next.IsZero() 555 if iZero && jZero { 556 return false 557 } else if iZero { 558 return false 559 } else if jZero { 560 return true 561 } 562 563 return h[i].next.Before(h[j].next) 564 } 565 566 func (h periodicHeapImp) Swap(i, j int) { 567 h[i], h[j] = h[j], h[i] 568 h[i].index = i 569 h[j].index = j 570 } 571 572 func (h *periodicHeapImp) Push(x interface{}) { 573 n := len(*h) 574 job := x.(*periodicJob) 575 job.index = n 576 *h = append(*h, job) 577 } 578 579 func (h *periodicHeapImp) Pop() interface{} { 580 old := *h 581 n := len(old) 582 job := old[n-1] 583 job.index = -1 // for safety 584 *h = old[0 : n-1] 585 return job 586 }