// Source: github.com/hhrutter/nomad@v0.6.0-rc2.0.20170723054333-80c4b03f0705/nomad/periodic.go

package nomad

import (
	"container/heap"
	"fmt"
	"log"
	"strconv"
	"strings"
	"sync"
	"time"

	memdb "github.com/hashicorp/go-memdb"
	"github.com/hashicorp/nomad/nomad/structs"
)

// PeriodicDispatch is used to track and launch periodic jobs. It maintains the
// set of periodic jobs and creates derived jobs and evaluations per
// instantiation which is determined by the periodic spec.
type PeriodicDispatch struct {
	// dispatcher submits derived jobs and reports whether a periodic job still
	// has running children.
	dispatcher JobEvalDispatcher
	// enabled is toggled by SetEnabled; Add, Remove and ForceRun do no work
	// while it is false.
	enabled bool
	// running is set by Start and cleared when the dispatcher is disabled.
	running bool

	// tracked maps a job ID to the periodic job being tracked.
	tracked map[string]*structs.Job
	// heap orders the tracked jobs by their next launch time.
	heap *periodicHeap

	// updateCh (buffer of one) tells the run loop that the tracked set changed.
	updateCh chan struct{}
	// stopCh is closed to ask the run loop to exit.
	stopCh chan struct{}
	// waitCh is closed by the run loop when it exits.
	waitCh chan struct{}
	logger *log.Logger
	// l is the lock taken by the dispatcher's methods around the fields above.
	l sync.RWMutex
}

// JobEvalDispatcher is an interface to submit jobs and have evaluations created
// for them.
type JobEvalDispatcher interface {
	// DispatchJob takes a new, untracked job, creates an evaluation for it
	// and returns the eval.
	DispatchJob(job *structs.Job) (*structs.Evaluation, error)

	// RunningChildren returns whether the passed job has any running children.
	RunningChildren(job *structs.Job) (bool, error)
}

// DispatchJob creates an evaluation for the passed job and commits both the
// evaluation and the job to the raft log. It returns the eval.
47 func (s *Server) DispatchJob(job *structs.Job) (*structs.Evaluation, error) { 48 // Commit this update via Raft 49 job.SetSubmitTime() 50 req := structs.JobRegisterRequest{Job: job} 51 _, index, err := s.raftApply(structs.JobRegisterRequestType, req) 52 if err != nil { 53 return nil, err 54 } 55 56 // Create a new evaluation 57 eval := &structs.Evaluation{ 58 ID: structs.GenerateUUID(), 59 Priority: job.Priority, 60 Type: job.Type, 61 TriggeredBy: structs.EvalTriggerPeriodicJob, 62 JobID: job.ID, 63 JobModifyIndex: index, 64 Status: structs.EvalStatusPending, 65 } 66 update := &structs.EvalUpdateRequest{ 67 Evals: []*structs.Evaluation{eval}, 68 } 69 70 // Commit this evaluation via Raft 71 // XXX: There is a risk of partial failure where the JobRegister succeeds 72 // but that the EvalUpdate does not. 73 _, evalIndex, err := s.raftApply(structs.EvalUpdateRequestType, update) 74 if err != nil { 75 return nil, err 76 } 77 78 // Update its indexes. 79 eval.CreateIndex = evalIndex 80 eval.ModifyIndex = evalIndex 81 return eval, nil 82 } 83 84 // RunningChildren checks whether the passed job has any running children. 85 func (s *Server) RunningChildren(job *structs.Job) (bool, error) { 86 state, err := s.fsm.State().Snapshot() 87 if err != nil { 88 return false, err 89 } 90 91 ws := memdb.NewWatchSet() 92 prefix := fmt.Sprintf("%s%s", job.ID, structs.PeriodicLaunchSuffix) 93 iter, err := state.JobsByIDPrefix(ws, prefix) 94 if err != nil { 95 return false, err 96 } 97 98 var child *structs.Job 99 for i := iter.Next(); i != nil; i = iter.Next() { 100 child = i.(*structs.Job) 101 102 // Ensure the job is actually a child. 103 if child.ParentID != job.ID { 104 continue 105 } 106 107 // Get the childs evaluations. 108 evals, err := state.EvalsByJob(ws, child.ID) 109 if err != nil { 110 return false, err 111 } 112 113 // Check if any of the evals are active or have running allocations. 
114 for _, eval := range evals { 115 if !eval.TerminalStatus() { 116 return true, nil 117 } 118 119 allocs, err := state.AllocsByEval(ws, eval.ID) 120 if err != nil { 121 return false, err 122 } 123 124 for _, alloc := range allocs { 125 if !alloc.TerminalStatus() { 126 return true, nil 127 } 128 } 129 } 130 } 131 132 // There are no evals or allocations that aren't terminal. 133 return false, nil 134 } 135 136 // NewPeriodicDispatch returns a periodic dispatcher that is used to track and 137 // launch periodic jobs. 138 func NewPeriodicDispatch(logger *log.Logger, dispatcher JobEvalDispatcher) *PeriodicDispatch { 139 return &PeriodicDispatch{ 140 dispatcher: dispatcher, 141 tracked: make(map[string]*structs.Job), 142 heap: NewPeriodicHeap(), 143 updateCh: make(chan struct{}, 1), 144 stopCh: make(chan struct{}), 145 waitCh: make(chan struct{}), 146 logger: logger, 147 } 148 } 149 150 // SetEnabled is used to control if the periodic dispatcher is enabled. It 151 // should only be enabled on the active leader. Disabling an active dispatcher 152 // will stop any launched go routine and flush the dispatcher. 153 func (p *PeriodicDispatch) SetEnabled(enabled bool) { 154 p.l.Lock() 155 p.enabled = enabled 156 p.l.Unlock() 157 if !enabled { 158 if p.running { 159 close(p.stopCh) 160 <-p.waitCh 161 p.running = false 162 } 163 p.Flush() 164 } 165 } 166 167 // Start begins the goroutine that creates derived jobs and evals. 168 func (p *PeriodicDispatch) Start() { 169 p.l.Lock() 170 p.running = true 171 p.l.Unlock() 172 go p.run() 173 } 174 175 // Tracked returns the set of tracked job IDs. 176 func (p *PeriodicDispatch) Tracked() []*structs.Job { 177 p.l.RLock() 178 defer p.l.RUnlock() 179 tracked := make([]*structs.Job, len(p.tracked)) 180 i := 0 181 for _, job := range p.tracked { 182 tracked[i] = job 183 i++ 184 } 185 return tracked 186 } 187 188 // Add begins tracking of a periodic job. If it is already tracked, it acts as 189 // an update to the jobs periodic spec. 
190 func (p *PeriodicDispatch) Add(job *structs.Job) error { 191 p.l.Lock() 192 defer p.l.Unlock() 193 194 // Do nothing if not enabled 195 if !p.enabled { 196 return nil 197 } 198 199 // If we were tracking a job and it has been disabled or made non-periodic remove it. 200 disabled := !job.IsPeriodic() || !job.Periodic.Enabled 201 _, tracked := p.tracked[job.ID] 202 if disabled { 203 if tracked { 204 p.removeLocked(job.ID) 205 } 206 207 // If the job is disabled and we aren't tracking it, do nothing. 208 return nil 209 } 210 211 // Check if the job is also a parameterized job. If it is, then we do not want to 212 // treat it as a periodic job but only its dispatched children. 213 if job.IsParameterized() { 214 return nil 215 } 216 217 // Add or update the job. 218 p.tracked[job.ID] = job 219 next := job.Periodic.Next(time.Now().In(job.Periodic.GetLocation())) 220 if tracked { 221 if err := p.heap.Update(job, next); err != nil { 222 return fmt.Errorf("failed to update job %v launch time: %v", job.ID, err) 223 } 224 p.logger.Printf("[DEBUG] nomad.periodic: updated periodic job %q", job.ID) 225 } else { 226 if err := p.heap.Push(job, next); err != nil { 227 return fmt.Errorf("failed to add job %v: %v", job.ID, err) 228 } 229 p.logger.Printf("[DEBUG] nomad.periodic: registered periodic job %q", job.ID) 230 } 231 232 // Signal an update. 233 if p.running { 234 select { 235 case p.updateCh <- struct{}{}: 236 default: 237 } 238 } 239 240 return nil 241 } 242 243 // Remove stops tracking the passed job. If the job is not tracked, it is a 244 // no-op. 245 func (p *PeriodicDispatch) Remove(jobID string) error { 246 p.l.Lock() 247 defer p.l.Unlock() 248 return p.removeLocked(jobID) 249 } 250 251 // Remove stops tracking the passed job. If the job is not tracked, it is a 252 // no-op. It assumes this is called while a lock is held. 
253 func (p *PeriodicDispatch) removeLocked(jobID string) error { 254 // Do nothing if not enabled 255 if !p.enabled { 256 return nil 257 } 258 259 job, tracked := p.tracked[jobID] 260 if !tracked { 261 return nil 262 } 263 264 delete(p.tracked, jobID) 265 if err := p.heap.Remove(job); err != nil { 266 return fmt.Errorf("failed to remove tracked job %v: %v", jobID, err) 267 } 268 269 // Signal an update. 270 if p.running { 271 select { 272 case p.updateCh <- struct{}{}: 273 default: 274 } 275 } 276 277 p.logger.Printf("[DEBUG] nomad.periodic: deregistered periodic job %q", jobID) 278 return nil 279 } 280 281 // ForceRun causes the periodic job to be evaluated immediately and returns the 282 // subsequent eval. 283 func (p *PeriodicDispatch) ForceRun(jobID string) (*structs.Evaluation, error) { 284 p.l.Lock() 285 286 // Do nothing if not enabled 287 if !p.enabled { 288 p.l.Unlock() 289 return nil, fmt.Errorf("periodic dispatch disabled") 290 } 291 292 job, tracked := p.tracked[jobID] 293 if !tracked { 294 p.l.Unlock() 295 return nil, fmt.Errorf("can't force run non-tracked job %v", jobID) 296 } 297 298 p.l.Unlock() 299 return p.createEval(job, time.Now().In(job.Periodic.GetLocation())) 300 } 301 302 // shouldRun returns whether the long lived run function should run. 303 func (p *PeriodicDispatch) shouldRun() bool { 304 p.l.RLock() 305 defer p.l.RUnlock() 306 return p.enabled && p.running 307 } 308 309 // run is a long-lived function that waits till a job's periodic spec is met and 310 // then creates an evaluation to run the job. 
311 func (p *PeriodicDispatch) run() { 312 defer close(p.waitCh) 313 var launchCh <-chan time.Time 314 for p.shouldRun() { 315 job, launch := p.nextLaunch() 316 if launch.IsZero() { 317 launchCh = nil 318 } else { 319 launchDur := launch.Sub(time.Now().In(job.Periodic.GetLocation())) 320 launchCh = time.After(launchDur) 321 p.logger.Printf("[DEBUG] nomad.periodic: launching job %q in %s", job.ID, launchDur) 322 } 323 324 select { 325 case <-p.stopCh: 326 return 327 case <-p.updateCh: 328 continue 329 case <-launchCh: 330 p.dispatch(job, launch) 331 } 332 } 333 } 334 335 // dispatch creates an evaluation for the job and updates its next launchtime 336 // based on the passed launch time. 337 func (p *PeriodicDispatch) dispatch(job *structs.Job, launchTime time.Time) { 338 p.l.Lock() 339 340 nextLaunch := job.Periodic.Next(launchTime) 341 if err := p.heap.Update(job, nextLaunch); err != nil { 342 p.logger.Printf("[ERR] nomad.periodic: failed to update next launch of periodic job %q: %v", job.ID, err) 343 } 344 345 // If the job prohibits overlapping and there are running children, we skip 346 // the launch. 347 if job.Periodic.ProhibitOverlap { 348 running, err := p.dispatcher.RunningChildren(job) 349 if err != nil { 350 msg := fmt.Sprintf("[ERR] nomad.periodic: failed to determine if"+ 351 " periodic job %q has running children: %v", job.ID, err) 352 p.logger.Println(msg) 353 p.l.Unlock() 354 return 355 } 356 357 if running { 358 msg := fmt.Sprintf("[DEBUG] nomad.periodic: skipping launch of"+ 359 " periodic job %q because job prohibits overlap", job.ID) 360 p.logger.Println(msg) 361 p.l.Unlock() 362 return 363 } 364 } 365 366 p.logger.Printf("[DEBUG] nomad.periodic: launching job %v at %v", job.ID, launchTime) 367 p.l.Unlock() 368 p.createEval(job, launchTime) 369 } 370 371 // nextLaunch returns the next job to launch and when it should be launched. If 372 // the next job can't be determined, an error is returned. 
If the dispatcher is 373 // stopped, a nil job will be returned. 374 func (p *PeriodicDispatch) nextLaunch() (*structs.Job, time.Time) { 375 // If there is nothing wait for an update. 376 p.l.RLock() 377 defer p.l.RUnlock() 378 if p.heap.Length() == 0 { 379 return nil, time.Time{} 380 } 381 382 nextJob := p.heap.Peek() 383 if nextJob == nil { 384 return nil, time.Time{} 385 } 386 387 return nextJob.job, nextJob.next 388 } 389 390 // createEval instantiates a job based on the passed periodic job and submits an 391 // evaluation for it. This should not be called with the lock held. 392 func (p *PeriodicDispatch) createEval(periodicJob *structs.Job, time time.Time) (*structs.Evaluation, error) { 393 derived, err := p.deriveJob(periodicJob, time) 394 if err != nil { 395 return nil, err 396 } 397 398 eval, err := p.dispatcher.DispatchJob(derived) 399 if err != nil { 400 p.logger.Printf("[ERR] nomad.periodic: failed to dispatch job %q: %v", periodicJob.ID, err) 401 return nil, err 402 } 403 404 return eval, nil 405 } 406 407 // deriveJob instantiates a new job based on the passed periodic job and the 408 // launch time. 409 func (p *PeriodicDispatch) deriveJob(periodicJob *structs.Job, time time.Time) ( 410 derived *structs.Job, err error) { 411 412 // Have to recover in case the job copy panics. 413 defer func() { 414 if r := recover(); r != nil { 415 p.logger.Printf("[ERR] nomad.periodic: deriving job from"+ 416 " periodic job %v failed; deregistering from periodic runner: %v", 417 periodicJob.ID, r) 418 p.Remove(periodicJob.ID) 419 derived = nil 420 err = fmt.Errorf("Failed to create a copy of the periodic job %v: %v", periodicJob.ID, r) 421 } 422 }() 423 424 // Create a copy of the periodic job, give it a derived ID/Name and make it 425 // non-periodic. 
426 derived = periodicJob.Copy() 427 derived.ParentID = periodicJob.ID 428 derived.ID = p.derivedJobID(periodicJob, time) 429 derived.Name = derived.ID 430 derived.Periodic = nil 431 return 432 } 433 434 // deriveJobID returns a job ID based on the parent periodic job and the launch 435 // time. 436 func (p *PeriodicDispatch) derivedJobID(periodicJob *structs.Job, time time.Time) string { 437 return fmt.Sprintf("%s%s%d", periodicJob.ID, structs.PeriodicLaunchSuffix, time.Unix()) 438 } 439 440 // LaunchTime returns the launch time of the job. This is only valid for 441 // jobs created by PeriodicDispatch and will otherwise return an error. 442 func (p *PeriodicDispatch) LaunchTime(jobID string) (time.Time, error) { 443 index := strings.LastIndex(jobID, structs.PeriodicLaunchSuffix) 444 if index == -1 { 445 return time.Time{}, fmt.Errorf("couldn't parse launch time from eval: %v", jobID) 446 } 447 448 launch, err := strconv.Atoi(jobID[index+len(structs.PeriodicLaunchSuffix):]) 449 if err != nil { 450 return time.Time{}, fmt.Errorf("couldn't parse launch time from eval: %v", jobID) 451 } 452 453 return time.Unix(int64(launch), 0), nil 454 } 455 456 // Flush clears the state of the PeriodicDispatcher 457 func (p *PeriodicDispatch) Flush() { 458 p.l.Lock() 459 defer p.l.Unlock() 460 p.stopCh = make(chan struct{}) 461 p.updateCh = make(chan struct{}, 1) 462 p.waitCh = make(chan struct{}) 463 p.tracked = make(map[string]*structs.Job) 464 p.heap = NewPeriodicHeap() 465 } 466 467 // periodicHeap wraps a heap and gives operations other than Push/Pop. 
468 type periodicHeap struct { 469 index map[string]*periodicJob 470 heap periodicHeapImp 471 } 472 473 type periodicJob struct { 474 job *structs.Job 475 next time.Time 476 index int 477 } 478 479 func NewPeriodicHeap() *periodicHeap { 480 return &periodicHeap{ 481 index: make(map[string]*periodicJob), 482 heap: make(periodicHeapImp, 0), 483 } 484 } 485 486 func (p *periodicHeap) Push(job *structs.Job, next time.Time) error { 487 if _, ok := p.index[job.ID]; ok { 488 return fmt.Errorf("job %v already exists", job.ID) 489 } 490 491 pJob := &periodicJob{job, next, 0} 492 p.index[job.ID] = pJob 493 heap.Push(&p.heap, pJob) 494 return nil 495 } 496 497 func (p *periodicHeap) Pop() *periodicJob { 498 if len(p.heap) == 0 { 499 return nil 500 } 501 502 pJob := heap.Pop(&p.heap).(*periodicJob) 503 delete(p.index, pJob.job.ID) 504 return pJob 505 } 506 507 func (p *periodicHeap) Peek() *periodicJob { 508 if len(p.heap) == 0 { 509 return nil 510 } 511 512 return p.heap[0] 513 } 514 515 func (p *periodicHeap) Contains(job *structs.Job) bool { 516 _, ok := p.index[job.ID] 517 return ok 518 } 519 520 func (p *periodicHeap) Update(job *structs.Job, next time.Time) error { 521 if pJob, ok := p.index[job.ID]; ok { 522 // Need to update the job as well because its spec can change. 523 pJob.job = job 524 pJob.next = next 525 heap.Fix(&p.heap, pJob.index) 526 return nil 527 } 528 529 return fmt.Errorf("heap doesn't contain job %v", job.ID) 530 } 531 532 func (p *periodicHeap) Remove(job *structs.Job) error { 533 if pJob, ok := p.index[job.ID]; ok { 534 heap.Remove(&p.heap, pJob.index) 535 delete(p.index, job.ID) 536 return nil 537 } 538 539 return fmt.Errorf("heap doesn't contain job %v", job.ID) 540 } 541 542 func (p *periodicHeap) Length() int { 543 return len(p.heap) 544 } 545 546 type periodicHeapImp []*periodicJob 547 548 func (h periodicHeapImp) Len() int { return len(h) } 549 550 func (h periodicHeapImp) Less(i, j int) bool { 551 // Two zero times should return false. 
552 // Otherwise, zero is "greater" than any other time. 553 // (To sort it at the end of the list.) 554 // Sort such that zero times are at the end of the list. 555 iZero, jZero := h[i].next.IsZero(), h[j].next.IsZero() 556 if iZero && jZero { 557 return false 558 } else if iZero { 559 return false 560 } else if jZero { 561 return true 562 } 563 564 return h[i].next.Before(h[j].next) 565 } 566 567 func (h periodicHeapImp) Swap(i, j int) { 568 h[i], h[j] = h[j], h[i] 569 h[i].index = i 570 h[j].index = j 571 } 572 573 func (h *periodicHeapImp) Push(x interface{}) { 574 n := len(*h) 575 job := x.(*periodicJob) 576 job.index = n 577 *h = append(*h, job) 578 } 579 580 func (h *periodicHeapImp) Pop() interface{} { 581 old := *h 582 n := len(old) 583 job := old[n-1] 584 job.index = -1 // for safety 585 *h = old[0 : n-1] 586 return job 587 }