package allocrunner

import (
	"context"
	"fmt"
	"log"
	"path/filepath"
	"sync"
	"time"

	metrics "github.com/armon/go-metrics"
	"github.com/boltdb/bolt"
	"github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/client/allocdir"
	"github.com/hashicorp/nomad/client/allocrunner/taskrunner"
	"github.com/hashicorp/nomad/client/config"
	consulApi "github.com/hashicorp/nomad/client/consul"
	"github.com/hashicorp/nomad/client/state"
	"github.com/hashicorp/nomad/client/vaultclient"
	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/nomad/structs"

	cstructs "github.com/hashicorp/nomad/client/structs"
)

var (
	// The following are the key paths written to the state database
	allocRunnerStateAllocKey     = []byte("alloc")
	allocRunnerStateImmutableKey = []byte("immutable")
	allocRunnerStateMutableKey   = []byte("mutable")
	allocRunnerStateAllocDirKey  = []byte("alloc-dir")
)

// AllocStateUpdater is used to update the status of an allocation
type AllocStateUpdater func(alloc *structs.Allocation)

type AllocStatsReporter interface {
	LatestAllocStats(taskFilter string) (*cstructs.AllocResourceUsage, error)
}

// AllocRunner is used to wrap an allocation and provide the execution context.
type AllocRunner struct {
	config  *config.Config
	updater AllocStateUpdater
	logger  *log.Logger

	// allocID is the ID of this runner's allocation. Since it does not
	// change for the lifetime of the AllocRunner it is safe to read
	// without acquiring a lock (unlike alloc).
	allocID string

	alloc                  *structs.Allocation
	allocClientStatus      string // Explicit status of allocation. Set when there are failures
	allocClientDescription string
	allocHealth            *bool     // Whether the allocation is healthy
	allocHealthTime        time.Time // Time at which allocation health has been set
	allocBroadcast         *cstructs.AllocBroadcaster
	allocLock              sync.Mutex

	dirtyCh chan struct{}

	allocDir     *allocdir.AllocDir
	allocDirLock sync.Mutex

	tasks      map[string]*taskrunner.TaskRunner
	taskStates map[string]*structs.TaskState
	restored   map[string]struct{}
	taskLock   sync.RWMutex

	taskStatusLock sync.RWMutex

	updateCh chan *structs.Allocation

	vaultClient  vaultclient.VaultClient
	consulClient consulApi.ConsulServiceAPI

	// prevAlloc allows waiting until a previous allocation exits and then
	// migrates its data. If sticky volumes aren't used and there's no
	// previous allocation, a noop implementation is used so it is always
	// safe to call.
	prevAlloc prevAllocWatcher

	// ctx is cancelled with exitFn to cause the alloc to be destroyed
	// (stopped and GC'd).
	ctx    context.Context
	exitFn context.CancelFunc

	// waitCh is closed when the Run method exits. At that point the alloc
	// has stopped and been GC'd.
	waitCh chan struct{}

	// State related fields
	// stateDB is used to store the alloc runner's state
	stateDB        *bolt.DB
	allocStateLock sync.Mutex

	// persistedEval is the last persisted evaluation ID. Since evaluation
	// IDs change on every allocation update we only need to persist the
	// allocation when its eval ID != the last persisted eval ID.
	persistedEvalLock sync.Mutex
	persistedEval     string

	// immutablePersisted and allocDirPersisted are used to track whether the
	// immutable data and the alloc dir have been persisted. Once persisted we
	// can lower write volume by not re-writing these values
	immutablePersisted bool
	allocDirPersisted  bool

	// baseLabels are used when emitting tagged metrics. All alloc runner metrics
	// will have these tags, and optionally more.
	baseLabels []metrics.Label
}

// allocRunnerAllocState is state that only has to be written when the alloc
// changes.
type allocRunnerAllocState struct {
	Alloc *structs.Allocation
}

// allocRunnerImmutableState is state that only has to be written once.
type allocRunnerImmutableState struct {
	Version string
}

// allocRunnerMutableState is state that has to be written on each save as it
// changes over the life-cycle of the alloc_runner.
type allocRunnerMutableState struct {
	AllocClientStatus      string
	AllocClientDescription string
	TaskStates             map[string]*structs.TaskState
	DeploymentStatus       *structs.AllocDeploymentStatus
}

// NewAllocRunner is used to create a new allocation context
func NewAllocRunner(logger *log.Logger, config *config.Config, stateDB *bolt.DB, updater AllocStateUpdater,
	alloc *structs.Allocation, vaultClient vaultclient.VaultClient, consulClient consulApi.ConsulServiceAPI,
	prevAlloc prevAllocWatcher) *AllocRunner {

	ar := &AllocRunner{
		config:         config,
		stateDB:        stateDB,
		updater:        updater,
		logger:         logger,
		alloc:          alloc,
		allocID:        alloc.ID,
		allocBroadcast: cstructs.NewAllocBroadcaster(8),
		prevAlloc:      prevAlloc,
		dirtyCh:        make(chan struct{}, 1),
		allocDir:       allocdir.NewAllocDir(logger, filepath.Join(config.AllocDir, alloc.ID)),
		tasks:          make(map[string]*taskrunner.TaskRunner),
		taskStates:     copyTaskStates(alloc.TaskStates),
		restored:       make(map[string]struct{}),
		updateCh:       make(chan *structs.Allocation, 64),
		waitCh:         make(chan struct{}),
		vaultClient:    vaultClient,
		consulClient:   consulClient,
	}

	// TODO Should be passed a context
	ar.ctx, ar.exitFn = context.WithCancel(context.TODO())

	return ar
}

// setBaseLabels creates the set of base labels. This should be called after
// Restore has been called so the allocation is guaranteed to be loaded
func (r *AllocRunner) setBaseLabels() {
	r.baseLabels = make([]metrics.Label, 0, 3)

	if r.alloc.Job != nil {
		r.baseLabels = append(r.baseLabels, metrics.Label{
			Name:  "job",
			Value: r.alloc.Job.Name,
		})
	}
	if r.alloc.TaskGroup != "" {
		r.baseLabels = append(r.baseLabels, metrics.Label{
			Name:  "task_group",
			Value: r.alloc.TaskGroup,
		})
	}
	if r.config != nil && r.config.Node != nil {
		r.baseLabels = append(r.baseLabels, metrics.Label{
			Name:  "node_id",
			Value: r.config.Node.ID,
		})
	}
}

// pre060StateFilePath returns the path to our state file that would have been
// written pre v0.6.0
// COMPAT: Remove in 0.7.0
func (r *AllocRunner) pre060StateFilePath() string {
	r.allocLock.Lock()
	defer r.allocLock.Unlock()
	path := filepath.Join(r.config.StateDir, "alloc", r.allocID, "state.json")
	return path
}

// RestoreState is used to restore the state of the alloc runner
func (r *AllocRunner) RestoreState() error {
	err := r.stateDB.View(func(tx *bolt.Tx) error {
		bkt, err := state.GetAllocationBucket(tx, r.allocID)
		if err != nil {
			return fmt.Errorf("failed to get allocation bucket: %v", err)
		}

		// Get the state objects
		var mutable allocRunnerMutableState
		var immutable allocRunnerImmutableState
		var allocState allocRunnerAllocState
		var allocDir allocdir.AllocDir

		if err := state.GetObject(bkt, allocRunnerStateAllocKey, &allocState); err != nil {
			return fmt.Errorf("failed to read alloc runner alloc state: %v", err)
		}
		if err := state.GetObject(bkt, allocRunnerStateImmutableKey, &immutable); err != nil {
			return fmt.Errorf("failed to read alloc runner immutable state: %v", err)
		}
		if err := state.GetObject(bkt, allocRunnerStateMutableKey, &mutable); err != nil {
			return fmt.Errorf("failed to read alloc runner mutable state: %v", err)
		}
		if err := state.GetObject(bkt, allocRunnerStateAllocDirKey, &allocDir); err != nil {
			return fmt.Errorf("failed to read alloc runner alloc_dir state: %v", err)
		}

		// Populate the fields
		r.alloc = allocState.Alloc
		r.allocDir = &allocDir
		r.allocClientStatus = mutable.AllocClientStatus
		r.allocClientDescription = mutable.AllocClientDescription
		r.taskStates = mutable.TaskStates
		r.alloc.ClientStatus = getClientStatus(r.taskStates)
		r.alloc.DeploymentStatus = mutable.DeploymentStatus
		return nil
	})

	if err != nil {
		return fmt.Errorf("failed to read allocation state: %v", err)
	}

	var snapshotErrors multierror.Error
	if r.alloc == nil {
		snapshotErrors.Errors = append(snapshotErrors.Errors, fmt.Errorf("alloc_runner snapshot includes a nil allocation"))
	}
	if r.allocDir == nil {
		snapshotErrors.Errors = append(snapshotErrors.Errors, fmt.Errorf("alloc_runner snapshot includes a nil alloc dir"))
	}
	if e := snapshotErrors.ErrorOrNil(); e != nil {
		return e
	}

	tg := r.alloc.Job.LookupTaskGroup(r.alloc.TaskGroup)
	if tg == nil {
		return fmt.Errorf("restored allocation doesn't contain task group %q", r.alloc.TaskGroup)
	}

	// Restore the task runners
	taskDestroyEvent := structs.NewTaskEvent(structs.TaskKilled)
	var mErr multierror.Error
	for _, task := range tg.Tasks {
		name := task.Name
		state := r.taskStates[name]

		// Nomad exited before the task could start, nothing to restore.
		// AllocRunner.Run will start a new TaskRunner for this task
		if state == nil {
			continue
		}

		// Mark the task as restored.
		r.restored[name] = struct{}{}

		td, ok := r.allocDir.TaskDirs[name]
		if !ok {
			// Create the task dir metadata if it doesn't exist.
			// Since task dirs are created during r.Run() the
			// client may save state and exit before all task dirs
			// are created
			td = r.allocDir.NewTaskDir(name)
		}

		// Skip tasks in terminal states.
		if state.State == structs.TaskStateDead {
			continue
		}

		tr := taskrunner.NewTaskRunner(r.logger, r.config, r.stateDB, r.setTaskState, td, r.Alloc(), task, r.vaultClient, r.consulClient)
		r.tasks[name] = tr

		if restartReason, err := tr.RestoreState(); err != nil {
			r.logger.Printf("[ERR] client: failed to restore state for alloc %s task %q: %v", r.allocID, name, err)
			mErr.Errors = append(mErr.Errors, err)
		} else if !r.alloc.TerminalStatus() {
			// Only start if the alloc isn't in a terminal status.
			go tr.Run()

			// Restart task runner if RestoreState gave a reason
			if restartReason != "" {
				r.logger.Printf("[INFO] client: restarting alloc %s task %s: %v", r.allocID, name, restartReason)
				const failure = false
				tr.Restart("upgrade", restartReason, failure)
			}
		} else {
			// XXX This does nothing and is broken since the task runner is not
			// running yet, and there is nothing listening to the destroy ch.
			// XXX When a single task is dead in the allocation we should kill
			// all the tasks. This currently does NOT happen. Re-enable test:
			// TestAllocRunner_TaskLeader_StopRestoredTG
			tr.Destroy(taskDestroyEvent)
		}
	}

	return mErr.ErrorOrNil()
}

// SaveState is used to snapshot the state of the alloc runner and then the
// state of each task runner associated with the alloc.
func (r *AllocRunner) SaveState() error {
	if err := r.saveAllocRunnerState(); err != nil {
		return err
	}

	// Save state for each task
	runners := r.getTaskRunners()
	var mErr multierror.Error
	for _, tr := range runners {
		if err := tr.SaveState(); err != nil {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("failed to save state for alloc %s task %q: %v",
				r.allocID, tr.Name(), err))
		}
	}
	return mErr.ErrorOrNil()
}

func (r *AllocRunner) saveAllocRunnerState() error {
	r.allocStateLock.Lock()
	defer r.allocStateLock.Unlock()

	if r.ctx.Err() == context.Canceled {
		return nil
	}

	// Grab all the relevant data
	alloc := r.Alloc()

	r.allocLock.Lock()
	allocClientStatus := r.allocClientStatus
	allocClientDescription := r.allocClientDescription
	r.allocLock.Unlock()

	r.allocDirLock.Lock()
	allocDir := r.allocDir.Copy()
	r.allocDirLock.Unlock()

	// Start the transaction.
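	// bolt's Batch coalesces concurrent calls into a single write
	// transaction, which keeps disk writes low when many alloc runners save
	// state at once; the OnCommit callbacks registered below only run once
	// that transaction actually commits.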
	return r.stateDB.Batch(func(tx *bolt.Tx) error {

		// Grab the allocation bucket
		allocBkt, err := state.GetAllocationBucket(tx, r.allocID)
		if err != nil {
			return fmt.Errorf("failed to retrieve allocation bucket: %v", err)
		}

		// Write the allocation if the eval has changed
		r.persistedEvalLock.Lock()
		lastPersisted := r.persistedEval
		r.persistedEvalLock.Unlock()
		if alloc.EvalID != lastPersisted {
			allocState := &allocRunnerAllocState{
				Alloc: alloc,
			}

			if err := state.PutObject(allocBkt, allocRunnerStateAllocKey, &allocState); err != nil {
				return fmt.Errorf("failed to write alloc_runner alloc state: %v", err)
			}

			tx.OnCommit(func() {
				r.persistedEvalLock.Lock()
				r.persistedEval = alloc.EvalID
				r.persistedEvalLock.Unlock()
			})
		}

		// Write immutable data iff it hasn't been written yet
		if !r.immutablePersisted {
			immutable := &allocRunnerImmutableState{
				Version: r.config.Version.VersionNumber(),
			}

			if err := state.PutObject(allocBkt, allocRunnerStateImmutableKey, &immutable); err != nil {
				return fmt.Errorf("failed to write alloc_runner immutable state: %v", err)
			}

			tx.OnCommit(func() {
				r.immutablePersisted = true
			})
		}

		// Write the alloc dir data if it hasn't been written before and it exists.
		if !r.allocDirPersisted && allocDir != nil {
			if err := state.PutObject(allocBkt, allocRunnerStateAllocDirKey, allocDir); err != nil {
				return fmt.Errorf("failed to write alloc_runner allocDir state: %v", err)
			}

			tx.OnCommit(func() {
				r.allocDirPersisted = true
			})
		}

		// Write the mutable state every time
		mutable := &allocRunnerMutableState{
			AllocClientStatus:      allocClientStatus,
			AllocClientDescription: allocClientDescription,
			TaskStates:             alloc.TaskStates,
			DeploymentStatus:       alloc.DeploymentStatus,
		}

		if err := state.PutObject(allocBkt, allocRunnerStateMutableKey, &mutable); err != nil {
			return fmt.Errorf("failed to write alloc_runner mutable state: %v", err)
		}

		return nil
	})
}

// DestroyState is used to cleanup after ourselves
func (r *AllocRunner) DestroyState() error {
	r.allocStateLock.Lock()
	defer r.allocStateLock.Unlock()

	return r.stateDB.Update(func(tx *bolt.Tx) error {
		if err := state.DeleteAllocationBucket(tx, r.allocID); err != nil {
			return fmt.Errorf("failed to delete allocation bucket: %v", err)
		}
		return nil
	})
}

// DestroyContext is used to destroy the context
func (r *AllocRunner) DestroyContext() error {
	return r.allocDir.Destroy()
}

// GetAllocDir returns the alloc dir for the alloc runner
func (r *AllocRunner) GetAllocDir() *allocdir.AllocDir {
	return r.allocDir
}

// GetListener returns a listener for updates broadcast by this alloc runner.
// Callers are responsible for calling Close on their Listener.
func (r *AllocRunner) GetListener() *cstructs.AllocListener {
	return r.allocBroadcast.Listen()
}

// copyTaskStates returns a copy of the passed task states.
func copyTaskStates(states map[string]*structs.TaskState) map[string]*structs.TaskState {
	copy := make(map[string]*structs.TaskState, len(states))
	for task, state := range states {
		copy[task] = state.Copy()
	}
	return copy
}

// finalizeTerminalAlloc sets any missing required fields like finishedAt in
// the alloc runner's task states. finishedAt is used to calculate the
// reschedule time for failed allocs, so we make sure that it is set.
func (r *AllocRunner) finalizeTerminalAlloc(alloc *structs.Allocation) {
	if !alloc.ClientTerminalStatus() {
		return
	}
	r.taskStatusLock.Lock()
	defer r.taskStatusLock.Unlock()

	group := alloc.Job.LookupTaskGroup(alloc.TaskGroup)
	if r.taskStates == nil {
		r.taskStates = make(map[string]*structs.TaskState)
	}
	now := time.Now()
	for _, task := range group.Tasks {
		ts, ok := r.taskStates[task.Name]
		if !ok {
			ts = &structs.TaskState{}
			r.taskStates[task.Name] = ts
		}
		if ts.FinishedAt.IsZero() {
			ts.FinishedAt = now
		}
	}
	alloc.TaskStates = copyTaskStates(r.taskStates)
}

// Alloc returns the associated allocation
func (r *AllocRunner) Alloc() *structs.Allocation {
	r.allocLock.Lock()

	// Don't do a deep copy of the job
	alloc := r.alloc.CopySkipJob()

	// The status has explicitly been set.
	if r.allocClientStatus != "" || r.allocClientDescription != "" {
		alloc.ClientStatus = r.allocClientStatus
		alloc.ClientDescription = r.allocClientDescription

		// Copy over the task states so we don't lose them
		r.taskStatusLock.RLock()
		alloc.TaskStates = copyTaskStates(r.taskStates)
		r.taskStatusLock.RUnlock()

		r.allocLock.Unlock()
		r.finalizeTerminalAlloc(alloc)
		return alloc
	}

	// The health has been set
	if r.allocHealth != nil {
		if alloc.DeploymentStatus == nil {
			alloc.DeploymentStatus = &structs.AllocDeploymentStatus{}
		}
		alloc.DeploymentStatus.Healthy = helper.BoolToPtr(*r.allocHealth)
		alloc.DeploymentStatus.Timestamp = r.allocHealthTime
	}
	r.allocLock.Unlock()

	// Scan the task states to determine the status of the alloc
	r.taskStatusLock.RLock()
	alloc.TaskStates = copyTaskStates(r.taskStates)
	alloc.ClientStatus = getClientStatus(r.taskStates)
	r.taskStatusLock.RUnlock()

	// If the client status is failed and we are part of a deployment, mark the
	// alloc as unhealthy. This guards against the watcher not being started.
	r.allocLock.Lock()
	if alloc.ClientStatus == structs.AllocClientStatusFailed &&
		alloc.DeploymentID != "" && !alloc.DeploymentStatus.IsUnhealthy() {
		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: helper.BoolToPtr(false),
		}
	}
	r.allocLock.Unlock()
	r.finalizeTerminalAlloc(alloc)
	return alloc
}

// getClientStatus takes in the task states for a given allocation and computes
// the client status
func getClientStatus(taskStates map[string]*structs.TaskState) string {
	var pending, running, dead, failed bool
	for _, state := range taskStates {
		switch state.State {
		case structs.TaskStateRunning:
			running = true
		case structs.TaskStatePending:
			pending = true
		case structs.TaskStateDead:
			if state.Failed {
				failed = true
			} else {
				dead = true
			}
		}
	}

	// Determine the alloc status
	if failed {
		return structs.AllocClientStatusFailed
	} else if running {
		return structs.AllocClientStatusRunning
	} else if pending {
		return structs.AllocClientStatusPending
	} else if dead {
		return structs.AllocClientStatusComplete
	}

	return ""
}

// dirtySyncState is used to watch for state being marked dirty to sync
func (r *AllocRunner) dirtySyncState() {
	for {
		select {
		case <-r.dirtyCh:
			if err := r.syncStatus(); err != nil {
				// Only WARN instead of ERR because we continue on
				r.logger.Printf("[WARN] client: error persisting alloc %q state: %v",
					r.allocID, err)
			}
		case <-r.ctx.Done():
			return
		}
	}
}

// syncStatus is used to run and sync the status when it changes
func (r *AllocRunner) syncStatus() error {
	// Get a copy of our alloc, update status server side and sync to disk
	alloc := r.Alloc()
	r.updater(alloc)
	r.sendBroadcast(alloc)
	return r.saveAllocRunnerState()
}

// sendBroadcast broadcasts an alloc update.
func (r *AllocRunner) sendBroadcast(alloc *structs.Allocation) {
	// Try to send the alloc up to three times with a delay to allow recovery.
	sent := false
	for i := 0; i < 3; i++ {
		if sent = r.allocBroadcast.Send(alloc); sent {
			break
		}
		time.Sleep(500 * time.Millisecond)
	}
	if !sent {
		r.logger.Printf("[WARN] client: failed to broadcast update to allocation %q", r.allocID)
	}
}

// setStatus is used to update the allocation status
func (r *AllocRunner) setStatus(status, desc string) {
	r.allocLock.Lock()
	r.allocClientStatus = status
	r.allocClientDescription = desc
	r.allocLock.Unlock()
	select {
	case r.dirtyCh <- struct{}{}:
	default:
	}
}

// setTaskState is used to set the status of a task. If lazySync is set then the
// event is appended but not synced with the server. If state is omitted, the
// last known state is used.
func (r *AllocRunner) setTaskState(taskName, state string, event *structs.TaskEvent, lazySync bool) {
	r.taskStatusLock.Lock()
	defer r.taskStatusLock.Unlock()
	taskState, ok := r.taskStates[taskName]
	if !ok {
		taskState = &structs.TaskState{}
		r.taskStates[taskName] = taskState
	}

	// Set the task's state.
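	// The event (if any) is recorded first so that the Failed flag and
	// restart counters are updated before the state transition below is
	// applied; with lazySync the function returns right after appending the
	// event, without syncing to the server.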
	if event != nil {
		if event.FailsTask {
			taskState.Failed = true
		}
		if event.Type == structs.TaskRestarting {
			if !r.config.DisableTaggedMetrics {
				metrics.IncrCounterWithLabels([]string{"client", "allocs", "restart"},
					1, r.baseLabels)
			}
			if r.config.BackwardsCompatibleMetrics {
				metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, taskName, "restart"}, 1)
			}
			taskState.Restarts++
			taskState.LastRestart = time.Unix(0, event.Time)
		}
		r.appendTaskEvent(taskState, event)
	}

	if lazySync {
		return
	}

	// If the state hasn't been set use the existing state.
	if state == "" {
		state = taskState.State
		if taskState.State == "" {
			state = structs.TaskStatePending
		}
	}

	switch state {
	case structs.TaskStateRunning:
		// Capture the start time if it is just starting
		if taskState.State != structs.TaskStateRunning {
			taskState.StartedAt = time.Now().UTC()
			if !r.config.DisableTaggedMetrics {
				metrics.IncrCounterWithLabels([]string{"client", "allocs", "running"},
					1, r.baseLabels)
			}
			if r.config.BackwardsCompatibleMetrics {
				metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, taskName, "running"}, 1)
			}
		}
	case structs.TaskStateDead:
		// Capture the finished time if not already set
		if taskState.FinishedAt.IsZero() {
			taskState.FinishedAt = time.Now().UTC()
		}

		// Find all tasks that are not the one that is dead and check if the one
		// that is dead is a leader
		var otherTaskRunners []*taskrunner.TaskRunner
		var otherTaskNames []string
		leader := false
		for task, tr := range r.tasks {
			if task != taskName {
				otherTaskRunners = append(otherTaskRunners, tr)
				otherTaskNames = append(otherTaskNames, task)
			} else if tr.IsLeader() {
				leader = true
			}
		}

		// Emit metrics indicating task completion and failures
		if taskState.Failed {
			if !r.config.DisableTaggedMetrics {
				metrics.IncrCounterWithLabels([]string{"client", "allocs", "failed"},
					1, r.baseLabels)
			}
			if r.config.BackwardsCompatibleMetrics {
				metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, taskName, "failed"}, 1)
			}
		} else {
			if !r.config.DisableTaggedMetrics {
				metrics.IncrCounterWithLabels([]string{"client", "allocs", "complete"},
					1, r.baseLabels)
			}
			if r.config.BackwardsCompatibleMetrics {
				metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, taskName, "complete"}, 1)
			}
		}

		// If the task failed, we should kill all the other tasks in the task group.
		if taskState.Failed {
			for _, tr := range otherTaskRunners {
				tr.Destroy(structs.NewTaskEvent(structs.TaskSiblingFailed).SetFailedSibling(taskName))
			}
			if len(otherTaskRunners) > 0 {
				r.logger.Printf("[DEBUG] client: task %q failed, destroying other tasks in task group: %v", taskName, otherTaskNames)
			}
		} else if leader {
			// If the task was a leader task we should kill all the other tasks.
			for _, tr := range otherTaskRunners {
				tr.Destroy(structs.NewTaskEvent(structs.TaskLeaderDead))
			}
			if len(otherTaskRunners) > 0 {
				r.logger.Printf("[DEBUG] client: leader task %q is dead, destroying other tasks in task group: %v", taskName, otherTaskNames)
			}
		}
	}

	// Store the new state
	taskState.State = state

	select {
	case r.dirtyCh <- struct{}{}:
	default:
	}
}

// appendTaskEvent updates the task status by appending the new event.
func (r *AllocRunner) appendTaskEvent(state *structs.TaskState, event *structs.TaskEvent) {
	capacity := 10
	if state.Events == nil {
		state.Events = make([]*structs.TaskEvent, 0, capacity)
	}

	// If we hit capacity, then shift it.
	if len(state.Events) == capacity {
		old := state.Events
		state.Events = make([]*structs.TaskEvent, 0, capacity)
		state.Events = append(state.Events, old[1:]...)
	}

	state.Events = append(state.Events, event)
}

// Run is a long running goroutine used to manage an allocation
func (r *AllocRunner) Run() {
	defer close(r.waitCh)
	r.setBaseLabels()
	go r.dirtySyncState()

	// Find the task group to run in the allocation
	alloc := r.Alloc()
	tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup)
	if tg == nil {
		r.logger.Printf("[ERR] client: alloc %q for missing task group %q", r.allocID, alloc.TaskGroup)
		r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("missing task group '%s'", alloc.TaskGroup))
		return
	}

	// Build allocation directory (idempotent)
	r.allocDirLock.Lock()
	err := r.allocDir.Build()
	r.allocDirLock.Unlock()

	if err != nil {
		r.logger.Printf("[ERR] client: alloc %q failed to build task directories: %v", r.allocID, err)
		r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("failed to build task dirs for '%s'", alloc.TaskGroup))
		return
	}

	// Wait for a previous alloc - if any - to terminate
	if err := r.prevAlloc.Wait(r.ctx); err != nil {
		if err == context.Canceled {
			return
		}
		r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("error while waiting for previous alloc to terminate: %v", err))
		return
	}

	// Wait for data to be migrated from a previous alloc if applicable
	if err := r.prevAlloc.Migrate(r.ctx, r.allocDir); err != nil {
		if err == context.Canceled {
			return
		}

		// Soft-fail on migration errors
		r.logger.Printf("[WARN] client: alloc %q error while migrating data from previous alloc: %v", r.allocID, err)

		// Recreate alloc dir to ensure a clean slate
		r.allocDir.Destroy()
		if err := r.allocDir.Build(); err != nil {
			r.logger.Printf("[ERR] client: alloc %q failed to clean task directories after failed migration: %v", r.allocID, err)
			r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("failed to rebuild task dirs for '%s'", alloc.TaskGroup))
			return
		}
	}

	// Check if the allocation is in a terminal status. In this case, we don't
	// start any of the task runners and directly wait for the destroy signal to
	// clean up the allocation.
	if alloc.TerminalStatus() {
		r.logger.Printf("[DEBUG] client: alloc %q in terminal status, waiting for destroy", r.allocID)
		// mark this allocation as completed if it is not already in a
		// terminal state
		if !alloc.Terminated() {
			r.setStatus(structs.AllocClientStatusComplete, "canceled running tasks for allocation in terminal state")
		}
		r.handleDestroy()
		r.logger.Printf("[DEBUG] client: terminating runner for alloc '%s'", r.allocID)
		return
	}

	// Increment alloc runner start counter. Incr'd even when restoring existing tasks so 1 start != 1 task execution
	if !r.config.DisableTaggedMetrics {
		metrics.IncrCounterWithLabels([]string{"client", "allocs", "start"},
			1, r.baseLabels)
	}
	if r.config.BackwardsCompatibleMetrics {
		metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, "start"}, 1)
	}

	// Start the watcher
	wCtx, watcherCancel := context.WithCancel(r.ctx)
	go r.watchHealth(wCtx)

	// Start the task runners
	r.logger.Printf("[DEBUG] client: starting task runners for alloc '%s'", r.allocID)
	r.taskLock.Lock()
	for _, task := range tg.Tasks {
		if _, ok := r.restored[task.Name]; ok {
			continue
		}

		r.allocDirLock.Lock()
		taskdir := r.allocDir.NewTaskDir(task.Name)
		r.allocDirLock.Unlock()

		tr := taskrunner.NewTaskRunner(r.logger, r.config, r.stateDB, r.setTaskState, taskdir, r.Alloc(), task.Copy(), r.vaultClient, r.consulClient)
		r.tasks[task.Name] = tr
		tr.MarkReceived()

		go tr.Run()
	}
	r.taskLock.Unlock()

	// taskDestroyEvent contains an event that caused the destruction of a task
	// in the allocation.
	var taskDestroyEvent *structs.TaskEvent

OUTER:
	// Wait for updates
	for {
		select {
		case update := <-r.updateCh:
			// Store the updated allocation.
			r.allocLock.Lock()

			// If the deployment ids have changed clear the health
			if r.alloc.DeploymentID != update.DeploymentID {
				r.allocHealth = nil
				r.allocHealthTime = time.Time{}
			}

			r.alloc = update
			r.allocLock.Unlock()

			// Create a new watcher
			watcherCancel()
			wCtx, watcherCancel = context.WithCancel(r.ctx)
			go r.watchHealth(wCtx)

			// Check if we're in a terminal status
			if update.TerminalStatus() {
				taskDestroyEvent = structs.NewTaskEvent(structs.TaskKilled)
				break OUTER
			}

			// Update the task groups
			runners := r.getTaskRunners()
			for _, tr := range runners {
				tr.Update(update)
			}

			if err := r.syncStatus(); err != nil {
				r.logger.Printf("[WARN] client: failed to sync alloc %q status upon receiving alloc update: %v",
					r.allocID, err)
			}

		case <-r.ctx.Done():
			taskDestroyEvent = structs.NewTaskEvent(structs.TaskKilled)
			break OUTER
		}
	}

	// Kill the task runners
	r.destroyTaskRunners(taskDestroyEvent)

	// Block until we should destroy the state of the alloc
	r.handleDestroy()

	// Free up the context. It has likely exited already
	watcherCancel()

	r.logger.Printf("[DEBUG] client: terminating runner for alloc '%s'", r.allocID)
}

// destroyTaskRunners destroys the task runners, waits for them to terminate and
// then saves state.
func (r *AllocRunner) destroyTaskRunners(destroyEvent *structs.TaskEvent) {
	// First destroy the leader if one exists
	tg := r.alloc.Job.LookupTaskGroup(r.alloc.TaskGroup)
	leader := ""
	for _, task := range tg.Tasks {
		if task.Leader {
			leader = task.Name
			break
		}
	}
	if leader != "" {
		r.taskLock.RLock()
		tr := r.tasks[leader]
		r.taskLock.RUnlock()

		// Dead tasks don't have a task runner created so guard against
		// the leader being dead when this AR was saved.
		if tr == nil {
			r.logger.Printf("[DEBUG] client: alloc %q leader task %q of task group %q already stopped",
				r.allocID, leader, r.alloc.TaskGroup)
		} else {
			r.logger.Printf("[DEBUG] client: alloc %q destroying leader task %q of task group %q first",
				r.allocID, leader, r.alloc.TaskGroup)
			tr.Destroy(destroyEvent)
			<-tr.WaitCh()
		}
	}

	// Then destroy non-leader tasks concurrently
	r.taskLock.RLock()
	for name, tr := range r.tasks {
		if name != leader {
			tr.Destroy(destroyEvent)
		}
	}
	r.taskLock.RUnlock()

	// Wait for termination of the task runners
	for _, tr := range r.getTaskRunners() {
		<-tr.WaitCh()
	}
}

// handleDestroy blocks till the AllocRunner should be destroyed and does the
// necessary cleanup.
func (r *AllocRunner) handleDestroy() {
	// Final state sync. We do this to ensure that the server has the correct
	// state as we wait for a destroy.
	alloc := r.Alloc()

	// Increment the destroy count for this alloc runner since this allocation is being removed from this client.
	if !r.config.DisableTaggedMetrics {
		metrics.IncrCounterWithLabels([]string{"client", "allocs", "destroy"},
			1, r.baseLabels)
	}
	if r.config.BackwardsCompatibleMetrics {
		metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, "destroy"}, 1)
	}

	// Broadcast and persist state synchronously
	r.sendBroadcast(alloc)
	if err := r.saveAllocRunnerState(); err != nil {
		r.logger.Printf("[WARN] client: alloc %q unable to persist state but should be GC'd soon anyway:%v",
			r.allocID, err)
	}

	// Unmount any mounted directories since no tasks are running; this also
	// makes cleaning up Nomad's data directory simpler.
	if err := r.allocDir.UnmountAll(); err != nil {
		r.logger.Printf("[ERR] client: alloc %q unable unmount task directories: %v", r.allocID, err)
	}

	// Update the server with the alloc's status -- also marks the alloc as
	// being eligible for GC, so from this point on the alloc can be gc'd
	// at any time.
	r.updater(alloc)

	for {
		select {
		case <-r.ctx.Done():
			if err := r.DestroyContext(); err != nil {
				r.logger.Printf("[ERR] client: failed to destroy context for alloc '%s': %v",
					r.allocID, err)
			}
			if err := r.DestroyState(); err != nil {
				r.logger.Printf("[ERR] client: failed to destroy state for alloc '%s': %v",
					r.allocID, err)
			}

			return
		case <-r.updateCh:
			r.logger.Printf("[DEBUG] client: dropping update to terminal alloc '%s'", r.allocID)
		}
	}
}

// IsWaiting returns true if this alloc is waiting on a previous allocation to
// terminate.
func (r *AllocRunner) IsWaiting() bool {
	return r.prevAlloc.IsWaiting()
}

// IsMigrating returns true if this alloc is migrating data from a previous
// allocation.
func (r *AllocRunner) IsMigrating() bool {
	return r.prevAlloc.IsMigrating()
}

// Update is used to update the allocation of the context
func (r *AllocRunner) Update(update *structs.Allocation) {
	select {
	case r.updateCh <- update:
	default:
		r.logger.Printf("[ERR] client: dropping update to alloc '%s'", update.ID)
	}
}

// StatsReporter returns an interface to query resource usage statistics of an
// allocation
func (r *AllocRunner) StatsReporter() AllocStatsReporter {
	return r
}

// getTaskRunners is a helper that returns a copy of the task runners list using
// the taskLock.
func (r *AllocRunner) getTaskRunners() []*taskrunner.TaskRunner {
	// Get the task runners
	r.taskLock.RLock()
	defer r.taskLock.RUnlock()
	runners := make([]*taskrunner.TaskRunner, 0, len(r.tasks))
	for _, tr := range r.tasks {
		runners = append(runners, tr)
	}
	return runners
}

// LatestAllocStats returns the latest allocation stats. If the optional taskFilter is set
// the allocation stats will only include the given task.
func (r *AllocRunner) LatestAllocStats(taskFilter string) (*cstructs.AllocResourceUsage, error) {
	astat := &cstructs.AllocResourceUsage{
		Tasks: make(map[string]*cstructs.TaskResourceUsage),
	}

	var flat []*cstructs.TaskResourceUsage
	if taskFilter != "" {
		r.taskLock.RLock()
		tr, ok := r.tasks[taskFilter]
		r.taskLock.RUnlock()
		if !ok {
			return nil, fmt.Errorf("allocation %q has no task %q", r.allocID, taskFilter)
		}
		l := tr.LatestResourceUsage()
		if l != nil {
			astat.Tasks[taskFilter] = l
			flat = []*cstructs.TaskResourceUsage{l}
			astat.Timestamp = l.Timestamp
		}
	} else {
		// Get the task runners
		runners := r.getTaskRunners()
		for _, tr := range runners {
			l := tr.LatestResourceUsage()
			if l != nil {
				astat.Tasks[tr.Name()] = l
				flat = append(flat, l)
				if l.Timestamp > astat.Timestamp {
					astat.Timestamp = l.Timestamp
				}
			}
		}
	}

	astat.ResourceUsage = sumTaskResourceUsage(flat)
	return astat, nil
}

// sumTaskResourceUsage takes a set of task resources and sums their resources
func sumTaskResourceUsage(usages []*cstructs.TaskResourceUsage) *cstructs.ResourceUsage {
	summed := &cstructs.ResourceUsage{
		MemoryStats: &cstructs.MemoryStats{},
		CpuStats:    &cstructs.CpuStats{},
	}
	for _, usage := range usages {
		summed.Add(usage.ResourceUsage)
	}
	return summed
}

// ShouldUpdate takes the AllocModifyIndex of an allocation sent from the server and
// checks if the current running allocation is behind and should be updated.
func (r *AllocRunner) ShouldUpdate(serverIndex uint64) bool {
	r.allocLock.Lock()
	defer r.allocLock.Unlock()
	return r.alloc.AllocModifyIndex < serverIndex
}

// Destroy is used to indicate that the allocation context should be destroyed
func (r *AllocRunner) Destroy() {
	// Lock when closing the context as that gives the save state code
	// serialization.
	r.allocStateLock.Lock()
	defer r.allocStateLock.Unlock()

	r.exitFn()
	r.allocBroadcast.Close()
}

// IsDestroyed returns true if the AllocRunner is not running and has been
// destroyed (GC'd).
func (r *AllocRunner) IsDestroyed() bool {
	select {
	case <-r.waitCh:
		return true
	default:
		return false
	}
}

// WaitCh returns a channel to wait for termination
func (r *AllocRunner) WaitCh() <-chan struct{} {
	return r.waitCh
}

// AllocID returns the allocation ID of the allocation being run
func (r *AllocRunner) AllocID() string {
	if r == nil {
		return ""
	}
	return r.allocID
}
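
// The function below is an illustrative sketch only and is not part of the
// original source. It shows one plausible way a caller (such as the Nomad
// client) could drive an AllocRunner through its lifecycle using the exported
// API in this file; the parameters are assumed to be supplied by the
// surrounding client, and `update` is assumed to be an allocation received
// from the server.
func exampleAllocRunnerLifecycle(logger *log.Logger, cfg *config.Config, db *bolt.DB,
	updater AllocStateUpdater, alloc, update *structs.Allocation,
	vault vaultclient.VaultClient, consul consulApi.ConsulServiceAPI,
	prev prevAllocWatcher) {

	// Construct the runner and start it. Run blocks until the allocation is
	// destroyed, so it gets its own goroutine.
	ar := NewAllocRunner(logger, cfg, db, updater, alloc, vault, consul, prev)
	go ar.Run()

	// Server-driven changes are delivered through Update; ShouldUpdate
	// filters out stale updates by comparing AllocModifyIndex values.
	if ar.ShouldUpdate(update.AllocModifyIndex) {
		ar.Update(update)
	}

	// When the allocation is garbage collected, Destroy stops the tasks and
	// removes local state; WaitCh is closed once Run has fully exited.
	ar.Destroy()
	<-ar.WaitCh()
}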