github.com/hspak/nomad@v0.7.2-0.20180309000617-bc4ae22a39a5/client/alloc_runner.go

package client

import (
	"context"
	"fmt"
	"log"
	"os"
	"path/filepath"
	"sync"
	"time"

	metrics "github.com/armon/go-metrics"
	"github.com/boltdb/bolt"
	"github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/client/allocdir"
	"github.com/hashicorp/nomad/client/config"
	"github.com/hashicorp/nomad/client/vaultclient"
	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/nomad/structs"

	cstructs "github.com/hashicorp/nomad/client/structs"
)

var (
	// The following are the key paths written to the state database
	allocRunnerStateAllocKey     = []byte("alloc")
	allocRunnerStateImmutableKey = []byte("immutable")
	allocRunnerStateMutableKey   = []byte("mutable")
	allocRunnerStateAllocDirKey  = []byte("alloc-dir")
)

// AllocStateUpdater is used to update the status of an allocation
type AllocStateUpdater func(alloc *structs.Allocation)

// AllocStatsReporter exposes the latest resource usage statistics for an
// allocation.
type AllocStatsReporter interface {
	LatestAllocStats(taskFilter string) (*cstructs.AllocResourceUsage, error)
}

// AllocRunner is used to wrap an allocation and provide the execution context.
type AllocRunner struct {
	config  *config.Config
	updater AllocStateUpdater
	logger  *log.Logger

	// allocID is the ID of this runner's allocation. Since it does not
	// change for the lifetime of the AllocRunner it is safe to read
	// without acquiring a lock (unlike alloc).
	allocID string

	alloc                  *structs.Allocation
	allocClientStatus      string // Explicit status of allocation. Set when there are failures
	allocClientDescription string
	allocHealth            *bool // Whether the allocation is healthy
	allocBroadcast         *cstructs.AllocBroadcaster
	allocLock              sync.Mutex

	dirtyCh chan struct{}

	allocDir     *allocdir.AllocDir
	allocDirLock sync.Mutex

	tasks      map[string]*TaskRunner
	taskStates map[string]*structs.TaskState
	restored   map[string]struct{}
	taskLock   sync.RWMutex

	taskStatusLock sync.RWMutex

	updateCh chan *structs.Allocation

	vaultClient  vaultclient.VaultClient
	consulClient ConsulServiceAPI

	// prevAlloc allows for waiting until a previous allocation exits and
	// then migrates its data. If sticky volumes aren't used and there's no
	// previous allocation a noop implementation is used, so it is always
	// safe to call.
	prevAlloc prevAllocWatcher

	// ctx is cancelled with exitFn to cause the alloc to be destroyed
	// (stopped and GC'd).
	ctx    context.Context
	exitFn context.CancelFunc

	// waitCh is closed when the Run method exits. At that point the alloc
	// has stopped and been GC'd.
	waitCh chan struct{}

	// State related fields
	// stateDB is used to store the alloc runner's state
	stateDB        *bolt.DB
	allocStateLock sync.Mutex

	// persistedEval is the last persisted evaluation ID. Since evaluation
	// IDs change on every allocation update we only need to persist the
	// allocation when its eval ID != the last persisted eval ID.
	persistedEvalLock sync.Mutex
	persistedEval     string

	// immutablePersisted and allocDirPersisted are used to track whether the
	// immutable data and the alloc dir have been persisted. Once persisted we
	// can lower write volume by not re-writing these values
	immutablePersisted bool
	allocDirPersisted  bool

	// baseLabels are used when emitting tagged metrics. All alloc runner metrics
	// will have these tags, and optionally more.
	baseLabels []metrics.Label
}
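
// Locking overview (descriptive note, not part of the original file; derived
// from how the fields above are used throughout this file): allocLock guards
// alloc, allocClientStatus, allocClientDescription and allocHealth; taskLock
// guards the tasks map; taskStatusLock guards taskStates; allocDirLock guards
// allocDir; allocStateLock serializes writes to and deletion of the persisted
// state in stateDB.
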
// COMPAT: Remove in 0.7.0
// allocRunnerState is used to snapshot the state of the alloc runner
type allocRunnerState struct {
	Version                string
	Alloc                  *structs.Allocation
	AllocDir               *allocdir.AllocDir
	AllocClientStatus      string
	AllocClientDescription string

	// COMPAT: Remove in 0.7.0: removing will break upgrading directly from
	// 0.5.2, so don't remove in the 0.6 series.
	// Context is deprecated and only used to migrate from older releases.
	// It will be removed in the future.
	Context *struct {
		AllocID  string // unused; included for completeness
		AllocDir struct {
			AllocDir  string
			SharedDir string // unused; included for completeness
			TaskDirs  map[string]string
		}
	} `json:"Context,omitempty"`
}

// allocRunnerAllocState is state that only has to be written when the alloc
// changes.
type allocRunnerAllocState struct {
	Alloc *structs.Allocation
}

// allocRunnerImmutableState is state that only has to be written once.
type allocRunnerImmutableState struct {
	Version string
}

// allocRunnerMutableState is state that has to be written on each save as it
// changes over the life-cycle of the alloc_runner.
type allocRunnerMutableState struct {
	AllocClientStatus      string
	AllocClientDescription string
	TaskStates             map[string]*structs.TaskState
	DeploymentStatus       *structs.AllocDeploymentStatus
}

// NewAllocRunner is used to create a new allocation context
func NewAllocRunner(logger *log.Logger, config *config.Config, stateDB *bolt.DB, updater AllocStateUpdater,
	alloc *structs.Allocation, vaultClient vaultclient.VaultClient, consulClient ConsulServiceAPI,
	prevAlloc prevAllocWatcher) *AllocRunner {

	ar := &AllocRunner{
		config:         config,
		stateDB:        stateDB,
		updater:        updater,
		logger:         logger,
		alloc:          alloc,
		allocID:        alloc.ID,
		allocBroadcast: cstructs.NewAllocBroadcaster(8),
		prevAlloc:      prevAlloc,
		dirtyCh:        make(chan struct{}, 1),
		allocDir:       allocdir.NewAllocDir(logger, filepath.Join(config.AllocDir, alloc.ID)),
		tasks:          make(map[string]*TaskRunner),
		taskStates:     copyTaskStates(alloc.TaskStates),
		restored:       make(map[string]struct{}),
		updateCh:       make(chan *structs.Allocation, 64),
		waitCh:         make(chan struct{}),
		vaultClient:    vaultClient,
		consulClient:   consulClient,
	}

	// TODO Should be passed a context
	ar.ctx, ar.exitFn = context.WithCancel(context.TODO())

	return ar
}
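
// Example (illustrative sketch, not part of the original file): the client
// constructs an AllocRunner and then either restores it from the state
// database or runs it fresh. logger, cfg, db, updateFn, alloc, vaultClient,
// consulClient, prevWatcher, restoring and newerAlloc stand in for values the
// caller already holds:
//
//	ar := NewAllocRunner(logger, cfg, db, updateFn, alloc, vaultClient, consulClient, prevWatcher)
//	if restoring {
//		if err := ar.RestoreState(); err != nil {
//			// handle the restore failure
//		}
//	}
//	go ar.Run()           // manage the allocation until it is destroyed
//	ar.Update(newerAlloc) // later: push a server-side allocation update
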
// setBaseLabels creates the set of base labels. This should be called after
// Restore has been called so the allocation is guaranteed to be loaded.
func (r *AllocRunner) setBaseLabels() {
	r.baseLabels = make([]metrics.Label, 0, 3)

	if r.alloc.Job != nil {
		r.baseLabels = append(r.baseLabels, metrics.Label{
			Name:  "job",
			Value: r.alloc.Job.Name,
		})
	}
	if r.alloc.TaskGroup != "" {
		r.baseLabels = append(r.baseLabels, metrics.Label{
			Name:  "task_group",
			Value: r.alloc.TaskGroup,
		})
	}
	if r.config != nil && r.config.Node != nil {
		r.baseLabels = append(r.baseLabels, metrics.Label{
			Name:  "node_id",
			Value: r.config.Node.ID,
		})
	}
}

// pre060StateFilePath returns the path to our state file that would have been
// written pre v0.6.0
// COMPAT: Remove in 0.7.0
func (r *AllocRunner) pre060StateFilePath() string {
	r.allocLock.Lock()
	defer r.allocLock.Unlock()
	path := filepath.Join(r.config.StateDir, "alloc", r.allocID, "state.json")
	return path
}

// RestoreState is used to restore the state of the alloc runner
func (r *AllocRunner) RestoreState() error {

	// COMPAT: Remove in 0.7.0
	// Check if the old snapshot is there
	oldPath := r.pre060StateFilePath()
	var snap allocRunnerState
	var upgrading bool
	if err := pre060RestoreState(oldPath, &snap); err == nil {
		// Restore fields
		r.logger.Printf("[INFO] client: restoring pre v0.6.0 alloc runner state for alloc %q", r.allocID)
		r.alloc = snap.Alloc
		r.allocDir = snap.AllocDir
		r.allocClientStatus = snap.AllocClientStatus
		r.allocClientDescription = snap.AllocClientDescription

		if r.alloc != nil {
			r.taskStates = snap.Alloc.TaskStates
		}

		// COMPAT: Remove in 0.7.0
		// #2132 Upgrade path: if snap.AllocDir is nil, try to convert old
		// Context struct to new AllocDir struct
		if snap.AllocDir == nil && snap.Context != nil {
			r.logger.Printf("[DEBUG] client: migrating state snapshot for alloc %q", r.allocID)
			r.allocDir = allocdir.NewAllocDir(r.logger, snap.Context.AllocDir.AllocDir)
			for taskName := range snap.Context.AllocDir.TaskDirs {
				r.allocDir.NewTaskDir(taskName)
			}
		}

		// Delete the old state
		os.RemoveAll(oldPath)
		upgrading = true
	} else if !os.IsNotExist(err) {
		// Something corrupt in the old state file
		return err
	} else {
		// We are doing a normal restore
		err := r.stateDB.View(func(tx *bolt.Tx) error {
			bkt, err := getAllocationBucket(tx, r.allocID)
			if err != nil {
				return fmt.Errorf("failed to get allocation bucket: %v", err)
			}

			// Get the state objects
			var mutable allocRunnerMutableState
			var immutable allocRunnerImmutableState
			var allocState allocRunnerAllocState
			var allocDir allocdir.AllocDir

			if err := getObject(bkt, allocRunnerStateAllocKey, &allocState); err != nil {
				return fmt.Errorf("failed to read alloc runner alloc state: %v", err)
			}
			if err := getObject(bkt, allocRunnerStateImmutableKey, &immutable); err != nil {
				return fmt.Errorf("failed to read alloc runner immutable state: %v", err)
			}
			if err := getObject(bkt, allocRunnerStateMutableKey, &mutable); err != nil {
				return fmt.Errorf("failed to read alloc runner mutable state: %v", err)
			}
			if err := getObject(bkt, allocRunnerStateAllocDirKey, &allocDir); err != nil {
				return fmt.Errorf("failed to read alloc runner alloc_dir state: %v", err)
			}

			// Populate the fields
			r.alloc = allocState.Alloc
			r.allocDir = &allocDir
			r.allocClientStatus = mutable.AllocClientStatus
			r.allocClientDescription = mutable.AllocClientDescription
			r.taskStates = mutable.TaskStates
			r.alloc.ClientStatus = getClientStatus(r.taskStates)
			r.alloc.DeploymentStatus = mutable.DeploymentStatus
			return nil
		})

		if err != nil {
			return fmt.Errorf("failed to read allocation state: %v", err)
		}
	}

	var snapshotErrors multierror.Error
	if r.alloc == nil {
		snapshotErrors.Errors = append(snapshotErrors.Errors, fmt.Errorf("alloc_runner snapshot includes a nil allocation"))
	}
	if r.allocDir == nil {
		snapshotErrors.Errors = append(snapshotErrors.Errors, fmt.Errorf("alloc_runner snapshot includes a nil alloc dir"))
	}
	if e := snapshotErrors.ErrorOrNil(); e != nil {
		return e
	}

	tg := r.alloc.Job.LookupTaskGroup(r.alloc.TaskGroup)
	if tg == nil {
		return fmt.Errorf("restored allocation doesn't contain task group %q", r.alloc.TaskGroup)
	}

	// Restore the task runners
	taskDestroyEvent := structs.NewTaskEvent(structs.TaskKilled)
	var mErr multierror.Error
	for _, task := range tg.Tasks {
		name := task.Name
		state := r.taskStates[name]

		// Nomad exited before task could start, nothing to restore.
		// AllocRunner.Run will start a new TaskRunner for this task
		if state == nil {
			continue
		}

		// Mark the task as restored.
		r.restored[name] = struct{}{}

		td, ok := r.allocDir.TaskDirs[name]
		if !ok {
			// Create the task dir metadata if it doesn't exist.
			// Since task dirs are created during r.Run() the
			// client may save state and exit before all task dirs
			// are created
			td = r.allocDir.NewTaskDir(name)
		}

		// Skip tasks in terminal states.
		if state.State == structs.TaskStateDead {
			continue
		}

		tr := NewTaskRunner(r.logger, r.config, r.stateDB, r.setTaskState, td, r.Alloc(), task, r.vaultClient, r.consulClient)
		r.tasks[name] = tr

		if restartReason, err := tr.RestoreState(); err != nil {
			r.logger.Printf("[ERR] client: failed to restore state for alloc %s task %q: %v", r.allocID, name, err)
			mErr.Errors = append(mErr.Errors, err)
		} else if !r.alloc.TerminalStatus() {
			// Only start if the alloc isn't in a terminal status.
			go tr.Run()

			if upgrading {
				if err := tr.SaveState(); err != nil {
					r.logger.Printf("[WARN] client: initial save state for alloc %s task %s failed: %v", r.allocID, name, err)
				}
			}

			// Restart task runner if RestoreState gave a reason
			if restartReason != "" {
				r.logger.Printf("[INFO] client: restarting alloc %s task %s: %v", r.allocID, name, restartReason)
				const failure = false
				tr.Restart("upgrade", restartReason, failure)
			}
		} else {
			tr.Destroy(taskDestroyEvent)
		}
	}

	return mErr.ErrorOrNil()
}
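
// Persistence layout (informal sketch, inferred from the keys defined at the
// top of this file and the getObject/putObject helpers used in this package;
// bucket naming itself is handled by getAllocationBucket elsewhere in the
// package): each allocation gets its own bucket in the bolt state database,
// holding four objects:
//
//	alloc     -> allocRunnerAllocState      (re-written when the eval ID changes)
//	immutable -> allocRunnerImmutableState  (written once)
//	mutable   -> allocRunnerMutableState    (written on every save)
//	alloc-dir -> allocdir.AllocDir          (written once it exists)
//
// RestoreState above reads these objects back, and SaveState below writes them.
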
// SaveState is used to snapshot the state of the alloc runner: the alloc
// runner's own state is persisted first, followed by the state of each task
// runner associated with the alloc.
func (r *AllocRunner) SaveState() error {
	if err := r.saveAllocRunnerState(); err != nil {
		return err
	}

	// Save state for each task
	runners := r.getTaskRunners()
	var mErr multierror.Error
	for _, tr := range runners {
		if err := tr.SaveState(); err != nil {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("failed to save state for alloc %s task %q: %v",
				r.allocID, tr.task.Name, err))
		}
	}
	return mErr.ErrorOrNil()
}

func (r *AllocRunner) saveAllocRunnerState() error {
	r.allocStateLock.Lock()
	defer r.allocStateLock.Unlock()

	if r.ctx.Err() == context.Canceled {
		return nil
	}

	// Grab all the relevant data
	alloc := r.Alloc()

	r.allocLock.Lock()
	allocClientStatus := r.allocClientStatus
	allocClientDescription := r.allocClientDescription
	r.allocLock.Unlock()

	r.allocDirLock.Lock()
	allocDir := r.allocDir.Copy()
	r.allocDirLock.Unlock()

	// Start the transaction.
	return r.stateDB.Batch(func(tx *bolt.Tx) error {

		// Grab the allocation bucket
		allocBkt, err := getAllocationBucket(tx, r.allocID)
		if err != nil {
			return fmt.Errorf("failed to retrieve allocation bucket: %v", err)
		}

		// Write the allocation if the eval has changed
		r.persistedEvalLock.Lock()
		lastPersisted := r.persistedEval
		r.persistedEvalLock.Unlock()
		if alloc.EvalID != lastPersisted {
			allocState := &allocRunnerAllocState{
				Alloc: alloc,
			}

			if err := putObject(allocBkt, allocRunnerStateAllocKey, &allocState); err != nil {
				return fmt.Errorf("failed to write alloc_runner alloc state: %v", err)
			}

			tx.OnCommit(func() {
				r.persistedEvalLock.Lock()
				r.persistedEval = alloc.EvalID
				r.persistedEvalLock.Unlock()
			})
		}

		// Write immutable data iff it hasn't been written yet
		if !r.immutablePersisted {
			immutable := &allocRunnerImmutableState{
				Version: r.config.Version.VersionNumber(),
			}

			if err := putObject(allocBkt, allocRunnerStateImmutableKey, &immutable); err != nil {
				return fmt.Errorf("failed to write alloc_runner immutable state: %v", err)
			}

			tx.OnCommit(func() {
				r.immutablePersisted = true
			})
		}

		// Write the alloc dir data if it hasn't been written before and it exists.
		if !r.allocDirPersisted && allocDir != nil {
			if err := putObject(allocBkt, allocRunnerStateAllocDirKey, allocDir); err != nil {
				return fmt.Errorf("failed to write alloc_runner allocDir state: %v", err)
			}

			tx.OnCommit(func() {
				r.allocDirPersisted = true
			})
		}

		// Write the mutable state every time
		mutable := &allocRunnerMutableState{
			AllocClientStatus:      allocClientStatus,
			AllocClientDescription: allocClientDescription,
			TaskStates:             alloc.TaskStates,
			DeploymentStatus:       alloc.DeploymentStatus,
		}

		if err := putObject(allocBkt, allocRunnerStateMutableKey, &mutable); err != nil {
			return fmt.Errorf("failed to write alloc_runner mutable state: %v", err)
		}

		return nil
	})
}

// DestroyState is used to cleanup after ourselves
func (r *AllocRunner) DestroyState() error {
	r.allocStateLock.Lock()
	defer r.allocStateLock.Unlock()

	return r.stateDB.Update(func(tx *bolt.Tx) error {
		if err := deleteAllocationBucket(tx, r.allocID); err != nil {
			return fmt.Errorf("failed to delete allocation bucket: %v", err)
		}
		return nil
	})
}

// DestroyContext is used to destroy the context
func (r *AllocRunner) DestroyContext() error {
	return r.allocDir.Destroy()
}

// GetAllocDir returns the alloc dir for the alloc runner
func (r *AllocRunner) GetAllocDir() *allocdir.AllocDir {
	return r.allocDir
}

// GetListener returns a listener for updates broadcast by this alloc runner.
// Callers are responsible for calling Close on their Listener.
func (r *AllocRunner) GetListener() *cstructs.AllocListener {
	return r.allocBroadcast.Listen()
}

// copyTaskStates returns a copy of the passed task states.
func copyTaskStates(states map[string]*structs.TaskState) map[string]*structs.TaskState {
	copy := make(map[string]*structs.TaskState, len(states))
	for task, state := range states {
		copy[task] = state.Copy()
	}
	return copy
}

// Alloc returns the associated allocation
func (r *AllocRunner) Alloc() *structs.Allocation {
	r.allocLock.Lock()

	// Don't do a deep copy of the job
	alloc := r.alloc.CopySkipJob()

	// The status has explicitly been set.
	if r.allocClientStatus != "" || r.allocClientDescription != "" {
		alloc.ClientStatus = r.allocClientStatus
		alloc.ClientDescription = r.allocClientDescription

		// Copy over the task states so we don't lose them
		r.taskStatusLock.RLock()
		alloc.TaskStates = copyTaskStates(r.taskStates)
		r.taskStatusLock.RUnlock()

		r.allocLock.Unlock()
		return alloc
	}

	// The health has been set
	if r.allocHealth != nil {
		if alloc.DeploymentStatus == nil {
			alloc.DeploymentStatus = &structs.AllocDeploymentStatus{}
		}
		alloc.DeploymentStatus.Healthy = helper.BoolToPtr(*r.allocHealth)
	}
	r.allocLock.Unlock()

	// Scan the task states to determine the status of the alloc
	r.taskStatusLock.RLock()
	alloc.TaskStates = copyTaskStates(r.taskStates)
	alloc.ClientStatus = getClientStatus(r.taskStates)
	r.taskStatusLock.RUnlock()

	// If the client status is failed and we are part of a deployment, mark the
	// alloc as unhealthy. This guards against the watcher not being started.
	r.allocLock.Lock()
	if alloc.ClientStatus == structs.AllocClientStatusFailed &&
		alloc.DeploymentID != "" && !alloc.DeploymentStatus.IsUnhealthy() {
		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: helper.BoolToPtr(false),
		}
	}
	r.allocLock.Unlock()

	return alloc
}

// getClientStatus takes in the task states for a given allocation and computes
// the client status
func getClientStatus(taskStates map[string]*structs.TaskState) string {
	var pending, running, dead, failed bool
	for _, state := range taskStates {
		switch state.State {
		case structs.TaskStateRunning:
			running = true
		case structs.TaskStatePending:
			pending = true
		case structs.TaskStateDead:
			if state.Failed {
				failed = true
			} else {
				dead = true
			}
		}
	}

	// Determine the alloc status
	if failed {
		return structs.AllocClientStatusFailed
	} else if running {
		return structs.AllocClientStatusRunning
	} else if pending {
		return structs.AllocClientStatusPending
	} else if dead {
		return structs.AllocClientStatusComplete
	}

	return ""
}

// dirtySyncState is used to watch for state being marked dirty to sync
func (r *AllocRunner) dirtySyncState() {
	for {
		select {
		case <-r.dirtyCh:
			if err := r.syncStatus(); err != nil {
				// Only WARN instead of ERR because we continue on
				r.logger.Printf("[WARN] client: error persisting alloc %q state: %v",
					r.allocID, err)
			}
		case <-r.ctx.Done():
			return
		}
	}
}

// syncStatus is used to run and sync the status when it changes
func (r *AllocRunner) syncStatus() error {
	// Get a copy of our alloc, update status server side and sync to disk
	alloc := r.Alloc()
	r.updater(alloc)
	r.sendBroadcast(alloc)
	return r.saveAllocRunnerState()
}

// sendBroadcast broadcasts an alloc update.
func (r *AllocRunner) sendBroadcast(alloc *structs.Allocation) {
	// Try to send the alloc up to three times with a delay to allow recovery.
	sent := false
	for i := 0; i < 3; i++ {
		if sent = r.allocBroadcast.Send(alloc); sent {
			break
		}
		time.Sleep(500 * time.Millisecond)
	}
	if !sent {
		r.logger.Printf("[WARN] client: failed to broadcast update to allocation %q", r.allocID)
	}
}

// setStatus is used to update the allocation status
func (r *AllocRunner) setStatus(status, desc string) {
	r.allocLock.Lock()
	r.allocClientStatus = status
	r.allocClientDescription = desc
	r.allocLock.Unlock()
	select {
	case r.dirtyCh <- struct{}{}:
	default:
	}
}

// setTaskState is used to set the status of a task. If lazySync is set then the
// event is appended but not synced with the server. If state is omitted, the
// last known state is used.
func (r *AllocRunner) setTaskState(taskName, state string, event *structs.TaskEvent, lazySync bool) {
	r.taskStatusLock.Lock()
	defer r.taskStatusLock.Unlock()
	taskState, ok := r.taskStates[taskName]
	if !ok {
		taskState = &structs.TaskState{}
		r.taskStates[taskName] = taskState
	}

	// Set the task's state.
	if event != nil {
		if event.FailsTask {
			taskState.Failed = true
		}
		if event.Type == structs.TaskRestarting {
			if !r.config.DisableTaggedMetrics {
				metrics.IncrCounterWithLabels([]string{"client", "allocs", "restart"},
					1, r.baseLabels)
			}
			if r.config.BackwardsCompatibleMetrics {
				metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, taskName, "restart"}, 1)
			}
			taskState.Restarts++
			taskState.LastRestart = time.Unix(0, event.Time)
		}
		r.appendTaskEvent(taskState, event)
	}

	if lazySync {
		return
	}

	// If the state hasn't been set use the existing state.
	if state == "" {
		state = taskState.State
		if taskState.State == "" {
			state = structs.TaskStatePending
		}
	}

	switch state {
	case structs.TaskStateRunning:
		// Capture the start time if it is just starting
		if taskState.State != structs.TaskStateRunning {
			taskState.StartedAt = time.Now().UTC()
			if !r.config.DisableTaggedMetrics {
				metrics.IncrCounterWithLabels([]string{"client", "allocs", "running"},
					1, r.baseLabels)
			}
			if r.config.BackwardsCompatibleMetrics {
				metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, taskName, "running"}, 1)
			}
		}
	case structs.TaskStateDead:
		// Capture the finished time. If it has never started there is no finish
		// time
		if !taskState.StartedAt.IsZero() {
			taskState.FinishedAt = time.Now().UTC()
		}

		// Find all tasks that are not the one that is dead and check if the one
		// that is dead is a leader
		var otherTaskRunners []*TaskRunner
		var otherTaskNames []string
		leader := false
		for task, tr := range r.tasks {
			if task != taskName {
				otherTaskRunners = append(otherTaskRunners, tr)
				otherTaskNames = append(otherTaskNames, task)
			} else if tr.task.Leader {
				leader = true
			}
		}

		// Emit metrics to indicate task completion and failures
		if taskState.Failed {
			if !r.config.DisableTaggedMetrics {
				metrics.IncrCounterWithLabels([]string{"client", "allocs", "failed"},
					1, r.baseLabels)
			}
			if r.config.BackwardsCompatibleMetrics {
				metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, taskName, "failed"}, 1)
			}
		} else {
			if !r.config.DisableTaggedMetrics {
				metrics.IncrCounterWithLabels([]string{"client", "allocs", "complete"},
					1, r.baseLabels)
			}
			if r.config.BackwardsCompatibleMetrics {
				metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, taskName, "complete"}, 1)
			}
		}

		// If the task failed, we should kill all the other tasks in the task group.
		if taskState.Failed {
			for _, tr := range otherTaskRunners {
				tr.Destroy(structs.NewTaskEvent(structs.TaskSiblingFailed).SetFailedSibling(taskName))
			}
			if len(otherTaskRunners) > 0 {
				r.logger.Printf("[DEBUG] client: task %q failed, destroying other tasks in task group: %v", taskName, otherTaskNames)
			}
		} else if leader {
			// If the task was a leader task we should kill all the other tasks.
			for _, tr := range otherTaskRunners {
				tr.Destroy(structs.NewTaskEvent(structs.TaskLeaderDead))
			}
			if len(otherTaskRunners) > 0 {
				r.logger.Printf("[DEBUG] client: leader task %q is dead, destroying other tasks in task group: %v", taskName, otherTaskNames)
			}
		}
	}

	// Store the new state
	taskState.State = state

	select {
	case r.dirtyCh <- struct{}{}:
	default:
	}
}

// appendTaskEvent updates the task status by appending the new event.
func (r *AllocRunner) appendTaskEvent(state *structs.TaskState, event *structs.TaskEvent) {
	capacity := 10
	if state.Events == nil {
		state.Events = make([]*structs.TaskEvent, 0, capacity)
	}

	// If we hit capacity, then shift it.
	if len(state.Events) == capacity {
		old := state.Events
		state.Events = make([]*structs.TaskEvent, 0, capacity)
		state.Events = append(state.Events, old[1:]...)
	}

	state.Events = append(state.Events, event)
}

// Run is a long running goroutine used to manage an allocation
func (r *AllocRunner) Run() {
	defer close(r.waitCh)
	r.setBaseLabels()
	go r.dirtySyncState()

	// Find the task group to run in the allocation
	alloc := r.Alloc()
	tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup)
	if tg == nil {
		r.logger.Printf("[ERR] client: alloc %q for missing task group %q", r.allocID, alloc.TaskGroup)
		r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("missing task group '%s'", alloc.TaskGroup))
		return
	}

	// Build allocation directory (idempotent)
	r.allocDirLock.Lock()
	err := r.allocDir.Build()
	r.allocDirLock.Unlock()

	if err != nil {
		r.logger.Printf("[ERR] client: alloc %q failed to build task directories: %v", r.allocID, err)
		r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("failed to build task dirs for '%s'", alloc.TaskGroup))
		return
	}

	// Wait for a previous alloc - if any - to terminate
	if err := r.prevAlloc.Wait(r.ctx); err != nil {
		if err == context.Canceled {
			return
		}
		r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("error while waiting for previous alloc to terminate: %v", err))
		return
	}

	// Wait for data to be migrated from a previous alloc if applicable
	if err := r.prevAlloc.Migrate(r.ctx, r.allocDir); err != nil {
		if err == context.Canceled {
			return
		}

		// Soft-fail on migration errors
		r.logger.Printf("[WARN] client: alloc %q error while migrating data from previous alloc: %v", r.allocID, err)

		// Recreate alloc dir to ensure a clean slate
		r.allocDir.Destroy()
		if err := r.allocDir.Build(); err != nil {
			r.logger.Printf("[ERR] client: alloc %q failed to clean task directories after failed migration: %v", r.allocID, err)
			r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("failed to rebuild task dirs for '%s'", alloc.TaskGroup))
			return
		}
	}

	// Check if the allocation is in a terminal status. In this case, we don't
	// start any of the task runners and directly wait for the destroy signal to
	// clean up the allocation.
	if alloc.TerminalStatus() {
		r.logger.Printf("[DEBUG] client: alloc %q in terminal status, waiting for destroy", r.allocID)
		// mark this allocation as completed if it is not already in a
		// terminal state
		if !alloc.Terminated() {
			r.setStatus(structs.AllocClientStatusComplete, "canceled running tasks for allocation in terminal state")
		}
		r.handleDestroy()
		r.logger.Printf("[DEBUG] client: terminating runner for alloc '%s'", r.allocID)
		return
	}

	// Increment alloc runner start counter. Incr'd even when restoring existing tasks so 1 start != 1 task execution
	if !r.config.DisableTaggedMetrics {
		metrics.IncrCounterWithLabels([]string{"client", "allocs", "start"},
			1, r.baseLabels)
	}
	if r.config.BackwardsCompatibleMetrics {
		metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, "start"}, 1)
	}

	// Start the watcher
	wCtx, watcherCancel := context.WithCancel(r.ctx)
	go r.watchHealth(wCtx)

	// Start the task runners
	r.logger.Printf("[DEBUG] client: starting task runners for alloc '%s'", r.allocID)
	r.taskLock.Lock()
	for _, task := range tg.Tasks {
		if _, ok := r.restored[task.Name]; ok {
			continue
		}

		r.allocDirLock.Lock()
		taskdir := r.allocDir.NewTaskDir(task.Name)
		r.allocDirLock.Unlock()

		tr := NewTaskRunner(r.logger, r.config, r.stateDB, r.setTaskState, taskdir, r.Alloc(), task.Copy(), r.vaultClient, r.consulClient)
		r.tasks[task.Name] = tr
		tr.MarkReceived()

		go tr.Run()
	}
	r.taskLock.Unlock()

	// taskDestroyEvent contains an event that caused the destruction of a task
	// in the allocation.
	var taskDestroyEvent *structs.TaskEvent

OUTER:
	// Wait for updates
	for {
		select {
		case update := <-r.updateCh:
			// Store the updated allocation.
			r.allocLock.Lock()

			// If the deployment ids have changed clear the health
			if r.alloc.DeploymentID != update.DeploymentID {
				r.allocHealth = nil
			}

			r.alloc = update
			r.allocLock.Unlock()

			// Create a new watcher
			watcherCancel()
			wCtx, watcherCancel = context.WithCancel(r.ctx)
			go r.watchHealth(wCtx)

			// Check if we're in a terminal status
			if update.TerminalStatus() {
				taskDestroyEvent = structs.NewTaskEvent(structs.TaskKilled)
				break OUTER
			}

			// Update the task groups
			runners := r.getTaskRunners()
			for _, tr := range runners {
				tr.Update(update)
			}

			if err := r.syncStatus(); err != nil {
				r.logger.Printf("[WARN] client: failed to sync alloc %q status upon receiving alloc update: %v",
					r.allocID, err)
			}

		case <-r.ctx.Done():
			taskDestroyEvent = structs.NewTaskEvent(structs.TaskKilled)
			break OUTER
		}
	}

	// Kill the task runners
	r.destroyTaskRunners(taskDestroyEvent)

	// Block until we should destroy the state of the alloc
	r.handleDestroy()

	// Free up the context. It has likely exited already
	watcherCancel()

	r.logger.Printf("[DEBUG] client: terminating runner for alloc '%s'", r.allocID)
}
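
// Teardown ordering (informal sketch, not part of the original file; it only
// restates what Run, Destroy, handleDestroy and WaitCh do above and below):
// Destroy cancels r.ctx, which makes the select in Run break out, destroy the
// task runners, and block in handleDestroy until local state can be removed;
// waitCh is closed when Run returns:
//
//	ar.Destroy()           // cancels ctx and closes the alloc broadcaster
//	<-ar.WaitCh()          // returns once Run has torn everything down
//	done := ar.IsDestroyed() // true from this point on
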
// destroyTaskRunners destroys the task runners, waits for them to terminate and
// then saves state.
func (r *AllocRunner) destroyTaskRunners(destroyEvent *structs.TaskEvent) {
	// First destroy the leader if one exists
	tg := r.alloc.Job.LookupTaskGroup(r.alloc.TaskGroup)
	leader := ""
	for _, task := range tg.Tasks {
		if task.Leader {
			leader = task.Name
			break
		}
	}
	if leader != "" {
		r.taskLock.RLock()
		tr := r.tasks[leader]
		r.taskLock.RUnlock()

		// Dead tasks don't have a task runner created so guard against
		// the leader being dead when this AR was saved.
		if tr == nil {
			r.logger.Printf("[DEBUG] client: alloc %q leader task %q of task group %q already stopped",
				r.allocID, leader, r.alloc.TaskGroup)
		} else {
			r.logger.Printf("[DEBUG] client: alloc %q destroying leader task %q of task group %q first",
				r.allocID, leader, r.alloc.TaskGroup)
			tr.Destroy(destroyEvent)
			<-tr.WaitCh()
		}
	}

	// Then destroy non-leader tasks concurrently
	r.taskLock.RLock()
	for name, tr := range r.tasks {
		if name != leader {
			tr.Destroy(destroyEvent)
		}
	}
	r.taskLock.RUnlock()

	// Wait for termination of the task runners
	for _, tr := range r.getTaskRunners() {
		<-tr.WaitCh()
	}
}

// handleDestroy blocks till the AllocRunner should be destroyed and does the
// necessary cleanup.
func (r *AllocRunner) handleDestroy() {
	// Final state sync. We do this to ensure that the server has the correct
	// state as we wait for a destroy.
	alloc := r.Alloc()

	// Increment the destroy count for this alloc runner since this allocation is being removed from this client.
	if !r.config.DisableTaggedMetrics {
		metrics.IncrCounterWithLabels([]string{"client", "allocs", "destroy"},
			1, r.baseLabels)
	}
	if r.config.BackwardsCompatibleMetrics {
		metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, "destroy"}, 1)
	}

	// Broadcast and persist state synchronously
	r.sendBroadcast(alloc)
	if err := r.saveAllocRunnerState(); err != nil {
		r.logger.Printf("[WARN] client: alloc %q unable to persist state but should be GC'd soon anyway: %v",
			r.allocID, err)
	}

	// Unmount any mounted directories since no tasks are running; this also
	// makes cleaning up Nomad's data directory simpler.
	if err := r.allocDir.UnmountAll(); err != nil {
		r.logger.Printf("[ERR] client: alloc %q unable to unmount task directories: %v", r.allocID, err)
	}

	// Update the server with the alloc's status -- also marks the alloc as
	// being eligible for GC, so from this point on the alloc can be gc'd
	// at any time.
	r.updater(alloc)

	for {
		select {
		case <-r.ctx.Done():
			if err := r.DestroyContext(); err != nil {
				r.logger.Printf("[ERR] client: failed to destroy context for alloc '%s': %v",
					r.allocID, err)
			}
			if err := r.DestroyState(); err != nil {
				r.logger.Printf("[ERR] client: failed to destroy state for alloc '%s': %v",
					r.allocID, err)
			}

			return
		case <-r.updateCh:
			r.logger.Printf("[DEBUG] client: dropping update to terminal alloc '%s'", r.allocID)
		}
	}
}

// IsWaiting returns true if this alloc is waiting on a previous allocation to
// terminate.
func (r *AllocRunner) IsWaiting() bool {
	return r.prevAlloc.IsWaiting()
}

// IsMigrating returns true if this alloc is migrating data from a previous
// allocation.
func (r *AllocRunner) IsMigrating() bool {
	return r.prevAlloc.IsMigrating()
}

// Update is used to update the allocation of the context
func (r *AllocRunner) Update(update *structs.Allocation) {
	select {
	case r.updateCh <- update:
	default:
		r.logger.Printf("[ERR] client: dropping update to alloc '%s'", update.ID)
	}
}

// StatsReporter returns an interface to query resource usage statistics of an
// allocation
func (r *AllocRunner) StatsReporter() AllocStatsReporter {
	return r
}

// getTaskRunners is a helper that returns a copy of the task runners list using
// the taskLock.
func (r *AllocRunner) getTaskRunners() []*TaskRunner {
	// Get the task runners
	r.taskLock.RLock()
	defer r.taskLock.RUnlock()
	runners := make([]*TaskRunner, 0, len(r.tasks))
	for _, tr := range r.tasks {
		runners = append(runners, tr)
	}
	return runners
}

// LatestAllocStats returns the latest allocation stats. If the optional taskFilter is set
// the allocation stats will only include the given task.
func (r *AllocRunner) LatestAllocStats(taskFilter string) (*cstructs.AllocResourceUsage, error) {
	astat := &cstructs.AllocResourceUsage{
		Tasks: make(map[string]*cstructs.TaskResourceUsage),
	}

	var flat []*cstructs.TaskResourceUsage
	if taskFilter != "" {
		r.taskLock.RLock()
		tr, ok := r.tasks[taskFilter]
		r.taskLock.RUnlock()
		if !ok {
			return nil, fmt.Errorf("allocation %q has no task %q", r.allocID, taskFilter)
		}
		l := tr.LatestResourceUsage()
		if l != nil {
			astat.Tasks[taskFilter] = l
			flat = []*cstructs.TaskResourceUsage{l}
			astat.Timestamp = l.Timestamp
		}
	} else {
		// Get the task runners
		runners := r.getTaskRunners()
		for _, tr := range runners {
			l := tr.LatestResourceUsage()
			if l != nil {
				astat.Tasks[tr.task.Name] = l
				flat = append(flat, l)
				if l.Timestamp > astat.Timestamp {
					astat.Timestamp = l.Timestamp
				}
			}
		}
	}

	astat.ResourceUsage = sumTaskResourceUsage(flat)
	return astat, nil
}

// sumTaskResourceUsage takes a set of task resources and sums their resources
func sumTaskResourceUsage(usages []*cstructs.TaskResourceUsage) *cstructs.ResourceUsage {
	summed := &cstructs.ResourceUsage{
		MemoryStats: &cstructs.MemoryStats{},
		CpuStats:    &cstructs.CpuStats{},
	}
	for _, usage := range usages {
		summed.Add(usage.ResourceUsage)
	}
	return summed
}

// shouldUpdate takes the AllocModifyIndex of an allocation sent from the server and
// checks if the current running allocation is behind and should be updated.
func (r *AllocRunner) shouldUpdate(serverIndex uint64) bool {
	r.allocLock.Lock()
	defer r.allocLock.Unlock()
	return r.alloc.AllocModifyIndex < serverIndex
}

// Destroy is used to indicate that the allocation context should be destroyed
func (r *AllocRunner) Destroy() {
	// Lock when closing the context as that gives the save state code
	// serialization.
	r.allocStateLock.Lock()
	defer r.allocStateLock.Unlock()

	r.exitFn()
	r.allocBroadcast.Close()
}

// IsDestroyed returns true if the AllocRunner is not running and has been
// destroyed (GC'd).
func (r *AllocRunner) IsDestroyed() bool {
	select {
	case <-r.waitCh:
		return true
	default:
		return false
	}
}

// WaitCh returns a channel to wait for termination
func (r *AllocRunner) WaitCh() <-chan struct{} {
	return r.waitCh
}
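
// Example (illustrative, not part of the original file): a caller that wants a
// point-in-time view of an allocation's resource usage can go through the
// StatsReporter interface; an empty taskFilter aggregates across all tasks:
//
//	usage, err := ar.StatsReporter().LatestAllocStats("")
//	if err == nil {
//		// usage.Tasks holds per-task stats; usage.ResourceUsage is the sum
//		// and usage.Timestamp is the newest sample time seen.
//		_ = usage.Timestamp
//	}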