github.com/djenriquez/nomad-1@v0.8.1/client/alloc_runner.go (about) 1 package client 2 3 import ( 4 "context" 5 "fmt" 6 "log" 7 "os" 8 "path/filepath" 9 "sync" 10 "time" 11 12 metrics "github.com/armon/go-metrics" 13 "github.com/boltdb/bolt" 14 "github.com/hashicorp/go-multierror" 15 "github.com/hashicorp/nomad/client/allocdir" 16 "github.com/hashicorp/nomad/client/config" 17 "github.com/hashicorp/nomad/client/vaultclient" 18 "github.com/hashicorp/nomad/helper" 19 "github.com/hashicorp/nomad/nomad/structs" 20 21 cstructs "github.com/hashicorp/nomad/client/structs" 22 ) 23 24 var ( 25 // The following are the key paths written to the state database 26 allocRunnerStateAllocKey = []byte("alloc") 27 allocRunnerStateImmutableKey = []byte("immutable") 28 allocRunnerStateMutableKey = []byte("mutable") 29 allocRunnerStateAllocDirKey = []byte("alloc-dir") 30 ) 31 32 // AllocStateUpdater is used to update the status of an allocation 33 type AllocStateUpdater func(alloc *structs.Allocation) 34 35 type AllocStatsReporter interface { 36 LatestAllocStats(taskFilter string) (*cstructs.AllocResourceUsage, error) 37 } 38 39 // AllocRunner is used to wrap an allocation and provide the execution context. 40 type AllocRunner struct { 41 config *config.Config 42 updater AllocStateUpdater 43 logger *log.Logger 44 45 // allocID is the ID of this runner's allocation. Since it does not 46 // change for the lifetime of the AllocRunner it is safe to read 47 // without acquiring a lock (unlike alloc). 48 allocID string 49 50 alloc *structs.Allocation 51 allocClientStatus string // Explicit status of allocation. Set when there are failures 52 allocClientDescription string 53 allocHealth *bool // Whether the allocation is healthy 54 allocBroadcast *cstructs.AllocBroadcaster 55 allocLock sync.Mutex 56 57 dirtyCh chan struct{} 58 59 allocDir *allocdir.AllocDir 60 allocDirLock sync.Mutex 61 62 tasks map[string]*TaskRunner 63 taskStates map[string]*structs.TaskState 64 restored map[string]struct{} 65 taskLock sync.RWMutex 66 67 taskStatusLock sync.RWMutex 68 69 updateCh chan *structs.Allocation 70 71 vaultClient vaultclient.VaultClient 72 consulClient ConsulServiceAPI 73 74 // prevAlloc allows for Waiting until a previous allocation exits and 75 // the migrates it data. If sticky volumes aren't used and there's no 76 // previous allocation a noop implementation is used so it always safe 77 // to call. 78 prevAlloc prevAllocWatcher 79 80 // ctx is cancelled with exitFn to cause the alloc to be destroyed 81 // (stopped and GC'd). 82 ctx context.Context 83 exitFn context.CancelFunc 84 85 // waitCh is closed when the Run method exits. At that point the alloc 86 // has stopped and been GC'd. 87 waitCh chan struct{} 88 89 // State related fields 90 // stateDB is used to store the alloc runners state 91 stateDB *bolt.DB 92 allocStateLock sync.Mutex 93 94 // persistedEval is the last persisted evaluation ID. Since evaluation 95 // IDs change on every allocation update we only need to persist the 96 // allocation when its eval ID != the last persisted eval ID. 97 persistedEvalLock sync.Mutex 98 persistedEval string 99 100 // immutablePersisted and allocDirPersisted are used to track whether the 101 // immutable data and the alloc dir have been persisted. Once persisted we 102 // can lower write volume by not re-writing these values 103 immutablePersisted bool 104 allocDirPersisted bool 105 106 // baseLabels are used when emitting tagged metrics. All alloc runner metrics 107 // will have these tags, and optionally more. 108 baseLabels []metrics.Label 109 } 110 111 // COMPAT: Remove in 0.7.0 112 // allocRunnerState is used to snapshot the state of the alloc runner 113 type allocRunnerState struct { 114 Version string 115 Alloc *structs.Allocation 116 AllocDir *allocdir.AllocDir 117 AllocClientStatus string 118 AllocClientDescription string 119 120 // COMPAT: Remove in 0.7.0: removing will break upgrading directly from 121 // 0.5.2, so don't remove in the 0.6 series. 122 // Context is deprecated and only used to migrate from older releases. 123 // It will be removed in the future. 124 Context *struct { 125 AllocID string // unused; included for completeness 126 AllocDir struct { 127 AllocDir string 128 SharedDir string // unused; included for completeness 129 TaskDirs map[string]string 130 } 131 } `json:"Context,omitempty"` 132 } 133 134 // allocRunnerAllocState is state that only has to be written when the alloc 135 // changes. 136 type allocRunnerAllocState struct { 137 Alloc *structs.Allocation 138 } 139 140 // allocRunnerImmutableState is state that only has to be written once. 141 type allocRunnerImmutableState struct { 142 Version string 143 } 144 145 // allocRunnerMutableState is state that has to be written on each save as it 146 // changes over the life-cycle of the alloc_runner. 147 type allocRunnerMutableState struct { 148 AllocClientStatus string 149 AllocClientDescription string 150 TaskStates map[string]*structs.TaskState 151 DeploymentStatus *structs.AllocDeploymentStatus 152 } 153 154 // NewAllocRunner is used to create a new allocation context 155 func NewAllocRunner(logger *log.Logger, config *config.Config, stateDB *bolt.DB, updater AllocStateUpdater, 156 alloc *structs.Allocation, vaultClient vaultclient.VaultClient, consulClient ConsulServiceAPI, 157 prevAlloc prevAllocWatcher) *AllocRunner { 158 159 ar := &AllocRunner{ 160 config: config, 161 stateDB: stateDB, 162 updater: updater, 163 logger: logger, 164 alloc: alloc, 165 allocID: alloc.ID, 166 allocBroadcast: cstructs.NewAllocBroadcaster(8), 167 prevAlloc: prevAlloc, 168 dirtyCh: make(chan struct{}, 1), 169 allocDir: allocdir.NewAllocDir(logger, filepath.Join(config.AllocDir, alloc.ID)), 170 tasks: make(map[string]*TaskRunner), 171 taskStates: copyTaskStates(alloc.TaskStates), 172 restored: make(map[string]struct{}), 173 updateCh: make(chan *structs.Allocation, 64), 174 waitCh: make(chan struct{}), 175 vaultClient: vaultClient, 176 consulClient: consulClient, 177 } 178 179 // TODO Should be passed a context 180 ar.ctx, ar.exitFn = context.WithCancel(context.TODO()) 181 182 return ar 183 } 184 185 // setBaseLabels creates the set of base labels. This should be called after 186 // Restore has been called so the allocation is guaranteed to be loaded 187 func (r *AllocRunner) setBaseLabels() { 188 r.baseLabels = make([]metrics.Label, 0, 3) 189 190 if r.alloc.Job != nil { 191 r.baseLabels = append(r.baseLabels, metrics.Label{ 192 Name: "job", 193 Value: r.alloc.Job.Name, 194 }) 195 } 196 if r.alloc.TaskGroup != "" { 197 r.baseLabels = append(r.baseLabels, metrics.Label{ 198 Name: "task_group", 199 Value: r.alloc.TaskGroup, 200 }) 201 } 202 if r.config != nil && r.config.Node != nil { 203 r.baseLabels = append(r.baseLabels, metrics.Label{ 204 Name: "node_id", 205 Value: r.config.Node.ID, 206 }) 207 } 208 } 209 210 // pre060StateFilePath returns the path to our state file that would have been 211 // written pre v0.6.0 212 // COMPAT: Remove in 0.7.0 213 func (r *AllocRunner) pre060StateFilePath() string { 214 r.allocLock.Lock() 215 defer r.allocLock.Unlock() 216 path := filepath.Join(r.config.StateDir, "alloc", r.allocID, "state.json") 217 return path 218 } 219 220 // RestoreState is used to restore the state of the alloc runner 221 func (r *AllocRunner) RestoreState() error { 222 223 // COMPAT: Remove in 0.7.0 224 // Check if the old snapshot is there 225 oldPath := r.pre060StateFilePath() 226 var snap allocRunnerState 227 var upgrading bool 228 if err := pre060RestoreState(oldPath, &snap); err == nil { 229 // Restore fields 230 r.logger.Printf("[INFO] client: restoring pre v0.6.0 alloc runner state for alloc %q", r.allocID) 231 r.alloc = snap.Alloc 232 r.allocDir = snap.AllocDir 233 r.allocClientStatus = snap.AllocClientStatus 234 r.allocClientDescription = snap.AllocClientDescription 235 236 if r.alloc != nil { 237 r.taskStates = snap.Alloc.TaskStates 238 } 239 240 // COMPAT: Remove in 0.7.0 241 // #2132 Upgrade path: if snap.AllocDir is nil, try to convert old 242 // Context struct to new AllocDir struct 243 if snap.AllocDir == nil && snap.Context != nil { 244 r.logger.Printf("[DEBUG] client: migrating state snapshot for alloc %q", r.allocID) 245 r.allocDir = allocdir.NewAllocDir(r.logger, snap.Context.AllocDir.AllocDir) 246 for taskName := range snap.Context.AllocDir.TaskDirs { 247 r.allocDir.NewTaskDir(taskName) 248 } 249 } 250 251 // Delete the old state 252 os.RemoveAll(oldPath) 253 upgrading = true 254 } else if !os.IsNotExist(err) { 255 // Something corrupt in the old state file 256 return err 257 } else { 258 // We are doing a normal restore 259 err := r.stateDB.View(func(tx *bolt.Tx) error { 260 bkt, err := getAllocationBucket(tx, r.allocID) 261 if err != nil { 262 return fmt.Errorf("failed to get allocation bucket: %v", err) 263 } 264 265 // Get the state objects 266 var mutable allocRunnerMutableState 267 var immutable allocRunnerImmutableState 268 var allocState allocRunnerAllocState 269 var allocDir allocdir.AllocDir 270 271 if err := getObject(bkt, allocRunnerStateAllocKey, &allocState); err != nil { 272 return fmt.Errorf("failed to read alloc runner alloc state: %v", err) 273 } 274 if err := getObject(bkt, allocRunnerStateImmutableKey, &immutable); err != nil { 275 return fmt.Errorf("failed to read alloc runner immutable state: %v", err) 276 } 277 if err := getObject(bkt, allocRunnerStateMutableKey, &mutable); err != nil { 278 return fmt.Errorf("failed to read alloc runner mutable state: %v", err) 279 } 280 if err := getObject(bkt, allocRunnerStateAllocDirKey, &allocDir); err != nil { 281 return fmt.Errorf("failed to read alloc runner alloc_dir state: %v", err) 282 } 283 284 // Populate the fields 285 r.alloc = allocState.Alloc 286 r.allocDir = &allocDir 287 r.allocClientStatus = mutable.AllocClientStatus 288 r.allocClientDescription = mutable.AllocClientDescription 289 r.taskStates = mutable.TaskStates 290 r.alloc.ClientStatus = getClientStatus(r.taskStates) 291 r.alloc.DeploymentStatus = mutable.DeploymentStatus 292 return nil 293 }) 294 295 if err != nil { 296 return fmt.Errorf("failed to read allocation state: %v", err) 297 } 298 } 299 300 var snapshotErrors multierror.Error 301 if r.alloc == nil { 302 snapshotErrors.Errors = append(snapshotErrors.Errors, fmt.Errorf("alloc_runner snapshot includes a nil allocation")) 303 } 304 if r.allocDir == nil { 305 snapshotErrors.Errors = append(snapshotErrors.Errors, fmt.Errorf("alloc_runner snapshot includes a nil alloc dir")) 306 } 307 if e := snapshotErrors.ErrorOrNil(); e != nil { 308 return e 309 } 310 311 tg := r.alloc.Job.LookupTaskGroup(r.alloc.TaskGroup) 312 if tg == nil { 313 return fmt.Errorf("restored allocation doesn't contain task group %q", r.alloc.TaskGroup) 314 } 315 316 // Restore the task runners 317 taskDestroyEvent := structs.NewTaskEvent(structs.TaskKilled) 318 var mErr multierror.Error 319 for _, task := range tg.Tasks { 320 name := task.Name 321 state := r.taskStates[name] 322 323 // Nomad exited before task could start, nothing to restore. 324 // AllocRunner.Run will start a new TaskRunner for this task 325 if state == nil { 326 continue 327 } 328 329 // Mark the task as restored. 330 r.restored[name] = struct{}{} 331 332 td, ok := r.allocDir.TaskDirs[name] 333 if !ok { 334 // Create the task dir metadata if it doesn't exist. 335 // Since task dirs are created during r.Run() the 336 // client may save state and exit before all task dirs 337 // are created 338 td = r.allocDir.NewTaskDir(name) 339 } 340 341 // Skip tasks in terminal states. 342 if state.State == structs.TaskStateDead { 343 continue 344 } 345 346 tr := NewTaskRunner(r.logger, r.config, r.stateDB, r.setTaskState, td, r.Alloc(), task, r.vaultClient, r.consulClient) 347 r.tasks[name] = tr 348 349 if restartReason, err := tr.RestoreState(); err != nil { 350 r.logger.Printf("[ERR] client: failed to restore state for alloc %s task %q: %v", r.allocID, name, err) 351 mErr.Errors = append(mErr.Errors, err) 352 } else if !r.alloc.TerminalStatus() { 353 // Only start if the alloc isn't in a terminal status. 354 go tr.Run() 355 356 if upgrading { 357 if err := tr.SaveState(); err != nil { 358 r.logger.Printf("[WARN] client: initial save state for alloc %s task %s failed: %v", r.allocID, name, err) 359 } 360 } 361 362 // Restart task runner if RestoreState gave a reason 363 if restartReason != "" { 364 r.logger.Printf("[INFO] client: restarting alloc %s task %s: %v", r.allocID, name, restartReason) 365 const failure = false 366 tr.Restart("upgrade", restartReason, failure) 367 } 368 } else { 369 tr.Destroy(taskDestroyEvent) 370 } 371 } 372 373 return mErr.ErrorOrNil() 374 } 375 376 // SaveState is used to snapshot the state of the alloc runner 377 // if the fullSync is marked as false only the state of the Alloc Runner 378 // is snapshotted. If fullSync is marked as true, we snapshot 379 // all the Task Runners associated with the Alloc 380 func (r *AllocRunner) SaveState() error { 381 if err := r.saveAllocRunnerState(); err != nil { 382 return err 383 } 384 385 // Save state for each task 386 runners := r.getTaskRunners() 387 var mErr multierror.Error 388 for _, tr := range runners { 389 if err := tr.SaveState(); err != nil { 390 mErr.Errors = append(mErr.Errors, fmt.Errorf("failed to save state for alloc %s task %q: %v", 391 r.allocID, tr.task.Name, err)) 392 } 393 } 394 return mErr.ErrorOrNil() 395 } 396 397 func (r *AllocRunner) saveAllocRunnerState() error { 398 r.allocStateLock.Lock() 399 defer r.allocStateLock.Unlock() 400 401 if r.ctx.Err() == context.Canceled { 402 return nil 403 } 404 405 // Grab all the relevant data 406 alloc := r.Alloc() 407 408 r.allocLock.Lock() 409 allocClientStatus := r.allocClientStatus 410 allocClientDescription := r.allocClientDescription 411 r.allocLock.Unlock() 412 413 r.allocDirLock.Lock() 414 allocDir := r.allocDir.Copy() 415 r.allocDirLock.Unlock() 416 417 // Start the transaction. 418 return r.stateDB.Batch(func(tx *bolt.Tx) error { 419 420 // Grab the allocation bucket 421 allocBkt, err := getAllocationBucket(tx, r.allocID) 422 if err != nil { 423 return fmt.Errorf("failed to retrieve allocation bucket: %v", err) 424 } 425 426 // Write the allocation if the eval has changed 427 r.persistedEvalLock.Lock() 428 lastPersisted := r.persistedEval 429 r.persistedEvalLock.Unlock() 430 if alloc.EvalID != lastPersisted { 431 allocState := &allocRunnerAllocState{ 432 Alloc: alloc, 433 } 434 435 if err := putObject(allocBkt, allocRunnerStateAllocKey, &allocState); err != nil { 436 return fmt.Errorf("failed to write alloc_runner alloc state: %v", err) 437 } 438 439 tx.OnCommit(func() { 440 r.persistedEvalLock.Lock() 441 r.persistedEval = alloc.EvalID 442 r.persistedEvalLock.Unlock() 443 }) 444 } 445 446 // Write immutable data iff it hasn't been written yet 447 if !r.immutablePersisted { 448 immutable := &allocRunnerImmutableState{ 449 Version: r.config.Version.VersionNumber(), 450 } 451 452 if err := putObject(allocBkt, allocRunnerStateImmutableKey, &immutable); err != nil { 453 return fmt.Errorf("failed to write alloc_runner immutable state: %v", err) 454 } 455 456 tx.OnCommit(func() { 457 r.immutablePersisted = true 458 }) 459 } 460 461 // Write the alloc dir data if it hasn't been written before and it exists. 462 if !r.allocDirPersisted && allocDir != nil { 463 if err := putObject(allocBkt, allocRunnerStateAllocDirKey, allocDir); err != nil { 464 return fmt.Errorf("failed to write alloc_runner allocDir state: %v", err) 465 } 466 467 tx.OnCommit(func() { 468 r.allocDirPersisted = true 469 }) 470 } 471 472 // Write the mutable state every time 473 mutable := &allocRunnerMutableState{ 474 AllocClientStatus: allocClientStatus, 475 AllocClientDescription: allocClientDescription, 476 TaskStates: alloc.TaskStates, 477 DeploymentStatus: alloc.DeploymentStatus, 478 } 479 480 if err := putObject(allocBkt, allocRunnerStateMutableKey, &mutable); err != nil { 481 return fmt.Errorf("failed to write alloc_runner mutable state: %v", err) 482 } 483 484 return nil 485 }) 486 } 487 488 // DestroyState is used to cleanup after ourselves 489 func (r *AllocRunner) DestroyState() error { 490 r.allocStateLock.Lock() 491 defer r.allocStateLock.Unlock() 492 493 return r.stateDB.Update(func(tx *bolt.Tx) error { 494 if err := deleteAllocationBucket(tx, r.allocID); err != nil { 495 return fmt.Errorf("failed to delete allocation bucket: %v", err) 496 } 497 return nil 498 }) 499 } 500 501 // DestroyContext is used to destroy the context 502 func (r *AllocRunner) DestroyContext() error { 503 return r.allocDir.Destroy() 504 } 505 506 // GetAllocDir returns the alloc dir for the alloc runner 507 func (r *AllocRunner) GetAllocDir() *allocdir.AllocDir { 508 return r.allocDir 509 } 510 511 // GetListener returns a listener for updates broadcast by this alloc runner. 512 // Callers are responsible for calling Close on their Listener. 513 func (r *AllocRunner) GetListener() *cstructs.AllocListener { 514 return r.allocBroadcast.Listen() 515 } 516 517 // copyTaskStates returns a copy of the passed task states. 518 func copyTaskStates(states map[string]*structs.TaskState) map[string]*structs.TaskState { 519 copy := make(map[string]*structs.TaskState, len(states)) 520 for task, state := range states { 521 copy[task] = state.Copy() 522 } 523 return copy 524 } 525 526 // finalizeTerminalAlloc sets any missing required fields like 527 // finishedAt in the alloc runner's task States. finishedAt is used 528 // to calculate reschedule time for failed allocs, so we make sure that 529 // it is set 530 func (r *AllocRunner) finalizeTerminalAlloc(alloc *structs.Allocation) { 531 if !alloc.ClientTerminalStatus() { 532 return 533 } 534 r.taskStatusLock.Lock() 535 defer r.taskStatusLock.Unlock() 536 537 group := alloc.Job.LookupTaskGroup(alloc.TaskGroup) 538 if r.taskStates == nil { 539 r.taskStates = make(map[string]*structs.TaskState) 540 } 541 now := time.Now() 542 for _, task := range group.Tasks { 543 ts, ok := r.taskStates[task.Name] 544 if !ok { 545 ts = &structs.TaskState{} 546 r.taskStates[task.Name] = ts 547 } 548 if ts.FinishedAt.IsZero() { 549 ts.FinishedAt = now 550 } 551 } 552 alloc.TaskStates = copyTaskStates(r.taskStates) 553 } 554 555 // Alloc returns the associated allocation 556 func (r *AllocRunner) Alloc() *structs.Allocation { 557 r.allocLock.Lock() 558 559 // Don't do a deep copy of the job 560 alloc := r.alloc.CopySkipJob() 561 562 // The status has explicitly been set. 563 if r.allocClientStatus != "" || r.allocClientDescription != "" { 564 alloc.ClientStatus = r.allocClientStatus 565 alloc.ClientDescription = r.allocClientDescription 566 567 // Copy over the task states so we don't lose them 568 r.taskStatusLock.RLock() 569 alloc.TaskStates = copyTaskStates(r.taskStates) 570 r.taskStatusLock.RUnlock() 571 572 r.allocLock.Unlock() 573 r.finalizeTerminalAlloc(alloc) 574 return alloc 575 } 576 577 // The health has been set 578 if r.allocHealth != nil { 579 if alloc.DeploymentStatus == nil { 580 alloc.DeploymentStatus = &structs.AllocDeploymentStatus{} 581 } 582 alloc.DeploymentStatus.Healthy = helper.BoolToPtr(*r.allocHealth) 583 } 584 r.allocLock.Unlock() 585 586 // Scan the task states to determine the status of the alloc 587 r.taskStatusLock.RLock() 588 alloc.TaskStates = copyTaskStates(r.taskStates) 589 alloc.ClientStatus = getClientStatus(r.taskStates) 590 r.taskStatusLock.RUnlock() 591 592 // If the client status is failed and we are part of a deployment, mark the 593 // alloc as unhealthy. This guards against the watcher not be started. 594 r.allocLock.Lock() 595 if alloc.ClientStatus == structs.AllocClientStatusFailed && 596 alloc.DeploymentID != "" && !alloc.DeploymentStatus.IsUnhealthy() { 597 alloc.DeploymentStatus = &structs.AllocDeploymentStatus{ 598 Healthy: helper.BoolToPtr(false), 599 } 600 } 601 r.allocLock.Unlock() 602 r.finalizeTerminalAlloc(alloc) 603 return alloc 604 } 605 606 // getClientStatus takes in the task states for a given allocation and computes 607 // the client status 608 func getClientStatus(taskStates map[string]*structs.TaskState) string { 609 var pending, running, dead, failed bool 610 for _, state := range taskStates { 611 switch state.State { 612 case structs.TaskStateRunning: 613 running = true 614 case structs.TaskStatePending: 615 pending = true 616 case structs.TaskStateDead: 617 if state.Failed { 618 failed = true 619 } else { 620 dead = true 621 } 622 } 623 } 624 625 // Determine the alloc status 626 if failed { 627 return structs.AllocClientStatusFailed 628 } else if running { 629 return structs.AllocClientStatusRunning 630 } else if pending { 631 return structs.AllocClientStatusPending 632 } else if dead { 633 return structs.AllocClientStatusComplete 634 } 635 636 return "" 637 } 638 639 // dirtySyncState is used to watch for state being marked dirty to sync 640 func (r *AllocRunner) dirtySyncState() { 641 for { 642 select { 643 case <-r.dirtyCh: 644 if err := r.syncStatus(); err != nil { 645 // Only WARN instead of ERR because we continue on 646 r.logger.Printf("[WARN] client: error persisting alloc %q state: %v", 647 r.allocID, err) 648 } 649 case <-r.ctx.Done(): 650 return 651 } 652 } 653 } 654 655 // syncStatus is used to run and sync the status when it changes 656 func (r *AllocRunner) syncStatus() error { 657 // Get a copy of our alloc, update status server side and sync to disk 658 alloc := r.Alloc() 659 r.updater(alloc) 660 r.sendBroadcast(alloc) 661 return r.saveAllocRunnerState() 662 } 663 664 // sendBroadcast broadcasts an alloc update. 665 func (r *AllocRunner) sendBroadcast(alloc *structs.Allocation) { 666 // Try to send the alloc up to three times with a delay to allow recovery. 667 sent := false 668 for i := 0; i < 3; i++ { 669 if sent = r.allocBroadcast.Send(alloc); sent { 670 break 671 } 672 time.Sleep(500 * time.Millisecond) 673 } 674 if !sent { 675 r.logger.Printf("[WARN] client: failed to broadcast update to allocation %q", r.allocID) 676 } 677 } 678 679 // setStatus is used to update the allocation status 680 func (r *AllocRunner) setStatus(status, desc string) { 681 r.allocLock.Lock() 682 r.allocClientStatus = status 683 r.allocClientDescription = desc 684 r.allocLock.Unlock() 685 select { 686 case r.dirtyCh <- struct{}{}: 687 default: 688 } 689 } 690 691 // setTaskState is used to set the status of a task. If lazySync is set then the 692 // event is appended but not synced with the server. If state is omitted, the 693 // last known state is used. 694 func (r *AllocRunner) setTaskState(taskName, state string, event *structs.TaskEvent, lazySync bool) { 695 r.taskStatusLock.Lock() 696 defer r.taskStatusLock.Unlock() 697 taskState, ok := r.taskStates[taskName] 698 if !ok { 699 taskState = &structs.TaskState{} 700 r.taskStates[taskName] = taskState 701 } 702 703 // Set the tasks state. 704 if event != nil { 705 if event.FailsTask { 706 taskState.Failed = true 707 } 708 if event.Type == structs.TaskRestarting { 709 if !r.config.DisableTaggedMetrics { 710 metrics.IncrCounterWithLabels([]string{"client", "allocs", "restart"}, 711 1, r.baseLabels) 712 } 713 if r.config.BackwardsCompatibleMetrics { 714 metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, taskName, "restart"}, 1) 715 } 716 taskState.Restarts++ 717 taskState.LastRestart = time.Unix(0, event.Time) 718 } 719 r.appendTaskEvent(taskState, event) 720 } 721 722 if lazySync { 723 return 724 } 725 726 // If the state hasn't been set use the existing state. 727 if state == "" { 728 state = taskState.State 729 if taskState.State == "" { 730 state = structs.TaskStatePending 731 } 732 } 733 734 switch state { 735 case structs.TaskStateRunning: 736 // Capture the start time if it is just starting 737 if taskState.State != structs.TaskStateRunning { 738 taskState.StartedAt = time.Now().UTC() 739 if !r.config.DisableTaggedMetrics { 740 metrics.IncrCounterWithLabels([]string{"client", "allocs", "running"}, 741 1, r.baseLabels) 742 } 743 if r.config.BackwardsCompatibleMetrics { 744 metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, taskName, "running"}, 1) 745 } 746 } 747 case structs.TaskStateDead: 748 // Capture the finished time if not already set 749 if taskState.FinishedAt.IsZero() { 750 taskState.FinishedAt = time.Now().UTC() 751 } 752 753 // Find all tasks that are not the one that is dead and check if the one 754 // that is dead is a leader 755 var otherTaskRunners []*TaskRunner 756 var otherTaskNames []string 757 leader := false 758 for task, tr := range r.tasks { 759 if task != taskName { 760 otherTaskRunners = append(otherTaskRunners, tr) 761 otherTaskNames = append(otherTaskNames, task) 762 } else if tr.task.Leader { 763 leader = true 764 } 765 } 766 767 // Emitting metrics to indicate task complete and failures 768 if taskState.Failed { 769 if !r.config.DisableTaggedMetrics { 770 metrics.IncrCounterWithLabels([]string{"client", "allocs", "failed"}, 771 1, r.baseLabels) 772 } 773 if r.config.BackwardsCompatibleMetrics { 774 metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, taskName, "failed"}, 1) 775 } 776 } else { 777 if !r.config.DisableTaggedMetrics { 778 metrics.IncrCounterWithLabels([]string{"client", "allocs", "complete"}, 779 1, r.baseLabels) 780 } 781 if r.config.BackwardsCompatibleMetrics { 782 metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, taskName, "complete"}, 1) 783 } 784 } 785 // If the task failed, we should kill all the other tasks in the task group. 786 if taskState.Failed { 787 for _, tr := range otherTaskRunners { 788 tr.Destroy(structs.NewTaskEvent(structs.TaskSiblingFailed).SetFailedSibling(taskName)) 789 } 790 if len(otherTaskRunners) > 0 { 791 r.logger.Printf("[DEBUG] client: task %q failed, destroying other tasks in task group: %v", taskName, otherTaskNames) 792 } 793 } else if leader { 794 // If the task was a leader task we should kill all the other tasks. 795 for _, tr := range otherTaskRunners { 796 tr.Destroy(structs.NewTaskEvent(structs.TaskLeaderDead)) 797 } 798 if len(otherTaskRunners) > 0 { 799 r.logger.Printf("[DEBUG] client: leader task %q is dead, destroying other tasks in task group: %v", taskName, otherTaskNames) 800 } 801 } 802 } 803 804 // Store the new state 805 taskState.State = state 806 807 select { 808 case r.dirtyCh <- struct{}{}: 809 default: 810 } 811 } 812 813 // appendTaskEvent updates the task status by appending the new event. 814 func (r *AllocRunner) appendTaskEvent(state *structs.TaskState, event *structs.TaskEvent) { 815 capacity := 10 816 if state.Events == nil { 817 state.Events = make([]*structs.TaskEvent, 0, capacity) 818 } 819 820 // If we hit capacity, then shift it. 821 if len(state.Events) == capacity { 822 old := state.Events 823 state.Events = make([]*structs.TaskEvent, 0, capacity) 824 state.Events = append(state.Events, old[1:]...) 825 } 826 827 state.Events = append(state.Events, event) 828 } 829 830 // Run is a long running goroutine used to manage an allocation 831 func (r *AllocRunner) Run() { 832 defer close(r.waitCh) 833 r.setBaseLabels() 834 go r.dirtySyncState() 835 836 // Find the task group to run in the allocation 837 alloc := r.Alloc() 838 tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup) 839 if tg == nil { 840 r.logger.Printf("[ERR] client: alloc %q for missing task group %q", r.allocID, alloc.TaskGroup) 841 r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("missing task group '%s'", alloc.TaskGroup)) 842 return 843 } 844 845 // Build allocation directory (idempotent) 846 r.allocDirLock.Lock() 847 err := r.allocDir.Build() 848 r.allocDirLock.Unlock() 849 850 if err != nil { 851 r.logger.Printf("[ERR] client: alloc %q failed to build task directories: %v", r.allocID, err) 852 r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("failed to build task dirs for '%s'", alloc.TaskGroup)) 853 return 854 } 855 856 // Wait for a previous alloc - if any - to terminate 857 if err := r.prevAlloc.Wait(r.ctx); err != nil { 858 if err == context.Canceled { 859 return 860 } 861 r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("error while waiting for previous alloc to terminate: %v", err)) 862 return 863 } 864 865 // Wait for data to be migrated from a previous alloc if applicable 866 if err := r.prevAlloc.Migrate(r.ctx, r.allocDir); err != nil { 867 if err == context.Canceled { 868 return 869 } 870 871 // Soft-fail on migration errors 872 r.logger.Printf("[WARN] client: alloc %q error while migrating data from previous alloc: %v", r.allocID, err) 873 874 // Recreate alloc dir to ensure a clean slate 875 r.allocDir.Destroy() 876 if err := r.allocDir.Build(); err != nil { 877 r.logger.Printf("[ERR] client: alloc %q failed to clean task directories after failed migration: %v", r.allocID, err) 878 r.setStatus(structs.AllocClientStatusFailed, fmt.Sprintf("failed to rebuild task dirs for '%s'", alloc.TaskGroup)) 879 return 880 } 881 } 882 883 // Check if the allocation is in a terminal status. In this case, we don't 884 // start any of the task runners and directly wait for the destroy signal to 885 // clean up the allocation. 886 if alloc.TerminalStatus() { 887 r.logger.Printf("[DEBUG] client: alloc %q in terminal status, waiting for destroy", r.allocID) 888 // mark this allocation as completed if it is not already in a 889 // terminal state 890 if !alloc.Terminated() { 891 r.setStatus(structs.AllocClientStatusComplete, "canceled running tasks for allocation in terminal state") 892 } 893 r.handleDestroy() 894 r.logger.Printf("[DEBUG] client: terminating runner for alloc '%s'", r.allocID) 895 return 896 } 897 898 // Increment alloc runner start counter. Incr'd even when restoring existing tasks so 1 start != 1 task execution 899 if !r.config.DisableTaggedMetrics { 900 metrics.IncrCounterWithLabels([]string{"client", "allocs", "start"}, 901 1, r.baseLabels) 902 } 903 if r.config.BackwardsCompatibleMetrics { 904 metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, "start"}, 1) 905 } 906 907 // Start the watcher 908 wCtx, watcherCancel := context.WithCancel(r.ctx) 909 go r.watchHealth(wCtx) 910 911 // Start the task runners 912 r.logger.Printf("[DEBUG] client: starting task runners for alloc '%s'", r.allocID) 913 r.taskLock.Lock() 914 for _, task := range tg.Tasks { 915 if _, ok := r.restored[task.Name]; ok { 916 continue 917 } 918 919 r.allocDirLock.Lock() 920 taskdir := r.allocDir.NewTaskDir(task.Name) 921 r.allocDirLock.Unlock() 922 923 tr := NewTaskRunner(r.logger, r.config, r.stateDB, r.setTaskState, taskdir, r.Alloc(), task.Copy(), r.vaultClient, r.consulClient) 924 r.tasks[task.Name] = tr 925 tr.MarkReceived() 926 927 go tr.Run() 928 } 929 r.taskLock.Unlock() 930 931 // taskDestroyEvent contains an event that caused the destruction of a task 932 // in the allocation. 933 var taskDestroyEvent *structs.TaskEvent 934 935 OUTER: 936 // Wait for updates 937 for { 938 select { 939 case update := <-r.updateCh: 940 // Store the updated allocation. 941 r.allocLock.Lock() 942 943 // If the deployment ids have changed clear the health 944 if r.alloc.DeploymentID != update.DeploymentID { 945 r.allocHealth = nil 946 } 947 948 r.alloc = update 949 r.allocLock.Unlock() 950 951 // Create a new watcher 952 watcherCancel() 953 wCtx, watcherCancel = context.WithCancel(r.ctx) 954 go r.watchHealth(wCtx) 955 956 // Check if we're in a terminal status 957 if update.TerminalStatus() { 958 taskDestroyEvent = structs.NewTaskEvent(structs.TaskKilled) 959 break OUTER 960 } 961 962 // Update the task groups 963 runners := r.getTaskRunners() 964 for _, tr := range runners { 965 tr.Update(update) 966 } 967 968 if err := r.syncStatus(); err != nil { 969 r.logger.Printf("[WARN] client: failed to sync alloc %q status upon receiving alloc update: %v", 970 r.allocID, err) 971 } 972 973 case <-r.ctx.Done(): 974 taskDestroyEvent = structs.NewTaskEvent(structs.TaskKilled) 975 break OUTER 976 } 977 } 978 979 // Kill the task runners 980 r.destroyTaskRunners(taskDestroyEvent) 981 982 // Block until we should destroy the state of the alloc 983 r.handleDestroy() 984 985 // Free up the context. It has likely exited already 986 watcherCancel() 987 988 r.logger.Printf("[DEBUG] client: terminating runner for alloc '%s'", r.allocID) 989 } 990 991 // destroyTaskRunners destroys the task runners, waits for them to terminate and 992 // then saves state. 993 func (r *AllocRunner) destroyTaskRunners(destroyEvent *structs.TaskEvent) { 994 // First destroy the leader if one exists 995 tg := r.alloc.Job.LookupTaskGroup(r.alloc.TaskGroup) 996 leader := "" 997 for _, task := range tg.Tasks { 998 if task.Leader { 999 leader = task.Name 1000 break 1001 } 1002 } 1003 if leader != "" { 1004 r.taskLock.RLock() 1005 tr := r.tasks[leader] 1006 r.taskLock.RUnlock() 1007 1008 // Dead tasks don't have a task runner created so guard against 1009 // the leader being dead when this AR was saved. 1010 if tr == nil { 1011 r.logger.Printf("[DEBUG] client: alloc %q leader task %q of task group %q already stopped", 1012 r.allocID, leader, r.alloc.TaskGroup) 1013 } else { 1014 r.logger.Printf("[DEBUG] client: alloc %q destroying leader task %q of task group %q first", 1015 r.allocID, leader, r.alloc.TaskGroup) 1016 tr.Destroy(destroyEvent) 1017 <-tr.WaitCh() 1018 } 1019 } 1020 1021 // Then destroy non-leader tasks concurrently 1022 r.taskLock.RLock() 1023 for name, tr := range r.tasks { 1024 if name != leader { 1025 tr.Destroy(destroyEvent) 1026 } 1027 } 1028 r.taskLock.RUnlock() 1029 1030 // Wait for termination of the task runners 1031 for _, tr := range r.getTaskRunners() { 1032 <-tr.WaitCh() 1033 } 1034 } 1035 1036 // handleDestroy blocks till the AllocRunner should be destroyed and does the 1037 // necessary cleanup. 1038 func (r *AllocRunner) handleDestroy() { 1039 // Final state sync. We do this to ensure that the server has the correct 1040 // state as we wait for a destroy. 1041 alloc := r.Alloc() 1042 1043 // Increment the destroy count for this alloc runner since this allocation is being removed from this client. 1044 if !r.config.DisableTaggedMetrics { 1045 metrics.IncrCounterWithLabels([]string{"client", "allocs", "destroy"}, 1046 1, r.baseLabels) 1047 } 1048 if r.config.BackwardsCompatibleMetrics { 1049 metrics.IncrCounter([]string{"client", "allocs", r.alloc.Job.Name, r.alloc.TaskGroup, "destroy"}, 1) 1050 } 1051 1052 // Broadcast and persist state synchronously 1053 r.sendBroadcast(alloc) 1054 if err := r.saveAllocRunnerState(); err != nil { 1055 r.logger.Printf("[WARN] client: alloc %q unable to persist state but should be GC'd soon anyway:%v", 1056 r.allocID, err) 1057 } 1058 1059 // Unmount any mounted directories as no tasks are running and makes 1060 // cleaning up Nomad's data directory simpler. 1061 if err := r.allocDir.UnmountAll(); err != nil { 1062 r.logger.Printf("[ERR] client: alloc %q unable unmount task directories: %v", r.allocID, err) 1063 } 1064 1065 // Update the server with the alloc's status -- also marks the alloc as 1066 // being eligible for GC, so from this point on the alloc can be gc'd 1067 // at any time. 1068 r.updater(alloc) 1069 1070 for { 1071 select { 1072 case <-r.ctx.Done(): 1073 if err := r.DestroyContext(); err != nil { 1074 r.logger.Printf("[ERR] client: failed to destroy context for alloc '%s': %v", 1075 r.allocID, err) 1076 } 1077 if err := r.DestroyState(); err != nil { 1078 r.logger.Printf("[ERR] client: failed to destroy state for alloc '%s': %v", 1079 r.allocID, err) 1080 } 1081 1082 return 1083 case <-r.updateCh: 1084 r.logger.Printf("[DEBUG] client: dropping update to terminal alloc '%s'", r.allocID) 1085 } 1086 } 1087 } 1088 1089 // IsWaiting returns true if this alloc is waiting on a previous allocation to 1090 // terminate. 1091 func (r *AllocRunner) IsWaiting() bool { 1092 return r.prevAlloc.IsWaiting() 1093 } 1094 1095 // IsMigrating returns true if this alloc is migrating data from a previous 1096 // allocation. 1097 func (r *AllocRunner) IsMigrating() bool { 1098 return r.prevAlloc.IsMigrating() 1099 } 1100 1101 // Update is used to update the allocation of the context 1102 func (r *AllocRunner) Update(update *structs.Allocation) { 1103 select { 1104 case r.updateCh <- update: 1105 default: 1106 r.logger.Printf("[ERR] client: dropping update to alloc '%s'", update.ID) 1107 } 1108 } 1109 1110 // StatsReporter returns an interface to query resource usage statistics of an 1111 // allocation 1112 func (r *AllocRunner) StatsReporter() AllocStatsReporter { 1113 return r 1114 } 1115 1116 // getTaskRunners is a helper that returns a copy of the task runners list using 1117 // the taskLock. 1118 func (r *AllocRunner) getTaskRunners() []*TaskRunner { 1119 // Get the task runners 1120 r.taskLock.RLock() 1121 defer r.taskLock.RUnlock() 1122 runners := make([]*TaskRunner, 0, len(r.tasks)) 1123 for _, tr := range r.tasks { 1124 runners = append(runners, tr) 1125 } 1126 return runners 1127 } 1128 1129 // LatestAllocStats returns the latest allocation stats. If the optional taskFilter is set 1130 // the allocation stats will only include the given task. 1131 func (r *AllocRunner) LatestAllocStats(taskFilter string) (*cstructs.AllocResourceUsage, error) { 1132 astat := &cstructs.AllocResourceUsage{ 1133 Tasks: make(map[string]*cstructs.TaskResourceUsage), 1134 } 1135 1136 var flat []*cstructs.TaskResourceUsage 1137 if taskFilter != "" { 1138 r.taskLock.RLock() 1139 tr, ok := r.tasks[taskFilter] 1140 r.taskLock.RUnlock() 1141 if !ok { 1142 return nil, fmt.Errorf("allocation %q has no task %q", r.allocID, taskFilter) 1143 } 1144 l := tr.LatestResourceUsage() 1145 if l != nil { 1146 astat.Tasks[taskFilter] = l 1147 flat = []*cstructs.TaskResourceUsage{l} 1148 astat.Timestamp = l.Timestamp 1149 } 1150 } else { 1151 // Get the task runners 1152 runners := r.getTaskRunners() 1153 for _, tr := range runners { 1154 l := tr.LatestResourceUsage() 1155 if l != nil { 1156 astat.Tasks[tr.task.Name] = l 1157 flat = append(flat, l) 1158 if l.Timestamp > astat.Timestamp { 1159 astat.Timestamp = l.Timestamp 1160 } 1161 } 1162 } 1163 } 1164 1165 astat.ResourceUsage = sumTaskResourceUsage(flat) 1166 return astat, nil 1167 } 1168 1169 // sumTaskResourceUsage takes a set of task resources and sums their resources 1170 func sumTaskResourceUsage(usages []*cstructs.TaskResourceUsage) *cstructs.ResourceUsage { 1171 summed := &cstructs.ResourceUsage{ 1172 MemoryStats: &cstructs.MemoryStats{}, 1173 CpuStats: &cstructs.CpuStats{}, 1174 } 1175 for _, usage := range usages { 1176 summed.Add(usage.ResourceUsage) 1177 } 1178 return summed 1179 } 1180 1181 // shouldUpdate takes the AllocModifyIndex of an allocation sent from the server and 1182 // checks if the current running allocation is behind and should be updated. 1183 func (r *AllocRunner) shouldUpdate(serverIndex uint64) bool { 1184 r.allocLock.Lock() 1185 defer r.allocLock.Unlock() 1186 return r.alloc.AllocModifyIndex < serverIndex 1187 } 1188 1189 // Destroy is used to indicate that the allocation context should be destroyed 1190 func (r *AllocRunner) Destroy() { 1191 // Lock when closing the context as that gives the save state code 1192 // serialization. 1193 r.allocStateLock.Lock() 1194 defer r.allocStateLock.Unlock() 1195 1196 r.exitFn() 1197 r.allocBroadcast.Close() 1198 } 1199 1200 // IsDestroyed returns true if the AllocRunner is not running and has been 1201 // destroyed (GC'd). 1202 func (r *AllocRunner) IsDestroyed() bool { 1203 select { 1204 case <-r.waitCh: 1205 return true 1206 default: 1207 return false 1208 } 1209 } 1210 1211 // WaitCh returns a channel to wait for termination 1212 func (r *AllocRunner) WaitCh() <-chan struct{} { 1213 return r.waitCh 1214 }