github.com/jmitchell/nomad@v0.1.3-0.20151007230021-7ab84c2862d8/client/task_runner.go (about) 1 package client 2 3 import ( 4 "crypto/md5" 5 "encoding/hex" 6 "fmt" 7 "log" 8 "os" 9 "path/filepath" 10 "sync" 11 12 "github.com/hashicorp/nomad/client/config" 13 "github.com/hashicorp/nomad/client/driver" 14 "github.com/hashicorp/nomad/nomad/structs" 15 ) 16 17 // TaskRunner is used to wrap a task within an allocation and provide the execution context. 18 type TaskRunner struct { 19 config *config.Config 20 updater TaskStateUpdater 21 logger *log.Logger 22 ctx *driver.ExecContext 23 allocID string 24 25 task *structs.Task 26 updateCh chan *structs.Task 27 handle driver.DriverHandle 28 29 destroy bool 30 destroyCh chan struct{} 31 destroyLock sync.Mutex 32 waitCh chan struct{} 33 } 34 35 // taskRunnerState is used to snapshot the state of the task runner 36 type taskRunnerState struct { 37 Task *structs.Task 38 HandleID string 39 } 40 41 // TaskStateUpdater is used to update the status of a task 42 type TaskStateUpdater func(taskName, status, desc string) 43 44 // NewTaskRunner is used to create a new task context 45 func NewTaskRunner(logger *log.Logger, config *config.Config, 46 updater TaskStateUpdater, ctx *driver.ExecContext, 47 allocID string, task *structs.Task) *TaskRunner { 48 tc := &TaskRunner{ 49 config: config, 50 updater: updater, 51 logger: logger, 52 ctx: ctx, 53 allocID: allocID, 54 task: task, 55 updateCh: make(chan *structs.Task, 8), 56 destroyCh: make(chan struct{}), 57 waitCh: make(chan struct{}), 58 } 59 return tc 60 } 61 62 // WaitCh returns a channel to wait for termination 63 func (r *TaskRunner) WaitCh() <-chan struct{} { 64 return r.waitCh 65 } 66 67 // stateFilePath returns the path to our state file 68 func (r *TaskRunner) stateFilePath() string { 69 // Get the MD5 of the task name 70 hashVal := md5.Sum([]byte(r.task.Name)) 71 hashHex := hex.EncodeToString(hashVal[:]) 72 dirName := fmt.Sprintf("task-%s", hashHex) 73 74 // Generate the path 75 path := filepath.Join(r.config.StateDir, "alloc", r.allocID, 76 dirName, "state.json") 77 return path 78 } 79 80 // RestoreState is used to restore our state 81 func (r *TaskRunner) RestoreState() error { 82 // Load the snapshot 83 var snap taskRunnerState 84 if err := restoreState(r.stateFilePath(), &snap); err != nil { 85 return err 86 } 87 88 // Restore fields 89 r.task = snap.Task 90 91 // Restore the driver 92 if snap.HandleID != "" { 93 driver, err := r.createDriver() 94 if err != nil { 95 return err 96 } 97 98 handle, err := driver.Open(r.ctx, snap.HandleID) 99 if err != nil { 100 r.logger.Printf("[ERR] client: failed to open handle to task '%s' for alloc '%s': %v", 101 r.task.Name, r.allocID, err) 102 return err 103 } 104 r.handle = handle 105 } 106 return nil 107 } 108 109 // SaveState is used to snapshot our state 110 func (r *TaskRunner) SaveState() error { 111 snap := taskRunnerState{ 112 Task: r.task, 113 } 114 if r.handle != nil { 115 snap.HandleID = r.handle.ID() 116 } 117 return persistState(r.stateFilePath(), &snap) 118 } 119 120 // DestroyState is used to cleanup after ourselves 121 func (r *TaskRunner) DestroyState() error { 122 return os.RemoveAll(r.stateFilePath()) 123 } 124 125 // setStatus is used to update the status of the task runner 126 func (r *TaskRunner) setStatus(status, desc string) { 127 r.updater(r.task.Name, status, desc) 128 } 129 130 // createDriver makes a driver for the task 131 func (r *TaskRunner) createDriver() (driver.Driver, error) { 132 driverCtx := driver.NewDriverContext(r.task.Name, r.config, r.config.Node, r.logger) 133 driver, err := driver.NewDriver(r.task.Driver, driverCtx) 134 if err != nil { 135 err = fmt.Errorf("failed to create driver '%s' for alloc %s: %v", 136 r.task.Driver, r.allocID, err) 137 r.logger.Printf("[ERR] client: %s", err) 138 } 139 return driver, err 140 } 141 142 // startTask is used to start the task if there is no handle 143 func (r *TaskRunner) startTask() error { 144 // Create a driver 145 driver, err := r.createDriver() 146 if err != nil { 147 r.setStatus(structs.AllocClientStatusFailed, err.Error()) 148 return err 149 } 150 151 // Start the job 152 handle, err := driver.Start(r.ctx, r.task) 153 if err != nil { 154 r.logger.Printf("[ERR] client: failed to start task '%s' for alloc '%s': %v", 155 r.task.Name, r.allocID, err) 156 r.setStatus(structs.AllocClientStatusFailed, 157 fmt.Sprintf("failed to start: %v", err)) 158 return err 159 } 160 r.handle = handle 161 r.setStatus(structs.AllocClientStatusRunning, "task started") 162 return nil 163 } 164 165 // Run is a long running routine used to manage the task 166 func (r *TaskRunner) Run() { 167 defer close(r.waitCh) 168 r.logger.Printf("[DEBUG] client: starting task context for '%s' (alloc '%s')", 169 r.task.Name, r.allocID) 170 171 // Start the task if not yet started 172 if r.handle == nil { 173 if err := r.startTask(); err != nil { 174 return 175 } 176 } 177 178 OUTER: 179 // Wait for updates 180 for { 181 select { 182 case err := <-r.handle.WaitCh(): 183 if err != nil { 184 r.logger.Printf("[ERR] client: failed to complete task '%s' for alloc '%s': %v", 185 r.task.Name, r.allocID, err) 186 r.setStatus(structs.AllocClientStatusDead, 187 fmt.Sprintf("task failed with: %v", err)) 188 } else { 189 r.logger.Printf("[INFO] client: completed task '%s' for alloc '%s'", 190 r.task.Name, r.allocID) 191 r.setStatus(structs.AllocClientStatusDead, 192 "task completed") 193 } 194 break OUTER 195 196 case update := <-r.updateCh: 197 // Update 198 r.task = update 199 if err := r.handle.Update(update); err != nil { 200 r.logger.Printf("[ERR] client: failed to update task '%s' for alloc '%s': %v", 201 r.task.Name, r.allocID, err) 202 } 203 204 case <-r.destroyCh: 205 // Send the kill signal, and use the WaitCh to block until complete 206 if err := r.handle.Kill(); err != nil { 207 r.logger.Printf("[ERR] client: failed to kill task '%s' for alloc '%s': %v", 208 r.task.Name, r.allocID, err) 209 } 210 } 211 } 212 213 // Cleanup after ourselves 214 r.DestroyState() 215 } 216 217 // Update is used to update the task of the context 218 func (r *TaskRunner) Update(update *structs.Task) { 219 select { 220 case r.updateCh <- update: 221 default: 222 r.logger.Printf("[ERR] client: dropping task update '%s' (alloc '%s')", 223 update.Name, r.allocID) 224 } 225 } 226 227 // Destroy is used to indicate that the task context should be destroyed 228 func (r *TaskRunner) Destroy() { 229 r.destroyLock.Lock() 230 defer r.destroyLock.Unlock() 231 232 if r.destroy { 233 return 234 } 235 r.destroy = true 236 close(r.destroyCh) 237 }