github.com/maier/nomad@v0.4.1-0.20161110003312-a9e3d0b8549d/client/driver/exec.go (about) 1 package driver 2 3 import ( 4 "encoding/json" 5 "fmt" 6 "log" 7 "os" 8 "os/exec" 9 "path/filepath" 10 "strings" 11 "time" 12 13 "github.com/hashicorp/go-multierror" 14 "github.com/hashicorp/go-plugin" 15 "github.com/hashicorp/nomad/client/allocdir" 16 "github.com/hashicorp/nomad/client/config" 17 "github.com/hashicorp/nomad/client/driver/executor" 18 dstructs "github.com/hashicorp/nomad/client/driver/structs" 19 cstructs "github.com/hashicorp/nomad/client/structs" 20 "github.com/hashicorp/nomad/helper/discover" 21 "github.com/hashicorp/nomad/helper/fields" 22 "github.com/hashicorp/nomad/nomad/structs" 23 "github.com/mitchellh/mapstructure" 24 ) 25 26 const ( 27 // The key populated in Node Attributes to indicate the presence of the Exec 28 // driver 29 execDriverAttr = "driver.exec" 30 ) 31 32 // ExecDriver fork/execs tasks using as many of the underlying OS's isolation 33 // features. 34 type ExecDriver struct { 35 DriverContext 36 } 37 38 type ExecDriverConfig struct { 39 Command string `mapstructure:"command"` 40 Args []string `mapstructure:"args"` 41 } 42 43 // execHandle is returned from Start/Open as a handle to the PID 44 type execHandle struct { 45 pluginClient *plugin.Client 46 executor executor.Executor 47 isolationConfig *dstructs.IsolationConfig 48 userPid int 49 allocDir *allocdir.AllocDir 50 killTimeout time.Duration 51 maxKillTimeout time.Duration 52 logger *log.Logger 53 waitCh chan *dstructs.WaitResult 54 doneCh chan struct{} 55 version string 56 } 57 58 // NewExecDriver is used to create a new exec driver 59 func NewExecDriver(ctx *DriverContext) Driver { 60 return &ExecDriver{DriverContext: *ctx} 61 } 62 63 // Validate is used to validate the driver configuration 64 func (d *ExecDriver) Validate(config map[string]interface{}) error { 65 fd := &fields.FieldData{ 66 Raw: config, 67 Schema: map[string]*fields.FieldSchema{ 68 "command": &fields.FieldSchema{ 69 Type: fields.TypeString, 70 Required: true, 71 }, 72 "args": &fields.FieldSchema{ 73 Type: fields.TypeArray, 74 }, 75 }, 76 } 77 78 if err := fd.Validate(); err != nil { 79 return err 80 } 81 82 return nil 83 } 84 85 func (d *ExecDriver) Abilities() DriverAbilities { 86 return DriverAbilities{ 87 SendSignals: true, 88 } 89 } 90 91 func (d *ExecDriver) Periodic() (bool, time.Duration) { 92 return true, 15 * time.Second 93 } 94 95 func (d *ExecDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, error) { 96 var driverConfig ExecDriverConfig 97 if err := mapstructure.WeakDecode(task.Config, &driverConfig); err != nil { 98 return nil, err 99 } 100 101 // Get the command to be ran 102 command := driverConfig.Command 103 if err := validateCommand(command, "args"); err != nil { 104 return nil, err 105 } 106 107 // Set the host environment variables. 108 filter := strings.Split(d.config.ReadDefault("env.blacklist", config.DefaultEnvBlacklist), ",") 109 d.taskEnv.AppendHostEnvvars(filter) 110 111 // Get the task directory for storing the executor logs. 112 taskDir, ok := ctx.AllocDir.TaskDirs[d.DriverContext.taskName] 113 if !ok { 114 return nil, fmt.Errorf("Could not find task directory for task: %v", d.DriverContext.taskName) 115 } 116 117 bin, err := discover.NomadExecutable() 118 if err != nil { 119 return nil, fmt.Errorf("unable to find the nomad binary: %v", err) 120 } 121 pluginLogFile := filepath.Join(taskDir, fmt.Sprintf("%s-executor.out", task.Name)) 122 pluginConfig := &plugin.ClientConfig{ 123 Cmd: exec.Command(bin, "executor", pluginLogFile), 124 } 125 126 exec, pluginClient, err := createExecutor(pluginConfig, d.config.LogOutput, d.config) 127 if err != nil { 128 return nil, err 129 } 130 executorCtx := &executor.ExecutorContext{ 131 TaskEnv: d.taskEnv, 132 Driver: "exec", 133 AllocDir: ctx.AllocDir, 134 AllocID: ctx.AllocID, 135 ChrootEnv: d.config.ChrootEnv, 136 Task: task, 137 } 138 if err := exec.SetContext(executorCtx); err != nil { 139 pluginClient.Kill() 140 return nil, fmt.Errorf("failed to set executor context: %v", err) 141 } 142 143 execCmd := &executor.ExecCommand{ 144 Cmd: command, 145 Args: driverConfig.Args, 146 FSIsolation: true, 147 ResourceLimits: true, 148 User: getExecutorUser(task), 149 } 150 151 ps, err := exec.LaunchCmd(execCmd) 152 if err != nil { 153 pluginClient.Kill() 154 return nil, err 155 } 156 157 d.logger.Printf("[DEBUG] driver.exec: started process via plugin with pid: %v", ps.Pid) 158 159 // Return a driver handle 160 maxKill := d.DriverContext.config.MaxKillTimeout 161 h := &execHandle{ 162 pluginClient: pluginClient, 163 userPid: ps.Pid, 164 executor: exec, 165 allocDir: ctx.AllocDir, 166 isolationConfig: ps.IsolationConfig, 167 killTimeout: GetKillTimeout(task.KillTimeout, maxKill), 168 maxKillTimeout: maxKill, 169 logger: d.logger, 170 version: d.config.Version, 171 doneCh: make(chan struct{}), 172 waitCh: make(chan *dstructs.WaitResult, 1), 173 } 174 if err := exec.SyncServices(consulContext(d.config, "")); err != nil { 175 d.logger.Printf("[ERR] driver.exec: error registering services with consul for task: %q: %v", task.Name, err) 176 } 177 go h.run() 178 return h, nil 179 } 180 181 type execId struct { 182 Version string 183 KillTimeout time.Duration 184 MaxKillTimeout time.Duration 185 UserPid int 186 TaskDir string 187 AllocDir *allocdir.AllocDir 188 IsolationConfig *dstructs.IsolationConfig 189 PluginConfig *PluginReattachConfig 190 } 191 192 func (d *ExecDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) { 193 id := &execId{} 194 if err := json.Unmarshal([]byte(handleID), id); err != nil { 195 return nil, fmt.Errorf("Failed to parse handle '%s': %v", handleID, err) 196 } 197 198 pluginConfig := &plugin.ClientConfig{ 199 Reattach: id.PluginConfig.PluginConfig(), 200 } 201 exec, client, err := createExecutor(pluginConfig, d.config.LogOutput, d.config) 202 if err != nil { 203 merrs := new(multierror.Error) 204 merrs.Errors = append(merrs.Errors, err) 205 d.logger.Println("[ERR] driver.exec: error connecting to plugin so destroying plugin pid and user pid") 206 if e := destroyPlugin(id.PluginConfig.Pid, id.UserPid); e != nil { 207 merrs.Errors = append(merrs.Errors, fmt.Errorf("error destroying plugin and userpid: %v", e)) 208 } 209 if id.IsolationConfig != nil { 210 ePid := pluginConfig.Reattach.Pid 211 if e := executor.ClientCleanup(id.IsolationConfig, ePid); e != nil { 212 merrs.Errors = append(merrs.Errors, fmt.Errorf("destroying cgroup failed: %v", e)) 213 } 214 } 215 if e := ctx.AllocDir.UnmountAll(); e != nil { 216 merrs.Errors = append(merrs.Errors, e) 217 } 218 return nil, fmt.Errorf("error connecting to plugin: %v", merrs.ErrorOrNil()) 219 } 220 221 ver, _ := exec.Version() 222 d.logger.Printf("[DEBUG] driver.exec : version of executor: %v", ver.Version) 223 // Return a driver handle 224 h := &execHandle{ 225 pluginClient: client, 226 executor: exec, 227 userPid: id.UserPid, 228 allocDir: id.AllocDir, 229 isolationConfig: id.IsolationConfig, 230 logger: d.logger, 231 version: id.Version, 232 killTimeout: id.KillTimeout, 233 maxKillTimeout: id.MaxKillTimeout, 234 doneCh: make(chan struct{}), 235 waitCh: make(chan *dstructs.WaitResult, 1), 236 } 237 if err := exec.SyncServices(consulContext(d.config, "")); err != nil { 238 d.logger.Printf("[ERR] driver.exec: error registering services with consul: %v", err) 239 } 240 go h.run() 241 return h, nil 242 } 243 244 func (h *execHandle) ID() string { 245 id := execId{ 246 Version: h.version, 247 KillTimeout: h.killTimeout, 248 MaxKillTimeout: h.maxKillTimeout, 249 PluginConfig: NewPluginReattachConfig(h.pluginClient.ReattachConfig()), 250 UserPid: h.userPid, 251 AllocDir: h.allocDir, 252 IsolationConfig: h.isolationConfig, 253 } 254 255 data, err := json.Marshal(id) 256 if err != nil { 257 h.logger.Printf("[ERR] driver.exec: failed to marshal ID to JSON: %s", err) 258 } 259 return string(data) 260 } 261 262 func (h *execHandle) WaitCh() chan *dstructs.WaitResult { 263 return h.waitCh 264 } 265 266 func (h *execHandle) Update(task *structs.Task) error { 267 // Store the updated kill timeout. 268 h.killTimeout = GetKillTimeout(task.KillTimeout, h.maxKillTimeout) 269 h.executor.UpdateTask(task) 270 271 // Update is not possible 272 return nil 273 } 274 275 func (h *execHandle) Signal(s os.Signal) error { 276 return h.executor.Signal(s) 277 } 278 279 func (h *execHandle) Kill() error { 280 if err := h.executor.ShutDown(); err != nil { 281 if h.pluginClient.Exited() { 282 return nil 283 } 284 return fmt.Errorf("executor Shutdown failed: %v", err) 285 } 286 287 select { 288 case <-h.doneCh: 289 return nil 290 case <-time.After(h.killTimeout): 291 if h.pluginClient.Exited() { 292 return nil 293 } 294 if err := h.executor.Exit(); err != nil { 295 return fmt.Errorf("executor Exit failed: %v", err) 296 } 297 298 return nil 299 } 300 } 301 302 func (h *execHandle) Stats() (*cstructs.TaskResourceUsage, error) { 303 return h.executor.Stats() 304 } 305 306 func (h *execHandle) run() { 307 ps, werr := h.executor.Wait() 308 close(h.doneCh) 309 310 // If the exitcode is 0 and we had an error that means the plugin didn't 311 // connect and doesn't know the state of the user process so we are killing 312 // the user process so that when we create a new executor on restarting the 313 // new user process doesn't have collisions with resources that the older 314 // user pid might be holding onto. 315 if ps.ExitCode == 0 && werr != nil { 316 if h.isolationConfig != nil { 317 ePid := h.pluginClient.ReattachConfig().Pid 318 if e := executor.ClientCleanup(h.isolationConfig, ePid); e != nil { 319 h.logger.Printf("[ERR] driver.exec: destroying resource container failed: %v", e) 320 } 321 } 322 if e := h.allocDir.UnmountAll(); e != nil { 323 h.logger.Printf("[ERR] driver.exec: unmounting dev,proc and alloc dirs failed: %v", e) 324 } 325 } 326 327 // Remove services 328 if err := h.executor.DeregisterServices(); err != nil { 329 h.logger.Printf("[ERR] driver.exec: failed to deregister services: %v", err) 330 } 331 332 // Exit the executor 333 if err := h.executor.Exit(); err != nil { 334 h.logger.Printf("[ERR] driver.exec: error destroying executor: %v", err) 335 } 336 h.pluginClient.Kill() 337 338 // Send the results 339 h.waitCh <- dstructs.NewWaitResult(ps.ExitCode, ps.Signal, werr) 340 close(h.waitCh) 341 }