github.com/bigcommerce/nomad@v0.9.3-bc/drivers/rawexec/driver.go (about) 1 package rawexec 2 3 import ( 4 "context" 5 "fmt" 6 "os" 7 "path/filepath" 8 "runtime" 9 "strconv" 10 "syscall" 11 "time" 12 13 "github.com/hashicorp/consul-template/signals" 14 hclog "github.com/hashicorp/go-hclog" 15 "github.com/hashicorp/nomad/drivers/shared/eventer" 16 "github.com/hashicorp/nomad/drivers/shared/executor" 17 "github.com/hashicorp/nomad/helper/pluginutils/loader" 18 "github.com/hashicorp/nomad/plugins/base" 19 "github.com/hashicorp/nomad/plugins/drivers" 20 "github.com/hashicorp/nomad/plugins/shared/hclspec" 21 pstructs "github.com/hashicorp/nomad/plugins/shared/structs" 22 ) 23 24 const ( 25 // pluginName is the name of the plugin 26 pluginName = "raw_exec" 27 28 // fingerprintPeriod is the interval at which the driver will send fingerprint responses 29 fingerprintPeriod = 30 * time.Second 30 31 // taskHandleVersion is the version of task handle which this driver sets 32 // and understands how to decode driver state 33 taskHandleVersion = 1 34 ) 35 36 var ( 37 // PluginID is the rawexec plugin metadata registered in the plugin 38 // catalog. 39 PluginID = loader.PluginID{ 40 Name: pluginName, 41 PluginType: base.PluginTypeDriver, 42 } 43 44 // PluginConfig is the rawexec factory function registered in the 45 // plugin catalog. 46 PluginConfig = &loader.InternalPluginConfig{ 47 Config: map[string]interface{}{}, 48 Factory: func(l hclog.Logger) interface{} { return NewRawExecDriver(l) }, 49 } 50 ) 51 52 // PluginLoader maps pre-0.9 client driver options to post-0.9 plugin options. 53 func PluginLoader(opts map[string]string) (map[string]interface{}, error) { 54 conf := map[string]interface{}{} 55 if v, err := strconv.ParseBool(opts["driver.raw_exec.enable"]); err == nil { 56 conf["enabled"] = v 57 } 58 if v, err := strconv.ParseBool(opts["driver.raw_exec.no_cgroups"]); err == nil { 59 conf["no_cgroups"] = v 60 } 61 return conf, nil 62 } 63 64 var ( 65 // pluginInfo is the response returned for the PluginInfo RPC 66 pluginInfo = &base.PluginInfoResponse{ 67 Type: base.PluginTypeDriver, 68 PluginApiVersions: []string{drivers.ApiVersion010}, 69 PluginVersion: "0.1.0", 70 Name: pluginName, 71 } 72 73 // configSpec is the hcl specification returned by the ConfigSchema RPC 74 configSpec = hclspec.NewObject(map[string]*hclspec.Spec{ 75 "enabled": hclspec.NewDefault( 76 hclspec.NewAttr("enabled", "bool", false), 77 hclspec.NewLiteral("false"), 78 ), 79 "no_cgroups": hclspec.NewDefault( 80 hclspec.NewAttr("no_cgroups", "bool", false), 81 hclspec.NewLiteral("false"), 82 ), 83 }) 84 85 // taskConfigSpec is the hcl specification for the driver config section of 86 // a task within a job. It is returned in the TaskConfigSchema RPC 87 taskConfigSpec = hclspec.NewObject(map[string]*hclspec.Spec{ 88 "command": hclspec.NewAttr("command", "string", true), 89 "args": hclspec.NewAttr("args", "list(string)", false), 90 }) 91 92 // capabilities is returned by the Capabilities RPC and indicates what 93 // optional features this driver supports 94 capabilities = &drivers.Capabilities{ 95 SendSignals: true, 96 Exec: true, 97 FSIsolation: drivers.FSIsolationNone, 98 } 99 ) 100 101 // Driver is a privileged version of the exec driver. It provides no 102 // resource isolation and just fork/execs. The Exec driver should be preferred 103 // and this should only be used when explicitly needed. 104 type Driver struct { 105 // eventer is used to handle multiplexing of TaskEvents calls such that an 106 // event can be broadcast to all callers 107 eventer *eventer.Eventer 108 109 // config is the driver configuration set by the SetConfig RPC 110 config *Config 111 112 // nomadConfig is the client config from nomad 113 nomadConfig *base.ClientDriverConfig 114 115 // tasks is the in memory datastore mapping taskIDs to driverHandles 116 tasks *taskStore 117 118 // ctx is the context for the driver. It is passed to other subsystems to 119 // coordinate shutdown 120 ctx context.Context 121 122 // signalShutdown is called when the driver is shutting down and cancels the 123 // ctx passed to any subsystems 124 signalShutdown context.CancelFunc 125 126 // logger will log to the Nomad agent 127 logger hclog.Logger 128 } 129 130 // Config is the driver configuration set by the SetConfig RPC call 131 type Config struct { 132 // NoCgroups tracks whether we should use a cgroup to manage the process 133 // tree 134 NoCgroups bool `codec:"no_cgroups"` 135 136 // Enabled is set to true to enable the raw_exec driver 137 Enabled bool `codec:"enabled"` 138 } 139 140 // TaskConfig is the driver configuration of a task within a job 141 type TaskConfig struct { 142 Command string `codec:"command"` 143 Args []string `codec:"args"` 144 } 145 146 // TaskState is the state which is encoded in the handle returned in 147 // StartTask. This information is needed to rebuild the task state and handler 148 // during recovery. 149 type TaskState struct { 150 ReattachConfig *pstructs.ReattachConfig 151 TaskConfig *drivers.TaskConfig 152 Pid int 153 StartedAt time.Time 154 } 155 156 // NewRawExecDriver returns a new DriverPlugin implementation 157 func NewRawExecDriver(logger hclog.Logger) drivers.DriverPlugin { 158 ctx, cancel := context.WithCancel(context.Background()) 159 logger = logger.Named(pluginName) 160 return &Driver{ 161 eventer: eventer.NewEventer(ctx, logger), 162 config: &Config{}, 163 tasks: newTaskStore(), 164 ctx: ctx, 165 signalShutdown: cancel, 166 logger: logger, 167 } 168 } 169 170 func (d *Driver) PluginInfo() (*base.PluginInfoResponse, error) { 171 return pluginInfo, nil 172 } 173 174 func (d *Driver) ConfigSchema() (*hclspec.Spec, error) { 175 return configSpec, nil 176 } 177 178 func (d *Driver) SetConfig(cfg *base.Config) error { 179 var config Config 180 if len(cfg.PluginConfig) != 0 { 181 if err := base.MsgPackDecode(cfg.PluginConfig, &config); err != nil { 182 return err 183 } 184 } 185 186 d.config = &config 187 if cfg.AgentConfig != nil { 188 d.nomadConfig = cfg.AgentConfig.Driver 189 } 190 return nil 191 } 192 193 func (d *Driver) Shutdown() { 194 d.signalShutdown() 195 } 196 197 func (d *Driver) TaskConfigSchema() (*hclspec.Spec, error) { 198 return taskConfigSpec, nil 199 } 200 201 func (d *Driver) Capabilities() (*drivers.Capabilities, error) { 202 return capabilities, nil 203 } 204 205 func (d *Driver) Fingerprint(ctx context.Context) (<-chan *drivers.Fingerprint, error) { 206 ch := make(chan *drivers.Fingerprint) 207 go d.handleFingerprint(ctx, ch) 208 return ch, nil 209 } 210 211 func (d *Driver) handleFingerprint(ctx context.Context, ch chan<- *drivers.Fingerprint) { 212 defer close(ch) 213 ticker := time.NewTimer(0) 214 for { 215 select { 216 case <-ctx.Done(): 217 return 218 case <-d.ctx.Done(): 219 return 220 case <-ticker.C: 221 ticker.Reset(fingerprintPeriod) 222 ch <- d.buildFingerprint() 223 } 224 } 225 } 226 227 func (d *Driver) buildFingerprint() *drivers.Fingerprint { 228 var health drivers.HealthState 229 var desc string 230 attrs := map[string]*pstructs.Attribute{} 231 if d.config.Enabled { 232 health = drivers.HealthStateHealthy 233 desc = drivers.DriverHealthy 234 attrs["driver.raw_exec"] = pstructs.NewBoolAttribute(true) 235 } else { 236 health = drivers.HealthStateUndetected 237 desc = "disabled" 238 } 239 240 return &drivers.Fingerprint{ 241 Attributes: attrs, 242 Health: health, 243 HealthDescription: desc, 244 } 245 } 246 247 func (d *Driver) RecoverTask(handle *drivers.TaskHandle) error { 248 if handle == nil { 249 return fmt.Errorf("handle cannot be nil") 250 } 251 252 // COMPAT(0.10): pre 0.9 upgrade path check 253 if handle.Version == 0 { 254 return d.recoverPre09Task(handle) 255 } 256 257 // If already attached to handle there's nothing to recover. 258 if _, ok := d.tasks.Get(handle.Config.ID); ok { 259 d.logger.Trace("nothing to recover; task already exists", 260 "task_id", handle.Config.ID, 261 "task_name", handle.Config.Name, 262 ) 263 return nil 264 } 265 266 // Handle doesn't already exist, try to reattach 267 var taskState TaskState 268 if err := handle.GetDriverState(&taskState); err != nil { 269 d.logger.Error("failed to decode task state from handle", "error", err, "task_id", handle.Config.ID) 270 return fmt.Errorf("failed to decode task state from handle: %v", err) 271 } 272 273 plugRC, err := pstructs.ReattachConfigToGoPlugin(taskState.ReattachConfig) 274 if err != nil { 275 d.logger.Error("failed to build ReattachConfig from task state", "error", err, "task_id", handle.Config.ID) 276 return fmt.Errorf("failed to build ReattachConfig from task state: %v", err) 277 } 278 279 // Create client for reattached executor 280 exec, pluginClient, err := executor.ReattachToExecutor(plugRC, 281 d.logger.With("task_name", handle.Config.Name, "alloc_id", handle.Config.AllocID)) 282 if err != nil { 283 d.logger.Error("failed to reattach to executor", "error", err, "task_id", handle.Config.ID) 284 return fmt.Errorf("failed to reattach to executor: %v", err) 285 } 286 287 h := &taskHandle{ 288 exec: exec, 289 pid: taskState.Pid, 290 pluginClient: pluginClient, 291 taskConfig: taskState.TaskConfig, 292 procState: drivers.TaskStateRunning, 293 startedAt: taskState.StartedAt, 294 exitResult: &drivers.ExitResult{}, 295 logger: d.logger, 296 doneCh: make(chan struct{}), 297 } 298 299 d.tasks.Set(taskState.TaskConfig.ID, h) 300 301 go h.run() 302 return nil 303 } 304 305 func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *drivers.DriverNetwork, error) { 306 if _, ok := d.tasks.Get(cfg.ID); ok { 307 return nil, nil, fmt.Errorf("task with ID %q already started", cfg.ID) 308 } 309 310 var driverConfig TaskConfig 311 if err := cfg.DecodeDriverConfig(&driverConfig); err != nil { 312 return nil, nil, fmt.Errorf("failed to decode driver config: %v", err) 313 } 314 315 d.logger.Info("starting task", "driver_cfg", hclog.Fmt("%+v", driverConfig)) 316 handle := drivers.NewTaskHandle(taskHandleVersion) 317 handle.Config = cfg 318 319 pluginLogFile := filepath.Join(cfg.TaskDir().Dir, "executor.out") 320 executorConfig := &executor.ExecutorConfig{ 321 LogFile: pluginLogFile, 322 LogLevel: "debug", 323 } 324 325 exec, pluginClient, err := executor.CreateExecutor( 326 d.logger.With("task_name", handle.Config.Name, "alloc_id", handle.Config.AllocID), 327 d.nomadConfig, executorConfig) 328 if err != nil { 329 return nil, nil, fmt.Errorf("failed to create executor: %v", err) 330 } 331 332 // Only use cgroups when running as root on linux - Doing so in other cases 333 // will cause an error. 334 useCgroups := !d.config.NoCgroups && runtime.GOOS == "linux" && syscall.Geteuid() == 0 335 336 execCmd := &executor.ExecCommand{ 337 Cmd: driverConfig.Command, 338 Args: driverConfig.Args, 339 Env: cfg.EnvList(), 340 User: cfg.User, 341 BasicProcessCgroup: useCgroups, 342 TaskDir: cfg.TaskDir().Dir, 343 StdoutPath: cfg.StdoutPath, 344 StderrPath: cfg.StderrPath, 345 } 346 347 ps, err := exec.Launch(execCmd) 348 if err != nil { 349 pluginClient.Kill() 350 return nil, nil, fmt.Errorf("failed to launch command with executor: %v", err) 351 } 352 353 h := &taskHandle{ 354 exec: exec, 355 pid: ps.Pid, 356 pluginClient: pluginClient, 357 taskConfig: cfg, 358 procState: drivers.TaskStateRunning, 359 startedAt: time.Now().Round(time.Millisecond), 360 logger: d.logger, 361 doneCh: make(chan struct{}), 362 } 363 364 driverState := TaskState{ 365 ReattachConfig: pstructs.ReattachConfigFromGoPlugin(pluginClient.ReattachConfig()), 366 Pid: ps.Pid, 367 TaskConfig: cfg, 368 StartedAt: h.startedAt, 369 } 370 371 if err := handle.SetDriverState(&driverState); err != nil { 372 d.logger.Error("failed to start task, error setting driver state", "error", err) 373 exec.Shutdown("", 0) 374 pluginClient.Kill() 375 return nil, nil, fmt.Errorf("failed to set driver state: %v", err) 376 } 377 378 d.tasks.Set(cfg.ID, h) 379 go h.run() 380 return handle, nil, nil 381 } 382 383 func (d *Driver) WaitTask(ctx context.Context, taskID string) (<-chan *drivers.ExitResult, error) { 384 handle, ok := d.tasks.Get(taskID) 385 if !ok { 386 return nil, drivers.ErrTaskNotFound 387 } 388 389 ch := make(chan *drivers.ExitResult) 390 go d.handleWait(ctx, handle, ch) 391 392 return ch, nil 393 } 394 395 func (d *Driver) handleWait(ctx context.Context, handle *taskHandle, ch chan *drivers.ExitResult) { 396 defer close(ch) 397 var result *drivers.ExitResult 398 ps, err := handle.exec.Wait(ctx) 399 if err != nil { 400 result = &drivers.ExitResult{ 401 Err: fmt.Errorf("executor: error waiting on process: %v", err), 402 } 403 } else { 404 result = &drivers.ExitResult{ 405 ExitCode: ps.ExitCode, 406 Signal: ps.Signal, 407 } 408 } 409 410 select { 411 case <-ctx.Done(): 412 return 413 case <-d.ctx.Done(): 414 return 415 case ch <- result: 416 } 417 } 418 419 func (d *Driver) StopTask(taskID string, timeout time.Duration, signal string) error { 420 handle, ok := d.tasks.Get(taskID) 421 if !ok { 422 return drivers.ErrTaskNotFound 423 } 424 425 if err := handle.exec.Shutdown(signal, timeout); err != nil { 426 if handle.pluginClient.Exited() { 427 return nil 428 } 429 return fmt.Errorf("executor Shutdown failed: %v", err) 430 } 431 432 // Wait for handle to finish 433 <-handle.doneCh 434 435 // Kill executor 436 handle.pluginClient.Kill() 437 438 return nil 439 } 440 441 func (d *Driver) DestroyTask(taskID string, force bool) error { 442 handle, ok := d.tasks.Get(taskID) 443 if !ok { 444 return drivers.ErrTaskNotFound 445 } 446 447 if handle.IsRunning() && !force { 448 return fmt.Errorf("cannot destroy running task") 449 } 450 451 if !handle.pluginClient.Exited() { 452 if handle.IsRunning() { 453 if err := handle.exec.Shutdown("", 0); err != nil { 454 handle.logger.Error("destroying executor failed", "err", err) 455 } 456 } 457 458 handle.pluginClient.Kill() 459 } 460 461 d.tasks.Delete(taskID) 462 return nil 463 } 464 465 func (d *Driver) InspectTask(taskID string) (*drivers.TaskStatus, error) { 466 handle, ok := d.tasks.Get(taskID) 467 if !ok { 468 return nil, drivers.ErrTaskNotFound 469 } 470 471 return handle.TaskStatus(), nil 472 } 473 474 func (d *Driver) TaskStats(ctx context.Context, taskID string, interval time.Duration) (<-chan *drivers.TaskResourceUsage, error) { 475 handle, ok := d.tasks.Get(taskID) 476 if !ok { 477 return nil, drivers.ErrTaskNotFound 478 } 479 480 return handle.exec.Stats(ctx, interval) 481 } 482 483 func (d *Driver) TaskEvents(ctx context.Context) (<-chan *drivers.TaskEvent, error) { 484 return d.eventer.TaskEvents(ctx) 485 } 486 487 func (d *Driver) SignalTask(taskID string, signal string) error { 488 handle, ok := d.tasks.Get(taskID) 489 if !ok { 490 return drivers.ErrTaskNotFound 491 } 492 493 sig := os.Interrupt 494 if s, ok := signals.SignalLookup[signal]; ok { 495 sig = s 496 } else { 497 d.logger.Warn("unknown signal to send to task, using SIGINT instead", "signal", signal, "task_id", handle.taskConfig.ID) 498 } 499 500 return handle.exec.Signal(sig) 501 } 502 503 func (d *Driver) ExecTask(taskID string, cmd []string, timeout time.Duration) (*drivers.ExecTaskResult, error) { 504 if len(cmd) == 0 { 505 return nil, fmt.Errorf("error cmd must have at least one value") 506 } 507 handle, ok := d.tasks.Get(taskID) 508 if !ok { 509 return nil, drivers.ErrTaskNotFound 510 } 511 512 out, exitCode, err := handle.exec.Exec(time.Now().Add(timeout), cmd[0], cmd[1:]) 513 if err != nil { 514 return nil, err 515 } 516 517 return &drivers.ExecTaskResult{ 518 Stdout: out, 519 ExitResult: &drivers.ExitResult{ 520 ExitCode: exitCode, 521 }, 522 }, nil 523 } 524 525 var _ drivers.ExecTaskStreamingRawDriver = (*Driver)(nil) 526 527 func (d *Driver) ExecTaskStreamingRaw(ctx context.Context, 528 taskID string, 529 command []string, 530 tty bool, 531 stream drivers.ExecTaskStream) error { 532 533 if len(command) == 0 { 534 return fmt.Errorf("error cmd must have at least one value") 535 } 536 handle, ok := d.tasks.Get(taskID) 537 if !ok { 538 return drivers.ErrTaskNotFound 539 } 540 541 return handle.exec.ExecStreaming(ctx, command, tty, stream) 542 }