github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/drivers/exec/driver.go (about) 1 package exec 2 3 import ( 4 "context" 5 "fmt" 6 "os" 7 "path/filepath" 8 "runtime" 9 "sync" 10 "time" 11 12 "github.com/hashicorp/consul-template/signals" 13 hclog "github.com/hashicorp/go-hclog" 14 "github.com/hashicorp/nomad/client/fingerprint" 15 "github.com/hashicorp/nomad/drivers/shared/eventer" 16 "github.com/hashicorp/nomad/drivers/shared/executor" 17 "github.com/hashicorp/nomad/drivers/shared/resolvconf" 18 "github.com/hashicorp/nomad/helper" 19 "github.com/hashicorp/nomad/helper/pluginutils/loader" 20 "github.com/hashicorp/nomad/plugins/base" 21 "github.com/hashicorp/nomad/plugins/drivers" 22 "github.com/hashicorp/nomad/plugins/drivers/utils" 23 "github.com/hashicorp/nomad/plugins/shared/hclspec" 24 pstructs "github.com/hashicorp/nomad/plugins/shared/structs" 25 ) 26 27 const ( 28 // pluginName is the name of the plugin 29 pluginName = "exec" 30 31 // fingerprintPeriod is the interval at which the driver will send fingerprint responses 32 fingerprintPeriod = 30 * time.Second 33 34 // taskHandleVersion is the version of task handle which this driver sets 35 // and understands how to decode driver state 36 taskHandleVersion = 1 37 ) 38 39 var ( 40 // PluginID is the exec plugin metadata registered in the plugin 41 // catalog. 42 PluginID = loader.PluginID{ 43 Name: pluginName, 44 PluginType: base.PluginTypeDriver, 45 } 46 47 // PluginConfig is the exec driver factory function registered in the 48 // plugin catalog. 49 PluginConfig = &loader.InternalPluginConfig{ 50 Config: map[string]interface{}{}, 51 Factory: func(ctx context.Context, l hclog.Logger) interface{} { return NewExecDriver(ctx, l) }, 52 } 53 54 // pluginInfo is the response returned for the PluginInfo RPC 55 pluginInfo = &base.PluginInfoResponse{ 56 Type: base.PluginTypeDriver, 57 PluginApiVersions: []string{drivers.ApiVersion010}, 58 PluginVersion: "0.1.0", 59 Name: pluginName, 60 } 61 62 // configSpec is the hcl specification returned by the ConfigSchema RPC 63 configSpec = hclspec.NewObject(map[string]*hclspec.Spec{ 64 "no_pivot_root": hclspec.NewDefault( 65 hclspec.NewAttr("no_pivot_root", "bool", false), 66 hclspec.NewLiteral("false"), 67 ), 68 "default_pid_mode": hclspec.NewDefault( 69 hclspec.NewAttr("default_pid_mode", "string", false), 70 hclspec.NewLiteral(`"private"`), 71 ), 72 "default_ipc_mode": hclspec.NewDefault( 73 hclspec.NewAttr("default_ipc_mode", "string", false), 74 hclspec.NewLiteral(`"private"`), 75 ), 76 }) 77 78 // taskConfigSpec is the hcl specification for the driver config section of 79 // a task within a job. It is returned in the TaskConfigSchema RPC 80 taskConfigSpec = hclspec.NewObject(map[string]*hclspec.Spec{ 81 "command": hclspec.NewAttr("command", "string", true), 82 "args": hclspec.NewAttr("args", "list(string)", false), 83 "pid_mode": hclspec.NewAttr("pid_mode", "string", false), 84 "ipc_mode": hclspec.NewAttr("ipc_mode", "string", false), 85 }) 86 87 // capabilities is returned by the Capabilities RPC and indicates what 88 // optional features this driver supports 89 capabilities = &drivers.Capabilities{ 90 SendSignals: true, 91 Exec: true, 92 FSIsolation: drivers.FSIsolationChroot, 93 NetIsolationModes: []drivers.NetIsolationMode{ 94 drivers.NetIsolationModeHost, 95 drivers.NetIsolationModeGroup, 96 }, 97 MountConfigs: drivers.MountConfigSupportAll, 98 } 99 ) 100 101 // Driver fork/execs tasks using many of the underlying OS's isolation 102 // features where configured. 103 type Driver struct { 104 // eventer is used to handle multiplexing of TaskEvents calls such that an 105 // event can be broadcast to all callers 106 eventer *eventer.Eventer 107 108 // config is the driver configuration set by the SetConfig RPC 109 config Config 110 111 // nomadConfig is the client config from nomad 112 nomadConfig *base.ClientDriverConfig 113 114 // tasks is the in memory datastore mapping taskIDs to driverHandles 115 tasks *taskStore 116 117 // ctx is the context for the driver. It is passed to other subsystems to 118 // coordinate shutdown 119 ctx context.Context 120 121 // logger will log to the Nomad agent 122 logger hclog.Logger 123 124 // A tri-state boolean to know if the fingerprinting has happened and 125 // whether it has been successful 126 fingerprintSuccess *bool 127 fingerprintLock sync.Mutex 128 } 129 130 // Config is the driver configuration set by the SetConfig RPC call 131 type Config struct { 132 // NoPivotRoot disables the use of pivot_root, useful when the root partition 133 // is on ramdisk 134 NoPivotRoot bool `codec:"no_pivot_root"` 135 136 // DefaultModePID is the default PID isolation set for all tasks using 137 // exec-based task drivers. 138 DefaultModePID string `codec:"default_pid_mode"` 139 140 // DefaultModeIPC is the default IPC isolation set for all tasks using 141 // exec-based task drivers. 142 DefaultModeIPC string `codec:"default_ipc_mode"` 143 } 144 145 func (c *Config) validate() error { 146 switch c.DefaultModePID { 147 case executor.IsolationModePrivate, executor.IsolationModeHost: 148 default: 149 return fmt.Errorf("default_pid_mode must be %q or %q, got %q", executor.IsolationModePrivate, executor.IsolationModeHost, c.DefaultModePID) 150 } 151 152 switch c.DefaultModeIPC { 153 case executor.IsolationModePrivate, executor.IsolationModeHost: 154 default: 155 return fmt.Errorf("default_ipc_mode must be %q or %q, got %q", executor.IsolationModePrivate, executor.IsolationModeHost, c.DefaultModeIPC) 156 } 157 158 return nil 159 } 160 161 // TaskConfig is the driver configuration of a task within a job 162 type TaskConfig struct { 163 // Command is the thing to exec. 164 Command string `codec:"command"` 165 166 // Args are passed along to Command. 167 Args []string `codec:"args"` 168 169 // ModePID indicates whether PID namespace isolation is enabled for the task. 170 // Must be "private" or "host" if set. 171 ModePID string `codec:"pid_mode"` 172 173 // ModeIPC indicates whether IPC namespace isolation is enabled for the task. 174 // Must be "private" or "host" if set. 175 ModeIPC string `codec:"ipc_mode"` 176 } 177 178 func (tc *TaskConfig) validate() error { 179 switch tc.ModePID { 180 case "", executor.IsolationModePrivate, executor.IsolationModeHost: 181 default: 182 return fmt.Errorf("pid_mode must be %q or %q, got %q", executor.IsolationModePrivate, executor.IsolationModeHost, tc.ModePID) 183 } 184 185 switch tc.ModeIPC { 186 case "", executor.IsolationModePrivate, executor.IsolationModeHost: 187 default: 188 return fmt.Errorf("ipc_mode must be %q or %q, got %q", executor.IsolationModePrivate, executor.IsolationModeHost, tc.ModeIPC) 189 } 190 191 return nil 192 } 193 194 // TaskState is the state which is encoded in the handle returned in 195 // StartTask. This information is needed to rebuild the task state and handler 196 // during recovery. 197 type TaskState struct { 198 ReattachConfig *pstructs.ReattachConfig 199 TaskConfig *drivers.TaskConfig 200 Pid int 201 StartedAt time.Time 202 } 203 204 // NewExecDriver returns a new DrivePlugin implementation 205 func NewExecDriver(ctx context.Context, logger hclog.Logger) drivers.DriverPlugin { 206 logger = logger.Named(pluginName) 207 return &Driver{ 208 eventer: eventer.NewEventer(ctx, logger), 209 tasks: newTaskStore(), 210 ctx: ctx, 211 logger: logger, 212 } 213 } 214 215 // setFingerprintSuccess marks the driver as having fingerprinted successfully 216 func (d *Driver) setFingerprintSuccess() { 217 d.fingerprintLock.Lock() 218 d.fingerprintSuccess = helper.BoolToPtr(true) 219 d.fingerprintLock.Unlock() 220 } 221 222 // setFingerprintFailure marks the driver as having failed fingerprinting 223 func (d *Driver) setFingerprintFailure() { 224 d.fingerprintLock.Lock() 225 d.fingerprintSuccess = helper.BoolToPtr(false) 226 d.fingerprintLock.Unlock() 227 } 228 229 // fingerprintSuccessful returns true if the driver has 230 // never fingerprinted or has successfully fingerprinted 231 func (d *Driver) fingerprintSuccessful() bool { 232 d.fingerprintLock.Lock() 233 defer d.fingerprintLock.Unlock() 234 return d.fingerprintSuccess == nil || *d.fingerprintSuccess 235 } 236 237 func (d *Driver) PluginInfo() (*base.PluginInfoResponse, error) { 238 return pluginInfo, nil 239 } 240 241 func (d *Driver) ConfigSchema() (*hclspec.Spec, error) { 242 return configSpec, nil 243 } 244 245 func (d *Driver) SetConfig(cfg *base.Config) error { 246 // unpack, validate, and set agent plugin config 247 var config Config 248 if len(cfg.PluginConfig) != 0 { 249 if err := base.MsgPackDecode(cfg.PluginConfig, &config); err != nil { 250 return err 251 } 252 } 253 if err := config.validate(); err != nil { 254 return err 255 } 256 d.config = config 257 258 if cfg != nil && cfg.AgentConfig != nil { 259 d.nomadConfig = cfg.AgentConfig.Driver 260 } 261 return nil 262 } 263 264 func (d *Driver) TaskConfigSchema() (*hclspec.Spec, error) { 265 return taskConfigSpec, nil 266 } 267 268 func (d *Driver) Capabilities() (*drivers.Capabilities, error) { 269 return capabilities, nil 270 } 271 272 func (d *Driver) Fingerprint(ctx context.Context) (<-chan *drivers.Fingerprint, error) { 273 ch := make(chan *drivers.Fingerprint) 274 go d.handleFingerprint(ctx, ch) 275 return ch, nil 276 277 } 278 func (d *Driver) handleFingerprint(ctx context.Context, ch chan<- *drivers.Fingerprint) { 279 defer close(ch) 280 ticker := time.NewTimer(0) 281 for { 282 select { 283 case <-ctx.Done(): 284 return 285 case <-d.ctx.Done(): 286 return 287 case <-ticker.C: 288 ticker.Reset(fingerprintPeriod) 289 ch <- d.buildFingerprint() 290 } 291 } 292 } 293 294 func (d *Driver) buildFingerprint() *drivers.Fingerprint { 295 if runtime.GOOS != "linux" { 296 d.setFingerprintFailure() 297 return &drivers.Fingerprint{ 298 Health: drivers.HealthStateUndetected, 299 HealthDescription: "exec driver unsupported on client OS", 300 } 301 } 302 303 fp := &drivers.Fingerprint{ 304 Attributes: map[string]*pstructs.Attribute{}, 305 Health: drivers.HealthStateHealthy, 306 HealthDescription: drivers.DriverHealthy, 307 } 308 309 if !utils.IsUnixRoot() { 310 fp.Health = drivers.HealthStateUndetected 311 fp.HealthDescription = drivers.DriverRequiresRootMessage 312 d.setFingerprintFailure() 313 return fp 314 } 315 316 mount, err := fingerprint.FindCgroupMountpointDir() 317 if err != nil { 318 fp.Health = drivers.HealthStateUnhealthy 319 fp.HealthDescription = drivers.NoCgroupMountMessage 320 if d.fingerprintSuccessful() { 321 d.logger.Warn(fp.HealthDescription, "error", err) 322 } 323 d.setFingerprintFailure() 324 return fp 325 } 326 327 if mount == "" { 328 fp.Health = drivers.HealthStateUnhealthy 329 fp.HealthDescription = drivers.CgroupMountEmpty 330 d.setFingerprintFailure() 331 return fp 332 } 333 334 fp.Attributes["driver.exec"] = pstructs.NewBoolAttribute(true) 335 d.setFingerprintSuccess() 336 return fp 337 } 338 339 func (d *Driver) RecoverTask(handle *drivers.TaskHandle) error { 340 if handle == nil { 341 return fmt.Errorf("handle cannot be nil") 342 } 343 344 // COMPAT(0.10): pre 0.9 upgrade path check 345 if handle.Version == 0 { 346 return d.recoverPre09Task(handle) 347 } 348 349 // If already attached to handle there's nothing to recover. 350 if _, ok := d.tasks.Get(handle.Config.ID); ok { 351 d.logger.Trace("nothing to recover; task already exists", 352 "task_id", handle.Config.ID, 353 "task_name", handle.Config.Name, 354 ) 355 return nil 356 } 357 358 // Handle doesn't already exist, try to reattach 359 var taskState TaskState 360 if err := handle.GetDriverState(&taskState); err != nil { 361 d.logger.Error("failed to decode task state from handle", "error", err, "task_id", handle.Config.ID) 362 return fmt.Errorf("failed to decode task state from handle: %v", err) 363 } 364 365 // Create client for reattached executor 366 plugRC, err := pstructs.ReattachConfigToGoPlugin(taskState.ReattachConfig) 367 if err != nil { 368 d.logger.Error("failed to build ReattachConfig from task state", "error", err, "task_id", handle.Config.ID) 369 return fmt.Errorf("failed to build ReattachConfig from task state: %v", err) 370 } 371 372 exec, pluginClient, err := executor.ReattachToExecutor(plugRC, 373 d.logger.With("task_name", handle.Config.Name, "alloc_id", handle.Config.AllocID)) 374 if err != nil { 375 d.logger.Error("failed to reattach to executor", "error", err, "task_id", handle.Config.ID) 376 return fmt.Errorf("failed to reattach to executor: %v", err) 377 } 378 379 h := &taskHandle{ 380 exec: exec, 381 pid: taskState.Pid, 382 pluginClient: pluginClient, 383 taskConfig: taskState.TaskConfig, 384 procState: drivers.TaskStateRunning, 385 startedAt: taskState.StartedAt, 386 exitResult: &drivers.ExitResult{}, 387 logger: d.logger, 388 } 389 390 d.tasks.Set(taskState.TaskConfig.ID, h) 391 392 go h.run() 393 return nil 394 } 395 396 func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *drivers.DriverNetwork, error) { 397 if _, ok := d.tasks.Get(cfg.ID); ok { 398 return nil, nil, fmt.Errorf("task with ID %q already started", cfg.ID) 399 } 400 401 var driverConfig TaskConfig 402 if err := cfg.DecodeDriverConfig(&driverConfig); err != nil { 403 return nil, nil, fmt.Errorf("failed to decode driver config: %v", err) 404 } 405 406 if err := driverConfig.validate(); err != nil { 407 return nil, nil, fmt.Errorf("failed driver config validation: %v", err) 408 } 409 410 d.logger.Info("starting task", "driver_cfg", hclog.Fmt("%+v", driverConfig)) 411 handle := drivers.NewTaskHandle(taskHandleVersion) 412 handle.Config = cfg 413 414 pluginLogFile := filepath.Join(cfg.TaskDir().Dir, "executor.out") 415 executorConfig := &executor.ExecutorConfig{ 416 LogFile: pluginLogFile, 417 LogLevel: "debug", 418 FSIsolation: true, 419 } 420 421 exec, pluginClient, err := executor.CreateExecutor( 422 d.logger.With("task_name", handle.Config.Name, "alloc_id", handle.Config.AllocID), 423 d.nomadConfig, executorConfig) 424 if err != nil { 425 return nil, nil, fmt.Errorf("failed to create executor: %v", err) 426 } 427 428 user := cfg.User 429 if user == "" { 430 user = "nobody" 431 } 432 433 if cfg.DNS != nil { 434 dnsMount, err := resolvconf.GenerateDNSMount(cfg.TaskDir().Dir, cfg.DNS) 435 if err != nil { 436 return nil, nil, fmt.Errorf("failed to build mount for resolv.conf: %v", err) 437 } 438 cfg.Mounts = append(cfg.Mounts, dnsMount) 439 } 440 441 execCmd := &executor.ExecCommand{ 442 Cmd: driverConfig.Command, 443 Args: driverConfig.Args, 444 Env: cfg.EnvList(), 445 User: user, 446 ResourceLimits: true, 447 NoPivotRoot: d.config.NoPivotRoot, 448 Resources: cfg.Resources, 449 TaskDir: cfg.TaskDir().Dir, 450 StdoutPath: cfg.StdoutPath, 451 StderrPath: cfg.StderrPath, 452 Mounts: cfg.Mounts, 453 Devices: cfg.Devices, 454 NetworkIsolation: cfg.NetworkIsolation, 455 ModePID: executor.IsolationMode(d.config.DefaultModePID, driverConfig.ModePID), 456 ModeIPC: executor.IsolationMode(d.config.DefaultModeIPC, driverConfig.ModeIPC), 457 } 458 459 ps, err := exec.Launch(execCmd) 460 if err != nil { 461 pluginClient.Kill() 462 return nil, nil, fmt.Errorf("failed to launch command with executor: %v", err) 463 } 464 465 h := &taskHandle{ 466 exec: exec, 467 pid: ps.Pid, 468 pluginClient: pluginClient, 469 taskConfig: cfg, 470 procState: drivers.TaskStateRunning, 471 startedAt: time.Now().Round(time.Millisecond), 472 logger: d.logger, 473 } 474 475 driverState := TaskState{ 476 ReattachConfig: pstructs.ReattachConfigFromGoPlugin(pluginClient.ReattachConfig()), 477 Pid: ps.Pid, 478 TaskConfig: cfg, 479 StartedAt: h.startedAt, 480 } 481 482 if err := handle.SetDriverState(&driverState); err != nil { 483 d.logger.Error("failed to start task, error setting driver state", "error", err) 484 exec.Shutdown("", 0) 485 pluginClient.Kill() 486 return nil, nil, fmt.Errorf("failed to set driver state: %v", err) 487 } 488 489 d.tasks.Set(cfg.ID, h) 490 go h.run() 491 return handle, nil, nil 492 } 493 494 func (d *Driver) WaitTask(ctx context.Context, taskID string) (<-chan *drivers.ExitResult, error) { 495 handle, ok := d.tasks.Get(taskID) 496 if !ok { 497 return nil, drivers.ErrTaskNotFound 498 } 499 500 ch := make(chan *drivers.ExitResult) 501 go d.handleWait(ctx, handle, ch) 502 503 return ch, nil 504 } 505 506 func (d *Driver) handleWait(ctx context.Context, handle *taskHandle, ch chan *drivers.ExitResult) { 507 defer close(ch) 508 var result *drivers.ExitResult 509 ps, err := handle.exec.Wait(ctx) 510 if err != nil { 511 result = &drivers.ExitResult{ 512 Err: fmt.Errorf("executor: error waiting on process: %v", err), 513 } 514 } else { 515 result = &drivers.ExitResult{ 516 ExitCode: ps.ExitCode, 517 Signal: ps.Signal, 518 } 519 } 520 521 select { 522 case <-ctx.Done(): 523 return 524 case <-d.ctx.Done(): 525 return 526 case ch <- result: 527 } 528 } 529 530 func (d *Driver) StopTask(taskID string, timeout time.Duration, signal string) error { 531 handle, ok := d.tasks.Get(taskID) 532 if !ok { 533 return drivers.ErrTaskNotFound 534 } 535 536 if err := handle.exec.Shutdown(signal, timeout); err != nil { 537 if handle.pluginClient.Exited() { 538 return nil 539 } 540 return fmt.Errorf("executor Shutdown failed: %v", err) 541 } 542 543 return nil 544 } 545 546 func (d *Driver) DestroyTask(taskID string, force bool) error { 547 handle, ok := d.tasks.Get(taskID) 548 if !ok { 549 return drivers.ErrTaskNotFound 550 } 551 552 if handle.IsRunning() && !force { 553 return fmt.Errorf("cannot destroy running task") 554 } 555 556 if !handle.pluginClient.Exited() { 557 if err := handle.exec.Shutdown("", 0); err != nil { 558 handle.logger.Error("destroying executor failed", "err", err) 559 } 560 561 handle.pluginClient.Kill() 562 } 563 564 d.tasks.Delete(taskID) 565 return nil 566 } 567 568 func (d *Driver) InspectTask(taskID string) (*drivers.TaskStatus, error) { 569 handle, ok := d.tasks.Get(taskID) 570 if !ok { 571 return nil, drivers.ErrTaskNotFound 572 } 573 574 return handle.TaskStatus(), nil 575 } 576 577 func (d *Driver) TaskStats(ctx context.Context, taskID string, interval time.Duration) (<-chan *drivers.TaskResourceUsage, error) { 578 handle, ok := d.tasks.Get(taskID) 579 if !ok { 580 return nil, drivers.ErrTaskNotFound 581 } 582 583 return handle.exec.Stats(ctx, interval) 584 } 585 586 func (d *Driver) TaskEvents(ctx context.Context) (<-chan *drivers.TaskEvent, error) { 587 return d.eventer.TaskEvents(ctx) 588 } 589 590 func (d *Driver) SignalTask(taskID string, signal string) error { 591 handle, ok := d.tasks.Get(taskID) 592 if !ok { 593 return drivers.ErrTaskNotFound 594 } 595 596 sig := os.Interrupt 597 if s, ok := signals.SignalLookup[signal]; ok { 598 sig = s 599 } else { 600 d.logger.Warn("unknown signal to send to task, using SIGINT instead", "signal", signal, "task_id", handle.taskConfig.ID) 601 602 } 603 return handle.exec.Signal(sig) 604 } 605 606 func (d *Driver) ExecTask(taskID string, cmd []string, timeout time.Duration) (*drivers.ExecTaskResult, error) { 607 if len(cmd) == 0 { 608 return nil, fmt.Errorf("error cmd must have at least one value") 609 } 610 handle, ok := d.tasks.Get(taskID) 611 if !ok { 612 return nil, drivers.ErrTaskNotFound 613 } 614 615 args := []string{} 616 if len(cmd) > 1 { 617 args = cmd[1:] 618 } 619 620 out, exitCode, err := handle.exec.Exec(time.Now().Add(timeout), cmd[0], args) 621 if err != nil { 622 return nil, err 623 } 624 625 return &drivers.ExecTaskResult{ 626 Stdout: out, 627 ExitResult: &drivers.ExitResult{ 628 ExitCode: exitCode, 629 }, 630 }, nil 631 } 632 633 var _ drivers.ExecTaskStreamingRawDriver = (*Driver)(nil) 634 635 func (d *Driver) ExecTaskStreamingRaw(ctx context.Context, 636 taskID string, 637 command []string, 638 tty bool, 639 stream drivers.ExecTaskStream) error { 640 641 if len(command) == 0 { 642 return fmt.Errorf("error cmd must have at least one value") 643 } 644 handle, ok := d.tasks.Get(taskID) 645 if !ok { 646 return drivers.ErrTaskNotFound 647 } 648 649 return handle.exec.ExecStreaming(ctx, command, tty, stream) 650 }