// Source: github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/drivers/shared/executor/executor.go

package executor

import (
	"context"
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"os/exec"
	"path/filepath"
	"runtime"
	"strings"
	"syscall"
	"time"

	"github.com/armon/circbuf"
	"github.com/hashicorp/consul-template/signals"
	hclog "github.com/hashicorp/go-hclog"
	multierror "github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/client/allocdir"
	"github.com/hashicorp/nomad/client/lib/fifo"
	"github.com/hashicorp/nomad/client/stats"
	cstructs "github.com/hashicorp/nomad/client/structs"
	"github.com/hashicorp/nomad/plugins/drivers"
	"github.com/kr/pty"

	shelpers "github.com/hashicorp/nomad/helper/stats"
)

const (
	// ExecutorVersionLatest is the current and latest version of the executor
	ExecutorVersionLatest = "2.0.0"

	// ExecutorVersionPre0_9 is the version of executor used prior to the
	// release of 0.9.x
	ExecutorVersionPre0_9 = "1.1.0"

	// IsolationModePrivate represents the private isolation mode for a namespace
	IsolationModePrivate = "private"

	// IsolationModeHost represents the host isolation mode for a namespace
	IsolationModeHost = "host"
)

var (
	// The statistics the basic executor exposes
	ExecutorBasicMeasuredMemStats = []string{"RSS", "Swap"}
	ExecutorBasicMeasuredCpuStats = []string{"System Mode", "User Mode", "Percent"}
)

// Executor is the interface which allows a driver to launch and supervise
// a process
type Executor interface {
	// Launch a user process configured by the given ExecCommand
	Launch(launchCmd *ExecCommand) (*ProcessState, error)

	// Wait blocks until the process exits or an error occurs
	Wait(ctx context.Context) (*ProcessState, error)

	// Shutdown will shutdown the executor by stopping the user process and
	// cleaning up any resources created by the executor. The shutdown sequence
	// will first send the given signal to the process. This defaults to "SIGINT"
	// if not specified. The executor will then wait for the process to exit
	// before cleaning up other resources. If the executor waits longer than the
	// given grace period, the process is forcefully killed.
	//
	// To force kill the user process, gracePeriod can be set to 0.
	Shutdown(signal string, gracePeriod time.Duration) error

	// UpdateResources updates any resource isolation enforcement with new
	// constraints if supported.
	UpdateResources(*drivers.Resources) error

	// Version returns the executor API version
	Version() (*ExecutorVersion, error)

	// Stats returns a channel of stats. Stats are collected and
	// pushed to the channel on the given interval
	Stats(context.Context, time.Duration) (<-chan *cstructs.TaskResourceUsage, error)

	// Signal sends the given signal to the user process
	Signal(os.Signal) error

	// Exec executes the given command and args inside the executor context
	// and returns the output and exit code.
	Exec(deadline time.Time, cmd string, args []string) ([]byte, int, error)

	// ExecStreaming runs the given command (cmd[0] is the program, the rest
	// are its arguments), optionally attached to a terminal when tty is set,
	// exchanging its I/O over the given stream.
	ExecStreaming(ctx context.Context, cmd []string, tty bool,
		stream drivers.ExecTaskStream) error
}

// ExecCommand holds the user command, args, and other isolation related
// settings.
type ExecCommand struct {
	// Cmd is the command that the user wants to run.
	Cmd string

	// Args is the args of the command that the user wants to run.
	Args []string

	// Resources defined by the task
	Resources *drivers.Resources

	// StdoutPath is the path the process stdout should be written to
	StdoutPath string
	stdout     io.WriteCloser

	// StderrPath is the path the process stderr should be written to
	StderrPath string
	stderr     io.WriteCloser

	// Env is the list of KEY=val pairs of environment variables to be set
	Env []string

	// User is the user which the executor uses to run the command.
	User string

	// TaskDir is the directory path on the host for the task
	TaskDir string

	// ResourceLimits determines whether resource limits are enforced by the
	// executor.
	ResourceLimits bool

	// BasicProcessCgroup marks whether we put the process in a cgroup. Setting
	// this field doesn't enforce resource limits. To enforce limits, set
	// ResourceLimits. Using the cgroup does allow more precise cleanup of
	// processes.
	BasicProcessCgroup bool

	// NoPivotRoot disables using pivot_root for isolation, useful when the root
	// partition is on a ramdisk which does not support pivot_root,
	// see man 2 pivot_root
	NoPivotRoot bool

	// Mounts are the host paths to be made available inside rootfs
	Mounts []*drivers.MountConfig

	// Devices are the device nodes to be created in isolation environment
	Devices []*drivers.DeviceConfig

	// NetworkIsolation is the network isolation configuration.
	NetworkIsolation *drivers.NetworkIsolationSpec

	// ModePID is the PID isolation mode (private or host).
	ModePID string

	// ModeIPC is the IPC isolation mode (private or host).
	ModeIPC string
}

// SetWriters sets the writer for the process stdout and stderr. This should
// not be used if writing to a file path such as a fifo file. SetWriters
// is mainly used for unit testing purposes.
154 func (c *ExecCommand) SetWriters(out io.WriteCloser, err io.WriteCloser) { 155 c.stdout = out 156 c.stderr = err 157 } 158 159 // GetWriters returns the unexported io.WriteCloser for the stdout and stderr 160 // handles. This is mainly used for unit testing purposes. 161 func (c *ExecCommand) GetWriters() (stdout io.WriteCloser, stderr io.WriteCloser) { 162 return c.stdout, c.stderr 163 } 164 165 type nopCloser struct { 166 io.Writer 167 } 168 169 func (nopCloser) Close() error { return nil } 170 171 // Stdout returns a writer for the configured file descriptor 172 func (c *ExecCommand) Stdout() (io.WriteCloser, error) { 173 if c.stdout == nil { 174 if c.StdoutPath != "" { 175 f, err := fifo.OpenWriter(c.StdoutPath) 176 if err != nil { 177 return nil, fmt.Errorf("failed to create stdout: %v", err) 178 } 179 c.stdout = f 180 } else { 181 c.stdout = nopCloser{ioutil.Discard} 182 } 183 } 184 return c.stdout, nil 185 } 186 187 // Stderr returns a writer for the configured file descriptor 188 func (c *ExecCommand) Stderr() (io.WriteCloser, error) { 189 if c.stderr == nil { 190 if c.StderrPath != "" { 191 f, err := fifo.OpenWriter(c.StderrPath) 192 if err != nil { 193 return nil, fmt.Errorf("failed to create stderr: %v", err) 194 } 195 c.stderr = f 196 } else { 197 c.stderr = nopCloser{ioutil.Discard} 198 } 199 } 200 return c.stderr, nil 201 } 202 203 func (c *ExecCommand) Close() { 204 if c.stdout != nil { 205 c.stdout.Close() 206 } 207 if c.stderr != nil { 208 c.stderr.Close() 209 } 210 } 211 212 // ProcessState holds information about the state of a user process. 
213 type ProcessState struct { 214 Pid int 215 ExitCode int 216 Signal int 217 Time time.Time 218 } 219 220 // ExecutorVersion is the version of the executor 221 type ExecutorVersion struct { 222 Version string 223 } 224 225 func (v *ExecutorVersion) GoString() string { 226 return v.Version 227 } 228 229 // UniversalExecutor is an implementation of the Executor which launches and 230 // supervises processes. In addition to process supervision it provides resource 231 // and file system isolation 232 type UniversalExecutor struct { 233 childCmd exec.Cmd 234 commandCfg *ExecCommand 235 236 exitState *ProcessState 237 processExited chan interface{} 238 239 // resConCtx is used to track and cleanup additional resources created by 240 // the executor. Currently this is only used for cgroups. 241 resConCtx resourceContainerContext 242 243 totalCpuStats *stats.CpuStats 244 userCpuStats *stats.CpuStats 245 systemCpuStats *stats.CpuStats 246 pidCollector *pidCollector 247 248 logger hclog.Logger 249 } 250 251 // NewExecutor returns an Executor 252 func NewExecutor(logger hclog.Logger) Executor { 253 logger = logger.Named("executor") 254 if err := shelpers.Init(); err != nil { 255 logger.Error("unable to initialize stats", "error", err) 256 } 257 return &UniversalExecutor{ 258 logger: logger, 259 processExited: make(chan interface{}), 260 totalCpuStats: stats.NewCpuStats(), 261 userCpuStats: stats.NewCpuStats(), 262 systemCpuStats: stats.NewCpuStats(), 263 pidCollector: newPidCollector(logger), 264 } 265 } 266 267 // Version returns the api version of the executor 268 func (e *UniversalExecutor) Version() (*ExecutorVersion, error) { 269 return &ExecutorVersion{Version: ExecutorVersionLatest}, nil 270 } 271 272 // Launch launches the main process and returns its state. It also 273 // configures an applies isolation on certain platforms. 
274 func (e *UniversalExecutor) Launch(command *ExecCommand) (*ProcessState, error) { 275 e.logger.Trace("preparing to launch command", "command", command.Cmd, "args", strings.Join(command.Args, " ")) 276 277 e.commandCfg = command 278 279 // setting the user of the process 280 if command.User != "" { 281 e.logger.Debug("running command as user", "user", command.User) 282 if err := setCmdUser(&e.childCmd, command.User); err != nil { 283 return nil, err 284 } 285 } 286 287 // set the task dir as the working directory for the command 288 e.childCmd.Dir = e.commandCfg.TaskDir 289 290 // start command in separate process group 291 if err := e.setNewProcessGroup(); err != nil { 292 return nil, err 293 } 294 295 // Setup cgroups on linux 296 if e.commandCfg.ResourceLimits || e.commandCfg.BasicProcessCgroup { 297 if err := e.configureResourceContainer(os.Getpid()); err != nil { 298 return nil, err 299 } 300 } 301 302 stdout, err := e.commandCfg.Stdout() 303 if err != nil { 304 return nil, err 305 } 306 stderr, err := e.commandCfg.Stderr() 307 if err != nil { 308 return nil, err 309 } 310 311 e.childCmd.Stdout = stdout 312 e.childCmd.Stderr = stderr 313 314 // Look up the binary path and make it executable 315 absPath, err := lookupBin(command.TaskDir, command.Cmd) 316 if err != nil { 317 return nil, err 318 } 319 320 if err := makeExecutable(absPath); err != nil { 321 return nil, err 322 } 323 324 path := absPath 325 326 // Set the commands arguments 327 e.childCmd.Path = path 328 e.childCmd.Args = append([]string{e.childCmd.Path}, command.Args...) 
329 e.childCmd.Env = e.commandCfg.Env 330 331 // Start the process 332 if err = withNetworkIsolation(e.childCmd.Start, command.NetworkIsolation); err != nil { 333 return nil, fmt.Errorf("failed to start command path=%q --- args=%q: %v", path, e.childCmd.Args, err) 334 } 335 336 go e.pidCollector.collectPids(e.processExited, e.getAllPids) 337 go e.wait() 338 return &ProcessState{Pid: e.childCmd.Process.Pid, ExitCode: -1, Time: time.Now()}, nil 339 } 340 341 // Exec a command inside a container for exec and java drivers. 342 func (e *UniversalExecutor) Exec(deadline time.Time, name string, args []string) ([]byte, int, error) { 343 ctx, cancel := context.WithDeadline(context.Background(), deadline) 344 defer cancel() 345 return ExecScript(ctx, e.childCmd.Dir, e.commandCfg.Env, e.childCmd.SysProcAttr, e.commandCfg.NetworkIsolation, name, args) 346 } 347 348 // ExecScript executes cmd with args and returns the output, exit code, and 349 // error. Output is truncated to drivers/shared/structs.CheckBufSize 350 func ExecScript(ctx context.Context, dir string, env []string, attrs *syscall.SysProcAttr, 351 netSpec *drivers.NetworkIsolationSpec, name string, args []string) ([]byte, int, error) { 352 353 cmd := exec.CommandContext(ctx, name, args...) 
354 355 // Copy runtime environment from the main command 356 cmd.SysProcAttr = attrs 357 cmd.Dir = dir 358 cmd.Env = env 359 360 // Capture output 361 buf, _ := circbuf.NewBuffer(int64(drivers.CheckBufSize)) 362 cmd.Stdout = buf 363 cmd.Stderr = buf 364 365 if err := withNetworkIsolation(cmd.Run, netSpec); err != nil { 366 exitErr, ok := err.(*exec.ExitError) 367 if !ok { 368 // Non-exit error, return it and let the caller treat 369 // it as a critical failure 370 return nil, 0, err 371 } 372 373 // Some kind of error happened; default to critical 374 exitCode := 2 375 if status, ok := exitErr.Sys().(syscall.WaitStatus); ok { 376 exitCode = status.ExitStatus() 377 } 378 379 // Don't return the exitError as the caller only needs the 380 // output and code. 381 return buf.Bytes(), exitCode, nil 382 } 383 return buf.Bytes(), 0, nil 384 } 385 386 func (e *UniversalExecutor) ExecStreaming(ctx context.Context, command []string, tty bool, 387 stream drivers.ExecTaskStream) error { 388 389 if len(command) == 0 { 390 return fmt.Errorf("command is required") 391 } 392 393 cmd := exec.CommandContext(ctx, command[0], command[1:]...) 
394 395 cmd.Dir = "/" 396 cmd.Env = e.childCmd.Env 397 398 execHelper := &execHelper{ 399 logger: e.logger, 400 401 newTerminal: func() (func() (*os.File, error), *os.File, error) { 402 pty, tty, err := pty.Open() 403 if err != nil { 404 return nil, nil, err 405 } 406 407 return func() (*os.File, error) { return pty, nil }, tty, err 408 }, 409 setTTY: func(tty *os.File) error { 410 cmd.SysProcAttr = sessionCmdAttr(tty) 411 412 cmd.Stdin = tty 413 cmd.Stdout = tty 414 cmd.Stderr = tty 415 return nil 416 }, 417 setIO: func(stdin io.Reader, stdout, stderr io.Writer) error { 418 cmd.Stdin = stdin 419 cmd.Stdout = stdout 420 cmd.Stderr = stderr 421 return nil 422 }, 423 processStart: func() error { 424 if u := e.commandCfg.User; u != "" { 425 if err := setCmdUser(cmd, u); err != nil { 426 return err 427 } 428 } 429 430 return withNetworkIsolation(cmd.Start, e.commandCfg.NetworkIsolation) 431 }, 432 processWait: func() (*os.ProcessState, error) { 433 err := cmd.Wait() 434 return cmd.ProcessState, err 435 }, 436 } 437 438 return execHelper.run(ctx, tty, stream) 439 } 440 441 // Wait waits until a process has exited and returns it's exitcode and errors 442 func (e *UniversalExecutor) Wait(ctx context.Context) (*ProcessState, error) { 443 select { 444 case <-ctx.Done(): 445 return nil, ctx.Err() 446 case <-e.processExited: 447 return e.exitState, nil 448 } 449 } 450 451 func (e *UniversalExecutor) UpdateResources(resources *drivers.Resources) error { 452 return nil 453 } 454 455 func (e *UniversalExecutor) wait() { 456 defer close(e.processExited) 457 defer e.commandCfg.Close() 458 pid := e.childCmd.Process.Pid 459 err := e.childCmd.Wait() 460 if err == nil { 461 e.exitState = &ProcessState{Pid: pid, ExitCode: 0, Time: time.Now()} 462 return 463 } 464 465 exitCode := 1 466 var signal int 467 if exitErr, ok := err.(*exec.ExitError); ok { 468 if status, ok := exitErr.Sys().(syscall.WaitStatus); ok { 469 exitCode = status.ExitStatus() 470 if status.Signaled() { 471 // bash(1) 
uses the lower 7 bits of a uint8 472 // to indicate normal program failure (see 473 // <sysexits.h>). If a process terminates due 474 // to a signal, encode the signal number to 475 // indicate which signal caused the process 476 // to terminate. Mirror this exit code 477 // encoding scheme. 478 const exitSignalBase = 128 479 signal = int(status.Signal()) 480 exitCode = exitSignalBase + signal 481 } 482 } 483 } else { 484 e.logger.Warn("unexpected Cmd.Wait() error type", "error", err) 485 } 486 487 e.exitState = &ProcessState{Pid: pid, ExitCode: exitCode, Signal: signal, Time: time.Now()} 488 } 489 490 var ( 491 // finishedErr is the error message received when trying to kill and already 492 // exited process. 493 finishedErr = "os: process already finished" 494 495 // noSuchProcessErr is the error message received when trying to kill a non 496 // existing process (e.g. when killing a process group). 497 noSuchProcessErr = "no such process" 498 ) 499 500 // Exit cleans up the alloc directory, destroys resource container and kills the 501 // user process 502 func (e *UniversalExecutor) Shutdown(signal string, grace time.Duration) error { 503 e.logger.Debug("shutdown requested", "signal", signal, "grace_period_ms", grace.Round(time.Millisecond)) 504 var merr multierror.Error 505 506 // If the executor did not launch a process, return. 
507 if e.commandCfg == nil { 508 return nil 509 } 510 511 // If there is no process we can't shutdown 512 if e.childCmd.Process == nil { 513 e.logger.Warn("failed to shutdown", "error", "no process found") 514 return fmt.Errorf("executor failed to shutdown error: no process found") 515 } 516 517 proc, err := os.FindProcess(e.childCmd.Process.Pid) 518 if err != nil { 519 err = fmt.Errorf("executor failed to find process: %v", err) 520 e.logger.Warn("failed to shutdown", "error", err) 521 return err 522 } 523 524 // If grace is 0 then skip shutdown logic 525 if grace > 0 { 526 // Default signal to SIGINT if not set 527 if signal == "" { 528 signal = "SIGINT" 529 } 530 531 sig, ok := signals.SignalLookup[signal] 532 if !ok { 533 err = fmt.Errorf("error unknown signal given for shutdown: %s", signal) 534 e.logger.Warn("failed to shutdown", "error", err) 535 return err 536 } 537 538 if err := e.shutdownProcess(sig, proc); err != nil { 539 e.logger.Warn("failed to shutdown", "error", err) 540 return err 541 } 542 543 select { 544 case <-e.processExited: 545 case <-time.After(grace): 546 proc.Kill() 547 } 548 } else { 549 proc.Kill() 550 } 551 552 // Wait for process to exit 553 select { 554 case <-e.processExited: 555 case <-time.After(time.Second * 15): 556 e.logger.Warn("process did not exit after 15 seconds") 557 merr.Errors = append(merr.Errors, fmt.Errorf("process did not exit after 15 seconds")) 558 } 559 560 // Prefer killing the process via the resource container. 
561 if !(e.commandCfg.ResourceLimits || e.commandCfg.BasicProcessCgroup) { 562 if err := e.cleanupChildProcesses(proc); err != nil && err.Error() != finishedErr { 563 merr.Errors = append(merr.Errors, 564 fmt.Errorf("can't kill process with pid %d: %v", e.childCmd.Process.Pid, err)) 565 } 566 } 567 568 if e.commandCfg.ResourceLimits || e.commandCfg.BasicProcessCgroup { 569 if err := e.resConCtx.executorCleanup(); err != nil { 570 merr.Errors = append(merr.Errors, err) 571 } 572 } 573 574 if err := merr.ErrorOrNil(); err != nil { 575 e.logger.Warn("failed to shutdown", "error", err) 576 return err 577 } 578 579 return nil 580 } 581 582 // Signal sends the passed signal to the task 583 func (e *UniversalExecutor) Signal(s os.Signal) error { 584 if e.childCmd.Process == nil { 585 return fmt.Errorf("Task not yet run") 586 } 587 588 e.logger.Debug("sending signal to PID", "signal", s, "pid", e.childCmd.Process.Pid) 589 err := e.childCmd.Process.Signal(s) 590 if err != nil { 591 e.logger.Error("sending signal failed", "signal", s, "error", err) 592 return err 593 } 594 595 return nil 596 } 597 598 func (e *UniversalExecutor) Stats(ctx context.Context, interval time.Duration) (<-chan *cstructs.TaskResourceUsage, error) { 599 ch := make(chan *cstructs.TaskResourceUsage) 600 go e.handleStats(ch, ctx, interval) 601 return ch, nil 602 } 603 604 func (e *UniversalExecutor) handleStats(ch chan *cstructs.TaskResourceUsage, ctx context.Context, interval time.Duration) { 605 defer close(ch) 606 timer := time.NewTimer(0) 607 for { 608 select { 609 case <-ctx.Done(): 610 return 611 612 case <-timer.C: 613 timer.Reset(interval) 614 } 615 616 pidStats, err := e.pidCollector.pidStats() 617 if err != nil { 618 e.logger.Warn("error collecting stats", "error", err) 619 return 620 } 621 622 select { 623 case <-ctx.Done(): 624 return 625 case ch <- aggregatedResourceUsage(e.systemCpuStats, pidStats): 626 } 627 } 628 } 629 630 // lookupBin looks for path to the binary to run by looking for 
the binary in 631 // the following locations, in-order: 632 // task/local/, task/, on the host file system, in host $PATH 633 // The return path is absolute. 634 func lookupBin(taskDir string, bin string) (string, error) { 635 // Check in the local directory 636 local := filepath.Join(taskDir, allocdir.TaskLocal, bin) 637 if _, err := os.Stat(local); err == nil { 638 return local, nil 639 } 640 641 // Check at the root of the task's directory 642 root := filepath.Join(taskDir, bin) 643 if _, err := os.Stat(root); err == nil { 644 return root, nil 645 } 646 647 // when checking host paths, check with Stat first if path is absolute 648 // as exec.LookPath only considers files already marked as executable 649 // and only consider this for absolute paths to avoid depending on 650 // current directory of nomad which may cause unexpected behavior 651 if _, err := os.Stat(bin); err == nil && filepath.IsAbs(bin) { 652 return bin, nil 653 } 654 655 // Check the $PATH 656 if host, err := exec.LookPath(bin); err == nil { 657 return host, nil 658 } 659 660 return "", fmt.Errorf("binary %q could not be found", bin) 661 } 662 663 // makeExecutable makes the given file executable for root,group,others. 664 func makeExecutable(binPath string) error { 665 if runtime.GOOS == "windows" { 666 return nil 667 } 668 669 fi, err := os.Stat(binPath) 670 if err != nil { 671 if os.IsNotExist(err) { 672 return fmt.Errorf("binary %q does not exist", binPath) 673 } 674 return fmt.Errorf("specified binary is invalid: %v", err) 675 } 676 677 // If it is not executable, make it so. 678 perm := fi.Mode().Perm() 679 req := os.FileMode(0555) 680 if perm&req != req { 681 if err := os.Chmod(binPath, perm|req); err != nil { 682 return fmt.Errorf("error making %q executable: %s", binPath, err) 683 } 684 } 685 return nil 686 }