github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/drivers/shared/executor/executor.go (about) 1 package executor 2 3 import ( 4 "context" 5 "fmt" 6 "io" 7 "io/ioutil" 8 "os" 9 "os/exec" 10 "path/filepath" 11 "runtime" 12 "strings" 13 "syscall" 14 "time" 15 16 "github.com/armon/circbuf" 17 "github.com/hashicorp/consul-template/signals" 18 hclog "github.com/hashicorp/go-hclog" 19 multierror "github.com/hashicorp/go-multierror" 20 "github.com/hashicorp/nomad/client/allocdir" 21 "github.com/hashicorp/nomad/client/lib/fifo" 22 "github.com/hashicorp/nomad/client/stats" 23 cstructs "github.com/hashicorp/nomad/client/structs" 24 "github.com/hashicorp/nomad/plugins/drivers" 25 "github.com/kr/pty" 26 27 shelpers "github.com/hashicorp/nomad/helper/stats" 28 ) 29 30 const ( 31 // ExecutorVersionLatest is the current and latest version of the executor 32 ExecutorVersionLatest = "2.0.0" 33 34 // ExecutorVersionPre0_9 is the version of executor use prior to the release 35 // of 0.9.x 36 ExecutorVersionPre0_9 = "1.1.0" 37 ) 38 39 var ( 40 // The statistics the basic executor exposes 41 ExecutorBasicMeasuredMemStats = []string{"RSS", "Swap"} 42 ExecutorBasicMeasuredCpuStats = []string{"System Mode", "User Mode", "Percent"} 43 ) 44 45 // Executor is the interface which allows a driver to launch and supervise 46 // a process 47 type Executor interface { 48 // Launch a user process configured by the given ExecCommand 49 Launch(launchCmd *ExecCommand) (*ProcessState, error) 50 51 // Wait blocks until the process exits or an error occures 52 Wait(ctx context.Context) (*ProcessState, error) 53 54 // Shutdown will shutdown the executor by stopping the user process, 55 // cleaning up and resources created by the executor. The shutdown sequence 56 // will first send the given signal to the process. This defaults to "SIGINT" 57 // if not specified. The executor will then wait for the process to exit 58 // before cleaning up other resources. If the executor waits longer than the 59 // given grace period, the process is forcefully killed. 60 // 61 // To force kill the user process, gracePeriod can be set to 0. 62 Shutdown(signal string, gracePeriod time.Duration) error 63 64 // UpdateResources updates any resource isolation enforcement with new 65 // constraints if supported. 66 UpdateResources(*drivers.Resources) error 67 68 // Version returns the executor API version 69 Version() (*ExecutorVersion, error) 70 71 // Returns a channel of stats. Stats are collected and 72 // pushed to the channel on the given interval 73 Stats(context.Context, time.Duration) (<-chan *cstructs.TaskResourceUsage, error) 74 75 // Signal sends the given signal to the user process 76 Signal(os.Signal) error 77 78 // Exec executes the given command and args inside the executor context 79 // and returns the output and exit code. 80 Exec(deadline time.Time, cmd string, args []string) ([]byte, int, error) 81 82 ExecStreaming(ctx context.Context, cmd []string, tty bool, 83 stream drivers.ExecTaskStream) error 84 } 85 86 // ExecCommand holds the user command, args, and other isolation related 87 // settings. 88 type ExecCommand struct { 89 // Cmd is the command that the user wants to run. 90 Cmd string 91 92 // Args is the args of the command that the user wants to run. 93 Args []string 94 95 // Resources defined by the task 96 Resources *drivers.Resources 97 98 // StdoutPath is the path the process stdout should be written to 99 StdoutPath string 100 stdout io.WriteCloser 101 102 // StderrPath is the path the process stderr should be written to 103 StderrPath string 104 stderr io.WriteCloser 105 106 // Env is the list of KEY=val pairs of environment variables to be set 107 Env []string 108 109 // User is the user which the executor uses to run the command. 110 User string 111 112 // TaskDir is the directory path on the host where for the task 113 TaskDir string 114 115 // ResourceLimits determines whether resource limits are enforced by the 116 // executor. 117 ResourceLimits bool 118 119 // Cgroup marks whether we put the process in a cgroup. Setting this field 120 // doesn't enforce resource limits. To enforce limits, set ResourceLimits. 121 // Using the cgroup does allow more precise cleanup of processes. 122 BasicProcessCgroup bool 123 124 // NoPivotRoot disables using pivot_root for isolation, useful when the root 125 // partition is on a ramdisk which does not support pivot_root, 126 // see man 2 pivot_root 127 NoPivotRoot bool 128 129 // Mounts are the host paths to be be made available inside rootfs 130 Mounts []*drivers.MountConfig 131 132 // Devices are the the device nodes to be created in isolation environment 133 Devices []*drivers.DeviceConfig 134 135 NetworkIsolation *drivers.NetworkIsolationSpec 136 } 137 138 // SetWriters sets the writer for the process stdout and stderr. This should 139 // not be used if writing to a file path such as a fifo file. SetStdoutWriter 140 // is mainly used for unit testing purposes. 141 func (c *ExecCommand) SetWriters(out io.WriteCloser, err io.WriteCloser) { 142 c.stdout = out 143 c.stderr = err 144 } 145 146 // GetWriters returns the unexported io.WriteCloser for the stdout and stderr 147 // handles. This is mainly used for unit testing purposes. 148 func (c *ExecCommand) GetWriters() (stdout io.WriteCloser, stderr io.WriteCloser) { 149 return c.stdout, c.stderr 150 } 151 152 type nopCloser struct { 153 io.Writer 154 } 155 156 func (nopCloser) Close() error { return nil } 157 158 // Stdout returns a writer for the configured file descriptor 159 func (c *ExecCommand) Stdout() (io.WriteCloser, error) { 160 if c.stdout == nil { 161 if c.StdoutPath != "" { 162 f, err := fifo.OpenWriter(c.StdoutPath) 163 if err != nil { 164 return nil, fmt.Errorf("failed to create stdout: %v", err) 165 } 166 c.stdout = f 167 } else { 168 c.stdout = nopCloser{ioutil.Discard} 169 } 170 } 171 return c.stdout, nil 172 } 173 174 // Stderr returns a writer for the configured file descriptor 175 func (c *ExecCommand) Stderr() (io.WriteCloser, error) { 176 if c.stderr == nil { 177 if c.StderrPath != "" { 178 f, err := fifo.OpenWriter(c.StderrPath) 179 if err != nil { 180 return nil, fmt.Errorf("failed to create stderr: %v", err) 181 } 182 c.stderr = f 183 } else { 184 c.stderr = nopCloser{ioutil.Discard} 185 } 186 } 187 return c.stderr, nil 188 } 189 190 func (c *ExecCommand) Close() { 191 if c.stdout != nil { 192 c.stdout.Close() 193 } 194 if c.stderr != nil { 195 c.stderr.Close() 196 } 197 } 198 199 // ProcessState holds information about the state of a user process. 200 type ProcessState struct { 201 Pid int 202 ExitCode int 203 Signal int 204 Time time.Time 205 } 206 207 // ExecutorVersion is the version of the executor 208 type ExecutorVersion struct { 209 Version string 210 } 211 212 func (v *ExecutorVersion) GoString() string { 213 return v.Version 214 } 215 216 // UniversalExecutor is an implementation of the Executor which launches and 217 // supervises processes. In addition to process supervision it provides resource 218 // and file system isolation 219 type UniversalExecutor struct { 220 childCmd exec.Cmd 221 commandCfg *ExecCommand 222 223 exitState *ProcessState 224 processExited chan interface{} 225 226 // resConCtx is used to track and cleanup additional resources created by 227 // the executor. Currently this is only used for cgroups. 228 resConCtx resourceContainerContext 229 230 totalCpuStats *stats.CpuStats 231 userCpuStats *stats.CpuStats 232 systemCpuStats *stats.CpuStats 233 pidCollector *pidCollector 234 235 logger hclog.Logger 236 } 237 238 // NewExecutor returns an Executor 239 func NewExecutor(logger hclog.Logger) Executor { 240 logger = logger.Named("executor") 241 if err := shelpers.Init(); err != nil { 242 logger.Error("unable to initialize stats", "error", err) 243 } 244 return &UniversalExecutor{ 245 logger: logger, 246 processExited: make(chan interface{}), 247 totalCpuStats: stats.NewCpuStats(), 248 userCpuStats: stats.NewCpuStats(), 249 systemCpuStats: stats.NewCpuStats(), 250 pidCollector: newPidCollector(logger), 251 } 252 } 253 254 // Version returns the api version of the executor 255 func (e *UniversalExecutor) Version() (*ExecutorVersion, error) { 256 return &ExecutorVersion{Version: ExecutorVersionLatest}, nil 257 } 258 259 // Launch launches the main process and returns its state. It also 260 // configures an applies isolation on certain platforms. 261 func (e *UniversalExecutor) Launch(command *ExecCommand) (*ProcessState, error) { 262 e.logger.Trace("preparing to launch command", "command", command.Cmd, "args", strings.Join(command.Args, " ")) 263 264 e.commandCfg = command 265 266 // setting the user of the process 267 if command.User != "" { 268 e.logger.Debug("running command as user", "user", command.User) 269 if err := e.runAs(command.User); err != nil { 270 return nil, err 271 } 272 } 273 274 // set the task dir as the working directory for the command 275 e.childCmd.Dir = e.commandCfg.TaskDir 276 277 // start command in separate process group 278 if err := e.setNewProcessGroup(); err != nil { 279 return nil, err 280 } 281 282 // Setup cgroups on linux 283 if err := e.configureResourceContainer(os.Getpid()); err != nil { 284 return nil, err 285 } 286 287 stdout, err := e.commandCfg.Stdout() 288 if err != nil { 289 return nil, err 290 } 291 stderr, err := e.commandCfg.Stderr() 292 if err != nil { 293 return nil, err 294 } 295 296 e.childCmd.Stdout = stdout 297 e.childCmd.Stderr = stderr 298 299 // Look up the binary path and make it executable 300 absPath, err := lookupBin(command.TaskDir, command.Cmd) 301 if err != nil { 302 return nil, err 303 } 304 305 if err := makeExecutable(absPath); err != nil { 306 return nil, err 307 } 308 309 path := absPath 310 311 // Set the commands arguments 312 e.childCmd.Path = path 313 e.childCmd.Args = append([]string{e.childCmd.Path}, command.Args...) 314 e.childCmd.Env = e.commandCfg.Env 315 316 // Start the process 317 if err = withNetworkIsolation(e.childCmd.Start, command.NetworkIsolation); err != nil { 318 return nil, fmt.Errorf("failed to start command path=%q --- args=%q: %v", path, e.childCmd.Args, err) 319 } 320 321 go e.pidCollector.collectPids(e.processExited, e.getAllPids) 322 go e.wait() 323 return &ProcessState{Pid: e.childCmd.Process.Pid, ExitCode: -1, Time: time.Now()}, nil 324 } 325 326 // Exec a command inside a container for exec and java drivers. 327 func (e *UniversalExecutor) Exec(deadline time.Time, name string, args []string) ([]byte, int, error) { 328 ctx, cancel := context.WithDeadline(context.Background(), deadline) 329 defer cancel() 330 return ExecScript(ctx, e.childCmd.Dir, e.commandCfg.Env, e.childCmd.SysProcAttr, e.commandCfg.NetworkIsolation, name, args) 331 } 332 333 // ExecScript executes cmd with args and returns the output, exit code, and 334 // error. Output is truncated to drivers/shared/structs.CheckBufSize 335 func ExecScript(ctx context.Context, dir string, env []string, attrs *syscall.SysProcAttr, 336 netSpec *drivers.NetworkIsolationSpec, name string, args []string) ([]byte, int, error) { 337 338 cmd := exec.CommandContext(ctx, name, args...) 339 340 // Copy runtime environment from the main command 341 cmd.SysProcAttr = attrs 342 cmd.Dir = dir 343 cmd.Env = env 344 345 // Capture output 346 buf, _ := circbuf.NewBuffer(int64(drivers.CheckBufSize)) 347 cmd.Stdout = buf 348 cmd.Stderr = buf 349 350 if err := withNetworkIsolation(cmd.Run, netSpec); err != nil { 351 exitErr, ok := err.(*exec.ExitError) 352 if !ok { 353 // Non-exit error, return it and let the caller treat 354 // it as a critical failure 355 return nil, 0, err 356 } 357 358 // Some kind of error happened; default to critical 359 exitCode := 2 360 if status, ok := exitErr.Sys().(syscall.WaitStatus); ok { 361 exitCode = status.ExitStatus() 362 } 363 364 // Don't return the exitError as the caller only needs the 365 // output and code. 366 return buf.Bytes(), exitCode, nil 367 } 368 return buf.Bytes(), 0, nil 369 } 370 371 func (e *UniversalExecutor) ExecStreaming(ctx context.Context, command []string, tty bool, 372 stream drivers.ExecTaskStream) error { 373 374 if len(command) == 0 { 375 return fmt.Errorf("command is required") 376 } 377 378 cmd := exec.CommandContext(ctx, command[0], command[1:]...) 379 380 cmd.Dir = "/" 381 cmd.Env = e.childCmd.Env 382 383 execHelper := &execHelper{ 384 logger: e.logger, 385 386 newTerminal: func() (func() (*os.File, error), *os.File, error) { 387 pty, tty, err := pty.Open() 388 if err != nil { 389 return nil, nil, err 390 } 391 392 return func() (*os.File, error) { return pty, nil }, tty, err 393 }, 394 setTTY: func(tty *os.File) error { 395 cmd.SysProcAttr = sessionCmdAttr(tty) 396 397 cmd.Stdin = tty 398 cmd.Stdout = tty 399 cmd.Stderr = tty 400 return nil 401 }, 402 setIO: func(stdin io.Reader, stdout, stderr io.Writer) error { 403 cmd.Stdin = stdin 404 cmd.Stdout = stdout 405 cmd.Stderr = stderr 406 return nil 407 }, 408 processStart: func() error { 409 return withNetworkIsolation(cmd.Start, e.commandCfg.NetworkIsolation) 410 }, 411 processWait: func() (*os.ProcessState, error) { 412 err := cmd.Wait() 413 return cmd.ProcessState, err 414 }, 415 } 416 417 return execHelper.run(ctx, tty, stream) 418 } 419 420 // Wait waits until a process has exited and returns it's exitcode and errors 421 func (e *UniversalExecutor) Wait(ctx context.Context) (*ProcessState, error) { 422 select { 423 case <-ctx.Done(): 424 return nil, ctx.Err() 425 case <-e.processExited: 426 return e.exitState, nil 427 } 428 } 429 430 func (e *UniversalExecutor) UpdateResources(resources *drivers.Resources) error { 431 return nil 432 } 433 434 func (e *UniversalExecutor) wait() { 435 defer close(e.processExited) 436 defer e.commandCfg.Close() 437 pid := e.childCmd.Process.Pid 438 err := e.childCmd.Wait() 439 if err == nil { 440 e.exitState = &ProcessState{Pid: pid, ExitCode: 0, Time: time.Now()} 441 return 442 } 443 444 exitCode := 1 445 var signal int 446 if exitErr, ok := err.(*exec.ExitError); ok { 447 if status, ok := exitErr.Sys().(syscall.WaitStatus); ok { 448 exitCode = status.ExitStatus() 449 if status.Signaled() { 450 // bash(1) uses the lower 7 bits of a uint8 451 // to indicate normal program failure (see 452 // <sysexits.h>). If a process terminates due 453 // to a signal, encode the signal number to 454 // indicate which signal caused the process 455 // to terminate. Mirror this exit code 456 // encoding scheme. 457 const exitSignalBase = 128 458 signal = int(status.Signal()) 459 exitCode = exitSignalBase + signal 460 } 461 } 462 } else { 463 e.logger.Warn("unexpected Cmd.Wait() error type", "error", err) 464 } 465 466 e.exitState = &ProcessState{Pid: pid, ExitCode: exitCode, Signal: signal, Time: time.Now()} 467 } 468 469 var ( 470 // finishedErr is the error message received when trying to kill and already 471 // exited process. 472 finishedErr = "os: process already finished" 473 474 // noSuchProcessErr is the error message received when trying to kill a non 475 // existing process (e.g. when killing a process group). 476 noSuchProcessErr = "no such process" 477 ) 478 479 // Exit cleans up the alloc directory, destroys resource container and kills the 480 // user process 481 func (e *UniversalExecutor) Shutdown(signal string, grace time.Duration) error { 482 e.logger.Debug("shutdown requested", "signal", signal, "grace_period_ms", grace.Round(time.Millisecond)) 483 var merr multierror.Error 484 485 // If the executor did not launch a process, return. 486 if e.commandCfg == nil { 487 return nil 488 } 489 490 // If there is no process we can't shutdown 491 if e.childCmd.Process == nil { 492 e.logger.Warn("failed to shutdown", "error", "no process found") 493 return fmt.Errorf("executor failed to shutdown error: no process found") 494 } 495 496 proc, err := os.FindProcess(e.childCmd.Process.Pid) 497 if err != nil { 498 err = fmt.Errorf("executor failed to find process: %v", err) 499 e.logger.Warn("failed to shutdown", "error", err) 500 return err 501 } 502 503 // If grace is 0 then skip shutdown logic 504 if grace > 0 { 505 // Default signal to SIGINT if not set 506 if signal == "" { 507 signal = "SIGINT" 508 } 509 510 sig, ok := signals.SignalLookup[signal] 511 if !ok { 512 err = fmt.Errorf("error unknown signal given for shutdown: %s", signal) 513 e.logger.Warn("failed to shutdown", "error", err) 514 return err 515 } 516 517 if err := e.shutdownProcess(sig, proc); err != nil { 518 e.logger.Warn("failed to shutdown", "error", err) 519 return err 520 } 521 522 select { 523 case <-e.processExited: 524 case <-time.After(grace): 525 proc.Kill() 526 } 527 } else { 528 proc.Kill() 529 } 530 531 // Wait for process to exit 532 select { 533 case <-e.processExited: 534 case <-time.After(time.Second * 15): 535 e.logger.Warn("process did not exit after 15 seconds") 536 merr.Errors = append(merr.Errors, fmt.Errorf("process did not exit after 15 seconds")) 537 } 538 539 // Prefer killing the process via the resource container. 540 if !(e.commandCfg.ResourceLimits || e.commandCfg.BasicProcessCgroup) { 541 if err := e.cleanupChildProcesses(proc); err != nil && err.Error() != finishedErr { 542 merr.Errors = append(merr.Errors, 543 fmt.Errorf("can't kill process with pid %d: %v", e.childCmd.Process.Pid, err)) 544 } 545 } 546 547 if e.commandCfg.ResourceLimits || e.commandCfg.BasicProcessCgroup { 548 if err := e.resConCtx.executorCleanup(); err != nil { 549 merr.Errors = append(merr.Errors, err) 550 } 551 } 552 553 if err := merr.ErrorOrNil(); err != nil { 554 e.logger.Warn("failed to shutdown", "error", err) 555 return err 556 } 557 558 return nil 559 } 560 561 // Signal sends the passed signal to the task 562 func (e *UniversalExecutor) Signal(s os.Signal) error { 563 if e.childCmd.Process == nil { 564 return fmt.Errorf("Task not yet run") 565 } 566 567 e.logger.Debug("sending signal to PID", "signal", s, "pid", e.childCmd.Process.Pid) 568 err := e.childCmd.Process.Signal(s) 569 if err != nil { 570 e.logger.Error("sending signal failed", "signal", s, "error", err) 571 return err 572 } 573 574 return nil 575 } 576 577 func (e *UniversalExecutor) Stats(ctx context.Context, interval time.Duration) (<-chan *cstructs.TaskResourceUsage, error) { 578 ch := make(chan *cstructs.TaskResourceUsage) 579 go e.handleStats(ch, ctx, interval) 580 return ch, nil 581 } 582 583 func (e *UniversalExecutor) handleStats(ch chan *cstructs.TaskResourceUsage, ctx context.Context, interval time.Duration) { 584 defer close(ch) 585 timer := time.NewTimer(0) 586 for { 587 select { 588 case <-ctx.Done(): 589 return 590 591 case <-timer.C: 592 timer.Reset(interval) 593 } 594 595 pidStats, err := e.pidCollector.pidStats() 596 if err != nil { 597 e.logger.Warn("error collecting stats", "error", err) 598 return 599 } 600 601 select { 602 case <-ctx.Done(): 603 return 604 case ch <- aggregatedResourceUsage(e.systemCpuStats, pidStats): 605 } 606 } 607 } 608 609 // lookupBin looks for path to the binary to run by looking for the binary in 610 // the following locations, in-order: 611 // task/local/, task/, on the host file system, in host $PATH 612 // The return path is absolute. 613 func lookupBin(taskDir string, bin string) (string, error) { 614 // Check in the local directory 615 local := filepath.Join(taskDir, allocdir.TaskLocal, bin) 616 if _, err := os.Stat(local); err == nil { 617 return local, nil 618 } 619 620 // Check at the root of the task's directory 621 root := filepath.Join(taskDir, bin) 622 if _, err := os.Stat(root); err == nil { 623 return root, nil 624 } 625 626 // when checking host paths, check with Stat first if path is absolute 627 // as exec.LookPath only considers files already marked as executable 628 // and only consider this for absolute paths to avoid depending on 629 // current directory of nomad which may cause unexpected behavior 630 if _, err := os.Stat(bin); err == nil && filepath.IsAbs(bin) { 631 return bin, nil 632 } 633 634 // Check the $PATH 635 if host, err := exec.LookPath(bin); err == nil { 636 return host, nil 637 } 638 639 return "", fmt.Errorf("binary %q could not be found", bin) 640 } 641 642 // makeExecutable makes the given file executable for root,group,others. 643 func makeExecutable(binPath string) error { 644 if runtime.GOOS == "windows" { 645 return nil 646 } 647 648 fi, err := os.Stat(binPath) 649 if err != nil { 650 if os.IsNotExist(err) { 651 return fmt.Errorf("binary %q does not exist", binPath) 652 } 653 return fmt.Errorf("specified binary is invalid: %v", err) 654 } 655 656 // If it is not executable, make it so. 657 perm := fi.Mode().Perm() 658 req := os.FileMode(0555) 659 if perm&req != req { 660 if err := os.Chmod(binPath, perm|req); err != nil { 661 return fmt.Errorf("error making %q executable: %s", binPath, err) 662 } 663 } 664 return nil 665 }