// Source: github.com/hernad/nomad@v1.6.112/drivers/nix/_executor/executor.go

package executor

import (
	"context"
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"os/exec"
	"path/filepath"
	"runtime"
	"strings"
	"syscall"
	"time"

	"github.com/armon/circbuf"
	"github.com/creack/pty"
	"github.com/hernad/consul-template/signals"
	hclog "github.com/hashicorp/go-hclog"
	multierror "github.com/hashicorp/go-multierror"
	"github.com/hernad/nomad/client/allocdir"
	"github.com/hernad/nomad/client/lib/fifo"
	"github.com/hernad/nomad/client/lib/resources"
	//"github.com/hernad/nomad/client/stats"
	"github.com/hernad/nomad/helper/stats"
	cstructs "github.com/hernad/nomad/client/structs"
	"github.com/hernad/nomad/plugins/drivers"
	"github.com/syndtr/gocapability/capability"
)

const (
	// ExecutorVersionLatest is the current and latest version of the executor.
	ExecutorVersionLatest = "2.0.0"

	// ExecutorVersionPre0_9 is the version of the executor used prior to the
	// release of 0.9.x.
	ExecutorVersionPre0_9 = "1.1.0"

	// IsolationModePrivate represents the private isolation mode for a namespace.
	IsolationModePrivate = "private"

	// IsolationModeHost represents the host isolation mode for a namespace.
	IsolationModeHost = "host"
)

var (
	// ExecutorBasicMeasuredMemStats and ExecutorBasicMeasuredCpuStats name the
	// statistics the basic executor exposes.
	ExecutorBasicMeasuredMemStats = []string{"RSS", "Swap"}
	ExecutorBasicMeasuredCpuStats = []string{"System Mode", "User Mode", "Percent"}
)

// Executor is the interface which allows a driver to launch and supervise
// a process.
type Executor interface {
	// Launch a user process configured by the given ExecCommand.
	Launch(launchCmd *ExecCommand) (*ProcessState, error)

	// Wait blocks until the process exits or an error occurs.
	Wait(ctx context.Context) (*ProcessState, error)

	// Shutdown will shutdown the executor by stopping the user process,
	// cleaning up any resources created by the executor. The shutdown sequence
	// will first send the given signal to the process. This defaults to "SIGINT"
	// if not specified. The executor will then wait for the process to exit
	// before cleaning up other resources. If the executor waits longer than the
	// given grace period, the process is forcefully killed.
	//
	// To force kill the user process, gracePeriod can be set to 0.
	Shutdown(signal string, gracePeriod time.Duration) error

	// UpdateResources updates any resource isolation enforcement with new
	// constraints if supported.
	UpdateResources(*drivers.Resources) error

	// Version returns the executor API version.
	Version() (*ExecutorVersion, error)

	// Stats returns a channel of stats. Stats are collected and
	// pushed to the channel on the given interval.
	Stats(context.Context, time.Duration) (<-chan *cstructs.TaskResourceUsage, error)

	// Signal sends the given signal to the user process.
	Signal(os.Signal) error

	// Exec executes the given command and args inside the executor context
	// and returns the output and exit code.
	Exec(deadline time.Time, cmd string, args []string) ([]byte, int, error)

	// ExecStreaming executes cmd (program followed by its arguments) and
	// connects it to stream. NOTE(review): tty presumably requests that the
	// command be attached to a pseudo-terminal — confirm against the
	// implementation's helper.
	ExecStreaming(ctx context.Context, cmd []string, tty bool,
		stream drivers.ExecTaskStream) error
}

// ExecCommand holds the user command, args, and other isolation related
// settings.
//
// Important (!): when adding fields, make sure to update the RPC methods in
// grpcExecutorClient.Launch and grpcExecutorServer.Launch. Number of hours
// spent tracking this down: too many.
type ExecCommand struct {
	// Cmd is the command that the user wants to run.
	Cmd string

	// Args is the args of the command that the user wants to run.
	Args []string

	// Resources defined by the task.
	Resources *drivers.Resources

	// StdoutPath is the path the process stdout should be written to.
	StdoutPath string
	// stdout is the lazily created (or explicitly injected) stdout writer.
	stdout io.WriteCloser

	// StderrPath is the path the process stderr should be written to.
	StderrPath string
	// stderr is the lazily created (or explicitly injected) stderr writer.
	stderr io.WriteCloser

	// Env is the list of KEY=val pairs of environment variables to be set.
	Env []string

	// User is the user which the executor uses to run the command.
	User string

	// TaskDir is the directory path on the host for the task.
	TaskDir string

	// ResourceLimits determines whether resource limits are enforced by the
	// executor.
	ResourceLimits bool

	// BasicProcessCgroup marks whether we put the process in a cgroup. Setting
	// this field doesn't enforce resource limits. To enforce limits, set
	// ResourceLimits. Using the cgroup does allow more precise cleanup of
	// processes.
	BasicProcessCgroup bool

	// NoPivotRoot disables using pivot_root for isolation, useful when the root
	// partition is on a ramdisk which does not support pivot_root,
	// see man 2 pivot_root.
	NoPivotRoot bool

	// Mounts are the host paths to be made available inside rootfs.
	Mounts []*drivers.MountConfig

	// Devices are the device nodes to be created in the isolation environment.
	Devices []*drivers.DeviceConfig

	// NetworkIsolation is the network isolation configuration.
	NetworkIsolation *drivers.NetworkIsolationSpec

	// ModePID is the PID isolation mode (private or host).
	ModePID string

	// ModeIPC is the IPC isolation mode (private or host).
	ModeIPC string

	// Capabilities are the linux capabilities to be enabled by the task driver.
	Capabilities []string
}

// SetWriters sets the writer for the process stdout and stderr. This should
// not be used if writing to a file path such as a fifo file.
SetStdoutWriter 161 // is mainly used for unit testing purposes. 162 func (c *ExecCommand) SetWriters(out io.WriteCloser, err io.WriteCloser) { 163 c.stdout = out 164 c.stderr = err 165 } 166 167 // GetWriters returns the unexported io.WriteCloser for the stdout and stderr 168 // handles. This is mainly used for unit testing purposes. 169 func (c *ExecCommand) GetWriters() (stdout io.WriteCloser, stderr io.WriteCloser) { 170 return c.stdout, c.stderr 171 } 172 173 type nopCloser struct { 174 io.Writer 175 } 176 177 func (nopCloser) Close() error { return nil } 178 179 // Stdout returns a writer for the configured file descriptor 180 func (c *ExecCommand) Stdout() (io.WriteCloser, error) { 181 if c.stdout == nil { 182 if c.StdoutPath != "" { 183 f, err := fifo.OpenWriter(c.StdoutPath) 184 if err != nil { 185 return nil, fmt.Errorf("failed to create stdout: %v", err) 186 } 187 c.stdout = f 188 } else { 189 c.stdout = nopCloser{ioutil.Discard} 190 } 191 } 192 return c.stdout, nil 193 } 194 195 // Stderr returns a writer for the configured file descriptor 196 func (c *ExecCommand) Stderr() (io.WriteCloser, error) { 197 if c.stderr == nil { 198 if c.StderrPath != "" { 199 f, err := fifo.OpenWriter(c.StderrPath) 200 if err != nil { 201 return nil, fmt.Errorf("failed to create stderr: %v", err) 202 } 203 c.stderr = f 204 } else { 205 c.stderr = nopCloser{ioutil.Discard} 206 } 207 } 208 return c.stderr, nil 209 } 210 211 func (c *ExecCommand) Close() { 212 if c.stdout != nil { 213 c.stdout.Close() 214 } 215 if c.stderr != nil { 216 c.stderr.Close() 217 } 218 } 219 220 // ProcessState holds information about the state of a user process. 
221 type ProcessState struct { 222 Pid int 223 ExitCode int 224 Signal int 225 Time time.Time 226 } 227 228 // ExecutorVersion is the version of the executor 229 type ExecutorVersion struct { 230 Version string 231 } 232 233 func (v *ExecutorVersion) GoString() string { 234 return v.Version 235 } 236 237 // UniversalExecutor is an implementation of the Executor which launches and 238 // supervises processes. In addition to process supervision it provides resource 239 // and file system isolation 240 type UniversalExecutor struct { 241 childCmd exec.Cmd 242 commandCfg *ExecCommand 243 244 exitState *ProcessState 245 processExited chan interface{} 246 247 // containment is used to cleanup resources created by the executor 248 // currently only used for killing pids via freezer cgroup on linux 249 containment resources.Containment 250 251 totalCpuStats *stats.CpuStats 252 userCpuStats *stats.CpuStats 253 systemCpuStats *stats.CpuStats 254 pidCollector *pidCollector 255 256 logger hclog.Logger 257 } 258 259 // NewExecutor returns an Executor 260 func NewExecutor(logger hclog.Logger) Executor { 261 logger = logger.Named("executor") 262 if err := shelpers.Init(); err != nil { 263 logger.Error("unable to initialize stats", "error", err) 264 } 265 266 return &UniversalExecutor{ 267 logger: logger, 268 processExited: make(chan interface{}), 269 totalCpuStats: stats.NewCpuStats(), 270 userCpuStats: stats.NewCpuStats(), 271 systemCpuStats: stats.NewCpuStats(), 272 pidCollector: newPidCollector(logger), 273 } 274 } 275 276 // Version returns the api version of the executor 277 func (e *UniversalExecutor) Version() (*ExecutorVersion, error) { 278 return &ExecutorVersion{Version: ExecutorVersionLatest}, nil 279 } 280 281 // Launch launches the main process and returns its state. It also 282 // configures an applies isolation on certain platforms. 
283 func (e *UniversalExecutor) Launch(command *ExecCommand) (*ProcessState, error) { 284 e.logger.Trace("preparing to launch command", "command", command.Cmd, "args", strings.Join(command.Args, " ")) 285 286 e.commandCfg = command 287 288 // setting the user of the process 289 if command.User != "" { 290 e.logger.Debug("running command as user", "user", command.User) 291 if err := setCmdUser(&e.childCmd, command.User); err != nil { 292 return nil, err 293 } 294 } 295 296 // set the task dir as the working directory for the command 297 e.childCmd.Dir = e.commandCfg.TaskDir 298 299 // start command in separate process group 300 if err := e.setNewProcessGroup(); err != nil { 301 return nil, err 302 } 303 304 // Maybe setup containment (for now, cgroups only only on linux) 305 if e.commandCfg.ResourceLimits || e.commandCfg.BasicProcessCgroup { 306 pid := os.Getpid() 307 if err := e.configureResourceContainer(pid); err != nil { 308 e.logger.Error("failed to configure resource container", "pid", pid, "error", err) 309 return nil, err 310 } 311 } 312 313 stdout, err := e.commandCfg.Stdout() 314 if err != nil { 315 return nil, err 316 } 317 stderr, err := e.commandCfg.Stderr() 318 if err != nil { 319 return nil, err 320 } 321 322 e.childCmd.Stdout = stdout 323 e.childCmd.Stderr = stderr 324 325 // Look up the binary path and make it executable 326 absPath, err := lookupBin(command.TaskDir, command.Cmd) 327 if err != nil { 328 return nil, err 329 } 330 331 if err := makeExecutable(absPath); err != nil { 332 return nil, err 333 } 334 335 path := absPath 336 337 // Set the commands arguments 338 e.childCmd.Path = path 339 e.childCmd.Args = append([]string{e.childCmd.Path}, command.Args...) 
340 e.childCmd.Env = e.commandCfg.Env 341 342 // Start the process 343 if err = withNetworkIsolation(e.childCmd.Start, command.NetworkIsolation); err != nil { 344 return nil, fmt.Errorf("failed to start command path=%q --- args=%q: %v", path, e.childCmd.Args, err) 345 } 346 347 go e.pidCollector.collectPids(e.processExited, e.getAllPids) 348 go e.wait() 349 return &ProcessState{Pid: e.childCmd.Process.Pid, ExitCode: -1, Time: time.Now()}, nil 350 } 351 352 // Exec a command inside a container for exec and java drivers. 353 func (e *UniversalExecutor) Exec(deadline time.Time, name string, args []string) ([]byte, int, error) { 354 ctx, cancel := context.WithDeadline(context.Background(), deadline) 355 defer cancel() 356 return ExecScript(ctx, e.childCmd.Dir, e.commandCfg.Env, e.childCmd.SysProcAttr, e.commandCfg.NetworkIsolation, name, args) 357 } 358 359 // ExecScript executes cmd with args and returns the output, exit code, and 360 // error. Output is truncated to drivers/shared/structs.CheckBufSize 361 func ExecScript(ctx context.Context, dir string, env []string, attrs *syscall.SysProcAttr, 362 netSpec *drivers.NetworkIsolationSpec, name string, args []string) ([]byte, int, error) { 363 364 cmd := exec.CommandContext(ctx, name, args...) 
365 366 // Copy runtime environment from the main command 367 cmd.SysProcAttr = attrs 368 cmd.Dir = dir 369 cmd.Env = env 370 371 // Capture output 372 buf, _ := circbuf.NewBuffer(int64(drivers.CheckBufSize)) 373 cmd.Stdout = buf 374 cmd.Stderr = buf 375 376 if err := withNetworkIsolation(cmd.Run, netSpec); err != nil { 377 exitErr, ok := err.(*exec.ExitError) 378 if !ok { 379 // Non-exit error, return it and let the caller treat 380 // it as a critical failure 381 return nil, 0, err 382 } 383 384 // Some kind of error happened; default to critical 385 exitCode := 2 386 if status, ok := exitErr.Sys().(syscall.WaitStatus); ok { 387 exitCode = status.ExitStatus() 388 } 389 390 // Don't return the exitError as the caller only needs the 391 // output and code. 392 return buf.Bytes(), exitCode, nil 393 } 394 return buf.Bytes(), 0, nil 395 } 396 397 func (e *UniversalExecutor) ExecStreaming(ctx context.Context, command []string, tty bool, 398 stream drivers.ExecTaskStream) error { 399 400 if len(command) == 0 { 401 return fmt.Errorf("command is required") 402 } 403 404 cmd := exec.CommandContext(ctx, command[0], command[1:]...) 
405 406 cmd.Dir = "/" 407 cmd.Env = e.childCmd.Env 408 409 execHelper := &execHelper{ 410 logger: e.logger, 411 412 newTerminal: func() (func() (*os.File, error), *os.File, error) { 413 pty, tty, err := pty.Open() 414 if err != nil { 415 return nil, nil, err 416 } 417 418 return func() (*os.File, error) { return pty, nil }, tty, err 419 }, 420 setTTY: func(tty *os.File) error { 421 cmd.SysProcAttr = sessionCmdAttr(tty) 422 423 cmd.Stdin = tty 424 cmd.Stdout = tty 425 cmd.Stderr = tty 426 return nil 427 }, 428 setIO: func(stdin io.Reader, stdout, stderr io.Writer) error { 429 cmd.Stdin = stdin 430 cmd.Stdout = stdout 431 cmd.Stderr = stderr 432 return nil 433 }, 434 processStart: func() error { 435 if u := e.commandCfg.User; u != "" { 436 if err := setCmdUser(cmd, u); err != nil { 437 return err 438 } 439 } 440 441 return withNetworkIsolation(cmd.Start, e.commandCfg.NetworkIsolation) 442 }, 443 processWait: func() (*os.ProcessState, error) { 444 err := cmd.Wait() 445 return cmd.ProcessState, err 446 }, 447 } 448 449 return execHelper.run(ctx, tty, stream) 450 } 451 452 // Wait waits until a process has exited and returns it's exitcode and errors 453 func (e *UniversalExecutor) Wait(ctx context.Context) (*ProcessState, error) { 454 select { 455 case <-ctx.Done(): 456 return nil, ctx.Err() 457 case <-e.processExited: 458 return e.exitState, nil 459 } 460 } 461 462 func (e *UniversalExecutor) UpdateResources(resources *drivers.Resources) error { 463 return nil 464 } 465 466 func (e *UniversalExecutor) wait() { 467 defer close(e.processExited) 468 defer e.commandCfg.Close() 469 pid := e.childCmd.Process.Pid 470 err := e.childCmd.Wait() 471 if err == nil { 472 e.exitState = &ProcessState{Pid: pid, ExitCode: 0, Time: time.Now()} 473 return 474 } 475 476 exitCode := 1 477 var signal int 478 if exitErr, ok := err.(*exec.ExitError); ok { 479 if status, ok := exitErr.Sys().(syscall.WaitStatus); ok { 480 exitCode = status.ExitStatus() 481 if status.Signaled() { 482 // bash(1) 
uses the lower 7 bits of a uint8 483 // to indicate normal program failure (see 484 // <sysexits.h>). If a process terminates due 485 // to a signal, encode the signal number to 486 // indicate which signal caused the process 487 // to terminate. Mirror this exit code 488 // encoding scheme. 489 const exitSignalBase = 128 490 signal = int(status.Signal()) 491 exitCode = exitSignalBase + signal 492 } 493 } 494 } else { 495 e.logger.Warn("unexpected Cmd.Wait() error type", "error", err) 496 } 497 498 e.exitState = &ProcessState{Pid: pid, ExitCode: exitCode, Signal: signal, Time: time.Now()} 499 } 500 501 var ( 502 // finishedErr is the error message received when trying to kill and already 503 // exited process. 504 finishedErr = "os: process already finished" 505 506 // noSuchProcessErr is the error message received when trying to kill a non 507 // existing process (e.g. when killing a process group). 508 noSuchProcessErr = "no such process" 509 ) 510 511 // Shutdown cleans up the alloc directory, destroys resource container and 512 // kills the user process. 513 func (e *UniversalExecutor) Shutdown(signal string, grace time.Duration) error { 514 e.logger.Debug("shutdown requested", "signal", signal, "grace_period_ms", grace.Round(time.Millisecond)) 515 var merr multierror.Error 516 517 // If the executor did not launch a process, return. 
518 if e.commandCfg == nil { 519 return nil 520 } 521 522 // If there is no process we can't shutdown 523 if e.childCmd.Process == nil { 524 e.logger.Warn("failed to shutdown due to missing process", "error", "no process found") 525 return fmt.Errorf("executor failed to shutdown error: no process found") 526 } 527 528 proc, err := os.FindProcess(e.childCmd.Process.Pid) 529 if err != nil { 530 err = fmt.Errorf("executor failed to find process: %v", err) 531 e.logger.Warn("failed to shutdown due to inability to find process", "pid", e.childCmd.Process.Pid, "error", err) 532 return err 533 } 534 535 // If grace is 0 then skip shutdown logic 536 if grace > 0 { 537 // Default signal to SIGINT if not set 538 if signal == "" { 539 signal = "SIGINT" 540 } 541 542 sig, ok := signals.SignalLookup[signal] 543 if !ok { 544 err = fmt.Errorf("error unknown signal given for shutdown: %s", signal) 545 e.logger.Warn("failed to shutdown", "error", err) 546 return err 547 } 548 549 if err := e.shutdownProcess(sig, proc); err != nil { 550 e.logger.Warn("failed to shutdown process", "pid", proc.Pid, "error", err) 551 return err 552 } 553 554 select { 555 case <-e.processExited: 556 case <-time.After(grace): 557 proc.Kill() 558 } 559 } else { 560 proc.Kill() 561 } 562 563 // Wait for process to exit 564 select { 565 case <-e.processExited: 566 case <-time.After(time.Second * 15): 567 e.logger.Warn("process did not exit after 15 seconds") 568 merr.Errors = append(merr.Errors, fmt.Errorf("process did not exit after 15 seconds")) 569 } 570 571 // prefer killing the process via platform-dependent resource containment 572 killByContainment := e.commandCfg.ResourceLimits || e.commandCfg.BasicProcessCgroup 573 574 if !killByContainment { 575 // there is no containment, so kill the group the old fashioned way by sending 576 // SIGKILL to the negative pid 577 if cleanupChildrenErr := e.killProcessTree(proc); cleanupChildrenErr != nil && cleanupChildrenErr.Error() != finishedErr { 578 merr.Errors 
= append(merr.Errors, 579 fmt.Errorf("can't kill process with pid %d: %v", e.childCmd.Process.Pid, cleanupChildrenErr)) 580 } 581 } else { 582 // there is containment available (e.g. cgroups) so defer to that implementation 583 // for killing the processes 584 if cleanupErr := e.containment.Cleanup(); cleanupErr != nil { 585 e.logger.Warn("containment cleanup failed", "error", cleanupErr) 586 merr.Errors = append(merr.Errors, cleanupErr) 587 } 588 } 589 590 if err = merr.ErrorOrNil(); err != nil { 591 e.logger.Warn("failed to shutdown due to some error", "error", err.Error()) 592 return err 593 } 594 595 return nil 596 } 597 598 // Signal sends the passed signal to the task 599 func (e *UniversalExecutor) Signal(s os.Signal) error { 600 if e.childCmd.Process == nil { 601 return fmt.Errorf("Task not yet run") 602 } 603 604 e.logger.Debug("sending signal to PID", "signal", s, "pid", e.childCmd.Process.Pid) 605 err := e.childCmd.Process.Signal(s) 606 if err != nil { 607 e.logger.Error("sending signal failed", "signal", s, "error", err) 608 return err 609 } 610 611 return nil 612 } 613 614 func (e *UniversalExecutor) Stats(ctx context.Context, interval time.Duration) (<-chan *cstructs.TaskResourceUsage, error) { 615 ch := make(chan *cstructs.TaskResourceUsage) 616 go e.handleStats(ch, ctx, interval) 617 return ch, nil 618 } 619 620 func (e *UniversalExecutor) handleStats(ch chan *cstructs.TaskResourceUsage, ctx context.Context, interval time.Duration) { 621 defer close(ch) 622 timer := time.NewTimer(0) 623 for { 624 select { 625 case <-ctx.Done(): 626 return 627 628 case <-timer.C: 629 timer.Reset(interval) 630 } 631 632 pidStats, err := e.pidCollector.pidStats() 633 if err != nil { 634 e.logger.Warn("error collecting stats", "error", err) 635 return 636 } 637 638 select { 639 case <-ctx.Done(): 640 return 641 case ch <- aggregatedResourceUsage(e.systemCpuStats, pidStats): 642 } 643 } 644 } 645 646 // lookupBin looks for path to the binary to run by looking for the 
binary in 647 // the following locations, in-order: 648 // task/local/, task/, on the host file system, in host $PATH 649 // The return path is absolute. 650 func lookupBin(taskDir string, bin string) (string, error) { 651 // Check in the local directory 652 local := filepath.Join(taskDir, allocdir.TaskLocal, bin) 653 if _, err := os.Stat(local); err == nil { 654 return local, nil 655 } 656 657 // Check at the root of the task's directory 658 root := filepath.Join(taskDir, bin) 659 if _, err := os.Stat(root); err == nil { 660 return root, nil 661 } 662 663 // when checking host paths, check with Stat first if path is absolute 664 // as exec.LookPath only considers files already marked as executable 665 // and only consider this for absolute paths to avoid depending on 666 // current directory of nomad which may cause unexpected behavior 667 if _, err := os.Stat(bin); err == nil && filepath.IsAbs(bin) { 668 return bin, nil 669 } 670 671 // Check the $PATH 672 if host, err := exec.LookPath(bin); err == nil { 673 return host, nil 674 } 675 676 return "", fmt.Errorf("binary %q could not be found", bin) 677 } 678 679 // makeExecutable makes the given file executable for root,group,others. 680 func makeExecutable(binPath string) error { 681 if runtime.GOOS == "windows" { 682 return nil 683 } 684 685 fi, err := os.Stat(binPath) 686 if err != nil { 687 if os.IsNotExist(err) { 688 return fmt.Errorf("binary %q does not exist", binPath) 689 } 690 return fmt.Errorf("specified binary is invalid: %v", err) 691 } 692 693 // If it is not executable, make it so. 694 perm := fi.Mode().Perm() 695 req := os.FileMode(0555) 696 if perm&req != req { 697 if err := os.Chmod(binPath, perm|req); err != nil { 698 return fmt.Errorf("error making %q executable: %s", binPath, err) 699 } 700 } 701 return nil 702 } 703 704 // SupportedCaps returns a list of all supported capabilities in kernel. 
705 func SupportedCaps(allowNetRaw bool) []string { 706 var allCaps []string 707 last := capability.CAP_LAST_CAP 708 // workaround for RHEL6 which has no /proc/sys/kernel/cap_last_cap 709 if last == capability.Cap(63) { 710 last = capability.CAP_BLOCK_SUSPEND 711 } 712 for _, cap := range capability.List() { 713 if cap > last { 714 continue 715 } 716 if !allowNetRaw && cap == capability.CAP_NET_RAW { 717 continue 718 } 719 allCaps = append(allCaps, fmt.Sprintf("CAP_%s", strings.ToUpper(cap.String()))) 720 } 721 return allCaps 722 }