
     1  package executor
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"io"
     7  	"io/ioutil"
     8  	"os"
     9  	"os/exec"
    10  	"path/filepath"
    11  	"runtime"
    12  	"strings"
    13  	"syscall"
    14  	"time"
    16  	""
    17  	""
    18  	hclog ""
    19  	multierror ""
    20  	""
    21  	""
    22  	""
    23  	cstructs ""
    24  	""
    25  	""
    27  	shelpers ""
    28  )
const (
	// ExecutorVersionLatest is the current and latest version of the executor
	ExecutorVersionLatest = "2.0.0"

	// ExecutorVersionPre0_9 is the version of the executor used prior to the
	// release of 0.9.x
	ExecutorVersionPre0_9 = "1.1.0"
)
var (
	// ExecutorBasicMeasuredMemStats and ExecutorBasicMeasuredCpuStats list the
	// names of the memory and CPU statistics the basic executor exposes
	ExecutorBasicMeasuredMemStats = []string{"RSS", "Swap"}
	ExecutorBasicMeasuredCpuStats = []string{"System Mode", "User Mode", "Percent"}
)
// Executor is the interface which allows a driver to launch and supervise
// a process
type Executor interface {
	// Launch a user process configured by the given ExecCommand
	Launch(launchCmd *ExecCommand) (*ProcessState, error)

	// Wait blocks until the process exits or an error occurs
	Wait(ctx context.Context) (*ProcessState, error)

	// Shutdown will shutdown the executor by stopping the user process and
	// cleaning up any resources created by the executor. The shutdown sequence
	// will first send the given signal to the process. This defaults to "SIGINT"
	// if not specified. The executor will then wait for the process to exit
	// before cleaning up other resources. If the executor waits longer than the
	// given grace period, the process is forcefully killed.
	//
	// To force kill the user process, gracePeriod can be set to 0.
	Shutdown(signal string, gracePeriod time.Duration) error

	// UpdateResources updates any resource isolation enforcement with new
	// constraints if supported.
	UpdateResources(*drivers.Resources) error

	// Version returns the executor API version
	Version() (*ExecutorVersion, error)

	// Stats returns a channel of stats. Stats are collected and
	// pushed to the channel on the given interval
	Stats(context.Context, time.Duration) (<-chan *cstructs.TaskResourceUsage, error)

	// Signal sends the given signal to the user process
	Signal(os.Signal) error

	// Exec executes the given command and args inside the executor context
	// and returns the output and exit code.
	Exec(deadline time.Time, cmd string, args []string) ([]byte, int, error)

	// ExecStreaming executes the given command (cmd[0] is the program, the
	// remaining elements are its arguments) inside the executor context with
	// the given stream attached. tty presumably requests that the command be
	// run with a terminal -- confirm against the implementation.
	ExecStreaming(ctx context.Context, cmd []string, tty bool,
		stream drivers.ExecTaskStream) error
}
// ExecCommand holds the user command, args, and other isolation related
// settings.
type ExecCommand struct {
	// Cmd is the command that the user wants to run.
	Cmd string

	// Args is the args of the command that the user wants to run.
	Args []string

	// Resources defined by the task
	Resources *drivers.Resources

	// StdoutPath is the path the process stdout should be written to
	StdoutPath string
	// stdout is the writer used for the process stdout. It is either injected
	// through SetWriters or lazily created by Stdout.
	stdout io.WriteCloser

	// StderrPath is the path the process stderr should be written to
	StderrPath string
	// stderr is the writer used for the process stderr. It is either injected
	// through SetWriters or lazily created by Stderr.
	stderr io.WriteCloser

	// Env is the list of KEY=val pairs of environment variables to be set
	Env []string

	// User is the user which the executor uses to run the command.
	User string

	// TaskDir is the directory path on the host for the task
	TaskDir string

	// ResourceLimits determines whether resource limits are enforced by the
	// executor.
	ResourceLimits bool

	// BasicProcessCgroup marks whether we put the process in a cgroup. Setting
	// this field doesn't enforce resource limits. To enforce limits, set
	// ResourceLimits. Using the cgroup does allow more precise cleanup of
	// processes.
	BasicProcessCgroup bool

	// NoPivotRoot disables using pivot_root for isolation, useful when the root
	// partition is on a ramdisk which does not support pivot_root,
	// see man 2 pivot_root
	NoPivotRoot bool

	// Mounts are the host paths to be made available inside rootfs
	Mounts []*drivers.MountConfig

	// Devices are the device nodes to be created in isolation environment
	Devices []*drivers.DeviceConfig

	// NetworkIsolation is the network isolation spec handed to
	// withNetworkIsolation when the command (or an exec'd command) is started.
	NetworkIsolation *drivers.NetworkIsolationSpec
}
// SetWriters sets the writers for the process stdout and stderr. This should
// not be used if writing to a file path such as a fifo file. SetWriters
// is mainly used for unit testing purposes.
func (c *ExecCommand) SetWriters(out io.WriteCloser, err io.WriteCloser) {
	c.stdout = out
	c.stderr = err
}
   146  // GetWriters returns the unexported io.WriteCloser for the stdout and stderr
   147  // handles. This is mainly used for unit testing purposes.
   148  func (c *ExecCommand) GetWriters() (stdout io.WriteCloser, stderr io.WriteCloser) {
   149  	return c.stdout, c.stderr
   150  }
   152  type nopCloser struct {
   153  	io.Writer
   154  }
   156  func (nopCloser) Close() error { return nil }
   158  // Stdout returns a writer for the configured file descriptor
   159  func (c *ExecCommand) Stdout() (io.WriteCloser, error) {
   160  	if c.stdout == nil {
   161  		if c.StdoutPath != "" {
   162  			f, err := fifo.OpenWriter(c.StdoutPath)
   163  			if err != nil {
   164  				return nil, fmt.Errorf("failed to create stdout: %v", err)
   165  			}
   166  			c.stdout = f
   167  		} else {
   168  			c.stdout = nopCloser{ioutil.Discard}
   169  		}
   170  	}
   171  	return c.stdout, nil
   172  }
   174  // Stderr returns a writer for the configured file descriptor
   175  func (c *ExecCommand) Stderr() (io.WriteCloser, error) {
   176  	if c.stderr == nil {
   177  		if c.StderrPath != "" {
   178  			f, err := fifo.OpenWriter(c.StderrPath)
   179  			if err != nil {
   180  				return nil, fmt.Errorf("failed to create stderr: %v", err)
   181  			}
   182  			c.stderr = f
   183  		} else {
   184  			c.stderr = nopCloser{ioutil.Discard}
   185  		}
   186  	}
   187  	return c.stderr, nil
   188  }
   190  func (c *ExecCommand) Close() {
   191  	if c.stdout != nil {
   192  		c.stdout.Close()
   193  	}
   194  	if c.stderr != nil {
   195  		c.stderr.Close()
   196  	}
   197  }
// ProcessState holds information about the state of a user process.
type ProcessState struct {
	// Pid is the process id of the user process.
	Pid int
	// ExitCode is the exit code of the process. Launch reports -1 for a
	// process that has just been started; when the process was terminated by
	// a signal, wait() records 128 plus the signal number.
	ExitCode int
	// Signal is the number of the signal that terminated the process, or 0.
	Signal int
	// Time is the time at which this state was recorded.
	Time time.Time
}
// ExecutorVersion is the version of the executor
type ExecutorVersion struct {
	// Version is the version string, e.g. ExecutorVersionLatest.
	Version string
}

// GoString implements fmt.GoStringer and returns the bare version string, so
// formatting an *ExecutorVersion with %#v prints just the version.
func (v *ExecutorVersion) GoString() string {
	return v.Version
}
// UniversalExecutor is an implementation of the Executor which launches and
// supervises processes. In addition to process supervision it provides resource
// and file system isolation
type UniversalExecutor struct {
	// childCmd is the command used to run the user process; it is configured
	// and started by Launch.
	childCmd exec.Cmd
	// commandCfg is the ExecCommand passed to Launch. It is nil until Launch
	// has been called.
	commandCfg *ExecCommand

	// exitState is the final state of the user process. It is written by
	// wait() before processExited is closed.
	exitState *ProcessState
	// processExited is closed by wait() once the user process has exited.
	processExited chan interface{}

	// resConCtx is used to track and cleanup additional resources created by
	// the executor. Currently this is only used for cgroups.
	resConCtx resourceContainerContext

	// CPU stats trackers; systemCpuStats is the one passed to
	// aggregatedResourceUsage when reporting stats.
	totalCpuStats  *stats.CpuStats
	userCpuStats   *stats.CpuStats
	systemCpuStats *stats.CpuStats
	// pidCollector collects the pids of the task and provides per-pid stats.
	pidCollector *pidCollector

	logger hclog.Logger
}
   238  // NewExecutor returns an Executor
   239  func NewExecutor(logger hclog.Logger) Executor {
   240  	logger = logger.Named("executor")
   241  	if err := shelpers.Init(); err != nil {
   242  		logger.Error("unable to initialize stats", "error", err)
   243  	}
   244  	return &UniversalExecutor{
   245  		logger:         logger,
   246  		processExited:  make(chan interface{}),
   247  		totalCpuStats:  stats.NewCpuStats(),
   248  		userCpuStats:   stats.NewCpuStats(),
   249  		systemCpuStats: stats.NewCpuStats(),
   250  		pidCollector:   newPidCollector(logger),
   251  	}
   252  }
// Version returns the API version of the executor, which is always
// ExecutorVersionLatest. The returned error is always nil.
func (e *UniversalExecutor) Version() (*ExecutorVersion, error) {
	return &ExecutorVersion{Version: ExecutorVersionLatest}, nil
}
// Launch launches the main process and returns its state. It also
// configures and applies isolation on certain platforms.
//
// The given command is stored as the executor's configuration. On success the
// returned ProcessState carries the pid of the started process and an
// ExitCode of -1, and two goroutines are started: one collecting the pids of
// the task and one waiting for the process to exit. Any failure while
// preparing or starting the command is returned and no state is reported.
func (e *UniversalExecutor) Launch(command *ExecCommand) (*ProcessState, error) {
	e.logger.Trace("preparing to launch command", "command", command.Cmd, "args", strings.Join(command.Args, " "))

	e.commandCfg = command

	// setting the user of the process
	if command.User != "" {
		e.logger.Debug("running command as user", "user", command.User)
		if err := e.runAs(command.User); err != nil {
			return nil, err
		}
	}

	// set the task dir as the working directory for the command
	e.childCmd.Dir = e.commandCfg.TaskDir

	// start command in separate process group
	if err := e.setNewProcessGroup(); err != nil {
		return nil, err
	}

	// Setup cgroups on linux
	if err := e.configureResourceContainer(os.Getpid()); err != nil {
		return nil, err
	}

	// Resolve (and lazily open) the writers for the process output
	stdout, err := e.commandCfg.Stdout()
	if err != nil {
		return nil, err
	}
	stderr, err := e.commandCfg.Stderr()
	if err != nil {
		return nil, err
	}

	e.childCmd.Stdout = stdout
	e.childCmd.Stderr = stderr

	// Look up the binary path and make it executable
	absPath, err := lookupBin(command.TaskDir, command.Cmd)
	if err != nil {
		return nil, err
	}

	if err := makeExecutable(absPath); err != nil {
		return nil, err
	}

	path := absPath

	// Set the commands arguments
	e.childCmd.Path = path
	e.childCmd.Args = append([]string{e.childCmd.Path}, command.Args...)
	e.childCmd.Env = e.commandCfg.Env

	// Start the process
	if err = withNetworkIsolation(e.childCmd.Start, command.NetworkIsolation); err != nil {
		return nil, fmt.Errorf("failed to start command path=%q --- args=%q: %v", path, e.childCmd.Args, err)
	}

	// Track the task's pids and reap the process in the background; wait()
	// closes processExited, which also stops the pid collector.
	go e.pidCollector.collectPids(e.processExited, e.getAllPids)
	go e.wait()
	return &ProcessState{Pid: e.childCmd.Process.Pid, ExitCode: -1, Time: time.Now()}, nil
}
   326  // Exec a command inside a container for exec and java drivers.
   327  func (e *UniversalExecutor) Exec(deadline time.Time, name string, args []string) ([]byte, int, error) {
   328  	ctx, cancel := context.WithDeadline(context.Background(), deadline)
   329  	defer cancel()
   330  	return ExecScript(ctx, e.childCmd.Dir, e.commandCfg.Env, e.childCmd.SysProcAttr, e.commandCfg.NetworkIsolation, name, args)
   331  }
   333  // ExecScript executes cmd with args and returns the output, exit code, and
   334  // error. Output is truncated to drivers/shared/structs.CheckBufSize
   335  func ExecScript(ctx context.Context, dir string, env []string, attrs *syscall.SysProcAttr,
   336  	netSpec *drivers.NetworkIsolationSpec, name string, args []string) ([]byte, int, error) {
   338  	cmd := exec.CommandContext(ctx, name, args...)
   340  	// Copy runtime environment from the main command
   341  	cmd.SysProcAttr = attrs
   342  	cmd.Dir = dir
   343  	cmd.Env = env
   345  	// Capture output
   346  	buf, _ := circbuf.NewBuffer(int64(drivers.CheckBufSize))
   347  	cmd.Stdout = buf
   348  	cmd.Stderr = buf
   350  	if err := withNetworkIsolation(cmd.Run, netSpec); err != nil {
   351  		exitErr, ok := err.(*exec.ExitError)
   352  		if !ok {
   353  			// Non-exit error, return it and let the caller treat
   354  			// it as a critical failure
   355  			return nil, 0, err
   356  		}
   358  		// Some kind of error happened; default to critical
   359  		exitCode := 2
   360  		if status, ok := exitErr.Sys().(syscall.WaitStatus); ok {
   361  			exitCode = status.ExitStatus()
   362  		}
   364  		// Don't return the exitError as the caller only needs the
   365  		// output and code.
   366  		return buf.Bytes(), exitCode, nil
   367  	}
   368  	return buf.Bytes(), 0, nil
   369  }
// ExecStreaming runs command[0] with command[1:] as arguments, using "/" as
// the working directory and the environment of the launched command. It
// returns an error if command is empty.
//
// The command is not started directly here; instead an execHelper is built
// with callbacks that describe how to allocate a terminal, wire up the
// command's I/O, start the process (under the task's network isolation) and
// wait for it.
func (e *UniversalExecutor) ExecStreaming(ctx context.Context, command []string, tty bool,
	stream drivers.ExecTaskStream) error {

	if len(command) == 0 {
		return fmt.Errorf("command is required")
	}

	cmd := exec.CommandContext(ctx, command[0], command[1:]...)

	cmd.Dir = "/"
	cmd.Env = e.childCmd.Env

	execHelper := &execHelper{
		logger: e.logger,

		// newTerminal opens a pty/tty pair and returns an accessor for the
		// pty side together with the tty side.
		newTerminal: func() (func() (*os.File, error), *os.File, error) {
			pty, tty, err := pty.Open()
			if err != nil {
				return nil, nil, err
			}

			return func() (*os.File, error) { return pty, nil }, tty, err
		},
		// setTTY attaches the tty as stdin, stdout and stderr of the command
		// and applies the session attributes returned by sessionCmdAttr.
		setTTY: func(tty *os.File) error {
			cmd.SysProcAttr = sessionCmdAttr(tty)

			cmd.Stdin = tty
			cmd.Stdout = tty
			cmd.Stderr = tty
			return nil
		},
		// setIO wires plain (non-terminal) streams into the command.
		setIO: func(stdin io.Reader, stdout, stderr io.Writer) error {
			cmd.Stdin = stdin
			cmd.Stdout = stdout
			cmd.Stderr = stderr
			return nil
		},
		// processStart starts the command under the task's network isolation.
		processStart: func() error {
			return withNetworkIsolation(cmd.Start, e.commandCfg.NetworkIsolation)
		},
		// processWait blocks until the command exits and reports its state.
		processWait: func() (*os.ProcessState, error) {
			err := cmd.Wait()
			return cmd.ProcessState, err
		},
	}

	// NOTE(review): the statement below is malformed as it stands; it looks
	// like a call on execHelper taking (ctx, tty, stream) whose leading part
	// is missing -- TODO confirm against the execHelper definition and restore.
	return, tty, stream)
}
// Wait blocks until the process has exited or ctx is done. If the process
// exits first, the exit state recorded for it is returned with a nil error;
// if ctx is done first, it returns a nil state and ctx.Err().
func (e *UniversalExecutor) Wait(ctx context.Context) (*ProcessState, error) {
	select {
	case <-ctx.Done():
		return nil, ctx.Err()
	case <-e.processExited:
		return e.exitState, nil
	}
}
// UpdateResources is a no-op for the UniversalExecutor: the given resources
// are ignored and nil is always returned.
func (e *UniversalExecutor) UpdateResources(resources *drivers.Resources) error {
	return nil
}
   434  func (e *UniversalExecutor) wait() {
   435  	defer close(e.processExited)
   436  	defer e.commandCfg.Close()
   437  	pid := e.childCmd.Process.Pid
   438  	err := e.childCmd.Wait()
   439  	if err == nil {
   440  		e.exitState = &ProcessState{Pid: pid, ExitCode: 0, Time: time.Now()}
   441  		return
   442  	}
   444  	exitCode := 1
   445  	var signal int
   446  	if exitErr, ok := err.(*exec.ExitError); ok {
   447  		if status, ok := exitErr.Sys().(syscall.WaitStatus); ok {
   448  			exitCode = status.ExitStatus()
   449  			if status.Signaled() {
   450  				// bash(1) uses the lower 7 bits of a uint8
   451  				// to indicate normal program failure (see
   452  				// <sysexits.h>). If a process terminates due
   453  				// to a signal, encode the signal number to
   454  				// indicate which signal caused the process
   455  				// to terminate.  Mirror this exit code
   456  				// encoding scheme.
   457  				const exitSignalBase = 128
   458  				signal = int(status.Signal())
   459  				exitCode = exitSignalBase + signal
   460  			}
   461  		}
   462  	} else {
   463  		e.logger.Warn("unexpected Cmd.Wait() error type", "error", err)
   464  	}
   466  	e.exitState = &ProcessState{Pid: pid, ExitCode: exitCode, Signal: signal, Time: time.Now()}
   467  }
var (
	// finishedErr is the error message received when trying to kill an already
	// exited process.
	finishedErr = "os: process already finished"

	// noSuchProcessErr is the error message received when trying to kill a
	// non-existent process (e.g. when killing a process group).
	noSuchProcessErr = "no such process"
)
// Shutdown stops the user process and cleans up the resources created by the
// executor.
//
// If grace is greater than zero, the given signal (defaulting to "SIGINT"
// when empty) is sent to the process first and the process is only killed if
// it has not exited within the grace period. If grace is zero the process is
// killed immediately. Shutdown then waits up to 15 seconds for the process to
// exit before cleaning up: child processes are cleaned up directly unless the
// command uses a cgroup (ResourceLimits or BasicProcessCgroup), in which case
// the resource container is cleaned up instead.
//
// It returns nil if no command was ever launched, and an error if there is no
// process, the signal name is unknown, the signal cannot be delivered, or any
// of the cleanup steps fail.
func (e *UniversalExecutor) Shutdown(signal string, grace time.Duration) error {
	e.logger.Debug("shutdown requested", "signal", signal, "grace_period_ms", grace.Round(time.Millisecond))
	var merr multierror.Error

	// If the executor did not launch a process, return.
	if e.commandCfg == nil {
		return nil
	}

	// If there is no process we can't shutdown
	if e.childCmd.Process == nil {
		e.logger.Warn("failed to shutdown", "error", "no process found")
		return fmt.Errorf("executor failed to shutdown error: no process found")
	}

	proc, err := os.FindProcess(e.childCmd.Process.Pid)
	if err != nil {
		err = fmt.Errorf("executor failed to find process: %v", err)
		e.logger.Warn("failed to shutdown", "error", err)
		return err
	}

	// If grace is 0 then skip shutdown logic
	if grace > 0 {
		// Default signal to SIGINT if not set
		if signal == "" {
			signal = "SIGINT"
		}

		sig, ok := signals.SignalLookup[signal]
		if !ok {
			err = fmt.Errorf("error unknown signal given for shutdown: %s", signal)
			e.logger.Warn("failed to shutdown", "error", err)
			return err
		}

		if err := e.shutdownProcess(sig, proc); err != nil {
			e.logger.Warn("failed to shutdown", "error", err)
			return err
		}

		// Give the process the grace period to exit on its own, then force
		// kill it. The result of Kill is ignored here; a process that does
		// not exit is reported by the wait below.
		select {
		case <-e.processExited:
		case <-time.After(grace):
			proc.Kill()
		}
	} else {
		proc.Kill()
	}

	// Wait for process to exit
	select {
	case <-e.processExited:
	case <-time.After(time.Second * 15):
		e.logger.Warn("process did not exit after 15 seconds")
		merr.Errors = append(merr.Errors, fmt.Errorf("process did not exit after 15 seconds"))
	}

	// Prefer killing the process via the resource container.
	if !(e.commandCfg.ResourceLimits || e.commandCfg.BasicProcessCgroup) {
		// An "already finished" error only means there was nothing left to
		// kill, so it is not reported.
		if err := e.cleanupChildProcesses(proc); err != nil && err.Error() != finishedErr {
			merr.Errors = append(merr.Errors,
				fmt.Errorf("can't kill process with pid %d: %v", e.childCmd.Process.Pid, err))
		}
	}

	if e.commandCfg.ResourceLimits || e.commandCfg.BasicProcessCgroup {
		if err := e.resConCtx.executorCleanup(); err != nil {
			merr.Errors = append(merr.Errors, err)
		}
	}

	if err := merr.ErrorOrNil(); err != nil {
		e.logger.Warn("failed to shutdown", "error", err)
		return err
	}

	return nil
}
   561  // Signal sends the passed signal to the task
   562  func (e *UniversalExecutor) Signal(s os.Signal) error {
   563  	if e.childCmd.Process == nil {
   564  		return fmt.Errorf("Task not yet run")
   565  	}
   567  	e.logger.Debug("sending signal to PID", "signal", s, "pid", e.childCmd.Process.Pid)
   568  	err := e.childCmd.Process.Signal(s)
   569  	if err != nil {
   570  		e.logger.Error("sending signal failed", "signal", s, "error", err)
   571  		return err
   572  	}
   574  	return nil
   575  }
   577  func (e *UniversalExecutor) Stats(ctx context.Context, interval time.Duration) (<-chan *cstructs.TaskResourceUsage, error) {
   578  	ch := make(chan *cstructs.TaskResourceUsage)
   579  	go e.handleStats(ch, ctx, interval)
   580  	return ch, nil
   581  }
   583  func (e *UniversalExecutor) handleStats(ch chan *cstructs.TaskResourceUsage, ctx context.Context, interval time.Duration) {
   584  	defer close(ch)
   585  	timer := time.NewTimer(0)
   586  	for {
   587  		select {
   588  		case <-ctx.Done():
   589  			return
   591  		case <-timer.C:
   592  			timer.Reset(interval)
   593  		}
   595  		pidStats, err := e.pidCollector.pidStats()
   596  		if err != nil {
   597  			e.logger.Warn("error collecting stats", "error", err)
   598  			return
   599  		}
   601  		select {
   602  		case <-ctx.Done():
   603  			return
   604  		case ch <- aggregatedResourceUsage(e.systemCpuStats, pidStats):
   605  		}
   606  	}
   607  }
   609  // lookupBin looks for path to the binary to run by looking for the binary in
   610  // the following locations, in-order:
   611  // task/local/, task/, on the host file system, in host $PATH
   612  // The return path is absolute.
   613  func lookupBin(taskDir string, bin string) (string, error) {
   614  	// Check in the local directory
   615  	local := filepath.Join(taskDir, allocdir.TaskLocal, bin)
   616  	if _, err := os.Stat(local); err == nil {
   617  		return local, nil
   618  	}
   620  	// Check at the root of the task's directory
   621  	root := filepath.Join(taskDir, bin)
   622  	if _, err := os.Stat(root); err == nil {
   623  		return root, nil
   624  	}
   626  	// when checking host paths, check with Stat first if path is absolute
   627  	// as exec.LookPath only considers files already marked as executable
   628  	// and only consider this for absolute paths to avoid depending on
   629  	// current directory of nomad which may cause unexpected behavior
   630  	if _, err := os.Stat(bin); err == nil && filepath.IsAbs(bin) {
   631  		return bin, nil
   632  	}
   634  	// Check the $PATH
   635  	if host, err := exec.LookPath(bin); err == nil {
   636  		return host, nil
   637  	}
   639  	return "", fmt.Errorf("binary %q could not be found", bin)
   640  }
   642  // makeExecutable makes the given file executable for root,group,others.
   643  func makeExecutable(binPath string) error {
   644  	if runtime.GOOS == "windows" {
   645  		return nil
   646  	}
   648  	fi, err := os.Stat(binPath)
   649  	if err != nil {
   650  		if os.IsNotExist(err) {
   651  			return fmt.Errorf("binary %q does not exist", binPath)
   652  		}
   653  		return fmt.Errorf("specified binary is invalid: %v", err)
   654  	}
   656  	// If it is not executable, make it so.
   657  	perm := fi.Mode().Perm()
   658  	req := os.FileMode(0555)
   659  	if perm&req != req {
   660  		if err := os.Chmod(binPath, perm|req); err != nil {
   661  			return fmt.Errorf("error making %q executable: %s", binPath, err)
   662  		}
   663  	}
   664  	return nil
   665  }