github.com/hernad/nomad@v1.6.112/drivers/shared/executor/executor.go (about)

     1  // Copyright (c) HashiCorp, Inc.
     2  // SPDX-License-Identifier: MPL-2.0
     3  
     4  package executor
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"io"
    10  	"os"
    11  	"os/exec"
    12  	"path/filepath"
    13  	"runtime"
    14  	"strings"
    15  	"syscall"
    16  	"time"
    17  
    18  	"github.com/armon/circbuf"
    19  	"github.com/creack/pty"
    20  	"github.com/hernad/consul-template/signals"
    21  	hclog "github.com/hashicorp/go-hclog"
    22  	multierror "github.com/hashicorp/go-multierror"
    23  	"github.com/hernad/nomad/client/allocdir"
    24  	"github.com/hernad/nomad/client/lib/fifo"
    25  	"github.com/hernad/nomad/client/lib/resources"
    26  	cstructs "github.com/hernad/nomad/client/structs"
    27  	"github.com/hernad/nomad/helper/stats"
    28  	"github.com/hernad/nomad/plugins/drivers"
    29  	"github.com/syndtr/gocapability/capability"
    30  )
    31  
const (
	// ExecutorVersionLatest is the current and latest version of the
	// executor. It is the value reported by UniversalExecutor.Version.
	ExecutorVersionLatest = "2.0.0"

	// ExecutorVersionPre0_9 is the version of the executor used prior to the
	// release of 0.9.x.
	ExecutorVersionPre0_9 = "1.1.0"

	// IsolationModePrivate represents the private isolation mode for a
	// namespace.
	IsolationModePrivate = "private"

	// IsolationModeHost represents the host isolation mode for a namespace.
	IsolationModeHost = "host"
)
    46  
var (
	// ExecutorBasicMeasuredMemStats lists the memory statistics the basic
	// executor exposes.
	ExecutorBasicMeasuredMemStats = []string{"RSS", "Swap"}
	// ExecutorBasicMeasuredCpuStats lists the CPU statistics the basic
	// executor exposes.
	ExecutorBasicMeasuredCpuStats = []string{"System Mode", "User Mode", "Percent"}
)
    52  
// Executor is the interface which allows a driver to launch and supervise
// a process.
type Executor interface {
	// Launch starts a user process configured by the given ExecCommand and
	// returns its initial state.
	Launch(launchCmd *ExecCommand) (*ProcessState, error)

	// Wait blocks until the process exits or an error occurs. It also
	// returns when ctx is done.
	Wait(ctx context.Context) (*ProcessState, error)

	// Shutdown will shutdown the executor by stopping the user process and
	// cleaning up any resources created by the executor. The shutdown
	// sequence will first send the given signal to the process. This
	// defaults to "SIGINT" if not specified. The executor will then wait for
	// the process to exit before cleaning up other resources. If the
	// executor waits longer than the given grace period, the process is
	// forcefully killed.
	//
	// To force kill the user process, gracePeriod can be set to 0.
	Shutdown(signal string, gracePeriod time.Duration) error

	// UpdateResources updates any resource isolation enforcement with new
	// constraints if supported.
	UpdateResources(*drivers.Resources) error

	// Version returns the executor API version.
	Version() (*ExecutorVersion, error)

	// Stats returns a channel of stats. Stats are collected and pushed to
	// the channel on the given interval.
	Stats(context.Context, time.Duration) (<-chan *cstructs.TaskResourceUsage, error)

	// Signal sends the given signal to the user process.
	Signal(os.Signal) error

	// Exec executes the given command and args inside the executor context
	// and returns the output and exit code.
	Exec(deadline time.Time, cmd string, args []string) ([]byte, int, error)

	// ExecStreaming runs cmd (cmd[0] is the program, the rest its arguments)
	// and connects its I/O to the given stream.
	// NOTE(review): tty presumably requests a pseudo-terminal for the
	// command — confirm against the execHelper implementation.
	ExecStreaming(ctx context.Context, cmd []string, tty bool,
		stream drivers.ExecTaskStream) error
}
    93  
// ExecCommand holds the user command, args, and other isolation related
// settings.
//
// Important (!): when adding fields, make sure to update the RPC methods in
// grpcExecutorClient.Launch and grpcExecutorServer.Launch. Number of hours
// spent tracking this down: too many.
type ExecCommand struct {
	// Cmd is the command that the user wants to run.
	Cmd string

	// Args is the args of the command that the user wants to run.
	Args []string

	// Resources defined by the task.
	Resources *drivers.Resources

	// StdoutPath is the path the process stdout should be written to.
	// stdout caches the writer opened for it (or one injected through
	// SetWriters).
	StdoutPath string
	stdout     io.WriteCloser

	// StderrPath is the path the process stderr should be written to.
	// stderr caches the writer opened for it (or one injected through
	// SetWriters).
	StderrPath string
	stderr     io.WriteCloser

	// Env is the list of KEY=val pairs of environment variables to be set.
	Env []string

	// User is the user which the executor uses to run the command.
	User string

	// TaskDir is the path of the task's directory on the host. It is used
	// as the working directory of the launched command.
	TaskDir string

	// ResourceLimits determines whether resource limits are enforced by the
	// executor.
	ResourceLimits bool

	// BasicProcessCgroup marks whether we put the process in a cgroup.
	// Setting this field doesn't enforce resource limits. To enforce limits,
	// set ResourceLimits. Using the cgroup does allow more precise cleanup
	// of processes.
	BasicProcessCgroup bool

	// NoPivotRoot disables using pivot_root for isolation, useful when the root
	// partition is on a ramdisk which does not support pivot_root,
	// see man 2 pivot_root
	NoPivotRoot bool

	// Mounts are the host paths to be made available inside rootfs.
	Mounts []*drivers.MountConfig

	// Devices are the device nodes to be created in the isolation
	// environment.
	Devices []*drivers.DeviceConfig

	// NetworkIsolation is the network isolation configuration.
	NetworkIsolation *drivers.NetworkIsolationSpec

	// ModePID is the PID isolation mode (private or host).
	ModePID string

	// ModeIPC is the IPC isolation mode (private or host).
	ModeIPC string

	// Capabilities are the linux capabilities to be enabled by the task driver.
	Capabilities []string
}
   159  
   160  // SetWriters sets the writer for the process stdout and stderr. This should
   161  // not be used if writing to a file path such as a fifo file. SetStdoutWriter
   162  // is mainly used for unit testing purposes.
   163  func (c *ExecCommand) SetWriters(out io.WriteCloser, err io.WriteCloser) {
   164  	c.stdout = out
   165  	c.stderr = err
   166  }
   167  
   168  // GetWriters returns the unexported io.WriteCloser for the stdout and stderr
   169  // handles. This is mainly used for unit testing purposes.
   170  func (c *ExecCommand) GetWriters() (stdout io.WriteCloser, stderr io.WriteCloser) {
   171  	return c.stdout, c.stderr
   172  }
   173  
   174  type nopCloser struct {
   175  	io.Writer
   176  }
   177  
   178  func (nopCloser) Close() error { return nil }
   179  
   180  // Stdout returns a writer for the configured file descriptor
   181  func (c *ExecCommand) Stdout() (io.WriteCloser, error) {
   182  	if c.stdout == nil {
   183  		if c.StdoutPath != "" && c.StdoutPath != os.DevNull {
   184  			f, err := fifo.OpenWriter(c.StdoutPath)
   185  			if err != nil {
   186  				return nil, fmt.Errorf("failed to create stdout: %v", err)
   187  			}
   188  			c.stdout = f
   189  		} else {
   190  			c.stdout = nopCloser{io.Discard}
   191  		}
   192  	}
   193  	return c.stdout, nil
   194  }
   195  
   196  // Stderr returns a writer for the configured file descriptor
   197  func (c *ExecCommand) Stderr() (io.WriteCloser, error) {
   198  	if c.stderr == nil {
   199  		if c.StderrPath != "" && c.StderrPath != os.DevNull {
   200  			f, err := fifo.OpenWriter(c.StderrPath)
   201  			if err != nil {
   202  				return nil, fmt.Errorf("failed to create stderr: %v", err)
   203  			}
   204  			c.stderr = f
   205  		} else {
   206  			c.stderr = nopCloser{io.Discard}
   207  		}
   208  	}
   209  	return c.stderr, nil
   210  }
   211  
   212  func (c *ExecCommand) Close() {
   213  	if c.stdout != nil {
   214  		c.stdout.Close()
   215  	}
   216  	if c.stderr != nil {
   217  		c.stderr.Close()
   218  	}
   219  }
   220  
// ProcessState holds information about the state of a user process.
type ProcessState struct {
	// Pid is the process id of the user process.
	Pid int
	// ExitCode is the exit code of the process. Launch reports -1 here
	// while the process is still running; after a signal-caused exit it is
	// set to 128 plus the signal number.
	ExitCode int
	// Signal is the number of the signal that terminated the process, or 0
	// when it was not terminated by a signal.
	Signal int
	// Time is when this state was recorded.
	Time time.Time
}
   228  
   229  // ExecutorVersion is the version of the executor
   230  type ExecutorVersion struct {
   231  	Version string
   232  }
   233  
   234  func (v *ExecutorVersion) GoString() string {
   235  	return v.Version
   236  }
   237  
// UniversalExecutor is an implementation of the Executor which launches and
// supervises processes. In addition to process supervision it provides resource
// and file system isolation
type UniversalExecutor struct {
	// childCmd is the command for the supervised user process; it is
	// configured and started by Launch.
	childCmd exec.Cmd
	// commandCfg is the ExecCommand passed to Launch. It is nil until Launch
	// has been called.
	commandCfg *ExecCommand

	// exitState is set by wait() once the process has exited, before
	// processExited is closed.
	exitState *ProcessState
	// processExited is closed by wait() to signal that the process exited.
	processExited chan interface{}

	// containment is used to cleanup resources created by the executor
	// currently only used for killing pids via freezer cgroup on linux
	containment resources.Containment

	// CPU usage trackers and the pid collector used when reporting stats.
	totalCpuStats  *stats.CpuStats
	userCpuStats   *stats.CpuStats
	systemCpuStats *stats.CpuStats
	pidCollector   *pidCollector

	logger hclog.Logger
}
   259  
   260  // NewExecutor returns an Executor
   261  func NewExecutor(logger hclog.Logger, cpuTotalTicks uint64) Executor {
   262  	logger = logger.Named("executor")
   263  	stats.SetCpuTotalTicks(cpuTotalTicks)
   264  
   265  	return &UniversalExecutor{
   266  		logger:         logger,
   267  		processExited:  make(chan interface{}),
   268  		totalCpuStats:  stats.NewCpuStats(),
   269  		userCpuStats:   stats.NewCpuStats(),
   270  		systemCpuStats: stats.NewCpuStats(),
   271  		pidCollector:   newPidCollector(logger),
   272  	}
   273  }
   274  
   275  // Version returns the api version of the executor
   276  func (e *UniversalExecutor) Version() (*ExecutorVersion, error) {
   277  	return &ExecutorVersion{Version: ExecutorVersionLatest}, nil
   278  }
   279  
// Launch launches the main process and returns its state. It also
// configures and applies isolation on certain platforms.
//
// The given command is stored as the executor's configuration, the child
// command is prepared (user, working directory, process group, optional
// resource container, stdout/stderr, binary path, args and env) and then
// started. On success two goroutines are spawned: one collecting pids and one
// waiting for the process to exit. The returned state carries the new pid and
// an ExitCode of -1 since the process is still running.
func (e *UniversalExecutor) Launch(command *ExecCommand) (*ProcessState, error) {
	e.logger.Trace("preparing to launch command", "command", command.Cmd, "args", strings.Join(command.Args, " "))

	e.commandCfg = command

	// setting the user of the process
	if command.User != "" {
		e.logger.Debug("running command as user", "user", command.User)
		if err := setCmdUser(&e.childCmd, command.User); err != nil {
			return nil, err
		}
	}

	// set the task dir as the working directory for the command
	e.childCmd.Dir = e.commandCfg.TaskDir

	// start command in separate process group
	if err := e.setNewProcessGroup(); err != nil {
		return nil, err
	}

	// Maybe setup containment (for now, cgroups only on linux). Note that
	// the pid handed to the container is the executor's own pid, not the
	// (not yet started) child's.
	if e.commandCfg.ResourceLimits || e.commandCfg.BasicProcessCgroup {
		pid := os.Getpid()
		if err := e.configureResourceContainer(pid); err != nil {
			e.logger.Error("failed to configure resource container", "pid", pid, "error", err)
			return nil, err
		}
	}

	// Open (or reuse) the output writers and attach them to the child.
	stdout, err := e.commandCfg.Stdout()
	if err != nil {
		return nil, err
	}
	stderr, err := e.commandCfg.Stderr()
	if err != nil {
		return nil, err
	}

	e.childCmd.Stdout = stdout
	e.childCmd.Stderr = stderr

	// Look up the binary path and make it executable
	absPath, err := lookupBin(command.TaskDir, command.Cmd)
	if err != nil {
		return nil, err
	}

	if err := makeExecutable(absPath); err != nil {
		return nil, err
	}

	path := absPath

	// Set the commands arguments; argv[0] is the resolved binary path.
	e.childCmd.Path = path
	e.childCmd.Args = append([]string{e.childCmd.Path}, command.Args...)
	e.childCmd.Env = e.commandCfg.Env

	// Start the process
	if err = withNetworkIsolation(e.childCmd.Start, command.NetworkIsolation); err != nil {
		return nil, fmt.Errorf("failed to start command path=%q --- args=%q: %v", path, e.childCmd.Args, err)
	}

	// Track the process' pids for stats and watch for its exit in the
	// background.
	go e.pidCollector.collectPids(e.processExited, e.getAllPids)
	go e.wait()
	return &ProcessState{Pid: e.childCmd.Process.Pid, ExitCode: -1, Time: time.Now()}, nil
}
   350  
   351  // Exec a command inside a container for exec and java drivers.
   352  func (e *UniversalExecutor) Exec(deadline time.Time, name string, args []string) ([]byte, int, error) {
   353  	ctx, cancel := context.WithDeadline(context.Background(), deadline)
   354  	defer cancel()
   355  	return ExecScript(ctx, e.childCmd.Dir, e.commandCfg.Env, e.childCmd.SysProcAttr, e.commandCfg.NetworkIsolation, name, args)
   356  }
   357  
   358  // ExecScript executes cmd with args and returns the output, exit code, and
   359  // error. Output is truncated to drivers/shared/structs.CheckBufSize
   360  func ExecScript(ctx context.Context, dir string, env []string, attrs *syscall.SysProcAttr,
   361  	netSpec *drivers.NetworkIsolationSpec, name string, args []string) ([]byte, int, error) {
   362  
   363  	cmd := exec.CommandContext(ctx, name, args...)
   364  
   365  	// Copy runtime environment from the main command
   366  	cmd.SysProcAttr = attrs
   367  	cmd.Dir = dir
   368  	cmd.Env = env
   369  
   370  	// Capture output
   371  	buf, _ := circbuf.NewBuffer(int64(drivers.CheckBufSize))
   372  	cmd.Stdout = buf
   373  	cmd.Stderr = buf
   374  
   375  	if err := withNetworkIsolation(cmd.Run, netSpec); err != nil {
   376  		exitErr, ok := err.(*exec.ExitError)
   377  		if !ok {
   378  			// Non-exit error, return it and let the caller treat
   379  			// it as a critical failure
   380  			return nil, 0, err
   381  		}
   382  
   383  		// Some kind of error happened; default to critical
   384  		exitCode := 2
   385  		if status, ok := exitErr.Sys().(syscall.WaitStatus); ok {
   386  			exitCode = status.ExitStatus()
   387  		}
   388  
   389  		// Don't return the exitError as the caller only needs the
   390  		// output and code.
   391  		return buf.Bytes(), exitCode, nil
   392  	}
   393  	return buf.Bytes(), 0, nil
   394  }
   395  
   396  func (e *UniversalExecutor) ExecStreaming(ctx context.Context, command []string, tty bool,
   397  	stream drivers.ExecTaskStream) error {
   398  
   399  	if len(command) == 0 {
   400  		return fmt.Errorf("command is required")
   401  	}
   402  
   403  	cmd := exec.CommandContext(ctx, command[0], command[1:]...)
   404  
   405  	cmd.Dir = "/"
   406  	cmd.Env = e.childCmd.Env
   407  
   408  	execHelper := &execHelper{
   409  		logger: e.logger,
   410  
   411  		newTerminal: func() (func() (*os.File, error), *os.File, error) {
   412  			pty, tty, err := pty.Open()
   413  			if err != nil {
   414  				return nil, nil, err
   415  			}
   416  
   417  			return func() (*os.File, error) { return pty, nil }, tty, err
   418  		},
   419  		setTTY: func(tty *os.File) error {
   420  			cmd.SysProcAttr = sessionCmdAttr(tty)
   421  
   422  			cmd.Stdin = tty
   423  			cmd.Stdout = tty
   424  			cmd.Stderr = tty
   425  			return nil
   426  		},
   427  		setIO: func(stdin io.Reader, stdout, stderr io.Writer) error {
   428  			cmd.Stdin = stdin
   429  			cmd.Stdout = stdout
   430  			cmd.Stderr = stderr
   431  			return nil
   432  		},
   433  		processStart: func() error {
   434  			if u := e.commandCfg.User; u != "" {
   435  				if err := setCmdUser(cmd, u); err != nil {
   436  					return err
   437  				}
   438  			}
   439  
   440  			return withNetworkIsolation(cmd.Start, e.commandCfg.NetworkIsolation)
   441  		},
   442  		processWait: func() (*os.ProcessState, error) {
   443  			err := cmd.Wait()
   444  			return cmd.ProcessState, err
   445  		},
   446  	}
   447  
   448  	return execHelper.run(ctx, tty, stream)
   449  }
   450  
   451  // Wait waits until a process has exited and returns it's exitcode and errors
   452  func (e *UniversalExecutor) Wait(ctx context.Context) (*ProcessState, error) {
   453  	select {
   454  	case <-ctx.Done():
   455  		return nil, ctx.Err()
   456  	case <-e.processExited:
   457  		return e.exitState, nil
   458  	}
   459  }
   460  
// UpdateResources is a no-op for the UniversalExecutor: the given resources
// are ignored and nil is always returned.
func (e *UniversalExecutor) UpdateResources(resources *drivers.Resources) error {
	return nil
}
   464  
   465  func (e *UniversalExecutor) wait() {
   466  	defer close(e.processExited)
   467  	defer e.commandCfg.Close()
   468  	pid := e.childCmd.Process.Pid
   469  	err := e.childCmd.Wait()
   470  	if err == nil {
   471  		e.exitState = &ProcessState{Pid: pid, ExitCode: 0, Time: time.Now()}
   472  		return
   473  	}
   474  
   475  	exitCode := 1
   476  	var signal int
   477  	if exitErr, ok := err.(*exec.ExitError); ok {
   478  		if status, ok := exitErr.Sys().(syscall.WaitStatus); ok {
   479  			exitCode = status.ExitStatus()
   480  			if status.Signaled() {
   481  				// bash(1) uses the lower 7 bits of a uint8
   482  				// to indicate normal program failure (see
   483  				// <sysexits.h>). If a process terminates due
   484  				// to a signal, encode the signal number to
   485  				// indicate which signal caused the process
   486  				// to terminate.  Mirror this exit code
   487  				// encoding scheme.
   488  				const exitSignalBase = 128
   489  				signal = int(status.Signal())
   490  				exitCode = exitSignalBase + signal
   491  			}
   492  		}
   493  	} else {
   494  		e.logger.Warn("unexpected Cmd.Wait() error type", "error", err)
   495  	}
   496  
   497  	e.exitState = &ProcessState{Pid: pid, ExitCode: exitCode, Signal: signal, Time: time.Now()}
   498  }
   499  
var (
	// finishedErr is the error message received when trying to kill an
	// already exited process.
	finishedErr = "os: process already finished"

	// noSuchProcessErr is the error message received when trying to kill a
	// non-existing process (e.g. when killing a process group).
	noSuchProcessErr = "no such process"
)
   509  
// Shutdown stops the user process and cleans up what the executor created for
// it.
//
// If Launch was never called it returns nil; if no process was started it
// returns an error. With a positive grace period the given signal (default
// "SIGINT") is sent first and the process gets up to grace to exit before
// being killed; with a grace of 0 it is killed immediately. Shutdown then
// waits up to 15 seconds for the exit to be observed. Finally any remaining
// processes are cleaned up: through the resource containment when
// ResourceLimits or BasicProcessCgroup is set, otherwise by killing the
// process tree. Errors from the final wait and cleanup are accumulated and
// returned together.
func (e *UniversalExecutor) Shutdown(signal string, grace time.Duration) error {
	e.logger.Debug("shutdown requested", "signal", signal, "grace_period_ms", grace.Round(time.Millisecond))
	var merr multierror.Error

	// If the executor did not launch a process, return.
	if e.commandCfg == nil {
		return nil
	}

	// If there is no process we can't shutdown
	if e.childCmd.Process == nil {
		e.logger.Warn("failed to shutdown due to missing process", "error", "no process found")
		return fmt.Errorf("executor failed to shutdown error: no process found")
	}

	proc, err := os.FindProcess(e.childCmd.Process.Pid)
	if err != nil {
		err = fmt.Errorf("executor failed to find process: %v", err)
		e.logger.Warn("failed to shutdown due to inability to find process", "pid", e.childCmd.Process.Pid, "error", err)
		return err
	}

	// If grace is 0 then skip the graceful shutdown logic and kill right away
	if grace > 0 {
		// Default signal to SIGINT if not set
		if signal == "" {
			signal = "SIGINT"
		}

		sig, ok := signals.SignalLookup[signal]
		if !ok {
			err = fmt.Errorf("error unknown signal given for shutdown: %s", signal)
			e.logger.Warn("failed to shutdown", "error", err)
			return err
		}

		if err := e.shutdownProcess(sig, proc); err != nil {
			e.logger.Warn("failed to shutdown process", "pid", proc.Pid, "error", err)
			return err
		}

		// Give the process the grace period to exit on its own; force kill
		// it once the period has elapsed. The Kill error is ignored.
		select {
		case <-e.processExited:
		case <-time.After(grace):
			proc.Kill()
		}
	} else {
		proc.Kill()
	}

	// Wait for process to exit
	select {
	case <-e.processExited:
	case <-time.After(time.Second * 15):
		e.logger.Warn("process did not exit after 15 seconds")
		merr.Errors = append(merr.Errors, fmt.Errorf("process did not exit after 15 seconds"))
	}

	// prefer killing the process via platform-dependent resource containment
	killByContainment := e.commandCfg.ResourceLimits || e.commandCfg.BasicProcessCgroup

	if !killByContainment {
		// there is no containment, so kill the group the old fashioned way by sending
		// SIGKILL to the negative pid. An "already finished" error is expected
		// here and not reported.
		if cleanupChildrenErr := e.killProcessTree(proc); cleanupChildrenErr != nil && cleanupChildrenErr.Error() != finishedErr {
			merr.Errors = append(merr.Errors,
				fmt.Errorf("can't kill process with pid %d: %v", e.childCmd.Process.Pid, cleanupChildrenErr))
		}
	} else {
		// there is containment available (e.g. cgroups) so defer to that implementation
		// for killing the processes
		// NOTE(review): this assumes e.containment was set when the resource
		// container was configured in Launch — confirm for all platforms.
		if cleanupErr := e.containment.Cleanup(); cleanupErr != nil {
			e.logger.Warn("containment cleanup failed", "error", cleanupErr)
			merr.Errors = append(merr.Errors, cleanupErr)
		}
	}

	if err = merr.ErrorOrNil(); err != nil {
		e.logger.Warn("failed to shutdown due to some error", "error", err.Error())
		return err
	}

	return nil
}
   596  
   597  // Signal sends the passed signal to the task
   598  func (e *UniversalExecutor) Signal(s os.Signal) error {
   599  	if e.childCmd.Process == nil {
   600  		return fmt.Errorf("Task not yet run")
   601  	}
   602  
   603  	e.logger.Debug("sending signal to PID", "signal", s, "pid", e.childCmd.Process.Pid)
   604  	err := e.childCmd.Process.Signal(s)
   605  	if err != nil {
   606  		e.logger.Error("sending signal failed", "signal", s, "error", err)
   607  		return err
   608  	}
   609  
   610  	return nil
   611  }
   612  
   613  func (e *UniversalExecutor) Stats(ctx context.Context, interval time.Duration) (<-chan *cstructs.TaskResourceUsage, error) {
   614  	ch := make(chan *cstructs.TaskResourceUsage)
   615  	go e.handleStats(ch, ctx, interval)
   616  	return ch, nil
   617  }
   618  
   619  func (e *UniversalExecutor) handleStats(ch chan *cstructs.TaskResourceUsage, ctx context.Context, interval time.Duration) {
   620  	defer close(ch)
   621  	timer := time.NewTimer(0)
   622  	for {
   623  		select {
   624  		case <-ctx.Done():
   625  			return
   626  
   627  		case <-timer.C:
   628  			timer.Reset(interval)
   629  		}
   630  
   631  		pidStats, err := e.pidCollector.pidStats()
   632  		if err != nil {
   633  			e.logger.Warn("error collecting stats", "error", err)
   634  			return
   635  		}
   636  
   637  		select {
   638  		case <-ctx.Done():
   639  			return
   640  		case ch <- aggregatedResourceUsage(e.systemCpuStats, pidStats):
   641  		}
   642  	}
   643  }
   644  
   645  // lookupBin looks for path to the binary to run by looking for the binary in
   646  // the following locations, in-order:
   647  // task/local/, task/, on the host file system, in host $PATH
   648  // The return path is absolute.
   649  func lookupBin(taskDir string, bin string) (string, error) {
   650  	// Check in the local directory
   651  	local := filepath.Join(taskDir, allocdir.TaskLocal, bin)
   652  	if _, err := os.Stat(local); err == nil {
   653  		return local, nil
   654  	}
   655  
   656  	// Check at the root of the task's directory
   657  	root := filepath.Join(taskDir, bin)
   658  	if _, err := os.Stat(root); err == nil {
   659  		return root, nil
   660  	}
   661  
   662  	// when checking host paths, check with Stat first if path is absolute
   663  	// as exec.LookPath only considers files already marked as executable
   664  	// and only consider this for absolute paths to avoid depending on
   665  	// current directory of nomad which may cause unexpected behavior
   666  	if _, err := os.Stat(bin); err == nil && filepath.IsAbs(bin) {
   667  		return bin, nil
   668  	}
   669  
   670  	// Check the $PATH
   671  	if host, err := exec.LookPath(bin); err == nil {
   672  		return host, nil
   673  	}
   674  
   675  	return "", fmt.Errorf("binary %q could not be found", bin)
   676  }
   677  
   678  // makeExecutable makes the given file executable for root,group,others.
   679  func makeExecutable(binPath string) error {
   680  	if runtime.GOOS == "windows" {
   681  		return nil
   682  	}
   683  
   684  	fi, err := os.Stat(binPath)
   685  	if err != nil {
   686  		if os.IsNotExist(err) {
   687  			return fmt.Errorf("binary %q does not exist", binPath)
   688  		}
   689  		return fmt.Errorf("specified binary is invalid: %v", err)
   690  	}
   691  
   692  	// If it is not executable, make it so.
   693  	perm := fi.Mode().Perm()
   694  	req := os.FileMode(0555)
   695  	if perm&req != req {
   696  		if err := os.Chmod(binPath, perm|req); err != nil {
   697  			return fmt.Errorf("error making %q executable: %s", binPath, err)
   698  		}
   699  	}
   700  	return nil
   701  }
   702  
   703  // SupportedCaps returns a list of all supported capabilities in kernel.
   704  func SupportedCaps(allowNetRaw bool) []string {
   705  	var allCaps []string
   706  	last := capability.CAP_LAST_CAP
   707  	// workaround for RHEL6 which has no /proc/sys/kernel/cap_last_cap
   708  	if last == capability.Cap(63) {
   709  		last = capability.CAP_BLOCK_SUSPEND
   710  	}
   711  	for _, cap := range capability.List() {
   712  		if cap > last {
   713  			continue
   714  		}
   715  		if !allowNetRaw && cap == capability.CAP_NET_RAW {
   716  			continue
   717  		}
   718  		allCaps = append(allCaps, fmt.Sprintf("CAP_%s", strings.ToUpper(cap.String())))
   719  	}
   720  	return allCaps
   721  }