github.com/hhrutter/nomad@v0.6.0-rc2.0.20170723054333-80c4b03f0705/client/driver/executor/executor.go (about)

     1  package executor
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"io/ioutil"
     7  	"log"
     8  	"net"
     9  	"os"
    10  	"os/exec"
    11  	"path/filepath"
    12  	"runtime"
    13  	"strconv"
    14  	"strings"
    15  	"sync"
    16  	"syscall"
    17  	"time"
    18  
    19  	"github.com/armon/circbuf"
    20  	"github.com/hashicorp/go-multierror"
    21  	"github.com/mitchellh/go-ps"
    22  	"github.com/shirou/gopsutil/process"
    23  
    24  	"github.com/hashicorp/nomad/client/allocdir"
    25  	"github.com/hashicorp/nomad/client/driver/env"
    26  	"github.com/hashicorp/nomad/client/driver/logging"
    27  	"github.com/hashicorp/nomad/client/stats"
    28  	shelpers "github.com/hashicorp/nomad/helper/stats"
    29  	"github.com/hashicorp/nomad/nomad/structs"
    30  
    31  	dstructs "github.com/hashicorp/nomad/client/driver/structs"
    32  	cstructs "github.com/hashicorp/nomad/client/structs"
    33  )
    34  
const (
	// pidScanInterval is the interval at which the executor scans the process
	// tree to find the pids that the executor and its child processes have
	// forked.
	pidScanInterval = 5 * time.Second
)
    41  
var (
	// ExecutorBasicMeasuredMemStats lists the memory statistics the basic
	// executor exposes.
	ExecutorBasicMeasuredMemStats = []string{"RSS", "Swap"}
	// ExecutorBasicMeasuredCpuStats lists the CPU statistics the basic
	// executor exposes.
	ExecutorBasicMeasuredCpuStats = []string{"System Mode", "User Mode", "Percent"}
)
    47  
// Executor is the interface which allows a driver to launch and supervise
// a process.
type Executor interface {
	// SetContext sets the context the executor works with. It should be
	// called before a command is launched.
	SetContext(ctx *ExecutorContext) error
	// LaunchCmd launches the given command and returns the state of the
	// started process.
	LaunchCmd(command *ExecCommand) (*ProcessState, error)
	// LaunchSyslogServer starts a syslog server and returns its state.
	LaunchSyslogServer() (*SyslogServerState, error)
	// Wait blocks until the launched process has exited and returns its
	// final state.
	Wait() (*ProcessState, error)
	// ShutDown asks the launched process to stop.
	ShutDown() error
	// Exit stops the launched process and releases the executor's resources.
	Exit() error
	// UpdateLogConfig applies a new log configuration to the executor.
	UpdateLogConfig(logConfig *structs.LogConfig) error
	// UpdateTask replaces the task the executor is running on behalf of.
	UpdateTask(task *structs.Task) error
	// Version returns the version of the executor.
	Version() (*ExecutorVersion, error)
	// Stats returns the resource usage of the task.
	Stats() (*cstructs.TaskResourceUsage, error)
	// Signal sends the signal s to the launched process.
	Signal(s os.Signal) error
	// Exec runs cmd with args, bounded by deadline, and returns its output
	// and exit code.
	Exec(deadline time.Time, cmd string, args []string) ([]byte, int, error)
}
    64  
// ExecutorContext holds the context used to configure the command the user
// wants to run and to isolate it.
type ExecutorContext struct {
	// TaskEnv holds information about the environment of a Task
	TaskEnv *env.TaskEnv

	// Task is the task whose executor is being launched
	Task *structs.Task

	// AllocID is the allocation id to which the task belongs
	AllocID string

	// TaskDir is the host path to the task's root
	TaskDir string

	// LogDir is the host path where logs should be written
	LogDir string

	// Driver is the name of the driver that invoked the executor
	Driver string

	// PortUpperBound is the upper bound of the ports that we can use to start
	// the syslog server
	PortUpperBound uint

	// PortLowerBound is the lower bound of the ports that we can use to start
	// the syslog server
	PortLowerBound uint
}
    94  
// ExecCommand holds the user command, its arguments, and other
// isolation-related settings.
type ExecCommand struct {
	// Cmd is the command that the user wants to run.
	Cmd string

	// Args are the arguments of the command that the user wants to run.
	Args []string

	// FSIsolation determines whether the command would be run in a chroot.
	FSIsolation bool

	// User is the user which the executor uses to run the command.
	User string

	// ResourceLimits determines whether resource limits are enforced by the
	// executor.
	ResourceLimits bool
}
   114  
// ProcessState holds information about the state of a user process.
//
// Within this file, LaunchCmd reports the pid of the started process together
// with an ExitCode of -1, while wait records Pid 0 together with the final
// ExitCode and, if the process was terminated by a signal, the Signal number.
// Time is the time at which the state was captured.
type ProcessState struct {
	Pid             int
	ExitCode        int
	Signal          int
	IsolationConfig *dstructs.IsolationConfig
	Time            time.Time
}
   123  
// nomadPid holds a pid and its cpu percentage calculators (total, user mode
// and system mode).
type nomadPid struct {
	pid           int
	cpuStatsTotal *stats.CpuStats
	cpuStatsUser  *stats.CpuStats
	cpuStatsSys   *stats.CpuStats
}
   131  
// SyslogServerState holds the address and isolation information of a launched
// syslog server.
type SyslogServerState struct {
	IsolationConfig *dstructs.IsolationConfig
	Addr            string
}
   138  
   139  // ExecutorVersion is the version of the executor
   140  type ExecutorVersion struct {
   141  	Version string
   142  }
   143  
   144  func (v *ExecutorVersion) GoString() string {
   145  	return v.Version
   146  }
   147  
// UniversalExecutor is an implementation of the Executor which launches and
// supervises processes. In addition to process supervision it provides resource
// and file system isolation.
type UniversalExecutor struct {
	// cmd is the user command started by LaunchCmd, ctx is the context
	// supplied through SetContext, and command is the ExecCommand handed to
	// LaunchCmd (nil until a command has been launched).
	cmd     exec.Cmd
	ctx     *ExecutorContext
	command *ExecCommand

	// pids holds the processes found by collectPids and is guarded by
	// pidLock. exitState is written by wait before processExited is closed.
	// When fsIsolationEnforced is set, LaunchCmd runs the binary through a
	// path relative to the task directory.
	pids                map[int]*nomadPid
	pidLock             sync.RWMutex
	exitState           *ProcessState
	processExited       chan interface{}
	fsIsolationEnforced bool

	// lre and lro are the stderr and stdout file rotators created by
	// configureLoggers; rotatorLock is held while they are created and while
	// UpdateTask adjusts them.
	lre         *logging.FileRotator
	lro         *logging.FileRotator
	rotatorLock sync.Mutex

	shutdownCh chan struct{}

	// syslogServer, when non-nil, is shut down by Exit.
	syslogServer *logging.SyslogServer
	syslogChan   chan *logging.SyslogMessage

	// resConCtx supplies the isolation config reported in ProcessState and
	// performs the resource container cleanup in Exit.
	resConCtx resourceContainerContext

	// The CPU stats calculators are created in NewExecutor; systemCpuStats is
	// used by aggregatedResourceUsage to convert a percentage into ticks.
	totalCpuStats  *stats.CpuStats
	userCpuStats   *stats.CpuStats
	systemCpuStats *stats.CpuStats
	logger         *log.Logger
}
   178  
   179  // NewExecutor returns an Executor
   180  func NewExecutor(logger *log.Logger) Executor {
   181  	if err := shelpers.Init(); err != nil {
   182  		logger.Printf("[ERR] executor: unable to initialize stats: %v", err)
   183  	}
   184  
   185  	exec := &UniversalExecutor{
   186  		logger:         logger,
   187  		processExited:  make(chan interface{}),
   188  		totalCpuStats:  stats.NewCpuStats(),
   189  		userCpuStats:   stats.NewCpuStats(),
   190  		systemCpuStats: stats.NewCpuStats(),
   191  		pids:           make(map[int]*nomadPid),
   192  	}
   193  
   194  	return exec
   195  }
   196  
   197  // Version returns the api version of the executor
   198  func (e *UniversalExecutor) Version() (*ExecutorVersion, error) {
   199  	return &ExecutorVersion{Version: "1.1.0"}, nil
   200  }
   201  
// SetContext stores ctx as the executor's context and should be the first call
// after launching the executor; LaunchCmd refuses to run until a context has
// been set. The returned error is always nil.
func (e *UniversalExecutor) SetContext(ctx *ExecutorContext) error {
	e.ctx = ctx
	return nil
}
   208  
   209  // LaunchCmd launches the main process and returns its state. It also
   210  // configures an applies isolation on certain platforms.
   211  func (e *UniversalExecutor) LaunchCmd(command *ExecCommand) (*ProcessState, error) {
   212  	e.logger.Printf("[DEBUG] executor: launching command %v %v", command.Cmd, strings.Join(command.Args, " "))
   213  
   214  	// Ensure the context has been set first
   215  	if e.ctx == nil {
   216  		return nil, fmt.Errorf("SetContext must be called before launching a command")
   217  	}
   218  
   219  	e.command = command
   220  
   221  	// setting the user of the process
   222  	if command.User != "" {
   223  		e.logger.Printf("[DEBUG] executor: running command as %s", command.User)
   224  		if err := e.runAs(command.User); err != nil {
   225  			return nil, err
   226  		}
   227  	}
   228  
   229  	// set the task dir as the working directory for the command
   230  	e.cmd.Dir = e.ctx.TaskDir
   231  
   232  	// configuring the chroot, resource container, and start the plugin
   233  	// process in the chroot.
   234  	if err := e.configureIsolation(); err != nil {
   235  		return nil, err
   236  	}
   237  	// Apply ourselves into the resource container. The executor MUST be in
   238  	// the resource container before the user task is started, otherwise we
   239  	// are subject to a fork attack in which a process escapes isolation by
   240  	// immediately forking.
   241  	if err := e.applyLimits(os.Getpid()); err != nil {
   242  		return nil, err
   243  	}
   244  
   245  	// Setup the loggers
   246  	if err := e.configureLoggers(); err != nil {
   247  		return nil, err
   248  	}
   249  	e.cmd.Stdout = e.lro
   250  	e.cmd.Stderr = e.lre
   251  
   252  	// Look up the binary path and make it executable
   253  	absPath, err := e.lookupBin(e.ctx.TaskEnv.ReplaceEnv(command.Cmd))
   254  	if err != nil {
   255  		return nil, err
   256  	}
   257  
   258  	if err := e.makeExecutable(absPath); err != nil {
   259  		return nil, err
   260  	}
   261  
   262  	path := absPath
   263  
   264  	// Determine the path to run as it may have to be relative to the chroot.
   265  	if e.fsIsolationEnforced {
   266  		rel, err := filepath.Rel(e.ctx.TaskDir, path)
   267  		if err != nil {
   268  			return nil, fmt.Errorf("failed to determine relative path base=%q target=%q: %v", e.ctx.TaskDir, path, err)
   269  		}
   270  		path = rel
   271  	}
   272  
   273  	// Set the commands arguments
   274  	e.cmd.Path = path
   275  	e.cmd.Args = append([]string{e.cmd.Path}, e.ctx.TaskEnv.ParseAndReplace(command.Args)...)
   276  	e.cmd.Env = e.ctx.TaskEnv.List()
   277  
   278  	// Start the process
   279  	if err := e.cmd.Start(); err != nil {
   280  		return nil, fmt.Errorf("failed to start command path=%q --- args=%q: %v", path, e.cmd.Args, err)
   281  	}
   282  	go e.collectPids()
   283  	go e.wait()
   284  	ic := e.resConCtx.getIsolationConfig()
   285  	return &ProcessState{Pid: e.cmd.Process.Pid, ExitCode: -1, IsolationConfig: ic, Time: time.Now()}, nil
   286  }
   287  
   288  // Exec a command inside a container for exec and java drivers.
   289  func (e *UniversalExecutor) Exec(deadline time.Time, name string, args []string) ([]byte, int, error) {
   290  	ctx, cancel := context.WithDeadline(context.Background(), deadline)
   291  	defer cancel()
   292  	return ExecScript(ctx, e.cmd.Dir, e.ctx.TaskEnv, e.cmd.SysProcAttr, name, args)
   293  }
   294  
   295  // ExecScript executes cmd with args and returns the output, exit code, and
   296  // error. Output is truncated to client/driver/structs.CheckBufSize
   297  func ExecScript(ctx context.Context, dir string, env *env.TaskEnv, attrs *syscall.SysProcAttr,
   298  	name string, args []string) ([]byte, int, error) {
   299  	name = env.ReplaceEnv(name)
   300  	cmd := exec.CommandContext(ctx, name, env.ParseAndReplace(args)...)
   301  
   302  	// Copy runtime environment from the main command
   303  	cmd.SysProcAttr = attrs
   304  	cmd.Dir = dir
   305  	cmd.Env = env.List()
   306  
   307  	// Capture output
   308  	buf, _ := circbuf.NewBuffer(int64(dstructs.CheckBufSize))
   309  	cmd.Stdout = buf
   310  	cmd.Stderr = buf
   311  
   312  	if err := cmd.Run(); err != nil {
   313  		exitErr, ok := err.(*exec.ExitError)
   314  		if !ok {
   315  			// Non-exit error, return it and let the caller treat
   316  			// it as a critical failure
   317  			return nil, 0, err
   318  		}
   319  
   320  		// Some kind of error happened; default to critical
   321  		exitCode := 2
   322  		if status, ok := exitErr.Sys().(syscall.WaitStatus); ok {
   323  			exitCode = status.ExitStatus()
   324  		}
   325  
   326  		// Don't return the exitError as the caller only needs the
   327  		// output and code.
   328  		return buf.Bytes(), exitCode, nil
   329  	}
   330  	return buf.Bytes(), 0, nil
   331  }
   332  
   333  // configureLoggers sets up the standard out/error file rotators
   334  func (e *UniversalExecutor) configureLoggers() error {
   335  	e.rotatorLock.Lock()
   336  	defer e.rotatorLock.Unlock()
   337  
   338  	logFileSize := int64(e.ctx.Task.LogConfig.MaxFileSizeMB * 1024 * 1024)
   339  	if e.lro == nil {
   340  		lro, err := logging.NewFileRotator(e.ctx.LogDir, fmt.Sprintf("%v.stdout", e.ctx.Task.Name),
   341  			e.ctx.Task.LogConfig.MaxFiles, logFileSize, e.logger)
   342  		if err != nil {
   343  			return fmt.Errorf("error creating new stdout log file for %q: %v", e.ctx.Task.Name, err)
   344  		}
   345  		e.lro = lro
   346  	}
   347  
   348  	if e.lre == nil {
   349  		lre, err := logging.NewFileRotator(e.ctx.LogDir, fmt.Sprintf("%v.stderr", e.ctx.Task.Name),
   350  			e.ctx.Task.LogConfig.MaxFiles, logFileSize, e.logger)
   351  		if err != nil {
   352  			return fmt.Errorf("error creating new stderr log file for %q: %v", e.ctx.Task.Name, err)
   353  		}
   354  		e.lre = lre
   355  	}
   356  	return nil
   357  }
   358  
// Wait blocks until the launched process has exited (signalled by the
// processExited channel being closed) and returns its exit state. The
// returned error is always nil.
func (e *UniversalExecutor) Wait() (*ProcessState, error) {
	<-e.processExited
	return e.exitState, nil
}
   364  
   365  // COMPAT: prior to Nomad 0.3.2, UpdateTask didn't exist.
   366  // UpdateLogConfig updates the log configuration
   367  func (e *UniversalExecutor) UpdateLogConfig(logConfig *structs.LogConfig) error {
   368  	e.ctx.Task.LogConfig = logConfig
   369  	if e.lro == nil {
   370  		return fmt.Errorf("log rotator for stdout doesn't exist")
   371  	}
   372  	e.lro.MaxFiles = logConfig.MaxFiles
   373  	e.lro.FileSize = int64(logConfig.MaxFileSizeMB * 1024 * 1024)
   374  
   375  	if e.lre == nil {
   376  		return fmt.Errorf("log rotator for stderr doesn't exist")
   377  	}
   378  	e.lre.MaxFiles = logConfig.MaxFiles
   379  	e.lre.FileSize = int64(logConfig.MaxFileSizeMB * 1024 * 1024)
   380  	return nil
   381  }
   382  
   383  func (e *UniversalExecutor) UpdateTask(task *structs.Task) error {
   384  	e.ctx.Task = task
   385  
   386  	// Updating Log Config
   387  	e.rotatorLock.Lock()
   388  	if e.lro != nil && e.lre != nil {
   389  		fileSize := int64(task.LogConfig.MaxFileSizeMB * 1024 * 1024)
   390  		e.lro.MaxFiles = task.LogConfig.MaxFiles
   391  		e.lro.FileSize = fileSize
   392  		e.lre.MaxFiles = task.LogConfig.MaxFiles
   393  		e.lre.FileSize = fileSize
   394  	}
   395  	e.rotatorLock.Unlock()
   396  	return nil
   397  }
   398  
   399  func (e *UniversalExecutor) wait() {
   400  	defer close(e.processExited)
   401  	err := e.cmd.Wait()
   402  	ic := e.resConCtx.getIsolationConfig()
   403  	if err == nil {
   404  		e.exitState = &ProcessState{Pid: 0, ExitCode: 0, IsolationConfig: ic, Time: time.Now()}
   405  		return
   406  	}
   407  
   408  	e.lre.Close()
   409  	e.lro.Close()
   410  
   411  	exitCode := 1
   412  	var signal int
   413  	if exitErr, ok := err.(*exec.ExitError); ok {
   414  		if status, ok := exitErr.Sys().(syscall.WaitStatus); ok {
   415  			exitCode = status.ExitStatus()
   416  			if status.Signaled() {
   417  				// bash(1) uses the lower 7 bits of a uint8
   418  				// to indicate normal program failure (see
   419  				// <sysexits.h>). If a process terminates due
   420  				// to a signal, encode the signal number to
   421  				// indicate which signal caused the process
   422  				// to terminate.  Mirror this exit code
   423  				// encoding scheme.
   424  				const exitSignalBase = 128
   425  				signal = int(status.Signal())
   426  				exitCode = exitSignalBase + signal
   427  			}
   428  		}
   429  	} else {
   430  		e.logger.Printf("[DEBUG] executor: unexpected Wait() error type: %v", err)
   431  	}
   432  
   433  	e.exitState = &ProcessState{Pid: 0, ExitCode: exitCode, Signal: signal, IsolationConfig: ic, Time: time.Now()}
   434  }
   435  
var (
	// finishedErr is the error message received when trying to kill or signal
	// an already exited process.
	finishedErr = "os: process already finished"
)
   441  
// ClientCleanup is the cleanup routine that a Nomad Client uses to remove the
// remnants of a child UniversalExecutor. It delegates to clientCleanup with
// the given isolation config and pid.
func ClientCleanup(ic *dstructs.IsolationConfig, pid int) error {
	return clientCleanup(ic, pid)
}
   447  
   448  // Exit cleans up the alloc directory, destroys resource container and kills the
   449  // user process
   450  func (e *UniversalExecutor) Exit() error {
   451  	var merr multierror.Error
   452  	if e.syslogServer != nil {
   453  		e.syslogServer.Shutdown()
   454  	}
   455  
   456  	if e.lre != nil {
   457  		e.lre.Close()
   458  	}
   459  
   460  	if e.lro != nil {
   461  		e.lro.Close()
   462  	}
   463  
   464  	// If the executor did not launch a process, return.
   465  	if e.command == nil {
   466  		return nil
   467  	}
   468  
   469  	// Prefer killing the process via the resource container.
   470  	if e.cmd.Process != nil && !e.command.ResourceLimits {
   471  		proc, err := os.FindProcess(e.cmd.Process.Pid)
   472  		if err != nil {
   473  			e.logger.Printf("[ERR] executor: can't find process with pid: %v, err: %v",
   474  				e.cmd.Process.Pid, err)
   475  		} else if err := proc.Kill(); err != nil && err.Error() != finishedErr {
   476  			merr.Errors = append(merr.Errors,
   477  				fmt.Errorf("can't kill process with pid: %v, err: %v", e.cmd.Process.Pid, err))
   478  		}
   479  	}
   480  
   481  	if e.command.ResourceLimits {
   482  		if err := e.resConCtx.executorCleanup(); err != nil {
   483  			merr.Errors = append(merr.Errors, err)
   484  		}
   485  	}
   486  	return merr.ErrorOrNil()
   487  }
   488  
   489  // Shutdown sends an interrupt signal to the user process
   490  func (e *UniversalExecutor) ShutDown() error {
   491  	if e.cmd.Process == nil {
   492  		return fmt.Errorf("executor.shutdown error: no process found")
   493  	}
   494  	proc, err := os.FindProcess(e.cmd.Process.Pid)
   495  	if err != nil {
   496  		return fmt.Errorf("executor.shutdown failed to find process: %v", err)
   497  	}
   498  	if runtime.GOOS == "windows" {
   499  		if err := proc.Kill(); err != nil && err.Error() != finishedErr {
   500  			return err
   501  		}
   502  		return nil
   503  	}
   504  	if err = proc.Signal(os.Interrupt); err != nil && err.Error() != finishedErr {
   505  		return fmt.Errorf("executor.shutdown error: %v", err)
   506  	}
   507  	return nil
   508  }
   509  
   510  // pidStats returns the resource usage stats per pid
   511  func (e *UniversalExecutor) pidStats() (map[string]*cstructs.ResourceUsage, error) {
   512  	stats := make(map[string]*cstructs.ResourceUsage)
   513  	e.pidLock.RLock()
   514  	pids := make(map[int]*nomadPid, len(e.pids))
   515  	for k, v := range e.pids {
   516  		pids[k] = v
   517  	}
   518  	e.pidLock.RUnlock()
   519  	for pid, np := range pids {
   520  		p, err := process.NewProcess(int32(pid))
   521  		if err != nil {
   522  			e.logger.Printf("[TRACE] executor: unable to create new process with pid: %v", pid)
   523  			continue
   524  		}
   525  		ms := &cstructs.MemoryStats{}
   526  		if memInfo, err := p.MemoryInfo(); err == nil {
   527  			ms.RSS = memInfo.RSS
   528  			ms.Swap = memInfo.Swap
   529  			ms.Measured = ExecutorBasicMeasuredMemStats
   530  		}
   531  
   532  		cs := &cstructs.CpuStats{}
   533  		if cpuStats, err := p.Times(); err == nil {
   534  			cs.SystemMode = np.cpuStatsSys.Percent(cpuStats.System * float64(time.Second))
   535  			cs.UserMode = np.cpuStatsUser.Percent(cpuStats.User * float64(time.Second))
   536  			cs.Measured = ExecutorBasicMeasuredCpuStats
   537  
   538  			// calculate cpu usage percent
   539  			cs.Percent = np.cpuStatsTotal.Percent(cpuStats.Total() * float64(time.Second))
   540  		}
   541  		stats[strconv.Itoa(pid)] = &cstructs.ResourceUsage{MemoryStats: ms, CpuStats: cs}
   542  	}
   543  
   544  	return stats, nil
   545  }
   546  
   547  // lookupBin looks for path to the binary to run by looking for the binary in
   548  // the following locations, in-order: task/local/, task/, based on host $PATH.
   549  // The return path is absolute.
   550  func (e *UniversalExecutor) lookupBin(bin string) (string, error) {
   551  	// Check in the local directory
   552  	local := filepath.Join(e.ctx.TaskDir, allocdir.TaskLocal, bin)
   553  	if _, err := os.Stat(local); err == nil {
   554  		return local, nil
   555  	}
   556  
   557  	// Check at the root of the task's directory
   558  	root := filepath.Join(e.ctx.TaskDir, bin)
   559  	if _, err := os.Stat(root); err == nil {
   560  		return root, nil
   561  	}
   562  
   563  	// Check the $PATH
   564  	if host, err := exec.LookPath(bin); err == nil {
   565  		return host, nil
   566  	}
   567  
   568  	return "", fmt.Errorf("binary %q could not be found", bin)
   569  }
   570  
   571  // makeExecutable makes the given file executable for root,group,others.
   572  func (e *UniversalExecutor) makeExecutable(binPath string) error {
   573  	if runtime.GOOS == "windows" {
   574  		return nil
   575  	}
   576  
   577  	fi, err := os.Stat(binPath)
   578  	if err != nil {
   579  		if os.IsNotExist(err) {
   580  			return fmt.Errorf("binary %q does not exist", binPath)
   581  		}
   582  		return fmt.Errorf("specified binary is invalid: %v", err)
   583  	}
   584  
   585  	// If it is not executable, make it so.
   586  	perm := fi.Mode().Perm()
   587  	req := os.FileMode(0555)
   588  	if perm&req != req {
   589  		if err := os.Chmod(binPath, perm|req); err != nil {
   590  			return fmt.Errorf("error making %q executable: %s", binPath, err)
   591  		}
   592  	}
   593  	return nil
   594  }
   595  
   596  // getFreePort returns a free port ready to be listened on between upper and
   597  // lower bounds
   598  func (e *UniversalExecutor) getListener(lowerBound uint, upperBound uint) (net.Listener, error) {
   599  	if runtime.GOOS == "windows" {
   600  		return e.listenerTCP(lowerBound, upperBound)
   601  	}
   602  
   603  	return e.listenerUnix()
   604  }
   605  
   606  // listenerTCP creates a TCP listener using an unused port between an upper and
   607  // lower bound
   608  func (e *UniversalExecutor) listenerTCP(lowerBound uint, upperBound uint) (net.Listener, error) {
   609  	for i := lowerBound; i <= upperBound; i++ {
   610  		addr, err := net.ResolveTCPAddr("tcp", fmt.Sprintf("localhost:%v", i))
   611  		if err != nil {
   612  			return nil, err
   613  		}
   614  		l, err := net.ListenTCP("tcp", addr)
   615  		if err != nil {
   616  			continue
   617  		}
   618  		return l, nil
   619  	}
   620  	return nil, fmt.Errorf("No free port found")
   621  }
   622  
   623  // listenerUnix creates a Unix domain socket
   624  func (e *UniversalExecutor) listenerUnix() (net.Listener, error) {
   625  	f, err := ioutil.TempFile("", "plugin")
   626  	if err != nil {
   627  		return nil, err
   628  	}
   629  	path := f.Name()
   630  
   631  	if err := f.Close(); err != nil {
   632  		return nil, err
   633  	}
   634  	if err := os.Remove(path); err != nil {
   635  		return nil, err
   636  	}
   637  
   638  	return net.Listen("unix", path)
   639  }
   640  
   641  // collectPids collects the pids of the child processes that the executor is
   642  // running every 5 seconds
   643  func (e *UniversalExecutor) collectPids() {
   644  	// Fire the timer right away when the executor starts from there on the pids
   645  	// are collected every scan interval
   646  	timer := time.NewTimer(0)
   647  	defer timer.Stop()
   648  	for {
   649  		select {
   650  		case <-timer.C:
   651  			pids, err := e.getAllPids()
   652  			if err != nil {
   653  				e.logger.Printf("[DEBUG] executor: error collecting pids: %v", err)
   654  			}
   655  			e.pidLock.Lock()
   656  
   657  			// Adding pids which are not being tracked
   658  			for pid, np := range pids {
   659  				if _, ok := e.pids[pid]; !ok {
   660  					e.pids[pid] = np
   661  				}
   662  			}
   663  			// Removing pids which are no longer present
   664  			for pid := range e.pids {
   665  				if _, ok := pids[pid]; !ok {
   666  					delete(e.pids, pid)
   667  				}
   668  			}
   669  			e.pidLock.Unlock()
   670  			timer.Reset(pidScanInterval)
   671  		case <-e.processExited:
   672  			return
   673  		}
   674  	}
   675  }
   676  
   677  // scanPids scans all the pids on the machine running the current executor and
   678  // returns the child processes of the executor.
   679  func (e *UniversalExecutor) scanPids(parentPid int, allPids []ps.Process) (map[int]*nomadPid, error) {
   680  	processFamily := make(map[int]struct{})
   681  	processFamily[parentPid] = struct{}{}
   682  
   683  	// A mapping of pids to their parent pids. It is used to build the process
   684  	// tree of the executing task
   685  	pidsRemaining := make(map[int]int, len(allPids))
   686  	for _, pid := range allPids {
   687  		pidsRemaining[pid.Pid()] = pid.PPid()
   688  	}
   689  
   690  	for {
   691  		// flag to indicate if we have found a match
   692  		foundNewPid := false
   693  
   694  		for pid, ppid := range pidsRemaining {
   695  			_, childPid := processFamily[ppid]
   696  
   697  			// checking if the pid is a child of any of the parents
   698  			if childPid {
   699  				processFamily[pid] = struct{}{}
   700  				delete(pidsRemaining, pid)
   701  				foundNewPid = true
   702  			}
   703  		}
   704  
   705  		// not scanning anymore if we couldn't find a single match
   706  		if !foundNewPid {
   707  			break
   708  		}
   709  	}
   710  
   711  	res := make(map[int]*nomadPid)
   712  	for pid := range processFamily {
   713  		np := nomadPid{
   714  			pid:           pid,
   715  			cpuStatsTotal: stats.NewCpuStats(),
   716  			cpuStatsUser:  stats.NewCpuStats(),
   717  			cpuStatsSys:   stats.NewCpuStats(),
   718  		}
   719  		res[pid] = &np
   720  	}
   721  	return res, nil
   722  }
   723  
   724  // aggregatedResourceUsage aggregates the resource usage of all the pids and
   725  // returns a TaskResourceUsage data point
   726  func (e *UniversalExecutor) aggregatedResourceUsage(pidStats map[string]*cstructs.ResourceUsage) *cstructs.TaskResourceUsage {
   727  	ts := time.Now().UTC().UnixNano()
   728  	var (
   729  		systemModeCPU, userModeCPU, percent float64
   730  		totalRSS, totalSwap                 uint64
   731  	)
   732  
   733  	for _, pidStat := range pidStats {
   734  		systemModeCPU += pidStat.CpuStats.SystemMode
   735  		userModeCPU += pidStat.CpuStats.UserMode
   736  		percent += pidStat.CpuStats.Percent
   737  
   738  		totalRSS += pidStat.MemoryStats.RSS
   739  		totalSwap += pidStat.MemoryStats.Swap
   740  	}
   741  
   742  	totalCPU := &cstructs.CpuStats{
   743  		SystemMode: systemModeCPU,
   744  		UserMode:   userModeCPU,
   745  		Percent:    percent,
   746  		Measured:   ExecutorBasicMeasuredCpuStats,
   747  		TotalTicks: e.systemCpuStats.TicksConsumed(percent),
   748  	}
   749  
   750  	totalMemory := &cstructs.MemoryStats{
   751  		RSS:      totalRSS,
   752  		Swap:     totalSwap,
   753  		Measured: ExecutorBasicMeasuredMemStats,
   754  	}
   755  
   756  	resourceUsage := cstructs.ResourceUsage{
   757  		MemoryStats: totalMemory,
   758  		CpuStats:    totalCPU,
   759  	}
   760  	return &cstructs.TaskResourceUsage{
   761  		ResourceUsage: &resourceUsage,
   762  		Timestamp:     ts,
   763  		Pids:          pidStats,
   764  	}
   765  }
   766  
   767  // Signal sends the passed signal to the task
   768  func (e *UniversalExecutor) Signal(s os.Signal) error {
   769  	if e.cmd.Process == nil {
   770  		return fmt.Errorf("Task not yet run")
   771  	}
   772  
   773  	e.logger.Printf("[DEBUG] executor: sending signal %s to PID %d", s, e.cmd.Process.Pid)
   774  	err := e.cmd.Process.Signal(s)
   775  	if err != nil {
   776  		e.logger.Printf("[ERR] executor: sending signal %v failed: %v", s, err)
   777  		return err
   778  	}
   779  
   780  	return nil
   781  }