github.com/maier/nomad@v0.4.1-0.20161110003312-a9e3d0b8549d/client/driver/executor/executor_linux.go (about)

     1  package executor
     2  
     3  import (
     4  	"fmt"
     5  	"os"
     6  	"os/user"
     7  	"path/filepath"
     8  	"strconv"
     9  	"strings"
    10  	"syscall"
    11  	"time"
    12  
    13  	"github.com/hashicorp/go-multierror"
    14  	"github.com/mitchellh/go-ps"
    15  	"github.com/opencontainers/runc/libcontainer/cgroups"
    16  	cgroupFs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
    17  	cgroupConfig "github.com/opencontainers/runc/libcontainer/configs"
    18  	"github.com/opencontainers/runc/libcontainer/system"
    19  
    20  	"github.com/hashicorp/nomad/client/allocdir"
    21  	"github.com/hashicorp/nomad/client/stats"
    22  	cstructs "github.com/hashicorp/nomad/client/structs"
    23  	"github.com/hashicorp/nomad/nomad/structs"
    24  )
    25  
var (
	// chrootEnv is a mapping of directories on the host OS to attempt to
	// embed inside each task's chroot. configureChroot falls back to it when
	// the executor context does not carry a ChrootEnv of its own.
	chrootEnv = map[string]string{
		"/bin":            "/bin",
		"/etc":            "/etc",
		"/lib":            "/lib",
		"/lib32":          "/lib32",
		"/lib64":          "/lib64",
		"/run/resolvconf": "/run/resolvconf",
		"/sbin":           "/sbin",
		"/usr":            "/usr",
	}

	// clockTicks is the clocks per second of the machine, as reported by
	// libcontainer's system.GetClockTicks at package initialization.
	clockTicks = uint64(system.GetClockTicks())

	// ExecutorCgroupMeasuredMemStats names the memory statistics the executor
	// exposes when using cgroups; Stats attaches it to MemoryStats.Measured.
	ExecutorCgroupMeasuredMemStats = []string{"RSS", "Cache", "Swap", "Max Usage", "Kernel Usage", "Kernel Max Usage"}
	// ExecutorCgroupMeasuredCpuStats names the CPU statistics the executor
	// exposes when using cgroups; Stats attaches it to CpuStats.Measured.
	ExecutorCgroupMeasuredCpuStats = []string{"System Mode", "User Mode", "Throttled Periods", "Throttled Time", "Percent"}
)
    47  
    48  // configureIsolation configures chroot and creates cgroups
    49  func (e *UniversalExecutor) configureIsolation() error {
    50  	if e.command.FSIsolation {
    51  		if err := e.configureChroot(); err != nil {
    52  			return err
    53  		}
    54  	}
    55  
    56  	if e.command.ResourceLimits {
    57  		if err := e.configureCgroups(e.ctx.Task.Resources); err != nil {
    58  			return fmt.Errorf("error creating cgroups: %v", err)
    59  		}
    60  	}
    61  	return nil
    62  }
    63  
    64  // applyLimits puts a process in a pre-configured cgroup
    65  func (e *UniversalExecutor) applyLimits(pid int) error {
    66  	if !e.command.ResourceLimits {
    67  		return nil
    68  	}
    69  
    70  	// Entering the process in the cgroup
    71  	manager := getCgroupManager(e.resConCtx.groups, nil)
    72  	if err := manager.Apply(pid); err != nil {
    73  		e.logger.Printf("[ERR] executor: error applying pid to cgroup: %v", err)
    74  		if er := e.removeChrootMounts(); er != nil {
    75  			e.logger.Printf("[ERR] executor: error removing chroot: %v", er)
    76  		}
    77  		return err
    78  	}
    79  	e.resConCtx.cgPaths = manager.GetPaths()
    80  	cgConfig := cgroupConfig.Config{Cgroups: e.resConCtx.groups}
    81  	if err := manager.Set(&cgConfig); err != nil {
    82  		e.logger.Printf("[ERR] executor: error setting cgroup config: %v", err)
    83  		if er := DestroyCgroup(e.resConCtx.groups, e.resConCtx.cgPaths, os.Getpid()); er != nil {
    84  			e.logger.Printf("[ERR] executor: error destroying cgroup: %v", er)
    85  		}
    86  		if er := e.removeChrootMounts(); er != nil {
    87  			e.logger.Printf("[ERR] executor: error removing chroot: %v", er)
    88  		}
    89  		return err
    90  	}
    91  	return nil
    92  }
    93  
    94  // configureCgroups converts a Nomad Resources specification into the equivalent
    95  // cgroup configuration. It returns an error if the resources are invalid.
    96  func (e *UniversalExecutor) configureCgroups(resources *structs.Resources) error {
    97  	e.resConCtx.groups = &cgroupConfig.Cgroup{}
    98  	e.resConCtx.groups.Resources = &cgroupConfig.Resources{}
    99  	cgroupName := structs.GenerateUUID()
   100  	e.resConCtx.groups.Path = filepath.Join("/nomad", cgroupName)
   101  
   102  	// TODO: verify this is needed for things like network access
   103  	e.resConCtx.groups.Resources.AllowAllDevices = true
   104  
   105  	if resources.MemoryMB > 0 {
   106  		// Total amount of memory allowed to consume
   107  		e.resConCtx.groups.Resources.Memory = int64(resources.MemoryMB * 1024 * 1024)
   108  		// Disable swap to avoid issues on the machine
   109  		e.resConCtx.groups.Resources.MemorySwap = int64(-1)
   110  	}
   111  
   112  	if resources.CPU < 2 {
   113  		return fmt.Errorf("resources.CPU must be equal to or greater than 2: %v", resources.CPU)
   114  	}
   115  
   116  	// Set the relative CPU shares for this cgroup.
   117  	e.resConCtx.groups.Resources.CpuShares = int64(resources.CPU)
   118  
   119  	if resources.IOPS != 0 {
   120  		// Validate it is in an acceptable range.
   121  		if resources.IOPS < 10 || resources.IOPS > 1000 {
   122  			return fmt.Errorf("resources.IOPS must be between 10 and 1000: %d", resources.IOPS)
   123  		}
   124  
   125  		e.resConCtx.groups.Resources.BlkioWeight = uint16(resources.IOPS)
   126  	}
   127  
   128  	return nil
   129  }
   130  
   131  // Stats reports the resource utilization of the cgroup. If there is no resource
   132  // isolation we aggregate the resource utilization of all the pids launched by
   133  // the executor.
   134  func (e *UniversalExecutor) Stats() (*cstructs.TaskResourceUsage, error) {
   135  	if !e.command.ResourceLimits {
   136  		pidStats, err := e.pidStats()
   137  		if err != nil {
   138  			return nil, err
   139  		}
   140  		return e.aggregatedResourceUsage(pidStats), nil
   141  	}
   142  	ts := time.Now()
   143  	manager := getCgroupManager(e.resConCtx.groups, e.resConCtx.cgPaths)
   144  	stats, err := manager.GetStats()
   145  	if err != nil {
   146  		return nil, err
   147  	}
   148  
   149  	// Memory Related Stats
   150  	swap := stats.MemoryStats.SwapUsage
   151  	maxUsage := stats.MemoryStats.Usage.MaxUsage
   152  	rss := stats.MemoryStats.Stats["rss"]
   153  	cache := stats.MemoryStats.Stats["cache"]
   154  	ms := &cstructs.MemoryStats{
   155  		RSS:            rss,
   156  		Cache:          cache,
   157  		Swap:           swap.Usage,
   158  		MaxUsage:       maxUsage,
   159  		KernelUsage:    stats.MemoryStats.KernelUsage.Usage,
   160  		KernelMaxUsage: stats.MemoryStats.KernelUsage.MaxUsage,
   161  		Measured:       ExecutorCgroupMeasuredMemStats,
   162  	}
   163  
   164  	// CPU Related Stats
   165  	totalProcessCPUUsage := float64(stats.CpuStats.CpuUsage.TotalUsage)
   166  	userModeTime := float64(stats.CpuStats.CpuUsage.UsageInUsermode)
   167  	kernelModeTime := float64(stats.CpuStats.CpuUsage.UsageInKernelmode)
   168  
   169  	totalPercent := e.totalCpuStats.Percent(totalProcessCPUUsage)
   170  	cs := &cstructs.CpuStats{
   171  		SystemMode:       e.systemCpuStats.Percent(kernelModeTime),
   172  		UserMode:         e.userCpuStats.Percent(userModeTime),
   173  		Percent:          totalPercent,
   174  		ThrottledPeriods: stats.CpuStats.ThrottlingData.ThrottledPeriods,
   175  		ThrottledTime:    stats.CpuStats.ThrottlingData.ThrottledTime,
   176  		TotalTicks:       e.systemCpuStats.TicksConsumed(totalPercent),
   177  		Measured:         ExecutorCgroupMeasuredCpuStats,
   178  	}
   179  	taskResUsage := cstructs.TaskResourceUsage{
   180  		ResourceUsage: &cstructs.ResourceUsage{
   181  			MemoryStats: ms,
   182  			CpuStats:    cs,
   183  		},
   184  		Timestamp: ts.UTC().UnixNano(),
   185  	}
   186  	if pidStats, err := e.pidStats(); err == nil {
   187  		taskResUsage.Pids = pidStats
   188  	}
   189  	return &taskResUsage, nil
   190  }
   191  
   192  // runAs takes a user id as a string and looks up the user, and sets the command
   193  // to execute as that user.
   194  func (e *UniversalExecutor) runAs(userid string) error {
   195  	u, err := user.Lookup(userid)
   196  	if err != nil {
   197  		return fmt.Errorf("Failed to identify user %v: %v", userid, err)
   198  	}
   199  
   200  	// Convert the uid and gid
   201  	uid, err := strconv.ParseUint(u.Uid, 10, 32)
   202  	if err != nil {
   203  		return fmt.Errorf("Unable to convert userid to uint32: %s", err)
   204  	}
   205  	gid, err := strconv.ParseUint(u.Gid, 10, 32)
   206  	if err != nil {
   207  		return fmt.Errorf("Unable to convert groupid to uint32: %s", err)
   208  	}
   209  
   210  	// Set the command to run as that user and group.
   211  	if e.cmd.SysProcAttr == nil {
   212  		e.cmd.SysProcAttr = &syscall.SysProcAttr{}
   213  	}
   214  	if e.cmd.SysProcAttr.Credential == nil {
   215  		e.cmd.SysProcAttr.Credential = &syscall.Credential{}
   216  	}
   217  	e.cmd.SysProcAttr.Credential.Uid = uint32(uid)
   218  	e.cmd.SysProcAttr.Credential.Gid = uint32(gid)
   219  
   220  	return nil
   221  }
   222  
   223  // configureChroot configures a chroot
   224  func (e *UniversalExecutor) configureChroot() error {
   225  	allocDir := e.ctx.AllocDir
   226  	if err := allocDir.MountSharedDir(e.ctx.Task.Name); err != nil {
   227  		return err
   228  	}
   229  
   230  	chroot := chrootEnv
   231  	if len(e.ctx.ChrootEnv) > 0 {
   232  		chroot = e.ctx.ChrootEnv
   233  	}
   234  
   235  	if err := allocDir.Embed(e.ctx.Task.Name, chroot); err != nil {
   236  		return err
   237  	}
   238  
   239  	// Set the tasks AllocDir environment variable.
   240  	e.ctx.TaskEnv.
   241  		SetAllocDir(filepath.Join("/", allocdir.SharedAllocName)).
   242  		SetTaskLocalDir(filepath.Join("/", allocdir.TaskLocal)).
   243  		SetSecretsDir(filepath.Join("/", allocdir.TaskSecrets)).
   244  		Build()
   245  
   246  	if e.cmd.SysProcAttr == nil {
   247  		e.cmd.SysProcAttr = &syscall.SysProcAttr{}
   248  	}
   249  	e.cmd.SysProcAttr.Chroot = e.taskDir
   250  	e.cmd.Dir = "/"
   251  
   252  	if err := allocDir.MountSpecialDirs(e.taskDir); err != nil {
   253  		return err
   254  	}
   255  
   256  	e.fsIsolationEnforced = true
   257  	return nil
   258  }
   259  
   260  // cleanTaskDir is an idempotent operation to clean the task directory and
   261  // should be called when tearing down the task.
   262  func (e *UniversalExecutor) removeChrootMounts() error {
   263  	// Prevent a race between Wait/ForceStop
   264  	e.resConCtx.cgLock.Lock()
   265  	defer e.resConCtx.cgLock.Unlock()
   266  	return e.ctx.AllocDir.UnmountAll()
   267  }
   268  
   269  // getAllPids returns the pids of all the processes spun up by the executor. We
   270  // use the libcontainer apis to get the pids when the user is using cgroup
   271  // isolation and we scan the entire process table if the user is not using any
   272  // isolation
   273  func (e *UniversalExecutor) getAllPids() (map[int]*nomadPid, error) {
   274  	if e.command.ResourceLimits {
   275  		manager := getCgroupManager(e.resConCtx.groups, e.resConCtx.cgPaths)
   276  		pids, err := manager.GetAllPids()
   277  		if err != nil {
   278  			return nil, err
   279  		}
   280  		np := make(map[int]*nomadPid, len(pids))
   281  		for _, pid := range pids {
   282  			np[pid] = &nomadPid{
   283  				pid:           pid,
   284  				cpuStatsTotal: stats.NewCpuStats(),
   285  				cpuStatsSys:   stats.NewCpuStats(),
   286  				cpuStatsUser:  stats.NewCpuStats(),
   287  			}
   288  		}
   289  		return np, nil
   290  	}
   291  	allProcesses, err := ps.Processes()
   292  	if err != nil {
   293  		return nil, err
   294  	}
   295  	return e.scanPids(os.Getpid(), allProcesses)
   296  }
   297  
   298  // destroyCgroup kills all processes in the cgroup and removes the cgroup
   299  // configuration from the host. This function is idempotent.
   300  func DestroyCgroup(groups *cgroupConfig.Cgroup, cgPaths map[string]string, executorPid int) error {
   301  	mErrs := new(multierror.Error)
   302  	if groups == nil {
   303  		return fmt.Errorf("Can't destroy: cgroup configuration empty")
   304  	}
   305  
   306  	// Move the executor into the global cgroup so that the task specific
   307  	// cgroup can be destroyed.
   308  	nilGroup := &cgroupConfig.Cgroup{}
   309  	nilGroup.Path = "/"
   310  	nilGroup.Resources = groups.Resources
   311  	nilManager := getCgroupManager(nilGroup, nil)
   312  	err := nilManager.Apply(executorPid)
   313  	if err != nil && !strings.Contains(err.Error(), "no such process") {
   314  		return fmt.Errorf("failed to remove executor pid %d: %v", executorPid, err)
   315  	}
   316  
   317  	// Freeze the Cgroup so that it can not continue to fork/exec.
   318  	manager := getCgroupManager(groups, cgPaths)
   319  	err = manager.Freeze(cgroupConfig.Frozen)
   320  	if err != nil && !strings.Contains(err.Error(), "no such file or directory") {
   321  		return fmt.Errorf("failed to freeze cgroup: %v", err)
   322  	}
   323  
   324  	var procs []*os.Process
   325  	pids, err := manager.GetAllPids()
   326  	if err != nil {
   327  		multierror.Append(mErrs, fmt.Errorf("error getting pids: %v", err))
   328  
   329  		// Unfreeze the cgroup.
   330  		err = manager.Freeze(cgroupConfig.Thawed)
   331  		if err != nil && !strings.Contains(err.Error(), "no such file or directory") {
   332  			multierror.Append(mErrs, fmt.Errorf("failed to unfreeze cgroup: %v", err))
   333  		}
   334  		return mErrs.ErrorOrNil()
   335  	}
   336  
   337  	// Kill the processes in the cgroup
   338  	for _, pid := range pids {
   339  		proc, err := os.FindProcess(pid)
   340  		if err != nil {
   341  			multierror.Append(mErrs, fmt.Errorf("error finding process %v: %v", pid, err))
   342  			continue
   343  		}
   344  
   345  		procs = append(procs, proc)
   346  		if e := proc.Kill(); e != nil {
   347  			multierror.Append(mErrs, fmt.Errorf("error killing process %v: %v", pid, e))
   348  		}
   349  	}
   350  
   351  	// Unfreeze the cgroug so we can wait.
   352  	err = manager.Freeze(cgroupConfig.Thawed)
   353  	if err != nil && !strings.Contains(err.Error(), "no such file or directory") {
   354  		multierror.Append(mErrs, fmt.Errorf("failed to unfreeze cgroup: %v", err))
   355  	}
   356  
   357  	// Wait on the killed processes to ensure they are cleaned up.
   358  	for _, proc := range procs {
   359  		// Don't capture the error because we expect this to fail for
   360  		// processes we didn't fork.
   361  		proc.Wait()
   362  	}
   363  
   364  	// Remove the cgroup.
   365  	if err := manager.Destroy(); err != nil {
   366  		multierror.Append(mErrs, fmt.Errorf("failed to delete the cgroup directories: %v", err))
   367  	}
   368  	return mErrs.ErrorOrNil()
   369  }
   370  
   371  // getCgroupManager returns the correct libcontainer cgroup manager.
   372  func getCgroupManager(groups *cgroupConfig.Cgroup, paths map[string]string) cgroups.Manager {
   373  	return &cgroupFs.Manager{Cgroups: groups, Paths: paths}
   374  }