github.com/anuvu/nomad@v0.8.7-atom1/client/driver/executor/executor_linux.go

package executor

import (
	"fmt"
	"os"
	"os/user"
	"path/filepath"
	"strconv"
	"strings"
	"syscall"
	"time"

	"github.com/hashicorp/go-multierror"
	"github.com/mitchellh/go-ps"
	cgroupFs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
	cgroupConfig "github.com/opencontainers/runc/libcontainer/configs"

	"github.com/hashicorp/nomad/client/stats"
	cstructs "github.com/hashicorp/nomad/client/structs"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/structs"
)

var (
	// The statistics the executor exposes when using cgroups
	ExecutorCgroupMeasuredMemStats = []string{"RSS", "Cache", "Swap", "Max Usage", "Kernel Usage", "Kernel Max Usage"}
	ExecutorCgroupMeasuredCpuStats = []string{"System Mode", "User Mode", "Throttled Periods", "Throttled Time", "Percent"}
)

// configureIsolation configures chroot and creates cgroups
func (e *UniversalExecutor) configureIsolation() error {
	if e.command.FSIsolation {
		if err := e.configureChroot(); err != nil {
			return err
		}
	}

	if e.command.ResourceLimits || e.command.BasicProcessCgroup {
		if err := e.configureCgroups(e.ctx.Task.Resources); err != nil {
			return fmt.Errorf("error creating cgroups: %v", err)
		}
	}
	return nil
}

// applyLimits puts a process in a pre-configured cgroup
func (e *UniversalExecutor) applyLimits(pid int) error {
	if !(e.command.ResourceLimits || e.command.BasicProcessCgroup) {
		return nil
	}

	// Entering the process in the cgroup
	manager := getCgroupManager(e.resConCtx.groups, nil)
	if err := manager.Apply(pid); err != nil {
		e.logger.Printf("[ERR] executor: error applying pid to cgroup: %v", err)
		return err
	}

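	// Cache the per-subsystem cgroup paths created by Apply; they are needed
	// later for stats collection and for cleaning the cgroup up on exit.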
	e.resConCtx.cgPaths = manager.GetPaths()

	// Don't enter all the cgroups since we will inherit resource limits. Only
	// use devices (required by libcontainer) and freezer. Freezer allows us to
	// capture all pids and stop any fork/execs from happening while we are
	// cleaning up.
	if !e.command.ResourceLimits {
		// Move the executor into the global cgroup so that the task specific
		// cgroup can be destroyed.
		nilGroup := &cgroupConfig.Cgroup{}
		nilGroup.Path = "/"
		nilGroup.Resources = e.resConCtx.groups.Resources
		nilManager := getCgroupManager(nilGroup, nil)
		err := nilManager.Apply(pid)
		if err != nil {
			return fmt.Errorf("failed to remove executor pid %d: %v", pid, err)
		}

		// Grab the freezer and devices cgroup paths. We do this from the old
		// manager after the executor pid has been applied since there is no
		// other way to determine what the proper cgroup paths would be.
		freezer := &cgroupFs.FreezerGroup{}
		devices := &cgroupFs.DevicesGroup{}
		freezerName, devicesName := freezer.Name(), devices.Name()
		newPath := map[string]string{
			freezerName: e.resConCtx.cgPaths[freezerName],
			devicesName: e.resConCtx.cgPaths[devicesName],
		}

		// Clear the cgroups paths so that everything is properly cleaned except
		// the groups we want our process to stay in. This will delete the
		// directories from disk.
		manager.Cgroups.Paths = nil
		delete(manager.Paths, freezerName)
		delete(manager.Paths, devicesName)
		if err := manager.Destroy(); err != nil {
			e.logger.Printf("[ERR] executor: failed to destroy original: %v", err)
			return err
		}

		// Update our context such that the new cgroup manager is only tracking
		// the paths we care about now.
		e.resConCtx.cgPaths = newPath
		e.resConCtx.groups.Paths = newPath

		// Apply just the freezer and devices now
		manager = getCgroupManager(e.resConCtx.groups, e.resConCtx.cgPaths)
		if err := manager.Apply(pid); err != nil {
			e.logger.Printf("[ERR] executor: error applying pid to cgroup subset %v: %v", e.resConCtx.cgPaths, err)
			return err
		}
	}

	cgConfig := cgroupConfig.Config{Cgroups: e.resConCtx.groups}
	if err := manager.Set(&cgConfig); err != nil {
		e.logger.Printf("[ERR] executor: error setting cgroup config: %v", err)
		if er := DestroyCgroup(e.resConCtx.groups, e.resConCtx.cgPaths, os.Getpid()); er != nil {
			e.logger.Printf("[ERR] executor: error destroying cgroup: %v", er)
		}
		return err
	}
	return nil
}

// configureCgroups converts a Nomad Resources specification into the equivalent
// cgroup configuration. It returns an error if the resources are invalid.
func (e *UniversalExecutor) configureCgroups(resources *structs.Resources) error {
	e.resConCtx.groups = &cgroupConfig.Cgroup{}
	e.resConCtx.groups.Resources = &cgroupConfig.Resources{}
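	// Place each task in its own uniquely named cgroup under the "/nomad"
	// parent so that concurrent tasks on the same host never collide on
	// cgroup directories.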
	cgroupName := uuid.Generate()
	e.resConCtx.groups.Path = filepath.Join("/nomad", cgroupName)

	// Allow access to /dev/
	e.resConCtx.groups.Resources.AllowAllDevices = true

	// Use a cgroup but don't apply limits
	if !e.command.ResourceLimits {
		return nil
	}

	if resources.MemoryMB > 0 {
		// Total amount of memory the task is allowed to consume
		e.resConCtx.groups.Resources.Memory = int64(resources.MemoryMB * 1024 * 1024)
		// Disable swap to avoid issues on the machine
		var memSwappiness int64 = 0
		e.resConCtx.groups.Resources.MemorySwappiness = &memSwappiness
	}

	if resources.CPU < 2 {
		return fmt.Errorf("resources.CPU must be equal to or greater than 2: %v", resources.CPU)
	}

	// Set the relative CPU shares for this cgroup.
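	// Nomad's CPU resource is expressed in MHz and is used directly as the
	// cgroup's cpu.shares value, so it acts as a relative weight rather than
	// a hard cap.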
	e.resConCtx.groups.Resources.CpuShares = int64(resources.CPU)

	if resources.IOPS != 0 {
		// Validate it is in an acceptable range.
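		// The value is applied as a blkio weight, and the blkio controller
		// only accepts weights between 10 and 1000.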
		if resources.IOPS < 10 || resources.IOPS > 1000 {
			return fmt.Errorf("resources.IOPS must be between 10 and 1000: %d", resources.IOPS)
		}

		e.resConCtx.groups.Resources.BlkioWeight = uint16(resources.IOPS)
	}

	return nil
}

// Stats reports the resource utilization of the cgroup. If there is no resource
// isolation we aggregate the resource utilization of all the pids launched by
// the executor.
func (e *UniversalExecutor) Stats() (*cstructs.TaskResourceUsage, error) {
	// If we don't use full resource limits, fall back to normal collection. It is
	// not enough to be in the Cgroup since you must be in the memory, cpu, and
	// cpuacct cgroup to gather the correct statistics.
	if !e.command.ResourceLimits {
		pidStats, err := e.pidStats()
		if err != nil {
			return nil, err
		}
		return e.aggregatedResourceUsage(pidStats), nil
	}
	ts := time.Now()
	manager := getCgroupManager(e.resConCtx.groups, e.resConCtx.cgPaths)
	stats, err := manager.GetStats()
	if err != nil {
		return nil, err
	}

	// Memory Related Stats
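	// These values are read from the cgroup v1 memory controller (for example
	// memory.stat and memory.max_usage_in_bytes) by the libcontainer fs
	// manager.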
	swap := stats.MemoryStats.SwapUsage
	maxUsage := stats.MemoryStats.Usage.MaxUsage
	rss := stats.MemoryStats.Stats["rss"]
	cache := stats.MemoryStats.Stats["cache"]
	ms := &cstructs.MemoryStats{
		RSS:            rss,
		Cache:          cache,
		Swap:           swap.Usage,
		MaxUsage:       maxUsage,
		KernelUsage:    stats.MemoryStats.KernelUsage.Usage,
		KernelMaxUsage: stats.MemoryStats.KernelUsage.MaxUsage,
		Measured:       ExecutorCgroupMeasuredMemStats,
	}

	// CPU Related Stats
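	// The cgroup reports cumulative CPU time; the CpuStats helpers turn it
	// into a percentage by comparing the delta against the wall-clock time
	// elapsed since the previous sample.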
	totalProcessCPUUsage := float64(stats.CpuStats.CpuUsage.TotalUsage)
	userModeTime := float64(stats.CpuStats.CpuUsage.UsageInUsermode)
	kernelModeTime := float64(stats.CpuStats.CpuUsage.UsageInKernelmode)

	totalPercent := e.totalCpuStats.Percent(totalProcessCPUUsage)
	cs := &cstructs.CpuStats{
		SystemMode:       e.systemCpuStats.Percent(kernelModeTime),
		UserMode:         e.userCpuStats.Percent(userModeTime),
		Percent:          totalPercent,
		ThrottledPeriods: stats.CpuStats.ThrottlingData.ThrottledPeriods,
		ThrottledTime:    stats.CpuStats.ThrottlingData.ThrottledTime,
		TotalTicks:       e.systemCpuStats.TicksConsumed(totalPercent),
		Measured:         ExecutorCgroupMeasuredCpuStats,
	}
	taskResUsage := cstructs.TaskResourceUsage{
		ResourceUsage: &cstructs.ResourceUsage{
			MemoryStats: ms,
			CpuStats:    cs,
		},
		Timestamp: ts.UTC().UnixNano(),
	}
	if pidStats, err := e.pidStats(); err == nil {
		taskResUsage.Pids = pidStats
	}
	return &taskResUsage, nil
}

// runAs takes a user id as a string, looks up the user, and sets the command
// to execute as that user.
func (e *UniversalExecutor) runAs(userid string) error {
	u, err := user.Lookup(userid)
	if err != nil {
		return fmt.Errorf("Failed to identify user %v: %v", userid, err)
	}

	// Get the groups the user is a part of
	gidStrings, err := u.GroupIds()
	if err != nil {
		return fmt.Errorf("Unable to lookup user's group membership: %v", err)
	}

	gids := make([]uint32, 0, len(gidStrings))
	for _, gidString := range gidStrings {
		u, err := strconv.Atoi(gidString)
		if err != nil {
			return fmt.Errorf("Unable to convert user's group to int %s: %v", gidString, err)
		}

		gids = append(gids, uint32(u))
	}

	// Convert the uid and gid
	uid, err := strconv.ParseUint(u.Uid, 10, 32)
	if err != nil {
		return fmt.Errorf("Unable to convert userid to uint32: %s", err)
	}
	gid, err := strconv.ParseUint(u.Gid, 10, 32)
	if err != nil {
		return fmt.Errorf("Unable to convert groupid to uint32: %s", err)
	}

	// Set the command to run as that user and group.
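	// The Credential is applied by the kernel in the forked child just before
	// exec (via setuid/setgid/setgroups), so the executor process itself never
	// drops its own privileges.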
	if e.cmd.SysProcAttr == nil {
		e.cmd.SysProcAttr = &syscall.SysProcAttr{}
	}
	if e.cmd.SysProcAttr.Credential == nil {
		e.cmd.SysProcAttr.Credential = &syscall.Credential{}
	}
	e.cmd.SysProcAttr.Credential.Uid = uint32(uid)
	e.cmd.SysProcAttr.Credential.Gid = uint32(gid)
	e.cmd.SysProcAttr.Credential.Groups = gids

	e.logger.Printf("[DEBUG] executor: running as user:group %d:%d with group membership in %v", uid, gid, gids)

	return nil
}

// configureChroot configures a chroot
func (e *UniversalExecutor) configureChroot() error {
	if e.cmd.SysProcAttr == nil {
		e.cmd.SysProcAttr = &syscall.SysProcAttr{}
	}
	e.cmd.SysProcAttr.Chroot = e.ctx.TaskDir
	e.cmd.Dir = "/"

	e.fsIsolationEnforced = true
	return nil
}

// getAllPids returns the pids of all the processes spun up by the executor. We
// use the libcontainer APIs to get the pids when the user is using cgroup
// isolation, and we scan the entire process table if the user is not using any
// isolation.
func (e *UniversalExecutor) getAllPids() (map[int]*nomadPid, error) {
	if e.command.ResourceLimits || e.command.BasicProcessCgroup {
		manager := getCgroupManager(e.resConCtx.groups, e.resConCtx.cgPaths)
		pids, err := manager.GetAllPids()
		if err != nil {
			return nil, err
		}
		np := make(map[int]*nomadPid, len(pids))
		for _, pid := range pids {
			np[pid] = &nomadPid{
				pid:           pid,
				cpuStatsTotal: stats.NewCpuStats(),
				cpuStatsSys:   stats.NewCpuStats(),
				cpuStatsUser:  stats.NewCpuStats(),
			}
		}
		return np, nil
	}
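	// No cgroup to consult, so scan the host process table and filter it down
	// to the executor's own process tree.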
	allProcesses, err := ps.Processes()
	if err != nil {
		return nil, err
	}
	return e.scanPids(os.Getpid(), allProcesses)
}

// DestroyCgroup kills all processes in the cgroup and removes the cgroup
// configuration from the host. This function is idempotent.
func DestroyCgroup(groups *cgroupConfig.Cgroup, cgPaths map[string]string, executorPid int) error {
	mErrs := new(multierror.Error)
	if groups == nil {
		return fmt.Errorf("Can't destroy: cgroup configuration empty")
	}

	// Move the executor into the global cgroup so that the task specific
	// cgroup can be destroyed.
	nilGroup := &cgroupConfig.Cgroup{}
	nilGroup.Path = "/"
	nilGroup.Resources = groups.Resources
	nilManager := getCgroupManager(nilGroup, nil)
	err := nilManager.Apply(executorPid)
	if err != nil && !strings.Contains(err.Error(), "no such process") {
		return fmt.Errorf("failed to remove executor pid %d: %v", executorPid, err)
	}

	// Freeze the Cgroup so that it can not continue to fork/exec.
	manager := getCgroupManager(groups, cgPaths)
	err = manager.Freeze(cgroupConfig.Frozen)
	if err != nil && !strings.Contains(err.Error(), "no such file or directory") {
		return fmt.Errorf("failed to freeze cgroup: %v", err)
	}

	var procs []*os.Process
	pids, err := manager.GetAllPids()
	if err != nil {
		multierror.Append(mErrs, fmt.Errorf("error getting pids: %v", err))

		// Unfreeze the cgroup.
		err = manager.Freeze(cgroupConfig.Thawed)
		if err != nil && !strings.Contains(err.Error(), "no such file or directory") {
			multierror.Append(mErrs, fmt.Errorf("failed to unfreeze cgroup: %v", err))
		}
		return mErrs.ErrorOrNil()
	}

	// Kill the processes in the cgroup
	for _, pid := range pids {
		proc, err := os.FindProcess(pid)
		if err != nil {
			multierror.Append(mErrs, fmt.Errorf("error finding process %v: %v", pid, err))
			continue
		}

		procs = append(procs, proc)
		if e := proc.Kill(); e != nil {
			multierror.Append(mErrs, fmt.Errorf("error killing process %v: %v", pid, e))
		}
	}

	// Unfreeze the cgroup so we can wait.
	err = manager.Freeze(cgroupConfig.Thawed)
	if err != nil && !strings.Contains(err.Error(), "no such file or directory") {
		multierror.Append(mErrs, fmt.Errorf("failed to unfreeze cgroup: %v", err))
	}

	// Wait on the killed processes to ensure they are cleaned up.
	for _, proc := range procs {
		// Don't capture the error because we expect this to fail for
		// processes we didn't fork.
		proc.Wait()
	}

	// Clear the cgroups paths so that everything is properly cleaned
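	// (libcontainer's fs manager treats a non-nil Cgroups.Paths as externally
	// managed and skips removal in Destroy, so it must be cleared first.)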
	manager.Cgroups.Paths = nil

	// Remove the cgroup.
	if err := manager.Destroy(); err != nil {
		multierror.Append(mErrs, fmt.Errorf("failed to delete the cgroup directories: %v", err))
	}
	return mErrs.ErrorOrNil()
}

// getCgroupManager returns the correct libcontainer cgroup manager.
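// Only the cgroupfs (cgroup v1) backend is used; systemd-managed cgroups are
// not supported by this executor.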
func getCgroupManager(groups *cgroupConfig.Cgroup, paths map[string]string) *cgroupFs.Manager {
	return &cgroupFs.Manager{Cgroups: groups, Paths: paths}
}