github.com/blixtra/nomad@v0.7.2-0.20171221000451-da9a1d7bb050/client/driver/executor/executor_linux.go (about)

     1  package executor
     2  
     3  import (
     4  	"fmt"
     5  	"os"
     6  	"os/user"
     7  	"path/filepath"
     8  	"strconv"
     9  	"strings"
    10  	"syscall"
    11  	"time"
    12  
    13  	"github.com/hashicorp/go-multierror"
    14  	"github.com/mitchellh/go-ps"
    15  	"github.com/opencontainers/runc/libcontainer/cgroups"
    16  	cgroupFs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
    17  	cgroupConfig "github.com/opencontainers/runc/libcontainer/configs"
    18  
    19  	"github.com/hashicorp/nomad/client/stats"
    20  	cstructs "github.com/hashicorp/nomad/client/structs"
    21  	"github.com/hashicorp/nomad/helper/uuid"
    22  	"github.com/hashicorp/nomad/nomad/structs"
    23  )
    24  
var (
	// ExecutorCgroupMeasuredMemStats names the memory statistics the executor
	// exposes when using cgroups. It is attached to the Measured field of the
	// memory stats returned by Stats.
	ExecutorCgroupMeasuredMemStats = []string{"RSS", "Cache", "Swap", "Max Usage", "Kernel Usage", "Kernel Max Usage"}
	// ExecutorCgroupMeasuredCpuStats names the CPU statistics the executor
	// exposes when using cgroups. It is attached to the Measured field of the
	// CPU stats returned by Stats.
	ExecutorCgroupMeasuredCpuStats = []string{"System Mode", "User Mode", "Throttled Periods", "Throttled Time", "Percent"}
)
    30  
    31  // configureIsolation configures chroot and creates cgroups
    32  func (e *UniversalExecutor) configureIsolation() error {
    33  	if e.command.FSIsolation {
    34  		if err := e.configureChroot(); err != nil {
    35  			return err
    36  		}
    37  	}
    38  
    39  	if e.command.ResourceLimits {
    40  		if err := e.configureCgroups(e.ctx.Task.Resources); err != nil {
    41  			return fmt.Errorf("error creating cgroups: %v", err)
    42  		}
    43  	}
    44  	return nil
    45  }
    46  
    47  // applyLimits puts a process in a pre-configured cgroup
    48  func (e *UniversalExecutor) applyLimits(pid int) error {
    49  	if !e.command.ResourceLimits {
    50  		return nil
    51  	}
    52  
    53  	// Entering the process in the cgroup
    54  	manager := getCgroupManager(e.resConCtx.groups, nil)
    55  	if err := manager.Apply(pid); err != nil {
    56  		e.logger.Printf("[ERR] executor: error applying pid to cgroup: %v", err)
    57  		return err
    58  	}
    59  	e.resConCtx.cgPaths = manager.GetPaths()
    60  	cgConfig := cgroupConfig.Config{Cgroups: e.resConCtx.groups}
    61  	if err := manager.Set(&cgConfig); err != nil {
    62  		e.logger.Printf("[ERR] executor: error setting cgroup config: %v", err)
    63  		if er := DestroyCgroup(e.resConCtx.groups, e.resConCtx.cgPaths, os.Getpid()); er != nil {
    64  			e.logger.Printf("[ERR] executor: error destroying cgroup: %v", er)
    65  		}
    66  		return err
    67  	}
    68  	return nil
    69  }
    70  
    71  // configureCgroups converts a Nomad Resources specification into the equivalent
    72  // cgroup configuration. It returns an error if the resources are invalid.
    73  func (e *UniversalExecutor) configureCgroups(resources *structs.Resources) error {
    74  	e.resConCtx.groups = &cgroupConfig.Cgroup{}
    75  	e.resConCtx.groups.Resources = &cgroupConfig.Resources{}
    76  	cgroupName := uuid.Generate()
    77  	e.resConCtx.groups.Path = filepath.Join("/nomad", cgroupName)
    78  
    79  	// TODO: verify this is needed for things like network access
    80  	e.resConCtx.groups.Resources.AllowAllDevices = true
    81  
    82  	if resources.MemoryMB > 0 {
    83  		// Total amount of memory allowed to consume
    84  		e.resConCtx.groups.Resources.Memory = int64(resources.MemoryMB * 1024 * 1024)
    85  		// Disable swap to avoid issues on the machine
    86  		e.resConCtx.groups.Resources.MemorySwap = int64(-1)
    87  	}
    88  
    89  	if resources.CPU < 2 {
    90  		return fmt.Errorf("resources.CPU must be equal to or greater than 2: %v", resources.CPU)
    91  	}
    92  
    93  	// Set the relative CPU shares for this cgroup.
    94  	e.resConCtx.groups.Resources.CpuShares = int64(resources.CPU)
    95  
    96  	if resources.IOPS != 0 {
    97  		// Validate it is in an acceptable range.
    98  		if resources.IOPS < 10 || resources.IOPS > 1000 {
    99  			return fmt.Errorf("resources.IOPS must be between 10 and 1000: %d", resources.IOPS)
   100  		}
   101  
   102  		e.resConCtx.groups.Resources.BlkioWeight = uint16(resources.IOPS)
   103  	}
   104  
   105  	return nil
   106  }
   107  
   108  // Stats reports the resource utilization of the cgroup. If there is no resource
   109  // isolation we aggregate the resource utilization of all the pids launched by
   110  // the executor.
   111  func (e *UniversalExecutor) Stats() (*cstructs.TaskResourceUsage, error) {
   112  	if !e.command.ResourceLimits {
   113  		pidStats, err := e.pidStats()
   114  		if err != nil {
   115  			return nil, err
   116  		}
   117  		return e.aggregatedResourceUsage(pidStats), nil
   118  	}
   119  	ts := time.Now()
   120  	manager := getCgroupManager(e.resConCtx.groups, e.resConCtx.cgPaths)
   121  	stats, err := manager.GetStats()
   122  	if err != nil {
   123  		return nil, err
   124  	}
   125  
   126  	// Memory Related Stats
   127  	swap := stats.MemoryStats.SwapUsage
   128  	maxUsage := stats.MemoryStats.Usage.MaxUsage
   129  	rss := stats.MemoryStats.Stats["rss"]
   130  	cache := stats.MemoryStats.Stats["cache"]
   131  	ms := &cstructs.MemoryStats{
   132  		RSS:            rss,
   133  		Cache:          cache,
   134  		Swap:           swap.Usage,
   135  		MaxUsage:       maxUsage,
   136  		KernelUsage:    stats.MemoryStats.KernelUsage.Usage,
   137  		KernelMaxUsage: stats.MemoryStats.KernelUsage.MaxUsage,
   138  		Measured:       ExecutorCgroupMeasuredMemStats,
   139  	}
   140  
   141  	// CPU Related Stats
   142  	totalProcessCPUUsage := float64(stats.CpuStats.CpuUsage.TotalUsage)
   143  	userModeTime := float64(stats.CpuStats.CpuUsage.UsageInUsermode)
   144  	kernelModeTime := float64(stats.CpuStats.CpuUsage.UsageInKernelmode)
   145  
   146  	totalPercent := e.totalCpuStats.Percent(totalProcessCPUUsage)
   147  	cs := &cstructs.CpuStats{
   148  		SystemMode:       e.systemCpuStats.Percent(kernelModeTime),
   149  		UserMode:         e.userCpuStats.Percent(userModeTime),
   150  		Percent:          totalPercent,
   151  		ThrottledPeriods: stats.CpuStats.ThrottlingData.ThrottledPeriods,
   152  		ThrottledTime:    stats.CpuStats.ThrottlingData.ThrottledTime,
   153  		TotalTicks:       e.systemCpuStats.TicksConsumed(totalPercent),
   154  		Measured:         ExecutorCgroupMeasuredCpuStats,
   155  	}
   156  	taskResUsage := cstructs.TaskResourceUsage{
   157  		ResourceUsage: &cstructs.ResourceUsage{
   158  			MemoryStats: ms,
   159  			CpuStats:    cs,
   160  		},
   161  		Timestamp: ts.UTC().UnixNano(),
   162  	}
   163  	if pidStats, err := e.pidStats(); err == nil {
   164  		taskResUsage.Pids = pidStats
   165  	}
   166  	return &taskResUsage, nil
   167  }
   168  
   169  // runAs takes a user id as a string and looks up the user, and sets the command
   170  // to execute as that user.
   171  func (e *UniversalExecutor) runAs(userid string) error {
   172  	u, err := user.Lookup(userid)
   173  	if err != nil {
   174  		return fmt.Errorf("Failed to identify user %v: %v", userid, err)
   175  	}
   176  
   177  	// Get the groups the user is a part of
   178  	gidStrings, err := u.GroupIds()
   179  	if err != nil {
   180  		return fmt.Errorf("Unable to lookup user's group membership: %v", err)
   181  	}
   182  
   183  	gids := make([]uint32, len(gidStrings))
   184  	for _, gidString := range gidStrings {
   185  		u, err := strconv.Atoi(gidString)
   186  		if err != nil {
   187  			return fmt.Errorf("Unable to convert user's group to int %s: %v", gidString, err)
   188  		}
   189  
   190  		gids = append(gids, uint32(u))
   191  	}
   192  
   193  	// Convert the uid and gid
   194  	uid, err := strconv.ParseUint(u.Uid, 10, 32)
   195  	if err != nil {
   196  		return fmt.Errorf("Unable to convert userid to uint32: %s", err)
   197  	}
   198  	gid, err := strconv.ParseUint(u.Gid, 10, 32)
   199  	if err != nil {
   200  		return fmt.Errorf("Unable to convert groupid to uint32: %s", err)
   201  	}
   202  
   203  	// Set the command to run as that user and group.
   204  	if e.cmd.SysProcAttr == nil {
   205  		e.cmd.SysProcAttr = &syscall.SysProcAttr{}
   206  	}
   207  	if e.cmd.SysProcAttr.Credential == nil {
   208  		e.cmd.SysProcAttr.Credential = &syscall.Credential{}
   209  	}
   210  	e.cmd.SysProcAttr.Credential.Uid = uint32(uid)
   211  	e.cmd.SysProcAttr.Credential.Gid = uint32(gid)
   212  	e.cmd.SysProcAttr.Credential.Groups = gids
   213  
   214  	e.logger.Printf("[DEBUG] executor: running as user:group %d:%d with group membership in %v", uid, gid, gids)
   215  
   216  	return nil
   217  }
   218  
   219  // configureChroot configures a chroot
   220  func (e *UniversalExecutor) configureChroot() error {
   221  	if e.cmd.SysProcAttr == nil {
   222  		e.cmd.SysProcAttr = &syscall.SysProcAttr{}
   223  	}
   224  	e.cmd.SysProcAttr.Chroot = e.ctx.TaskDir
   225  	e.cmd.Dir = "/"
   226  
   227  	e.fsIsolationEnforced = true
   228  	return nil
   229  }
   230  
   231  // getAllPids returns the pids of all the processes spun up by the executor. We
   232  // use the libcontainer apis to get the pids when the user is using cgroup
   233  // isolation and we scan the entire process table if the user is not using any
   234  // isolation
   235  func (e *UniversalExecutor) getAllPids() (map[int]*nomadPid, error) {
   236  	if e.command.ResourceLimits {
   237  		manager := getCgroupManager(e.resConCtx.groups, e.resConCtx.cgPaths)
   238  		pids, err := manager.GetAllPids()
   239  		if err != nil {
   240  			return nil, err
   241  		}
   242  		np := make(map[int]*nomadPid, len(pids))
   243  		for _, pid := range pids {
   244  			np[pid] = &nomadPid{
   245  				pid:           pid,
   246  				cpuStatsTotal: stats.NewCpuStats(),
   247  				cpuStatsSys:   stats.NewCpuStats(),
   248  				cpuStatsUser:  stats.NewCpuStats(),
   249  			}
   250  		}
   251  		return np, nil
   252  	}
   253  	allProcesses, err := ps.Processes()
   254  	if err != nil {
   255  		return nil, err
   256  	}
   257  	return e.scanPids(os.Getpid(), allProcesses)
   258  }
   259  
   260  // destroyCgroup kills all processes in the cgroup and removes the cgroup
   261  // configuration from the host. This function is idempotent.
   262  func DestroyCgroup(groups *cgroupConfig.Cgroup, cgPaths map[string]string, executorPid int) error {
   263  	mErrs := new(multierror.Error)
   264  	if groups == nil {
   265  		return fmt.Errorf("Can't destroy: cgroup configuration empty")
   266  	}
   267  
   268  	// Move the executor into the global cgroup so that the task specific
   269  	// cgroup can be destroyed.
   270  	nilGroup := &cgroupConfig.Cgroup{}
   271  	nilGroup.Path = "/"
   272  	nilGroup.Resources = groups.Resources
   273  	nilManager := getCgroupManager(nilGroup, nil)
   274  	err := nilManager.Apply(executorPid)
   275  	if err != nil && !strings.Contains(err.Error(), "no such process") {
   276  		return fmt.Errorf("failed to remove executor pid %d: %v", executorPid, err)
   277  	}
   278  
   279  	// Freeze the Cgroup so that it can not continue to fork/exec.
   280  	manager := getCgroupManager(groups, cgPaths)
   281  	err = manager.Freeze(cgroupConfig.Frozen)
   282  	if err != nil && !strings.Contains(err.Error(), "no such file or directory") {
   283  		return fmt.Errorf("failed to freeze cgroup: %v", err)
   284  	}
   285  
   286  	var procs []*os.Process
   287  	pids, err := manager.GetAllPids()
   288  	if err != nil {
   289  		multierror.Append(mErrs, fmt.Errorf("error getting pids: %v", err))
   290  
   291  		// Unfreeze the cgroup.
   292  		err = manager.Freeze(cgroupConfig.Thawed)
   293  		if err != nil && !strings.Contains(err.Error(), "no such file or directory") {
   294  			multierror.Append(mErrs, fmt.Errorf("failed to unfreeze cgroup: %v", err))
   295  		}
   296  		return mErrs.ErrorOrNil()
   297  	}
   298  
   299  	// Kill the processes in the cgroup
   300  	for _, pid := range pids {
   301  		proc, err := os.FindProcess(pid)
   302  		if err != nil {
   303  			multierror.Append(mErrs, fmt.Errorf("error finding process %v: %v", pid, err))
   304  			continue
   305  		}
   306  
   307  		procs = append(procs, proc)
   308  		if e := proc.Kill(); e != nil {
   309  			multierror.Append(mErrs, fmt.Errorf("error killing process %v: %v", pid, e))
   310  		}
   311  	}
   312  
   313  	// Unfreeze the cgroug so we can wait.
   314  	err = manager.Freeze(cgroupConfig.Thawed)
   315  	if err != nil && !strings.Contains(err.Error(), "no such file or directory") {
   316  		multierror.Append(mErrs, fmt.Errorf("failed to unfreeze cgroup: %v", err))
   317  	}
   318  
   319  	// Wait on the killed processes to ensure they are cleaned up.
   320  	for _, proc := range procs {
   321  		// Don't capture the error because we expect this to fail for
   322  		// processes we didn't fork.
   323  		proc.Wait()
   324  	}
   325  
   326  	// Remove the cgroup.
   327  	if err := manager.Destroy(); err != nil {
   328  		multierror.Append(mErrs, fmt.Errorf("failed to delete the cgroup directories: %v", err))
   329  	}
   330  	return mErrs.ErrorOrNil()
   331  }
   332  
   333  // getCgroupManager returns the correct libcontainer cgroup manager.
   334  func getCgroupManager(groups *cgroupConfig.Cgroup, paths map[string]string) cgroups.Manager {
   335  	return &cgroupFs.Manager{Cgroups: groups, Paths: paths}
   336  }