github.com/emate/nomad@v0.8.2-wo-binpacking/client/driver/executor/executor_linux.go

package executor

import (
	"fmt"
	"os"
	"os/user"
	"path/filepath"
	"strconv"
	"strings"
	"syscall"
	"time"

	"github.com/hashicorp/go-multierror"
	"github.com/mitchellh/go-ps"
	"github.com/opencontainers/runc/libcontainer/cgroups"
	cgroupFs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
	cgroupConfig "github.com/opencontainers/runc/libcontainer/configs"

	"github.com/hashicorp/nomad/client/stats"
	cstructs "github.com/hashicorp/nomad/client/structs"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/structs"
)

var (
	// The statistics the executor exposes when using cgroups
	ExecutorCgroupMeasuredMemStats = []string{"RSS", "Cache", "Swap", "Max Usage", "Kernel Usage", "Kernel Max Usage"}
	ExecutorCgroupMeasuredCpuStats = []string{"System Mode", "User Mode", "Throttled Periods", "Throttled Time", "Percent"}
)

// configureIsolation configures chroot and creates cgroups
func (e *UniversalExecutor) configureIsolation() error {
	if e.command.FSIsolation {
		if err := e.configureChroot(); err != nil {
			return err
		}
	}

	if e.command.ResourceLimits {
		if err := e.configureCgroups(e.ctx.Task.Resources); err != nil {
			return fmt.Errorf("error creating cgroups: %v", err)
		}
	}
	return nil
}

// applyLimits puts a process in a pre-configured cgroup
func (e *UniversalExecutor) applyLimits(pid int) error {
	if !e.command.ResourceLimits {
		return nil
	}

	// Entering the process in the cgroup
	manager := getCgroupManager(e.resConCtx.groups, nil)
	if err := manager.Apply(pid); err != nil {
		e.logger.Printf("[ERR] executor: error applying pid to cgroup: %v", err)
		return err
	}
	e.resConCtx.cgPaths = manager.GetPaths()
	cgConfig := cgroupConfig.Config{Cgroups: e.resConCtx.groups}
	if err := manager.Set(&cgConfig); err != nil {
		e.logger.Printf("[ERR] executor: error setting cgroup config: %v", err)
		if er := DestroyCgroup(e.resConCtx.groups, e.resConCtx.cgPaths, os.Getpid()); er != nil {
			e.logger.Printf("[ERR] executor: error destroying cgroup: %v", er)
		}
		return err
	}
	return nil
}
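
// A note on the sequence above, based on the expected behaviour of
// libcontainer's cgroupfs manager: Apply creates the per-subsystem cgroup
// directories and joins pid to them, GetPaths records those directories so
// that later calls (Stats, DestroyCgroup) address the same cgroup, and Set
// writes the limits from e.resConCtx.groups.Resources into the cgroup files.
// If writing the limits fails, the half-created cgroup is torn down with the
// executor's own pid so no empty hierarchy is left behind.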

// configureCgroups converts a Nomad Resources specification into the equivalent
// cgroup configuration. It returns an error if the resources are invalid.
func (e *UniversalExecutor) configureCgroups(resources *structs.Resources) error {
	e.resConCtx.groups = &cgroupConfig.Cgroup{}
	e.resConCtx.groups.Resources = &cgroupConfig.Resources{}
	cgroupName := uuid.Generate()
	e.resConCtx.groups.Path = filepath.Join("/nomad", cgroupName)

	// TODO: verify this is needed for things like network access
	e.resConCtx.groups.Resources.AllowAllDevices = true

	if resources.MemoryMB > 0 {
		// Total amount of memory the task is allowed to consume
		e.resConCtx.groups.Resources.Memory = int64(resources.MemoryMB * 1024 * 1024)
		// Disable swapping of this cgroup's memory to avoid issues on the machine
		var memSwappiness int64 = 0
		e.resConCtx.groups.Resources.MemorySwappiness = &memSwappiness
	}

	if resources.CPU < 2 {
		return fmt.Errorf("resources.CPU must be equal to or greater than 2: %v", resources.CPU)
	}

	// Set the relative CPU shares for this cgroup.
	e.resConCtx.groups.Resources.CpuShares = int64(resources.CPU)

	if resources.IOPS != 0 {
		// Validate it is in an acceptable range.
		if resources.IOPS < 10 || resources.IOPS > 1000 {
			return fmt.Errorf("resources.IOPS must be between 10 and 1000: %d", resources.IOPS)
		}

		e.resConCtx.groups.Resources.BlkioWeight = uint16(resources.IOPS)
	}

	return nil
}
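
// Illustrative example: a task declaring resources with CPU = 500,
// MemoryMB = 256 and IOPS = 500 would, under the rules above, yield roughly
// the following cgroup settings:
//
//	Memory           = 256 * 1024 * 1024 = 268435456 bytes (hard limit)
//	MemorySwappiness = 0    (this cgroup's pages are kept off swap)
//	CpuShares        = 500  (relative weight, not a hard cap)
//	BlkioWeight      = 500  (validated to lie within [10, 1000])
//
// CpuShares only matters under contention: a cgroup with 1000 shares gets
// roughly twice the CPU time of one with 500 when both are runnable on a
// saturated host.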

// Stats reports the resource utilization of the cgroup. If there is no resource
// isolation, we aggregate the resource utilization of all the pids launched by
// the executor.
func (e *UniversalExecutor) Stats() (*cstructs.TaskResourceUsage, error) {
	if !e.command.ResourceLimits {
		pidStats, err := e.pidStats()
		if err != nil {
			return nil, err
		}
		return e.aggregatedResourceUsage(pidStats), nil
	}
	ts := time.Now()
	manager := getCgroupManager(e.resConCtx.groups, e.resConCtx.cgPaths)
	stats, err := manager.GetStats()
	if err != nil {
		return nil, err
	}

	// Memory Related Stats
	swap := stats.MemoryStats.SwapUsage
	maxUsage := stats.MemoryStats.Usage.MaxUsage
	rss := stats.MemoryStats.Stats["rss"]
	cache := stats.MemoryStats.Stats["cache"]
	ms := &cstructs.MemoryStats{
		RSS:            rss,
		Cache:          cache,
		Swap:           swap.Usage,
		MaxUsage:       maxUsage,
		KernelUsage:    stats.MemoryStats.KernelUsage.Usage,
		KernelMaxUsage: stats.MemoryStats.KernelUsage.MaxUsage,
		Measured:       ExecutorCgroupMeasuredMemStats,
	}

	// CPU Related Stats
	totalProcessCPUUsage := float64(stats.CpuStats.CpuUsage.TotalUsage)
	userModeTime := float64(stats.CpuStats.CpuUsage.UsageInUsermode)
	kernelModeTime := float64(stats.CpuStats.CpuUsage.UsageInKernelmode)

	totalPercent := e.totalCpuStats.Percent(totalProcessCPUUsage)
	cs := &cstructs.CpuStats{
		SystemMode:       e.systemCpuStats.Percent(kernelModeTime),
		UserMode:         e.userCpuStats.Percent(userModeTime),
		Percent:          totalPercent,
		ThrottledPeriods: stats.CpuStats.ThrottlingData.ThrottledPeriods,
		ThrottledTime:    stats.CpuStats.ThrottlingData.ThrottledTime,
		TotalTicks:       e.systemCpuStats.TicksConsumed(totalPercent),
		Measured:         ExecutorCgroupMeasuredCpuStats,
	}
	taskResUsage := cstructs.TaskResourceUsage{
		ResourceUsage: &cstructs.ResourceUsage{
			MemoryStats: ms,
			CpuStats:    cs,
		},
		Timestamp: ts.UTC().UnixNano(),
	}
	if pidStats, err := e.pidStats(); err == nil {
		taskResUsage.Pids = pidStats
	}
	return &taskResUsage, nil
}
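
// A note on units, assuming the usual semantics of the client/stats helpers:
// the cgroup CPU counters (TotalUsage, UsageInUsermode, UsageInKernelmode)
// are cumulative nanoseconds since the cgroup was created, so the CpuStats
// helpers are presumed to turn the delta since the previous Stats call into a
// percentage of the elapsed wall-clock time, and TicksConsumed to translate
// that percentage into Nomad's MHz-based ticks. The memory figures come back
// from the cgroup's memory.stat and usage files and are reported in bytes.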

// runAs takes a user id as a string, looks up the user, and configures the
// command to execute as that user.
func (e *UniversalExecutor) runAs(userid string) error {
	u, err := user.Lookup(userid)
	if err != nil {
		return fmt.Errorf("Failed to identify user %v: %v", userid, err)
	}

	// Get the groups the user is a part of
	gidStrings, err := u.GroupIds()
	if err != nil {
		return fmt.Errorf("Unable to lookup user's group membership: %v", err)
	}

	gids := make([]uint32, 0, len(gidStrings))
	for _, gidString := range gidStrings {
		gidInt, err := strconv.Atoi(gidString)
		if err != nil {
			return fmt.Errorf("Unable to convert user's group to int %s: %v", gidString, err)
		}

		gids = append(gids, uint32(gidInt))
	}

	// Convert the uid and gid
	uid, err := strconv.ParseUint(u.Uid, 10, 32)
	if err != nil {
		return fmt.Errorf("Unable to convert userid to uint32: %s", err)
	}
	gid, err := strconv.ParseUint(u.Gid, 10, 32)
	if err != nil {
		return fmt.Errorf("Unable to convert groupid to uint32: %s", err)
	}

	// Set the command to run as that user and group.
	if e.cmd.SysProcAttr == nil {
		e.cmd.SysProcAttr = &syscall.SysProcAttr{}
	}
	if e.cmd.SysProcAttr.Credential == nil {
		e.cmd.SysProcAttr.Credential = &syscall.Credential{}
	}
	e.cmd.SysProcAttr.Credential.Uid = uint32(uid)
	e.cmd.SysProcAttr.Credential.Gid = uint32(gid)
	e.cmd.SysProcAttr.Credential.Groups = gids

	e.logger.Printf("[DEBUG] executor: running as user:group %d:%d with group membership in %v", uid, gid, gids)

	return nil
}
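
// For illustration only: for a hypothetical "nobody" user whose uid and gid
// are both 65534 and which belongs to no other groups, runAs would leave the
// command configured as:
//
//	e.cmd.SysProcAttr.Credential = &syscall.Credential{
//		Uid:    65534,
//		Gid:    65534,
//		Groups: []uint32{65534},
//	}
//
// Switching credentials to another user requires the executor itself to run
// with sufficient privilege (root, or CAP_SETUID and CAP_SETGID).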

// configureChroot configures a chroot
func (e *UniversalExecutor) configureChroot() error {
	if e.cmd.SysProcAttr == nil {
		e.cmd.SysProcAttr = &syscall.SysProcAttr{}
	}
	e.cmd.SysProcAttr.Chroot = e.ctx.TaskDir
	e.cmd.Dir = "/"

	e.fsIsolationEnforced = true
	return nil
}
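
// Two details worth noting here: SysProcAttr.Chroot relies on chroot(2), so
// the executor needs to run as root (or with CAP_SYS_CHROOT) for FSIsolation
// to take effect, and e.ctx.TaskDir is assumed to have been populated with
// the task binary and its dependencies elsewhere in the client before launch.
// The working directory is reset to "/" because the old working directory has
// no meaning inside the new root.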

// getAllPids returns the pids of all the processes spun up by the executor. We
// use the libcontainer APIs to get the pids when the user is using cgroup
// isolation, and scan the entire process table when no isolation is in use.
func (e *UniversalExecutor) getAllPids() (map[int]*nomadPid, error) {
	if e.command.ResourceLimits {
		manager := getCgroupManager(e.resConCtx.groups, e.resConCtx.cgPaths)
		pids, err := manager.GetAllPids()
		if err != nil {
			return nil, err
		}
		np := make(map[int]*nomadPid, len(pids))
		for _, pid := range pids {
			np[pid] = &nomadPid{
				pid:           pid,
				cpuStatsTotal: stats.NewCpuStats(),
				cpuStatsSys:   stats.NewCpuStats(),
				cpuStatsUser:  stats.NewCpuStats(),
			}
		}
		return np, nil
	}
	allProcesses, err := ps.Processes()
	if err != nil {
		return nil, err
	}
	return e.scanPids(os.Getpid(), allProcesses)
}

// DestroyCgroup kills all processes in the cgroup and removes the cgroup
// configuration from the host. This function is idempotent.
func DestroyCgroup(groups *cgroupConfig.Cgroup, cgPaths map[string]string, executorPid int) error {
	mErrs := new(multierror.Error)
	if groups == nil {
		return fmt.Errorf("Can't destroy: cgroup configuration empty")
	}

	// Move the executor into the global cgroup so that the task specific
	// cgroup can be destroyed.
	nilGroup := &cgroupConfig.Cgroup{}
	nilGroup.Path = "/"
	nilGroup.Resources = groups.Resources
	nilManager := getCgroupManager(nilGroup, nil)
	err := nilManager.Apply(executorPid)
	if err != nil && !strings.Contains(err.Error(), "no such process") {
		return fmt.Errorf("failed to remove executor pid %d: %v", executorPid, err)
	}

	// Freeze the Cgroup so that it can not continue to fork/exec.
	manager := getCgroupManager(groups, cgPaths)
	err = manager.Freeze(cgroupConfig.Frozen)
	if err != nil && !strings.Contains(err.Error(), "no such file or directory") {
		return fmt.Errorf("failed to freeze cgroup: %v", err)
	}

	var procs []*os.Process
	pids, err := manager.GetAllPids()
	if err != nil {
		multierror.Append(mErrs, fmt.Errorf("error getting pids: %v", err))

		// Unfreeze the cgroup.
		err = manager.Freeze(cgroupConfig.Thawed)
		if err != nil && !strings.Contains(err.Error(), "no such file or directory") {
			multierror.Append(mErrs, fmt.Errorf("failed to unfreeze cgroup: %v", err))
		}
		return mErrs.ErrorOrNil()
	}

	// Kill the processes in the cgroup
	for _, pid := range pids {
		proc, err := os.FindProcess(pid)
		if err != nil {
			multierror.Append(mErrs, fmt.Errorf("error finding process %v: %v", pid, err))
			continue
		}

		procs = append(procs, proc)
		if e := proc.Kill(); e != nil {
			multierror.Append(mErrs, fmt.Errorf("error killing process %v: %v", pid, e))
		}
	}

	// Unfreeze the cgroup so we can wait.
	err = manager.Freeze(cgroupConfig.Thawed)
	if err != nil && !strings.Contains(err.Error(), "no such file or directory") {
		multierror.Append(mErrs, fmt.Errorf("failed to unfreeze cgroup: %v", err))
	}

	// Wait on the killed processes to ensure they are cleaned up.
	for _, proc := range procs {
		// Don't capture the error because we expect this to fail for
		// processes we didn't fork.
		proc.Wait()
	}

	// Remove the cgroup.
	if err := manager.Destroy(); err != nil {
		multierror.Append(mErrs, fmt.Errorf("failed to delete the cgroup directories: %v", err))
	}
	return mErrs.ErrorOrNil()
}
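
// The order of operations above matters: freezing the cgroup first stops
// every task in it, so nothing can fork between GetAllPids and Kill and
// escape the sweep. The cgroup is thawed again before waiting because frozen
// processes cannot act on the delivered SIGKILL until they are allowed to
// run, and a frozen cgroup would therefore never empty out. Wait reaps the
// children this executor forked (and is expected to fail harmlessly for the
// rest), and Destroy can only remove the cgroup directories once they contain
// no tasks.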

// getCgroupManager returns the correct libcontainer cgroup manager.
func getCgroupManager(groups *cgroupConfig.Cgroup, paths map[string]string) cgroups.Manager {
	return &cgroupFs.Manager{Cgroups: groups, Paths: paths}
}
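
// Rough lifecycle of the pieces in this file, as a sketch (when each call is
// made is inferred from the rest of the executor, not shown here):
//
//	e.configureIsolation()                      // at launch: chroot + cgroup config
//	e.applyLimits(pid)                          // once the task process exists
//	e.Stats()                                   // periodically, for telemetry
//	DestroyCgroup(groups, cgPaths, os.Getpid()) // at teardown
//
// getCgroupManager always returns libcontainer's cgroupfs manager, so all of
// the above assumes cgroup v1 hierarchies mounted in the usual locations;
// cgroup v2-only or purely systemd-managed hosts are not handled here.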