github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/runsc/cgroup/cgroup.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package cgroup provides an interface to read and write configuration to
    16  // cgroup.
    17  package cgroup
    18  
    19  import (
    20  	"bufio"
    21  	"context"
    22  	"errors"
    23  	"fmt"
    24  	"io"
    25  	"io/ioutil"
    26  	"os"
    27  	"path/filepath"
    28  	"strconv"
    29  	"strings"
    30  	"time"
    31  
    32  	"github.com/cenkalti/backoff"
    33  	specs "github.com/opencontainers/runtime-spec/specs-go"
    34  	"golang.org/x/sys/unix"
    35  	"github.com/SagerNet/gvisor/pkg/cleanup"
    36  	"github.com/SagerNet/gvisor/pkg/log"
    37  )
    38  
    39  const (
    40  	cgroupRoot = "/sys/fs/cgroup"
    41  )
    42  
// controllers maps the name of every cgroup controller handled by this
// package to the implementation used to configure it.
var controllers = map[string]controller{
	"blkio":    &blockIO{},
	"cpu":      &cpu{},
	"cpuset":   &cpuSet{},
	"hugetlb":  &hugeTLB{},
	"memory":   &memory{},
	"net_cls":  &networkClass{},
	"net_prio": &networkPrio{},
	"pids":     &pids{},

	// These controllers either don't have anything in the OCI spec or are
	// irrelevant for a sandbox.
	"cpuacct":    &noop{},
	"devices":    &noop{},
	"freezer":    &noop{},
	"perf_event": &noop{},
	"rdma":       &noop{isOptional: true},
	"systemd":    &noop{},
}
    62  
    63  // IsOnlyV2 checks whether cgroups V2 is enabled and V1 is not.
    64  func IsOnlyV2() bool {
    65  	var stat unix.Statfs_t
    66  	if err := unix.Statfs(cgroupRoot, &stat); err != nil {
    67  		// It's not used for anything important, assume not V2 on failure.
    68  		return false
    69  	}
    70  	return stat.Type == unix.CGROUP2_SUPER_MAGIC
    71  }
    72  
    73  func setOptionalValueInt(path, name string, val *int64) error {
    74  	if val == nil || *val == 0 {
    75  		return nil
    76  	}
    77  	str := strconv.FormatInt(*val, 10)
    78  	return setValue(path, name, str)
    79  }
    80  
    81  func setOptionalValueUint(path, name string, val *uint64) error {
    82  	if val == nil || *val == 0 {
    83  		return nil
    84  	}
    85  	str := strconv.FormatUint(*val, 10)
    86  	return setValue(path, name, str)
    87  }
    88  
    89  func setOptionalValueUint32(path, name string, val *uint32) error {
    90  	if val == nil || *val == 0 {
    91  		return nil
    92  	}
    93  	str := strconv.FormatUint(uint64(*val), 10)
    94  	return setValue(path, name, str)
    95  }
    96  
    97  func setOptionalValueUint16(path, name string, val *uint16) error {
    98  	if val == nil || *val == 0 {
    99  		return nil
   100  	}
   101  	str := strconv.FormatUint(uint64(*val), 10)
   102  	return setValue(path, name, str)
   103  }
   104  
   105  func setValue(path, name, data string) error {
   106  	fullpath := filepath.Join(path, name)
   107  
   108  	// Retry writes on EINTR; see:
   109  	//    https://github.com/golang/go/issues/38033
   110  	for {
   111  		err := ioutil.WriteFile(fullpath, []byte(data), 0700)
   112  		if err == nil {
   113  			return nil
   114  		} else if !errors.Is(err, unix.EINTR) {
   115  			return err
   116  		}
   117  	}
   118  }
   119  
   120  func getValue(path, name string) (string, error) {
   121  	fullpath := filepath.Join(path, name)
   122  	out, err := ioutil.ReadFile(fullpath)
   123  	if err != nil {
   124  		return "", err
   125  	}
   126  	return string(out), nil
   127  }
   128  
   129  func getInt(path, name string) (int, error) {
   130  	s, err := getValue(path, name)
   131  	if err != nil {
   132  		return 0, err
   133  	}
   134  	return strconv.Atoi(strings.TrimSpace(s))
   135  }
   136  
   137  // fillFromAncestor sets the value of a cgroup file from the first ancestor
   138  // that has content. It does nothing if the file in 'path' has already been set.
   139  func fillFromAncestor(path string) (string, error) {
   140  	out, err := ioutil.ReadFile(path)
   141  	if err != nil {
   142  		return "", err
   143  	}
   144  	val := strings.TrimSpace(string(out))
   145  	if val != "" {
   146  		// File is set, stop here.
   147  		return val, nil
   148  	}
   149  
   150  	// File is not set, recurse to parent and then set here.
   151  	name := filepath.Base(path)
   152  	parent := filepath.Dir(filepath.Dir(path))
   153  	val, err = fillFromAncestor(filepath.Join(parent, name))
   154  	if err != nil {
   155  		return "", err
   156  	}
   157  
   158  	// Retry writes on EINTR; see:
   159  	//    https://github.com/golang/go/issues/38033
   160  	for {
   161  		err := ioutil.WriteFile(path, []byte(val), 0700)
   162  		if err == nil {
   163  			break
   164  		} else if !errors.Is(err, unix.EINTR) {
   165  			return "", err
   166  		}
   167  	}
   168  	return val, nil
   169  }
   170  
   171  // countCpuset returns the number of CPU in a string formatted like:
   172  // 		"0-2,7,12-14  # bits 0, 1, 2, 7, 12, 13, and 14 set" - man 7 cpuset
   173  func countCpuset(cpuset string) (int, error) {
   174  	var count int
   175  	for _, p := range strings.Split(cpuset, ",") {
   176  		interval := strings.Split(p, "-")
   177  		switch len(interval) {
   178  		case 1:
   179  			if _, err := strconv.Atoi(interval[0]); err != nil {
   180  				return 0, err
   181  			}
   182  			count++
   183  
   184  		case 2:
   185  			start, err := strconv.Atoi(interval[0])
   186  			if err != nil {
   187  				return 0, err
   188  			}
   189  			end, err := strconv.Atoi(interval[1])
   190  			if err != nil {
   191  				return 0, err
   192  			}
   193  			if start < 0 || end < 0 || start > end {
   194  				return 0, fmt.Errorf("invalid cpuset: %q", p)
   195  			}
   196  			count += end - start + 1
   197  
   198  		default:
   199  			return 0, fmt.Errorf("invalid cpuset: %q", p)
   200  		}
   201  	}
   202  	return count, nil
   203  }
   204  
   205  // loadPaths loads cgroup paths for given 'pid', may be set to 'self'.
   206  func loadPaths(pid string) (map[string]string, error) {
   207  	procCgroup, err := os.Open(filepath.Join("/proc", pid, "cgroup"))
   208  	if err != nil {
   209  		return nil, err
   210  	}
   211  	defer procCgroup.Close()
   212  
   213  	// Load mountinfo for the current process, because it's where cgroups is
   214  	// being accessed from.
   215  	mountinfo, err := os.Open(filepath.Join("/proc/self/mountinfo"))
   216  	if err != nil {
   217  		return nil, err
   218  	}
   219  	defer mountinfo.Close()
   220  
   221  	return loadPathsHelper(procCgroup, mountinfo)
   222  }
   223  
   224  func loadPathsHelper(cgroup, mountinfo io.Reader) (map[string]string, error) {
   225  	paths := make(map[string]string)
   226  
   227  	scanner := bufio.NewScanner(cgroup)
   228  	for scanner.Scan() {
   229  		// Format: ID:[name=]controller1,controller2:path
   230  		// Example: 2:cpu,cpuacct:/user.slice
   231  		tokens := strings.Split(scanner.Text(), ":")
   232  		if len(tokens) != 3 {
   233  			return nil, fmt.Errorf("invalid cgroups file, line: %q", scanner.Text())
   234  		}
   235  		if len(tokens[1]) == 0 {
   236  			continue
   237  		}
   238  		for _, ctrlr := range strings.Split(tokens[1], ",") {
   239  			// Remove prefix for cgroups with no controller, eg. systemd.
   240  			ctrlr = strings.TrimPrefix(ctrlr, "name=")
   241  			// Discard unknown controllers.
   242  			if _, ok := controllers[ctrlr]; ok {
   243  				paths[ctrlr] = tokens[2]
   244  			}
   245  		}
   246  	}
   247  	if err := scanner.Err(); err != nil {
   248  		return nil, err
   249  	}
   250  
   251  	// For nested containers, in /proc/[pid]/cgroup we see paths from host,
   252  	// which don't exist in container, so recover the container paths here by
   253  	// double-checking with /proc/[pid]/mountinfo
   254  	mountScanner := bufio.NewScanner(mountinfo)
   255  	for mountScanner.Scan() {
   256  		// Format: ID parent major:minor root mount-point options opt-fields - fs-type source super-options
   257  		// Example: 39 32 0:34 / /sys/fs/cgroup/devices rw,noexec shared:18 - cgroup cgroup rw,devices
   258  		fields := strings.Fields(mountScanner.Text())
   259  		if len(fields) < 9 || fields[len(fields)-3] != "cgroup" {
   260  			// Skip mounts that are not cgroup mounts.
   261  			continue
   262  		}
   263  		// Cgroup controller type is in the super-options field.
   264  		superOptions := strings.Split(fields[len(fields)-1], ",")
   265  		for _, opt := range superOptions {
   266  			// Remove prefix for cgroups with no controller, eg. systemd.
   267  			opt = strings.TrimPrefix(opt, "name=")
   268  
   269  			// Only considers cgroup controllers that are registered, and skip other
   270  			// irrelevant options, e.g. rw.
   271  			if cgroupPath, ok := paths[opt]; ok {
   272  				rootDir := fields[3]
   273  				if rootDir != "/" {
   274  					// When cgroup is in submount, remove repeated path components from
   275  					// cgroup path to avoid duplicating them.
   276  					relCgroupPath, err := filepath.Rel(rootDir, cgroupPath)
   277  					if err != nil {
   278  						return nil, err
   279  					}
   280  					paths[opt] = relCgroupPath
   281  				}
   282  			}
   283  		}
   284  	}
   285  	if err := mountScanner.Err(); err != nil {
   286  		return nil, err
   287  	}
   288  
   289  	return paths, nil
   290  }
   291  
// Cgroup represents a group inside all controllers. For example:
//   Name='/foo/bar' maps to /sys/fs/cgroup/<controller>/foo/bar on
//   all controllers.
//
// If Name is relative, it uses the parent cgroup path to determine the
// location. For example:
//   Name='foo/bar' and Parent[ctrl]="/user.slice", then it will map to
//   /sys/fs/cgroup/<ctrl>/user.slice/foo/bar
type Cgroup struct {
	// Name is the cgroup path, either absolute or relative to Parents.
	Name    string            `json:"name"`
	// Parents maps a controller name to the path that Name is joined onto
	// for that controller.
	Parents map[string]string `json:"parents"`
	// Own records the controllers whose directory was created by Install.
	// Uninstall only removes directories for these controllers.
	Own     map[string]bool   `json:"own"`
}
   305  
   306  // NewFromSpec creates a new Cgroup instance if the spec includes a cgroup path.
   307  // Returns nil otherwise. Cgroup paths are loaded based on the current process.
   308  func NewFromSpec(spec *specs.Spec) (*Cgroup, error) {
   309  	if spec.Linux == nil || spec.Linux.CgroupsPath == "" {
   310  		return nil, nil
   311  	}
   312  	return new("self", spec.Linux.CgroupsPath)
   313  }
   314  
   315  // NewFromPid loads cgroup for the given process.
   316  func NewFromPid(pid int) (*Cgroup, error) {
   317  	return new(strconv.Itoa(pid), "")
   318  }
   319  
   320  func new(pid, cgroupsPath string) (*Cgroup, error) {
   321  	var parents map[string]string
   322  
   323  	// If path is relative, load cgroup paths for the process to build the
   324  	// relative paths.
   325  	if !filepath.IsAbs(cgroupsPath) {
   326  		var err error
   327  		parents, err = loadPaths(pid)
   328  		if err != nil {
   329  			return nil, fmt.Errorf("finding current cgroups: %w", err)
   330  		}
   331  	}
   332  	cg := &Cgroup{
   333  		Name:    cgroupsPath,
   334  		Parents: parents,
   335  		Own:     make(map[string]bool),
   336  	}
   337  	log.Debugf("New cgroup for pid: %s, %+v", pid, cg)
   338  	return cg, nil
   339  }
   340  
   341  // Install creates and configures cgroups according to 'res'. If cgroup path
   342  // already exists, it means that the caller has already provided a
   343  // pre-configured cgroups, and 'res' is ignored.
   344  func (c *Cgroup) Install(res *specs.LinuxResources) error {
   345  	log.Debugf("Installing cgroup path %q", c.Name)
   346  
   347  	// Clean up partially created cgroups on error. Errors during cleanup itself
   348  	// are ignored.
   349  	clean := cleanup.Make(func() { _ = c.Uninstall() })
   350  	defer clean.Clean()
   351  
   352  	// Controllers can be symlinks to a group of controllers (e.g. cpu,cpuacct).
   353  	// So first check what directories need to be created. Otherwise, when
   354  	// the directory for one of the controllers in a group is created, it will
   355  	// make it seem like the directory already existed and it's not owned by the
   356  	// other controllers in the group.
   357  	var missing []string
   358  	for key := range controllers {
   359  		path := c.MakePath(key)
   360  		if _, err := os.Stat(path); err != nil {
   361  			missing = append(missing, key)
   362  		} else {
   363  			log.Debugf("Using pre-created cgroup %q: %q", key, path)
   364  		}
   365  	}
   366  	for _, key := range missing {
   367  		ctrlr := controllers[key]
   368  		path := c.MakePath(key)
   369  		log.Debugf("Creating cgroup %q: %q", key, path)
   370  		if err := os.MkdirAll(path, 0755); err != nil {
   371  			if ctrlr.optional() && errors.Is(err, unix.EROFS) {
   372  				if err := ctrlr.skip(res); err != nil {
   373  					return err
   374  				}
   375  				log.Infof("Skipping cgroup %q", key)
   376  				continue
   377  			}
   378  			return err
   379  		}
   380  
   381  		// Only set controllers that were created by me.
   382  		c.Own[key] = true
   383  		if err := ctrlr.set(res, path); err != nil {
   384  			return err
   385  		}
   386  	}
   387  	clean.Release()
   388  	return nil
   389  }
   390  
   391  // Uninstall removes the settings done in Install(). If cgroup path already
   392  // existed when Install() was called, Uninstall is a noop.
   393  func (c *Cgroup) Uninstall() error {
   394  	log.Debugf("Deleting cgroup %q", c.Name)
   395  	for key := range controllers {
   396  		if !c.Own[key] {
   397  			// cgroup is managed by caller, don't touch it.
   398  			continue
   399  		}
   400  		path := c.MakePath(key)
   401  		log.Debugf("Removing cgroup controller for key=%q path=%q", key, path)
   402  
   403  		// If we try to remove the cgroup too soon after killing the
   404  		// sandbox we might get EBUSY, so we retry for a few seconds
   405  		// until it succeeds.
   406  		ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
   407  		defer cancel()
   408  		b := backoff.WithContext(backoff.NewConstantBackOff(100*time.Millisecond), ctx)
   409  		fn := func() error {
   410  			err := unix.Rmdir(path)
   411  			if os.IsNotExist(err) {
   412  				return nil
   413  			}
   414  			return err
   415  		}
   416  		if err := backoff.Retry(fn, b); err != nil {
   417  			return fmt.Errorf("removing cgroup path %q: %w", path, err)
   418  		}
   419  	}
   420  	return nil
   421  }
   422  
   423  // Join adds the current process to the all controllers. Returns function that
   424  // restores cgroup to the original state.
   425  func (c *Cgroup) Join() (func(), error) {
   426  	// First save the current state so it can be restored.
   427  	undo := func() {}
   428  	paths, err := loadPaths("self")
   429  	if err != nil {
   430  		return undo, err
   431  	}
   432  	var undoPaths []string
   433  	for ctrlr, path := range paths {
   434  		// Skip controllers we don't handle.
   435  		if _, ok := controllers[ctrlr]; ok {
   436  			fullPath := filepath.Join(cgroupRoot, ctrlr, path)
   437  			undoPaths = append(undoPaths, fullPath)
   438  		}
   439  	}
   440  
   441  	// Replace empty undo with the real thing before changes are made to cgroups.
   442  	undo = func() {
   443  		for _, path := range undoPaths {
   444  			log.Debugf("Restoring cgroup %q", path)
   445  			// Writing the value 0 to a cgroup.procs file causes
   446  			// the writing process to be moved to the corresponding
   447  			// cgroup. - cgroups(7).
   448  			if err := setValue(path, "cgroup.procs", "0"); err != nil {
   449  				log.Warningf("Error restoring cgroup %q: %v", path, err)
   450  			}
   451  		}
   452  	}
   453  
   454  	// Now join the cgroups.
   455  	for key, ctrlr := range controllers {
   456  		path := c.MakePath(key)
   457  		log.Debugf("Joining cgroup %q", path)
   458  		// Writing the value 0 to a cgroup.procs file causes the writing process to
   459  		// be moved to the corresponding cgroup - cgroups(7).
   460  		if err := setValue(path, "cgroup.procs", "0"); err != nil {
   461  			if ctrlr.optional() && os.IsNotExist(err) {
   462  				continue
   463  			}
   464  			return undo, err
   465  		}
   466  	}
   467  	return undo, nil
   468  }
   469  
   470  // CPUQuota returns the CFS CPU quota.
   471  func (c *Cgroup) CPUQuota() (float64, error) {
   472  	path := c.MakePath("cpu")
   473  	quota, err := getInt(path, "cpu.cfs_quota_us")
   474  	if err != nil {
   475  		return -1, err
   476  	}
   477  	period, err := getInt(path, "cpu.cfs_period_us")
   478  	if err != nil {
   479  		return -1, err
   480  	}
   481  	if quota <= 0 || period <= 0 {
   482  		return -1, err
   483  	}
   484  	return float64(quota) / float64(period), nil
   485  }
   486  
   487  // CPUUsage returns the total CPU usage of the cgroup.
   488  func (c *Cgroup) CPUUsage() (uint64, error) {
   489  	path := c.MakePath("cpuacct")
   490  	usage, err := getValue(path, "cpuacct.usage")
   491  	if err != nil {
   492  		return 0, err
   493  	}
   494  	return strconv.ParseUint(strings.TrimSpace(usage), 10, 64)
   495  }
   496  
   497  // NumCPU returns the number of CPUs configured in 'cpuset/cpuset.cpus'.
   498  func (c *Cgroup) NumCPU() (int, error) {
   499  	path := c.MakePath("cpuset")
   500  	cpuset, err := getValue(path, "cpuset.cpus")
   501  	if err != nil {
   502  		return 0, err
   503  	}
   504  	return countCpuset(strings.TrimSpace(cpuset))
   505  }
   506  
   507  // MemoryLimit returns the memory limit.
   508  func (c *Cgroup) MemoryLimit() (uint64, error) {
   509  	path := c.MakePath("memory")
   510  	limStr, err := getValue(path, "memory.limit_in_bytes")
   511  	if err != nil {
   512  		return 0, err
   513  	}
   514  	return strconv.ParseUint(strings.TrimSpace(limStr), 10, 64)
   515  }
   516  
   517  // MakePath builds a path to the given controller.
   518  func (c *Cgroup) MakePath(controllerName string) string {
   519  	path := c.Name
   520  	if parent, ok := c.Parents[controllerName]; ok {
   521  		path = filepath.Join(parent, c.Name)
   522  	}
   523  	return filepath.Join(cgroupRoot, controllerName, path)
   524  }
   525  
// controller is implemented by every entry of the controllers map. It lets
// Install and Join treat all cgroup controllers uniformly.
type controller interface {
	// optional controllers don't fail if not found.
	optional() bool
	// set applies resource limits to controller. The string argument is the
	// controller's cgroup directory.
	set(*specs.LinuxResources, string) error
	// skip is called when controller is not found to check if it can be safely
	// skipped or not based on the spec.
	skip(*specs.LinuxResources) error
}
   535  
   536  type noop struct {
   537  	isOptional bool
   538  }
   539  
   540  func (n *noop) optional() bool {
   541  	return n.isOptional
   542  }
   543  
   544  func (*noop) set(*specs.LinuxResources, string) error {
   545  	return nil
   546  }
   547  
   548  func (n *noop) skip(*specs.LinuxResources) error {
   549  	if !n.isOptional {
   550  		panic("cgroup controller is not optional")
   551  	}
   552  	return nil
   553  }
   554  
// mandatory is embedded by controllers that can never be skipped. It supplies
// the optional() and skip() halves of the controller interface.
type mandatory struct{}

// optional always reports false.
func (*mandatory) optional() bool {
	return false
}

// skip must never be called on a mandatory controller; it panics.
func (*mandatory) skip(*specs.LinuxResources) error {
	panic("cgroup controller is not optional")
}
   564  
   565  type memory struct {
   566  	mandatory
   567  }
   568  
   569  func (*memory) set(spec *specs.LinuxResources, path string) error {
   570  	if spec == nil || spec.Memory == nil {
   571  		return nil
   572  	}
   573  	if err := setOptionalValueInt(path, "memory.limit_in_bytes", spec.Memory.Limit); err != nil {
   574  		return err
   575  	}
   576  	if err := setOptionalValueInt(path, "memory.soft_limit_in_bytes", spec.Memory.Reservation); err != nil {
   577  		return err
   578  	}
   579  	if err := setOptionalValueInt(path, "memory.memsw.limit_in_bytes", spec.Memory.Swap); err != nil {
   580  		return err
   581  	}
   582  	if err := setOptionalValueInt(path, "memory.kmem.limit_in_bytes", spec.Memory.Kernel); err != nil {
   583  		return err
   584  	}
   585  	if err := setOptionalValueInt(path, "memory.kmem.tcp.limit_in_bytes", spec.Memory.KernelTCP); err != nil {
   586  		return err
   587  	}
   588  	if err := setOptionalValueUint(path, "memory.swappiness", spec.Memory.Swappiness); err != nil {
   589  		return err
   590  	}
   591  
   592  	if spec.Memory.DisableOOMKiller != nil && *spec.Memory.DisableOOMKiller {
   593  		if err := setValue(path, "memory.oom_control", "1"); err != nil {
   594  			return err
   595  		}
   596  	}
   597  	return nil
   598  }
   599  
   600  type cpu struct {
   601  	mandatory
   602  }
   603  
   604  func (*cpu) set(spec *specs.LinuxResources, path string) error {
   605  	if spec == nil || spec.CPU == nil {
   606  		return nil
   607  	}
   608  	if err := setOptionalValueUint(path, "cpu.shares", spec.CPU.Shares); err != nil {
   609  		return err
   610  	}
   611  	if err := setOptionalValueInt(path, "cpu.cfs_quota_us", spec.CPU.Quota); err != nil {
   612  		return err
   613  	}
   614  	if err := setOptionalValueUint(path, "cpu.cfs_period_us", spec.CPU.Period); err != nil {
   615  		return err
   616  	}
   617  	if err := setOptionalValueUint(path, "cpu.rt_period_us", spec.CPU.RealtimePeriod); err != nil {
   618  		return err
   619  	}
   620  	return setOptionalValueInt(path, "cpu.rt_runtime_us", spec.CPU.RealtimeRuntime)
   621  }
   622  
   623  type cpuSet struct {
   624  	mandatory
   625  }
   626  
   627  func (*cpuSet) set(spec *specs.LinuxResources, path string) error {
   628  	// cpuset.cpus and mems are required fields, but are not set on a new cgroup.
   629  	// If not set in the spec, get it from one of the ancestors cgroup.
   630  	if spec == nil || spec.CPU == nil || spec.CPU.Cpus == "" {
   631  		if _, err := fillFromAncestor(filepath.Join(path, "cpuset.cpus")); err != nil {
   632  			return err
   633  		}
   634  	} else {
   635  		if err := setValue(path, "cpuset.cpus", spec.CPU.Cpus); err != nil {
   636  			return err
   637  		}
   638  	}
   639  
   640  	if spec == nil || spec.CPU == nil || spec.CPU.Mems == "" {
   641  		_, err := fillFromAncestor(filepath.Join(path, "cpuset.mems"))
   642  		return err
   643  	}
   644  	return setValue(path, "cpuset.mems", spec.CPU.Mems)
   645  }
   646  
   647  type blockIO struct {
   648  	mandatory
   649  }
   650  
   651  func (*blockIO) set(spec *specs.LinuxResources, path string) error {
   652  	if spec == nil || spec.BlockIO == nil {
   653  		return nil
   654  	}
   655  
   656  	if err := setOptionalValueUint16(path, "blkio.weight", spec.BlockIO.Weight); err != nil {
   657  		return err
   658  	}
   659  	if err := setOptionalValueUint16(path, "blkio.leaf_weight", spec.BlockIO.LeafWeight); err != nil {
   660  		return err
   661  	}
   662  
   663  	for _, dev := range spec.BlockIO.WeightDevice {
   664  		if dev.Weight != nil {
   665  			val := fmt.Sprintf("%d:%d %d", dev.Major, dev.Minor, *dev.Weight)
   666  			if err := setValue(path, "blkio.weight_device", val); err != nil {
   667  				return err
   668  			}
   669  		}
   670  		if dev.LeafWeight != nil {
   671  			val := fmt.Sprintf("%d:%d %d", dev.Major, dev.Minor, *dev.LeafWeight)
   672  			if err := setValue(path, "blkio.leaf_weight_device", val); err != nil {
   673  				return err
   674  			}
   675  		}
   676  	}
   677  	if err := setThrottle(path, "blkio.throttle.read_bps_device", spec.BlockIO.ThrottleReadBpsDevice); err != nil {
   678  		return err
   679  	}
   680  	if err := setThrottle(path, "blkio.throttle.write_bps_device", spec.BlockIO.ThrottleWriteBpsDevice); err != nil {
   681  		return err
   682  	}
   683  	if err := setThrottle(path, "blkio.throttle.read_iops_device", spec.BlockIO.ThrottleReadIOPSDevice); err != nil {
   684  		return err
   685  	}
   686  	return setThrottle(path, "blkio.throttle.write_iops_device", spec.BlockIO.ThrottleWriteIOPSDevice)
   687  }
   688  
   689  func setThrottle(path, name string, devs []specs.LinuxThrottleDevice) error {
   690  	for _, dev := range devs {
   691  		val := fmt.Sprintf("%d:%d %d", dev.Major, dev.Minor, dev.Rate)
   692  		if err := setValue(path, name, val); err != nil {
   693  			return err
   694  		}
   695  	}
   696  	return nil
   697  }
   698  
   699  type networkClass struct{}
   700  
   701  func (*networkClass) optional() bool {
   702  	return true
   703  }
   704  
   705  func (*networkClass) set(spec *specs.LinuxResources, path string) error {
   706  	if spec == nil || spec.Network == nil {
   707  		return nil
   708  	}
   709  	return setOptionalValueUint32(path, "net_cls.classid", spec.Network.ClassID)
   710  }
   711  
   712  func (*networkClass) skip(spec *specs.LinuxResources) error {
   713  	if spec != nil && spec.Network != nil && spec.Network.ClassID != nil {
   714  		return fmt.Errorf("Network.ClassID set but net_cls cgroup controller not found")
   715  	}
   716  	return nil
   717  }
   718  
   719  type networkPrio struct{}
   720  
   721  func (*networkPrio) optional() bool {
   722  	return true
   723  }
   724  
   725  func (*networkPrio) set(spec *specs.LinuxResources, path string) error {
   726  	if spec == nil || spec.Network == nil {
   727  		return nil
   728  	}
   729  	for _, prio := range spec.Network.Priorities {
   730  		val := fmt.Sprintf("%s %d", prio.Name, prio.Priority)
   731  		if err := setValue(path, "net_prio.ifpriomap", val); err != nil {
   732  			return err
   733  		}
   734  	}
   735  	return nil
   736  }
   737  
   738  func (*networkPrio) skip(spec *specs.LinuxResources) error {
   739  	if spec != nil && spec.Network != nil && len(spec.Network.Priorities) > 0 {
   740  		return fmt.Errorf("Network.Priorities set but net_prio cgroup controller not found")
   741  	}
   742  	return nil
   743  }
   744  
   745  type pids struct {
   746  	mandatory
   747  }
   748  
   749  func (*pids) set(spec *specs.LinuxResources, path string) error {
   750  	if spec == nil || spec.Pids == nil || spec.Pids.Limit <= 0 {
   751  		return nil
   752  	}
   753  	val := strconv.FormatInt(spec.Pids.Limit, 10)
   754  	return setValue(path, "pids.max", val)
   755  }
   756  
   757  type hugeTLB struct{}
   758  
   759  func (*hugeTLB) optional() bool {
   760  	return true
   761  }
   762  
   763  func (*hugeTLB) skip(spec *specs.LinuxResources) error {
   764  	if spec != nil && len(spec.HugepageLimits) > 0 {
   765  		return fmt.Errorf("HugepageLimits set but hugetlb cgroup controller not found")
   766  	}
   767  	return nil
   768  }
   769  
   770  func (*hugeTLB) set(spec *specs.LinuxResources, path string) error {
   771  	if spec == nil {
   772  		return nil
   773  	}
   774  	for _, limit := range spec.HugepageLimits {
   775  		name := fmt.Sprintf("hugetlb.%s.limit_in_bytes", limit.Pagesize)
   776  		val := strconv.FormatUint(limit.Limit, 10)
   777  		if err := setValue(path, name, val); err != nil {
   778  			return err
   779  		}
   780  	}
   781  	return nil
   782  }