github.com/opencontainers/runc@v1.2.0-rc.1.0.20240520010911-492dc558cdd6/libcontainer/cgroups/fs/fs.go (about)

     1  package fs
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"os"
     7  	"path/filepath"
     8  	"strings"
     9  	"sync"
    10  
    11  	"golang.org/x/sys/unix"
    12  
    13  	"github.com/opencontainers/runc/libcontainer/cgroups"
    14  	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
    15  	"github.com/opencontainers/runc/libcontainer/configs"
    16  )
    17  
// subsystems is the ordered list of cgroup v1 subsystem handlers that the
// Manager methods Apply, GetStats and Set iterate over. The slice order is
// the order in which those methods visit the subsystems. init may append
// one more entry when cgroups hybrid mode is detected.
var subsystems = []subsystem{
	&CpusetGroup{},
	&DevicesGroup{},
	&MemoryGroup{},
	&CpuGroup{},
	&CpuacctGroup{},
	&PidsGroup{},
	&BlkioGroup{},
	&HugetlbGroup{},
	&NetClsGroup{},
	&NetPrioGroup{},
	&PerfEventGroup{},
	&FreezerGroup{},
	&RdmaGroup{},
	// Named hierarchies, declared with Join set.
	&NameGroup{GroupName: "name=systemd", Join: true},
	&NameGroup{GroupName: "misc", Join: true},
}
    35  
// errSubsystemDoesNotExist is a sentinel error for a cgroup subsystem that
// does not exist. It is not referenced in this part of the file; presumably
// it is used by other files of the package (to be confirmed).
var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist")
    37  
    38  func init() {
    39  	// If using cgroups-hybrid mode then add a "" controller indicating
    40  	// it should join the cgroups v2.
    41  	if cgroups.IsCgroup2HybridMode() {
    42  		subsystems = append(subsystems, &NameGroup{GroupName: "", Join: true})
    43  	}
    44  }
    45  
// subsystem is the behavior every entry of subsystems implements. Manager
// calls these methods with the path stored for the subsystem's Name.
type subsystem interface {
	// Name returns the name of the subsystem. Manager uses it as the key
	// into its paths map.
	Name() string
	// GetStats fills in the stats for the subsystem, reading from the
	// cgroup at path.
	GetStats(path string, stats *cgroups.Stats) error
	// Apply creates and joins a cgroup, adding pid into it. Some
	// subsystems use resources to pre-configure the cgroup parents
	// before creating or joining it.
	Apply(path string, r *configs.Resources, pid int) error
	// Set sets the cgroup resources. Manager.Set calls it even when path
	// is empty.
	Set(path string, r *configs.Resources) error
}
    58  
// Manager operates on the cgroup v1 subsystems listed in subsystems for a
// single cgroup configuration.
//
// mu is locked by the methods that read or modify paths (Apply, Destroy,
// Path, GetStats, Set and GetPaths). cgroups is the configuration passed to
// NewManager. paths maps a subsystem name to the cgroup path used for it;
// Apply may delete entries from it.
type Manager struct {
	mu      sync.Mutex
	cgroups *configs.Cgroup
	paths   map[string]string
}
    64  
    65  func NewManager(cg *configs.Cgroup, paths map[string]string) (*Manager, error) {
    66  	// Some v1 controllers (cpu, cpuset, and devices) expect
    67  	// cgroups.Resources to not be nil in Apply.
    68  	if cg.Resources == nil {
    69  		return nil, errors.New("cgroup v1 manager needs configs.Resources to be set during manager creation")
    70  	}
    71  	if cg.Resources.Unified != nil {
    72  		return nil, cgroups.ErrV1NoUnified
    73  	}
    74  
    75  	if paths == nil {
    76  		var err error
    77  		paths, err = initPaths(cg)
    78  		if err != nil {
    79  			return nil, err
    80  		}
    81  	}
    82  
    83  	return &Manager{
    84  		cgroups: cg,
    85  		paths:   paths,
    86  	}, nil
    87  }
    88  
    89  // isIgnorableError returns whether err is a permission error (in the loose
    90  // sense of the word). This includes EROFS (which for an unprivileged user is
    91  // basically a permission error) and EACCES (for similar reasons) as well as
    92  // the normal EPERM.
    93  func isIgnorableError(rootless bool, err error) bool {
    94  	// We do not ignore errors if we are root.
    95  	if !rootless {
    96  		return false
    97  	}
    98  	// Is it an ordinary EPERM?
    99  	if errors.Is(err, os.ErrPermission) {
   100  		return true
   101  	}
   102  	// Handle some specific syscall errors.
   103  	var errno unix.Errno
   104  	if errors.As(err, &errno) {
   105  		return errno == unix.EROFS || errno == unix.EPERM || errno == unix.EACCES
   106  	}
   107  	return false
   108  }
   109  
   110  func (m *Manager) Apply(pid int) (err error) {
   111  	m.mu.Lock()
   112  	defer m.mu.Unlock()
   113  
   114  	c := m.cgroups
   115  
   116  	for _, sys := range subsystems {
   117  		name := sys.Name()
   118  		p, ok := m.paths[name]
   119  		if !ok {
   120  			continue
   121  		}
   122  
   123  		if err := sys.Apply(p, c.Resources, pid); err != nil {
   124  			// In the case of rootless (including euid=0 in userns), where an
   125  			// explicit cgroup path hasn't been set, we don't bail on error in
   126  			// case of permission problems here, but do delete the path from
   127  			// the m.paths map, since it is either non-existent and could not
   128  			// be created, or the pid could not be added to it.
   129  			//
   130  			// Cases where limits for the subsystem have been set are handled
   131  			// later by Set, which fails with a friendly error (see
   132  			// if path == "" in Set).
   133  			if isIgnorableError(c.Rootless, err) && c.Path == "" {
   134  				delete(m.paths, name)
   135  				continue
   136  			}
   137  			return err
   138  		}
   139  
   140  	}
   141  	return nil
   142  }
   143  
   144  func (m *Manager) Destroy() error {
   145  	m.mu.Lock()
   146  	defer m.mu.Unlock()
   147  	return cgroups.RemovePaths(m.paths)
   148  }
   149  
   150  func (m *Manager) Path(subsys string) string {
   151  	m.mu.Lock()
   152  	defer m.mu.Unlock()
   153  	return m.paths[subsys]
   154  }
   155  
   156  func (m *Manager) GetStats() (*cgroups.Stats, error) {
   157  	m.mu.Lock()
   158  	defer m.mu.Unlock()
   159  	stats := cgroups.NewStats()
   160  	for _, sys := range subsystems {
   161  		path := m.paths[sys.Name()]
   162  		if path == "" {
   163  			continue
   164  		}
   165  		if err := sys.GetStats(path, stats); err != nil {
   166  			return nil, err
   167  		}
   168  	}
   169  	return stats, nil
   170  }
   171  
   172  func (m *Manager) Set(r *configs.Resources) error {
   173  	if r == nil {
   174  		return nil
   175  	}
   176  
   177  	if r.Unified != nil {
   178  		return cgroups.ErrV1NoUnified
   179  	}
   180  
   181  	m.mu.Lock()
   182  	defer m.mu.Unlock()
   183  	for _, sys := range subsystems {
   184  		path := m.paths[sys.Name()]
   185  		if err := sys.Set(path, r); err != nil {
   186  			// When rootless is true, errors from the device subsystem
   187  			// are ignored, as it is really not expected to work.
   188  			if m.cgroups.Rootless && sys.Name() == "devices" && !errors.Is(err, cgroups.ErrDevicesUnsupported) {
   189  				continue
   190  			}
   191  			// However, errors from other subsystems are not ignored.
   192  			// see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
   193  			if path == "" {
   194  				// We never created a path for this cgroup, so we cannot set
   195  				// limits for it (though we have already tried at this point).
   196  				return fmt.Errorf("cannot set %s limit: container could not join or create cgroup", sys.Name())
   197  			}
   198  			return err
   199  		}
   200  	}
   201  
   202  	return nil
   203  }
   204  
   205  // Freeze toggles the container's freezer cgroup depending on the state
   206  // provided
   207  func (m *Manager) Freeze(state configs.FreezerState) error {
   208  	path := m.Path("freezer")
   209  	if path == "" {
   210  		return errors.New("cannot toggle freezer: cgroups not configured for container")
   211  	}
   212  
   213  	prevState := m.cgroups.Resources.Freezer
   214  	m.cgroups.Resources.Freezer = state
   215  	freezer := &FreezerGroup{}
   216  	if err := freezer.Set(path, m.cgroups.Resources); err != nil {
   217  		m.cgroups.Resources.Freezer = prevState
   218  		return err
   219  	}
   220  	return nil
   221  }
   222  
   223  func (m *Manager) GetPids() ([]int, error) {
   224  	return cgroups.GetPids(m.Path("devices"))
   225  }
   226  
   227  func (m *Manager) GetAllPids() ([]int, error) {
   228  	return cgroups.GetAllPids(m.Path("devices"))
   229  }
   230  
   231  func (m *Manager) GetPaths() map[string]string {
   232  	m.mu.Lock()
   233  	defer m.mu.Unlock()
   234  	return m.paths
   235  }
   236  
   237  func (m *Manager) GetCgroups() (*configs.Cgroup, error) {
   238  	return m.cgroups, nil
   239  }
   240  
   241  func (m *Manager) GetFreezerState() (configs.FreezerState, error) {
   242  	dir := m.Path("freezer")
   243  	// If the container doesn't have the freezer cgroup, say it's undefined.
   244  	if dir == "" {
   245  		return configs.Undefined, nil
   246  	}
   247  	freezer := &FreezerGroup{}
   248  	return freezer.GetState(dir)
   249  }
   250  
   251  func (m *Manager) Exists() bool {
   252  	return cgroups.PathExists(m.Path("devices"))
   253  }
   254  
   255  func OOMKillCount(path string) (uint64, error) {
   256  	return fscommon.GetValueByKey(path, "memory.oom_control", "oom_kill")
   257  }
   258  
   259  func (m *Manager) OOMKillCount() (uint64, error) {
   260  	c, err := OOMKillCount(m.Path("memory"))
   261  	// Ignore ENOENT when rootless as it couldn't create cgroup.
   262  	if err != nil && m.cgroups.Rootless && os.IsNotExist(err) {
   263  		err = nil
   264  	}
   265  
   266  	return c, err
   267  }
   268  
   269  func (m *Manager) GetEffectiveCPUs() string {
   270  	return GetEffectiveCPUs(m.Path("cpuset"), m.cgroups)
   271  }
   272  
   273  func GetEffectiveCPUs(cpusetPath string, cgroups *configs.Cgroup) string {
   274  	// Fast path.
   275  	if cgroups.CpusetCpus != "" {
   276  		return cgroups.CpusetCpus
   277  	} else if !strings.HasPrefix(cpusetPath, defaultCgroupRoot) {
   278  		return ""
   279  	}
   280  
   281  	// Iterates until it goes to the cgroup root path.
   282  	// It's required for containers in which cpuset controller
   283  	// is not enabled, in this case a parent cgroup is used.
   284  	for path := cpusetPath; path != defaultCgroupRoot; path = filepath.Dir(path) {
   285  		cpus, err := fscommon.GetCgroupParamString(path, "cpuset.effective_cpus")
   286  		if err == nil {
   287  			return cpus
   288  		}
   289  	}
   290  
   291  	return ""
   292  }