github.com/opencontainers/runc@v1.2.0-rc.1.0.20240520010911-492dc558cdd6/libcontainer/cgroups/systemd/v1.go (about)

     1  package systemd
     2  
     3  import (
     4  	"errors"
     5  	"os"
     6  	"path/filepath"
     7  	"strings"
     8  	"sync"
     9  
    10  	systemdDbus "github.com/coreos/go-systemd/v22/dbus"
    11  	"github.com/sirupsen/logrus"
    12  
    13  	"github.com/opencontainers/runc/libcontainer/cgroups"
    14  	"github.com/opencontainers/runc/libcontainer/cgroups/fs"
    15  	"github.com/opencontainers/runc/libcontainer/configs"
    16  )
    17  
    18  type LegacyManager struct {
    19  	mu      sync.Mutex
    20  	cgroups *configs.Cgroup
    21  	paths   map[string]string
    22  	dbus    *dbusConnManager
    23  }
    24  
    25  func NewLegacyManager(cg *configs.Cgroup, paths map[string]string) (*LegacyManager, error) {
    26  	if cg.Rootless {
    27  		return nil, errors.New("cannot use rootless systemd cgroups manager on cgroup v1")
    28  	}
    29  	if cg.Resources != nil && cg.Resources.Unified != nil {
    30  		return nil, cgroups.ErrV1NoUnified
    31  	}
    32  	if paths == nil {
    33  		var err error
    34  		paths, err = initPaths(cg)
    35  		if err != nil {
    36  			return nil, err
    37  		}
    38  	}
    39  	return &LegacyManager{
    40  		cgroups: cg,
    41  		paths:   paths,
    42  		dbus:    newDbusConnManager(false),
    43  	}, nil
    44  }
    45  
    46  type subsystem interface {
    47  	// Name returns the name of the subsystem.
    48  	Name() string
    49  	// GetStats returns the stats, as 'stats', corresponding to the cgroup under 'path'.
    50  	GetStats(path string, stats *cgroups.Stats) error
    51  	// Set sets cgroup resource limits.
    52  	Set(path string, r *configs.Resources) error
    53  }
    54  
    55  var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist")
    56  
    57  var legacySubsystems = []subsystem{
    58  	&fs.CpusetGroup{},
    59  	&fs.DevicesGroup{},
    60  	&fs.MemoryGroup{},
    61  	&fs.CpuGroup{},
    62  	&fs.CpuacctGroup{},
    63  	&fs.PidsGroup{},
    64  	&fs.BlkioGroup{},
    65  	&fs.HugetlbGroup{},
    66  	&fs.PerfEventGroup{},
    67  	&fs.FreezerGroup{},
    68  	&fs.NetPrioGroup{},
    69  	&fs.NetClsGroup{},
    70  	&fs.NameGroup{GroupName: "name=systemd"},
    71  	&fs.RdmaGroup{},
    72  	&fs.NameGroup{GroupName: "misc"},
    73  }
    74  
    75  func genV1ResourcesProperties(r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) {
    76  	var properties []systemdDbus.Property
    77  
    78  	deviceProperties, err := generateDeviceProperties(r, cm)
    79  	if err != nil {
    80  		return nil, err
    81  	}
    82  	properties = append(properties, deviceProperties...)
    83  
    84  	if r.Memory != 0 {
    85  		properties = append(properties,
    86  			newProp("MemoryLimit", uint64(r.Memory)))
    87  	}
    88  
    89  	if r.CpuShares != 0 {
    90  		properties = append(properties,
    91  			newProp("CPUShares", r.CpuShares))
    92  	}
    93  
    94  	addCpuQuota(cm, &properties, r.CpuQuota, r.CpuPeriod)
    95  
    96  	if r.BlkioWeight != 0 {
    97  		properties = append(properties,
    98  			newProp("BlockIOWeight", uint64(r.BlkioWeight)))
    99  	}
   100  
   101  	if r.PidsLimit > 0 || r.PidsLimit == -1 {
   102  		properties = append(properties,
   103  			newProp("TasksMax", uint64(r.PidsLimit)))
   104  	}
   105  
   106  	err = addCpuset(cm, &properties, r.CpusetCpus, r.CpusetMems)
   107  	if err != nil {
   108  		return nil, err
   109  	}
   110  
   111  	return properties, nil
   112  }
   113  
   114  // initPaths figures out and returns paths to cgroups.
   115  func initPaths(c *configs.Cgroup) (map[string]string, error) {
   116  	slice := "system.slice"
   117  	if c.Parent != "" {
   118  		var err error
   119  		slice, err = ExpandSlice(c.Parent)
   120  		if err != nil {
   121  			return nil, err
   122  		}
   123  	}
   124  
   125  	unit := getUnitName(c)
   126  
   127  	paths := make(map[string]string)
   128  	for _, s := range legacySubsystems {
   129  		subsystemPath, err := getSubsystemPath(slice, unit, s.Name())
   130  		if err != nil {
   131  			// Even if it's `not found` error, we'll return err
   132  			// because devices cgroup is hard requirement for
   133  			// container security.
   134  			if s.Name() == "devices" {
   135  				return nil, err
   136  			}
   137  			// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
   138  			if cgroups.IsNotFound(err) {
   139  				continue
   140  			}
   141  			return nil, err
   142  		}
   143  		paths[s.Name()] = subsystemPath
   144  	}
   145  
   146  	// If systemd is using cgroups-hybrid mode then add the slice path of
   147  	// this container to the paths so the following process executed with
   148  	// "runc exec" joins that cgroup as well.
   149  	if cgroups.IsCgroup2HybridMode() {
   150  		// "" means cgroup-hybrid path
   151  		cgroupsHybridPath, err := getSubsystemPath(slice, unit, "")
   152  		if err != nil && cgroups.IsNotFound(err) {
   153  			return nil, err
   154  		}
   155  		paths[""] = cgroupsHybridPath
   156  	}
   157  
   158  	return paths, nil
   159  }
   160  
   161  func (m *LegacyManager) Apply(pid int) error {
   162  	var (
   163  		c          = m.cgroups
   164  		unitName   = getUnitName(c)
   165  		slice      = "system.slice"
   166  		properties []systemdDbus.Property
   167  	)
   168  
   169  	m.mu.Lock()
   170  	defer m.mu.Unlock()
   171  
   172  	if c.Parent != "" {
   173  		slice = c.Parent
   174  	}
   175  
   176  	properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name))
   177  
   178  	if strings.HasSuffix(unitName, ".slice") {
   179  		// If we create a slice, the parent is defined via a Wants=.
   180  		properties = append(properties, systemdDbus.PropWants(slice))
   181  	} else {
   182  		// Otherwise it's a scope, which we put into a Slice=.
   183  		properties = append(properties, systemdDbus.PropSlice(slice))
   184  		// Assume scopes always support delegation (supported since systemd v218).
   185  		properties = append(properties, newProp("Delegate", true))
   186  	}
   187  
   188  	// only add pid if its valid, -1 is used w/ general slice creation.
   189  	if pid != -1 {
   190  		properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
   191  	}
   192  
   193  	// Always enable accounting, this gets us the same behaviour as the fs implementation,
   194  	// plus the kernel has some problems with joining the memory cgroup at a later time.
   195  	properties = append(properties,
   196  		newProp("MemoryAccounting", true),
   197  		newProp("CPUAccounting", true),
   198  		newProp("BlockIOAccounting", true),
   199  		newProp("TasksAccounting", true),
   200  	)
   201  
   202  	// Assume DefaultDependencies= will always work (the check for it was previously broken.)
   203  	properties = append(properties,
   204  		newProp("DefaultDependencies", false))
   205  
   206  	properties = append(properties, c.SystemdProps...)
   207  
   208  	if err := startUnit(m.dbus, unitName, properties, pid == -1); err != nil {
   209  		return err
   210  	}
   211  
   212  	if err := m.joinCgroups(pid); err != nil {
   213  		return err
   214  	}
   215  
   216  	return nil
   217  }
   218  
   219  func (m *LegacyManager) Destroy() error {
   220  	m.mu.Lock()
   221  	defer m.mu.Unlock()
   222  
   223  	stopErr := stopUnit(m.dbus, getUnitName(m.cgroups))
   224  
   225  	// Both on success and on error, cleanup all the cgroups
   226  	// we are aware of, as some of them were created directly
   227  	// by Apply() and are not managed by systemd.
   228  	if err := cgroups.RemovePaths(m.paths); err != nil && stopErr == nil {
   229  		return err
   230  	}
   231  
   232  	return stopErr
   233  }
   234  
   235  func (m *LegacyManager) Path(subsys string) string {
   236  	m.mu.Lock()
   237  	defer m.mu.Unlock()
   238  	return m.paths[subsys]
   239  }
   240  
   241  func (m *LegacyManager) joinCgroups(pid int) error {
   242  	for _, sys := range legacySubsystems {
   243  		name := sys.Name()
   244  		switch name {
   245  		case "name=systemd":
   246  			// let systemd handle this
   247  		case "cpuset":
   248  			if path, ok := m.paths[name]; ok {
   249  				s := &fs.CpusetGroup{}
   250  				if err := s.ApplyDir(path, m.cgroups.Resources, pid); err != nil {
   251  					return err
   252  				}
   253  			}
   254  		default:
   255  			if path, ok := m.paths[name]; ok {
   256  				if err := os.MkdirAll(path, 0o755); err != nil {
   257  					return err
   258  				}
   259  				if err := cgroups.WriteCgroupProc(path, pid); err != nil {
   260  					return err
   261  				}
   262  			}
   263  		}
   264  	}
   265  
   266  	return nil
   267  }
   268  
   269  func getSubsystemPath(slice, unit, subsystem string) (string, error) {
   270  	mountpoint, err := cgroups.FindCgroupMountpoint("", subsystem)
   271  	if err != nil {
   272  		return "", err
   273  	}
   274  
   275  	return filepath.Join(mountpoint, slice, unit), nil
   276  }
   277  
   278  func (m *LegacyManager) Freeze(state configs.FreezerState) error {
   279  	err := m.doFreeze(state)
   280  	if err == nil {
   281  		m.cgroups.Resources.Freezer = state
   282  	}
   283  	return err
   284  }
   285  
   286  // doFreeze is the same as Freeze but without
   287  // changing the m.cgroups.Resources.Frozen field.
   288  func (m *LegacyManager) doFreeze(state configs.FreezerState) error {
   289  	path, ok := m.paths["freezer"]
   290  	if !ok {
   291  		return errSubsystemDoesNotExist
   292  	}
   293  	freezer := &fs.FreezerGroup{}
   294  	resources := &configs.Resources{Freezer: state}
   295  	return freezer.Set(path, resources)
   296  }
   297  
   298  func (m *LegacyManager) GetPids() ([]int, error) {
   299  	path, ok := m.paths["devices"]
   300  	if !ok {
   301  		return nil, errSubsystemDoesNotExist
   302  	}
   303  	return cgroups.GetPids(path)
   304  }
   305  
   306  func (m *LegacyManager) GetAllPids() ([]int, error) {
   307  	path, ok := m.paths["devices"]
   308  	if !ok {
   309  		return nil, errSubsystemDoesNotExist
   310  	}
   311  	return cgroups.GetAllPids(path)
   312  }
   313  
   314  func (m *LegacyManager) GetStats() (*cgroups.Stats, error) {
   315  	m.mu.Lock()
   316  	defer m.mu.Unlock()
   317  	stats := cgroups.NewStats()
   318  	for _, sys := range legacySubsystems {
   319  		path := m.paths[sys.Name()]
   320  		if path == "" {
   321  			continue
   322  		}
   323  		if err := sys.GetStats(path, stats); err != nil {
   324  			return nil, err
   325  		}
   326  	}
   327  
   328  	return stats, nil
   329  }
   330  
   331  func (m *LegacyManager) Set(r *configs.Resources) error {
   332  	if r == nil {
   333  		return nil
   334  	}
   335  	if r.Unified != nil {
   336  		return cgroups.ErrV1NoUnified
   337  	}
   338  	properties, err := genV1ResourcesProperties(r, m.dbus)
   339  	if err != nil {
   340  		return err
   341  	}
   342  
   343  	unitName := getUnitName(m.cgroups)
   344  	needsFreeze, needsThaw, err := m.freezeBeforeSet(unitName, r)
   345  	if err != nil {
   346  		return err
   347  	}
   348  
   349  	if needsFreeze {
   350  		if err := m.doFreeze(configs.Frozen); err != nil {
   351  			// If freezer cgroup isn't supported, we just warn about it.
   352  			logrus.Infof("freeze container before SetUnitProperties failed: %v", err)
   353  			// skip update the cgroup while frozen failed. #3803
   354  			if !errors.Is(err, errSubsystemDoesNotExist) {
   355  				if needsThaw {
   356  					if thawErr := m.doFreeze(configs.Thawed); thawErr != nil {
   357  						logrus.Infof("thaw container after doFreeze failed: %v", thawErr)
   358  					}
   359  				}
   360  				return err
   361  			}
   362  		}
   363  	}
   364  	setErr := setUnitProperties(m.dbus, unitName, properties...)
   365  	if needsThaw {
   366  		if err := m.doFreeze(configs.Thawed); err != nil {
   367  			logrus.Infof("thaw container after SetUnitProperties failed: %v", err)
   368  		}
   369  	}
   370  	if setErr != nil {
   371  		return setErr
   372  	}
   373  
   374  	for _, sys := range legacySubsystems {
   375  		// Get the subsystem path, but don't error out for not found cgroups.
   376  		path, ok := m.paths[sys.Name()]
   377  		if !ok {
   378  			continue
   379  		}
   380  		if err := sys.Set(path, r); err != nil {
   381  			return err
   382  		}
   383  	}
   384  
   385  	return nil
   386  }
   387  
   388  func (m *LegacyManager) GetPaths() map[string]string {
   389  	m.mu.Lock()
   390  	defer m.mu.Unlock()
   391  	return m.paths
   392  }
   393  
   394  func (m *LegacyManager) GetCgroups() (*configs.Cgroup, error) {
   395  	return m.cgroups, nil
   396  }
   397  
   398  func (m *LegacyManager) GetFreezerState() (configs.FreezerState, error) {
   399  	path, ok := m.paths["freezer"]
   400  	if !ok {
   401  		return configs.Undefined, nil
   402  	}
   403  	freezer := &fs.FreezerGroup{}
   404  	return freezer.GetState(path)
   405  }
   406  
   407  func (m *LegacyManager) Exists() bool {
   408  	return cgroups.PathExists(m.Path("devices"))
   409  }
   410  
   411  func (m *LegacyManager) OOMKillCount() (uint64, error) {
   412  	return fs.OOMKillCount(m.Path("memory"))
   413  }
   414  
   415  func (m *LegacyManager) GetEffectiveCPUs() string {
   416  	return fs.GetEffectiveCPUs(m.Path("cpuset"), m.cgroups)
   417  }