github.com/opencontainers/runc@v1.2.0-rc.1.0.20240520010911-492dc558cdd6/libcontainer/cgroups/systemd/v1.go (about) 1 package systemd 2 3 import ( 4 "errors" 5 "os" 6 "path/filepath" 7 "strings" 8 "sync" 9 10 systemdDbus "github.com/coreos/go-systemd/v22/dbus" 11 "github.com/sirupsen/logrus" 12 13 "github.com/opencontainers/runc/libcontainer/cgroups" 14 "github.com/opencontainers/runc/libcontainer/cgroups/fs" 15 "github.com/opencontainers/runc/libcontainer/configs" 16 ) 17 18 type LegacyManager struct { 19 mu sync.Mutex 20 cgroups *configs.Cgroup 21 paths map[string]string 22 dbus *dbusConnManager 23 } 24 25 func NewLegacyManager(cg *configs.Cgroup, paths map[string]string) (*LegacyManager, error) { 26 if cg.Rootless { 27 return nil, errors.New("cannot use rootless systemd cgroups manager on cgroup v1") 28 } 29 if cg.Resources != nil && cg.Resources.Unified != nil { 30 return nil, cgroups.ErrV1NoUnified 31 } 32 if paths == nil { 33 var err error 34 paths, err = initPaths(cg) 35 if err != nil { 36 return nil, err 37 } 38 } 39 return &LegacyManager{ 40 cgroups: cg, 41 paths: paths, 42 dbus: newDbusConnManager(false), 43 }, nil 44 } 45 46 type subsystem interface { 47 // Name returns the name of the subsystem. 48 Name() string 49 // GetStats returns the stats, as 'stats', corresponding to the cgroup under 'path'. 50 GetStats(path string, stats *cgroups.Stats) error 51 // Set sets cgroup resource limits. 52 Set(path string, r *configs.Resources) error 53 } 54 55 var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist") 56 57 var legacySubsystems = []subsystem{ 58 &fs.CpusetGroup{}, 59 &fs.DevicesGroup{}, 60 &fs.MemoryGroup{}, 61 &fs.CpuGroup{}, 62 &fs.CpuacctGroup{}, 63 &fs.PidsGroup{}, 64 &fs.BlkioGroup{}, 65 &fs.HugetlbGroup{}, 66 &fs.PerfEventGroup{}, 67 &fs.FreezerGroup{}, 68 &fs.NetPrioGroup{}, 69 &fs.NetClsGroup{}, 70 &fs.NameGroup{GroupName: "name=systemd"}, 71 &fs.RdmaGroup{}, 72 &fs.NameGroup{GroupName: "misc"}, 73 } 74 75 func genV1ResourcesProperties(r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) { 76 var properties []systemdDbus.Property 77 78 deviceProperties, err := generateDeviceProperties(r, cm) 79 if err != nil { 80 return nil, err 81 } 82 properties = append(properties, deviceProperties...) 83 84 if r.Memory != 0 { 85 properties = append(properties, 86 newProp("MemoryLimit", uint64(r.Memory))) 87 } 88 89 if r.CpuShares != 0 { 90 properties = append(properties, 91 newProp("CPUShares", r.CpuShares)) 92 } 93 94 addCpuQuota(cm, &properties, r.CpuQuota, r.CpuPeriod) 95 96 if r.BlkioWeight != 0 { 97 properties = append(properties, 98 newProp("BlockIOWeight", uint64(r.BlkioWeight))) 99 } 100 101 if r.PidsLimit > 0 || r.PidsLimit == -1 { 102 properties = append(properties, 103 newProp("TasksMax", uint64(r.PidsLimit))) 104 } 105 106 err = addCpuset(cm, &properties, r.CpusetCpus, r.CpusetMems) 107 if err != nil { 108 return nil, err 109 } 110 111 return properties, nil 112 } 113 114 // initPaths figures out and returns paths to cgroups. 115 func initPaths(c *configs.Cgroup) (map[string]string, error) { 116 slice := "system.slice" 117 if c.Parent != "" { 118 var err error 119 slice, err = ExpandSlice(c.Parent) 120 if err != nil { 121 return nil, err 122 } 123 } 124 125 unit := getUnitName(c) 126 127 paths := make(map[string]string) 128 for _, s := range legacySubsystems { 129 subsystemPath, err := getSubsystemPath(slice, unit, s.Name()) 130 if err != nil { 131 // Even if it's `not found` error, we'll return err 132 // because devices cgroup is hard requirement for 133 // container security. 134 if s.Name() == "devices" { 135 return nil, err 136 } 137 // Don't fail if a cgroup hierarchy was not found, just skip this subsystem 138 if cgroups.IsNotFound(err) { 139 continue 140 } 141 return nil, err 142 } 143 paths[s.Name()] = subsystemPath 144 } 145 146 // If systemd is using cgroups-hybrid mode then add the slice path of 147 // this container to the paths so the following process executed with 148 // "runc exec" joins that cgroup as well. 149 if cgroups.IsCgroup2HybridMode() { 150 // "" means cgroup-hybrid path 151 cgroupsHybridPath, err := getSubsystemPath(slice, unit, "") 152 if err != nil && cgroups.IsNotFound(err) { 153 return nil, err 154 } 155 paths[""] = cgroupsHybridPath 156 } 157 158 return paths, nil 159 } 160 161 func (m *LegacyManager) Apply(pid int) error { 162 var ( 163 c = m.cgroups 164 unitName = getUnitName(c) 165 slice = "system.slice" 166 properties []systemdDbus.Property 167 ) 168 169 m.mu.Lock() 170 defer m.mu.Unlock() 171 172 if c.Parent != "" { 173 slice = c.Parent 174 } 175 176 properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name)) 177 178 if strings.HasSuffix(unitName, ".slice") { 179 // If we create a slice, the parent is defined via a Wants=. 180 properties = append(properties, systemdDbus.PropWants(slice)) 181 } else { 182 // Otherwise it's a scope, which we put into a Slice=. 183 properties = append(properties, systemdDbus.PropSlice(slice)) 184 // Assume scopes always support delegation (supported since systemd v218). 185 properties = append(properties, newProp("Delegate", true)) 186 } 187 188 // only add pid if its valid, -1 is used w/ general slice creation. 189 if pid != -1 { 190 properties = append(properties, newProp("PIDs", []uint32{uint32(pid)})) 191 } 192 193 // Always enable accounting, this gets us the same behaviour as the fs implementation, 194 // plus the kernel has some problems with joining the memory cgroup at a later time. 195 properties = append(properties, 196 newProp("MemoryAccounting", true), 197 newProp("CPUAccounting", true), 198 newProp("BlockIOAccounting", true), 199 newProp("TasksAccounting", true), 200 ) 201 202 // Assume DefaultDependencies= will always work (the check for it was previously broken.) 203 properties = append(properties, 204 newProp("DefaultDependencies", false)) 205 206 properties = append(properties, c.SystemdProps...) 207 208 if err := startUnit(m.dbus, unitName, properties, pid == -1); err != nil { 209 return err 210 } 211 212 if err := m.joinCgroups(pid); err != nil { 213 return err 214 } 215 216 return nil 217 } 218 219 func (m *LegacyManager) Destroy() error { 220 m.mu.Lock() 221 defer m.mu.Unlock() 222 223 stopErr := stopUnit(m.dbus, getUnitName(m.cgroups)) 224 225 // Both on success and on error, cleanup all the cgroups 226 // we are aware of, as some of them were created directly 227 // by Apply() and are not managed by systemd. 228 if err := cgroups.RemovePaths(m.paths); err != nil && stopErr == nil { 229 return err 230 } 231 232 return stopErr 233 } 234 235 func (m *LegacyManager) Path(subsys string) string { 236 m.mu.Lock() 237 defer m.mu.Unlock() 238 return m.paths[subsys] 239 } 240 241 func (m *LegacyManager) joinCgroups(pid int) error { 242 for _, sys := range legacySubsystems { 243 name := sys.Name() 244 switch name { 245 case "name=systemd": 246 // let systemd handle this 247 case "cpuset": 248 if path, ok := m.paths[name]; ok { 249 s := &fs.CpusetGroup{} 250 if err := s.ApplyDir(path, m.cgroups.Resources, pid); err != nil { 251 return err 252 } 253 } 254 default: 255 if path, ok := m.paths[name]; ok { 256 if err := os.MkdirAll(path, 0o755); err != nil { 257 return err 258 } 259 if err := cgroups.WriteCgroupProc(path, pid); err != nil { 260 return err 261 } 262 } 263 } 264 } 265 266 return nil 267 } 268 269 func getSubsystemPath(slice, unit, subsystem string) (string, error) { 270 mountpoint, err := cgroups.FindCgroupMountpoint("", subsystem) 271 if err != nil { 272 return "", err 273 } 274 275 return filepath.Join(mountpoint, slice, unit), nil 276 } 277 278 func (m *LegacyManager) Freeze(state configs.FreezerState) error { 279 err := m.doFreeze(state) 280 if err == nil { 281 m.cgroups.Resources.Freezer = state 282 } 283 return err 284 } 285 286 // doFreeze is the same as Freeze but without 287 // changing the m.cgroups.Resources.Frozen field. 288 func (m *LegacyManager) doFreeze(state configs.FreezerState) error { 289 path, ok := m.paths["freezer"] 290 if !ok { 291 return errSubsystemDoesNotExist 292 } 293 freezer := &fs.FreezerGroup{} 294 resources := &configs.Resources{Freezer: state} 295 return freezer.Set(path, resources) 296 } 297 298 func (m *LegacyManager) GetPids() ([]int, error) { 299 path, ok := m.paths["devices"] 300 if !ok { 301 return nil, errSubsystemDoesNotExist 302 } 303 return cgroups.GetPids(path) 304 } 305 306 func (m *LegacyManager) GetAllPids() ([]int, error) { 307 path, ok := m.paths["devices"] 308 if !ok { 309 return nil, errSubsystemDoesNotExist 310 } 311 return cgroups.GetAllPids(path) 312 } 313 314 func (m *LegacyManager) GetStats() (*cgroups.Stats, error) { 315 m.mu.Lock() 316 defer m.mu.Unlock() 317 stats := cgroups.NewStats() 318 for _, sys := range legacySubsystems { 319 path := m.paths[sys.Name()] 320 if path == "" { 321 continue 322 } 323 if err := sys.GetStats(path, stats); err != nil { 324 return nil, err 325 } 326 } 327 328 return stats, nil 329 } 330 331 func (m *LegacyManager) Set(r *configs.Resources) error { 332 if r == nil { 333 return nil 334 } 335 if r.Unified != nil { 336 return cgroups.ErrV1NoUnified 337 } 338 properties, err := genV1ResourcesProperties(r, m.dbus) 339 if err != nil { 340 return err 341 } 342 343 unitName := getUnitName(m.cgroups) 344 needsFreeze, needsThaw, err := m.freezeBeforeSet(unitName, r) 345 if err != nil { 346 return err 347 } 348 349 if needsFreeze { 350 if err := m.doFreeze(configs.Frozen); err != nil { 351 // If freezer cgroup isn't supported, we just warn about it. 352 logrus.Infof("freeze container before SetUnitProperties failed: %v", err) 353 // skip update the cgroup while frozen failed. #3803 354 if !errors.Is(err, errSubsystemDoesNotExist) { 355 if needsThaw { 356 if thawErr := m.doFreeze(configs.Thawed); thawErr != nil { 357 logrus.Infof("thaw container after doFreeze failed: %v", thawErr) 358 } 359 } 360 return err 361 } 362 } 363 } 364 setErr := setUnitProperties(m.dbus, unitName, properties...) 365 if needsThaw { 366 if err := m.doFreeze(configs.Thawed); err != nil { 367 logrus.Infof("thaw container after SetUnitProperties failed: %v", err) 368 } 369 } 370 if setErr != nil { 371 return setErr 372 } 373 374 for _, sys := range legacySubsystems { 375 // Get the subsystem path, but don't error out for not found cgroups. 376 path, ok := m.paths[sys.Name()] 377 if !ok { 378 continue 379 } 380 if err := sys.Set(path, r); err != nil { 381 return err 382 } 383 } 384 385 return nil 386 } 387 388 func (m *LegacyManager) GetPaths() map[string]string { 389 m.mu.Lock() 390 defer m.mu.Unlock() 391 return m.paths 392 } 393 394 func (m *LegacyManager) GetCgroups() (*configs.Cgroup, error) { 395 return m.cgroups, nil 396 } 397 398 func (m *LegacyManager) GetFreezerState() (configs.FreezerState, error) { 399 path, ok := m.paths["freezer"] 400 if !ok { 401 return configs.Undefined, nil 402 } 403 freezer := &fs.FreezerGroup{} 404 return freezer.GetState(path) 405 } 406 407 func (m *LegacyManager) Exists() bool { 408 return cgroups.PathExists(m.Path("devices")) 409 } 410 411 func (m *LegacyManager) OOMKillCount() (uint64, error) { 412 return fs.OOMKillCount(m.Path("memory")) 413 } 414 415 func (m *LegacyManager) GetEffectiveCPUs() string { 416 return fs.GetEffectiveCPUs(m.Path("cpuset"), m.cgroups) 417 }