github.com/opencontainers/runc@v1.2.0-rc.1.0.20240520010911-492dc558cdd6/libcontainer/cgroups/fs/fs.go (about) 1 package fs 2 3 import ( 4 "errors" 5 "fmt" 6 "os" 7 "path/filepath" 8 "strings" 9 "sync" 10 11 "golang.org/x/sys/unix" 12 13 "github.com/opencontainers/runc/libcontainer/cgroups" 14 "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" 15 "github.com/opencontainers/runc/libcontainer/configs" 16 ) 17 18 var subsystems = []subsystem{ 19 &CpusetGroup{}, 20 &DevicesGroup{}, 21 &MemoryGroup{}, 22 &CpuGroup{}, 23 &CpuacctGroup{}, 24 &PidsGroup{}, 25 &BlkioGroup{}, 26 &HugetlbGroup{}, 27 &NetClsGroup{}, 28 &NetPrioGroup{}, 29 &PerfEventGroup{}, 30 &FreezerGroup{}, 31 &RdmaGroup{}, 32 &NameGroup{GroupName: "name=systemd", Join: true}, 33 &NameGroup{GroupName: "misc", Join: true}, 34 } 35 36 var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist") 37 38 func init() { 39 // If using cgroups-hybrid mode then add a "" controller indicating 40 // it should join the cgroups v2. 41 if cgroups.IsCgroup2HybridMode() { 42 subsystems = append(subsystems, &NameGroup{GroupName: "", Join: true}) 43 } 44 } 45 46 type subsystem interface { 47 // Name returns the name of the subsystem. 48 Name() string 49 // GetStats fills in the stats for the subsystem. 50 GetStats(path string, stats *cgroups.Stats) error 51 // Apply creates and joins a cgroup, adding pid into it. Some 52 // subsystems use resources to pre-configure the cgroup parents 53 // before creating or joining it. 54 Apply(path string, r *configs.Resources, pid int) error 55 // Set sets the cgroup resources. 56 Set(path string, r *configs.Resources) error 57 } 58 59 type Manager struct { 60 mu sync.Mutex 61 cgroups *configs.Cgroup 62 paths map[string]string 63 } 64 65 func NewManager(cg *configs.Cgroup, paths map[string]string) (*Manager, error) { 66 // Some v1 controllers (cpu, cpuset, and devices) expect 67 // cgroups.Resources to not be nil in Apply. 68 if cg.Resources == nil { 69 return nil, errors.New("cgroup v1 manager needs configs.Resources to be set during manager creation") 70 } 71 if cg.Resources.Unified != nil { 72 return nil, cgroups.ErrV1NoUnified 73 } 74 75 if paths == nil { 76 var err error 77 paths, err = initPaths(cg) 78 if err != nil { 79 return nil, err 80 } 81 } 82 83 return &Manager{ 84 cgroups: cg, 85 paths: paths, 86 }, nil 87 } 88 89 // isIgnorableError returns whether err is a permission error (in the loose 90 // sense of the word). This includes EROFS (which for an unprivileged user is 91 // basically a permission error) and EACCES (for similar reasons) as well as 92 // the normal EPERM. 93 func isIgnorableError(rootless bool, err error) bool { 94 // We do not ignore errors if we are root. 95 if !rootless { 96 return false 97 } 98 // Is it an ordinary EPERM? 99 if errors.Is(err, os.ErrPermission) { 100 return true 101 } 102 // Handle some specific syscall errors. 103 var errno unix.Errno 104 if errors.As(err, &errno) { 105 return errno == unix.EROFS || errno == unix.EPERM || errno == unix.EACCES 106 } 107 return false 108 } 109 110 func (m *Manager) Apply(pid int) (err error) { 111 m.mu.Lock() 112 defer m.mu.Unlock() 113 114 c := m.cgroups 115 116 for _, sys := range subsystems { 117 name := sys.Name() 118 p, ok := m.paths[name] 119 if !ok { 120 continue 121 } 122 123 if err := sys.Apply(p, c.Resources, pid); err != nil { 124 // In the case of rootless (including euid=0 in userns), where an 125 // explicit cgroup path hasn't been set, we don't bail on error in 126 // case of permission problems here, but do delete the path from 127 // the m.paths map, since it is either non-existent and could not 128 // be created, or the pid could not be added to it. 129 // 130 // Cases where limits for the subsystem have been set are handled 131 // later by Set, which fails with a friendly error (see 132 // if path == "" in Set). 133 if isIgnorableError(c.Rootless, err) && c.Path == "" { 134 delete(m.paths, name) 135 continue 136 } 137 return err 138 } 139 140 } 141 return nil 142 } 143 144 func (m *Manager) Destroy() error { 145 m.mu.Lock() 146 defer m.mu.Unlock() 147 return cgroups.RemovePaths(m.paths) 148 } 149 150 func (m *Manager) Path(subsys string) string { 151 m.mu.Lock() 152 defer m.mu.Unlock() 153 return m.paths[subsys] 154 } 155 156 func (m *Manager) GetStats() (*cgroups.Stats, error) { 157 m.mu.Lock() 158 defer m.mu.Unlock() 159 stats := cgroups.NewStats() 160 for _, sys := range subsystems { 161 path := m.paths[sys.Name()] 162 if path == "" { 163 continue 164 } 165 if err := sys.GetStats(path, stats); err != nil { 166 return nil, err 167 } 168 } 169 return stats, nil 170 } 171 172 func (m *Manager) Set(r *configs.Resources) error { 173 if r == nil { 174 return nil 175 } 176 177 if r.Unified != nil { 178 return cgroups.ErrV1NoUnified 179 } 180 181 m.mu.Lock() 182 defer m.mu.Unlock() 183 for _, sys := range subsystems { 184 path := m.paths[sys.Name()] 185 if err := sys.Set(path, r); err != nil { 186 // When rootless is true, errors from the device subsystem 187 // are ignored, as it is really not expected to work. 188 if m.cgroups.Rootless && sys.Name() == "devices" && !errors.Is(err, cgroups.ErrDevicesUnsupported) { 189 continue 190 } 191 // However, errors from other subsystems are not ignored. 192 // see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error" 193 if path == "" { 194 // We never created a path for this cgroup, so we cannot set 195 // limits for it (though we have already tried at this point). 196 return fmt.Errorf("cannot set %s limit: container could not join or create cgroup", sys.Name()) 197 } 198 return err 199 } 200 } 201 202 return nil 203 } 204 205 // Freeze toggles the container's freezer cgroup depending on the state 206 // provided 207 func (m *Manager) Freeze(state configs.FreezerState) error { 208 path := m.Path("freezer") 209 if path == "" { 210 return errors.New("cannot toggle freezer: cgroups not configured for container") 211 } 212 213 prevState := m.cgroups.Resources.Freezer 214 m.cgroups.Resources.Freezer = state 215 freezer := &FreezerGroup{} 216 if err := freezer.Set(path, m.cgroups.Resources); err != nil { 217 m.cgroups.Resources.Freezer = prevState 218 return err 219 } 220 return nil 221 } 222 223 func (m *Manager) GetPids() ([]int, error) { 224 return cgroups.GetPids(m.Path("devices")) 225 } 226 227 func (m *Manager) GetAllPids() ([]int, error) { 228 return cgroups.GetAllPids(m.Path("devices")) 229 } 230 231 func (m *Manager) GetPaths() map[string]string { 232 m.mu.Lock() 233 defer m.mu.Unlock() 234 return m.paths 235 } 236 237 func (m *Manager) GetCgroups() (*configs.Cgroup, error) { 238 return m.cgroups, nil 239 } 240 241 func (m *Manager) GetFreezerState() (configs.FreezerState, error) { 242 dir := m.Path("freezer") 243 // If the container doesn't have the freezer cgroup, say it's undefined. 244 if dir == "" { 245 return configs.Undefined, nil 246 } 247 freezer := &FreezerGroup{} 248 return freezer.GetState(dir) 249 } 250 251 func (m *Manager) Exists() bool { 252 return cgroups.PathExists(m.Path("devices")) 253 } 254 255 func OOMKillCount(path string) (uint64, error) { 256 return fscommon.GetValueByKey(path, "memory.oom_control", "oom_kill") 257 } 258 259 func (m *Manager) OOMKillCount() (uint64, error) { 260 c, err := OOMKillCount(m.Path("memory")) 261 // Ignore ENOENT when rootless as it couldn't create cgroup. 262 if err != nil && m.cgroups.Rootless && os.IsNotExist(err) { 263 err = nil 264 } 265 266 return c, err 267 } 268 269 func (m *Manager) GetEffectiveCPUs() string { 270 return GetEffectiveCPUs(m.Path("cpuset"), m.cgroups) 271 } 272 273 func GetEffectiveCPUs(cpusetPath string, cgroups *configs.Cgroup) string { 274 // Fast path. 275 if cgroups.CpusetCpus != "" { 276 return cgroups.CpusetCpus 277 } else if !strings.HasPrefix(cpusetPath, defaultCgroupRoot) { 278 return "" 279 } 280 281 // Iterates until it goes to the cgroup root path. 282 // It's required for containers in which cpuset controller 283 // is not enabled, in this case a parent cgroup is used. 284 for path := cpusetPath; path != defaultCgroupRoot; path = filepath.Dir(path) { 285 cpus, err := fscommon.GetCgroupParamString(path, "cpuset.effective_cpus") 286 if err == nil { 287 return cpus 288 } 289 } 290 291 return "" 292 }