github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/kernel/cgroup.go (about) 1 // Copyright 2021 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package kernel 16 17 import ( 18 "bytes" 19 "fmt" 20 "sort" 21 "sync/atomic" 22 23 "github.com/SagerNet/gvisor/pkg/context" 24 "github.com/SagerNet/gvisor/pkg/sentry/fsimpl/kernfs" 25 "github.com/SagerNet/gvisor/pkg/sentry/vfs" 26 "github.com/SagerNet/gvisor/pkg/sync" 27 ) 28 29 // InvalidCgroupHierarchyID indicates an uninitialized hierarchy ID. 30 const InvalidCgroupHierarchyID uint32 = 0 31 32 // CgroupControllerType is the name of a cgroup controller. 33 type CgroupControllerType string 34 35 // CgroupController is the common interface to cgroup controllers available to 36 // the entire sentry. The controllers themselves are defined by cgroupfs. 37 // 38 // Callers of this interface are often unable access synchronization needed to 39 // ensure returned values remain valid. Some of values returned from this 40 // interface are thus snapshots in time, and may become stale. This is ok for 41 // many callers like procfs. 42 type CgroupController interface { 43 // Returns the type of this cgroup controller (ex "memory", "cpu"). Returned 44 // value is valid for the lifetime of the controller. 45 Type() CgroupControllerType 46 47 // Hierarchy returns the ID of the hierarchy this cgroup controller is 48 // attached to. Returned value is valid for the lifetime of the controller. 49 HierarchyID() uint32 50 51 // RootCgroup returns the root cgroup for this controller. Returned value is 52 // valid for the lifetime of the controller. 53 RootCgroup() Cgroup 54 55 // NumCgroups returns the number of cgroups managed by this controller. 56 // Returned value is a snapshot in time. 57 NumCgroups() uint64 58 59 // Enabled returns whether this controller is enabled. Returned value is a 60 // snapshot in time. 61 Enabled() bool 62 } 63 64 // Cgroup represents a named pointer to a cgroup in cgroupfs. When a task enters 65 // a cgroup, it holds a reference on the underlying dentry pointing to the 66 // cgroup. 67 // 68 // +stateify savable 69 type Cgroup struct { 70 *kernfs.Dentry 71 CgroupImpl 72 } 73 74 func (c *Cgroup) decRef() { 75 c.Dentry.DecRef(context.Background()) 76 } 77 78 // Path returns the absolute path of c, relative to its hierarchy root. 79 func (c *Cgroup) Path() string { 80 return c.FSLocalPath() 81 } 82 83 // HierarchyID returns the id of the hierarchy that contains this cgroup. 84 func (c *Cgroup) HierarchyID() uint32 { 85 // Note: a cgroup is guaranteed to have at least one controller. 86 return c.Controllers()[0].HierarchyID() 87 } 88 89 // CgroupImpl is the common interface to cgroups. 90 type CgroupImpl interface { 91 Controllers() []CgroupController 92 Enter(t *Task) 93 Leave(t *Task) 94 } 95 96 // hierarchy represents a cgroupfs filesystem instance, with a unique set of 97 // controllers attached to it. Multiple cgroupfs mounts may reference the same 98 // hierarchy. 99 // 100 // +stateify savable 101 type hierarchy struct { 102 id uint32 103 // These are a subset of the controllers in CgroupRegistry.controllers, 104 // grouped here by hierarchy for conveninent lookup. 105 controllers map[CgroupControllerType]CgroupController 106 // fs is not owned by hierarchy. The FS is responsible for unregistering the 107 // hierarchy on destruction, which removes this association. 108 fs *vfs.Filesystem 109 } 110 111 func (h *hierarchy) match(ctypes []CgroupControllerType) bool { 112 if len(ctypes) != len(h.controllers) { 113 return false 114 } 115 for _, ty := range ctypes { 116 if _, ok := h.controllers[ty]; !ok { 117 return false 118 } 119 } 120 return true 121 } 122 123 // cgroupFS is the public interface to cgroupfs. This lets the kernel package 124 // refer to cgroupfs.filesystem methods without directly depending on the 125 // cgroupfs package, which would lead to a circular dependency. 126 type cgroupFS interface { 127 // Returns the vfs.Filesystem for the cgroupfs. 128 VFSFilesystem() *vfs.Filesystem 129 130 // InitializeHierarchyID sets the hierarchy ID for this filesystem during 131 // filesystem creation. May only be called before the filesystem is visible 132 // to the vfs layer. 133 InitializeHierarchyID(hid uint32) 134 } 135 136 // CgroupRegistry tracks the active set of cgroup controllers on the system. 137 // 138 // +stateify savable 139 type CgroupRegistry struct { 140 // lastHierarchyID is the id of the last allocated cgroup hierarchy. Valid 141 // ids are from 1 to math.MaxUint32. Must be accessed through atomic ops. 142 // 143 lastHierarchyID uint32 144 145 mu sync.Mutex `state:"nosave"` 146 147 // controllers is the set of currently known cgroup controllers on the 148 // system. Protected by mu. 149 // 150 // +checklocks:mu 151 controllers map[CgroupControllerType]CgroupController 152 153 // hierarchies is the active set of cgroup hierarchies. Protected by mu. 154 // 155 // +checklocks:mu 156 hierarchies map[uint32]hierarchy 157 } 158 159 func newCgroupRegistry() *CgroupRegistry { 160 return &CgroupRegistry{ 161 controllers: make(map[CgroupControllerType]CgroupController), 162 hierarchies: make(map[uint32]hierarchy), 163 } 164 } 165 166 // nextHierarchyID returns a newly allocated, unique hierarchy ID. 167 func (r *CgroupRegistry) nextHierarchyID() (uint32, error) { 168 if hid := atomic.AddUint32(&r.lastHierarchyID, 1); hid != 0 { 169 return hid, nil 170 } 171 return InvalidCgroupHierarchyID, fmt.Errorf("cgroup hierarchy ID overflow") 172 } 173 174 // FindHierarchy returns a cgroup filesystem containing exactly the set of 175 // controllers named in names. If no such FS is found, FindHierarchy return 176 // nil. FindHierarchy takes a reference on the returned FS, which is transferred 177 // to the caller. 178 func (r *CgroupRegistry) FindHierarchy(ctypes []CgroupControllerType) *vfs.Filesystem { 179 r.mu.Lock() 180 defer r.mu.Unlock() 181 182 for _, h := range r.hierarchies { 183 if h.match(ctypes) { 184 if !h.fs.TryIncRef() { 185 // Racing with filesystem destruction, namely h.fs.Release. 186 // Since we hold r.mu, we know the hierarchy hasn't been 187 // unregistered yet, but its associated filesystem is tearing 188 // down. 189 // 190 // If we simply indicate the hierarchy wasn't found without 191 // cleaning up the registry, the caller can race with the 192 // unregister and find itself temporarily unable to create a new 193 // hierarchy with a subset of the relevant controllers. 194 // 195 // To keep the result of FindHierarchy consistent with the 196 // uniqueness of controllers enforced by Register, drop the 197 // dying hierarchy now. The eventual unregister by the FS 198 // teardown will become a no-op. 199 return nil 200 } 201 return h.fs 202 } 203 } 204 205 return nil 206 } 207 208 // Register registers the provided set of controllers with the registry as a new 209 // hierarchy. If any controller is already registered, the function returns an 210 // error without modifying the registry. Register sets the hierarchy ID for the 211 // filesystem on success. 212 func (r *CgroupRegistry) Register(cs []CgroupController, fs cgroupFS) error { 213 r.mu.Lock() 214 defer r.mu.Unlock() 215 216 if len(cs) == 0 { 217 return fmt.Errorf("can't register hierarchy with no controllers") 218 } 219 220 for _, c := range cs { 221 if _, ok := r.controllers[c.Type()]; ok { 222 return fmt.Errorf("controllers may only be mounted on a single hierarchy") 223 } 224 } 225 226 hid, err := r.nextHierarchyID() 227 if err != nil { 228 return err 229 } 230 231 // Must not fail below here, once we publish the hierarchy ID. 232 233 fs.InitializeHierarchyID(hid) 234 235 h := hierarchy{ 236 id: hid, 237 controllers: make(map[CgroupControllerType]CgroupController), 238 fs: fs.VFSFilesystem(), 239 } 240 for _, c := range cs { 241 n := c.Type() 242 r.controllers[n] = c 243 h.controllers[n] = c 244 } 245 r.hierarchies[hid] = h 246 return nil 247 } 248 249 // Unregister removes a previously registered hierarchy from the registry. If no 250 // such hierarchy is registered, Unregister is a no-op. 251 func (r *CgroupRegistry) Unregister(hid uint32) { 252 r.mu.Lock() 253 r.unregisterLocked(hid) 254 r.mu.Unlock() 255 } 256 257 // Precondition: Caller must hold r.mu. 258 // +checklocks:r.mu 259 func (r *CgroupRegistry) unregisterLocked(hid uint32) { 260 if h, ok := r.hierarchies[hid]; ok { 261 for name, _ := range h.controllers { 262 delete(r.controllers, name) 263 } 264 delete(r.hierarchies, hid) 265 } 266 } 267 268 // computeInitialGroups takes a reference on each of the returned cgroups. The 269 // caller takes ownership of this returned reference. 270 func (r *CgroupRegistry) computeInitialGroups(inherit map[Cgroup]struct{}) map[Cgroup]struct{} { 271 r.mu.Lock() 272 defer r.mu.Unlock() 273 274 ctlSet := make(map[CgroupControllerType]CgroupController) 275 cgset := make(map[Cgroup]struct{}) 276 277 // Remember controllers from the inherited cgroups set... 278 for cg, _ := range inherit { 279 cg.IncRef() // Ref transferred to caller. 280 for _, ctl := range cg.Controllers() { 281 ctlSet[ctl.Type()] = ctl 282 cgset[cg] = struct{}{} 283 } 284 } 285 286 // ... and add the root cgroups of all the missing controllers. 287 for name, ctl := range r.controllers { 288 if _, ok := ctlSet[name]; !ok { 289 cg := ctl.RootCgroup() 290 // Multiple controllers may share the same hierarchy, so may have 291 // the same root cgroup. Grab a single ref per hierarchy root. 292 if _, ok := cgset[cg]; ok { 293 continue 294 } 295 cg.IncRef() // Ref transferred to caller. 296 cgset[cg] = struct{}{} 297 } 298 } 299 return cgset 300 } 301 302 // GenerateProcCgroups writes the contents of /proc/cgroups to buf. 303 func (r *CgroupRegistry) GenerateProcCgroups(buf *bytes.Buffer) { 304 r.mu.Lock() 305 entries := make([]string, 0, len(r.controllers)) 306 for _, c := range r.controllers { 307 en := 0 308 if c.Enabled() { 309 en = 1 310 } 311 entries = append(entries, fmt.Sprintf("%s\t%d\t%d\t%d\n", c.Type(), c.HierarchyID(), c.NumCgroups(), en)) 312 } 313 r.mu.Unlock() 314 315 sort.Strings(entries) 316 fmt.Fprint(buf, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n") 317 for _, e := range entries { 318 fmt.Fprint(buf, e) 319 } 320 }