github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/kernel/task_cgroup.go (about) 1 // Copyright 2021 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package kernel 16 17 import ( 18 "bytes" 19 "fmt" 20 "sort" 21 "strings" 22 23 "github.com/metacubex/gvisor/pkg/errors/linuxerr" 24 "github.com/metacubex/gvisor/pkg/log" 25 ) 26 27 // EnterInitialCgroups moves t into an initial set of cgroups. 28 // If initCgroups is not nil, the new task will be placed in the specified cgroups. 29 // Otherwise, if parent is not nil, the new task will be placed in the parent's cgroups. 30 // If neither is specified, the new task will be in the root cgroups. 31 // 32 // This is analogous to Linux's kernel/cgroup/cgroup.c:cgroup_css_set_fork(). 33 // 34 // Precondition: t isn't in any cgroups yet, t.cgroups is empty. 35 func (t *Task) EnterInitialCgroups(parent *Task, initCgroups map[Cgroup]struct{}) { 36 var inherit map[Cgroup]struct{} 37 if initCgroups != nil { 38 inherit = initCgroups 39 } else if parent != nil { 40 parent.mu.Lock() 41 defer parent.mu.Unlock() 42 inherit = parent.cgroups 43 } 44 joinSet := t.k.cgroupRegistry.computeInitialGroups(inherit) 45 46 t.mu.NestedLock(taskLockChild) 47 defer t.mu.NestedUnlock(taskLockChild) 48 // Transfer ownership of joinSet refs to the task's cgset. 49 t.cgroups = joinSet 50 for c := range t.cgroups { 51 // Since t isn't in any cgroup yet, we can skip the check against 52 // existing cgroups. 53 c.Enter(t) 54 t.SetMemCgIDFromCgroup(c) 55 } 56 } 57 58 // SetMemCgID sets the given memory cgroup id to the task. 59 func (t *Task) SetMemCgID(memCgID uint32) { 60 t.memCgID.Store(memCgID) 61 } 62 63 // SetMemCgIDFromCgroup sets the id of the given memory cgroup to the task. 64 func (t *Task) SetMemCgIDFromCgroup(cg Cgroup) { 65 for _, ctl := range cg.Controllers() { 66 if ctl.Type() == CgroupControllerMemory { 67 t.SetMemCgID(cg.ID()) 68 return 69 } 70 } 71 } 72 73 // ResetMemCgIDFromCgroup sets the memory cgroup id to zero, if the task has 74 // a memory cgroup. 75 func (t *Task) ResetMemCgIDFromCgroup(cg Cgroup) { 76 for _, ctl := range cg.Controllers() { 77 if ctl.Type() == CgroupControllerMemory { 78 t.SetMemCgID(0) 79 return 80 } 81 } 82 } 83 84 // EnterCgroup moves t into c. 85 func (t *Task) EnterCgroup(c Cgroup) error { 86 newControllers := make(map[CgroupControllerType]struct{}) 87 for _, ctl := range c.Controllers() { 88 newControllers[ctl.Type()] = struct{}{} 89 } 90 91 t.mu.Lock() 92 defer t.mu.Unlock() 93 94 for oldCG := range t.cgroups { 95 if oldCG.HierarchyID() == c.HierarchyID() { 96 log.Warningf("Cannot enter new cgroup %v due to conflicting controllers. Try migrate instead?", c) 97 return linuxerr.EBUSY 98 } 99 } 100 101 // No migration required. 102 t.enterCgroupLocked(c) 103 104 return nil 105 } 106 107 // +checklocks:t.mu 108 func (t *Task) enterCgroupLocked(c Cgroup) { 109 c.IncRef() 110 t.cgroups[c] = struct{}{} 111 c.Enter(t) 112 t.SetMemCgIDFromCgroup(c) 113 } 114 115 // +checklocks:t.mu 116 func (t *Task) enterCgroupIfNotYetLocked(c Cgroup) { 117 if _, ok := t.cgroups[c]; ok { 118 return 119 } 120 t.enterCgroupLocked(c) 121 } 122 123 // LeaveCgroups removes t out from all its cgroups. 124 func (t *Task) LeaveCgroups() { 125 t.tg.pidns.owner.mu.Lock() // Prevent migration. 126 t.mu.Lock() 127 cgs := t.cgroups 128 t.cgroups = nil 129 for c := range cgs { 130 c.Leave(t) 131 } 132 t.SetMemCgID(0) 133 t.mu.Unlock() 134 t.tg.pidns.owner.mu.Unlock() 135 136 for c := range cgs { 137 c.decRef() 138 } 139 } 140 141 // +checklocks:t.mu 142 func (t *Task) findCgroupWithMatchingHierarchyLocked(other Cgroup) (Cgroup, bool) { 143 for c := range t.cgroups { 144 if c.HierarchyID() != other.HierarchyID() { 145 continue 146 } 147 return c, true 148 } 149 return Cgroup{}, false 150 } 151 152 // CgroupPrepareMigrate starts a cgroup migration for this task to dst. The 153 // migration must be completed through the returned context. 154 func (t *Task) CgroupPrepareMigrate(dst Cgroup) (*CgroupMigrationContext, error) { 155 t.mu.Lock() 156 defer t.mu.Unlock() 157 src, found := t.findCgroupWithMatchingHierarchyLocked(dst) 158 if !found { 159 log.Warningf("Cannot migrate to cgroup %v since task %v not currently in target hierarchy %v", dst, t, dst.HierarchyID()) 160 return nil, linuxerr.EINVAL 161 } 162 if err := dst.PrepareMigrate(t, &src); err != nil { 163 return nil, err 164 } 165 return &CgroupMigrationContext{ 166 src: src, 167 dst: dst, 168 t: t, 169 }, nil 170 } 171 172 // MigrateCgroup migrates all tasks in tg to the dst cgroup. Either all tasks 173 // are migrated, or none are. Atomicity of migrations wrt cgroup membership 174 // (i.e. a task can't switch cgroups mid-migration due to another migration) is 175 // guaranteed because migrations are serialized by TaskSet.mu. 176 func (tg *ThreadGroup) MigrateCgroup(dst Cgroup) error { 177 tg.pidns.owner.mu.RLock() 178 defer tg.pidns.owner.mu.RUnlock() 179 180 var ctxs []*CgroupMigrationContext 181 182 // Prepare migrations. On partial failure, abort. 183 for t := tg.tasks.Front(); t != nil; t = t.Next() { 184 ctx, err := t.CgroupPrepareMigrate(dst) 185 if err != nil { 186 // Rollback. 187 for _, ctx := range ctxs { 188 ctx.Abort() 189 } 190 return err 191 } 192 ctxs = append(ctxs, ctx) 193 } 194 195 // All migrations are now guaranteed to succeed. 196 197 for _, ctx := range ctxs { 198 ctx.Commit() 199 } 200 201 return nil 202 } 203 204 // MigrateCgroup migrates this task to the dst cgroup. 205 func (t *Task) MigrateCgroup(dst Cgroup) error { 206 t.tg.pidns.owner.mu.RLock() 207 defer t.tg.pidns.owner.mu.RUnlock() 208 209 ctx, err := t.CgroupPrepareMigrate(dst) 210 if err != nil { 211 return err 212 } 213 ctx.Commit() 214 return nil 215 } 216 217 // TaskCgroupEntry represents a line in /proc/<pid>/cgroup, and is used to 218 // format a cgroup for display. 219 type TaskCgroupEntry struct { 220 HierarchyID uint32 `json:"hierarchy_id"` 221 Controllers string `json:"controllers,omitempty"` 222 Path string `json:"path,omitempty"` 223 } 224 225 // GetCgroupEntries generates the contents of /proc/<pid>/cgroup as 226 // a TaskCgroupEntry array. 227 func (t *Task) GetCgroupEntries() []TaskCgroupEntry { 228 t.mu.Lock() 229 defer t.mu.Unlock() 230 231 cgEntries := make([]TaskCgroupEntry, 0, len(t.cgroups)) 232 for c := range t.cgroups { 233 ctls := c.Controllers() 234 ctlNames := make([]string, 0, len(ctls)) 235 236 // We're guaranteed to have a valid name, a non-empty controller list, 237 // or both. 238 239 // Explicit hierarchy name, if any. 240 if name := c.Name(); name != "" { 241 ctlNames = append(ctlNames, fmt.Sprintf("name=%s", name)) 242 } 243 244 // Controllers attached to this hierarchy, if any. 245 for _, ctl := range ctls { 246 ctlNames = append(ctlNames, string(ctl.Type())) 247 } 248 249 cgEntries = append(cgEntries, TaskCgroupEntry{ 250 HierarchyID: c.HierarchyID(), 251 Controllers: strings.Join(ctlNames, ","), 252 Path: c.Path(), 253 }) 254 } 255 256 sort.Slice(cgEntries, func(i, j int) bool { return cgEntries[i].HierarchyID > cgEntries[j].HierarchyID }) 257 return cgEntries 258 } 259 260 // GenerateProcTaskCgroup writes the contents of /proc/<pid>/cgroup for t to buf. 261 func (t *Task) GenerateProcTaskCgroup(buf *bytes.Buffer) { 262 cgEntries := t.GetCgroupEntries() 263 for _, cgE := range cgEntries { 264 fmt.Fprintf(buf, "%d:%s:%s\n", cgE.HierarchyID, cgE.Controllers, cgE.Path) 265 } 266 } 267 268 // +checklocks:t.mu 269 func (t *Task) chargeLocked(target *Task, ctl CgroupControllerType, res CgroupResourceType, value int64) (bool, Cgroup, error) { 270 // Due to the uniqueness of controllers on hierarchies, at most one cgroup 271 // in t.cgroups will match. 272 for c := range t.cgroups { 273 err := c.Charge(target, c.Dentry, ctl, res, value) 274 if err == nil { 275 c.IncRef() 276 } 277 return err == nil, c, err 278 } 279 return false, Cgroup{}, nil 280 } 281 282 // ChargeFor charges t's cgroup on behalf of some other task. Returns 283 // the cgroup that's charged if any. Returned cgroup has an extra ref 284 // that's transferred to the caller. 285 func (t *Task) ChargeFor(other *Task, ctl CgroupControllerType, res CgroupResourceType, value int64) (bool, Cgroup, error) { 286 t.mu.Lock() 287 defer t.mu.Unlock() 288 return t.chargeLocked(other, ctl, res, value) 289 }