github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/kernel/task_cgroup.go (about) 1 // Copyright 2021 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package kernel 16 17 import ( 18 "bytes" 19 "fmt" 20 "sort" 21 "strings" 22 23 "github.com/nicocha30/gvisor-ligolo/pkg/errors/linuxerr" 24 "github.com/nicocha30/gvisor-ligolo/pkg/log" 25 ) 26 27 // EnterInitialCgroups moves t into an initial set of cgroups. 28 // If initCgroups is not nil, the new task will be placed in the specified cgroups. 29 // Otherwise, if parent is not nil, the new task will be placed in the parent's cgroups. 30 // If neither is specified, the new task will be in the root cgroups. 31 // 32 // This is analogous to Linux's kernel/cgroup/cgroup.c:cgroup_css_set_fork(). 33 // 34 // Precondition: t isn't in any cgroups yet, t.cgroups is empty. 35 func (t *Task) EnterInitialCgroups(parent *Task, initCgroups map[Cgroup]struct{}) { 36 var inherit map[Cgroup]struct{} 37 if initCgroups != nil { 38 inherit = initCgroups 39 } else if parent != nil { 40 parent.mu.Lock() 41 defer parent.mu.Unlock() 42 inherit = parent.cgroups 43 } 44 joinSet := t.k.cgroupRegistry.computeInitialGroups(inherit) 45 46 t.mu.NestedLock(taskLockChild) 47 defer t.mu.NestedUnlock(taskLockChild) 48 // Transfer ownership of joinSet refs to the task's cgset. 49 t.cgroups = joinSet 50 for c := range t.cgroups { 51 // Since t isn't in any cgroup yet, we can skip the check against 52 // existing cgroups. 53 c.Enter(t) 54 t.setMemCgID(c) 55 } 56 } 57 58 // TODO(b/277772401): setMemCgIDLocked should be called after adding support for 59 // task migration for cgroup memory controllers. 60 func (t *Task) setMemCgID(cg Cgroup) { 61 for _, ctl := range cg.Controllers() { 62 if ctl.Type() == CgroupControllerMemory { 63 t.memCgID.Store(cg.ID()) 64 } 65 } 66 } 67 68 func (t *Task) resetMemCgID(cg Cgroup) { 69 for _, ctl := range cg.Controllers() { 70 if ctl.Type() == CgroupControllerMemory { 71 t.memCgID.Store(0) 72 } 73 } 74 } 75 76 // EnterCgroup moves t into c. 77 func (t *Task) EnterCgroup(c Cgroup) error { 78 newControllers := make(map[CgroupControllerType]struct{}) 79 for _, ctl := range c.Controllers() { 80 newControllers[ctl.Type()] = struct{}{} 81 } 82 83 t.mu.Lock() 84 defer t.mu.Unlock() 85 86 for oldCG := range t.cgroups { 87 if oldCG.HierarchyID() == c.HierarchyID() { 88 log.Warningf("Cannot enter new cgroup %v due to conflicting controllers. Try migrate instead?", c) 89 return linuxerr.EBUSY 90 } 91 } 92 93 // No migration required. 94 t.enterCgroupLocked(c) 95 96 return nil 97 } 98 99 // +checklocks:t.mu 100 func (t *Task) enterCgroupLocked(c Cgroup) { 101 c.IncRef() 102 t.cgroups[c] = struct{}{} 103 c.Enter(t) 104 t.setMemCgID(c) 105 } 106 107 // +checklocks:t.mu 108 func (t *Task) enterCgroupIfNotYetLocked(c Cgroup) { 109 if _, ok := t.cgroups[c]; ok { 110 return 111 } 112 t.enterCgroupLocked(c) 113 } 114 115 // LeaveCgroups removes t out from all its cgroups. 116 func (t *Task) LeaveCgroups() { 117 t.tg.pidns.owner.mu.Lock() // Prevent migration. 118 t.mu.Lock() 119 cgs := t.cgroups 120 t.cgroups = nil 121 for c := range cgs { 122 c.Leave(t) 123 } 124 t.memCgID.Store(0) 125 t.mu.Unlock() 126 t.tg.pidns.owner.mu.Unlock() 127 128 for c := range cgs { 129 c.decRef() 130 } 131 } 132 133 // +checklocks:t.mu 134 func (t *Task) findCgroupWithMatchingHierarchyLocked(other Cgroup) (Cgroup, bool) { 135 for c := range t.cgroups { 136 if c.HierarchyID() != other.HierarchyID() { 137 continue 138 } 139 return c, true 140 } 141 return Cgroup{}, false 142 } 143 144 // CgroupPrepareMigrate starts a cgroup migration for this task to dst. The 145 // migration must be completed through the returned context. 146 func (t *Task) CgroupPrepareMigrate(dst Cgroup) (*CgroupMigrationContext, error) { 147 t.mu.Lock() 148 defer t.mu.Unlock() 149 src, found := t.findCgroupWithMatchingHierarchyLocked(dst) 150 if !found { 151 log.Warningf("Cannot migrate to cgroup %v since task %v not currently in target hierarchy %v", dst, t, dst.HierarchyID()) 152 return nil, linuxerr.EINVAL 153 } 154 if err := dst.PrepareMigrate(t, &src); err != nil { 155 return nil, err 156 } 157 return &CgroupMigrationContext{ 158 src: src, 159 dst: dst, 160 t: t, 161 }, nil 162 } 163 164 // MigrateCgroup migrates all tasks in tg to the dst cgroup. Either all tasks 165 // are migrated, or none are. Atomicity of migrations wrt cgroup membership 166 // (i.e. a task can't switch cgroups mid-migration due to another migration) is 167 // guaranteed because migrations are serialized by TaskSet.mu. 168 func (tg *ThreadGroup) MigrateCgroup(dst Cgroup) error { 169 tg.pidns.owner.mu.RLock() 170 defer tg.pidns.owner.mu.RUnlock() 171 172 var ctxs []*CgroupMigrationContext 173 174 // Prepare migrations. On partial failure, abort. 175 for t := tg.tasks.Front(); t != nil; t = t.Next() { 176 ctx, err := t.CgroupPrepareMigrate(dst) 177 if err != nil { 178 // Rollback. 179 for _, ctx := range ctxs { 180 ctx.Abort() 181 } 182 return err 183 } 184 ctxs = append(ctxs, ctx) 185 } 186 187 // All migrations are now guaranteed to succeed. 188 189 for _, ctx := range ctxs { 190 ctx.Commit() 191 } 192 193 return nil 194 } 195 196 // MigrateCgroup migrates this task to the dst cgroup. 197 func (t *Task) MigrateCgroup(dst Cgroup) error { 198 t.tg.pidns.owner.mu.RLock() 199 defer t.tg.pidns.owner.mu.RUnlock() 200 201 ctx, err := t.CgroupPrepareMigrate(dst) 202 if err != nil { 203 return err 204 } 205 ctx.Commit() 206 return nil 207 } 208 209 // TaskCgroupEntry represents a line in /proc/<pid>/cgroup, and is used to 210 // format a cgroup for display. 211 type TaskCgroupEntry struct { 212 HierarchyID uint32 `json:"hierarchy_id"` 213 Controllers string `json:"controllers,omitempty"` 214 Path string `json:"path,omitempty"` 215 } 216 217 // GetCgroupEntries generates the contents of /proc/<pid>/cgroup as 218 // a TaskCgroupEntry array. 219 func (t *Task) GetCgroupEntries() []TaskCgroupEntry { 220 t.mu.Lock() 221 defer t.mu.Unlock() 222 223 cgEntries := make([]TaskCgroupEntry, 0, len(t.cgroups)) 224 for c := range t.cgroups { 225 ctls := c.Controllers() 226 ctlNames := make([]string, 0, len(ctls)) 227 228 // We're guaranteed to have a valid name, a non-empty controller list, 229 // or both. 230 231 // Explicit hierachy name, if any. 232 if name := c.Name(); name != "" { 233 ctlNames = append(ctlNames, fmt.Sprintf("name=%s", name)) 234 } 235 236 // Controllers attached to this hierarchy, if any. 237 for _, ctl := range ctls { 238 ctlNames = append(ctlNames, string(ctl.Type())) 239 } 240 241 cgEntries = append(cgEntries, TaskCgroupEntry{ 242 HierarchyID: c.HierarchyID(), 243 Controllers: strings.Join(ctlNames, ","), 244 Path: c.Path(), 245 }) 246 } 247 248 sort.Slice(cgEntries, func(i, j int) bool { return cgEntries[i].HierarchyID > cgEntries[j].HierarchyID }) 249 return cgEntries 250 } 251 252 // GenerateProcTaskCgroup writes the contents of /proc/<pid>/cgroup for t to buf. 253 func (t *Task) GenerateProcTaskCgroup(buf *bytes.Buffer) { 254 cgEntries := t.GetCgroupEntries() 255 for _, cgE := range cgEntries { 256 fmt.Fprintf(buf, "%d:%s:%s\n", cgE.HierarchyID, cgE.Controllers, cgE.Path) 257 } 258 } 259 260 // +checklocks:t.mu 261 func (t *Task) chargeLocked(target *Task, ctl CgroupControllerType, res CgroupResourceType, value int64) (bool, Cgroup, error) { 262 // Due to the uniqueness of controllers on hierarchies, at most one cgroup 263 // in t.cgroups will match. 264 for c := range t.cgroups { 265 err := c.Charge(target, c.Dentry, ctl, res, value) 266 if err == nil { 267 c.IncRef() 268 } 269 return err == nil, c, err 270 } 271 return false, Cgroup{}, nil 272 } 273 274 // ChargeFor charges t's cgroup on behalf of some other task. Returns 275 // the cgroup that's charged if any. Returned cgroup has an extra ref 276 // that's transferred to the caller. 277 func (t *Task) ChargeFor(other *Task, ctl CgroupControllerType, res CgroupResourceType, value int64) (bool, Cgroup, error) { 278 t.mu.Lock() 279 defer t.mu.Unlock() 280 return t.chargeLocked(other, ctl, res, value) 281 }