github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/kernel/task_cgroup.go (about)

     1  // Copyright 2021 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package kernel
    16  
    17  import (
    18  	"bytes"
    19  	"fmt"
    20  	"sort"
    21  	"strings"
    22  
    23  	"github.com/nicocha30/gvisor-ligolo/pkg/errors/linuxerr"
    24  	"github.com/nicocha30/gvisor-ligolo/pkg/log"
    25  )
    26  
    27  // EnterInitialCgroups moves t into an initial set of cgroups.
    28  // If initCgroups is not nil, the new task will be placed in the specified cgroups.
    29  // Otherwise, if parent is not nil, the new task will be placed in the parent's cgroups.
    30  // If neither is specified, the new task will be in the root cgroups.
    31  //
    32  // This is analogous to Linux's kernel/cgroup/cgroup.c:cgroup_css_set_fork().
    33  //
    34  // Precondition: t isn't in any cgroups yet, t.cgroups is empty.
    35  func (t *Task) EnterInitialCgroups(parent *Task, initCgroups map[Cgroup]struct{}) {
    36  	var inherit map[Cgroup]struct{}
    37  	if initCgroups != nil {
    38  		inherit = initCgroups
    39  	} else if parent != nil {
    40  		parent.mu.Lock()
    41  		defer parent.mu.Unlock()
    42  		inherit = parent.cgroups
    43  	}
    44  	joinSet := t.k.cgroupRegistry.computeInitialGroups(inherit)
    45  
    46  	t.mu.NestedLock(taskLockChild)
    47  	defer t.mu.NestedUnlock(taskLockChild)
    48  	// Transfer ownership of joinSet refs to the task's cgset.
    49  	t.cgroups = joinSet
    50  	for c := range t.cgroups {
    51  		// Since t isn't in any cgroup yet, we can skip the check against
    52  		// existing cgroups.
    53  		c.Enter(t)
    54  		t.setMemCgID(c)
    55  	}
    56  }
    57  
    58  // TODO(b/277772401): setMemCgIDLocked should be called after adding support for
    59  // task migration for cgroup memory controllers.
    60  func (t *Task) setMemCgID(cg Cgroup) {
    61  	for _, ctl := range cg.Controllers() {
    62  		if ctl.Type() == CgroupControllerMemory {
    63  			t.memCgID.Store(cg.ID())
    64  		}
    65  	}
    66  }
    67  
    68  func (t *Task) resetMemCgID(cg Cgroup) {
    69  	for _, ctl := range cg.Controllers() {
    70  		if ctl.Type() == CgroupControllerMemory {
    71  			t.memCgID.Store(0)
    72  		}
    73  	}
    74  }
    75  
    76  // EnterCgroup moves t into c.
    77  func (t *Task) EnterCgroup(c Cgroup) error {
    78  	newControllers := make(map[CgroupControllerType]struct{})
    79  	for _, ctl := range c.Controllers() {
    80  		newControllers[ctl.Type()] = struct{}{}
    81  	}
    82  
    83  	t.mu.Lock()
    84  	defer t.mu.Unlock()
    85  
    86  	for oldCG := range t.cgroups {
    87  		if oldCG.HierarchyID() == c.HierarchyID() {
    88  			log.Warningf("Cannot enter new cgroup %v due to conflicting controllers. Try migrate instead?", c)
    89  			return linuxerr.EBUSY
    90  		}
    91  	}
    92  
    93  	// No migration required.
    94  	t.enterCgroupLocked(c)
    95  
    96  	return nil
    97  }
    98  
// enterCgroupLocked makes t a member of c without checking t's existing
// memberships: it takes a reference on c, records c in t.cgroups, notifies c
// through Enter, and updates t's memory cgroup ID if c has a memory controller.
//
// t.cgroups must be non-nil, since the insertion below would panic on a nil
// map.
//
// +checklocks:t.mu
func (t *Task) enterCgroupLocked(c Cgroup) {
	// The reference taken here is the one held by the entry in t.cgroups.
	c.IncRef()
	t.cgroups[c] = struct{}{}
	c.Enter(t)
	t.setMemCgID(c)
}
   106  
   107  // +checklocks:t.mu
   108  func (t *Task) enterCgroupIfNotYetLocked(c Cgroup) {
   109  	if _, ok := t.cgroups[c]; ok {
   110  		return
   111  	}
   112  	t.enterCgroupLocked(c)
   113  }
   114  
// LeaveCgroups removes t out from all its cgroups.
//
// With t.tg.pidns.owner.mu write-locked and t.mu held, the task's cgroup set
// is detached (t.cgroups becomes nil), every cgroup in it is notified through
// Leave, and the memory cgroup ID is reset to 0. decRef is called on each
// former cgroup only after both locks have been released.
func (t *Task) LeaveCgroups() {
	t.tg.pidns.owner.mu.Lock() // Prevent migration.
	t.mu.Lock()
	// Detach the set first so that the refs can be dropped below without
	// holding any lock.
	cgs := t.cgroups
	t.cgroups = nil
	for c := range cgs {
		c.Leave(t)
	}
	t.memCgID.Store(0)
	t.mu.Unlock()
	t.tg.pidns.owner.mu.Unlock()

	// Release the references that the detached set was holding.
	for c := range cgs {
		c.decRef()
	}
}
   132  
   133  // +checklocks:t.mu
   134  func (t *Task) findCgroupWithMatchingHierarchyLocked(other Cgroup) (Cgroup, bool) {
   135  	for c := range t.cgroups {
   136  		if c.HierarchyID() != other.HierarchyID() {
   137  			continue
   138  		}
   139  		return c, true
   140  	}
   141  	return Cgroup{}, false
   142  }
   143  
   144  // CgroupPrepareMigrate starts a cgroup migration for this task to dst. The
   145  // migration must be completed through the returned context.
   146  func (t *Task) CgroupPrepareMigrate(dst Cgroup) (*CgroupMigrationContext, error) {
   147  	t.mu.Lock()
   148  	defer t.mu.Unlock()
   149  	src, found := t.findCgroupWithMatchingHierarchyLocked(dst)
   150  	if !found {
   151  		log.Warningf("Cannot migrate to cgroup %v since task %v not currently in target hierarchy %v", dst, t, dst.HierarchyID())
   152  		return nil, linuxerr.EINVAL
   153  	}
   154  	if err := dst.PrepareMigrate(t, &src); err != nil {
   155  		return nil, err
   156  	}
   157  	return &CgroupMigrationContext{
   158  		src: src,
   159  		dst: dst,
   160  		t:   t,
   161  	}, nil
   162  }
   163  
   164  // MigrateCgroup migrates all tasks in tg to the dst cgroup. Either all tasks
   165  // are migrated, or none are. Atomicity of migrations wrt cgroup membership
   166  // (i.e. a task can't switch cgroups mid-migration due to another migration) is
   167  // guaranteed because migrations are serialized by TaskSet.mu.
   168  func (tg *ThreadGroup) MigrateCgroup(dst Cgroup) error {
   169  	tg.pidns.owner.mu.RLock()
   170  	defer tg.pidns.owner.mu.RUnlock()
   171  
   172  	var ctxs []*CgroupMigrationContext
   173  
   174  	// Prepare migrations. On partial failure, abort.
   175  	for t := tg.tasks.Front(); t != nil; t = t.Next() {
   176  		ctx, err := t.CgroupPrepareMigrate(dst)
   177  		if err != nil {
   178  			// Rollback.
   179  			for _, ctx := range ctxs {
   180  				ctx.Abort()
   181  			}
   182  			return err
   183  		}
   184  		ctxs = append(ctxs, ctx)
   185  	}
   186  
   187  	// All migrations are now guaranteed to succeed.
   188  
   189  	for _, ctx := range ctxs {
   190  		ctx.Commit()
   191  	}
   192  
   193  	return nil
   194  }
   195  
   196  // MigrateCgroup migrates this task to the dst cgroup.
   197  func (t *Task) MigrateCgroup(dst Cgroup) error {
   198  	t.tg.pidns.owner.mu.RLock()
   199  	defer t.tg.pidns.owner.mu.RUnlock()
   200  
   201  	ctx, err := t.CgroupPrepareMigrate(dst)
   202  	if err != nil {
   203  		return err
   204  	}
   205  	ctx.Commit()
   206  	return nil
   207  }
   208  
// TaskCgroupEntry represents a line in /proc/<pid>/cgroup, and is used to
// format a cgroup for display. GenerateProcTaskCgroup renders an entry as
// "<HierarchyID>:<Controllers>:<Path>".
type TaskCgroupEntry struct {
	// HierarchyID is the ID of the hierarchy the cgroup belongs to.
	HierarchyID uint32 `json:"hierarchy_id"`
	// Controllers is a comma-separated list made of an optional
	// "name=<hierarchy name>" element followed by the types of the
	// controllers attached to the hierarchy.
	Controllers string `json:"controllers,omitempty"`
	// Path is the cgroup's path as reported by Cgroup.Path.
	Path        string `json:"path,omitempty"`
}
   216  
   217  // GetCgroupEntries generates the contents of /proc/<pid>/cgroup as
   218  // a TaskCgroupEntry array.
   219  func (t *Task) GetCgroupEntries() []TaskCgroupEntry {
   220  	t.mu.Lock()
   221  	defer t.mu.Unlock()
   222  
   223  	cgEntries := make([]TaskCgroupEntry, 0, len(t.cgroups))
   224  	for c := range t.cgroups {
   225  		ctls := c.Controllers()
   226  		ctlNames := make([]string, 0, len(ctls))
   227  
   228  		// We're guaranteed to have a valid name, a non-empty controller list,
   229  		// or both.
   230  
   231  		// Explicit hierachy name, if any.
   232  		if name := c.Name(); name != "" {
   233  			ctlNames = append(ctlNames, fmt.Sprintf("name=%s", name))
   234  		}
   235  
   236  		// Controllers attached to this hierarchy, if any.
   237  		for _, ctl := range ctls {
   238  			ctlNames = append(ctlNames, string(ctl.Type()))
   239  		}
   240  
   241  		cgEntries = append(cgEntries, TaskCgroupEntry{
   242  			HierarchyID: c.HierarchyID(),
   243  			Controllers: strings.Join(ctlNames, ","),
   244  			Path:        c.Path(),
   245  		})
   246  	}
   247  
   248  	sort.Slice(cgEntries, func(i, j int) bool { return cgEntries[i].HierarchyID > cgEntries[j].HierarchyID })
   249  	return cgEntries
   250  }
   251  
   252  // GenerateProcTaskCgroup writes the contents of /proc/<pid>/cgroup for t to buf.
   253  func (t *Task) GenerateProcTaskCgroup(buf *bytes.Buffer) {
   254  	cgEntries := t.GetCgroupEntries()
   255  	for _, cgE := range cgEntries {
   256  		fmt.Fprintf(buf, "%d:%s:%s\n", cgE.HierarchyID, cgE.Controllers, cgE.Path)
   257  	}
   258  }
   259  
   260  // +checklocks:t.mu
   261  func (t *Task) chargeLocked(target *Task, ctl CgroupControllerType, res CgroupResourceType, value int64) (bool, Cgroup, error) {
   262  	// Due to the uniqueness of controllers on hierarchies, at most one cgroup
   263  	// in t.cgroups will match.
   264  	for c := range t.cgroups {
   265  		err := c.Charge(target, c.Dentry, ctl, res, value)
   266  		if err == nil {
   267  			c.IncRef()
   268  		}
   269  		return err == nil, c, err
   270  	}
   271  	return false, Cgroup{}, nil
   272  }
   273  
   274  // ChargeFor charges t's cgroup on behalf of some other task. Returns
   275  // the cgroup that's charged if any. Returned cgroup has an extra ref
   276  // that's transferred to the caller.
   277  func (t *Task) ChargeFor(other *Task, ctl CgroupControllerType, res CgroupResourceType, value int64) (bool, Cgroup, error) {
   278  	t.mu.Lock()
   279  	defer t.mu.Unlock()
   280  	return t.chargeLocked(other, ctl, res, value)
   281  }