github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/cgroupfs/base.go (about)

     1  // Copyright 2021 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package cgroupfs
    16  
    17  import (
    18  	"bytes"
    19  	"fmt"
    20  	"sort"
    21  	"strconv"
    22  	"sync/atomic"
    23  
    24  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    25  	"github.com/SagerNet/gvisor/pkg/context"
    26  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    27  	"github.com/SagerNet/gvisor/pkg/sentry/fsimpl/kernfs"
    28  	"github.com/SagerNet/gvisor/pkg/sentry/kernel"
    29  	"github.com/SagerNet/gvisor/pkg/sentry/kernel/auth"
    30  	"github.com/SagerNet/gvisor/pkg/usermem"
    31  )
    32  
    33  // controllerCommon implements kernel.CgroupController.
    34  //
    35  // Must call init before use.
    36  //
    37  // +stateify savable
    38  type controllerCommon struct {
    39  	ty kernel.CgroupControllerType
    40  	fs *filesystem
    41  }
    42  
    43  func (c *controllerCommon) init(ty kernel.CgroupControllerType, fs *filesystem) {
    44  	c.ty = ty
    45  	c.fs = fs
    46  }
    47  
    48  // Type implements kernel.CgroupController.Type.
    49  func (c *controllerCommon) Type() kernel.CgroupControllerType {
    50  	return kernel.CgroupControllerType(c.ty)
    51  }
    52  
    53  // HierarchyID implements kernel.CgroupController.HierarchyID.
    54  func (c *controllerCommon) HierarchyID() uint32 {
    55  	return c.fs.hierarchyID
    56  }
    57  
    58  // NumCgroups implements kernel.CgroupController.NumCgroups.
    59  func (c *controllerCommon) NumCgroups() uint64 {
    60  	return atomic.LoadUint64(&c.fs.numCgroups)
    61  }
    62  
    63  // Enabled implements kernel.CgroupController.Enabled.
    64  //
    65  // Controllers are currently always enabled.
    66  func (c *controllerCommon) Enabled() bool {
    67  	return true
    68  }
    69  
    70  // RootCgroup implements kernel.CgroupController.RootCgroup.
    71  func (c *controllerCommon) RootCgroup() kernel.Cgroup {
    72  	return c.fs.rootCgroup()
    73  }
    74  
    75  // controller is an interface for common functionality related to all cgroups.
    76  // It is an extension of the public cgroup interface, containing cgroup
    77  // functionality private to cgroupfs.
    78  type controller interface {
    79  	kernel.CgroupController
    80  
    81  	// AddControlFiles should extend the contents map with inodes representing
    82  	// control files defined by this controller.
    83  	AddControlFiles(ctx context.Context, creds *auth.Credentials, c *cgroupInode, contents map[string]kernfs.Inode)
    84  }
    85  
    86  // cgroupInode implements kernel.CgroupImpl and kernfs.Inode.
    87  //
    88  // +stateify savable
    89  type cgroupInode struct {
    90  	dir
    91  	fs *filesystem
    92  
    93  	// ts is the list of tasks in this cgroup. The kernel is responsible for
    94  	// removing tasks from this list before they're destroyed, so any tasks on
    95  	// this list are always valid.
    96  	//
    97  	// ts, and cgroup membership in general is protected by fs.tasksMu.
    98  	ts map[*kernel.Task]struct{}
    99  }
   100  
   101  var _ kernel.CgroupImpl = (*cgroupInode)(nil)
   102  
   103  func (fs *filesystem) newCgroupInode(ctx context.Context, creds *auth.Credentials) kernfs.Inode {
   104  	c := &cgroupInode{
   105  		fs: fs,
   106  		ts: make(map[*kernel.Task]struct{}),
   107  	}
   108  
   109  	contents := make(map[string]kernfs.Inode)
   110  	contents["cgroup.procs"] = fs.newControllerFile(ctx, creds, &cgroupProcsData{c})
   111  	contents["tasks"] = fs.newControllerFile(ctx, creds, &tasksData{c})
   112  
   113  	for _, ctl := range fs.controllers {
   114  		ctl.AddControlFiles(ctx, creds, c, contents)
   115  	}
   116  
   117  	c.dir.InodeAttrs.Init(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|linux.FileMode(0555))
   118  	c.dir.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
   119  	c.dir.InitRefs()
   120  	c.dir.IncLinks(c.dir.OrderedChildren.Populate(contents))
   121  
   122  	atomic.AddUint64(&fs.numCgroups, 1)
   123  
   124  	return c
   125  }
   126  
   127  func (c *cgroupInode) HierarchyID() uint32 {
   128  	return c.fs.hierarchyID
   129  }
   130  
   131  // Controllers implements kernel.CgroupImpl.Controllers.
   132  func (c *cgroupInode) Controllers() []kernel.CgroupController {
   133  	return c.fs.kcontrollers
   134  }
   135  
   136  // tasks returns a snapshot of the tasks inside the cgroup.
   137  func (c *cgroupInode) tasks() []*kernel.Task {
   138  	c.fs.tasksMu.RLock()
   139  	defer c.fs.tasksMu.RUnlock()
   140  	ts := make([]*kernel.Task, 0, len(c.ts))
   141  	for t := range c.ts {
   142  		ts = append(ts, t)
   143  	}
   144  	return ts
   145  }
   146  
   147  // Enter implements kernel.CgroupImpl.Enter.
   148  func (c *cgroupInode) Enter(t *kernel.Task) {
   149  	c.fs.tasksMu.Lock()
   150  	c.ts[t] = struct{}{}
   151  	c.fs.tasksMu.Unlock()
   152  }
   153  
   154  // Leave implements kernel.CgroupImpl.Leave.
   155  func (c *cgroupInode) Leave(t *kernel.Task) {
   156  	c.fs.tasksMu.Lock()
   157  	delete(c.ts, t)
   158  	c.fs.tasksMu.Unlock()
   159  }
   160  
   161  func sortTIDs(tids []kernel.ThreadID) {
   162  	sort.Slice(tids, func(i, j int) bool { return tids[i] < tids[j] })
   163  }
   164  
   165  // +stateify savable
   166  type cgroupProcsData struct {
   167  	*cgroupInode
   168  }
   169  
   170  // Generate implements vfs.DynamicBytesSource.Generate.
   171  func (d *cgroupProcsData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   172  	t := kernel.TaskFromContext(ctx)
   173  	currPidns := t.ThreadGroup().PIDNamespace()
   174  
   175  	pgids := make(map[kernel.ThreadID]struct{})
   176  
   177  	for _, task := range d.tasks() {
   178  		// Map dedups pgid, since iterating over all tasks produces multiple
   179  		// entries for the group leaders.
   180  		if pgid := currPidns.IDOfThreadGroup(task.ThreadGroup()); pgid != 0 {
   181  			pgids[pgid] = struct{}{}
   182  		}
   183  	}
   184  
   185  	pgidList := make([]kernel.ThreadID, 0, len(pgids))
   186  	for pgid, _ := range pgids {
   187  		pgidList = append(pgidList, pgid)
   188  	}
   189  	sortTIDs(pgidList)
   190  
   191  	for _, pgid := range pgidList {
   192  		fmt.Fprintf(buf, "%d\n", pgid)
   193  	}
   194  
   195  	return nil
   196  }
   197  
   198  // Write implements vfs.WritableDynamicBytesSource.Write.
   199  func (d *cgroupProcsData) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
   200  	// TODO(b/183137098): Payload is the pid for a process to add to this cgroup.
   201  	return src.NumBytes(), nil
   202  }
   203  
   204  // +stateify savable
   205  type tasksData struct {
   206  	*cgroupInode
   207  }
   208  
   209  // Generate implements vfs.DynamicBytesSource.Generate.
   210  func (d *tasksData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   211  	t := kernel.TaskFromContext(ctx)
   212  	currPidns := t.ThreadGroup().PIDNamespace()
   213  
   214  	var pids []kernel.ThreadID
   215  
   216  	for _, task := range d.tasks() {
   217  		if pid := currPidns.IDOfTask(task); pid != 0 {
   218  			pids = append(pids, pid)
   219  		}
   220  	}
   221  	sortTIDs(pids)
   222  
   223  	for _, pid := range pids {
   224  		fmt.Fprintf(buf, "%d\n", pid)
   225  	}
   226  
   227  	return nil
   228  }
   229  
   230  // Write implements vfs.WritableDynamicBytesSource.Write.
   231  func (d *tasksData) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
   232  	// TODO(b/183137098): Payload is the pid for a process to add to this cgroup.
   233  	return src.NumBytes(), nil
   234  }
   235  
   236  // parseInt64FromString interprets src as string encoding a int64 value, and
   237  // returns the parsed value.
   238  func parseInt64FromString(ctx context.Context, src usermem.IOSequence, offset int64) (val, len int64, err error) {
   239  	const maxInt64StrLen = 20 // i.e. len(fmt.Sprintf("%d", math.MinInt64)) == 20
   240  
   241  	t := kernel.TaskFromContext(ctx)
   242  	src = src.DropFirst64(offset)
   243  
   244  	buf := t.CopyScratchBuffer(maxInt64StrLen)
   245  	n, err := src.CopyIn(ctx, buf)
   246  	if err != nil {
   247  		return 0, int64(n), err
   248  	}
   249  	buf = buf[:n]
   250  
   251  	val, err = strconv.ParseInt(string(buf), 10, 64)
   252  	if err != nil {
   253  		// Note: This also handles zero-len writes if offset is beyond the end
   254  		// of src, or src is empty.
   255  		ctx.Warningf("cgroupfs.parseInt64FromString: failed to parse %q: %v", string(buf), err)
   256  		return 0, int64(n), linuxerr.EINVAL
   257  	}
   258  
   259  	return val, int64(n), nil
   260  }