github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/proc/task.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package proc
    16  
import (
	"bytes"
	"fmt"
	"sort"

	"github.com/SagerNet/gvisor/pkg/abi/linux"
	"github.com/SagerNet/gvisor/pkg/context"
	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
	"github.com/SagerNet/gvisor/pkg/sentry/fsimpl/kernfs"
	"github.com/SagerNet/gvisor/pkg/sentry/kernel"
	"github.com/SagerNet/gvisor/pkg/sentry/kernel/auth"
	"github.com/SagerNet/gvisor/pkg/sentry/mm"
	"github.com/SagerNet/gvisor/pkg/sentry/vfs"
	"github.com/SagerNet/gvisor/pkg/syserror"
)
    31  
    32  // taskInode represents the inode for the /proc/PID/ directory of one task.
    33  //
    34  // +stateify savable
    35  type taskInode struct {
    36  	implStatFS
    37  	kernfs.InodeAttrs
    38  	kernfs.InodeDirectoryNoNewChildren
    39  	kernfs.InodeNotSymlink
    40  	kernfs.InodeTemporary
    41  	kernfs.OrderedChildren
    42  	taskInodeRefs
    43  
    44  	locks vfs.FileLocks // locks is handed to the directory FD created by Open.
    45  
    46  	task *kernel.Task // task is the task described here; its exit state drives Valid.
    47  }
    48  
    49  var _ kernfs.Inode = (*taskInode)(nil) // Compile-time check that *taskInode implements kernfs.Inode.
    50  
    51  func (fs *filesystem) newTaskInode(ctx context.Context, task *kernel.Task, pidns *kernel.PIDNamespace, isThreadGroup bool, fakeCgroupControllers map[string]string) (kernfs.Inode, error) {
    52  	if task.ExitState() == kernel.TaskExitDead {
    53  		return nil, syserror.ESRCH
    54  	}
    55  
    56  	contents := map[string]kernfs.Inode{
    57  		"auxv":      fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &auxvData{task: task}),
    58  		"cmdline":   fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &cmdlineData{task: task, arg: cmdlineDataArg}),
    59  		"comm":      fs.newComm(ctx, task, fs.NextIno(), 0444),
    60  		"cwd":       fs.newCwdSymlink(ctx, task, fs.NextIno()),
    61  		"environ":   fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &cmdlineData{task: task, arg: environDataArg}),
    62  		"exe":       fs.newExeSymlink(ctx, task, fs.NextIno()),
    63  		"fd":        fs.newFDDirInode(ctx, task),
    64  		"fdinfo":    fs.newFDInfoDirInode(ctx, task),
    65  		"gid_map":   fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0644, &idMapData{task: task, gids: true}),
    66  		"io":        fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0400, newIO(task, isThreadGroup)),
    67  		"maps":      fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &mapsData{task: task}),
    68  		"mem":       fs.newMemInode(ctx, task, fs.NextIno(), 0400),
    69  		"mountinfo": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &mountInfoData{task: task}),
    70  		"mounts":    fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &mountsData{task: task}),
    71  		"net":       fs.newTaskNetDir(ctx, task),
    72  		"ns": fs.newTaskOwnedDir(ctx, task, fs.NextIno(), 0511, map[string]kernfs.Inode{
    73  			"net":  fs.newNamespaceSymlink(ctx, task, fs.NextIno(), "net"),
    74  			"pid":  fs.newNamespaceSymlink(ctx, task, fs.NextIno(), "pid"),
    75  			"user": fs.newNamespaceSymlink(ctx, task, fs.NextIno(), "user"),
    76  		}),
    77  		"oom_score":     fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, newStaticFile("0\n")),
    78  		"oom_score_adj": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0644, &oomScoreAdj{task: task}),
    79  		"smaps":         fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &smapsData{task: task}),
    80  		"stat":          fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &taskStatData{task: task, pidns: pidns, tgstats: isThreadGroup}),
    81  		"statm":         fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &statmData{task: task}),
    82  		"status":        fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &statusData{task: task, pidns: pidns}),
    83  		"uid_map":       fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0644, &idMapData{task: task, gids: false}),
    84  	}
    85  	if isThreadGroup {
    86  		contents["task"] = fs.newSubtasks(ctx, task, pidns, fakeCgroupControllers)
    87  	}
    88  	if len(fakeCgroupControllers) > 0 {
    89  		contents["cgroup"] = fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, newFakeCgroupData(fakeCgroupControllers))
    90  	} else {
    91  		contents["cgroup"] = fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &taskCgroupData{task: task})
    92  	}
    93  
    94  	taskInode := &taskInode{task: task}
    95  	// Note: credentials are overridden by taskOwnedInode.
    96  	taskInode.InodeAttrs.Init(ctx, task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0555)
    97  	taskInode.InitRefs()
    98  
    99  	inode := &taskOwnedInode{Inode: taskInode, owner: task}
   100  
   101  	taskInode.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
   102  	links := taskInode.OrderedChildren.Populate(contents)
   103  	taskInode.IncLinks(links)
   104  
   105  	return inode, nil
   106  }
   107  
   108  // Valid implements kernfs.Inode.Valid. This inode remains valid as long
   109  // as the task is still running. When it's dead, another tasks with the same
   110  // PID could replace it.
   111  func (i *taskInode) Valid(ctx context.Context) bool {
   112  	return i.task.ExitState() != kernel.TaskExitDead
   113  }
   114  
   115  // Open implements kernfs.Inode.Open.
   116  func (i *taskInode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
   117  	fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), d, &i.OrderedChildren, &i.locks, &opts, kernfs.GenericDirectoryFDOptions{
   118  		SeekEnd: kernfs.SeekEndZero,
   119  	})
   120  	if err != nil {
   121  		return nil, err
   122  	}
   123  	return fd.VFSFileDescription(), nil
   124  }
   125  
   126  // SetStat implements kernfs.Inode.SetStat not allowing inode attributes to be changed.
   127  func (*taskInode) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
   128  	return linuxerr.EPERM
   129  }
   130  
   131  // DecRef implements kernfs.Inode.DecRef.
   132  func (i *taskInode) DecRef(ctx context.Context) {
   133  	i.taskInodeRefs.DecRef(func() { i.Destroy(ctx) })
   134  }
   135  
   136  // taskOwnedInode wraps a kernfs.Inode and overrides the owner reported by Stat
   137  // and checked by CheckPermissions with the one computed by getOwner.
   138  //
   139  // +stateify savable
   140  type taskOwnedInode struct {
   141  	kernfs.Inode
   142  
   143  	// owner is the task that owns this inode; Valid fails once it is dead.
   144  	owner *kernel.Task
   145  }
   146  
   147  var _ kernfs.Inode = (*taskOwnedInode)(nil) // Compile-time check that *taskOwnedInode implements kernfs.Inode.
   148  
   149  func (fs *filesystem) newTaskOwnedInode(ctx context.Context, task *kernel.Task, ino uint64, perm linux.FileMode, inode dynamicInode) kernfs.Inode {
   150  	// Note: credentials are overridden by taskOwnedInode.
   151  	inode.Init(ctx, task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, inode, perm)
   152  
   153  	return &taskOwnedInode{Inode: inode, owner: task}
   154  }
   155  
   156  func (fs *filesystem) newTaskOwnedDir(ctx context.Context, task *kernel.Task, ino uint64, perm linux.FileMode, children map[string]kernfs.Inode) kernfs.Inode {
   157  	// Note: credentials are overridden by taskOwnedInode.
   158  	fdOpts := kernfs.GenericDirectoryFDOptions{SeekEnd: kernfs.SeekEndZero}
   159  	dir := kernfs.NewStaticDir(ctx, task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, perm, children, fdOpts)
   160  
   161  	return &taskOwnedInode{Inode: dir, owner: task}
   162  }
   163  
   164  func (i *taskOwnedInode) Valid(ctx context.Context) bool {
   165  	return i.owner.ExitState() != kernel.TaskExitDead && i.Inode.Valid(ctx)
   166  }
   167  
   168  // Stat implements kernfs.Inode.Stat.
   169  func (i *taskOwnedInode) Stat(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
   170  	stat, err := i.Inode.Stat(ctx, fs, opts)
   171  	if err != nil {
   172  		return linux.Statx{}, err
   173  	}
   174  	if opts.Mask&(linux.STATX_UID|linux.STATX_GID) != 0 {
   175  		uid, gid := i.getOwner(linux.FileMode(stat.Mode))
   176  		if opts.Mask&linux.STATX_UID != 0 {
   177  			stat.UID = uint32(uid)
   178  		}
   179  		if opts.Mask&linux.STATX_GID != 0 {
   180  			stat.GID = uint32(gid)
   181  		}
   182  	}
   183  	return stat, nil
   184  }
   185  
   186  // CheckPermissions implements kernfs.Inode.CheckPermissions.
   187  func (i *taskOwnedInode) CheckPermissions(_ context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error {
   188  	mode := i.Mode()
   189  	uid, gid := i.getOwner(mode)
   190  	return vfs.GenericCheckPermissions(creds, ats, mode, uid, gid)
   191  }
   192  
   193  func (i *taskOwnedInode) getOwner(mode linux.FileMode) (auth.KUID, auth.KGID) {
   194  	// By default, set the task owner as the file owner.
   195  	creds := i.owner.Credentials()
   196  	uid := creds.EffectiveKUID
   197  	gid := creds.EffectiveKGID
   198  
   199  	// Linux doesn't apply dumpability adjustments to world readable/executable
   200  	// directories so that applications can stat /proc/PID to determine the
   201  	// effective UID of a process. See fs/proc/base.c:task_dump_owner.
   202  	if mode.FileType() == linux.ModeDirectory && mode.Permissions() == 0555 {
   203  		return uid, gid
   204  	}
   205  
   206  	// If the task is not dumpable, then root (in the namespace preferred)
   207  	// owns the file.
   208  	m := getMM(i.owner)
   209  	if m == nil {
   210  		return auth.RootKUID, auth.RootKGID
   211  	}
   212  	if m.Dumpability() != mm.UserDumpable {
   213  		uid = auth.RootKUID
   214  		if kuid := creds.UserNamespace.MapToKUID(auth.RootUID); kuid.Ok() {
   215  			uid = kuid
   216  		}
   217  		gid = auth.RootKGID
   218  		if kgid := creds.UserNamespace.MapToKGID(auth.RootGID); kgid.Ok() {
   219  			gid = kgid
   220  		}
   221  	}
   222  	return uid, gid
   223  }
   224  
   225  func newIO(t *kernel.Task, isThreadGroup bool) *ioData {
   226  	if isThreadGroup {
   227  		return &ioData{ioUsage: t.ThreadGroup()}
   228  	}
   229  	return &ioData{ioUsage: t}
   230  }
   231  
   232  // newFakeCgroupData creates an inode that shows fake cgroup
   233  // information passed in as mount options.  From man 7 cgroups: "For
   234  // each cgroup hierarchy of which the process is a member, there is
   235  // one entry containing three colon-separated fields:
   236  // hierarchy-ID:controller-list:cgroup-path"
   237  //
   238  // TODO(b/182488796): Remove once all users adopt cgroupfs.
   239  func newFakeCgroupData(controllers map[string]string) dynamicInode {
   240  	var buf bytes.Buffer
   241  
   242  	// The hierarchy ids must be positive integers (for cgroup v1), but the
   243  	// exact number does not matter, so long as they are unique. We can
   244  	// just use a counter, but since linux sorts this file in descending
   245  	// order, we must count down to preserve this behavior.
   246  	i := len(controllers)
   247  	for name, dir := range controllers {
   248  		fmt.Fprintf(&buf, "%d:%s:%s\n", i, name, dir)
   249  		i--
   250  	}
   251  	return newStaticFile(buf.String())
   252  }