github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/proc/tasks.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package proc
    16  
    17  import (
    18  	"bytes"
    19  	"sort"
    20  	"strconv"
    21  
    22  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    23  	"github.com/SagerNet/gvisor/pkg/context"
    24  	"github.com/SagerNet/gvisor/pkg/sentry/fsimpl/kernfs"
    25  	"github.com/SagerNet/gvisor/pkg/sentry/kernel"
    26  	"github.com/SagerNet/gvisor/pkg/sentry/kernel/auth"
    27  	"github.com/SagerNet/gvisor/pkg/sentry/vfs"
    28  	"github.com/SagerNet/gvisor/pkg/syserror"
    29  )
    30  
    31  const (
    32  	selfName       = "self"
    33  	threadSelfName = "thread-self"
    34  )
    35  
// tasksInode represents the inode for the /proc/ directory.
//
// Entries that are known when the inode is created (see newTasksInode) are
// kept in the embedded OrderedChildren. The "self" and "thread-self" symlinks
// and the per-task directories are not stored anywhere: Lookup and
// IterDirents produce them on demand from pidns.
//
// +stateify savable
type tasksInode struct {
	implStatFS
	kernfs.InodeAlwaysValid
	kernfs.InodeAttrs
	kernfs.InodeDirectoryNoNewChildren
	kernfs.InodeNotSymlink
	kernfs.InodeTemporary // This holds no meaning as this inode can't be Looked up and is always valid.
	kernfs.OrderedChildren
	tasksInodeRefs

	// locks is passed to the directory file descriptions created by Open.
	locks vfs.FileLocks

	// fs is the filesystem this inode belongs to; it supplies inode numbers
	// and creates the per-task inodes. pidns is the PID namespace whose
	// tasks and thread groups are exposed as children.
	fs    *filesystem
	pidns *kernel.PIDNamespace

	// '/proc/self' and '/proc/thread-self' have custom directory offsets in
	// Linux, so they are handled outside of OrderedChildren (in Lookup and
	// IterDirents) and have no field in this struct.

	// fakeCgroupControllers is a map of controller name to directory in the
	// cgroup hierarchy. These controllers are immutable and will be listed
	// in /proc/pid/cgroup if not nil. When the map is non-empty,
	// newTasksInode does not create the "cgroups" entry.
	fakeCgroupControllers map[string]string
}

// Compile-time check that *tasksInode satisfies kernfs.Inode.
var _ kernfs.Inode = (*tasksInode)(nil)
    64  
    65  func (fs *filesystem) newTasksInode(ctx context.Context, k *kernel.Kernel, pidns *kernel.PIDNamespace, fakeCgroupControllers map[string]string) *tasksInode {
    66  	root := auth.NewRootCredentials(pidns.UserNamespace())
    67  	contents := map[string]kernfs.Inode{
    68  		"cmdline":     fs.newInode(ctx, root, 0444, &cmdLineData{}),
    69  		"cpuinfo":     fs.newInode(ctx, root, 0444, newStaticFileSetStat(cpuInfoData(k))),
    70  		"filesystems": fs.newInode(ctx, root, 0444, &filesystemsData{}),
    71  		"loadavg":     fs.newInode(ctx, root, 0444, &loadavgData{}),
    72  		"sys":         fs.newSysDir(ctx, root, k),
    73  		"meminfo":     fs.newInode(ctx, root, 0444, &meminfoData{}),
    74  		"mounts":      kernfs.NewStaticSymlink(ctx, root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), "self/mounts"),
    75  		"net":         kernfs.NewStaticSymlink(ctx, root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), "self/net"),
    76  		"stat":        fs.newInode(ctx, root, 0444, &statData{}),
    77  		"uptime":      fs.newInode(ctx, root, 0444, &uptimeData{}),
    78  		"version":     fs.newInode(ctx, root, 0444, &versionData{}),
    79  	}
    80  	// If fakeCgroupControllers are provided, don't create a cgroupfs backed
    81  	// /proc/cgroup as it will not match the fake controllers.
    82  	if len(fakeCgroupControllers) == 0 {
    83  		contents["cgroups"] = fs.newInode(ctx, root, 0444, &cgroupsData{})
    84  	}
    85  
    86  	inode := &tasksInode{
    87  		pidns:                 pidns,
    88  		fs:                    fs,
    89  		fakeCgroupControllers: fakeCgroupControllers,
    90  	}
    91  	inode.InodeAttrs.Init(ctx, root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0555)
    92  	inode.InitRefs()
    93  
    94  	inode.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
    95  	links := inode.OrderedChildren.Populate(contents)
    96  	inode.IncLinks(links)
    97  
    98  	return inode
    99  }
   100  
   101  // Lookup implements kernfs.inodeDirectory.Lookup.
   102  func (i *tasksInode) Lookup(ctx context.Context, name string) (kernfs.Inode, error) {
   103  	// Check if a static entry was looked up.
   104  	if d, err := i.OrderedChildren.Lookup(ctx, name); err == nil {
   105  		return d, nil
   106  	}
   107  
   108  	// Not a static entry. Try to lookup a corresponding task.
   109  	tid, err := strconv.ParseUint(name, 10, 64)
   110  	if err != nil {
   111  		root := auth.NewRootCredentials(i.pidns.UserNamespace())
   112  		// If it failed to parse, check if it's one of the special handled files.
   113  		switch name {
   114  		case selfName:
   115  			return i.newSelfSymlink(ctx, root), nil
   116  		case threadSelfName:
   117  			return i.newThreadSelfSymlink(ctx, root), nil
   118  		}
   119  		return nil, syserror.ENOENT
   120  	}
   121  
   122  	task := i.pidns.TaskWithID(kernel.ThreadID(tid))
   123  	if task == nil {
   124  		return nil, syserror.ENOENT
   125  	}
   126  
   127  	return i.fs.newTaskInode(ctx, task, i.pidns, true, i.fakeCgroupControllers)
   128  }
   129  
   130  // IterDirents implements kernfs.inodeDirectory.IterDirents.
   131  func (i *tasksInode) IterDirents(ctx context.Context, mnt *vfs.Mount, cb vfs.IterDirentsCallback, offset, _ int64) (int64, error) {
   132  	// fs/proc/internal.h: #define FIRST_PROCESS_ENTRY 256
   133  	const FIRST_PROCESS_ENTRY = 256
   134  
   135  	// Use maxTaskID to shortcut searches that will result in 0 entries.
   136  	const maxTaskID = kernel.TasksLimit + 1
   137  	if offset >= maxTaskID {
   138  		return offset, nil
   139  	}
   140  
   141  	// According to Linux (fs/proc/base.c:proc_pid_readdir()), process directories
   142  	// start at offset FIRST_PROCESS_ENTRY with '/proc/self', followed by
   143  	// '/proc/thread-self' and then '/proc/[pid]'.
   144  	if offset < FIRST_PROCESS_ENTRY {
   145  		offset = FIRST_PROCESS_ENTRY
   146  	}
   147  
   148  	if offset == FIRST_PROCESS_ENTRY {
   149  		dirent := vfs.Dirent{
   150  			Name:    selfName,
   151  			Type:    linux.DT_LNK,
   152  			Ino:     i.fs.NextIno(),
   153  			NextOff: offset + 1,
   154  		}
   155  		if err := cb.Handle(dirent); err != nil {
   156  			return offset, err
   157  		}
   158  		offset++
   159  	}
   160  	if offset == FIRST_PROCESS_ENTRY+1 {
   161  		dirent := vfs.Dirent{
   162  			Name:    threadSelfName,
   163  			Type:    linux.DT_LNK,
   164  			Ino:     i.fs.NextIno(),
   165  			NextOff: offset + 1,
   166  		}
   167  		if err := cb.Handle(dirent); err != nil {
   168  			return offset, err
   169  		}
   170  		offset++
   171  	}
   172  
   173  	// Collect all tasks that TGIDs are greater than the offset specified. Per
   174  	// Linux we only include in directory listings if it's the leader. But for
   175  	// whatever crazy reason, you can still walk to the given node.
   176  	var tids []int
   177  	startTid := offset - FIRST_PROCESS_ENTRY - 2
   178  	for _, tg := range i.pidns.ThreadGroups() {
   179  		tid := i.pidns.IDOfThreadGroup(tg)
   180  		if int64(tid) < startTid {
   181  			continue
   182  		}
   183  		if leader := tg.Leader(); leader != nil {
   184  			tids = append(tids, int(tid))
   185  		}
   186  	}
   187  
   188  	if len(tids) == 0 {
   189  		return offset, nil
   190  	}
   191  
   192  	sort.Ints(tids)
   193  	for _, tid := range tids {
   194  		dirent := vfs.Dirent{
   195  			Name:    strconv.FormatUint(uint64(tid), 10),
   196  			Type:    linux.DT_DIR,
   197  			Ino:     i.fs.NextIno(),
   198  			NextOff: FIRST_PROCESS_ENTRY + 2 + int64(tid) + 1,
   199  		}
   200  		if err := cb.Handle(dirent); err != nil {
   201  			return offset, err
   202  		}
   203  		offset++
   204  	}
   205  	return maxTaskID, nil
   206  }
   207  
   208  // Open implements kernfs.Inode.Open.
   209  func (i *tasksInode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
   210  	fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), d, &i.OrderedChildren, &i.locks, &opts, kernfs.GenericDirectoryFDOptions{
   211  		SeekEnd: kernfs.SeekEndZero,
   212  	})
   213  	if err != nil {
   214  		return nil, err
   215  	}
   216  	return fd.VFSFileDescription(), nil
   217  }
   218  
   219  func (i *tasksInode) Stat(ctx context.Context, vsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
   220  	stat, err := i.InodeAttrs.Stat(ctx, vsfs, opts)
   221  	if err != nil {
   222  		return linux.Statx{}, err
   223  	}
   224  
   225  	if opts.Mask&linux.STATX_NLINK != 0 {
   226  		// Add dynamic children to link count.
   227  		for _, tg := range i.pidns.ThreadGroups() {
   228  			if leader := tg.Leader(); leader != nil {
   229  				stat.Nlink++
   230  			}
   231  		}
   232  	}
   233  
   234  	return stat, nil
   235  }
   236  
   237  // DecRef implements kernfs.Inode.DecRef.
   238  func (i *tasksInode) DecRef(ctx context.Context) {
   239  	i.tasksInodeRefs.DecRef(func() { i.Destroy(ctx) })
   240  }
   241  
// staticFileSetStat implements a special static file that allows inode
// attributes to be set. This is to support /proc files that are readonly, but
// allow attributes to be set.
//
// The file content comes from the embedded vfs.StaticData; the embedded
// dynamicBytesFileSetAttr provides the rest of the inode behavior (its
// definition is not visible in this file).
//
// +stateify savable
type staticFileSetStat struct {
	dynamicBytesFileSetAttr
	vfs.StaticData
}

// Compile-time check that *staticFileSetStat satisfies dynamicInode.
var _ dynamicInode = (*staticFileSetStat)(nil)
   253  
   254  func newStaticFileSetStat(data string) *staticFileSetStat {
   255  	return &staticFileSetStat{StaticData: vfs.StaticData{Data: data}}
   256  }
   257  
   258  func cpuInfoData(k *kernel.Kernel) string {
   259  	features := k.FeatureSet()
   260  	if features == nil {
   261  		// Kernel is always initialized with a FeatureSet.
   262  		panic("cpuinfo read with nil FeatureSet")
   263  	}
   264  	var buf bytes.Buffer
   265  	for i, max := uint(0), k.ApplicationCores(); i < max; i++ {
   266  		features.WriteCPUInfoTo(i, &buf)
   267  	}
   268  	return buf.String()
   269  }
   270  
   271  func shmData(v uint64) dynamicInode {
   272  	return newStaticFile(strconv.FormatUint(v, 10))
   273  }