github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/fsimpl/proc/tasks.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package proc
    16  
    17  import (
    18  	"bytes"
    19  	"sort"
    20  	"strconv"
    21  
    22  	"github.com/nicocha30/gvisor-ligolo/pkg/abi/linux"
    23  	"github.com/nicocha30/gvisor-ligolo/pkg/context"
    24  	"github.com/nicocha30/gvisor-ligolo/pkg/errors/linuxerr"
    25  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/fsimpl/kernfs"
    26  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/kernel"
    27  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/kernel/auth"
    28  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/vfs"
    29  )
    30  
    31  const (
    32  	selfName       = "self"
    33  	threadSelfName = "thread-self"
    34  )
    35  
// tasksInode represents the inode for /proc/ directory.
//
// Its children come from two places: the static entries held in the embedded
// OrderedChildren, and dynamic entries ("self", "thread-self" and one
// directory per task) that Lookup and IterDirents produce on demand.
//
// +stateify savable
type tasksInode struct {
	implStatFS
	kernfs.InodeAlwaysValid
	kernfs.InodeAttrs
	kernfs.InodeDirectoryNoNewChildren
	kernfs.InodeNotAnonymous
	kernfs.InodeNotSymlink
	kernfs.InodeTemporary // This holds no meaning as this inode can't be Looked up and is always valid.
	kernfs.InodeWatches
	kernfs.OrderedChildren
	tasksInodeRefs

	// locks is handed to the directory file descriptions created by Open.
	locks vfs.FileLocks

	// fs is the proc filesystem this inode belongs to; it supplies inode
	// numbers and constructs the per-task inodes.
	fs *filesystem
	// pidns is the PID namespace whose tasks are exposed as /proc/[pid].
	pidns *kernel.PIDNamespace

	// '/proc/self' and '/proc/thread-self' have custom directory offsets in
	// Linux, so they are not stored in OrderedChildren. Lookup creates them
	// on demand and IterDirents emits them at their dedicated offsets.

	// fakeCgroupControllers is a map of controller name to directory in the
	// cgroup hierarchy. These controllers are immutable and will be listed
	// in /proc/pid/cgroup if not nil.
	fakeCgroupControllers map[string]string
}

// Compile-time check that *tasksInode satisfies kernfs.Inode.
var _ kernfs.Inode = (*tasksInode)(nil)
    66  
    67  func (fs *filesystem) newTasksInode(ctx context.Context, k *kernel.Kernel, pidns *kernel.PIDNamespace, fakeCgroupControllers map[string]string) *tasksInode {
    68  	root := auth.NewRootCredentials(pidns.UserNamespace())
    69  	contents := map[string]kernfs.Inode{
    70  		"cmdline":        fs.newInode(ctx, root, 0444, &cmdLineData{}),
    71  		"cpuinfo":        fs.newInode(ctx, root, 0444, newStaticFileSetStat(cpuInfoData(k))),
    72  		"filesystems":    fs.newInode(ctx, root, 0444, &filesystemsData{}),
    73  		"loadavg":        fs.newInode(ctx, root, 0444, &loadavgData{}),
    74  		"sys":            fs.newSysDir(ctx, root, k),
    75  		"meminfo":        fs.newInode(ctx, root, 0444, &meminfoData{}),
    76  		"mounts":         kernfs.NewStaticSymlink(ctx, root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), "self/mounts"),
    77  		"net":            kernfs.NewStaticSymlink(ctx, root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), "self/net"),
    78  		"sentry-meminfo": fs.newInode(ctx, root, 0444, &sentryMeminfoData{}),
    79  		"stat":           fs.newInode(ctx, root, 0444, &statData{}),
    80  		"uptime":         fs.newInode(ctx, root, 0444, &uptimeData{}),
    81  		"version":        fs.newInode(ctx, root, 0444, &versionData{}),
    82  	}
    83  	// If fakeCgroupControllers are provided, don't create a cgroupfs backed
    84  	// /proc/cgroup as it will not match the fake controllers.
    85  	if len(fakeCgroupControllers) == 0 {
    86  		contents["cgroups"] = fs.newInode(ctx, root, 0444, &cgroupsData{})
    87  	}
    88  
    89  	inode := &tasksInode{
    90  		pidns:                 pidns,
    91  		fs:                    fs,
    92  		fakeCgroupControllers: fakeCgroupControllers,
    93  	}
    94  	inode.InodeAttrs.Init(ctx, root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0555)
    95  	inode.InitRefs()
    96  
    97  	inode.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
    98  	links := inode.OrderedChildren.Populate(contents)
    99  	inode.IncLinks(links)
   100  
   101  	return inode
   102  }
   103  
   104  // Lookup implements kernfs.inodeDirectory.Lookup.
   105  func (i *tasksInode) Lookup(ctx context.Context, name string) (kernfs.Inode, error) {
   106  	// Check if a static entry was looked up.
   107  	if d, err := i.OrderedChildren.Lookup(ctx, name); err == nil {
   108  		return d, nil
   109  	}
   110  
   111  	// Not a static entry. Try to lookup a corresponding task.
   112  	tid, err := strconv.ParseUint(name, 10, 64)
   113  	if err != nil {
   114  		root := auth.NewRootCredentials(i.pidns.UserNamespace())
   115  		// If it failed to parse, check if it's one of the special handled files.
   116  		switch name {
   117  		case selfName:
   118  			return i.newSelfSymlink(ctx, root), nil
   119  		case threadSelfName:
   120  			return i.newThreadSelfSymlink(ctx, root), nil
   121  		}
   122  		return nil, linuxerr.ENOENT
   123  	}
   124  
   125  	task := i.pidns.TaskWithID(kernel.ThreadID(tid))
   126  	if task == nil {
   127  		return nil, linuxerr.ENOENT
   128  	}
   129  
   130  	return i.fs.newTaskInode(ctx, task, i.pidns, true, i.fakeCgroupControllers)
   131  }
   132  
   133  // IterDirents implements kernfs.inodeDirectory.IterDirents.
   134  func (i *tasksInode) IterDirents(ctx context.Context, mnt *vfs.Mount, cb vfs.IterDirentsCallback, offset, _ int64) (int64, error) {
   135  	// fs/proc/internal.h: #define FIRST_PROCESS_ENTRY 256
   136  	const FIRST_PROCESS_ENTRY = 256
   137  
   138  	// Use maxTaskID to shortcut searches that will result in 0 entries.
   139  	const maxTaskID = kernel.TasksLimit + 1
   140  	if offset >= maxTaskID {
   141  		return offset, nil
   142  	}
   143  
   144  	// According to Linux (fs/proc/base.c:proc_pid_readdir()), process directories
   145  	// start at offset FIRST_PROCESS_ENTRY with '/proc/self', followed by
   146  	// '/proc/thread-self' and then '/proc/[pid]'.
   147  	if offset < FIRST_PROCESS_ENTRY {
   148  		offset = FIRST_PROCESS_ENTRY
   149  	}
   150  
   151  	if offset == FIRST_PROCESS_ENTRY {
   152  		dirent := vfs.Dirent{
   153  			Name:    selfName,
   154  			Type:    linux.DT_LNK,
   155  			Ino:     i.fs.NextIno(),
   156  			NextOff: offset + 1,
   157  		}
   158  		if err := cb.Handle(dirent); err != nil {
   159  			return offset, err
   160  		}
   161  		offset++
   162  	}
   163  	if offset == FIRST_PROCESS_ENTRY+1 {
   164  		dirent := vfs.Dirent{
   165  			Name:    threadSelfName,
   166  			Type:    linux.DT_LNK,
   167  			Ino:     i.fs.NextIno(),
   168  			NextOff: offset + 1,
   169  		}
   170  		if err := cb.Handle(dirent); err != nil {
   171  			return offset, err
   172  		}
   173  		offset++
   174  	}
   175  
   176  	// Collect all tasks that TGIDs are greater than the offset specified. Per
   177  	// Linux we only include in directory listings if it's the leader. But for
   178  	// whatever crazy reason, you can still walk to the given node.
   179  	var tids []int
   180  	startTid := offset - FIRST_PROCESS_ENTRY - 2
   181  	for _, tg := range i.pidns.ThreadGroups() {
   182  		tid := i.pidns.IDOfThreadGroup(tg)
   183  		if int64(tid) < startTid {
   184  			continue
   185  		}
   186  		if leader := tg.Leader(); leader != nil {
   187  			tids = append(tids, int(tid))
   188  		}
   189  	}
   190  
   191  	if len(tids) == 0 {
   192  		return offset, nil
   193  	}
   194  
   195  	sort.Ints(tids)
   196  	for _, tid := range tids {
   197  		dirent := vfs.Dirent{
   198  			Name:    strconv.FormatUint(uint64(tid), 10),
   199  			Type:    linux.DT_DIR,
   200  			Ino:     i.fs.NextIno(),
   201  			NextOff: FIRST_PROCESS_ENTRY + 2 + int64(tid) + 1,
   202  		}
   203  		if err := cb.Handle(dirent); err != nil {
   204  			return offset, err
   205  		}
   206  		offset++
   207  	}
   208  	return maxTaskID, nil
   209  }
   210  
   211  // Open implements kernfs.Inode.Open.
   212  func (i *tasksInode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
   213  	fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), d, &i.OrderedChildren, &i.locks, &opts, kernfs.GenericDirectoryFDOptions{
   214  		SeekEnd: kernfs.SeekEndZero,
   215  	})
   216  	if err != nil {
   217  		return nil, err
   218  	}
   219  	return fd.VFSFileDescription(), nil
   220  }
   221  
   222  func (i *tasksInode) Stat(ctx context.Context, vsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
   223  	stat, err := i.InodeAttrs.Stat(ctx, vsfs, opts)
   224  	if err != nil {
   225  		return linux.Statx{}, err
   226  	}
   227  
   228  	if opts.Mask&linux.STATX_NLINK != 0 {
   229  		// Add dynamic children to link count.
   230  		for _, tg := range i.pidns.ThreadGroups() {
   231  			if leader := tg.Leader(); leader != nil {
   232  				stat.Nlink++
   233  			}
   234  		}
   235  	}
   236  
   237  	return stat, nil
   238  }
   239  
   240  // DecRef implements kernfs.Inode.DecRef.
   241  func (i *tasksInode) DecRef(ctx context.Context) {
   242  	i.tasksInodeRefs.DecRef(func() { i.Destroy(ctx) })
   243  }
   244  
// staticFileSetStat implements a special static file that allows inode
// attributes to be set. This is to support /proc files that are read-only,
// but still allow their attributes to be changed.
//
// +stateify savable
type staticFileSetStat struct {
	// dynamicBytesFileSetAttr is defined elsewhere in this package;
	// presumably it provides the attribute-setting behavior described above
	// (confirm against its definition).
	dynamicBytesFileSetAttr
	// StaticData carries the fixed file contents; its Data field is filled
	// in by newStaticFileSetStat.
	vfs.StaticData
}

// Compile-time check that *staticFileSetStat satisfies dynamicInode.
var _ dynamicInode = (*staticFileSetStat)(nil)
   256  
   257  func newStaticFileSetStat(data string) *staticFileSetStat {
   258  	return &staticFileSetStat{StaticData: vfs.StaticData{Data: data}}
   259  }
   260  
   261  func cpuInfoData(k *kernel.Kernel) string {
   262  	features := k.FeatureSet()
   263  	var buf bytes.Buffer
   264  	for i, max := uint(0), k.ApplicationCores(); i < max; i++ {
   265  		features.WriteCPUInfoTo(i, &buf)
   266  	}
   267  	return buf.String()
   268  }
   269  
   270  func ipcData(v uint64) dynamicInode {
   271  	return newStaticFile(strconv.FormatUint(v, 10))
   272  }