
     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
//     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    15  package proc
    17  import (
    18  	"bytes"
    19  	"fmt"
    20  	"io"
    21  	"sort"
    22  	"strconv"
    24  	""
    25  	""
    26  	""
    27  	""
    28  	""
    29  	""
    30  	""
    31  	""
    32  	""
    33  	""
    34  	""
    35  	""
    36  	""
    37  	""
    38  	""
    39  	""
    40  	""
    41  )
    43  // LINT.IfChange
    45  // getTaskMM returns t's MemoryManager. If getTaskMM succeeds, the MemoryManager's
    46  // users count is incremented, and must be decremented by the caller when it is
    47  // no longer in use.
    48  func getTaskMM(t *kernel.Task) (*mm.MemoryManager, error) {
    49  	if t.ExitState() == kernel.TaskExitDead {
    50  		return nil, syserror.ESRCH
    51  	}
    52  	var m *mm.MemoryManager
    53  	t.WithMuLocked(func(t *kernel.Task) {
    54  		m = t.MemoryManager()
    55  	})
    56  	if m == nil || !m.IncUsers() {
    57  		return nil, io.EOF
    58  	}
    59  	return m, nil
    60  }
    62  func checkTaskState(t *kernel.Task) error {
    63  	switch t.ExitState() {
    64  	case kernel.TaskExitZombie:
    65  		return linuxerr.EACCES
    66  	case kernel.TaskExitDead:
    67  		return syserror.ESRCH
    68  	}
    69  	return nil
    70  }
// taskDir represents a task-level directory.
//
// It embeds ramfs.Dir for the directory implementation and records the task
// the directory was created for.
//
// +stateify savable
type taskDir struct {
	ramfs.Dir
	// t is the task this directory describes.
	t *kernel.Task
}
// Compile-time check that *taskDir satisfies fs.InodeOperations.
var _ fs.InodeOperations = (*taskDir)(nil)
    83  // newTaskDir creates a new proc task entry.
    84  func (p *proc) newTaskDir(ctx context.Context, t *kernel.Task, msrc *fs.MountSource, isThreadGroup bool) *fs.Inode {
    85  	contents := map[string]*fs.Inode{
    86  		"auxv":          newAuxvec(ctx, t, msrc),
    87  		"cmdline":       newExecArgInode(ctx, t, msrc, cmdlineExecArg),
    88  		"comm":          newComm(ctx, t, msrc),
    89  		"cwd":           newCwd(ctx, t, msrc),
    90  		"environ":       newExecArgInode(ctx, t, msrc, environExecArg),
    91  		"exe":           newExe(ctx, t, msrc),
    92  		"fd":            newFdDir(ctx, t, msrc),
    93  		"fdinfo":        newFdInfoDir(ctx, t, msrc),
    94  		"gid_map":       newGIDMap(ctx, t, msrc),
    95  		"io":            newIO(ctx, t, msrc, isThreadGroup),
    96  		"maps":          newMaps(ctx, t, msrc),
    97  		"mem":           newMem(ctx, t, msrc),
    98  		"mountinfo":     seqfile.NewSeqFileInode(ctx, &mountInfoFile{t: t}, msrc),
    99  		"mounts":        seqfile.NewSeqFileInode(ctx, &mountsFile{t: t}, msrc),
   100  		"net":           newNetDir(ctx, t, msrc),
   101  		"ns":            newNamespaceDir(ctx, t, msrc),
   102  		"oom_score":     newOOMScore(ctx, msrc),
   103  		"oom_score_adj": newOOMScoreAdj(ctx, t, msrc),
   104  		"smaps":         newSmaps(ctx, t, msrc),
   105  		"stat":          newTaskStat(ctx, t, msrc, isThreadGroup, p.pidns),
   106  		"statm":         newStatm(ctx, t, msrc),
   107  		"status":        newStatus(ctx, t, msrc, p.pidns),
   108  		"uid_map":       newUIDMap(ctx, t, msrc),
   109  	}
   110  	if isThreadGroup {
   111  		contents["task"] = p.newSubtasks(ctx, t, msrc)
   112  	}
   113  	if len(p.cgroupControllers) > 0 {
   114  		contents["cgroup"] = newCGroupInode(ctx, msrc, p.cgroupControllers)
   115  	}
   117  	// N.B. taskOwnedInodeOps enforces dumpability-based ownership.
   118  	d := &taskDir{
   119  		Dir: *ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)),
   120  		t:   t,
   121  	}
   122  	return newProcInode(ctx, d, msrc, fs.SpecialDirectory, t)
   123  }
// subtasks represents a /proc/TID/task directory.
//
// The directory's entries are not stored in the embedded ramfs.Dir (it is
// created with nil contents); they are produced on demand by Lookup and by
// subtasksFile.Readdir.
//
// +stateify savable
type subtasks struct {
	ramfs.Dir
	// t is the task whose thread group is listed.
	t *kernel.Task
	// p is the owning proc filesystem; its pidns is used to resolve TIDs.
	p *proc
}
// Compile-time check that *subtasks satisfies fs.InodeOperations.
var _ fs.InodeOperations = (*subtasks)(nil)
   137  func (p *proc) newSubtasks(ctx context.Context, t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
   138  	s := &subtasks{
   139  		Dir: *ramfs.NewDir(ctx, nil, fs.RootOwner, fs.FilePermsFromMode(0555)),
   140  		t:   t,
   141  		p:   p,
   142  	}
   143  	return newProcInode(ctx, s, msrc, fs.SpecialDirectory, t)
   144  }
   146  // UnstableAttr returns unstable attributes of the subtasks.
   147  func (s *subtasks) UnstableAttr(ctx context.Context, inode *fs.Inode) (fs.UnstableAttr, error) {
   148  	uattr, err := s.Dir.UnstableAttr(ctx, inode)
   149  	if err != nil {
   150  		return fs.UnstableAttr{}, err
   151  	}
   152  	// We can't rely on ramfs' implementation because the task directories are
   153  	// generated dynamically.
   154  	uattr.Links = uint64(2 + s.t.ThreadGroup().Count())
   155  	return uattr, nil
   156  }
   158  // GetFile implements fs.InodeOperations.GetFile.
   159  func (s *subtasks) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
   160  	return fs.NewFile(ctx, dirent, flags, &subtasksFile{t: s.t, pidns: s.p.pidns}), nil
   161  }
// subtasksFile is the file object returned by subtasks.GetFile. Its Readdir
// lists the members of t's thread group.
//
// +stateify savable
type subtasksFile struct {
	fsutil.DirFileOperations        `state:"nosave"`
	fsutil.FileUseInodeUnstableAttr `state:"nosave"`
	// t is the task whose thread group is enumerated.
	t     *kernel.Task
	// pidns is the PID namespace used to obtain the member thread IDs.
	pidns *kernel.PIDNamespace
}
   172  // Readdir implements fs.FileOperations.Readdir.
   173  func (f *subtasksFile) Readdir(ctx context.Context, file *fs.File, ser fs.DentrySerializer) (int64, error) {
   174  	dirCtx := fs.DirCtx{
   175  		Serializer: ser,
   176  	}
   178  	// Note that unlike most Readdir implementations, the offset here is
   179  	// not an index into the subtasks, but rather the TID of the next
   180  	// subtask to emit.
   181  	offset := file.Offset()
   183  	tasks := f.t.ThreadGroup().MemberIDs(f.pidns)
   184  	if len(tasks) == 0 {
   185  		return offset, syserror.ENOENT
   186  	}
   188  	if offset == 0 {
   189  		// Serialize "." and "..".
   190  		root := fs.RootFromContext(ctx)
   191  		if root != nil {
   192  			defer root.DecRef(ctx)
   193  		}
   194  		dot, dotdot := file.Dirent.GetDotAttrs(root)
   195  		if err := dirCtx.DirEmit(".", dot); err != nil {
   196  			return offset, err
   197  		}
   198  		if err := dirCtx.DirEmit("..", dotdot); err != nil {
   199  			return offset, err
   200  		}
   201  	}
   203  	// Serialize tasks.
   204  	taskInts := make([]int, 0, len(tasks))
   205  	for _, tid := range tasks {
   206  		taskInts = append(taskInts, int(tid))
   207  	}
   209  	sort.Sort(sort.IntSlice(taskInts))
   210  	// Find the task to start at.
   211  	idx := sort.SearchInts(taskInts, int(offset))
   212  	if idx == len(taskInts) {
   213  		return offset, nil
   214  	}
   215  	taskInts = taskInts[idx:]
   217  	var tid int
   218  	for _, tid = range taskInts {
   219  		name := strconv.FormatUint(uint64(tid), 10)
   220  		attr := fs.GenericDentAttr(fs.SpecialDirectory, device.ProcDevice)
   221  		if err := dirCtx.DirEmit(name, attr); err != nil {
   222  			// Returned offset is next tid to serialize.
   223  			return int64(tid), err
   224  		}
   225  	}
   226  	// We serialized them all.  Next offset should be higher than last
   227  	// serialized tid.
   228  	return int64(tid) + 1, nil
   229  }
   231  var _ fs.FileOperations = (*subtasksFile)(nil)
   233  // Lookup loads an Inode in a task's subtask directory into a Dirent.
   234  func (s *subtasks) Lookup(ctx context.Context, dir *fs.Inode, p string) (*fs.Dirent, error) {
   235  	tid, err := strconv.ParseUint(p, 10, 32)
   236  	if err != nil {
   237  		return nil, syserror.ENOENT
   238  	}
   240  	task := s.p.pidns.TaskWithID(kernel.ThreadID(tid))
   241  	if task == nil {
   242  		return nil, syserror.ENOENT
   243  	}
   244  	if task.ThreadGroup() != s.t.ThreadGroup() {
   245  		return nil, syserror.ENOENT
   246  	}
   248  	td := s.p.newTaskDir(ctx, task, dir.MountSource, false)
   249  	return fs.NewDirent(ctx, td, p), nil
   250  }
// exe is an fs.InodeOperations symlink for the /proc/PID/exe file.
//
// The embedded ramfs.Symlink is created with an empty target; the link text
// is computed by Readlink from the task's executable.
//
// +stateify savable
type exe struct {
	ramfs.Symlink
	// t is the task whose executable the link refers to.
	t *kernel.Task
}
   261  func newExe(ctx context.Context, t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
   262  	exeSymlink := &exe{
   263  		Symlink: *ramfs.NewSymlink(ctx, fs.RootOwner, ""),
   264  		t:       t,
   265  	}
   266  	return newProcInode(ctx, exeSymlink, msrc, fs.Symlink, t)
   267  }
   269  func (e *exe) executable() (file fsbridge.File, err error) {
   270  	if err := checkTaskState(e.t); err != nil {
   271  		return nil, err
   272  	}
   273  	e.t.WithMuLocked(func(t *kernel.Task) {
   274  		mm := t.MemoryManager()
   275  		if mm == nil {
   276  			err = linuxerr.EACCES
   277  			return
   278  		}
   280  		// The MemoryManager may be destroyed, in which case
   281  		// MemoryManager.destroy will simply set the executable to nil
   282  		// (with locks held).
   283  		file = mm.Executable()
   284  		if file == nil {
   285  			err = syserror.ESRCH
   286  		}
   287  	})
   288  	return
   289  }
   291  // Readlink implements fs.InodeOperations.
   292  func (e *exe) Readlink(ctx context.Context, inode *fs.Inode) (string, error) {
   293  	if !kernel.ContextCanTrace(ctx, e.t, false) {
   294  		return "", linuxerr.EACCES
   295  	}
   297  	// Pull out the executable for /proc/TID/exe.
   298  	exec, err := e.executable()
   299  	if err != nil {
   300  		return "", err
   301  	}
   302  	defer exec.DecRef(ctx)
   304  	return exec.PathnameWithDeleted(ctx), nil
   305  }
// cwd is an fs.InodeOperations symlink for the /proc/PID/cwd file.
//
// The embedded ramfs.Symlink is created with an empty target; the link text
// is computed by Readlink from the task's working directory.
//
// +stateify savable
type cwd struct {
	ramfs.Symlink
	// t is the task whose working directory the link refers to.
	t *kernel.Task
}
   316  func newCwd(ctx context.Context, t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
   317  	cwdSymlink := &cwd{
   318  		Symlink: *ramfs.NewSymlink(ctx, fs.RootOwner, ""),
   319  		t:       t,
   320  	}
   321  	return newProcInode(ctx, cwdSymlink, msrc, fs.Symlink, t)
   322  }
   324  // Readlink implements fs.InodeOperations.
   325  func (e *cwd) Readlink(ctx context.Context, inode *fs.Inode) (string, error) {
   326  	if !kernel.ContextCanTrace(ctx, e.t, false) {
   327  		return "", linuxerr.EACCES
   328  	}
   329  	if err := checkTaskState(e.t); err != nil {
   330  		return "", err
   331  	}
   332  	cwd := e.t.FSContext().WorkingDirectory()
   333  	if cwd == nil {
   334  		// It could have raced with process deletion.
   335  		return "", syserror.ESRCH
   336  	}
   337  	defer cwd.DecRef(ctx)
   339  	root := fs.RootFromContext(ctx)
   340  	if root == nil {
   341  		// It could have raced with process deletion.
   342  		return "", syserror.ESRCH
   343  	}
   344  	defer root.DecRef(ctx)
   346  	name, _ := cwd.FullName(root)
   347  	return name, nil
   348  }
// namespaceSymlink represents a symlink in the namespacefs, such as the files
// in /proc/<pid>/ns.
//
// The embedded ramfs.Symlink holds the link target, which is fixed when the
// symlink is created.
//
// +stateify savable
type namespaceSymlink struct {
	ramfs.Symlink
	// t is the task whose state gates Readlink and Getlink.
	t *kernel.Task
}
   360  func newNamespaceSymlink(ctx context.Context, t *kernel.Task, msrc *fs.MountSource, name string) *fs.Inode {
   361  	// TODO(rahat): Namespace symlinks should contain the namespace name and the
   362  	// inode number for the namespace instance, so for example user:[123456]. We
   363  	// currently fake the inode number by sticking the symlink inode in its
   364  	// place.
   365  	target := fmt.Sprintf("%s:[%d]", name, device.ProcDevice.NextIno())
   366  	n := &namespaceSymlink{
   367  		Symlink: *ramfs.NewSymlink(ctx, fs.RootOwner, target),
   368  		t:       t,
   369  	}
   370  	return newProcInode(ctx, n, msrc, fs.Symlink, t)
   371  }
   373  // Readlink reads the symlink value.
   374  func (n *namespaceSymlink) Readlink(ctx context.Context, inode *fs.Inode) (string, error) {
   375  	if err := checkTaskState(n.t); err != nil {
   376  		return "", err
   377  	}
   378  	return n.Symlink.Readlink(ctx, inode)
   379  }
   381  // Getlink implements fs.InodeOperations.Getlink.
   382  func (n *namespaceSymlink) Getlink(ctx context.Context, inode *fs.Inode) (*fs.Dirent, error) {
   383  	if !kernel.ContextCanTrace(ctx, n.t, false) {
   384  		return nil, linuxerr.EACCES
   385  	}
   386  	if err := checkTaskState(n.t); err != nil {
   387  		return nil, err
   388  	}
   390  	// Create a new regular file to fake the namespace file.
   391  	iops := fsutil.NewNoReadWriteFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0777), linux.PROC_SUPER_MAGIC)
   392  	return fs.NewDirent(ctx, newProcInode(ctx, iops, inode.MountSource, fs.RegularFile, nil), n.Symlink.Target), nil
   393  }
   395  func newNamespaceDir(ctx context.Context, t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
   396  	contents := map[string]*fs.Inode{
   397  		"net":  newNamespaceSymlink(ctx, t, msrc, "net"),
   398  		"pid":  newNamespaceSymlink(ctx, t, msrc, "pid"),
   399  		"user": newNamespaceSymlink(ctx, t, msrc, "user"),
   400  	}
   401  	d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0511))
   402  	return newProcInode(ctx, d, msrc, fs.SpecialDirectory, t)
   403  }
// memData implements fs.Inode for /proc/[pid]/mem.
//
// It embeds fsutil.SimpleFileInode and overrides Truncate and GetFile.
//
// +stateify savable
type memData struct {
	fsutil.SimpleFileInode
	// t is the task whose memory is exposed.
	t *kernel.Task
}
// memDataFile implements fs.FileOperations for /proc/[pid]/mem.
//
// Only Read is implemented directly on this type; the remaining operations
// come from the embedded fsutil and waiter helpers.
// NOTE(review): the helper names suggest no ioctl/mmap/write/splice support
// and no-op flush/fsync/release — confirm against fsutil.
//
// +stateify savable
type memDataFile struct {
	fsutil.FileGenericSeek          `state:"nosave"`
	fsutil.FileNoIoctl              `state:"nosave"`
	fsutil.FileNoMMap               `state:"nosave"`
	fsutil.FileNoWrite              `state:"nosave"`
	fsutil.FileNoSplice             `state:"nosave"`
	fsutil.FileNoopFlush            `state:"nosave"`
	fsutil.FileNoopFsync            `state:"nosave"`
	fsutil.FileNoopRelease          `state:"nosave"`
	fsutil.FileNotDirReaddir        `state:"nosave"`
	fsutil.FileUseInodeUnstableAttr `state:"nosave"`
	waiter.AlwaysReady              `state:"nosave"`
	// t is the task whose memory Read copies from.
	t *kernel.Task
}
   433  func newMem(ctx context.Context, t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
   434  	inode := &memData{
   435  		SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0400), linux.PROC_SUPER_MAGIC),
   436  		t:               t,
   437  	}
   438  	return newProcInode(ctx, inode, msrc, fs.SpecialFile, t)
   439  }
   441  // Truncate implements fs.InodeOperations.Truncate.
   442  func (m *memData) Truncate(context.Context, *fs.Inode, int64) error {
   443  	return nil
   444  }
   446  // GetFile implements fs.InodeOperations.GetFile.
   447  func (m *memData) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
   448  	// TODO( Add check for PTRACE_MODE_ATTACH_FSCREDS
   449  	// Permission to read this file is governed by PTRACE_MODE_ATTACH_FSCREDS
   450  	// Since we dont implement setfsuid/setfsgid we can just use PTRACE_MODE_ATTACH
   451  	if !kernel.ContextCanTrace(ctx, m.t, true) {
   452  		return nil, linuxerr.EACCES
   453  	}
   454  	if err := checkTaskState(m.t); err != nil {
   455  		return nil, err
   456  	}
   457  	// Enable random access reads
   458  	flags.Pread = true
   459  	return fs.NewFile(ctx, dirent, flags, &memDataFile{t: m.t}), nil
   460  }
   462  // Read implements fs.FileOperations.Read.
   463  func (m *memDataFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) {
   464  	if dst.NumBytes() == 0 {
   465  		return 0, nil
   466  	}
   467  	mm, err := getTaskMM(m.t)
   468  	if err != nil {
   469  		return 0, nil
   470  	}
   471  	defer mm.DecUsers(ctx)
   472  	// Buffer the read data because of MM locks
   473  	buf := make([]byte, dst.NumBytes())
   474  	n, readErr := mm.CopyIn(ctx, hostarch.Addr(offset), buf, usermem.IOOpts{IgnorePermissions: true})
   475  	if n > 0 {
   476  		if _, err := dst.CopyOut(ctx, buf[:n]); err != nil {
   477  			return 0, syserror.EFAULT
   478  		}
   479  		return int64(n), nil
   480  	}
   481  	if readErr != nil {
   482  		return 0, syserror.EIO
   483  	}
   484  	return 0, nil
   485  }
// mapsData implements seqfile.SeqSource for /proc/[pid]/maps.
//
// +stateify savable
type mapsData struct {
	// t is the task whose MemoryManager supplies the data.
	t *kernel.Task
}
   494  func newMaps(ctx context.Context, t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
   495  	return newProcInode(ctx, seqfile.NewSeqFile(ctx, &mapsData{t}), msrc, fs.SpecialFile, t)
   496  }
   498  func (md *mapsData) mm() *mm.MemoryManager {
   499  	var tmm *mm.MemoryManager
   500  	md.t.WithMuLocked(func(t *kernel.Task) {
   501  		if mm := t.MemoryManager(); mm != nil {
   502  			// No additional reference is taken on mm here. This is safe
   503  			// because MemoryManager.destroy is required to leave the
   504  			// MemoryManager in a state where it's still usable as a SeqSource.
   505  			tmm = mm
   506  		}
   507  	})
   508  	return tmm
   509  }
   511  // NeedsUpdate implements seqfile.SeqSource.NeedsUpdate.
   512  func (md *mapsData) NeedsUpdate(generation int64) bool {
   513  	if mm :=; mm != nil {
   514  		return mm.NeedsUpdate(generation)
   515  	}
   516  	return true
   517  }
   519  // ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData.
   520  func (md *mapsData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
   521  	if mm :=; mm != nil {
   522  		return mm.ReadMapsSeqFileData(ctx, h)
   523  	}
   524  	return []seqfile.SeqData{}, 0
   525  }
// smapsData implements seqfile.SeqSource for /proc/[pid]/smaps.
//
// +stateify savable
type smapsData struct {
	// t is the task whose MemoryManager supplies the data.
	t *kernel.Task
}
   534  func newSmaps(ctx context.Context, t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
   535  	return newProcInode(ctx, seqfile.NewSeqFile(ctx, &smapsData{t}), msrc, fs.SpecialFile, t)
   536  }
   538  func (sd *smapsData) mm() *mm.MemoryManager {
   539  	var tmm *mm.MemoryManager
   540  	sd.t.WithMuLocked(func(t *kernel.Task) {
   541  		if mm := t.MemoryManager(); mm != nil {
   542  			// No additional reference is taken on mm here. This is safe
   543  			// because MemoryManager.destroy is required to leave the
   544  			// MemoryManager in a state where it's still usable as a SeqSource.
   545  			tmm = mm
   546  		}
   547  	})
   548  	return tmm
   549  }
   551  // NeedsUpdate implements seqfile.SeqSource.NeedsUpdate.
   552  func (sd *smapsData) NeedsUpdate(generation int64) bool {
   553  	if mm :=; mm != nil {
   554  		return mm.NeedsUpdate(generation)
   555  	}
   556  	return true
   557  }
   559  // ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData.
   560  func (sd *smapsData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
   561  	if mm :=; mm != nil {
   562  		return mm.ReadSmapsSeqFileData(ctx, h)
   563  	}
   564  	return []seqfile.SeqData{}, 0
   565  }
// taskStatData is the seqfile source behind a task's "stat" file (see
// newTaskStat).
//
// +stateify savable
type taskStatData struct {
	// t is the task being described.
	t *kernel.Task
	// If tgstats is true, accumulate fault stats (not implemented) and CPU
	// time across all tasks in t's thread group.
	tgstats bool
	// pidns is the PID namespace associated with the proc filesystem that
	// includes the file using this statData.
	pidns *kernel.PIDNamespace
}
   580  func newTaskStat(ctx context.Context, t *kernel.Task, msrc *fs.MountSource, showSubtasks bool, pidns *kernel.PIDNamespace) *fs.Inode {
   581  	return newProcInode(ctx, seqfile.NewSeqFile(ctx, &taskStatData{t, showSubtasks /* tgstats */, pidns}), msrc, fs.SpecialFile, t)
   582  }
   584  // NeedsUpdate returns whether the generation is old or not.
   585  func (s *taskStatData) NeedsUpdate(generation int64) bool {
   586  	return true
   587  }
   589  // ReadSeqFileData returns data for the SeqFile reader.
   590  // SeqData, the current generation and where in the file the handle corresponds to.
   591  func (s *taskStatData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
   592  	if h != nil {
   593  		return nil, 0
   594  	}
   596  	var buf bytes.Buffer
   598  	fmt.Fprintf(&buf, "%d ", s.pidns.IDOfTask(s.t))
   599  	fmt.Fprintf(&buf, "(%s) ", s.t.Name())
   600  	fmt.Fprintf(&buf, "%c ", s.t.StateStatus()[0])
   601  	ppid := kernel.ThreadID(0)
   602  	if parent := s.t.Parent(); parent != nil {
   603  		ppid = s.pidns.IDOfThreadGroup(parent.ThreadGroup())
   604  	}
   605  	fmt.Fprintf(&buf, "%d ", ppid)
   606  	fmt.Fprintf(&buf, "%d ", s.pidns.IDOfProcessGroup(s.t.ThreadGroup().ProcessGroup()))
   607  	fmt.Fprintf(&buf, "%d ", s.pidns.IDOfSession(s.t.ThreadGroup().Session()))
   608  	fmt.Fprintf(&buf, "0 0 " /* tty_nr tpgid */)
   609  	fmt.Fprintf(&buf, "0 " /* flags */)
   610  	fmt.Fprintf(&buf, "0 0 0 0 " /* minflt cminflt majflt cmajflt */)
   611  	var cputime usage.CPUStats
   612  	if s.tgstats {
   613  		cputime = s.t.ThreadGroup().CPUStats()
   614  	} else {
   615  		cputime = s.t.CPUStats()
   616  	}
   617  	fmt.Fprintf(&buf, "%d %d ", linux.ClockTFromDuration(cputime.UserTime), linux.ClockTFromDuration(cputime.SysTime))
   618  	cputime = s.t.ThreadGroup().JoinedChildCPUStats()
   619  	fmt.Fprintf(&buf, "%d %d ", linux.ClockTFromDuration(cputime.UserTime), linux.ClockTFromDuration(cputime.SysTime))
   620  	fmt.Fprintf(&buf, "%d %d ", s.t.Priority(), s.t.Niceness())
   621  	fmt.Fprintf(&buf, "%d ", s.t.ThreadGroup().Count())
   623  	// itrealvalue. Since kernel 2.6.17, this field is no longer
   624  	// maintained, and is hard coded as 0.
   625  	fmt.Fprintf(&buf, "0 ")
   627  	// Start time is relative to boot time, expressed in clock ticks.
   628  	fmt.Fprintf(&buf, "%d ", linux.ClockTFromDuration(s.t.StartTime().Sub(s.t.Kernel().Timekeeper().BootTime())))
   630  	var vss, rss uint64
   631  	s.t.WithMuLocked(func(t *kernel.Task) {
   632  		if mm := t.MemoryManager(); mm != nil {
   633  			vss = mm.VirtualMemorySize()
   634  			rss = mm.ResidentSetSize()
   635  		}
   636  	})
   637  	fmt.Fprintf(&buf, "%d %d ", vss, rss/hostarch.PageSize)
   639  	// rsslim.
   640  	fmt.Fprintf(&buf, "%d ", s.t.ThreadGroup().Limits().Get(limits.Rss).Cur)
   642  	fmt.Fprintf(&buf, "0 0 0 0 0 " /* startcode endcode startstack kstkesp kstkeip */)
   643  	fmt.Fprintf(&buf, "0 0 0 0 0 " /* signal blocked sigignore sigcatch wchan */)
   644  	fmt.Fprintf(&buf, "0 0 " /* nswap cnswap */)
   645  	terminationSignal := linux.Signal(0)
   646  	if s.t == s.t.ThreadGroup().Leader() {
   647  		terminationSignal = s.t.ThreadGroup().TerminationSignal()
   648  	}
   649  	fmt.Fprintf(&buf, "%d ", terminationSignal)
   650  	fmt.Fprintf(&buf, "0 0 0 " /* processor rt_priority policy */)
   651  	fmt.Fprintf(&buf, "0 0 0 " /* delayacct_blkio_ticks guest_time cguest_time */)
   652  	fmt.Fprintf(&buf, "0 0 0 0 0 0 0 " /* start_data end_data start_brk arg_start arg_end env_start env_end */)
   653  	fmt.Fprintf(&buf, "0\n" /* exit_code */)
   655  	return []seqfile.SeqData{{Buf: buf.Bytes(), Handle: (*taskStatData)(nil)}}, 0
   656  }
// statmData implements seqfile.SeqSource for /proc/[pid]/statm.
//
// +stateify savable
type statmData struct {
	// t is the task whose memory sizes are reported.
	t *kernel.Task
}
   665  func newStatm(ctx context.Context, t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
   666  	return newProcInode(ctx, seqfile.NewSeqFile(ctx, &statmData{t}), msrc, fs.SpecialFile, t)
   667  }
   669  // NeedsUpdate implements seqfile.SeqSource.NeedsUpdate.
   670  func (s *statmData) NeedsUpdate(generation int64) bool {
   671  	return true
   672  }
   674  // ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData.
   675  func (s *statmData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
   676  	if h != nil {
   677  		return nil, 0
   678  	}
   680  	var vss, rss uint64
   681  	s.t.WithMuLocked(func(t *kernel.Task) {
   682  		if mm := t.MemoryManager(); mm != nil {
   683  			vss = mm.VirtualMemorySize()
   684  			rss = mm.ResidentSetSize()
   685  		}
   686  	})
   688  	var buf bytes.Buffer
   689  	fmt.Fprintf(&buf, "%d %d 0 0 0 0 0\n", vss/hostarch.PageSize, rss/hostarch.PageSize)
   691  	return []seqfile.SeqData{{Buf: buf.Bytes(), Handle: (*statmData)(nil)}}, 0
   692  }
// statusData implements seqfile.SeqSource for /proc/[pid]/status.
//
// +stateify savable
type statusData struct {
	// t is the task being described.
	t     *kernel.Task
	// pidns is the PID namespace used to translate task and thread group IDs.
	pidns *kernel.PIDNamespace
}
   702  func newStatus(ctx context.Context, t *kernel.Task, msrc *fs.MountSource, pidns *kernel.PIDNamespace) *fs.Inode {
   703  	return newProcInode(ctx, seqfile.NewSeqFile(ctx, &statusData{t, pidns}), msrc, fs.SpecialFile, t)
   704  }
   706  // NeedsUpdate implements seqfile.SeqSource.NeedsUpdate.
   707  func (s *statusData) NeedsUpdate(generation int64) bool {
   708  	return true
   709  }
   711  // ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData.
   712  func (s *statusData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
   713  	if h != nil {
   714  		return nil, 0
   715  	}
   717  	var buf bytes.Buffer
   718  	fmt.Fprintf(&buf, "Name:\t%s\n", s.t.Name())
   719  	fmt.Fprintf(&buf, "State:\t%s\n", s.t.StateStatus())
   720  	fmt.Fprintf(&buf, "Tgid:\t%d\n", s.pidns.IDOfThreadGroup(s.t.ThreadGroup()))
   721  	fmt.Fprintf(&buf, "Pid:\t%d\n", s.pidns.IDOfTask(s.t))
   722  	ppid := kernel.ThreadID(0)
   723  	if parent := s.t.Parent(); parent != nil {
   724  		ppid = s.pidns.IDOfThreadGroup(parent.ThreadGroup())
   725  	}
   726  	fmt.Fprintf(&buf, "PPid:\t%d\n", ppid)
   727  	tpid := kernel.ThreadID(0)
   728  	if tracer := s.t.Tracer(); tracer != nil {
   729  		tpid = s.pidns.IDOfTask(tracer)
   730  	}
   731  	fmt.Fprintf(&buf, "TracerPid:\t%d\n", tpid)
   732  	var fds int
   733  	var vss, rss, data uint64
   734  	s.t.WithMuLocked(func(t *kernel.Task) {
   735  		if fdTable := t.FDTable(); fdTable != nil {
   736  			fds = fdTable.CurrentMaxFDs()
   737  		}
   738  		if mm := t.MemoryManager(); mm != nil {
   739  			vss = mm.VirtualMemorySize()
   740  			rss = mm.ResidentSetSize()
   741  			data = mm.VirtualDataSize()
   742  		}
   743  	})
   744  	fmt.Fprintf(&buf, "FDSize:\t%d\n", fds)
   745  	fmt.Fprintf(&buf, "VmSize:\t%d kB\n", vss>>10)
   746  	fmt.Fprintf(&buf, "VmRSS:\t%d kB\n", rss>>10)
   747  	fmt.Fprintf(&buf, "VmData:\t%d kB\n", data>>10)
   748  	fmt.Fprintf(&buf, "Threads:\t%d\n", s.t.ThreadGroup().Count())
   749  	creds := s.t.Credentials()
   750  	fmt.Fprintf(&buf, "CapInh:\t%016x\n", creds.InheritableCaps)
   751  	fmt.Fprintf(&buf, "CapPrm:\t%016x\n", creds.PermittedCaps)
   752  	fmt.Fprintf(&buf, "CapEff:\t%016x\n", creds.EffectiveCaps)
   753  	fmt.Fprintf(&buf, "CapBnd:\t%016x\n", creds.BoundingCaps)
   754  	fmt.Fprintf(&buf, "Seccomp:\t%d\n", s.t.SeccompMode())
   755  	// We unconditionally report a single NUMA node. See
   756  	// pkg/sentry/syscalls/linux/sys_mempolicy.go.
   757  	fmt.Fprintf(&buf, "Mems_allowed:\t1\n")
   758  	fmt.Fprintf(&buf, "Mems_allowed_list:\t0\n")
   759  	return []seqfile.SeqData{{Buf: buf.Bytes(), Handle: (*statusData)(nil)}}, 0
   760  }
// ioUsage is the /proc/<pid>/io and /proc/<pid>/task/<tid>/io data provider.
//
// newIO supplies either a task or its thread group as the implementation.
type ioUsage interface {
	// IOUsage returns the io usage data.
	IOUsage() *usage.IO
}
// ioData is the seqfile source behind the "io" file. It embeds the ioUsage
// provider whose counters it prints.
//
// +stateify savable
type ioData struct {
	ioUsage
}
   773  func newIO(ctx context.Context, t *kernel.Task, msrc *fs.MountSource, isThreadGroup bool) *fs.Inode {
   774  	if isThreadGroup {
   775  		return newProcInode(ctx, seqfile.NewSeqFile(ctx, &ioData{t.ThreadGroup()}), msrc, fs.SpecialFile, t)
   776  	}
   777  	return newProcInode(ctx, seqfile.NewSeqFile(ctx, &ioData{t}), msrc, fs.SpecialFile, t)
   778  }
   780  // NeedsUpdate returns whether the generation is old or not.
   781  func (i *ioData) NeedsUpdate(generation int64) bool {
   782  	return true
   783  }
   785  // ReadSeqFileData returns data for the SeqFile reader.
   786  // SeqData, the current generation and where in the file the handle corresponds to.
   787  func (i *ioData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
   788  	if h != nil {
   789  		return nil, 0
   790  	}
   792  	io := usage.IO{}
   793  	io.Accumulate(i.IOUsage())
   795  	var buf bytes.Buffer
   796  	fmt.Fprintf(&buf, "rchar: %d\n", io.CharsRead)
   797  	fmt.Fprintf(&buf, "wchar: %d\n", io.CharsWritten)
   798  	fmt.Fprintf(&buf, "syscr: %d\n", io.ReadSyscalls)
   799  	fmt.Fprintf(&buf, "syscw: %d\n", io.WriteSyscalls)
   800  	fmt.Fprintf(&buf, "read_bytes: %d\n", io.BytesRead)
   801  	fmt.Fprintf(&buf, "write_bytes: %d\n", io.BytesWritten)
   802  	fmt.Fprintf(&buf, "cancelled_write_bytes: %d\n", io.BytesWriteCancelled)
   804  	return []seqfile.SeqData{{Buf: buf.Bytes(), Handle: (*ioData)(nil)}}, 0
   805  }
// comm is a file containing the command name for a task.
//
// On Linux, /proc/[pid]/comm is writable, and writing to the comm file changes
// the thread name. We don't implement this yet as there are no known users of
// this feature.
//
// +stateify savable
type comm struct {
	fsutil.SimpleFileInode
	// t is the task whose name is exposed.
	t *kernel.Task
}
   820  // newComm returns a new comm file.
   821  func newComm(ctx context.Context, t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
   822  	c := &comm{
   823  		SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0444), linux.PROC_SUPER_MAGIC),
   824  		t:               t,
   825  	}
   826  	return newProcInode(ctx, c, msrc, fs.SpecialFile, t)
   827  }
   829  // Check implements fs.InodeOperations.Check.
   830  func (c *comm) Check(ctx context.Context, inode *fs.Inode, p fs.PermMask) bool {
   831  	// This file can always be read or written by members of the same
   832  	// thread group. See fs/proc/base.c:proc_tid_comm_permission.
   833  	//
   834  	// N.B. This check is currently a no-op as we don't yet support writing
   835  	// and this file is world-readable anyways.
   836  	t := kernel.TaskFromContext(ctx)
   837  	if t != nil && t.ThreadGroup() == c.t.ThreadGroup() && !p.Execute {
   838  		return true
   839  	}
   841  	return fs.ContextCanAccessFile(ctx, inode, p)
   842  }
   844  // GetFile implements fs.InodeOperations.GetFile.
   845  func (c *comm) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
   846  	return fs.NewFile(ctx, dirent, flags, &commFile{t: c.t}), nil
   847  }
// commFile is the file object returned by comm.GetFile.
//
// Only Read is implemented directly on this type; the remaining operations
// come from the embedded fsutil and waiter helpers.
// NOTE(review): the helper names suggest no ioctl/mmap/splice/write support
// and no-op flush/fsync/release — confirm against fsutil.
//
// +stateify savable
type commFile struct {
	fsutil.FileGenericSeek          `state:"nosave"`
	fsutil.FileNoIoctl              `state:"nosave"`
	fsutil.FileNoMMap               `state:"nosave"`
	fsutil.FileNoSplice             `state:"nosave"`
	fsutil.FileNoWrite              `state:"nosave"`
	fsutil.FileNoopFlush            `state:"nosave"`
	fsutil.FileNoopFsync            `state:"nosave"`
	fsutil.FileNoopRelease          `state:"nosave"`
	fsutil.FileNotDirReaddir        `state:"nosave"`
	fsutil.FileUseInodeUnstableAttr `state:"nosave"`
	waiter.AlwaysReady              `state:"nosave"`
	// t is the task whose name Read returns.
	t *kernel.Task
}
// Compile-time check that *commFile satisfies fs.FileOperations.
var _ fs.FileOperations = (*commFile)(nil)
   868  // Read implements fs.FileOperations.Read.
   869  func (f *commFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) {
   870  	if offset < 0 {
   871  		return 0, linuxerr.EINVAL
   872  	}
   874  	buf := []byte(f.t.Name() + "\n")
   875  	if offset >= int64(len(buf)) {
   876  		return 0, io.EOF
   877  	}
   879  	n, err := dst.CopyOut(ctx, buf[offset:])
   880  	return int64(n), err
   881  }
// auxvec is the inode for a file containing the auxiliary vector for a task.
//
// +stateify savable
type auxvec struct {
	fsutil.SimpleFileInode
	// t is the task whose auxiliary vector is exposed; it is handed to every
	// auxvecFile created by GetFile.
	t *kernel.Task
}
   892  // newAuxvec returns a new auxvec file.
   893  func newAuxvec(ctx context.Context, t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
   894  	a := &auxvec{
   895  		SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0444), linux.PROC_SUPER_MAGIC),
   896  		t:               t,
   897  	}
   898  	return newProcInode(ctx, a, msrc, fs.SpecialFile, t)
   899  }
   901  // GetFile implements fs.InodeOperations.GetFile.
   902  func (a *auxvec) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
   903  	return fs.NewFile(ctx, dirent, flags, &auxvecFile{t: a.t}), nil
   904  }
// auxvecFile holds the file operations for an open auxv file. Its Read method
// serializes the auxiliary vector of t's MemoryManager. It embeds
// fsutil.FileNoWrite and defines no Write of its own.
//
// +stateify savable
type auxvecFile struct {
	fsutil.FileGenericSeek          `state:"nosave"`
	fsutil.FileNoIoctl              `state:"nosave"`
	fsutil.FileNoMMap               `state:"nosave"`
	fsutil.FileNoSplice             `state:"nosave"`
	fsutil.FileNoWrite              `state:"nosave"`
	fsutil.FileNoopFlush            `state:"nosave"`
	fsutil.FileNoopFsync            `state:"nosave"`
	fsutil.FileNoopRelease          `state:"nosave"`
	fsutil.FileNotDirReaddir        `state:"nosave"`
	fsutil.FileUseInodeUnstableAttr `state:"nosave"`
	waiter.AlwaysReady              `state:"nosave"`
	// t is the task whose auxiliary vector Read returns.
	t *kernel.Task
}
   923  // Read implements fs.FileOperations.Read.
   924  func (f *auxvecFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) {
   925  	if offset < 0 {
   926  		return 0, linuxerr.EINVAL
   927  	}
   929  	m, err := getTaskMM(f.t)
   930  	if err != nil {
   931  		return 0, err
   932  	}
   933  	defer m.DecUsers(ctx)
   934  	auxv := m.Auxv()
   936  	// Space for buffer with AT_NULL (0) terminator at the end.
   937  	size := (len(auxv) + 1) * 16
   938  	if offset >= int64(size) {
   939  		return 0, io.EOF
   940  	}
   942  	buf := make([]byte, size)
   943  	for i, e := range auxv {
   944  		hostarch.ByteOrder.PutUint64(buf[16*i:], e.Key)
   945  		hostarch.ByteOrder.PutUint64(buf[16*i+8:], uint64(e.Value))
   946  	}
   948  	n, err := dst.CopyOut(ctx, buf[offset:])
   949  	return int64(n), err
   950  }
   952  // newOOMScore returns a oom_score file. It is a stub that always returns 0.
   953  // TODO(
   954  func newOOMScore(ctx context.Context, msrc *fs.MountSource) *fs.Inode {
   955  	return newStaticProcInode(ctx, msrc, []byte("0\n"))
   956  }
// oomScoreAdj is the inode for a file containing the oom_score adjustment for
// a task.
//
// +stateify savable
type oomScoreAdj struct {
	fsutil.SimpleFileInode
	// t is the task whose adjustment is exposed; it is handed to every
	// oomScoreAdjFile created by GetFile.
	t *kernel.Task
}
// oomScoreAdjFile holds the file operations for an open oom_score_adj file.
// Unlike the other task files in this section it does not embed
// fsutil.FileNoWrite, because it defines its own Write that updates the
// adjustment of t.
//
// +stateify savable
type oomScoreAdjFile struct {
	fsutil.FileGenericSeek          `state:"nosave"`
	fsutil.FileNoIoctl              `state:"nosave"`
	fsutil.FileNoMMap               `state:"nosave"`
	fsutil.FileNoSplice             `state:"nosave"`
	fsutil.FileNoopFlush            `state:"nosave"`
	fsutil.FileNoopFsync            `state:"nosave"`
	fsutil.FileNoopRelease          `state:"nosave"`
	fsutil.FileNotDirReaddir        `state:"nosave"`
	fsutil.FileUseInodeUnstableAttr `state:"nosave"`
	waiter.AlwaysReady              `state:"nosave"`
	// t is the task whose OOM score adjustment is read and written.
	t *kernel.Task
}
   983  // newOOMScoreAdj returns a oom_score_adj file.
   984  func newOOMScoreAdj(ctx context.Context, t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
   985  	i := &oomScoreAdj{
   986  		SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0644), linux.PROC_SUPER_MAGIC),
   987  		t:               t,
   988  	}
   989  	return newProcInode(ctx, i, msrc, fs.SpecialFile, t)
   990  }
// Truncate implements fs.InodeOperations.Truncate. Truncate is called when
// O_TRUNC is specified for any kind of existing Dirent but is not called via
// (f)truncate for proc files.
//
// It ignores all of its arguments and always reports success.
func (*oomScoreAdj) Truncate(context.Context, *fs.Inode, int64) error {
	return nil
}
   999  // GetFile implements fs.InodeOperations.GetFile.
  1000  func (o *oomScoreAdj) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
  1001  	return fs.NewFile(ctx, dirent, flags, &oomScoreAdjFile{t: o.t}), nil
  1002  }
  1004  // Read implements fs.FileOperations.Read.
  1005  func (f *oomScoreAdjFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) {
  1006  	if f.t.ExitState() == kernel.TaskExitDead {
  1007  		return 0, syserror.ESRCH
  1008  	}
  1009  	var buf bytes.Buffer
  1010  	fmt.Fprintf(&buf, "%d\n", f.t.OOMScoreAdj())
  1011  	if offset >= int64(buf.Len()) {
  1012  		return 0, io.EOF
  1013  	}
  1014  	n, err := dst.CopyOut(ctx, buf.Bytes()[offset:])
  1015  	return int64(n), err
  1016  }
  1018  // Write implements fs.FileOperations.Write.
  1019  func (f *oomScoreAdjFile) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, offset int64) (int64, error) {
  1020  	if src.NumBytes() == 0 {
  1021  		return 0, nil
  1022  	}
  1024  	// Limit input size so as not to impact performance if input size is large.
  1025  	src = src.TakeFirst(hostarch.PageSize - 1)
  1027  	var v int32
  1028  	n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts)
  1029  	if err != nil {
  1030  		return 0, err
  1031  	}
  1033  	if f.t.ExitState() == kernel.TaskExitDead {
  1034  		return 0, syserror.ESRCH
  1035  	}
  1036  	if err := f.t.SetOOMScoreAdj(v); err != nil {
  1037  		return 0, err
  1038  	}
  1040  	return n, nil
  1041  }
  1043  // LINT.ThenChange(../../fsimpl/proc/task.go|../../fsimpl/proc/task_files.go)