github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/proc/task_files.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package proc
    16  
    17  import (
    18  	"bytes"
    19  	"fmt"
    20  	"io"
    21  
    22  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    23  	"github.com/SagerNet/gvisor/pkg/context"
    24  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    25  	"github.com/SagerNet/gvisor/pkg/hostarch"
    26  	"github.com/SagerNet/gvisor/pkg/safemem"
    27  	"github.com/SagerNet/gvisor/pkg/sentry/fsbridge"
    28  	"github.com/SagerNet/gvisor/pkg/sentry/fsimpl/kernfs"
    29  	"github.com/SagerNet/gvisor/pkg/sentry/kernel"
    30  	"github.com/SagerNet/gvisor/pkg/sentry/kernel/auth"
    31  	"github.com/SagerNet/gvisor/pkg/sentry/limits"
    32  	"github.com/SagerNet/gvisor/pkg/sentry/mm"
    33  	"github.com/SagerNet/gvisor/pkg/sentry/usage"
    34  	"github.com/SagerNet/gvisor/pkg/sentry/vfs"
    35  	"github.com/SagerNet/gvisor/pkg/sync"
    36  	"github.com/SagerNet/gvisor/pkg/syserror"
    37  	"github.com/SagerNet/gvisor/pkg/usermem"
    38  )
    39  
// maxIDMapLines is the maximum number of lines accepted by a single write to
// /proc/[pid]/{gid_map|uid_map}; idMapData.Write rejects longer inputs with
// EINVAL.
//
// "There is an (arbitrary) limit on the number of lines in the file. As at
// Linux 3.18, the limit is five lines." - user_namespaces(7)
const maxIDMapLines = 5
    43  
    44  // mm gets the kernel task's MemoryManager. No additional reference is taken on
    45  // mm here. This is safe because MemoryManager.destroy is required to leave the
    46  // MemoryManager in a state where it's still usable as a DynamicBytesSource.
    47  func getMM(task *kernel.Task) *mm.MemoryManager {
    48  	var tmm *mm.MemoryManager
    49  	task.WithMuLocked(func(t *kernel.Task) {
    50  		if mm := t.MemoryManager(); mm != nil {
    51  			tmm = mm
    52  		}
    53  	})
    54  	return tmm
    55  }
    56  
    57  // getMMIncRef returns t's MemoryManager. If getMMIncRef succeeds, the
    58  // MemoryManager's users count is incremented, and must be decremented by the
    59  // caller when it is no longer in use.
    60  func getMMIncRef(task *kernel.Task) (*mm.MemoryManager, error) {
    61  	var m *mm.MemoryManager
    62  	task.WithMuLocked(func(t *kernel.Task) {
    63  		m = t.MemoryManager()
    64  	})
    65  	if m == nil || !m.IncUsers() {
    66  		return nil, io.EOF
    67  	}
    68  	return m, nil
    69  }
    70  
    71  func checkTaskState(t *kernel.Task) error {
    72  	switch t.ExitState() {
    73  	case kernel.TaskExitZombie:
    74  		return linuxerr.EACCES
    75  	case kernel.TaskExitDead:
    76  		return syserror.ESRCH
    77  	}
    78  	return nil
    79  }
    80  
    81  type bufferWriter struct {
    82  	buf *bytes.Buffer
    83  }
    84  
    85  // WriteFromBlocks writes up to srcs.NumBytes() bytes from srcs and returns
    86  // the number of bytes written. It may return a partial write without an
    87  // error (i.e. (n, nil) where 0 < n < srcs.NumBytes()). It should not
    88  // return a full write with an error (i.e. srcs.NumBytes(), err) where err
    89  // != nil).
    90  func (w *bufferWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) {
    91  	written := srcs.NumBytes()
    92  	for !srcs.IsEmpty() {
    93  		w.buf.Write(srcs.Head().ToSlice())
    94  		srcs = srcs.Tail()
    95  	}
    96  	return written, nil
    97  }
    98  
    99  // auxvData implements vfs.DynamicBytesSource for /proc/[pid]/auxv.
   100  //
   101  // +stateify savable
   102  type auxvData struct {
   103  	kernfs.DynamicBytesFile
   104  
   105  	task *kernel.Task
   106  }
   107  
   108  var _ dynamicInode = (*auxvData)(nil)
   109  
   110  // Generate implements vfs.DynamicBytesSource.Generate.
   111  func (d *auxvData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   112  	if d.task.ExitState() == kernel.TaskExitDead {
   113  		return syserror.ESRCH
   114  	}
   115  	m, err := getMMIncRef(d.task)
   116  	if err != nil {
   117  		// Return empty file.
   118  		return nil
   119  	}
   120  	defer m.DecUsers(ctx)
   121  
   122  	auxv := m.Auxv()
   123  	// Space for buffer with AT_NULL (0) terminator at the end.
   124  	buf.Grow((len(auxv) + 1) * 16)
   125  	for _, e := range auxv {
   126  		var tmp [16]byte
   127  		hostarch.ByteOrder.PutUint64(tmp[:8], e.Key)
   128  		hostarch.ByteOrder.PutUint64(tmp[8:], uint64(e.Value))
   129  		buf.Write(tmp[:])
   130  	}
   131  	var atNull [16]byte
   132  	buf.Write(atNull[:])
   133  
   134  	return nil
   135  }
   136  
// execArgType enumerates the types of exec arguments that are exposed through
// proc.
type execArgType int

const (
	// cmdlineDataArg selects the argument vector (argv) range of the task's
	// MemoryManager.
	cmdlineDataArg execArgType = iota
	// environDataArg selects the environment vector (envv) range of the
	// task's MemoryManager.
	environDataArg
)
   145  
// cmdlineData implements vfs.DynamicBytesSource for /proc/[pid]/cmdline. The
// same type also serves the environment vector when arg is environDataArg
// (presumably backing /proc/[pid]/environ; the registration is not visible
// here).
//
// +stateify savable
type cmdlineData struct {
	kernfs.DynamicBytesFile

	task *kernel.Task

	// arg is the type of exec argument this file contains.
	arg execArgType
}

var _ dynamicInode = (*cmdlineData)(nil)

// Generate implements vfs.DynamicBytesSource.Generate.
//
// It copies the selected exec argument range (argv or envv) out of the task's
// address space into buf. A dead task yields ESRCH; a task whose
// MemoryManager cannot be acquired yields an empty file; io.EOF is returned
// if the range bounds have not been set up yet.
//
// For cmdlineDataArg only, if the last byte of argv is not NUL the output is
// either cut at the first NUL found inside argv, or extended into envv (at
// most one page in total) and cut at the first NUL found in the envv portion.
func (d *cmdlineData) Generate(ctx context.Context, buf *bytes.Buffer) error {
	if d.task.ExitState() == kernel.TaskExitDead {
		return syserror.ESRCH
	}
	m, err := getMMIncRef(d.task)
	if err != nil {
		// Return empty file.
		return nil
	}
	defer m.DecUsers(ctx)

	// Figure out the bounds of the exec arg we are trying to read.
	var ar hostarch.AddrRange
	switch d.arg {
	case cmdlineDataArg:
		ar = hostarch.AddrRange{
			Start: m.ArgvStart(),
			End:   m.ArgvEnd(),
		}
	case environDataArg:
		ar = hostarch.AddrRange{
			Start: m.EnvvStart(),
			End:   m.EnvvEnd(),
		}
	default:
		panic(fmt.Sprintf("unknown exec arg type %v", d.arg))
	}
	if ar.Start == 0 || ar.End == 0 {
		// Don't attempt to read before the start/end are set up.
		return io.EOF
	}

	// N.B. Technically this should be usermem.IOOpts.IgnorePermissions = true
	// until Linux 4.9 (272ddc8b3735 "proc: don't use FOLL_FORCE for reading
	// cmdline and environment").
	writer := &bufferWriter{buf: buf}
	if n, err := m.CopyInTo(ctx, hostarch.AddrRangeSeqOf(ar), writer, usermem.IOOpts{}); n == 0 || err != nil {
		// Nothing to copy or something went wrong. Note that this also
		// guarantees buf is non-empty below (assuming buf was empty on
		// entry), so indexing buf.Len()-1 is in range.
		return err
	}

	// On Linux, if the NULL byte at the end of the argument vector has been
	// overwritten, it continues reading the environment vector as part of
	// the argument vector.
	if d.arg == cmdlineDataArg && buf.Bytes()[buf.Len()-1] != 0 {
		if end := bytes.IndexByte(buf.Bytes(), 0); end != -1 {
			// If we found a NULL character somewhere else in argv, truncate the
			// output just before that NULL terminator (Truncate(end) keeps
			// bytes [0, end), so the NULL itself is not included).
			buf.Truncate(end)
			return nil
		}

		// There is no NULL terminator in the string, continue into envp.
		arEnvv := hostarch.AddrRange{
			Start: m.EnvvStart(),
			End:   m.EnvvEnd(),
		}

		// Upstream limits the returned amount to one page of slop.
		// https://elixir.bootlin.com/linux/v4.20/source/fs/proc/base.c#L208
		// we'll return one page total between argv and envp because of the
		// above page restrictions.
		if buf.Len() >= hostarch.PageSize {
			// Returned at least one page already, nothing else to add.
			return nil
		}
		remaining := hostarch.PageSize - buf.Len()
		if int(arEnvv.Length()) > remaining {
			// Clamp the envv range so that argv+envv fits in one page.
			end, ok := arEnvv.Start.AddLength(uint64(remaining))
			if !ok {
				return syserror.EFAULT
			}
			arEnvv.End = end
		}
		if _, err := m.CopyInTo(ctx, hostarch.AddrRangeSeqOf(arEnvv), writer, usermem.IOOpts{}); err != nil {
			return err
		}

		// Linux will return envp up to the first NULL character, so find it.
		// The search starts at the offset where the envv bytes were appended,
		// taken to be the full length of the argv range; the NULL itself is
		// dropped by Truncate.
		envStart := int(ar.Length())
		if nullIdx := bytes.IndexByte(buf.Bytes()[envStart:], 0); nullIdx != -1 {
			buf.Truncate(envStart + nullIdx)
		}
	}

	return nil
}
   249  
// commInode is the inode for /proc/[pid]/comm. It wraps a DynamicBytesFile
// and overrides CheckPermissions so that members of the task's own thread
// group are always granted non-exec access.
//
// +stateify savable
type commInode struct {
	kernfs.DynamicBytesFile

	task *kernel.Task
}
   256  
   257  func (fs *filesystem) newComm(ctx context.Context, task *kernel.Task, ino uint64, perm linux.FileMode) kernfs.Inode {
   258  	inode := &commInode{task: task}
   259  	inode.DynamicBytesFile.Init(ctx, task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, &commData{task: task}, perm)
   260  	return inode
   261  }
   262  
   263  func (i *commInode) CheckPermissions(ctx context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error {
   264  	// This file can always be read or written by members of the same thread
   265  	// group. See fs/proc/base.c:proc_tid_comm_permission.
   266  	//
   267  	// N.B. This check is currently a no-op as we don't yet support writing and
   268  	// this file is world-readable anyways.
   269  	t := kernel.TaskFromContext(ctx)
   270  	if t != nil && t.ThreadGroup() == i.task.ThreadGroup() && !ats.MayExec() {
   271  		return nil
   272  	}
   273  
   274  	return i.DynamicBytesFile.CheckPermissions(ctx, creds, ats)
   275  }
   276  
   277  // commData implements vfs.DynamicBytesSource for /proc/[pid]/comm.
   278  //
   279  // +stateify savable
   280  type commData struct {
   281  	kernfs.DynamicBytesFile
   282  
   283  	task *kernel.Task
   284  }
   285  
   286  var _ dynamicInode = (*commData)(nil)
   287  
   288  // Generate implements vfs.DynamicBytesSource.Generate.
   289  func (d *commData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   290  	buf.WriteString(d.task.Name())
   291  	buf.WriteString("\n")
   292  	return nil
   293  }
   294  
// idMapData implements vfs.WritableDynamicBytesSource for
// /proc/[pid]/{gid_map|uid_map}.
//
// +stateify savable
type idMapData struct {
	kernfs.DynamicBytesFile

	// task is the task whose user namespace mappings are read and written.
	task *kernel.Task
	// gids selects the GID map when true and the UID map when false.
	gids bool
}

var _ dynamicInode = (*idMapData)(nil)
   307  
   308  // Generate implements vfs.WritableDynamicBytesSource.Generate.
   309  func (d *idMapData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   310  	var entries []auth.IDMapEntry
   311  	if d.gids {
   312  		entries = d.task.UserNamespace().GIDMap()
   313  	} else {
   314  		entries = d.task.UserNamespace().UIDMap()
   315  	}
   316  	for _, e := range entries {
   317  		fmt.Fprintf(buf, "%10d %10d %10d\n", e.FirstID, e.FirstParentID, e.Length)
   318  	}
   319  	return nil
   320  }
   321  
   322  // Write implements vfs.WritableDynamicBytesSource.Write.
   323  func (d *idMapData) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
   324  	// "In addition, the number of bytes written to the file must be less than
   325  	// the system page size, and the write must be performed at the start of
   326  	// the file ..." - user_namespaces(7)
   327  	srclen := src.NumBytes()
   328  	if srclen >= hostarch.PageSize || offset != 0 {
   329  		return 0, linuxerr.EINVAL
   330  	}
   331  	b := make([]byte, srclen)
   332  	if _, err := src.CopyIn(ctx, b); err != nil {
   333  		return 0, err
   334  	}
   335  
   336  	// Truncate from the first NULL byte.
   337  	var nul int64
   338  	nul = int64(bytes.IndexByte(b, 0))
   339  	if nul == -1 {
   340  		nul = srclen
   341  	}
   342  	b = b[:nul]
   343  	// Remove the last \n.
   344  	if nul >= 1 && b[nul-1] == '\n' {
   345  		b = b[:nul-1]
   346  	}
   347  	lines := bytes.SplitN(b, []byte("\n"), maxIDMapLines+1)
   348  	if len(lines) > maxIDMapLines {
   349  		return 0, linuxerr.EINVAL
   350  	}
   351  
   352  	entries := make([]auth.IDMapEntry, len(lines))
   353  	for i, l := range lines {
   354  		var e auth.IDMapEntry
   355  		_, err := fmt.Sscan(string(l), &e.FirstID, &e.FirstParentID, &e.Length)
   356  		if err != nil {
   357  			return 0, linuxerr.EINVAL
   358  		}
   359  		entries[i] = e
   360  	}
   361  	var err error
   362  	if d.gids {
   363  		err = d.task.UserNamespace().SetGIDMap(ctx, entries)
   364  	} else {
   365  		err = d.task.UserNamespace().SetUIDMap(ctx, entries)
   366  	}
   367  	if err != nil {
   368  		return 0, err
   369  	}
   370  
   371  	// On success, Linux's kernel/user_namespace.c:map_write() always returns
   372  	// count, even if fewer bytes were used.
   373  	return int64(srclen), nil
   374  }
   375  
// Compile-time check that memInode satisfies kernfs.Inode.
var _ kernfs.Inode = (*memInode)(nil)

// memInode implements kernfs.Inode for /proc/[pid]/mem.
//
// +stateify savable
type memInode struct {
	kernfs.InodeAttrs
	kernfs.InodeNoStatFS
	kernfs.InodeNoopRefCount
	kernfs.InodeNotDirectory
	kernfs.InodeNotSymlink

	// task is the task whose address space is read through this inode; locks
	// is handed to each memFD's LockFD in memFD.Init.
	task  *kernel.Task
	locks vfs.FileLocks
}
   391  
   392  func (fs *filesystem) newMemInode(ctx context.Context, task *kernel.Task, ino uint64, perm linux.FileMode) kernfs.Inode {
   393  	// Note: credentials are overridden by taskOwnedInode.
   394  	inode := &memInode{task: task}
   395  	inode.init(ctx, task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, perm)
   396  	return &taskOwnedInode{Inode: inode, owner: task}
   397  }
   398  
   399  func (f *memInode) init(ctx context.Context, creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, perm linux.FileMode) {
   400  	if perm&^linux.PermissionsMask != 0 {
   401  		panic(fmt.Sprintf("Only permission mask must be set: %x", perm&linux.PermissionsMask))
   402  	}
   403  	f.InodeAttrs.Init(ctx, creds, devMajor, devMinor, ino, linux.ModeRegular|perm)
   404  }
   405  
   406  // Open implements kernfs.Inode.Open.
   407  func (f *memInode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
   408  	// TODO(github.com/SagerNet/issue/260): Add check for PTRACE_MODE_ATTACH_FSCREDS
   409  	// Permission to read this file is governed by PTRACE_MODE_ATTACH_FSCREDS
   410  	// Since we dont implement setfsuid/setfsgid we can just use PTRACE_MODE_ATTACH
   411  	if !kernel.ContextCanTrace(ctx, f.task, true) {
   412  		return nil, linuxerr.EACCES
   413  	}
   414  	if err := checkTaskState(f.task); err != nil {
   415  		return nil, err
   416  	}
   417  	fd := &memFD{}
   418  	if err := fd.Init(rp.Mount(), d, f, opts.Flags); err != nil {
   419  		return nil, err
   420  	}
   421  	return &fd.vfsfd, nil
   422  }
   423  
   424  // SetStat implements kernfs.Inode.SetStat.
   425  func (*memInode) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
   426  	return linuxerr.EPERM
   427  }
   428  
// Compile-time check that memFD satisfies vfs.FileDescriptionImpl.
var _ vfs.FileDescriptionImpl = (*memFD)(nil)

// memFD implements vfs.FileDescriptionImpl for /proc/[pid]/mem.
//
// +stateify savable
type memFD struct {
	vfsfd vfs.FileDescription
	vfs.FileDescriptionDefaultImpl
	vfs.LockFD

	// inode is the memInode this file description was opened on; it is set
	// by Init.
	inode *memInode

	// mu guards the fields below.
	mu     sync.Mutex `state:"nosave"`
	// offset is the current file position used by Read and updated by Seek.
	offset int64
}
   445  
   446  // Init initializes memFD.
   447  func (fd *memFD) Init(m *vfs.Mount, d *kernfs.Dentry, inode *memInode, flags uint32) error {
   448  	fd.LockFD.Init(&inode.locks)
   449  	if err := fd.vfsfd.Init(fd, flags, m, d.VFSDentry(), &vfs.FileDescriptionOptions{}); err != nil {
   450  		return err
   451  	}
   452  	fd.inode = inode
   453  	return nil
   454  }
   455  
   456  // Seek implements vfs.FileDescriptionImpl.Seek.
   457  func (fd *memFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
   458  	fd.mu.Lock()
   459  	defer fd.mu.Unlock()
   460  	switch whence {
   461  	case linux.SEEK_SET:
   462  	case linux.SEEK_CUR:
   463  		offset += fd.offset
   464  	default:
   465  		return 0, linuxerr.EINVAL
   466  	}
   467  	if offset < 0 {
   468  		return 0, linuxerr.EINVAL
   469  	}
   470  	fd.offset = offset
   471  	return offset, nil
   472  }
   473  
   474  // PRead implements vfs.FileDescriptionImpl.PRead.
   475  func (fd *memFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
   476  	if dst.NumBytes() == 0 {
   477  		return 0, nil
   478  	}
   479  	m, err := getMMIncRef(fd.inode.task)
   480  	if err != nil {
   481  		return 0, err
   482  	}
   483  	defer m.DecUsers(ctx)
   484  	// Buffer the read data because of MM locks
   485  	buf := make([]byte, dst.NumBytes())
   486  	n, readErr := m.CopyIn(ctx, hostarch.Addr(offset), buf, usermem.IOOpts{IgnorePermissions: true})
   487  	if n > 0 {
   488  		if _, err := dst.CopyOut(ctx, buf[:n]); err != nil {
   489  			return 0, syserror.EFAULT
   490  		}
   491  		return int64(n), nil
   492  	}
   493  	if readErr != nil {
   494  		return 0, syserror.EIO
   495  	}
   496  	return 0, nil
   497  }
   498  
   499  // Read implements vfs.FileDescriptionImpl.Read.
   500  func (fd *memFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
   501  	fd.mu.Lock()
   502  	n, err := fd.PRead(ctx, dst, fd.offset, opts)
   503  	fd.offset += n
   504  	fd.mu.Unlock()
   505  	return n, err
   506  }
   507  
   508  // Stat implements vfs.FileDescriptionImpl.Stat.
   509  func (fd *memFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
   510  	fs := fd.vfsfd.VirtualDentry().Mount().Filesystem()
   511  	return fd.inode.Stat(ctx, fs, opts)
   512  }
   513  
   514  // SetStat implements vfs.FileDescriptionImpl.SetStat.
   515  func (fd *memFD) SetStat(context.Context, vfs.SetStatOptions) error {
   516  	return linuxerr.EPERM
   517  }
   518  
   519  // Release implements vfs.FileDescriptionImpl.Release.
   520  func (fd *memFD) Release(context.Context) {}
   521  
   522  // mapsData implements vfs.DynamicBytesSource for /proc/[pid]/maps.
   523  //
   524  // +stateify savable
   525  type mapsData struct {
   526  	kernfs.DynamicBytesFile
   527  
   528  	task *kernel.Task
   529  }
   530  
   531  var _ dynamicInode = (*mapsData)(nil)
   532  
   533  // Generate implements vfs.DynamicBytesSource.Generate.
   534  func (d *mapsData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   535  	if mm := getMM(d.task); mm != nil {
   536  		mm.ReadMapsDataInto(ctx, buf)
   537  	}
   538  	return nil
   539  }
   540  
   541  // smapsData implements vfs.DynamicBytesSource for /proc/[pid]/smaps.
   542  //
   543  // +stateify savable
   544  type smapsData struct {
   545  	kernfs.DynamicBytesFile
   546  
   547  	task *kernel.Task
   548  }
   549  
   550  var _ dynamicInode = (*smapsData)(nil)
   551  
   552  // Generate implements vfs.DynamicBytesSource.Generate.
   553  func (d *smapsData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   554  	if mm := getMM(d.task); mm != nil {
   555  		mm.ReadSmapsDataInto(ctx, buf)
   556  	}
   557  	return nil
   558  }
   559  
   560  // +stateify savable
   561  type taskStatData struct {
   562  	kernfs.DynamicBytesFile
   563  
   564  	task *kernel.Task
   565  
   566  	// If tgstats is true, accumulate fault stats (not implemented) and CPU
   567  	// time across all tasks in t's thread group.
   568  	tgstats bool
   569  
   570  	// pidns is the PID namespace associated with the proc filesystem that
   571  	// includes the file using this statData.
   572  	pidns *kernel.PIDNamespace
   573  }
   574  
   575  var _ dynamicInode = (*taskStatData)(nil)
   576  
   577  // Generate implements vfs.DynamicBytesSource.Generate.
   578  func (s *taskStatData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   579  	fmt.Fprintf(buf, "%d ", s.pidns.IDOfTask(s.task))
   580  	fmt.Fprintf(buf, "(%s) ", s.task.Name())
   581  	fmt.Fprintf(buf, "%c ", s.task.StateStatus()[0])
   582  	ppid := kernel.ThreadID(0)
   583  	if parent := s.task.Parent(); parent != nil {
   584  		ppid = s.pidns.IDOfThreadGroup(parent.ThreadGroup())
   585  	}
   586  	fmt.Fprintf(buf, "%d ", ppid)
   587  	fmt.Fprintf(buf, "%d ", s.pidns.IDOfProcessGroup(s.task.ThreadGroup().ProcessGroup()))
   588  	fmt.Fprintf(buf, "%d ", s.pidns.IDOfSession(s.task.ThreadGroup().Session()))
   589  	fmt.Fprintf(buf, "0 0 " /* tty_nr tpgid */)
   590  	fmt.Fprintf(buf, "0 " /* flags */)
   591  	fmt.Fprintf(buf, "0 0 0 0 " /* minflt cminflt majflt cmajflt */)
   592  	var cputime usage.CPUStats
   593  	if s.tgstats {
   594  		cputime = s.task.ThreadGroup().CPUStats()
   595  	} else {
   596  		cputime = s.task.CPUStats()
   597  	}
   598  	fmt.Fprintf(buf, "%d %d ", linux.ClockTFromDuration(cputime.UserTime), linux.ClockTFromDuration(cputime.SysTime))
   599  	cputime = s.task.ThreadGroup().JoinedChildCPUStats()
   600  	fmt.Fprintf(buf, "%d %d ", linux.ClockTFromDuration(cputime.UserTime), linux.ClockTFromDuration(cputime.SysTime))
   601  	fmt.Fprintf(buf, "%d %d ", s.task.Priority(), s.task.Niceness())
   602  	fmt.Fprintf(buf, "%d ", s.task.ThreadGroup().Count())
   603  
   604  	// itrealvalue. Since kernel 2.6.17, this field is no longer
   605  	// maintained, and is hard coded as 0.
   606  	fmt.Fprintf(buf, "0 ")
   607  
   608  	// Start time is relative to boot time, expressed in clock ticks.
   609  	fmt.Fprintf(buf, "%d ", linux.ClockTFromDuration(s.task.StartTime().Sub(s.task.Kernel().Timekeeper().BootTime())))
   610  
   611  	var vss, rss uint64
   612  	s.task.WithMuLocked(func(t *kernel.Task) {
   613  		if mm := t.MemoryManager(); mm != nil {
   614  			vss = mm.VirtualMemorySize()
   615  			rss = mm.ResidentSetSize()
   616  		}
   617  	})
   618  	fmt.Fprintf(buf, "%d %d ", vss, rss/hostarch.PageSize)
   619  
   620  	// rsslim.
   621  	fmt.Fprintf(buf, "%d ", s.task.ThreadGroup().Limits().Get(limits.Rss).Cur)
   622  
   623  	fmt.Fprintf(buf, "0 0 0 0 0 " /* startcode endcode startstack kstkesp kstkeip */)
   624  	fmt.Fprintf(buf, "0 0 0 0 0 " /* signal blocked sigignore sigcatch wchan */)
   625  	fmt.Fprintf(buf, "0 0 " /* nswap cnswap */)
   626  	terminationSignal := linux.Signal(0)
   627  	if s.task == s.task.ThreadGroup().Leader() {
   628  		terminationSignal = s.task.ThreadGroup().TerminationSignal()
   629  	}
   630  	fmt.Fprintf(buf, "%d ", terminationSignal)
   631  	fmt.Fprintf(buf, "0 0 0 " /* processor rt_priority policy */)
   632  	fmt.Fprintf(buf, "0 0 0 " /* delayacct_blkio_ticks guest_time cguest_time */)
   633  	fmt.Fprintf(buf, "0 0 0 0 0 0 0 " /* start_data end_data start_brk arg_start arg_end env_start env_end */)
   634  	fmt.Fprintf(buf, "0\n" /* exit_code */)
   635  
   636  	return nil
   637  }
   638  
   639  // statmData implements vfs.DynamicBytesSource for /proc/[pid]/statm.
   640  //
   641  // +stateify savable
   642  type statmData struct {
   643  	kernfs.DynamicBytesFile
   644  
   645  	task *kernel.Task
   646  }
   647  
   648  var _ dynamicInode = (*statmData)(nil)
   649  
   650  // Generate implements vfs.DynamicBytesSource.Generate.
   651  func (s *statmData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   652  	var vss, rss uint64
   653  	s.task.WithMuLocked(func(t *kernel.Task) {
   654  		if mm := t.MemoryManager(); mm != nil {
   655  			vss = mm.VirtualMemorySize()
   656  			rss = mm.ResidentSetSize()
   657  		}
   658  	})
   659  
   660  	fmt.Fprintf(buf, "%d %d 0 0 0 0 0\n", vss/hostarch.PageSize, rss/hostarch.PageSize)
   661  	return nil
   662  }
   663  
   664  // statusData implements vfs.DynamicBytesSource for /proc/[pid]/status.
   665  //
   666  // +stateify savable
   667  type statusData struct {
   668  	kernfs.DynamicBytesFile
   669  
   670  	task  *kernel.Task
   671  	pidns *kernel.PIDNamespace
   672  }
   673  
   674  var _ dynamicInode = (*statusData)(nil)
   675  
   676  // Generate implements vfs.DynamicBytesSource.Generate.
   677  func (s *statusData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   678  	fmt.Fprintf(buf, "Name:\t%s\n", s.task.Name())
   679  	fmt.Fprintf(buf, "State:\t%s\n", s.task.StateStatus())
   680  	fmt.Fprintf(buf, "Tgid:\t%d\n", s.pidns.IDOfThreadGroup(s.task.ThreadGroup()))
   681  	fmt.Fprintf(buf, "Pid:\t%d\n", s.pidns.IDOfTask(s.task))
   682  	ppid := kernel.ThreadID(0)
   683  	if parent := s.task.Parent(); parent != nil {
   684  		ppid = s.pidns.IDOfThreadGroup(parent.ThreadGroup())
   685  	}
   686  	fmt.Fprintf(buf, "PPid:\t%d\n", ppid)
   687  	tpid := kernel.ThreadID(0)
   688  	if tracer := s.task.Tracer(); tracer != nil {
   689  		tpid = s.pidns.IDOfTask(tracer)
   690  	}
   691  	fmt.Fprintf(buf, "TracerPid:\t%d\n", tpid)
   692  	var fds int
   693  	var vss, rss, data uint64
   694  	s.task.WithMuLocked(func(t *kernel.Task) {
   695  		if fdTable := t.FDTable(); fdTable != nil {
   696  			fds = fdTable.CurrentMaxFDs()
   697  		}
   698  		if mm := t.MemoryManager(); mm != nil {
   699  			vss = mm.VirtualMemorySize()
   700  			rss = mm.ResidentSetSize()
   701  			data = mm.VirtualDataSize()
   702  		}
   703  	})
   704  	fmt.Fprintf(buf, "FDSize:\t%d\n", fds)
   705  	fmt.Fprintf(buf, "VmSize:\t%d kB\n", vss>>10)
   706  	fmt.Fprintf(buf, "VmRSS:\t%d kB\n", rss>>10)
   707  	fmt.Fprintf(buf, "VmData:\t%d kB\n", data>>10)
   708  	fmt.Fprintf(buf, "Threads:\t%d\n", s.task.ThreadGroup().Count())
   709  	creds := s.task.Credentials()
   710  	fmt.Fprintf(buf, "CapInh:\t%016x\n", creds.InheritableCaps)
   711  	fmt.Fprintf(buf, "CapPrm:\t%016x\n", creds.PermittedCaps)
   712  	fmt.Fprintf(buf, "CapEff:\t%016x\n", creds.EffectiveCaps)
   713  	fmt.Fprintf(buf, "CapBnd:\t%016x\n", creds.BoundingCaps)
   714  	fmt.Fprintf(buf, "Seccomp:\t%d\n", s.task.SeccompMode())
   715  	// We unconditionally report a single NUMA node. See
   716  	// pkg/sentry/syscalls/linux/sys_mempolicy.go.
   717  	fmt.Fprintf(buf, "Mems_allowed:\t1\n")
   718  	fmt.Fprintf(buf, "Mems_allowed_list:\t0\n")
   719  	return nil
   720  }
   721  
// ioUsage is the /proc/[pid]/io and /proc/[pid]/task/[tid]/io data provider.
// It is embedded in ioData, whose Generate method formats the returned
// counters.
type ioUsage interface {
	// IOUsage returns the io usage data.
	IOUsage() *usage.IO
}
   727  
   728  // +stateify savable
   729  type ioData struct {
   730  	kernfs.DynamicBytesFile
   731  
   732  	ioUsage
   733  }
   734  
   735  var _ dynamicInode = (*ioData)(nil)
   736  
   737  // Generate implements vfs.DynamicBytesSource.Generate.
   738  func (i *ioData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   739  	io := usage.IO{}
   740  	io.Accumulate(i.IOUsage())
   741  
   742  	fmt.Fprintf(buf, "char: %d\n", io.CharsRead)
   743  	fmt.Fprintf(buf, "wchar: %d\n", io.CharsWritten)
   744  	fmt.Fprintf(buf, "syscr: %d\n", io.ReadSyscalls)
   745  	fmt.Fprintf(buf, "syscw: %d\n", io.WriteSyscalls)
   746  	fmt.Fprintf(buf, "read_bytes: %d\n", io.BytesRead)
   747  	fmt.Fprintf(buf, "write_bytes: %d\n", io.BytesWritten)
   748  	fmt.Fprintf(buf, "cancelled_write_bytes: %d\n", io.BytesWriteCancelled)
   749  	return nil
   750  }
   751  
// oomScoreAdj is a stub of the /proc/<pid>/oom_score_adj file. Reads report
// the task's current OOM score adjustment and writes update it.
//
// +stateify savable
type oomScoreAdj struct {
	kernfs.DynamicBytesFile

	task *kernel.Task
}

// Compile-time check that oomScoreAdj is a writable dynamic bytes source.
var _ vfs.WritableDynamicBytesSource = (*oomScoreAdj)(nil)
   762  
   763  // Generate implements vfs.DynamicBytesSource.Generate.
   764  func (o *oomScoreAdj) Generate(ctx context.Context, buf *bytes.Buffer) error {
   765  	if o.task.ExitState() == kernel.TaskExitDead {
   766  		return syserror.ESRCH
   767  	}
   768  	fmt.Fprintf(buf, "%d\n", o.task.OOMScoreAdj())
   769  	return nil
   770  }
   771  
   772  // Write implements vfs.WritableDynamicBytesSource.Write.
   773  func (o *oomScoreAdj) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
   774  	if src.NumBytes() == 0 {
   775  		return 0, nil
   776  	}
   777  
   778  	// Limit input size so as not to impact performance if input size is large.
   779  	src = src.TakeFirst(hostarch.PageSize - 1)
   780  
   781  	var v int32
   782  	n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts)
   783  	if err != nil {
   784  		return 0, err
   785  	}
   786  
   787  	if o.task.ExitState() == kernel.TaskExitDead {
   788  		return 0, syserror.ESRCH
   789  	}
   790  	if err := o.task.SetOOMScoreAdj(v); err != nil {
   791  		return 0, err
   792  	}
   793  
   794  	return n, nil
   795  }
   796  
// exeSymlink is a symlink for the /proc/[pid]/exe file. Its target is the
// executable of the task's MemoryManager (see Getlink).
//
// +stateify savable
type exeSymlink struct {
	implStatFS
	kernfs.InodeAttrs
	kernfs.InodeNoopRefCount
	kernfs.InodeSymlink

	task *kernel.Task
}

// Compile-time check that exeSymlink satisfies kernfs.Inode.
var _ kernfs.Inode = (*exeSymlink)(nil)
   810  
   811  func (fs *filesystem) newExeSymlink(ctx context.Context, task *kernel.Task, ino uint64) kernfs.Inode {
   812  	inode := &exeSymlink{task: task}
   813  	inode.Init(ctx, task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, linux.ModeSymlink|0777)
   814  	return inode
   815  }
   816  
   817  // Readlink implements kernfs.Inode.Readlink.
   818  func (s *exeSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, error) {
   819  	exec, _, err := s.Getlink(ctx, nil)
   820  	if err != nil {
   821  		return "", err
   822  	}
   823  	defer exec.DecRef(ctx)
   824  
   825  	root := vfs.RootFromContext(ctx)
   826  	if !root.Ok() {
   827  		// It could have raced with process deletion.
   828  		return "", syserror.ESRCH
   829  	}
   830  	defer root.DecRef(ctx)
   831  
   832  	vfsObj := exec.Mount().Filesystem().VirtualFilesystem()
   833  	name, _ := vfsObj.PathnameWithDeleted(ctx, root, exec)
   834  	return name, nil
   835  }
   836  
   837  // Getlink implements kernfs.Inode.Getlink.
   838  func (s *exeSymlink) Getlink(ctx context.Context, _ *vfs.Mount) (vfs.VirtualDentry, string, error) {
   839  	if !kernel.ContextCanTrace(ctx, s.task, false) {
   840  		return vfs.VirtualDentry{}, "", linuxerr.EACCES
   841  	}
   842  	if err := checkTaskState(s.task); err != nil {
   843  		return vfs.VirtualDentry{}, "", err
   844  	}
   845  
   846  	var err error
   847  	var exec fsbridge.File
   848  	s.task.WithMuLocked(func(t *kernel.Task) {
   849  		mm := t.MemoryManager()
   850  		if mm == nil {
   851  			err = linuxerr.EACCES
   852  			return
   853  		}
   854  
   855  		// The MemoryManager may be destroyed, in which case
   856  		// MemoryManager.destroy will simply set the executable to nil
   857  		// (with locks held).
   858  		exec = mm.Executable()
   859  		if exec == nil {
   860  			err = syserror.ESRCH
   861  		}
   862  	})
   863  	if err != nil {
   864  		return vfs.VirtualDentry{}, "", err
   865  	}
   866  	defer exec.DecRef(ctx)
   867  
   868  	vd := exec.(*fsbridge.VFSFile).FileDescription().VirtualDentry()
   869  	vd.IncRef()
   870  	return vd, "", nil
   871  }
   872  
// cwdSymlink is a symlink for the /proc/[pid]/cwd file.
//
// +stateify savable
type cwdSymlink struct {
	implStatFS
	kernfs.InodeAttrs
	kernfs.InodeNoopRefCount
	kernfs.InodeSymlink

	// task is the task whose working directory Getlink and Readlink resolve.
	task *kernel.Task
}

// Compile-time check that cwdSymlink implements kernfs.Inode.
var _ kernfs.Inode = (*cwdSymlink)(nil)
   886  
   887  func (fs *filesystem) newCwdSymlink(ctx context.Context, task *kernel.Task, ino uint64) kernfs.Inode {
   888  	inode := &cwdSymlink{task: task}
   889  	inode.Init(ctx, task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, linux.ModeSymlink|0777)
   890  	return inode
   891  }
   892  
   893  // Readlink implements kernfs.Inode.Readlink.
   894  func (s *cwdSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, error) {
   895  	cwd, _, err := s.Getlink(ctx, nil)
   896  	if err != nil {
   897  		return "", err
   898  	}
   899  	defer cwd.DecRef(ctx)
   900  
   901  	root := vfs.RootFromContext(ctx)
   902  	if !root.Ok() {
   903  		// It could have raced with process deletion.
   904  		return "", syserror.ESRCH
   905  	}
   906  	defer root.DecRef(ctx)
   907  
   908  	vfsObj := cwd.Mount().Filesystem().VirtualFilesystem()
   909  	name, _ := vfsObj.PathnameWithDeleted(ctx, root, cwd)
   910  	return name, nil
   911  }
   912  
   913  // Getlink implements kernfs.Inode.Getlink.
   914  func (s *cwdSymlink) Getlink(ctx context.Context, _ *vfs.Mount) (vfs.VirtualDentry, string, error) {
   915  	if !kernel.ContextCanTrace(ctx, s.task, false) {
   916  		return vfs.VirtualDentry{}, "", linuxerr.EACCES
   917  	}
   918  	if err := checkTaskState(s.task); err != nil {
   919  		return vfs.VirtualDentry{}, "", err
   920  	}
   921  	cwd := s.task.FSContext().WorkingDirectoryVFS2()
   922  	if !cwd.Ok() {
   923  		// It could have raced with process deletion.
   924  		return vfs.VirtualDentry{}, "", syserror.ESRCH
   925  	}
   926  	return cwd, "", nil
   927  }
   928  
// mountInfoData is used to implement /proc/[pid]/mountinfo.
//
// +stateify savable
type mountInfoData struct {
	kernfs.DynamicBytesFile

	// task is the task whose root directory selects the mounts to list.
	task *kernel.Task
}

// Compile-time check that mountInfoData implements dynamicInode.
var _ dynamicInode = (*mountInfoData)(nil)
   939  
   940  // Generate implements vfs.DynamicBytesSource.Generate.
   941  func (i *mountInfoData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   942  	var fsctx *kernel.FSContext
   943  	i.task.WithMuLocked(func(t *kernel.Task) {
   944  		fsctx = t.FSContext()
   945  	})
   946  	if fsctx == nil {
   947  		// The task has been destroyed. Nothing to show here.
   948  		return nil
   949  	}
   950  	rootDir := fsctx.RootDirectoryVFS2()
   951  	if !rootDir.Ok() {
   952  		// Root has been destroyed. Don't try to read mounts.
   953  		return nil
   954  	}
   955  	defer rootDir.DecRef(ctx)
   956  	i.task.Kernel().VFS().GenerateProcMountInfo(ctx, rootDir, buf)
   957  	return nil
   958  }
   959  
// mountsData is used to implement /proc/[pid]/mounts.
//
// +stateify savable
type mountsData struct {
	kernfs.DynamicBytesFile

	// task is the task whose root directory selects the mounts to list.
	task *kernel.Task
}

// Compile-time check that mountsData implements dynamicInode.
var _ dynamicInode = (*mountsData)(nil)
   970  
   971  // Generate implements vfs.DynamicBytesSource.Generate.
   972  func (i *mountsData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   973  	var fsctx *kernel.FSContext
   974  	i.task.WithMuLocked(func(t *kernel.Task) {
   975  		fsctx = t.FSContext()
   976  	})
   977  	if fsctx == nil {
   978  		// The task has been destroyed. Nothing to show here.
   979  		return nil
   980  	}
   981  	rootDir := fsctx.RootDirectoryVFS2()
   982  	if !rootDir.Ok() {
   983  		// Root has been destroyed. Don't try to read mounts.
   984  		return nil
   985  	}
   986  	defer rootDir.DecRef(ctx)
   987  	i.task.Kernel().VFS().GenerateProcMounts(ctx, rootDir, buf)
   988  	return nil
   989  }
   990  
// namespaceSymlink is the symlink inode built by newNamespaceSymlink. Its
// static target has the form "<namespace name>:[<inode number>]".
//
// +stateify savable
type namespaceSymlink struct {
	kernfs.StaticSymlink

	// task is the task this symlink belongs to; Readlink and Getlink check
	// its state before resolving the link.
	task *kernel.Task
}
   997  
   998  func (fs *filesystem) newNamespaceSymlink(ctx context.Context, task *kernel.Task, ino uint64, ns string) kernfs.Inode {
   999  	// Namespace symlinks should contain the namespace name and the inode number
  1000  	// for the namespace instance, so for example user:[123456]. We currently fake
  1001  	// the inode number by sticking the symlink inode in its place.
  1002  	target := fmt.Sprintf("%s:[%d]", ns, ino)
  1003  
  1004  	inode := &namespaceSymlink{task: task}
  1005  	// Note: credentials are overridden by taskOwnedInode.
  1006  	inode.Init(ctx, task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, target)
  1007  
  1008  	taskInode := &taskOwnedInode{Inode: inode, owner: task}
  1009  	return taskInode
  1010  }
  1011  
  1012  // Readlink implements kernfs.Inode.Readlink.
  1013  func (s *namespaceSymlink) Readlink(ctx context.Context, mnt *vfs.Mount) (string, error) {
  1014  	if err := checkTaskState(s.task); err != nil {
  1015  		return "", err
  1016  	}
  1017  	return s.StaticSymlink.Readlink(ctx, mnt)
  1018  }
  1019  
  1020  // Getlink implements kernfs.Inode.Getlink.
  1021  func (s *namespaceSymlink) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.VirtualDentry, string, error) {
  1022  	if err := checkTaskState(s.task); err != nil {
  1023  		return vfs.VirtualDentry{}, "", err
  1024  	}
  1025  
  1026  	// Create a synthetic inode to represent the namespace.
  1027  	fs := mnt.Filesystem().Impl().(*filesystem)
  1028  	nsInode := &namespaceInode{}
  1029  	nsInode.Init(ctx, auth.CredentialsFromContext(ctx), linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0444)
  1030  	dentry := &kernfs.Dentry{}
  1031  	dentry.Init(&fs.Filesystem, nsInode)
  1032  	vd := vfs.MakeVirtualDentry(mnt, dentry.VFSDentry())
  1033  	// Only IncRef vd.Mount() because vd.Dentry() already holds a ref of 1.
  1034  	mnt.IncRef()
  1035  	return vd, "", nil
  1036  }
  1037  
// namespaceInode is a synthetic inode created to represent a namespace in
// /proc/[pid]/ns/*.
//
// +stateify savable
type namespaceInode struct {
	implStatFS
	kernfs.InodeAttrs
	kernfs.InodeNoopRefCount
	kernfs.InodeNotDirectory
	kernfs.InodeNotSymlink

	// locks is handed to the LockFD of every namespaceFD opened on this
	// inode.
	locks vfs.FileLocks
}

// Compile-time check that namespaceInode implements kernfs.Inode.
var _ kernfs.Inode = (*namespaceInode)(nil)
  1053  
  1054  // Init initializes a namespace inode.
  1055  func (i *namespaceInode) Init(ctx context.Context, creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, perm linux.FileMode) {
  1056  	if perm&^linux.PermissionsMask != 0 {
  1057  		panic(fmt.Sprintf("Only permission mask must be set: %x", perm&linux.PermissionsMask))
  1058  	}
  1059  	i.InodeAttrs.Init(ctx, creds, devMajor, devMinor, ino, linux.ModeRegular|perm)
  1060  }
  1061  
  1062  // Open implements kernfs.Inode.Open.
  1063  func (i *namespaceInode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
  1064  	fd := &namespaceFD{inode: i}
  1065  	i.IncRef()
  1066  	fd.LockFD.Init(&i.locks)
  1067  	if err := fd.vfsfd.Init(fd, opts.Flags, rp.Mount(), d.VFSDentry(), &vfs.FileDescriptionOptions{}); err != nil {
  1068  		return nil, err
  1069  	}
  1070  	return &fd.vfsfd, nil
  1071  }
  1072  
// namespaceFD is a synthetic file that represents a namespace in
// /proc/[pid]/ns/*.
//
// +stateify savable
type namespaceFD struct {
	vfs.FileDescriptionDefaultImpl
	vfs.LockFD

	// vfsfd is the vfs.FileDescription returned to callers of
	// namespaceInode.Open.
	vfsfd vfs.FileDescription
	// inode is the namespaceInode this FD was opened on; Stat and SetStat
	// are forwarded to it.
	inode *namespaceInode
}

// Compile-time check that namespaceFD implements vfs.FileDescriptionImpl.
var _ vfs.FileDescriptionImpl = (*namespaceFD)(nil)
  1086  
  1087  // Stat implements vfs.FileDescriptionImpl.Stat.
  1088  func (fd *namespaceFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
  1089  	vfs := fd.vfsfd.VirtualDentry().Mount().Filesystem()
  1090  	return fd.inode.Stat(ctx, vfs, opts)
  1091  }
  1092  
  1093  // SetStat implements vfs.FileDescriptionImpl.SetStat.
  1094  func (fd *namespaceFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
  1095  	vfs := fd.vfsfd.VirtualDentry().Mount().Filesystem()
  1096  	creds := auth.CredentialsFromContext(ctx)
  1097  	return fd.inode.SetStat(ctx, vfs, creds, opts)
  1098  }
  1099  
// Release implements vfs.FileDescriptionImpl.Release.
//
// It drops the inode reference that namespaceInode.Open took when this FD was
// created.
func (fd *namespaceFD) Release(ctx context.Context) {
	fd.inode.DecRef(ctx)
}
  1104  
// taskCgroupData generates data for /proc/[pid]/cgroup.
//
// +stateify savable
type taskCgroupData struct {
	dynamicBytesFileSetAttr
	// task is the task whose cgroup membership is reported.
	task *kernel.Task
}

// Compile-time check that taskCgroupData implements dynamicInode.
var _ dynamicInode = (*taskCgroupData)(nil)
  1114  
  1115  // Generate implements vfs.DynamicBytesSource.Generate.
  1116  func (d *taskCgroupData) Generate(ctx context.Context, buf *bytes.Buffer) error {
  1117  	// When a task is existing on Linux, a task's cgroup set is cleared and
  1118  	// reset to the initial cgroup set, which is essentially the set of root
  1119  	// cgroups. Because of this, the /proc/<pid>/cgroup file is always readable
  1120  	// on Linux throughout a task's lifetime.
  1121  	//
  1122  	// The sentry removes tasks from cgroups during the exit process, but
  1123  	// doesn't move them into an initial cgroup set, so partway through task
  1124  	// exit this file show a task is in no cgroups, which is incorrect. Instead,
  1125  	// once a task has left its cgroups, we return an error.
  1126  	if d.task.ExitState() >= kernel.TaskExitInitiated {
  1127  		return syserror.ESRCH
  1128  	}
  1129  
  1130  	d.task.GenerateProcTaskCgroup(buf)
  1131  	return nil
  1132  }