github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/proc/tasks_files.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package proc
    16  
    17  import (
    18  	"bytes"
    19  	"fmt"
    20  	"strconv"
    21  
    22  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    23  	"github.com/SagerNet/gvisor/pkg/context"
    24  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    25  	"github.com/SagerNet/gvisor/pkg/hostarch"
    26  	"github.com/SagerNet/gvisor/pkg/sentry/fsimpl/kernfs"
    27  	"github.com/SagerNet/gvisor/pkg/sentry/kernel"
    28  	"github.com/SagerNet/gvisor/pkg/sentry/kernel/auth"
    29  	"github.com/SagerNet/gvisor/pkg/sentry/kernel/time"
    30  	"github.com/SagerNet/gvisor/pkg/sentry/usage"
    31  	"github.com/SagerNet/gvisor/pkg/sentry/vfs"
    32  	"github.com/SagerNet/gvisor/pkg/syserror"
    33  )
    34  
    35  // +stateify savable
    36  type selfSymlink struct {
    37  	implStatFS
    38  	kernfs.InodeAttrs
    39  	kernfs.InodeNoopRefCount
    40  	kernfs.InodeSymlink
    41  
    42  	pidns *kernel.PIDNamespace
    43  }
    44  
    45  var _ kernfs.Inode = (*selfSymlink)(nil)
    46  
    47  func (i *tasksInode) newSelfSymlink(ctx context.Context, creds *auth.Credentials) kernfs.Inode {
    48  	inode := &selfSymlink{pidns: i.pidns}
    49  	inode.Init(ctx, creds, linux.UNNAMED_MAJOR, i.fs.devMinor, i.fs.NextIno(), linux.ModeSymlink|0777)
    50  	return inode
    51  }
    52  
    53  func (s *selfSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, error) {
    54  	t := kernel.TaskFromContext(ctx)
    55  	if t == nil {
    56  		// Who is reading this link?
    57  		return "", linuxerr.EINVAL
    58  	}
    59  	tgid := s.pidns.IDOfThreadGroup(t.ThreadGroup())
    60  	if tgid == 0 {
    61  		return "", syserror.ENOENT
    62  	}
    63  	return strconv.FormatUint(uint64(tgid), 10), nil
    64  }
    65  
    66  func (s *selfSymlink) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.VirtualDentry, string, error) {
    67  	target, err := s.Readlink(ctx, mnt)
    68  	return vfs.VirtualDentry{}, target, err
    69  }
    70  
    71  // SetStat implements kernfs.Inode.SetStat not allowing inode attributes to be changed.
    72  func (*selfSymlink) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
    73  	return linuxerr.EPERM
    74  }
    75  
    76  // +stateify savable
    77  type threadSelfSymlink struct {
    78  	implStatFS
    79  	kernfs.InodeAttrs
    80  	kernfs.InodeNoopRefCount
    81  	kernfs.InodeSymlink
    82  
    83  	pidns *kernel.PIDNamespace
    84  }
    85  
    86  var _ kernfs.Inode = (*threadSelfSymlink)(nil)
    87  
    88  func (i *tasksInode) newThreadSelfSymlink(ctx context.Context, creds *auth.Credentials) kernfs.Inode {
    89  	inode := &threadSelfSymlink{pidns: i.pidns}
    90  	inode.Init(ctx, creds, linux.UNNAMED_MAJOR, i.fs.devMinor, i.fs.NextIno(), linux.ModeSymlink|0777)
    91  	return inode
    92  }
    93  
    94  func (s *threadSelfSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, error) {
    95  	t := kernel.TaskFromContext(ctx)
    96  	if t == nil {
    97  		// Who is reading this link?
    98  		return "", linuxerr.EINVAL
    99  	}
   100  	tgid := s.pidns.IDOfThreadGroup(t.ThreadGroup())
   101  	tid := s.pidns.IDOfTask(t)
   102  	if tid == 0 || tgid == 0 {
   103  		return "", syserror.ENOENT
   104  	}
   105  	return fmt.Sprintf("%d/task/%d", tgid, tid), nil
   106  }
   107  
   108  func (s *threadSelfSymlink) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.VirtualDentry, string, error) {
   109  	target, err := s.Readlink(ctx, mnt)
   110  	return vfs.VirtualDentry{}, target, err
   111  }
   112  
   113  // SetStat implements kernfs.Inode.SetStat not allowing inode attributes to be changed.
   114  func (*threadSelfSymlink) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
   115  	return linuxerr.EPERM
   116  }
   117  
   118  // dynamicBytesFileSetAttr implements a special file that allows inode
   119  // attributes to be set. This is to support /proc files that are readonly, but
   120  // allow attributes to be set.
   121  //
   122  // +stateify savable
   123  type dynamicBytesFileSetAttr struct {
   124  	kernfs.DynamicBytesFile
   125  }
   126  
   127  // SetStat implements kernfs.Inode.SetStat.
   128  func (d *dynamicBytesFileSetAttr) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error {
   129  	return d.DynamicBytesFile.InodeAttrs.SetStat(ctx, fs, creds, opts)
   130  }
   131  
   132  // cpuStats contains the breakdown of CPU time for /proc/stat.
   133  //
   134  // +stateify savable
   135  type cpuStats struct {
   136  	// user is time spent in userspace tasks with non-positive niceness.
   137  	user uint64
   138  
   139  	// nice is time spent in userspace tasks with positive niceness.
   140  	nice uint64
   141  
   142  	// system is time spent in non-interrupt kernel context.
   143  	system uint64
   144  
   145  	// idle is time spent idle.
   146  	idle uint64
   147  
   148  	// ioWait is time spent waiting for IO.
   149  	ioWait uint64
   150  
   151  	// irq is time spent in interrupt context.
   152  	irq uint64
   153  
   154  	// softirq is time spent in software interrupt context.
   155  	softirq uint64
   156  
   157  	// steal is involuntary wait time.
   158  	steal uint64
   159  
   160  	// guest is time spent in guests with non-positive niceness.
   161  	guest uint64
   162  
   163  	// guestNice is time spent in guests with positive niceness.
   164  	guestNice uint64
   165  }
   166  
   167  // String implements fmt.Stringer.
   168  func (c cpuStats) String() string {
   169  	return fmt.Sprintf("%d %d %d %d %d %d %d %d %d %d", c.user, c.nice, c.system, c.idle, c.ioWait, c.irq, c.softirq, c.steal, c.guest, c.guestNice)
   170  }
   171  
   172  // statData implements vfs.DynamicBytesSource for /proc/stat.
   173  //
   174  // +stateify savable
   175  type statData struct {
   176  	dynamicBytesFileSetAttr
   177  }
   178  
   179  var _ dynamicInode = (*statData)(nil)
   180  
   181  // Generate implements vfs.DynamicBytesSource.Generate.
   182  func (*statData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   183  	// TODO(b/37226836): We currently export only zero CPU stats. We could
   184  	// at least provide some aggregate stats.
   185  	var cpu cpuStats
   186  	fmt.Fprintf(buf, "cpu  %s\n", cpu)
   187  
   188  	k := kernel.KernelFromContext(ctx)
   189  	for c, max := uint(0), k.ApplicationCores(); c < max; c++ {
   190  		fmt.Fprintf(buf, "cpu%d %s\n", c, cpu)
   191  	}
   192  
   193  	// The total number of interrupts is dependent on the CPUs and PCI
   194  	// devices on the system. See arch_probe_nr_irqs.
   195  	//
   196  	// Since we don't report real interrupt stats, just choose an arbitrary
   197  	// value from a representative VM.
   198  	const numInterrupts = 256
   199  
   200  	// The Kernel doesn't handle real interrupts, so report all zeroes.
   201  	// TODO(b/37226836): We could count page faults as #PF.
   202  	fmt.Fprintf(buf, "intr 0") // total
   203  	for i := 0; i < numInterrupts; i++ {
   204  		fmt.Fprintf(buf, " 0")
   205  	}
   206  	fmt.Fprintf(buf, "\n")
   207  
   208  	// Total number of context switches.
   209  	// TODO(b/37226836): Count this.
   210  	fmt.Fprintf(buf, "ctxt 0\n")
   211  
   212  	// CLOCK_REALTIME timestamp from boot, in seconds.
   213  	fmt.Fprintf(buf, "btime %d\n", k.Timekeeper().BootTime().Seconds())
   214  
   215  	// Total number of clones.
   216  	// TODO(b/37226836): Count this.
   217  	fmt.Fprintf(buf, "processes 0\n")
   218  
   219  	// Number of runnable tasks.
   220  	// TODO(b/37226836): Count this.
   221  	fmt.Fprintf(buf, "procs_running 0\n")
   222  
   223  	// Number of tasks waiting on IO.
   224  	// TODO(b/37226836): Count this.
   225  	fmt.Fprintf(buf, "procs_blocked 0\n")
   226  
   227  	// Number of each softirq handled.
   228  	fmt.Fprintf(buf, "softirq 0") // total
   229  	for i := 0; i < linux.NumSoftIRQ; i++ {
   230  		fmt.Fprintf(buf, " 0")
   231  	}
   232  	fmt.Fprintf(buf, "\n")
   233  	return nil
   234  }
   235  
   236  // loadavgData backs /proc/loadavg.
   237  //
   238  // +stateify savable
   239  type loadavgData struct {
   240  	dynamicBytesFileSetAttr
   241  }
   242  
   243  var _ dynamicInode = (*loadavgData)(nil)
   244  
   245  // Generate implements vfs.DynamicBytesSource.Generate.
   246  func (*loadavgData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   247  	// TODO(b/62345059): Include real data in fields.
   248  	// Column 1-3: CPU and IO utilization of the last 1, 5, and 10 minute periods.
   249  	// Column 4-5: currently running processes and the total number of processes.
   250  	// Column 6: the last process ID used.
   251  	fmt.Fprintf(buf, "%.2f %.2f %.2f %d/%d %d\n", 0.00, 0.00, 0.00, 0, 0, 0)
   252  	return nil
   253  }
   254  
   255  // meminfoData implements vfs.DynamicBytesSource for /proc/meminfo.
   256  //
   257  // +stateify savable
   258  type meminfoData struct {
   259  	dynamicBytesFileSetAttr
   260  }
   261  
   262  var _ dynamicInode = (*meminfoData)(nil)
   263  
   264  // Generate implements vfs.DynamicBytesSource.Generate.
   265  func (*meminfoData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   266  	k := kernel.KernelFromContext(ctx)
   267  	mf := k.MemoryFile()
   268  	mf.UpdateUsage()
   269  	snapshot, totalUsage := usage.MemoryAccounting.Copy()
   270  	totalSize := usage.TotalMemory(mf.TotalSize(), totalUsage)
   271  	anon := snapshot.Anonymous + snapshot.Tmpfs
   272  	file := snapshot.PageCache + snapshot.Mapped
   273  	// We don't actually have active/inactive LRUs, so just make up numbers.
   274  	activeFile := (file / 2) &^ (hostarch.PageSize - 1)
   275  	inactiveFile := file - activeFile
   276  
   277  	fmt.Fprintf(buf, "MemTotal:       %8d kB\n", totalSize/1024)
   278  	memFree := totalSize - totalUsage
   279  	if memFree > totalSize {
   280  		// Underflow.
   281  		memFree = 0
   282  	}
   283  	// We use MemFree as MemAvailable because we don't swap.
   284  	// TODO(rahat): When reclaim is implemented the value of MemAvailable
   285  	// should change.
   286  	fmt.Fprintf(buf, "MemFree:        %8d kB\n", memFree/1024)
   287  	fmt.Fprintf(buf, "MemAvailable:   %8d kB\n", memFree/1024)
   288  	fmt.Fprintf(buf, "Buffers:               0 kB\n") // memory usage by block devices
   289  	fmt.Fprintf(buf, "Cached:         %8d kB\n", (file+snapshot.Tmpfs)/1024)
   290  	// Emulate a system with no swap, which disables inactivation of anon pages.
   291  	fmt.Fprintf(buf, "SwapCache:             0 kB\n")
   292  	fmt.Fprintf(buf, "Active:         %8d kB\n", (anon+activeFile)/1024)
   293  	fmt.Fprintf(buf, "Inactive:       %8d kB\n", inactiveFile/1024)
   294  	fmt.Fprintf(buf, "Active(anon):   %8d kB\n", anon/1024)
   295  	fmt.Fprintf(buf, "Inactive(anon):        0 kB\n")
   296  	fmt.Fprintf(buf, "Active(file):   %8d kB\n", activeFile/1024)
   297  	fmt.Fprintf(buf, "Inactive(file): %8d kB\n", inactiveFile/1024)
   298  	fmt.Fprintf(buf, "Unevictable:           0 kB\n") // TODO(b/31823263)
   299  	fmt.Fprintf(buf, "Mlocked:               0 kB\n") // TODO(b/31823263)
   300  	fmt.Fprintf(buf, "SwapTotal:             0 kB\n")
   301  	fmt.Fprintf(buf, "SwapFree:              0 kB\n")
   302  	fmt.Fprintf(buf, "Dirty:                 0 kB\n")
   303  	fmt.Fprintf(buf, "Writeback:             0 kB\n")
   304  	fmt.Fprintf(buf, "AnonPages:      %8d kB\n", anon/1024)
   305  	fmt.Fprintf(buf, "Mapped:         %8d kB\n", file/1024) // doesn't count mapped tmpfs, which we don't know
   306  	fmt.Fprintf(buf, "Shmem:          %8d kB\n", snapshot.Tmpfs/1024)
   307  	return nil
   308  }
   309  
   310  // uptimeData implements vfs.DynamicBytesSource for /proc/uptime.
   311  //
   312  // +stateify savable
   313  type uptimeData struct {
   314  	dynamicBytesFileSetAttr
   315  }
   316  
   317  var _ dynamicInode = (*uptimeData)(nil)
   318  
   319  // Generate implements vfs.DynamicBytesSource.Generate.
   320  func (*uptimeData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   321  	k := kernel.KernelFromContext(ctx)
   322  	now := time.NowFromContext(ctx)
   323  
   324  	// Pretend that we've spent zero time sleeping (second number).
   325  	fmt.Fprintf(buf, "%.2f 0.00\n", now.Sub(k.Timekeeper().BootTime()).Seconds())
   326  	return nil
   327  }
   328  
   329  // versionData implements vfs.DynamicBytesSource for /proc/version.
   330  //
   331  // +stateify savable
   332  type versionData struct {
   333  	dynamicBytesFileSetAttr
   334  }
   335  
   336  var _ dynamicInode = (*versionData)(nil)
   337  
   338  // Generate implements vfs.DynamicBytesSource.Generate.
   339  func (*versionData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   340  	// /proc/version takes the form:
   341  	//
   342  	// "SYSNAME version RELEASE (COMPILE_USER@COMPILE_HOST)
   343  	// (COMPILER_VERSION) VERSION"
   344  	//
   345  	// where:
   346  	// - SYSNAME, RELEASE, and VERSION are the same as returned by
   347  	// sys_utsname
   348  	// - COMPILE_USER is the user that build the kernel
   349  	// - COMPILE_HOST is the hostname of the machine on which the kernel
   350  	// was built
   351  	// - COMPILER_VERSION is the version reported by the building compiler
   352  	//
   353  	// Since we don't really want to expose build information to
   354  	// applications, those fields are omitted.
   355  	//
   356  	// FIXME(mpratt): Using Version from the init task SyscallTable
   357  	// disregards the different version a task may have (e.g., in a uts
   358  	// namespace).
   359  	ver := kernelVersion(ctx)
   360  	fmt.Fprintf(buf, "%s version %s %s\n", ver.Sysname, ver.Release, ver.Version)
   361  	return nil
   362  }
   363  
   364  // filesystemsData backs /proc/filesystems.
   365  //
   366  // +stateify savable
   367  type filesystemsData struct {
   368  	kernfs.DynamicBytesFile
   369  }
   370  
   371  var _ dynamicInode = (*filesystemsData)(nil)
   372  
   373  // Generate implements vfs.DynamicBytesSource.Generate.
   374  func (d *filesystemsData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   375  	k := kernel.KernelFromContext(ctx)
   376  	k.VFS().GenerateProcFilesystems(buf)
   377  	return nil
   378  }
   379  
   380  // cgroupsData backs /proc/cgroups.
   381  //
   382  // +stateify savable
   383  type cgroupsData struct {
   384  	dynamicBytesFileSetAttr
   385  }
   386  
   387  var _ dynamicInode = (*cgroupsData)(nil)
   388  
   389  // Generate implements vfs.DynamicBytesSource.Generate.
   390  func (*cgroupsData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   391  	r := kernel.KernelFromContext(ctx).CgroupRegistry()
   392  	r.GenerateProcCgroups(buf)
   393  	return nil
   394  }
   395  
   396  // cmdLineData backs /proc/cmdline.
   397  //
   398  // +stateify savable
   399  type cmdLineData struct {
   400  	dynamicBytesFileSetAttr
   401  }
   402  
   403  var _ dynamicInode = (*cmdLineData)(nil)
   404  
   405  // Generate implements vfs.DynamicByteSource.Generate.
   406  func (*cmdLineData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   407  	fmt.Fprintf(buf, "BOOT_IMAGE=/vmlinuz-%s-gvisor quiet\n", kernelVersion(ctx).Release)
   408  	return nil
   409  }
   410  
   411  // kernelVersion returns the kernel version.
   412  func kernelVersion(ctx context.Context) kernel.Version {
   413  	k := kernel.KernelFromContext(ctx)
   414  	init := k.GlobalInit()
   415  	if init == nil {
   416  		// Attempted to read before the init Task is created. This can
   417  		// only occur during startup, which should never need to read
   418  		// this file.
   419  		panic("Attempted to read version before initial Task is available")
   420  	}
   421  	return init.Leader().SyscallTable().Version
   422  }