github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/fsimpl/proc/tasks_files.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package proc
    16  
    17  import (
    18  	"bytes"
    19  	"fmt"
    20  	"runtime"
    21  	"strconv"
    22  
    23  	"github.com/metacubex/gvisor/pkg/abi/linux"
    24  	"github.com/metacubex/gvisor/pkg/context"
    25  	"github.com/metacubex/gvisor/pkg/errors/linuxerr"
    26  	"github.com/metacubex/gvisor/pkg/hostarch"
    27  	"github.com/metacubex/gvisor/pkg/sentry/fsimpl/kernfs"
    28  	"github.com/metacubex/gvisor/pkg/sentry/kernel"
    29  	"github.com/metacubex/gvisor/pkg/sentry/kernel/auth"
    30  	"github.com/metacubex/gvisor/pkg/sentry/kernel/time"
    31  	"github.com/metacubex/gvisor/pkg/sentry/usage"
    32  	"github.com/metacubex/gvisor/pkg/sentry/vfs"
    33  )
    34  
    35  // +stateify savable
    36  type selfSymlink struct {
    37  	implStatFS
    38  	kernfs.InodeAttrs
    39  	kernfs.InodeNoopRefCount
    40  	kernfs.InodeNotAnonymous
    41  	kernfs.InodeSymlink
    42  	kernfs.InodeWatches
    43  
    44  	pidns *kernel.PIDNamespace
    45  }
    46  
    47  var _ kernfs.Inode = (*selfSymlink)(nil)
    48  
    49  func (i *tasksInode) newSelfSymlink(ctx context.Context, creds *auth.Credentials) kernfs.Inode {
    50  	inode := &selfSymlink{pidns: i.pidns}
    51  	inode.Init(ctx, creds, linux.UNNAMED_MAJOR, i.fs.devMinor, i.fs.NextIno(), linux.ModeSymlink|0777)
    52  	return inode
    53  }
    54  
    55  func (s *selfSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, error) {
    56  	t := kernel.TaskFromContext(ctx)
    57  	if t == nil {
    58  		// Who is reading this link?
    59  		return "", linuxerr.EINVAL
    60  	}
    61  	tgid := s.pidns.IDOfThreadGroup(t.ThreadGroup())
    62  	if tgid == 0 {
    63  		return "", linuxerr.ENOENT
    64  	}
    65  	return strconv.FormatUint(uint64(tgid), 10), nil
    66  }
    67  
    68  func (s *selfSymlink) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.VirtualDentry, string, error) {
    69  	target, err := s.Readlink(ctx, mnt)
    70  	return vfs.VirtualDentry{}, target, err
    71  }
    72  
    73  // SetStat implements kernfs.Inode.SetStat not allowing inode attributes to be changed.
    74  func (*selfSymlink) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
    75  	return linuxerr.EPERM
    76  }
    77  
    78  // +stateify savable
    79  type threadSelfSymlink struct {
    80  	implStatFS
    81  	kernfs.InodeAttrs
    82  	kernfs.InodeNoopRefCount
    83  	kernfs.InodeNotAnonymous
    84  	kernfs.InodeSymlink
    85  	kernfs.InodeWatches
    86  
    87  	pidns *kernel.PIDNamespace
    88  }
    89  
    90  var _ kernfs.Inode = (*threadSelfSymlink)(nil)
    91  
    92  func (i *tasksInode) newThreadSelfSymlink(ctx context.Context, creds *auth.Credentials) kernfs.Inode {
    93  	inode := &threadSelfSymlink{pidns: i.pidns}
    94  	inode.Init(ctx, creds, linux.UNNAMED_MAJOR, i.fs.devMinor, i.fs.NextIno(), linux.ModeSymlink|0777)
    95  	return inode
    96  }
    97  
    98  func (s *threadSelfSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, error) {
    99  	t := kernel.TaskFromContext(ctx)
   100  	if t == nil {
   101  		// Who is reading this link?
   102  		return "", linuxerr.EINVAL
   103  	}
   104  	tgid := s.pidns.IDOfThreadGroup(t.ThreadGroup())
   105  	tid := s.pidns.IDOfTask(t)
   106  	if tid == 0 || tgid == 0 {
   107  		return "", linuxerr.ENOENT
   108  	}
   109  	return fmt.Sprintf("%d/task/%d", tgid, tid), nil
   110  }
   111  
   112  func (s *threadSelfSymlink) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.VirtualDentry, string, error) {
   113  	target, err := s.Readlink(ctx, mnt)
   114  	return vfs.VirtualDentry{}, target, err
   115  }
   116  
   117  // SetStat implements kernfs.Inode.SetStat not allowing inode attributes to be changed.
   118  func (*threadSelfSymlink) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
   119  	return linuxerr.EPERM
   120  }
   121  
   122  // dynamicBytesFileSetAttr implements a special file that allows inode
   123  // attributes to be set. This is to support /proc files that are readonly, but
   124  // allow attributes to be set.
   125  //
   126  // +stateify savable
   127  type dynamicBytesFileSetAttr struct {
   128  	kernfs.DynamicBytesFile
   129  }
   130  
   131  // SetStat implements kernfs.Inode.SetStat.
   132  func (d *dynamicBytesFileSetAttr) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error {
   133  	return d.DynamicBytesFile.InodeAttrs.SetStat(ctx, fs, creds, opts)
   134  }
   135  
   136  // cpuStats contains the breakdown of CPU time for /proc/stat.
   137  //
   138  // +stateify savable
   139  type cpuStats struct {
   140  	// user is time spent in userspace tasks with non-positive niceness.
   141  	user uint64
   142  
   143  	// nice is time spent in userspace tasks with positive niceness.
   144  	nice uint64
   145  
   146  	// system is time spent in non-interrupt kernel context.
   147  	system uint64
   148  
   149  	// idle is time spent idle.
   150  	idle uint64
   151  
   152  	// ioWait is time spent waiting for IO.
   153  	ioWait uint64
   154  
   155  	// irq is time spent in interrupt context.
   156  	irq uint64
   157  
   158  	// softirq is time spent in software interrupt context.
   159  	softirq uint64
   160  
   161  	// steal is involuntary wait time.
   162  	steal uint64
   163  
   164  	// guest is time spent in guests with non-positive niceness.
   165  	guest uint64
   166  
   167  	// guestNice is time spent in guests with positive niceness.
   168  	guestNice uint64
   169  }
   170  
   171  // String implements fmt.Stringer.
   172  func (c cpuStats) String() string {
   173  	return fmt.Sprintf("%d %d %d %d %d %d %d %d %d %d", c.user, c.nice, c.system, c.idle, c.ioWait, c.irq, c.softirq, c.steal, c.guest, c.guestNice)
   174  }
   175  
   176  // statData implements vfs.DynamicBytesSource for /proc/stat.
   177  //
   178  // +stateify savable
   179  type statData struct {
   180  	dynamicBytesFileSetAttr
   181  }
   182  
   183  var _ dynamicInode = (*statData)(nil)
   184  
   185  // Generate implements vfs.DynamicBytesSource.Generate.
   186  func (*statData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   187  	// TODO(b/37226836): We currently export only zero CPU stats. We could
   188  	// at least provide some aggregate stats.
   189  	var cpu cpuStats
   190  	fmt.Fprintf(buf, "cpu  %s\n", cpu)
   191  
   192  	k := kernel.KernelFromContext(ctx)
   193  	for c, max := uint(0), k.ApplicationCores(); c < max; c++ {
   194  		fmt.Fprintf(buf, "cpu%d %s\n", c, cpu)
   195  	}
   196  
   197  	// The total number of interrupts is dependent on the CPUs and PCI
   198  	// devices on the system. See arch_probe_nr_irqs.
   199  	//
   200  	// Since we don't report real interrupt stats, just choose an arbitrary
   201  	// value from a representative VM.
   202  	const numInterrupts = 256
   203  
   204  	// The Kernel doesn't handle real interrupts, so report all zeroes.
   205  	// TODO(b/37226836): We could count page faults as #PF.
   206  	fmt.Fprintf(buf, "intr 0") // total
   207  	for i := 0; i < numInterrupts; i++ {
   208  		fmt.Fprintf(buf, " 0")
   209  	}
   210  	fmt.Fprintf(buf, "\n")
   211  
   212  	// Total number of context switches.
   213  	// TODO(b/37226836): Count this.
   214  	fmt.Fprintf(buf, "ctxt 0\n")
   215  
   216  	// CLOCK_REALTIME timestamp from boot, in seconds.
   217  	fmt.Fprintf(buf, "btime %d\n", k.Timekeeper().BootTime().Seconds())
   218  
   219  	// Total number of clones.
   220  	// TODO(b/37226836): Count this.
   221  	fmt.Fprintf(buf, "processes 0\n")
   222  
   223  	// Number of runnable tasks.
   224  	// TODO(b/37226836): Count this.
   225  	fmt.Fprintf(buf, "procs_running 0\n")
   226  
   227  	// Number of tasks waiting on IO.
   228  	// TODO(b/37226836): Count this.
   229  	fmt.Fprintf(buf, "procs_blocked 0\n")
   230  
   231  	// Number of each softirq handled.
   232  	fmt.Fprintf(buf, "softirq 0") // total
   233  	for i := 0; i < linux.NumSoftIRQ; i++ {
   234  		fmt.Fprintf(buf, " 0")
   235  	}
   236  	fmt.Fprintf(buf, "\n")
   237  	return nil
   238  }
   239  
   240  // loadavgData backs /proc/loadavg.
   241  //
   242  // +stateify savable
   243  type loadavgData struct {
   244  	dynamicBytesFileSetAttr
   245  }
   246  
   247  var _ dynamicInode = (*loadavgData)(nil)
   248  
   249  // Generate implements vfs.DynamicBytesSource.Generate.
   250  func (*loadavgData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   251  	// TODO(b/62345059): Include real data in fields.
   252  	// Column 1-3: CPU and IO utilization of the last 1, 5, and 10 minute periods.
   253  	// Column 4-5: currently running processes and the total number of processes.
   254  	// Column 6: the last process ID used.
   255  	fmt.Fprintf(buf, "%.2f %.2f %.2f %d/%d %d\n", 0.00, 0.00, 0.00, 0, 0, 0)
   256  	return nil
   257  }
   258  
   259  // meminfoData implements vfs.DynamicBytesSource for /proc/meminfo.
   260  //
   261  // +stateify savable
   262  type meminfoData struct {
   263  	dynamicBytesFileSetAttr
   264  }
   265  
   266  var _ dynamicInode = (*meminfoData)(nil)
   267  
   268  // Generate implements vfs.DynamicBytesSource.Generate.
   269  func (*meminfoData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   270  	mf := kernel.KernelFromContext(ctx).MemoryFile()
   271  	_ = mf.UpdateUsage(nil) // Best effort
   272  	snapshot, totalUsage := usage.MemoryAccounting.Copy()
   273  	totalSize := usage.TotalMemory(mf.TotalSize(), totalUsage)
   274  	anon := snapshot.Anonymous + snapshot.Tmpfs
   275  	file := snapshot.PageCache + snapshot.Mapped
   276  	// We don't actually have active/inactive LRUs, so just make up numbers.
   277  	activeFile := (file / 2) &^ (hostarch.PageSize - 1)
   278  	inactiveFile := file - activeFile
   279  
   280  	fmt.Fprintf(buf, "MemTotal:       %8d kB\n", totalSize/1024)
   281  	memFree := totalSize - totalUsage
   282  	if memFree > totalSize {
   283  		// Underflow.
   284  		memFree = 0
   285  	}
   286  	// We use MemFree as MemAvailable because we don't swap.
   287  	// TODO(rahat): When reclaim is implemented the value of MemAvailable
   288  	// should change.
   289  	fmt.Fprintf(buf, "MemFree:        %8d kB\n", memFree/1024)
   290  	fmt.Fprintf(buf, "MemAvailable:   %8d kB\n", memFree/1024)
   291  	fmt.Fprintf(buf, "Buffers:               0 kB\n") // memory usage by block devices
   292  	fmt.Fprintf(buf, "Cached:         %8d kB\n", (file+snapshot.Tmpfs)/1024)
   293  	// Emulate a system with no swap, which disables inactivation of anon pages.
   294  	fmt.Fprintf(buf, "SwapCache:             0 kB\n")
   295  	fmt.Fprintf(buf, "Active:         %8d kB\n", (anon+activeFile)/1024)
   296  	fmt.Fprintf(buf, "Inactive:       %8d kB\n", inactiveFile/1024)
   297  	fmt.Fprintf(buf, "Active(anon):   %8d kB\n", anon/1024)
   298  	fmt.Fprintf(buf, "Inactive(anon):        0 kB\n")
   299  	fmt.Fprintf(buf, "Active(file):   %8d kB\n", activeFile/1024)
   300  	fmt.Fprintf(buf, "Inactive(file): %8d kB\n", inactiveFile/1024)
   301  	fmt.Fprintf(buf, "Unevictable:           0 kB\n") // TODO(b/31823263)
   302  	fmt.Fprintf(buf, "Mlocked:               0 kB\n") // TODO(b/31823263)
   303  	fmt.Fprintf(buf, "SwapTotal:             0 kB\n")
   304  	fmt.Fprintf(buf, "SwapFree:              0 kB\n")
   305  	fmt.Fprintf(buf, "Dirty:                 0 kB\n")
   306  	fmt.Fprintf(buf, "Writeback:             0 kB\n")
   307  	fmt.Fprintf(buf, "AnonPages:      %8d kB\n", anon/1024)
   308  	fmt.Fprintf(buf, "Mapped:         %8d kB\n", file/1024) // doesn't count mapped tmpfs, which we don't know
   309  	fmt.Fprintf(buf, "Shmem:          %8d kB\n", snapshot.Tmpfs/1024)
   310  	return nil
   311  }
   312  
   313  // uptimeData implements vfs.DynamicBytesSource for /proc/uptime.
   314  //
   315  // +stateify savable
   316  type uptimeData struct {
   317  	dynamicBytesFileSetAttr
   318  }
   319  
   320  var _ dynamicInode = (*uptimeData)(nil)
   321  
   322  // Generate implements vfs.DynamicBytesSource.Generate.
   323  func (*uptimeData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   324  	k := kernel.KernelFromContext(ctx)
   325  	now := time.NowFromContext(ctx)
   326  
   327  	// Pretend that we've spent zero time sleeping (second number).
   328  	fmt.Fprintf(buf, "%.2f 0.00\n", now.Sub(k.Timekeeper().BootTime()).Seconds())
   329  	return nil
   330  }
   331  
   332  // versionData implements vfs.DynamicBytesSource for /proc/version.
   333  //
   334  // +stateify savable
   335  type versionData struct {
   336  	dynamicBytesFileSetAttr
   337  }
   338  
   339  var _ dynamicInode = (*versionData)(nil)
   340  
   341  // Generate implements vfs.DynamicBytesSource.Generate.
   342  func (*versionData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   343  	// /proc/version takes the form:
   344  	//
   345  	// "SYSNAME version RELEASE (COMPILE_USER@COMPILE_HOST)
   346  	// (COMPILER_VERSION) VERSION"
   347  	//
   348  	// where:
   349  	//	- SYSNAME, RELEASE, and VERSION are the same as returned by
   350  	//		sys_utsname
   351  	//	- COMPILE_USER is the user that build the kernel
   352  	//	- COMPILE_HOST is the hostname of the machine on which the kernel
   353  	//		was built
   354  	//	- COMPILER_VERSION is the version reported by the building compiler
   355  	//
   356  	// Since we don't really want to expose build information to
   357  	// applications, those fields are omitted.
   358  	//
   359  	// FIXME(mpratt): Using Version from the init task SyscallTable
   360  	// disregards the different version a task may have (e.g., in a uts
   361  	// namespace).
   362  	ver := kernelVersion(ctx)
   363  	fmt.Fprintf(buf, "%s version %s %s\n", ver.Sysname, ver.Release, ver.Version)
   364  	return nil
   365  }
   366  
   367  // filesystemsData backs /proc/filesystems.
   368  //
   369  // +stateify savable
   370  type filesystemsData struct {
   371  	kernfs.DynamicBytesFile
   372  }
   373  
   374  var _ dynamicInode = (*filesystemsData)(nil)
   375  
   376  // Generate implements vfs.DynamicBytesSource.Generate.
   377  func (d *filesystemsData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   378  	k := kernel.KernelFromContext(ctx)
   379  	k.VFS().GenerateProcFilesystems(buf)
   380  	return nil
   381  }
   382  
   383  // cgroupsData backs /proc/cgroups.
   384  //
   385  // +stateify savable
   386  type cgroupsData struct {
   387  	dynamicBytesFileSetAttr
   388  }
   389  
   390  var _ dynamicInode = (*cgroupsData)(nil)
   391  
   392  // Generate implements vfs.DynamicBytesSource.Generate.
   393  func (*cgroupsData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   394  	r := kernel.KernelFromContext(ctx).CgroupRegistry()
   395  	r.GenerateProcCgroups(buf)
   396  	return nil
   397  }
   398  
   399  // cmdLineData backs /proc/cmdline.
   400  //
   401  // +stateify savable
   402  type cmdLineData struct {
   403  	dynamicBytesFileSetAttr
   404  }
   405  
   406  var _ dynamicInode = (*cmdLineData)(nil)
   407  
   408  // Generate implements vfs.DynamicByteSource.Generate.
   409  func (*cmdLineData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   410  	fmt.Fprintf(buf, "BOOT_IMAGE=/vmlinuz-%s-gvisor quiet\n", kernelVersion(ctx).Release)
   411  	return nil
   412  }
   413  
   414  // kernelVersion returns the kernel version.
   415  func kernelVersion(ctx context.Context) kernel.Version {
   416  	k := kernel.KernelFromContext(ctx)
   417  	init := k.GlobalInit()
   418  	if init == nil {
   419  		// Attempted to read before the init Task is created. This can
   420  		// only occur during startup, which should never need to read
   421  		// this file.
   422  		panic("Attempted to read version before initial Task is available")
   423  	}
   424  	return init.Leader().SyscallTable().Version
   425  }
   426  
   427  // sentryMeminfoData implements vfs.DynamicBytesSource for /proc/sentry-meminfo.
   428  //
   429  // +stateify savable
   430  type sentryMeminfoData struct {
   431  	dynamicBytesFileSetAttr
   432  }
   433  
   434  var _ dynamicInode = (*sentryMeminfoData)(nil)
   435  
   436  // Generate implements vfs.DynamicBytesSource.Generate.
   437  func (*sentryMeminfoData) Generate(ctx context.Context, buf *bytes.Buffer) error {
   438  	var sentryMeminfo runtime.MemStats
   439  	runtime.ReadMemStats(&sentryMeminfo)
   440  
   441  	fmt.Fprintf(buf, "Alloc:          %8d kB\n", sentryMeminfo.Alloc/1024)
   442  	fmt.Fprintf(buf, "TotalAlloc:     %8d kB\n", sentryMeminfo.TotalAlloc/1024)
   443  	fmt.Fprintf(buf, "Sys:            %8d kB\n", sentryMeminfo.Sys/1024)
   444  	fmt.Fprintf(buf, "Mallocs:        %8d\n", sentryMeminfo.Mallocs)
   445  	fmt.Fprintf(buf, "Frees:          %8d\n", sentryMeminfo.Frees)
   446  	fmt.Fprintf(buf, "Live Objects:   %8d\n", sentryMeminfo.Mallocs-sentryMeminfo.Frees)
   447  	fmt.Fprintf(buf, "HeapAlloc:      %8d kB\n", sentryMeminfo.HeapAlloc/1024)
   448  	fmt.Fprintf(buf, "HeapSys:        %8d kB\n", sentryMeminfo.HeapSys/1024)
   449  	fmt.Fprintf(buf, "HeapObjects:    %8d\n", sentryMeminfo.HeapObjects)
   450  	return nil
   451  }