github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/fsimpl/sys/sys.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package sys implements sysfs.
    16  package sys
    17  
    18  import (
    19  	"bytes"
    20  	"fmt"
    21  	"strconv"
    22  
    23  	"golang.org/x/sys/unix"
    24  	"github.com/nicocha30/gvisor-ligolo/pkg/abi/linux"
    25  	"github.com/nicocha30/gvisor-ligolo/pkg/context"
    26  	"github.com/nicocha30/gvisor-ligolo/pkg/coverage"
    27  	"github.com/nicocha30/gvisor-ligolo/pkg/errors/linuxerr"
    28  	"github.com/nicocha30/gvisor-ligolo/pkg/log"
    29  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/fsimpl/kernfs"
    30  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/kernel"
    31  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/kernel/auth"
    32  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/vfs"
    33  )
    34  
// Defaults used when building the sysfs tree:
//   - defaultSysMode is the permission mode given to the static
//     product_name file.
//   - defaultSysDirMode is the permission mode requested for most
//     directories created by GetFilesystem.
//   - defaultMaxCachedDentries is the dentry cache limit applied when the
//     "dentry_cache_limit" mount option is not supplied.
const (
	// Name is the default filesystem name.
	Name                     = "sysfs"
	defaultSysMode           = linux.FileMode(0444)
	defaultSysDirMode        = linux.FileMode(0755)
	defaultMaxCachedDentries = uint64(1000)
)
    42  
// FilesystemType implements vfs.FilesystemType.
//
// It carries no state; every call to GetFilesystem builds a new sysfs
// instance.
//
// +stateify savable
type FilesystemType struct{}
    47  
// InternalData contains internal data passed in via
// vfs.GetFilesystemOptions.InternalData.
//
// GetFilesystem type-asserts a non-nil InternalData to *InternalData, so
// callers must pass a pointer to this struct.
//
// +stateify savable
type InternalData struct {
	// ProductName is the value to be set to devices/virtual/dmi/id/product_name.
	// When empty, the dmi entries are not created at all.
	ProductName string
	// EnableAccelSysfs is whether to populate sysfs paths used by hardware
	// accelerators.
	EnableAccelSysfs bool
}
    59  
// filesystem implements vfs.FilesystemImpl.
//
// +stateify savable
type filesystem struct {
	kernfs.Filesystem

	// devMinor is the anonymous block device minor number allocated in
	// GetFilesystem. It is used as the device minor of every inode created by
	// this filesystem and is handed back to the VFS in Release.
	devMinor uint32
}
    68  
// Name implements vfs.FilesystemType.Name.
//
// It always returns the package-level Name constant ("sysfs").
func (FilesystemType) Name() string {
	return Name
}
    73  
// Release implements vfs.FilesystemType.Release.
//
// FilesystemType holds no resources, so this is a no-op.
func (FilesystemType) Release(ctx context.Context) {}
    76  
    77  // GetFilesystem implements vfs.FilesystemType.GetFilesystem.
    78  func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
    79  	devMinor, err := vfsObj.GetAnonBlockDevMinor()
    80  	if err != nil {
    81  		return nil, nil, err
    82  	}
    83  
    84  	mopts := vfs.GenericParseMountOptions(opts.Data)
    85  	maxCachedDentries := defaultMaxCachedDentries
    86  	if str, ok := mopts["dentry_cache_limit"]; ok {
    87  		delete(mopts, "dentry_cache_limit")
    88  		maxCachedDentries, err = strconv.ParseUint(str, 10, 64)
    89  		if err != nil {
    90  			ctx.Warningf("sys.FilesystemType.GetFilesystem: invalid dentry cache limit: dentry_cache_limit=%s", str)
    91  			return nil, nil, linuxerr.EINVAL
    92  		}
    93  	}
    94  
    95  	fs := &filesystem{
    96  		devMinor: devMinor,
    97  	}
    98  	fs.MaxCachedDentries = maxCachedDentries
    99  	fs.VFSFilesystem().Init(vfsObj, &fsType, fs)
   100  
   101  	k := kernel.KernelFromContext(ctx)
   102  	fsDirChildren := make(map[string]kernfs.Inode)
   103  	// Create an empty directory to serve as the mount point for cgroupfs when
   104  	// cgroups are available. This emulates Linux behaviour, see
   105  	// kernel/cgroup.c:cgroup_init(). Note that in Linux, userspace (typically
   106  	// the init process) is ultimately responsible for actually mounting
   107  	// cgroupfs, but the kernel creates the mountpoint. For the sentry, the
   108  	// launcher mounts cgroupfs.
   109  	if k.CgroupRegistry() != nil {
   110  		fsDirChildren["cgroup"] = fs.newDir(ctx, creds, defaultSysDirMode, nil)
   111  	}
   112  
   113  	classSub := map[string]kernfs.Inode{
   114  		"power_supply": fs.newDir(ctx, creds, defaultSysDirMode, nil),
   115  		"net":          fs.newDir(ctx, creds, defaultSysDirMode, fs.newNetDir(ctx, creds, defaultSysDirMode)),
   116  	}
   117  	devicesSub := map[string]kernfs.Inode{
   118  		"system": fs.newDir(ctx, creds, defaultSysDirMode, map[string]kernfs.Inode{
   119  			"cpu": cpuDir(ctx, fs, creds),
   120  		}),
   121  	}
   122  
   123  	productName := ""
   124  	var busSub map[string]kernfs.Inode
   125  	if opts.InternalData != nil {
   126  		idata := opts.InternalData.(*InternalData)
   127  		productName = idata.ProductName
   128  		if idata.EnableAccelSysfs {
   129  			pciMainBusSub, err := fs.mirrorPCIBusDeviceDir(ctx, creds, pciMainBusDevicePath)
   130  			if err != nil {
   131  				return nil, nil, err
   132  			}
   133  			devicesSub["pci0000:00"] = fs.newDir(ctx, creds, defaultSysDirMode, pciMainBusSub)
   134  
   135  			accelSub, err := fs.newAccelDir(ctx, creds)
   136  			if err != nil {
   137  				return nil, nil, err
   138  			}
   139  			classSub["accel"] = fs.newDir(ctx, creds, defaultSysDirMode, accelSub)
   140  
   141  			pciDevicesSub, err := fs.newPCIDevicesDir(ctx, creds)
   142  			if err != nil {
   143  				return nil, nil, err
   144  			}
   145  			busSub = map[string]kernfs.Inode{
   146  				"pci": fs.newDir(ctx, creds, defaultSysDirMode, map[string]kernfs.Inode{
   147  					"devices": fs.newDir(ctx, creds, defaultSysDirMode, pciDevicesSub),
   148  				}),
   149  			}
   150  		}
   151  	}
   152  
   153  	if len(productName) > 0 {
   154  		log.Debugf("Setting product_name: %q", productName)
   155  		classSub["dmi"] = fs.newDir(ctx, creds, defaultSysDirMode, map[string]kernfs.Inode{
   156  			"id": kernfs.NewStaticSymlink(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), "../../devices/virtual/dmi/id"),
   157  		})
   158  		devicesSub["virtual"] = fs.newDir(ctx, creds, defaultSysDirMode, map[string]kernfs.Inode{
   159  			"dmi": fs.newDir(ctx, creds, defaultSysDirMode, map[string]kernfs.Inode{
   160  				"id": fs.newDir(ctx, creds, defaultSysDirMode, map[string]kernfs.Inode{
   161  					"product_name": fs.newStaticFile(ctx, creds, defaultSysMode, productName+"\n"),
   162  				}),
   163  			}),
   164  		})
   165  	}
   166  	root := fs.newDir(ctx, creds, defaultSysDirMode, map[string]kernfs.Inode{
   167  		"block":    fs.newDir(ctx, creds, defaultSysDirMode, nil),
   168  		"bus":      fs.newDir(ctx, creds, defaultSysDirMode, busSub),
   169  		"class":    fs.newDir(ctx, creds, defaultSysDirMode, classSub),
   170  		"dev":      fs.newDir(ctx, creds, defaultSysDirMode, nil),
   171  		"devices":  fs.newDir(ctx, creds, defaultSysDirMode, devicesSub),
   172  		"firmware": fs.newDir(ctx, creds, defaultSysDirMode, nil),
   173  		"fs":       fs.newDir(ctx, creds, defaultSysDirMode, fsDirChildren),
   174  		"kernel":   kernelDir(ctx, fs, creds),
   175  		"module":   fs.newDir(ctx, creds, defaultSysDirMode, nil),
   176  		"power":    fs.newDir(ctx, creds, defaultSysDirMode, nil),
   177  	})
   178  	var rootD kernfs.Dentry
   179  	rootD.InitRoot(&fs.Filesystem, root)
   180  	return fs.VFSFilesystem(), rootD.VFSDentry(), nil
   181  }
   182  
   183  func cpuDir(ctx context.Context, fs *filesystem, creds *auth.Credentials) kernfs.Inode {
   184  	k := kernel.KernelFromContext(ctx)
   185  	maxCPUCores := k.ApplicationCores()
   186  	children := map[string]kernfs.Inode{
   187  		"online":   fs.newCPUFile(ctx, creds, maxCPUCores, linux.FileMode(0444)),
   188  		"possible": fs.newCPUFile(ctx, creds, maxCPUCores, linux.FileMode(0444)),
   189  		"present":  fs.newCPUFile(ctx, creds, maxCPUCores, linux.FileMode(0444)),
   190  	}
   191  	for i := uint(0); i < maxCPUCores; i++ {
   192  		children[fmt.Sprintf("cpu%d", i)] = fs.newDir(ctx, creds, linux.FileMode(0555), nil)
   193  	}
   194  	return fs.newDir(ctx, creds, defaultSysDirMode, children)
   195  }
   196  
   197  func kernelDir(ctx context.Context, fs *filesystem, creds *auth.Credentials) kernfs.Inode {
   198  	// Set up /sys/kernel/debug/kcov. Technically, debugfs should be
   199  	// mounted at debug/, but for our purposes, it is sufficient to keep it
   200  	// in sys.
   201  	var children map[string]kernfs.Inode
   202  	if coverage.KcovSupported() {
   203  		log.Debugf("Set up /sys/kernel/debug/kcov")
   204  		children = map[string]kernfs.Inode{
   205  			"debug": fs.newDir(ctx, creds, linux.FileMode(0700), map[string]kernfs.Inode{
   206  				"kcov": fs.newKcovFile(ctx, creds),
   207  			}),
   208  		}
   209  	}
   210  	return fs.newDir(ctx, creds, defaultSysDirMode, children)
   211  }
   212  
   213  // Release implements vfs.FilesystemImpl.Release.
   214  func (fs *filesystem) Release(ctx context.Context) {
   215  	fs.Filesystem.VFSFilesystem().VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor)
   216  	fs.Filesystem.Release(ctx)
   217  }
   218  
   219  // MountOptions implements vfs.FilesystemImpl.MountOptions.
   220  func (fs *filesystem) MountOptions() string {
   221  	return fmt.Sprintf("dentry_cache_limit=%d", fs.MaxCachedDentries)
   222  }
   223  
// dir implements kernfs.Inode.
//
// It is the directory inode used throughout sysfs. Its children are supplied
// once, when newDir populates OrderedChildren, and SetStat always fails with
// EPERM.
//
// +stateify savable
type dir struct {
	dirRefs
	kernfs.InodeAlwaysValid
	kernfs.InodeAttrs
	kernfs.InodeDirectoryNoNewChildren
	kernfs.InodeNotAnonymous
	kernfs.InodeNotSymlink
	kernfs.InodeTemporary
	kernfs.InodeWatches
	kernfs.OrderedChildren

	// locks is handed to kernfs.NewGenericDirectoryFD for every file
	// description opened on this directory.
	locks vfs.FileLocks
}
   240  
   241  func (fs *filesystem) newDir(ctx context.Context, creds *auth.Credentials, mode linux.FileMode, contents map[string]kernfs.Inode) kernfs.Inode {
   242  	d := &dir{}
   243  	d.InodeAttrs.Init(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0755)
   244  	d.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
   245  	d.InitRefs()
   246  	d.IncLinks(d.OrderedChildren.Populate(contents))
   247  	return d
   248  }
   249  
// SetStat implements kernfs.Inode.SetStat not allowing inode attributes to be changed.
//
// It ignores all arguments and always returns EPERM.
func (*dir) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
	return linuxerr.EPERM
}
   254  
   255  // Open implements kernfs.Inode.Open.
   256  func (d *dir) Open(ctx context.Context, rp *vfs.ResolvingPath, kd *kernfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
   257  	opts.Flags &= linux.O_ACCMODE | linux.O_CREAT | linux.O_EXCL | linux.O_TRUNC |
   258  		linux.O_DIRECTORY | linux.O_NOFOLLOW | linux.O_NONBLOCK | linux.O_NOCTTY
   259  	fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), kd, &d.OrderedChildren, &d.locks, &opts, kernfs.GenericDirectoryFDOptions{
   260  		SeekEnd: kernfs.SeekEndStaticEntries,
   261  	})
   262  	if err != nil {
   263  		return nil, err
   264  	}
   265  	return fd.VFSFileDescription(), nil
   266  }
   267  
   268  // DecRef implements kernfs.Inode.DecRef.
   269  func (d *dir) DecRef(ctx context.Context) {
   270  	d.dirRefs.DecRef(func() { d.Destroy(ctx) })
   271  }
   272  
// StatFS implements kernfs.Inode.StatFS.
//
// It returns the generic statfs result for the sysfs magic number and never
// fails.
func (d *dir) StatFS(ctx context.Context, fs *vfs.Filesystem) (linux.Statfs, error) {
	return vfs.GenericStatFS(linux.SYSFS_MAGIC), nil
}
   277  
// cpuFile implements kernfs.Inode.
//
// Its contents are generated on demand as a CPU range derived from maxCores;
// see Generate.
//
// +stateify savable
type cpuFile struct {
	implStatFS
	kernfs.DynamicBytesFile

	// maxCores is the number of cores; the generated range ends at maxCores-1.
	maxCores uint
}
   287  
   288  // Generate implements vfs.DynamicBytesSource.Generate.
   289  func (c *cpuFile) Generate(ctx context.Context, buf *bytes.Buffer) error {
   290  	fmt.Fprintf(buf, "0-%d\n", c.maxCores-1)
   291  	return nil
   292  }
   293  
   294  func (fs *filesystem) newCPUFile(ctx context.Context, creds *auth.Credentials, maxCores uint, mode linux.FileMode) kernfs.Inode {
   295  	c := &cpuFile{maxCores: maxCores}
   296  	c.DynamicBytesFile.Init(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), c, mode)
   297  	return c
   298  }
   299  
// implStatFS is a stateless helper meant to be embedded in inode types so
// that they share one StatFS implementation.
//
// +stateify savable
type implStatFS struct{}

// StatFS implements kernfs.Inode.StatFS.
//
// It returns the generic statfs result for the sysfs magic number and never
// fails.
func (*implStatFS) StatFS(context.Context, *vfs.Filesystem) (linux.Statfs, error) {
	return vfs.GenericStatFS(linux.SYSFS_MAGIC), nil
}
   307  
// staticFile is a file inode whose contents are the fixed string held in the
// embedded vfs.StaticData, set once by newStaticFile.
//
// +stateify savable
type staticFile struct {
	kernfs.DynamicBytesFile
	vfs.StaticData
}
   313  
   314  func (fs *filesystem) newStaticFile(ctx context.Context, creds *auth.Credentials, mode linux.FileMode, data string) kernfs.Inode {
   315  	s := &staticFile{StaticData: vfs.StaticData{Data: data}}
   316  	s.Init(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), s, mode)
   317  	return s
   318  }
   319  
// hostFile is an inode whose contents are generated by reading from the
// host.
//
// +stateify savable
type hostFile struct {
	kernfs.DynamicBytesFile
	// hostPath is the host path that Generate opens and reads each time the
	// contents are generated.
	hostPath string
}
   328  
   329  func (hf *hostFile) Generate(ctx context.Context, buf *bytes.Buffer) error {
   330  	fd, err := unix.Openat(-1, hf.hostPath, unix.O_RDONLY|unix.O_NOFOLLOW, 0)
   331  	if err != nil {
   332  		return err
   333  	}
   334  	var data [hostFileBufSize]byte
   335  	n, err := unix.Read(fd, data[:])
   336  	if err != nil {
   337  		return err
   338  	}
   339  	unix.Close(fd)
   340  	buf.Write(data[:n])
   341  	return nil
   342  }
   343  
   344  func (fs *filesystem) newHostFile(ctx context.Context, creds *auth.Credentials, mode linux.FileMode, hostPath string) kernfs.Inode {
   345  	hf := &hostFile{hostPath: hostPath}
   346  	hf.Init(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), hf, mode)
   347  	return hf
   348  }