github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/fsimpl/dev/dev.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package dev provides a filesystem implementation for /dev.
    16  package dev
    17  
    18  import (
    19  	"fmt"
    20  	"path"
    21  
    22  	"github.com/metacubex/gvisor/pkg/abi/linux"
    23  	"github.com/metacubex/gvisor/pkg/context"
    24  	"github.com/metacubex/gvisor/pkg/errors/linuxerr"
    25  	"github.com/metacubex/gvisor/pkg/fspath"
    26  	"github.com/metacubex/gvisor/pkg/sentry/fsimpl/tmpfs"
    27  	"github.com/metacubex/gvisor/pkg/sentry/kernel/auth"
    28  	"github.com/metacubex/gvisor/pkg/sentry/vfs"
    29  )
    30  
// Name is the dev filesystem name. It is the value reported by
// FilesystemType.Name.
const Name = "dev"
    33  
// FilesystemType implements vfs.FilesystemType.
//
// The type has no fields, so every value of it is interchangeable.
//
// +stateify savable
type FilesystemType struct{}
    38  
    39  // Name implements vfs.FilesystemType.Name.
    40  func (FilesystemType) Name() string {
    41  	return Name
    42  }
    43  
    44  // GetFilesystem implements vfs.FilesystemType.GetFilesystem.
    45  func (fst FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
    46  	mntns, err := vfsObj.NewMountNamespace(ctx, creds, source /* source */, tmpfs.Name, &vfs.MountOptions{GetFilesystemOptions: vfs.GetFilesystemOptions{
    47  		Data: "mode=0755", // opts from drivers/base/devtmpfs.c:devtmpfs_init()
    48  	}}, nil)
    49  	if err != nil {
    50  		return nil, nil, err
    51  	}
    52  	defer mntns.DecRef(ctx)
    53  
    54  	root := mntns.Root(ctx)
    55  	defer root.DecRef(ctx)
    56  
    57  	iopts, _ := opts.InternalData.(InternalData) // If not provided, zero value is OK.
    58  
    59  	// Initialize contents.
    60  	if err := userspaceInit(ctx, vfsObj, creds, root, iopts.ShmMode); err != nil {
    61  		return nil, nil, err
    62  	}
    63  	if err := vfsObj.ForEachDevice(func(pathname string, kind vfs.DeviceKind, major, minor uint32, perms uint16) error {
    64  		if pathname == "" {
    65  			return nil
    66  		}
    67  		mode := linux.FileMode(perms)
    68  		switch kind {
    69  		case vfs.CharDevice:
    70  			mode |= linux.S_IFCHR
    71  		case vfs.BlockDevice:
    72  			mode |= linux.S_IFBLK
    73  		default:
    74  			panic(fmt.Sprintf("invalid DeviceKind: %v", kind))
    75  		}
    76  		return CreateDeviceFile(ctx, vfsObj, creds, root, pathname, major, minor, mode, nil /* uid */, nil /* gid */)
    77  	}); err != nil {
    78  		return nil, nil, err
    79  	}
    80  
    81  	root.Mount().Filesystem().IncRef()
    82  	root.Dentry().IncRef()
    83  	return root.Mount().Filesystem(), root.Dentry(), nil
    84  }
    85  
// Release implements vfs.FilesystemType.Release.
//
// It is a no-op: the method body is empty and FilesystemType has no fields
// to clean up.
func (fst *FilesystemType) Release(ctx context.Context) {}
    88  
// InternalData contains internal data passed in via vfs.GetFilesystemOptions.
//
// GetFilesystem extracts it from GetFilesystemOptions.InternalData with a
// type assertion to InternalData (the value type, not a pointer). If the
// assertion fails, the zero InternalData is used instead.
type InternalData struct {
	// ShmMode indicates the mode to create the /dev/shm dir with. If nil,
	// the directory is created with mode 0755.
	ShmMode *uint16
}
    94  
    95  func pathOperationAt(root vfs.VirtualDentry, pathname string) *vfs.PathOperation {
    96  	return &vfs.PathOperation{
    97  		Root:  root,
    98  		Start: root,
    99  		Path:  fspath.Parse(pathname),
   100  	}
   101  }
   102  
   103  // CreateDeviceFile creates a device special file at the given pathname from root.
   104  func CreateDeviceFile(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, root vfs.VirtualDentry, pathname string, major, minor uint32, mode linux.FileMode, uid, gid *uint32) error {
   105  	// Create any parent directories. See
   106  	// devtmpfs.c:handle_create()=>create_path().
   107  	parent := path.Dir(pathname)
   108  	if err := vfsObj.MkdirAllAt(ctx, parent, root, creds, &vfs.MkdirOptions{
   109  		Mode: 0755,
   110  	}, true /* mustBeDir */); err != nil {
   111  		return fmt.Errorf("failed to create device parent directory %q: %v", parent, err)
   112  	}
   113  	created := true
   114  	pop := pathOperationAt(root, pathname)
   115  	if err := vfsObj.MknodAt(ctx, creds, pop, &vfs.MknodOptions{Mode: mode, DevMajor: major, DevMinor: minor}); err != nil {
   116  		if linuxerr.Equals(linuxerr.EEXIST, err) {
   117  			// EEXIST is silently ignored; compare
   118  			// opencontainers/runc:libcontainer/rootfs_linux.go:createDeviceNode().
   119  			created = false
   120  		} else {
   121  			return fmt.Errorf("failed to create device file at %q: %w", pathname, err)
   122  		}
   123  	}
   124  	if created && (uid != nil || gid != nil) {
   125  		var opts vfs.SetStatOptions
   126  		if uid != nil {
   127  			opts.Stat.Mask |= linux.STATX_UID
   128  			opts.Stat.UID = *uid
   129  		}
   130  		if gid != nil {
   131  			opts.Stat.Mask |= linux.STATX_GID
   132  			opts.Stat.GID = *gid
   133  		}
   134  		if err := vfsObj.SetStatAt(ctx, creds, pop, &opts); err != nil {
   135  			return fmt.Errorf("failed to set UID/GID for device file %q: %w", pathname, err)
   136  		}
   137  	}
   138  	return nil
   139  }
   140  
   141  // userspaceInit creates symbolic links and mount points in the devtmpfs
   142  // instance that are created by userspace in Linux. It does not create mounts.
   143  func userspaceInit(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, root vfs.VirtualDentry, shmMode *uint16) error {
   144  	// Initialize symlinks.
   145  	for _, symlink := range []struct {
   146  		source string
   147  		target string
   148  	}{
   149  		// systemd: src/shared/dev-setup.c:dev_setup()
   150  		{source: "fd", target: "/proc/self/fd"},
   151  		{source: "stdin", target: "/proc/self/fd/0"},
   152  		{source: "stdout", target: "/proc/self/fd/1"},
   153  		{source: "stderr", target: "/proc/self/fd/2"},
   154  		// /proc/kcore is not implemented.
   155  
   156  		// Linux implements /dev/ptmx as a device node, but advises
   157  		// container implementations to create /dev/ptmx as a symlink
   158  		// to pts/ptmx (Documentation/filesystems/devpts.txt). Systemd
   159  		// follows this advice (src/nspawn/nspawn.c:setup_pts()), while
   160  		// LXC tries to create a bind mount and falls back to a symlink
   161  		// (src/lxc/conf.c:lxc_setup_devpts()).
   162  		{source: "ptmx", target: "pts/ptmx"},
   163  	} {
   164  		if err := vfsObj.SymlinkAt(ctx, creds, pathOperationAt(root, symlink.source), symlink.target); err != nil {
   165  			return fmt.Errorf("failed to create symlink %q => %q: %v", symlink.source, symlink.target, err)
   166  		}
   167  	}
   168  
   169  	// systemd: src/core/mount-setup.c:mount_table
   170  	for _, dir := range []string{
   171  		"shm",
   172  		"pts",
   173  	} {
   174  		// "The access mode here doesn't really matter too much, since the
   175  		// mounted file system will take precedence anyway"
   176  		//   - systemd: src/core/mount-setup.c:mount_one()
   177  		accessMode := linux.FileMode(0755)
   178  		if shmMode != nil && dir == "shm" {
   179  			accessMode = linux.FileMode(*shmMode)
   180  		}
   181  		if err := vfsObj.MkdirAt(ctx, creds, pathOperationAt(root, dir), &vfs.MkdirOptions{
   182  			Mode: accessMode,
   183  		}); err != nil {
   184  			return fmt.Errorf("failed to create directory %q: %v", dir, err)
   185  		}
   186  	}
   187  
   188  	return nil
   189  }