github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package devtmpfs provides an implementation of /dev based on tmpfs,
    16  // analogous to Linux's devtmpfs.
    17  package devtmpfs
    18  
    19  import (
    20  	"fmt"
    21  	"path"
    22  
    23  	"github.com/MerlinKodo/gvisor/pkg/abi/linux"
    24  	"github.com/MerlinKodo/gvisor/pkg/context"
    25  	"github.com/MerlinKodo/gvisor/pkg/fspath"
    26  	"github.com/MerlinKodo/gvisor/pkg/sentry/fsimpl/tmpfs"
    27  	"github.com/MerlinKodo/gvisor/pkg/sentry/kernel/auth"
    28  	"github.com/MerlinKodo/gvisor/pkg/sentry/vfs"
    29  	"github.com/MerlinKodo/gvisor/pkg/sync"
    30  )
    31  
    32  // Name is the default filesystem name.
    33  const Name = "devtmpfs"
    34  
    35  // FilesystemType implements vfs.FilesystemType.
    36  //
    37  // +stateify savable
    38  type FilesystemType struct {
    39  	initOnce sync.Once `state:"nosave"` // FIXME(gvisor.dev/issue/1663): not yet supported.
    40  	initErr  error
    41  
    42  	// fs is the tmpfs filesystem that backs all mounts of this FilesystemType.
    43  	// root is fs' root. fs and root are immutable.
    44  	fs   *vfs.Filesystem
    45  	root *vfs.Dentry
    46  }
    47  
    48  // Name implements vfs.FilesystemType.Name.
    49  func (*FilesystemType) Name() string {
    50  	return Name
    51  }
    52  
    53  // GetFilesystem implements vfs.FilesystemType.GetFilesystem.
    54  func (fst *FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
    55  	fst.initOnce.Do(func() {
    56  		fs, root, err := tmpfs.FilesystemType{}.GetFilesystem(ctx, vfsObj, creds, "" /* source */, vfs.GetFilesystemOptions{
    57  			Data: "mode=0755", // opts from drivers/base/devtmpfs.c:devtmpfs_init()
    58  		})
    59  		if err != nil {
    60  			fst.initErr = err
    61  			return
    62  		}
    63  		fst.fs = fs
    64  		fst.root = root
    65  	})
    66  	if fst.initErr != nil {
    67  		return nil, nil, fst.initErr
    68  	}
    69  	fst.fs.IncRef()
    70  	fst.root.IncRef()
    71  	return fst.fs, fst.root, nil
    72  }
    73  
    74  // Release implements vfs.FilesystemType.Release.
    75  func (fst *FilesystemType) Release(ctx context.Context) {
    76  	if fst.fs != nil {
    77  		// Release the original reference obtained when creating the filesystem.
    78  		fst.root.DecRef(ctx)
    79  		fst.fs.DecRef(ctx)
    80  	}
    81  }
    82  
    83  // Accessor allows devices to create device special files in devtmpfs.
    84  type Accessor struct {
    85  	vfsObj *vfs.VirtualFilesystem
    86  	mntns  *vfs.MountNamespace
    87  	root   vfs.VirtualDentry
    88  	creds  *auth.Credentials
    89  }
    90  
    91  // NewAccessor returns an Accessor that supports creation of device special
    92  // files in the devtmpfs instance registered with name fsTypeName in vfsObj.
    93  func NewAccessor(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, fsTypeName string) (*Accessor, error) {
    94  	mntns, err := vfsObj.NewMountNamespace(ctx, creds, "devtmpfs" /* source */, fsTypeName, &vfs.MountOptions{}, nil)
    95  	if err != nil {
    96  		return nil, err
    97  	}
    98  	// Pass a reference on root to the Accessor.
    99  	root := mntns.Root(ctx)
   100  	return &Accessor{
   101  		vfsObj: vfsObj,
   102  		mntns:  mntns,
   103  		root:   root,
   104  		creds:  creds,
   105  	}, nil
   106  }
   107  
   108  // Release must be called when a is no longer in use.
   109  func (a *Accessor) Release(ctx context.Context) {
   110  	a.root.DecRef(ctx)
   111  	a.mntns.DecRef(ctx)
   112  }
   113  
   114  // accessorContext implements context.Context by extending an existing
   115  // context.Context with an Accessor's values for VFS-relevant state.
   116  type accessorContext struct {
   117  	context.Context
   118  	a *Accessor
   119  }
   120  
   121  func (a *Accessor) wrapContext(ctx context.Context) *accessorContext {
   122  	return &accessorContext{
   123  		Context: ctx,
   124  		a:       a,
   125  	}
   126  }
   127  
   128  // Value implements context.Context.Value.
   129  func (ac *accessorContext) Value(key any) any {
   130  	switch key {
   131  	case vfs.CtxMountNamespace:
   132  		ac.a.mntns.IncRef()
   133  		return ac.a.mntns
   134  	case vfs.CtxRoot:
   135  		ac.a.root.IncRef()
   136  		return ac.a.root
   137  	default:
   138  		return ac.Context.Value(key)
   139  	}
   140  }
   141  
   142  func (a *Accessor) pathOperationAt(pathname string) *vfs.PathOperation {
   143  	return &vfs.PathOperation{
   144  		Root:  a.root,
   145  		Start: a.root,
   146  		Path:  fspath.Parse(pathname),
   147  	}
   148  }
   149  
   150  // CreateDeviceFile creates a device special file at the given pathname in the
   151  // devtmpfs instance accessed by the Accessor.
   152  func (a *Accessor) CreateDeviceFile(ctx context.Context, pathname string, kind vfs.DeviceKind, major, minor uint32, perms uint16) error {
   153  	actx := a.wrapContext(ctx)
   154  
   155  	mode := (linux.FileMode)(perms)
   156  	switch kind {
   157  	case vfs.BlockDevice:
   158  		mode |= linux.S_IFBLK
   159  	case vfs.CharDevice:
   160  		mode |= linux.S_IFCHR
   161  	default:
   162  		panic(fmt.Sprintf("invalid vfs.DeviceKind: %v", kind))
   163  	}
   164  
   165  	// Create any parent directories. See
   166  	// devtmpfs.c:handle_create()=>path_create().
   167  	parent := path.Dir(pathname)
   168  	if err := a.vfsObj.MkdirAllAt(ctx, parent, a.root, a.creds, &vfs.MkdirOptions{
   169  		Mode: 0755,
   170  	}, true /* mustBeDir */); err != nil {
   171  		return fmt.Errorf("failed to create device parent directory %q: %v", parent, err)
   172  	}
   173  
   174  	// NOTE: Linux's devtmpfs refuses to automatically delete files it didn't
   175  	// create, which it recognizes by storing a pointer to the kdevtmpfs struct
   176  	// thread in struct inode::i_private. Accessor doesn't yet support deletion
   177  	// of files at all, and probably won't as long as we don't need to support
   178  	// kernel modules, so this is moot for now.
   179  	return a.vfsObj.MknodAt(actx, a.creds, a.pathOperationAt(pathname), &vfs.MknodOptions{
   180  		Mode:     mode,
   181  		DevMajor: major,
   182  		DevMinor: minor,
   183  	})
   184  }
   185  
   186  // UserspaceInit creates symbolic links and mount points in the devtmpfs
   187  // instance accessed by the Accessor that are created by userspace in Linux. It
   188  // does not create mounts.
   189  func (a *Accessor) UserspaceInit(ctx context.Context) error {
   190  	actx := a.wrapContext(ctx)
   191  
   192  	// Initialize symlinks.
   193  	for _, symlink := range []struct {
   194  		source string
   195  		target string
   196  	}{
   197  		// systemd: src/shared/dev-setup.c:dev_setup()
   198  		{source: "fd", target: "/proc/self/fd"},
   199  		{source: "stdin", target: "/proc/self/fd/0"},
   200  		{source: "stdout", target: "/proc/self/fd/1"},
   201  		{source: "stderr", target: "/proc/self/fd/2"},
   202  		// /proc/kcore is not implemented.
   203  
   204  		// Linux implements /dev/ptmx as a device node, but advises
   205  		// container implementations to create /dev/ptmx as a symlink
   206  		// to pts/ptmx (Documentation/filesystems/devpts.txt). Systemd
   207  		// follows this advice (src/nspawn/nspawn.c:setup_pts()), while
   208  		// LXC tries to create a bind mount and falls back to a symlink
   209  		// (src/lxc/conf.c:lxc_setup_devpts()).
   210  		{source: "ptmx", target: "pts/ptmx"},
   211  	} {
   212  		if err := a.vfsObj.SymlinkAt(actx, a.creds, a.pathOperationAt(symlink.source), symlink.target); err != nil {
   213  			return fmt.Errorf("failed to create symlink %q => %q: %v", symlink.source, symlink.target, err)
   214  		}
   215  	}
   216  
   217  	// systemd: src/core/mount-setup.c:mount_table
   218  	for _, dir := range []string{
   219  		"shm",
   220  		"pts",
   221  	} {
   222  		if err := a.vfsObj.MkdirAt(actx, a.creds, a.pathOperationAt(dir), &vfs.MkdirOptions{
   223  			// systemd: src/core/mount-setup.c:mount_one()
   224  			Mode: 0755,
   225  		}); err != nil {
   226  			return fmt.Errorf("failed to create directory %q: %v", dir, err)
   227  		}
   228  	}
   229  
   230  	return nil
   231  }