github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package devtmpfs provides an implementation of /dev based on tmpfs,
    16  // analogous to Linux's devtmpfs.
    17  package devtmpfs
    18  
    19  import (
    20  	"fmt"
    21  	"path"
    22  
    23  	"github.com/nicocha30/gvisor-ligolo/pkg/abi/linux"
    24  	"github.com/nicocha30/gvisor-ligolo/pkg/context"
    25  	"github.com/nicocha30/gvisor-ligolo/pkg/fspath"
    26  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/fsimpl/tmpfs"
    27  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/kernel/auth"
    28  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/vfs"
    29  	"github.com/nicocha30/gvisor-ligolo/pkg/sync"
    30  )
    31  
// Name is the default filesystem name. It is the value returned by
// FilesystemType.Name.
const Name = "devtmpfs"
    34  
// FilesystemType implements vfs.FilesystemType.
//
// A single tmpfs-backed filesystem is created the first time GetFilesystem is
// called; that call and every later one return that same filesystem and root
// dentry, each time with an additional reference taken.
//
// +stateify savable
type FilesystemType struct {
	// initOnce guards the one-time creation of fs and root in GetFilesystem.
	// initErr records the error from that creation, if any; when it is
	// non-nil, every GetFilesystem call returns it.
	initOnce sync.Once `state:"nosave"` // FIXME(gvisor.dev/issue/1663): not yet supported.
	initErr  error

	// fs is the tmpfs filesystem that backs all mounts of this FilesystemType.
	// root is fs' root. fs and root are immutable.
	fs   *vfs.Filesystem
	root *vfs.Dentry
}
    47  
// Name implements vfs.FilesystemType.Name. It always returns the package-level
// Name constant.
func (*FilesystemType) Name() string {
	return Name
}
    52  
    53  // GetFilesystem implements vfs.FilesystemType.GetFilesystem.
    54  func (fst *FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
    55  	fst.initOnce.Do(func() {
    56  		fs, root, err := tmpfs.FilesystemType{}.GetFilesystem(ctx, vfsObj, creds, "" /* source */, vfs.GetFilesystemOptions{
    57  			Data: "mode=0755", // opts from drivers/base/devtmpfs.c:devtmpfs_init()
    58  		})
    59  		if err != nil {
    60  			fst.initErr = err
    61  			return
    62  		}
    63  		fst.fs = fs
    64  		fst.root = root
    65  	})
    66  	if fst.initErr != nil {
    67  		return nil, nil, fst.initErr
    68  	}
    69  	fst.fs.IncRef()
    70  	fst.root.IncRef()
    71  	return fst.fs, fst.root, nil
    72  }
    73  
    74  // Release implements vfs.FilesystemType.Release.
    75  func (fst *FilesystemType) Release(ctx context.Context) {
    76  	if fst.fs != nil {
    77  		// Release the original reference obtained when creating the filesystem.
    78  		fst.root.DecRef(ctx)
    79  		fst.fs.DecRef(ctx)
    80  	}
    81  }
    82  
// Accessor allows devices to create device special files in devtmpfs.
//
// vfsObj is the VirtualFilesystem through which all operations are issued.
// mntns is the mount namespace created by NewAccessor, and root is that
// namespace's root, on which the Accessor holds its own reference. creds are
// the credentials passed to every VFS operation the Accessor performs.
// Release must be called to drop the references on root and mntns.
type Accessor struct {
	vfsObj *vfs.VirtualFilesystem
	mntns  *vfs.MountNamespace
	root   vfs.VirtualDentry
	creds  *auth.Credentials
}
    90  
    91  // NewAccessor returns an Accessor that supports creation of device special
    92  // files in the devtmpfs instance registered with name fsTypeName in vfsObj.
    93  func NewAccessor(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, fsTypeName string) (*Accessor, error) {
    94  	mntns, err := vfsObj.NewMountNamespace(ctx, creds, "devtmpfs" /* source */, fsTypeName, &vfs.MountOptions{})
    95  	if err != nil {
    96  		return nil, err
    97  	}
    98  	// Pass a reference on root to the Accessor.
    99  	root := mntns.Root()
   100  	root.IncRef()
   101  	return &Accessor{
   102  		vfsObj: vfsObj,
   103  		mntns:  mntns,
   104  		root:   root,
   105  		creds:  creds,
   106  	}, nil
   107  }
   108  
// Release must be called when a is no longer in use. It drops the Accessor's
// reference on its root and then its reference on its mount namespace.
func (a *Accessor) Release(ctx context.Context) {
	a.root.DecRef(ctx)
	a.mntns.DecRef(ctx)
}
   114  
// accessorContext implements context.Context by extending an existing
// context.Context with an Accessor's values for VFS-relevant state.
//
// Lookups of vfs.CtxMountNamespace and vfs.CtxRoot are answered from a; all
// other keys are forwarded to the embedded Context.
type accessorContext struct {
	context.Context
	a *Accessor
}
   121  
   122  func (a *Accessor) wrapContext(ctx context.Context) *accessorContext {
   123  	return &accessorContext{
   124  		Context: ctx,
   125  		a:       a,
   126  	}
   127  }
   128  
   129  // Value implements context.Context.Value.
   130  func (ac *accessorContext) Value(key any) any {
   131  	switch key {
   132  	case vfs.CtxMountNamespace:
   133  		ac.a.mntns.IncRef()
   134  		return ac.a.mntns
   135  	case vfs.CtxRoot:
   136  		ac.a.root.IncRef()
   137  		return ac.a.root
   138  	default:
   139  		return ac.Context.Value(key)
   140  	}
   141  }
   142  
   143  func (a *Accessor) pathOperationAt(pathname string) *vfs.PathOperation {
   144  	return &vfs.PathOperation{
   145  		Root:  a.root,
   146  		Start: a.root,
   147  		Path:  fspath.Parse(pathname),
   148  	}
   149  }
   150  
   151  // CreateDeviceFile creates a device special file at the given pathname in the
   152  // devtmpfs instance accessed by the Accessor.
   153  func (a *Accessor) CreateDeviceFile(ctx context.Context, pathname string, kind vfs.DeviceKind, major, minor uint32, perms uint16) error {
   154  	actx := a.wrapContext(ctx)
   155  
   156  	mode := (linux.FileMode)(perms)
   157  	switch kind {
   158  	case vfs.BlockDevice:
   159  		mode |= linux.S_IFBLK
   160  	case vfs.CharDevice:
   161  		mode |= linux.S_IFCHR
   162  	default:
   163  		panic(fmt.Sprintf("invalid vfs.DeviceKind: %v", kind))
   164  	}
   165  
   166  	// Create any parent directories. See
   167  	// devtmpfs.c:handle_create()=>path_create().
   168  	parent := path.Dir(pathname)
   169  	if err := a.vfsObj.MkdirAllAt(ctx, parent, a.root, a.creds, &vfs.MkdirOptions{
   170  		Mode: 0755,
   171  	}, true /* mustBeDir */); err != nil {
   172  		return fmt.Errorf("failed to create device parent directory %q: %v", parent, err)
   173  	}
   174  
   175  	// NOTE: Linux's devtmpfs refuses to automatically delete files it didn't
   176  	// create, which it recognizes by storing a pointer to the kdevtmpfs struct
   177  	// thread in struct inode::i_private. Accessor doesn't yet support deletion
   178  	// of files at all, and probably won't as long as we don't need to support
   179  	// kernel modules, so this is moot for now.
   180  	return a.vfsObj.MknodAt(actx, a.creds, a.pathOperationAt(pathname), &vfs.MknodOptions{
   181  		Mode:     mode,
   182  		DevMajor: major,
   183  		DevMinor: minor,
   184  	})
   185  }
   186  
   187  // UserspaceInit creates symbolic links and mount points in the devtmpfs
   188  // instance accessed by the Accessor that are created by userspace in Linux. It
   189  // does not create mounts.
   190  func (a *Accessor) UserspaceInit(ctx context.Context) error {
   191  	actx := a.wrapContext(ctx)
   192  
   193  	// Initialize symlinks.
   194  	for _, symlink := range []struct {
   195  		source string
   196  		target string
   197  	}{
   198  		// systemd: src/shared/dev-setup.c:dev_setup()
   199  		{source: "fd", target: "/proc/self/fd"},
   200  		{source: "stdin", target: "/proc/self/fd/0"},
   201  		{source: "stdout", target: "/proc/self/fd/1"},
   202  		{source: "stderr", target: "/proc/self/fd/2"},
   203  		// /proc/kcore is not implemented.
   204  
   205  		// Linux implements /dev/ptmx as a device node, but advises
   206  		// container implementations to create /dev/ptmx as a symlink
   207  		// to pts/ptmx (Documentation/filesystems/devpts.txt). Systemd
   208  		// follows this advice (src/nspawn/nspawn.c:setup_pts()), while
   209  		// LXC tries to create a bind mount and falls back to a symlink
   210  		// (src/lxc/conf.c:lxc_setup_devpts()).
   211  		{source: "ptmx", target: "pts/ptmx"},
   212  	} {
   213  		if err := a.vfsObj.SymlinkAt(actx, a.creds, a.pathOperationAt(symlink.source), symlink.target); err != nil {
   214  			return fmt.Errorf("failed to create symlink %q => %q: %v", symlink.source, symlink.target, err)
   215  		}
   216  	}
   217  
   218  	// systemd: src/core/mount-setup.c:mount_table
   219  	for _, dir := range []string{
   220  		"shm",
   221  		"pts",
   222  	} {
   223  		if err := a.vfsObj.MkdirAt(actx, a.creds, a.pathOperationAt(dir), &vfs.MkdirOptions{
   224  			// systemd: src/core/mount-setup.c:mount_one()
   225  			Mode: 0755,
   226  		}); err != nil {
   227  			return fmt.Errorf("failed to create directory %q: %v", dir, err)
   228  		}
   229  	}
   230  
   231  	return nil
   232  }