github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/fsimpl/devpts/devpts.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package devpts provides a filesystem implementation that behaves like
    16  // devpts.
    17  package devpts
    18  
    19  import (
    20  	"fmt"
    21  	"math"
    22  	"sort"
    23  	"strconv"
    24  	"sync"
    25  
    26  	"github.com/metacubex/gvisor/pkg/abi/linux"
    27  	"github.com/metacubex/gvisor/pkg/context"
    28  	"github.com/metacubex/gvisor/pkg/errors/linuxerr"
    29  	"github.com/metacubex/gvisor/pkg/sentry/fsimpl/kernfs"
    30  	"github.com/metacubex/gvisor/pkg/sentry/kernel/auth"
    31  	"github.com/metacubex/gvisor/pkg/sentry/vfs"
    32  )
    33  
// Name is the filesystem name. It is the value returned by
// FilesystemType.Name.
const Name = "devpts"
    36  
// FilesystemType implements vfs.FilesystemType.
//
// A single shared filesystem instance is created lazily by GetFilesystem and
// handed out to every mount that does not request the "newinstance" option.
//
// +stateify savable
type FilesystemType struct {
	// initOnce guards the one-time creation of fs and root in GetFilesystem.
	initOnce sync.Once `state:"nosave"` // FIXME(gvisor.dev/issue/1663): not yet supported.
	// initErr is the error, if any, produced by that one-time creation.
	// When non-nil, GetFilesystem returns it for every request for the
	// shared instance.
	initErr  error

	// fs backs all mounts of this FilesystemType. root is fs' root. fs and root
	// are immutable.
	fs   *vfs.Filesystem
	root *vfs.Dentry
}
    49  
// fileSystemOpts holds the mount options that GetFilesystem parses and that
// newFilesystem applies when it builds a devpts instance.
type fileSystemOpts struct {
	// mode is the permission bits given to the root directory inode.
	mode     linux.FileMode
	// ptmxMode is the permission bits given to the ptmx inode.
	ptmxMode linux.FileMode
	// uid and gid are the owner of both the root directory and ptmx inodes.
	uid      auth.KUID
	gid      auth.KGID
}
    56  
// Name implements vfs.FilesystemType.Name.
//
// It returns the package-level Name constant ("devpts").
func (*FilesystemType) Name() string {
	return Name
}
    61  
    62  // GetFilesystem implements vfs.FilesystemType.GetFilesystem.
    63  func (fstype *FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
    64  	mopts := vfs.GenericParseMountOptions(opts.Data)
    65  	fsOpts := fileSystemOpts{
    66  		mode:     0555,
    67  		ptmxMode: 0666,
    68  		uid:      creds.EffectiveKUID,
    69  		gid:      creds.EffectiveKGID,
    70  	}
    71  	if modeStr, ok := mopts["mode"]; ok {
    72  		delete(mopts, "mode")
    73  		mode, err := strconv.ParseUint(modeStr, 8, 32)
    74  		if err != nil {
    75  			ctx.Warningf("tmpfs.FilesystemType.GetFilesystem: invalid mode: %q", modeStr)
    76  			return nil, nil, linuxerr.EINVAL
    77  		}
    78  		fsOpts.mode = linux.FileMode(mode & 0777)
    79  	}
    80  	if modeStr, ok := mopts["ptmxmode"]; ok {
    81  		delete(mopts, "ptmxmode")
    82  		mode, err := strconv.ParseUint(modeStr, 8, 32)
    83  		if err != nil {
    84  			ctx.Warningf("tmpfs.FilesystemType.GetFilesystem: invalid ptmxmode: %q", modeStr)
    85  			return nil, nil, linuxerr.EINVAL
    86  		}
    87  		fsOpts.ptmxMode = linux.FileMode(mode & 0777)
    88  	}
    89  	if uidStr, ok := mopts["uid"]; ok {
    90  		delete(mopts, "uid")
    91  		uid, err := strconv.ParseUint(uidStr, 10, 32)
    92  		if err != nil {
    93  			ctx.Warningf("tmpfs.FilesystemType.GetFilesystem: invalid uid: %q", uidStr)
    94  			return nil, nil, linuxerr.EINVAL
    95  		}
    96  		kuid := creds.UserNamespace.MapToKUID(auth.UID(uid))
    97  		if !kuid.Ok() {
    98  			ctx.Warningf("tmpfs.FilesystemType.GetFilesystem: unmapped uid: %d", uid)
    99  			return nil, nil, linuxerr.EINVAL
   100  		}
   101  		fsOpts.uid = kuid
   102  	}
   103  	if gidStr, ok := mopts["gid"]; ok {
   104  		delete(mopts, "gid")
   105  		gid, err := strconv.ParseUint(gidStr, 10, 32)
   106  		if err != nil {
   107  			ctx.Warningf("tmpfs.FilesystemType.GetFilesystem: invalid gid: %q", gidStr)
   108  			return nil, nil, linuxerr.EINVAL
   109  		}
   110  		kgid := creds.UserNamespace.MapToKGID(auth.GID(gid))
   111  		if !kgid.Ok() {
   112  			ctx.Warningf("tmpfs.FilesystemType.GetFilesystem: unmapped gid: %d", gid)
   113  			return nil, nil, linuxerr.EINVAL
   114  		}
   115  		fsOpts.gid = kgid
   116  	}
   117  	newinstance := false
   118  	if _, ok := mopts["newinstance"]; ok {
   119  		newinstance = true
   120  		delete(mopts, "newinstance")
   121  	}
   122  	if len(mopts) != 0 {
   123  		ctx.Warningf("devpts.FilesystemType.GetFilesystem: unknown options: %v", mopts)
   124  		return nil, nil, linuxerr.EINVAL
   125  	}
   126  
   127  	if newinstance {
   128  		fs, root, err := fstype.newFilesystem(ctx, vfsObj, creds, fsOpts)
   129  		if err != nil {
   130  			return nil, nil, err
   131  		}
   132  		return fs.VFSFilesystem(), root.VFSDentry(), nil
   133  	}
   134  
   135  	fstype.initOnce.Do(func() {
   136  		fs, root, err := fstype.newFilesystem(ctx, vfsObj, creds, fsOpts)
   137  		if err != nil {
   138  			fstype.initErr = err
   139  			return
   140  		}
   141  		fstype.fs = fs.VFSFilesystem()
   142  		fstype.root = root.VFSDentry()
   143  	})
   144  	if fstype.initErr != nil {
   145  		return nil, nil, fstype.initErr
   146  	}
   147  	fstype.fs.IncRef()
   148  	fstype.root.IncRef()
   149  	return fstype.fs, fstype.root, nil
   150  }
   151  
   152  // Release implements vfs.FilesystemType.Release.
   153  func (fstype *FilesystemType) Release(ctx context.Context) {
   154  	if fstype.fs != nil {
   155  		fstype.root.DecRef(ctx)
   156  		fstype.fs.DecRef(ctx)
   157  	}
   158  }
   159  
// filesystem is the devpts filesystem implementation. It embeds
// kernfs.Filesystem.
//
// +stateify savable
type filesystem struct {
	kernfs.Filesystem

	// devMinor is the anonymous block device minor number obtained in
	// newFilesystem and handed back to the VirtualFilesystem in Release.
	devMinor uint32
}
   166  
   167  // newFilesystem creates a new devpts filesystem with root directory and ptmx
   168  // master inode. It returns the filesystem and root Dentry.
   169  func (fstype *FilesystemType) newFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, opts fileSystemOpts) (*filesystem, *kernfs.Dentry, error) {
   170  	devMinor, err := vfsObj.GetAnonBlockDevMinor()
   171  	if err != nil {
   172  		return nil, nil, err
   173  	}
   174  
   175  	fs := &filesystem{
   176  		devMinor: devMinor,
   177  	}
   178  	fs.Filesystem.VFSFilesystem().Init(vfsObj, fstype, fs)
   179  
   180  	// Construct the root directory. This is always inode id 1.
   181  	root := &rootInode{
   182  		replicas: make(map[uint32]*replicaInode),
   183  	}
   184  	root.InodeAttrs.InitWithIDs(ctx, opts.uid, opts.gid, linux.UNNAMED_MAJOR, devMinor, 1, linux.ModeDirectory|opts.mode)
   185  	root.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
   186  	root.InitRefs()
   187  
   188  	var rootD kernfs.Dentry
   189  	rootD.InitRoot(&fs.Filesystem, root)
   190  
   191  	// Construct the pts master inode and dentry. Linux always uses inode
   192  	// id 2 for ptmx. See fs/devpts/inode.c:mknod_ptmx.
   193  	master := &masterInode{
   194  		root: root,
   195  	}
   196  	master.InodeAttrs.InitWithIDs(ctx, opts.uid, opts.gid, linux.UNNAMED_MAJOR, devMinor, 2, linux.ModeCharacterDevice|opts.ptmxMode)
   197  
   198  	// Add the master as a child of the root.
   199  	links := root.OrderedChildren.Populate(map[string]kernfs.Inode{
   200  		"ptmx": master,
   201  	})
   202  	root.IncLinks(links)
   203  
   204  	return fs, &rootD, nil
   205  }
   206  
   207  // Release implements vfs.FilesystemImpl.Release.
   208  func (fs *filesystem) Release(ctx context.Context) {
   209  	fs.Filesystem.VFSFilesystem().VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor)
   210  	fs.Filesystem.Release(ctx)
   211  }
   212  
// MountOptions implements vfs.FilesystemImpl.MountOptions.
//
// It always returns the empty string; the options accepted by
// FilesystemType.GetFilesystem are not reported back here.
func (fs *filesystem) MountOptions() string {
	return ""
}
   217  
// rootInode is the root directory inode for the devpts mounts.
//
// Its only static child is "ptmx"; the numbered replica entries are not stored
// in OrderedChildren but are served from the replicas map by Lookup and
// IterDirents.
//
// +stateify savable
type rootInode struct {
	implStatFS
	kernfs.InodeAlwaysValid
	kernfs.InodeAttrs
	kernfs.InodeDirectoryNoNewChildren
	kernfs.InodeNotAnonymous
	kernfs.InodeNotSymlink
	kernfs.InodeTemporary // This holds no meaning as this inode can't be Looked up and is always valid.
	kernfs.InodeWatches
	kernfs.OrderedChildren
	rootInodeRefs

	// locks is passed to the directory file descriptions created by Open.
	locks vfs.FileLocks

	// master is the master pty inode. Immutable.
	//
	// NOTE(review): newFilesystem does not assign this field; confirm
	// whether it is set elsewhere or is unused.
	master *masterInode

	// mu protects the fields below.
	mu sync.Mutex `state:"nosave"`

	// replicas maps pty ids to replica inodes.
	replicas map[uint32]*replicaInode

	// nextIdx is the next pty index to use. It is protected by mu:
	// allocateTerminal reads and increments it with mu held.
	//
	// TODO(b/29356795): reuse indices when ptys are closed.
	nextIdx uint32
}

// Compile-time check that *rootInode satisfies kernfs.Inode.
var _ kernfs.Inode = (*rootInode)(nil)
   251  
   252  // allocateTerminal creates a new Terminal and installs a pts node for it.
   253  func (i *rootInode) allocateTerminal(ctx context.Context, creds *auth.Credentials) (*Terminal, error) {
   254  	i.mu.Lock()
   255  	defer i.mu.Unlock()
   256  	if i.nextIdx == math.MaxUint32 {
   257  		return nil, linuxerr.ENOMEM
   258  	}
   259  	idx := i.nextIdx
   260  	i.nextIdx++
   261  
   262  	// Sanity check that replica with idx does not exist.
   263  	if _, ok := i.replicas[idx]; ok {
   264  		panic(fmt.Sprintf("pty index collision; index %d already exists", idx))
   265  	}
   266  
   267  	// Create the new terminal and replica.
   268  	t := newTerminal(idx)
   269  	replica := &replicaInode{
   270  		root: i,
   271  		t:    t,
   272  	}
   273  	// Linux always uses pty index + 3 as the inode id. See
   274  	// fs/devpts/inode.c:devpts_pty_new().
   275  	replica.InodeAttrs.Init(ctx, creds, i.InodeAttrs.DevMajor(), i.InodeAttrs.DevMinor(), uint64(idx+3), linux.ModeCharacterDevice|0600)
   276  	i.replicas[idx] = replica
   277  
   278  	return t, nil
   279  }
   280  
   281  // masterClose is called when the master end of t is closed.
   282  func (i *rootInode) masterClose(ctx context.Context, t *Terminal) {
   283  	i.mu.Lock()
   284  	defer i.mu.Unlock()
   285  
   286  	// Sanity check that replica with idx exists.
   287  	ri, ok := i.replicas[t.n]
   288  	if !ok {
   289  		panic(fmt.Sprintf("pty with index %d does not exist", t.n))
   290  	}
   291  
   292  	// Drop the ref on replica inode taken during rootInode.allocateTerminal.
   293  	ri.DecRef(ctx)
   294  	delete(i.replicas, t.n)
   295  }
   296  
   297  // Open implements kernfs.Inode.Open.
   298  func (i *rootInode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
   299  	opts.Flags &= linux.O_ACCMODE | linux.O_CREAT | linux.O_EXCL | linux.O_TRUNC |
   300  		linux.O_DIRECTORY | linux.O_NOFOLLOW | linux.O_NONBLOCK | linux.O_NOCTTY
   301  	fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), d, &i.OrderedChildren, &i.locks, &opts, kernfs.GenericDirectoryFDOptions{
   302  		SeekEnd: kernfs.SeekEndStaticEntries,
   303  	})
   304  	if err != nil {
   305  		return nil, err
   306  	}
   307  	return fd.VFSFileDescription(), nil
   308  }
   309  
   310  // Lookup implements kernfs.Inode.Lookup.
   311  func (i *rootInode) Lookup(ctx context.Context, name string) (kernfs.Inode, error) {
   312  	// Check if a static entry was looked up.
   313  	if d, err := i.OrderedChildren.Lookup(ctx, name); err == nil {
   314  		return d, nil
   315  	}
   316  
   317  	// Not a static entry.
   318  	idx, err := strconv.ParseUint(name, 10, 32)
   319  	if err != nil {
   320  		return nil, linuxerr.ENOENT
   321  	}
   322  	i.mu.Lock()
   323  	defer i.mu.Unlock()
   324  	if ri, ok := i.replicas[uint32(idx)]; ok {
   325  		ri.IncRef() // This ref is passed to the dentry upon creation via Init.
   326  		return ri, nil
   327  
   328  	}
   329  	return nil, linuxerr.ENOENT
   330  }
   331  
   332  // IterDirents implements kernfs.Inode.IterDirents.
   333  func (i *rootInode) IterDirents(ctx context.Context, mnt *vfs.Mount, cb vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) {
   334  	i.mu.Lock()
   335  	defer i.mu.Unlock()
   336  	i.InodeAttrs.TouchAtime(ctx, mnt)
   337  	if relOffset >= int64(len(i.replicas)) {
   338  		return offset, nil
   339  	}
   340  	ids := make([]int, 0, len(i.replicas))
   341  	for id := range i.replicas {
   342  		ids = append(ids, int(id))
   343  	}
   344  	sort.Ints(ids)
   345  	for _, id := range ids[relOffset:] {
   346  		dirent := vfs.Dirent{
   347  			Name:    strconv.FormatUint(uint64(id), 10),
   348  			Type:    linux.DT_CHR,
   349  			Ino:     i.replicas[uint32(id)].InodeAttrs.Ino(),
   350  			NextOff: offset + 1,
   351  		}
   352  		if err := cb.Handle(dirent); err != nil {
   353  			return offset, err
   354  		}
   355  		offset++
   356  	}
   357  	return offset, nil
   358  }
   359  
// DecRef implements kernfs.Inode.DecRef.
//
// It forwards to the embedded rootInodeRefs, supplying a callback that calls
// i.Destroy(ctx).
// NOTE(review): the callback presumably runs when the last reference is
// dropped; confirm against rootInodeRefs.
func (i *rootInode) DecRef(ctx context.Context) {
	i.rootInodeRefs.DecRef(func() { i.Destroy(ctx) })
}
   364  
// implStatFS supplies the StatFS method to inodes that embed it, such as
// rootInode.
//
// +stateify savable
type implStatFS struct{}
   367  
// StatFS implements kernfs.Inode.StatFS.
//
// It returns the result of vfs.GenericStatFS for DEVPTS_SUPER_MAGIC and
// always returns a nil error.
func (*implStatFS) StatFS(context.Context, *vfs.Filesystem) (linux.Statfs, error) {
	return vfs.GenericStatFS(linux.DEVPTS_SUPER_MAGIC), nil
}