github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fs/tty/dir.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package tty provides pseudoterminals via a devpts filesystem.
    16  package tty
    17  
    18  import (
    19  	"fmt"
    20  	"math"
    21  	"strconv"
    22  
    23  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    24  	"github.com/SagerNet/gvisor/pkg/context"
    25  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    26  	"github.com/SagerNet/gvisor/pkg/hostarch"
    27  	"github.com/SagerNet/gvisor/pkg/sentry/fs"
    28  	"github.com/SagerNet/gvisor/pkg/sentry/fs/fsutil"
    29  	"github.com/SagerNet/gvisor/pkg/sentry/kernel/auth"
    30  	"github.com/SagerNet/gvisor/pkg/sentry/socket/unix/transport"
    31  	"github.com/SagerNet/gvisor/pkg/sync"
    32  	"github.com/SagerNet/gvisor/pkg/syserror"
    33  	"github.com/SagerNet/gvisor/pkg/usermem"
    34  	"github.com/SagerNet/gvisor/pkg/waiter"
    35  )
    36  
    37  // dirInodeOperations is the root of a devpts mount.
    38  //
    39  // This indirectly manages all terminals within the mount.
    40  //
    41  // New Terminals are created by masterInodeOperations.GetFile, which registers
    42  // the replica Inode in the this directory for discovery via Lookup/Readdir. The
    43  // replica inode is unregistered when the master file is Released, as the replica
    44  // is no longer discoverable at that point.
    45  //
    46  // References on the underlying Terminal are held by masterFileOperations and
    47  // replicaInodeOperations.
    48  //
    49  // masterInodeOperations and replicaInodeOperations hold a pointer to
    50  // dirInodeOperations, which is reference counted by the refcount their
    51  // corresponding Dirents hold on their parent (this directory).
    52  //
    53  // dirInodeOperations implements fs.InodeOperations.
    54  //
    55  // +stateify savable
    56  type dirInodeOperations struct {
    57  	fsutil.InodeGenericChecker       `state:"nosave"`
    58  	fsutil.InodeIsDirAllocate        `state:"nosave"`
    59  	fsutil.InodeIsDirTruncate        `state:"nosave"`
    60  	fsutil.InodeNoExtendedAttributes `state:"nosave"`
    61  	fsutil.InodeNoopWriteOut         `state:"nosave"`
    62  	fsutil.InodeNotMappable          `state:"nosave"`
    63  	fsutil.InodeNotRenameable        `state:"nosave"`
    64  	fsutil.InodeNotSocket            `state:"nosave"`
    65  	fsutil.InodeNotSymlink           `state:"nosave"`
    66  	fsutil.InodeVirtual              `state:"nosave"`
    67  
    68  	fsutil.InodeSimpleAttributes
    69  
    70  	// msrc is the super block this directory is on.
    71  	//
    72  	// TODO(chrisko): Plumb this through instead of storing it here.
    73  	msrc *fs.MountSource
    74  
    75  	// mu protects the fields below.
    76  	mu sync.Mutex `state:"nosave"`
    77  
    78  	// master is the master PTY inode.
    79  	master *fs.Inode
    80  
    81  	// replicas contains the replica inodes reachable from the directory.
    82  	//
    83  	// A new replica is added by allocateTerminal and is removed by
    84  	// masterFileOperations.Release.
    85  	//
    86  	// A reference is held on every replica in the map.
    87  	replicas map[uint32]*fs.Inode
    88  
    89  	// dentryMap is a SortedDentryMap used to implement Readdir containing
    90  	// the master and all entries in replicas.
    91  	dentryMap *fs.SortedDentryMap
    92  
    93  	// next is the next pty index to use.
    94  	//
    95  	// TODO(b/29356795): reuse indices when ptys are closed.
    96  	next uint32
    97  }
    98  
    99  var _ fs.InodeOperations = (*dirInodeOperations)(nil)
   100  
   101  // newDir creates a new dir with a ptmx file and no terminals.
   102  func newDir(ctx context.Context, m *fs.MountSource) *fs.Inode {
   103  	d := &dirInodeOperations{
   104  		InodeSimpleAttributes: fsutil.NewInodeSimpleAttributes(ctx, fs.RootOwner, fs.FilePermsFromMode(0555), linux.DEVPTS_SUPER_MAGIC),
   105  		msrc:                  m,
   106  		replicas:              make(map[uint32]*fs.Inode),
   107  		dentryMap:             fs.NewSortedDentryMap(nil),
   108  	}
   109  	// Linux devpts uses a default mode of 0000 for ptmx which can be
   110  	// changed with the ptmxmode mount option. However, that default is not
   111  	// useful here (since we'd *always* need the mount option, so it is
   112  	// accessible by default).
   113  	d.master = newMasterInode(ctx, d, fs.RootOwner, fs.FilePermsFromMode(0666))
   114  	d.dentryMap.Add("ptmx", fs.DentAttr{
   115  		Type:    d.master.StableAttr.Type,
   116  		InodeID: d.master.StableAttr.InodeID,
   117  	})
   118  
   119  	return fs.NewInode(ctx, d, m, fs.StableAttr{
   120  		DeviceID: ptsDevice.DeviceID(),
   121  		// N.B. Linux always uses inode id 1 for the directory. See
   122  		// fs/devpts/inode.c:devpts_fill_super.
   123  		//
   124  		// TODO(b/75267214): Since ptsDevice must be shared between
   125  		// different mounts, we must not assign fixed numbers.
   126  		InodeID:   ptsDevice.NextIno(),
   127  		BlockSize: hostarch.PageSize,
   128  		Type:      fs.Directory,
   129  	})
   130  }
   131  
   132  // Release implements fs.InodeOperations.Release.
   133  func (d *dirInodeOperations) Release(ctx context.Context) {
   134  	d.mu.Lock()
   135  	defer d.mu.Unlock()
   136  
   137  	d.master.DecRef(ctx)
   138  	if len(d.replicas) != 0 {
   139  		panic(fmt.Sprintf("devpts directory still contains active terminals: %+v", d))
   140  	}
   141  }
   142  
   143  // Lookup implements fs.InodeOperations.Lookup.
   144  func (d *dirInodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string) (*fs.Dirent, error) {
   145  	d.mu.Lock()
   146  	defer d.mu.Unlock()
   147  
   148  	// Master?
   149  	if name == "ptmx" {
   150  		d.master.IncRef()
   151  		return fs.NewDirent(ctx, d.master, name), nil
   152  	}
   153  
   154  	// Replica number?
   155  	n, err := strconv.ParseUint(name, 10, 32)
   156  	if err != nil {
   157  		// Not found.
   158  		return nil, syserror.ENOENT
   159  	}
   160  
   161  	s, ok := d.replicas[uint32(n)]
   162  	if !ok {
   163  		return nil, syserror.ENOENT
   164  	}
   165  
   166  	s.IncRef()
   167  	return fs.NewDirent(ctx, s, name), nil
   168  }
   169  
   170  // Create implements fs.InodeOperations.Create.
   171  //
   172  // Creation is never allowed.
   173  func (d *dirInodeOperations) Create(ctx context.Context, dir *fs.Inode, name string, flags fs.FileFlags, perm fs.FilePermissions) (*fs.File, error) {
   174  	return nil, linuxerr.EACCES
   175  }
   176  
   177  // CreateDirectory implements fs.InodeOperations.CreateDirectory.
   178  //
   179  // Creation is never allowed.
   180  func (d *dirInodeOperations) CreateDirectory(ctx context.Context, dir *fs.Inode, name string, perm fs.FilePermissions) error {
   181  	return linuxerr.EACCES
   182  }
   183  
   184  // CreateLink implements fs.InodeOperations.CreateLink.
   185  //
   186  // Creation is never allowed.
   187  func (d *dirInodeOperations) CreateLink(ctx context.Context, dir *fs.Inode, oldname, newname string) error {
   188  	return linuxerr.EACCES
   189  }
   190  
   191  // CreateHardLink implements fs.InodeOperations.CreateHardLink.
   192  //
   193  // Creation is never allowed.
   194  func (d *dirInodeOperations) CreateHardLink(ctx context.Context, dir *fs.Inode, target *fs.Inode, name string) error {
   195  	return linuxerr.EACCES
   196  }
   197  
   198  // CreateFifo implements fs.InodeOperations.CreateFifo.
   199  //
   200  // Creation is never allowed.
   201  func (d *dirInodeOperations) CreateFifo(ctx context.Context, dir *fs.Inode, name string, perm fs.FilePermissions) error {
   202  	return linuxerr.EACCES
   203  }
   204  
   205  // Remove implements fs.InodeOperations.Remove.
   206  //
   207  // Removal is never allowed.
   208  func (d *dirInodeOperations) Remove(ctx context.Context, dir *fs.Inode, name string) error {
   209  	return linuxerr.EPERM
   210  }
   211  
   212  // RemoveDirectory implements fs.InodeOperations.RemoveDirectory.
   213  //
   214  // Removal is never allowed.
   215  func (d *dirInodeOperations) RemoveDirectory(ctx context.Context, dir *fs.Inode, name string) error {
   216  	return linuxerr.EPERM
   217  }
   218  
   219  // Bind implements fs.InodeOperations.Bind.
   220  func (d *dirInodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, data transport.BoundEndpoint, perm fs.FilePermissions) (*fs.Dirent, error) {
   221  	return nil, linuxerr.EPERM
   222  }
   223  
   224  // GetFile implements fs.InodeOperations.GetFile.
   225  func (d *dirInodeOperations) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
   226  	return fs.NewFile(ctx, dirent, flags, &dirFileOperations{di: d}), nil
   227  }
   228  
   229  // allocateTerminal creates a new Terminal and installs a pts node for it.
   230  //
   231  // The caller must call DecRef when done with the returned Terminal.
   232  func (d *dirInodeOperations) allocateTerminal(ctx context.Context) (*Terminal, error) {
   233  	d.mu.Lock()
   234  	defer d.mu.Unlock()
   235  
   236  	n := d.next
   237  	if n == math.MaxUint32 {
   238  		return nil, syserror.ENOMEM
   239  	}
   240  
   241  	if _, ok := d.replicas[n]; ok {
   242  		panic(fmt.Sprintf("pty index collision; index %d already exists", n))
   243  	}
   244  
   245  	t := newTerminal(ctx, d, n)
   246  	d.next++
   247  
   248  	// The reference returned by newTerminal is returned to the caller.
   249  	// Take another for the replica inode.
   250  	t.IncRef()
   251  
   252  	// Create a pts node. The owner is based on the context that opens
   253  	// ptmx.
   254  	creds := auth.CredentialsFromContext(ctx)
   255  	uid, gid := creds.EffectiveKUID, creds.EffectiveKGID
   256  	replica := newReplicaInode(ctx, d, t, fs.FileOwner{uid, gid}, fs.FilePermsFromMode(0666))
   257  
   258  	d.replicas[n] = replica
   259  	d.dentryMap.Add(strconv.FormatUint(uint64(n), 10), fs.DentAttr{
   260  		Type:    replica.StableAttr.Type,
   261  		InodeID: replica.StableAttr.InodeID,
   262  	})
   263  
   264  	return t, nil
   265  }
   266  
   267  // masterClose is called when the master end of t is closed.
   268  func (d *dirInodeOperations) masterClose(ctx context.Context, t *Terminal) {
   269  	d.mu.Lock()
   270  	defer d.mu.Unlock()
   271  
   272  	// The replica end disappears from the directory when the master end is
   273  	// closed, even if the replica end is open elsewhere.
   274  	//
   275  	// N.B. since we're using a backdoor method to remove a directory entry
   276  	// we won't properly fire inotify events like Linux would.
   277  	s, ok := d.replicas[t.n]
   278  	if !ok {
   279  		panic(fmt.Sprintf("Terminal %+v doesn't exist in %+v?", t, d))
   280  	}
   281  
   282  	s.DecRef(ctx)
   283  	delete(d.replicas, t.n)
   284  	d.dentryMap.Remove(strconv.FormatUint(uint64(t.n), 10))
   285  }
   286  
   287  // dirFileOperations are the fs.FileOperations for the directory.
   288  //
   289  // This is nearly identical to fsutil.DirFileOperations, except that it takes
   290  // df.di.mu in IterateDir.
   291  //
   292  // +stateify savable
   293  type dirFileOperations struct {
   294  	fsutil.FileNoopRelease          `state:"nosave"`
   295  	fsutil.FileGenericSeek          `state:"nosave"`
   296  	fsutil.FileNoFsync              `state:"nosave"`
   297  	fsutil.FileNoopFlush            `state:"nosave"`
   298  	fsutil.FileNoMMap               `state:"nosave"`
   299  	fsutil.FileNoIoctl              `state:"nosave"`
   300  	fsutil.FileNoSplice             `state:"nosave"`
   301  	fsutil.FileUseInodeUnstableAttr `state:"nosave"`
   302  	waiter.AlwaysReady              `state:"nosave"`
   303  
   304  	// di is the inode operations.
   305  	di *dirInodeOperations
   306  
   307  	// dirCursor contains the name of the last directory entry that was
   308  	// serialized.
   309  	dirCursor string
   310  }
   311  
   312  var _ fs.FileOperations = (*dirFileOperations)(nil)
   313  
   314  // IterateDir implements DirIterator.IterateDir.
   315  func (df *dirFileOperations) IterateDir(ctx context.Context, d *fs.Dirent, dirCtx *fs.DirCtx, offset int) (int, error) {
   316  	df.di.mu.Lock()
   317  	defer df.di.mu.Unlock()
   318  
   319  	n, err := fs.GenericReaddir(dirCtx, df.di.dentryMap)
   320  	return offset + n, err
   321  }
   322  
   323  // Readdir implements FileOperations.Readdir.
   324  func (df *dirFileOperations) Readdir(ctx context.Context, file *fs.File, serializer fs.DentrySerializer) (int64, error) {
   325  	root := fs.RootFromContext(ctx)
   326  	if root != nil {
   327  		defer root.DecRef(ctx)
   328  	}
   329  	dirCtx := &fs.DirCtx{
   330  		Serializer: serializer,
   331  		DirCursor:  &df.dirCursor,
   332  	}
   333  	return fs.DirentReaddir(ctx, file.Dirent, df, root, dirCtx, file.Offset())
   334  }
   335  
   336  // Read implements FileOperations.Read
   337  func (df *dirFileOperations) Read(context.Context, *fs.File, usermem.IOSequence, int64) (int64, error) {
   338  	return 0, syserror.EISDIR
   339  }
   340  
   341  // Write implements FileOperations.Write.
   342  func (df *dirFileOperations) Write(context.Context, *fs.File, usermem.IOSequence, int64) (int64, error) {
   343  	return 0, syserror.EISDIR
   344  }