github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fs/ramfs/dir.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package ramfs provides the fundamentals for a simple in-memory filesystem.
    16  package ramfs
    17  
    18  import (
    19  	"fmt"
    20  
    21  	"golang.org/x/sys/unix"
    22  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    23  	"github.com/SagerNet/gvisor/pkg/context"
    24  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    25  	"github.com/SagerNet/gvisor/pkg/sentry/fs"
    26  	"github.com/SagerNet/gvisor/pkg/sentry/fs/fsutil"
    27  	"github.com/SagerNet/gvisor/pkg/sentry/socket/unix/transport"
    28  	"github.com/SagerNet/gvisor/pkg/sync"
    29  	"github.com/SagerNet/gvisor/pkg/syserror"
    30  )
    31  
// CreateOps represents operations to create different file types.
//
// Every field is an optional hook. The Dir methods in this file that create
// entries return EACCES when the CreateOps pointer, or the particular hook
// they need, is nil. In each hook, dir is the inode argument forwarded from
// the Dir method that invoked it.
type CreateOps struct {
	// NewDir creates a new directory.
	NewDir func(ctx context.Context, dir *fs.Inode, perms fs.FilePermissions) (*fs.Inode, error)

	// NewFile creates a new file.
	NewFile func(ctx context.Context, dir *fs.Inode, perms fs.FilePermissions) (*fs.Inode, error)

	// NewSymlink creates a new symlink with permissions 0777. target is the
	// path the symlink refers to.
	NewSymlink func(ctx context.Context, dir *fs.Inode, target string) (*fs.Inode, error)

	// NewBoundEndpoint creates a new socket backed by the bound endpoint ep.
	NewBoundEndpoint func(ctx context.Context, dir *fs.Inode, ep transport.BoundEndpoint, perms fs.FilePermissions) (*fs.Inode, error)

	// NewFifo creates a new fifo.
	NewFifo func(ctx context.Context, dir *fs.Inode, perm fs.FilePermissions) (*fs.Inode, error)
}
    49  
// Dir represents a single directory in the filesystem.
//
// It keeps its entries in memory: children maps names to inodes and
// dentryMap mirrors the same entries in sorted form for directory iteration.
//
// +stateify savable
type Dir struct {
	fsutil.InodeGenericChecker `state:"nosave"`
	fsutil.InodeIsDirAllocate  `state:"nosave"`
	fsutil.InodeIsDirTruncate  `state:"nosave"`
	fsutil.InodeNoopWriteOut   `state:"nosave"`
	fsutil.InodeNotMappable    `state:"nosave"`
	fsutil.InodeNotSocket      `state:"nosave"`
	fsutil.InodeNotSymlink     `state:"nosave"`
	fsutil.InodeVirtual        `state:"nosave"`

	fsutil.InodeSimpleAttributes
	fsutil.InodeSimpleExtendedAttributes

	// CreateOps may be provided.
	//
	// These may only be modified during initialization (while the application
	// is not running). No synchronization is performed when accessing these
	// operations during syscalls.
	*CreateOps `state:"nosave"`

	// mu protects the fields below.
	mu sync.Mutex `state:"nosave"`

	// children are inodes that are in this directory.  A reference is held
	// on each inode while it is in the map.
	children map[string]*fs.Inode

	// dentryMap is a sortedDentryMap containing entries for all children.
	// Its entries are kept up-to-date with d.children.
	dentryMap *fs.SortedDentryMap
}

// Compile-time check that *Dir implements fs.InodeOperations.
var _ fs.InodeOperations = (*Dir)(nil)
    86  
    87  // NewDir returns a new Dir with the given contents and attributes. A reference
    88  // on each fs.Inode in the `contents` map will be donated to this Dir.
    89  func NewDir(ctx context.Context, contents map[string]*fs.Inode, owner fs.FileOwner, perms fs.FilePermissions) *Dir {
    90  	d := &Dir{
    91  		InodeSimpleAttributes: fsutil.NewInodeSimpleAttributes(ctx, owner, perms, linux.RAMFS_MAGIC),
    92  	}
    93  
    94  	if contents == nil {
    95  		contents = make(map[string]*fs.Inode)
    96  	}
    97  	d.children = contents
    98  
    99  	// Build the entries map ourselves, rather than calling addChildLocked,
   100  	// because it will be faster.
   101  	entries := make(map[string]fs.DentAttr, len(contents))
   102  	for name, inode := range contents {
   103  		entries[name] = fs.DentAttr{
   104  			Type:    inode.StableAttr.Type,
   105  			InodeID: inode.StableAttr.InodeID,
   106  		}
   107  	}
   108  	d.dentryMap = fs.NewSortedDentryMap(entries)
   109  
   110  	// Directories have an extra link, corresponding to '.'.
   111  	d.AddLink()
   112  
   113  	return d
   114  }
   115  
   116  // addChildLocked add the child inode, inheriting its reference.
   117  func (d *Dir) addChildLocked(ctx context.Context, name string, inode *fs.Inode) {
   118  	d.children[name] = inode
   119  	d.dentryMap.Add(name, fs.DentAttr{
   120  		Type:    inode.StableAttr.Type,
   121  		InodeID: inode.StableAttr.InodeID,
   122  	})
   123  
   124  	// If the child is a directory, increment this dir's link count,
   125  	// corresponding to '..' from the subdirectory.
   126  	if fs.IsDir(inode.StableAttr) {
   127  		d.AddLink()
   128  		// ctime updated below.
   129  	}
   130  
   131  	// Given we're now adding this inode to the directory we must also
   132  	// increase its link count. Similarly we decrement it in removeChildLocked.
   133  	//
   134  	// Changing link count updates ctime.
   135  	inode.AddLink()
   136  	inode.InodeOperations.NotifyStatusChange(ctx)
   137  
   138  	// We've change the directory. This always updates our mtime and ctime.
   139  	d.NotifyModificationAndStatusChange(ctx)
   140  }
   141  
// AddChild adds a child to this dir, inheriting its reference.
//
// It acquires d.mu for the duration of the insertion and delegates to
// addChildLocked, which also updates link counts and timestamps.
func (d *Dir) AddChild(ctx context.Context, name string, inode *fs.Inode) {
	d.mu.Lock()
	defer d.mu.Unlock()
	d.addChildLocked(ctx, name, inode)
}
   148  
// FindChild returns (child, true) if the directory contains name, and
// (nil, false) otherwise.
//
// The lookup is done under d.mu. No additional reference is taken on the
// returned inode.
func (d *Dir) FindChild(name string) (*fs.Inode, bool) {
	d.mu.Lock()
	defer d.mu.Unlock()
	child, ok := d.children[name]
	return child, ok
}
   156  
   157  // Children returns the names and DentAttrs of all children. It can be used to
   158  // implement Readdir for types that embed ramfs.Dir.
   159  func (d *Dir) Children() ([]string, map[string]fs.DentAttr) {
   160  	d.mu.Lock()
   161  	defer d.mu.Unlock()
   162  
   163  	// Return a copy to prevent callers from modifying our children.
   164  	names, entries := d.dentryMap.GetAll()
   165  	namesCopy := make([]string, len(names))
   166  	copy(namesCopy, names)
   167  
   168  	entriesCopy := make(map[string]fs.DentAttr)
   169  	for k, v := range entries {
   170  		entriesCopy[k] = v
   171  	}
   172  
   173  	return namesCopy, entriesCopy
   174  }
   175  
   176  // removeChildLocked attempts to remove an entry from this directory. It
   177  // returns the removed fs.Inode along with its reference, which callers are
   178  // responsible for decrementing.
   179  func (d *Dir) removeChildLocked(ctx context.Context, name string) (*fs.Inode, error) {
   180  	inode, ok := d.children[name]
   181  	if !ok {
   182  		return nil, linuxerr.EACCES
   183  	}
   184  
   185  	delete(d.children, name)
   186  	d.dentryMap.Remove(name)
   187  	d.NotifyModification(ctx)
   188  
   189  	// If the child was a subdirectory, then we must decrement this dir's
   190  	// link count which was the child's ".." directory entry.
   191  	if fs.IsDir(inode.StableAttr) {
   192  		d.DropLink()
   193  		// ctime changed below.
   194  	}
   195  
   196  	// Given we're now removing this inode to the directory we must also
   197  	// decrease its link count. Similarly it is increased in addChildLocked.
   198  	//
   199  	// Changing link count updates ctime.
   200  	inode.DropLink()
   201  	inode.InodeOperations.NotifyStatusChange(ctx)
   202  
   203  	// We've change the directory. This always updates our mtime and ctime.
   204  	d.NotifyModificationAndStatusChange(ctx)
   205  
   206  	return inode, nil
   207  }
   208  
   209  // Remove removes the named non-directory.
   210  func (d *Dir) Remove(ctx context.Context, _ *fs.Inode, name string) error {
   211  	if len(name) > linux.NAME_MAX {
   212  		return linuxerr.ENAMETOOLONG
   213  	}
   214  
   215  	d.mu.Lock()
   216  	defer d.mu.Unlock()
   217  	inode, err := d.removeChildLocked(ctx, name)
   218  	if err != nil {
   219  		return err
   220  	}
   221  
   222  	// Remove our reference on the inode.
   223  	inode.DecRef(ctx)
   224  	return nil
   225  }
   226  
   227  // RemoveDirectory removes the named directory.
   228  func (d *Dir) RemoveDirectory(ctx context.Context, _ *fs.Inode, name string) error {
   229  	if len(name) > linux.NAME_MAX {
   230  		return linuxerr.ENAMETOOLONG
   231  	}
   232  
   233  	d.mu.Lock()
   234  	defer d.mu.Unlock()
   235  
   236  	// Get the child and make sure it is not empty.
   237  	childInode, err := d.walkLocked(ctx, name)
   238  	if err != nil {
   239  		return err
   240  	}
   241  	if ok, err := hasChildren(ctx, childInode); err != nil {
   242  		return err
   243  	} else if ok {
   244  		return linuxerr.ENOTEMPTY
   245  	}
   246  
   247  	// Child was empty. Proceed with removal.
   248  	inode, err := d.removeChildLocked(ctx, name)
   249  	if err != nil {
   250  		return err
   251  	}
   252  
   253  	// Remove our reference on the inode.
   254  	inode.DecRef(ctx)
   255  
   256  	return nil
   257  }
   258  
   259  // Lookup loads an inode at p into a Dirent. It returns the fs.Dirent along
   260  // with a reference.
   261  func (d *Dir) Lookup(ctx context.Context, _ *fs.Inode, p string) (*fs.Dirent, error) {
   262  	if len(p) > linux.NAME_MAX {
   263  		return nil, linuxerr.ENAMETOOLONG
   264  	}
   265  
   266  	d.mu.Lock()
   267  	defer d.mu.Unlock()
   268  
   269  	inode, err := d.walkLocked(ctx, p)
   270  	if err != nil {
   271  		return nil, err
   272  	}
   273  
   274  	// Take a reference on the inode before returning it.  This reference
   275  	// is owned by the dirent we are about to create.
   276  	inode.IncRef()
   277  	return fs.NewDirent(ctx, inode, p), nil
   278  }
   279  
   280  // walkLocked must be called with d.mu held.
   281  func (d *Dir) walkLocked(ctx context.Context, p string) (*fs.Inode, error) {
   282  	// Lookup a child node.
   283  	if inode, ok := d.children[p]; ok {
   284  		return inode, nil
   285  	}
   286  
   287  	// fs.InodeOperations.Lookup returns syserror.ENOENT if p
   288  	// does not exist.
   289  	return nil, syserror.ENOENT
   290  }
   291  
   292  // createInodeOperationsCommon creates a new child node at this dir by calling
   293  // makeInodeOperations. It is the common logic for creating a new child.
   294  func (d *Dir) createInodeOperationsCommon(ctx context.Context, name string, makeInodeOperations func() (*fs.Inode, error)) (*fs.Inode, error) {
   295  	if len(name) > linux.NAME_MAX {
   296  		return nil, linuxerr.ENAMETOOLONG
   297  	}
   298  
   299  	d.mu.Lock()
   300  	defer d.mu.Unlock()
   301  
   302  	inode, err := makeInodeOperations()
   303  	if err != nil {
   304  		return nil, err
   305  	}
   306  
   307  	d.addChildLocked(ctx, name, inode)
   308  
   309  	return inode, nil
   310  }
   311  
   312  // Create creates a new Inode with the given name and returns its File.
   313  func (d *Dir) Create(ctx context.Context, dir *fs.Inode, name string, flags fs.FileFlags, perms fs.FilePermissions) (*fs.File, error) {
   314  	if d.CreateOps == nil || d.CreateOps.NewFile == nil {
   315  		return nil, linuxerr.EACCES
   316  	}
   317  
   318  	inode, err := d.createInodeOperationsCommon(ctx, name, func() (*fs.Inode, error) {
   319  		return d.NewFile(ctx, dir, perms)
   320  	})
   321  	if err != nil {
   322  		return nil, err
   323  	}
   324  
   325  	// Take an extra ref on inode, which will be owned by the dirent.
   326  	inode.IncRef()
   327  
   328  	// Create the Dirent and corresponding file.
   329  	created := fs.NewDirent(ctx, inode, name)
   330  	defer created.DecRef(ctx)
   331  	return created.Inode.GetFile(ctx, created, flags)
   332  }
   333  
   334  // CreateLink returns a new link.
   335  func (d *Dir) CreateLink(ctx context.Context, dir *fs.Inode, oldname, newname string) error {
   336  	if d.CreateOps == nil || d.CreateOps.NewSymlink == nil {
   337  		return linuxerr.EACCES
   338  	}
   339  	_, err := d.createInodeOperationsCommon(ctx, newname, func() (*fs.Inode, error) {
   340  		return d.NewSymlink(ctx, dir, oldname)
   341  	})
   342  	return err
   343  }
   344  
// CreateHardLink creates a new hard link: target is added to this directory
// under name.
//
// It returns ENAMETOOLONG when name exceeds linux.NAME_MAX. The dir argument
// is not used.
func (d *Dir) CreateHardLink(ctx context.Context, dir *fs.Inode, target *fs.Inode, name string) error {
	if len(name) > linux.NAME_MAX {
		return linuxerr.ENAMETOOLONG
	}

	d.mu.Lock()
	defer d.mu.Unlock()

	// Take an extra reference on the inode and add it to our children.
	// addChildLocked inherits this reference.
	target.IncRef()

	// The link count will be incremented in addChildLocked.
	d.addChildLocked(ctx, name, target)

	return nil
}
   362  
   363  // CreateDirectory returns a new subdirectory.
   364  func (d *Dir) CreateDirectory(ctx context.Context, dir *fs.Inode, name string, perms fs.FilePermissions) error {
   365  	if d.CreateOps == nil || d.CreateOps.NewDir == nil {
   366  		return linuxerr.EACCES
   367  	}
   368  	_, err := d.createInodeOperationsCommon(ctx, name, func() (*fs.Inode, error) {
   369  		return d.NewDir(ctx, dir, perms)
   370  	})
   371  	return err
   372  }
   373  
   374  // Bind implements fs.InodeOperations.Bind.
   375  func (d *Dir) Bind(ctx context.Context, dir *fs.Inode, name string, ep transport.BoundEndpoint, perms fs.FilePermissions) (*fs.Dirent, error) {
   376  	if d.CreateOps == nil || d.CreateOps.NewBoundEndpoint == nil {
   377  		return nil, linuxerr.EACCES
   378  	}
   379  	inode, err := d.createInodeOperationsCommon(ctx, name, func() (*fs.Inode, error) {
   380  		return d.NewBoundEndpoint(ctx, dir, ep, perms)
   381  	})
   382  	if err == unix.EEXIST {
   383  		return nil, unix.EADDRINUSE
   384  	}
   385  	if err != nil {
   386  		return nil, err
   387  	}
   388  	// Take another ref on inode which will be donated to the new dirent.
   389  	inode.IncRef()
   390  	return fs.NewDirent(ctx, inode, name), nil
   391  }
   392  
   393  // CreateFifo implements fs.InodeOperations.CreateFifo.
   394  func (d *Dir) CreateFifo(ctx context.Context, dir *fs.Inode, name string, perms fs.FilePermissions) error {
   395  	if d.CreateOps == nil || d.CreateOps.NewFifo == nil {
   396  		return linuxerr.EACCES
   397  	}
   398  	_, err := d.createInodeOperationsCommon(ctx, name, func() (*fs.Inode, error) {
   399  		return d.NewFifo(ctx, dir, perms)
   400  	})
   401  	return err
   402  }
   403  
// GetFile implements fs.InodeOperations.GetFile.
//
// It returns a new fs.File for dirent backed by a dirFileOperations that
// refers to this directory. Pread is always set on the flags used for the
// file, regardless of what the caller passed.
func (d *Dir) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
	flags.Pread = true
	return fs.NewFile(ctx, dirent, flags, &dirFileOperations{dir: d}), nil
}
   409  
// Rename implements fs.InodeOperations.Rename.
//
// It forwards to the package-level Rename using the InodeOperations of the
// old and new parent inodes. The receiver and the inode argument are not
// used.
func (*Dir) Rename(ctx context.Context, inode *fs.Inode, oldParent *fs.Inode, oldName string, newParent *fs.Inode, newName string, replacement bool) error {
	return Rename(ctx, oldParent.InodeOperations, oldName, newParent.InodeOperations, newName, replacement)
}
   414  
   415  // Release implements fs.InodeOperation.Release.
   416  func (d *Dir) Release(ctx context.Context) {
   417  	// Drop references on all children.
   418  	d.mu.Lock()
   419  	for _, i := range d.children {
   420  		i.DecRef(ctx)
   421  	}
   422  	d.mu.Unlock()
   423  }
   424  
// dirFileOperations implements fs.FileOperations for a ramfs directory.
//
// +stateify savable
type dirFileOperations struct {
	fsutil.DirFileOperations        `state:"nosave"`
	fsutil.FileUseInodeUnstableAttr `state:"nosave"`

	// dirCursor contains the name of the last directory entry that was
	// serialized. Seek and Readdir both pass a pointer to it to the fs
	// helpers they call.
	dirCursor string

	// dir is the ramfs dir that this file corresponds to.
	dir *Dir
}

// Compile-time check that *dirFileOperations implements fs.FileOperations.
var _ fs.FileOperations = (*dirFileOperations)(nil)
   441  
// Seek implements fs.FileOperations.Seek.
//
// It delegates to fsutil.SeekWithDirCursor, passing this file's directory
// cursor.
func (dfo *dirFileOperations) Seek(ctx context.Context, file *fs.File, whence fs.SeekWhence, offset int64) (int64, error) {
	return fsutil.SeekWithDirCursor(ctx, file, whence, offset, &dfo.dirCursor)
}
   446  
// IterateDir implements DirIterator.IterateDir.
//
// It runs fs.GenericReaddir over the directory's dentry map while holding the
// directory's mutex, and returns offset advanced by the count that
// GenericReaddir reports.
func (dfo *dirFileOperations) IterateDir(ctx context.Context, d *fs.Dirent, dirCtx *fs.DirCtx, offset int) (int, error) {
	dfo.dir.mu.Lock()
	defer dfo.dir.mu.Unlock()

	n, err := fs.GenericReaddir(dirCtx, dfo.dir.dentryMap)
	return offset + n, err
}
   455  
   456  // Readdir implements FileOperations.Readdir.
   457  func (dfo *dirFileOperations) Readdir(ctx context.Context, file *fs.File, serializer fs.DentrySerializer) (int64, error) {
   458  	root := fs.RootFromContext(ctx)
   459  	if root != nil {
   460  		defer root.DecRef(ctx)
   461  	}
   462  	dirCtx := &fs.DirCtx{
   463  		Serializer: serializer,
   464  		DirCursor:  &dfo.dirCursor,
   465  	}
   466  	dfo.dir.InodeSimpleAttributes.NotifyAccess(ctx)
   467  	return fs.DirentReaddir(ctx, file.Dirent, dfo, root, dirCtx, file.Offset())
   468  }
   469  
   470  // hasChildren is a helper method that determines whether an arbitrary inode
   471  // (not necessarily ramfs) has any children.
   472  func hasChildren(ctx context.Context, inode *fs.Inode) (bool, error) {
   473  	// Take an extra ref on inode which will be given to the dirent and
   474  	// dropped when that dirent is destroyed.
   475  	inode.IncRef()
   476  	d := fs.NewTransientDirent(inode)
   477  	defer d.DecRef(ctx)
   478  
   479  	file, err := inode.GetFile(ctx, d, fs.FileFlags{Read: true})
   480  	if err != nil {
   481  		return false, err
   482  	}
   483  	defer file.DecRef(ctx)
   484  
   485  	ser := &fs.CollectEntriesSerializer{}
   486  	if err := file.Readdir(ctx, ser); err != nil {
   487  		return false, err
   488  	}
   489  	// We will always write "." and "..", so ignore those two.
   490  	if ser.Written() > 2 {
   491  		return true, nil
   492  	}
   493  	return false, nil
   494  }
   495  
   496  // Rename renames from a *ramfs.Dir to another *ramfs.Dir.
   497  func Rename(ctx context.Context, oldParent fs.InodeOperations, oldName string, newParent fs.InodeOperations, newName string, replacement bool) error {
   498  	op, ok := oldParent.(*Dir)
   499  	if !ok {
   500  		return linuxerr.EXDEV
   501  	}
   502  	np, ok := newParent.(*Dir)
   503  	if !ok {
   504  		return linuxerr.EXDEV
   505  	}
   506  	if len(newName) > linux.NAME_MAX {
   507  		return linuxerr.ENAMETOOLONG
   508  	}
   509  
   510  	np.mu.Lock()
   511  	defer np.mu.Unlock()
   512  
   513  	// Is this is an overwriting rename?
   514  	if replacement {
   515  		replaced, ok := np.children[newName]
   516  		if !ok {
   517  			panic(fmt.Sprintf("Dirent claims rename is replacement, but %q is missing from %+v", newName, np))
   518  		}
   519  
   520  		// Non-empty directories cannot be replaced.
   521  		if fs.IsDir(replaced.StableAttr) {
   522  			if ok, err := hasChildren(ctx, replaced); err != nil {
   523  				return err
   524  			} else if ok {
   525  				return linuxerr.ENOTEMPTY
   526  			}
   527  		}
   528  
   529  		// Remove the replaced child and drop our reference on it.
   530  		inode, err := np.removeChildLocked(ctx, newName)
   531  		if err != nil {
   532  			return err
   533  		}
   534  		inode.DecRef(ctx)
   535  	}
   536  
   537  	// Be careful, we may have already grabbed this mutex above.
   538  	if op != np {
   539  		op.mu.Lock()
   540  		defer op.mu.Unlock()
   541  	}
   542  
   543  	// Do the swap.
   544  	n := op.children[oldName]
   545  	op.removeChildLocked(ctx, oldName)
   546  	np.addChildLocked(ctx, newName, n)
   547  
   548  	return nil
   549  }