github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fs/file_overlay.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package fs
    16  
    17  import (
    18  	"io"
    19  
    20  	"github.com/SagerNet/gvisor/pkg/context"
    21  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    22  	"github.com/SagerNet/gvisor/pkg/refs"
    23  	"github.com/SagerNet/gvisor/pkg/sentry/arch"
    24  	"github.com/SagerNet/gvisor/pkg/sentry/memmap"
    25  	"github.com/SagerNet/gvisor/pkg/sync"
    26  	"github.com/SagerNet/gvisor/pkg/syserror"
    27  	"github.com/SagerNet/gvisor/pkg/usermem"
    28  	"github.com/SagerNet/gvisor/pkg/waiter"
    29  )
    30  
    31  // overlayFile gets a handle to a file from the upper or lower filesystem
    32  // in an overlay. The caller is responsible for calling File.DecRef on
    33  // the returned file.
    34  func overlayFile(ctx context.Context, inode *Inode, flags FileFlags) (*File, error) {
    35  	// Do a song and dance to eventually get to:
    36  	//
    37  	//   File -> single reference
    38  	//   Dirent -> single reference
    39  	//   Inode -> multiple references
    40  	//
    41  	// So that File.DecRef() -> File.destroy -> Dirent.DecRef -> Dirent.destroy,
    42  	// and both the transitory File and Dirent can be GC'ed but the Inode
    43  	// remains.
    44  
    45  	// Take another reference on the Inode.
    46  	inode.IncRef()
    47  
    48  	// Start with a single reference on the Dirent. It inherits the reference
    49  	// we just took on the Inode above.
    50  	dirent := NewTransientDirent(inode)
    51  
    52  	// Get a File. This will take another reference on the Dirent.
    53  	f, err := inode.GetFile(ctx, dirent, flags)
    54  
    55  	// Drop the extra reference on the Dirent. Now there's only one reference
    56  	// on the dirent, either owned by f (if non-nil), or the Dirent is about
    57  	// to be destroyed (if GetFile failed).
    58  	dirent.DecRef(ctx)
    59  
    60  	return f, err
    61  }
    62  
// overlayFileOperations implements FileOperations for a file in an overlay.
// It forwards each operation to a File opened on the upper and/or lower
// filesystem, as selected by the individual methods.
//
// +stateify savable
type overlayFileOperations struct {
	// upperMu protects upper below. In contrast, lower is stable and may be
	// read without holding upperMu.
	upperMu sync.Mutex `state:"nosave"`

	// We can't share Files in upper and lower filesystems between all Files
	// in an overlay because some file systems expect to get distinct handles
	// that are not consistent with each other on open(2).
	//
	// So we lazily acquire an upper File when the overlayEntry acquires an
	// upper Inode (it might have one from the start). This synchronizes with
	// copy up.
	//
	// If upper is non-nil and this is not a directory, then lower is ignored.
	//
	// For directories, upper and lower are ignored because it is always
	// necessary to acquire new directory handles so that the directory cursors
	// of the upper and lower Files are not exhausted.
	//
	// Either field may be nil; Release drops the reference held on each
	// non-nil File.
	upper *File
	lower *File

	// dirCursor is a directory cursor for a directory in an overlay. It is
	// protected by File.mu of the owning file, which is held during
	// Readdir and Seek calls. Seek resets it to "" when rewinding a
	// directory to offset 0.
	dirCursor string
}
    91  
    92  // Release implements FileOperations.Release.
    93  func (f *overlayFileOperations) Release(ctx context.Context) {
    94  	if f.upper != nil {
    95  		f.upper.DecRef(ctx)
    96  	}
    97  	if f.lower != nil {
    98  		f.lower.DecRef(ctx)
    99  	}
   100  }
   101  
   102  // EventRegister implements FileOperations.EventRegister.
   103  func (f *overlayFileOperations) EventRegister(we *waiter.Entry, mask waiter.EventMask) {
   104  	f.upperMu.Lock()
   105  	defer f.upperMu.Unlock()
   106  	if f.upper != nil {
   107  		f.upper.EventRegister(we, mask)
   108  		return
   109  	}
   110  	f.lower.EventRegister(we, mask)
   111  }
   112  
   113  // EventUnregister implements FileOperations.Unregister.
   114  func (f *overlayFileOperations) EventUnregister(we *waiter.Entry) {
   115  	f.upperMu.Lock()
   116  	defer f.upperMu.Unlock()
   117  	if f.upper != nil {
   118  		f.upper.EventUnregister(we)
   119  		return
   120  	}
   121  	f.lower.EventUnregister(we)
   122  }
   123  
   124  // Readiness implements FileOperations.Readiness.
   125  func (f *overlayFileOperations) Readiness(mask waiter.EventMask) waiter.EventMask {
   126  	f.upperMu.Lock()
   127  	defer f.upperMu.Unlock()
   128  	if f.upper != nil {
   129  		return f.upper.Readiness(mask)
   130  	}
   131  	return f.lower.Readiness(mask)
   132  }
   133  
   134  // Seek implements FileOperations.Seek.
   135  func (f *overlayFileOperations) Seek(ctx context.Context, file *File, whence SeekWhence, offset int64) (int64, error) {
   136  	f.upperMu.Lock()
   137  	defer f.upperMu.Unlock()
   138  
   139  	var seekDir bool
   140  	var n int64
   141  	if f.upper != nil {
   142  		var err error
   143  		if n, err = f.upper.FileOperations.Seek(ctx, file, whence, offset); err != nil {
   144  			return n, err
   145  		}
   146  		seekDir = IsDir(f.upper.Dirent.Inode.StableAttr)
   147  	} else {
   148  		var err error
   149  		if n, err = f.lower.FileOperations.Seek(ctx, file, whence, offset); err != nil {
   150  			return n, err
   151  		}
   152  		seekDir = IsDir(f.lower.Dirent.Inode.StableAttr)
   153  	}
   154  
   155  	// If this was a seek on a directory, we must update the cursor.
   156  	if seekDir && whence == SeekSet && offset == 0 {
   157  		// Currently only seeking to 0 on a directory is supported.
   158  		// FIXME(b/33075855): Lift directory seeking limitations.
   159  		f.dirCursor = ""
   160  	}
   161  	return n, nil
   162  }
   163  
   164  // Readdir implements FileOperations.Readdir.
   165  func (f *overlayFileOperations) Readdir(ctx context.Context, file *File, serializer DentrySerializer) (int64, error) {
   166  	root := RootFromContext(ctx)
   167  	if root != nil {
   168  		defer root.DecRef(ctx)
   169  	}
   170  
   171  	dirCtx := &DirCtx{
   172  		Serializer: serializer,
   173  		DirCursor:  &f.dirCursor,
   174  	}
   175  	return DirentReaddir(ctx, file.Dirent, f, root, dirCtx, file.Offset())
   176  }
   177  
   178  // IterateDir implements DirIterator.IterateDir.
   179  func (f *overlayFileOperations) IterateDir(ctx context.Context, d *Dirent, dirCtx *DirCtx, offset int) (int, error) {
   180  	o := d.Inode.overlay
   181  	o.copyMu.RLock()
   182  	defer o.copyMu.RUnlock()
   183  	return overlayIterateDirLocked(ctx, o, d, dirCtx, offset)
   184  }
   185  
   186  // Preconditions: o.copyMu must be locked.
   187  func overlayIterateDirLocked(ctx context.Context, o *overlayEntry, d *Dirent, dirCtx *DirCtx, offset int) (int, error) {
   188  	if !d.Inode.MountSource.CacheReaddir() {
   189  		// Can't use the dirCache. Simply read the entries.
   190  		entries, err := readdirEntriesLocked(ctx, o)
   191  		if err != nil {
   192  			return offset, err
   193  		}
   194  		n, err := GenericReaddir(dirCtx, entries)
   195  		return offset + n, err
   196  	}
   197  
   198  	// Otherwise, use or create cached entries.
   199  
   200  	o.dirCacheMu.RLock()
   201  	if o.dirCache != nil {
   202  		n, err := GenericReaddir(dirCtx, o.dirCache)
   203  		o.dirCacheMu.RUnlock()
   204  		return offset + n, err
   205  	}
   206  	o.dirCacheMu.RUnlock()
   207  
   208  	// We must hold dirCacheMu around both readdirEntries and setting
   209  	// o.dirCache to synchronize with dirCache invalidations done by
   210  	// Create, Remove, Rename.
   211  	o.dirCacheMu.Lock()
   212  
   213  	// We expect dirCache to be nil (we just checked above), but there is a
   214  	// chance that a racing call managed to just set it, in which case we
   215  	// can use that new value.
   216  	if o.dirCache == nil {
   217  		dirCache, err := readdirEntriesLocked(ctx, o)
   218  		if err != nil {
   219  			o.dirCacheMu.Unlock()
   220  			return offset, err
   221  		}
   222  		o.dirCache = dirCache
   223  	}
   224  
   225  	o.dirCacheMu.DowngradeLock()
   226  	n, err := GenericReaddir(dirCtx, o.dirCache)
   227  	o.dirCacheMu.RUnlock()
   228  
   229  	return offset + n, err
   230  }
   231  
   232  // onTop performs the given operation on the top-most available layer.
   233  func (f *overlayFileOperations) onTop(ctx context.Context, file *File, fn func(*File, FileOperations) error) error {
   234  	file.Dirent.Inode.overlay.copyMu.RLock()
   235  	defer file.Dirent.Inode.overlay.copyMu.RUnlock()
   236  
   237  	// Only lower layer is available.
   238  	if file.Dirent.Inode.overlay.upper == nil {
   239  		return fn(f.lower, f.lower.FileOperations)
   240  	}
   241  
   242  	f.upperMu.Lock()
   243  	if f.upper == nil {
   244  		upper, err := overlayFile(ctx, file.Dirent.Inode.overlay.upper, file.Flags())
   245  		if err != nil {
   246  			// Something very wrong; return a generic filesystem
   247  			// error to avoid propagating internals.
   248  			f.upperMu.Unlock()
   249  			return syserror.EIO
   250  		}
   251  
   252  		// Save upper file.
   253  		f.upper = upper
   254  	}
   255  	f.upperMu.Unlock()
   256  
   257  	return fn(f.upper, f.upper.FileOperations)
   258  }
   259  
   260  // Read implements FileOperations.Read.
   261  func (f *overlayFileOperations) Read(ctx context.Context, file *File, dst usermem.IOSequence, offset int64) (n int64, err error) {
   262  	err = f.onTop(ctx, file, func(file *File, ops FileOperations) error {
   263  		n, err = ops.Read(ctx, file, dst, offset)
   264  		return err // Will overwrite itself.
   265  	})
   266  	return
   267  }
   268  
   269  // WriteTo implements FileOperations.WriteTo.
   270  func (f *overlayFileOperations) WriteTo(ctx context.Context, file *File, dst io.Writer, count int64, dup bool) (n int64, err error) {
   271  	err = f.onTop(ctx, file, func(file *File, ops FileOperations) error {
   272  		n, err = ops.WriteTo(ctx, file, dst, count, dup)
   273  		return err // Will overwrite itself.
   274  	})
   275  	return
   276  }
   277  
   278  // Write implements FileOperations.Write.
   279  func (f *overlayFileOperations) Write(ctx context.Context, file *File, src usermem.IOSequence, offset int64) (int64, error) {
   280  	// f.upper must be non-nil. See inode_overlay.go:overlayGetFile, where the
   281  	// file is copied up and opened in the upper filesystem if FileFlags.Write.
   282  	// Write cannot be called if !FileFlags.Write, see FileOperations.Write.
   283  	return f.upper.FileOperations.Write(ctx, f.upper, src, offset)
   284  }
   285  
   286  // ReadFrom implements FileOperations.ReadFrom.
   287  func (f *overlayFileOperations) ReadFrom(ctx context.Context, file *File, src io.Reader, count int64) (n int64, err error) {
   288  	// See above; f.upper must be non-nil.
   289  	return f.upper.FileOperations.ReadFrom(ctx, f.upper, src, count)
   290  }
   291  
   292  // Fsync implements FileOperations.Fsync.
   293  func (f *overlayFileOperations) Fsync(ctx context.Context, file *File, start, end int64, syncType SyncType) (err error) {
   294  	f.upperMu.Lock()
   295  	if f.upper != nil {
   296  		err = f.upper.FileOperations.Fsync(ctx, f.upper, start, end, syncType)
   297  	}
   298  	f.upperMu.Unlock()
   299  	if err == nil && f.lower != nil {
   300  		// N.B. Fsync on the lower filesystem can cause writes of file
   301  		// attributes (i.e. access time) despite the fact that we must
   302  		// treat the lower filesystem as read-only.
   303  		//
   304  		// This matches the semantics of fsync(2) in Linux overlayfs.
   305  		err = f.lower.FileOperations.Fsync(ctx, f.lower, start, end, syncType)
   306  	}
   307  	return err
   308  }
   309  
   310  // Flush implements FileOperations.Flush.
   311  func (f *overlayFileOperations) Flush(ctx context.Context, file *File) (err error) {
   312  	// Flush whatever handles we have.
   313  	f.upperMu.Lock()
   314  	if f.upper != nil {
   315  		err = f.upper.FileOperations.Flush(ctx, f.upper)
   316  	}
   317  	f.upperMu.Unlock()
   318  	if err == nil && f.lower != nil {
   319  		err = f.lower.FileOperations.Flush(ctx, f.lower)
   320  	}
   321  	return err
   322  }
   323  
   324  // ConfigureMMap implements FileOperations.ConfigureMMap.
   325  func (*overlayFileOperations) ConfigureMMap(ctx context.Context, file *File, opts *memmap.MMapOpts) error {
   326  	o := file.Dirent.Inode.overlay
   327  
   328  	o.copyMu.RLock()
   329  	defer o.copyMu.RUnlock()
   330  
   331  	// If there is no lower inode, the overlay will never need to do a
   332  	// copy-up, and thus will never need to invalidate any mappings. We can
   333  	// call ConfigureMMap directly on the upper file.
   334  	if o.lower == nil {
   335  		f := file.FileOperations.(*overlayFileOperations)
   336  		if err := f.upper.ConfigureMMap(ctx, opts); err != nil {
   337  			return err
   338  		}
   339  
   340  		// ConfigureMMap will set the MappableIdentity to the upper
   341  		// file and take a reference on it, but we must also hold a
   342  		// reference to the overlay file during the lifetime of the
   343  		// Mappable. If we do not do this, the overlay file can be
   344  		// Released before the upper file is Released, and we will be
   345  		// unable to traverse to the upper file during Save, thus
   346  		// preventing us from saving a proper inode mapping for the
   347  		// file.
   348  		file.IncRef()
   349  		id := overlayMappingIdentity{
   350  			id:          opts.MappingIdentity,
   351  			overlayFile: file,
   352  		}
   353  		id.EnableLeakCheck("fs.overlayMappingIdentity")
   354  
   355  		// Swap out the old MappingIdentity for the wrapped one.
   356  		opts.MappingIdentity = &id
   357  		return nil
   358  	}
   359  
   360  	if !o.isMappableLocked() {
   361  		return linuxerr.ENODEV
   362  	}
   363  
   364  	// FIXME(jamieliu): This is a copy/paste of fsutil.GenericConfigureMMap,
   365  	// which we can't use because the overlay implementation is in package fs,
   366  	// so depending on fs/fsutil would create a circular dependency. Move
   367  	// overlay to fs/overlay.
   368  	opts.Mappable = o
   369  	opts.MappingIdentity = file
   370  	file.IncRef()
   371  	return nil
   372  }
   373  
   374  // UnstableAttr implements fs.FileOperations.UnstableAttr.
   375  func (f *overlayFileOperations) UnstableAttr(ctx context.Context, file *File) (UnstableAttr, error) {
   376  	// Hot path. Avoid defers.
   377  	f.upperMu.Lock()
   378  	if f.upper != nil {
   379  		attr, err := f.upper.UnstableAttr(ctx)
   380  		f.upperMu.Unlock()
   381  		return attr, err
   382  	}
   383  	f.upperMu.Unlock()
   384  
   385  	// It's possible that copy-up has occurred, but we haven't opened a upper
   386  	// file yet. If this is the case, just use the upper inode's UnstableAttr
   387  	// rather than opening a file.
   388  	o := file.Dirent.Inode.overlay
   389  	o.copyMu.RLock()
   390  	if o.upper != nil {
   391  		attr, err := o.upper.UnstableAttr(ctx)
   392  		o.copyMu.RUnlock()
   393  		return attr, err
   394  	}
   395  	o.copyMu.RUnlock()
   396  
   397  	return f.lower.UnstableAttr(ctx)
   398  }
   399  
   400  // Ioctl implements fs.FileOperations.Ioctl.
   401  func (f *overlayFileOperations) Ioctl(ctx context.Context, overlayFile *File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
   402  	f.upperMu.Lock()
   403  	defer f.upperMu.Unlock()
   404  
   405  	if f.upper == nil {
   406  		// It's possible that ioctl changes the file. Since we don't know all
   407  		// possible ioctls, only allow them to propagate to the upper. Triggering a
   408  		// copy up on any ioctl would be too drastic. In the future, it can have a
   409  		// list of ioctls that are safe to send to lower and a list that triggers a
   410  		// copy up.
   411  		return 0, syserror.ENOTTY
   412  	}
   413  	return f.upper.FileOperations.Ioctl(ctx, f.upper, io, args)
   414  }
   415  
   416  // FifoSize implements FifoSizer.FifoSize.
   417  func (f *overlayFileOperations) FifoSize(ctx context.Context, overlayFile *File) (rv int64, err error) {
   418  	err = f.onTop(ctx, overlayFile, func(file *File, ops FileOperations) error {
   419  		sz, ok := ops.(FifoSizer)
   420  		if !ok {
   421  			return linuxerr.EINVAL
   422  		}
   423  		rv, err = sz.FifoSize(ctx, file)
   424  		return err
   425  	})
   426  	return
   427  }
   428  
   429  // SetFifoSize implements FifoSizer.SetFifoSize.
   430  func (f *overlayFileOperations) SetFifoSize(size int64) (rv int64, err error) {
   431  	f.upperMu.Lock()
   432  	defer f.upperMu.Unlock()
   433  
   434  	if f.upper == nil {
   435  		// Named pipes cannot be copied up and changes to the lower are prohibited.
   436  		return 0, linuxerr.EINVAL
   437  	}
   438  	sz, ok := f.upper.FileOperations.(FifoSizer)
   439  	if !ok {
   440  		return 0, linuxerr.EINVAL
   441  	}
   442  	return sz.SetFifoSize(size)
   443  }
   444  
   445  // readdirEntriesLocked returns a sorted map of directory entries from the
   446  // upper and/or lower filesystem.
   447  //
   448  // Preconditions: o.copyMu must be locked.
   449  func readdirEntriesLocked(ctx context.Context, o *overlayEntry) (*SortedDentryMap, error) {
   450  	// Assert that there is at least one upper or lower entry.
   451  	if o.upper == nil && o.lower == nil {
   452  		panic("invalid overlayEntry, needs at least one Inode")
   453  	}
   454  	entries := make(map[string]DentAttr)
   455  
   456  	// Try the upper filesystem first.
   457  	if o.upper != nil {
   458  		var err error
   459  		entries, err = readdirOne(ctx, NewTransientDirent(o.upper))
   460  		if err != nil {
   461  			return nil, err
   462  		}
   463  	}
   464  
   465  	// Try the lower filesystem next.
   466  	if o.lower != nil {
   467  		lowerEntries, err := readdirOne(ctx, NewTransientDirent(o.lower))
   468  		if err != nil {
   469  			return nil, err
   470  		}
   471  		for name, entry := range lowerEntries {
   472  			// Skip this name if it is a negative entry in the
   473  			// upper or there exists a whiteout for it.
   474  			if o.upper != nil {
   475  				if overlayHasWhiteout(ctx, o.upper, name) {
   476  					continue
   477  				}
   478  			}
   479  			// Prefer the entries from the upper filesystem
   480  			// when names overlap.
   481  			if _, ok := entries[name]; !ok {
   482  				entries[name] = entry
   483  			}
   484  		}
   485  	}
   486  
   487  	// Sort and return the entries.
   488  	return NewSortedDentryMap(entries), nil
   489  }
   490  
   491  // readdirOne reads all of the directory entries from d.
   492  func readdirOne(ctx context.Context, d *Dirent) (map[string]DentAttr, error) {
   493  	dir, err := d.Inode.GetFile(ctx, d, FileFlags{Read: true})
   494  	if err != nil {
   495  		return nil, err
   496  	}
   497  	defer dir.DecRef(ctx)
   498  
   499  	// Use a stub serializer to read the entries into memory.
   500  	stubSerializer := &CollectEntriesSerializer{}
   501  	if err := dir.Readdir(ctx, stubSerializer); err != nil {
   502  		return nil, err
   503  	}
   504  	// The "." and ".." entries are from the overlay Inode's Dirent, not the stub.
   505  	delete(stubSerializer.Entries, ".")
   506  	delete(stubSerializer.Entries, "..")
   507  	return stubSerializer.Entries, nil
   508  }
   509  
// overlayMappingIdentity wraps a MappingIdentity, and also holds a reference
// on a file during its lifetime. Both the wrapped identity and the file are
// released when the last reference to the overlayMappingIdentity is dropped.
//
// +stateify savable
type overlayMappingIdentity struct {
	refs.AtomicRefCount
	// id is the wrapped MappingIdentity; Msync is forwarded to it.
	id          memmap.MappingIdentity
	// overlayFile is the overlay file kept referenced by this identity. It
	// supplies the device ID, inode ID and mapped name.
	overlayFile *File
}
   519  
   520  // DecRef implements AtomicRefCount.DecRef.
   521  func (omi *overlayMappingIdentity) DecRef(ctx context.Context) {
   522  	omi.AtomicRefCount.DecRefWithDestructor(ctx, func(context.Context) {
   523  		omi.overlayFile.DecRef(ctx)
   524  		omi.id.DecRef(ctx)
   525  	})
   526  }
   527  
   528  // DeviceID implements MappingIdentity.DeviceID using the device id from the
   529  // overlayFile.
   530  func (omi *overlayMappingIdentity) DeviceID() uint64 {
   531  	return omi.overlayFile.Dirent.Inode.StableAttr.DeviceID
   532  }
   533  
   534  // DeviceID implements MappingIdentity.InodeID using the inode id from the
   535  // overlayFile.
   536  func (omi *overlayMappingIdentity) InodeID() uint64 {
   537  	return omi.overlayFile.Dirent.Inode.StableAttr.InodeID
   538  }
   539  
   540  // MappedName implements MappingIdentity.MappedName.
   541  func (omi *overlayMappingIdentity) MappedName(ctx context.Context) string {
   542  	root := RootFromContext(ctx)
   543  	if root != nil {
   544  		defer root.DecRef(ctx)
   545  	}
   546  	name, _ := omi.overlayFile.Dirent.FullName(root)
   547  	return name
   548  }
   549  
   550  // Msync implements MappingIdentity.Msync.
   551  func (omi *overlayMappingIdentity) Msync(ctx context.Context, mr memmap.MappableRange) error {
   552  	return omi.id.Msync(ctx, mr)
   553  }