github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fs/overlay.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package fs
    16  
    17  import (
    18  	"fmt"
    19  	"strings"
    20  
    21  	"github.com/SagerNet/gvisor/pkg/context"
    22  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    23  	"github.com/SagerNet/gvisor/pkg/hostarch"
    24  	"github.com/SagerNet/gvisor/pkg/log"
    25  	"github.com/SagerNet/gvisor/pkg/sentry/memmap"
    26  	"github.com/SagerNet/gvisor/pkg/sync"
    27  )
    28  
    29  // The virtual filesystem implements an overlay configuration. For a high-level
    30  // description, see README.md.
    31  //
    32  // Note on whiteouts:
    33  //
    34  // This implementation does not use the "Docker-style" whiteouts (symlinks with
    35  // ".wh." prefix). Instead upper filesystem directories support a set of extended
    36  // attributes to encode whiteouts: "trusted.overlay.whiteout.<filename>". This
    37  // gives flexibility to persist whiteouts independently of the filesystem layout
    38  // while additionally preventing name conflicts with files prefixed with ".wh.".
    39  //
    40  // Known deficiencies:
    41  //
    42  // - The device number of two files under the same overlay mount point may be
    43  //   different. This can happen if a file is found in the lower filesystem (takes
    44  //   the lower filesystem device) and another file is created in the upper
    45  //   filesystem (takes the upper filesystem device). This may appear odd but
    46  //   should not break applications.
    47  //
    48  // - Registered events on files (i.e. for notification of read/write readiness)
    49  //   are not copied across copy up. This is fine in the common case of files that
    50  //   do not block. For files that do block, like pipes and sockets, copy up is not
    51  //   supported.
    52  //
    53  // - Hardlinks in a lower filesystem are broken by copy up. For this reason, no
    54  //   attempt is made to preserve link count across copy up.
    55  //
    56  // - The maximum length of an extended attribute name is the same as the maximum
    57  //   length of a file path in Linux (XATTR_NAME_MAX == NAME_MAX). This means that
    58  //   whiteout attributes, if set directly on the host, are limited additionally by
    59  //   the extra whiteout prefix length (file paths must be strictly shorter than
    60  //   NAME_MAX). This is not a problem for in-memory filesystems which don't enforce
    61  //   XATTR_NAME_MAX.
    62  
    63  const (
    64  	// XattrOverlayPrefix is the prefix for extended attributes that affect
    65  	// the behavior of an overlay.
    66  	XattrOverlayPrefix = "trusted.overlay."
    67  
    68  	// XattrOverlayWhiteoutPrefix is the prefix for extended attributes
    69  	// that indicate that a whiteout exists.
    70  	XattrOverlayWhiteoutPrefix = XattrOverlayPrefix + "whiteout."
    71  )
    72  
    73  // XattrOverlayWhiteout returns an extended attribute that indicates a
    74  // whiteout exists for name. It is supported by directories that wish to
    75  // mask the existence of name.
    76  func XattrOverlayWhiteout(name string) string {
    77  	return XattrOverlayWhiteoutPrefix + name
    78  }
    79  
    80  // isXattrOverlay returns whether the given extended attribute configures the
    81  // overlay.
    82  func isXattrOverlay(name string) bool {
    83  	return strings.HasPrefix(name, XattrOverlayPrefix)
    84  }
    85  
    86  // NewOverlayRoot produces the root of an overlay.
    87  //
    88  // Preconditions:
    89  // * upper and lower must be non-nil.
    90  // * upper must not be an overlay.
    91  // * lower should not expose character devices, pipes, or sockets, because
    92  //   copying up these types of files is not supported.
    93  // * lower must not require that file objects be revalidated.
    94  // * lower must not have dynamic file/directory content.
    95  func NewOverlayRoot(ctx context.Context, upper *Inode, lower *Inode, flags MountSourceFlags) (*Inode, error) {
    96  	if !IsDir(upper.StableAttr) {
    97  		return nil, fmt.Errorf("upper Inode is a %v, not a directory", upper.StableAttr.Type)
    98  	}
    99  	if !IsDir(lower.StableAttr) {
   100  		return nil, fmt.Errorf("lower Inode is a %v, not a directory", lower.StableAttr.Type)
   101  	}
   102  	if upper.overlay != nil {
   103  		return nil, fmt.Errorf("cannot nest overlay in upper file of another overlay")
   104  	}
   105  
   106  	msrc := newOverlayMountSource(ctx, upper.MountSource, lower.MountSource, flags)
   107  	overlay, err := newOverlayEntry(ctx, upper, lower, true)
   108  	if err != nil {
   109  		msrc.DecRef(ctx)
   110  		return nil, err
   111  	}
   112  
   113  	return newOverlayInode(ctx, overlay, msrc), nil
   114  }
   115  
   116  // NewOverlayRootFile produces the root of an overlay that points to a file.
   117  //
   118  // Preconditions:
   119  // * lower must be non-nil.
   120  // * lower should not expose character devices, pipes, or sockets, because
   121  //   copying up these types of files is not supported. Neither it can be a dir.
   122  // * lower must not require that file objects be revalidated.
   123  // * lower must not have dynamic file/directory content.
   124  func NewOverlayRootFile(ctx context.Context, upperMS *MountSource, lower *Inode, flags MountSourceFlags) (*Inode, error) {
   125  	if !IsRegular(lower.StableAttr) {
   126  		return nil, fmt.Errorf("lower Inode is not a regular file")
   127  	}
   128  	msrc := newOverlayMountSource(ctx, upperMS, lower.MountSource, flags)
   129  	overlay, err := newOverlayEntry(ctx, nil, lower, true)
   130  	if err != nil {
   131  		msrc.DecRef(ctx)
   132  		return nil, err
   133  	}
   134  	return newOverlayInode(ctx, overlay, msrc), nil
   135  }
   136  
   137  // newOverlayInode creates a new Inode for an overlay.
   138  func newOverlayInode(ctx context.Context, o *overlayEntry, msrc *MountSource) *Inode {
   139  	var inode *Inode
   140  	if o.upper != nil {
   141  		inode = NewInode(ctx, nil, msrc, o.upper.StableAttr)
   142  	} else {
   143  		inode = NewInode(ctx, nil, msrc, o.lower.StableAttr)
   144  	}
   145  	inode.overlay = o
   146  	return inode
   147  }
   148  
// overlayEntry is the overlay metadata of an Inode. It pairs an optional upper
// Inode with an optional lower Inode (at least one is always set, see
// newOverlayEntry) and implements memmap.Mappable by forwarding to whichever
// of the two is currently in effect.
//
// +stateify savable
type overlayEntry struct {
	// lowerExists is true if an Inode exists for this file in the lower
	// filesystem. If lowerExists is true, then the overlay must create
	// a whiteout entry when renaming and removing this entry to mask the
	// lower Inode.
	//
	// Note that this is distinct from actually holding onto a non-nil
	// lower Inode (below). The overlay does not need to keep a lower Inode
	// around unless it needs to operate on it, but it always needs to know
	// whether the lower Inode exists to correctly execute a rename or
	// remove operation.
	lowerExists bool

	// lower is an Inode from a lower filesystem. Modifications are
	// never made on this Inode. It may be nil.
	lower *Inode

	// copyMu serializes copy-up for operations above
	// mm.MemoryManager.mappingMu in the lock order.
	copyMu sync.RWMutex `state:"nosave"`

	// mapsMu serializes copy-up for operations between
	// mm.MemoryManager.mappingMu and mm.MemoryManager.activeMu in the lock
	// order.
	mapsMu sync.Mutex `state:"nosave"`

	// mappings tracks memory mappings of this Mappable so they can be removed
	// from the lower filesystem Mappable and added to the upper filesystem
	// Mappable when copy up occurs. It is strictly unnecessary after copy-up.
	//
	// mappings is protected by mapsMu.
	mappings memmap.MappingSet

	// dataMu serializes copy-up for operations below mm.MemoryManager.activeMu
	// in the lock order.
	dataMu sync.RWMutex `state:"nosave"`

	// upper is an Inode from an upper filesystem. It is non-nil if
	// the file exists in the upper filesystem. It becomes non-nil
	// when the Inode that owns this overlayEntry is modified.
	//
	// upper is protected by all of copyMu, mapsMu, and dataMu. Holding any of
	// these locks is sufficient to read upper; holding all three for writing
	// is required to mutate it.
	upper *Inode

	// dirCacheMu protects dirCache.
	dirCacheMu sync.RWMutex `state:"nosave"`

	// dirCache is a cache of DentAttrs from the upper and lower Inodes. It is
	// reset to nil by markDirectoryDirty to invalidate cached readdir
	// results.
	dirCache *SortedDentryMap
}
   204  
   205  // newOverlayEntry returns a new overlayEntry.
   206  func newOverlayEntry(ctx context.Context, upper *Inode, lower *Inode, lowerExists bool) (*overlayEntry, error) {
   207  	if upper == nil && lower == nil {
   208  		panic("invalid overlayEntry, needs at least one Inode")
   209  	}
   210  	if upper != nil && upper.overlay != nil {
   211  		panic("nested writable layers are not supported")
   212  	}
   213  	// Check for supported lower filesystem types.
   214  	if lower != nil {
   215  		switch lower.StableAttr.Type {
   216  		case RegularFile, Directory, Symlink, Socket:
   217  		default:
   218  			// We don't support copying up from character devices,
   219  			// named pipes, or anything weird (like proc files).
   220  			log.Warningf("%s not supported in lower filesytem", lower.StableAttr.Type)
   221  			return nil, linuxerr.EINVAL
   222  		}
   223  	}
   224  	return &overlayEntry{
   225  		lowerExists: lowerExists,
   226  		lower:       lower,
   227  		upper:       upper,
   228  	}, nil
   229  }
   230  
   231  func (o *overlayEntry) release(ctx context.Context) {
   232  	// We drop a reference on upper and lower file system Inodes
   233  	// rather than releasing them, because in-memory filesystems
   234  	// may hold an extra reference to these Inodes so that they
   235  	// stay in memory.
   236  	if o.upper != nil {
   237  		o.upper.DecRef(ctx)
   238  	}
   239  	if o.lower != nil {
   240  		o.lower.DecRef(ctx)
   241  	}
   242  }
   243  
   244  // overlayUpperMountSource gives the upper mount of an overlay mount.
   245  //
   246  // The caller may not use this MountSource past the lifetime of overlayMountSource and may
   247  // not call DecRef on it.
   248  func overlayUpperMountSource(overlayMountSource *MountSource) *MountSource {
   249  	return overlayMountSource.MountSourceOperations.(*overlayMountSourceOperations).upper
   250  }
   251  
   252  // Preconditions: At least one of o.copyMu, o.mapsMu, or o.dataMu must be locked.
   253  func (o *overlayEntry) inodeLocked() *Inode {
   254  	if o.upper != nil {
   255  		return o.upper
   256  	}
   257  	return o.lower
   258  }
   259  
   260  // Preconditions: At least one of o.copyMu, o.mapsMu, or o.dataMu must be locked.
   261  func (o *overlayEntry) isMappableLocked() bool {
   262  	return o.inodeLocked().Mappable() != nil
   263  }
   264  
   265  // markDirectoryDirty marks any cached data dirty for this directory. This is
   266  // necessary in order to ensure that this node does not retain stale state
   267  // throughout its lifetime across multiple open directory handles.
   268  //
   269  // Currently this means invalidating any readdir caches.
   270  func (o *overlayEntry) markDirectoryDirty() {
   271  	o.dirCacheMu.Lock()
   272  	o.dirCache = nil
   273  	o.dirCacheMu.Unlock()
   274  }
   275  
   276  // AddMapping implements memmap.Mappable.AddMapping.
   277  func (o *overlayEntry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error {
   278  	o.mapsMu.Lock()
   279  	defer o.mapsMu.Unlock()
   280  	if err := o.inodeLocked().Mappable().AddMapping(ctx, ms, ar, offset, writable); err != nil {
   281  		return err
   282  	}
   283  	o.mappings.AddMapping(ms, ar, offset, writable)
   284  	return nil
   285  }
   286  
   287  // RemoveMapping implements memmap.Mappable.RemoveMapping.
   288  func (o *overlayEntry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) {
   289  	o.mapsMu.Lock()
   290  	defer o.mapsMu.Unlock()
   291  	o.inodeLocked().Mappable().RemoveMapping(ctx, ms, ar, offset, writable)
   292  	o.mappings.RemoveMapping(ms, ar, offset, writable)
   293  }
   294  
   295  // CopyMapping implements memmap.Mappable.CopyMapping.
   296  func (o *overlayEntry) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error {
   297  	o.mapsMu.Lock()
   298  	defer o.mapsMu.Unlock()
   299  	if err := o.inodeLocked().Mappable().CopyMapping(ctx, ms, srcAR, dstAR, offset, writable); err != nil {
   300  		return err
   301  	}
   302  	o.mappings.AddMapping(ms, dstAR, offset, writable)
   303  	return nil
   304  }
   305  
   306  // Translate implements memmap.Mappable.Translate.
   307  func (o *overlayEntry) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) {
   308  	o.dataMu.RLock()
   309  	defer o.dataMu.RUnlock()
   310  	return o.inodeLocked().Mappable().Translate(ctx, required, optional, at)
   311  }
   312  
   313  // InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable.
   314  func (o *overlayEntry) InvalidateUnsavable(ctx context.Context) error {
   315  	o.mapsMu.Lock()
   316  	defer o.mapsMu.Unlock()
   317  	return o.inodeLocked().Mappable().InvalidateUnsavable(ctx)
   318  }