github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/fsimpl/overlay/copy_up.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package overlay
    16  
    17  import (
    18  	"fmt"
    19  
    20  	"github.com/metacubex/gvisor/pkg/abi/linux"
    21  	"github.com/metacubex/gvisor/pkg/context"
    22  	"github.com/metacubex/gvisor/pkg/errors/linuxerr"
    23  	"github.com/metacubex/gvisor/pkg/fspath"
    24  	"github.com/metacubex/gvisor/pkg/hostarch"
    25  	"github.com/metacubex/gvisor/pkg/sentry/kernel/auth"
    26  	"github.com/metacubex/gvisor/pkg/sentry/memmap"
    27  	"github.com/metacubex/gvisor/pkg/sentry/vfs"
    28  )
    29  
    30  func (d *dentry) isCopiedUp() bool {
    31  	return d.copiedUp.Load() != 0
    32  }
    33  
    34  func (d *dentry) canBeCopiedUp() bool {
    35  	ftype := d.mode.Load() & linux.S_IFMT
    36  	switch ftype {
    37  	case linux.S_IFREG, linux.S_IFDIR, linux.S_IFLNK, linux.S_IFBLK, linux.S_IFCHR:
    38  		// Can be copied-up.
    39  		return true
    40  	default:
    41  		// Can't be copied-up.
    42  		return false
    43  	}
    44  }
    45  
    46  // copyUpLocked ensures that d exists on the upper layer, i.e. d.upperVD.Ok().
    47  //
    48  // Preconditions: filesystem.renameMu must be locked.
    49  func (d *dentry) copyUpLocked(ctx context.Context) error {
    50  	return d.copyUpMaybeSyntheticMountpointLocked(ctx, false /* forSyntheticMountpoint */)
    51  }
    52  
// copyUpMaybeSyntheticMountpointLocked ensures that d has a file on the upper
// layer, creating it from the topmost lower-layer file (d.lowerVDs[0]) if it
// does not exist yet. Missing ancestors are copied up first by recursing on
// d's parent.
//
// Regular files, directories, symbolic links and block/character devices are
// handled; any other file type fails with EPERM. The new upper file receives
// d's cached mode, uid and gid, the lower file's atime/mtime (only those the
// lower layer reported), and the lower file's extended attributes as copied
// by copyXattrsLocked. For regular files with existing memory mappings, the
// mappings are moved to the upper file's Mappable as the final step.
//
// forSyntheticMountpoint is forwarded unchanged to the parent's copy-up and
// to MkdirOptions.ForSyntheticMountpoint when d is a directory.
//
// Returns nil if d is already copied up (or another caller won the race),
// EPERM for unsupported file types, EROFS if d has no parent, ENOENT if d was
// deleted concurrently, EREMOTE if the upper layer does not report an inode
// number, or any error returned by the underlying VFS operations.
//
// Panics if undoing a partially completed copy-up fails, or if the file type
// is one that canBeCopiedUp should have rejected.
//
// Preconditions: NOTE(review): copyUpLocked requires filesystem.renameMu to
// be locked and delegates here without taking any lock, so presumably the
// same precondition applies to this function - confirm.
func (d *dentry) copyUpMaybeSyntheticMountpointLocked(ctx context.Context, forSyntheticMountpoint bool) error {
	// Fast path.
	if d.isCopiedUp() {
		return nil
	}

	// Attach our credentials to the context, as some VFS operations use
	// credentials from context rather than take an explicit creds parameter.
	ctx = auth.ContextWithCredentials(ctx, d.fs.creds)

	if !d.canBeCopiedUp() {
		return linuxerr.EPERM
	}

	// Ensure that our parent directory is copied-up.
	parent := d.parent.Load()
	if parent == nil {
		// d is a filesystem root with no upper layer.
		return linuxerr.EROFS
	}
	if err := parent.copyUpMaybeSyntheticMountpointLocked(ctx, forSyntheticMountpoint); err != nil {
		return err
	}

	// d.copyMu is held from here until the function returns.
	d.copyMu.Lock()
	defer d.copyMu.Unlock()
	if d.upperVD.Ok() {
		// Raced with another call to d.copyUpLocked().
		return nil
	}
	if d.vfsd.IsDead() {
		// Raced with deletion of d.
		return linuxerr.ENOENT
	}

	// Obtain settable timestamps from the lower layer.
	vfsObj := d.fs.vfsfs.VirtualFilesystem()
	// oldpop uses the topmost lower-layer file as both Root and Start and
	// sets no Path; it is used below to stat, open and readlink that file.
	oldpop := vfs.PathOperation{
		Root:  d.lowerVDs[0],
		Start: d.lowerVDs[0],
	}
	const timestampsMask = linux.STATX_ATIME | linux.STATX_MTIME
	oldStat, err := vfsObj.StatAt(ctx, d.fs.creds, &oldpop, &vfs.StatOptions{
		Mask: timestampsMask,
	})
	if err != nil {
		return err
	}

	// Perform copy-up.
	ftype := d.mode.Load() & linux.S_IFMT
	// newpop names the file to create: d.name resolved relative to the
	// parent's upper-layer dentry.
	newpop := vfs.PathOperation{
		Root:  parent.upperVD,
		Start: parent.upperVD,
		Path:  fspath.Parse(d.name),
	}
	// Used during copy-up of memory-mapped regular files.
	var mmapOpts *memmap.MMapOpts
	// cleanupUndoCopyUp removes the upper-layer file created at newpop
	// (RmdirAt for directories, UnlinkAt otherwise) and drops d.upperVD if it
	// was already set. It panics if the removal fails. It is only called on
	// failure paths that run after the upper file has been created.
	cleanupUndoCopyUp := func() {
		var err error
		if ftype == linux.S_IFDIR {
			err = vfsObj.RmdirAt(ctx, d.fs.creds, &newpop)
		} else {
			err = vfsObj.UnlinkAt(ctx, d.fs.creds, &newpop)
		}
		if err != nil {
			panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer file after copy-up error: %v", err))
		}
		if d.upperVD.Ok() {
			d.upperVD.DecRef(ctx)
			d.upperVD = vfs.VirtualDentry{}
		}
	}
	switch ftype {
	case linux.S_IFREG:
		oldFD, err := vfsObj.OpenAt(ctx, d.fs.creds, &oldpop, &vfs.OpenOptions{
			Flags: linux.O_RDONLY,
		})
		if err != nil {
			return err
		}
		defer oldFD.DecRef(ctx)
		// O_EXCL: the open fails rather than reusing a file that already
		// exists at newpop.
		newFD, err := vfsObj.OpenAt(ctx, d.fs.creds, &newpop, &vfs.OpenOptions{
			Flags: linux.O_WRONLY | linux.O_CREAT | linux.O_EXCL,
			// d.mode can be read because d.copyMu is locked.
			Mode: linux.FileMode(d.mode.RacyLoad() &^ linux.S_IFMT),
		})
		if err != nil {
			// The upper file was not created; no undo is attempted.
			return err
		}
		defer newFD.DecRef(ctx)
		if _, err := vfs.CopyRegularFileData(ctx, newFD, oldFD); err != nil {
			cleanupUndoCopyUp()
			return err
		}
		if d.wrappedMappable != nil {
			// We may have memory mappings of the file on the lower layer.
			// Switch to mapping the file on the upper layer instead.
			mmapOpts = &memmap.MMapOpts{
				Perms:    hostarch.ReadWrite,
				MaxPerms: hostarch.ReadWrite,
			}
			if err := newFD.ConfigureMMap(ctx, mmapOpts); err != nil {
				cleanupUndoCopyUp()
				return err
			}
			// Only mmapOpts.Mappable is used below; release the
			// MappingIdentity if ConfigureMMap set one.
			if mmapOpts.MappingIdentity != nil {
				mmapOpts.MappingIdentity.DecRef(ctx)
			}
			// Don't actually switch Mappables until the end of copy-up; see
			// below for why.
		}
		// Transfer ownership and (where reported by the lower layer)
		// timestamps to the new file.
		if err := newFD.SetStat(ctx, vfs.SetStatOptions{
			Stat: linux.Statx{
				Mask: linux.STATX_UID | linux.STATX_GID | oldStat.Mask&timestampsMask,
				// d.uid and d.gid can be read because d.copyMu is locked.
				UID:   d.uid.RacyLoad(),
				GID:   d.gid.RacyLoad(),
				Atime: oldStat.Atime,
				Mtime: oldStat.Mtime,
			},
		}); err != nil {
			cleanupUndoCopyUp()
			return err
		}
		// newFD itself is released by the deferred DecRef above, so an
		// additional reference is taken for d.upperVD.
		d.upperVD = newFD.VirtualDentry()
		d.upperVD.IncRef()

	case linux.S_IFDIR:
		if err := vfsObj.MkdirAt(ctx, d.fs.creds, &newpop, &vfs.MkdirOptions{
			// d.mode can be read because d.copyMu is locked.
			Mode:                   linux.FileMode(d.mode.RacyLoad() &^ linux.S_IFMT),
			ForSyntheticMountpoint: forSyntheticMountpoint,
		}); err != nil {
			// The directory was not created; no undo is attempted.
			return err
		}
		if err := vfsObj.SetStatAt(ctx, d.fs.creds, &newpop, &vfs.SetStatOptions{
			Stat: linux.Statx{
				Mask: linux.STATX_UID | linux.STATX_GID | oldStat.Mask&timestampsMask,
				// d.uid and d.gid can be read because d.copyMu is locked.
				UID:   d.uid.RacyLoad(),
				GID:   d.gid.RacyLoad(),
				Atime: oldStat.Atime,
				Mtime: oldStat.Mtime,
			},
		}); err != nil {
			cleanupUndoCopyUp()
			return err
		}
		upperVD, err := vfsObj.GetDentryAt(ctx, d.fs.creds, &newpop, &vfs.GetDentryOptions{})
		if err != nil {
			cleanupUndoCopyUp()
			return err
		}
		d.upperVD = upperVD

	case linux.S_IFLNK:
		target, err := vfsObj.ReadlinkAt(ctx, d.fs.creds, &oldpop)
		if err != nil {
			return err
		}
		if err := vfsObj.SymlinkAt(ctx, d.fs.creds, &newpop, target); err != nil {
			// The symlink was not created; no undo is attempted.
			return err
		}
		// SymlinkAt takes no mode argument, so the mode is applied here
		// together with ownership and timestamps.
		if err := vfsObj.SetStatAt(ctx, d.fs.creds, &newpop, &vfs.SetStatOptions{
			Stat: linux.Statx{
				Mask: linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID | oldStat.Mask&timestampsMask,
				// d.{uid,gid,mode} can be read because d.copyMu is locked.
				Mode:  uint16(d.mode.RacyLoad()),
				UID:   d.uid.RacyLoad(),
				GID:   d.gid.RacyLoad(),
				Atime: oldStat.Atime,
				Mtime: oldStat.Mtime,
			},
		}); err != nil {
			cleanupUndoCopyUp()
			return err
		}
		upperVD, err := vfsObj.GetDentryAt(ctx, d.fs.creds, &newpop, &vfs.GetDentryOptions{})
		if err != nil {
			cleanupUndoCopyUp()
			return err
		}
		d.upperVD = upperVD

	case linux.S_IFBLK, linux.S_IFCHR:
		// NOTE(review): oldStat was requested with only the timestamps mask,
		// yet RdevMajor/RdevMinor are read from it here - confirm that StatAt
		// populates the device numbers regardless of the requested mask.
		if err := vfsObj.MknodAt(ctx, d.fs.creds, &newpop, &vfs.MknodOptions{
			// d.mode can be read because d.copyMu is locked.
			Mode:     linux.FileMode(d.mode.RacyLoad()),
			DevMajor: oldStat.RdevMajor,
			DevMinor: oldStat.RdevMinor,
		}); err != nil {
			// The device node was not created; no undo is attempted.
			return err
		}
		if err := vfsObj.SetStatAt(ctx, d.fs.creds, &newpop, &vfs.SetStatOptions{
			Stat: linux.Statx{
				Mask: linux.STATX_UID | linux.STATX_GID | oldStat.Mask&timestampsMask,
				// d.uid and d.gid can be read because d.copyMu is locked.
				UID:   d.uid.RacyLoad(),
				GID:   d.gid.RacyLoad(),
				Atime: oldStat.Atime,
				Mtime: oldStat.Mtime,
			},
		}); err != nil {
			cleanupUndoCopyUp()
			return err
		}
		upperVD, err := vfsObj.GetDentryAt(ctx, d.fs.creds, &newpop, &vfs.GetDentryOptions{})
		if err != nil {
			cleanupUndoCopyUp()
			return err
		}
		d.upperVD = upperVD

	default:
		// Should have rejected this at the beginning of this function?
		panic(fmt.Sprintf("unexpected file type %o", ftype))
	}

	// d.upperVD is set from here on; copyXattrsLocked relies on it as the
	// destination for the copied attributes.
	if err := d.copyXattrsLocked(ctx); err != nil {
		cleanupUndoCopyUp()
		return err
	}

	// Update the dentry's device and inode numbers (except for directories,
	// for which these remain overlay-assigned).
	if ftype != linux.S_IFDIR {
		upperStat, err := vfsObj.StatAt(ctx, d.fs.creds, &vfs.PathOperation{
			Root:  d.upperVD,
			Start: d.upperVD,
		}, &vfs.StatOptions{
			Mask: linux.STATX_INO,
		})
		if err != nil {
			cleanupUndoCopyUp()
			return err
		}
		// The upper layer must report an inode number for d to adopt.
		if upperStat.Mask&linux.STATX_INO == 0 {
			cleanupUndoCopyUp()
			return linuxerr.EREMOTE
		}
		d.devMajor.Store(upperStat.DevMajor)
		d.devMinor.Store(upperStat.DevMinor)
		d.ino.Store(upperStat.Ino)

		// Lower level dentries for non-directories are no longer accessible from
		// the overlayfs anymore after copyup. Ask filesystems to release their
		// resources whenever possible.
		for _, lowerDentry := range d.lowerVDs {
			lowerDentry.Dentry().MarkEvictable()
		}
	}

	// mmapOpts is only non-nil for regular files that had a wrappedMappable
	// when the copy-up started (see the S_IFREG case above).
	if mmapOpts != nil && mmapOpts.Mappable != nil {
		d.mapsMu.Lock()
		defer d.mapsMu.Unlock()

		// Propagate mappings of d to the new Mappable. Remember which mappings
		// we added so we can remove them on failure.
		upperMappable := mmapOpts.Mappable
		allAdded := make(map[memmap.MappableRange]memmap.MappingsOfRange)
		for seg := d.lowerMappings.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
			added := make(memmap.MappingsOfRange)
			for m := range seg.Value() {
				if err := upperMappable.AddMapping(ctx, m.MappingSpace, m.AddrRange, seg.Start(), m.Writable); err != nil {
					// Roll back the mappings added for the current segment,
					// then those added for all earlier segments.
					for m := range added {
						upperMappable.RemoveMapping(ctx, m.MappingSpace, m.AddrRange, seg.Start(), m.Writable)
					}
					for mr, mappings := range allAdded {
						for m := range mappings {
							upperMappable.RemoveMapping(ctx, m.MappingSpace, m.AddrRange, mr.Start, m.Writable)
						}
					}
					// NOTE(review): unlike the other failure paths after the
					// upper file exists, this one does not call
					// cleanupUndoCopyUp. d.upperVD stays set while d.copiedUp
					// is still 0, so a later call would appear to take the
					// "raced" return (d.upperVD.Ok()) and report success
					// without ever setting d.copiedUp - confirm whether this
					// is intended.
					return err
				}
				added[m] = struct{}{}
			}
			allAdded[seg.Range()] = added
		}

		// Switch to the new Mappable. We do this at the end of copy-up
		// because:
		//
		//	- We need to switch Mappables (by changing d.wrappedMappable) before
		//		invalidating Translations from the old Mappable (to pick up
		//		Translations from the new one).
		//
		//	- We need to lock d.dataMu while changing d.wrappedMappable, but
		//		must invalidate Translations with d.dataMu unlocked (due to lock
		//		ordering).
		//
		//	- Consequently, once we unlock d.dataMu, other threads may
		//		immediately observe the new (copied-up) Mappable, which we want to
		//		delay until copy-up is guaranteed to succeed.
		d.dataMu.Lock()
		lowerMappable := d.wrappedMappable
		d.wrappedMappable = upperMappable
		d.dataMu.Unlock()
		d.lowerMappings.InvalidateAll(memmap.InvalidateOpts{})

		// Remove mappings from the old Mappable.
		for seg := d.lowerMappings.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
			for m := range seg.Value() {
				lowerMappable.RemoveMapping(ctx, m.MappingSpace, m.AddrRange, seg.Start(), m.Writable)
			}
		}
		d.lowerMappings.RemoveAll()
	}

	// Publish completion: isCopiedUp() returns true from now on, enabling the
	// fast path at the top of this function.
	d.copiedUp.Store(1)
	return nil
}
   365  
   366  // copyXattrsLocked copies a subset of lower's extended attributes to upper.
   367  // Attributes that configure an overlay in the lower are not copied up.
   368  //
   369  // Preconditions: d.copyMu must be locked for writing.
   370  func (d *dentry) copyXattrsLocked(ctx context.Context) error {
   371  	vfsObj := d.fs.vfsfs.VirtualFilesystem()
   372  	lowerPop := &vfs.PathOperation{Root: d.lowerVDs[0], Start: d.lowerVDs[0]}
   373  	upperPop := &vfs.PathOperation{Root: d.upperVD, Start: d.upperVD}
   374  
   375  	lowerXattrs, err := vfsObj.ListXattrAt(ctx, d.fs.creds, lowerPop, 0)
   376  	if err != nil {
   377  		if linuxerr.Equals(linuxerr.EOPNOTSUPP, err) {
   378  			// There are no guarantees as to the contents of lowerXattrs.
   379  			return nil
   380  		}
   381  		ctx.Infof("failed to copy up xattrs because ListXattrAt failed: %v", err)
   382  		return err
   383  	}
   384  
   385  	for _, name := range lowerXattrs {
   386  		// Do not copy up overlay attributes.
   387  		if isOverlayXattr(name) {
   388  			continue
   389  		}
   390  
   391  		value, err := vfsObj.GetXattrAt(ctx, d.fs.creds, lowerPop, &vfs.GetXattrOptions{Name: name, Size: 0})
   392  		if err != nil {
   393  			ctx.Infof("failed to copy up xattrs because GetXattrAt failed: %v", err)
   394  			return err
   395  		}
   396  
   397  		if err := vfsObj.SetXattrAt(ctx, d.fs.creds, upperPop, &vfs.SetXattrOptions{Name: name, Value: value}); err != nil {
   398  			ctx.Infof("failed to copy up xattrs because SetXattrAt failed: %v", err)
   399  			return err
   400  		}
   401  	}
   402  	return nil
   403  }
   404  
   405  // copyUpDescendantsLocked ensures that all descendants of d are copied up.
   406  //
   407  // Preconditions:
   408  //   - filesystem.renameMu must be locked.
   409  //   - d.dirMu must be locked.
   410  //   - d.isDir().
   411  func (d *dentry) copyUpDescendantsLocked(ctx context.Context, ds **[]*dentry) error {
   412  	dirents, err := d.getDirentsLocked(ctx)
   413  	if err != nil {
   414  		return err
   415  	}
   416  	for _, dirent := range dirents {
   417  		if dirent.Name == "." || dirent.Name == ".." {
   418  			continue
   419  		}
   420  		child, _, err := d.fs.getChildLocked(ctx, d, dirent.Name, ds)
   421  		if err != nil {
   422  			return err
   423  		}
   424  		if err := child.copyUpLocked(ctx); err != nil {
   425  			return err
   426  		}
   427  		if child.isDir() {
   428  			child.dirMu.Lock()
   429  			err := child.copyUpDescendantsLocked(ctx, ds)
   430  			child.dirMu.Unlock()
   431  			if err != nil {
   432  				return err
   433  			}
   434  		}
   435  	}
   436  	return nil
   437  }