github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/overlay/copy_up.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package overlay
    16  
    17  import (
    18  	"fmt"
    19  	"sync/atomic"
    20  
    21  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    22  	"github.com/SagerNet/gvisor/pkg/context"
    23  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    24  	"github.com/SagerNet/gvisor/pkg/fspath"
    25  	"github.com/SagerNet/gvisor/pkg/hostarch"
    26  	"github.com/SagerNet/gvisor/pkg/sentry/kernel/auth"
    27  	"github.com/SagerNet/gvisor/pkg/sentry/memmap"
    28  	"github.com/SagerNet/gvisor/pkg/sentry/vfs"
    29  	"github.com/SagerNet/gvisor/pkg/syserror"
    30  )
    31  
    32  func (d *dentry) isCopiedUp() bool {
    33  	return atomic.LoadUint32(&d.copiedUp) != 0
    34  }
    35  
    36  // copyUpLocked ensures that d exists on the upper layer, i.e. d.upperVD.Ok().
    37  //
    38  // Preconditions: filesystem.renameMu must be locked.
    39  func (d *dentry) copyUpLocked(ctx context.Context) error {
    40  	// Fast path.
    41  	if d.isCopiedUp() {
    42  		return nil
    43  	}
    44  
    45  	// Attach our credentials to the context, as some VFS operations use
    46  	// credentials from context rather an take an explicit creds parameter.
    47  	ctx = auth.ContextWithCredentials(ctx, d.fs.creds)
    48  
    49  	ftype := atomic.LoadUint32(&d.mode) & linux.S_IFMT
    50  	switch ftype {
    51  	case linux.S_IFREG, linux.S_IFDIR, linux.S_IFLNK, linux.S_IFBLK, linux.S_IFCHR:
    52  		// Can be copied-up.
    53  	default:
    54  		// Can't be copied-up.
    55  		return linuxerr.EPERM
    56  	}
    57  
    58  	// Ensure that our parent directory is copied-up.
    59  	if d.parent == nil {
    60  		// d is a filesystem root with no upper layer.
    61  		return linuxerr.EROFS
    62  	}
    63  	if err := d.parent.copyUpLocked(ctx); err != nil {
    64  		return err
    65  	}
    66  
    67  	d.copyMu.Lock()
    68  	defer d.copyMu.Unlock()
    69  	if d.upperVD.Ok() {
    70  		// Raced with another call to d.copyUpLocked().
    71  		return nil
    72  	}
    73  	if d.vfsd.IsDead() {
    74  		// Raced with deletion of d.
    75  		return syserror.ENOENT
    76  	}
    77  
    78  	// Obtain settable timestamps from the lower layer.
    79  	vfsObj := d.fs.vfsfs.VirtualFilesystem()
    80  	oldpop := vfs.PathOperation{
    81  		Root:  d.lowerVDs[0],
    82  		Start: d.lowerVDs[0],
    83  	}
    84  	const timestampsMask = linux.STATX_ATIME | linux.STATX_MTIME
    85  	oldStat, err := vfsObj.StatAt(ctx, d.fs.creds, &oldpop, &vfs.StatOptions{
    86  		Mask: timestampsMask,
    87  	})
    88  	if err != nil {
    89  		return err
    90  	}
    91  
    92  	// Perform copy-up.
    93  	newpop := vfs.PathOperation{
    94  		Root:  d.parent.upperVD,
    95  		Start: d.parent.upperVD,
    96  		Path:  fspath.Parse(d.name),
    97  	}
    98  	// Used during copy-up of memory-mapped regular files.
    99  	var mmapOpts *memmap.MMapOpts
   100  	cleanupUndoCopyUp := func() {
   101  		var err error
   102  		if ftype == linux.S_IFDIR {
   103  			err = vfsObj.RmdirAt(ctx, d.fs.creds, &newpop)
   104  		} else {
   105  			err = vfsObj.UnlinkAt(ctx, d.fs.creds, &newpop)
   106  		}
   107  		if err != nil {
   108  			panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer file after copy-up error: %v", err))
   109  		}
   110  		if d.upperVD.Ok() {
   111  			d.upperVD.DecRef(ctx)
   112  			d.upperVD = vfs.VirtualDentry{}
   113  		}
   114  	}
   115  	switch ftype {
   116  	case linux.S_IFREG:
   117  		oldFD, err := vfsObj.OpenAt(ctx, d.fs.creds, &oldpop, &vfs.OpenOptions{
   118  			Flags: linux.O_RDONLY,
   119  		})
   120  		if err != nil {
   121  			return err
   122  		}
   123  		defer oldFD.DecRef(ctx)
   124  		newFD, err := vfsObj.OpenAt(ctx, d.fs.creds, &newpop, &vfs.OpenOptions{
   125  			Flags: linux.O_WRONLY | linux.O_CREAT | linux.O_EXCL,
   126  			Mode:  linux.FileMode(d.mode &^ linux.S_IFMT),
   127  		})
   128  		if err != nil {
   129  			return err
   130  		}
   131  		defer newFD.DecRef(ctx)
   132  		if _, err := vfs.CopyRegularFileData(ctx, newFD, oldFD); err != nil {
   133  			cleanupUndoCopyUp()
   134  			return err
   135  		}
   136  		d.mapsMu.Lock()
   137  		defer d.mapsMu.Unlock()
   138  		if d.wrappedMappable != nil {
   139  			// We may have memory mappings of the file on the lower layer.
   140  			// Switch to mapping the file on the upper layer instead.
   141  			mmapOpts = &memmap.MMapOpts{
   142  				Perms:    hostarch.ReadWrite,
   143  				MaxPerms: hostarch.ReadWrite,
   144  			}
   145  			if err := newFD.ConfigureMMap(ctx, mmapOpts); err != nil {
   146  				cleanupUndoCopyUp()
   147  				return err
   148  			}
   149  			if mmapOpts.MappingIdentity != nil {
   150  				mmapOpts.MappingIdentity.DecRef(ctx)
   151  			}
   152  			// Don't actually switch Mappables until the end of copy-up; see
   153  			// below for why.
   154  		}
   155  		if err := newFD.SetStat(ctx, vfs.SetStatOptions{
   156  			Stat: linux.Statx{
   157  				Mask:  linux.STATX_UID | linux.STATX_GID | oldStat.Mask&timestampsMask,
   158  				UID:   d.uid,
   159  				GID:   d.gid,
   160  				Atime: oldStat.Atime,
   161  				Mtime: oldStat.Mtime,
   162  			},
   163  		}); err != nil {
   164  			cleanupUndoCopyUp()
   165  			return err
   166  		}
   167  		d.upperVD = newFD.VirtualDentry()
   168  		d.upperVD.IncRef()
   169  
   170  	case linux.S_IFDIR:
   171  		if err := vfsObj.MkdirAt(ctx, d.fs.creds, &newpop, &vfs.MkdirOptions{
   172  			Mode: linux.FileMode(d.mode &^ linux.S_IFMT),
   173  		}); err != nil {
   174  			return err
   175  		}
   176  		if err := vfsObj.SetStatAt(ctx, d.fs.creds, &newpop, &vfs.SetStatOptions{
   177  			Stat: linux.Statx{
   178  				Mask:  linux.STATX_UID | linux.STATX_GID | oldStat.Mask&timestampsMask,
   179  				UID:   d.uid,
   180  				GID:   d.gid,
   181  				Atime: oldStat.Atime,
   182  				Mtime: oldStat.Mtime,
   183  			},
   184  		}); err != nil {
   185  			cleanupUndoCopyUp()
   186  			return err
   187  		}
   188  		upperVD, err := vfsObj.GetDentryAt(ctx, d.fs.creds, &newpop, &vfs.GetDentryOptions{})
   189  		if err != nil {
   190  			cleanupUndoCopyUp()
   191  			return err
   192  		}
   193  		d.upperVD = upperVD
   194  
   195  	case linux.S_IFLNK:
   196  		target, err := vfsObj.ReadlinkAt(ctx, d.fs.creds, &oldpop)
   197  		if err != nil {
   198  			return err
   199  		}
   200  		if err := vfsObj.SymlinkAt(ctx, d.fs.creds, &newpop, target); err != nil {
   201  			return err
   202  		}
   203  		if err := vfsObj.SetStatAt(ctx, d.fs.creds, &newpop, &vfs.SetStatOptions{
   204  			Stat: linux.Statx{
   205  				Mask:  linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID | oldStat.Mask&timestampsMask,
   206  				Mode:  uint16(d.mode),
   207  				UID:   d.uid,
   208  				GID:   d.gid,
   209  				Atime: oldStat.Atime,
   210  				Mtime: oldStat.Mtime,
   211  			},
   212  		}); err != nil {
   213  			cleanupUndoCopyUp()
   214  			return err
   215  		}
   216  		upperVD, err := vfsObj.GetDentryAt(ctx, d.fs.creds, &newpop, &vfs.GetDentryOptions{})
   217  		if err != nil {
   218  			cleanupUndoCopyUp()
   219  			return err
   220  		}
   221  		d.upperVD = upperVD
   222  
   223  	case linux.S_IFBLK, linux.S_IFCHR:
   224  		if err := vfsObj.MknodAt(ctx, d.fs.creds, &newpop, &vfs.MknodOptions{
   225  			Mode:     linux.FileMode(d.mode),
   226  			DevMajor: oldStat.RdevMajor,
   227  			DevMinor: oldStat.RdevMinor,
   228  		}); err != nil {
   229  			return err
   230  		}
   231  		if err := vfsObj.SetStatAt(ctx, d.fs.creds, &newpop, &vfs.SetStatOptions{
   232  			Stat: linux.Statx{
   233  				Mask:  linux.STATX_UID | linux.STATX_GID | oldStat.Mask&timestampsMask,
   234  				UID:   d.uid,
   235  				GID:   d.gid,
   236  				Atime: oldStat.Atime,
   237  				Mtime: oldStat.Mtime,
   238  			},
   239  		}); err != nil {
   240  			cleanupUndoCopyUp()
   241  			return err
   242  		}
   243  		upperVD, err := vfsObj.GetDentryAt(ctx, d.fs.creds, &newpop, &vfs.GetDentryOptions{})
   244  		if err != nil {
   245  			cleanupUndoCopyUp()
   246  			return err
   247  		}
   248  		d.upperVD = upperVD
   249  
   250  	default:
   251  		// Should have rejected this at the beginning of this function?
   252  		panic(fmt.Sprintf("unexpected file type %o", ftype))
   253  	}
   254  
   255  	if err := d.copyXattrsLocked(ctx); err != nil {
   256  		cleanupUndoCopyUp()
   257  		return err
   258  	}
   259  
   260  	// Update the dentry's device and inode numbers (except for directories,
   261  	// for which these remain overlay-assigned).
   262  	if ftype != linux.S_IFDIR {
   263  		upperStat, err := vfsObj.StatAt(ctx, d.fs.creds, &vfs.PathOperation{
   264  			Root:  d.upperVD,
   265  			Start: d.upperVD,
   266  		}, &vfs.StatOptions{
   267  			Mask: linux.STATX_INO,
   268  		})
   269  		if err != nil {
   270  			cleanupUndoCopyUp()
   271  			return err
   272  		}
   273  		if upperStat.Mask&linux.STATX_INO == 0 {
   274  			cleanupUndoCopyUp()
   275  			return linuxerr.EREMOTE
   276  		}
   277  		atomic.StoreUint32(&d.devMajor, upperStat.DevMajor)
   278  		atomic.StoreUint32(&d.devMinor, upperStat.DevMinor)
   279  		atomic.StoreUint64(&d.ino, upperStat.Ino)
   280  	}
   281  
   282  	if mmapOpts != nil && mmapOpts.Mappable != nil {
   283  		// Note that if mmapOpts != nil, then d.mapsMu is locked for writing
   284  		// (from the S_IFREG path above).
   285  
   286  		// Propagate mappings of d to the new Mappable. Remember which mappings
   287  		// we added so we can remove them on failure.
   288  		upperMappable := mmapOpts.Mappable
   289  		allAdded := make(map[memmap.MappableRange]memmap.MappingsOfRange)
   290  		for seg := d.lowerMappings.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
   291  			added := make(memmap.MappingsOfRange)
   292  			for m := range seg.Value() {
   293  				if err := upperMappable.AddMapping(ctx, m.MappingSpace, m.AddrRange, seg.Start(), m.Writable); err != nil {
   294  					for m := range added {
   295  						upperMappable.RemoveMapping(ctx, m.MappingSpace, m.AddrRange, seg.Start(), m.Writable)
   296  					}
   297  					for mr, mappings := range allAdded {
   298  						for m := range mappings {
   299  							upperMappable.RemoveMapping(ctx, m.MappingSpace, m.AddrRange, mr.Start, m.Writable)
   300  						}
   301  					}
   302  					return err
   303  				}
   304  				added[m] = struct{}{}
   305  			}
   306  			allAdded[seg.Range()] = added
   307  		}
   308  
   309  		// Switch to the new Mappable. We do this at the end of copy-up
   310  		// because:
   311  		//
   312  		// - We need to switch Mappables (by changing d.wrappedMappable) before
   313  		// invalidating Translations from the old Mappable (to pick up
   314  		// Translations from the new one).
   315  		//
   316  		// - We need to lock d.dataMu while changing d.wrappedMappable, but
   317  		// must invalidate Translations with d.dataMu unlocked (due to lock
   318  		// ordering).
   319  		//
   320  		// - Consequently, once we unlock d.dataMu, other threads may
   321  		// immediately observe the new (copied-up) Mappable, which we want to
   322  		// delay until copy-up is guaranteed to succeed.
   323  		d.dataMu.Lock()
   324  		lowerMappable := d.wrappedMappable
   325  		d.wrappedMappable = upperMappable
   326  		d.dataMu.Unlock()
   327  		d.lowerMappings.InvalidateAll(memmap.InvalidateOpts{})
   328  
   329  		// Remove mappings from the old Mappable.
   330  		for seg := d.lowerMappings.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
   331  			for m := range seg.Value() {
   332  				lowerMappable.RemoveMapping(ctx, m.MappingSpace, m.AddrRange, seg.Start(), m.Writable)
   333  			}
   334  		}
   335  		d.lowerMappings.RemoveAll()
   336  	}
   337  
   338  	atomic.StoreUint32(&d.copiedUp, 1)
   339  	return nil
   340  }
   341  
   342  // copyXattrsLocked copies a subset of lower's extended attributes to upper.
   343  // Attributes that configure an overlay in the lower are not copied up.
   344  //
   345  // Preconditions: d.copyMu must be locked for writing.
   346  func (d *dentry) copyXattrsLocked(ctx context.Context) error {
   347  	vfsObj := d.fs.vfsfs.VirtualFilesystem()
   348  	lowerPop := &vfs.PathOperation{Root: d.lowerVDs[0], Start: d.lowerVDs[0]}
   349  	upperPop := &vfs.PathOperation{Root: d.upperVD, Start: d.upperVD}
   350  
   351  	lowerXattrs, err := vfsObj.ListXattrAt(ctx, d.fs.creds, lowerPop, 0)
   352  	if err != nil {
   353  		if linuxerr.Equals(linuxerr.EOPNOTSUPP, err) {
   354  			// There are no guarantees as to the contents of lowerXattrs.
   355  			return nil
   356  		}
   357  		ctx.Infof("failed to copy up xattrs because ListXattrAt failed: %v", err)
   358  		return err
   359  	}
   360  
   361  	for _, name := range lowerXattrs {
   362  		// Do not copy up overlay attributes.
   363  		if isOverlayXattr(name) {
   364  			continue
   365  		}
   366  
   367  		value, err := vfsObj.GetXattrAt(ctx, d.fs.creds, lowerPop, &vfs.GetXattrOptions{Name: name, Size: 0})
   368  		if err != nil {
   369  			ctx.Infof("failed to copy up xattrs because GetXattrAt failed: %v", err)
   370  			return err
   371  		}
   372  
   373  		if err := vfsObj.SetXattrAt(ctx, d.fs.creds, upperPop, &vfs.SetXattrOptions{Name: name, Value: value}); err != nil {
   374  			ctx.Infof("failed to copy up xattrs because SetXattrAt failed: %v", err)
   375  			return err
   376  		}
   377  	}
   378  	return nil
   379  }
   380  
   381  // copyUpDescendantsLocked ensures that all descendants of d are copied up.
   382  //
   383  // Preconditions:
   384  // * filesystem.renameMu must be locked.
   385  // * d.dirMu must be locked.
   386  // * d.isDir().
   387  func (d *dentry) copyUpDescendantsLocked(ctx context.Context, ds **[]*dentry) error {
   388  	dirents, err := d.getDirentsLocked(ctx)
   389  	if err != nil {
   390  		return err
   391  	}
   392  	for _, dirent := range dirents {
   393  		if dirent.Name == "." || dirent.Name == ".." {
   394  			continue
   395  		}
   396  		child, _, err := d.fs.getChildLocked(ctx, d, dirent.Name, ds)
   397  		if err != nil {
   398  			return err
   399  		}
   400  		if err := child.copyUpLocked(ctx); err != nil {
   401  			return err
   402  		}
   403  		if child.isDir() {
   404  			child.dirMu.Lock()
   405  			err := child.copyUpDescendantsLocked(ctx, ds)
   406  			child.dirMu.Unlock()
   407  			if err != nil {
   408  				return err
   409  			}
   410  		}
   411  	}
   412  	return nil
   413  }