github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/fsimpl/overlay/copy_up.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package overlay
    16  
    17  import (
    18  	"fmt"
    19  
    20  	"github.com/nicocha30/gvisor-ligolo/pkg/abi/linux"
    21  	"github.com/nicocha30/gvisor-ligolo/pkg/context"
    22  	"github.com/nicocha30/gvisor-ligolo/pkg/errors/linuxerr"
    23  	"github.com/nicocha30/gvisor-ligolo/pkg/fspath"
    24  	"github.com/nicocha30/gvisor-ligolo/pkg/hostarch"
    25  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/kernel/auth"
    26  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/memmap"
    27  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/vfs"
    28  )
    29  
    30  func (d *dentry) isCopiedUp() bool {
    31  	return d.copiedUp.Load() != 0
    32  }
    33  
    34  func (d *dentry) canBeCopiedUp() bool {
    35  	ftype := d.mode.Load() & linux.S_IFMT
    36  	switch ftype {
    37  	case linux.S_IFREG, linux.S_IFDIR, linux.S_IFLNK, linux.S_IFBLK, linux.S_IFCHR:
    38  		// Can be copied-up.
    39  		return true
    40  	default:
    41  		// Can't be copied-up.
    42  		return false
    43  	}
    44  }
    45  
    46  // copyUpLocked ensures that d exists on the upper layer, i.e. d.upperVD.Ok().
    47  //
    48  // Preconditions: filesystem.renameMu must be locked.
    49  func (d *dentry) copyUpLocked(ctx context.Context) error {
    50  	return d.copyUpMaybeSyntheticMountpointLocked(ctx, false /* forSyntheticMountpoint */)
    51  }
    52  
    53  func (d *dentry) copyUpMaybeSyntheticMountpointLocked(ctx context.Context, forSyntheticMountpoint bool) error {
    54  	// Fast path.
    55  	if d.isCopiedUp() {
    56  		return nil
    57  	}
    58  
    59  	// Attach our credentials to the context, as some VFS operations use
    60  	// credentials from context rather an take an explicit creds parameter.
    61  	ctx = auth.ContextWithCredentials(ctx, d.fs.creds)
    62  
    63  	if !d.canBeCopiedUp() {
    64  		return linuxerr.EPERM
    65  	}
    66  
    67  	// Ensure that our parent directory is copied-up.
    68  	if d.parent == nil {
    69  		// d is a filesystem root with no upper layer.
    70  		return linuxerr.EROFS
    71  	}
    72  	if err := d.parent.copyUpMaybeSyntheticMountpointLocked(ctx, forSyntheticMountpoint); err != nil {
    73  		return err
    74  	}
    75  
    76  	d.copyMu.Lock()
    77  	defer d.copyMu.Unlock()
    78  	if d.upperVD.Ok() {
    79  		// Raced with another call to d.copyUpLocked().
    80  		return nil
    81  	}
    82  	if d.vfsd.IsDead() {
    83  		// Raced with deletion of d.
    84  		return linuxerr.ENOENT
    85  	}
    86  
    87  	// Obtain settable timestamps from the lower layer.
    88  	vfsObj := d.fs.vfsfs.VirtualFilesystem()
    89  	oldpop := vfs.PathOperation{
    90  		Root:  d.lowerVDs[0],
    91  		Start: d.lowerVDs[0],
    92  	}
    93  	const timestampsMask = linux.STATX_ATIME | linux.STATX_MTIME
    94  	oldStat, err := vfsObj.StatAt(ctx, d.fs.creds, &oldpop, &vfs.StatOptions{
    95  		Mask: timestampsMask,
    96  	})
    97  	if err != nil {
    98  		return err
    99  	}
   100  
   101  	// Perform copy-up.
   102  	ftype := d.mode.Load() & linux.S_IFMT
   103  	newpop := vfs.PathOperation{
   104  		Root:  d.parent.upperVD,
   105  		Start: d.parent.upperVD,
   106  		Path:  fspath.Parse(d.name),
   107  	}
   108  	// Used during copy-up of memory-mapped regular files.
   109  	var mmapOpts *memmap.MMapOpts
   110  	cleanupUndoCopyUp := func() {
   111  		var err error
   112  		if ftype == linux.S_IFDIR {
   113  			err = vfsObj.RmdirAt(ctx, d.fs.creds, &newpop)
   114  		} else {
   115  			err = vfsObj.UnlinkAt(ctx, d.fs.creds, &newpop)
   116  		}
   117  		if err != nil {
   118  			panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer file after copy-up error: %v", err))
   119  		}
   120  		if d.upperVD.Ok() {
   121  			d.upperVD.DecRef(ctx)
   122  			d.upperVD = vfs.VirtualDentry{}
   123  		}
   124  	}
   125  	switch ftype {
   126  	case linux.S_IFREG:
   127  		oldFD, err := vfsObj.OpenAt(ctx, d.fs.creds, &oldpop, &vfs.OpenOptions{
   128  			Flags: linux.O_RDONLY,
   129  		})
   130  		if err != nil {
   131  			return err
   132  		}
   133  		defer oldFD.DecRef(ctx)
   134  		newFD, err := vfsObj.OpenAt(ctx, d.fs.creds, &newpop, &vfs.OpenOptions{
   135  			Flags: linux.O_WRONLY | linux.O_CREAT | linux.O_EXCL,
   136  			// d.mode can be read because d.copyMu is locked.
   137  			Mode: linux.FileMode(d.mode.RacyLoad() &^ linux.S_IFMT),
   138  		})
   139  		if err != nil {
   140  			return err
   141  		}
   142  		defer newFD.DecRef(ctx)
   143  		if _, err := vfs.CopyRegularFileData(ctx, newFD, oldFD); err != nil {
   144  			cleanupUndoCopyUp()
   145  			return err
   146  		}
   147  		if d.wrappedMappable != nil {
   148  			// We may have memory mappings of the file on the lower layer.
   149  			// Switch to mapping the file on the upper layer instead.
   150  			mmapOpts = &memmap.MMapOpts{
   151  				Perms:    hostarch.ReadWrite,
   152  				MaxPerms: hostarch.ReadWrite,
   153  			}
   154  			if err := newFD.ConfigureMMap(ctx, mmapOpts); err != nil {
   155  				cleanupUndoCopyUp()
   156  				return err
   157  			}
   158  			if mmapOpts.MappingIdentity != nil {
   159  				mmapOpts.MappingIdentity.DecRef(ctx)
   160  			}
   161  			// Don't actually switch Mappables until the end of copy-up; see
   162  			// below for why.
   163  		}
   164  		if err := newFD.SetStat(ctx, vfs.SetStatOptions{
   165  			Stat: linux.Statx{
   166  				Mask: linux.STATX_UID | linux.STATX_GID | oldStat.Mask&timestampsMask,
   167  				// d.uid and d.gid can be read because d.copyMu is locked.
   168  				UID:   d.uid.RacyLoad(),
   169  				GID:   d.gid.RacyLoad(),
   170  				Atime: oldStat.Atime,
   171  				Mtime: oldStat.Mtime,
   172  			},
   173  		}); err != nil {
   174  			cleanupUndoCopyUp()
   175  			return err
   176  		}
   177  		d.upperVD = newFD.VirtualDentry()
   178  		d.upperVD.IncRef()
   179  
   180  	case linux.S_IFDIR:
   181  		if err := vfsObj.MkdirAt(ctx, d.fs.creds, &newpop, &vfs.MkdirOptions{
   182  			// d.mode can be read because d.copyMu is locked.
   183  			Mode:                   linux.FileMode(d.mode.RacyLoad() &^ linux.S_IFMT),
   184  			ForSyntheticMountpoint: forSyntheticMountpoint,
   185  		}); err != nil {
   186  			return err
   187  		}
   188  		if err := vfsObj.SetStatAt(ctx, d.fs.creds, &newpop, &vfs.SetStatOptions{
   189  			Stat: linux.Statx{
   190  				Mask: linux.STATX_UID | linux.STATX_GID | oldStat.Mask&timestampsMask,
   191  				// d.uid and d.gid can be read because d.copyMu is locked.
   192  				UID:   d.uid.RacyLoad(),
   193  				GID:   d.gid.RacyLoad(),
   194  				Atime: oldStat.Atime,
   195  				Mtime: oldStat.Mtime,
   196  			},
   197  		}); err != nil {
   198  			cleanupUndoCopyUp()
   199  			return err
   200  		}
   201  		upperVD, err := vfsObj.GetDentryAt(ctx, d.fs.creds, &newpop, &vfs.GetDentryOptions{})
   202  		if err != nil {
   203  			cleanupUndoCopyUp()
   204  			return err
   205  		}
   206  		d.upperVD = upperVD
   207  
   208  	case linux.S_IFLNK:
   209  		target, err := vfsObj.ReadlinkAt(ctx, d.fs.creds, &oldpop)
   210  		if err != nil {
   211  			return err
   212  		}
   213  		if err := vfsObj.SymlinkAt(ctx, d.fs.creds, &newpop, target); err != nil {
   214  			return err
   215  		}
   216  		if err := vfsObj.SetStatAt(ctx, d.fs.creds, &newpop, &vfs.SetStatOptions{
   217  			Stat: linux.Statx{
   218  				Mask: linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID | oldStat.Mask&timestampsMask,
   219  				// d.{uid,gid,mode} can be read because d.copyMu is locked.
   220  				Mode:  uint16(d.mode.RacyLoad()),
   221  				UID:   d.uid.RacyLoad(),
   222  				GID:   d.gid.RacyLoad(),
   223  				Atime: oldStat.Atime,
   224  				Mtime: oldStat.Mtime,
   225  			},
   226  		}); err != nil {
   227  			cleanupUndoCopyUp()
   228  			return err
   229  		}
   230  		upperVD, err := vfsObj.GetDentryAt(ctx, d.fs.creds, &newpop, &vfs.GetDentryOptions{})
   231  		if err != nil {
   232  			cleanupUndoCopyUp()
   233  			return err
   234  		}
   235  		d.upperVD = upperVD
   236  
   237  	case linux.S_IFBLK, linux.S_IFCHR:
   238  		if err := vfsObj.MknodAt(ctx, d.fs.creds, &newpop, &vfs.MknodOptions{
   239  			// d.mode can be read because d.copyMu is locked.
   240  			Mode:     linux.FileMode(d.mode.RacyLoad()),
   241  			DevMajor: oldStat.RdevMajor,
   242  			DevMinor: oldStat.RdevMinor,
   243  		}); err != nil {
   244  			return err
   245  		}
   246  		if err := vfsObj.SetStatAt(ctx, d.fs.creds, &newpop, &vfs.SetStatOptions{
   247  			Stat: linux.Statx{
   248  				Mask: linux.STATX_UID | linux.STATX_GID | oldStat.Mask&timestampsMask,
   249  				// d.uid and d.gid can be read because d.copyMu is locked.
   250  				UID:   d.uid.RacyLoad(),
   251  				GID:   d.gid.RacyLoad(),
   252  				Atime: oldStat.Atime,
   253  				Mtime: oldStat.Mtime,
   254  			},
   255  		}); err != nil {
   256  			cleanupUndoCopyUp()
   257  			return err
   258  		}
   259  		upperVD, err := vfsObj.GetDentryAt(ctx, d.fs.creds, &newpop, &vfs.GetDentryOptions{})
   260  		if err != nil {
   261  			cleanupUndoCopyUp()
   262  			return err
   263  		}
   264  		d.upperVD = upperVD
   265  
   266  	default:
   267  		// Should have rejected this at the beginning of this function?
   268  		panic(fmt.Sprintf("unexpected file type %o", ftype))
   269  	}
   270  
   271  	if err := d.copyXattrsLocked(ctx); err != nil {
   272  		cleanupUndoCopyUp()
   273  		return err
   274  	}
   275  
   276  	// Update the dentry's device and inode numbers (except for directories,
   277  	// for which these remain overlay-assigned).
   278  	if ftype != linux.S_IFDIR {
   279  		upperStat, err := vfsObj.StatAt(ctx, d.fs.creds, &vfs.PathOperation{
   280  			Root:  d.upperVD,
   281  			Start: d.upperVD,
   282  		}, &vfs.StatOptions{
   283  			Mask: linux.STATX_INO,
   284  		})
   285  		if err != nil {
   286  			cleanupUndoCopyUp()
   287  			return err
   288  		}
   289  		if upperStat.Mask&linux.STATX_INO == 0 {
   290  			cleanupUndoCopyUp()
   291  			return linuxerr.EREMOTE
   292  		}
   293  		d.devMajor.Store(upperStat.DevMajor)
   294  		d.devMinor.Store(upperStat.DevMinor)
   295  		d.ino.Store(upperStat.Ino)
   296  
   297  		// Lower level dentries for non-directories are no longer accessible from
   298  		// the overlayfs anymore after copyup. Ask filesystems to release their
   299  		// resources whenever possible.
   300  		for _, lowerDentry := range d.lowerVDs {
   301  			lowerDentry.Dentry().MarkEvictable()
   302  		}
   303  	}
   304  
   305  	if mmapOpts != nil && mmapOpts.Mappable != nil {
   306  		d.mapsMu.Lock()
   307  		defer d.mapsMu.Unlock()
   308  
   309  		// Propagate mappings of d to the new Mappable. Remember which mappings
   310  		// we added so we can remove them on failure.
   311  		upperMappable := mmapOpts.Mappable
   312  		allAdded := make(map[memmap.MappableRange]memmap.MappingsOfRange)
   313  		for seg := d.lowerMappings.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
   314  			added := make(memmap.MappingsOfRange)
   315  			for m := range seg.Value() {
   316  				if err := upperMappable.AddMapping(ctx, m.MappingSpace, m.AddrRange, seg.Start(), m.Writable); err != nil {
   317  					for m := range added {
   318  						upperMappable.RemoveMapping(ctx, m.MappingSpace, m.AddrRange, seg.Start(), m.Writable)
   319  					}
   320  					for mr, mappings := range allAdded {
   321  						for m := range mappings {
   322  							upperMappable.RemoveMapping(ctx, m.MappingSpace, m.AddrRange, mr.Start, m.Writable)
   323  						}
   324  					}
   325  					return err
   326  				}
   327  				added[m] = struct{}{}
   328  			}
   329  			allAdded[seg.Range()] = added
   330  		}
   331  
   332  		// Switch to the new Mappable. We do this at the end of copy-up
   333  		// because:
   334  		//
   335  		//	- We need to switch Mappables (by changing d.wrappedMappable) before
   336  		//		invalidating Translations from the old Mappable (to pick up
   337  		//		Translations from the new one).
   338  		//
   339  		//	- We need to lock d.dataMu while changing d.wrappedMappable, but
   340  		//		must invalidate Translations with d.dataMu unlocked (due to lock
   341  		//		ordering).
   342  		//
   343  		//	- Consequently, once we unlock d.dataMu, other threads may
   344  		//		immediately observe the new (copied-up) Mappable, which we want to
   345  		//		delay until copy-up is guaranteed to succeed.
   346  		d.dataMu.Lock()
   347  		lowerMappable := d.wrappedMappable
   348  		d.wrappedMappable = upperMappable
   349  		d.dataMu.Unlock()
   350  		d.lowerMappings.InvalidateAll(memmap.InvalidateOpts{})
   351  
   352  		// Remove mappings from the old Mappable.
   353  		for seg := d.lowerMappings.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
   354  			for m := range seg.Value() {
   355  				lowerMappable.RemoveMapping(ctx, m.MappingSpace, m.AddrRange, seg.Start(), m.Writable)
   356  			}
   357  		}
   358  		d.lowerMappings.RemoveAll()
   359  	}
   360  
   361  	d.copiedUp.Store(1)
   362  	return nil
   363  }
   364  
   365  // copyXattrsLocked copies a subset of lower's extended attributes to upper.
   366  // Attributes that configure an overlay in the lower are not copied up.
   367  //
   368  // Preconditions: d.copyMu must be locked for writing.
   369  func (d *dentry) copyXattrsLocked(ctx context.Context) error {
   370  	vfsObj := d.fs.vfsfs.VirtualFilesystem()
   371  	lowerPop := &vfs.PathOperation{Root: d.lowerVDs[0], Start: d.lowerVDs[0]}
   372  	upperPop := &vfs.PathOperation{Root: d.upperVD, Start: d.upperVD}
   373  
   374  	lowerXattrs, err := vfsObj.ListXattrAt(ctx, d.fs.creds, lowerPop, 0)
   375  	if err != nil {
   376  		if linuxerr.Equals(linuxerr.EOPNOTSUPP, err) {
   377  			// There are no guarantees as to the contents of lowerXattrs.
   378  			return nil
   379  		}
   380  		ctx.Infof("failed to copy up xattrs because ListXattrAt failed: %v", err)
   381  		return err
   382  	}
   383  
   384  	for _, name := range lowerXattrs {
   385  		// Do not copy up overlay attributes.
   386  		if isOverlayXattr(name) {
   387  			continue
   388  		}
   389  
   390  		value, err := vfsObj.GetXattrAt(ctx, d.fs.creds, lowerPop, &vfs.GetXattrOptions{Name: name, Size: 0})
   391  		if err != nil {
   392  			ctx.Infof("failed to copy up xattrs because GetXattrAt failed: %v", err)
   393  			return err
   394  		}
   395  
   396  		if err := vfsObj.SetXattrAt(ctx, d.fs.creds, upperPop, &vfs.SetXattrOptions{Name: name, Value: value}); err != nil {
   397  			ctx.Infof("failed to copy up xattrs because SetXattrAt failed: %v", err)
   398  			return err
   399  		}
   400  	}
   401  	return nil
   402  }
   403  
   404  // copyUpDescendantsLocked ensures that all descendants of d are copied up.
   405  //
   406  // Preconditions:
   407  //   - filesystem.renameMu must be locked.
   408  //   - d.dirMu must be locked.
   409  //   - d.isDir().
   410  func (d *dentry) copyUpDescendantsLocked(ctx context.Context, ds **[]*dentry) error {
   411  	dirents, err := d.getDirentsLocked(ctx)
   412  	if err != nil {
   413  		return err
   414  	}
   415  	for _, dirent := range dirents {
   416  		if dirent.Name == "." || dirent.Name == ".." {
   417  			continue
   418  		}
   419  		child, _, err := d.fs.getChildLocked(ctx, d, dirent.Name, ds)
   420  		if err != nil {
   421  			return err
   422  		}
   423  		if err := child.copyUpLocked(ctx); err != nil {
   424  			return err
   425  		}
   426  		if child.isDir() {
   427  			child.dirMu.Lock()
   428  			err := child.copyUpDescendantsLocked(ctx, ds)
   429  			child.dirMu.Unlock()
   430  			if err != nil {
   431  				return err
   432  			}
   433  		}
   434  	}
   435  	return nil
   436  }