github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/overlay/regular_file.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package overlay
    16  
    17  import (
    18  	"sync/atomic"
    19  
    20  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    21  	"github.com/SagerNet/gvisor/pkg/context"
    22  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    23  	"github.com/SagerNet/gvisor/pkg/hostarch"
    24  	"github.com/SagerNet/gvisor/pkg/log"
    25  	"github.com/SagerNet/gvisor/pkg/sentry/arch"
    26  	"github.com/SagerNet/gvisor/pkg/sentry/kernel/auth"
    27  	"github.com/SagerNet/gvisor/pkg/sentry/memmap"
    28  	"github.com/SagerNet/gvisor/pkg/sentry/vfs"
    29  	"github.com/SagerNet/gvisor/pkg/sync"
    30  	"github.com/SagerNet/gvisor/pkg/usermem"
    31  	"github.com/SagerNet/gvisor/pkg/waiter"
    32  )
    33  
    34  func (d *dentry) isRegularFile() bool {
    35  	return atomic.LoadUint32(&d.mode)&linux.S_IFMT == linux.S_IFREG
    36  }
    37  
    38  func (d *dentry) isSymlink() bool {
    39  	return atomic.LoadUint32(&d.mode)&linux.S_IFMT == linux.S_IFLNK
    40  }
    41  
// readlink returns the target of symlink d by issuing ReadlinkAt on d's
// topmost layer, using the filesystem's credentials.
func (d *dentry) readlink(ctx context.Context) (string, error) {
	layerVD := d.topLayer()
	return d.fs.vfsfs.VirtualFilesystem().ReadlinkAt(ctx, d.fs.creds, &vfs.PathOperation{
		Root:  layerVD,
		Start: layerVD,
	})
}
    49  
// regularFileFD implements vfs.FileDescriptionImpl for regular files in an
// overlay filesystem by wrapping a file description opened on the dentry's
// current topmost layer (lower before copy-up, upper after).
//
// +stateify savable
type regularFileFD struct {
	fileDescription

	// If copiedUp is false, cachedFD represents
	// fileDescription.dentry().lowerVDs[0]; otherwise, cachedFD represents
	// fileDescription.dentry().upperVD. cachedFlags is the last known value of
	// cachedFD.StatusFlags(). copiedUp, cachedFD, and cachedFlags are
	// protected by mu.
	mu          sync.Mutex `state:"nosave"`
	copiedUp    bool
	cachedFD    *vfs.FileDescription
	cachedFlags uint32

	// If copiedUp is false, lowerWaiters contains all waiter.Entries
	// registered with cachedFD. lowerWaiters is protected by mu.
	lowerWaiters map[*waiter.Entry]waiter.EventMask
}
    68  
    69  func (fd *regularFileFD) getCurrentFD(ctx context.Context) (*vfs.FileDescription, error) {
    70  	fd.mu.Lock()
    71  	defer fd.mu.Unlock()
    72  	wrappedFD, err := fd.currentFDLocked(ctx)
    73  	if err != nil {
    74  		return nil, err
    75  	}
    76  	wrappedFD.IncRef()
    77  	return wrappedFD, nil
    78  }
    79  
// currentFDLocked returns the FD that I/O on fd should be forwarded to,
// switching from the cached lower-layer FD to a newly opened upper-layer FD
// if the dentry has been copied up since the last call, and keeping the
// cached FD's status flags in sync with fd.vfsfd's. The returned FD is
// borrowed: no reference is transferred to the caller.
//
// Preconditions: fd.mu must be locked.
func (fd *regularFileFD) currentFDLocked(ctx context.Context) (*vfs.FileDescription, error) {
	d := fd.dentry()
	statusFlags := fd.vfsfd.StatusFlags()
	if !fd.copiedUp && d.isCopiedUp() {
		// Switch to the copied-up file.
		upperVD := d.topLayer()
		upperFD, err := fd.filesystem().vfsfs.VirtualFilesystem().OpenAt(ctx, d.fs.creds, &vfs.PathOperation{
			Root:  upperVD,
			Start: upperVD,
		}, &vfs.OpenOptions{
			Flags: statusFlags,
		})
		if err != nil {
			return nil, err
		}
		// Carry the lower FD's file offset over to the upper FD. If the lower
		// FD doesn't support seeking, skip this silently (best-effort).
		oldOff, oldOffErr := fd.cachedFD.Seek(ctx, 0, linux.SEEK_CUR)
		if oldOffErr == nil {
			if _, err := upperFD.Seek(ctx, oldOff, linux.SEEK_SET); err != nil {
				upperFD.DecRef(ctx)
				return nil, err
			}
		}
		// Move registered waiters from the lower FD to the upper FD,
		// immediately notifying any whose events are already ready on the
		// upper FD.
		if len(fd.lowerWaiters) != 0 {
			ready := upperFD.Readiness(^waiter.EventMask(0))
			for e, mask := range fd.lowerWaiters {
				fd.cachedFD.EventUnregister(e)
				upperFD.EventRegister(e, mask)
				if m := ready & mask; m != 0 {
					e.Callback.Callback(e, m)
				}
			}
		}
		fd.cachedFD.DecRef(ctx)
		fd.copiedUp = true
		fd.cachedFD = upperFD
		fd.cachedFlags = statusFlags
		fd.lowerWaiters = nil
	} else if fd.cachedFlags != statusFlags {
		// Propagate status flag changes made on fd.vfsfd (e.g. via fcntl) to
		// the cached layer FD.
		if err := fd.cachedFD.SetStatusFlags(ctx, d.fs.creds, statusFlags); err != nil {
			return nil, err
		}
		fd.cachedFlags = statusFlags
	}
	return fd.cachedFD, nil
}
   125  
   126  // Release implements vfs.FileDescriptionImpl.Release.
   127  func (fd *regularFileFD) Release(ctx context.Context) {
   128  	fd.cachedFD.DecRef(ctx)
   129  	fd.cachedFD = nil
   130  }
   131  
   132  // OnClose implements vfs.FileDescriptionImpl.OnClose.
   133  func (fd *regularFileFD) OnClose(ctx context.Context) error {
   134  	// Linux doesn't define ovl_file_operations.flush at all (i.e. its
   135  	// equivalent to OnClose is a no-op). We pass through to
   136  	// fd.cachedFD.OnClose() without upgrading if fd.dentry() has been
   137  	// copied-up, since OnClose is mostly used to define post-close writeback,
   138  	// and if fd.cachedFD hasn't been updated then it can't have been used to
   139  	// mutate fd.dentry() anyway.
   140  	fd.mu.Lock()
   141  	if statusFlags := fd.vfsfd.StatusFlags(); fd.cachedFlags != statusFlags {
   142  		if err := fd.cachedFD.SetStatusFlags(ctx, fd.filesystem().creds, statusFlags); err != nil {
   143  			fd.mu.Unlock()
   144  			return err
   145  		}
   146  		fd.cachedFlags = statusFlags
   147  	}
   148  	wrappedFD := fd.cachedFD
   149  	fd.mu.Unlock()
   150  	return wrappedFD.OnClose(ctx)
   151  }
   152  
   153  // Stat implements vfs.FileDescriptionImpl.Stat.
   154  func (fd *regularFileFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
   155  	var stat linux.Statx
   156  	if layerMask := opts.Mask &^ statInternalMask; layerMask != 0 {
   157  		wrappedFD, err := fd.getCurrentFD(ctx)
   158  		if err != nil {
   159  			return linux.Statx{}, err
   160  		}
   161  		stat, err = wrappedFD.Stat(ctx, vfs.StatOptions{
   162  			Mask: layerMask,
   163  			Sync: opts.Sync,
   164  		})
   165  		wrappedFD.DecRef(ctx)
   166  		if err != nil {
   167  			return linux.Statx{}, err
   168  		}
   169  	}
   170  	fd.dentry().statInternalTo(ctx, &opts, &stat)
   171  	return stat, nil
   172  }
   173  
   174  // Allocate implements vfs.FileDescriptionImpl.Allocate.
   175  func (fd *regularFileFD) Allocate(ctx context.Context, mode, offset, length uint64) error {
   176  	wrappedFD, err := fd.getCurrentFD(ctx)
   177  	if err != nil {
   178  		return err
   179  	}
   180  	defer wrappedFD.DecRef(ctx)
   181  	return wrappedFD.Allocate(ctx, mode, offset, length)
   182  }
   183  
   184  // SetStat implements vfs.FileDescriptionImpl.SetStat.
   185  func (fd *regularFileFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
   186  	d := fd.dentry()
   187  	mode := linux.FileMode(atomic.LoadUint32(&d.mode))
   188  	if err := vfs.CheckSetStat(ctx, auth.CredentialsFromContext(ctx), &opts, mode, auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid))); err != nil {
   189  		return err
   190  	}
   191  	mnt := fd.vfsfd.Mount()
   192  	if err := mnt.CheckBeginWrite(); err != nil {
   193  		return err
   194  	}
   195  	defer mnt.EndWrite()
   196  	if err := d.copyUpLocked(ctx); err != nil {
   197  		return err
   198  	}
   199  	// Changes to d's attributes are serialized by d.copyMu.
   200  	d.copyMu.Lock()
   201  	defer d.copyMu.Unlock()
   202  	wrappedFD, err := fd.currentFDLocked(ctx)
   203  	if err != nil {
   204  		return err
   205  	}
   206  	if err := wrappedFD.SetStat(ctx, opts); err != nil {
   207  		return err
   208  	}
   209  
   210  	// Changing owners or truncating may clear one or both of the setuid and
   211  	// setgid bits, so we may have to update opts before setting d.mode.
   212  	inotifyMask := opts.Stat.Mask
   213  	if opts.Stat.Mask&(linux.STATX_UID|linux.STATX_GID|linux.STATX_SIZE) != 0 {
   214  		stat, err := wrappedFD.Stat(ctx, vfs.StatOptions{
   215  			Mask: linux.STATX_MODE,
   216  		})
   217  		if err != nil {
   218  			return err
   219  		}
   220  		opts.Stat.Mode = stat.Mode
   221  		opts.Stat.Mask |= linux.STATX_MODE
   222  		// Don't generate inotify IN_ATTRIB for size-only changes (truncations).
   223  		if opts.Stat.Mask&(linux.STATX_UID|linux.STATX_GID) != 0 {
   224  			inotifyMask |= linux.STATX_MODE
   225  		}
   226  	}
   227  
   228  	d.updateAfterSetStatLocked(&opts)
   229  	if ev := vfs.InotifyEventFromStatMask(inotifyMask); ev != 0 {
   230  		d.InotifyWithParent(ctx, ev, 0, vfs.InodeEvent)
   231  	}
   232  	return nil
   233  }
   234  
   235  // StatFS implements vfs.FileDescriptionImpl.StatFS.
   236  func (fd *regularFileFD) StatFS(ctx context.Context) (linux.Statfs, error) {
   237  	return fd.filesystem().statFS(ctx)
   238  }
   239  
   240  // Readiness implements waiter.Waitable.Readiness.
   241  func (fd *regularFileFD) Readiness(mask waiter.EventMask) waiter.EventMask {
   242  	ctx := context.Background()
   243  	wrappedFD, err := fd.getCurrentFD(ctx)
   244  	if err != nil {
   245  		// TODO(b/171089913): Just use fd.cachedFD since Readiness can't return
   246  		// an error. This is obviously wrong, but at least consistent with
   247  		// VFS1.
   248  		log.Warningf("overlay.regularFileFD.Readiness: currentFDLocked failed: %v", err)
   249  		fd.mu.Lock()
   250  		wrappedFD = fd.cachedFD
   251  		wrappedFD.IncRef()
   252  		fd.mu.Unlock()
   253  	}
   254  	defer wrappedFD.DecRef(ctx)
   255  	return wrappedFD.Readiness(mask)
   256  }
   257  
   258  // EventRegister implements waiter.Waitable.EventRegister.
   259  func (fd *regularFileFD) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
   260  	fd.mu.Lock()
   261  	defer fd.mu.Unlock()
   262  	wrappedFD, err := fd.currentFDLocked(context.Background())
   263  	if err != nil {
   264  		// TODO(b/171089913): Just use fd.cachedFD since EventRegister can't
   265  		// return an error. This is obviously wrong, but at least consistent
   266  		// with VFS1.
   267  		log.Warningf("overlay.regularFileFD.EventRegister: currentFDLocked failed: %v", err)
   268  		wrappedFD = fd.cachedFD
   269  	}
   270  	wrappedFD.EventRegister(e, mask)
   271  	if !fd.copiedUp {
   272  		if fd.lowerWaiters == nil {
   273  			fd.lowerWaiters = make(map[*waiter.Entry]waiter.EventMask)
   274  		}
   275  		fd.lowerWaiters[e] = mask
   276  	}
   277  }
   278  
   279  // EventUnregister implements waiter.Waitable.EventUnregister.
   280  func (fd *regularFileFD) EventUnregister(e *waiter.Entry) {
   281  	fd.mu.Lock()
   282  	defer fd.mu.Unlock()
   283  	fd.cachedFD.EventUnregister(e)
   284  	if !fd.copiedUp {
   285  		delete(fd.lowerWaiters, e)
   286  	}
   287  }
   288  
   289  // PRead implements vfs.FileDescriptionImpl.PRead.
   290  func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
   291  	wrappedFD, err := fd.getCurrentFD(ctx)
   292  	if err != nil {
   293  		return 0, err
   294  	}
   295  	defer wrappedFD.DecRef(ctx)
   296  	return wrappedFD.PRead(ctx, dst, offset, opts)
   297  }
   298  
   299  // Read implements vfs.FileDescriptionImpl.Read.
   300  func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
   301  	// Hold fd.mu during the read to serialize the file offset.
   302  	fd.mu.Lock()
   303  	defer fd.mu.Unlock()
   304  	wrappedFD, err := fd.currentFDLocked(ctx)
   305  	if err != nil {
   306  		return 0, err
   307  	}
   308  	return wrappedFD.Read(ctx, dst, opts)
   309  }
   310  
   311  // PWrite implements vfs.FileDescriptionImpl.PWrite.
   312  func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
   313  	wrappedFD, err := fd.getCurrentFD(ctx)
   314  	if err != nil {
   315  		return 0, err
   316  	}
   317  	defer wrappedFD.DecRef(ctx)
   318  	n, err := wrappedFD.PWrite(ctx, src, offset, opts)
   319  	if err != nil {
   320  		return n, err
   321  	}
   322  	return fd.updateSetUserGroupIDs(ctx, wrappedFD, n)
   323  }
   324  
   325  // Write implements vfs.FileDescriptionImpl.Write.
   326  func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
   327  	// Hold fd.mu during the write to serialize the file offset.
   328  	fd.mu.Lock()
   329  	defer fd.mu.Unlock()
   330  	wrappedFD, err := fd.currentFDLocked(ctx)
   331  	if err != nil {
   332  		return 0, err
   333  	}
   334  	n, err := wrappedFD.Write(ctx, src, opts)
   335  	if err != nil {
   336  		return n, err
   337  	}
   338  	return fd.updateSetUserGroupIDs(ctx, wrappedFD, n)
   339  }
   340  
// updateSetUserGroupIDs re-reads the file mode from wrappedFD after a write
// of `written` bytes and stores it into the dentry's cached mode, so that a
// setuid/setgid bit cleared by the underlying filesystem is reflected in the
// overlay. It returns (written, nil) on success, or (written, err) if the
// follow-up Stat fails.
func (fd *regularFileFD) updateSetUserGroupIDs(ctx context.Context, wrappedFD *vfs.FileDescription, written int64) (int64, error) {
	// Writing can clear the setuid and/or setgid bits. We only have to
	// check this if something was written and one of those bits was set.
	dentry := fd.dentry()
	if written == 0 || atomic.LoadUint32(&dentry.mode)&(linux.S_ISUID|linux.S_ISGID) == 0 {
		return written, nil
	}
	stat, err := wrappedFD.Stat(ctx, vfs.StatOptions{Mask: linux.STATX_MODE})
	if err != nil {
		return written, err
	}
	// Updates to the dentry's cached attributes are serialized by copyMu.
	dentry.copyMu.Lock()
	defer dentry.copyMu.Unlock()
	atomic.StoreUint32(&dentry.mode, uint32(stat.Mode))
	return written, nil
}
   357  
   358  // Seek implements vfs.FileDescriptionImpl.Seek.
   359  func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
   360  	// Hold fd.mu during the seek to serialize the file offset.
   361  	fd.mu.Lock()
   362  	defer fd.mu.Unlock()
   363  	wrappedFD, err := fd.currentFDLocked(ctx)
   364  	if err != nil {
   365  		return 0, err
   366  	}
   367  	return wrappedFD.Seek(ctx, offset, whence)
   368  }
   369  
   370  // Sync implements vfs.FileDescriptionImpl.Sync.
   371  func (fd *regularFileFD) Sync(ctx context.Context) error {
   372  	fd.mu.Lock()
   373  	if !fd.dentry().isCopiedUp() {
   374  		fd.mu.Unlock()
   375  		return nil
   376  	}
   377  	wrappedFD, err := fd.currentFDLocked(ctx)
   378  	if err != nil {
   379  		fd.mu.Unlock()
   380  		return err
   381  	}
   382  	wrappedFD.IncRef()
   383  	defer wrappedFD.DecRef(ctx)
   384  	fd.mu.Unlock()
   385  	return wrappedFD.Sync(ctx)
   386  }
   387  
   388  // Ioctl implements vfs.FileDescriptionImpl.Ioctl.
   389  func (fd *regularFileFD) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) {
   390  	wrappedFD, err := fd.getCurrentFD(ctx)
   391  	if err != nil {
   392  		return 0, err
   393  	}
   394  	defer wrappedFD.DecRef(ctx)
   395  	return wrappedFD.Ioctl(ctx, uio, args)
   396  }
   397  
   398  // ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap.
   399  func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error {
   400  	if err := fd.ensureMappable(ctx, opts); err != nil {
   401  		return err
   402  	}
   403  	return vfs.GenericConfigureMMap(&fd.vfsfd, fd.dentry(), opts)
   404  }
   405  
   406  // ensureMappable ensures that fd.dentry().wrappedMappable is not nil.
   407  func (fd *regularFileFD) ensureMappable(ctx context.Context, opts *memmap.MMapOpts) error {
   408  	d := fd.dentry()
   409  
   410  	// Fast path if we already have a Mappable for the current top layer.
   411  	if atomic.LoadUint32(&d.isMappable) != 0 {
   412  		return nil
   413  	}
   414  
   415  	// Only permit mmap of regular files, since other file types may have
   416  	// unpredictable behavior when mmapped (e.g. /dev/zero).
   417  	if atomic.LoadUint32(&d.mode)&linux.S_IFMT != linux.S_IFREG {
   418  		return linuxerr.ENODEV
   419  	}
   420  
   421  	// Get a Mappable for the current top layer.
   422  	fd.mu.Lock()
   423  	defer fd.mu.Unlock()
   424  	d.copyMu.RLock()
   425  	defer d.copyMu.RUnlock()
   426  	if atomic.LoadUint32(&d.isMappable) != 0 {
   427  		return nil
   428  	}
   429  	wrappedFD, err := fd.currentFDLocked(ctx)
   430  	if err != nil {
   431  		return err
   432  	}
   433  	if err := wrappedFD.ConfigureMMap(ctx, opts); err != nil {
   434  		return err
   435  	}
   436  	if opts.MappingIdentity != nil {
   437  		opts.MappingIdentity.DecRef(ctx)
   438  		opts.MappingIdentity = nil
   439  	}
   440  	// Use this Mappable for all mappings of this layer (unless we raced with
   441  	// another call to ensureMappable).
   442  	d.mapsMu.Lock()
   443  	defer d.mapsMu.Unlock()
   444  	d.dataMu.Lock()
   445  	defer d.dataMu.Unlock()
   446  	if d.wrappedMappable == nil {
   447  		d.wrappedMappable = opts.Mappable
   448  		atomic.StoreUint32(&d.isMappable, 1)
   449  	}
   450  	return nil
   451  }
   452  
   453  // AddMapping implements memmap.Mappable.AddMapping.
   454  func (d *dentry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error {
   455  	d.mapsMu.Lock()
   456  	defer d.mapsMu.Unlock()
   457  	if err := d.wrappedMappable.AddMapping(ctx, ms, ar, offset, writable); err != nil {
   458  		return err
   459  	}
   460  	if !d.isCopiedUp() {
   461  		d.lowerMappings.AddMapping(ms, ar, offset, writable)
   462  	}
   463  	return nil
   464  }
   465  
   466  // RemoveMapping implements memmap.Mappable.RemoveMapping.
   467  func (d *dentry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) {
   468  	d.mapsMu.Lock()
   469  	defer d.mapsMu.Unlock()
   470  	d.wrappedMappable.RemoveMapping(ctx, ms, ar, offset, writable)
   471  	if !d.isCopiedUp() {
   472  		d.lowerMappings.RemoveMapping(ms, ar, offset, writable)
   473  	}
   474  }
   475  
   476  // CopyMapping implements memmap.Mappable.CopyMapping.
   477  func (d *dentry) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error {
   478  	d.mapsMu.Lock()
   479  	defer d.mapsMu.Unlock()
   480  	if err := d.wrappedMappable.CopyMapping(ctx, ms, srcAR, dstAR, offset, writable); err != nil {
   481  		return err
   482  	}
   483  	if !d.isCopiedUp() {
   484  		d.lowerMappings.AddMapping(ms, dstAR, offset, writable)
   485  	}
   486  	return nil
   487  }
   488  
   489  // Translate implements memmap.Mappable.Translate.
   490  func (d *dentry) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) {
   491  	d.dataMu.RLock()
   492  	defer d.dataMu.RUnlock()
   493  	return d.wrappedMappable.Translate(ctx, required, optional, at)
   494  }
   495  
   496  // InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable.
   497  func (d *dentry) InvalidateUnsavable(ctx context.Context) error {
   498  	d.mapsMu.Lock()
   499  	defer d.mapsMu.Unlock()
   500  	return d.wrappedMappable.InvalidateUnsavable(ctx)
   501  }