github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/fsimpl/overlay/regular_file.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package overlay
    16  
    17  import (
    18  	"github.com/MerlinKodo/gvisor/pkg/abi/linux"
    19  	"github.com/MerlinKodo/gvisor/pkg/context"
    20  	"github.com/MerlinKodo/gvisor/pkg/errors/linuxerr"
    21  	"github.com/MerlinKodo/gvisor/pkg/hostarch"
    22  	"github.com/MerlinKodo/gvisor/pkg/log"
    23  	"github.com/MerlinKodo/gvisor/pkg/sentry/arch"
    24  	"github.com/MerlinKodo/gvisor/pkg/sentry/kernel/auth"
    25  	"github.com/MerlinKodo/gvisor/pkg/sentry/memmap"
    26  	"github.com/MerlinKodo/gvisor/pkg/sentry/vfs"
    27  	"github.com/MerlinKodo/gvisor/pkg/usermem"
    28  	"github.com/MerlinKodo/gvisor/pkg/waiter"
    29  )
    30  
    31  func (d *dentry) isRegularFile() bool {
    32  	return d.mode.Load()&linux.S_IFMT == linux.S_IFREG
    33  }
    34  
    35  func (d *dentry) isSymlink() bool {
    36  	return d.mode.Load()&linux.S_IFMT == linux.S_IFLNK
    37  }
    38  
    39  func (d *dentry) readlink(ctx context.Context) (string, error) {
    40  	layerVD := d.topLayer()
    41  	return d.fs.vfsfs.VirtualFilesystem().ReadlinkAt(ctx, d.fs.creds, &vfs.PathOperation{
    42  		Root:  layerVD,
    43  		Start: layerVD,
    44  	})
    45  }
    46  
    47  // +stateify savable
    48  type regularFileFD struct {
    49  	fileDescription
    50  
    51  	// If copiedUp is false, cachedFD represents
    52  	// fileDescription.dentry().lowerVDs[0]; otherwise, cachedFD represents
    53  	// fileDescription.dentry().upperVD. cachedFlags is the last known value of
    54  	// cachedFD.StatusFlags(). copiedUp, cachedFD, and cachedFlags are
    55  	// protected by mu.
    56  	mu          regularFileFDMutex `state:"nosave"`
    57  	copiedUp    bool
    58  	cachedFD    *vfs.FileDescription
    59  	cachedFlags uint32
    60  }
    61  
    62  func (fd *regularFileFD) getCurrentFD(ctx context.Context) (*vfs.FileDescription, error) {
    63  	fd.mu.Lock()
    64  	defer fd.mu.Unlock()
    65  	wrappedFD, err := fd.currentFDLocked(ctx)
    66  	if err != nil {
    67  		return nil, err
    68  	}
    69  	wrappedFD.IncRef()
    70  	return wrappedFD, nil
    71  }
    72  
    73  func (fd *regularFileFD) currentFDLocked(ctx context.Context) (*vfs.FileDescription, error) {
    74  	d := fd.dentry()
    75  	statusFlags := fd.vfsfd.StatusFlags()
    76  	if !fd.copiedUp && d.isCopiedUp() {
    77  		// Switch to the copied-up file.
    78  		upperVD := d.topLayer()
    79  		upperFD, err := fd.filesystem().vfsfs.VirtualFilesystem().OpenAt(ctx, d.fs.creds, &vfs.PathOperation{
    80  			Root:  upperVD,
    81  			Start: upperVD,
    82  		}, &vfs.OpenOptions{
    83  			Flags: statusFlags,
    84  		})
    85  		if err != nil {
    86  			return nil, err
    87  		}
    88  		oldOff, oldOffErr := fd.cachedFD.Seek(ctx, 0, linux.SEEK_CUR)
    89  		if oldOffErr == nil {
    90  			if _, err := upperFD.Seek(ctx, oldOff, linux.SEEK_SET); err != nil {
    91  				upperFD.DecRef(ctx)
    92  				return nil, err
    93  			}
    94  		}
    95  		fd.cachedFD.DecRef(ctx)
    96  		fd.copiedUp = true
    97  		fd.cachedFD = upperFD
    98  		fd.cachedFlags = statusFlags
    99  	} else if fd.cachedFlags != statusFlags {
   100  		if err := fd.cachedFD.SetStatusFlags(ctx, d.fs.creds, statusFlags); err != nil {
   101  			return nil, err
   102  		}
   103  		fd.cachedFlags = statusFlags
   104  	}
   105  	return fd.cachedFD, nil
   106  }
   107  
   108  // Release implements vfs.FileDescriptionImpl.Release.
   109  func (fd *regularFileFD) Release(ctx context.Context) {
   110  	fd.cachedFD.DecRef(ctx)
   111  	fd.cachedFD = nil
   112  }
   113  
   114  // OnClose implements vfs.FileDescriptionImpl.OnClose.
   115  func (fd *regularFileFD) OnClose(ctx context.Context) error {
   116  	// Linux doesn't define ovl_file_operations.flush at all (i.e. its
   117  	// equivalent to OnClose is a no-op). We pass through to
   118  	// fd.cachedFD.OnClose() without upgrading if fd.dentry() has been
   119  	// copied-up, since OnClose is mostly used to define post-close writeback,
   120  	// and if fd.cachedFD hasn't been updated then it can't have been used to
   121  	// mutate fd.dentry() anyway.
   122  	fd.mu.Lock()
   123  	if statusFlags := fd.vfsfd.StatusFlags(); fd.cachedFlags != statusFlags {
   124  		if err := fd.cachedFD.SetStatusFlags(ctx, fd.filesystem().creds, statusFlags); err != nil {
   125  			fd.mu.Unlock()
   126  			return err
   127  		}
   128  		fd.cachedFlags = statusFlags
   129  	}
   130  	wrappedFD := fd.cachedFD
   131  	fd.mu.Unlock()
   132  	return wrappedFD.OnClose(ctx)
   133  }
   134  
   135  // Stat implements vfs.FileDescriptionImpl.Stat.
   136  func (fd *regularFileFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
   137  	var stat linux.Statx
   138  	if layerMask := opts.Mask &^ statInternalMask; layerMask != 0 {
   139  		wrappedFD, err := fd.getCurrentFD(ctx)
   140  		if err != nil {
   141  			return linux.Statx{}, err
   142  		}
   143  		stat, err = wrappedFD.Stat(ctx, vfs.StatOptions{
   144  			Mask: layerMask,
   145  			Sync: opts.Sync,
   146  		})
   147  		wrappedFD.DecRef(ctx)
   148  		if err != nil {
   149  			return linux.Statx{}, err
   150  		}
   151  	}
   152  	fd.dentry().statInternalTo(ctx, &opts, &stat)
   153  	return stat, nil
   154  }
   155  
   156  // Allocate implements vfs.FileDescriptionImpl.Allocate.
   157  func (fd *regularFileFD) Allocate(ctx context.Context, mode, offset, length uint64) error {
   158  	wrappedFD, err := fd.getCurrentFD(ctx)
   159  	if err != nil {
   160  		return err
   161  	}
   162  	defer wrappedFD.DecRef(ctx)
   163  	return wrappedFD.Allocate(ctx, mode, offset, length)
   164  }
   165  
   166  // SetStat implements vfs.FileDescriptionImpl.SetStat.
   167  func (fd *regularFileFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
   168  	d := fd.dentry()
   169  	mode := linux.FileMode(d.mode.Load())
   170  	if err := vfs.CheckSetStat(ctx, auth.CredentialsFromContext(ctx), &opts, mode, auth.KUID(d.uid.Load()), auth.KGID(d.gid.Load())); err != nil {
   171  		return err
   172  	}
   173  	mnt := fd.vfsfd.Mount()
   174  	if err := mnt.CheckBeginWrite(); err != nil {
   175  		return err
   176  	}
   177  	defer mnt.EndWrite()
   178  	if err := d.copyUpLocked(ctx); err != nil {
   179  		return err
   180  	}
   181  	// Changes to d's attributes are serialized by d.copyMu.
   182  	d.copyMu.Lock()
   183  	defer d.copyMu.Unlock()
   184  	wrappedFD, err := fd.currentFDLocked(ctx)
   185  	if err != nil {
   186  		return err
   187  	}
   188  	if err := wrappedFD.SetStat(ctx, opts); err != nil {
   189  		return err
   190  	}
   191  
   192  	// Changing owners or truncating may clear one or both of the setuid and
   193  	// setgid bits, so we may have to update opts before setting d.mode.
   194  	if opts.Stat.Mask&(linux.STATX_UID|linux.STATX_GID|linux.STATX_SIZE) != 0 {
   195  		stat, err := wrappedFD.Stat(ctx, vfs.StatOptions{
   196  			Mask: linux.STATX_MODE,
   197  		})
   198  		if err != nil {
   199  			return err
   200  		}
   201  		opts.Stat.Mode = stat.Mode
   202  		opts.Stat.Mask |= linux.STATX_MODE
   203  	}
   204  
   205  	d.updateAfterSetStatLocked(&opts)
   206  	return nil
   207  }
   208  
   209  // StatFS implements vfs.FileDescriptionImpl.StatFS.
   210  func (fd *regularFileFD) StatFS(ctx context.Context) (linux.Statfs, error) {
   211  	return fd.filesystem().statFS(ctx)
   212  }
   213  
   214  // Readiness implements waiter.Waitable.Readiness.
   215  func (fd *regularFileFD) Readiness(mask waiter.EventMask) waiter.EventMask {
   216  	ctx := context.Background()
   217  	wrappedFD, err := fd.getCurrentFD(ctx)
   218  	if err != nil {
   219  		// TODO(b/171089913): Just use fd.cachedFD since Readiness can't return
   220  		// an error. This is obviously wrong, but at least consistent with
   221  		// VFS1.
   222  		log.Warningf("overlay.regularFileFD.Readiness: currentFDLocked failed: %v", err)
   223  		fd.mu.Lock()
   224  		wrappedFD = fd.cachedFD
   225  		wrappedFD.IncRef()
   226  		fd.mu.Unlock()
   227  	}
   228  	defer wrappedFD.DecRef(ctx)
   229  	return wrappedFD.Readiness(mask)
   230  }
   231  
   232  // EventRegister implements waiter.Waitable.EventRegister.
   233  func (fd *regularFileFD) EventRegister(e *waiter.Entry) error {
   234  	fd.mu.Lock()
   235  	defer fd.mu.Unlock()
   236  	wrappedFD, err := fd.currentFDLocked(context.Background())
   237  	if err != nil {
   238  		// TODO(b/171089913): Just use fd.cachedFD for backward compatibility
   239  		// with VFS1.
   240  		log.Warningf("overlay.regularFileFD.EventRegister: currentFDLocked failed: %v", err)
   241  		wrappedFD = fd.cachedFD
   242  	}
   243  	return wrappedFD.EventRegister(e)
   244  }
   245  
   246  // EventUnregister implements waiter.Waitable.EventUnregister.
   247  func (fd *regularFileFD) EventUnregister(e *waiter.Entry) {
   248  	fd.mu.Lock()
   249  	defer fd.mu.Unlock()
   250  	fd.cachedFD.EventUnregister(e)
   251  }
   252  
   253  // Epollable implements FileDescriptionImpl.Epollable.
   254  func (fd *regularFileFD) Epollable() bool {
   255  	fd.mu.Lock()
   256  	defer fd.mu.Unlock()
   257  	wrappedFD, err := fd.currentFDLocked(context.Background())
   258  	if err != nil {
   259  		// TODO(b/171089913): Just use fd.cachedFD since EventRegister can't
   260  		// return an error. This is obviously wrong, but at least consistent
   261  		// with VFS1.
   262  		log.Warningf("overlay.regularFileFD.Epollable: currentFDLocked failed: %v", err)
   263  		wrappedFD = fd.cachedFD
   264  	}
   265  	return wrappedFD.Epollable()
   266  }
   267  
   268  // PRead implements vfs.FileDescriptionImpl.PRead.
   269  func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
   270  	wrappedFD, err := fd.getCurrentFD(ctx)
   271  	if err != nil {
   272  		return 0, err
   273  	}
   274  	defer wrappedFD.DecRef(ctx)
   275  	return wrappedFD.PRead(ctx, dst, offset, opts)
   276  }
   277  
   278  // Read implements vfs.FileDescriptionImpl.Read.
   279  func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
   280  	// Hold fd.mu during the read to serialize the file offset.
   281  	fd.mu.Lock()
   282  	defer fd.mu.Unlock()
   283  	wrappedFD, err := fd.currentFDLocked(ctx)
   284  	if err != nil {
   285  		return 0, err
   286  	}
   287  	return wrappedFD.Read(ctx, dst, opts)
   288  }
   289  
   290  // PWrite implements vfs.FileDescriptionImpl.PWrite.
   291  func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
   292  	wrappedFD, err := fd.getCurrentFD(ctx)
   293  	if err != nil {
   294  		return 0, err
   295  	}
   296  	defer wrappedFD.DecRef(ctx)
   297  	n, err := wrappedFD.PWrite(ctx, src, offset, opts)
   298  	if err != nil {
   299  		return n, err
   300  	}
   301  	return fd.updateSetUserGroupIDs(ctx, wrappedFD, n)
   302  }
   303  
   304  // Write implements vfs.FileDescriptionImpl.Write.
   305  func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
   306  	// Hold fd.mu during the write to serialize the file offset.
   307  	fd.mu.Lock()
   308  	defer fd.mu.Unlock()
   309  	wrappedFD, err := fd.currentFDLocked(ctx)
   310  	if err != nil {
   311  		return 0, err
   312  	}
   313  	n, err := wrappedFD.Write(ctx, src, opts)
   314  	if err != nil {
   315  		return n, err
   316  	}
   317  	return fd.updateSetUserGroupIDs(ctx, wrappedFD, n)
   318  }
   319  
   320  func (fd *regularFileFD) updateSetUserGroupIDs(ctx context.Context, wrappedFD *vfs.FileDescription, written int64) (int64, error) {
   321  	// Writing can clear the setuid and/or setgid bits. We only have to
   322  	// check this if something was written and one of those bits was set.
   323  	dentry := fd.dentry()
   324  	if written == 0 || dentry.mode.Load()&(linux.S_ISUID|linux.S_ISGID) == 0 {
   325  		return written, nil
   326  	}
   327  	stat, err := wrappedFD.Stat(ctx, vfs.StatOptions{Mask: linux.STATX_MODE})
   328  	if err != nil {
   329  		return written, err
   330  	}
   331  	dentry.copyMu.Lock()
   332  	defer dentry.copyMu.Unlock()
   333  	dentry.mode.Store(uint32(stat.Mode))
   334  	return written, nil
   335  }
   336  
   337  // Seek implements vfs.FileDescriptionImpl.Seek.
   338  func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
   339  	// Hold fd.mu during the seek to serialize the file offset.
   340  	fd.mu.Lock()
   341  	defer fd.mu.Unlock()
   342  	wrappedFD, err := fd.currentFDLocked(ctx)
   343  	if err != nil {
   344  		return 0, err
   345  	}
   346  	return wrappedFD.Seek(ctx, offset, whence)
   347  }
   348  
   349  // Sync implements vfs.FileDescriptionImpl.Sync.
   350  func (fd *regularFileFD) Sync(ctx context.Context) error {
   351  	fd.mu.Lock()
   352  	if !fd.dentry().isCopiedUp() {
   353  		fd.mu.Unlock()
   354  		return nil
   355  	}
   356  	wrappedFD, err := fd.currentFDLocked(ctx)
   357  	if err != nil {
   358  		fd.mu.Unlock()
   359  		return err
   360  	}
   361  	wrappedFD.IncRef()
   362  	defer wrappedFD.DecRef(ctx)
   363  	fd.mu.Unlock()
   364  	return wrappedFD.Sync(ctx)
   365  }
   366  
   367  // Ioctl implements vfs.FileDescriptionImpl.Ioctl.
   368  func (fd *regularFileFD) Ioctl(ctx context.Context, uio usermem.IO, sysno uintptr, args arch.SyscallArguments) (uintptr, error) {
   369  	wrappedFD, err := fd.getCurrentFD(ctx)
   370  	if err != nil {
   371  		return 0, err
   372  	}
   373  	defer wrappedFD.DecRef(ctx)
   374  	return wrappedFD.Ioctl(ctx, uio, sysno, args)
   375  }
   376  
   377  // ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap.
   378  func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error {
   379  	if err := fd.ensureMappable(ctx, opts); err != nil {
   380  		return err
   381  	}
   382  	return vfs.GenericConfigureMMap(&fd.vfsfd, fd.dentry(), opts)
   383  }
   384  
   385  // ensureMappable ensures that fd.dentry().wrappedMappable is not nil.
   386  func (fd *regularFileFD) ensureMappable(ctx context.Context, opts *memmap.MMapOpts) error {
   387  	d := fd.dentry()
   388  
   389  	// Fast path if we already have a Mappable for the current top layer.
   390  	if d.isMappable.Load() != 0 {
   391  		return nil
   392  	}
   393  
   394  	// Only permit mmap of regular files, since other file types may have
   395  	// unpredictable behavior when mmapped (e.g. /dev/zero).
   396  	if d.mode.Load()&linux.S_IFMT != linux.S_IFREG {
   397  		return linuxerr.ENODEV
   398  	}
   399  
   400  	// Get a Mappable for the current top layer.
   401  	fd.mu.Lock()
   402  	defer fd.mu.Unlock()
   403  	d.copyMu.RLock()
   404  	defer d.copyMu.RUnlock()
   405  	if d.isMappable.Load() != 0 {
   406  		return nil
   407  	}
   408  	wrappedFD, err := fd.currentFDLocked(ctx)
   409  	if err != nil {
   410  		return err
   411  	}
   412  	if err := wrappedFD.ConfigureMMap(ctx, opts); err != nil {
   413  		return err
   414  	}
   415  	if opts.MappingIdentity != nil {
   416  		opts.MappingIdentity.DecRef(ctx)
   417  		opts.MappingIdentity = nil
   418  	}
   419  	// Use this Mappable for all mappings of this layer (unless we raced with
   420  	// another call to ensureMappable).
   421  	d.mapsMu.Lock()
   422  	defer d.mapsMu.Unlock()
   423  	d.dataMu.Lock()
   424  	defer d.dataMu.Unlock()
   425  	if d.wrappedMappable == nil {
   426  		d.wrappedMappable = opts.Mappable
   427  		d.isMappable.Store(1)
   428  	}
   429  	return nil
   430  }
   431  
   432  // AddMapping implements memmap.Mappable.AddMapping.
   433  func (d *dentry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error {
   434  	d.mapsMu.Lock()
   435  	defer d.mapsMu.Unlock()
   436  	if err := d.wrappedMappable.AddMapping(ctx, ms, ar, offset, writable); err != nil {
   437  		return err
   438  	}
   439  	if !d.isCopiedUp() {
   440  		d.lowerMappings.AddMapping(ms, ar, offset, writable)
   441  	}
   442  	return nil
   443  }
   444  
   445  // RemoveMapping implements memmap.Mappable.RemoveMapping.
   446  func (d *dentry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) {
   447  	d.mapsMu.Lock()
   448  	defer d.mapsMu.Unlock()
   449  	d.wrappedMappable.RemoveMapping(ctx, ms, ar, offset, writable)
   450  	if !d.isCopiedUp() {
   451  		d.lowerMappings.RemoveMapping(ms, ar, offset, writable)
   452  	}
   453  }
   454  
   455  // CopyMapping implements memmap.Mappable.CopyMapping.
   456  func (d *dentry) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error {
   457  	d.mapsMu.Lock()
   458  	defer d.mapsMu.Unlock()
   459  	if err := d.wrappedMappable.CopyMapping(ctx, ms, srcAR, dstAR, offset, writable); err != nil {
   460  		return err
   461  	}
   462  	if !d.isCopiedUp() {
   463  		d.lowerMappings.AddMapping(ms, dstAR, offset, writable)
   464  	}
   465  	return nil
   466  }
   467  
   468  // Translate implements memmap.Mappable.Translate.
   469  func (d *dentry) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) {
   470  	d.dataMu.RLock()
   471  	defer d.dataMu.RUnlock()
   472  	return d.wrappedMappable.Translate(ctx, required, optional, at)
   473  }
   474  
   475  // InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable.
   476  func (d *dentry) InvalidateUnsavable(ctx context.Context) error {
   477  	d.mapsMu.Lock()
   478  	defer d.mapsMu.Unlock()
   479  	return d.wrappedMappable.InvalidateUnsavable(ctx)
   480  }