github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/fsimpl/fuse/regular_file.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package fuse
    16  
    17  import (
    18  	"io"
    19  	"math"
    20  	"sync"
    21  
    22  	"github.com/metacubex/gvisor/pkg/abi/linux"
    23  	"github.com/metacubex/gvisor/pkg/context"
    24  	"github.com/metacubex/gvisor/pkg/errors/linuxerr"
    25  	"github.com/metacubex/gvisor/pkg/hostarch"
    26  	"github.com/metacubex/gvisor/pkg/sentry/fsutil"
    27  	"github.com/metacubex/gvisor/pkg/sentry/kernel/auth"
    28  	"github.com/metacubex/gvisor/pkg/sentry/memmap"
    29  	"github.com/metacubex/gvisor/pkg/sentry/vfs"
    30  	"github.com/metacubex/gvisor/pkg/usermem"
    31  )
    32  
    33  // +stateify savable
    34  type regularFileFD struct {
    35  	fileDescription
    36  
    37  	// offMu protects off.
    38  	offMu sync.Mutex `state:"nosave"`
    39  
    40  	// off is the file offset.
    41  	// +checklocks:offMu
    42  	off int64
    43  
    44  	// mapsMu protects mappings.
    45  	mapsMu sync.Mutex `state:"nosave"`
    46  
    47  	// mappings tracks mappings of the file into memmap.MappingSpaces.
    48  	//
    49  	// Protected by mapsMu.
    50  	mappings memmap.MappingSet
    51  
    52  	// dataMu protects the fields below.
    53  	dataMu sync.RWMutex `state:"nosave"`
    54  
    55  	// data maps offsets into the file to offsets into memFile that store
    56  	// the file's data.
    57  	//
    58  	// Protected by dataMu.
    59  	data fsutil.FileRangeSet
    60  }
    61  
    62  // Seek implements vfs.FileDescriptionImpl.Allocate.
    63  func (fd *regularFileFD) Allocate(ctx context.Context, mode, offset, length uint64) error {
    64  	if mode & ^uint64(linux.FALLOC_FL_KEEP_SIZE|linux.FALLOC_FL_PUNCH_HOLE|linux.FALLOC_FL_ZERO_RANGE) != 0 {
    65  		return linuxerr.EOPNOTSUPP
    66  	}
    67  	in := linux.FUSEFallocateIn{
    68  		Fh:     fd.Fh,
    69  		Offset: uint64(offset),
    70  		Length: uint64(length),
    71  		Mode:   uint32(mode),
    72  	}
    73  	i := fd.inode()
    74  	req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), i.nodeID, linux.FUSE_FALLOCATE, &in)
    75  	res, err := i.fs.conn.Call(ctx, req)
    76  	if err != nil {
    77  		return err
    78  	}
    79  	if err := res.Error(); err != nil {
    80  		return err
    81  	}
    82  	i.attrMu.Lock()
    83  	defer i.attrMu.Unlock()
    84  	if uint64(offset+length) > i.size.Load() {
    85  		if err := i.reviseAttr(ctx, linux.FUSE_GETATTR_FH, fd.Fh); err != nil {
    86  			return err
    87  		}
    88  		// If the offset after update is still too large, return error.
    89  		if uint64(offset) >= i.size.Load() {
    90  			return io.EOF
    91  		}
    92  	}
    93  	return nil
    94  }
    95  
    96  // Seek implements vfs.FileDescriptionImpl.Seek.
    97  func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
    98  	fd.offMu.Lock()
    99  	defer fd.offMu.Unlock()
   100  	inode := fd.inode()
   101  	inode.attrMu.Lock()
   102  	defer inode.attrMu.Unlock()
   103  	switch whence {
   104  	case linux.SEEK_SET:
   105  		// use offset as specified
   106  	case linux.SEEK_CUR:
   107  		offset += fd.off
   108  	case linux.SEEK_END:
   109  		offset += int64(inode.size.Load())
   110  	default:
   111  		return 0, linuxerr.EINVAL
   112  	}
   113  	if offset < 0 {
   114  		return 0, linuxerr.EINVAL
   115  	}
   116  	fd.off = offset
   117  	return offset, nil
   118  }
   119  
   120  // PRead implements vfs.FileDescriptionImpl.PRead.
   121  func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
   122  	if offset < 0 {
   123  		return 0, linuxerr.EINVAL
   124  	}
   125  
   126  	// Check that flags are supported.
   127  	//
   128  	// TODO(gvisor.dev/issue/2601): Support select preadv2 flags.
   129  	if opts.Flags&^linux.RWF_HIPRI != 0 {
   130  		return 0, linuxerr.EOPNOTSUPP
   131  	}
   132  
   133  	size := dst.NumBytes()
   134  	if size == 0 {
   135  		// Early return if count is 0.
   136  		return 0, nil
   137  	} else if size > math.MaxUint32 {
   138  		// FUSE only supports uint32 for size.
   139  		// Overflow.
   140  		return 0, linuxerr.EINVAL
   141  	}
   142  
   143  	// TODO(gvisor.dev/issue/3678): Add direct IO support.
   144  
   145  	inode := fd.inode()
   146  	inode.attrMu.Lock()
   147  	defer inode.attrMu.Unlock()
   148  
   149  	// Reading beyond EOF, update file size if outdated.
   150  	if uint64(offset+size) > inode.size.Load() {
   151  		if err := inode.reviseAttr(ctx, linux.FUSE_GETATTR_FH, fd.Fh); err != nil {
   152  			return 0, err
   153  		}
   154  		// If the offset after update is still too large, return error.
   155  		if uint64(offset) >= inode.size.Load() {
   156  			return 0, io.EOF
   157  		}
   158  	}
   159  
   160  	// Truncate the read with updated file size.
   161  	fileSize := inode.size.Load()
   162  	if uint64(offset+size) > fileSize {
   163  		size = int64(fileSize) - offset
   164  	}
   165  
   166  	buffers, n, err := inode.fs.ReadInPages(ctx, fd, uint64(offset), uint32(size))
   167  	if err != nil {
   168  		return 0, err
   169  	}
   170  
   171  	// TODO(gvisor.dev/issue/3237): support indirect IO (e.g. caching),
   172  	// store the bytes that were read ahead.
   173  
   174  	// Update the number of bytes to copy for short read.
   175  	if n < uint32(size) {
   176  		size = int64(n)
   177  	}
   178  
   179  	// Copy the bytes read to the dst.
   180  	// This loop is intended for fragmented reads.
   181  	// For the majority of reads, this loop only execute once.
   182  	var copied int64
   183  	for _, buffer := range buffers {
   184  		toCopy := int64(len(buffer))
   185  		if copied+toCopy > size {
   186  			toCopy = size - copied
   187  		}
   188  		cp, err := dst.DropFirst64(copied).CopyOut(ctx, buffer[:toCopy])
   189  		if err != nil {
   190  			return 0, err
   191  		}
   192  		if int64(cp) != toCopy {
   193  			return 0, linuxerr.EIO
   194  		}
   195  		copied += toCopy
   196  	}
   197  
   198  	return copied, nil
   199  }
   200  
   201  // Read implements vfs.FileDescriptionImpl.Read.
   202  func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
   203  	fd.offMu.Lock()
   204  	n, err := fd.PRead(ctx, dst, fd.off, opts)
   205  	fd.off += n
   206  	fd.offMu.Unlock()
   207  	return n, err
   208  }
   209  
   210  // PWrite implements vfs.FileDescriptionImpl.PWrite.
   211  func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
   212  	n, _, err := fd.pwrite(ctx, src, offset, opts)
   213  	return n, err
   214  }
   215  
   216  // Write implements vfs.FileDescriptionImpl.Write.
   217  func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
   218  	fd.offMu.Lock()
   219  	n, off, err := fd.pwrite(ctx, src, fd.off, opts)
   220  	fd.off = off
   221  	fd.offMu.Unlock()
   222  	return n, err
   223  }
   224  
   225  // pwrite returns the number of bytes written, final offset and error. The
   226  // final offset should be ignored by PWrite.
   227  func (fd *regularFileFD) pwrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, int64, error) {
   228  	if offset < 0 {
   229  		return 0, offset, linuxerr.EINVAL
   230  	}
   231  
   232  	// Check that flags are supported.
   233  	//
   234  	// TODO(gvisor.dev/issue/2601): Support select preadv2 flags.
   235  	if opts.Flags&^linux.RWF_HIPRI != 0 {
   236  		return 0, offset, linuxerr.EOPNOTSUPP
   237  	}
   238  
   239  	inode := fd.inode()
   240  	inode.attrMu.Lock()
   241  	defer inode.attrMu.Unlock()
   242  
   243  	// If the file is opened with O_APPEND, update offset to file size.
   244  	// Note: since our Open() implements the interface of kernfs,
   245  	// and kernfs currently does not support O_APPEND, this will never
   246  	// be true before we switch out from kernfs.
   247  	if fd.vfsfd.StatusFlags()&linux.O_APPEND != 0 {
   248  		// Locking inode.metadataMu is sufficient for reading size
   249  		offset = int64(inode.size.Load())
   250  	}
   251  
   252  	srclen := src.NumBytes()
   253  	if srclen > math.MaxUint32 {
   254  		// FUSE only supports uint32 for size.
   255  		// Overflow.
   256  		return 0, offset, linuxerr.EINVAL
   257  	}
   258  	if end := offset + srclen; end < offset {
   259  		// Overflow.
   260  		return 0, offset, linuxerr.EINVAL
   261  	}
   262  
   263  	limit, err := vfs.CheckLimit(ctx, offset, srclen)
   264  	if err != nil {
   265  		return 0, offset, err
   266  	}
   267  	if limit == 0 {
   268  		// Return before causing any side effects.
   269  		return 0, offset, nil
   270  	}
   271  	src = src.TakeFirst64(limit)
   272  
   273  	n, offset, err := inode.fs.Write(ctx, fd, offset, src)
   274  	if n == 0 {
   275  		// We have checked srclen != 0 previously.
   276  		// If err == nil, then it's a short write and we return EIO.
   277  		return 0, offset, linuxerr.EIO
   278  	}
   279  
   280  	if offset > int64(inode.size.Load()) {
   281  		inode.size.Store(uint64(offset))
   282  		inode.fs.conn.attributeVersion.Add(1)
   283  	}
   284  	inode.touchCMtime()
   285  	return n, offset, err
   286  }
   287  
   288  // ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap.
   289  func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error {
   290  	return linuxerr.ENOSYS
   291  }
   292  
   293  // AddMapping implements memmap.Mappable.AddMapping.
   294  func (fd *regularFileFD) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error {
   295  	return linuxerr.ENOSYS
   296  }
   297  
   298  // RemoveMapping implements memmap.Mappable.RemoveMapping.
   299  func (fd *regularFileFD) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) {
   300  }
   301  
   302  // CopyMapping implements memmap.Mappable.CopyMapping.
   303  func (fd *regularFileFD) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error {
   304  	return linuxerr.ENOSYS
   305  }
   306  
   307  // Translate implements memmap.Mappable.Translate.
   308  func (fd *regularFileFD) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) {
   309  	return nil, linuxerr.ENOSYS
   310  }
   311  
   312  // InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable.
   313  func (fd *regularFileFD) InvalidateUnsavable(ctx context.Context) error {
   314  	return linuxerr.ENOSYS
   315  }