github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/fuse/regular_file.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package fuse
    16  
    17  import (
    18  	"io"
    19  	"math"
    20  	"sync"
    21  	"sync/atomic"
    22  
    23  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    24  	"github.com/SagerNet/gvisor/pkg/context"
    25  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    26  	"github.com/SagerNet/gvisor/pkg/sentry/vfs"
    27  	"github.com/SagerNet/gvisor/pkg/syserror"
    28  	"github.com/SagerNet/gvisor/pkg/usermem"
    29  )
    30  
    31  type regularFileFD struct {
    32  	fileDescription
    33  
    34  	// off is the file offset.
    35  	off int64
    36  	// offMu protects off.
    37  	offMu sync.Mutex
    38  }
    39  
    40  // PRead implements vfs.FileDescriptionImpl.PRead.
    41  func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
    42  	if offset < 0 {
    43  		return 0, linuxerr.EINVAL
    44  	}
    45  
    46  	// Check that flags are supported.
    47  	//
    48  	// TODO(github.com/SagerNet/issue/2601): Support select preadv2 flags.
    49  	if opts.Flags&^linux.RWF_HIPRI != 0 {
    50  		return 0, syserror.EOPNOTSUPP
    51  	}
    52  
    53  	size := dst.NumBytes()
    54  	if size == 0 {
    55  		// Early return if count is 0.
    56  		return 0, nil
    57  	} else if size > math.MaxUint32 {
    58  		// FUSE only supports uint32 for size.
    59  		// Overflow.
    60  		return 0, linuxerr.EINVAL
    61  	}
    62  
    63  	// TODO(github.com/SagerNet/issue/3678): Add direct IO support.
    64  
    65  	inode := fd.inode()
    66  
    67  	// Reading beyond EOF, update file size if outdated.
    68  	if uint64(offset+size) > atomic.LoadUint64(&inode.size) {
    69  		if err := inode.reviseAttr(ctx, linux.FUSE_GETATTR_FH, fd.Fh); err != nil {
    70  			return 0, err
    71  		}
    72  		// If the offset after update is still too large, return error.
    73  		if uint64(offset) >= atomic.LoadUint64(&inode.size) {
    74  			return 0, io.EOF
    75  		}
    76  	}
    77  
    78  	// Truncate the read with updated file size.
    79  	fileSize := atomic.LoadUint64(&inode.size)
    80  	if uint64(offset+size) > fileSize {
    81  		size = int64(fileSize) - offset
    82  	}
    83  
    84  	buffers, n, err := inode.fs.ReadInPages(ctx, fd, uint64(offset), uint32(size))
    85  	if err != nil {
    86  		return 0, err
    87  	}
    88  
    89  	// TODO(github.com/SagerNet/issue/3237): support indirect IO (e.g. caching),
    90  	// store the bytes that were read ahead.
    91  
    92  	// Update the number of bytes to copy for short read.
    93  	if n < uint32(size) {
    94  		size = int64(n)
    95  	}
    96  
    97  	// Copy the bytes read to the dst.
    98  	// This loop is intended for fragmented reads.
    99  	// For the majority of reads, this loop only execute once.
   100  	var copied int64
   101  	for _, buffer := range buffers {
   102  		toCopy := int64(len(buffer))
   103  		if copied+toCopy > size {
   104  			toCopy = size - copied
   105  		}
   106  		cp, err := dst.DropFirst64(copied).CopyOut(ctx, buffer[:toCopy])
   107  		if err != nil {
   108  			return 0, err
   109  		}
   110  		if int64(cp) != toCopy {
   111  			return 0, syserror.EIO
   112  		}
   113  		copied += toCopy
   114  	}
   115  
   116  	return copied, nil
   117  }
   118  
   119  // Read implements vfs.FileDescriptionImpl.Read.
   120  func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
   121  	fd.offMu.Lock()
   122  	n, err := fd.PRead(ctx, dst, fd.off, opts)
   123  	fd.off += n
   124  	fd.offMu.Unlock()
   125  	return n, err
   126  }
   127  
   128  // PWrite implements vfs.FileDescriptionImpl.PWrite.
   129  func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
   130  	n, _, err := fd.pwrite(ctx, src, offset, opts)
   131  	return n, err
   132  }
   133  
   134  // Write implements vfs.FileDescriptionImpl.Write.
   135  func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
   136  	fd.offMu.Lock()
   137  	n, off, err := fd.pwrite(ctx, src, fd.off, opts)
   138  	fd.off = off
   139  	fd.offMu.Unlock()
   140  	return n, err
   141  }
   142  
   143  // pwrite returns the number of bytes written, final offset and error. The
   144  // final offset should be ignored by PWrite.
   145  func (fd *regularFileFD) pwrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (written, finalOff int64, err error) {
   146  	if offset < 0 {
   147  		return 0, offset, linuxerr.EINVAL
   148  	}
   149  
   150  	// Check that flags are supported.
   151  	//
   152  	// TODO(github.com/SagerNet/issue/2601): Support select preadv2 flags.
   153  	if opts.Flags&^linux.RWF_HIPRI != 0 {
   154  		return 0, offset, syserror.EOPNOTSUPP
   155  	}
   156  
   157  	inode := fd.inode()
   158  	inode.metadataMu.Lock()
   159  	defer inode.metadataMu.Unlock()
   160  
   161  	// If the file is opened with O_APPEND, update offset to file size.
   162  	// Note: since our Open() implements the interface of kernfs,
   163  	// and kernfs currently does not support O_APPEND, this will never
   164  	// be true before we switch out from kernfs.
   165  	if fd.vfsfd.StatusFlags()&linux.O_APPEND != 0 {
   166  		// Locking inode.metadataMu is sufficient for reading size
   167  		offset = int64(inode.size)
   168  	}
   169  
   170  	srclen := src.NumBytes()
   171  
   172  	if srclen > math.MaxUint32 {
   173  		// FUSE only supports uint32 for size.
   174  		// Overflow.
   175  		return 0, offset, linuxerr.EINVAL
   176  	}
   177  	if end := offset + srclen; end < offset {
   178  		// Overflow.
   179  		return 0, offset, linuxerr.EINVAL
   180  	}
   181  
   182  	srclen, err = vfs.CheckLimit(ctx, offset, srclen)
   183  	if err != nil {
   184  		return 0, offset, err
   185  	}
   186  
   187  	if srclen == 0 {
   188  		// Return before causing any side effects.
   189  		return 0, offset, nil
   190  	}
   191  
   192  	src = src.TakeFirst64(srclen)
   193  
   194  	// TODO(github.com/SagerNet/issue/3237): Add cache support:
   195  	// buffer cache. Ideally we write from src to our buffer cache first.
   196  	// The slice passed to fs.Write() should be a slice from buffer cache.
   197  	data := make([]byte, srclen)
   198  	// Reason for making a copy here: connection.Call() blocks on kerneltask,
   199  	// which in turn acquires mm.activeMu lock. Functions like CopyInTo() will
   200  	// attemp to acquire the mm.activeMu lock as well -> deadlock.
   201  	// We must finish reading from the userspace memory before
   202  	// t.Block() deactivates it.
   203  	cp, err := src.CopyIn(ctx, data)
   204  	if err != nil {
   205  		return 0, offset, err
   206  	}
   207  	if int64(cp) != srclen {
   208  		return 0, offset, syserror.EIO
   209  	}
   210  
   211  	n, err := fd.inode().fs.Write(ctx, fd, uint64(offset), uint32(srclen), data)
   212  	if err != nil {
   213  		return 0, offset, err
   214  	}
   215  
   216  	if n == 0 {
   217  		// We have checked srclen != 0 previously.
   218  		// If err == nil, then it's a short write and we return EIO.
   219  		return 0, offset, syserror.EIO
   220  	}
   221  
   222  	written = int64(n)
   223  	finalOff = offset + written
   224  
   225  	if finalOff > int64(inode.size) {
   226  		atomic.StoreUint64(&inode.size, uint64(finalOff))
   227  		atomic.AddUint64(&inode.fs.conn.attributeVersion, 1)
   228  	}
   229  
   230  	return
   231  }