github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/fuse/read_write.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package fuse
    16  
    17  import (
    18  	"io"
    19  	"sync/atomic"
    20  
    21  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    22  	"github.com/SagerNet/gvisor/pkg/context"
    23  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    24  	"github.com/SagerNet/gvisor/pkg/hostarch"
    25  	"github.com/SagerNet/gvisor/pkg/log"
    26  	"github.com/SagerNet/gvisor/pkg/sentry/kernel"
    27  	"github.com/SagerNet/gvisor/pkg/sentry/kernel/auth"
    28  	"github.com/SagerNet/gvisor/pkg/syserror"
    29  )
    30  
    31  // ReadInPages sends FUSE_READ requests for the size after round it up to
    32  // a multiple of page size, blocks on it for reply, processes the reply
    33  // and returns the payload (or joined payloads) as a byte slice.
    34  // This is used for the general purpose reading.
    35  // We do not support direct IO (which read the exact number of bytes)
    36  // at this moment.
    37  func (fs *filesystem) ReadInPages(ctx context.Context, fd *regularFileFD, off uint64, size uint32) ([][]byte, uint32, error) {
    38  	attributeVersion := atomic.LoadUint64(&fs.conn.attributeVersion)
    39  
    40  	t := kernel.TaskFromContext(ctx)
    41  	if t == nil {
    42  		log.Warningf("fusefs.Read: couldn't get kernel task from context")
    43  		return nil, 0, linuxerr.EINVAL
    44  	}
    45  
    46  	// Round up to a multiple of page size.
    47  	readSize, _ := hostarch.PageRoundUp(uint64(size))
    48  
    49  	// One request cannnot exceed either maxRead or maxPages.
    50  	maxPages := fs.conn.maxRead >> hostarch.PageShift
    51  	if maxPages > uint32(fs.conn.maxPages) {
    52  		maxPages = uint32(fs.conn.maxPages)
    53  	}
    54  
    55  	var outs [][]byte
    56  	var sizeRead uint32
    57  
    58  	// readSize is a multiple of hostarch.PageSize.
    59  	// Always request bytes as a multiple of pages.
    60  	pagesRead, pagesToRead := uint32(0), uint32(readSize>>hostarch.PageShift)
    61  
    62  	// Reuse the same struct for unmarshalling to avoid unnecessary memory allocation.
    63  	in := linux.FUSEReadIn{
    64  		Fh:        fd.Fh,
    65  		LockOwner: 0, // TODO(github.com/SagerNet/issue/3245): file lock
    66  		ReadFlags: 0, // TODO(github.com/SagerNet/issue/3245): |= linux.FUSE_READ_LOCKOWNER
    67  		Flags:     fd.statusFlags(),
    68  	}
    69  
    70  	// This loop is intended for fragmented read where the bytes to read is
    71  	// larger than either the maxPages or maxRead.
    72  	// For the majority of reads with normal size, this loop should only
    73  	// execute once.
    74  	for pagesRead < pagesToRead {
    75  		pagesCanRead := pagesToRead - pagesRead
    76  		if pagesCanRead > maxPages {
    77  			pagesCanRead = maxPages
    78  		}
    79  
    80  		in.Offset = off + (uint64(pagesRead) << hostarch.PageShift)
    81  		in.Size = pagesCanRead << hostarch.PageShift
    82  
    83  		// TODO(github.com/SagerNet/issue/3247): support async read.
    84  
    85  		req := fs.conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(t.ThreadID()), fd.inode().nodeID, linux.FUSE_READ, &in)
    86  		res, err := fs.conn.Call(t, req)
    87  		if err != nil {
    88  			return nil, 0, err
    89  		}
    90  		if err := res.Error(); err != nil {
    91  			return nil, 0, err
    92  		}
    93  
    94  		// Not enough bytes in response,
    95  		// either we reached EOF,
    96  		// or the FUSE server sends back a response
    97  		// that cannot even fit the hdr.
    98  		if len(res.data) <= res.hdr.SizeBytes() {
    99  			// We treat both case as EOF here for now
   100  			// since there is no reliable way to detect
   101  			// the over-short hdr case.
   102  			break
   103  		}
   104  
   105  		// Directly using the slice to avoid extra copy.
   106  		out := res.data[res.hdr.SizeBytes():]
   107  
   108  		outs = append(outs, out)
   109  		sizeRead += uint32(len(out))
   110  
   111  		pagesRead += pagesCanRead
   112  	}
   113  
   114  	defer fs.ReadCallback(ctx, fd, off, size, sizeRead, attributeVersion)
   115  
   116  	// No bytes returned: offset >= EOF.
   117  	if len(outs) == 0 {
   118  		return nil, 0, io.EOF
   119  	}
   120  
   121  	return outs, sizeRead, nil
   122  }
   123  
   124  // ReadCallback updates several information after receiving a read response.
   125  // Due to readahead, sizeRead can be larger than size.
   126  func (fs *filesystem) ReadCallback(ctx context.Context, fd *regularFileFD, off uint64, size uint32, sizeRead uint32, attributeVersion uint64) {
   127  	// TODO(github.com/SagerNet/issue/3247): support async read.
   128  	// If this is called by an async read, correctly process it.
   129  	// May need to update the signature.
   130  
   131  	i := fd.inode()
   132  	i.InodeAttrs.TouchAtime(ctx, fd.vfsfd.Mount())
   133  
   134  	// Reached EOF.
   135  	if sizeRead < size {
   136  		// TODO(github.com/SagerNet/issue/3630): If we have writeback cache, then we need to fill this hole.
   137  		// Might need to update the buf to be returned from the Read().
   138  
   139  		// Update existing size.
   140  		newSize := off + uint64(sizeRead)
   141  		fs.conn.mu.Lock()
   142  		if attributeVersion == i.attributeVersion && newSize < atomic.LoadUint64(&i.size) {
   143  			fs.conn.attributeVersion++
   144  			i.attributeVersion = i.fs.conn.attributeVersion
   145  			atomic.StoreUint64(&i.size, newSize)
   146  		}
   147  		fs.conn.mu.Unlock()
   148  	}
   149  }
   150  
   151  // Write sends FUSE_WRITE requests and return the bytes
   152  // written according to the response.
   153  //
   154  // Preconditions: len(data) == size.
   155  func (fs *filesystem) Write(ctx context.Context, fd *regularFileFD, off uint64, size uint32, data []byte) (uint32, error) {
   156  	t := kernel.TaskFromContext(ctx)
   157  	if t == nil {
   158  		log.Warningf("fusefs.Read: couldn't get kernel task from context")
   159  		return 0, linuxerr.EINVAL
   160  	}
   161  
   162  	// One request cannnot exceed either maxWrite or maxPages.
   163  	maxWrite := uint32(fs.conn.maxPages) << hostarch.PageShift
   164  	if maxWrite > fs.conn.maxWrite {
   165  		maxWrite = fs.conn.maxWrite
   166  	}
   167  
   168  	// Reuse the same struct for unmarshalling to avoid unnecessary memory allocation.
   169  	in := linux.FUSEWriteIn{
   170  		Fh: fd.Fh,
   171  		// TODO(github.com/SagerNet/issue/3245): file lock
   172  		LockOwner: 0,
   173  		// TODO(github.com/SagerNet/issue/3245): |= linux.FUSE_READ_LOCKOWNER
   174  		// TODO(github.com/SagerNet/issue/3237): |= linux.FUSE_WRITE_CACHE (not added yet)
   175  		WriteFlags: 0,
   176  		Flags:      fd.statusFlags(),
   177  	}
   178  
   179  	inode := fd.inode()
   180  	var written uint32
   181  
   182  	// This loop is intended for fragmented write where the bytes to write is
   183  	// larger than either the maxWrite or maxPages or when bigWrites is false.
   184  	// Unless a small value for max_write is explicitly used, this loop
   185  	// is expected to execute only once for the majority of the writes.
   186  	for written < size {
   187  		toWrite := size - written
   188  
   189  		// Limit the write size to one page.
   190  		// Note that the bigWrites flag is obsolete,
   191  		// latest libfuse always sets it on.
   192  		if !fs.conn.bigWrites && toWrite > hostarch.PageSize {
   193  			toWrite = hostarch.PageSize
   194  		}
   195  
   196  		// Limit the write size to maxWrite.
   197  		if toWrite > maxWrite {
   198  			toWrite = maxWrite
   199  		}
   200  
   201  		in.Offset = off + uint64(written)
   202  		in.Size = toWrite
   203  
   204  		req := fs.conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(t.ThreadID()), inode.nodeID, linux.FUSE_WRITE, &in)
   205  		req.payload = data[written : written+toWrite]
   206  
   207  		// TODO(github.com/SagerNet/issue/3247): support async write.
   208  
   209  		res, err := fs.conn.Call(t, req)
   210  		if err != nil {
   211  			return 0, err
   212  		}
   213  		if err := res.Error(); err != nil {
   214  			return 0, err
   215  		}
   216  
   217  		out := linux.FUSEWriteOut{}
   218  		if err := res.UnmarshalPayload(&out); err != nil {
   219  			return 0, err
   220  		}
   221  
   222  		// Write more than requested? EIO.
   223  		if out.Size > toWrite {
   224  			return 0, syserror.EIO
   225  		}
   226  
   227  		written += out.Size
   228  
   229  		// Break if short write. Not necessarily an error.
   230  		if out.Size != toWrite {
   231  			break
   232  		}
   233  	}
   234  	inode.InodeAttrs.TouchCMtime(ctx)
   235  
   236  	return written, nil
   237  }