github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/fsimpl/fuse/read_write.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package fuse
    16  
    17  import (
    18  	"io"
    19  
    20  	"github.com/MerlinKodo/gvisor/pkg/abi/linux"
    21  	"github.com/MerlinKodo/gvisor/pkg/context"
    22  	"github.com/MerlinKodo/gvisor/pkg/errors/linuxerr"
    23  	"github.com/MerlinKodo/gvisor/pkg/hostarch"
    24  	"github.com/MerlinKodo/gvisor/pkg/sentry/kernel/auth"
    25  	"github.com/MerlinKodo/gvisor/pkg/usermem"
    26  )
    27  
    28  // ReadInPages sends FUSE_READ requests for the size after round it up to
    29  // a multiple of page size, blocks on it for reply, processes the reply
    30  // and returns the payload (or joined payloads) as a byte slice.
    31  // This is used for the general purpose reading.
    32  // We do not support direct IO (which read the exact number of bytes)
    33  // at this moment.
    34  func (fs *filesystem) ReadInPages(ctx context.Context, fd *regularFileFD, off uint64, size uint32) ([][]byte, uint32, error) {
    35  	attributeVersion := fs.conn.attributeVersion.Load()
    36  
    37  	// Round up to a multiple of page size.
    38  	readSize, _ := hostarch.PageRoundUp(uint64(size))
    39  
    40  	// One request cannot exceed either maxRead or maxPages.
    41  	maxPages := fs.conn.maxRead >> hostarch.PageShift
    42  	if maxPages > uint32(fs.conn.maxPages) {
    43  		maxPages = uint32(fs.conn.maxPages)
    44  	}
    45  
    46  	var outs [][]byte
    47  	var sizeRead uint32
    48  
    49  	// readSize is a multiple of hostarch.PageSize.
    50  	// Always request bytes as a multiple of pages.
    51  	pagesRead, pagesToRead := uint32(0), uint32(readSize>>hostarch.PageShift)
    52  
    53  	// Reuse the same struct for unmarshalling to avoid unnecessary memory allocation.
    54  	in := linux.FUSEReadIn{
    55  		Fh:        fd.Fh,
    56  		LockOwner: 0, // TODO(gvisor.dev/issue/3245): file lock
    57  		ReadFlags: 0, // TODO(gvisor.dev/issue/3245): |= linux.FUSE_READ_LOCKOWNER
    58  		Flags:     fd.statusFlags(),
    59  	}
    60  
    61  	// This loop is intended for fragmented read where the bytes to read is
    62  	// larger than either the maxPages or maxRead.
    63  	// For the majority of reads with normal size, this loop should only
    64  	// execute once.
    65  	for pagesRead < pagesToRead {
    66  		pagesCanRead := pagesToRead - pagesRead
    67  		if pagesCanRead > maxPages {
    68  			pagesCanRead = maxPages
    69  		}
    70  
    71  		in.Offset = off + (uint64(pagesRead) << hostarch.PageShift)
    72  		in.Size = pagesCanRead << hostarch.PageShift
    73  
    74  		// TODO(gvisor.dev/issue/3247): support async read.
    75  		req := fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), fd.inode().nodeID, linux.FUSE_READ, &in)
    76  		res, err := fs.conn.Call(ctx, req)
    77  		if err != nil {
    78  			return nil, 0, err
    79  		}
    80  		if err := res.Error(); err != nil {
    81  			return nil, 0, err
    82  		}
    83  
    84  		// Not enough bytes in response,
    85  		// either we reached EOF,
    86  		// or the FUSE server sends back a response
    87  		// that cannot even fit the hdr.
    88  		if len(res.data) <= res.hdr.SizeBytes() {
    89  			// We treat both case as EOF here for now
    90  			// since there is no reliable way to detect
    91  			// the over-short hdr case.
    92  			break
    93  		}
    94  
    95  		// Directly using the slice to avoid extra copy.
    96  		out := res.data[res.hdr.SizeBytes():]
    97  
    98  		outs = append(outs, out)
    99  		sizeRead += uint32(len(out))
   100  
   101  		pagesRead += pagesCanRead
   102  	}
   103  
   104  	defer fs.ReadCallback(ctx, fd.inode(), off, size, sizeRead, attributeVersion) // +checklocksforce: fd.inode() locks are held during fd operations.
   105  
   106  	// No bytes returned: offset >= EOF.
   107  	if len(outs) == 0 {
   108  		return nil, 0, io.EOF
   109  	}
   110  
   111  	return outs, sizeRead, nil
   112  }
   113  
   114  // ReadCallback updates several information after receiving a read response.
   115  // Due to readahead, sizeRead can be larger than size.
   116  //
   117  // +checklocks:i.attrMu
   118  func (fs *filesystem) ReadCallback(ctx context.Context, i *inode, off uint64, size uint32, sizeRead uint32, attributeVersion uint64) {
   119  	// TODO(gvisor.dev/issue/3247): support async read.
   120  	// If this is called by an async read, correctly process it.
   121  	// May need to update the signature.
   122  	i.touchAtime()
   123  	// Reached EOF.
   124  	if sizeRead < size {
   125  		// TODO(gvisor.dev/issue/3630): If we have writeback cache, then we need to fill this hole.
   126  		// Might need to update the buf to be returned from the Read().
   127  
   128  		// Update existing size.
   129  		newSize := off + uint64(sizeRead)
   130  		fs.conn.mu.Lock()
   131  		if attributeVersion == i.attrVersion.Load() && newSize < i.size.Load() {
   132  			i.attrVersion.Store(i.fs.conn.attributeVersion.Add(1))
   133  			i.size.Store(newSize)
   134  		}
   135  		fs.conn.mu.Unlock()
   136  	}
   137  }
   138  
   139  // Write sends FUSE_WRITE requests and return the bytes written according to the
   140  // response.
   141  func (fs *filesystem) Write(ctx context.Context, fd *regularFileFD, offset int64, src usermem.IOSequence) (int64, int64, error) {
   142  	// One request cannot exceed either maxWrite or maxPages.
   143  	maxWrite := uint32(fs.conn.maxPages) << hostarch.PageShift
   144  	if maxWrite > fs.conn.maxWrite {
   145  		maxWrite = fs.conn.maxWrite
   146  	}
   147  
   148  	// Reuse the same struct for unmarshalling to avoid unnecessary memory allocation.
   149  	in := linux.FUSEWritePayloadIn{
   150  		Header: linux.FUSEWriteIn{
   151  			Fh: fd.Fh,
   152  			// TODO(gvisor.dev/issue/3245): file lock
   153  			LockOwner: 0,
   154  			// TODO(gvisor.dev/issue/3245): |= linux.FUSE_READ_LOCKOWNER
   155  			// TODO(gvisor.dev/issue/3237): |= linux.FUSE_WRITE_CACHE (not added yet)
   156  			WriteFlags: 0,
   157  			Flags:      fd.statusFlags(),
   158  		},
   159  	}
   160  
   161  	// This loop is intended for fragmented write where the bytes to write is
   162  	// larger than either the maxWrite or maxPages or when bigWrites is false.
   163  	// Unless a small value for max_write is explicitly used, this loop
   164  	// is expected to execute only once for the majority of the writes.
   165  	n := int64(0)
   166  	end := offset + src.NumBytes()
   167  	for n < end {
   168  		writeSize := uint32(end - n)
   169  
   170  		// Limit the write size to one page.
   171  		// Note that the bigWrites flag is obsolete,
   172  		// latest libfuse always sets it on.
   173  		if !fs.conn.bigWrites && writeSize > hostarch.PageSize {
   174  			writeSize = hostarch.PageSize
   175  		}
   176  		// Limit the write size to maxWrite.
   177  		if writeSize > maxWrite {
   178  			writeSize = maxWrite
   179  		}
   180  
   181  		// TODO(gvisor.dev/issue/3237): Add cache support:
   182  		// buffer cache. Ideally we write from src to our buffer cache first.
   183  		// The slice passed to fs.Write() should be a slice from buffer cache.
   184  		data := make([]byte, writeSize)
   185  		cp, _ := src.CopyIn(ctx, data)
   186  		data = data[:cp]
   187  
   188  		in.Header.Offset = uint64(offset)
   189  		in.Header.Size = uint32(cp)
   190  		in.Payload = data
   191  
   192  		req := fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), fd.inode().nodeID, linux.FUSE_WRITE, &in)
   193  		// TODO(gvisor.dev/issue/3247): support async write.
   194  		res, err := fs.conn.Call(ctx, req)
   195  		if err != nil {
   196  			return n, offset, err
   197  		}
   198  		out := linux.FUSEWriteOut{}
   199  		if err := res.UnmarshalPayload(&out); err != nil {
   200  			return n, offset, err
   201  		}
   202  		n += int64(out.Size)
   203  		offset += int64(out.Size)
   204  		src = src.DropFirst64(int64(out.Size))
   205  
   206  		if err := res.Error(); err != nil {
   207  			return n, offset, err
   208  		}
   209  		// Write more than requested? EIO.
   210  		if out.Size > writeSize {
   211  			return n, offset, linuxerr.EIO
   212  		}
   213  		// Break if short write. Not necessarily an error.
   214  		if out.Size != writeSize {
   215  			break
   216  		}
   217  	}
   218  	return n, offset, nil
   219  }