github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/fuse/read_write.go (about) 1 // Copyright 2020 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package fuse 16 17 import ( 18 "io" 19 "sync/atomic" 20 21 "github.com/SagerNet/gvisor/pkg/abi/linux" 22 "github.com/SagerNet/gvisor/pkg/context" 23 "github.com/SagerNet/gvisor/pkg/errors/linuxerr" 24 "github.com/SagerNet/gvisor/pkg/hostarch" 25 "github.com/SagerNet/gvisor/pkg/log" 26 "github.com/SagerNet/gvisor/pkg/sentry/kernel" 27 "github.com/SagerNet/gvisor/pkg/sentry/kernel/auth" 28 "github.com/SagerNet/gvisor/pkg/syserror" 29 ) 30 31 // ReadInPages sends FUSE_READ requests for the size after round it up to 32 // a multiple of page size, blocks on it for reply, processes the reply 33 // and returns the payload (or joined payloads) as a byte slice. 34 // This is used for the general purpose reading. 35 // We do not support direct IO (which read the exact number of bytes) 36 // at this moment. 37 func (fs *filesystem) ReadInPages(ctx context.Context, fd *regularFileFD, off uint64, size uint32) ([][]byte, uint32, error) { 38 attributeVersion := atomic.LoadUint64(&fs.conn.attributeVersion) 39 40 t := kernel.TaskFromContext(ctx) 41 if t == nil { 42 log.Warningf("fusefs.Read: couldn't get kernel task from context") 43 return nil, 0, linuxerr.EINVAL 44 } 45 46 // Round up to a multiple of page size. 47 readSize, _ := hostarch.PageRoundUp(uint64(size)) 48 49 // One request cannnot exceed either maxRead or maxPages. 50 maxPages := fs.conn.maxRead >> hostarch.PageShift 51 if maxPages > uint32(fs.conn.maxPages) { 52 maxPages = uint32(fs.conn.maxPages) 53 } 54 55 var outs [][]byte 56 var sizeRead uint32 57 58 // readSize is a multiple of hostarch.PageSize. 59 // Always request bytes as a multiple of pages. 60 pagesRead, pagesToRead := uint32(0), uint32(readSize>>hostarch.PageShift) 61 62 // Reuse the same struct for unmarshalling to avoid unnecessary memory allocation. 63 in := linux.FUSEReadIn{ 64 Fh: fd.Fh, 65 LockOwner: 0, // TODO(github.com/SagerNet/issue/3245): file lock 66 ReadFlags: 0, // TODO(github.com/SagerNet/issue/3245): |= linux.FUSE_READ_LOCKOWNER 67 Flags: fd.statusFlags(), 68 } 69 70 // This loop is intended for fragmented read where the bytes to read is 71 // larger than either the maxPages or maxRead. 72 // For the majority of reads with normal size, this loop should only 73 // execute once. 74 for pagesRead < pagesToRead { 75 pagesCanRead := pagesToRead - pagesRead 76 if pagesCanRead > maxPages { 77 pagesCanRead = maxPages 78 } 79 80 in.Offset = off + (uint64(pagesRead) << hostarch.PageShift) 81 in.Size = pagesCanRead << hostarch.PageShift 82 83 // TODO(github.com/SagerNet/issue/3247): support async read. 84 85 req := fs.conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(t.ThreadID()), fd.inode().nodeID, linux.FUSE_READ, &in) 86 res, err := fs.conn.Call(t, req) 87 if err != nil { 88 return nil, 0, err 89 } 90 if err := res.Error(); err != nil { 91 return nil, 0, err 92 } 93 94 // Not enough bytes in response, 95 // either we reached EOF, 96 // or the FUSE server sends back a response 97 // that cannot even fit the hdr. 98 if len(res.data) <= res.hdr.SizeBytes() { 99 // We treat both case as EOF here for now 100 // since there is no reliable way to detect 101 // the over-short hdr case. 102 break 103 } 104 105 // Directly using the slice to avoid extra copy. 106 out := res.data[res.hdr.SizeBytes():] 107 108 outs = append(outs, out) 109 sizeRead += uint32(len(out)) 110 111 pagesRead += pagesCanRead 112 } 113 114 defer fs.ReadCallback(ctx, fd, off, size, sizeRead, attributeVersion) 115 116 // No bytes returned: offset >= EOF. 117 if len(outs) == 0 { 118 return nil, 0, io.EOF 119 } 120 121 return outs, sizeRead, nil 122 } 123 124 // ReadCallback updates several information after receiving a read response. 125 // Due to readahead, sizeRead can be larger than size. 126 func (fs *filesystem) ReadCallback(ctx context.Context, fd *regularFileFD, off uint64, size uint32, sizeRead uint32, attributeVersion uint64) { 127 // TODO(github.com/SagerNet/issue/3247): support async read. 128 // If this is called by an async read, correctly process it. 129 // May need to update the signature. 130 131 i := fd.inode() 132 i.InodeAttrs.TouchAtime(ctx, fd.vfsfd.Mount()) 133 134 // Reached EOF. 135 if sizeRead < size { 136 // TODO(github.com/SagerNet/issue/3630): If we have writeback cache, then we need to fill this hole. 137 // Might need to update the buf to be returned from the Read(). 138 139 // Update existing size. 140 newSize := off + uint64(sizeRead) 141 fs.conn.mu.Lock() 142 if attributeVersion == i.attributeVersion && newSize < atomic.LoadUint64(&i.size) { 143 fs.conn.attributeVersion++ 144 i.attributeVersion = i.fs.conn.attributeVersion 145 atomic.StoreUint64(&i.size, newSize) 146 } 147 fs.conn.mu.Unlock() 148 } 149 } 150 151 // Write sends FUSE_WRITE requests and return the bytes 152 // written according to the response. 153 // 154 // Preconditions: len(data) == size. 155 func (fs *filesystem) Write(ctx context.Context, fd *regularFileFD, off uint64, size uint32, data []byte) (uint32, error) { 156 t := kernel.TaskFromContext(ctx) 157 if t == nil { 158 log.Warningf("fusefs.Read: couldn't get kernel task from context") 159 return 0, linuxerr.EINVAL 160 } 161 162 // One request cannnot exceed either maxWrite or maxPages. 163 maxWrite := uint32(fs.conn.maxPages) << hostarch.PageShift 164 if maxWrite > fs.conn.maxWrite { 165 maxWrite = fs.conn.maxWrite 166 } 167 168 // Reuse the same struct for unmarshalling to avoid unnecessary memory allocation. 169 in := linux.FUSEWriteIn{ 170 Fh: fd.Fh, 171 // TODO(github.com/SagerNet/issue/3245): file lock 172 LockOwner: 0, 173 // TODO(github.com/SagerNet/issue/3245): |= linux.FUSE_READ_LOCKOWNER 174 // TODO(github.com/SagerNet/issue/3237): |= linux.FUSE_WRITE_CACHE (not added yet) 175 WriteFlags: 0, 176 Flags: fd.statusFlags(), 177 } 178 179 inode := fd.inode() 180 var written uint32 181 182 // This loop is intended for fragmented write where the bytes to write is 183 // larger than either the maxWrite or maxPages or when bigWrites is false. 184 // Unless a small value for max_write is explicitly used, this loop 185 // is expected to execute only once for the majority of the writes. 186 for written < size { 187 toWrite := size - written 188 189 // Limit the write size to one page. 190 // Note that the bigWrites flag is obsolete, 191 // latest libfuse always sets it on. 192 if !fs.conn.bigWrites && toWrite > hostarch.PageSize { 193 toWrite = hostarch.PageSize 194 } 195 196 // Limit the write size to maxWrite. 197 if toWrite > maxWrite { 198 toWrite = maxWrite 199 } 200 201 in.Offset = off + uint64(written) 202 in.Size = toWrite 203 204 req := fs.conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(t.ThreadID()), inode.nodeID, linux.FUSE_WRITE, &in) 205 req.payload = data[written : written+toWrite] 206 207 // TODO(github.com/SagerNet/issue/3247): support async write. 208 209 res, err := fs.conn.Call(t, req) 210 if err != nil { 211 return 0, err 212 } 213 if err := res.Error(); err != nil { 214 return 0, err 215 } 216 217 out := linux.FUSEWriteOut{} 218 if err := res.UnmarshalPayload(&out); err != nil { 219 return 0, err 220 } 221 222 // Write more than requested? EIO. 223 if out.Size > toWrite { 224 return 0, syserror.EIO 225 } 226 227 written += out.Size 228 229 // Break if short write. Not necessarily an error. 230 if out.Size != toWrite { 231 break 232 } 233 } 234 inode.InodeAttrs.TouchCMtime(ctx) 235 236 return written, nil 237 }