github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/fuse/regular_file.go

// Copyright 2020 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package fuse

import (
	"io"
	"math"
	"sync"
	"sync/atomic"

	"github.com/SagerNet/gvisor/pkg/abi/linux"
	"github.com/SagerNet/gvisor/pkg/context"
	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
	"github.com/SagerNet/gvisor/pkg/sentry/vfs"
	"github.com/SagerNet/gvisor/pkg/syserror"
	"github.com/SagerNet/gvisor/pkg/usermem"
)

type regularFileFD struct {
	fileDescription

	// off is the file offset.
	off int64
	// offMu protects off.
	offMu sync.Mutex
}

// PRead implements vfs.FileDescriptionImpl.PRead.
func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
	if offset < 0 {
		return 0, linuxerr.EINVAL
	}

	// Check that flags are supported.
	//
	// TODO(github.com/SagerNet/issue/2601): Support select preadv2 flags.
	if opts.Flags&^linux.RWF_HIPRI != 0 {
		return 0, syserror.EOPNOTSUPP
	}

	size := dst.NumBytes()
	if size == 0 {
		// Early return if count is 0.
		return 0, nil
	} else if size > math.MaxUint32 {
		// FUSE only supports uint32 for size.
		// Overflow.
		return 0, linuxerr.EINVAL
	}

	// TODO(github.com/SagerNet/issue/3678): Add direct IO support.

	inode := fd.inode()

	// If the read extends beyond the cached EOF, refresh the file size in case
	// it is stale.
	if uint64(offset+size) > atomic.LoadUint64(&inode.size) {
		if err := inode.reviseAttr(ctx, linux.FUSE_GETATTR_FH, fd.Fh); err != nil {
			return 0, err
		}
		// If the offset is still at or beyond the updated file size, return EOF.
		if uint64(offset) >= atomic.LoadUint64(&inode.size) {
			return 0, io.EOF
		}
	}

	// Truncate the read with the updated file size.
	fileSize := atomic.LoadUint64(&inode.size)
	if uint64(offset+size) > fileSize {
		size = int64(fileSize) - offset
	}

	buffers, n, err := inode.fs.ReadInPages(ctx, fd, uint64(offset), uint32(size))
	if err != nil {
		return 0, err
	}

	// TODO(github.com/SagerNet/issue/3237): support indirect IO (e.g. caching),
	// store the bytes that were read ahead.

	// Update the number of bytes to copy for a short read.
	if n < uint32(size) {
		size = int64(n)
	}

	// Copy the bytes read to dst.
	// This loop is intended for fragmented reads.
	// For the majority of reads, it executes only once.
	var copied int64
	for _, buffer := range buffers {
		toCopy := int64(len(buffer))
		if copied+toCopy > size {
			toCopy = size - copied
		}
		cp, err := dst.DropFirst64(copied).CopyOut(ctx, buffer[:toCopy])
		if err != nil {
			return 0, err
		}
		if int64(cp) != toCopy {
			return 0, syserror.EIO
		}
		copied += toCopy
	}

	return copied, nil
}

// Read implements vfs.FileDescriptionImpl.Read.
func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
	fd.offMu.Lock()
	n, err := fd.PRead(ctx, dst, fd.off, opts)
	fd.off += n
	fd.offMu.Unlock()
	return n, err
}

// PWrite implements vfs.FileDescriptionImpl.PWrite.
func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
	n, _, err := fd.pwrite(ctx, src, offset, opts)
	return n, err
}

// Write implements vfs.FileDescriptionImpl.Write.
func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
	fd.offMu.Lock()
	n, off, err := fd.pwrite(ctx, src, fd.off, opts)
	fd.off = off
	fd.offMu.Unlock()
	return n, err
}

// pwrite returns the number of bytes written, the final offset, and an error.
// The final offset should be ignored by PWrite.
func (fd *regularFileFD) pwrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (written, finalOff int64, err error) {
	if offset < 0 {
		return 0, offset, linuxerr.EINVAL
	}

	// Check that flags are supported.
	//
	// TODO(github.com/SagerNet/issue/2601): Support select pwritev2 flags.
	if opts.Flags&^linux.RWF_HIPRI != 0 {
		return 0, offset, syserror.EOPNOTSUPP
	}

	inode := fd.inode()
	inode.metadataMu.Lock()
	defer inode.metadataMu.Unlock()

	// If the file is opened with O_APPEND, update the offset to the file size.
	// Note: since our Open() implements the interface of kernfs, and kernfs
	// currently does not support O_APPEND, this will never be true before we
	// switch away from kernfs.
	if fd.vfsfd.StatusFlags()&linux.O_APPEND != 0 {
		// Holding inode.metadataMu is sufficient for reading size.
		offset = int64(inode.size)
	}

	srclen := src.NumBytes()

	if srclen > math.MaxUint32 {
		// FUSE only supports uint32 for size.
		// Overflow.
		return 0, offset, linuxerr.EINVAL
	}
	if end := offset + srclen; end < offset {
		// Overflow.
		return 0, offset, linuxerr.EINVAL
	}

	srclen, err = vfs.CheckLimit(ctx, offset, srclen)
	if err != nil {
		return 0, offset, err
	}

	if srclen == 0 {
		// Return before causing any side effects.
		return 0, offset, nil
	}

	src = src.TakeFirst64(srclen)

	// TODO(github.com/SagerNet/issue/3237): Add cache support: buffer cache.
	// Ideally we write from src to our buffer cache first; the slice passed to
	// fs.Write() should then be a slice from the buffer cache.
	data := make([]byte, srclen)
	// Reason for making a copy here: connection.Call() blocks on the kernel
	// task, which in turn acquires the mm.activeMu lock. Functions like
	// CopyInTo() will attempt to acquire the mm.activeMu lock as well ->
	// deadlock. We must finish reading from the userspace memory before
	// t.Block() deactivates it.
	cp, err := src.CopyIn(ctx, data)
	if err != nil {
		return 0, offset, err
	}
	if int64(cp) != srclen {
		return 0, offset, syserror.EIO
	}

	n, err := fd.inode().fs.Write(ctx, fd, uint64(offset), uint32(srclen), data)
	if err != nil {
		return 0, offset, err
	}

	if n == 0 {
		// We have checked srclen != 0 previously.
		// If err == nil, then it's a short write and we return EIO.
		return 0, offset, syserror.EIO
	}

	written = int64(n)
	finalOff = offset + written

	// The write extended the file: update the cached size and bump the
	// connection's attribute version.
	if finalOff > int64(inode.size) {
		atomic.StoreUint64(&inode.size, uint64(finalOff))
		atomic.AddUint64(&inode.fs.conn.attributeVersion, 1)
	}

	return
}
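// NOTE: The helper below is an illustrative sketch added for exposition and is
// NOT part of the original gvisor file. It restates, in isolation, the
// fragmented-copy pattern used by PRead above: the payload arrives as several
// buffers, and at most size bytes in total are copied out, stopping once that
// budget is exhausted. The name copyFragmented is hypothetical.
//
// Example: with two 4096-byte buffers and size = 6000, the first buffer is
// written in full and only the first 1904 bytes of the second are written, so
// the returned count is 6000.
func copyFragmented(dst io.Writer, buffers [][]byte, size int64) (int64, error) {
	var copied int64
	for _, buffer := range buffers {
		// Clamp this fragment to the remaining budget.
		toCopy := int64(len(buffer))
		if copied+toCopy > size {
			toCopy = size - copied
		}
		n, err := dst.Write(buffer[:toCopy])
		copied += int64(n)
		if err != nil {
			return copied, err
		}
		if copied == size {
			break
		}
	}
	return copied, nil
}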