github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/fsimpl/fuse/read_write.go (about) 1 // Copyright 2020 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package fuse 16 17 import ( 18 "io" 19 20 "github.com/nicocha30/gvisor-ligolo/pkg/abi/linux" 21 "github.com/nicocha30/gvisor-ligolo/pkg/context" 22 "github.com/nicocha30/gvisor-ligolo/pkg/errors/linuxerr" 23 "github.com/nicocha30/gvisor-ligolo/pkg/hostarch" 24 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/kernel/auth" 25 "github.com/nicocha30/gvisor-ligolo/pkg/usermem" 26 ) 27 28 // ReadInPages sends FUSE_READ requests for the size after round it up to 29 // a multiple of page size, blocks on it for reply, processes the reply 30 // and returns the payload (or joined payloads) as a byte slice. 31 // This is used for the general purpose reading. 32 // We do not support direct IO (which read the exact number of bytes) 33 // at this moment. 34 func (fs *filesystem) ReadInPages(ctx context.Context, fd *regularFileFD, off uint64, size uint32) ([][]byte, uint32, error) { 35 attributeVersion := fs.conn.attributeVersion.Load() 36 37 // Round up to a multiple of page size. 38 readSize, _ := hostarch.PageRoundUp(uint64(size)) 39 40 // One request cannot exceed either maxRead or maxPages. 41 maxPages := fs.conn.maxRead >> hostarch.PageShift 42 if maxPages > uint32(fs.conn.maxPages) { 43 maxPages = uint32(fs.conn.maxPages) 44 } 45 46 var outs [][]byte 47 var sizeRead uint32 48 49 // readSize is a multiple of hostarch.PageSize. 50 // Always request bytes as a multiple of pages. 51 pagesRead, pagesToRead := uint32(0), uint32(readSize>>hostarch.PageShift) 52 53 // Reuse the same struct for unmarshalling to avoid unnecessary memory allocation. 54 in := linux.FUSEReadIn{ 55 Fh: fd.Fh, 56 LockOwner: 0, // TODO(gvisor.dev/issue/3245): file lock 57 ReadFlags: 0, // TODO(gvisor.dev/issue/3245): |= linux.FUSE_READ_LOCKOWNER 58 Flags: fd.statusFlags(), 59 } 60 61 // This loop is intended for fragmented read where the bytes to read is 62 // larger than either the maxPages or maxRead. 63 // For the majority of reads with normal size, this loop should only 64 // execute once. 65 for pagesRead < pagesToRead { 66 pagesCanRead := pagesToRead - pagesRead 67 if pagesCanRead > maxPages { 68 pagesCanRead = maxPages 69 } 70 71 in.Offset = off + (uint64(pagesRead) << hostarch.PageShift) 72 in.Size = pagesCanRead << hostarch.PageShift 73 74 // TODO(gvisor.dev/issue/3247): support async read. 75 req := fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), fd.inode().nodeID, linux.FUSE_READ, &in) 76 res, err := fs.conn.Call(ctx, req) 77 if err != nil { 78 return nil, 0, err 79 } 80 if err := res.Error(); err != nil { 81 return nil, 0, err 82 } 83 84 // Not enough bytes in response, 85 // either we reached EOF, 86 // or the FUSE server sends back a response 87 // that cannot even fit the hdr. 88 if len(res.data) <= res.hdr.SizeBytes() { 89 // We treat both case as EOF here for now 90 // since there is no reliable way to detect 91 // the over-short hdr case. 92 break 93 } 94 95 // Directly using the slice to avoid extra copy. 96 out := res.data[res.hdr.SizeBytes():] 97 98 outs = append(outs, out) 99 sizeRead += uint32(len(out)) 100 101 pagesRead += pagesCanRead 102 } 103 104 defer fs.ReadCallback(ctx, fd.inode(), off, size, sizeRead, attributeVersion) // +checklocksforce: fd.inode() locks are held during fd operations. 105 106 // No bytes returned: offset >= EOF. 107 if len(outs) == 0 { 108 return nil, 0, io.EOF 109 } 110 111 return outs, sizeRead, nil 112 } 113 114 // ReadCallback updates several information after receiving a read response. 115 // Due to readahead, sizeRead can be larger than size. 116 // 117 // +checklocks:i.attrMu 118 func (fs *filesystem) ReadCallback(ctx context.Context, i *inode, off uint64, size uint32, sizeRead uint32, attributeVersion uint64) { 119 // TODO(gvisor.dev/issue/3247): support async read. 120 // If this is called by an async read, correctly process it. 121 // May need to update the signature. 122 i.touchAtime() 123 // Reached EOF. 124 if sizeRead < size { 125 // TODO(gvisor.dev/issue/3630): If we have writeback cache, then we need to fill this hole. 126 // Might need to update the buf to be returned from the Read(). 127 128 // Update existing size. 129 newSize := off + uint64(sizeRead) 130 fs.conn.mu.Lock() 131 if attributeVersion == i.attrVersion.Load() && newSize < i.size.Load() { 132 i.attrVersion.Store(i.fs.conn.attributeVersion.Add(1)) 133 i.size.Store(newSize) 134 } 135 fs.conn.mu.Unlock() 136 } 137 } 138 139 // Write sends FUSE_WRITE requests and return the bytes written according to the 140 // response. 141 func (fs *filesystem) Write(ctx context.Context, fd *regularFileFD, offset int64, src usermem.IOSequence) (int64, int64, error) { 142 // One request cannot exceed either maxWrite or maxPages. 143 maxWrite := uint32(fs.conn.maxPages) << hostarch.PageShift 144 if maxWrite > fs.conn.maxWrite { 145 maxWrite = fs.conn.maxWrite 146 } 147 148 // Reuse the same struct for unmarshalling to avoid unnecessary memory allocation. 149 in := linux.FUSEWritePayloadIn{ 150 Header: linux.FUSEWriteIn{ 151 Fh: fd.Fh, 152 // TODO(gvisor.dev/issue/3245): file lock 153 LockOwner: 0, 154 // TODO(gvisor.dev/issue/3245): |= linux.FUSE_READ_LOCKOWNER 155 // TODO(gvisor.dev/issue/3237): |= linux.FUSE_WRITE_CACHE (not added yet) 156 WriteFlags: 0, 157 Flags: fd.statusFlags(), 158 }, 159 } 160 161 // This loop is intended for fragmented write where the bytes to write is 162 // larger than either the maxWrite or maxPages or when bigWrites is false. 163 // Unless a small value for max_write is explicitly used, this loop 164 // is expected to execute only once for the majority of the writes. 165 n := int64(0) 166 end := offset + src.NumBytes() 167 for n < end { 168 writeSize := uint32(end - n) 169 170 // Limit the write size to one page. 171 // Note that the bigWrites flag is obsolete, 172 // latest libfuse always sets it on. 173 if !fs.conn.bigWrites && writeSize > hostarch.PageSize { 174 writeSize = hostarch.PageSize 175 } 176 // Limit the write size to maxWrite. 177 if writeSize > maxWrite { 178 writeSize = maxWrite 179 } 180 181 // TODO(gvisor.dev/issue/3237): Add cache support: 182 // buffer cache. Ideally we write from src to our buffer cache first. 183 // The slice passed to fs.Write() should be a slice from buffer cache. 184 data := make([]byte, writeSize) 185 cp, _ := src.CopyIn(ctx, data) 186 data = data[:cp] 187 188 in.Header.Offset = uint64(offset) 189 in.Header.Size = uint32(cp) 190 in.Payload = data 191 192 req := fs.conn.NewRequest(auth.CredentialsFromContext(ctx), pidFromContext(ctx), fd.inode().nodeID, linux.FUSE_WRITE, &in) 193 // TODO(gvisor.dev/issue/3247): support async write. 194 res, err := fs.conn.Call(ctx, req) 195 if err != nil { 196 return n, offset, err 197 } 198 out := linux.FUSEWriteOut{} 199 if err := res.UnmarshalPayload(&out); err != nil { 200 return n, offset, err 201 } 202 n += int64(out.Size) 203 offset += int64(out.Size) 204 src = src.DropFirst64(int64(out.Size)) 205 206 if err := res.Error(); err != nil { 207 return n, offset, err 208 } 209 // Write more than requested? EIO. 210 if out.Size > writeSize { 211 return n, offset, linuxerr.EIO 212 } 213 // Break if short write. Not necessarily an error. 214 if out.Size != writeSize { 215 break 216 } 217 } 218 return n, offset, nil 219 }