github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/ext/extent_file.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package ext 16 17 import ( 18 "io" 19 "sort" 20 21 "github.com/SagerNet/gvisor/pkg/errors/linuxerr" 22 "github.com/SagerNet/gvisor/pkg/sentry/fsimpl/ext/disklayout" 23 "github.com/SagerNet/gvisor/pkg/syserror" 24 ) 25 26 // extentFile is a type of regular file which uses extents to store file data. 27 // 28 // +stateify savable 29 type extentFile struct { 30 regFile regularFile 31 32 // root is the root extent node. This lives in the 60 byte diskInode.Data(). 33 // Immutable. 34 root disklayout.ExtentNode 35 } 36 37 // Compiles only if extentFile implements io.ReaderAt. 38 var _ io.ReaderAt = (*extentFile)(nil) 39 40 // newExtentFile is the extent file constructor. It reads the entire extent 41 // tree into memory. 42 // TODO(b/134676337): Build extent tree on demand to reduce memory usage. 43 func newExtentFile(args inodeArgs) (*extentFile, error) { 44 file := &extentFile{} 45 file.regFile.impl = file 46 file.regFile.inode.init(args, &file.regFile) 47 err := file.buildExtTree() 48 if err != nil { 49 return nil, err 50 } 51 return file, nil 52 } 53 54 // buildExtTree builds the extent tree by reading it from disk by doing 55 // running a simple DFS. It first reads the root node from the inode struct in 56 // memory. Then it recursively builds the rest of the tree by reading it off 57 // disk. 58 // 59 // Precondition: inode flag InExtents must be set. 60 func (f *extentFile) buildExtTree() error { 61 rootNodeData := f.regFile.inode.diskInode.Data() 62 63 f.root.Header.UnmarshalBytes(rootNodeData[:disklayout.ExtentHeaderSize]) 64 65 // Root node can not have more than 4 entries: 60 bytes = 1 header + 4 entries. 66 if f.root.Header.NumEntries > 4 { 67 // read(2) specifies that EINVAL should be returned if the file is unsuitable 68 // for reading. 69 return linuxerr.EINVAL 70 } 71 72 f.root.Entries = make([]disklayout.ExtentEntryPair, f.root.Header.NumEntries) 73 for i, off := uint16(0), disklayout.ExtentEntrySize; i < f.root.Header.NumEntries; i, off = i+1, off+disklayout.ExtentEntrySize { 74 var curEntry disklayout.ExtentEntry 75 if f.root.Header.Height == 0 { 76 // Leaf node. 77 curEntry = &disklayout.Extent{} 78 } else { 79 // Internal node. 80 curEntry = &disklayout.ExtentIdx{} 81 } 82 curEntry.UnmarshalBytes(rootNodeData[off : off+disklayout.ExtentEntrySize]) 83 f.root.Entries[i].Entry = curEntry 84 } 85 86 // If this node is internal, perform DFS. 87 if f.root.Header.Height > 0 { 88 for i := uint16(0); i < f.root.Header.NumEntries; i++ { 89 var err error 90 if f.root.Entries[i].Node, err = f.buildExtTreeFromDisk(f.root.Entries[i].Entry); err != nil { 91 return err 92 } 93 } 94 } 95 96 return nil 97 } 98 99 // buildExtTreeFromDisk reads the extent tree nodes from disk and recursively 100 // builds the tree. Performs a simple DFS. It returns the ExtentNode pointed to 101 // by the ExtentEntry. 102 func (f *extentFile) buildExtTreeFromDisk(entry disklayout.ExtentEntry) (*disklayout.ExtentNode, error) { 103 var header disklayout.ExtentHeader 104 off := entry.PhysicalBlock() * f.regFile.inode.blkSize 105 err := readFromDisk(f.regFile.inode.fs.dev, int64(off), &header) 106 if err != nil { 107 return nil, err 108 } 109 110 entries := make([]disklayout.ExtentEntryPair, header.NumEntries) 111 for i, off := uint16(0), off+disklayout.ExtentEntrySize; i < header.NumEntries; i, off = i+1, off+disklayout.ExtentEntrySize { 112 var curEntry disklayout.ExtentEntry 113 if header.Height == 0 { 114 // Leaf node. 115 curEntry = &disklayout.Extent{} 116 } else { 117 // Internal node. 118 curEntry = &disklayout.ExtentIdx{} 119 } 120 121 err := readFromDisk(f.regFile.inode.fs.dev, int64(off), curEntry) 122 if err != nil { 123 return nil, err 124 } 125 entries[i].Entry = curEntry 126 } 127 128 // If this node is internal, perform DFS. 129 if header.Height > 0 { 130 for i := uint16(0); i < header.NumEntries; i++ { 131 var err error 132 entries[i].Node, err = f.buildExtTreeFromDisk(entries[i].Entry) 133 if err != nil { 134 return nil, err 135 } 136 } 137 } 138 139 return &disklayout.ExtentNode{header, entries}, nil 140 } 141 142 // ReadAt implements io.ReaderAt.ReadAt. 143 func (f *extentFile) ReadAt(dst []byte, off int64) (int, error) { 144 if len(dst) == 0 { 145 return 0, nil 146 } 147 148 if off < 0 { 149 return 0, linuxerr.EINVAL 150 } 151 152 if uint64(off) >= f.regFile.inode.diskInode.Size() { 153 return 0, io.EOF 154 } 155 156 n, err := f.read(&f.root, uint64(off), dst) 157 if n < len(dst) && err == nil { 158 err = io.EOF 159 } 160 return n, err 161 } 162 163 // read is the recursive step of extentFile.ReadAt which traverses the extent 164 // tree from the node passed and reads file data. 165 func (f *extentFile) read(node *disklayout.ExtentNode, off uint64, dst []byte) (int, error) { 166 // Perform a binary search for the node covering bytes starting at r.fileOff. 167 // A highly fragmented filesystem can have upto 340 entries and so linear 168 // search should be avoided. Finds the first entry which does not cover the 169 // file block we want and subtracts 1 to get the desired index. 170 fileBlk := uint32(off / f.regFile.inode.blkSize) 171 n := len(node.Entries) 172 found := sort.Search(n, func(i int) bool { 173 return node.Entries[i].Entry.FileBlock() > fileBlk 174 }) - 1 175 176 // We should be in this recursive step only if the data we want exists under 177 // the current node. 178 if found < 0 { 179 panic("searching for a file block in an extent entry which does not cover it") 180 } 181 182 read := 0 183 toRead := len(dst) 184 var curR int 185 var err error 186 for i := found; i < n && read < toRead; i++ { 187 if node.Header.Height == 0 { 188 curR, err = f.readFromExtent(node.Entries[i].Entry.(*disklayout.Extent), off, dst[read:]) 189 } else { 190 curR, err = f.read(node.Entries[i].Node, off, dst[read:]) 191 } 192 193 read += curR 194 off += uint64(curR) 195 if err != nil { 196 return read, err 197 } 198 } 199 200 return read, nil 201 } 202 203 // readFromExtent reads file data from the extent. It takes advantage of the 204 // sequential nature of extents and reads file data from multiple blocks in one 205 // call. 206 // 207 // A non-nil error indicates that this is a partial read and there is probably 208 // more to read from this extent. The caller should propagate the error upward 209 // and not move to the next extent in the tree. 210 // 211 // A subsequent call to extentReader.Read should continue reading from where we 212 // left off as expected. 213 func (f *extentFile) readFromExtent(ex *disklayout.Extent, off uint64, dst []byte) (int, error) { 214 curFileBlk := uint32(off / f.regFile.inode.blkSize) 215 exFirstFileBlk := ex.FileBlock() 216 exLastFileBlk := exFirstFileBlk + uint32(ex.Length) // This is exclusive. 217 218 // We should be in this recursive step only if the data we want exists under 219 // the current extent. 220 if curFileBlk < exFirstFileBlk || exLastFileBlk <= curFileBlk { 221 panic("searching for a file block in an extent which does not cover it") 222 } 223 224 curPhyBlk := uint64(curFileBlk-exFirstFileBlk) + ex.PhysicalBlock() 225 readStart := curPhyBlk*f.regFile.inode.blkSize + (off % f.regFile.inode.blkSize) 226 227 endPhyBlk := ex.PhysicalBlock() + uint64(ex.Length) 228 extentEnd := endPhyBlk * f.regFile.inode.blkSize // This is exclusive. 229 230 toRead := int(extentEnd - readStart) 231 if len(dst) < toRead { 232 toRead = len(dst) 233 } 234 235 n, _ := f.regFile.inode.fs.dev.ReadAt(dst[:toRead], int64(readStart)) 236 if n < toRead { 237 return n, syserror.EIO 238 } 239 return n, nil 240 }