github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/ext/directory.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package ext 16 17 import ( 18 "github.com/SagerNet/gvisor/pkg/abi/linux" 19 "github.com/SagerNet/gvisor/pkg/context" 20 "github.com/SagerNet/gvisor/pkg/errors/linuxerr" 21 "github.com/SagerNet/gvisor/pkg/log" 22 "github.com/SagerNet/gvisor/pkg/sentry/fs" 23 "github.com/SagerNet/gvisor/pkg/sentry/fsimpl/ext/disklayout" 24 "github.com/SagerNet/gvisor/pkg/sentry/vfs" 25 "github.com/SagerNet/gvisor/pkg/sync" 26 ) 27 28 // directory represents a directory inode. It holds the childList in memory. 29 // 30 // +stateify savable 31 type directory struct { 32 inode inode 33 34 // childCache maps filenames to dentries for children for which dentries 35 // have been instantiated. childCache is protected by filesystem.mu. 36 childCache map[string]*dentry 37 38 // mu serializes the changes to childList. 39 // Lock Order (outermost locks must be taken first): 40 // directory.mu 41 // filesystem.mu 42 mu sync.Mutex `state:"nosave"` 43 44 // childList is a list containing (1) child dirents and (2) fake dirents 45 // (with diskDirent == nil) that represent the iteration position of 46 // directoryFDs. childList is used to support directoryFD.IterDirents() 47 // efficiently. childList is protected by mu. 48 childList direntList 49 50 // childMap maps the child's filename to the dirent structure stored in 51 // childList. This adds some data replication but helps in faster path 52 // traversal. For consistency, key == childMap[key].diskDirent.FileName(). 53 // Immutable. 54 childMap map[string]*dirent 55 } 56 57 // newDirectory is the directory constructor. 58 func newDirectory(args inodeArgs, newDirent bool) (*directory, error) { 59 file := &directory{ 60 childCache: make(map[string]*dentry), 61 childMap: make(map[string]*dirent), 62 } 63 file.inode.init(args, file) 64 65 // Initialize childList by reading dirents from the underlying file. 66 if args.diskInode.Flags().Index { 67 // TODO(b/134676337): Support hash tree directories. Currently only the '.' 68 // and '..' entries are read in. 69 70 // Users cannot navigate this hash tree directory yet. 71 log.Warningf("hash tree directory being used which is unsupported") 72 return file, nil 73 } 74 75 // The dirents are organized in a linear array in the file data. 76 // Extract the file data and decode the dirents. 77 regFile, err := newRegularFile(args) 78 if err != nil { 79 return nil, err 80 } 81 82 // buf is used as scratch space for reading in dirents from disk and 83 // unmarshalling them into dirent structs. 84 buf := make([]byte, disklayout.DirentSize) 85 size := args.diskInode.Size() 86 for off, inc := uint64(0), uint64(0); off < size; off += inc { 87 toRead := size - off 88 if toRead > disklayout.DirentSize { 89 toRead = disklayout.DirentSize 90 } 91 if n, err := regFile.impl.ReadAt(buf[:toRead], int64(off)); uint64(n) < toRead { 92 return nil, err 93 } 94 95 var curDirent dirent 96 if newDirent { 97 curDirent.diskDirent = &disklayout.DirentNew{} 98 } else { 99 curDirent.diskDirent = &disklayout.DirentOld{} 100 } 101 curDirent.diskDirent.UnmarshalBytes(buf) 102 103 if curDirent.diskDirent.Inode() != 0 && len(curDirent.diskDirent.FileName()) != 0 { 104 // Inode number and name length fields being set to 0 is used to indicate 105 // an unused dirent. 106 file.childList.PushBack(&curDirent) 107 file.childMap[curDirent.diskDirent.FileName()] = &curDirent 108 } 109 110 // The next dirent is placed exactly after this dirent record on disk. 111 inc = uint64(curDirent.diskDirent.RecordSize()) 112 } 113 114 return file, nil 115 } 116 117 func (i *inode) isDir() bool { 118 _, ok := i.impl.(*directory) 119 return ok 120 } 121 122 // dirent is the directory.childList node. 123 // 124 // +stateify savable 125 type dirent struct { 126 diskDirent disklayout.Dirent 127 128 // direntEntry links dirents into their parent directory.childList. 129 direntEntry 130 } 131 132 // directoryFD represents a directory file description. It implements 133 // vfs.FileDescriptionImpl. 134 // 135 // +stateify savable 136 type directoryFD struct { 137 fileDescription 138 vfs.DirectoryFileDescriptionDefaultImpl 139 140 // Protected by directory.mu. 141 iter *dirent 142 off int64 143 } 144 145 // Compiles only if directoryFD implements vfs.FileDescriptionImpl. 146 var _ vfs.FileDescriptionImpl = (*directoryFD)(nil) 147 148 // Release implements vfs.FileDescriptionImpl.Release. 149 func (fd *directoryFD) Release(ctx context.Context) { 150 if fd.iter == nil { 151 return 152 } 153 154 dir := fd.inode().impl.(*directory) 155 dir.mu.Lock() 156 dir.childList.Remove(fd.iter) 157 dir.mu.Unlock() 158 fd.iter = nil 159 } 160 161 // IterDirents implements vfs.FileDescriptionImpl.IterDirents. 162 func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error { 163 extfs := fd.filesystem() 164 dir := fd.inode().impl.(*directory) 165 166 dir.mu.Lock() 167 defer dir.mu.Unlock() 168 169 // Ensure that fd.iter exists and is not linked into dir.childList. 170 var child *dirent 171 if fd.iter == nil { 172 // Start iteration at the beginning of dir. 173 child = dir.childList.Front() 174 fd.iter = &dirent{} 175 } else { 176 // Continue iteration from where we left off. 177 child = fd.iter.Next() 178 dir.childList.Remove(fd.iter) 179 } 180 for ; child != nil; child = child.Next() { 181 // Skip other directoryFD iterators. 182 if child.diskDirent != nil { 183 childType, ok := child.diskDirent.FileType() 184 if !ok { 185 // We will need to read the inode off disk. Do not increment 186 // ref count here because this inode is not being added to the 187 // dentry tree. 188 extfs.mu.Lock() 189 childInode, err := extfs.getOrCreateInodeLocked(child.diskDirent.Inode()) 190 extfs.mu.Unlock() 191 if err != nil { 192 // Usage of the file description after the error is 193 // undefined. This implementation would continue reading 194 // from the next dirent. 195 fd.off++ 196 dir.childList.InsertAfter(child, fd.iter) 197 return err 198 } 199 childType = fs.ToInodeType(childInode.diskInode.Mode().FileType()) 200 } 201 202 if err := cb.Handle(vfs.Dirent{ 203 Name: child.diskDirent.FileName(), 204 Type: fs.ToDirentType(childType), 205 Ino: uint64(child.diskDirent.Inode()), 206 NextOff: fd.off + 1, 207 }); err != nil { 208 dir.childList.InsertBefore(child, fd.iter) 209 return err 210 } 211 fd.off++ 212 } 213 } 214 dir.childList.PushBack(fd.iter) 215 return nil 216 } 217 218 // Seek implements vfs.FileDescriptionImpl.Seek. 219 func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { 220 if whence != linux.SEEK_SET && whence != linux.SEEK_CUR { 221 return 0, linuxerr.EINVAL 222 } 223 224 dir := fd.inode().impl.(*directory) 225 226 dir.mu.Lock() 227 defer dir.mu.Unlock() 228 229 // Find resulting offset. 230 if whence == linux.SEEK_CUR { 231 offset += fd.off 232 } 233 234 if offset < 0 { 235 // lseek(2) specifies that EINVAL should be returned if the resulting offset 236 // is negative. 237 return 0, linuxerr.EINVAL 238 } 239 240 n := int64(len(dir.childMap)) 241 realWantOff := offset 242 if realWantOff > n { 243 realWantOff = n 244 } 245 realCurOff := fd.off 246 if realCurOff > n { 247 realCurOff = n 248 } 249 250 // Ensure that fd.iter exists and is linked into dir.childList so we can 251 // intelligently seek from the optimal position. 252 if fd.iter == nil { 253 fd.iter = &dirent{} 254 dir.childList.PushFront(fd.iter) 255 } 256 257 // Guess that iterating from the current position is optimal. 258 child := fd.iter 259 diff := realWantOff - realCurOff // Shows direction and magnitude of travel. 260 261 // See if starting from the beginning or end is better. 262 abDiff := diff 263 if diff < 0 { 264 abDiff = -diff 265 } 266 if abDiff > realWantOff { 267 // Starting from the beginning is best. 268 child = dir.childList.Front() 269 diff = realWantOff 270 } else if abDiff > (n - realWantOff) { 271 // Starting from the end is best. 272 child = dir.childList.Back() 273 // (n - 1) because the last non-nil dirent represents the (n-1)th offset. 274 diff = realWantOff - (n - 1) 275 } 276 277 for child != nil { 278 // Skip other directoryFD iterators. 279 if child.diskDirent != nil { 280 if diff == 0 { 281 if child != fd.iter { 282 dir.childList.Remove(fd.iter) 283 dir.childList.InsertBefore(child, fd.iter) 284 } 285 286 fd.off = offset 287 return offset, nil 288 } 289 290 if diff < 0 { 291 diff++ 292 child = child.Prev() 293 } else { 294 diff-- 295 child = child.Next() 296 } 297 continue 298 } 299 300 if diff < 0 { 301 child = child.Prev() 302 } else { 303 child = child.Next() 304 } 305 } 306 307 // Reaching here indicates that the offset is beyond the end of the childList. 308 dir.childList.Remove(fd.iter) 309 dir.childList.PushBack(fd.iter) 310 fd.off = offset 311 return offset, nil 312 }