github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/gofer/directory.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package gofer 16 17 import ( 18 "fmt" 19 "sync/atomic" 20 21 "github.com/SagerNet/gvisor/pkg/abi/linux" 22 "github.com/SagerNet/gvisor/pkg/context" 23 "github.com/SagerNet/gvisor/pkg/errors/linuxerr" 24 "github.com/SagerNet/gvisor/pkg/hostarch" 25 "github.com/SagerNet/gvisor/pkg/p9" 26 "github.com/SagerNet/gvisor/pkg/refsvfs2" 27 "github.com/SagerNet/gvisor/pkg/sentry/kernel/auth" 28 "github.com/SagerNet/gvisor/pkg/sentry/kernel/pipe" 29 "github.com/SagerNet/gvisor/pkg/sentry/socket/unix/transport" 30 "github.com/SagerNet/gvisor/pkg/sentry/vfs" 31 "github.com/SagerNet/gvisor/pkg/sync" 32 ) 33 34 func (d *dentry) isDir() bool { 35 return d.fileType() == linux.S_IFDIR 36 } 37 38 // Preconditions: 39 // * filesystem.renameMu must be locked. 40 // * d.dirMu must be locked. 41 // * d.isDir(). 42 // * child must be a newly-created dentry that has never had a parent. 43 func (d *dentry) cacheNewChildLocked(child *dentry, name string) { 44 d.IncRef() // reference held by child on its parent 45 child.parent = d 46 child.name = name 47 if d.children == nil { 48 d.children = make(map[string]*dentry) 49 } 50 d.children[name] = child 51 } 52 53 // Preconditions: 54 // * d.dirMu must be locked. 55 // * d.isDir(). 56 func (d *dentry) cacheNegativeLookupLocked(name string) { 57 // Don't cache negative lookups if InteropModeShared is in effect (since 58 // this makes remote lookup unavoidable), or if d.isSynthetic() (in which 59 // case the only files in the directory are those for which a dentry exists 60 // in d.children). Instead, just delete any previously-cached dentry. 61 if d.fs.opts.interop == InteropModeShared || d.isSynthetic() { 62 delete(d.children, name) 63 return 64 } 65 if d.children == nil { 66 d.children = make(map[string]*dentry) 67 } 68 d.children[name] = nil 69 } 70 71 type createSyntheticOpts struct { 72 name string 73 mode linux.FileMode 74 kuid auth.KUID 75 kgid auth.KGID 76 77 // The endpoint for a synthetic socket. endpoint should be nil if the file 78 // being created is not a socket. 79 endpoint transport.BoundEndpoint 80 81 // pipe should be nil if the file being created is not a pipe. 82 pipe *pipe.VFSPipe 83 } 84 85 // createSyntheticChildLocked creates a synthetic file with the given name 86 // in d. 87 // 88 // Preconditions: 89 // * d.dirMu must be locked. 90 // * d.isDir(). 91 // * d does not already contain a child with the given name. 92 func (d *dentry) createSyntheticChildLocked(opts *createSyntheticOpts) { 93 now := d.fs.clock.Now().Nanoseconds() 94 child := &dentry{ 95 refs: 1, // held by d 96 fs: d.fs, 97 ino: d.fs.nextIno(), 98 mode: uint32(opts.mode), 99 uid: uint32(opts.kuid), 100 gid: uint32(opts.kgid), 101 blockSize: hostarch.PageSize, // arbitrary 102 atime: now, 103 mtime: now, 104 ctime: now, 105 btime: now, 106 readFD: -1, 107 writeFD: -1, 108 mmapFD: -1, 109 nlink: uint32(2), 110 } 111 refsvfs2.Register(child) 112 switch opts.mode.FileType() { 113 case linux.S_IFDIR: 114 // Nothing else needs to be done. 115 case linux.S_IFSOCK: 116 child.endpoint = opts.endpoint 117 case linux.S_IFIFO: 118 child.pipe = opts.pipe 119 default: 120 panic(fmt.Sprintf("failed to create synthetic file of unrecognized type: %v", opts.mode.FileType())) 121 } 122 child.pf.dentry = child 123 child.vfsd.Init(child) 124 125 d.cacheNewChildLocked(child, opts.name) 126 d.syntheticChildren++ 127 } 128 129 // +stateify savable 130 type directoryFD struct { 131 fileDescription 132 vfs.DirectoryFileDescriptionDefaultImpl 133 134 mu sync.Mutex `state:"nosave"` 135 off int64 136 dirents []vfs.Dirent 137 } 138 139 // Release implements vfs.FileDescriptionImpl.Release. 140 func (fd *directoryFD) Release(context.Context) { 141 } 142 143 // IterDirents implements vfs.FileDescriptionImpl.IterDirents. 144 func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error { 145 fd.mu.Lock() 146 defer fd.mu.Unlock() 147 148 d := fd.dentry() 149 if fd.dirents == nil { 150 ds, err := d.getDirents(ctx) 151 if err != nil { 152 return err 153 } 154 fd.dirents = ds 155 } 156 157 d.InotifyWithParent(ctx, linux.IN_ACCESS, 0, vfs.PathEvent) 158 if d.cachedMetadataAuthoritative() { 159 d.touchAtime(fd.vfsfd.Mount()) 160 } 161 162 for fd.off < int64(len(fd.dirents)) { 163 if err := cb.Handle(fd.dirents[fd.off]); err != nil { 164 return err 165 } 166 fd.off++ 167 } 168 return nil 169 } 170 171 // Preconditions: 172 // * d.isDir(). 173 // * There exists at least one directoryFD representing d. 174 func (d *dentry) getDirents(ctx context.Context) ([]vfs.Dirent, error) { 175 // NOTE(b/135560623): 9P2000.L's readdir does not specify behavior in the 176 // presence of concurrent mutation of an iterated directory, so 177 // implementations may duplicate or omit entries in this case, which 178 // violates POSIX semantics. Thus we read all directory entries while 179 // holding d.dirMu to exclude directory mutations. (Note that it is 180 // impossible for the client to exclude concurrent mutation from other 181 // remote filesystem users. Since there is no way to detect if the server 182 // has incorrectly omitted directory entries, we simply assume that the 183 // server is well-behaved under InteropModeShared.) This is inconsistent 184 // with Linux (which appears to assume that directory fids have the correct 185 // semantics, and translates struct file_operations::readdir calls directly 186 // to readdir RPCs), but is consistent with VFS1. 187 188 // filesystem.renameMu is needed for d.parent, and must be locked before 189 // dentry.dirMu. 190 d.fs.renameMu.RLock() 191 defer d.fs.renameMu.RUnlock() 192 d.dirMu.Lock() 193 defer d.dirMu.Unlock() 194 if d.dirents != nil { 195 return d.dirents, nil 196 } 197 198 // It's not clear if 9P2000.L's readdir is expected to return "." and "..", 199 // so we generate them here. 200 parent := genericParentOrSelf(d) 201 dirents := []vfs.Dirent{ 202 { 203 Name: ".", 204 Type: linux.DT_DIR, 205 Ino: uint64(d.ino), 206 NextOff: 1, 207 }, 208 { 209 Name: "..", 210 Type: uint8(atomic.LoadUint32(&parent.mode) >> 12), 211 Ino: uint64(parent.ino), 212 NextOff: 2, 213 }, 214 } 215 var realChildren map[string]struct{} 216 if !d.isSynthetic() { 217 if d.syntheticChildren != 0 && d.fs.opts.interop == InteropModeShared { 218 // Record the set of children d actually has so that we don't emit 219 // duplicate entries for synthetic children. 220 realChildren = make(map[string]struct{}) 221 } 222 off := uint64(0) 223 const count = 64 * 1024 // for consistency with the vfs1 client 224 d.handleMu.RLock() 225 if d.readFile.isNil() { 226 // This should not be possible because a readable handle should 227 // have been opened when the calling directoryFD was opened. 228 d.handleMu.RUnlock() 229 panic("gofer.dentry.getDirents called without a readable handle") 230 } 231 for { 232 p9ds, err := d.readFile.readdir(ctx, off, count) 233 if err != nil { 234 d.handleMu.RUnlock() 235 return nil, err 236 } 237 if len(p9ds) == 0 { 238 d.handleMu.RUnlock() 239 break 240 } 241 for _, p9d := range p9ds { 242 if p9d.Name == "." || p9d.Name == ".." { 243 continue 244 } 245 dirent := vfs.Dirent{ 246 Name: p9d.Name, 247 Ino: d.fs.inoFromQIDPath(p9d.QID.Path), 248 NextOff: int64(len(dirents) + 1), 249 } 250 // p9 does not expose 9P2000.U's DMDEVICE, DMNAMEDPIPE, or 251 // DMSOCKET. 252 switch p9d.Type { 253 case p9.TypeSymlink: 254 dirent.Type = linux.DT_LNK 255 case p9.TypeDir: 256 dirent.Type = linux.DT_DIR 257 default: 258 dirent.Type = linux.DT_REG 259 } 260 dirents = append(dirents, dirent) 261 if realChildren != nil { 262 realChildren[p9d.Name] = struct{}{} 263 } 264 } 265 off = p9ds[len(p9ds)-1].Offset 266 } 267 } 268 // Emit entries for synthetic children. 269 if d.syntheticChildren != 0 { 270 for _, child := range d.children { 271 if child == nil || !child.isSynthetic() { 272 continue 273 } 274 if _, ok := realChildren[child.name]; ok { 275 continue 276 } 277 dirents = append(dirents, vfs.Dirent{ 278 Name: child.name, 279 Type: uint8(atomic.LoadUint32(&child.mode) >> 12), 280 Ino: uint64(child.ino), 281 NextOff: int64(len(dirents) + 1), 282 }) 283 } 284 } 285 // Cache dirents for future directoryFDs if permitted. 286 if d.cachedMetadataAuthoritative() { 287 d.dirents = dirents 288 } 289 return dirents, nil 290 } 291 292 // Seek implements vfs.FileDescriptionImpl.Seek. 293 func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { 294 fd.mu.Lock() 295 defer fd.mu.Unlock() 296 297 switch whence { 298 case linux.SEEK_SET: 299 if offset < 0 { 300 return 0, linuxerr.EINVAL 301 } 302 if offset == 0 { 303 // Ensure that the next call to fd.IterDirents() calls 304 // fd.dentry().getDirents(). 305 fd.dirents = nil 306 } 307 fd.off = offset 308 return fd.off, nil 309 case linux.SEEK_CUR: 310 offset += fd.off 311 if offset < 0 { 312 return 0, linuxerr.EINVAL 313 } 314 // Don't clear fd.dirents in this case, even if offset == 0. 315 fd.off = offset 316 return fd.off, nil 317 default: 318 return 0, linuxerr.EINVAL 319 } 320 } 321 322 // Sync implements vfs.FileDescriptionImpl.Sync. 323 func (fd *directoryFD) Sync(ctx context.Context) error { 324 return fd.dentry().syncRemoteFile(ctx) 325 }