github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/fsimpl/gofer/filesystem.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package gofer 16 17 import ( 18 "fmt" 19 "math" 20 "strings" 21 "sync" 22 23 "golang.org/x/sys/unix" 24 "github.com/MerlinKodo/gvisor/pkg/abi/linux" 25 "github.com/MerlinKodo/gvisor/pkg/atomicbitops" 26 "github.com/MerlinKodo/gvisor/pkg/context" 27 "github.com/MerlinKodo/gvisor/pkg/errors/linuxerr" 28 "github.com/MerlinKodo/gvisor/pkg/fspath" 29 "github.com/MerlinKodo/gvisor/pkg/sentry/fsimpl/host" 30 "github.com/MerlinKodo/gvisor/pkg/sentry/fsmetric" 31 "github.com/MerlinKodo/gvisor/pkg/sentry/kernel" 32 "github.com/MerlinKodo/gvisor/pkg/sentry/kernel/auth" 33 "github.com/MerlinKodo/gvisor/pkg/sentry/kernel/pipe" 34 "github.com/MerlinKodo/gvisor/pkg/sentry/socket/unix/transport" 35 "github.com/MerlinKodo/gvisor/pkg/sentry/vfs" 36 ) 37 38 // Sync implements vfs.FilesystemImpl.Sync. 39 func (fs *filesystem) Sync(ctx context.Context) error { 40 // Snapshot current syncable dentries and special file FDs. 
41 fs.syncMu.Lock() 42 ds := make([]*dentry, 0, fs.syncableDentries.Len()) 43 for elem := fs.syncableDentries.Front(); elem != nil; elem = elem.Next() { 44 ds = append(ds, elem.d) 45 } 46 sffds := make([]*specialFileFD, 0, fs.specialFileFDs.Len()) 47 for sffd := fs.specialFileFDs.Front(); sffd != nil; sffd = sffd.Next() { 48 sffds = append(sffds, sffd) 49 } 50 fs.syncMu.Unlock() 51 52 // Return the first error we encounter, but sync everything we can 53 // regardless. 54 var retErr error 55 56 // Note that lisafs is capable of batching FSync RPCs. However, we can not 57 // batch all the FDIDs to be synced from ds and sffds. Because the error 58 // handling varies based on file type. FSync errors are only considered for 59 // regular file FDIDs that were opened for writing. We could do individual 60 // RPCs for such FDIDs and batch the rest, but it increases code complexity 61 // substantially. We could implement it in the future if need be. 62 63 // Sync syncable dentries. 64 for _, d := range ds { 65 if err := d.syncCachedFile(ctx, true /* forFilesystemSync */); err != nil { 66 ctx.Infof("gofer.filesystem.Sync: dentry.syncCachedFile failed: %v", err) 67 if retErr == nil { 68 retErr = err 69 } 70 } 71 } 72 73 // Sync special files, which may be writable but do not use dentry shared 74 // handles (so they won't be synced by the above). 75 for _, sffd := range sffds { 76 if err := sffd.sync(ctx, true /* forFilesystemSync */); err != nil { 77 ctx.Infof("gofer.filesystem.Sync: specialFileFD.sync failed: %v", err) 78 if retErr == nil { 79 retErr = err 80 } 81 } 82 } 83 84 return retErr 85 } 86 87 // MaxFilenameLen is the maximum length of a filename. This is dictated by 9P's 88 // encoding of strings, which uses 2 bytes for the length prefix. 89 const MaxFilenameLen = (1 << 16) - 1 90 91 // dentrySlicePool is a pool of *[]*dentry used to store dentries for which 92 // dentry.checkCachingLocked() must be called. 
The pool holds pointers to 93 // slices because Go lacks generics, so sync.Pool operates on any, so 94 // every call to (what should be) sync.Pool<[]*dentry>.Put() allocates a copy 95 // of the slice header on the heap. 96 var dentrySlicePool = sync.Pool{ 97 New: func() any { 98 ds := make([]*dentry, 0, 4) // arbitrary non-zero initial capacity 99 return &ds 100 }, 101 } 102 103 func appendDentry(ds *[]*dentry, d *dentry) *[]*dentry { 104 if ds == nil { 105 ds = dentrySlicePool.Get().(*[]*dentry) 106 } 107 *ds = append(*ds, d) 108 return ds 109 } 110 111 // Precondition: !parent.isSynthetic() && !child.isSynthetic(). 112 func appendNewChildDentry(ds **[]*dentry, parent *dentry, child *dentry) { 113 // The new child was added to parent and took a ref on the parent (hence 114 // parent can be removed from cache). A new child has 0 refs for now. So 115 // checkCachingLocked() should be called on both. Call it first on the parent 116 // as it may create space in the cache for child to be inserted - hence 117 // avoiding a cache eviction. 118 *ds = appendDentry(*ds, parent) 119 *ds = appendDentry(*ds, child) 120 } 121 122 // Preconditions: ds != nil. 123 func putDentrySlice(ds *[]*dentry) { 124 // Allow dentries to be GC'd. 125 for i := range *ds { 126 (*ds)[i] = nil 127 } 128 *ds = (*ds)[:0] 129 dentrySlicePool.Put(ds) 130 } 131 132 // renameMuRUnlockAndCheckCaching calls fs.renameMu.RUnlock(), then calls 133 // dentry.checkCachingLocked on all dentries in *dsp with fs.renameMu locked 134 // for writing. 135 // 136 // dsp is a pointer-to-pointer since defer evaluates its arguments immediately, 137 // but dentry slices are allocated lazily, and it's much easier to say "defer 138 // fs.renameMuRUnlockAndCheckCaching(&ds)" than "defer func() { 139 // fs.renameMuRUnlockAndCheckCaching(ds) }()" to work around this. 
// +checklocksreleaseread:fs.renameMu
func (fs *filesystem) renameMuRUnlockAndCheckCaching(ctx context.Context, dsp **[]*dentry) {
	fs.renameMu.RUnlock()
	if *dsp == nil {
		// No dentry was recorded, so no slice was ever taken from the pool.
		return
	}
	ds := **dsp
	for _, d := range ds {
		// renameMu was released above, hence renameMuWriteLocked is false.
		d.checkCachingLocked(ctx, false /* renameMuWriteLocked */)
	}
	putDentrySlice(*dsp)
}

// renameMuUnlockAndCheckCaching is the write-locked counterpart of
// renameMuRUnlockAndCheckCaching: it calls dentry.checkCachingLocked on all
// dentries in *ds while fs.renameMu is still locked for writing, then calls
// fs.renameMu.Unlock() and returns the slice to the pool. If *ds is nil it
// only unlocks fs.renameMu.
//
// +checklocksrelease:fs.renameMu
func (fs *filesystem) renameMuUnlockAndCheckCaching(ctx context.Context, ds **[]*dentry) {
	if *ds == nil {
		fs.renameMu.Unlock()
		return
	}
	for _, d := range **ds {
		d.checkCachingLocked(ctx, true /* renameMuWriteLocked */)
	}
	fs.renameMu.Unlock()
	putDentrySlice(*ds)
}

// stepLocked resolves rp.Component() to an existing file, starting from the
// given directory.
//
// Dentries which may become cached as a result of the traversal are appended
// to *ds.
//
// On success it returns the dentry from which resolution continues and
// whether a symlink was followed. When a symlink is followed, the returned
// dentry is d itself and rp has been updated through rp.HandleSymlink();
// rp.Advance() is not called in that case.
//
// Preconditions:
//   - fs.renameMu must be locked.
//   - d.opMu must be locked for reading.
//   - !rp.Done().
//   - If !d.cachedMetadataAuthoritative(), then d and all children that are
//     part of rp must have been revalidated.
//
// +checklocksread:d.opMu
func (fs *filesystem) stepLocked(ctx context.Context, rp resolvingPath, d *dentry, mayFollowSymlinks bool, ds **[]*dentry) (*dentry, bool, error) {
	if !d.isDir() {
		return nil, false, linuxerr.ENOTDIR
	}
	// Traversing through d requires execute (search) permission on it.
	if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
		return nil, false, err
	}
	name := rp.Component()
	if name == "." {
		rp.Advance()
		return d, false, nil
	}
	if name == ".." {
		if isRoot, err := rp.CheckRoot(ctx, &d.vfsd); err != nil {
			return nil, false, err
		} else if isRoot || d.parent == nil {
			// ".." at the root, or on a dentry without a parent, stays on d.
			rp.Advance()
			return d, false, nil
		}
		if err := rp.CheckMount(ctx, &d.parent.vfsd); err != nil {
			return nil, false, err
		}
		rp.Advance()
		return d.parent, false, nil
	}
	child, err := fs.getChildAndWalkPathLocked(ctx, d, rp, ds)
	if err != nil {
		return nil, false, err
	}
	if err := rp.CheckMount(ctx, &child.vfsd); err != nil {
		return nil, false, err
	}
	if child.isSymlink() && mayFollowSymlinks && rp.ShouldFollowSymlink() {
		target, err := child.readlink(ctx, rp.Mount())
		if err != nil {
			return nil, false, err
		}
		// Resolution restarts from d with the symlink target spliced into rp
		// by HandleSymlink, so d (not child) is returned.
		followedSymlink, err := rp.HandleSymlink(target)
		return d, followedSymlink, err
	}
	rp.Advance()
	return child, false, nil
}

// getChildLocked returns a dentry representing the child of parent with the
// given name. Returns ENOENT if the child doesn't exist.
//
// Preconditions:
//   - fs.renameMu must be locked.
//   - parent.opMu must be locked.
//   - parent.isDir().
//   - name is not "." or "..".
//   - parent and the dentry at name have been revalidated.
//
// +checklocks:parent.opMu
func (fs *filesystem) getChildLocked(ctx context.Context, parent *dentry, name string, ds **[]*dentry) (*dentry, error) {
	// A cached child, or a definite answer from the cache (an error), ends
	// the lookup here.
	if child, err := parent.getCachedChildLocked(name); child != nil || err != nil {
		return child, err
	}
	// We don't need to check for race here because parent.opMu is held for
	// writing.
	return fs.getRemoteChildLocked(ctx, parent, name, false /* checkForRace */, ds)
}

// getRemoteChildLocked is similar to getChildLocked, with the additional
// precondition that the child identified by name does not exist in cache.
//
// If checkForRace argument is true, then this method will check to see if the
// call has raced with another getRemoteChild call, and will handle the race if
// so.
//
// Preconditions:
//   - If checkForRace is false, then parent.opMu must be held for writing.
//   - Otherwise, parent.opMu must be held for reading.
//
// Postcondition: The returned dentry is already cached appropriately.
//
// +checklocksread:parent.opMu
func (fs *filesystem) getRemoteChildLocked(ctx context.Context, parent *dentry, name string, checkForRace bool, ds **[]*dentry) (*dentry, error) {
	child, err := parent.getRemoteChild(ctx, name)
	// Cache the result appropriately in the dentry tree.
	if err != nil {
		if linuxerr.Equals(linuxerr.ENOENT, err) {
			// Only ENOENT is remembered as a negative lookup; any other
			// error is returned without touching the cache.
			parent.childrenMu.Lock()
			defer parent.childrenMu.Unlock()
			parent.cacheNegativeLookupLocked(name)
		}
		return nil, err
	}

	// childrenMu stays held until return so that the race check and the
	// cache insertion below happen under the same critical section.
	parent.childrenMu.Lock()
	defer parent.childrenMu.Unlock()

	if checkForRace {
		// See if we raced with another getRemoteChild call that added
		// to the cache.
		if cachedChild, ok := parent.children[name]; ok && cachedChild != nil {
			// We raced. Destroy our child and return the cached
			// one. This child has no handles, no data, and has not
			// been cached, so destruction is quick and painless.
			child.destroyDisconnected(ctx)

			// All good. Return the cached child.
			return cachedChild, nil
		}
		// No race, continue with the child we got.
	}
	parent.cacheNewChildLocked(child, name)
	// Both parent and child are recorded in *ds (parent first).
	appendNewChildDentry(ds, parent, child)
	return child, nil
}

// getChildAndWalkPathLocked is the same as getChildLocked, except that it
// may prefetch the entire path represented by rp.
294 // 295 // +checklocksread:parent.opMu 296 func (fs *filesystem) getChildAndWalkPathLocked(ctx context.Context, parent *dentry, rp resolvingPath, ds **[]*dentry) (*dentry, error) { 297 if child, err := parent.getCachedChildLocked(rp.Component()); child != nil || err != nil { 298 return child, err 299 } 300 // dentry.getRemoteChildAndWalkPathLocked already handles dentry caching. 301 return parent.getRemoteChildAndWalkPathLocked(ctx, rp, ds) 302 } 303 304 // getCachedChildLocked returns a child dentry if it was cached earlier. If no 305 // cached child dentry exists, (nil, nil) is returned. 306 // 307 // Preconditions: 308 // - fs.renameMu must be locked. 309 // - d.opMu must be locked for reading. 310 // - d.isDir(). 311 // - name is not "." or "..". 312 // - d and the dentry at name have been revalidated. 313 // 314 // +checklocksread:d.opMu 315 func (d *dentry) getCachedChildLocked(name string) (*dentry, error) { 316 if len(name) > MaxFilenameLen { 317 return nil, linuxerr.ENAMETOOLONG 318 } 319 d.childrenMu.Lock() 320 defer d.childrenMu.Unlock() 321 if child, ok := d.children[name]; ok || d.isSynthetic() { 322 if child == nil { 323 return nil, linuxerr.ENOENT 324 } 325 return child, nil 326 } 327 328 if d.childrenSet != nil { 329 // Is the child even there? Don't make RPC if not. 330 if _, ok := d.childrenSet[name]; !ok { 331 return nil, linuxerr.ENOENT 332 } 333 } 334 return nil, nil 335 } 336 337 // walkParentDirLocked resolves all but the last path component of rp to an 338 // existing directory, starting from the given directory (which is usually 339 // rp.Start().Impl().(*dentry)). It does not check that the returned directory 340 // is searchable by the provider of rp. 341 // 342 // Preconditions: 343 // - fs.renameMu must be locked. 344 // - !rp.Done(). 345 // - If !d.cachedMetadataAuthoritative(), then d's cached metadata must be up 346 // to date. 
347 func (fs *filesystem) walkParentDirLocked(ctx context.Context, vfsRP *vfs.ResolvingPath, d *dentry, ds **[]*dentry) (*dentry, error) { 348 rp := resolvingPathParent(vfsRP) 349 if err := fs.revalidatePath(ctx, rp, d, ds); err != nil { 350 return nil, err 351 } 352 for !rp.done() { 353 d.opMu.RLock() 354 next, followedSymlink, err := fs.stepLocked(ctx, rp, d, true /* mayFollowSymlinks */, ds) 355 d.opMu.RUnlock() 356 if err != nil { 357 return nil, err 358 } 359 d = next 360 if followedSymlink { 361 if err := fs.revalidatePath(ctx, rp, d, ds); err != nil { 362 return nil, err 363 } 364 } 365 } 366 if !d.isDir() { 367 return nil, linuxerr.ENOTDIR 368 } 369 return d, nil 370 } 371 372 // resolveLocked resolves rp to an existing file. 373 // 374 // Preconditions: fs.renameMu must be locked. 375 func (fs *filesystem) resolveLocked(ctx context.Context, vfsRP *vfs.ResolvingPath, ds **[]*dentry) (*dentry, error) { 376 rp := resolvingPathFull(vfsRP) 377 d := rp.Start().Impl().(*dentry) 378 if err := fs.revalidatePath(ctx, rp, d, ds); err != nil { 379 return nil, err 380 } 381 for !rp.done() { 382 d.opMu.RLock() 383 next, followedSymlink, err := fs.stepLocked(ctx, rp, d, true /* mayFollowSymlinks */, ds) 384 d.opMu.RUnlock() 385 if err != nil { 386 return nil, err 387 } 388 d = next 389 if followedSymlink { 390 if err := fs.revalidatePath(ctx, rp, d, ds); err != nil { 391 return nil, err 392 } 393 } 394 } 395 if rp.MustBeDir() && !d.isDir() { 396 return nil, linuxerr.ENOTDIR 397 } 398 return d, nil 399 } 400 401 // doCreateAt checks that creating a file at rp is permitted, then invokes 402 // createInRemoteDir (if the parent directory is a real remote directory) or 403 // createInSyntheticDir (if the parent directory is synthetic) to do so. 404 // 405 // Preconditions: 406 // - !rp.Done(). 407 // - For the final path component in rp, !rp.ShouldFollowSymlink(). 
// dir reports whether a directory is being created: it adds IN_ISDIR to the
// inotify event and, when false, makes a path with rp.MustBeDir() fail with
// ENOENT. createInSyntheticDir may be nil, in which case creation under a
// synthetic parent fails with EPERM. Both callbacks run with parent.opMu held
// for writing.
func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir bool, createInRemoteDir func(parent *dentry, name string, ds **[]*dentry) (*dentry, error), createInSyntheticDir func(parent *dentry, name string) (*dentry, error)) error {
	var ds *[]*dentry
	fs.renameMu.RLock()
	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
	start := rp.Start().Impl().(*dentry)
	parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
	if err != nil {
		return err
	}

	// Order of checks is important. First check if parent directory can be
	// executed, then check for existence, and lastly check if mount is writable.
	if err := parent.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
		return err
	}
	name := rp.Component()
	if name == "." || name == ".." {
		return linuxerr.EEXIST
	}
	if parent.isDeleted() {
		return linuxerr.ENOENT
	}
	if err := fs.revalidateOne(ctx, rp.VirtualFilesystem(), parent, name, &ds); err != nil {
		return err
	}

	// Held for writing until return.
	parent.opMu.Lock()
	defer parent.opMu.Unlock()

	if len(name) > MaxFilenameLen {
		return linuxerr.ENAMETOOLONG
	}
	// Check for existence only if caching information is available. Otherwise,
	// don't check for existence just yet. We will check for existence if the
	// checks for writability fail below. Existence check is done by the creation
	// RPCs themselves.
	parent.childrenMu.Lock()
	if child, ok := parent.children[name]; ok && child != nil {
		parent.childrenMu.Unlock()
		return linuxerr.EEXIST
	}
	if parent.childrenSet != nil {
		if _, ok := parent.childrenSet[name]; ok {
			parent.childrenMu.Unlock()
			return linuxerr.EEXIST
		}
	}
	parent.childrenMu.Unlock()
	// checkExistence does a full lookup of name under parent through
	// getChildLocked and reports EEXIST if a child is found. ENOENT from the
	// lookup means "does not exist" and yields nil; any other lookup error is
	// returned as is.
	checkExistence := func() error {
		if child, err := fs.getChildLocked(ctx, parent, name, &ds); err != nil && !linuxerr.Equals(linuxerr.ENOENT, err) {
			return err
		} else if child != nil {
			return linuxerr.EEXIST
		}
		return nil
	}

	mnt := rp.Mount()
	if err := mnt.CheckBeginWrite(); err != nil {
		// Existence check takes precedence.
		if existenceErr := checkExistence(); existenceErr != nil {
			return existenceErr
		}
		return err
	}
	defer mnt.EndWrite()

	if err := parent.checkPermissions(rp.Credentials(), vfs.MayWrite); err != nil {
		// Existence check takes precedence.
		if existenceErr := checkExistence(); existenceErr != nil {
			return existenceErr
		}
		return err
	}
	if !dir && rp.MustBeDir() {
		return linuxerr.ENOENT
	}
	if parent.isSynthetic() {
		if createInSyntheticDir == nil {
			return linuxerr.EPERM
		}
		child, err := createInSyntheticDir(parent, name)
		if err != nil {
			return err
		}
		parent.childrenMu.Lock()
		parent.cacheNewChildLocked(child, name)
		parent.syntheticChildren++
		parent.clearDirentsLocked()
		parent.childrenMu.Unlock()
		parent.touchCMtime()
		ev := linux.IN_CREATE
		if dir {
			ev |= linux.IN_ISDIR
		}
		parent.watches.Notify(ctx, name, uint32(ev), 0, vfs.InodeEvent, false /* unlinked */)
		return nil
	}
	// No cached dentry exists; however, in InteropModeShared there might still be
	// an existing file at name. Just attempt the file creation RPC anyways. If a
	// file does exist, the RPC will fail with EEXIST like we would have.
	child, err := createInRemoteDir(parent, name, &ds)
	if err != nil {
		return err
	}
	parent.childrenMu.Lock()
	parent.cacheNewChildLocked(child, name)
	if child.isSynthetic() {
		// createInRemoteDir may hand back a synthetic dentry; in that case
		// only the parent is recorded in ds.
		parent.syntheticChildren++
		ds = appendDentry(ds, parent)
	} else {
		appendNewChildDentry(&ds, parent, child)
	}
	if fs.opts.interop != InteropModeShared {
		// NOTE(review): cacheNewChildLocked(child, name) was called just
		// above; if it stores child in parent.children[name], this negative
		// entry check can never fire — confirm against its definition.
		if child, ok := parent.children[name]; ok && child == nil {
			// Delete the now-stale negative dentry.
			delete(parent.children, name)
			parent.negativeChildren--
		}
		parent.clearDirentsLocked()
		parent.touchCMtime()
	}
	parent.childrenMu.Unlock()
	ev := linux.IN_CREATE
	if dir {
		ev |= linux.IN_ISDIR
	}
	parent.watches.Notify(ctx, name, uint32(ev), 0, vfs.InodeEvent, false /* unlinked */)
	return nil
}

// unlinkAt removes the file named by rp. If dir is true it acts as rmdir:
// the target must be an empty directory and AT_REMOVEDIR is passed to the
// remote unlink. Otherwise the target must not be a directory.
//
// Preconditions: !rp.Done().
func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir bool) error {
	var ds *[]*dentry
	fs.renameMu.RLock()
	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
	start := rp.Start().Impl().(*dentry)
	parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
	if err != nil {
		return err
	}
	if err := parent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil {
		return err
	}
	if err := rp.Mount().CheckBeginWrite(); err != nil {
		return err
	}
	defer rp.Mount().EndWrite()

	name := rp.Component()
	if dir {
		if name == "." {
			return linuxerr.EINVAL
		}
		if name == ".." {
			return linuxerr.ENOTEMPTY
		}
	} else {
		if name == "." || name == ".." {
			return linuxerr.EISDIR
		}
	}

	vfsObj := rp.VirtualFilesystem()
	if err := fs.revalidateOne(ctx, vfsObj, parent, rp.Component(), &ds); err != nil {
		return err
	}

	mntns := vfs.MountNamespaceFromContext(ctx)
	defer mntns.DecRef(ctx)

	// Held for writing until return.
	parent.opMu.Lock()
	defer parent.opMu.Unlock()

	// If parent.childrenSet is populated and does not contain name, fail
	// with ENOENT right away.
	parent.childrenMu.Lock()
	if parent.childrenSet != nil {
		if _, ok := parent.childrenSet[name]; !ok {
			parent.childrenMu.Unlock()
			return linuxerr.ENOENT
		}
	}
	parent.childrenMu.Unlock()

	// Without the sticky bit, only the cached dentry (if any) is consulted.
	// With the sticky bit set, the child is loaded because mayDelete() needs
	// it to determine whether deletion is allowed.
	var child *dentry
	if parent.mode.Load()&linux.ModeSticky == 0 {
		var ok bool
		parent.childrenMu.Lock()
		child, ok = parent.children[name]
		parent.childrenMu.Unlock()
		if ok && child == nil {
			// Hit a negative cached entry, child doesn't exist.
			return linuxerr.ENOENT
		}
	} else {
		child, _, err = fs.stepLocked(ctx, resolvingPathFull(rp), parent, false /* mayFollowSymlinks */, &ds)
		if err != nil {
			return err
		}
		if err := parent.mayDelete(rp.Credentials(), child); err != nil {
			return err
		}
	}

	// If a child dentry exists, prepare to delete it. This should fail if it is
	// a mount point. We detect mount points by speculatively calling
	// PrepareDeleteDentry, which fails if child is a mount point.
	//
	// Also note that if child is nil, then it can't be a mount point.
	if child != nil {
		// Hold child.childrenMu so we can check child.children and
		// child.syntheticChildren. We don't access these fields until a bit later,
		// but locking child.childrenMu after calling vfs.PrepareDeleteDentry() would
		// create an inconsistent lock ordering between dentry.childrenMu and
		// vfs.Dentry.mu (in the VFS lock order, it would make dentry.childrenMu both "a
		// FilesystemImpl lock" and "a lock acquired by a FilesystemImpl between
		// PrepareDeleteDentry and CommitDeleteDentry"). To avoid this, lock
		// child.childrenMu before calling PrepareDeleteDentry.
		child.childrenMu.Lock()
		defer child.childrenMu.Unlock()
		if err := vfsObj.PrepareDeleteDentry(mntns, &child.vfsd); err != nil {
			return err
		}
	}
	// From here on, every error return with child != nil must call
	// AbortDeleteDentry to undo the PrepareDeleteDentry above.
	flags := uint32(0)
	// If a dentry exists, use it for best-effort checks on its deletability.
	if dir {
		if child != nil {
			// child must be an empty directory.
			if child.syntheticChildren != 0 { // +checklocksforce: child.childrenMu is held if child != nil.
				// This is definitely not an empty directory, irrespective of
				// fs.opts.interop.
				vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: PrepareDeleteDentry called if child != nil.
				return linuxerr.ENOTEMPTY
			}
			// If InteropModeShared is in effect and the first call to
			// PrepareDeleteDentry above succeeded, then child wasn't
			// revalidated (so we can't expect its file type to be correct) and
			// individually revalidating its children (to confirm that they
			// still exist) would be a waste of time.
			if child.cachedMetadataAuthoritative() {
				if !child.isDir() {
					vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
					return linuxerr.ENOTDIR
				}
				// Any non-nil (i.e. positive) cached grandchild means the
				// directory is not empty.
				for _, grandchild := range child.children { // +checklocksforce: child.childrenMu is held if child != nil.
					if grandchild != nil {
						vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
						return linuxerr.ENOTEMPTY
					}
				}
			}
		}
		flags = linux.AT_REMOVEDIR
	} else {
		// child must be a non-directory file.
		if child != nil && child.isDir() {
			vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
			return linuxerr.EISDIR
		}
		if rp.MustBeDir() {
			if child != nil {
				vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
			}
			return linuxerr.ENOTDIR
		}
	}
	if parent.isSynthetic() {
		if child == nil {
			return linuxerr.ENOENT
		}
	} else if child == nil || !child.isSynthetic() {
		// The remote unlink is skipped only when the child is a synthetic
		// dentry.
		if err := parent.unlink(ctx, name, flags); err != nil {
			if child != nil {
				vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
			}
			return err
		}
	}

	// Generate inotify events for rmdir or unlink.
	if dir {
		parent.watches.Notify(ctx, name, linux.IN_DELETE|linux.IN_ISDIR, 0, vfs.InodeEvent, true /* unlinked */)
	} else {
		var cw *vfs.Watches
		if child != nil {
			cw = &child.watches
		}
		vfs.InotifyRemoveChild(ctx, cw, &parent.watches, name)
	}

	parent.childrenMu.Lock()
	defer parent.childrenMu.Unlock()

	if child != nil {
		vfsObj.CommitDeleteDentry(ctx, &child.vfsd) // +checklocksforce: see above.
		child.setDeleted()
		if child.isSynthetic() {
			parent.syntheticChildren--
			child.decRefNoCaching()
		}
		ds = appendDentry(ds, child)
	}
	parent.cacheNegativeLookupLocked(name)
	if parent.cachedMetadataAuthoritative() {
		parent.clearDirentsLocked()
		parent.touchCMtime()
		if dir {
			parent.decLinks()
		}
	}
	return nil
}

// AccessAt implements vfs.FilesystemImpl.AccessAt.
724 func (fs *filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds *auth.Credentials, ats vfs.AccessTypes) error { 725 var ds *[]*dentry 726 fs.renameMu.RLock() 727 defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds) 728 d, err := fs.resolveLocked(ctx, rp, &ds) 729 if err != nil { 730 return err 731 } 732 if err := d.checkPermissions(creds, ats); err != nil { 733 return err 734 } 735 if ats.MayWrite() && rp.Mount().ReadOnly() { 736 return linuxerr.EROFS 737 } 738 return nil 739 } 740 741 // GetDentryAt implements vfs.FilesystemImpl.GetDentryAt. 742 func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) { 743 var ds *[]*dentry 744 fs.renameMu.RLock() 745 defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds) 746 d, err := fs.resolveLocked(ctx, rp, &ds) 747 if err != nil { 748 return nil, err 749 } 750 if opts.CheckSearchable { 751 if !d.isDir() { 752 return nil, linuxerr.ENOTDIR 753 } 754 if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil { 755 return nil, err 756 } 757 } 758 d.IncRef() 759 // Call d.checkCachingLocked() so it can be removed from the cache if needed. 760 ds = appendDentry(ds, d) 761 return &d.vfsd, nil 762 } 763 764 // GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt. 765 func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) { 766 var ds *[]*dentry 767 fs.renameMu.RLock() 768 defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds) 769 start := rp.Start().Impl().(*dentry) 770 d, err := fs.walkParentDirLocked(ctx, rp, start, &ds) 771 if err != nil { 772 return nil, err 773 } 774 d.IncRef() 775 // Call d.checkCachingLocked() so it can be removed from the cache if needed. 776 ds = appendDentry(ds, d) 777 return &d.vfsd, nil 778 } 779 780 // LinkAt implements vfs.FilesystemImpl.LinkAt. 
func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error {
	// No createInSyntheticDir callback is supplied, so doCreateAt fails with
	// EPERM when the parent directory is synthetic.
	err := fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, name string, ds **[]*dentry) (*dentry, error) {
		// Links across mounts are refused.
		if rp.Mount() != vd.Mount() {
			return nil, linuxerr.EXDEV
		}
		d := vd.Dentry().Impl().(*dentry)
		if d.isDir() {
			return nil, linuxerr.EPERM
		}
		gid := auth.KGID(d.gid.Load())
		uid := auth.KUID(d.uid.Load())
		mode := linux.FileMode(d.mode.Load())
		if err := vfs.MayLink(rp.Credentials(), mode, uid, gid); err != nil {
			return nil, err
		}
		// A link count of zero is reported as ENOENT.
		if d.nlink.Load() == 0 {
			return nil, linuxerr.ENOENT
		}
		// The link count cannot grow past MaxUint32.
		if d.nlink.Load() == math.MaxUint32 {
			return nil, linuxerr.EMLINK
		}
		return parent.link(ctx, d, name)
	}, nil)

	if err == nil {
		// Success!
		vd.Dentry().Impl().(*dentry).incLinks()
	}
	return err
}

// MkdirAt implements vfs.FilesystemImpl.MkdirAt.
func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error {
	creds := rp.Credentials()
	return fs.doCreateAt(ctx, rp, true /* dir */, func(parent *dentry, name string, ds **[]*dentry) (*dentry, error) {
		// If the parent is a setgid directory, use the parent's GID
		// rather than the caller's and enable setgid.
		kgid := creds.EffectiveKGID
		mode := opts.Mode
		if parent.mode.Load()&linux.S_ISGID != 0 {
			kgid = auth.KGID(parent.gid.Load())
			mode |= linux.S_ISGID
		}

		child, err := parent.mkdir(ctx, name, mode, creds.EffectiveKUID, kgid)
		if err == nil {
			if fs.opts.interop != InteropModeShared {
				parent.incLinks()
			}
			return child, nil
		}

		// The remote mkdir failed. Fall back to a synthetic directory only
		// when the caller asked for a synthetic mountpoint and the failure is
		// not EEXIST.
		if !opts.ForSyntheticMountpoint || linuxerr.Equals(linuxerr.EEXIST, err) {
			return nil, err
		}
		ctx.Infof("Failed to create remote directory %q: %v; falling back to synthetic directory", name, err)
		// NOTE(review): the fallback uses opts.Mode and the caller's GID, not
		// the setgid-adjusted mode/kgid computed above — confirm intended.
		child = fs.newSyntheticDentry(&createSyntheticOpts{
			name: name,
			mode: linux.S_IFDIR | opts.Mode,
			kuid: creds.EffectiveKUID,
			kgid: creds.EffectiveKGID,
		})
		if fs.opts.interop != InteropModeShared {
			parent.incLinks()
		}
		return child, nil
	}, func(parent *dentry, name string) (*dentry, error) {
		if !opts.ForSyntheticMountpoint {
			// Can't create non-synthetic files in synthetic directories.
			return nil, linuxerr.EPERM
		}
		child := fs.newSyntheticDentry(&createSyntheticOpts{
			name: name,
			mode: linux.S_IFDIR | opts.Mode,
			kuid: creds.EffectiveKUID,
			kgid: creds.EffectiveKGID,
		})
		parent.incLinks()
		return child, nil
	})
}

// MknodAt implements vfs.FilesystemImpl.MknodAt.
func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error {
	// No createInSyntheticDir callback is supplied, so doCreateAt fails with
	// EPERM when the parent directory is synthetic.
	return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, name string, ds **[]*dentry) (*dentry, error) {
		creds := rp.Credentials()
		if child, err := parent.mknod(ctx, name, creds, &opts); err == nil {
			return child, nil
		} else if !linuxerr.Equals(linuxerr.EPERM, err) {
			return nil, err
		}

		// EPERM means that gofer does not allow creating a socket or pipe. Fallback
		// to creating a synthetic one, i.e. one that is kept entirely in memory.

		// Check that we're not overriding an existing file with a synthetic one.
		_, _, err := fs.stepLocked(ctx, resolvingPathFull(rp), parent, false /* mayFollowSymlinks */, ds) // +checklocksforce: parent.opMu taken by doCreateAt.
		switch {
		case err == nil:
			// Step succeeded, another file exists.
			return nil, linuxerr.EEXIST
		case !linuxerr.Equals(linuxerr.ENOENT, err):
			// Schrödinger. File/Cat may or may not exist.
			return nil, err
		}

		// Only sockets and named pipes have a synthetic fallback.
		switch opts.Mode.FileType() {
		case linux.S_IFSOCK:
			return fs.newSyntheticDentry(&createSyntheticOpts{
				name:     name,
				mode:     opts.Mode,
				kuid:     creds.EffectiveKUID,
				kgid:     creds.EffectiveKGID,
				endpoint: opts.Endpoint,
			}), nil
		case linux.S_IFIFO:
			return fs.newSyntheticDentry(&createSyntheticOpts{
				name: name,
				mode: opts.Mode,
				kuid: creds.EffectiveKUID,
				kgid: creds.EffectiveKGID,
				pipe: pipe.NewVFSPipe(true /* isNamed */, pipe.DefaultPipeSize),
			}), nil
		}
		// Retain error from gofer if synthetic file cannot be created internally.
		return nil, linuxerr.EPERM
	}, nil)
}

// OpenAt implements vfs.FilesystemImpl.OpenAt.
func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
	// Reject O_TMPFILE, which is not supported; supporting it correctly in the
	// presence of other remote filesystem users requires remote filesystem
	// support, and it isn't clear that there's any way to implement this in
	// 9P.
	if opts.Flags&linux.O_TMPFILE != 0 {
		return nil, linuxerr.EOPNOTSUPP
	}
	mayCreate := opts.Flags&linux.O_CREAT != 0
	mustCreate := opts.Flags&(linux.O_CREAT|linux.O_EXCL) == (linux.O_CREAT | linux.O_EXCL)

	var ds *[]*dentry
	fs.renameMu.RLock()
	// unlock releases renameMu (and runs the pending caching checks) at most
	// once. It is deferred for the error paths and called explicitly before
	// dentry.open(), which must be entered with no locks held.
	unlocked := false
	unlock := func() {
		if !unlocked {
			fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
			unlocked = true
		}
	}
	defer unlock()

	start := rp.Start().Impl().(*dentry)
	if rp.Done() {
		// Reject attempts to open mount root directory with O_CREAT.
		if mayCreate && rp.MustBeDir() {
			return nil, linuxerr.EISDIR
		}
		if mustCreate {
			return nil, linuxerr.EEXIST
		}
		if !start.cachedMetadataAuthoritative() {
			// Refresh dentry's attributes before opening.
			if err := start.updateMetadata(ctx); err != nil {
				return nil, err
			}
		}
		// Hold a reference across the unlocked open() call.
		start.IncRef()
		defer start.DecRef(ctx)
		unlock()
		// start is intentionally not added to ds (which would remove it from the
		// cache) because doing so regresses performance in practice.
		return start.open(ctx, rp, &opts)
	}

afterTrailingSymlink:
	parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
	if err != nil {
		return nil, err
	}
	// Check for search permission in the parent directory.
	if err := parent.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
		return nil, err
	}
	// Reject attempts to open directories with O_CREAT.
	if mayCreate && rp.MustBeDir() {
		return nil, linuxerr.EISDIR
	}
	if err := fs.revalidateOne(ctx, rp.VirtualFilesystem(), parent, rp.Component(), &ds); err != nil {
		return nil, err
	}
	// Determine whether or not we need to create a file.
	// NOTE(b/263297063): Don't hold opMu for writing here, to avoid
	// serializing OpenAt calls in the same directory in the common case
	// that the file exists.
	parent.opMu.RLock()
	child, followedSymlink, err := fs.stepLocked(ctx, resolvingPathFull(rp), parent, true /* mayFollowSymlinks */, &ds)
	parent.opMu.RUnlock()
	if followedSymlink {
		if mustCreate {
			// EEXIST must be returned if an existing symlink is opened with O_EXCL.
			return nil, linuxerr.EEXIST
		}
		if err != nil {
			// If followedSymlink && err != nil, then this symlink resolution error
			// must be handled by the VFS layer.
			return nil, err
		}
		// Restart the walk from parent with the symlink target now in rp.
		start = parent
		goto afterTrailingSymlink
	}
	if linuxerr.Equals(linuxerr.ENOENT, err) && mayCreate {
		if parent.isSynthetic() {
			return nil, linuxerr.EPERM
		}

		// Take opMu for writing, but note that the file may have been
		// created by another goroutine since we checked for existence
		// a few lines ago. We must handle that case.
		parent.opMu.Lock()
		fd, createErr := parent.createAndOpenChildLocked(ctx, rp, &opts, &ds)
		if !linuxerr.Equals(linuxerr.EEXIST, createErr) {
			// Either the creation was a success, or we got an
			// unexpected error. Either way we can return here.
			parent.opMu.Unlock()
			return fd, createErr
		}

		// We raced, and now the file exists.
		if mustCreate {
			parent.opMu.Unlock()
			return nil, linuxerr.EEXIST
		}

		// Step to the file again. Since we still hold opMu for
		// writing, there can't be a race here.
		child, _, err = fs.stepLocked(ctx, resolvingPathFull(rp), parent, false /* mayFollowSymlinks */, &ds)
		parent.opMu.Unlock()
	}
	if err != nil {
		return nil, err
	}
	if mustCreate {
		return nil, linuxerr.EEXIST
	}
	if rp.MustBeDir() && !child.isDir() {
		return nil, linuxerr.ENOTDIR
	}
	// Hold a reference across the unlocked open() call.
	child.IncRef()
	defer child.DecRef(ctx)
	unlock()
	// child is intentionally not added to ds (which would remove it from the
	// cache) because doing so regresses performance in practice.
	return child.open(ctx, rp, &opts)
}

// Preconditions: The caller must hold no locks (since opening pipes may block
// indefinitely).
func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
	ats := vfs.AccessTypesForOpenFlags(opts)
	if err := d.checkPermissions(rp.Credentials(), ats); err != nil {
		return nil, err
	}

	if !d.isSynthetic() {
		// renameMu is locked here because it is required by d.openHandle(), which
		// is called by d.ensureSharedHandle() and d.openSpecialFile() below. It is
		// also required by d.connect() which is called by
		// d.openSocketByConnecting(). Note that opening non-synthetic pipes may
		// block, renameMu is unlocked separately in d.openSpecialFile() for pipes.
		d.fs.renameMu.RLock()
		defer d.fs.renameMu.RUnlock()
	}

	trunc := opts.Flags&linux.O_TRUNC != 0 && d.fileType() == linux.S_IFREG
	if trunc {
		// Lock metadataMu *while* we open a regular file with O_TRUNC because
		// open(2) will change the file size on server.
		d.metadataMu.Lock()
		defer d.metadataMu.Unlock()
	}

	var vfd *vfs.FileDescription
	var err error
	mnt := rp.Mount()
	switch d.fileType() {
	case linux.S_IFREG:
		if !d.fs.opts.regularFilesUseSpecialFileFD {
			if err := d.ensureSharedHandle(ctx, ats.MayRead(), ats.MayWrite(), trunc); err != nil {
				return nil, err
			}
			fd, err := newRegularFileFD(mnt, d, opts.Flags)
			if err != nil {
				return nil, err
			}
			vfd = &fd.vfsfd
		}
	case linux.S_IFDIR:
		// Can't open directories with O_CREAT.
		if opts.Flags&linux.O_CREAT != 0 {
			return nil, linuxerr.EISDIR
		}
		// Can't open directories writably.
1084 if ats&vfs.MayWrite != 0 { 1085 return nil, linuxerr.EISDIR 1086 } 1087 if opts.Flags&linux.O_DIRECT != 0 { 1088 return nil, linuxerr.EINVAL 1089 } 1090 if !d.isSynthetic() { 1091 if err := d.ensureSharedHandle(ctx, ats&vfs.MayRead != 0, false /* write */, false /* trunc */); err != nil { 1092 return nil, err 1093 } 1094 } 1095 fd := &directoryFD{} 1096 fd.LockFD.Init(&d.locks) 1097 if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil { 1098 return nil, err 1099 } 1100 if d.readFD.Load() >= 0 { 1101 fsmetric.GoferOpensHost.Increment() 1102 } else { 1103 fsmetric.GoferOpens9P.Increment() 1104 } 1105 return &fd.vfsfd, nil 1106 case linux.S_IFLNK: 1107 // Can't open symlinks without O_PATH, which is handled at the VFS layer. 1108 return nil, linuxerr.ELOOP 1109 case linux.S_IFSOCK: 1110 if d.isSynthetic() { 1111 return nil, linuxerr.ENXIO 1112 } 1113 if d.fs.iopts.OpenSocketsByConnecting { 1114 return d.openSocketByConnecting(ctx, opts) 1115 } 1116 case linux.S_IFIFO: 1117 if d.isSynthetic() { 1118 return d.pipe.Open(ctx, mnt, &d.vfsd, opts.Flags, &d.locks) 1119 } 1120 if d.fs.opts.disableFifoOpen { 1121 return nil, linuxerr.EPERM 1122 } 1123 } 1124 1125 if vfd == nil { 1126 if vfd, err = d.openSpecialFile(ctx, mnt, opts); err != nil { 1127 return nil, err 1128 } 1129 } 1130 1131 if trunc { 1132 // If no errors occured so far then update file size in memory. This 1133 // step is required even if !d.cachedMetadataAuthoritative() because 1134 // d.mappings has to be updated. 1135 // d.metadataMu has already been acquired if trunc == true. 1136 d.updateSizeLocked(0) 1137 1138 if d.cachedMetadataAuthoritative() { 1139 d.touchCMtimeLocked() 1140 } 1141 } 1142 return vfd, err 1143 } 1144 1145 // Precondition: fs.renameMu is locked. 
func (d *dentry) openSocketByConnecting(ctx context.Context, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
	// openSocketByConnecting opens the socket file d by connecting to it via
	// d.connect() and wrapping the resulting host socket FD in a host file
	// description that carries opts.Flags. O_DIRECT is rejected with EINVAL.
	if opts.Flags&linux.O_DIRECT != 0 {
		return nil, linuxerr.EINVAL
	}
	// Note that special value of linux.SockType = 0 is interpreted by lisafs
	// as "do not care about the socket type". Analogous to p9.AnonymousSocket.
	sockFD, err := d.connect(ctx, 0 /* sockType */)
	if err != nil {
		return nil, err
	}
	fd, err := host.NewFD(ctx, kernel.KernelFromContext(ctx).HostMount(), sockFD, &host.NewFDOptions{
		HaveFlags: true,
		Flags:     opts.Flags,
	})
	if err != nil {
		// The host FD was not wrapped, so close it here to avoid leaking it.
		unix.Close(sockFD)
		return nil, err
	}
	return fd, nil
}

// openSpecialFile opens a new handle on d via d.openHandle() and wraps it in
// a specialFileFD. O_DIRECT is rejected with EINVAL unless d is a regular
// file. For blocking opens of named pipes it emulates blocking semantics on
// top of what is assumed to be a nonblocking open on the server; see the
// comments in the body.
//
// Preconditions:
//   - !d.isSynthetic().
//   - fs.renameMu is locked. It may be released temporarily while pipe blocks.
//   - If d is a pipe, no other locks (other than fs.renameMu) should be held.
func (d *dentry) openSpecialFile(ctx context.Context, mnt *vfs.Mount, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
	ats := vfs.AccessTypesForOpenFlags(opts)
	if opts.Flags&linux.O_DIRECT != 0 && !d.isRegularFile() {
		return nil, linuxerr.EINVAL
	}
	// We assume that the server silently inserts O_NONBLOCK in the open flags
	// for all named pipes (because all existing gofers do this).
	//
	// NOTE(b/133875563): This makes named pipe opens racy, because the
	// mechanisms for translating nonblocking to blocking opens can only detect
	// the instantaneous presence of a peer holding the other end of the pipe
	// open, not whether the pipe was *previously* opened by a peer that has
	// since closed its end.
	isBlockingOpenOfNamedPipe := d.fileType() == linux.S_IFIFO && opts.Flags&linux.O_NONBLOCK == 0
retry:
	h, err := d.openHandle(ctx, ats.MayRead(), ats.MayWrite(), opts.Flags&linux.O_TRUNC != 0)
	if err != nil {
		if isBlockingOpenOfNamedPipe && ats == vfs.MayWrite && linuxerr.Equals(linuxerr.ENXIO, err) {
			// An attempt to open a named pipe with O_WRONLY|O_NONBLOCK fails
			// with ENXIO if opening the same named pipe with O_WRONLY would
			// block because there are no readers of the pipe. Release renameMu
			// while blocking.
			d.fs.renameMu.RUnlock()
			err := sleepBetweenNamedPipeOpenChecks(ctx)
			// Re-take renameMu before inspecting err, so the lock is held again
			// on every path out of this branch (the caller expects it locked).
			d.fs.renameMu.RLock()
			if err != nil {
				return nil, err
			}
			goto retry
		}
		return nil, err
	}
	// Read-only blocking open of a named pipe for which we got a host FD:
	// wait until the pipe has a writer.
	if isBlockingOpenOfNamedPipe && ats == vfs.MayRead && h.fd >= 0 {
		// Release renameMu while blocking.
		d.fs.renameMu.RUnlock()
		err := blockUntilNonblockingPipeHasWriter(ctx, h.fd)
		d.fs.renameMu.RLock()
		if err != nil {
			h.close(ctx)
			return nil, err
		}
	}
	fd, err := newSpecialFileFD(h, mnt, d, opts.Flags)
	if err != nil {
		// The handle was not adopted by an FD, so close it here.
		h.close(ctx)
		return nil, err
	}
	return &fd.vfsfd, nil
}

// Preconditions:
//   - d.fs.renameMu must be locked.
//   - d.opMu must be locked for writing.
//   - !d.isSynthetic().
//
// +checklocks:d.opMu
func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions, ds **[]*dentry) (*vfs.FileDescription, error) {
	// createAndOpenChildLocked creates the final component of rp as a child of
	// the directory d via d.openCreate(), inserts the new dentry into d's
	// child cache, and returns a file description for the newly opened file.
	// It requires write permission on d and fails with ENOENT if d has been
	// deleted.
	if err := d.checkPermissions(rp.Credentials(), vfs.MayWrite); err != nil {
		return nil, err
	}
	if d.isDeleted() {
		return nil, linuxerr.ENOENT
	}
	mnt := rp.Mount()
	if err := mnt.CheckBeginWrite(); err != nil {
		return nil, err
	}
	defer mnt.EndWrite()

	creds := rp.Credentials()
	name := rp.Component()
	// If the parent is a setgid directory, use the parent's GID rather
	// than the caller's.
	kgid := creds.EffectiveKGID
	if d.mode.Load()&linux.S_ISGID != 0 {
		kgid = auth.KGID(d.gid.Load())
	}

	child, h, err := d.openCreate(ctx, name, opts.Flags&linux.O_ACCMODE, opts.Mode, creds.EffectiveKUID, kgid)
	if err != nil {
		return nil, err
	}

	// Incorporate the fid that was opened by lcreate.
	useRegularFileFD := child.fileType() == linux.S_IFREG && !d.fs.opts.regularFilesUseSpecialFileFD
	if useRegularFileFD {
		// Install h as the child's shared handle for whichever access modes
		// the open flags request.
		var readable, writable bool
		child.handleMu.Lock()
		if vfs.MayReadFileWithOpenFlags(opts.Flags) {
			readable = true
			if h.fd != -1 {
				child.readFD = atomicbitops.FromInt32(h.fd)
				child.mmapFD = atomicbitops.FromInt32(h.fd)
			}
		}
		if vfs.MayWriteFileWithOpenFlags(opts.Flags) {
			writable = true
			child.writeFD = atomicbitops.FromInt32(h.fd)
		}
		child.updateHandles(ctx, h, readable, writable)
		child.handleMu.Unlock()
	}
	// Insert the dentry into the tree.
	d.childrenMu.Lock()
	// We have d.opMu for writing, so there can not be a cached child with
	// this name. We could not have raced.
	d.cacheNewChildLocked(child, name)
	appendNewChildDentry(ds, d, child)
	if d.cachedMetadataAuthoritative() {
		d.touchCMtime()
		d.clearDirentsLocked()
	}
	d.childrenMu.Unlock()

	// Finally, construct a file description representing the created file.
	var childVFSFD *vfs.FileDescription
	if useRegularFileFD {
		fd, err := newRegularFileFD(mnt, child, opts.Flags)
		if err != nil {
			return nil, err
		}
		childVFSFD = &fd.vfsfd
	} else {
		fd, err := newSpecialFileFD(h, mnt, child, opts.Flags)
		if err != nil {
			// h was not adopted by an FD in this branch, so close it here.
			h.close(ctx)
			return nil, err
		}
		childVFSFD = &fd.vfsfd
	}
	d.watches.Notify(ctx, name, linux.IN_CREATE, 0, vfs.PathEvent, false /* unlinked */)
	return childVFSFD, nil
}

// ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt.
//
// It resolves rp under fs.renameMu (held for reading) and returns the target
// of the resulting symlink, or EINVAL if the resolved dentry is not a symlink.
func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) {
	var ds *[]*dentry
	fs.renameMu.RLock()
	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
	d, err := fs.resolveLocked(ctx, rp, &ds)
	if err != nil {
		return "", err
	}
	if !d.isSymlink() {
		return "", linuxerr.EINVAL
	}
	return d.readlink(ctx, rp.Mount())
}

// RenameAt implements vfs.FilesystemImpl.RenameAt.
//
// It renames the child named oldName of oldParentVD to the final component of
// rp, replacing any existing file of that name where permitted. The only
// accepted flag is RENAME_NOREPLACE, and even that is rejected with EINVAL
// under InteropModeShared. The whole operation runs with fs.renameMu held for
// writing. Non-synthetic files are renamed on the remote filesystem first;
// the dentry tree and cached metadata are updated afterwards.
func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error {
	// Resolve newParent first to verify that it's on this Mount.
	var ds *[]*dentry
	fs.renameMu.Lock()
	defer fs.renameMuUnlockAndCheckCaching(ctx, &ds)
	newParent, err := fs.walkParentDirLocked(ctx, rp, rp.Start().Impl().(*dentry), &ds)
	if err != nil {
		return err
	}

	// Any flag other than RENAME_NOREPLACE is unsupported.
	if opts.Flags&^linux.RENAME_NOREPLACE != 0 {
		return linuxerr.EINVAL
	}
	if fs.opts.interop == InteropModeShared && opts.Flags&linux.RENAME_NOREPLACE != 0 {
		// Requires 9P support to synchronize with other remote filesystem
		// users.
		return linuxerr.EINVAL
	}

	newName := rp.Component()
	if newName == "." || newName == ".." {
		if opts.Flags&linux.RENAME_NOREPLACE != 0 {
			return linuxerr.EEXIST
		}
		return linuxerr.EBUSY
	}
	if len(newName) > MaxFilenameLen {
		return linuxerr.ENAMETOOLONG
	}
	// Source and destination must be on the same mount.
	mnt := rp.Mount()
	if mnt != oldParentVD.Mount() {
		return linuxerr.EXDEV
	}
	if err := mnt.CheckBeginWrite(); err != nil {
		return err
	}
	defer mnt.EndWrite()

	oldParent := oldParentVD.Dentry().Impl().(*dentry)
	if !oldParent.cachedMetadataAuthoritative() {
		// Refresh oldParent's attributes before checking permissions on it.
		if err := oldParent.updateMetadata(ctx); err != nil {
			return err
		}
	}
	creds := rp.Credentials()
	if err := oldParent.checkPermissions(creds, vfs.MayWrite|vfs.MayExec); err != nil {
		return err
	}

	vfsObj := rp.VirtualFilesystem()
	if err := fs.revalidateOne(ctx, vfsObj, newParent, newName, &ds); err != nil {
		return err
	}
	if err := fs.revalidateOne(ctx, vfsObj, oldParent, oldName, &ds); err != nil {
		return err
	}

	// We need a dentry representing the renamed file since, if it's a
	// directory, we need to check for write permission on it.
	oldParent.opMu.Lock()
	defer oldParent.opMu.Unlock()
	renamed, err := fs.getChildLocked(ctx, oldParent, oldName, &ds)
	if err != nil {
		return err
	}
	if err := oldParent.mayDelete(creds, renamed); err != nil {
		return err
	}
	if renamed.isDir() {
		// A directory cannot be moved into itself or one of its descendants.
		if renamed == newParent || genericIsAncestorDentry(renamed, newParent) {
			return linuxerr.EINVAL
		}
		if oldParent != newParent {
			if err := renamed.checkPermissions(creds, vfs.MayWrite); err != nil {
				return err
			}
		}
	} else {
		if opts.MustBeDir || rp.MustBeDir() {
			return linuxerr.ENOTDIR
		}
	}

	if oldParent != newParent {
		if err := newParent.checkPermissions(creds, vfs.MayWrite|vfs.MayExec); err != nil {
			return err
		}
		// oldParent.opMu is already held; also lock the distinct newParent.
		newParent.opMu.Lock()
		defer newParent.opMu.Unlock()
	}
	if newParent.isDeleted() {
		return linuxerr.ENOENT
	}
	replaced, err := fs.getChildLocked(ctx, newParent, newName, &ds) // +checklocksforce: newParent.opMu taken if newParent != oldParent.
	if err != nil && !linuxerr.Equals(linuxerr.ENOENT, err) {
		return err
	}
	// replacedVFSD stays nil when there is no existing file at the destination.
	var replacedVFSD *vfs.Dentry
	if replaced != nil {
		if opts.Flags&linux.RENAME_NOREPLACE != 0 {
			return linuxerr.EEXIST
		}
		replacedVFSD = &replaced.vfsd
		if replaced.isDir() {
			if !renamed.isDir() {
				return linuxerr.EISDIR
			}
			if genericIsAncestorDentry(replaced, renamed) {
				return linuxerr.ENOTEMPTY
			}
		} else {
			if rp.MustBeDir() || renamed.isDir() {
				return linuxerr.ENOTDIR
			}
		}
	}

	// Renaming a file onto itself is a no-op.
	if oldParent == newParent && oldName == newName {
		return nil
	}
	mntns := vfs.MountNamespaceFromContext(ctx)
	defer mntns.DecRef(ctx)
	if err := vfsObj.PrepareRenameDentry(mntns, &renamed.vfsd, replacedVFSD); err != nil {
		return err
	}

	// Update the remote filesystem.
	if !renamed.isSynthetic() {
		if err := oldParent.rename(ctx, oldName, newParent, newName); err != nil {
			vfsObj.AbortRenameDentry(&renamed.vfsd, replacedVFSD)
			return err
		}
	} else if replaced != nil && !replaced.isSynthetic() {
		// We are replacing an existing real file with a synthetic one, so we
		// need to unlink the former.
		flags := uint32(0)
		if replaced.isDir() {
			flags = linux.AT_REMOVEDIR
		}
		if err := newParent.unlink(ctx, newName, flags); err != nil {
			vfsObj.AbortRenameDentry(&renamed.vfsd, replacedVFSD)
			return err
		}
	}

	// Update the dentry tree.
	newParent.childrenMu.Lock()
	defer newParent.childrenMu.Unlock()
	if oldParent != newParent {
		oldParent.childrenMu.Lock()
		defer oldParent.childrenMu.Unlock()
	}

	vfsObj.CommitRenameReplaceDentry(ctx, &renamed.vfsd, replacedVFSD)
	if replaced != nil {
		replaced.setDeleted()
		if replaced.isSynthetic() {
			newParent.syntheticChildren--
			replaced.decRefNoCaching()
		}
		ds = appendDentry(ds, replaced)
		// Remove the replaced entry from its parent's cache.
		delete(newParent.children, newName)
	}
	oldParent.cacheNegativeLookupLocked(oldName) // +checklocksforce: oldParent.childrenMu is held if oldParent != newParent.
	if renamed.isSynthetic() {
		oldParent.syntheticChildren--
		newParent.syntheticChildren++
	}
	// We have d.opMu for writing, so no need to check for existence of a
	// child with the given name. We could not have raced.
	newParent.cacheNewChildLocked(renamed, newName)
	oldParent.decRefNoCaching()
	if oldParent != newParent {
		ds = appendDentry(ds, newParent)
		ds = appendDentry(ds, oldParent)
	}

	// Update metadata.
	if renamed.cachedMetadataAuthoritative() {
		renamed.touchCtime()
	}
	if oldParent.cachedMetadataAuthoritative() {
		oldParent.clearDirentsLocked()
		oldParent.touchCMtime()
		if renamed.isDir() {
			oldParent.decLinks()
		}
	}
	if newParent.cachedMetadataAuthoritative() {
		newParent.clearDirentsLocked()
		newParent.touchCMtime()
		if renamed.isDir() && (replaced == nil || !replaced.isDir()) {
			// Increase the link count if we did not replace another directory.
			newParent.incLinks()
		}
	}
	vfs.InotifyRename(ctx, &renamed.watches, &oldParent.watches, &newParent.watches, oldName, newName, renamed.isDir())
	return nil
}

// RmdirAt implements vfs.FilesystemImpl.RmdirAt.
//
// It delegates to fs.unlinkAt with dir set to true.
func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error {
	return fs.unlinkAt(ctx, rp, true /* dir */)
}

// SetStatAt implements vfs.FilesystemImpl.SetStatAt.
//
// It resolves rp and applies opts to the resulting dentry while holding
// fs.renameMu for reading. Inotify events derived from the stat mask are
// generated only on success, after renameMu has been released.
func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error {
	var ds *[]*dentry
	fs.renameMu.RLock()
	d, err := fs.resolveLocked(ctx, rp, &ds)
	if err != nil {
		fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
		return err
	}
	err = d.setStat(ctx, rp.Credentials(), &opts, rp.Mount())
	// Release renameMu before inspecting err so both outcomes unlock exactly
	// once.
	fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
	if err != nil {
		return err
	}

	if ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 {
		d.InotifyWithParent(ctx, ev, 0, vfs.InodeEvent)
	}
	return nil
}

// StatAt implements vfs.FilesystemImpl.StatAt.
1549 func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) { 1550 var ds *[]*dentry 1551 fs.renameMu.RLock() 1552 defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds) 1553 d, err := fs.resolveLocked(ctx, rp, &ds) 1554 if err != nil { 1555 return linux.Statx{}, err 1556 } 1557 // Since walking updates metadata for all traversed dentries under 1558 // InteropModeShared, including the returned one, we can return cached 1559 // metadata here regardless of fs.opts.interop. 1560 var stat linux.Statx 1561 d.statTo(&stat) 1562 return stat, nil 1563 } 1564 1565 // StatFSAt implements vfs.FilesystemImpl.StatFSAt. 1566 func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) { 1567 var ds *[]*dentry 1568 fs.renameMu.RLock() 1569 defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds) 1570 d, err := fs.resolveLocked(ctx, rp, &ds) 1571 if err != nil { 1572 return linux.Statfs{}, err 1573 } 1574 // If d is synthetic, invoke statfs on the first ancestor of d that isn't. 1575 for d.isSynthetic() { 1576 d = d.parent 1577 } 1578 statfs, err := d.statfs(ctx) 1579 if err != nil { 1580 return linux.Statfs{}, err 1581 } 1582 if statfs.NameLength == 0 || statfs.NameLength > MaxFilenameLen { 1583 statfs.NameLength = MaxFilenameLen 1584 } 1585 // This is primarily for distinguishing a gofer file system in 1586 // tests. Testing is important, so instead of defining 1587 // something completely random, use a standard value. 1588 statfs.Type = linux.V9FS_MAGIC 1589 return statfs, nil 1590 } 1591 1592 // SymlinkAt implements vfs.FilesystemImpl.SymlinkAt. 
1593 func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error { 1594 return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, name string, ds **[]*dentry) (*dentry, error) { 1595 child, err := parent.symlink(ctx, name, target, rp.Credentials()) 1596 if err != nil { 1597 return nil, err 1598 } 1599 if parent.fs.opts.interop != InteropModeShared { 1600 // Cache the symlink target on creation. In practice, this helps avoid a 1601 // lot of ReadLink RPCs. Note that when InteropModeShared is in effect, 1602 // we are forced to make Readlink RPCs. Because in this mode, we use host 1603 // timestamps, not timestamps based on our internal clock. And readlink 1604 // updates the atime on the host. 1605 child.haveTarget = true 1606 child.target = target 1607 } 1608 return child, nil 1609 }, nil) 1610 } 1611 1612 // UnlinkAt implements vfs.FilesystemImpl.UnlinkAt. 1613 func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error { 1614 return fs.unlinkAt(ctx, rp, false /* dir */) 1615 } 1616 1617 // BoundEndpointAt implements vfs.FilesystemImpl.BoundEndpointAt. 1618 func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) { 1619 var ds *[]*dentry 1620 fs.renameMu.RLock() 1621 defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds) 1622 d, err := fs.resolveLocked(ctx, rp, &ds) 1623 if err != nil { 1624 return nil, err 1625 } 1626 if err := d.checkPermissions(rp.Credentials(), vfs.MayWrite); err != nil { 1627 return nil, err 1628 } 1629 if !d.isSocket() { 1630 return nil, linuxerr.ECONNREFUSED 1631 } 1632 if d.endpoint != nil { 1633 return d.endpoint, nil 1634 } 1635 if !d.isSynthetic() { 1636 d.IncRef() 1637 ds = appendDentry(ds, d) 1638 return &endpoint{ 1639 dentry: d, 1640 path: opts.Addr, 1641 }, nil 1642 } 1643 return nil, linuxerr.ECONNREFUSED 1644 } 1645 1646 // ListXattrAt implements vfs.FilesystemImpl.ListXattrAt. 
1647 func (fs *filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) { 1648 var ds *[]*dentry 1649 fs.renameMu.RLock() 1650 defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds) 1651 d, err := fs.resolveLocked(ctx, rp, &ds) 1652 if err != nil { 1653 return nil, err 1654 } 1655 return d.listXattr(ctx, size) 1656 } 1657 1658 // GetXattrAt implements vfs.FilesystemImpl.GetXattrAt. 1659 func (fs *filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetXattrOptions) (string, error) { 1660 var ds *[]*dentry 1661 fs.renameMu.RLock() 1662 defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds) 1663 d, err := fs.resolveLocked(ctx, rp, &ds) 1664 if err != nil { 1665 return "", err 1666 } 1667 return d.getXattr(ctx, rp.Credentials(), &opts) 1668 } 1669 1670 // SetXattrAt implements vfs.FilesystemImpl.SetXattrAt. 1671 func (fs *filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetXattrOptions) error { 1672 var ds *[]*dentry 1673 fs.renameMu.RLock() 1674 d, err := fs.resolveLocked(ctx, rp, &ds) 1675 if err != nil { 1676 fs.renameMuRUnlockAndCheckCaching(ctx, &ds) 1677 return err 1678 } 1679 err = d.setXattr(ctx, rp.Credentials(), &opts) 1680 fs.renameMuRUnlockAndCheckCaching(ctx, &ds) 1681 if err != nil { 1682 return err 1683 } 1684 1685 d.InotifyWithParent(ctx, linux.IN_ATTRIB, 0, vfs.InodeEvent) 1686 return nil 1687 } 1688 1689 // RemoveXattrAt implements vfs.FilesystemImpl.RemoveXattrAt. 
1690 func (fs *filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error { 1691 var ds *[]*dentry 1692 fs.renameMu.RLock() 1693 d, err := fs.resolveLocked(ctx, rp, &ds) 1694 if err != nil { 1695 fs.renameMuRUnlockAndCheckCaching(ctx, &ds) 1696 return err 1697 } 1698 err = d.removeXattr(ctx, rp.Credentials(), name) 1699 fs.renameMuRUnlockAndCheckCaching(ctx, &ds) 1700 if err != nil { 1701 return err 1702 } 1703 1704 d.InotifyWithParent(ctx, linux.IN_ATTRIB, 0, vfs.InodeEvent) 1705 return nil 1706 } 1707 1708 // PrependPath implements vfs.FilesystemImpl.PrependPath. 1709 func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error { 1710 fs.renameMu.RLock() 1711 defer fs.renameMu.RUnlock() 1712 return genericPrependPath(vfsroot, vd.Mount(), vd.Dentry().Impl().(*dentry), b) 1713 } 1714 1715 type mopt struct { 1716 key string 1717 value any 1718 } 1719 1720 func (m mopt) String() string { 1721 if m.value == nil { 1722 return fmt.Sprintf("%s", m.key) 1723 } 1724 return fmt.Sprintf("%s=%v", m.key, m.value) 1725 } 1726 1727 // MountOptions implements vfs.FilesystemImpl.MountOptions. 1728 func (fs *filesystem) MountOptions() string { 1729 optsKV := []mopt{ 1730 {moptTransport, transportModeFD}, // Only valid value, currently. 1731 {moptReadFD, fs.opts.fd}, // Currently, read and write FD are the same. 1732 {moptWriteFD, fs.opts.fd}, // Currently, read and write FD are the same. 
1733 {moptAname, fs.opts.aname}, 1734 {moptDfltUID, fs.opts.dfltuid}, 1735 {moptDfltGID, fs.opts.dfltgid}, 1736 } 1737 1738 switch fs.opts.interop { 1739 case InteropModeExclusive: 1740 optsKV = append(optsKV, mopt{moptCache, cacheFSCache}) 1741 case InteropModeWritethrough: 1742 optsKV = append(optsKV, mopt{moptCache, cacheFSCacheWritethrough}) 1743 case InteropModeShared: 1744 optsKV = append(optsKV, mopt{moptCache, cacheRemoteRevalidating}) 1745 } 1746 if fs.opts.regularFilesUseSpecialFileFD { 1747 optsKV = append(optsKV, mopt{moptDisableFileHandleSharing, nil}) 1748 } 1749 if fs.opts.disableFifoOpen { 1750 optsKV = append(optsKV, mopt{moptDisableFifoOpen, nil}) 1751 } 1752 if fs.opts.forcePageCache { 1753 optsKV = append(optsKV, mopt{moptForcePageCache, nil}) 1754 } 1755 if fs.opts.limitHostFDTranslation { 1756 optsKV = append(optsKV, mopt{moptLimitHostFDTranslation, nil}) 1757 } 1758 if fs.opts.overlayfsStaleRead { 1759 optsKV = append(optsKV, mopt{moptOverlayfsStaleRead, nil}) 1760 } 1761 if fs.opts.directfs.enabled { 1762 optsKV = append(optsKV, mopt{moptDirectfs, nil}) 1763 } 1764 1765 opts := make([]string, 0, len(optsKV)) 1766 for _, opt := range optsKV { 1767 opts = append(opts, opt.String()) 1768 } 1769 return strings.Join(opts, ",") 1770 }