// Source: github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/gofer/filesystem.go

// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package gofer

import (
	"fmt"
	"math"
	"strings"
	"sync"
	"sync/atomic"

	"github.com/SagerNet/gvisor/pkg/abi/linux"
	"github.com/SagerNet/gvisor/pkg/context"
	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
	"github.com/SagerNet/gvisor/pkg/fspath"
	"github.com/SagerNet/gvisor/pkg/p9"
	"github.com/SagerNet/gvisor/pkg/sentry/fsimpl/host"
	"github.com/SagerNet/gvisor/pkg/sentry/fsmetric"
	"github.com/SagerNet/gvisor/pkg/sentry/kernel"
	"github.com/SagerNet/gvisor/pkg/sentry/kernel/auth"
	"github.com/SagerNet/gvisor/pkg/sentry/kernel/pipe"
	"github.com/SagerNet/gvisor/pkg/sentry/socket/unix/transport"
	"github.com/SagerNet/gvisor/pkg/sentry/vfs"
	"github.com/SagerNet/gvisor/pkg/syserror"
)

// Sync implements vfs.FilesystemImpl.Sync.
//
// It works in two phases. First, under fs.syncMu (and, for the dentry set,
// fs.renameMu held for reading), it takes a referenced snapshot of
// fs.syncableDentries and fs.specialFileFDs. Second, with both locks
// released, it syncs every snapshotted object and drops the reference taken
// in the first phase. Every object is synced even if an earlier one fails;
// the first error encountered is returned.
func (fs *filesystem) Sync(ctx context.Context) error {
	// Snapshot current syncable dentries and special file FDs.
	fs.renameMu.RLock()
	fs.syncMu.Lock()
	ds := make([]*dentry, 0, len(fs.syncableDentries))
	for d := range fs.syncableDentries {
		// It's safe to use IncRef here even though fs.syncableDentries doesn't
		// hold references since we hold fs.renameMu. Note that we can't use
		// TryIncRef since cached dentries at zero references should still be
		// synced.
		d.IncRef()
		ds = append(ds, d)
	}
	// renameMu is only needed while taking dentry references; syncMu stays
	// held for the special file FD snapshot below.
	fs.renameMu.RUnlock()
	sffds := make([]*specialFileFD, 0, len(fs.specialFileFDs))
	for sffd := range fs.specialFileFDs {
		// As above, fs.specialFileFDs doesn't hold references. However, unlike
		// dentries, an FD that has reached zero references can't be
		// resurrected, so we can use TryIncRef.
		if sffd.vfsfd.TryIncRef() {
			sffds = append(sffds, sffd)
		}
	}
	fs.syncMu.Unlock()

	// Return the first error we encounter, but sync everything we can
	// regardless.
	var retErr error

	// Sync syncable dentries.
	for _, d := range ds {
		err := d.syncCachedFile(ctx, true /* forFilesystemSync */)
		// Drop the snapshot reference whether or not the sync succeeded.
		d.DecRef(ctx)
		if err != nil {
			ctx.Infof("gofer.filesystem.Sync: dentry.syncCachedFile failed: %v", err)
			if retErr == nil {
				retErr = err
			}
		}
	}

	// Sync special files, which may be writable but do not use dentry shared
	// handles (so they won't be synced by the above).
	for _, sffd := range sffds {
		err := sffd.sync(ctx, true /* forFilesystemSync */)
		// Drop the snapshot reference whether or not the sync succeeded.
		sffd.vfsfd.DecRef(ctx)
		if err != nil {
			ctx.Infof("gofer.filesystem.Sync: specialFileFD.sync failed: %v", err)
			if retErr == nil {
				retErr = err
			}
		}
	}

	return retErr
}

// maxFilenameLen is the maximum length of a filename. This is dictated by 9P's
// encoding of strings, which uses 2 bytes for the length prefix.
const maxFilenameLen = (1 << 16) - 1

// dentrySlicePool is a pool of *[]*dentry used to store dentries for which
// dentry.checkCachingLocked() must be called. The pool holds pointers to
// slices because Go lacks generics, so sync.Pool operates on interface{}, so
// every call to (what should be) sync.Pool<[]*dentry>.Put() allocates a copy
// of the slice header on the heap.
106 var dentrySlicePool = sync.Pool{ 107 New: func() interface{} { 108 ds := make([]*dentry, 0, 4) // arbitrary non-zero initial capacity 109 return &ds 110 }, 111 } 112 113 func appendDentry(ds *[]*dentry, d *dentry) *[]*dentry { 114 if ds == nil { 115 ds = dentrySlicePool.Get().(*[]*dentry) 116 } 117 *ds = append(*ds, d) 118 return ds 119 } 120 121 // Precondition: !parent.isSynthetic() && !child.isSynthetic(). 122 func appendNewChildDentry(ds **[]*dentry, parent *dentry, child *dentry) { 123 // The new child was added to parent and took a ref on the parent (hence 124 // parent can be removed from cache). A new child has 0 refs for now. So 125 // checkCachingLocked() should be called on both. Call it first on the parent 126 // as it may create space in the cache for child to be inserted - hence 127 // avoiding a cache eviction. 128 *ds = appendDentry(*ds, parent) 129 *ds = appendDentry(*ds, child) 130 } 131 132 // Preconditions: ds != nil. 133 func putDentrySlice(ds *[]*dentry) { 134 // Allow dentries to be GC'd. 135 for i := range *ds { 136 (*ds)[i] = nil 137 } 138 *ds = (*ds)[:0] 139 dentrySlicePool.Put(ds) 140 } 141 142 // renameMuRUnlockAndCheckCaching calls fs.renameMu.RUnlock(), then calls 143 // dentry.checkCachingLocked on all dentries in *dsp with fs.renameMu locked 144 // for writing. 145 // 146 // dsp is a pointer-to-pointer since defer evaluates its arguments immediately, 147 // but dentry slices are allocated lazily, and it's much easier to say "defer 148 // fs.renameMuRUnlockAndCheckCaching(&ds)" than "defer func() { 149 // fs.renameMuRUnlockAndCheckCaching(ds) }()" to work around this. 
// +checklocksrelease:fs.renameMu
func (fs *filesystem) renameMuRUnlockAndCheckCaching(ctx context.Context, dsp **[]*dentry) {
	fs.renameMu.RUnlock()
	// No dentry slice was ever allocated, so there is nothing to check.
	if *dsp == nil {
		return
	}
	ds := **dsp
	for _, d := range ds {
		// The read lock was dropped above, so checkCachingLocked is told that
		// renameMu is not write-locked by this caller.
		d.checkCachingLocked(ctx, false /* renameMuWriteLocked */)
	}
	putDentrySlice(*dsp)
}

// renameMuUnlockAndCheckCaching calls dentry.checkCachingLocked on all
// dentries in *ds while fs.renameMu is still locked for writing, then calls
// fs.renameMu.Unlock() and returns the slice to dentrySlicePool. If *ds is
// nil, it only unlocks fs.renameMu.
// +checklocksrelease:fs.renameMu
func (fs *filesystem) renameMuUnlockAndCheckCaching(ctx context.Context, ds **[]*dentry) {
	if *ds == nil {
		fs.renameMu.Unlock()
		return
	}
	for _, d := range **ds {
		d.checkCachingLocked(ctx, true /* renameMuWriteLocked */)
	}
	fs.renameMu.Unlock()
	putDentrySlice(*ds)
}

// stepLocked resolves rp.Component() to an existing file, starting from the
// given directory.
//
// Dentries which may become cached as a result of the traversal are appended
// to *ds.
//
// The returned bool reports whether at least one symlink was followed while
// resolving this component.
//
// Preconditions:
// * fs.renameMu must be locked.
// * d.dirMu must be locked.
// * !rp.Done().
// * If !d.cachedMetadataAuthoritative(), then d and all children that are
// part of rp must have been revalidated.
//
// Postconditions: The returned dentry's cached metadata is up to date.
func (fs *filesystem) stepLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, mayFollowSymlinks bool, ds **[]*dentry) (*dentry, bool, error) {
	if !d.isDir() {
		return nil, false, syserror.ENOTDIR
	}
	if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
		return nil, false, err
	}
	followedSymlink := false
afterSymlink:
	name := rp.Component()
	if name == "." {
		rp.Advance()
		return d, followedSymlink, nil
	}
	if name == ".." {
		// When rp.CheckRoot reports that d is the root, or d has no parent,
		// ".." resolves to d itself.
		if isRoot, err := rp.CheckRoot(ctx, &d.vfsd); err != nil {
			return nil, false, err
		} else if isRoot || d.parent == nil {
			rp.Advance()
			return d, followedSymlink, nil
		}
		if err := rp.CheckMount(ctx, &d.parent.vfsd); err != nil {
			return nil, false, err
		}
		rp.Advance()
		return d.parent, followedSymlink, nil
	}
	child, err := fs.getChildLocked(ctx, d, name, ds)
	if err != nil {
		return nil, false, err
	}
	if err := rp.CheckMount(ctx, &child.vfsd); err != nil {
		return nil, false, err
	}
	if child.isSymlink() && mayFollowSymlinks && rp.ShouldFollowSymlink() {
		target, err := child.readlink(ctx, rp.Mount())
		if err != nil {
			return nil, false, err
		}
		if err := rp.HandleSymlink(target); err != nil {
			return nil, false, err
		}
		followedSymlink = true
		goto afterSymlink // don't check the current directory again
	}
	rp.Advance()
	return child, followedSymlink, nil
}

// getChildLocked returns a dentry representing the child of parent with the
// given name. Returns ENOENT if the child doesn't exist.
//
// On a cache miss in a non-synthetic parent, the child is looked up with
// parent.file.walkGetAttrOne; a new dentry is created for it, cached in
// parent, and both parent and child are appended to *ds.
//
// Preconditions:
// * fs.renameMu must be locked.
// * parent.dirMu must be locked.
// * parent.isDir().
// * name is not "." or "..".
// * dentry at name has been revalidated
func (fs *filesystem) getChildLocked(ctx context.Context, parent *dentry, name string, ds **[]*dentry) (*dentry, error) {
	if len(name) > maxFilenameLen {
		return nil, linuxerr.ENAMETOOLONG
	}
	// A cached entry answers the lookup directly; a nil entry is a cached
	// negative lookup. For a synthetic parent the cache is the only source
	// consulted, so a miss there is also ENOENT.
	if child, ok := parent.children[name]; ok || parent.isSynthetic() {
		if child == nil {
			return nil, syserror.ENOENT
		}
		return child, nil
	}

	qid, file, attrMask, attr, err := parent.file.walkGetAttrOne(ctx, name)
	if err != nil {
		if linuxerr.Equals(linuxerr.ENOENT, err) {
			parent.cacheNegativeLookupLocked(name)
		}
		return nil, err
	}

	// Create a new dentry representing the file.
	child, err := fs.newDentry(ctx, file, qid, attrMask, &attr)
	if err != nil {
		// The walked file is not owned by any dentry, so close it here.
		file.close(ctx)
		delete(parent.children, name)
		return nil, err
	}
	parent.cacheNewChildLocked(child, name)
	appendNewChildDentry(ds, parent, child)
	return child, nil
}

// walkParentDirLocked resolves all but the last path component of rp to an
// existing directory, starting from the given directory (which is usually
// rp.Start().Impl().(*dentry)). It does not check that the returned directory
// is searchable by the provider of rp.
//
// Preconditions:
// * fs.renameMu must be locked.
// * !rp.Done().
// * If !d.cachedMetadataAuthoritative(), then d's cached metadata must be up
// to date.
func (fs *filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, ds **[]*dentry) (*dentry, error) {
	if err := fs.revalidateParentDir(ctx, rp, d, ds); err != nil {
		return nil, err
	}
	for !rp.Final() {
		d.dirMu.Lock()
		next, followedSymlink, err := fs.stepLocked(ctx, rp, d, true /* mayFollowSymlinks */, ds)
		d.dirMu.Unlock()
		if err != nil {
			return nil, err
		}
		d = next
		// Following a symlink changed the remaining path, so revalidate
		// again from the new position.
		if followedSymlink {
			if err := fs.revalidateParentDir(ctx, rp, d, ds); err != nil {
				return nil, err
			}
		}
	}
	if !d.isDir() {
		return nil, syserror.ENOTDIR
	}
	return d, nil
}

// resolveLocked resolves rp to an existing file.
//
// Preconditions: fs.renameMu must be locked.
func (fs *filesystem) resolveLocked(ctx context.Context, rp *vfs.ResolvingPath, ds **[]*dentry) (*dentry, error) {
	// Resolution starts at rp.Start() and consumes every remaining
	// component, following symlinks; dentries touched along the way are
	// appended to *ds by stepLocked.
	d := rp.Start().Impl().(*dentry)
	if err := fs.revalidatePath(ctx, rp, d, ds); err != nil {
		return nil, err
	}
	for !rp.Done() {
		d.dirMu.Lock()
		next, followedSymlink, err := fs.stepLocked(ctx, rp, d, true /* mayFollowSymlinks */, ds)
		d.dirMu.Unlock()
		if err != nil {
			return nil, err
		}
		d = next
		// Following a symlink changed the remaining path, so revalidate
		// again from the new position.
		if followedSymlink {
			if err := fs.revalidatePath(ctx, rp, d, ds); err != nil {
				return nil, err
			}
		}
	}
	if rp.MustBeDir() && !d.isDir() {
		return nil, syserror.ENOTDIR
	}
	return d, nil
}

// doCreateAt checks that creating a file at rp is permitted, then invokes
// createInRemoteDir (if the parent directory is a real remote directory) or
// createInSyntheticDir (if the parent directory is synthetic) to do so.
//
// dir indicates that the file being created is a directory. createInSyntheticDir
// may be nil, in which case creation in a synthetic parent fails with EPERM.
// On success an IN_CREATE inotify event is generated on the parent.
//
// Preconditions:
// * !rp.Done().
// * For the final path component in rp, !rp.ShouldFollowSymlink().
func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir bool, createInRemoteDir func(parent *dentry, name string, ds **[]*dentry) error, createInSyntheticDir func(parent *dentry, name string) error) error {
	var ds *[]*dentry
	fs.renameMu.RLock()
	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
	start := rp.Start().Impl().(*dentry)
	parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
	if err != nil {
		return err
	}

	// Order of checks is important. First check if parent directory can be
	// executed, then check for existence, and lastly check if mount is writable.
	if err := parent.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
		return err
	}
	name := rp.Component()
	if name == "." || name == ".." {
		return syserror.EEXIST
	}
	if parent.isDeleted() {
		return syserror.ENOENT
	}
	if err := fs.revalidateOne(ctx, rp.VirtualFilesystem(), parent, name, &ds); err != nil {
		return err
	}

	parent.dirMu.Lock()
	defer parent.dirMu.Unlock()

	if len(name) > maxFilenameLen {
		return linuxerr.ENAMETOOLONG
	}
	// Check for existence only if caching information is available. Otherwise,
	// don't check for existence just yet. We will check for existence if the
	// checks for writability fail below. Existence check is done by the creation
	// RPCs themselves.
	if child, ok := parent.children[name]; ok && child != nil {
		return syserror.EEXIST
	}
	// checkExistence performs a full lookup of name in parent. It returns
	// EEXIST if a child is found, nil if the lookup fails with ENOENT, and
	// any other lookup error unchanged.
	checkExistence := func() error {
		if child, err := fs.getChildLocked(ctx, parent, name, &ds); err != nil && !linuxerr.Equals(linuxerr.ENOENT, err) {
			return err
		} else if child != nil {
			return syserror.EEXIST
		}
		return nil
	}

	mnt := rp.Mount()
	if err := mnt.CheckBeginWrite(); err != nil {
		// Existence check takes precedence.
		if existenceErr := checkExistence(); existenceErr != nil {
			return existenceErr
		}
		return err
	}
	defer mnt.EndWrite()

	if err := parent.checkPermissions(rp.Credentials(), vfs.MayWrite); err != nil {
		// Existence check takes precedence.
		if existenceErr := checkExistence(); existenceErr != nil {
			return existenceErr
		}
		return err
	}
	if !dir && rp.MustBeDir() {
		return syserror.ENOENT
	}
	if parent.isSynthetic() {
		if createInSyntheticDir == nil {
			return linuxerr.EPERM
		}
		if err := createInSyntheticDir(parent, name); err != nil {
			return err
		}
		parent.touchCMtime()
		// Drop the cached directory entries, which no longer match the
		// directory's contents.
		parent.dirents = nil
		ev := linux.IN_CREATE
		if dir {
			ev |= linux.IN_ISDIR
		}
		parent.watches.Notify(ctx, name, uint32(ev), 0, vfs.InodeEvent, false /* unlinked */)
		return nil
	}
	// No cached dentry exists; however, in InteropModeShared there might still be
	// an existing file at name. Just attempt the file creation RPC anyways. If a
	// file does exist, the RPC will fail with EEXIST like we would have.
	if err := createInRemoteDir(parent, name, &ds); err != nil {
		return err
	}
	if fs.opts.interop != InteropModeShared {
		if child, ok := parent.children[name]; ok && child == nil {
			// Delete the now-stale negative dentry.
			delete(parent.children, name)
		}
		parent.touchCMtime()
		parent.dirents = nil
	}
	ev := linux.IN_CREATE
	if dir {
		ev |= linux.IN_ISDIR
	}
	parent.watches.Notify(ctx, name, uint32(ev), 0, vfs.InodeEvent, false /* unlinked */)
	return nil
}

// Preconditions: !rp.Done().
func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir bool) error {
	// unlinkAt removes the file named by the final component of rp from its
	// parent directory. dir selects rmdir semantics (the target must be an
	// empty directory; AT_REMOVEDIR is passed to the remote unlink) instead
	// of unlink semantics (the target must not be a directory).
	var ds *[]*dentry
	fs.renameMu.RLock()
	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
	start := rp.Start().Impl().(*dentry)
	parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
	if err != nil {
		return err
	}
	if err := parent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil {
		return err
	}
	if err := rp.Mount().CheckBeginWrite(); err != nil {
		return err
	}
	defer rp.Mount().EndWrite()

	name := rp.Component()
	if dir {
		if name == "." {
			return linuxerr.EINVAL
		}
		if name == ".." {
			return linuxerr.ENOTEMPTY
		}
	} else {
		if name == "." || name == ".." {
			return syserror.EISDIR
		}
	}

	vfsObj := rp.VirtualFilesystem()
	if err := fs.revalidateOne(ctx, vfsObj, parent, rp.Component(), &ds); err != nil {
		return err
	}

	mntns := vfs.MountNamespaceFromContext(ctx)
	defer mntns.DecRef(ctx)

	parent.dirMu.Lock()
	defer parent.dirMu.Unlock()

	// Load child if sticky bit is set because we need to determine whether
	// deletion is allowed.
	var child *dentry
	if atomic.LoadUint32(&parent.mode)&linux.ModeSticky == 0 {
		// Without the sticky bit only the cache is consulted; child stays
		// nil when no dentry is cached for name.
		var ok bool
		child, ok = parent.children[name]
		if ok && child == nil {
			// Hit a negative cached entry, child doesn't exist.
			return syserror.ENOENT
		}
	} else {
		child, _, err = fs.stepLocked(ctx, rp, parent, false /* mayFollowSymlinks */, &ds)
		if err != nil {
			return err
		}
		if err := parent.mayDelete(rp.Credentials(), child); err != nil {
			return err
		}
	}

	// If a child dentry exists, prepare to delete it. This should fail if it is
	// a mount point. We detect mount points by speculatively calling
	// PrepareDeleteDentry, which fails if child is a mount point.
	//
	// Also note that if child is nil, then it can't be a mount point.
	if child != nil {
		// Hold child.dirMu so we can check child.children and
		// child.syntheticChildren. We don't access these fields until a bit later,
		// but locking child.dirMu after calling vfs.PrepareDeleteDentry() would
		// create an inconsistent lock ordering between dentry.dirMu and
		// vfs.Dentry.mu (in the VFS lock order, it would make dentry.dirMu both "a
		// FilesystemImpl lock" and "a lock acquired by a FilesystemImpl between
		// PrepareDeleteDentry and CommitDeleteDentry"). To avoid this, lock
		// child.dirMu before calling PrepareDeleteDentry.
		child.dirMu.Lock()
		defer child.dirMu.Unlock()
		if err := vfsObj.PrepareDeleteDentry(mntns, &child.vfsd); err != nil {
			return err
		}
	}
	// From here on, every error return with child != nil must call
	// AbortDeleteDentry to undo the PrepareDeleteDentry above.
	flags := uint32(0)
	// If a dentry exists, use it for best-effort checks on its deletability.
	if dir {
		if child != nil {
			// child must be an empty directory.
			if child.syntheticChildren != 0 {
				// This is definitely not an empty directory, irrespective of
				// fs.opts.interop.
				vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: PrepareDeleteDentry called if child != nil.
				return linuxerr.ENOTEMPTY
			}
			// If InteropModeShared is in effect and the first call to
			// PrepareDeleteDentry above succeeded, then child wasn't
			// revalidated (so we can't expect its file type to be correct) and
			// individually revalidating its children (to confirm that they
			// still exist) would be a waste of time.
			if child.cachedMetadataAuthoritative() {
				if !child.isDir() {
					vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
					return syserror.ENOTDIR
				}
				for _, grandchild := range child.children {
					if grandchild != nil {
						vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
						return linuxerr.ENOTEMPTY
					}
				}
			}
		}
		flags = linux.AT_REMOVEDIR
	} else {
		// child must be a non-directory file.
		if child != nil && child.isDir() {
			vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
			return syserror.EISDIR
		}
		if rp.MustBeDir() {
			if child != nil {
				vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
			}
			return syserror.ENOTDIR
		}
	}
	if parent.isSynthetic() {
		if child == nil {
			return syserror.ENOENT
		}
	} else if child == nil || !child.isSynthetic() {
		// The remote unlink is skipped only when a synthetic child is
		// cached for name.
		err = parent.file.unlinkAt(ctx, name, flags)
		if err != nil {
			if child != nil {
				vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
			}
			return err
		}
	}

	// Generate inotify events for rmdir or unlink.
	if dir {
		parent.watches.Notify(ctx, name, linux.IN_DELETE|linux.IN_ISDIR, 0, vfs.InodeEvent, true /* unlinked */)
	} else {
		var cw *vfs.Watches
		if child != nil {
			cw = &child.watches
		}
		vfs.InotifyRemoveChild(ctx, cw, &parent.watches, name)
	}

	if child != nil {
		vfsObj.CommitDeleteDentry(ctx, &child.vfsd) // +checklocksforce: see above.
		child.setDeleted()
		if child.isSynthetic() {
			parent.syntheticChildren--
			child.decRefNoCaching()
		}
		// Queue child so checkCachingLocked runs on it once renameMu is
		// released.
		ds = appendDentry(ds, child)
	}
	parent.cacheNegativeLookupLocked(name)
	if parent.cachedMetadataAuthoritative() {
		parent.dirents = nil
		parent.touchCMtime()
		if dir {
			parent.decLinks()
		}
	}
	return nil
}

// AccessAt implements vfs.Filesystem.Impl.AccessAt.
626 func (fs *filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds *auth.Credentials, ats vfs.AccessTypes) error { 627 var ds *[]*dentry 628 fs.renameMu.RLock() 629 defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds) 630 d, err := fs.resolveLocked(ctx, rp, &ds) 631 if err != nil { 632 return err 633 } 634 return d.checkPermissions(creds, ats) 635 } 636 637 // GetDentryAt implements vfs.FilesystemImpl.GetDentryAt. 638 func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) { 639 var ds *[]*dentry 640 fs.renameMu.RLock() 641 defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds) 642 d, err := fs.resolveLocked(ctx, rp, &ds) 643 if err != nil { 644 return nil, err 645 } 646 if opts.CheckSearchable { 647 if !d.isDir() { 648 return nil, syserror.ENOTDIR 649 } 650 if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil { 651 return nil, err 652 } 653 } 654 d.IncRef() 655 // Call d.checkCachingLocked() so it can be removed from the cache if needed. 656 ds = appendDentry(ds, d) 657 return &d.vfsd, nil 658 } 659 660 // GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt. 661 func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) { 662 var ds *[]*dentry 663 fs.renameMu.RLock() 664 defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds) 665 start := rp.Start().Impl().(*dentry) 666 d, err := fs.walkParentDirLocked(ctx, rp, start, &ds) 667 if err != nil { 668 return nil, err 669 } 670 d.IncRef() 671 // Call d.checkCachingLocked() so it can be removed from the cache if needed. 672 ds = appendDentry(ds, d) 673 return &d.vfsd, nil 674 } 675 676 // LinkAt implements vfs.FilesystemImpl.LinkAt. 
677 func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error { 678 return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, childName string, _ **[]*dentry) error { 679 if rp.Mount() != vd.Mount() { 680 return linuxerr.EXDEV 681 } 682 d := vd.Dentry().Impl().(*dentry) 683 if d.isDir() { 684 return linuxerr.EPERM 685 } 686 gid := auth.KGID(atomic.LoadUint32(&d.gid)) 687 uid := auth.KUID(atomic.LoadUint32(&d.uid)) 688 mode := linux.FileMode(atomic.LoadUint32(&d.mode)) 689 if err := vfs.MayLink(rp.Credentials(), mode, uid, gid); err != nil { 690 return err 691 } 692 if d.nlink == 0 { 693 return syserror.ENOENT 694 } 695 if d.nlink == math.MaxUint32 { 696 return linuxerr.EMLINK 697 } 698 if err := parent.file.link(ctx, d.file, childName); err != nil { 699 return err 700 } 701 702 // Success! 703 atomic.AddUint32(&d.nlink, 1) 704 return nil 705 }, nil) 706 } 707 708 // MkdirAt implements vfs.FilesystemImpl.MkdirAt. 709 func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error { 710 creds := rp.Credentials() 711 return fs.doCreateAt(ctx, rp, true /* dir */, func(parent *dentry, name string, ds **[]*dentry) error { 712 // If the parent is a setgid directory, use the parent's GID 713 // rather than the caller's and enable setgid. 
714 kgid := creds.EffectiveKGID 715 mode := opts.Mode 716 if atomic.LoadUint32(&parent.mode)&linux.S_ISGID != 0 { 717 kgid = auth.KGID(atomic.LoadUint32(&parent.gid)) 718 mode |= linux.S_ISGID 719 } 720 if _, err := parent.file.mkdir(ctx, name, p9.FileMode(mode), (p9.UID)(creds.EffectiveKUID), p9.GID(kgid)); err != nil { 721 if !opts.ForSyntheticMountpoint || linuxerr.Equals(linuxerr.EEXIST, err) { 722 return err 723 } 724 ctx.Infof("Failed to create remote directory %q: %v; falling back to synthetic directory", name, err) 725 parent.createSyntheticChildLocked(&createSyntheticOpts{ 726 name: name, 727 mode: linux.S_IFDIR | opts.Mode, 728 kuid: creds.EffectiveKUID, 729 kgid: creds.EffectiveKGID, 730 }) 731 *ds = appendDentry(*ds, parent) 732 } 733 if fs.opts.interop != InteropModeShared { 734 parent.incLinks() 735 } 736 return nil 737 }, func(parent *dentry, name string) error { 738 if !opts.ForSyntheticMountpoint { 739 // Can't create non-synthetic files in synthetic directories. 740 return linuxerr.EPERM 741 } 742 parent.createSyntheticChildLocked(&createSyntheticOpts{ 743 name: name, 744 mode: linux.S_IFDIR | opts.Mode, 745 kuid: creds.EffectiveKUID, 746 kgid: creds.EffectiveKGID, 747 }) 748 parent.incLinks() 749 return nil 750 }) 751 } 752 753 // MknodAt implements vfs.FilesystemImpl.MknodAt. 754 func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error { 755 return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, name string, ds **[]*dentry) error { 756 creds := rp.Credentials() 757 _, err := parent.file.mknod(ctx, name, (p9.FileMode)(opts.Mode), opts.DevMajor, opts.DevMinor, (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID)) 758 if !linuxerr.Equals(linuxerr.EPERM, err) { 759 return err 760 } 761 762 // EPERM means that gofer does not allow creating a socket or pipe. Fallback 763 // to creating a synthetic one, i.e. one that is kept entirely in memory. 
764 765 // Check that we're not overriding an existing file with a synthetic one. 766 _, _, err = fs.stepLocked(ctx, rp, parent, true, ds) 767 switch { 768 case err == nil: 769 // Step succeeded, another file exists. 770 return syserror.EEXIST 771 case !linuxerr.Equals(linuxerr.ENOENT, err): 772 // Unexpected error. 773 return err 774 } 775 776 switch opts.Mode.FileType() { 777 case linux.S_IFSOCK: 778 parent.createSyntheticChildLocked(&createSyntheticOpts{ 779 name: name, 780 mode: opts.Mode, 781 kuid: creds.EffectiveKUID, 782 kgid: creds.EffectiveKGID, 783 endpoint: opts.Endpoint, 784 }) 785 *ds = appendDentry(*ds, parent) 786 return nil 787 case linux.S_IFIFO: 788 parent.createSyntheticChildLocked(&createSyntheticOpts{ 789 name: name, 790 mode: opts.Mode, 791 kuid: creds.EffectiveKUID, 792 kgid: creds.EffectiveKGID, 793 pipe: pipe.NewVFSPipe(true /* isNamed */, pipe.DefaultPipeSize), 794 }) 795 *ds = appendDentry(*ds, parent) 796 return nil 797 } 798 // Retain error from gofer if synthetic file cannot be created internally. 799 return linuxerr.EPERM 800 }, nil) 801 } 802 803 // OpenAt implements vfs.FilesystemImpl.OpenAt. 804 func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) { 805 // Reject O_TMPFILE, which is not supported; supporting it correctly in the 806 // presence of other remote filesystem users requires remote filesystem 807 // support, and it isn't clear that there's any way to implement this in 808 // 9P. 
809 if opts.Flags&linux.O_TMPFILE != 0 { 810 return nil, syserror.EOPNOTSUPP 811 } 812 mayCreate := opts.Flags&linux.O_CREAT != 0 813 mustCreate := opts.Flags&(linux.O_CREAT|linux.O_EXCL) == (linux.O_CREAT | linux.O_EXCL) 814 815 var ds *[]*dentry 816 fs.renameMu.RLock() 817 unlocked := false 818 unlock := func() { 819 if !unlocked { 820 fs.renameMuRUnlockAndCheckCaching(ctx, &ds) 821 unlocked = true 822 } 823 } 824 defer unlock() 825 826 start := rp.Start().Impl().(*dentry) 827 if rp.Done() { 828 // Reject attempts to open mount root directory with O_CREAT. 829 if mayCreate && rp.MustBeDir() { 830 return nil, syserror.EISDIR 831 } 832 if mustCreate { 833 return nil, syserror.EEXIST 834 } 835 if !start.cachedMetadataAuthoritative() { 836 // Refresh dentry's attributes before opening. 837 if err := start.updateFromGetattr(ctx); err != nil { 838 return nil, err 839 } 840 } 841 start.IncRef() 842 defer start.DecRef(ctx) 843 unlock() 844 // start is intentionally not added to ds (which would remove it from the 845 // cache) because doing so regresses performance in practice. 846 return start.open(ctx, rp, &opts) 847 } 848 849 afterTrailingSymlink: 850 parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds) 851 if err != nil { 852 return nil, err 853 } 854 // Check for search permission in the parent directory. 855 if err := parent.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil { 856 return nil, err 857 } 858 // Reject attempts to open directories with O_CREAT. 859 if mayCreate && rp.MustBeDir() { 860 return nil, syserror.EISDIR 861 } 862 if err := fs.revalidateOne(ctx, rp.VirtualFilesystem(), parent, rp.Component(), &ds); err != nil { 863 return nil, err 864 } 865 // Determine whether or not we need to create a file. 
parent.dirMu.Lock()
	child, _, err := fs.stepLocked(ctx, rp, parent, false /* mayFollowSymlinks */, &ds)
	if linuxerr.Equals(linuxerr.ENOENT, err) && mayCreate {
		if parent.isSynthetic() {
			parent.dirMu.Unlock()
			return nil, linuxerr.EPERM
		}
		fd, err := parent.createAndOpenChildLocked(ctx, rp, &opts, &ds)
		parent.dirMu.Unlock()
		return fd, err
	}
	parent.dirMu.Unlock()
	if err != nil {
		return nil, err
	}
	if mustCreate {
		return nil, syserror.EEXIST
	}
	// Open existing child or follow symlink.
	if child.isSymlink() && rp.ShouldFollowSymlink() {
		target, err := child.readlink(ctx, rp.Mount())
		if err != nil {
			return nil, err
		}
		if err := rp.HandleSymlink(target); err != nil {
			return nil, err
		}
		start = parent
		goto afterTrailingSymlink
	}
	if rp.MustBeDir() && !child.isDir() {
		return nil, syserror.ENOTDIR
	}
	child.IncRef()
	defer child.DecRef(ctx)
	unlock()
	// child is intentionally not added to ds (which would remove it from the
	// cache) because doing so regresses performance in practice.
	return child.open(ctx, rp, &opts)
}

// open returns a file description for the file represented by d, opened
// according to opts.
//
// The access types implied by opts are checked against rp's credentials
// first. What happens next depends on d's file type:
//   * Regular files get a regular file FD backed by d's shared handle,
//     unless the filesystem is configured with regularFilesUseSpecialFileFD.
//   * Directories get a directoryFD; O_CREAT and write access fail with
//     EISDIR, and O_DIRECT fails with EINVAL.
//   * Symlinks fail with ELOOP.
//   * Synthetic sockets fail with ENXIO; other sockets are opened by
//     connecting if the filesystem enables OpenSocketsByConnecting.
//   * Synthetic named pipes are opened through d.pipe.
//   * Everything not handled above is opened with openSpecialFile.
//
// Preconditions: The caller must hold no locks (since opening pipes may block
// indefinitely).
func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
	ats := vfs.AccessTypesForOpenFlags(opts)
	if err := d.checkPermissions(rp.Credentials(), ats); err != nil {
		return nil, err
	}

	// O_TRUNC is only honored here for regular files.
	trunc := opts.Flags&linux.O_TRUNC != 0 && d.fileType() == linux.S_IFREG
	if trunc {
		// Lock metadataMu *while* we open a regular file with O_TRUNC because
		// open(2) will change the file size on server.
		d.metadataMu.Lock()
		defer d.metadataMu.Unlock()
	}

	// vfd stays nil for every case that should be handled by
	// openSpecialFile() after the switch.
	var vfd *vfs.FileDescription
	var err error
	mnt := rp.Mount()
	switch d.fileType() {
	case linux.S_IFREG:
		if !d.fs.opts.regularFilesUseSpecialFileFD {
			if err := d.ensureSharedHandle(ctx, ats.MayRead(), ats.MayWrite(), trunc); err != nil {
				return nil, err
			}
			fd, err := newRegularFileFD(mnt, d, opts.Flags)
			if err != nil {
				return nil, err
			}
			vfd = &fd.vfsfd
		}
	case linux.S_IFDIR:
		// Can't open directories with O_CREAT.
		if opts.Flags&linux.O_CREAT != 0 {
			return nil, syserror.EISDIR
		}
		// Can't open directories writably.
		if ats&vfs.MayWrite != 0 {
			return nil, syserror.EISDIR
		}
		if opts.Flags&linux.O_DIRECT != 0 {
			return nil, linuxerr.EINVAL
		}
		// Only non-synthetic directories get a shared handle.
		if !d.isSynthetic() {
			if err := d.ensureSharedHandle(ctx, ats&vfs.MayRead != 0, false /* write */, false /* trunc */); err != nil {
				return nil, err
			}
		}
		fd := &directoryFD{}
		fd.LockFD.Init(&d.locks)
		if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil {
			return nil, err
		}
		// Record whether this open ended up with a host FD (readFD >= 0) or
		// not.
		if atomic.LoadInt32(&d.readFD) >= 0 {
			fsmetric.GoferOpensHost.Increment()
		} else {
			fsmetric.GoferOpens9P.Increment()
		}
		return &fd.vfsfd, nil
	case linux.S_IFLNK:
		// Can't open symlinks without O_PATH, which is handled at the VFS layer.
		return nil, linuxerr.ELOOP
	case linux.S_IFSOCK:
		if d.isSynthetic() {
			return nil, linuxerr.ENXIO
		}
		if d.fs.iopts.OpenSocketsByConnecting {
			return d.openSocketByConnecting(ctx, opts)
		}
	case linux.S_IFIFO:
		if d.isSynthetic() {
			return d.pipe.Open(ctx, mnt, &d.vfsd, opts.Flags, &d.locks)
		}
	}

	if vfd == nil {
		if vfd, err = d.openSpecialFile(ctx, mnt, opts); err != nil {
			return nil, err
		}
	}

	if trunc {
		// If no errors occurred so far then update file size in memory. This
		// step is required even if !d.cachedMetadataAuthoritative() because
		// d.mappings has to be updated.
		// d.metadataMu has already been acquired if trunc == true.
		d.updateSizeLocked(0)

		if d.cachedMetadataAuthoritative() {
			d.touchCMtimeLocked()
		}
	}
	return vfd, err
}

// openSocketByConnecting opens the socket file represented by d by asking the
// remote side to connect to it (as a p9.AnonymousSocket) and wrapping the
// resulting host file descriptor in a host FD.
//
// O_DIRECT is rejected with EINVAL.
func (d *dentry) openSocketByConnecting(ctx context.Context, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
	if opts.Flags&linux.O_DIRECT != 0 {
		return nil, linuxerr.EINVAL
	}
	fdObj, err := d.file.connect(ctx, p9.AnonymousSocket)
	if err != nil {
		return nil, err
	}
	fd, err := host.NewFD(ctx, kernel.KernelFromContext(ctx).HostMount(), fdObj.FD(), &host.NewFDOptions{
		HaveFlags: true,
		Flags:     opts.Flags,
	})
	if err != nil {
		// The host FD was not created, so the connected descriptor is still
		// ours to close.
		fdObj.Close()
		return nil, err
	}
	// NOTE(review): presumably the new host FD now owns the descriptor, which
	// is why it is released rather than closed here -- confirm against
	// host.NewFD.
	fdObj.Release()
	return fd, nil
}

// openSpecialFile opens d's remote file with openHandle and wraps the
// resulting handle in a specialFileFD.
//
// O_DIRECT is rejected with EINVAL. Opens of named pipes without O_NONBLOCK
// are emulated here: a write-only open is retried for as long as the server
// reports ENXIO, and a read-only open that obtained a host FD waits until the
// pipe has a writer.
func (d *dentry) openSpecialFile(ctx context.Context, mnt *vfs.Mount, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
	ats := vfs.AccessTypesForOpenFlags(opts)
	if opts.Flags&linux.O_DIRECT != 0 {
		return nil, linuxerr.EINVAL
	}
	// We assume that the server silently inserts O_NONBLOCK in the open flags
	// for all named pipes (because all existing gofers do this).
	//
	// NOTE(b/133875563): This makes named pipe opens racy, because the
	// mechanisms for translating nonblocking to blocking opens can only detect
	// the instantaneous presence of a peer holding the other end of the pipe
	// open, not whether the pipe was *previously* opened by a peer that has
	// since closed its end.
	isBlockingOpenOfNamedPipe := d.fileType() == linux.S_IFIFO && opts.Flags&linux.O_NONBLOCK == 0
retry:
	h, err := openHandle(ctx, d.file, ats.MayRead(), ats.MayWrite(), opts.Flags&linux.O_TRUNC != 0)
	if err != nil {
		if isBlockingOpenOfNamedPipe && ats == vfs.MayWrite && linuxerr.Equals(linuxerr.ENXIO, err) {
			// An attempt to open a named pipe with O_WRONLY|O_NONBLOCK fails
			// with ENXIO if opening the same named pipe with O_WRONLY would
			// block because there are no readers of the pipe.
			if err := sleepBetweenNamedPipeOpenChecks(ctx); err != nil {
				return nil, err
			}
			goto retry
		}
		return nil, err
	}
	// A read-only blocking open must not return before a writer exists; this
	// can only be checked when the handle carries a host FD (h.fd >= 0).
	if isBlockingOpenOfNamedPipe && ats == vfs.MayRead && h.fd >= 0 {
		if err := blockUntilNonblockingPipeHasWriter(ctx, h.fd); err != nil {
			h.close(ctx)
			return nil, err
		}
	}
	fd, err := newSpecialFileFD(h, mnt, d, opts.Flags)
	if err != nil {
		h.close(ctx)
		return nil, err
	}
	return &fd.vfsfd, nil
}

// createAndOpenChildLocked creates a file named rp.Component() in the
// directory represented by d on the remote filesystem, inserts a new dentry
// for it as a child of d, and returns a file description for the created file
// opened according to opts.
//
// The new dentry is also recorded through ds (see appendNewChildDentry).
//
// Preconditions:
// * d.fs.renameMu must be locked.
// * d.dirMu must be locked.
// * !d.isSynthetic().
func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions, ds **[]*dentry) (*vfs.FileDescription, error) {
	if err := d.checkPermissions(rp.Credentials(), vfs.MayWrite); err != nil {
		return nil, err
	}
	if d.isDeleted() {
		return nil, syserror.ENOENT
	}
	mnt := rp.Mount()
	if err := mnt.CheckBeginWrite(); err != nil {
		return nil, err
	}
	defer mnt.EndWrite()

	// 9P2000.L's lcreate takes a fid representing the parent directory, and
	// converts it into an open fid representing the created file, so we need
	// to duplicate the directory fid first.
	_, dirfile, err := d.file.walk(ctx, nil)
	if err != nil {
		return nil, err
	}
	creds := rp.Credentials()
	name := rp.Component()
	// We only want the access mode for creating the file.
	createFlags := p9.OpenFlags(opts.Flags) & p9.OpenFlagsModeMask

	// If the parent is a setgid directory, use the parent's GID rather
	// than the caller's.
	kgid := creds.EffectiveKGID
	if atomic.LoadUint32(&d.mode)&linux.S_ISGID != 0 {
		kgid = auth.KGID(atomic.LoadUint32(&d.gid))
	}

	fdobj, openFile, createQID, _, err := dirfile.create(ctx, name, createFlags, p9.FileMode(opts.Mode), (p9.UID)(creds.EffectiveKUID), p9.GID(kgid))
	if err != nil {
		dirfile.close(ctx)
		return nil, err
	}
	// Then we need to walk to the file we just created to get a non-open fid
	// representing it, and to get its metadata. This must use d.file since, as
	// explained above, dirfile was invalidated by dirfile.Create().
	_, nonOpenFile, attrMask, attr, err := d.file.walkGetAttrOne(ctx, name)
	if err != nil {
		openFile.close(ctx)
		if fdobj != nil {
			fdobj.Close()
		}
		return nil, err
	}

	// Construct the new dentry.
	child, err := d.fs.newDentry(ctx, nonOpenFile, createQID, attrMask, &attr)
	if err != nil {
		nonOpenFile.close(ctx)
		openFile.close(ctx)
		if fdobj != nil {
			fdobj.Close()
		}
		return nil, err
	}
	// Incorporate the fid that was opened by lcreate.
	useRegularFileFD := child.fileType() == linux.S_IFREG && !d.fs.opts.regularFilesUseSpecialFileFD
	if useRegularFileFD {
		// -1 stands for "no host FD".
		openFD := int32(-1)
		if fdobj != nil {
			openFD = int32(fdobj.Release())
		}
		// From here on the open fid (and host FD, if any) is stored in the
		// child dentry's handle fields.
		child.handleMu.Lock()
		if vfs.MayReadFileWithOpenFlags(opts.Flags) {
			child.readFile = openFile
			if fdobj != nil {
				child.readFD = openFD
				child.mmapFD = openFD
			}
		}
		if vfs.MayWriteFileWithOpenFlags(opts.Flags) {
			child.writeFile = openFile
			child.writeFD = openFD
		}
		child.handleMu.Unlock()
	}
	// Insert the dentry into the tree.
	d.cacheNewChildLocked(child, name)
	appendNewChildDentry(ds, d, child)
	if d.cachedMetadataAuthoritative() {
		d.touchCMtime()
		d.dirents = nil
	}

	// Finally, construct a file description representing the created file.
	var childVFSFD *vfs.FileDescription
	if useRegularFileFD {
		fd, err := newRegularFileFD(mnt, child, opts.Flags)
		if err != nil {
			return nil, err
		}
		childVFSFD = &fd.vfsfd
	} else {
		// The opened fid was not stored in the dentry above, so wrap it in a
		// standalone handle for a special file FD.
		h := handle{
			file: openFile,
			fd:   -1,
		}
		if fdobj != nil {
			h.fd = int32(fdobj.Release())
		}
		fd, err := newSpecialFileFD(h, mnt, child, opts.Flags)
		if err != nil {
			h.close(ctx)
			return nil, err
		}
		childVFSFD = &fd.vfsfd
	}
	d.watches.Notify(ctx, name, linux.IN_CREATE, 0, vfs.PathEvent, false /* unlinked */)
	return childVFSFD, nil
}

// ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt.
func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) {
	var ds *[]*dentry
	fs.renameMu.RLock()
	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
	d, err := fs.resolveLocked(ctx, rp, &ds)
	if err != nil {
		return "", err
	}
	// Only symlinks have a target to read.
	if !d.isSymlink() {
		return "", linuxerr.EINVAL
	}
	return d.readlink(ctx, rp.Mount())
}

// RenameAt implements vfs.FilesystemImpl.RenameAt.
//
// It moves the child named oldName of oldParentVD to the location named by
// rp. fs.renameMu is held for writing for the whole operation. The only flag
// accepted is RENAME_NOREPLACE, and it is rejected with EINVAL under
// InteropModeShared.
func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error {
	// Resolve newParent first to verify that it's on this Mount.
	var ds *[]*dentry
	fs.renameMu.Lock()
	defer fs.renameMuUnlockAndCheckCaching(ctx, &ds)
	newParent, err := fs.walkParentDirLocked(ctx, rp, rp.Start().Impl().(*dentry), &ds)
	if err != nil {
		return err
	}

	// Reject every flag other than RENAME_NOREPLACE.
	if opts.Flags&^linux.RENAME_NOREPLACE != 0 {
		return linuxerr.EINVAL
	}
	if fs.opts.interop == InteropModeShared && opts.Flags&linux.RENAME_NOREPLACE != 0 {
		// Requires 9P support to synchronize with other remote filesystem
		// users.
		return linuxerr.EINVAL
	}

	newName := rp.Component()
	if newName == "." || newName == ".." {
		if opts.Flags&linux.RENAME_NOREPLACE != 0 {
			return syserror.EEXIST
		}
		return linuxerr.EBUSY
	}
	// Source and destination must be on the same mount.
	mnt := rp.Mount()
	if mnt != oldParentVD.Mount() {
		return linuxerr.EXDEV
	}
	if err := mnt.CheckBeginWrite(); err != nil {
		return err
	}
	defer mnt.EndWrite()

	oldParent := oldParentVD.Dentry().Impl().(*dentry)
	// Refresh oldParent's metadata before the permission check when the
	// cached copy cannot be trusted.
	if !oldParent.cachedMetadataAuthoritative() {
		if err := oldParent.updateFromGetattr(ctx); err != nil {
			return err
		}
	}
	creds := rp.Credentials()
	if err := oldParent.checkPermissions(creds, vfs.MayWrite|vfs.MayExec); err != nil {
		return err
	}

	vfsObj := rp.VirtualFilesystem()
	if err := fs.revalidateOne(ctx, vfsObj, newParent, newName, &ds); err != nil {
		return err
	}
	if err := fs.revalidateOne(ctx, vfsObj, oldParent, oldName, &ds); err != nil {
		return err
	}

	// We need a dentry representing the renamed file since, if it's a
	// directory, we need to check for write permission on it.
	oldParent.dirMu.Lock()
	defer oldParent.dirMu.Unlock()
	renamed, err := fs.getChildLocked(ctx, oldParent, oldName, &ds)
	if err != nil {
		return err
	}
	if err := oldParent.mayDelete(creds, renamed); err != nil {
		return err
	}
	if renamed.isDir() {
		// A directory can't be moved into itself or one of its descendants.
		if renamed == newParent || genericIsAncestorDentry(renamed, newParent) {
			return linuxerr.EINVAL
		}
		if oldParent != newParent {
			if err := renamed.checkPermissions(creds, vfs.MayWrite); err != nil {
				return err
			}
		}
	} else {
		if opts.MustBeDir || rp.MustBeDir() {
			return syserror.ENOTDIR
		}
	}

	// newParent.dirMu is only taken separately when it is a different dentry;
	// otherwise it is the lock already held above.
	if oldParent != newParent {
		if err := newParent.checkPermissions(creds, vfs.MayWrite|vfs.MayExec); err != nil {
			return err
		}
		newParent.dirMu.Lock()
		defer newParent.dirMu.Unlock()
	}
	if newParent.isDeleted() {
		return syserror.ENOENT
	}
	// ENOENT here just means that nothing is being replaced.
	replaced, err := fs.getChildLocked(ctx, newParent, newName, &ds)
	if err != nil && !linuxerr.Equals(linuxerr.ENOENT, err) {
		return err
	}
	var replacedVFSD *vfs.Dentry
	if replaced != nil {
		if opts.Flags&linux.RENAME_NOREPLACE != 0 {
			return syserror.EEXIST
		}
		replacedVFSD = &replaced.vfsd
		if replaced.isDir() {
			if !renamed.isDir() {
				return syserror.EISDIR
			}
			if genericIsAncestorDentry(replaced, renamed) {
				return linuxerr.ENOTEMPTY
			}
		} else {
			if rp.MustBeDir() || renamed.isDir() {
				return syserror.ENOTDIR
			}
		}
	}

	// Renaming a file onto itself is a no-op.
	if oldParent == newParent && oldName == newName {
		return nil
	}
	mntns := vfs.MountNamespaceFromContext(ctx)
	defer mntns.DecRef(ctx)
	if err := vfsObj.PrepareRenameDentry(mntns, &renamed.vfsd, replacedVFSD); err != nil {
		return err
	}

	// Update the remote filesystem.
	if !renamed.isSynthetic() {
		if err := renamed.file.rename(ctx, newParent.file, newName); err != nil {
			vfsObj.AbortRenameDentry(&renamed.vfsd, replacedVFSD)
			return err
		}
	} else if replaced != nil && !replaced.isSynthetic() {
		// We are replacing an existing real file with a synthetic one, so we
		// need to unlink the former.
		flags := uint32(0)
		if replaced.isDir() {
			flags = linux.AT_REMOVEDIR
		}
		if err := newParent.file.unlinkAt(ctx, newName, flags); err != nil {
			vfsObj.AbortRenameDentry(&renamed.vfsd, replacedVFSD)
			return err
		}
	}

	// Update the dentry tree.
	vfsObj.CommitRenameReplaceDentry(ctx, &renamed.vfsd, replacedVFSD)
	if replaced != nil {
		replaced.setDeleted()
		if replaced.isSynthetic() {
			newParent.syntheticChildren--
			replaced.decRefNoCaching()
		}
		ds = appendDentry(ds, replaced)
	}
	oldParent.cacheNegativeLookupLocked(oldName)
	// We don't use newParent.cacheNewChildLocked() since we don't want to mess
	// with reference counts and queue oldParent for checkCachingLocked if the
	// parent isn't actually changing.
	if oldParent != newParent {
		oldParent.decRefNoCaching()
		newParent.IncRef()
		ds = appendDentry(ds, newParent)
		ds = appendDentry(ds, oldParent)
		if renamed.isSynthetic() {
			oldParent.syntheticChildren--
			newParent.syntheticChildren++
		}
		renamed.parent = newParent
	}
	renamed.name = newName
	if newParent.children == nil {
		newParent.children = make(map[string]*dentry)
	}
	newParent.children[newName] = renamed

	// Update metadata.
	if renamed.cachedMetadataAuthoritative() {
		renamed.touchCtime()
	}
	if oldParent.cachedMetadataAuthoritative() {
		oldParent.dirents = nil
		oldParent.touchCMtime()
		if renamed.isDir() {
			oldParent.decLinks()
		}
	}
	if newParent.cachedMetadataAuthoritative() {
		newParent.dirents = nil
		newParent.touchCMtime()
		if renamed.isDir() && (replaced == nil || !replaced.isDir()) {
			// Increase the link count if we did not replace another directory.
			newParent.incLinks()
		}
	}
	vfs.InotifyRename(ctx, &renamed.watches, &oldParent.watches, &newParent.watches, oldName, newName, renamed.isDir())
	return nil
}

// RmdirAt implements vfs.FilesystemImpl.RmdirAt.
//
// It delegates to fs.unlinkAt with dir set to true.
func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error {
	return fs.unlinkAt(ctx, rp, true /* dir */)
}

// SetStatAt implements vfs.FilesystemImpl.SetStatAt.
//
// fs.renameMu is released before any inotify event is generated for the
// change.
func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error {
	var ds *[]*dentry
	fs.renameMu.RLock()
	d, err := fs.resolveLocked(ctx, rp, &ds)
	if err != nil {
		fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
		return err
	}
	err = d.setStat(ctx, rp.Credentials(), &opts, rp.Mount())
	fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
	if err != nil {
		return err
	}

	// Only notify watchers if the stat mask maps to an inotify event.
	if ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 {
		d.InotifyWithParent(ctx, ev, 0, vfs.InodeEvent)
	}
	return nil
}

// StatAt implements vfs.FilesystemImpl.StatAt.
1422 func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) { 1423 var ds *[]*dentry 1424 fs.renameMu.RLock() 1425 defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds) 1426 d, err := fs.resolveLocked(ctx, rp, &ds) 1427 if err != nil { 1428 return linux.Statx{}, err 1429 } 1430 // Since walking updates metadata for all traversed dentries under 1431 // InteropModeShared, including the returned one, we can return cached 1432 // metadata here regardless of fs.opts.interop. 1433 var stat linux.Statx 1434 d.statTo(&stat) 1435 return stat, nil 1436 } 1437 1438 // StatFSAt implements vfs.FilesystemImpl.StatFSAt. 1439 func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) { 1440 var ds *[]*dentry 1441 fs.renameMu.RLock() 1442 defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds) 1443 d, err := fs.resolveLocked(ctx, rp, &ds) 1444 if err != nil { 1445 return linux.Statfs{}, err 1446 } 1447 // If d is synthetic, invoke statfs on the first ancestor of d that isn't. 1448 for d.isSynthetic() { 1449 d = d.parent 1450 } 1451 fsstat, err := d.file.statFS(ctx) 1452 if err != nil { 1453 return linux.Statfs{}, err 1454 } 1455 nameLen := uint64(fsstat.NameLength) 1456 if nameLen > maxFilenameLen { 1457 nameLen = maxFilenameLen 1458 } 1459 return linux.Statfs{ 1460 // This is primarily for distinguishing a gofer file system in 1461 // tests. Testing is important, so instead of defining 1462 // something completely random, use a standard value. 1463 Type: linux.V9FS_MAGIC, 1464 BlockSize: int64(fsstat.BlockSize), 1465 Blocks: fsstat.Blocks, 1466 BlocksFree: fsstat.BlocksFree, 1467 BlocksAvailable: fsstat.BlocksAvailable, 1468 Files: fsstat.Files, 1469 FilesFree: fsstat.FilesFree, 1470 NameLength: nameLen, 1471 }, nil 1472 } 1473 1474 // SymlinkAt implements vfs.FilesystemImpl.SymlinkAt. 
1475 func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error { 1476 return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, name string, _ **[]*dentry) error { 1477 creds := rp.Credentials() 1478 _, err := parent.file.symlink(ctx, target, name, (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID)) 1479 return err 1480 }, nil) 1481 } 1482 1483 // UnlinkAt implements vfs.FilesystemImpl.UnlinkAt. 1484 func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error { 1485 return fs.unlinkAt(ctx, rp, false /* dir */) 1486 } 1487 1488 // BoundEndpointAt implements vfs.FilesystemImpl.BoundEndpointAt. 1489 func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) { 1490 var ds *[]*dentry 1491 fs.renameMu.RLock() 1492 defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds) 1493 d, err := fs.resolveLocked(ctx, rp, &ds) 1494 if err != nil { 1495 return nil, err 1496 } 1497 if err := d.checkPermissions(rp.Credentials(), vfs.MayWrite); err != nil { 1498 return nil, err 1499 } 1500 if d.isSocket() { 1501 if !d.isSynthetic() { 1502 d.IncRef() 1503 ds = appendDentry(ds, d) 1504 return &endpoint{ 1505 dentry: d, 1506 path: opts.Addr, 1507 }, nil 1508 } 1509 if d.endpoint != nil { 1510 return d.endpoint, nil 1511 } 1512 } 1513 return nil, linuxerr.ECONNREFUSED 1514 } 1515 1516 // ListXattrAt implements vfs.FilesystemImpl.ListXattrAt. 1517 func (fs *filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) { 1518 var ds *[]*dentry 1519 fs.renameMu.RLock() 1520 defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds) 1521 d, err := fs.resolveLocked(ctx, rp, &ds) 1522 if err != nil { 1523 return nil, err 1524 } 1525 return d.listXattr(ctx, rp.Credentials(), size) 1526 } 1527 1528 // GetXattrAt implements vfs.FilesystemImpl.GetXattrAt. 
1529 func (fs *filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetXattrOptions) (string, error) { 1530 var ds *[]*dentry 1531 fs.renameMu.RLock() 1532 defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds) 1533 d, err := fs.resolveLocked(ctx, rp, &ds) 1534 if err != nil { 1535 return "", err 1536 } 1537 return d.getXattr(ctx, rp.Credentials(), &opts) 1538 } 1539 1540 // SetXattrAt implements vfs.FilesystemImpl.SetXattrAt. 1541 func (fs *filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetXattrOptions) error { 1542 var ds *[]*dentry 1543 fs.renameMu.RLock() 1544 d, err := fs.resolveLocked(ctx, rp, &ds) 1545 if err != nil { 1546 fs.renameMuRUnlockAndCheckCaching(ctx, &ds) 1547 return err 1548 } 1549 err = d.setXattr(ctx, rp.Credentials(), &opts) 1550 fs.renameMuRUnlockAndCheckCaching(ctx, &ds) 1551 if err != nil { 1552 return err 1553 } 1554 1555 d.InotifyWithParent(ctx, linux.IN_ATTRIB, 0, vfs.InodeEvent) 1556 return nil 1557 } 1558 1559 // RemoveXattrAt implements vfs.FilesystemImpl.RemoveXattrAt. 1560 func (fs *filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error { 1561 var ds *[]*dentry 1562 fs.renameMu.RLock() 1563 d, err := fs.resolveLocked(ctx, rp, &ds) 1564 if err != nil { 1565 fs.renameMuRUnlockAndCheckCaching(ctx, &ds) 1566 return err 1567 } 1568 err = d.removeXattr(ctx, rp.Credentials(), name) 1569 fs.renameMuRUnlockAndCheckCaching(ctx, &ds) 1570 if err != nil { 1571 return err 1572 } 1573 1574 d.InotifyWithParent(ctx, linux.IN_ATTRIB, 0, vfs.InodeEvent) 1575 return nil 1576 } 1577 1578 // PrependPath implements vfs.FilesystemImpl.PrependPath. 
1579 func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error { 1580 fs.renameMu.RLock() 1581 defer fs.renameMu.RUnlock() 1582 return genericPrependPath(vfsroot, vd.Mount(), vd.Dentry().Impl().(*dentry), b) 1583 } 1584 1585 type mopt struct { 1586 key string 1587 value interface{} 1588 } 1589 1590 func (m mopt) String() string { 1591 if m.value == nil { 1592 return fmt.Sprintf("%s", m.key) 1593 } 1594 return fmt.Sprintf("%s=%v", m.key, m.value) 1595 } 1596 1597 // MountOptions implements vfs.FilesystemImpl.MountOptions. 1598 func (fs *filesystem) MountOptions() string { 1599 optsKV := []mopt{ 1600 {moptTransport, transportModeFD}, // Only valid value, currently. 1601 {moptReadFD, fs.opts.fd}, // Currently, read and write FD are the same. 1602 {moptWriteFD, fs.opts.fd}, // Currently, read and write FD are the same. 1603 {moptAname, fs.opts.aname}, 1604 {moptDfltUID, fs.opts.dfltuid}, 1605 {moptDfltGID, fs.opts.dfltgid}, 1606 {moptMsize, fs.opts.msize}, 1607 {moptVersion, fs.opts.version}, 1608 {moptDentryCacheLimit, fs.opts.maxCachedDentries}, 1609 } 1610 1611 switch fs.opts.interop { 1612 case InteropModeExclusive: 1613 optsKV = append(optsKV, mopt{moptCache, cacheFSCache}) 1614 case InteropModeWritethrough: 1615 optsKV = append(optsKV, mopt{moptCache, cacheFSCacheWritethrough}) 1616 case InteropModeShared: 1617 if fs.opts.regularFilesUseSpecialFileFD { 1618 optsKV = append(optsKV, mopt{moptCache, cacheNone}) 1619 } else { 1620 optsKV = append(optsKV, mopt{moptCache, cacheRemoteRevalidating}) 1621 } 1622 } 1623 if fs.opts.forcePageCache { 1624 optsKV = append(optsKV, mopt{moptForcePageCache, nil}) 1625 } 1626 if fs.opts.limitHostFDTranslation { 1627 optsKV = append(optsKV, mopt{moptLimitHostFDTranslation, nil}) 1628 } 1629 if fs.opts.overlayfsStaleRead { 1630 optsKV = append(optsKV, mopt{moptOverlayfsStaleRead, nil}) 1631 } 1632 1633 opts := make([]string, 0, len(optsKV)) 1634 for _, opt := range optsKV { 1635 
opts = append(opts, opt.String()) 1636 } 1637 return strings.Join(opts, ",") 1638 }