github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/fsimpl/gofer/directfs_dentry.go (about) 1 // Copyright 2022 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package gofer 16 17 import ( 18 "fmt" 19 "math" 20 "path" 21 "path/filepath" 22 23 "golang.org/x/sys/unix" 24 "github.com/metacubex/gvisor/pkg/abi/linux" 25 "github.com/metacubex/gvisor/pkg/atomicbitops" 26 "github.com/metacubex/gvisor/pkg/context" 27 "github.com/metacubex/gvisor/pkg/fsutil" 28 "github.com/metacubex/gvisor/pkg/lisafs" 29 "github.com/metacubex/gvisor/pkg/log" 30 "github.com/metacubex/gvisor/pkg/sentry/kernel/auth" 31 "github.com/metacubex/gvisor/pkg/sentry/socket/unix/transport" 32 "github.com/metacubex/gvisor/pkg/sentry/vfs" 33 ) 34 35 // LINT.IfChange 36 37 const ( 38 hostOpenFlags = unix.O_NOFOLLOW | unix.O_CLOEXEC 39 ) 40 41 // tryOpen tries to open() with different modes in the following order: 42 // 1. RDONLY | NONBLOCK: for all files, directories, ro mounts, FIFOs. 43 // Use non-blocking to prevent getting stuck inside open(2) for 44 // FIFOs. This option has no effect on regular files. 45 // 2. PATH: for symlinks, sockets. 
46 func tryOpen(open func(int) (int, error)) (int, error) { 47 flags := []int{ 48 unix.O_RDONLY | unix.O_NONBLOCK, 49 unix.O_PATH, 50 } 51 52 var ( 53 hostFD int 54 err error 55 ) 56 for _, flag := range flags { 57 hostFD, err = open(flag | hostOpenFlags) 58 if err == nil { 59 return hostFD, nil 60 } 61 62 if err == unix.ENOENT { 63 // File doesn't exist, no point in retrying. 64 break 65 } 66 } 67 return -1, err 68 } 69 70 // getDirectfsRootDentry creates a new dentry representing the root dentry for 71 // this mountpoint. getDirectfsRootDentry takes ownership of rootHostFD and 72 // rootControlFD. 73 func (fs *filesystem) getDirectfsRootDentry(ctx context.Context, rootHostFD int, rootControlFD lisafs.ClientFD) (*dentry, error) { 74 d, err := fs.newDirectfsDentry(rootHostFD) 75 if err != nil { 76 log.Warningf("newDirectfsDentry failed for mount point dentry: %v", err) 77 rootControlFD.Close(ctx, false /* flush */) 78 return nil, err 79 } 80 d.impl.(*directfsDentry).controlFDLisa = rootControlFD 81 return d, nil 82 } 83 84 // directfsDentry is a host dentry implementation. It represents a dentry 85 // backed by a host file descriptor. All operations are directly performed on 86 // the host. A gofer is only involved for some operations on the mount point 87 // dentry (when dentry.parent = nil). We are forced to fall back to the gofer 88 // due to the lack of procfs in the sandbox process. 89 // 90 // +stateify savable 91 type directfsDentry struct { 92 dentry 93 94 // controlFD is the host FD to this file. controlFD is immutable until 95 // destruction, which is synchronized with dentry.handleMu. 96 controlFD int 97 98 // controlFDLisa is a lisafs control FD on this dentry. 99 // This is used to fallback to using lisafs RPCs in the following cases: 100 // * When parent dentry is required to perform operations but 101 // dentry.parent = nil (root dentry). 102 // * For path-based syscalls (like connect(2) and bind(2)) on sockets. 
103 // 104 // For the root dentry, controlFDLisa is always set and is immutable. 105 // For sockets, controlFDLisa is protected by dentry.handleMu and is 106 // immutable after initialization. 107 controlFDLisa lisafs.ClientFD `state:"nosave"` 108 } 109 110 // newDirectfsDentry creates a new dentry representing the given file. The dentry 111 // initially has no references, but is not cached; it is the caller's 112 // responsibility to set the dentry's reference count and/or call 113 // dentry.checkCachingLocked() as appropriate. 114 // newDirectDentry takes ownership of controlFD. 115 func (fs *filesystem) newDirectfsDentry(controlFD int) (*dentry, error) { 116 var stat unix.Stat_t 117 if err := unix.Fstat(controlFD, &stat); err != nil { 118 log.Warningf("failed to fstat(2) FD %d: %v", controlFD, err) 119 _ = unix.Close(controlFD) 120 return nil, err 121 } 122 inoKey := inoKeyFromStat(&stat) 123 d := &directfsDentry{ 124 dentry: dentry{ 125 fs: fs, 126 inoKey: inoKey, 127 ino: fs.inoFromKey(inoKey), 128 mode: atomicbitops.FromUint32(stat.Mode), 129 uid: atomicbitops.FromUint32(stat.Uid), 130 gid: atomicbitops.FromUint32(stat.Gid), 131 blockSize: atomicbitops.FromUint32(uint32(stat.Blksize)), 132 readFD: atomicbitops.FromInt32(-1), 133 writeFD: atomicbitops.FromInt32(-1), 134 mmapFD: atomicbitops.FromInt32(-1), 135 size: atomicbitops.FromUint64(uint64(stat.Size)), 136 atime: atomicbitops.FromInt64(dentryTimestampFromUnix(stat.Atim)), 137 mtime: atomicbitops.FromInt64(dentryTimestampFromUnix(stat.Mtim)), 138 ctime: atomicbitops.FromInt64(dentryTimestampFromUnix(stat.Ctim)), 139 nlink: atomicbitops.FromUint32(uint32(stat.Nlink)), 140 }, 141 controlFD: controlFD, 142 } 143 d.dentry.init(d) 144 fs.syncMu.Lock() 145 fs.syncableDentries.PushBack(&d.syncableListEntry) 146 fs.syncMu.Unlock() 147 return &d.dentry, nil 148 } 149 150 // Precondition: fs.renameMu is locked. 
func (d *directfsDentry) openHandle(ctx context.Context, flags uint32) (handle, error) {
	// openHandle returns a new host FD for this file opened with flags. The
	// mount point (no parent) is opened through lisafs; every other dentry is
	// re-opened by name relative to its parent's control FD.
	parent := d.parent.Load()
	if parent == nil {
		// This is a mount point. We don't have parent. Fallback to using lisafs.
		if !d.controlFDLisa.Ok() {
			panic("directfsDentry.controlFDLisa is not set for mount point dentry")
		}
		openFD, hostFD, err := d.controlFDLisa.OpenAt(ctx, flags)
		if err != nil {
			return noHandle, err
		}
		// Only the donated host FD is used from here on; the lisafs open FD is
		// released right away.
		d.fs.client.CloseFD(ctx, openFD, true /* flush */)
		if hostFD < 0 {
			log.Warningf("gofer did not donate an FD for mount point")
			return noHandle, unix.EIO
		}
		return handle{fd: int32(hostFD)}, nil
	}

	// The only way to re-open an FD with different flags is via procfs or
	// openat(2) from the parent. Procfs does not exist here. So use parent.
	flags |= hostOpenFlags
	openFD, err := unix.Openat(parent.impl.(*directfsDentry).controlFD, d.name, int(flags), 0)
	if err != nil {
		return noHandle, err
	}
	return handle{fd: int32(openFD)}, nil
}

// ensureLisafsControlFD makes sure d.controlFDLisa is set. If it is not, the
// path from the mount point down to d is walked over lisafs and the control FD
// of the final component is stored in d.controlFDLisa.
//
// It returns ENOENT if a component does not exist and ELOOP if the walk
// reports a symlink component. It panics if the mount point dentry has no
// lisafs control FD.
//
// Precondition: fs.renameMu is locked.
func (d *directfsDentry) ensureLisafsControlFD(ctx context.Context) error {
	d.handleMu.Lock()
	defer d.handleMu.Unlock()
	if d.controlFDLisa.Ok() {
		return nil
	}

	// Collect the names from d up to (but excluding) the mount point. They are
	// gathered leaf-first and reversed below.
	var names []string
	root := d
	for root.parent.Load() != nil {
		names = append(names, root.name)
		root = root.parent.Load().impl.(*directfsDentry)
	}
	if !root.controlFDLisa.Ok() {
		panic("controlFDLisa is not set for mount point dentry")
	}
	if len(names) == 0 {
		return nil // d == root
	}
	// Reverse names.
	last := len(names) - 1
	for i := 0; i < len(names)/2; i++ {
		names[i], names[last-i] = names[last-i], names[i]
	}
	status, inodes, err := root.controlFDLisa.WalkMultiple(ctx, names)
	if err != nil {
		return err
	}
	defer func() {
		// Close everything except for inodes[last] if it exists.
		for i := 0; i < len(inodes) && i < last; i++ {
			// flush is true only on the final CloseFD call this loop makes.
			flush := i == last-1 || i == len(inodes)-1
			d.fs.client.CloseFD(ctx, inodes[i].ControlFD, flush)
		}
	}()
	switch status {
	case lisafs.WalkComponentDoesNotExist:
		return unix.ENOENT
	case lisafs.WalkComponentSymlink:
		log.Warningf("intermediate path component was a symlink? names = %v, inodes = %+v", names, inodes)
		return unix.ELOOP
	case lisafs.WalkSuccess:
		// inodes[last] is excluded from the deferred cleanup above; d keeps it.
		d.controlFDLisa = d.fs.client.NewFD(inodes[last].ControlFD)
		return nil
	}
	panic("unreachable")
}

// updateMetadataLocked refreshes d's cached metadata from an fstat(2) on the
// host. If h holds a valid FD it is used; otherwise an FD is chosen from d's
// own handles, falling back to the control FD.
//
// Precondition: d.metadataMu must be locked.
//
// +checklocks:d.metadataMu
func (d *directfsDentry) updateMetadataLocked(h handle) error {
	// handleMuRLocked records whether d.handleMu is still read-locked when
	// fstat(2) is issued, i.e. whether h.fd was borrowed from d's read or
	// write FD.
	handleMuRLocked := false
	if h.fd < 0 {
		// Use open FDs in preference to the control FD. Control FDs may be opened
		// with O_PATH. This may be significantly more efficient in some
		// implementations. Prefer a writable FD over a readable one since some
		// filesystem implementations may update a writable FD's metadata after
		// writes, without making metadata updates immediately visible to read-only
		// FDs representing the same file.
		d.handleMu.RLock()
		switch {
		case d.writeFD.RacyLoad() >= 0:
			h.fd = d.writeFD.RacyLoad()
			handleMuRLocked = true
		case d.readFD.RacyLoad() >= 0:
			h.fd = d.readFD.RacyLoad()
			handleMuRLocked = true
		default:
			// The control FD is used without holding handleMu across the
			// fstat(2) call.
			h.fd = int32(d.controlFD)
			d.handleMu.RUnlock()
		}
	}

	var stat unix.Stat_t
	err := unix.Fstat(int(h.fd), &stat)
	if handleMuRLocked {
		// handleMu must be released before updateMetadataFromStatLocked().
		d.handleMu.RUnlock() // +checklocksforce: complex case.
	}
	if err != nil {
		return err
	}
	return d.updateMetadataFromStatLocked(&stat)
}

// Precondition: fs.renameMu is locked if d is a socket.
268 func (d *directfsDentry) chmod(ctx context.Context, mode uint16) error { 269 if !d.isSocket() { 270 return unix.Fchmod(d.controlFD, uint32(mode)) 271 } 272 273 // fchmod(2) on socket files created via bind(2) fails. We need to 274 // fchmodat(2) it from its parent. 275 if parent := d.parent.Load(); parent != nil { 276 // We have parent FD, just use that. Note that AT_SYMLINK_NOFOLLOW flag is 277 // currently not supported. So we don't use it. 278 return unix.Fchmodat(parent.impl.(*directfsDentry).controlFD, d.name, uint32(mode), 0 /* flags */) 279 } 280 281 // This is a mount point socket. We don't have a parent FD. Fallback to using 282 // lisafs. 283 if !d.controlFDLisa.Ok() { 284 panic("directfsDentry.controlFDLisa is not set for mount point socket") 285 } 286 287 return chmod(ctx, d.controlFDLisa, mode) 288 } 289 290 // Preconditions: 291 // - d.handleMu is locked if d is a regular file. 292 // - fs.renameMu is locked if d is a symlink. 293 func (d *directfsDentry) utimensat(ctx context.Context, stat *linux.Statx) error { 294 if stat.Mask&(linux.STATX_ATIME|linux.STATX_MTIME) == 0 { 295 return nil 296 } 297 298 utimes := [2]unix.Timespec{ 299 {Sec: 0, Nsec: unix.UTIME_OMIT}, 300 {Sec: 0, Nsec: unix.UTIME_OMIT}, 301 } 302 if stat.Mask&unix.STATX_ATIME != 0 { 303 utimes[0].Sec = stat.Atime.Sec 304 utimes[0].Nsec = int64(stat.Atime.Nsec) 305 } 306 if stat.Mask&unix.STATX_MTIME != 0 { 307 utimes[1].Sec = stat.Mtime.Sec 308 utimes[1].Nsec = int64(stat.Mtime.Nsec) 309 } 310 311 if !d.isSymlink() { 312 hostFD := d.controlFD 313 if d.isRegularFile() { 314 // utimensat(2) requires a writable FD for regular files. See BUGS 315 // section. dentry.prepareSetStat() should have acquired a writable FD. 316 hostFD = int(d.writeFD.RacyLoad()) 317 } 318 // Non-symlinks can operate directly on the fd using an empty name. 319 return fsutil.Utimensat(hostFD, "", utimes, 0) 320 } 321 322 // utimensat operates different that other syscalls. 
To operate on a 323 // symlink it *requires* AT_SYMLINK_NOFOLLOW with dirFD and a non-empty 324 // name. 325 if parent := d.parent.Load(); parent != nil { 326 return fsutil.Utimensat(parent.impl.(*directfsDentry).controlFD, d.name, utimes, unix.AT_SYMLINK_NOFOLLOW) 327 } 328 329 // This is a mount point symlink. We don't have a parent FD. Fallback to 330 // using lisafs. 331 if !d.controlFDLisa.Ok() { 332 panic("directfsDentry.controlFDLisa is not set for mount point symlink") 333 } 334 335 setStat := linux.Statx{ 336 Mask: stat.Mask & (linux.STATX_ATIME | linux.STATX_MTIME), 337 Atime: stat.Atime, 338 Mtime: stat.Mtime, 339 } 340 _, failureErr, err := d.controlFDLisa.SetStat(ctx, &setStat) 341 if err != nil { 342 return err 343 } 344 return failureErr 345 } 346 347 // Precondition: fs.renameMu is locked. 348 func (d *directfsDentry) prepareSetStat(ctx context.Context, stat *linux.Statx) error { 349 if stat.Mask&unix.STATX_SIZE != 0 || 350 (stat.Mask&(unix.STATX_ATIME|unix.STATX_MTIME) != 0 && d.isRegularFile()) { 351 // Need to ensure a writable FD is available. See setStatLocked() to 352 // understand why. 353 return d.ensureSharedHandle(ctx, false /* read */, true /* write */, false /* trunc */) 354 } 355 return nil 356 } 357 358 // Preconditions: 359 // - d.handleMu is locked. 360 // - fs.renameMu is locked. 361 func (d *directfsDentry) setStatLocked(ctx context.Context, stat *linux.Statx) (failureMask uint32, failureErr error) { 362 if stat.Mask&unix.STATX_MODE != 0 { 363 if err := d.chmod(ctx, stat.Mode&^unix.S_IFMT); err != nil { 364 failureMask |= unix.STATX_MODE 365 failureErr = err 366 } 367 } 368 369 if stat.Mask&unix.STATX_SIZE != 0 { 370 // ftruncate(2) requires a writable FD. 
371 if err := unix.Ftruncate(int(d.writeFD.RacyLoad()), int64(stat.Size)); err != nil { 372 failureMask |= unix.STATX_SIZE 373 failureErr = err 374 } 375 } 376 377 if err := d.utimensat(ctx, stat); err != nil { 378 failureMask |= (stat.Mask & (unix.STATX_ATIME | unix.STATX_MTIME)) 379 failureErr = err 380 } 381 382 if stat.Mask&(unix.STATX_UID|unix.STATX_GID) != 0 { 383 // "If the owner or group is specified as -1, then that ID is not changed" 384 // - chown(2) 385 uid := -1 386 if stat.Mask&unix.STATX_UID != 0 { 387 uid = int(stat.UID) 388 } 389 gid := -1 390 if stat.Mask&unix.STATX_GID != 0 { 391 gid = int(stat.GID) 392 } 393 if err := fchown(d.controlFD, uid, gid); err != nil { 394 failureMask |= stat.Mask & (unix.STATX_UID | unix.STATX_GID) 395 failureErr = err 396 } 397 } 398 return 399 } 400 401 func fchown(fd, uid, gid int) error { 402 return unix.Fchownat(fd, "", uid, gid, unix.AT_EMPTY_PATH|unix.AT_SYMLINK_NOFOLLOW) 403 } 404 405 // Precondition: d.handleMu must be locked. 406 func (d *directfsDentry) destroy(ctx context.Context) { 407 if d.controlFD >= 0 { 408 _ = unix.Close(d.controlFD) 409 d.controlFD = -1 410 } 411 if d.controlFDLisa.Ok() { 412 d.controlFDLisa.Close(ctx, true /* flush */) 413 } 414 } 415 416 func (d *directfsDentry) getHostChild(name string) (*dentry, error) { 417 childFD, err := tryOpen(func(flags int) (int, error) { 418 return unix.Openat(d.controlFD, name, flags, 0) 419 }) 420 if err != nil { 421 return nil, err 422 } 423 return d.fs.newDirectfsDentry(childFD) 424 } 425 426 func (d *directfsDentry) getXattr(name string, size uint64) (string, error) { 427 data := make([]byte, size) 428 if _, err := unix.Fgetxattr(d.controlFD, name, data); err != nil { 429 return "", err 430 } 431 return string(data), nil 432 } 433 434 // getCreatedChild opens the newly created child, sets its uid/gid, constructs 435 // a disconnected dentry and returns it. 
436 func (d *directfsDentry) getCreatedChild(name string, uid, gid int, isDir bool) (*dentry, error) { 437 unlinkFlags := 0 438 extraOpenFlags := 0 439 if isDir { 440 extraOpenFlags |= unix.O_DIRECTORY 441 unlinkFlags |= unix.AT_REMOVEDIR 442 } 443 deleteChild := func() { 444 // Best effort attempt to remove the newly created child on failure. 445 if err := unix.Unlinkat(d.controlFD, name, unlinkFlags); err != nil { 446 log.Warningf("error unlinking newly created child %q after failure: %v", filepath.Join(genericDebugPathname(&d.dentry), name), err) 447 } 448 } 449 450 childFD, err := tryOpen(func(flags int) (int, error) { 451 return unix.Openat(d.controlFD, name, flags|extraOpenFlags, 0) 452 }) 453 if err != nil { 454 deleteChild() 455 return nil, err 456 } 457 458 // "If the owner or group is specified as -1, then that ID is not changed" 459 // - chown(2). Only bother making the syscall if the owner is changing. 460 if uid != -1 || gid != -1 { 461 if err := fchown(childFD, uid, gid); err != nil { 462 deleteChild() 463 _ = unix.Close(childFD) 464 return nil, err 465 } 466 } 467 child, err := d.fs.newDirectfsDentry(childFD) 468 if err != nil { 469 // Ownership of childFD was passed to newDirectDentry(), so no need to 470 // clean that up. 471 deleteChild() 472 return nil, err 473 } 474 return child, nil 475 } 476 477 func (d *directfsDentry) mknod(ctx context.Context, name string, creds *auth.Credentials, opts *vfs.MknodOptions) (*dentry, error) { 478 if _, ok := opts.Endpoint.(transport.HostBoundEndpoint); ok { 479 return d.bindAt(ctx, name, creds, opts) 480 } 481 482 // From mknod(2) man page: 483 // "EPERM: [...] if the filesystem containing pathname does not support 484 // the type of node requested." 
485 if opts.Mode.FileType() != linux.ModeRegular { 486 return nil, unix.EPERM 487 } 488 489 if err := unix.Mknodat(d.controlFD, name, uint32(opts.Mode), 0); err != nil { 490 return nil, err 491 } 492 return d.getCreatedChild(name, int(creds.EffectiveKUID), int(creds.EffectiveKGID), false /* isDir */) 493 } 494 495 // Precondition: opts.Endpoint != nil and is transport.HostBoundEndpoint type. 496 func (d *directfsDentry) bindAt(ctx context.Context, name string, creds *auth.Credentials, opts *vfs.MknodOptions) (*dentry, error) { 497 // There are no filesystems mounted in the sandbox process's mount namespace. 498 // So we can't perform absolute path traversals. So fallback to using lisafs. 499 if err := d.ensureLisafsControlFD(ctx); err != nil { 500 return nil, err 501 } 502 sockType := opts.Endpoint.(transport.Endpoint).Type() 503 childInode, boundSocketFD, err := d.controlFDLisa.BindAt(ctx, sockType, name, opts.Mode, lisafs.UID(creds.EffectiveKUID), lisafs.GID(creds.EffectiveKGID)) 504 if err != nil { 505 return nil, err 506 } 507 d.fs.client.CloseFD(ctx, childInode.ControlFD, true /* flush */) 508 // Update opts.Endpoint that it is bound. 509 hbep := opts.Endpoint.(transport.HostBoundEndpoint) 510 if err := hbep.SetBoundSocketFD(ctx, boundSocketFD); err != nil { 511 if err := unix.Unlinkat(d.controlFD, name, 0); err != nil { 512 log.Warningf("error unlinking newly created socket %q after failure: %v", filepath.Join(genericDebugPathname(&d.dentry), name), err) 513 } 514 return nil, err 515 } 516 // Socket already has the right UID/GID set, so use uid = gid = -1. 517 child, err := d.getCreatedChild(name, -1 /* uid */, -1 /* gid */, false /* isDir */) 518 if err != nil { 519 hbep.ResetBoundSocketFD(ctx) 520 return nil, err 521 } 522 // Set the endpoint on the newly created child dentry. 523 child.endpoint = opts.Endpoint 524 return child, nil 525 } 526 527 // Precondition: d.fs.renameMu must be locked. 
func (d *directfsDentry) link(target *directfsDentry, name string) (*dentry, error) {
	// link creates a hard link called name in directory d that refers to
	// target, and returns a new dentry for the created entry.
	//
	// Using linkat(targetFD, "", newdirfd, name, AT_EMPTY_PATH) requires
	// CAP_DAC_READ_SEARCH in the *root* userns. With directfs, the sandbox
	// process has CAP_DAC_READ_SEARCH in its own userns. But the sandbox is
	// running in a different userns. So we can't use AT_EMPTY_PATH. Fallback to
	// using olddirfd to call linkat(2).
	// Also note that d and target are from the same mount. Given target is a
	// non-directory and d is a directory, target.parent must exist.
	if err := unix.Linkat(target.parent.Load().impl.(*directfsDentry).controlFD, target.name, d.controlFD, name, 0); err != nil {
		return nil, err
	}
	// Note that we don't need to set uid/gid for the new child. This is a hard
	// link. The original file already has the right owner.
	// TODO(gvisor.dev/issue/6739): Hard linked dentries should share the same
	// inode fields.
	return d.getCreatedChild(name, -1 /* uid */, -1 /* gid */, false /* isDir */)
}

// mkdir creates a directory called name under d with the given mode, then
// returns a dentry for it with its owner set to uid/gid via getCreatedChild.
func (d *directfsDentry) mkdir(name string, mode linux.FileMode, uid auth.KUID, gid auth.KGID) (*dentry, error) {
	if err := unix.Mkdirat(d.controlFD, name, uint32(mode)); err != nil {
		return nil, err
	}
	return d.getCreatedChild(name, int(uid), int(gid), true /* isDir */)
}

// symlink creates a symbolic link called name under d pointing at target, then
// returns a dentry for it with its owner set from creds' effective KUID/KGID.
func (d *directfsDentry) symlink(name, target string, creds *auth.Credentials) (*dentry, error) {
	if err := unix.Symlinkat(target, d.controlFD, name); err != nil {
		return nil, err
	}
	return d.getCreatedChild(name, int(creds.EffectiveKUID), int(creds.EffectiveKGID), false /* isDir */)
}

// openCreate exclusively creates (O_CREAT|O_EXCL) the file called name under d
// and returns both a dentry for it and a handle wrapping the FD obtained from
// the creating open. The file type bits of mode are masked out before the
// open.
func (d *directfsDentry) openCreate(name string, accessFlags uint32, mode linux.FileMode, uid auth.KUID, gid auth.KGID) (*dentry, handle, error) {
	createFlags := unix.O_CREAT | unix.O_EXCL | int(accessFlags) | hostOpenFlags
	childHandleFD, err := unix.Openat(d.controlFD, name, createFlags, uint32(mode&^linux.FileTypeMask))
	if err != nil {
		return nil, noHandle, err
	}

	child, err := d.getCreatedChild(name, int(uid), int(gid), false /* isDir */)
	if err != nil {
		// getCreatedChild attempts to unlink the new file on failure; the FD
		// opened above still has to be closed here.
		_ = unix.Close(childHandleFD)
		return nil, noHandle, err
	}
	return child, handle{fd: int32(childHandleFD)}, nil
}

// getDirentsLocked rewinds d's read FD to offset 0 and invokes recordDirent
// for each directory entry read from it. Entries that fail to stat are logged
// and skipped.
func (d *directfsDentry) getDirentsLocked(recordDirent func(name string, key inoKey, dType uint8)) error {
	readFD := int(d.readFD.RacyLoad())
	if _, err := unix.Seek(readFD, 0, 0); err != nil {
		return err
	}

	return fsutil.ForEachDirent(readFD, func(ino uint64, off int64, ftype uint8, name string, reclen uint16) {
		// We also want the device ID, which annoyingly incurs an additional
		// syscall per dirent.
		// TODO(gvisor.dev/issue/6665): Get rid of per-dirent stat.
		stat, err := fsutil.StatAt(d.controlFD, name)
		if err != nil {
			log.Warningf("Getdent64: skipping file %q with failed stat, err: %v", path.Join(genericDebugPathname(&d.dentry), name), err)
			return
		}
		recordDirent(name, inoKeyFromStat(&stat), ftype)
	})
}

// connect issues a Connect of type sockType on d's lisafs control FD (which is
// established first if necessary) and returns its result.
//
// Precondition: fs.renameMu is locked.
func (d *directfsDentry) connect(ctx context.Context, sockType linux.SockType) (int, error) {
	// There are no filesystems mounted in the sandbox process's mount namespace.
	// So we can't perform absolute path traversals. So fallback to using lisafs.
	if err := d.ensureLisafsControlFD(ctx); err != nil {
		return -1, err
	}
	return d.controlFDLisa.Connect(ctx, sockType)
}

// readlink returns the target of the symlink, read via readlinkat(2) on the
// control FD with an empty path. The buffer starts at 128 bytes and doubles
// until the result fits; ENOMEM is returned if no buffer size below
// math.MaxUint16 is large enough.
func (d *directfsDentry) readlink() (string, error) {
	// This is similar to what os.Readlink does.
	for linkLen := 128; linkLen < math.MaxUint16; linkLen *= 2 {
		b := make([]byte, linkLen)
		n, err := unix.Readlinkat(d.controlFD, "", b)

		if err != nil {
			return "", err
		}
		// A completely filled buffer may mean the target was truncated, so
		// only a strictly shorter result is accepted.
		if n < int(linkLen) {
			return string(b[:n]), nil
		}
	}
	return "", unix.ENOMEM
}

// statfs reports filesystem statistics obtained with fstatfs(2) on the control
// FD. FragmentSize is filled from the same Bsize value as BlockSize.
func (d *directfsDentry) statfs() (linux.Statfs, error) {
	var statFS unix.Statfs_t
	if err := unix.Fstatfs(d.controlFD, &statFS); err != nil {
		return linux.Statfs{}, err
	}
	return linux.Statfs{
		BlockSize:       statFS.Bsize,
		FragmentSize:    statFS.Bsize,
		Blocks:          statFS.Blocks,
		BlocksFree:      statFS.Bfree,
		BlocksAvailable: statFS.Bavail,
		Files:           statFS.Files,
		FilesFree:       statFS.Ffree,
		NameLength:      uint64(statFS.Namelen),
	}, nil
}

// restoreFile re-attaches d to the host file referred to by controlFD after a
// restore. It installs controlFD, maps the file's new inoKey to the existing
// d.ino, optionally validates the size and mtime of regular files against the
// cached values (per opts), refreshes cached metadata when it is not
// authoritative, and re-opens the handles recorded in fs.savedDentryRW.
//
// It returns EINVAL for a negative controlFD and vfs.ErrCorruption when a
// requested validation fails. controlFD is closed here if the initial fstat(2)
// fails; after that it is stored in d.controlFD.
func (d *directfsDentry) restoreFile(ctx context.Context, controlFD int, opts *vfs.CompleteRestoreOptions) error {
	if controlFD < 0 {
		log.Warningf("directfsDentry.restoreFile called with invalid controlFD")
		return unix.EINVAL
	}
	var stat unix.Stat_t
	if err := unix.Fstat(controlFD, &stat); err != nil {
		_ = unix.Close(controlFD)
		return err
	}

	d.controlFD = controlFD
	// We do not preserve inoKey across checkpoint/restore, so:
	//
	//	- We must assume that the host filesystem did not change in a way that
	//		would invalidate dentries, since we can't revalidate dentries by
	//		checking inoKey.
	//
	//	- We need to associate the new inoKey with the existing d.ino.
	d.inoKey = inoKeyFromStat(&stat)
	d.fs.inoMu.Lock()
	d.fs.inoByKey[d.inoKey] = d.ino
	d.fs.inoMu.Unlock()

	// Check metadata stability before updating metadata.
	d.metadataMu.Lock()
	defer d.metadataMu.Unlock()
	if d.isRegularFile() {
		if opts.ValidateFileSizes {
			if d.size.RacyLoad() != uint64(stat.Size) {
				return vfs.ErrCorruption{fmt.Errorf("gofer.dentry(%q).restoreFile: file size validation failed: size changed from %d to %d", genericDebugPathname(&d.dentry), d.size.Load(), stat.Size)}
			}
		}
		if opts.ValidateFileModificationTimestamps {
			if want := dentryTimestampFromUnix(stat.Mtim); d.mtime.RacyLoad() != want {
				return vfs.ErrCorruption{fmt.Errorf("gofer.dentry(%q).restoreFile: mtime validation failed: mtime changed from %+v to %+v", genericDebugPathname(&d.dentry), linux.NsecToStatxTimestamp(d.mtime.RacyLoad()), linux.NsecToStatxTimestamp(want))}
			}
		}
	}
	if !d.cachedMetadataAuthoritative() {
		// NOTE(review): the result of updateMetadataFromStatLocked is not
		// checked here, although updateMetadataLocked returns it as an error.
		d.updateMetadataFromStatLocked(&stat)
	}

	// Re-open the read/write handles recorded for this dentry, if any.
	if rw, ok := d.fs.savedDentryRW[&d.dentry]; ok {
		if err := d.ensureSharedHandle(ctx, rw.read, rw.write, false /* trunc */); err != nil {
			return err
		}
	}

	return nil
}

// doRevalidationDirectfs stats all dentries in `state`. It will update or
// invalidate dentries in the cache based on the result.
//
// Each dentry in state.dentries is opened by name relative to the previously
// visited one (beginning at state.start). Processing stops at the first dentry
// that is missing or whose inode key no longer matches.
//
// Preconditions:
//   - fs.renameMu must be locked.
//   - InteropModeShared is in effect.
func doRevalidationDirectfs(ctx context.Context, vfsObj *vfs.VirtualFilesystem, state *revalidateState, ds **[]*dentry) error {
	// Name the start dentry explicitly rather than `d`: `d` is used as the
	// loop variable below in various places, and keeping the two apart helps
	// with readability and makes the code less error prone.
	start := state.start.impl.(*directfsDentry)
	if state.refreshStart {
		start.updateMetadata(ctx)
	}

	parent := start
	for _, d := range state.dentries {
		// ENOENT is tolerated here and handled below as "not found"; any other
		// open failure aborts revalidation.
		childFD, err := unix.Openat(parent.controlFD, d.name, unix.O_PATH|hostOpenFlags, 0)
		if err != nil && err != unix.ENOENT {
			return err
		}

		var stat unix.Stat_t
		// Lock metadata *before* getting attributes for d.
		d.metadataMu.Lock()
		found := err == nil
		if found {
			// childFD is only needed for this fstat(2) and is closed right
			// after it.
			err = unix.Fstat(childFD, &stat)
			_ = unix.Close(childFD)
			if err != nil {
				d.metadataMu.Unlock()
				return err
			}
		}

		// Note that synthetic dentries will always fail this comparison check.
		if !found || d.inoKey != inoKeyFromStat(&stat) {
			d.metadataMu.Unlock()
			if !found && d.isSynthetic() {
				// We have a synthetic file, and no remote file has arisen to replace
				// it.
				return nil
			}
			// The file at this path has changed or no longer exists. Mark the
			// dentry invalidated.
			d.invalidate(ctx, vfsObj, ds)
			return nil
		}

		// The file at this path hasn't changed. Just update cached metadata.
		d.impl.(*directfsDentry).updateMetadataFromStatLocked(&stat) // +checklocksforce: d.metadataMu is locked above.
		d.metadataMu.Unlock()

		// Advance parent.
		parent = d.impl.(*directfsDentry)
	}
	return nil
}

// LINT.ThenChange(../../../../runsc/fsgofer/lisafs.go)