github.com/ttpreport/gvisor-ligolo@v0.0.0-20240123134145-a858404967ba/pkg/sentry/fsimpl/gofer/directfs_dentry.go (about) 1 // Copyright 2022 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package gofer 16 17 import ( 18 "fmt" 19 "math" 20 "path" 21 "path/filepath" 22 23 "github.com/ttpreport/gvisor-ligolo/pkg/abi/linux" 24 "github.com/ttpreport/gvisor-ligolo/pkg/atomicbitops" 25 "github.com/ttpreport/gvisor-ligolo/pkg/context" 26 "github.com/ttpreport/gvisor-ligolo/pkg/fsutil" 27 "github.com/ttpreport/gvisor-ligolo/pkg/lisafs" 28 "github.com/ttpreport/gvisor-ligolo/pkg/log" 29 "github.com/ttpreport/gvisor-ligolo/pkg/sentry/kernel/auth" 30 "github.com/ttpreport/gvisor-ligolo/pkg/sentry/socket/unix/transport" 31 "github.com/ttpreport/gvisor-ligolo/pkg/sentry/vfs" 32 "golang.org/x/sys/unix" 33 ) 34 35 // LINT.IfChange 36 37 const ( 38 hostOpenFlags = unix.O_NOFOLLOW | unix.O_CLOEXEC 39 ) 40 41 // tryOpen tries to open() with different modes in the following order: 42 // 1. RDONLY | NONBLOCK: for all files, directories, ro mounts, FIFOs. 43 // Use non-blocking to prevent getting stuck inside open(2) for 44 // FIFOs. This option has no effect on regular files. 45 // 2. PATH: for symlinks, sockets. 46 func tryOpen(open func(int) (int, error)) (int, error) { 47 flags := []int{ 48 unix.O_RDONLY | unix.O_NONBLOCK, 49 unix.O_PATH, 50 } 51 52 var ( 53 hostFD int 54 err error 55 ) 56 for _, flag := range flags { 57 hostFD, err = open(flag | hostOpenFlags) 58 if err == nil { 59 return hostFD, nil 60 } 61 62 if err == unix.ENOENT { 63 // File doesn't exist, no point in retrying. 64 break 65 } 66 } 67 return -1, err 68 } 69 70 // getDirectfsRootDentry creates a new dentry representing the root dentry for 71 // this mountpoint. getDirectfsRootDentry takes ownership of rootHostFD and 72 // rootControlFD. 73 func (fs *filesystem) getDirectfsRootDentry(ctx context.Context, rootHostFD int, rootControlFD lisafs.ClientFD) (*dentry, error) { 74 d, err := fs.newDirectfsDentry(rootHostFD) 75 if err != nil { 76 log.Warningf("newDirectfsDentry failed for mount point dentry: %v", err) 77 rootControlFD.Close(ctx, false /* flush */) 78 return nil, err 79 } 80 d.impl.(*directfsDentry).controlFDLisa = rootControlFD 81 return d, nil 82 } 83 84 // directfsDentry is a host dentry implementation. It represents a dentry 85 // backed by a host file descriptor. All operations are directly performed on 86 // the host. A gofer is only involved for some operations on the mount point 87 // dentry (when dentry.parent = nil). We are forced to fall back to the gofer 88 // due to the lack of procfs in the sandbox process. 89 // 90 // +stateify savable 91 type directfsDentry struct { 92 dentry 93 94 // controlFD is the host FD to this file. controlFD is immutable. 95 controlFD int 96 97 // controlFDLisa is a lisafs control FD on this dentry. 98 // This is used to fallback to using lisafs RPCs in the following cases: 99 // * When parent dentry is required to perform operations but 100 // dentry.parent = nil (root dentry). 101 // * For path-based syscalls (like connect(2) and bind(2)) on sockets. 102 // 103 // For the root dentry, controlFDLisa is always set and is immutable. 104 // For sockets, controlFDLisa is protected by dentry.handleMu and is 105 // immutable after initialization. 106 controlFDLisa lisafs.ClientFD `state:"nosave"` 107 } 108 109 // newDirectfsDentry creates a new dentry representing the given file. The dentry 110 // initially has no references, but is not cached; it is the caller's 111 // responsibility to set the dentry's reference count and/or call 112 // dentry.checkCachingLocked() as appropriate. 113 // newDirectDentry takes ownership of controlFD. 114 func (fs *filesystem) newDirectfsDentry(controlFD int) (*dentry, error) { 115 var stat unix.Stat_t 116 if err := unix.Fstat(controlFD, &stat); err != nil { 117 log.Warningf("failed to fstat(2) FD %d: %v", controlFD, err) 118 _ = unix.Close(controlFD) 119 return nil, err 120 } 121 inoKey := inoKeyFromStat(&stat) 122 d := &directfsDentry{ 123 dentry: dentry{ 124 fs: fs, 125 inoKey: inoKey, 126 ino: fs.inoFromKey(inoKey), 127 mode: atomicbitops.FromUint32(stat.Mode), 128 uid: atomicbitops.FromUint32(stat.Uid), 129 gid: atomicbitops.FromUint32(stat.Gid), 130 blockSize: atomicbitops.FromUint32(uint32(stat.Blksize)), 131 readFD: atomicbitops.FromInt32(-1), 132 writeFD: atomicbitops.FromInt32(-1), 133 mmapFD: atomicbitops.FromInt32(-1), 134 size: atomicbitops.FromUint64(uint64(stat.Size)), 135 atime: atomicbitops.FromInt64(dentryTimestampFromUnix(stat.Atim)), 136 mtime: atomicbitops.FromInt64(dentryTimestampFromUnix(stat.Mtim)), 137 ctime: atomicbitops.FromInt64(dentryTimestampFromUnix(stat.Ctim)), 138 nlink: atomicbitops.FromUint32(uint32(stat.Nlink)), 139 }, 140 controlFD: controlFD, 141 } 142 d.dentry.init(d) 143 fs.syncMu.Lock() 144 fs.syncableDentries.PushBack(&d.syncableListEntry) 145 fs.syncMu.Unlock() 146 return &d.dentry, nil 147 } 148 149 // Precondition: fs.renameMu is locked. 150 func (d *directfsDentry) openHandle(ctx context.Context, flags uint32) (handle, error) { 151 if d.parent == nil { 152 // This is a mount point. We don't have parent. Fallback to using lisafs. 153 if !d.controlFDLisa.Ok() { 154 panic("directfsDentry.controlFDLisa is not set for mount point dentry") 155 } 156 openFD, hostFD, err := d.controlFDLisa.OpenAt(ctx, flags) 157 if err != nil { 158 return noHandle, err 159 } 160 d.fs.client.CloseFD(ctx, openFD, true /* flush */) 161 if hostFD < 0 { 162 log.Warningf("gofer did not donate an FD for mount point") 163 return noHandle, unix.EIO 164 } 165 return handle{fd: int32(hostFD)}, nil 166 } 167 168 // The only way to re-open an FD with different flags is via procfs or 169 // openat(2) from the parent. Procfs does not exist here. So use parent. 170 flags |= hostOpenFlags 171 openFD, err := unix.Openat(d.parent.impl.(*directfsDentry).controlFD, d.name, int(flags), 0) 172 if err != nil { 173 return noHandle, err 174 } 175 return handle{fd: int32(openFD)}, nil 176 } 177 178 // Precondition: fs.renameMu is locked. 179 func (d *directfsDentry) ensureLisafsControlFD(ctx context.Context) error { 180 d.handleMu.Lock() 181 defer d.handleMu.Unlock() 182 if d.controlFDLisa.Ok() { 183 return nil 184 } 185 186 var names []string 187 root := d 188 for root.parent != nil { 189 names = append(names, root.name) 190 root = root.parent.impl.(*directfsDentry) 191 } 192 if !root.controlFDLisa.Ok() { 193 panic("controlFDLisa is not set for mount point dentry") 194 } 195 if len(names) == 0 { 196 return nil // d == root 197 } 198 // Reverse names. 199 last := len(names) - 1 200 for i := 0; i < len(names)/2; i++ { 201 names[i], names[last-i] = names[last-i], names[i] 202 } 203 status, inodes, err := root.controlFDLisa.WalkMultiple(ctx, names) 204 if err != nil { 205 return err 206 } 207 defer func() { 208 // Close everything except for inodes[last] if it exists. 209 for i := 0; i < len(inodes) && i < last; i++ { 210 flush := i == last-1 || i == len(inodes)-1 211 d.fs.client.CloseFD(ctx, inodes[i].ControlFD, flush) 212 } 213 }() 214 switch status { 215 case lisafs.WalkComponentDoesNotExist: 216 return unix.ENOENT 217 case lisafs.WalkComponentSymlink: 218 log.Warningf("intermediate path component was a symlink? names = %v, inodes = %+v", names, inodes) 219 return unix.ELOOP 220 case lisafs.WalkSuccess: 221 d.controlFDLisa = d.fs.client.NewFD(inodes[last].ControlFD) 222 return nil 223 } 224 panic("unreachable") 225 } 226 227 // Precondition: d.metadataMu must be locked. 228 // 229 // +checklocks:d.metadataMu 230 func (d *directfsDentry) updateMetadataLocked(h handle) error { 231 handleMuRLocked := false 232 if h.fd < 0 { 233 // Use open FDs in preferenece to the control FD. Control FDs may be opened 234 // with O_PATH. This may be significantly more efficient in some 235 // implementations. Prefer a writable FD over a readable one since some 236 // filesystem implementations may update a writable FD's metadata after 237 // writes, without making metadata updates immediately visible to read-only 238 // FDs representing the same file. 239 d.handleMu.RLock() 240 switch { 241 case d.writeFD.RacyLoad() >= 0: 242 h.fd = d.writeFD.RacyLoad() 243 handleMuRLocked = true 244 case d.readFD.RacyLoad() >= 0: 245 h.fd = d.readFD.RacyLoad() 246 handleMuRLocked = true 247 default: 248 h.fd = int32(d.controlFD) 249 d.handleMu.RUnlock() 250 } 251 } 252 253 var stat unix.Stat_t 254 err := unix.Fstat(int(h.fd), &stat) 255 if handleMuRLocked { 256 // handleMu must be released before updateMetadataFromStatLocked(). 257 d.handleMu.RUnlock() // +checklocksforce: complex case. 258 } 259 if err != nil { 260 return err 261 } 262 return d.updateMetadataFromStatLocked(&stat) 263 } 264 265 // Precondition: fs.renameMu is locked if d is a socket. 266 func (d *directfsDentry) chmod(ctx context.Context, mode uint16) error { 267 if !d.isSocket() { 268 return unix.Fchmod(d.controlFD, uint32(mode)) 269 } 270 271 // fchmod(2) on socket files created via bind(2) fails. We need to 272 // fchmodat(2) it from its parent. 273 if d.parent != nil { 274 // We have parent FD, just use that. Note that AT_SYMLINK_NOFOLLOW flag is 275 // currently not supported. So we don't use it. 276 return unix.Fchmodat(d.parent.impl.(*directfsDentry).controlFD, d.name, uint32(mode), 0 /* flags */) 277 } 278 279 // This is a mount point socket. We don't have a parent FD. Fallback to using 280 // lisafs. 281 if !d.controlFDLisa.Ok() { 282 panic("directfsDentry.controlFDLisa is not set for mount point socket") 283 } 284 285 return chmod(ctx, d.controlFDLisa, mode) 286 } 287 288 // Preconditions: 289 // - d.handleMu is locked if d is a regular file. 290 // - fs.renameMu is locked if d is a symlink. 291 func (d *directfsDentry) utimensat(ctx context.Context, stat *linux.Statx) error { 292 if stat.Mask&(linux.STATX_ATIME|linux.STATX_MTIME) == 0 { 293 return nil 294 } 295 296 utimes := [2]unix.Timespec{ 297 {Sec: 0, Nsec: unix.UTIME_OMIT}, 298 {Sec: 0, Nsec: unix.UTIME_OMIT}, 299 } 300 if stat.Mask&unix.STATX_ATIME != 0 { 301 utimes[0].Sec = stat.Atime.Sec 302 utimes[0].Nsec = int64(stat.Atime.Nsec) 303 } 304 if stat.Mask&unix.STATX_MTIME != 0 { 305 utimes[1].Sec = stat.Mtime.Sec 306 utimes[1].Nsec = int64(stat.Mtime.Nsec) 307 } 308 309 if !d.isSymlink() { 310 hostFD := d.controlFD 311 if d.isRegularFile() { 312 // utimensat(2) requires a writable FD for regular files. See BUGS 313 // section. dentry.prepareSetStat() should have acquired a writable FD. 314 hostFD = int(d.writeFD.RacyLoad()) 315 } 316 // Non-symlinks can operate directly on the fd using an empty name. 317 return fsutil.Utimensat(hostFD, "", utimes, 0) 318 } 319 320 // utimensat operates different that other syscalls. To operate on a 321 // symlink it *requires* AT_SYMLINK_NOFOLLOW with dirFD and a non-empty 322 // name. 323 if d.parent != nil { 324 return fsutil.Utimensat(d.parent.impl.(*directfsDentry).controlFD, d.name, utimes, unix.AT_SYMLINK_NOFOLLOW) 325 } 326 327 // This is a mount point symlink. We don't have a parent FD. Fallback to 328 // using lisafs. 329 if !d.controlFDLisa.Ok() { 330 panic("directfsDentry.controlFDLisa is not set for mount point symlink") 331 } 332 333 setStat := linux.Statx{ 334 Mask: stat.Mask & (linux.STATX_ATIME | linux.STATX_MTIME), 335 Atime: stat.Atime, 336 Mtime: stat.Mtime, 337 } 338 _, failureErr, err := d.controlFDLisa.SetStat(ctx, &setStat) 339 if err != nil { 340 return err 341 } 342 return failureErr 343 } 344 345 // Precondition: fs.renameMu is locked. 346 func (d *directfsDentry) prepareSetStat(ctx context.Context, stat *linux.Statx) error { 347 if stat.Mask&unix.STATX_SIZE != 0 || 348 (stat.Mask&(unix.STATX_ATIME|unix.STATX_MTIME) != 0 && d.isRegularFile()) { 349 // Need to ensure a writable FD is available. See setStatLocked() to 350 // understand why. 351 return d.ensureSharedHandle(ctx, false /* read */, true /* write */, false /* trunc */) 352 } 353 return nil 354 } 355 356 // Preconditions: 357 // - d.handleMu is locked. 358 // - fs.renameMu is locked. 359 func (d *directfsDentry) setStatLocked(ctx context.Context, stat *linux.Statx) (failureMask uint32, failureErr error) { 360 if stat.Mask&unix.STATX_MODE != 0 { 361 if err := d.chmod(ctx, stat.Mode&^unix.S_IFMT); err != nil { 362 failureMask |= unix.STATX_MODE 363 failureErr = err 364 } 365 } 366 367 if stat.Mask&unix.STATX_SIZE != 0 { 368 // ftruncate(2) requires a writable FD. 369 if err := unix.Ftruncate(int(d.writeFD.RacyLoad()), int64(stat.Size)); err != nil { 370 failureMask |= unix.STATX_SIZE 371 failureErr = err 372 } 373 } 374 375 if err := d.utimensat(ctx, stat); err != nil { 376 failureMask |= (stat.Mask & (unix.STATX_ATIME | unix.STATX_MTIME)) 377 failureErr = err 378 } 379 380 if stat.Mask&(unix.STATX_UID|unix.STATX_GID) != 0 { 381 // "If the owner or group is specified as -1, then that ID is not changed" 382 // - chown(2) 383 uid := -1 384 if stat.Mask&unix.STATX_UID != 0 { 385 uid = int(stat.UID) 386 } 387 gid := -1 388 if stat.Mask&unix.STATX_GID != 0 { 389 gid = int(stat.GID) 390 } 391 if err := fchown(d.controlFD, uid, gid); err != nil { 392 failureMask |= stat.Mask & (unix.STATX_UID | unix.STATX_GID) 393 failureErr = err 394 } 395 } 396 return 397 } 398 399 func fchown(fd, uid, gid int) error { 400 return unix.Fchownat(fd, "", uid, gid, unix.AT_EMPTY_PATH|unix.AT_SYMLINK_NOFOLLOW) 401 } 402 403 func (d *directfsDentry) destroy(ctx context.Context) { 404 if d.controlFD >= 0 { 405 _ = unix.Close(d.controlFD) 406 } 407 if d.controlFDLisa.Ok() { 408 d.controlFDLisa.Close(ctx, true /* flush */) 409 } 410 } 411 412 func (d *directfsDentry) getHostChild(name string) (*dentry, error) { 413 childFD, err := tryOpen(func(flags int) (int, error) { 414 return unix.Openat(d.controlFD, name, flags, 0) 415 }) 416 if err != nil { 417 return nil, err 418 } 419 return d.fs.newDirectfsDentry(childFD) 420 } 421 422 // getCreatedChild opens the newly created child, sets its uid/gid, constructs 423 // a disconnected dentry and returns it. 424 func (d *directfsDentry) getCreatedChild(name string, uid, gid int, isDir bool) (*dentry, error) { 425 unlinkFlags := 0 426 extraOpenFlags := 0 427 if isDir { 428 extraOpenFlags |= unix.O_DIRECTORY 429 unlinkFlags |= unix.AT_REMOVEDIR 430 } 431 deleteChild := func() { 432 // Best effort attempt to remove the newly created child on failure. 433 if err := unix.Unlinkat(d.controlFD, name, unlinkFlags); err != nil { 434 log.Warningf("error unlinking newly created child %q after failure: %v", filepath.Join(genericDebugPathname(&d.dentry), name), err) 435 } 436 } 437 438 childFD, err := tryOpen(func(flags int) (int, error) { 439 return unix.Openat(d.controlFD, name, flags|extraOpenFlags, 0) 440 }) 441 if err != nil { 442 deleteChild() 443 return nil, err 444 } 445 446 // "If the owner or group is specified as -1, then that ID is not changed" 447 // - chown(2). Only bother making the syscall if the owner is changing. 448 if uid != -1 || gid != -1 { 449 if err := fchown(childFD, uid, gid); err != nil { 450 deleteChild() 451 _ = unix.Close(childFD) 452 return nil, err 453 } 454 } 455 child, err := d.fs.newDirectfsDentry(childFD) 456 if err != nil { 457 // Ownership of childFD was passed to newDirectDentry(), so no need to 458 // clean that up. 459 deleteChild() 460 return nil, err 461 } 462 return child, nil 463 } 464 465 func (d *directfsDentry) mknod(ctx context.Context, name string, creds *auth.Credentials, opts *vfs.MknodOptions) (*dentry, error) { 466 if _, ok := opts.Endpoint.(transport.HostBoundEndpoint); ok { 467 return d.bindAt(ctx, name, creds, opts) 468 } 469 470 // From mknod(2) man page: 471 // "EPERM: [...] if the filesystem containing pathname does not support 472 // the type of node requested." 473 if opts.Mode.FileType() != linux.ModeRegular { 474 return nil, unix.EPERM 475 } 476 477 if err := unix.Mknodat(d.controlFD, name, uint32(opts.Mode), 0); err != nil { 478 return nil, err 479 } 480 return d.getCreatedChild(name, int(creds.EffectiveKUID), int(creds.EffectiveKGID), false /* isDir */) 481 } 482 483 // Precondition: opts.Endpoint != nil and is transport.HostBoundEndpoint type. 484 func (d *directfsDentry) bindAt(ctx context.Context, name string, creds *auth.Credentials, opts *vfs.MknodOptions) (*dentry, error) { 485 // There are no filesystems mounted in the sandbox process's mount namespace. 486 // So we can't perform absolute path traversals. So fallback to using lisafs. 487 if err := d.ensureLisafsControlFD(ctx); err != nil { 488 return nil, err 489 } 490 sockType := opts.Endpoint.(transport.Endpoint).Type() 491 childInode, boundSocketFD, err := d.controlFDLisa.BindAt(ctx, sockType, name, opts.Mode, lisafs.UID(creds.EffectiveKUID), lisafs.GID(creds.EffectiveKGID)) 492 if err != nil { 493 return nil, err 494 } 495 d.fs.client.CloseFD(ctx, childInode.ControlFD, true /* flush */) 496 // Update opts.Endpoint that it is bound. 497 hbep := opts.Endpoint.(transport.HostBoundEndpoint) 498 if err := hbep.SetBoundSocketFD(ctx, boundSocketFD); err != nil { 499 if err := unix.Unlinkat(d.controlFD, name, 0); err != nil { 500 log.Warningf("error unlinking newly created socket %q after failure: %v", filepath.Join(genericDebugPathname(&d.dentry), name), err) 501 } 502 return nil, err 503 } 504 // Socket already has the right UID/GID set, so use uid = gid = -1. 505 child, err := d.getCreatedChild(name, -1 /* uid */, -1 /* gid */, false /* isDir */) 506 if err != nil { 507 hbep.ResetBoundSocketFD(ctx) 508 return nil, err 509 } 510 // Set the endpoint on the newly created child dentry. 511 child.endpoint = opts.Endpoint 512 return child, nil 513 } 514 515 // Precondition: d.fs.renameMu must be locked. 516 func (d *directfsDentry) link(target *directfsDentry, name string) (*dentry, error) { 517 // Using linkat(targetFD, "", newdirfd, name, AT_EMPTY_PATH) requires 518 // CAP_DAC_READ_SEARCH in the *root* userns. With directfs, the sandbox 519 // process has CAP_DAC_READ_SEARCH in its own userns. But the sandbox is 520 // running in a different userns. So we can't use AT_EMPTY_PATH. Fallback to 521 // using olddirfd to call linkat(2). 522 // Also note that d and target are from the same mount. Given target is a 523 // non-directory and d is a directory, target.parent must exist. 524 if err := unix.Linkat(target.parent.impl.(*directfsDentry).controlFD, target.name, d.controlFD, name, 0); err != nil { 525 return nil, err 526 } 527 // Note that we don't need to set uid/gid for the new child. This is a hard 528 // link. The original file already has the right owner. 529 return d.getCreatedChild(name, -1 /* uid */, -1 /* gid */, false /* isDir */) 530 } 531 532 func (d *directfsDentry) mkdir(name string, mode linux.FileMode, uid auth.KUID, gid auth.KGID) (*dentry, error) { 533 if err := unix.Mkdirat(d.controlFD, name, uint32(mode)); err != nil { 534 return nil, err 535 } 536 return d.getCreatedChild(name, int(uid), int(gid), true /* isDir */) 537 } 538 539 func (d *directfsDentry) symlink(name, target string, creds *auth.Credentials) (*dentry, error) { 540 if err := unix.Symlinkat(target, d.controlFD, name); err != nil { 541 return nil, err 542 } 543 return d.getCreatedChild(name, int(creds.EffectiveKUID), int(creds.EffectiveKGID), false /* isDir */) 544 } 545 546 func (d *directfsDentry) openCreate(name string, accessFlags uint32, mode linux.FileMode, uid auth.KUID, gid auth.KGID) (*dentry, handle, error) { 547 createFlags := unix.O_CREAT | unix.O_EXCL | int(accessFlags) | hostOpenFlags 548 childHandleFD, err := unix.Openat(d.controlFD, name, createFlags, uint32(mode&^linux.FileTypeMask)) 549 if err != nil { 550 return nil, noHandle, err 551 } 552 553 child, err := d.getCreatedChild(name, int(uid), int(gid), false /* isDir */) 554 if err != nil { 555 _ = unix.Close(childHandleFD) 556 return nil, noHandle, err 557 } 558 return child, handle{fd: int32(childHandleFD)}, nil 559 } 560 561 func (d *directfsDentry) getDirentsLocked(recordDirent func(name string, key inoKey, dType uint8)) error { 562 readFD := int(d.readFD.RacyLoad()) 563 if _, err := unix.Seek(readFD, 0, 0); err != nil { 564 return err 565 } 566 567 var direntsBuf [8192]byte 568 for { 569 n, err := unix.Getdents(readFD, direntsBuf[:]) 570 if err != nil { 571 return err 572 } 573 if n <= 0 { 574 return nil 575 } 576 577 fsutil.ParseDirents(direntsBuf[:n], func(ino uint64, off int64, ftype uint8, name string, reclen uint16) bool { 578 // We also want the device ID, which annoyingly incurs an additional 579 // syscall per dirent. 580 // TODO(gvisor.dev/issue/6665): Get rid of per-dirent stat. 581 stat, err := fsutil.StatAt(d.controlFD, name) 582 if err != nil { 583 log.Warningf("Getdent64: skipping file %q with failed stat, err: %v", path.Join(genericDebugPathname(&d.dentry), name), err) 584 return true 585 } 586 recordDirent(name, inoKeyFromStat(&stat), ftype) 587 return true 588 }) 589 } 590 } 591 592 // Precondition: fs.renameMu is locked. 593 func (d *directfsDentry) connect(ctx context.Context, sockType linux.SockType) (int, error) { 594 // There are no filesystems mounted in the sandbox process's mount namespace. 595 // So we can't perform absolute path traversals. So fallback to using lisafs. 596 if err := d.ensureLisafsControlFD(ctx); err != nil { 597 return -1, err 598 } 599 return d.controlFDLisa.Connect(ctx, sockType) 600 } 601 602 func (d *directfsDentry) readlink() (string, error) { 603 // This is similar to what os.Readlink does. 604 for linkLen := 128; linkLen < math.MaxUint16; linkLen *= 2 { 605 b := make([]byte, linkLen) 606 n, err := unix.Readlinkat(d.controlFD, "", b) 607 608 if err != nil { 609 return "", err 610 } 611 if n < int(linkLen) { 612 return string(b[:n]), nil 613 } 614 } 615 return "", unix.ENOMEM 616 } 617 618 func (d *directfsDentry) statfs() (linux.Statfs, error) { 619 var statFS unix.Statfs_t 620 if err := unix.Fstatfs(d.controlFD, &statFS); err != nil { 621 return linux.Statfs{}, err 622 } 623 return linux.Statfs{ 624 BlockSize: statFS.Bsize, 625 FragmentSize: statFS.Bsize, 626 Blocks: statFS.Blocks, 627 BlocksFree: statFS.Bfree, 628 BlocksAvailable: statFS.Bavail, 629 Files: statFS.Files, 630 FilesFree: statFS.Ffree, 631 NameLength: uint64(statFS.Namelen), 632 }, nil 633 } 634 635 func (d *directfsDentry) restoreFile(ctx context.Context, controlFD int, opts *vfs.CompleteRestoreOptions) error { 636 if controlFD < 0 { 637 log.Warningf("directfsDentry.restoreFile called with invalid controlFD") 638 return unix.EINVAL 639 } 640 var stat unix.Stat_t 641 if err := unix.Fstat(controlFD, &stat); err != nil { 642 _ = unix.Close(controlFD) 643 return err 644 } 645 646 d.controlFD = controlFD 647 // We do not preserve inoKey across checkpoint/restore, so: 648 // 649 // - We must assume that the host filesystem did not change in a way that 650 // would invalidate dentries, since we can't revalidate dentries by 651 // checking inoKey. 652 // 653 // - We need to associate the new inoKey with the existing d.ino. 654 d.inoKey = inoKeyFromStat(&stat) 655 d.fs.inoMu.Lock() 656 d.fs.inoByKey[d.inoKey] = d.ino 657 d.fs.inoMu.Unlock() 658 659 // Check metadata stability before updating metadata. 660 d.metadataMu.Lock() 661 defer d.metadataMu.Unlock() 662 if d.isRegularFile() { 663 if opts.ValidateFileSizes { 664 if d.size.RacyLoad() != uint64(stat.Size) { 665 return vfs.ErrCorruption{fmt.Errorf("gofer.dentry(%q).restoreFile: file size validation failed: size changed from %d to %d", genericDebugPathname(&d.dentry), d.size.Load(), stat.Size)} 666 } 667 } 668 if opts.ValidateFileModificationTimestamps { 669 if want := dentryTimestampFromUnix(stat.Mtim); d.mtime.RacyLoad() != want { 670 return vfs.ErrCorruption{fmt.Errorf("gofer.dentry(%q).restoreFile: mtime validation failed: mtime changed from %+v to %+v", genericDebugPathname(&d.dentry), linux.NsecToStatxTimestamp(d.mtime.RacyLoad()), linux.NsecToStatxTimestamp(want))} 671 } 672 } 673 } 674 if !d.cachedMetadataAuthoritative() { 675 d.updateMetadataFromStatLocked(&stat) 676 } 677 678 if rw, ok := d.fs.savedDentryRW[&d.dentry]; ok { 679 if err := d.ensureSharedHandle(ctx, rw.read, rw.write, false /* trunc */); err != nil { 680 return err 681 } 682 } 683 684 return nil 685 } 686 687 // doRevalidationDirectfs stats all dentries in `state`. It will update or 688 // invalidate dentries in the cache based on the result. 689 // 690 // Preconditions: 691 // - fs.renameMu must be locked. 692 // - InteropModeShared is in effect. 693 func doRevalidationDirectfs(ctx context.Context, vfsObj *vfs.VirtualFilesystem, state *revalidateState, ds **[]*dentry) error { 694 // Explicitly declare start dentry, instead of using the function receiver. 695 // The function receiver has to be named `d` (to be consistent with other 696 // receivers). But `d` variable is also used below in various places. This 697 // helps with readability and makes code less error prone. 698 start := state.start.impl.(*directfsDentry) 699 if state.refreshStart { 700 start.updateMetadata(ctx) 701 } 702 703 parent := start 704 for _, d := range state.dentries { 705 childFD, err := unix.Openat(parent.controlFD, d.name, unix.O_PATH|hostOpenFlags, 0) 706 if err != nil && err != unix.ENOENT { 707 return err 708 } 709 710 var stat unix.Stat_t 711 // Lock metadata *before* getting attributes for d. 712 d.metadataMu.Lock() 713 found := err == nil 714 if found { 715 err = unix.Fstat(childFD, &stat) 716 _ = unix.Close(childFD) 717 if err != nil { 718 d.metadataMu.Unlock() 719 return err 720 } 721 } 722 723 // Note that synthetic dentries will always fail this comparison check. 724 if !found || d.inoKey != inoKeyFromStat(&stat) { 725 d.metadataMu.Unlock() 726 if !found && d.isSynthetic() { 727 // We have a synthetic file, and no remote file has arisen to replace 728 // it. 729 return nil 730 } 731 // The file at this path has changed or no longer exists. Mark the 732 // dentry invalidated. 733 d.invalidate(ctx, vfsObj, ds) 734 return nil 735 } 736 737 // The file at this path hasn't changed. Just update cached metadata. 738 d.impl.(*directfsDentry).updateMetadataFromStatLocked(&stat) // +checklocksforce: d.metadataMu is locked above. 739 d.metadataMu.Unlock() 740 741 // Advance parent. 742 parent = d.impl.(*directfsDentry) 743 } 744 return nil 745 } 746 747 // LINT.ThenChange(../../../../runsc/fsgofer/lisafs.go)