github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/runsc/fsgofer/fsgofer.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package fsgofer implements p9.File giving access to local files using 16 // a simple mapping from a path prefix that is added to the path requested 17 // by the sandbox. Ex: 18 // 19 // prefix: "/docker/imgs/alpine" 20 // app path: /bin/ls => /docker/imgs/alpine/bin/ls 21 package fsgofer 22 23 import ( 24 "errors" 25 "fmt" 26 "io" 27 "math" 28 "os" 29 "path" 30 "path/filepath" 31 "runtime" 32 "strconv" 33 34 "golang.org/x/sys/unix" 35 "github.com/SagerNet/gvisor/pkg/cleanup" 36 "github.com/SagerNet/gvisor/pkg/fd" 37 "github.com/SagerNet/gvisor/pkg/log" 38 "github.com/SagerNet/gvisor/pkg/p9" 39 "github.com/SagerNet/gvisor/pkg/sync" 40 ) 41 42 const ( 43 // invalidMode is set to a value that doesn't match any other valid 44 // modes to ensure an unopened/closed file fails all mode checks. 45 invalidMode = p9.OpenFlags(math.MaxUint32) 46 47 openFlags = unix.O_NOFOLLOW | unix.O_CLOEXEC 48 49 allowedOpenFlags = unix.O_TRUNC 50 ) 51 52 // verityXattrs are the extended attributes used by verity file system. 53 var verityXattrs = map[string]struct{}{ 54 "user.merkle.offset": struct{}{}, 55 "user.merkle.size": struct{}{}, 56 "user.merkle.childrenOffset": struct{}{}, 57 "user.merkle.childrenSize": struct{}{}, 58 } 59 60 // join is equivalent to path.Join() but skips path.Clean() which is expensive. 61 func join(parent, child string) string { 62 return parent + "/" + child 63 } 64 65 // Config sets configuration options for each attach point. 66 type Config struct { 67 // ROMount is set to true if this is a readonly mount. 68 ROMount bool 69 70 // PanicOnWrite panics on attempts to write to RO mounts. 71 PanicOnWrite bool 72 73 // HostUDS signals whether the gofer can mount a host's UDS. 74 HostUDS bool 75 76 // EnableVerityXattr allows access to extended attributes used by the 77 // verity file system. 78 EnableVerityXattr bool 79 } 80 81 type attachPoint struct { 82 prefix string 83 conf Config 84 85 // attachedMu protects attached. 86 attachedMu sync.Mutex 87 attached bool 88 89 // deviceMu protects devices and nextDevice. 90 deviceMu sync.Mutex 91 92 // nextDevice is the next device id that will be allocated. 93 nextDevice uint8 94 95 // devices is a map from actual host devices to "small" integers that 96 // can be combined with host inode to form a unique virtual inode id. 97 devices map[uint64]uint8 98 } 99 100 // NewAttachPoint creates a new attacher that gives local file 101 // access to all files under 'prefix'. 'prefix' must be an absolute path. 102 func NewAttachPoint(prefix string, c Config) (p9.Attacher, error) { 103 // Sanity check the prefix. 104 if !filepath.IsAbs(prefix) { 105 return nil, fmt.Errorf("attach point prefix must be absolute %q", prefix) 106 } 107 return &attachPoint{ 108 prefix: prefix, 109 conf: c, 110 devices: make(map[uint64]uint8), 111 }, nil 112 } 113 114 // Attach implements p9.Attacher. 115 func (a *attachPoint) Attach() (p9.File, error) { 116 a.attachedMu.Lock() 117 defer a.attachedMu.Unlock() 118 119 if a.attached { 120 return nil, fmt.Errorf("attach point already attached, prefix: %s", a.prefix) 121 } 122 123 f, readable, err := openAnyFile(a.prefix, func(mode int) (*fd.FD, error) { 124 return fd.Open(a.prefix, openFlags|mode, 0) 125 }) 126 if err != nil { 127 return nil, fmt.Errorf("unable to open %q: %v", a.prefix, err) 128 } 129 130 stat, err := fstat(f.FD()) 131 if err != nil { 132 return nil, fmt.Errorf("unable to stat %q: %v", a.prefix, err) 133 } 134 135 lf, err := newLocalFile(a, f, a.prefix, readable, &stat) 136 if err != nil { 137 return nil, fmt.Errorf("unable to create localFile %q: %v", a.prefix, err) 138 } 139 a.attached = true 140 return lf, nil 141 } 142 143 // makeQID returns a unique QID for the given stat buffer. 144 func (a *attachPoint) makeQID(stat *unix.Stat_t) p9.QID { 145 a.deviceMu.Lock() 146 defer a.deviceMu.Unlock() 147 148 // First map the host device id to a unique 8-bit integer. 149 dev, ok := a.devices[stat.Dev] 150 if !ok { 151 a.devices[stat.Dev] = a.nextDevice 152 dev = a.nextDevice 153 a.nextDevice++ 154 if a.nextDevice < dev { 155 panic(fmt.Sprintf("device id overflow! map: %+v", a.devices)) 156 } 157 } 158 159 // Construct a "virtual" inode id with the uint8 device number in the 160 // first 8 bits, and the rest of the bits from the host inode id. 161 maskedIno := stat.Ino & 0x00ffffffffffffff 162 if maskedIno != stat.Ino { 163 log.Warningf("first 8 bytes of host inode id %x will be truncated to construct virtual inode id", stat.Ino) 164 } 165 ino := uint64(dev)<<56 | maskedIno 166 return p9.QID{ 167 Type: p9.FileMode(stat.Mode).QIDType(), 168 Path: ino, 169 } 170 } 171 172 // localFile implements p9.File wrapping a local file. The underlying file 173 // is opened during Walk() and stored in 'file' to be used with other 174 // operations. The file is opened as readonly, unless it's a symlink or there is 175 // no read access, which requires O_PATH. 176 // 177 // The file may be reopened if the requested mode in Open() is not a subset of 178 // current mode. Consequently, 'file' could have a mode wider than requested and 179 // must be verified before read/write operations. Before the file is opened and 180 // after it's closed, 'mode' is set to an invalid value to prevent an unopened 181 // file from being used. 182 // 183 // The reason that the file is not opened initially as read-write is for better 184 // performance with 'overlay2' storage driver. overlay2 eagerly copies the 185 // entire file up when it's opened in write mode, and would perform badly when 186 // multiple files are only being opened for read (esp. startup). 187 // 188 // File operations must use "at" functions whenever possible: 189 // * Local operations must use AT_EMPTY_PATH: 190 // fchownat(fd, "", AT_EMPTY_PATH, ...), instead of chown(fullpath, ...) 191 // * Creation operations must use (fd + name): 192 // mkdirat(fd, name, ...), instead of mkdir(fullpath, ...) 193 // 194 // Apart from being faster, it also adds another layer of defense against 195 // symlink attacks (note that O_NOFOLLOW applies only to the last element in 196 // the path). 197 // 198 // The few exceptions where it cannot be done are: utimensat on symlinks, and 199 // Connect() for the socket address. 200 type localFile struct { 201 p9.DisallowClientCalls 202 203 // attachPoint is the attachPoint that serves this localFile. 204 attachPoint *attachPoint 205 206 // hostPath is the full path to the host file. It can be used for logging and 207 // the few cases where full path is required to operation the host file. In 208 // all other cases, use "file" directly. 209 // 210 // Note: it's safely updated by the Renamed hook. 211 hostPath string 212 213 // file is opened when localFile is created and it's never nil. It may be 214 // reopened if the Open() mode is wider than the mode the file was originally 215 // opened with. 216 file *fd.FD 217 218 // controlReadable tells whether 'file' was opened with read permissions 219 // during a walk. 220 controlReadable bool 221 222 // mode is the mode in which the file was opened. Set to invalidMode 223 // if localFile isn't opened. 224 mode p9.OpenFlags 225 226 // fileType for this file. It is equivalent to: 227 // unix.Stat_t.Mode & unix.S_IFMT 228 fileType uint32 229 230 qid p9.QID 231 232 // readDirMu protects against concurrent Readdir calls. 233 readDirMu sync.Mutex 234 235 // lastDirentOffset is the last offset returned by Readdir(). If another call 236 // to Readdir is made at the same offset, the file doesn't need to be 237 // repositioned. This is an important optimization because the caller must 238 // always make one extra call to detect EOF (empty result, no error). 239 lastDirentOffset uint64 240 } 241 242 var procSelfFD *fd.FD 243 244 // OpenProcSelfFD opens the /proc/self/fd directory, which will be used to 245 // reopen file descriptors. 246 func OpenProcSelfFD() error { 247 d, err := unix.Open("/proc/self/fd", unix.O_RDONLY|unix.O_DIRECTORY, 0) 248 if err != nil { 249 return fmt.Errorf("error opening /proc/self/fd: %v", err) 250 } 251 procSelfFD = fd.New(d) 252 return nil 253 } 254 255 func reopenProcFd(f *fd.FD, mode int) (*fd.FD, error) { 256 d, err := unix.Openat(int(procSelfFD.FD()), strconv.Itoa(f.FD()), mode&^unix.O_NOFOLLOW, 0) 257 if err != nil { 258 return nil, err 259 } 260 261 return fd.New(d), nil 262 } 263 264 func openAnyFileFromParent(parent *localFile, name string) (*fd.FD, string, bool, error) { 265 pathDebug := join(parent.hostPath, name) 266 f, readable, err := openAnyFile(pathDebug, func(mode int) (*fd.FD, error) { 267 return fd.OpenAt(parent.file, name, openFlags|mode, 0) 268 }) 269 return f, pathDebug, readable, err 270 } 271 272 // openAnyFile attempts to open the file in O_RDONLY. If it fails, falls back 273 // to O_PATH. 'path' is used for logging messages only. 'fn' is what does the 274 // actual file open and is customizable by the caller. 275 func openAnyFile(pathDebug string, fn func(mode int) (*fd.FD, error)) (*fd.FD, bool, error) { 276 // Attempt to open file in the following mode in order: 277 // 1. RDONLY | NONBLOCK: for all files, directories, ro mounts, FIFOs. 278 // Use non-blocking to prevent getting stuck inside open(2) for 279 // FIFOs. This option has no effect on regular files. 280 // 2. PATH: for symlinks, sockets. 281 options := []struct { 282 mode int 283 readable bool 284 }{ 285 { 286 mode: unix.O_RDONLY | unix.O_NONBLOCK, 287 readable: true, 288 }, 289 { 290 mode: unix.O_PATH, 291 readable: false, 292 }, 293 } 294 295 var err error 296 for i, option := range options { 297 var file *fd.FD 298 file, err = fn(option.mode) 299 if err == nil { 300 // Succeeded opening the file, we're done. 301 return file, option.readable, nil 302 } 303 switch e := extractErrno(err); e { 304 case unix.ENOENT: 305 // File doesn't exist, no point in retrying. 306 return nil, false, e 307 } 308 // File failed to open. Try again with next mode, preserving 'err' in case 309 // this was the last attempt. 310 log.Debugf("Attempt %d to open file failed, mode: %#x, path: %q, err: %v", i, openFlags|option.mode, pathDebug, err) 311 } 312 // All attempts to open file have failed, return the last error. 313 log.Debugf("Failed to open file, path: %q, err: %v", pathDebug, err) 314 return nil, false, extractErrno(err) 315 } 316 317 func checkSupportedFileType(mode uint32, permitSocket bool) error { 318 switch mode & unix.S_IFMT { 319 case unix.S_IFREG, unix.S_IFDIR, unix.S_IFLNK: 320 return nil 321 322 case unix.S_IFSOCK: 323 if !permitSocket { 324 return unix.EPERM 325 } 326 return nil 327 328 default: 329 return unix.EPERM 330 } 331 } 332 333 func newLocalFile(a *attachPoint, file *fd.FD, path string, readable bool, stat *unix.Stat_t) (*localFile, error) { 334 if err := checkSupportedFileType(stat.Mode, a.conf.HostUDS); err != nil { 335 return nil, err 336 } 337 338 return &localFile{ 339 attachPoint: a, 340 hostPath: path, 341 file: file, 342 mode: invalidMode, 343 fileType: stat.Mode & unix.S_IFMT, 344 qid: a.makeQID(stat), 345 controlReadable: readable, 346 }, nil 347 } 348 349 // newFDMaybe creates a fd.FD from a file, dup'ing the FD and setting it as 350 // non-blocking. If anything fails, returns nil. It's better to have a file 351 // without host FD, than to fail the operation. 352 func newFDMaybe(file *fd.FD) *fd.FD { 353 dupFD, err := unix.Dup(file.FD()) 354 // Technically, the runtime may call the finalizer on file as soon as 355 // FD() returns. 356 runtime.KeepAlive(file) 357 if err != nil { 358 return nil 359 } 360 dup := fd.New(dupFD) 361 362 // fd is blocking; non-blocking is required. 363 if err := unix.SetNonblock(dup.FD(), true); err != nil { 364 _ = dup.Close() 365 return nil 366 } 367 return dup 368 } 369 370 func fstat(fd int) (unix.Stat_t, error) { 371 var stat unix.Stat_t 372 if err := unix.Fstat(fd, &stat); err != nil { 373 return unix.Stat_t{}, err 374 } 375 return stat, nil 376 } 377 378 func fchown(fd int, uid p9.UID, gid p9.GID) error { 379 return unix.Fchownat(fd, "", int(uid), int(gid), unix.AT_EMPTY_PATH|unix.AT_SYMLINK_NOFOLLOW) 380 } 381 382 func setOwnerIfNeeded(fd int, uid p9.UID, gid p9.GID) (unix.Stat_t, error) { 383 stat, err := fstat(fd) 384 if err != nil { 385 return unix.Stat_t{}, err 386 } 387 388 // Change ownership if not set accordinly. 389 if uint32(uid) != stat.Uid || uint32(gid) != stat.Gid { 390 if err := fchown(fd, uid, gid); err != nil { 391 return unix.Stat_t{}, err 392 } 393 stat.Uid = uint32(uid) 394 stat.Gid = uint32(gid) 395 } 396 return stat, nil 397 } 398 399 // Open implements p9.File. 400 func (l *localFile) Open(flags p9.OpenFlags) (*fd.FD, p9.QID, uint32, error) { 401 if l.isOpen() { 402 panic(fmt.Sprintf("attempting to open already opened file: %q", l.hostPath)) 403 } 404 mode := flags & p9.OpenFlagsModeMask 405 if mode == p9.WriteOnly || mode == p9.ReadWrite || flags&p9.OpenTruncate != 0 { 406 if err := l.checkROMount(); err != nil { 407 return nil, p9.QID{}, 0, err 408 } 409 } 410 411 // Check if control file can be used or if a new open must be created. 412 var newFile *fd.FD 413 if mode == p9.ReadOnly && l.controlReadable && flags.OSFlags()&allowedOpenFlags == 0 { 414 log.Debugf("Open reusing control file, flags: %v, %q", flags, l.hostPath) 415 newFile = l.file 416 } else { 417 // Ideally reopen would call name_to_handle_at (with empty name) and 418 // open_by_handle_at to reopen the file without using 'hostPath'. However, 419 // name_to_handle_at and open_by_handle_at aren't supported by overlay2. 420 log.Debugf("Open reopening file, flags: %v, %q", flags, l.hostPath) 421 var err error 422 osFlags := flags.OSFlags() & (unix.O_ACCMODE | allowedOpenFlags) 423 newFile, err = reopenProcFd(l.file, openFlags|osFlags) 424 if err != nil { 425 return nil, p9.QID{}, 0, extractErrno(err) 426 } 427 } 428 429 var fd *fd.FD 430 if l.fileType == unix.S_IFREG { 431 // Donate FD for regular files only. 432 fd = newFDMaybe(newFile) 433 } 434 435 // Close old file in case a new one was created. 436 if newFile != l.file { 437 if err := l.file.Close(); err != nil { 438 log.Warningf("Error closing file %q: %v", l.hostPath, err) 439 } 440 l.file = newFile 441 } 442 l.mode = mode 443 return fd, l.qid, 0, nil 444 } 445 446 // Create implements p9.File. 447 func (l *localFile) Create(name string, p9Flags p9.OpenFlags, perm p9.FileMode, uid p9.UID, gid p9.GID) (*fd.FD, p9.File, p9.QID, uint32, error) { 448 if err := l.checkROMount(); err != nil { 449 return nil, nil, p9.QID{}, 0, err 450 } 451 452 // Set file creation flags, plus allowed open flags from caller. 453 osFlags := openFlags | unix.O_CREAT | unix.O_EXCL 454 osFlags |= p9Flags.OSFlags() & allowedOpenFlags 455 456 // 'file' may be used for other operations (e.g. Walk), so read access is 457 // always added to flags. Note that resulting file might have a wider mode 458 // than needed for each particular case. 459 mode := p9Flags & p9.OpenFlagsModeMask 460 if mode == p9.WriteOnly { 461 osFlags |= unix.O_RDWR 462 } else { 463 osFlags |= mode.OSFlags() 464 } 465 466 child, err := fd.OpenAt(l.file, name, osFlags, uint32(perm.Permissions())) 467 if err != nil { 468 return nil, nil, p9.QID{}, 0, extractErrno(err) 469 } 470 cu := cleanup.Make(func() { 471 _ = child.Close() 472 // Best effort attempt to remove the file in case of failure. 473 if err := unix.Unlinkat(l.file.FD(), name, 0); err != nil { 474 log.Warningf("error unlinking file %q after failure: %v", path.Join(l.hostPath, name), err) 475 } 476 }) 477 defer cu.Clean() 478 479 stat, err := setOwnerIfNeeded(child.FD(), uid, gid) 480 if err != nil { 481 return nil, nil, p9.QID{}, 0, extractErrno(err) 482 } 483 484 c := &localFile{ 485 attachPoint: l.attachPoint, 486 hostPath: join(l.hostPath, name), 487 file: child, 488 mode: mode, 489 fileType: unix.S_IFREG, 490 qid: l.attachPoint.makeQID(&stat), 491 } 492 493 cu.Release() 494 return newFDMaybe(c.file), c, c.qid, 0, nil 495 } 496 497 // Mkdir implements p9.File. 498 func (l *localFile) Mkdir(name string, perm p9.FileMode, uid p9.UID, gid p9.GID) (p9.QID, error) { 499 if err := l.checkROMount(); err != nil { 500 return p9.QID{}, err 501 } 502 503 if err := unix.Mkdirat(l.file.FD(), name, uint32(perm.Permissions())); err != nil { 504 return p9.QID{}, extractErrno(err) 505 } 506 cu := cleanup.Make(func() { 507 // Best effort attempt to remove the dir in case of failure. 508 if err := unix.Unlinkat(l.file.FD(), name, unix.AT_REMOVEDIR); err != nil { 509 log.Warningf("error unlinking dir %q after failure: %v", path.Join(l.hostPath, name), err) 510 } 511 }) 512 defer cu.Clean() 513 514 // Open directory to change ownership and stat it. 515 flags := unix.O_DIRECTORY | unix.O_RDONLY | openFlags 516 f, err := fd.OpenAt(l.file, name, flags, 0) 517 if err != nil { 518 return p9.QID{}, extractErrno(err) 519 } 520 defer f.Close() 521 522 stat, err := setOwnerIfNeeded(f.FD(), uid, gid) 523 if err != nil { 524 return p9.QID{}, extractErrno(err) 525 } 526 527 cu.Release() 528 return l.attachPoint.makeQID(&stat), nil 529 } 530 531 // Walk implements p9.File. 532 func (l *localFile) Walk(names []string) ([]p9.QID, p9.File, error) { 533 qids, file, _, err := l.walk(names) 534 return qids, file, err 535 } 536 537 // WalkGetAttr implements p9.File. 538 func (l *localFile) WalkGetAttr(names []string) ([]p9.QID, p9.File, p9.AttrMask, p9.Attr, error) { 539 qids, file, stat, err := l.walk(names) 540 if err != nil { 541 return nil, nil, p9.AttrMask{}, p9.Attr{}, err 542 } 543 mask, attr := l.fillAttr(&stat) 544 return qids, file, mask, attr, nil 545 } 546 547 func (l *localFile) walk(names []string) ([]p9.QID, p9.File, unix.Stat_t, error) { 548 // Duplicate current file if 'names' is empty. 549 if len(names) == 0 { 550 newFile, readable, err := openAnyFile(l.hostPath, func(mode int) (*fd.FD, error) { 551 return reopenProcFd(l.file, openFlags|mode) 552 }) 553 if err != nil { 554 return nil, nil, unix.Stat_t{}, extractErrno(err) 555 } 556 557 stat, err := fstat(newFile.FD()) 558 if err != nil { 559 _ = newFile.Close() 560 return nil, nil, unix.Stat_t{}, extractErrno(err) 561 } 562 563 c := &localFile{ 564 attachPoint: l.attachPoint, 565 hostPath: l.hostPath, 566 file: newFile, 567 mode: invalidMode, 568 fileType: l.fileType, 569 qid: l.attachPoint.makeQID(&stat), 570 controlReadable: readable, 571 } 572 return []p9.QID{c.qid}, c, stat, nil 573 } 574 575 qids := make([]p9.QID, 0, len(names)) 576 var lastStat unix.Stat_t 577 last := l 578 for _, name := range names { 579 f, path, readable, err := openAnyFileFromParent(last, name) 580 if last != l { 581 _ = last.Close() 582 } 583 if err != nil { 584 return nil, nil, unix.Stat_t{}, extractErrno(err) 585 } 586 lastStat, err = fstat(f.FD()) 587 if err != nil { 588 _ = f.Close() 589 return nil, nil, unix.Stat_t{}, extractErrno(err) 590 } 591 c, err := newLocalFile(last.attachPoint, f, path, readable, &lastStat) 592 if err != nil { 593 _ = f.Close() 594 return nil, nil, unix.Stat_t{}, extractErrno(err) 595 } 596 597 qids = append(qids, c.qid) 598 last = c 599 } 600 return qids, last, lastStat, nil 601 } 602 603 // StatFS implements p9.File. 604 func (l *localFile) StatFS() (p9.FSStat, error) { 605 var s unix.Statfs_t 606 if err := unix.Fstatfs(l.file.FD(), &s); err != nil { 607 return p9.FSStat{}, extractErrno(err) 608 } 609 610 // Populate with what's available. 611 return p9.FSStat{ 612 Type: uint32(s.Type), 613 BlockSize: uint32(s.Bsize), 614 Blocks: s.Blocks, 615 BlocksFree: s.Bfree, 616 BlocksAvailable: s.Bavail, 617 Files: s.Files, 618 FilesFree: s.Ffree, 619 NameLength: uint32(s.Namelen), 620 }, nil 621 } 622 623 // FSync implements p9.File. 624 func (l *localFile) FSync() error { 625 if !l.isOpen() { 626 return unix.EBADF 627 } 628 if err := unix.Fsync(l.file.FD()); err != nil { 629 return extractErrno(err) 630 } 631 return nil 632 } 633 634 // GetAttr implements p9.File. 635 func (l *localFile) GetAttr(_ p9.AttrMask) (p9.QID, p9.AttrMask, p9.Attr, error) { 636 stat, err := fstat(l.file.FD()) 637 if err != nil { 638 return p9.QID{}, p9.AttrMask{}, p9.Attr{}, extractErrno(err) 639 } 640 mask, attr := l.fillAttr(&stat) 641 return l.qid, mask, attr, nil 642 } 643 644 func (l *localFile) fillAttr(stat *unix.Stat_t) (p9.AttrMask, p9.Attr) { 645 attr := p9.Attr{ 646 Mode: p9.FileMode(stat.Mode), 647 UID: p9.UID(stat.Uid), 648 GID: p9.GID(stat.Gid), 649 NLink: uint64(stat.Nlink), 650 RDev: stat.Rdev, 651 Size: uint64(stat.Size), 652 BlockSize: uint64(stat.Blksize), 653 Blocks: uint64(stat.Blocks), 654 ATimeSeconds: uint64(stat.Atim.Sec), 655 ATimeNanoSeconds: uint64(stat.Atim.Nsec), 656 MTimeSeconds: uint64(stat.Mtim.Sec), 657 MTimeNanoSeconds: uint64(stat.Mtim.Nsec), 658 CTimeSeconds: uint64(stat.Ctim.Sec), 659 CTimeNanoSeconds: uint64(stat.Ctim.Nsec), 660 } 661 valid := p9.AttrMask{ 662 Mode: true, 663 UID: true, 664 GID: true, 665 NLink: true, 666 RDev: true, 667 Size: true, 668 Blocks: true, 669 ATime: true, 670 MTime: true, 671 CTime: true, 672 } 673 return valid, attr 674 } 675 676 // SetAttr implements p9.File. Due to mismatch in file API, options 677 // cannot be changed atomically and user may see partial changes when 678 // an error happens. 679 func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error { 680 if err := l.checkROMount(); err != nil { 681 return err 682 } 683 684 allowed := p9.SetAttrMask{ 685 Permissions: true, 686 UID: true, 687 GID: true, 688 Size: true, 689 ATime: true, 690 MTime: true, 691 ATimeNotSystemTime: true, 692 MTimeNotSystemTime: true, 693 } 694 695 if valid.Empty() { 696 // Nothing to do. 697 return nil 698 } 699 700 // Handle all the sanity checks up front so that the client gets a 701 // consistent result that is not attribute dependent. 702 if !valid.IsSubsetOf(allowed) { 703 log.Warningf("SetAttr() failed for %q, mask: %v", l.hostPath, valid) 704 return unix.EPERM 705 } 706 707 // Check if it's possible to use cached file, or if another one needs to be 708 // opened for write. 709 f := l.file 710 if l.fileType == unix.S_IFREG && l.mode != p9.WriteOnly && l.mode != p9.ReadWrite { 711 var err error 712 f, err = reopenProcFd(l.file, openFlags|os.O_WRONLY) 713 if err != nil { 714 return extractErrno(err) 715 } 716 defer f.Close() 717 } 718 719 // The semantics are to either return an error if no changes were made, 720 // or no error if *all* changes were made. Well, this can be impossible 721 // if the filesystem rejects at least one of the changes, especially 722 // since some operations are not easy to undo atomically. 723 // 724 // This could be made better if SetAttr actually returned the changes 725 // it did make, so the client can at least know what has changed. So 726 // we at least attempt to make all of the changes and return a generic 727 // error if any of them fails, which at least doesn't bias any change 728 // over another. 729 var err error 730 if valid.Permissions { 731 if cerr := unix.Fchmod(f.FD(), uint32(attr.Permissions)); cerr != nil { 732 log.Debugf("SetAttr fchmod failed %q, err: %v", l.hostPath, cerr) 733 err = extractErrno(cerr) 734 } 735 } 736 737 if valid.Size { 738 if terr := unix.Ftruncate(f.FD(), int64(attr.Size)); terr != nil { 739 log.Debugf("SetAttr ftruncate failed %q, err: %v", l.hostPath, terr) 740 err = extractErrno(terr) 741 } 742 } 743 744 if valid.ATime || valid.MTime { 745 utimes := [2]unix.Timespec{ 746 {Sec: 0, Nsec: unix.UTIME_OMIT}, 747 {Sec: 0, Nsec: unix.UTIME_OMIT}, 748 } 749 if valid.ATime { 750 if valid.ATimeNotSystemTime { 751 utimes[0].Sec = int64(attr.ATimeSeconds) 752 utimes[0].Nsec = int64(attr.ATimeNanoSeconds) 753 } else { 754 utimes[0].Nsec = unix.UTIME_NOW 755 } 756 } 757 if valid.MTime { 758 if valid.MTimeNotSystemTime { 759 utimes[1].Sec = int64(attr.MTimeSeconds) 760 utimes[1].Nsec = int64(attr.MTimeNanoSeconds) 761 } else { 762 utimes[1].Nsec = unix.UTIME_NOW 763 } 764 } 765 766 if l.fileType == unix.S_IFLNK { 767 // utimensat operates different that other syscalls. To operate on a 768 // symlink it *requires* AT_SYMLINK_NOFOLLOW with dirFD and a non-empty 769 // name. 770 parent, oErr := unix.Open(path.Dir(l.hostPath), openFlags|unix.O_PATH, 0) 771 if oErr != nil { 772 return extractErrno(oErr) 773 } 774 defer unix.Close(parent) 775 776 if tErr := utimensat(parent, path.Base(l.hostPath), utimes, unix.AT_SYMLINK_NOFOLLOW); tErr != nil { 777 log.Debugf("SetAttr utimens failed %q, err: %v", l.hostPath, tErr) 778 err = extractErrno(tErr) 779 } 780 } else { 781 // Directories and regular files can operate directly on the fd 782 // using empty name. 783 if terr := utimensat(f.FD(), "", utimes, 0); terr != nil { 784 log.Debugf("SetAttr utimens failed %q, err: %v", l.hostPath, terr) 785 err = extractErrno(terr) 786 } 787 } 788 } 789 790 if valid.UID || valid.GID { 791 uid := p9.NoUID 792 if valid.UID { 793 uid = attr.UID 794 } 795 gid := p9.NoGID 796 if valid.GID { 797 gid = attr.GID 798 } 799 if oErr := fchown(f.FD(), uid, gid); oErr != nil { 800 log.Debugf("SetAttr fchownat failed %q, err: %v", l.hostPath, oErr) 801 err = extractErrno(oErr) 802 } 803 } 804 805 return err 806 } 807 808 func (l *localFile) GetXattr(name string, size uint64) (string, error) { 809 if !l.attachPoint.conf.EnableVerityXattr { 810 return "", unix.EOPNOTSUPP 811 } 812 if _, ok := verityXattrs[name]; !ok { 813 return "", unix.EOPNOTSUPP 814 } 815 buffer := make([]byte, size) 816 if _, err := unix.Fgetxattr(l.file.FD(), name, buffer); err != nil { 817 return "", err 818 } 819 return string(buffer), nil 820 } 821 822 func (l *localFile) SetXattr(name string, value string, flags uint32) error { 823 if !l.attachPoint.conf.EnableVerityXattr { 824 return unix.EOPNOTSUPP 825 } 826 if _, ok := verityXattrs[name]; !ok { 827 return unix.EOPNOTSUPP 828 } 829 return unix.Fsetxattr(l.file.FD(), name, []byte(value), int(flags)) 830 } 831 832 func (*localFile) ListXattr(uint64) (map[string]struct{}, error) { 833 return nil, unix.EOPNOTSUPP 834 } 835 836 func (*localFile) RemoveXattr(string) error { 837 return unix.EOPNOTSUPP 838 } 839 840 // Allocate implements p9.File. 841 func (l *localFile) Allocate(mode p9.AllocateMode, offset, length uint64) error { 842 if !l.isOpen() { 843 return unix.EBADF 844 } 845 846 if err := unix.Fallocate(l.file.FD(), mode.ToLinux(), int64(offset), int64(length)); err != nil { 847 return extractErrno(err) 848 } 849 return nil 850 } 851 852 // Rename implements p9.File; this should never be called. 853 func (*localFile) Rename(p9.File, string) error { 854 panic("rename called directly") 855 } 856 857 // RenameAt implements p9.File.RenameAt. 858 func (l *localFile) RenameAt(oldName string, directory p9.File, newName string) error { 859 if err := l.checkROMount(); err != nil { 860 return err 861 } 862 863 newParent := directory.(*localFile) 864 if err := renameat(l.file.FD(), oldName, newParent.file.FD(), newName); err != nil { 865 return extractErrno(err) 866 } 867 return nil 868 } 869 870 // ReadAt implements p9.File. 871 func (l *localFile) ReadAt(p []byte, offset uint64) (int, error) { 872 if l.mode != p9.ReadOnly && l.mode != p9.ReadWrite { 873 return 0, unix.EBADF 874 } 875 if !l.isOpen() { 876 return 0, unix.EBADF 877 } 878 879 r, err := l.file.ReadAt(p, int64(offset)) 880 switch err { 881 case nil, io.EOF: 882 return r, nil 883 default: 884 return r, extractErrno(err) 885 } 886 } 887 888 // WriteAt implements p9.File. 889 func (l *localFile) WriteAt(p []byte, offset uint64) (int, error) { 890 if l.mode != p9.WriteOnly && l.mode != p9.ReadWrite { 891 return 0, unix.EBADF 892 } 893 if !l.isOpen() { 894 return 0, unix.EBADF 895 } 896 897 w, err := l.file.WriteAt(p, int64(offset)) 898 if err != nil { 899 return w, extractErrno(err) 900 } 901 return w, nil 902 } 903 904 // Symlink implements p9.File. 905 func (l *localFile) Symlink(target, newName string, uid p9.UID, gid p9.GID) (p9.QID, error) { 906 if err := l.checkROMount(); err != nil { 907 return p9.QID{}, err 908 } 909 910 if err := unix.Symlinkat(target, l.file.FD(), newName); err != nil { 911 return p9.QID{}, extractErrno(err) 912 } 913 cu := cleanup.Make(func() { 914 // Best effort attempt to remove the symlink in case of failure. 915 if err := unix.Unlinkat(l.file.FD(), newName, 0); err != nil { 916 log.Warningf("error unlinking file %q after failure: %v", path.Join(l.hostPath, newName), err) 917 } 918 }) 919 defer cu.Clean() 920 921 // Open symlink to change ownership and stat it. 922 f, err := fd.OpenAt(l.file, newName, unix.O_PATH|openFlags, 0) 923 if err != nil { 924 return p9.QID{}, extractErrno(err) 925 } 926 defer f.Close() 927 928 stat, err := setOwnerIfNeeded(f.FD(), uid, gid) 929 if err != nil { 930 return p9.QID{}, extractErrno(err) 931 } 932 933 cu.Release() 934 return l.attachPoint.makeQID(&stat), nil 935 } 936 937 // Link implements p9.File. 938 func (l *localFile) Link(target p9.File, newName string) error { 939 if err := l.checkROMount(); err != nil { 940 return err 941 } 942 943 targetFile := target.(*localFile) 944 if err := unix.Linkat(targetFile.file.FD(), "", l.file.FD(), newName, unix.AT_EMPTY_PATH); err != nil { 945 return extractErrno(err) 946 } 947 return nil 948 } 949 950 // Mknod implements p9.File. 951 func (l *localFile) Mknod(name string, mode p9.FileMode, _ uint32, _ uint32, uid p9.UID, gid p9.GID) (p9.QID, error) { 952 if err := l.checkROMount(); err != nil { 953 return p9.QID{}, err 954 } 955 956 // From mknod(2) man page: 957 // "EPERM: [...] if the filesystem containing pathname does not support 958 // the type of node requested." 959 if mode.FileType() != p9.ModeRegular { 960 return p9.QID{}, unix.EPERM 961 } 962 963 // Allow Mknod to create regular files. 964 if err := unix.Mknodat(l.file.FD(), name, uint32(mode), 0); err != nil { 965 return p9.QID{}, err 966 } 967 cu := cleanup.Make(func() { 968 // Best effort attempt to remove the file in case of failure. 969 if err := unix.Unlinkat(l.file.FD(), name, 0); err != nil { 970 log.Warningf("error unlinking file %q after failure: %v", path.Join(l.hostPath, name), err) 971 } 972 }) 973 defer cu.Clean() 974 975 // Open file to change ownership and stat it. 976 child, err := fd.OpenAt(l.file, name, unix.O_PATH|openFlags, 0) 977 if err != nil { 978 return p9.QID{}, extractErrno(err) 979 } 980 defer child.Close() 981 982 stat, err := setOwnerIfNeeded(child.FD(), uid, gid) 983 if err != nil { 984 return p9.QID{}, extractErrno(err) 985 } 986 987 cu.Release() 988 return l.attachPoint.makeQID(&stat), nil 989 } 990 991 // UnlinkAt implements p9.File. 992 func (l *localFile) UnlinkAt(name string, flags uint32) error { 993 if err := l.checkROMount(); err != nil { 994 return err 995 } 996 997 if err := unix.Unlinkat(l.file.FD(), name, int(flags)); err != nil { 998 return extractErrno(err) 999 } 1000 return nil 1001 } 1002 1003 // Readdir implements p9.File. 1004 func (l *localFile) Readdir(offset uint64, count uint32) ([]p9.Dirent, error) { 1005 if l.mode != p9.ReadOnly && l.mode != p9.ReadWrite { 1006 return nil, unix.EBADF 1007 } 1008 if !l.isOpen() { 1009 return nil, unix.EBADF 1010 } 1011 1012 // Readdirnames is a cursor over directories, so seek back to 0 to ensure it's 1013 // reading all directory contents. Take a lock because this operation is 1014 // stateful. 1015 l.readDirMu.Lock() 1016 defer l.readDirMu.Unlock() 1017 1018 skip := uint64(0) 1019 1020 // Check if the file is at the correct position already. If not, seek to 1021 // the beginning and read the entire directory again. We always seek if 1022 // offset is 0, since this is side-effectual (equivalent to rewinddir(3), 1023 // which causes the directory stream to resynchronize with the directory's 1024 // current contents). 1025 if l.lastDirentOffset != offset || offset == 0 { 1026 if _, err := unix.Seek(l.file.FD(), 0, 0); err != nil { 1027 return nil, extractErrno(err) 1028 } 1029 skip = offset 1030 } 1031 1032 dirents, err := l.readDirent(l.file.FD(), offset, count, skip) 1033 if err == nil { 1034 // On success, remember the offset that was returned at the current 1035 // position. 1036 l.lastDirentOffset = offset + uint64(len(dirents)) 1037 } else { 1038 // On failure, the state is unknown, force call to seek() next time. 1039 l.lastDirentOffset = math.MaxUint64 1040 } 1041 return dirents, err 1042 } 1043 1044 func (l *localFile) readDirent(f int, offset uint64, count uint32, skip uint64) ([]p9.Dirent, error) { 1045 var dirents []p9.Dirent 1046 1047 // Limit 'count' to cap the slice size that is returned. 1048 const maxCount = 100000 1049 if count > maxCount { 1050 count = maxCount 1051 } 1052 1053 // Pre-allocate buffers that will be reused to get partial results. 1054 direntsBuf := make([]byte, 8192) 1055 names := make([]string, 0, 100) 1056 1057 end := offset + uint64(count) 1058 for offset < end { 1059 dirSize, err := unix.ReadDirent(f, direntsBuf) 1060 if err != nil { 1061 return dirents, err 1062 } 1063 if dirSize <= 0 { 1064 return dirents, nil 1065 } 1066 1067 names := names[:0] 1068 _, _, names = unix.ParseDirent(direntsBuf[:dirSize], -1, names) 1069 1070 // Skip over entries that the caller is not interested in. 1071 if skip > 0 { 1072 if skip > uint64(len(names)) { 1073 skip -= uint64(len(names)) 1074 names = names[:0] 1075 } else { 1076 names = names[skip:] 1077 skip = 0 1078 } 1079 } 1080 for _, name := range names { 1081 stat, err := statAt(l.file.FD(), name) 1082 if err != nil { 1083 log.Warningf("Readdir is skipping file with failed stat %q, err: %v", l.hostPath, err) 1084 continue 1085 } 1086 qid := l.attachPoint.makeQID(&stat) 1087 offset++ 1088 dirents = append(dirents, p9.Dirent{ 1089 QID: qid, 1090 Type: qid.Type, 1091 Name: name, 1092 Offset: offset, 1093 }) 1094 } 1095 } 1096 return dirents, nil 1097 } 1098 1099 // Readlink implements p9.File. 1100 func (l *localFile) Readlink() (string, error) { 1101 // Shamelessly stolen from os.Readlink (added upper bound limit to buffer). 1102 const limit = 1024 * 1024 1103 for len := 128; len < limit; len *= 2 { 1104 b := make([]byte, len) 1105 n, err := unix.Readlinkat(l.file.FD(), "", b) 1106 if err != nil { 1107 return "", extractErrno(err) 1108 } 1109 if n < len { 1110 return string(b[:n]), nil 1111 } 1112 } 1113 return "", unix.ENOMEM 1114 } 1115 1116 // Flush implements p9.File. 1117 func (l *localFile) Flush() error { 1118 return nil 1119 } 1120 1121 // Connect implements p9.File. 1122 func (l *localFile) Connect(flags p9.ConnectFlags) (*fd.FD, error) { 1123 if !l.attachPoint.conf.HostUDS { 1124 return nil, unix.ECONNREFUSED 1125 } 1126 1127 // TODO(github.com/SagerNet/issue/1003): Due to different app vs replacement 1128 // mappings, the app path may have fit in the sockaddr, but we can't 1129 // fit f.path in our sockaddr. We'd need to redirect through a shorter 1130 // path in order to actually connect to this socket. 1131 const UNIX_PATH_MAX = 108 // defined in afunix.h 1132 if len(l.hostPath) > UNIX_PATH_MAX { 1133 return nil, unix.ECONNREFUSED 1134 } 1135 1136 var stype int 1137 switch flags { 1138 case p9.StreamSocket: 1139 stype = unix.SOCK_STREAM 1140 case p9.DgramSocket: 1141 stype = unix.SOCK_DGRAM 1142 case p9.SeqpacketSocket: 1143 stype = unix.SOCK_SEQPACKET 1144 default: 1145 return nil, unix.ENXIO 1146 } 1147 1148 f, err := unix.Socket(unix.AF_UNIX, stype, 0) 1149 if err != nil { 1150 return nil, err 1151 } 1152 1153 if err := unix.SetNonblock(f, true); err != nil { 1154 _ = unix.Close(f) 1155 return nil, err 1156 } 1157 1158 sa := unix.SockaddrUnix{Name: l.hostPath} 1159 if err := unix.Connect(f, &sa); err != nil { 1160 _ = unix.Close(f) 1161 return nil, err 1162 } 1163 1164 return fd.New(f), nil 1165 } 1166 1167 // Close implements p9.File. 1168 func (l *localFile) Close() error { 1169 l.mode = invalidMode 1170 err := l.file.Close() 1171 l.file = nil 1172 return err 1173 } 1174 1175 func (l *localFile) isOpen() bool { 1176 return l.mode != invalidMode 1177 } 1178 1179 // Renamed implements p9.Renamed. 1180 func (l *localFile) Renamed(newDir p9.File, newName string) { 1181 l.hostPath = join(newDir.(*localFile).hostPath, newName) 1182 } 1183 1184 // extractErrno tries to determine the errno. 1185 func extractErrno(err error) unix.Errno { 1186 if err == nil { 1187 // This should never happen. The likely result will be that 1188 // some user gets the frustrating "error: SUCCESS" message. 1189 log.Warningf("extractErrno called with nil error!") 1190 return 0 1191 } 1192 1193 switch err { 1194 case os.ErrNotExist: 1195 return unix.ENOENT 1196 case os.ErrExist: 1197 return unix.EEXIST 1198 case os.ErrPermission: 1199 return unix.EACCES 1200 case os.ErrInvalid: 1201 return unix.EINVAL 1202 } 1203 1204 // See if it's an errno or a common wrapped error. 1205 switch e := err.(type) { 1206 case unix.Errno: 1207 return e 1208 case *os.PathError: 1209 return extractErrno(e.Err) 1210 case *os.LinkError: 1211 return extractErrno(e.Err) 1212 case *os.SyscallError: 1213 return extractErrno(e.Err) 1214 } 1215 1216 // Fall back to EIO. 1217 log.Debugf("Unknown error: %v, defaulting to EIO", err) 1218 return unix.EIO 1219 } 1220 1221 func (l *localFile) checkROMount() error { 1222 if conf := l.attachPoint.conf; conf.ROMount { 1223 return unix.EROFS 1224 } 1225 return nil 1226 } 1227 1228 func (l *localFile) MultiGetAttr(names []string) ([]p9.FullStat, error) { 1229 stats := make([]p9.FullStat, 0, len(names)) 1230 1231 if len(names) > 0 && names[0] == "" { 1232 qid, valid, attr, err := l.GetAttr(p9.AttrMask{}) 1233 if err != nil { 1234 return nil, err 1235 } 1236 stats = append(stats, p9.FullStat{ 1237 QID: qid, 1238 Valid: valid, 1239 Attr: attr, 1240 }) 1241 names = names[1:] 1242 } 1243 1244 parent := l.file.FD() 1245 for _, name := range names { 1246 child, err := unix.Openat(parent, name, openFlags|unix.O_PATH, 0) 1247 if parent != l.file.FD() { 1248 // Parent is no longer needed. 1249 _ = unix.Close(parent) 1250 parent = -1 1251 } 1252 if err != nil { 1253 if errors.Is(err, unix.ENOENT) { 1254 // No pont in continuing any further. 1255 break 1256 } 1257 return nil, err 1258 } 1259 1260 var stat unix.Stat_t 1261 if err := unix.Fstat(child, &stat); err != nil { 1262 _ = unix.Close(child) 1263 return nil, err 1264 } 1265 valid, attr := l.fillAttr(&stat) 1266 stats = append(stats, p9.FullStat{ 1267 QID: l.attachPoint.makeQID(&stat), 1268 Valid: valid, 1269 Attr: attr, 1270 }) 1271 if (stat.Mode & unix.S_IFMT) != unix.S_IFDIR { 1272 // Doesn't need to continue if entry is not a dir. Including symlinks 1273 // that cannot be followed. 1274 _ = unix.Close(child) 1275 break 1276 } 1277 parent = child 1278 } 1279 if parent != -1 && parent != l.file.FD() { 1280 _ = unix.Close(parent) 1281 } 1282 return stats, nil 1283 }