// Source: github.com/Schaudge/grailbase@v0.0.0-20240223061707-44c758a471c0/cmd/grail-fuse/gfs/gfs.go

// Package gfs implements FUSE on top of grailfile. Function Main is the entry
// point.
package gfs

import (
	"context"
	"crypto/sha512"
	"encoding/binary"
	"fmt"
	"io"
	"os"
	"runtime/debug"
	"sync"
	"sync/atomic"
	"syscall"
	"time"
	"unsafe"

	"github.com/Schaudge/grailbase/errors"
	"github.com/Schaudge/grailbase/file"
	"github.com/Schaudge/grailbase/log"
	gunsafe "github.com/Schaudge/grailbase/unsafe"
	"github.com/hanwen/go-fuse/v2/fs"
	"github.com/hanwen/go-fuse/v2/fuse"
)

// Inode represents a file or a directory.
//
// fs.Inode must remain the first field: downCast converts a *fs.Inode back
// into an *inode with an unsafe pointer cast.
type inode struct {
	fs.Inode
	// full pathname, such as "s3://bucket/key0/key1"
	path string
	// dir entry as stored in the parent directory.
	ent fuse.DirEntry

	mu   sync.Mutex // guards the following fields.
	stat cachedStat // TODO: Remove this since we're now using kernel caching.

	// nDirStreamRef tracks the usage of this inode in DirStreams. It is used
	// to decide whether an inode can be reused to service LOOKUP
	// operations. To handle READDIRPLUS, go-fuse interleaves LOOKUP calls for
	// each directory entry. We allow the inode associated with the previous
	// directory entry to be used in LOOKUP to avoid costly API calls.
	//
	// Because an inode can be the previous entry in multiple DirStreams, we
	// maintain a reference count.
	//
	// It is possible for the inode to be forgotten, e.g. when the kernel is
	// low on memory, before the LOOKUP call. If this happens, LOOKUP will not
	// be able to reuse it. This seems to happen rarely, if at all, in
	// practice.
	//
	// Accessed only through sync/atomic (see addDirStreamRef and friends), so
	// it is not guarded by mu.
	nDirStreamRef int32
}

// Amount of time to cache directory entries and file stats (size, mtime).
const cacheExpiration = 5 * time.Minute

// RootInode is a singleton inode created for the root mount point.
type rootInode struct {
	inode
	// The context to be used for all file operations. It's vcontext.Background()
	// in Grail environments.
	// TODO(josh): Consider removing and using operation-specific contexts instead (like readdir).
	ctx context.Context
	// Directory for storing tmp files.
	tmpDir string
}

// Handle represents an open file handle.
type handle struct {
	// The file that the handle belongs to
	inode *inode
	// Open mode bits. O_WRONLY, etc.
	openMode uint32
	// Size passed to Setattr, if any. -1 if not set.
	requestedSize int64
	// Remembers the result of the first Flush. If Flush is called multiple times
	// they will return this code.
	closeErrno syscall.Errno

	// At most one of the following three will be set. dw and tmp are
	// initialized lazily (see maybeInitIO); dr is set by Open for handles
	// opened without any write flag.
	dw  *directWrite // O_WRONLY|O_TRUNC, or O_WRONLY for a new file.
	dr  *directRead  // O_RDONLY.
	tmp *tmpIO       // everything else, e.g., O_RDWR or O_APPEND.
}

// NewHandle creates a handle for the given inode with no I/O backend attached
// yet. openMode is a bitmap of O_RDONLY, O_APPEND, etc. requestedSize starts
// at -1, meaning "no size requested via Setattr".
func newHandle(inode *inode, openMode uint32) *handle {
	return &handle{inode: inode, openMode: openMode, requestedSize: -1}
}

// DirectWrite is part of open file handle. It uploads data directly to the remote
// file. Used when creating a new file, or overwriting an existing file with
// O_WRONLY|O_TRUNC.
type directWrite struct {
	fp file.File
	w  io.Writer
	// The next expected write offset. Calling Write on a wrong offset results in
	// error (w doesn't implement a seeker).
	off int64
}

// DirectRead is part of open file handle. It is used when reading a file
// readonly.
type directRead struct {
	fp file.File
	r  io.ReadSeeker
}

// TmpIO is part of open file handle. It writes data to a file in the local file
// system. On Flush (i.e., close), the file contents are copied to the remote
// file. It is used w/ O_RDWR, O_APPEND, etc.
113 type tmpIO struct { 114 fp *os.File // refers to a file in -tmp-dir. 115 } 116 117 // CachedStat is stored in inode and a directory entry to provide quick access 118 // to basic stats. 119 type cachedStat struct { 120 expiration time.Time 121 size int64 122 modTime time.Time 123 } 124 125 func downCast(n *fs.Inode) *inode { 126 nn := (*inode)(unsafe.Pointer(n)) 127 if nn.path == "" { 128 log.Panicf("not an inode: %+v", n) 129 } 130 return nn 131 } 132 133 var ( 134 _ fs.InodeEmbedder = (*inode)(nil) 135 136 _ fs.NodeAccesser = (*inode)(nil) 137 _ fs.NodeCreater = (*inode)(nil) 138 _ fs.NodeGetattrer = (*inode)(nil) 139 _ fs.NodeLookuper = (*inode)(nil) 140 _ fs.NodeMkdirer = (*inode)(nil) 141 _ fs.NodeOpener = (*inode)(nil) 142 _ fs.NodeReaddirer = (*inode)(nil) 143 _ fs.NodeRmdirer = (*inode)(nil) 144 _ fs.NodeSetattrer = (*inode)(nil) 145 _ fs.NodeUnlinker = (*inode)(nil) 146 147 _ fs.FileFlusher = (*handle)(nil) 148 _ fs.FileFsyncer = (*handle)(nil) 149 _ fs.FileLseeker = (*handle)(nil) 150 _ fs.FileReader = (*handle)(nil) 151 _ fs.FileReleaser = (*handle)(nil) 152 _ fs.FileWriter = (*handle)(nil) 153 ) 154 155 func newAttr(ino uint64, mode uint32, size uint64, optionalMtime time.Time) (attr fuse.Attr) { 156 const blockSize = 1 << 20 157 attr.Ino = ino 158 attr.Mode = mode 159 attr.Nlink = 1 160 attr.Size = size 161 attr.Blocks = (attr.Size-1)/blockSize + 1 162 if !optionalMtime.IsZero() { 163 attr.SetTimes(nil, &optionalMtime, nil) 164 } 165 return 166 } 167 168 // GetModeBits produces the persistent mode bits so that the kernel can 169 // distinguish regular files from directories. 170 func getModeBits(isDir bool) uint32 { 171 mode := uint32(0) 172 if isDir { 173 mode |= syscall.S_IFDIR | 0755 174 } else { 175 mode |= syscall.S_IFREG | 0644 176 } 177 return mode 178 } 179 180 // GetIno produces a fake inode number by hashing the path. 
181 func getIno(path string) uint64 { 182 h := sha512.Sum512_256(gunsafe.StringToBytes(path)) 183 return binary.LittleEndian.Uint64(h[:8]) 184 } 185 186 // GetFileName extracts the filename part of the path. "dir" is the directory 187 // that the file belongs in. 188 func getFileName(dir *inode, path string) string { 189 if dir.IsRoot() { 190 return path[len(dir.path):] 191 } 192 return path[len(dir.path)+1:] // +1 to remove '/'. 193 } 194 195 func errToErrno(err error) syscall.Errno { 196 if err == nil { 197 return 0 198 } 199 log.Debug.Printf("error %v: stack=%s", err, string(debug.Stack())) 200 switch { 201 case err == nil: 202 return 0 203 case errors.Is(errors.Timeout, err): 204 return syscall.ETIMEDOUT 205 case errors.Is(errors.Canceled, err): 206 return syscall.EINTR 207 case errors.Is(errors.NotExist, err): 208 return syscall.ENOENT 209 case errors.Is(errors.Exists, err): 210 return syscall.EEXIST 211 case errors.Is(errors.NotAllowed, err): 212 return syscall.EACCES 213 case errors.Is(errors.Integrity, err): 214 return syscall.EIO 215 case errors.Is(errors.Invalid, err): 216 return syscall.EINVAL 217 case errors.Is(errors.Precondition, err), errors.Is(errors.Unavailable, err): 218 return syscall.EAGAIN 219 case errors.Is(errors.Net, err): 220 return syscall.ENETUNREACH 221 case errors.Is(errors.TooManyTries, err): 222 log.Error.Print(err) 223 return syscall.EINVAL 224 } 225 return fs.ToErrno(err) 226 } 227 228 // Root reports the inode of the root mountpoint. 229 func (n *inode) root() *rootInode { return n.Root().Operations().(*rootInode) } 230 231 // Ctx reports the context passed from the application when mounting the 232 // filesystem. 233 func (n *inode) ctx() context.Context { return n.root().ctx } 234 235 // addDirStreamRef adds a single reference to this inode. It must be eventually 236 // followed by a dropRef. 
237 func (n *inode) addDirStreamRef() { 238 _ = atomic.AddInt32(&n.nDirStreamRef, 1) 239 } 240 241 // dropDirStreamRef drops a single reference to this inode. 242 func (n *inode) dropDirStreamRef() { 243 if x := atomic.AddInt32(&n.nDirStreamRef, -1); x < 0 { 244 panic("negative reference count; unmatched drop") 245 } 246 } 247 248 // previousOfAnyDirStream returns true iff the inode is the previous entry 249 // returned by any outstanding DirStream. 250 func (n *inode) previousOfAnyDirStream() bool { 251 return atomic.LoadInt32(&n.nDirStreamRef) > 0 252 } 253 254 // Access is called to implement access(2). 255 func (n *inode) Access(_ context.Context, mask uint32) syscall.Errno { 256 // TODO(saito) I'm not sure returning 0 blindly is ok here. 257 log.Debug.Printf("setattr %s: mask=%x", n.path, mask) 258 return 0 259 } 260 261 // Setattr is called to change file attributes. This function only supports 262 // changing the size. 263 func (n *inode) Setattr(_ context.Context, fhi fs.FileHandle, in *fuse.SetAttrIn, out *fuse.AttrOut) syscall.Errno { 264 n.mu.Lock() 265 defer n.mu.Unlock() 266 267 usize, ok := in.GetSize() 268 if !ok { 269 // We don't support setting other attributes now. 
270 return 0 271 } 272 size := int64(usize) 273 274 if fhi != nil { 275 fh := fhi.(*handle) 276 switch { 277 case fh.dw != nil: 278 if size == fh.dw.off { 279 return 0 280 } 281 log.Error.Printf("setattr %s: setting size to %d in directio mode not supported (request: %+v)", n.path, size, in) 282 return syscall.ENOSYS 283 case fh.dr != nil: 284 log.Error.Printf("setattr %s: readonly", n.path) 285 return syscall.EPERM 286 case fh.tmp != nil: 287 return errToErrno(fh.tmp.fp.Truncate(size)) 288 default: 289 fh.requestedSize = size 290 return 0 291 } 292 } 293 294 if size != 0 { 295 log.Error.Printf("setattr %s: setting size to nonzero value (%d) not supported", n.path, size) 296 return syscall.ENOSYS 297 } 298 ctx := n.ctx() 299 fp, err := file.Create(ctx, n.path) 300 if err != nil { 301 log.Error.Printf("setattr %s: %v", n.path, err) 302 return errToErrno(err) 303 } 304 if err := fp.Close(ctx); err != nil { 305 log.Error.Printf("setattr %s: %v", n.path, err) 306 return errToErrno(err) 307 } 308 return 0 309 } 310 311 func (n *inode) Getattr(_ context.Context, fhi fs.FileHandle, out *fuse.AttrOut) syscall.Errno { 312 ctx := n.ctx() 313 if n.ent.Ino == 0 || n.ent.Mode == 0 { 314 log.Panicf("node %s: ino or mode unset: %+v", n.path, n) 315 } 316 if n.IsDir() { 317 log.Debug.Printf("getattr %s: directory", n.path) 318 out.Attr = newAttr(n.ent.Ino, n.ent.Mode, 0, time.Time{}) 319 return 0 320 } 321 322 var fh *handle 323 if fhi != nil { 324 fh = fhi.(*handle) 325 } 326 327 n.mu.Lock() 328 defer n.mu.Unlock() 329 if fh != nil { 330 if err := fh.maybeInitIO(); err != nil { 331 return errToErrno(err) 332 } 333 if t := fh.tmp; t != nil { 334 log.Debug.Printf("getattr %s: tmp", n.path) 335 stat, err := t.fp.Stat() 336 if err != nil { 337 log.Printf("getattr %s (%s): %v", n.path, t.fp.Name(), err) 338 return errToErrno(err) 339 } 340 out.Attr = newAttr(n.ent.Ino, n.ent.Mode, uint64(stat.Size()), stat.ModTime()) 341 return 0 342 } 343 if fh.dw != nil { 344 out.Attr = 
newAttr(n.ent.Ino, n.ent.Mode, uint64(n.stat.size), n.stat.modTime) 345 return 0 346 } 347 // fall through 348 } 349 stat, err := n.getCachedStat(ctx) 350 if err != nil { 351 log.Printf("getattr %s: err %v", n.path, err) 352 return errToErrno(err) 353 } 354 out.Attr = newAttr(n.ent.Ino, n.ent.Mode, uint64(stat.size), stat.modTime) 355 log.Debug.Printf("getattr %s: out %+v", n.path, out) 356 return 0 357 } 358 359 func (n *inode) getCachedStat(ctx context.Context) (cachedStat, error) { 360 now := time.Now() 361 if now.After(n.stat.expiration) { 362 log.Debug.Printf("getcachedstat %s: cache miss", n.path) 363 info, err := file.Stat(ctx, n.path) 364 if err != nil { 365 log.Printf("getcachedstat %s: err %v", n.path, err) 366 return cachedStat{}, err 367 } 368 n.stat = cachedStat{ 369 expiration: now.Add(cacheExpiration), 370 size: info.Size(), 371 modTime: info.ModTime(), 372 } 373 } else { 374 log.Debug.Printf("getcachedstat %s: cache hit %+v now %v", n.path, n.stat, now) 375 } 376 return n.stat, nil 377 } 378 379 // MaybeInitIO is called on the first call to Read or Write after open. It 380 // initializes either the directio uploader or a tempfile. 381 // 382 // REQUIRES: fh.inode.mu is locked 383 func (fh *handle) maybeInitIO() error { 384 n := fh.inode 385 if fh.dw != nil || fh.dr != nil || fh.tmp != nil { 386 return nil 387 } 388 if (fh.openMode & fuse.O_ANYWRITE) == 0 { 389 // Readonly handle should have fh.direct set at the time of Open. 390 log.Panicf("open %s: uninitialized readonly handle", n.path) 391 } 392 if fh.inode == nil { 393 log.Panicf("open %s: nil inode: %+v", n.path, fh) 394 } 395 ctx := n.ctx() 396 if (fh.openMode&syscall.O_RDWR) != syscall.O_RDWR && 397 (fh.requestedSize == 0 || (fh.openMode&syscall.O_TRUNC == syscall.O_TRUNC)) { 398 // We are fully overwriting the file. Do that w/o a local tmpfile. 
399 log.Debug.Printf("open %s: direct IO", n.path) 400 fp, err := file.Create(ctx, n.path) 401 if err != nil { 402 return err 403 } 404 fh.dw = &directWrite{fp: fp, w: fp.Writer(ctx)} 405 return nil 406 } 407 // Do all reads/writes on a local tmp file, and copy it to the remote file on 408 // close. 409 log.Debug.Printf("open %s: tmp IO", n.path) 410 in, err := file.Open(ctx, n.path) 411 if err != nil { 412 log.Error.Printf("open %s: %v", n.path, err) 413 return err 414 } 415 tmpPath := file.Join(n.root().tmpDir, fmt.Sprintf("%08x", n.ent.Ino)) 416 tmp, err := os.OpenFile(tmpPath, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0600) 417 if err != nil { 418 log.Error.Printf("create %s (open %s): %v", tmpPath, n.path, err) 419 _ = in.Close(ctx) 420 return errToErrno(err) 421 } 422 inSize, err := io.Copy(tmp, in.Reader(ctx)) 423 log.Debug.Printf("copy %s->%s: n+%d, %v", n.path, tmp.Name(), inSize, err) 424 if err != nil { 425 _ = in.Close(ctx) 426 _ = tmp.Close() 427 return errToErrno(err) 428 } 429 if err := in.Close(ctx); err != nil { 430 _ = tmp.Close() 431 return errToErrno(err) 432 } 433 now := time.Now() 434 n.stat.expiration = now.Add(cacheExpiration) 435 n.stat.size = inSize 436 n.stat.modTime = now 437 fh.tmp = &tmpIO{ 438 fp: tmp, 439 } 440 return nil 441 } 442 443 func (fh *handle) Read(_ context.Context, dest []byte, off int64) (fuse.ReadResult, syscall.Errno) { 444 n := fh.inode 445 readDirect := func() (fuse.ReadResult, syscall.Errno) { 446 d := fh.dr 447 if d == nil { 448 return nil, syscall.EINVAL 449 } 450 log.Debug.Printf("read %s(fh=%p): off=%d seek start", n.path, fh, off) 451 newOff, err := d.r.Seek(off, io.SeekStart) 452 log.Debug.Printf("read %s(fh=%p): off=%d seek end", n.path, fh, off) 453 if err != nil { 454 return nil, errToErrno(err) 455 } 456 if newOff != off { 457 log.Panicf("%d <-> %d", newOff, off) 458 } 459 460 nByte, err := d.r.Read(dest) 461 log.Debug.Printf("read %s(fh=%p): off=%d, nbyte=%d, err=%v", n.path, fh, off, nByte, err) 462 if err != 
nil { 463 if err != io.EOF { 464 return nil, errToErrno(err) 465 } 466 } 467 return fuse.ReadResultData(dest[:nByte]), 0 468 } 469 470 readTmp := func() (fuse.ReadResult, syscall.Errno) { 471 t := fh.tmp 472 nByte, err := t.fp.ReadAt(dest, off) 473 if err != nil { 474 if err != io.EOF { 475 return nil, errToErrno(err) 476 } 477 } 478 return fuse.ReadResultData(dest[:nByte]), 0 479 } 480 481 n.mu.Lock() 482 defer n.mu.Unlock() 483 if err := fh.maybeInitIO(); err != nil { 484 //return fuse.ReadResult{}, errToErrno(err) 485 return nil, errToErrno(err) 486 } 487 switch { 488 case fh.dr != nil: 489 return readDirect() 490 case fh.tmp != nil: 491 return readTmp() 492 default: 493 log.Error.Printf("read %s: reading unopened or writeonly file", n.path) 494 return nil, syscall.EBADF 495 } 496 } 497 498 func (fh *handle) Lseek(ctx context.Context, off uint64, whence uint32) (uint64, syscall.Errno) { 499 const ( 500 // Copied from https://github.com/torvalds/linux/blob/a050a6d2b7e80ca52b2f4141eaf3420d201b72b3/tools/include/uapi/linux/fs.h#L43-L47. 501 SEEK_DATA = 3 502 SEEK_HOLE = 4 503 ) 504 switch whence { 505 case SEEK_DATA: 506 return off, 0 // We don't support holes so current offset is correct. 
507 case SEEK_HOLE: 508 stat, err := fh.inode.getCachedStat(ctx) 509 if err != nil { 510 log.Error.Printf("lseek %s: stat: %v", fh.inode.path, err) 511 return 0, errToErrno(err) 512 } 513 return uint64(stat.size), 0 514 } 515 log.Error.Printf("lseek %s: unimplemented whence: %d", fh.inode.path, whence) 516 return 0, syscall.ENOSYS 517 } 518 519 func (fh *handle) Write(_ context.Context, dest []byte, off int64) (uint32, syscall.Errno) { 520 n := fh.inode 521 tmpWrite := func() (uint32, syscall.Errno) { 522 nByte, err := fh.tmp.fp.WriteAt(dest, off) 523 if err != nil { 524 log.Error.Printf("write %s: size=%d, off=%d: %v", n.path, len(dest), off, err) 525 return 0, errToErrno(err) 526 } 527 return uint32(nByte), 0 528 } 529 530 directWrite := func() (uint32, syscall.Errno) { 531 d := fh.dw 532 if d.off != off { 533 log.Error.Printf("write %s: offset mismatch (expect %d, got %d)", n.path, d.off, off) 534 return 0, syscall.EINVAL 535 } 536 if d.w == nil { 537 // closed already 538 log.Printf("write %s: already closed", n.path) 539 return 0, syscall.EBADF 540 } 541 nByte, err := d.w.Write(dest) 542 if err != nil { 543 if nByte > 0 { 544 panic(n) 545 } 546 return 0, errToErrno(err) 547 } 548 d.off += int64(nByte) 549 log.Debug.Printf("write %s: done %d bytes", n.path, nByte) 550 return uint32(nByte), 0 551 } 552 553 n.mu.Lock() 554 defer n.mu.Unlock() 555 log.Debug.Printf("write %s: %d bytes, off=%d", n.path, len(dest), off) 556 if err := fh.maybeInitIO(); err != nil { 557 return 0, errToErrno(err) 558 } 559 switch { 560 case fh.dw != nil: 561 return directWrite() 562 case fh.tmp != nil: 563 return tmpWrite() 564 default: 565 // file descriptor already closed 566 log.Error.Printf("write %s: writing after close", n.path) 567 return 0, syscall.EBADF 568 } 569 } 570 571 func (fh *handle) Fsync(_ context.Context, _ uint32) syscall.Errno { 572 n := fh.inode 573 n.mu.Lock() 574 defer n.mu.Unlock() 575 if d := fh.dw; d != nil { 576 n := fh.inode 577 // There's not much we can 
do, but returning ENOSYS breaks too many apps. 578 now := time.Now() 579 n.stat.expiration = now.Add(cacheExpiration) 580 n.stat.size = d.off 581 n.stat.modTime = now 582 log.Debug.Printf("fsync %s: update stats: stat=%v", n.path, n.stat) 583 } 584 return 0 585 } 586 587 // Release is called just before the inode is dropped from the kernel memory. 588 // Return value is unused. 589 func (fh *handle) Release(_ context.Context) syscall.Errno { 590 n := fh.inode 591 n.mu.Lock() 592 defer n.mu.Unlock() 593 switch { 594 case fh.tmp != nil: 595 if fh.tmp.fp != nil { 596 log.Panicf("%s: release called w/o flush", n.path) 597 } 598 case fh.dw != nil: 599 if fh.dw.fp != nil || fh.dw.w != nil { 600 log.Panicf("%s: release called w/o flush", n.path) 601 } 602 default: 603 if fh.dr != nil { 604 // Readonly handles are closed on the last release. 605 _ = fh.dr.fp.Close(n.ctx()) 606 } 607 } 608 return 0 609 } 610 611 // Flush is called on close(2). It may be called multiple times when the file 612 // descriptor is duped. 613 // 614 // TODO(saito) We don't support dups now. We close the underlying filestream on 615 // the first close and subsequent flush calls will do nothing. 
616 func (fh *handle) Flush(_ context.Context) syscall.Errno { 617 n := fh.inode 618 ctx := n.ctx() 619 620 flushTmpAndUnlock := func() syscall.Errno { 621 t := fh.tmp 622 mu := &n.mu 623 defer func() { 624 if mu != nil { 625 mu.Unlock() 626 } 627 }() 628 if t.fp == nil { 629 mu.Unlock() 630 return fh.closeErrno 631 } 632 out, err := file.Create(ctx, n.path) 633 if err != nil { 634 log.Error.Printf("flush %s (create): err=%v", n.path, err) 635 fh.closeErrno = errToErrno(err) 636 _ = t.fp.Close() 637 mu.Unlock() 638 return fh.closeErrno 639 } 640 defer func() { 641 if out != nil { 642 _ = out.Close(ctx) 643 } 644 if t.fp != nil { 645 _ = t.fp.Close() 646 t.fp = nil 647 } 648 }() 649 650 newOff, err := t.fp.Seek(0, io.SeekStart) 651 if err != nil { 652 log.Error.Printf("flush %s (seek): err=%v", n.path, err) 653 fh.closeErrno = errToErrno(err) 654 return fh.closeErrno 655 } 656 if newOff != 0 { 657 log.Panicf("newoff %d", newOff) 658 } 659 660 nByte, err := io.Copy(out.Writer(ctx), t.fp) 661 if err != nil { 662 log.Error.Printf("flush %s (copy): err=%v", n.path, err) 663 fh.closeErrno = errToErrno(err) 664 return fh.closeErrno 665 } 666 errp := errors.Once{} 667 errp.Set(t.fp.Close()) 668 errp.Set(out.Close(ctx)) 669 out = nil 670 t.fp = nil 671 if err := errp.Err(); err != nil { 672 fh.closeErrno = errToErrno(err) 673 log.Error.Printf("flush %s (close): err=%v", n.path, err) 674 return fh.closeErrno 675 } 676 677 now := time.Now() 678 n.stat.expiration = now.Add(cacheExpiration) 679 n.stat.size = nByte 680 n.stat.modTime = now 681 682 closeErrno := fh.closeErrno 683 mu.Unlock() 684 mu = nil 685 return closeErrno 686 } 687 688 flushDirectAndUnlock := func() syscall.Errno { 689 mu := &n.mu 690 defer func() { 691 if mu != nil { 692 mu.Unlock() 693 } 694 }() 695 d := fh.dw 696 if d.fp == nil { 697 return fh.closeErrno 698 } 699 700 err := d.fp.Close(ctx) 701 fh.closeErrno = errToErrno(err) 702 log.Debug.Printf("flush %s fh=%p, err=%v", n.path, fh, err) 703 if d.w != nil 
{ 704 now := time.Now() 705 n.stat.expiration = now.Add(cacheExpiration) 706 n.stat.size = d.off 707 n.stat.modTime = now 708 } 709 d.fp = nil 710 d.w = nil 711 closeErrno := fh.closeErrno 712 mu.Unlock() 713 mu = nil 714 return closeErrno 715 } 716 n.mu.Lock() 717 switch { 718 case fh.tmp != nil: 719 return flushTmpAndUnlock() 720 case fh.dw != nil: 721 return flushDirectAndUnlock() 722 } 723 n.mu.Unlock() 724 return 0 725 } 726 727 // Create is called to create a new file. 728 func (n *inode) Create(ctx context.Context, name string, flags uint32, mode uint32, 729 out *fuse.EntryOut) (*fs.Inode, fs.FileHandle, uint32, syscall.Errno) { 730 newPath := file.Join(n.path, name) 731 childNode := &inode{ 732 path: newPath, 733 ent: fuse.DirEntry{ 734 Name: name, 735 Ino: getIno(newPath), 736 Mode: getModeBits(false)}} 737 childInode := n.NewInode(ctx, childNode, fs.StableAttr{ 738 Mode: childNode.ent.Mode, 739 Ino: childNode.ent.Ino, 740 }) 741 fh := newHandle(childNode, syscall.O_WRONLY|syscall.O_CREAT|syscall.O_TRUNC) 742 fh.requestedSize = 0 743 log.Debug.Printf("create %s: (mode %x)", n.path, mode) 744 out.Attr = newAttr(n.ent.Ino, n.ent.Mode, 0, time.Time{}) 745 return childInode, fh, 0, 0 746 } 747 748 // Open opens an existing file. 749 func (n *inode) Open(_ context.Context, mode uint32) (fs.FileHandle, uint32, syscall.Errno) { 750 n.mu.Lock() 751 defer n.mu.Unlock() 752 ctx := n.ctx() 753 if n.IsRoot() { 754 // The entries under the root must be buckets, so we can't open it directly. 
755 log.Error.Printf("open %s: cannot open a file under root", n.path) 756 return nil, 0, syscall.EINVAL 757 } 758 _, dirInode := n.Parent() 759 if dirInode == nil { 760 log.Panicf("open %s: parent dir does't exist", n.path) 761 } 762 if (mode & fuse.O_ANYWRITE) == 0 { 763 fp, err := file.Open(n.ctx(), n.path) 764 if err != nil { 765 log.Error.Printf("open %s (mode %x): %v", n.path, mode, err) 766 return nil, 0, errToErrno(err) 767 } 768 fh := newHandle(n, mode) 769 fh.dr = &directRead{fp: fp, r: fp.Reader(ctx)} 770 log.Debug.Printf("open %s: mode %x, fh %p", n.path, mode, fh) 771 return fh, 0, 0 772 } 773 774 fh := newHandle(n, mode) 775 return fh, 0, 0 776 } 777 778 // FsDirStream implements readdir. 779 type fsDirStream struct { 780 ctx context.Context 781 dir *inode 782 lister file.Lister 783 err error 784 785 seenParent bool // Whether Next has already returned '..'. 786 seenSelf bool // Whether Next has already returned '.'. 787 peekedChild bool // Whether HasNext has Scan()-ed a child that Next hasn't returned yet. 788 789 // previousInode is the inode of the previous entry, i.e. the most recent 790 // entry returned by Next. We hold a reference to service LOOKUP 791 // operations that go-fuse issues when servicing READDIRPLUS. See 792 // dirStreamUsage. 793 previousInode *fs.Inode 794 } 795 796 // HasNext implements fs.DirStream 797 func (s *fsDirStream) HasNext() bool { 798 s.dir.mu.Lock() // TODO: Remove? 799 defer s.dir.mu.Unlock() 800 801 if s.err != nil || s.lister == nil { 802 return false 803 } 804 if !s.seenParent || !s.seenSelf || s.peekedChild { 805 return true 806 } 807 for s.lister.Scan() { 808 if getFileName(s.dir, s.lister.Path()) != "" { 809 s.peekedChild = true 810 return true 811 } 812 // Assume this is a directory marker: 813 // https://web.archive.org/web/20190424231712/https://docs.aws.amazon.com/AmazonS3/latest/user-guide/using-folders.html 814 // s3file's List returns these, but empty filenames seem to cause problems for FUSE. 
815 // TODO: Filtering these in s3file, if it's ok for other users. 816 } 817 return false 818 } 819 820 // Next implements fs.DirStream 821 func (s *fsDirStream) Next() (fuse.DirEntry, syscall.Errno) { 822 s.dir.mu.Lock() 823 defer s.dir.mu.Unlock() 824 825 if s.err != nil { 826 return fuse.DirEntry{}, errToErrno(s.err) 827 } 828 if err := s.lister.Err(); err != nil { 829 if _, canceled := <-s.ctx.Done(); canceled { 830 s.err = errors.E(errors.Canceled, "list canceled", err) 831 } else { 832 s.err = err 833 } 834 return fuse.DirEntry{}, errToErrno(s.err) 835 } 836 837 ent := fuse.DirEntry{} 838 stat := cachedStat{expiration: time.Now().Add(cacheExpiration)} 839 840 if !s.seenParent { 841 s.seenParent = true 842 _, parent := s.dir.Parent() 843 if parent != nil { 844 // Not root. 845 parentDir := downCast(parent) 846 ent = parentDir.ent 847 ent.Name = ".." 848 stat = parentDir.stat 849 return ent, 0 850 } 851 } 852 if !s.seenSelf { 853 s.seenSelf = true 854 ent = s.dir.ent 855 ent.Name = "." 
856 stat = s.dir.stat 857 return ent, 0 858 } 859 s.peekedChild = false 860 861 ent = fuse.DirEntry{ 862 Name: getFileName(s.dir, s.lister.Path()), 863 Mode: getModeBits(s.lister.IsDir()), 864 Ino: getIno(s.lister.Path()), 865 } 866 if info := s.lister.Info(); info != nil { 867 stat.size, stat.modTime = info.Size(), info.ModTime() 868 } 869 inode := s.dir.NewInode( 870 s.ctx, 871 &inode{path: file.Join(s.dir.path, ent.Name), ent: ent, stat: stat}, 872 fs.StableAttr{Mode: ent.Mode, Ino: ent.Ino}, 873 ) 874 _ = s.dir.AddChild(ent.Name, inode, true) 875 s.lockedSetPreviousInode(inode) 876 return ent, 0 877 } 878 879 // Close implements fs.DirStream 880 func (s *fsDirStream) Close() { 881 s.dir.mu.Lock() 882 s.lockedClearPreviousInode() 883 s.dir.mu.Unlock() 884 } 885 886 func (s *fsDirStream) lockedSetPreviousInode(n *fs.Inode) { 887 s.lockedClearPreviousInode() 888 s.previousInode = n 889 s.previousInode.Operations().(*inode).addDirStreamRef() 890 } 891 892 func (s *fsDirStream) lockedClearPreviousInode() { 893 if s.previousInode == nil { 894 return 895 } 896 s.previousInode.Operations().(*inode).dropDirStreamRef() 897 s.previousInode = nil 898 } 899 900 func (n *inode) Lookup(ctx context.Context, name string, out *fuse.EntryOut) (*fs.Inode, syscall.Errno) { 901 log.Debug.Printf("lookup %s: name=%s start", n.path, name) 902 903 childInode := n.GetChild(name) 904 if childInode != nil && childInode.Operations().(*inode).previousOfAnyDirStream() { 905 log.Debug.Printf("lookup %s: name=%s using existing child inode", n.path, name) 906 } else { 907 var ( 908 childPath = file.Join(n.path, name) 909 foundDir bool 910 foundFile cachedStat 911 lister = file.List(ctx, childPath, true /* recursive */) 912 ) 913 // Look for either a file or a directory at this path. 914 // If both exist, assume file is a directory marker. 915 for lister.Scan() { 916 if lister.IsDir() || // We've found an exact match, and it's a directory. 
917 lister.Path() != childPath { // We're seeing children, so childPath must be a directory. 918 foundDir = true 919 break 920 } 921 info := lister.Info() 922 foundFile = cachedStat{time.Now().Add(cacheExpiration), info.Size(), info.ModTime()} 923 } 924 if err := lister.Err(); err != nil { 925 if errors.Is(errors.NotExist, err) || errors.Is(errors.NotAllowed, err) { 926 // Ignore. 927 } else { 928 return nil, errToErrno(err) 929 } 930 } 931 932 if !foundDir && foundFile == (cachedStat{}) { 933 log.Debug.Printf("lookup: %s name='%s' not found", n.path, name) 934 return nil, syscall.ENOENT 935 } 936 937 ent := fuse.DirEntry{ 938 Name: childPath, 939 Mode: getModeBits(foundDir), 940 Ino: getIno(childPath), 941 } 942 childInode = n.NewInode( 943 ctx, 944 &inode{path: childPath, ent: ent, stat: foundFile}, 945 fs.StableAttr{ 946 Mode: ent.Mode, 947 Ino: ent.Ino, 948 }) 949 } 950 ops := childInode.Operations().(*inode) 951 out.Attr = newAttr(ops.ent.Ino, ops.ent.Mode, uint64(ops.stat.size), ops.stat.modTime) 952 out.SetEntryTimeout(cacheExpiration) 953 out.SetAttrTimeout(cacheExpiration) 954 log.Debug.Printf("lookup %s name='%s' done: mode=%o ino=%d stat=%+v", n.path, name, ops.ent.Mode, ops.ent.Ino, ops.stat) 955 return childInode, 0 956 } 957 958 func (n *inode) Readdir(ctx context.Context) (fs.DirStream, syscall.Errno) { 959 log.Debug.Printf("readdir %s: start", n.path) 960 // TODO(josh): Newer Linux kernels (4.20+) can cache the entries from readdir. Make sure this works 961 // and invalidates reasonably. 
962 // References: 963 // Linux patch series: https://github.com/torvalds/linux/commit/69e345511 964 // go-fuse support: https://github.com/hanwen/go-fuse/commit/fa1304749db6eafd8fe64338f10c9750cf693274 965 // libfuse's documentation (describing some kernel behavior): http://web.archive.org/web/20210118113434/https://libfuse.github.io/doxygen/structfuse__lowlevel__ops.html#afa15612c68f7971cadfe3d3ec0a8b70e 966 return &fsDirStream{ 967 ctx: ctx, 968 dir: n, 969 lister: file.List(ctx, n.path, false /*nonrecursive*/), 970 }, 0 971 } 972 973 func (n *inode) Unlink(_ context.Context, name string) syscall.Errno { 974 childPath := file.Join(n.path, name) 975 err := file.Remove(n.ctx(), childPath) 976 log.Debug.Printf("unlink %s: err %v", childPath, err) 977 return errToErrno(err) 978 } 979 980 func (n *inode) Rmdir(_ context.Context, name string) syscall.Errno { 981 // Nothing to do. 982 return 0 983 } 984 985 func (n *inode) Mkdir(ctx context.Context, name string, _ uint32, out *fuse.EntryOut) (*fs.Inode, syscall.Errno) { 986 n.mu.Lock() 987 defer n.mu.Unlock() 988 // TODO: Consider creating an S3 "directory" object so this new directory persists for new listings. 989 // https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-folders.html 990 newPath := file.Join(n.path, name) 991 childNode := &inode{ 992 path: newPath, 993 ent: fuse.DirEntry{ 994 Name: name, 995 Ino: getIno(newPath), 996 Mode: getModeBits(true)}} 997 childInode := n.NewInode(ctx, childNode, fs.StableAttr{ 998 Mode: childNode.ent.Mode, 999 Ino: childNode.ent.Ino, 1000 }) 1001 out.Attr = newAttr(n.ent.Ino, n.ent.Mode, 0, time.Time{}) 1002 out.SetEntryTimeout(cacheExpiration) 1003 out.SetAttrTimeout(cacheExpiration) 1004 return childInode, 0 1005 }