github.com/Schaudge/grailbase@v0.0.0-20240223061707-44c758a471c0/file/fsnodefuse/fsnodefuse.go (about)

     1  // fsnodefuse implements github.com/hanwen/go-fuse/v2/fs for fsnode.T.
     2  // It's a work-in-progress. No correctness or stability is guaranteed. Or even suggested.
     3  //
     4  // fsnode.Parent naturally becomes a directory. fsnode.Leaf becomes a file. Support for FUSE
     5  // operations on that file depends on what Leaf.Open returns. If that fsctx.File is also
     6  //   spliceio.ReaderAt:
     7  //     FUSE file supports concurrent, random reads and uses splices to reduce
     8  //     userspace <-> kernelspace memory copying.
     9  //   ioctx.ReaderAt:
    10  //     FUSE file supports concurrent, random reads.
    11  //   Otherwise:
    12  //     FUSE file supports in-order, contiguous reads only. That is, each read must
    13  //     start where the previous one ended. At fsctx.File EOF, file size is recorded
    14  //     and then overrides what fsctx.File.Stat() reports for future getattr calls,
    15  //     so users can see they're done reading.
    16  //     TODO: Decide if there's a better place for this feature.
    17  package fsnodefuse
    18  
    19  import (
    20  	"fmt"
    21  	"runtime"
    22  
    23  	"github.com/Schaudge/grailbase/file/fsnode"
    24  	"github.com/Schaudge/grailbase/file/internal/kernel"
    25  	"github.com/hanwen/go-fuse/v2/fs"
    26  	"github.com/hanwen/go-fuse/v2/fuse"
    27  )
    28  
    29  // NewRoot creates a FUSE inode whose contents are the given fsnode.T.
    30  // Note that this inode must be mounted with options from ConfigureRequiredMountOptions.
    31  func NewRoot(node fsnode.T) fs.InodeEmbedder {
    32  	switch n := node.(type) {
    33  	case fsnode.Parent:
    34  		return &dirInode{n: n}
    35  	case fsnode.Leaf:
    36  		// TODO(josh): Test this path.
    37  		return &regInode{n: n}
    38  	}
    39  	panic(fmt.Sprintf("unrecognized fsnode type: %T, %[1]v", node))
    40  }
    41  
    42  // ConfigureRequiredMountOptions sets values in opts to be compatible with fsnodefuse's
    43  // implementation. Users of NewRoot must use these options, and they should call this last,
    44  // to make sure the required options take effect.
    45  func ConfigureRequiredMountOptions(opts *fuse.MountOptions) {
    46  	opts.MaxReadAhead = kernel.MaxReadAhead
    47  }
    48  
    49  // ConfigureDefaultMountOptions provides defaults that callers may want to start with, for performance.
    50  func ConfigureDefaultMountOptions(opts *fuse.MountOptions) {
    51  	// Increase MaxBackground from its default value (12) to improve S3 read performance.
    52  	//
    53  	// Empirically, while reading a 30 GiB files in chunks in parallel, the number of concurrent
    54  	// reads processed by our FUSE server [1] was ~12 with the default, corresponding to poor
    55  	// network utilization (only 500 Mb/s on m5d.4x in EC2); it rises to ~120 after, and network
    56  	// read bandwidth rises to >7 Gb/s, close to the speed of reading directly from S3 with
    57  	// this machine (~9 Gb/s).
    58  	//
    59  	// libfuse documentation [2] suggests that this limits the number of kernel readahead
    60  	// requests, so raising the limit may allow kernel readahead for every chunk, which could
    61  	// plausibly explain the performance benefit. (There's also mention of large direct I/O
    62  	// requests from userspace; josh@ did not think his Go test program was using direct I/O for
    63  	// this benchmark, but maybe he just didn't know).
    64  	//
    65  	// This particular value is a somewhat-informed guess. We'd like it to be high enough to
    66  	// admit all the parallelism that applications may profitably want. EC2 instances generally
    67  	// have <1 Gb/s network bandwidth per CPU (m5n.24x is around that, and non-'n' types have
    68  	// several times less), and S3 connections are limited to ~700 Mb/s [3], so just a couple of
    69  	// read chunks per CPU are sufficient to be I/O-bound for large objects. Many smaller object
    70  	// reads tend to not reach maximum bandwidth, so applications may increase parallelism,
    71  	// so we set our limit several times higher.
    72  	// TODO: Run more benchmarks (like github.com/Schaudge/grailbase/file/filebench) and tune.
    73  	//
    74  	// [1] As measured by simple logging: https://gitlab.com/grailbio/grail/-/merge_requests/8292/diffs?commit_id=7681acfcac836b92eaca60eb567245b32b81ec50
    75  	// [2] https://web.archive.org/web/20220815053939/https://libfuse.github.io/doxygen/structfuse__conn__info.html#a5f9e695735727343448ae1e1a86dfa03
    76  	// [3] 85-90 MB/s: https://web.archive.org/web/20220325121400/https://docs.aws.amazon.com/AmazonS3/latest/userguide/optimizing-performance-design-patterns.html#optimizing-performance-parallelization
    77  	opts.MaxBackground = 16 * runtime.NumCPU()
    78  
    79  	// We don't use extended attributes so we can skip these requests to improve performance.
    80  	opts.DisableXAttrs = true
    81  }