github.com/grailbio/base@v0.0.11/file/fsnodefuse/fsnodefuse.go (about) 1 // fsnodefuse implements github.com/hanwen/go-fuse/v2/fs for fsnode.T. 2 // It's a work-in-progress. No correctness or stability is guaranteed. Or even suggested. 3 // 4 // fsnode.Parent naturally becomes a directory. fsnode.Leaf becomes a file. Support for FUSE 5 // operations on that file depends on what Leaf.Open returns. If that fsctx.File is also 6 // spliceio.ReaderAt: 7 // FUSE file supports concurrent, random reads and uses splices to reduce 8 // userspace <-> kernelspace memory copying. 9 // ioctx.ReaderAt: 10 // FUSE file supports concurrent, random reads. 11 // Otherwise: 12 // FUSE file supports in-order, contiguous reads only. That is, each read must 13 // start where the previous one ended. At fsctx.File EOF, file size is recorded 14 // and then overrides what fsctx.File.Stat() reports for future getattr calls, 15 // so users can see they're done reading. 16 // TODO: Decide if there's a better place for this feature. 17 package fsnodefuse 18 19 import ( 20 "fmt" 21 "runtime" 22 23 "github.com/grailbio/base/file/fsnode" 24 "github.com/grailbio/base/file/internal/kernel" 25 "github.com/hanwen/go-fuse/v2/fs" 26 "github.com/hanwen/go-fuse/v2/fuse" 27 ) 28 29 // NewRoot creates a FUSE inode whose contents are the given fsnode.T. 30 // Note that this inode must be mounted with options from ConfigureRequiredMountOptions. 31 func NewRoot(node fsnode.T) fs.InodeEmbedder { 32 switch n := node.(type) { 33 case fsnode.Parent: 34 return &dirInode{n: n} 35 case fsnode.Leaf: 36 // TODO(josh): Test this path. 37 return ®Inode{n: n} 38 } 39 panic(fmt.Sprintf("unrecognized fsnode type: %T, %[1]v", node)) 40 } 41 42 // ConfigureRequiredMountOptions sets values in opts to be compatible with fsnodefuse's 43 // implementation. Users of NewRoot must use these options, and they should call this last, 44 // to make sure the required options take effect. 45 func ConfigureRequiredMountOptions(opts *fuse.MountOptions) { 46 opts.MaxReadAhead = kernel.MaxReadAhead 47 } 48 49 // ConfigureDefaultMountOptions provides defaults that callers may want to start with, for performance. 50 func ConfigureDefaultMountOptions(opts *fuse.MountOptions) { 51 // Increase MaxBackground from its default value (12) to improve S3 read performance. 52 // 53 // Empirically, while reading a 30 GiB files in chunks in parallel, the number of concurrent 54 // reads processed by our FUSE server [1] was ~12 with the default, corresponding to poor 55 // network utilization (only 500 Mb/s on m5d.4x in EC2); it rises to ~120 after, and network 56 // read bandwidth rises to >7 Gb/s, close to the speed of reading directly from S3 with 57 // this machine (~9 Gb/s). 58 // 59 // libfuse documentation [2] suggests that this limits the number of kernel readahead 60 // requests, so raising the limit may allow kernel readahead for every chunk, which could 61 // plausibly explain the performance benefit. (There's also mention of large direct I/O 62 // requests from userspace; josh@ did not think his Go test program was using direct I/O for 63 // this benchmark, but maybe he just didn't know). 64 // 65 // This particular value is a somewhat-informed guess. We'd like it to be high enough to 66 // admit all the parallelism that applications may profitably want. EC2 instances generally 67 // have <1 Gb/s network bandwidth per CPU (m5n.24x is around that, and non-'n' types have 68 // several times less), and S3 connections are limited to ~700 Mb/s [3], so just a couple of 69 // read chunks per CPU are sufficient to be I/O-bound for large objects. Many smaller object 70 // reads tend to not reach maximum bandwidth, so applications may increase parallelism, 71 // so we set our limit several times higher. 72 // TODO: Run more benchmarks (like github.com/grailbio/base/file/filebench) and tune. 73 // 74 // [1] As measured by simple logging: https://gitlab.com/grailbio/grail/-/merge_requests/8292/diffs?commit_id=7681acfcac836b92eaca60eb567245b32b81ec50 75 // [2] https://web.archive.org/web/20220815053939/https://libfuse.github.io/doxygen/structfuse__conn__info.html#a5f9e695735727343448ae1e1a86dfa03 76 // [3] 85-90 MB/s: https://web.archive.org/web/20220325121400/https://docs.aws.amazon.com/AmazonS3/latest/userguide/optimizing-performance-design-patterns.html#optimizing-performance-parallelization 77 opts.MaxBackground = 16 * runtime.NumCPU() 78 79 // We don't use extended attributes so we can skip these requests to improve performance. 80 opts.DisableXAttrs = true 81 }