github.com/grailbio/base@v0.0.11/file/internal/readmatcher/readmatcher.go (about) 1 package readmatcher 2 3 import ( 4 "context" 5 stderrors "errors" 6 "sync" 7 8 "github.com/grailbio/base/errors" 9 "github.com/grailbio/base/file/fsnodefuse/trailingbuf" 10 "github.com/grailbio/base/file/internal/kernel" 11 "github.com/grailbio/base/ioctx" 12 ) 13 14 type ( 15 // TODO: Avoid somewhat hidden internal dependency on kernel.MaxReadAhead. 16 readerAt struct { 17 offsetReader func(int64) ioctx.ReadCloser 18 softMaxReaders int 19 20 // mu guards the fields below. It's held while looking up a reader, but not during reads. 21 // TODO: Consider making this RWMutex. 22 mu sync.Mutex 23 // clock counts reader usages, increasing monotonically. Reader creation and usage is 24 // "timestamped" according to this clock, letting us prune least-recently-used. 25 clock int64 26 // TODO: More efficient data structure. 27 readers readers 28 } 29 // readers is a collection of backend readers. It's ordered by createdAt: 30 // readers[i].createdAt < readers[j].createdAt iff i < j. 31 // Elements in the middle may be removed; then we just shift the tail forward by 1. 32 readers []*reader 33 reader struct { 34 // These fields are set at creation and never mutated. 35 ioctx.ReaderAt 36 ioctx.Closer 37 38 // These fields are accessed only while holding the the parent readerAt's lock. 39 maxPos int64 40 inUse int64 41 lastUsedAt int64 42 createdAt int64 43 } 44 ) 45 46 const defaultMaxReaders = 1024 47 48 var ( 49 _ ioctx.ReaderAt = (*readerAt)(nil) 50 _ ioctx.Closer = (*readerAt)(nil) 51 ) 52 53 type Opt func(*readerAt) 54 55 func SoftMaxReaders(n int) Opt { return func(r *readerAt) { r.softMaxReaders = n } } 56 57 // New returns a ReaderAt that "multiplexes" incoming reads onto one of a collection of "backend" 58 // readers. It matches read to backend based on last read position; a reader is selected if its last 59 // request ended near where the new read starts. 60 // 61 // It is intended for use with biofs+S3. S3 readers have high initialization costs vs. 62 // subsequently reading bytes, because that is S3's performance characteristic. ReaderAt maps 63 // incoming reads to a backend S3 reader that may be able to efficiently serve it. Otherwise, it 64 // opens a new reader. Our intention is that this will adapt to non-S3-aware clients' read 65 // patterns (small reads). S3-aware clients can always choose to read big chunks to avoid 66 // performance worst-cases. But, the Linux kernel limits FUSE read requests to 128 KiB, and we 67 // can't feasibly change that, so we adapt. 68 // 69 // To performantly handle Linux kernel readahead requests, the matching algorithm allows 70 // out-of-order positions within a small window (see trailingbuf). 71 // 72 // offsetReader opens a reader into the underlying file. 73 func New(offsetReader func(int64) ioctx.ReadCloser, opts ...Opt) interface { 74 ioctx.ReaderAt 75 ioctx.Closer 76 } { 77 r := readerAt{offsetReader: offsetReader, softMaxReaders: defaultMaxReaders} 78 for _, opt := range opts { 79 opt(&r) 80 } 81 return &r 82 } 83 84 func (m *readerAt) ReadAt(ctx context.Context, dst []byte, off int64) (int, error) { 85 var minCreatedAt int64 86 for { 87 r := m.acquire(off, minCreatedAt) 88 n, err := r.ReadAt(ctx, dst, off) 89 m.release(r, off+int64(n)) 90 if err != nil && stderrors.Is(err, trailingbuf.ErrTooFarBehind) { 91 minCreatedAt = r.createdAt + 1 92 continue 93 } 94 return n, err 95 } 96 } 97 98 func (m *readerAt) acquire(off int64, minCreatedAt int64) *reader { 99 m.mu.Lock() 100 defer m.mu.Unlock() 101 for _, r := range m.readers { 102 if r.createdAt < minCreatedAt { 103 continue 104 } 105 if r.maxPos-kernel.MaxReadAhead <= off && off <= r.maxPos+kernel.MaxReadAhead { 106 r.inUse++ 107 r.lastUsedAt = m.clock 108 m.clock++ 109 return r 110 } 111 } 112 m.lockedGC() 113 rc := m.offsetReader(off) 114 r := &reader{ 115 ReaderAt: trailingbuf.New(rc, off, kernel.MaxReadAhead), 116 Closer: rc, 117 maxPos: off, 118 inUse: 1, 119 lastUsedAt: m.clock, 120 createdAt: m.clock, 121 } 122 m.clock++ 123 m.readers.add(r) 124 return r 125 } 126 127 func (m *readerAt) release(r *reader, newPos int64) { 128 m.mu.Lock() 129 defer m.mu.Unlock() 130 if newPos > r.maxPos { 131 r.maxPos = newPos 132 } 133 r.inUse-- 134 m.lockedGC() 135 } 136 137 func (m *readerAt) lockedGC() { 138 for len(m.readers) > m.softMaxReaders { 139 i, ok := m.readers.idleLeastRecentlyUsedIndex() 140 if !ok { 141 return 142 } 143 m.readers.remove(i) 144 } 145 } 146 147 func (m *readerAt) Close(ctx context.Context) (err error) { 148 m.mu.Lock() 149 defer m.mu.Unlock() 150 for _, rc := range m.readers { 151 errors.CleanUpCtx(ctx, rc.Close, &err) 152 } 153 m.readers = nil 154 return 155 } 156 157 func (rs *readers) add(r *reader) { 158 *rs = append(*rs, r) 159 } 160 161 func (rs *readers) remove(i int) { 162 *rs = append((*rs)[:i], (*rs)[i+1:]...) 163 } 164 165 func (rs *readers) idleLeastRecentlyUsedIndex() (int, bool) { 166 minIdx := -1 167 for i, r := range *rs { 168 if r.inUse > 0 { 169 continue 170 } 171 if minIdx < 0 || r.lastUsedAt < (*rs)[minIdx].lastUsedAt { 172 minIdx = i 173 } 174 } 175 if minIdx < 0 { 176 return -1, false 177 } 178 return minIdx, true 179 }