github.com/grailbio/base@v0.0.11/file/internal/readmatcher/readmatcher.go (about)

     1  package readmatcher
     2  
     3  import (
     4  	"context"
     5  	stderrors "errors"
     6  	"sync"
     7  
     8  	"github.com/grailbio/base/errors"
     9  	"github.com/grailbio/base/file/fsnodefuse/trailingbuf"
    10  	"github.com/grailbio/base/file/internal/kernel"
    11  	"github.com/grailbio/base/ioctx"
    12  )
    13  
    14  type (
    15  	// TODO: Avoid somewhat hidden internal dependency on kernel.MaxReadAhead.
    16  	readerAt struct {
    17  		offsetReader   func(int64) ioctx.ReadCloser
    18  		softMaxReaders int
    19  
    20  		// mu guards the fields below. It's held while looking up a reader, but not during reads.
    21  		// TODO: Consider making this RWMutex.
    22  		mu sync.Mutex
    23  		// clock counts reader usages, increasing monotonically. Reader creation and usage is
    24  		// "timestamped" according to this clock, letting us prune least-recently-used.
    25  		clock int64
    26  		// TODO: More efficient data structure.
    27  		readers readers
    28  	}
    29  	// readers is a collection of backend readers. It's ordered by createdAt:
    30  	//   readers[i].createdAt < readers[j].createdAt iff i < j.
    31  	// Elements in the middle may be removed; then we just shift the tail forward by 1.
    32  	readers []*reader
    33  	reader  struct {
    34  		// These fields are set at creation and never mutated.
    35  		ioctx.ReaderAt
    36  		ioctx.Closer
    37  
    38  		// These fields are accessed only while holding the the parent readerAt's lock.
    39  		maxPos     int64
    40  		inUse      int64
    41  		lastUsedAt int64
    42  		createdAt  int64
    43  	}
    44  )
    45  
// defaultMaxReaders is the softMaxReaders value New starts with; the SoftMaxReaders option
// overrides it.
const defaultMaxReaders = 1024
    47  
// Compile-time checks that *readerAt satisfies the interfaces New promises to return.
var (
	_ ioctx.ReaderAt = (*readerAt)(nil)
	_ ioctx.Closer   = (*readerAt)(nil)
)
    52  
// Opt is an optional setting for New. New applies each Opt, in the order given, to the reader
// it is constructing, after the defaults have been filled in.
type Opt func(*readerAt)
    54  
    55  func SoftMaxReaders(n int) Opt { return func(r *readerAt) { r.softMaxReaders = n } }
    56  
    57  // New returns a ReaderAt that "multiplexes" incoming reads onto one of a collection of "backend"
    58  // readers. It matches read to backend based on last read position; a reader is selected if its last
    59  // request ended near where the new read starts.
    60  //
    61  // It is intended for use with biofs+S3. S3 readers have high initialization costs vs.
    62  // subsequently reading bytes, because that is S3's performance characteristic. ReaderAt maps
    63  // incoming reads to a backend S3 reader that may be able to efficiently serve it. Otherwise, it
    64  // opens a new reader. Our intention is that this will adapt to non-S3-aware clients' read
    65  // patterns (small reads). S3-aware clients can always choose to read big chunks to avoid
    66  // performance worst-cases. But, the Linux kernel limits FUSE read requests to 128 KiB, and we
    67  // can't feasibly change that, so we adapt.
    68  //
    69  // To performantly handle Linux kernel readahead requests, the matching algorithm allows
    70  // out-of-order positions within a small window (see trailingbuf).
    71  //
    72  // offsetReader opens a reader into the underlying file.
    73  func New(offsetReader func(int64) ioctx.ReadCloser, opts ...Opt) interface {
    74  	ioctx.ReaderAt
    75  	ioctx.Closer
    76  } {
    77  	r := readerAt{offsetReader: offsetReader, softMaxReaders: defaultMaxReaders}
    78  	for _, opt := range opts {
    79  		opt(&r)
    80  	}
    81  	return &r
    82  }
    83  
    84  func (m *readerAt) ReadAt(ctx context.Context, dst []byte, off int64) (int, error) {
    85  	var minCreatedAt int64
    86  	for {
    87  		r := m.acquire(off, minCreatedAt)
    88  		n, err := r.ReadAt(ctx, dst, off)
    89  		m.release(r, off+int64(n))
    90  		if err != nil && stderrors.Is(err, trailingbuf.ErrTooFarBehind) {
    91  			minCreatedAt = r.createdAt + 1
    92  			continue
    93  		}
    94  		return n, err
    95  	}
    96  }
    97  
    98  func (m *readerAt) acquire(off int64, minCreatedAt int64) *reader {
    99  	m.mu.Lock()
   100  	defer m.mu.Unlock()
   101  	for _, r := range m.readers {
   102  		if r.createdAt < minCreatedAt {
   103  			continue
   104  		}
   105  		if r.maxPos-kernel.MaxReadAhead <= off && off <= r.maxPos+kernel.MaxReadAhead {
   106  			r.inUse++
   107  			r.lastUsedAt = m.clock
   108  			m.clock++
   109  			return r
   110  		}
   111  	}
   112  	m.lockedGC()
   113  	rc := m.offsetReader(off)
   114  	r := &reader{
   115  		ReaderAt:   trailingbuf.New(rc, off, kernel.MaxReadAhead),
   116  		Closer:     rc,
   117  		maxPos:     off,
   118  		inUse:      1,
   119  		lastUsedAt: m.clock,
   120  		createdAt:  m.clock,
   121  	}
   122  	m.clock++
   123  	m.readers.add(r)
   124  	return r
   125  }
   126  
   127  func (m *readerAt) release(r *reader, newPos int64) {
   128  	m.mu.Lock()
   129  	defer m.mu.Unlock()
   130  	if newPos > r.maxPos {
   131  		r.maxPos = newPos
   132  	}
   133  	r.inUse--
   134  	m.lockedGC()
   135  }
   136  
   137  func (m *readerAt) lockedGC() {
   138  	for len(m.readers) > m.softMaxReaders {
   139  		i, ok := m.readers.idleLeastRecentlyUsedIndex()
   140  		if !ok {
   141  			return
   142  		}
   143  		m.readers.remove(i)
   144  	}
   145  }
   146  
   147  func (m *readerAt) Close(ctx context.Context) (err error) {
   148  	m.mu.Lock()
   149  	defer m.mu.Unlock()
   150  	for _, rc := range m.readers {
   151  		errors.CleanUpCtx(ctx, rc.Close, &err)
   152  	}
   153  	m.readers = nil
   154  	return
   155  }
   156  
   157  func (rs *readers) add(r *reader) {
   158  	*rs = append(*rs, r)
   159  }
   160  
   161  func (rs *readers) remove(i int) {
   162  	*rs = append((*rs)[:i], (*rs)[i+1:]...)
   163  }
   164  
   165  func (rs *readers) idleLeastRecentlyUsedIndex() (int, bool) {
   166  	minIdx := -1
   167  	for i, r := range *rs {
   168  		if r.inUse > 0 {
   169  			continue
   170  		}
   171  		if minIdx < 0 || r.lastUsedAt < (*rs)[minIdx].lastUsedAt {
   172  			minIdx = i
   173  		}
   174  	}
   175  	if minIdx < 0 {
   176  		return -1, false
   177  	}
   178  	return minIdx, true
   179  }