github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/objstorage/objstorageprovider/readahead.go (about)

     1  // Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package objstorageprovider
     6  
     7  const (
     8  	// Constants for dynamic readahead of data blocks. Note that the size values
     9  	// make sense as some multiple of the default block size; and they should
    10  	// both be larger than the default block size.
    11  	minFileReadsForReadahead = 2
    12  	// TODO(bilal): Have the initial size value be a factor of the block size,
    13  	// as opposed to a hardcoded value.
    14  	initialReadaheadSize = 64 << 10 /* 64KB */
    15  )
    16  
    17  // readaheadState contains state variables related to readahead. Updated on
    18  // file reads.
    19  type readaheadState struct {
    20  	// Number of sequential reads.
    21  	numReads         int64
    22  	maxReadaheadSize int64
    23  	// Size issued to the next call to Prefetch. Starts at or above
    24  	// initialReadaheadSize and grows exponentially until maxReadaheadSize.
    25  	size int64
    26  	// prevSize is the size used in the last Prefetch call.
    27  	prevSize int64
    28  	// The byte offset up to which the OS has been asked to read ahead / cached.
    29  	// When reading ahead, reads up to this limit should not incur an IO
    30  	// operation. Reads after this limit can benefit from a new call to
    31  	// Prefetch.
    32  	limit int64
    33  }
    34  
    35  func makeReadaheadState(maxReadaheadSize int64) readaheadState {
    36  	return readaheadState{
    37  		size:             initialReadaheadSize,
    38  		maxReadaheadSize: maxReadaheadSize,
    39  	}
    40  }
    41  
    42  func (rs *readaheadState) recordCacheHit(offset, blockLength int64) {
    43  	currentReadEnd := offset + blockLength
    44  	if rs.numReads >= minFileReadsForReadahead {
    45  		if currentReadEnd >= rs.limit && offset <= rs.limit+rs.maxReadaheadSize {
    46  			// This is a read that would have resulted in a readahead, had it
    47  			// not been a cache hit.
    48  			rs.limit = currentReadEnd
    49  			return
    50  		}
    51  		if currentReadEnd < rs.limit-rs.prevSize || offset > rs.limit+rs.maxReadaheadSize {
    52  			// We read too far away from rs.limit to benefit from readahead in
    53  			// any scenario. Reset all variables.
    54  			rs.numReads = 1
    55  			rs.limit = currentReadEnd
    56  			rs.size = initialReadaheadSize
    57  			rs.prevSize = 0
    58  			return
    59  		}
    60  		// Reads in the range [rs.limit - rs.prevSize, rs.limit] end up
    61  		// here. This is a read that is potentially benefitting from a past
    62  		// readahead.
    63  		return
    64  	}
    65  	if currentReadEnd >= rs.limit && offset <= rs.limit+rs.maxReadaheadSize {
    66  		// Blocks are being read sequentially and would benefit from readahead
    67  		// down the line.
    68  		rs.numReads++
    69  		return
    70  	}
    71  	// We read too far ahead of the last read, or before it. This indicates
    72  	// a random read, where readahead is not desirable. Reset all variables.
    73  	rs.numReads = 1
    74  	rs.limit = currentReadEnd
    75  	rs.size = initialReadaheadSize
    76  	rs.prevSize = 0
    77  }
    78  
    79  // maybeReadahead updates state and determines whether to issue a readahead /
    80  // prefetch call for a block read at offset for blockLength bytes.
    81  // Returns a size value (greater than 0) that should be prefetched if readahead
    82  // would be beneficial.
    83  func (rs *readaheadState) maybeReadahead(offset, blockLength int64) int64 {
    84  	currentReadEnd := offset + blockLength
    85  	if rs.numReads >= minFileReadsForReadahead {
    86  		// The minimum threshold of sequential reads to justify reading ahead
    87  		// has been reached.
    88  		// There are two intervals: the interval being read:
    89  		// [offset, currentReadEnd]
    90  		// as well as the interval where a read would benefit from read ahead:
    91  		// [rs.limit, rs.limit + rs.size]
    92  		// We increase the latter interval to
    93  		// [rs.limit, rs.limit + rs.maxReadaheadSize] to account for cases where
    94  		// readahead may not be beneficial with a small readahead size, but over
    95  		// time the readahead size would increase exponentially to make it
    96  		// beneficial.
    97  		if currentReadEnd >= rs.limit && offset <= rs.limit+rs.maxReadaheadSize {
    98  			// We are doing a read in the interval ahead of
    99  			// the last readahead range. In the diagrams below, ++++ is the last
   100  			// readahead range, ==== is the range represented by
   101  			// [rs.limit, rs.limit + rs.maxReadaheadSize], and ---- is the range
   102  			// being read.
   103  			//
   104  			//               rs.limit           rs.limit + rs.maxReadaheadSize
   105  			//         ++++++++++|===========================|
   106  			//
   107  			//              |-------------|
   108  			//            offset       currentReadEnd
   109  			//
   110  			// This case is also possible, as are all cases with an overlap
   111  			// between [rs.limit, rs.limit + rs.maxReadaheadSize] and [offset,
   112  			// currentReadEnd]:
   113  			//
   114  			//               rs.limit           rs.limit + rs.maxReadaheadSize
   115  			//         ++++++++++|===========================|
   116  			//
   117  			//                                            |-------------|
   118  			//                                         offset       currentReadEnd
   119  			//
   120  			//
   121  			rs.numReads++
   122  			rs.limit = offset + rs.size
   123  			rs.prevSize = rs.size
   124  			// Increase rs.size for the next read.
   125  			rs.size *= 2
   126  			if rs.size > rs.maxReadaheadSize {
   127  				rs.size = rs.maxReadaheadSize
   128  			}
   129  			return rs.prevSize
   130  		}
   131  		if currentReadEnd < rs.limit-rs.prevSize || offset > rs.limit+rs.maxReadaheadSize {
   132  			// The above conditional has rs.limit > rs.prevSize to confirm that
   133  			// rs.limit - rs.prevSize would not underflow.
   134  			// We read too far away from rs.limit to benefit from readahead in
   135  			// any scenario. Reset all variables.
   136  			// The case where we read too far ahead:
   137  			//
   138  			// (rs.limit - rs.prevSize)    (rs.limit)   (rs.limit + rs.maxReadaheadSize)
   139  			//                    |+++++++++++++|=============|
   140  			//
   141  			//                                                  |-------------|
   142  			//                                             offset       currentReadEnd
   143  			//
   144  			// Or too far behind:
   145  			//
   146  			// (rs.limit - rs.prevSize)    (rs.limit)   (rs.limit + rs.maxReadaheadSize)
   147  			//                    |+++++++++++++|=============|
   148  			//
   149  			//    |-------------|
   150  			// offset       currentReadEnd
   151  			//
   152  			rs.numReads = 1
   153  			rs.limit = currentReadEnd
   154  			rs.size = initialReadaheadSize
   155  			rs.prevSize = 0
   156  
   157  			return 0
   158  		}
   159  		// Reads in the range [rs.limit - rs.prevSize, rs.limit] end up
   160  		// here. This is a read that is potentially benefitting from a past
   161  		// readahead, but there's no reason to issue a readahead call at the
   162  		// moment.
   163  		//
   164  		// (rs.limit - rs.prevSize)            (rs.limit + rs.maxReadaheadSize)
   165  		//                    |+++++++++++++|===============|
   166  		//                             (rs.limit)
   167  		//
   168  		//                        |-------|
   169  		//                     offset    currentReadEnd
   170  		//
   171  		rs.numReads++
   172  		return 0
   173  	}
   174  	if currentReadEnd >= rs.limit && offset <= rs.limit+rs.maxReadaheadSize {
   175  		// Blocks are being read sequentially and would benefit from readahead
   176  		// down the line.
   177  		//
   178  		//                       (rs.limit)   (rs.limit + rs.maxReadaheadSize)
   179  		//                         |=============|
   180  		//
   181  		//                    |-------|
   182  		//                offset    currentReadEnd
   183  		//
   184  		rs.numReads++
   185  		return 0
   186  	}
   187  	// We read too far ahead of the last read, or before it. This indicates
   188  	// a random read, where readahead is not desirable. Reset all variables.
   189  	//
   190  	// (rs.limit - rs.maxReadaheadSize)  (rs.limit)   (rs.limit + rs.maxReadaheadSize)
   191  	//                     |+++++++++++++|=============|
   192  	//
   193  	//                                                    |-------|
   194  	//                                                offset    currentReadEnd
   195  	//
   196  	rs.numReads = 1
   197  	rs.limit = currentReadEnd
   198  	rs.size = initialReadaheadSize
   199  	rs.prevSize = 0
   200  	return 0
   201  }