github.com/cockroachdb/pebble@v1.1.2/objstorage/objstorageprovider/readahead.go (about)

     1  // Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package objstorageprovider
     6  
     7  import "github.com/cockroachdb/pebble/internal/invariants"
     8  
     9  const (
    10  	// Constants for dynamic readahead of data blocks. Note that the size values
    11  	// make sense as some multiple of the default block size; and they should
    12  	// both be larger than the default block size.
    13  	minFileReadsForReadahead = 2
    14  	// TODO(bilal): Have the initial size value be a factor of the block size,
    15  	// as opposed to a hardcoded value.
    16  	initialReadaheadSize = 64 << 10 /* 64KB */
    17  )
    18  
    19  // readaheadState contains state variables related to readahead. Updated on
    20  // file reads.
    21  type readaheadState struct {
    22  	// Number of sequential reads.
    23  	numReads         int64
    24  	maxReadaheadSize int64
    25  	// Size issued to the next call to Prefetch. Starts at or above
    26  	// initialReadaheadSize and grows exponentially until maxReadaheadSize.
    27  	size int64
    28  	// prevSize is the size used in the last Prefetch call.
    29  	prevSize int64
    30  	// The byte offset up to which the OS has been asked to read ahead / cached.
    31  	// When reading ahead, reads up to this limit should not incur an IO
    32  	// operation. Reads after this limit can benefit from a new call to
    33  	// Prefetch.
    34  	limit int64
    35  }
    36  
    37  func makeReadaheadState(maxReadaheadSize int64) readaheadState {
    38  	return readaheadState{
    39  		size:             initialReadaheadSize,
    40  		maxReadaheadSize: maxReadaheadSize,
    41  	}
    42  }
    43  
    44  func (rs *readaheadState) recordCacheHit(offset, blockLength int64) {
    45  	currentReadEnd := offset + blockLength
    46  	if rs.numReads >= minFileReadsForReadahead {
    47  		if currentReadEnd >= rs.limit && offset <= rs.limit+rs.maxReadaheadSize {
    48  			// This is a read that would have resulted in a readahead, had it
    49  			// not been a cache hit.
    50  			rs.limit = currentReadEnd
    51  			return
    52  		}
    53  		if currentReadEnd < rs.limit-rs.prevSize || offset > rs.limit+rs.maxReadaheadSize {
    54  			// We read too far away from rs.limit to benefit from readahead in
    55  			// any scenario. Reset all variables.
    56  			rs.numReads = 1
    57  			rs.limit = currentReadEnd
    58  			rs.size = initialReadaheadSize
    59  			rs.prevSize = 0
    60  			return
    61  		}
    62  		// Reads in the range [rs.limit - rs.prevSize, rs.limit] end up
    63  		// here. This is a read that is potentially benefitting from a past
    64  		// readahead.
    65  		return
    66  	}
    67  	if currentReadEnd >= rs.limit && offset <= rs.limit+rs.maxReadaheadSize {
    68  		// Blocks are being read sequentially and would benefit from readahead
    69  		// down the line.
    70  		rs.numReads++
    71  		return
    72  	}
    73  	// We read too far ahead of the last read, or before it. This indicates
    74  	// a random read, where readahead is not desirable. Reset all variables.
    75  	rs.numReads = 1
    76  	rs.limit = currentReadEnd
    77  	rs.size = initialReadaheadSize
    78  	rs.prevSize = 0
    79  }
    80  
    81  // maybeReadahead updates state and determines whether to issue a readahead /
    82  // prefetch call for a block read at offset for blockLength bytes.
    83  // Returns a size value (greater than 0) that should be prefetched if readahead
    84  // would be beneficial.
    85  func (rs *readaheadState) maybeReadahead(offset, blockLength int64) int64 {
    86  	if invariants.Enabled && rs.maxReadaheadSize == 0 {
    87  		panic("readaheadState not initialized")
    88  	}
    89  	currentReadEnd := offset + blockLength
    90  	if rs.numReads >= minFileReadsForReadahead {
    91  		// The minimum threshold of sequential reads to justify reading ahead
    92  		// has been reached.
    93  		// There are two intervals: the interval being read:
    94  		// [offset, currentReadEnd]
    95  		// as well as the interval where a read would benefit from read ahead:
    96  		// [rs.limit, rs.limit + rs.size]
    97  		// We increase the latter interval to
    98  		// [rs.limit, rs.limit + rs.maxReadaheadSize] to account for cases where
    99  		// readahead may not be beneficial with a small readahead size, but over
   100  		// time the readahead size would increase exponentially to make it
   101  		// beneficial.
   102  		if currentReadEnd >= rs.limit && offset <= rs.limit+rs.maxReadaheadSize {
   103  			// We are doing a read in the interval ahead of
   104  			// the last readahead range. In the diagrams below, ++++ is the last
   105  			// readahead range, ==== is the range represented by
   106  			// [rs.limit, rs.limit + rs.maxReadaheadSize], and ---- is the range
   107  			// being read.
   108  			//
   109  			//               rs.limit           rs.limit + rs.maxReadaheadSize
   110  			//         ++++++++++|===========================|
   111  			//
   112  			//              |-------------|
   113  			//            offset       currentReadEnd
   114  			//
   115  			// This case is also possible, as are all cases with an overlap
   116  			// between [rs.limit, rs.limit + rs.maxReadaheadSize] and [offset,
   117  			// currentReadEnd]:
   118  			//
   119  			//               rs.limit           rs.limit + rs.maxReadaheadSize
   120  			//         ++++++++++|===========================|
   121  			//
   122  			//                                            |-------------|
   123  			//                                         offset       currentReadEnd
   124  			//
   125  			//
   126  			rs.numReads++
   127  			rs.limit = offset + rs.size
   128  			rs.prevSize = rs.size
   129  			// Increase rs.size for the next read.
   130  			rs.size *= 2
   131  			if rs.size > rs.maxReadaheadSize {
   132  				rs.size = rs.maxReadaheadSize
   133  			}
   134  			return rs.prevSize
   135  		}
   136  		if currentReadEnd < rs.limit-rs.prevSize || offset > rs.limit+rs.maxReadaheadSize {
   137  			// The above conditional has rs.limit > rs.prevSize to confirm that
   138  			// rs.limit - rs.prevSize would not underflow.
   139  			// We read too far away from rs.limit to benefit from readahead in
   140  			// any scenario. Reset all variables.
   141  			// The case where we read too far ahead:
   142  			//
   143  			// (rs.limit - rs.prevSize)    (rs.limit)   (rs.limit + rs.maxReadaheadSize)
   144  			//                    |+++++++++++++|=============|
   145  			//
   146  			//                                                  |-------------|
   147  			//                                             offset       currentReadEnd
   148  			//
   149  			// Or too far behind:
   150  			//
   151  			// (rs.limit - rs.prevSize)    (rs.limit)   (rs.limit + rs.maxReadaheadSize)
   152  			//                    |+++++++++++++|=============|
   153  			//
   154  			//    |-------------|
   155  			// offset       currentReadEnd
   156  			//
   157  			rs.numReads = 1
   158  			rs.limit = currentReadEnd
   159  			rs.size = initialReadaheadSize
   160  			rs.prevSize = 0
   161  
   162  			return 0
   163  		}
   164  		// Reads in the range [rs.limit - rs.prevSize, rs.limit] end up
   165  		// here. This is a read that is potentially benefitting from a past
   166  		// readahead, but there's no reason to issue a readahead call at the
   167  		// moment.
   168  		//
   169  		// (rs.limit - rs.prevSize)            (rs.limit + rs.maxReadaheadSize)
   170  		//                    |+++++++++++++|===============|
   171  		//                             (rs.limit)
   172  		//
   173  		//                        |-------|
   174  		//                     offset    currentReadEnd
   175  		//
   176  		rs.numReads++
   177  		return 0
   178  	}
   179  	if currentReadEnd >= rs.limit && offset <= rs.limit+rs.maxReadaheadSize {
   180  		// Blocks are being read sequentially and would benefit from readahead
   181  		// down the line.
   182  		//
   183  		//                       (rs.limit)   (rs.limit + rs.maxReadaheadSize)
   184  		//                         |=============|
   185  		//
   186  		//                    |-------|
   187  		//                offset    currentReadEnd
   188  		//
   189  		rs.numReads++
   190  		return 0
   191  	}
   192  	// We read too far ahead of the last read, or before it. This indicates
   193  	// a random read, where readahead is not desirable. Reset all variables.
   194  	//
   195  	// (rs.limit - rs.maxReadaheadSize)  (rs.limit)   (rs.limit + rs.maxReadaheadSize)
   196  	//                     |+++++++++++++|=============|
   197  	//
   198  	//                                                    |-------|
   199  	//                                                offset    currentReadEnd
   200  	//
   201  	rs.numReads = 1
   202  	rs.limit = currentReadEnd
   203  	rs.size = initialReadaheadSize
   204  	rs.prevSize = 0
   205  	return 0
   206  }