// Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

const (
	// Constants for dynamic readahead of data blocks. Note that the size values
	// make sense as some multiple of the default block size; and they should
	// both be larger than the default block size.

	// minFileReadsForReadahead is the number of sequential reads that must
	// have been recorded before maybeReadahead returns a non-zero size.
	minFileReadsForReadahead = 2
	// TODO(bilal): Have the initial size value be a factor of the block size,
	// as opposed to a hardcoded value.
	initialReadaheadSize = 64 << 10 /* 64KB */
)

// readaheadState contains state variables related to readahead. Updated on
// file reads.
type readaheadState struct {
	// Number of sequential reads.
	numReads int64
	// maxReadaheadSize is the upper bound on the size returned by
	// maybeReadahead; size stops doubling once it reaches this value.
	maxReadaheadSize int64
	// Size issued to the next call to Prefetch. Starts at
	// initialReadaheadSize (capped at maxReadaheadSize) and grows
	// exponentially until maxReadaheadSize.
	size int64
	// prevSize is the size used in the last Prefetch call.
	prevSize int64
	// The byte offset up to which the OS has been asked to read ahead / cached.
	// When reading ahead, reads up to this limit should not incur an IO
	// operation. Reads after this limit can benefit from a new call to
	// Prefetch.
	limit int64
}

// makeReadaheadState returns a readaheadState with no recorded reads whose
// readahead size never grows beyond maxReadaheadSize. maxReadaheadSize is
// expected to be non-negative.
func makeReadaheadState(maxReadaheadSize int64) readaheadState {
	rs := readaheadState{maxReadaheadSize: maxReadaheadSize}
	rs.size = rs.startSize()
	return rs
}

// startSize returns the size to use for the first readahead after the state
// is created or reset: initialReadaheadSize, capped at rs.maxReadaheadSize so
// that a maximum smaller than the initial size is never exceeded.
func (rs *readaheadState) startSize() int64 {
	if rs.maxReadaheadSize < initialReadaheadSize {
		return rs.maxReadaheadSize
	}
	return initialReadaheadSize
}

// reset discards the sequential-read history after a random read. The read
// that triggered the reset counts as the first read of a new sequence, and its
// end offset becomes the new limit.
func (rs *readaheadState) reset(currentReadEnd int64) {
	rs.numReads = 1
	rs.limit = currentReadEnd
	rs.size = rs.startSize()
	rs.prevSize = 0
}

// aheadOfLimit reports whether the read [offset, currentReadEnd] overlaps the
// interval [rs.limit, rs.limit + rs.maxReadaheadSize], i.e. whether it
// continues the sequential pattern past what has already been read ahead.
//
// maxReadaheadSize is used rather than size so that reads slightly beyond the
// current (small) readahead size still count as sequential; the size grows
// exponentially and will eventually cover them.
func (rs *readaheadState) aheadOfLimit(offset, currentReadEnd int64) bool {
	return currentReadEnd >= rs.limit && offset <= rs.limit+rs.maxReadaheadSize
}

// outOfReach reports whether the read [offset, currentReadEnd] lies entirely
// before the last readahead range [rs.limit - rs.prevSize, rs.limit] or
// starts after rs.limit + rs.maxReadaheadSize.
//
// All fields are signed, so rs.limit - rs.prevSize cannot wrap around; if it
// is negative, the "too far behind" comparison is simply false for any
// non-negative read end.
func (rs *readaheadState) outOfReach(offset, currentReadEnd int64) bool {
	return currentReadEnd < rs.limit-rs.prevSize || offset > rs.limit+rs.maxReadaheadSize
}

// recordCacheHit updates the state for a block read at offset for blockLength
// bytes that was served from cache, so no readahead is issued for it.
func (rs *readaheadState) recordCacheHit(offset, blockLength int64) {
	currentReadEnd := offset + blockLength
	ahead := rs.aheadOfLimit(offset, currentReadEnd)

	if rs.numReads < minFileReadsForReadahead {
		if ahead {
			// Blocks are being read sequentially and would benefit from
			// readahead down the line.
			rs.numReads++
			return
		}
		// We read too far ahead of the last read, or before it. This
		// indicates a random read, where readahead is not desirable.
		rs.reset(currentReadEnd)
		return
	}

	switch {
	case ahead:
		// This is a read that would have resulted in a readahead, had it
		// not been a cache hit.
		rs.limit = currentReadEnd
	case rs.outOfReach(offset, currentReadEnd):
		// We read too far away from rs.limit to benefit from readahead in
		// any scenario.
		rs.reset(currentReadEnd)
	default:
		// Reads in the range [rs.limit - rs.prevSize, rs.limit] end up
		// here. This is a read that is potentially benefitting from a past
		// readahead; nothing to update.
	}
}

// maybeReadahead updates state and determines whether to issue a readahead /
// prefetch call for a block read at offset for blockLength bytes.
// Returns a size value (greater than 0) that should be prefetched if readahead
// would be beneficial, and 0 otherwise.
func (rs *readaheadState) maybeReadahead(offset, blockLength int64) int64 {
	currentReadEnd := offset + blockLength
	ahead := rs.aheadOfLimit(offset, currentReadEnd)

	if rs.numReads < minFileReadsForReadahead {
		// Not enough sequential reads yet to justify reading ahead; only
		// track whether the pattern is sequential.
		//
		//   (rs.limit)   (rs.limit + rs.maxReadaheadSize)
		//        |=============|
		//
		//           |-------|
		//        offset   currentReadEnd
		if ahead {
			rs.numReads++
		} else {
			// Random read: too far ahead of the last read, or before it.
			rs.reset(currentReadEnd)
		}
		return 0
	}

	// The minimum threshold of sequential reads to justify reading ahead has
	// been reached. In the diagrams below, ++++ is the last readahead range,
	// ==== is [rs.limit, rs.limit + rs.maxReadaheadSize], and ---- is the
	// range being read.
	switch {
	case ahead:
		// The read overlaps the interval ahead of the last readahead range:
		//
		//          rs.limit           rs.limit + rs.maxReadaheadSize
		// ++++++++++|===========================|
		//
		//              |-------------|
		//            offset       currentReadEnd
		//
		// Issue a readahead of the current size starting at offset, and
		// double the size (up to the maximum) for the next one.
		rs.numReads++
		rs.limit = offset + rs.size
		rs.prevSize = rs.size
		rs.size *= 2
		if rs.size > rs.maxReadaheadSize {
			rs.size = rs.maxReadaheadSize
		}
		return rs.prevSize

	case rs.outOfReach(offset, currentReadEnd):
		// We read too far away from rs.limit to benefit from readahead in
		// any scenario, either too far ahead:
		//
		// (rs.limit - rs.prevSize)  (rs.limit)  (rs.limit + rs.maxReadaheadSize)
		//            |+++++++++++++|=============|
		//
		//                                           |-------------|
		//                                         offset       currentReadEnd
		//
		// or too far behind:
		//
		//  |-------------|
		// offset      currentReadEnd
		rs.reset(currentReadEnd)
		return 0

	default:
		// Reads in the range [rs.limit - rs.prevSize, rs.limit] end up
		// here. This is a read that is potentially benefitting from a past
		// readahead, but there's no reason to issue a readahead call at the
		// moment.
		//
		// (rs.limit - rs.prevSize)  (rs.limit)
		//            |+++++++++++++|===============|
		//
		//              |-------|
		//            offset   currentReadEnd
		rs.numReads++
		return 0
	}
}