github.com/cockroachdb/pebble@v1.1.2/objstorage/objstorageprovider/readahead.go (about) 1 // Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package objstorageprovider 6 7 import "github.com/cockroachdb/pebble/internal/invariants" 8 9 const ( 10 // Constants for dynamic readahead of data blocks. Note that the size values 11 // make sense as some multiple of the default block size; and they should 12 // both be larger than the default block size. 13 minFileReadsForReadahead = 2 14 // TODO(bilal): Have the initial size value be a factor of the block size, 15 // as opposed to a hardcoded value. 16 initialReadaheadSize = 64 << 10 /* 64KB */ 17 ) 18 19 // readaheadState contains state variables related to readahead. Updated on 20 // file reads. 21 type readaheadState struct { 22 // Number of sequential reads. 23 numReads int64 24 maxReadaheadSize int64 25 // Size issued to the next call to Prefetch. Starts at or above 26 // initialReadaheadSize and grows exponentially until maxReadaheadSize. 27 size int64 28 // prevSize is the size used in the last Prefetch call. 29 prevSize int64 30 // The byte offset up to which the OS has been asked to read ahead / cached. 31 // When reading ahead, reads up to this limit should not incur an IO 32 // operation. Reads after this limit can benefit from a new call to 33 // Prefetch. 34 limit int64 35 } 36 37 func makeReadaheadState(maxReadaheadSize int64) readaheadState { 38 return readaheadState{ 39 size: initialReadaheadSize, 40 maxReadaheadSize: maxReadaheadSize, 41 } 42 } 43 44 func (rs *readaheadState) recordCacheHit(offset, blockLength int64) { 45 currentReadEnd := offset + blockLength 46 if rs.numReads >= minFileReadsForReadahead { 47 if currentReadEnd >= rs.limit && offset <= rs.limit+rs.maxReadaheadSize { 48 // This is a read that would have resulted in a readahead, had it 49 // not been a cache hit. 50 rs.limit = currentReadEnd 51 return 52 } 53 if currentReadEnd < rs.limit-rs.prevSize || offset > rs.limit+rs.maxReadaheadSize { 54 // We read too far away from rs.limit to benefit from readahead in 55 // any scenario. Reset all variables. 56 rs.numReads = 1 57 rs.limit = currentReadEnd 58 rs.size = initialReadaheadSize 59 rs.prevSize = 0 60 return 61 } 62 // Reads in the range [rs.limit - rs.prevSize, rs.limit] end up 63 // here. This is a read that is potentially benefitting from a past 64 // readahead. 65 return 66 } 67 if currentReadEnd >= rs.limit && offset <= rs.limit+rs.maxReadaheadSize { 68 // Blocks are being read sequentially and would benefit from readahead 69 // down the line. 70 rs.numReads++ 71 return 72 } 73 // We read too far ahead of the last read, or before it. This indicates 74 // a random read, where readahead is not desirable. Reset all variables. 75 rs.numReads = 1 76 rs.limit = currentReadEnd 77 rs.size = initialReadaheadSize 78 rs.prevSize = 0 79 } 80 81 // maybeReadahead updates state and determines whether to issue a readahead / 82 // prefetch call for a block read at offset for blockLength bytes. 83 // Returns a size value (greater than 0) that should be prefetched if readahead 84 // would be beneficial. 85 func (rs *readaheadState) maybeReadahead(offset, blockLength int64) int64 { 86 if invariants.Enabled && rs.maxReadaheadSize == 0 { 87 panic("readaheadState not initialized") 88 } 89 currentReadEnd := offset + blockLength 90 if rs.numReads >= minFileReadsForReadahead { 91 // The minimum threshold of sequential reads to justify reading ahead 92 // has been reached. 93 // There are two intervals: the interval being read: 94 // [offset, currentReadEnd] 95 // as well as the interval where a read would benefit from read ahead: 96 // [rs.limit, rs.limit + rs.size] 97 // We increase the latter interval to 98 // [rs.limit, rs.limit + rs.maxReadaheadSize] to account for cases where 99 // readahead may not be beneficial with a small readahead size, but over 100 // time the readahead size would increase exponentially to make it 101 // beneficial. 102 if currentReadEnd >= rs.limit && offset <= rs.limit+rs.maxReadaheadSize { 103 // We are doing a read in the interval ahead of 104 // the last readahead range. In the diagrams below, ++++ is the last 105 // readahead range, ==== is the range represented by 106 // [rs.limit, rs.limit + rs.maxReadaheadSize], and ---- is the range 107 // being read. 108 // 109 // rs.limit rs.limit + rs.maxReadaheadSize 110 // ++++++++++|===========================| 111 // 112 // |-------------| 113 // offset currentReadEnd 114 // 115 // This case is also possible, as are all cases with an overlap 116 // between [rs.limit, rs.limit + rs.maxReadaheadSize] and [offset, 117 // currentReadEnd]: 118 // 119 // rs.limit rs.limit + rs.maxReadaheadSize 120 // ++++++++++|===========================| 121 // 122 // |-------------| 123 // offset currentReadEnd 124 // 125 // 126 rs.numReads++ 127 rs.limit = offset + rs.size 128 rs.prevSize = rs.size 129 // Increase rs.size for the next read. 130 rs.size *= 2 131 if rs.size > rs.maxReadaheadSize { 132 rs.size = rs.maxReadaheadSize 133 } 134 return rs.prevSize 135 } 136 if currentReadEnd < rs.limit-rs.prevSize || offset > rs.limit+rs.maxReadaheadSize { 137 // The above conditional has rs.limit > rs.prevSize to confirm that 138 // rs.limit - rs.prevSize would not underflow. 139 // We read too far away from rs.limit to benefit from readahead in 140 // any scenario. Reset all variables. 141 // The case where we read too far ahead: 142 // 143 // (rs.limit - rs.prevSize) (rs.limit) (rs.limit + rs.maxReadaheadSize) 144 // |+++++++++++++|=============| 145 // 146 // |-------------| 147 // offset currentReadEnd 148 // 149 // Or too far behind: 150 // 151 // (rs.limit - rs.prevSize) (rs.limit) (rs.limit + rs.maxReadaheadSize) 152 // |+++++++++++++|=============| 153 // 154 // |-------------| 155 // offset currentReadEnd 156 // 157 rs.numReads = 1 158 rs.limit = currentReadEnd 159 rs.size = initialReadaheadSize 160 rs.prevSize = 0 161 162 return 0 163 } 164 // Reads in the range [rs.limit - rs.prevSize, rs.limit] end up 165 // here. This is a read that is potentially benefitting from a past 166 // readahead, but there's no reason to issue a readahead call at the 167 // moment. 168 // 169 // (rs.limit - rs.prevSize) (rs.limit + rs.maxReadaheadSize) 170 // |+++++++++++++|===============| 171 // (rs.limit) 172 // 173 // |-------| 174 // offset currentReadEnd 175 // 176 rs.numReads++ 177 return 0 178 } 179 if currentReadEnd >= rs.limit && offset <= rs.limit+rs.maxReadaheadSize { 180 // Blocks are being read sequentially and would benefit from readahead 181 // down the line. 182 // 183 // (rs.limit) (rs.limit + rs.maxReadaheadSize) 184 // |=============| 185 // 186 // |-------| 187 // offset currentReadEnd 188 // 189 rs.numReads++ 190 return 0 191 } 192 // We read too far ahead of the last read, or before it. This indicates 193 // a random read, where readahead is not desirable. Reset all variables. 194 // 195 // (rs.limit - rs.maxReadaheadSize) (rs.limit) (rs.limit + rs.maxReadaheadSize) 196 // |+++++++++++++|=============| 197 // 198 // |-------| 199 // offset currentReadEnd 200 // 201 rs.numReads = 1 202 rs.limit = currentReadEnd 203 rs.size = initialReadaheadSize 204 rs.prevSize = 0 205 return 0 206 }