// Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package pebble

import (
	"context"
	"errors"
	"time"
)

// nilPacer is a shared no-op pacer used wherever pacing is disabled.
var nilPacer = &noopPacer{}

// limiter is the subset of a token-bucket rate limiter used by the pacers
// (it matches golang.org/x/time/rate.Limiter). WaitN blocks until n tokens
// are available, AllowN consumes n tokens without blocking (reporting
// whether they were available), and Burst reports the maximum token count
// that a single WaitN/AllowN call may request.
type limiter interface {
	WaitN(ctx context.Context, n int) (err error)
	AllowN(now time.Time, n int) bool
	Burst() int
}

// pacer is the interface for flush and compaction rate limiters. The rate
// limiter is possibly applied on each iteration step of a flush or
// compaction. This is to limit background IO usage so that it does not
// contend with foreground traffic.
type pacer interface {
	maybeThrottle(bytesIterated uint64) error
}

// internalPacer contains fields and methods common to both compactionPacer
// and flushPacer.
type internalPacer struct {
	limiter limiter

	// iterCount counts down the maybeThrottle calls remaining until the
	// pacer's cached state must be refreshed from the DB.
	iterCount uint64
	// prevBytesIterated is the bytesIterated value seen on the previous
	// maybeThrottle call; the delta against it is the work done this step.
	prevBytesIterated uint64
	// refreshBytesThreshold is the bytesIterated level at which the cached
	// state is refreshed even if iterCount has not yet reached zero.
	refreshBytesThreshold uint64
	// slowdownThreshold is the level at or below which rate limiting is
	// applied by limit.
	slowdownThreshold uint64
}

// limit applies rate limiting if the current byte level is below the
// configured threshold. When throttling, it blocks in WaitN; otherwise it
// still drains tokens non-blockingly via AllowN so the limiter's bucket
// reflects the work actually performed. In both cases the amount is consumed
// in chunks of at most Burst() tokens, since the limiter cannot satisfy a
// single request larger than its burst.
func (p *internalPacer) limit(amount, currentLevel uint64) error {
	if currentLevel <= p.slowdownThreshold {
		burst := p.limiter.Burst()
		for amount > uint64(burst) {
			err := p.limiter.WaitN(context.Background(), burst)
			if err != nil {
				return err
			}
			amount -= uint64(burst)
		}
		// Final (possibly zero-sized) residual request.
		err := p.limiter.WaitN(context.Background(), int(amount))
		if err != nil {
			return err
		}
	} else {
		// Above the threshold: do not block, but still account for the
		// bytes consumed so the token bucket stays in sync.
		burst := p.limiter.Burst()
		for amount > uint64(burst) {
			p.limiter.AllowN(time.Now(), burst)
			amount -= uint64(burst)
		}
		p.limiter.AllowN(time.Now(), int(amount))
	}
	return nil
}

// compactionPacerInfo contains information necessary for compaction pacing.
type compactionPacerInfo struct {
	// slowdownThreshold is the low watermark for compaction debt. If compaction debt is
	// below this threshold, we slow down compactions. If compaction debt is above this
	// threshold, we let compactions continue as fast as possible. We want to keep
	// compaction speed as slow as possible to match the speed of flushes. This threshold
	// is set so that a single flush cannot contribute enough compaction debt to overshoot
	// the threshold.
	slowdownThreshold uint64
	// totalCompactionDebt is the DB's current estimate of outstanding
	// compaction debt in bytes.
	totalCompactionDebt uint64
	// totalDirtyBytes is the number of dirty bytes in memtables. The compaction
	// pacer can monitor changes to this value to determine if user writes have
	// stopped.
	totalDirtyBytes uint64
}

// compactionPacerEnv defines the environment in which the compaction rate limiter
// is applied.
type compactionPacerEnv struct {
	limiter      limiter
	memTableSize uint64

	// getInfo takes a fresh snapshot of pacing state from the DB. It is
	// called sparingly because it must grab DB.mu, which is expensive.
	getInfo func() compactionPacerInfo
}

// compactionPacer rate limits compactions depending on compaction debt. The rate
// limiter is applied at a rate that keeps compaction debt at a steady level. If
// compaction debt increases at a rate that is faster than the system can handle,
// no rate limit is applied.
type compactionPacer struct {
	internalPacer
	env compactionPacerEnv
	// totalCompactionDebt is the cached compaction debt estimate from the
	// most recent env.getInfo snapshot (possibly inflated when user writes
	// have stopped; see maybeThrottle).
	totalCompactionDebt uint64
	// totalDirtyBytes is the cached memtable dirty-byte count from the most
	// recent snapshot, used to detect that user writes have stopped.
	totalDirtyBytes uint64
}

// newCompactionPacer returns a compactionPacer that draws on the limiter and
// DB state accessors in env.
func newCompactionPacer(env compactionPacerEnv) *compactionPacer {
	return &compactionPacer{
		env: env,
		internalPacer: internalPacer{
			limiter: env.limiter,
		},
	}
}

// maybeThrottle slows down compactions to match memtable flush rate. The DB
// provides a compaction debt estimate and a slowdown threshold. We subtract the
// compaction debt estimate by the bytes iterated in the current compaction. If
// the new compaction debt estimate is below the threshold, the rate limiter is
// applied. If the new compaction debt is above the threshold, the rate limiter
// is not applied.
func (p *compactionPacer) maybeThrottle(bytesIterated uint64) error {
	if bytesIterated == 0 {
		return errors.New("pebble: maybeThrottle supplied with invalid bytesIterated")
	}

	// Recalculate total compaction debt and the slowdown threshold only once
	// every 1000 iterations or when the refresh threshold is hit since it
	// requires grabbing DB.mu which is expensive.
	if p.iterCount == 0 || bytesIterated > p.refreshBytesThreshold {
		pacerInfo := p.env.getInfo()
		p.slowdownThreshold = pacerInfo.slowdownThreshold
		p.totalCompactionDebt = pacerInfo.totalCompactionDebt
		// Refresh again once another 5% of a memtable's worth of bytes has
		// been iterated, even if 1000 iterations have not elapsed.
		p.refreshBytesThreshold = bytesIterated + (p.env.memTableSize * 5 / 100)
		p.iterCount = 1000
		if p.totalDirtyBytes == pacerInfo.totalDirtyBytes {
			// The total dirty bytes in the memtables have not changed since the
			// previous call: user writes have completely stopped. Allow the
			// compaction to proceed as fast as possible until the next
			// recalculation. We adjust the recalculation threshold so that we can be
			// nimble in the face of new user writes.
			p.totalCompactionDebt += p.slowdownThreshold
			p.iterCount = 100
		}
		p.totalDirtyBytes = pacerInfo.totalDirtyBytes
	}
	p.iterCount--

	// Guarded subtraction: if the compaction has already iterated past the
	// cached debt estimate, treat the remaining debt as zero rather than
	// letting the uint64 subtraction wrap around.
	var curCompactionDebt uint64
	if p.totalCompactionDebt > bytesIterated {
		curCompactionDebt = p.totalCompactionDebt - bytesIterated
	}

	// compactAmount is the number of bytes processed since the previous
	// maybeThrottle call, i.e. the work to account for on this step.
	compactAmount := bytesIterated - p.prevBytesIterated
	p.prevBytesIterated = bytesIterated

	// We slow down compactions when the compaction debt falls below the slowdown
	// threshold, which is set dynamically based on the number of non-empty levels.
	// This will only occur if compactions can keep up with the pace of flushes. If
	// bytes are flushed faster than how fast compactions can occur, compactions
	// proceed at maximum (unthrottled) speed.
	return p.limit(compactAmount, curCompactionDebt)
}

// flushPacerInfo contains information necessary for flush pacing.
type flushPacerInfo struct {
	// totalBytes is the total number of bytes across all memtables.
	totalBytes uint64
}

// flushPacerEnv defines the environment in which the flush rate limiter is
// applied.
type flushPacerEnv struct {
	limiter      limiter
	memTableSize uint64

	// getInfo takes a fresh snapshot of the total memtable byte count from
	// the DB. It is called sparingly because it must grab DB.mu, which is
	// expensive.
	getInfo func() flushPacerInfo
}

// flushPacer rate limits memtable flushing to match the speed of incoming user
// writes. If user writes come in faster than the memtable can be flushed, no
// rate limit is applied.
type flushPacer struct {
	internalPacer
	env flushPacerEnv
	// totalBytes is the cached total memtable byte count from the most
	// recent snapshot, used to detect that user writes have stopped.
	totalBytes uint64
	// adjustedTotalBytes is the cached total used for the dirty-byte
	// computation; it is inflated by slowdownThreshold when user writes
	// have stopped (see maybeThrottle).
	adjustedTotalBytes uint64
}

// newFlushPacer returns a flushPacer whose slowdown watermark is fixed at
// 105% of the memtable size.
func newFlushPacer(env flushPacerEnv) *flushPacer {
	return &flushPacer{
		env: env,
		internalPacer: internalPacer{
			limiter:           env.limiter,
			slowdownThreshold: env.memTableSize * 105 / 100,
		},
	}
}

// maybeThrottle slows down memtable flushing to match user write rate. The DB
// provides the total number of bytes in all the memtables. We subtract this total
// by the number of bytes flushed in the current flush to get a "dirty byte" count.
// If the dirty byte count is below the watermark (105% memtable size), the rate
// limiter is applied. If the dirty byte count is above the watermark, the rate
// limiter is not applied.
func (p *flushPacer) maybeThrottle(bytesIterated uint64) error {
	if bytesIterated == 0 {
		return errors.New("pebble: maybeThrottle supplied with invalid bytesIterated")
	}

	// Recalculate total memtable bytes only once every 1000 iterations or
	// when the refresh threshold is hit since getting the total memtable
	// byte count requires grabbing DB.mu which is expensive.
	if p.iterCount == 0 || bytesIterated > p.refreshBytesThreshold {
		pacerInfo := p.env.getInfo()
		p.iterCount = 1000
		// Refresh again once another 5% of a memtable's worth of bytes has
		// been iterated, even if 1000 iterations have not elapsed.
		p.refreshBytesThreshold = bytesIterated + (p.env.memTableSize * 5 / 100)
		p.adjustedTotalBytes = pacerInfo.totalBytes
		if p.totalBytes == pacerInfo.totalBytes {
			// The total bytes in the memtables have not changed since the previous
			// call: user writes have completely stopped. Allow the flush to proceed
			// as fast as possible until the next recalculation. We adjust the
			// recalculation threshold so that we can be nimble in the face of new
			// user writes.
			p.adjustedTotalBytes += p.slowdownThreshold
			p.iterCount = 100
		}
		p.totalBytes = pacerInfo.totalBytes
	}
	p.iterCount--

	// dirtyBytes is the total number of bytes in the memtables minus the number of
	// bytes flushed. It represents unflushed bytes in all the memtables, even the
	// ones which aren't being flushed such as the mutable memtable.
	//
	// NOTE(review): unlike the compaction pacer above, this subtraction is
	// unguarded. If bytesIterated ever exceeded adjustedTotalBytes (e.g. a
	// stale snapshot), the uint64 difference would wrap to a huge value and
	// silently disable throttling — confirm bytesIterated <=
	// adjustedTotalBytes is an invariant here.
	dirtyBytes := p.adjustedTotalBytes - bytesIterated
	// flushAmount is the number of bytes flushed since the previous
	// maybeThrottle call, i.e. the work to account for on this step.
	flushAmount := bytesIterated - p.prevBytesIterated
	p.prevBytesIterated = bytesIterated

	// We slow down memtable flushing when the dirty bytes indicator falls
	// below the low watermark, which is 105% memtable size. This will only
	// occur if memtable flushing can keep up with the pace of incoming
	// writes. If writes come in faster than how fast the memtable can flush,
	// flushing proceeds at maximum (unthrottled) speed.
	return p.limit(flushAmount, dirtyBytes)
}

// noopPacer implements pacer without ever throttling.
type noopPacer struct{}

// maybeThrottle is a no-op: it never rate limits and never fails.
func (p *noopPacer) maybeThrottle(_ uint64) error {
	return nil
}