github.com/petermattis/pebble@v0.0.0-20190905164901-ab51a2166067/internal/pacertoy/rocksdb/main.go (about) 1 // Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package main 6 7 import ( 8 "context" 9 "fmt" 10 "math" 11 "sync" 12 "sync/atomic" 13 "time" 14 15 "github.com/petermattis/pebble/internal/rate" 16 17 "golang.org/x/exp/rand" 18 ) 19 20 const ( 21 // Max rate for all compactions. This is intentionally set low enough that 22 // user writes will have to be delayed. 23 maxCompactionRate = 80 << 20 // 80 MB/s 24 25 memtableSize = 64 << 20 // 64 MB 26 memtableStopThreshold = 2 * memtableSize 27 maxWriteRate = 30 << 20 // 30 MB/s 28 startingWriteRate = 30 << 20 // 30 MB/s 29 30 l0SlowdownThreshold = 4 31 l0CompactionThreshold = 1 32 33 levelRatio = 10 34 numLevels = 7 35 36 // Slowdown threshold is set at the compaction debt incurred by the largest 37 // possible compaction. 38 compactionDebtSlowdownThreshold = memtableSize*(numLevels-2) 39 ) 40 41 type compactionPacer struct { 42 level int64 43 drainer *rate.Limiter 44 } 45 46 func newCompactionPacer() *compactionPacer { 47 p := &compactionPacer{ 48 drainer: rate.NewLimiter(maxCompactionRate, maxCompactionRate), 49 } 50 return p 51 } 52 53 func (p *compactionPacer) fill(n int64) { 54 atomic.AddInt64(&p.level, n) 55 } 56 57 func (p *compactionPacer) drain(n int64) { 58 p.drainer.WaitN(context.Background(), int(n)) 59 60 atomic.AddInt64(&p.level, -n) 61 } 62 63 type flushPacer struct { 64 level int64 65 memtableStopThreshold float64 66 fillCond sync.Cond 67 } 68 69 func newFlushPacer(mu *sync.Mutex) *flushPacer { 70 p := &flushPacer{ 71 memtableStopThreshold: memtableStopThreshold, 72 } 73 p.fillCond.L = mu 74 return p 75 } 76 77 func (p *flushPacer) fill(n int64) { 78 for float64(atomic.LoadInt64(&p.level)) >= p.memtableStopThreshold { 79 p.fillCond.Wait() 80 } 81 atomic.AddInt64(&p.level, n) 82 p.fillCond.Signal() 83 } 84 85 func (p *flushPacer) drain(n int64) { 86 atomic.AddInt64(&p.level, -n) 87 } 88 89 type DB struct { 90 mu sync.Mutex 91 flushPacer *flushPacer 92 flushCond sync.Cond 93 memtables []*int64 94 fill int64 95 drain int64 96 97 compactionMu sync.Mutex 98 compactionPacer *compactionPacer 99 // L0 is represented as an array of integers whereas every other level 100 // is represented as a single integer. 101 L0 []*int64 102 // Non-L0 sstables. sstables[0] == L1. 103 sstables []int64 104 maxSSTableSizes []int64 105 compactionFlushCond sync.Cond 106 prevCompactionDebt float64 107 previouslyInDebt bool 108 109 writeLimiter *rate.Limiter 110 } 111 112 func newDB() *DB { 113 db := &DB{} 114 db.flushPacer = newFlushPacer(&db.mu) 115 db.flushCond.L = &db.mu 116 db.memtables = append(db.memtables, new(int64)) 117 118 db.compactionFlushCond.L = &db.compactionMu 119 db.L0 = append(db.L0, new(int64)) 120 db.compactionPacer = newCompactionPacer() 121 122 db.maxSSTableSizes = make([]int64, numLevels-1) 123 base := int64(levelRatio) 124 for i := uint64(0); i < numLevels-2; i++ { 125 // Each level is 10 times larger than the one above it. 126 db.maxSSTableSizes[i] = memtableSize * l0CompactionThreshold * base 127 base *= levelRatio 128 129 // Begin with each level full. 130 newLevel := db.maxSSTableSizes[i] 131 132 db.sstables = append(db.sstables, newLevel) 133 } 134 db.sstables = append(db.sstables, 0) 135 db.maxSSTableSizes[numLevels-2] = math.MaxInt64 136 137 db.writeLimiter = rate.NewLimiter(startingWriteRate, startingWriteRate) 138 139 go db.drainMemtable() 140 go db.drainCompaction() 141 142 return db 143 } 144 145 // drainCompaction simulates background compactions. 146 func (db *DB) drainCompaction() { 147 rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) 148 149 for { 150 db.compactionMu.Lock() 151 152 for len(db.L0) <= l0CompactionThreshold { 153 db.compactionFlushCond.Wait() 154 } 155 l0Table := db.L0[0] 156 db.compactionMu.Unlock() 157 158 for i, size := int64(0), int64(0); i < *l0Table; i += size { 159 size = 10000 + rng.Int63n(500) 160 if size > (*l0Table - i) { 161 size = *l0Table - i 162 } 163 db.compactionPacer.drain(size) 164 } 165 166 db.compactionMu.Lock() 167 db.L0 = db.L0[1:] 168 db.compactionMu.Unlock() 169 170 singleTableSize := int64(memtableSize) 171 tablesToCompact := 0 172 for i := range db.sstables { 173 newSSTableSize := atomic.AddInt64(&db.sstables[i], singleTableSize) 174 if newSSTableSize > db.maxSSTableSizes[i] { 175 atomic.AddInt64(&db.sstables[i], -singleTableSize) 176 tablesToCompact++ 177 } else { 178 // Lower levels do not need compaction if level above it did not 179 // need compaction. 180 break 181 } 182 } 183 184 totalCompactionBytes := int64(tablesToCompact * memtableSize) 185 db.compactionPacer.fill(totalCompactionBytes) 186 187 for t := 0; t < tablesToCompact; t++ { 188 for i, size := int64(0), int64(0); i < memtableSize; i += size { 189 size = 10000 + rng.Int63n(500) 190 if size > (totalCompactionBytes - i) { 191 size = totalCompactionBytes - i 192 } 193 db.compactionPacer.drain(size) 194 } 195 196 db.delayUserWrites() 197 } 198 } 199 } 200 201 // fillCompaction fills L0 sstables. 202 func (db *DB) fillCompaction(size int64) { 203 db.compactionMu.Lock() 204 205 db.compactionPacer.fill(size) 206 207 last := db.L0[len(db.L0)-1] 208 if *last+size > memtableSize { 209 last = new(int64) 210 db.L0 = append(db.L0, last) 211 db.compactionFlushCond.Signal() 212 } 213 *last += size 214 215 db.compactionMu.Unlock() 216 } 217 218 // drainMemtable simulates memtable flushing. 219 func (db *DB) drainMemtable() { 220 rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) 221 222 for { 223 db.mu.Lock() 224 for len(db.memtables) <= 1 { 225 db.flushCond.Wait() 226 } 227 memtable := db.memtables[0] 228 db.mu.Unlock() 229 230 for i, size := int64(0), int64(0); i < *memtable; i += size { 231 size = 1000 + rng.Int63n(50) 232 if size > (*memtable - i) { 233 size = *memtable - i 234 } 235 db.flushPacer.drain(size) 236 atomic.AddInt64(&db.drain, size) 237 238 db.fillCompaction(size) 239 } 240 241 db.delayUserWrites() 242 243 db.mu.Lock() 244 db.memtables = db.memtables[1:] 245 db.mu.Unlock() 246 } 247 } 248 249 // delayUserWrites applies write delays depending on compaction debt. 250 func (db *DB) delayUserWrites() { 251 totalCompactionBytes := atomic.LoadInt64(&db.compactionPacer.level) 252 compactionDebt := math.Max(float64(totalCompactionBytes)-l0CompactionThreshold*memtableSize, 0.0) 253 254 db.mu.Lock() 255 if len(db.L0) > l0SlowdownThreshold || compactionDebt > compactionDebtSlowdownThreshold { 256 db.previouslyInDebt = true 257 if compactionDebt > db.prevCompactionDebt { 258 // Debt is growing. 259 drainLimit := db.writeLimiter.Limit() * 0.8 260 if drainLimit > 0 { 261 db.writeLimiter.SetLimit(drainLimit) 262 } 263 } else { 264 // Debt is shrinking. 265 drainLimit := db.writeLimiter.Limit() * 1/0.8 266 if drainLimit <= maxWriteRate { 267 db.writeLimiter.SetLimit(drainLimit) 268 } 269 } 270 } else if db.previouslyInDebt { 271 // If compaction was previously delayed and has recovered, RocksDB 272 // "rewards" the rate by double the slowdown ratio. 273 274 // From RocksDB: 275 // If the DB recovers from delay conditions, we reward with reducing 276 // double the slowdown ratio. This is to balance the long term slowdown 277 // increase signal. 278 drainLimit := db.writeLimiter.Limit() * 1.4 279 if drainLimit <= maxWriteRate { 280 db.writeLimiter.SetLimit(drainLimit) 281 } 282 db.previouslyInDebt = false 283 } 284 285 db.prevCompactionDebt = compactionDebt 286 db.mu.Unlock() 287 } 288 289 // fillMemtable simulates memtable filling. 290 func (db *DB) fillMemtable(size int64) { 291 db.mu.Lock() 292 293 db.flushPacer.fill(size) 294 atomic.AddInt64(&db.fill, size) 295 296 last := db.memtables[len(db.memtables)-1] 297 if *last+size > memtableSize { 298 last = new(int64) 299 db.memtables = append(db.memtables, last) 300 db.flushCond.Signal() 301 } 302 *last += size 303 304 db.mu.Unlock() 305 } 306 307 // printLevels prints the levels. 308 func (db *DB) printLevels() { 309 db.mu.Lock() 310 for i := range db.sstables { 311 fmt.Printf("Level %d: %d/%d\n", i+1, db.sstables[i]/(1024*1024), db.maxSSTableSizes[i]/(1024*1024)) 312 } 313 db.mu.Unlock() 314 } 315 316 // simulateWrite simulates user writes. 317 func simulateWrite(db *DB) { 318 limiter := rate.NewLimiter(10<<20, 10<<20) // 10 MB/s 319 fmt.Printf("filling at 10 MB/sec\n") 320 321 setRate := func(mb int) { 322 fmt.Printf("filling at %d MB/sec\n", mb) 323 limiter.SetLimit(rate.Limit(mb << 20)) 324 } 325 326 go func() { 327 rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) 328 for { 329 secs := 5 + rng.Intn(5) 330 time.Sleep(time.Duration(secs) * time.Second) 331 mb := 11 + rng.Intn(20) 332 setRate(mb) 333 } 334 }() 335 336 rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) 337 338 for { 339 size := 1000 + rng.Int63n(50) 340 limiter.WaitN(context.Background(), int(size)) 341 db.writeLimiter.WaitN(context.Background(), int(size)) 342 db.fillMemtable(size) 343 } 344 } 345 346 func main() { 347 db := newDB() 348 349 go simulateWrite(db) 350 351 tick := time.NewTicker(time.Second) 352 start := time.Now() 353 lastNow := start 354 var lastFill, lastDrain int64 355 356 for i := 0; ; i++ { 357 select { 358 case <-tick.C: 359 if (i % 20) == 0 { 360 fmt.Printf("_elapsed___memtbs____dirty_____fill____drain____cdebt__l0count___max-w-rate\n") 361 } 362 363 if (i % 7) == 0 { 364 //db.printLevels() 365 } 366 367 db.mu.Lock() 368 memtableCount := len(db.memtables) 369 db.mu.Unlock() 370 dirty := atomic.LoadInt64(&db.flushPacer.level) 371 fill := atomic.LoadInt64(&db.fill) 372 drain := atomic.LoadInt64(&db.drain) 373 374 db.compactionMu.Lock() 375 compactionL0 := len(db.L0) 376 db.compactionMu.Unlock() 377 totalCompactionBytes := atomic.LoadInt64(&db.compactionPacer.level) 378 compactionDebt := math.Max(float64(totalCompactionBytes)-l0CompactionThreshold*memtableSize, 0.0) 379 maxWriteRate := db.writeLimiter.Limit() 380 381 now := time.Now() 382 elapsed := now.Sub(lastNow).Seconds() 383 fmt.Printf("%8s %8d %8.1f %8.1f %8.1f %8.1f %8d %12.1f\n", 384 time.Duration(now.Sub(start).Seconds()+0.5)*time.Second, 385 memtableCount, 386 float64(dirty)/(1024.0*1024.0), 387 float64(fill-lastFill)/(1024.0*1024.0*elapsed), 388 float64(drain-lastDrain)/(1024.0*1024.0*elapsed), 389 compactionDebt/(1024.0*1024.0), 390 compactionL0, 391 maxWriteRate/(1024.0*1024.0)) 392 393 lastNow = now 394 lastFill = fill 395 lastDrain = drain 396 } 397 } 398 }