github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/internal/pacertoy/rocksdb/main.go (about)

     1  // Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package main
     6  
     7  import (
     8  	"fmt"
     9  	"math"
    10  	"sync"
    11  	"sync/atomic"
    12  	"time"
    13  
    14  	"github.com/cockroachdb/pebble/internal/rate"
    15  	"golang.org/x/exp/rand"
    16  )
    17  
    18  const (
    19  	// Max rate for all compactions. This is intentionally set low enough that
    20  	// user writes will have to be delayed.
    21  	maxCompactionRate = 80 << 20 // 80 MB/s
    22  
    23  	memtableSize          = 64 << 20 // 64 MB
    24  	memtableStopThreshold = 2 * memtableSize
    25  	maxWriteRate          = 30 << 20 // 30 MB/s
    26  	startingWriteRate     = 30 << 20 // 30 MB/s
    27  
    28  	l0SlowdownThreshold   = 4
    29  	l0CompactionThreshold = 1
    30  
    31  	levelRatio = 10
    32  	numLevels  = 7
    33  
    34  	// Slowdown threshold is set at the compaction debt incurred by the largest
    35  	// possible compaction.
    36  	compactionDebtSlowdownThreshold = memtableSize * (numLevels - 2)
    37  )
    38  
    39  type compactionPacer struct {
    40  	level   atomic.Int64
    41  	drainer *rate.Limiter
    42  }
    43  
    44  func newCompactionPacer() *compactionPacer {
    45  	p := &compactionPacer{
    46  		drainer: rate.NewLimiter(maxCompactionRate, maxCompactionRate),
    47  	}
    48  	return p
    49  }
    50  
    51  func (p *compactionPacer) fill(n int64) {
    52  	p.level.Add(n)
    53  }
    54  
    55  func (p *compactionPacer) drain(n int64) {
    56  	p.drainer.Wait(float64(n))
    57  
    58  	p.level.Add(-n)
    59  }
    60  
    61  type flushPacer struct {
    62  	level                 atomic.Int64
    63  	memtableStopThreshold float64
    64  	fillCond              sync.Cond
    65  }
    66  
    67  func newFlushPacer(mu *sync.Mutex) *flushPacer {
    68  	p := &flushPacer{
    69  		memtableStopThreshold: memtableStopThreshold,
    70  	}
    71  	p.fillCond.L = mu
    72  	return p
    73  }
    74  
    75  func (p *flushPacer) fill(n int64) {
    76  	for float64(p.level.Load()) >= p.memtableStopThreshold {
    77  		p.fillCond.Wait()
    78  	}
    79  	p.level.Add(n)
    80  	p.fillCond.Signal()
    81  }
    82  
    83  func (p *flushPacer) drain(n int64) {
    84  	p.level.Add(-n)
    85  }
    86  
    87  // DB models a RocksDB DB.
    88  type DB struct {
    89  	mu         sync.Mutex
    90  	flushPacer *flushPacer
    91  	flushCond  sync.Cond
    92  	memtables  []*int64
    93  	fill       atomic.Int64
    94  	drain      atomic.Int64
    95  
    96  	compactionMu    sync.Mutex
    97  	compactionPacer *compactionPacer
    98  	// L0 is represented as an array of integers whereas every other level
    99  	// is represented as a single integer.
   100  	L0 []*int64
   101  	// Non-L0 sstables. sstables[0] == L1.
   102  	sstables            []atomic.Int64
   103  	maxSSTableSizes     []int64
   104  	compactionFlushCond sync.Cond
   105  	prevCompactionDebt  float64
   106  	previouslyInDebt    bool
   107  
   108  	writeLimiter *rate.Limiter
   109  }
   110  
   111  func newDB() *DB {
   112  	db := &DB{}
   113  	db.flushPacer = newFlushPacer(&db.mu)
   114  	db.flushCond.L = &db.mu
   115  	db.memtables = append(db.memtables, new(int64))
   116  
   117  	db.compactionFlushCond.L = &db.compactionMu
   118  	db.L0 = append(db.L0, new(int64))
   119  	db.compactionPacer = newCompactionPacer()
   120  
   121  	db.maxSSTableSizes = make([]int64, numLevels-1)
   122  	db.sstables = make([]atomic.Int64, numLevels-1)
   123  	base := int64(levelRatio)
   124  	for i := uint64(0); i < numLevels-2; i++ {
   125  		// Each level is 10 times larger than the one above it.
   126  		db.maxSSTableSizes[i] = memtableSize * l0CompactionThreshold * base
   127  		base *= levelRatio
   128  
   129  		// Begin with each level full.
   130  		newLevel := db.maxSSTableSizes[i]
   131  
   132  		db.sstables[i].Store(newLevel)
   133  	}
   134  	db.sstables[numLevels-2].Store(0)
   135  	db.maxSSTableSizes[numLevels-2] = math.MaxInt64
   136  
   137  	db.writeLimiter = rate.NewLimiter(startingWriteRate, startingWriteRate)
   138  
   139  	go db.drainMemtable()
   140  	go db.drainCompaction()
   141  
   142  	return db
   143  }
   144  
   145  // drainCompaction simulates background compactions.
   146  func (db *DB) drainCompaction() {
   147  	rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))
   148  
   149  	for {
   150  		db.compactionMu.Lock()
   151  
   152  		for len(db.L0) <= l0CompactionThreshold {
   153  			db.compactionFlushCond.Wait()
   154  		}
   155  		l0Table := db.L0[0]
   156  		db.compactionMu.Unlock()
   157  
   158  		for i, size := int64(0), int64(0); i < *l0Table; i += size {
   159  			size = 10000 + rng.Int63n(500)
   160  			if size > (*l0Table - i) {
   161  				size = *l0Table - i
   162  			}
   163  			db.compactionPacer.drain(size)
   164  		}
   165  
   166  		db.compactionMu.Lock()
   167  		db.L0 = db.L0[1:]
   168  		db.compactionMu.Unlock()
   169  
   170  		singleTableSize := int64(memtableSize)
   171  		tablesToCompact := 0
   172  		for i := range db.sstables {
   173  			newSSTableSize := db.sstables[i].Add(singleTableSize)
   174  			if newSSTableSize > db.maxSSTableSizes[i] {
   175  				db.sstables[i].Add(-singleTableSize)
   176  				tablesToCompact++
   177  			} else {
   178  				// Lower levels do not need compaction if level above it did not
   179  				// need compaction.
   180  				break
   181  			}
   182  		}
   183  
   184  		totalCompactionBytes := int64(tablesToCompact * memtableSize)
   185  		db.compactionPacer.fill(totalCompactionBytes)
   186  
   187  		for t := 0; t < tablesToCompact; t++ {
   188  			for i, size := int64(0), int64(0); i < memtableSize; i += size {
   189  				size = 10000 + rng.Int63n(500)
   190  				if size > (totalCompactionBytes - i) {
   191  					size = totalCompactionBytes - i
   192  				}
   193  				db.compactionPacer.drain(size)
   194  			}
   195  
   196  			db.delayUserWrites()
   197  		}
   198  	}
   199  }
   200  
   201  // fillCompaction fills L0 sstables.
   202  func (db *DB) fillCompaction(size int64) {
   203  	db.compactionMu.Lock()
   204  
   205  	db.compactionPacer.fill(size)
   206  
   207  	last := db.L0[len(db.L0)-1]
   208  	if *last+size > memtableSize {
   209  		last = new(int64)
   210  		db.L0 = append(db.L0, last)
   211  		db.compactionFlushCond.Signal()
   212  	}
   213  	*last += size
   214  
   215  	db.compactionMu.Unlock()
   216  }
   217  
   218  // drainMemtable simulates memtable flushing.
   219  func (db *DB) drainMemtable() {
   220  	rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))
   221  
   222  	for {
   223  		db.mu.Lock()
   224  		for len(db.memtables) <= 1 {
   225  			db.flushCond.Wait()
   226  		}
   227  		memtable := db.memtables[0]
   228  		db.mu.Unlock()
   229  
   230  		for i, size := int64(0), int64(0); i < *memtable; i += size {
   231  			size = 1000 + rng.Int63n(50)
   232  			if size > (*memtable - i) {
   233  				size = *memtable - i
   234  			}
   235  			db.flushPacer.drain(size)
   236  			db.drain.Add(size)
   237  
   238  			db.fillCompaction(size)
   239  		}
   240  
   241  		db.delayUserWrites()
   242  
   243  		db.mu.Lock()
   244  		db.memtables = db.memtables[1:]
   245  		db.mu.Unlock()
   246  	}
   247  }
   248  
   249  // delayUserWrites applies write delays depending on compaction debt.
   250  func (db *DB) delayUserWrites() {
   251  	totalCompactionBytes := db.compactionPacer.level.Load()
   252  	compactionDebt := math.Max(float64(totalCompactionBytes)-l0CompactionThreshold*memtableSize, 0.0)
   253  
   254  	db.mu.Lock()
   255  	if len(db.L0) > l0SlowdownThreshold || compactionDebt > compactionDebtSlowdownThreshold {
   256  		db.previouslyInDebt = true
   257  		if compactionDebt > db.prevCompactionDebt {
   258  			// Debt is growing.
   259  			drainLimit := db.writeLimiter.Rate() * 0.8
   260  			if drainLimit > 0 {
   261  				db.writeLimiter.SetRate(drainLimit)
   262  			}
   263  		} else {
   264  			// Debt is shrinking.
   265  			drainLimit := db.writeLimiter.Rate() * 1 / 0.8
   266  			if drainLimit <= maxWriteRate {
   267  				db.writeLimiter.SetRate(drainLimit)
   268  			}
   269  		}
   270  	} else if db.previouslyInDebt {
   271  		// If compaction was previously delayed and has recovered, RocksDB
   272  		// "rewards" the rate by double the slowdown ratio.
   273  
   274  		// From RocksDB:
   275  		// If the DB recovers from delay conditions, we reward with reducing
   276  		// double the slowdown ratio. This is to balance the long term slowdown
   277  		// increase signal.
   278  		drainLimit := db.writeLimiter.Rate() * 1.4
   279  		if drainLimit <= maxWriteRate {
   280  			db.writeLimiter.SetRate(drainLimit)
   281  		}
   282  		db.previouslyInDebt = false
   283  	}
   284  
   285  	db.prevCompactionDebt = compactionDebt
   286  	db.mu.Unlock()
   287  }
   288  
   289  // fillMemtable simulates memtable filling.
   290  func (db *DB) fillMemtable(size int64) {
   291  	db.mu.Lock()
   292  
   293  	db.flushPacer.fill(size)
   294  	db.fill.Add(size)
   295  
   296  	last := db.memtables[len(db.memtables)-1]
   297  	if *last+size > memtableSize {
   298  		last = new(int64)
   299  		db.memtables = append(db.memtables, last)
   300  		db.flushCond.Signal()
   301  	}
   302  	*last += size
   303  
   304  	db.mu.Unlock()
   305  }
   306  
   307  // simulateWrite simulates user writes.
   308  func simulateWrite(db *DB) {
   309  	limiter := rate.NewLimiter(10<<20, 10<<20) // 10 MB/s
   310  	fmt.Printf("filling at 10 MB/sec\n")
   311  
   312  	setRate := func(mb int) {
   313  		fmt.Printf("filling at %d MB/sec\n", mb)
   314  		limiter.SetRate(float64(mb << 20))
   315  	}
   316  
   317  	go func() {
   318  		rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))
   319  		for {
   320  			secs := 5 + rng.Intn(5)
   321  			time.Sleep(time.Duration(secs) * time.Second)
   322  			mb := 11 + rng.Intn(20)
   323  			setRate(mb)
   324  		}
   325  	}()
   326  
   327  	rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))
   328  
   329  	for {
   330  		size := 1000 + rng.Int63n(50)
   331  		limiter.Wait(float64(size))
   332  		db.writeLimiter.Wait(float64(size))
   333  		db.fillMemtable(size)
   334  	}
   335  }
   336  
   337  func main() {
   338  	db := newDB()
   339  
   340  	go simulateWrite(db)
   341  
   342  	tick := time.NewTicker(time.Second)
   343  	start := time.Now()
   344  	lastNow := start
   345  	var lastFill, lastDrain int64
   346  
   347  	for i := 0; ; i++ {
   348  		<-tick.C
   349  		if (i % 20) == 0 {
   350  			fmt.Printf("_elapsed___memtbs____dirty_____fill____drain____cdebt__l0count___max-w-rate\n")
   351  		}
   352  
   353  		db.mu.Lock()
   354  		memtableCount := len(db.memtables)
   355  		db.mu.Unlock()
   356  		dirty := db.flushPacer.level.Load()
   357  		fill := db.fill.Load()
   358  		drain := db.drain.Load()
   359  
   360  		db.compactionMu.Lock()
   361  		compactionL0 := len(db.L0)
   362  		db.compactionMu.Unlock()
   363  		totalCompactionBytes := db.compactionPacer.level.Load()
   364  		compactionDebt := math.Max(float64(totalCompactionBytes)-l0CompactionThreshold*memtableSize, 0.0)
   365  		maxWriteRate := db.writeLimiter.Rate()
   366  
   367  		now := time.Now()
   368  		elapsed := now.Sub(lastNow).Seconds()
   369  		fmt.Printf("%8s %8d %8.1f %8.1f %8.1f %8.1f %8d %12.1f\n",
   370  			time.Duration(now.Sub(start).Seconds()+0.5)*time.Second,
   371  			memtableCount,
   372  			float64(dirty)/(1024.0*1024.0),
   373  			float64(fill-lastFill)/(1024.0*1024.0*elapsed),
   374  			float64(drain-lastDrain)/(1024.0*1024.0*elapsed),
   375  			compactionDebt/(1024.0*1024.0),
   376  			compactionL0,
   377  			maxWriteRate/(1024.0*1024.0))
   378  
   379  		lastNow = now
   380  		lastFill = fill
   381  		lastDrain = drain
   382  	}
   383  }