github.com/petermattis/pebble@v0.0.0-20190905164901-ab51a2166067/internal/pacertoy/rocksdb/main.go

// Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package main

import (
	"context"
	"fmt"
	"math"
	"sync"
	"sync/atomic"
	"time"

	"github.com/petermattis/pebble/internal/rate"

	"golang.org/x/exp/rand"
)

const (
	// Max rate for all compactions. This is intentionally set low enough that
	// user writes will have to be delayed.
	maxCompactionRate     = 80 << 20 // 80 MB/s

	memtableSize          = 64 << 20 // 64 MB
	memtableStopThreshold = 2 * memtableSize
	maxWriteRate          = 30 << 20 // 30 MB/s
	startingWriteRate     = 30 << 20 // 30 MB/s

	l0SlowdownThreshold   = 4
	l0CompactionThreshold = 1

	levelRatio            = 10
	numLevels             = 7

	// Slowdown threshold is set at the compaction debt incurred by the largest
	// possible compaction.
	compactionDebtSlowdownThreshold = memtableSize*(numLevels-2)
)

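// compactionPacer tracks the bytes of pending compaction work (level) and
// rate-limits how quickly that work is drained, capped at maxCompactionRate.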
type compactionPacer struct {
	level   int64
	drainer *rate.Limiter
}

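// newCompactionPacer returns a compactionPacer whose drain rate is limited to
// maxCompactionRate.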
func newCompactionPacer() *compactionPacer {
	p := &compactionPacer{
		drainer: rate.NewLimiter(maxCompactionRate, maxCompactionRate),
	}
	return p
}

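// fill adds n bytes of pending compaction work.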
func (p *compactionPacer) fill(n int64) {
	atomic.AddInt64(&p.level, n)
}

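// drain performs n bytes of compaction work, waiting on the rate limiter so
// that the aggregate compaction rate stays at or below maxCompactionRate.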
func (p *compactionPacer) drain(n int64) {
	p.drainer.WaitN(context.Background(), int(n))

	atomic.AddInt64(&p.level, -n)
}

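// flushPacer tracks the bytes of dirty (unflushed) memtable data and blocks
// memtable fills once the dirty level reaches memtableStopThreshold.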
type flushPacer struct {
	level                 int64
	memtableStopThreshold float64
	fillCond              sync.Cond
}

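// newFlushPacer returns a flushPacer whose fill condition variable is
// associated with mu, which callers of fill must hold.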
func newFlushPacer(mu *sync.Mutex) *flushPacer {
	p := &flushPacer{
		memtableStopThreshold: memtableStopThreshold,
	}
	p.fillCond.L = mu
	return p
}

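// fill adds n bytes of dirty memtable data, waiting while the dirty level is
// at or above memtableStopThreshold. The caller must hold fillCond.L.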
func (p *flushPacer) fill(n int64) {
	for float64(atomic.LoadInt64(&p.level)) >= p.memtableStopThreshold {
		p.fillCond.Wait()
	}
	atomic.AddInt64(&p.level, n)
	p.fillCond.Signal()
}

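// drain records that n bytes of dirty memtable data have been flushed.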
func (p *flushPacer) drain(n int64) {
	atomic.AddInt64(&p.level, -n)
	// Wake a writer that may be blocked in fill waiting for the dirty level to
	// drop back below the stop threshold.
	p.fillCond.Signal()
}

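// DB simulates a RocksDB-style LSM: user writes fill memtables, memtables are
// flushed into L0, L0 tables are compacted into the lower levels, and the user
// write rate is adjusted based on the L0 table count and compaction debt.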
type DB struct {
	mu         sync.Mutex
	flushPacer *flushPacer
	flushCond  sync.Cond
	memtables  []*int64
	fill       int64
	drain      int64

	compactionMu    sync.Mutex
	compactionPacer *compactionPacer
	// L0 is represented as an array of integers whereas every other level
	// is represented as a single integer.
	L0 []*int64
	// Non-L0 sstables. sstables[0] == L1.
	sstables            []int64
	maxSSTableSizes     []int64
	compactionFlushCond sync.Cond
	prevCompactionDebt  float64
	previouslyInDebt    bool

	writeLimiter *rate.Limiter
}

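// newDB constructs the simulated DB: one empty memtable, one empty L0 table,
// each intermediate level starting at its maximum size, and an empty,
// unbounded bottom level. It also starts the background flush and compaction
// goroutines.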
func newDB() *DB {
	db := &DB{}
	db.flushPacer = newFlushPacer(&db.mu)
	db.flushCond.L = &db.mu
	db.memtables = append(db.memtables, new(int64))

	db.compactionFlushCond.L = &db.compactionMu
	db.L0 = append(db.L0, new(int64))
	db.compactionPacer = newCompactionPacer()

	db.maxSSTableSizes = make([]int64, numLevels-1)
	base := int64(levelRatio)
	for i := uint64(0); i < numLevels-2; i++ {
		// Each level is 10 times larger than the one above it.
		db.maxSSTableSizes[i] = memtableSize * l0CompactionThreshold * base
		base *= levelRatio

		// Begin with each level full.
		newLevel := db.maxSSTableSizes[i]

		db.sstables = append(db.sstables, newLevel)
	}
	db.sstables = append(db.sstables, 0)
	db.maxSSTableSizes[numLevels-2] = math.MaxInt64

	db.writeLimiter = rate.NewLimiter(startingWriteRate, startingWriteRate)

	go db.drainMemtable()
	go db.drainCompaction()

	return db
}

// drainCompaction simulates background compactions.
func (db *DB) drainCompaction() {
	rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))

	for {
		db.compactionMu.Lock()

		for len(db.L0) <= l0CompactionThreshold {
			db.compactionFlushCond.Wait()
		}
		l0Table := db.L0[0]
		db.compactionMu.Unlock()

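		// Drain the oldest L0 table through the compaction rate limiter in
		// small, randomly sized chunks.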
		for i, size := int64(0), int64(0); i < *l0Table; i += size {
			size = 10000 + rng.Int63n(500)
			if size > (*l0Table - i) {
				size = *l0Table - i
			}
			db.compactionPacer.drain(size)
		}

		db.compactionMu.Lock()
		db.L0 = db.L0[1:]
		db.compactionMu.Unlock()

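		// Account for the compacted bytes moving down the LSM: each level that
		// would overflow triggers a further memtable-sized compaction into the
		// level below it.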
		singleTableSize := int64(memtableSize)
		tablesToCompact := 0
		for i := range db.sstables {
			newSSTableSize := atomic.AddInt64(&db.sstables[i], singleTableSize)
			if newSSTableSize > db.maxSSTableSizes[i] {
				atomic.AddInt64(&db.sstables[i], -singleTableSize)
				tablesToCompact++
			} else {
				// Lower levels do not need compaction if the level above them did
				// not need compaction.
				break
			}
		}

		totalCompactionBytes := int64(tablesToCompact * memtableSize)
		db.compactionPacer.fill(totalCompactionBytes)

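		// Perform the triggered compactions at the paced rate, one
		// memtable-sized table at a time, re-evaluating the user write delay
		// after each table.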
		for t := 0; t < tablesToCompact; t++ {
			for i, size := int64(0), int64(0); i < memtableSize; i += size {
				size = 10000 + rng.Int63n(500)
				if size > (memtableSize - i) {
					size = memtableSize - i
				}
				db.compactionPacer.drain(size)
			}

			db.delayUserWrites()
		}
	}
}

// fillCompaction fills L0 sstables.
func (db *DB) fillCompaction(size int64) {
	db.compactionMu.Lock()

	db.compactionPacer.fill(size)

	last := db.L0[len(db.L0)-1]
	if *last+size > memtableSize {
		last = new(int64)
		db.L0 = append(db.L0, last)
		db.compactionFlushCond.Signal()
	}
	*last += size

	db.compactionMu.Unlock()
}

// drainMemtable simulates memtable flushing.
func (db *DB) drainMemtable() {
	rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))

	for {
		db.mu.Lock()
		for len(db.memtables) <= 1 {
			db.flushCond.Wait()
		}
		memtable := db.memtables[0]
		db.mu.Unlock()

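		// Flush the memtable in small, randomly sized chunks, draining the
		// flush pacer and feeding each flushed chunk into L0.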
		for i, size := int64(0), int64(0); i < *memtable; i += size {
			size = 1000 + rng.Int63n(50)
			if size > (*memtable - i) {
				size = *memtable - i
			}
			db.flushPacer.drain(size)
			atomic.AddInt64(&db.drain, size)

			db.fillCompaction(size)
		}

		db.delayUserWrites()

		db.mu.Lock()
		db.memtables = db.memtables[1:]
		db.mu.Unlock()
	}
}

// delayUserWrites applies write delays depending on compaction debt.
func (db *DB) delayUserWrites() {
	totalCompactionBytes := atomic.LoadInt64(&db.compactionPacer.level)
	compactionDebt := math.Max(float64(totalCompactionBytes)-l0CompactionThreshold*memtableSize, 0.0)

	db.compactionMu.Lock()
	l0Count := len(db.L0)
	db.compactionMu.Unlock()

	db.mu.Lock()
	if l0Count > l0SlowdownThreshold || compactionDebt > compactionDebtSlowdownThreshold {
		db.previouslyInDebt = true
		if compactionDebt > db.prevCompactionDebt {
			// Debt is growing.
			drainLimit := db.writeLimiter.Limit() * 0.8
			if drainLimit > 0 {
				db.writeLimiter.SetLimit(drainLimit)
			}
		} else {
			// Debt is shrinking.
			drainLimit := db.writeLimiter.Limit() * 1 / 0.8
			if drainLimit <= maxWriteRate {
				db.writeLimiter.SetLimit(drainLimit)
			}
		}
	} else if db.previouslyInDebt {
		// If compaction was previously delayed and has recovered, RocksDB
		// "rewards" the rate by double the slowdown ratio.

		// From RocksDB:
		// If the DB recovers from delay conditions, we reward with reducing
		// double the slowdown ratio. This is to balance the long term slowdown
		// increase signal.
		drainLimit := db.writeLimiter.Limit() * 1.4
		if drainLimit <= maxWriteRate {
			db.writeLimiter.SetLimit(drainLimit)
		}
		db.previouslyInDebt = false
	}

	db.prevCompactionDebt = compactionDebt
	db.mu.Unlock()
}

// fillMemtable simulates memtable filling.
func (db *DB) fillMemtable(size int64) {
	db.mu.Lock()

	db.flushPacer.fill(size)
	atomic.AddInt64(&db.fill, size)

	last := db.memtables[len(db.memtables)-1]
	if *last+size > memtableSize {
		last = new(int64)
		db.memtables = append(db.memtables, last)
		db.flushCond.Signal()
	}
	*last += size

	db.mu.Unlock()
}

// printLevels prints the levels.
func (db *DB) printLevels() {
	db.mu.Lock()
	for i := range db.sstables {
		fmt.Printf("Level %d: %d/%d\n", i+1, db.sstables[i]/(1024*1024), db.maxSSTableSizes[i]/(1024*1024))
	}
	db.mu.Unlock()
}

// simulateWrite simulates user writes.
func simulateWrite(db *DB) {
	limiter := rate.NewLimiter(10<<20, 10<<20) // 10 MB/s
	fmt.Printf("filling at 10 MB/sec\n")

	setRate := func(mb int) {
		fmt.Printf("filling at %d MB/sec\n", mb)
		limiter.SetLimit(rate.Limit(mb << 20))
	}

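	// Periodically pick a new user fill rate between 11 and 30 MB/s.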
	go func() {
		rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))
		for {
			secs := 5 + rng.Intn(5)
			time.Sleep(time.Duration(secs) * time.Second)
			mb := 11 + rng.Intn(20)
			setRate(mb)
		}
	}()

	rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))

	for {
		size := 1000 + rng.Int63n(50)
		limiter.WaitN(context.Background(), int(size))
		db.writeLimiter.WaitN(context.Background(), int(size))
		db.fillMemtable(size)
	}
}

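// main runs the simulation and prints one line of metrics per second:
// memtable count, dirty memtable bytes, fill and drain throughput, compaction
// debt, L0 table count, and the current user write rate limit.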
func main() {
	db := newDB()

	go simulateWrite(db)

	tick := time.NewTicker(time.Second)
	start := time.Now()
	lastNow := start
	var lastFill, lastDrain int64

	for i := 0; ; i++ {
		select {
		case <-tick.C:
			if (i % 20) == 0 {
				fmt.Printf("_elapsed___memtbs____dirty_____fill____drain____cdebt__l0count___max-w-rate\n")
			}

			if (i % 7) == 0 {
				//db.printLevels()
			}

			db.mu.Lock()
			memtableCount := len(db.memtables)
			db.mu.Unlock()
			dirty := atomic.LoadInt64(&db.flushPacer.level)
			fill := atomic.LoadInt64(&db.fill)
			drain := atomic.LoadInt64(&db.drain)

			db.compactionMu.Lock()
			compactionL0 := len(db.L0)
			db.compactionMu.Unlock()
			totalCompactionBytes := atomic.LoadInt64(&db.compactionPacer.level)
			compactionDebt := math.Max(float64(totalCompactionBytes)-l0CompactionThreshold*memtableSize, 0.0)
			maxWriteRate := db.writeLimiter.Limit()

			now := time.Now()
			elapsed := now.Sub(lastNow).Seconds()
			fmt.Printf("%8s %8d %8.1f %8.1f %8.1f %8.1f %8d %12.1f\n",
				time.Duration(now.Sub(start).Seconds()+0.5)*time.Second,
				memtableCount,
				float64(dirty)/(1024.0*1024.0),
				float64(fill-lastFill)/(1024.0*1024.0*elapsed),
				float64(drain-lastDrain)/(1024.0*1024.0*elapsed),
				compactionDebt/(1024.0*1024.0),
				compactionL0,
				maxWriteRate/(1024.0*1024.0))

			lastNow = now
			lastFill = fill
			lastDrain = drain
		}
	}
}