github.1485827954.workers.dev/ethereum/go-ethereum@v1.14.3/ethdb/pebble/pebble.go

// Copyright 2023 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

// Package pebble implements the key-value database layer based on pebble.
package pebble

import (
	"bytes"
	"fmt"
	"runtime"
	"sync"
	"sync/atomic"
	"time"

	"github.com/cockroachdb/pebble"
	"github.com/cockroachdb/pebble/bloom"
	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/ethdb"
	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/metrics"
)

const (
	// minCache is the minimum amount of memory in megabytes to allocate to pebble
	// read and write caching, split half and half.
	minCache = 16

	// minHandles is the minimum number of file handles to allocate to the open
	// database files.
	minHandles = 16

	// metricsGatheringInterval specifies the interval to retrieve pebble database
	// compaction, IO and pause stats and report them to the user.
	metricsGatheringInterval = 3 * time.Second

	// degradationWarnInterval specifies how often a warning should be printed if
	// the pebble database cannot keep up with the requested writes.
	degradationWarnInterval = time.Minute
)

// Database is a persistent key-value store based on the pebble storage engine.
// Apart from basic data storage functionality it also supports batch writes and
// iterating over the keyspace in binary-alphabetical order.
type Database struct {
	fn string     // filename for reporting
	db *pebble.DB // Underlying pebble storage engine

	compTimeMeter       metrics.Meter // Meter for measuring the total time spent in database compaction
	compReadMeter       metrics.Meter // Meter for measuring the data read during compaction
	compWriteMeter      metrics.Meter // Meter for measuring the data written during compaction
	writeDelayNMeter    metrics.Meter // Meter for measuring the number of write delays due to database compaction
	writeDelayMeter     metrics.Meter // Meter for measuring the write delay duration due to database compaction
	diskSizeGauge       metrics.Gauge // Gauge for tracking the size of all the levels in the database
	diskReadMeter       metrics.Meter // Meter for measuring the effective amount of data read
	diskWriteMeter      metrics.Meter // Meter for measuring the effective amount of data written
	memCompGauge        metrics.Gauge // Gauge for tracking the number of memory compactions
	level0CompGauge     metrics.Gauge // Gauge for tracking the number of table compactions in level0
	nonlevel0CompGauge  metrics.Gauge // Gauge for tracking the number of table compactions in non-level0 levels
	seekCompGauge       metrics.Gauge // Gauge for tracking the number of table compactions caused by read optimizations
	manualMemAllocGauge metrics.Gauge // Gauge for tracking the amount of non-managed memory currently allocated

	levelsGauge []metrics.Gauge // Gauges for tracking the number of tables in each level

	quitLock sync.RWMutex    // Mutex protecting the quit channel and the closed flag
	quitChan chan chan error // Quit channel to stop the metrics collection before closing the database
	closed   bool            // keep track of whether we're Closed

	log log.Logger // Contextual logger tracking the database path

	activeComp    int           // Current number of active compactions
	compStartTime time.Time     // The start time of the earliest currently-active compaction
	compTime      atomic.Int64  // Total time spent in compaction in ns
	level0Comp    atomic.Uint32 // Total number of level-zero compactions
	nonLevel0Comp atomic.Uint32 // Total number of non level-zero compactions

	writeStalled        atomic.Bool  // Flag whether a write is currently stalled
	writeDelayStartTime time.Time    // The start time of the latest write stall
	writeDelayCount     atomic.Int64 // Total number of write stalls
	writeDelayTime      atomic.Int64 // Total time spent in write stalls

	writeOptions *pebble.WriteOptions
}

func (d *Database) onCompactionBegin(info pebble.CompactionInfo) {
	if d.activeComp == 0 {
		d.compStartTime = time.Now()
	}
	l0 := info.Input[0]
	if l0.Level == 0 {
		d.level0Comp.Add(1)
	} else {
		d.nonLevel0Comp.Add(1)
	}
	d.activeComp++
}

func (d *Database) onCompactionEnd(info pebble.CompactionInfo) {
	if d.activeComp == 1 {
		d.compTime.Add(int64(time.Since(d.compStartTime)))
	} else if d.activeComp == 0 {
		panic("should not happen")
	}
	d.activeComp--
}

func (d *Database) onWriteStallBegin(b pebble.WriteStallBeginInfo) {
	d.writeDelayStartTime = time.Now()
	d.writeDelayCount.Add(1)
	d.writeStalled.Store(true)
}

func (d *Database) onWriteStallEnd() {
	d.writeDelayTime.Add(int64(time.Since(d.writeDelayStartTime)))
	d.writeStalled.Store(false)
}

// panicLogger silences Pebble's informational and error output, but panics on
// fatal errors so that they cannot pass unnoticed.
//
// TODO(karalabe): Remove when Pebble sets this as the default.
type panicLogger struct{}

func (l panicLogger) Infof(format string, args ...interface{}) {
}

func (l panicLogger) Errorf(format string, args ...interface{}) {
}

func (l panicLogger) Fatalf(format string, args ...interface{}) {
	panic(fmt.Errorf("fatal: "+format, args...))
}

// New returns a wrapped pebble DB object. The namespace is the prefix that the
// metrics reporting should use for surfacing internal stats.
func New(file string, cache int, handles int, namespace string, readonly bool, ephemeral bool) (*Database, error) {
	// Ensure we have some minimal caching and file guarantees
	if cache < minCache {
		cache = minCache
	}
	if handles < minHandles {
		handles = minHandles
	}
	logger := log.New("database", file)
	logger.Info("Allocated cache and file handles", "cache", common.StorageSize(cache*1024*1024), "handles", handles)

	// The max memtable size is limited by the uint32 offsets stored in
	// internal/arenaskl.node, DeferredBatchOp, and flushableBatchEntry.
	//
	// - MaxUint32 on 64-bit platforms;
	// - MaxInt on 32-bit platforms.
	//
	// It is used when slices are limited to Uint32 on 64-bit platforms (the
	// length limit for slices is naturally MaxInt on 32-bit platforms).
	//
	// Taken from https://github.com/cockroachdb/pebble/blob/master/internal/constants/constants.go
	maxMemTableSize := (1<<31)<<(^uint(0)>>63) - 1

	// Two memory tables are configured, which is identical to leveldb:
	// a frozen memory table and a live one.
	memTableLimit := 2
	memTableSize := cache * 1024 * 1024 / 2 / memTableLimit
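	// For example, a 1024 MB cache yields two 256 MB memtables: half of the
	// cache allowance, split evenly across the two tables.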

	// The memory table size is currently capped at maxMemTableSize-1 due to a
	// known bug in pebble where maxMemTableSize is not recognized as a
	// valid size.
	//
	// TODO use maxMemTableSize as the maximum table size once the issue
	// in pebble is fixed.
	if memTableSize >= maxMemTableSize {
		memTableSize = maxMemTableSize - 1
	}
	db := &Database{
		fn:           file,
		log:          logger,
		quitChan:     make(chan chan error),
		writeOptions: &pebble.WriteOptions{Sync: !ephemeral},
	}
	opt := &pebble.Options{
		// Pebble has a single combined cache area and the write
		// buffers are taken from this too. Assign all available
		// memory allowance to the cache.
		Cache:        pebble.NewCache(int64(cache * 1024 * 1024)),
		MaxOpenFiles: handles,

		// The size of the memory table (as well as the write buffer).
		// Note, there may be more than two memory tables in the system.
		MemTableSize: uint64(memTableSize),

		// MemTableStopWritesThreshold places a hard limit on the number
		// of queued MemTables (including the frozen one).
		// Note, this must be the number of tables, not the total size of all
		// memtables, according to
		// https://github.com/cockroachdb/pebble/blob/master/options.go#L738-L742
		// and https://github.com/cockroachdb/pebble/blob/master/db.go#L1892-L1903.
		MemTableStopWritesThreshold: memTableLimit,

		// The default compaction concurrency is 1 thread;
		// here all available CPUs are used for faster compaction.
		MaxConcurrentCompactions: func() int { return runtime.NumCPU() },

		// Per-level options. Options for at least one level must be specified. The
		// options for the last level are used for all subsequent levels.
		Levels: []pebble.LevelOptions{
			{TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)},
			{TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)},
			{TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)},
			{TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)},
			{TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)},
			{TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)},
			{TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)},
		},
		ReadOnly: readonly,
		EventListener: &pebble.EventListener{
			CompactionBegin: db.onCompactionBegin,
			CompactionEnd:   db.onCompactionEnd,
			WriteStallBegin: db.onWriteStallBegin,
			WriteStallEnd:   db.onWriteStallEnd,
		},
		Logger: panicLogger{}, // TODO(karalabe): Delete when this is upstreamed in Pebble
	}
	// Disable seek compaction explicitly. Check https://github.com/ethereum/go-ethereum/pull/20130
	// for more details.
	opt.Experimental.ReadSamplingMultiplier = -1

	// Open the db and recover any potential corruptions
	innerDB, err := pebble.Open(file, opt)
	if err != nil {
		return nil, err
	}
	db.db = innerDB

	db.compTimeMeter = metrics.GetOrRegisterMeter(namespace+"compact/time", nil)
	db.compReadMeter = metrics.GetOrRegisterMeter(namespace+"compact/input", nil)
	db.compWriteMeter = metrics.GetOrRegisterMeter(namespace+"compact/output", nil)
	db.diskSizeGauge = metrics.GetOrRegisterGauge(namespace+"disk/size", nil)
	db.diskReadMeter = metrics.GetOrRegisterMeter(namespace+"disk/read", nil)
	db.diskWriteMeter = metrics.GetOrRegisterMeter(namespace+"disk/write", nil)
	db.writeDelayMeter = metrics.GetOrRegisterMeter(namespace+"compact/writedelay/duration", nil)
	db.writeDelayNMeter = metrics.GetOrRegisterMeter(namespace+"compact/writedelay/counter", nil)
	db.memCompGauge = metrics.GetOrRegisterGauge(namespace+"compact/memory", nil)
	db.level0CompGauge = metrics.GetOrRegisterGauge(namespace+"compact/level0", nil)
	db.nonlevel0CompGauge = metrics.GetOrRegisterGauge(namespace+"compact/nonlevel0", nil)
	db.seekCompGauge = metrics.GetOrRegisterGauge(namespace+"compact/seek", nil)
	db.manualMemAllocGauge = metrics.GetOrRegisterGauge(namespace+"memory/manualalloc", nil)

	// Start up the metrics gathering and return
	go db.meter(metricsGatheringInterval, namespace)
	return db, nil
}
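
// A minimal usage sketch (illustrative only, with hypothetical path and
// sizing values; error handling abbreviated):
//
//	db, err := New("/tmp/chaindata", 512, 512, "eth/db/chaindata/", false, false)
//	if err != nil {
//		log.Crit("Failed to open database", "err", err)
//	}
//	defer db.Close()
//
//	if err := db.Put([]byte("key"), []byte("value")); err != nil {
//		log.Crit("Failed to store entry", "err", err)
//	}
//	val, _ := db.Get([]byte("key")) // val holds a private copy of the value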

// Close stops the metrics collection, flushes any pending data to disk and closes
// all io accesses to the underlying key-value store.
func (d *Database) Close() error {
	d.quitLock.Lock()
	defer d.quitLock.Unlock()
	// Allow double closing, simplifies things
	if d.closed {
		return nil
	}
	d.closed = true
	if d.quitChan != nil {
		errc := make(chan error)
		d.quitChan <- errc
		if err := <-errc; err != nil {
			d.log.Error("Metrics collection failed", "err", err)
		}
		d.quitChan = nil
	}
	return d.db.Close()
}

// Has retrieves if a key is present in the key-value store.
func (d *Database) Has(key []byte) (bool, error) {
	d.quitLock.RLock()
	defer d.quitLock.RUnlock()
	if d.closed {
		return false, pebble.ErrClosed
	}
	_, closer, err := d.db.Get(key)
	if err == pebble.ErrNotFound {
		return false, nil
	} else if err != nil {
		return false, err
	}
	closer.Close()
	return true, nil
}

// Get retrieves the given key if it's present in the key-value store.
func (d *Database) Get(key []byte) ([]byte, error) {
	d.quitLock.RLock()
	defer d.quitLock.RUnlock()
	if d.closed {
		return nil, pebble.ErrClosed
	}
	dat, closer, err := d.db.Get(key)
	if err != nil {
		return nil, err
	}
	ret := make([]byte, len(dat))
	copy(ret, dat)
	closer.Close()
	return ret, nil
}

// Put inserts the given value into the key-value store.
func (d *Database) Put(key []byte, value []byte) error {
	d.quitLock.RLock()
	defer d.quitLock.RUnlock()
	if d.closed {
		return pebble.ErrClosed
	}
	return d.db.Set(key, value, d.writeOptions)
}

// Delete removes the key from the key-value store.
func (d *Database) Delete(key []byte) error {
	d.quitLock.RLock()
	defer d.quitLock.RUnlock()
	if d.closed {
		return pebble.ErrClosed
	}
	return d.db.Delete(key, nil)
}

// NewBatch creates a write-only key-value store that buffers changes to its host
// database until a final write is called.
func (d *Database) NewBatch() ethdb.Batch {
	return &batch{
		b:  d.db.NewBatch(),
		db: d,
	}
}

// NewBatchWithSize creates a write-only database batch with pre-allocated buffer.
func (d *Database) NewBatchWithSize(size int) ethdb.Batch {
	return &batch{
		b:  d.db.NewBatchWithSize(size),
		db: d,
	}
}
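
// A minimal batching sketch (illustrative only): mutations accumulate in
// memory and only hit the host database once Write is called.
//
//	b := db.NewBatch()
//	b.Put([]byte("a"), []byte("1"))
//	b.Delete([]byte("b"))
//	if err := b.Write(); err != nil {
//		// handle the commit failure
//	}
//	b.Reset() // the batch may be reused afterwards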

// snapshot wraps a pebble snapshot for implementing the Snapshot interface.
type snapshot struct {
	db *pebble.Snapshot
}

// NewSnapshot creates a database snapshot based on the current state.
// The created snapshot will not be affected by any mutations that happen
// on the database afterwards.
// Note, don't forget to release the snapshot once it's no longer needed,
// otherwise the stale data will never be cleaned up by the underlying
// compactor.
func (d *Database) NewSnapshot() (ethdb.Snapshot, error) {
	snap := d.db.NewSnapshot()
	return &snapshot{db: snap}, nil
}
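
// A minimal snapshot sketch (illustrative only): reads through the snapshot
// observe the database as it was at creation time.
//
//	snap, _ := db.NewSnapshot()
//	defer snap.Release() // allow the compactor to reclaim superseded data
//	val, err := snap.Get([]byte("key"))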

// Has retrieves if a key is present in the snapshot backed by a key-value
// data store.
func (snap *snapshot) Has(key []byte) (bool, error) {
	_, closer, err := snap.db.Get(key)
	if err == pebble.ErrNotFound {
		return false, nil
	} else if err != nil {
		return false, err
	}
	closer.Close()
	return true, nil
}

// Get retrieves the given key if it's present in the snapshot backed by a
// key-value data store.
func (snap *snapshot) Get(key []byte) ([]byte, error) {
	dat, closer, err := snap.db.Get(key)
	if err != nil {
		return nil, err
	}
	ret := make([]byte, len(dat))
	copy(ret, dat)
	closer.Close()
	return ret, nil
}

// Release releases associated resources. Release should always succeed and can
// be called multiple times without causing error.
func (snap *snapshot) Release() {
	snap.db.Close()
}

// upperBound returns the smallest key that is larger than all keys carrying
// the given prefix, or nil if no such key exists (i.e. the prefix consists
// entirely of 0xff bytes).
func upperBound(prefix []byte) (limit []byte) {
	for i := len(prefix) - 1; i >= 0; i-- {
		c := prefix[i]
		if c == 0xff {
			continue
		}
		limit = make([]byte, i+1)
		copy(limit, prefix)
		limit[i] = c + 1
		break
	}
	return limit
}
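
// For example:
//
//	upperBound([]byte{0xaa, 0xbb}) // -> []byte{0xaa, 0xbc}
//	upperBound([]byte{0xaa, 0xff}) // -> []byte{0xab}
//	upperBound([]byte{0xff, 0xff}) // -> nil (no upper bound exists)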

// Stat returns the internal metrics of Pebble in a text format. It's a developer
// method to read everything there is to read independent of Pebble version.
//
// The property is unused in Pebble as there's only one thing to retrieve.
func (d *Database) Stat(property string) (string, error) {
	return d.db.Metrics().String(), nil
}

// Compact flattens the underlying data store for the given key range. In essence,
// deleted and overwritten versions are discarded, and the data is rearranged to
// reduce the cost of operations needed to access them.
//
// A nil start is treated as a key before all keys in the data store; a nil limit
// is treated as a key after all keys in the data store. If both are nil, the
// entire data store will be compacted.
func (d *Database) Compact(start []byte, limit []byte) error {
	// There is no special flag to represent the end of the key range in pebble
	// (nil in leveldb), so an ugly hack is used to construct a large key to
	// represent it.
	// Note, any prefixed database entry will be smaller than this flag. Trie
	// nodes need the full 32 bytes of 0xff because there might be a shared
	// prefix consisting of a number of 0xff-s, so 32 bytes ensure that only a
	// hash collision could touch it.
	// https://github.com/cockroachdb/pebble/issues/2359#issuecomment-1443995833
	if limit == nil {
		limit = bytes.Repeat([]byte{0xff}, 32)
	}
	return d.db.Compact(start, limit, true) // Parallelization is preferred
}
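
// A minimal sketch (illustrative only): passing nil bounds compacts the
// entire data store, e.g. after a large batch of deletions.
//
//	if err := db.Compact(nil, nil); err != nil {
//		// handle the compaction failure
//	}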

// Path returns the path to the database directory.
func (d *Database) Path() string {
	return d.fn
}

// meter periodically retrieves internal pebble counters and reports them to
// the metrics subsystem.
func (d *Database) meter(refresh time.Duration, namespace string) {
	var errc chan error
	timer := time.NewTimer(refresh)
	defer timer.Stop()

	// Create storage and warning log tracer for write delay.
	var (
		compTimes  [2]int64
		compWrites [2]int64
		compReads  [2]int64

		nWrites [2]int64

		writeDelayTimes      [2]int64
		writeDelayCounts     [2]int64
		lastWriteStallReport time.Time
	)

	// Iterate ad infinitum and collect the stats
	for i := 1; errc == nil; i++ {
		var (
			compWrite int64
			compRead  int64
			nWrite    int64

			stats              = d.db.Metrics()
			compTime           = d.compTime.Load()
			writeDelayCount    = d.writeDelayCount.Load()
			writeDelayTime     = d.writeDelayTime.Load()
			nonLevel0CompCount = int64(d.nonLevel0Comp.Load())
			level0CompCount    = int64(d.level0Comp.Load())
		)
		writeDelayTimes[i%2] = writeDelayTime
		writeDelayCounts[i%2] = writeDelayCount
		compTimes[i%2] = compTime

		for _, levelMetrics := range stats.Levels {
			nWrite += int64(levelMetrics.BytesCompacted)
			nWrite += int64(levelMetrics.BytesFlushed)
			compWrite += int64(levelMetrics.BytesCompacted)
			compRead += int64(levelMetrics.BytesRead)
		}

		nWrite += int64(stats.WAL.BytesWritten)

		compWrites[i%2] = compWrite
		compReads[i%2] = compRead
		nWrites[i%2] = nWrite

		if d.writeDelayNMeter != nil {
			d.writeDelayNMeter.Mark(writeDelayCounts[i%2] - writeDelayCounts[(i-1)%2])
		}
		if d.writeDelayMeter != nil {
			d.writeDelayMeter.Mark(writeDelayTimes[i%2] - writeDelayTimes[(i-1)%2])
		}
		// Print a warning log if writing has been stalled for a while. The log will
		// be printed per minute to avoid overwhelming users.
		if d.writeStalled.Load() && writeDelayCounts[i%2] == writeDelayCounts[(i-1)%2] &&
			time.Now().After(lastWriteStallReport.Add(degradationWarnInterval)) {
			d.log.Warn("Database compacting, degraded performance")
			lastWriteStallReport = time.Now()
		}
		if d.compTimeMeter != nil {
			d.compTimeMeter.Mark(compTimes[i%2] - compTimes[(i-1)%2])
		}
		if d.compReadMeter != nil {
			d.compReadMeter.Mark(compReads[i%2] - compReads[(i-1)%2])
		}
		if d.compWriteMeter != nil {
			d.compWriteMeter.Mark(compWrites[i%2] - compWrites[(i-1)%2])
		}
		if d.diskSizeGauge != nil {
			d.diskSizeGauge.Update(int64(stats.DiskSpaceUsage()))
		}
		if d.diskReadMeter != nil {
			d.diskReadMeter.Mark(0) // pebble doesn't track non-compaction reads
		}
		if d.diskWriteMeter != nil {
			d.diskWriteMeter.Mark(nWrites[i%2] - nWrites[(i-1)%2])
		}
		// See https://github.com/cockroachdb/pebble/pull/1628#pullrequestreview-1026664054
		manuallyAllocated := stats.BlockCache.Size + int64(stats.MemTable.Size) + int64(stats.MemTable.ZombieSize)
		d.manualMemAllocGauge.Update(manuallyAllocated)
		d.memCompGauge.Update(stats.Flush.Count)
		d.nonlevel0CompGauge.Update(nonLevel0CompCount)
		d.level0CompGauge.Update(level0CompCount)
		d.seekCompGauge.Update(stats.Compact.ReadCount)

		for i, level := range stats.Levels {
			// Append metrics for additional layers
			if i >= len(d.levelsGauge) {
				d.levelsGauge = append(d.levelsGauge, metrics.GetOrRegisterGauge(namespace+fmt.Sprintf("tables/level%v", i), nil))
			}
			d.levelsGauge[i].Update(level.NumFiles)
		}

		// Sleep a bit, then repeat the stats collection
		select {
		case errc = <-d.quitChan:
			// Quit requested, stop hammering the database
		case <-timer.C:
			timer.Reset(refresh)
			// Timeout, gather a new set of stats
		}
	}
	errc <- nil
}

// batch is a write-only batch that commits changes to its host database
// when Write is called. A batch cannot be used concurrently.
type batch struct {
	b    *pebble.Batch
	db   *Database
	size int
}

// Put inserts the given value into the batch for later committing.
func (b *batch) Put(key, value []byte) error {
	b.b.Set(key, value, nil)
	b.size += len(key) + len(value)
	return nil
}

// Delete inserts a key removal into the batch for later committing.
func (b *batch) Delete(key []byte) error {
	b.b.Delete(key, nil)
	b.size += len(key)
	return nil
}

// ValueSize retrieves the amount of data queued up for writing.
func (b *batch) ValueSize() int {
	return b.size
}

// Write flushes any accumulated data to disk.
func (b *batch) Write() error {
	b.db.quitLock.RLock()
	defer b.db.quitLock.RUnlock()
	if b.db.closed {
		return pebble.ErrClosed
	}
	return b.b.Commit(b.db.writeOptions)
}

// Reset resets the batch for reuse.
func (b *batch) Reset() {
	b.b.Reset()
	b.size = 0
}

// Replay replays the batch contents.
func (b *batch) Replay(w ethdb.KeyValueWriter) error {
	reader := b.b.Reader()
	for {
		kind, k, v, ok, err := reader.Next()
		if err != nil {
			return err
		}
		if !ok {
			break
		}
		// The (k,v) slices might be overwritten if the batch is reset/reused,
		// and the receiver should copy them if they are to be retained long-term.
		if kind == pebble.InternalKeyKindSet {
			w.Put(k, v)
		} else if kind == pebble.InternalKeyKindDelete {
			w.Delete(k)
		} else {
			return fmt.Errorf("unhandled operation, keytype: %v", kind)
		}
	}
	return nil
}
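
// A minimal replay sketch (illustrative only): the buffered mutations of a
// batch can be re-applied to any ethdb.KeyValueWriter; otherDB below is a
// hypothetical second database.
//
//	if err := b.Replay(otherDB); err != nil {
//		// handle the replay failure
//	}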

// pebbleIterator is a wrapper around the underlying iterator of the storage
// engine. The purpose of this structure is to implement the missing APIs.
//
// The pebble iterator is not thread-safe.
type pebbleIterator struct {
	iter     *pebble.Iterator
	moved    bool
	released bool
}

// NewIterator creates a binary-alphabetical iterator over a subset
// of database content with a particular key prefix, starting at a particular
// initial key (or after, if it does not exist).
func (d *Database) NewIterator(prefix []byte, start []byte) ethdb.Iterator {
	iter, _ := d.db.NewIter(&pebble.IterOptions{
		LowerBound: append(prefix, start...),
		UpperBound: upperBound(prefix),
	})
	iter.First()
	return &pebbleIterator{iter: iter, moved: true, released: false}
}
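
// A minimal iteration sketch (illustrative only): walk all keys with a given
// prefix, copying any key that needs to outlive the current iteration.
//
//	it := db.NewIterator([]byte("prefix-"), nil)
//	defer it.Release()
//	for it.Next() {
//		key := common.CopyBytes(it.Key()) // Key/Value change on the next Next
//		_ = key
//	}
//	if err := it.Error(); err != nil {
//		// handle the iteration failure
//	}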

// Next moves the iterator to the next key/value pair. It returns whether the
// iterator is exhausted.
func (iter *pebbleIterator) Next() bool {
	if iter.moved {
		iter.moved = false
		return iter.iter.Valid()
	}
	return iter.iter.Next()
}

// Error returns any accumulated error. Exhausting all the key/value pairs
// is not considered to be an error.
func (iter *pebbleIterator) Error() error {
	return iter.iter.Error()
}

// Key returns the key of the current key/value pair, or nil if done. The caller
// should not modify the contents of the returned slice, and its contents may
// change on the next call to Next.
func (iter *pebbleIterator) Key() []byte {
	return iter.iter.Key()
}

// Value returns the value of the current key/value pair, or nil if done. The
// caller should not modify the contents of the returned slice, and its contents
// may change on the next call to Next.
func (iter *pebbleIterator) Value() []byte {
	return iter.iter.Value()
}

// Release releases associated resources. Release should always succeed and can
// be called multiple times without causing error.
func (iter *pebbleIterator) Release() {
	if !iter.released {
		iter.iter.Close()
		iter.released = true
	}
}