github.com/MetalBlockchain/metalgo@v1.11.9/database/leveldb/db.go

// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
// See the file LICENSE for licensing terms.

package leveldb

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"math"
	"slices"
	"sync"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/syndtr/goleveldb/leveldb"
	"github.com/syndtr/goleveldb/leveldb/errors"
	"github.com/syndtr/goleveldb/leveldb/filter"
	"github.com/syndtr/goleveldb/leveldb/iterator"
	"github.com/syndtr/goleveldb/leveldb/opt"
	"github.com/syndtr/goleveldb/leveldb/util"
	"go.uber.org/zap"

	"github.com/MetalBlockchain/metalgo/database"
	"github.com/MetalBlockchain/metalgo/utils"
	"github.com/MetalBlockchain/metalgo/utils/logging"
)

const (
	// Name is the name of this database for database switches
	Name = "leveldb"

	// DefaultBlockCacheSize is the number of bytes to use for block caching in
	// leveldb.
	DefaultBlockCacheSize = 12 * opt.MiB

	// DefaultWriteBufferSize is the number of bytes to use for buffers in
	// leveldb.
	DefaultWriteBufferSize = 12 * opt.MiB

	// DefaultHandleCap is the maximum number of file descriptors that levelDB
	// is allowed to use.
	DefaultHandleCap = 1024

	// DefaultBitsPerKey is the number of bits to add to the bloom filter per
	// key.
	DefaultBitsPerKey = 10

	// DefaultMaxManifestFileSize is the default maximum size of a manifest
	// file.
	//
	// This avoids https://github.com/syndtr/goleveldb/issues/413.
	DefaultMaxManifestFileSize = math.MaxInt64

	// DefaultMetricUpdateFrequency is the frequency to poll the LevelDB
	// metrics.
	DefaultMetricUpdateFrequency = 10 * time.Second

	// levelDBByteOverhead is the number of bytes of constant overhead added to
	// the reported size of a batch per operation.
	levelDBByteOverhead = 8
)

var (
	_ database.Database = (*Database)(nil)
	_ database.Batch    = (*batch)(nil)
	_ database.Iterator = (*iter)(nil)

	ErrInvalidConfig = errors.New("invalid config")
	ErrCouldNotOpen  = errors.New("could not open")
)

// Database is a persistent key-value store. Apart from basic data storage
// functionality it also supports batch writes and iterating over the keyspace
// in binary-alphabetical order.
type Database struct {
	*leveldb.DB
	// metrics is only initialized and used when [MetricUpdateFrequency] is > 0
	// in the config
	metrics   metrics
	closed    utils.Atomic[bool]
	closeOnce sync.Once
	// closeCh is closed when Close() is called.
	closeCh chan struct{}
	// closeWg is used to wait for all goroutines created by New() to exit.
	// This avoids racy behavior when Close() is called at the same time as
	// Stats(). See: https://github.com/syndtr/goleveldb/issues/418
	closeWg sync.WaitGroup
}

type config struct {
	// BlockCacheCapacity defines the capacity of the 'sorted table' block caching.
	// Use -1 to disable the cache; this has the same effect as specifying
	// NoCacher for BlockCacher.
	//
	// The default value is 12MiB.
	BlockCacheCapacity int `json:"blockCacheCapacity"`
	// BlockSize is the minimum uncompressed size in bytes of each 'sorted table'
	// block.
	//
	// The default value is 4KiB.
	BlockSize int `json:"blockSize"`
	// CompactionExpandLimitFactor limits compaction size after it has been
	// expanded. This will be multiplied by the table size limit at the
	// compaction target level.
	//
	// The default value is 25.
	CompactionExpandLimitFactor int `json:"compactionExpandLimitFactor"`
	// CompactionGPOverlapsFactor limits the overlaps in the grandparent
	// (Level + 2) that a single 'sorted table' generates. This will be
	// multiplied by the table size limit at the grandparent level.
	//
	// The default value is 10.
	CompactionGPOverlapsFactor int `json:"compactionGPOverlapsFactor"`
	// CompactionL0Trigger defines the number of 'sorted tables' at level-0
	// that will trigger compaction.
	//
	// The default value is 4.
	CompactionL0Trigger int `json:"compactionL0Trigger"`
	// CompactionSourceLimitFactor limits the compaction source size. This
	// doesn't apply to level-0. It will be multiplied by the table size limit
	// at the compaction target level.
	//
	// The default value is 1.
	CompactionSourceLimitFactor int `json:"compactionSourceLimitFactor"`
	// CompactionTableSize limits the size of the 'sorted tables' that
	// compaction generates. The limit for each level is calculated as:
	//   CompactionTableSize * (CompactionTableSizeMultiplier ^ Level)
	// The multiplier for each level can also be fine-tuned using
	// CompactionTableSizeMultiplierPerLevel.
	//
	// The default value is 2MiB.
	CompactionTableSize int `json:"compactionTableSize"`
	// CompactionTableSizeMultiplier defines the multiplier for
	// CompactionTableSize.
	//
	// The default value is 1.
	CompactionTableSizeMultiplier float64 `json:"compactionTableSizeMultiplier"`
	// CompactionTableSizeMultiplierPerLevel defines a per-level multiplier for
	// CompactionTableSize.
	// Use zero to skip a level.
	//
	// The default value is nil.
	CompactionTableSizeMultiplierPerLevel []float64 `json:"compactionTableSizeMultiplierPerLevel"`
	// CompactionTotalSize limits the total size of 'sorted tables' for each
	// level. The limit for each level is calculated as:
	//   CompactionTotalSize * (CompactionTotalSizeMultiplier ^ Level)
	// The multiplier for each level can also be fine-tuned using
	// CompactionTotalSizeMultiplierPerLevel.
	//
	// The default value is 10MiB.
	CompactionTotalSize int `json:"compactionTotalSize"`
	// CompactionTotalSizeMultiplier defines the multiplier for
	// CompactionTotalSize.
	//
	// The default value is 10.
	CompactionTotalSizeMultiplier float64 `json:"compactionTotalSizeMultiplier"`
	// DisableSeeksCompaction allows disabling 'seeks triggered compaction'.
	// The purpose of 'seeks triggered compaction' is to optimize the database
	// so that 'level seeks' are minimized; however, this might generate many
	// small compactions, which may not be preferable.
	//
	// The default is true.
	DisableSeeksCompaction bool `json:"disableSeeksCompaction"`
	// OpenFilesCacheCapacity defines the capacity of the open files caching.
	// Use -1 to disable the cache; this has the same effect as specifying
	// NoCacher for OpenFilesCacher.
	//
	// The default value is 1024.
	OpenFilesCacheCapacity int `json:"openFilesCacheCapacity"`
	// WriteBuffer defines the maximum size of a 'memdb' before it is flushed
	// to a 'sorted table'. A 'memdb' is an in-memory DB backed by an on-disk
	// unsorted journal.
	//
	// LevelDB may hold up to two 'memdb' instances at the same time.
	//
	// The default value is 6MiB.
	WriteBuffer int `json:"writeBuffer"`
	// FilterBitsPerKey is the number of bits to add to the bloom filter per
	// key.
	FilterBitsPerKey int `json:"filterBitsPerKey"`

	// MaxManifestFileSize is the maximum size limit of the MANIFEST-****** file.
	// When the MANIFEST-****** file grows beyond this size, LevelDB will create
	// a new MANIFEST file.
	//
	// The default value is infinity.
	MaxManifestFileSize int64 `json:"maxManifestFileSize"`

	// MetricUpdateFrequency is the frequency to poll LevelDB metrics.
	// If <= 0, LevelDB metrics aren't polled.
	MetricUpdateFrequency time.Duration `json:"metricUpdateFrequency"`
}
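
// For illustration, a hedged sketch of a JSON config that New below would
// accept; the values are invented for the example, not recommendations:
//
//	{
//		"blockCacheCapacity": 12582912,
//		"compactionL0Trigger": 4,
//		"writeBuffer": 6291456,
//		"metricUpdateFrequency": 10000000000
//	}
//
// MetricUpdateFrequency is a time.Duration, so a bare JSON number is
// interpreted as nanoseconds (10000000000 is 10s).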

// New returns a wrapped LevelDB object.
func New(file string, configBytes []byte, log logging.Logger, reg prometheus.Registerer) (database.Database, error) {
	parsedConfig := config{
		BlockCacheCapacity:     DefaultBlockCacheSize,
		DisableSeeksCompaction: true,
		OpenFilesCacheCapacity: DefaultHandleCap,
		WriteBuffer:            DefaultWriteBufferSize / 2,
		FilterBitsPerKey:       DefaultBitsPerKey,
		MaxManifestFileSize:    DefaultMaxManifestFileSize,
		MetricUpdateFrequency:  DefaultMetricUpdateFrequency,
	}
	if len(configBytes) > 0 {
		if err := json.Unmarshal(configBytes, &parsedConfig); err != nil {
			return nil, fmt.Errorf("%w: %w", ErrInvalidConfig, err)
		}
	}

	log.Info("creating leveldb",
		zap.Reflect("config", parsedConfig),
	)

	// Open the db and recover from any potential corruption
	db, err := leveldb.OpenFile(file, &opt.Options{
		BlockCacheCapacity:            parsedConfig.BlockCacheCapacity,
		BlockSize:                     parsedConfig.BlockSize,
		CompactionExpandLimitFactor:   parsedConfig.CompactionExpandLimitFactor,
		CompactionGPOverlapsFactor:    parsedConfig.CompactionGPOverlapsFactor,
		CompactionL0Trigger:           parsedConfig.CompactionL0Trigger,
		CompactionSourceLimitFactor:   parsedConfig.CompactionSourceLimitFactor,
		CompactionTableSize:           parsedConfig.CompactionTableSize,
		CompactionTableSizeMultiplier: parsedConfig.CompactionTableSizeMultiplier,
		CompactionTotalSize:           parsedConfig.CompactionTotalSize,
		CompactionTotalSizeMultiplier: parsedConfig.CompactionTotalSizeMultiplier,
		DisableSeeksCompaction:        parsedConfig.DisableSeeksCompaction,
		OpenFilesCacheCapacity:        parsedConfig.OpenFilesCacheCapacity,
		WriteBuffer:                   parsedConfig.WriteBuffer,
		Filter:                        filter.NewBloomFilter(parsedConfig.FilterBitsPerKey),
		MaxManifestFileSize:           parsedConfig.MaxManifestFileSize,
	})
	if _, corrupted := err.(*errors.ErrCorrupted); corrupted {
		db, err = leveldb.RecoverFile(file, nil)
	}
	if err != nil {
		return nil, fmt.Errorf("%w: %w", ErrCouldNotOpen, err)
	}

	wrappedDB := &Database{
		DB:      db,
		closeCh: make(chan struct{}),
	}
	if parsedConfig.MetricUpdateFrequency > 0 {
		metrics, err := newMetrics(reg)
		if err != nil {
			// Drop any close error to report the original error
			_ = db.Close()
			return nil, err
		}
		wrappedDB.metrics = metrics
		wrappedDB.closeWg.Add(1)
		go func() {
			t := time.NewTicker(parsedConfig.MetricUpdateFrequency)
			defer func() {
				t.Stop()
				wrappedDB.closeWg.Done()
			}()

			for {
				if err := wrappedDB.updateMetrics(); err != nil {
					log.Warn("failed to update leveldb metrics",
						zap.Error(err),
					)
				}

				select {
				case <-t.C:
				case <-wrappedDB.closeCh:
					return
				}
			}
		}()
	}
	return wrappedDB, nil
}
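
// A minimal usage sketch. logging.NoLog and prometheus.NewRegistry come from
// the imported packages, but the path and the nil config bytes (which select
// the defaults above) are invented for the example:
//
//	db, err := New("/tmp/example-db", nil, logging.NoLog{}, prometheus.NewRegistry())
//	if err != nil {
//		// err wraps ErrInvalidConfig or ErrCouldNotOpen
//	}
//	defer db.Close()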

// Has returns whether the key is set in the database
func (db *Database) Has(key []byte) (bool, error) {
	has, err := db.DB.Has(key, nil)
	return has, updateError(err)
}

// Get returns the value the key maps to in the database
func (db *Database) Get(key []byte) ([]byte, error) {
	value, err := db.DB.Get(key, nil)
	return value, updateError(err)
}

// Put sets the value of the provided key to the provided value
func (db *Database) Put(key []byte, value []byte) error {
	return updateError(db.DB.Put(key, value, nil))
}

// Delete removes the key from the database
func (db *Database) Delete(key []byte) error {
	return updateError(db.DB.Delete(key, nil))
}

// NewBatch creates a write/delete-only buffer that is atomically committed to
// the database when Write is called
func (db *Database) NewBatch() database.Batch {
	return &batch{db: db}
}
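
// A hedged batch usage sketch; the keys and values are invented:
//
//	b := db.NewBatch()
//	_ = b.Put([]byte("k1"), []byte("v1")) // Put and Delete on *batch always return nil
//	_ = b.Delete([]byte("k2"))
//	if err := b.Write(); err != nil {
//		// neither operation was applied
//	}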

// NewIterator creates a lexicographically ordered iterator over the database
func (db *Database) NewIterator() database.Iterator {
	return &iter{
		db:       db,
		Iterator: db.DB.NewIterator(new(util.Range), nil),
	}
}

// NewIteratorWithStart creates a lexicographically ordered iterator over the
// database starting at the provided key
func (db *Database) NewIteratorWithStart(start []byte) database.Iterator {
	return &iter{
		db:       db,
		Iterator: db.DB.NewIterator(&util.Range{Start: start}, nil),
	}
}

// NewIteratorWithPrefix creates a lexicographically ordered iterator over the
// database ignoring keys that do not start with the provided prefix
func (db *Database) NewIteratorWithPrefix(prefix []byte) database.Iterator {
	return &iter{
		db:       db,
		Iterator: db.DB.NewIterator(util.BytesPrefix(prefix), nil),
	}
}

// NewIteratorWithStartAndPrefix creates a lexicographically ordered iterator
// over the database starting at start and ignoring keys that do not start with
// the provided prefix
func (db *Database) NewIteratorWithStartAndPrefix(start, prefix []byte) database.Iterator {
	iterRange := util.BytesPrefix(prefix)
	if bytes.Compare(start, prefix) > 0 {
		iterRange.Start = start
	}
	return &iter{
		db:       db,
		Iterator: db.DB.NewIterator(iterRange, nil),
	}
}
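
// A hedged iteration sketch over a hypothetical "account" prefix; process is
// an invented placeholder:
//
//	it := db.NewIteratorWithPrefix([]byte("account"))
//	defer it.Release()
//	for it.Next() {
//		process(it.Key(), it.Value())
//	}
//	if err := it.Error(); err != nil {
//		// e.g. database.ErrClosed if the db was closed mid-iteration
//	}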

// This comment is basically copy-pasted from the underlying levelDB library:

// Compact the underlying DB for the given key range.
// Specifically, deleted and overwritten versions are discarded,
// and the data is rearranged to reduce the cost of operations
// needed to access the data. This operation should typically only
// be invoked by users who understand the underlying implementation.
//
// A nil start is treated as a key before all keys in the DB.
// And a nil limit is treated as a key after all keys in the DB.
// Therefore, if both are nil, it will compact the entire DB.
func (db *Database) Compact(start []byte, limit []byte) error {
	return updateError(db.DB.CompactRange(util.Range{Start: start, Limit: limit}))
}
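
// For example, per the comment above, a full-database compaction would look
// like this (hedged sketch):
//
//	if err := db.Compact(nil, nil); err != nil {
//		// compaction failed or the database is closed
//	}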

func (db *Database) Close() error {
	db.closed.Set(true)
	db.closeOnce.Do(func() {
		close(db.closeCh)
	})
	db.closeWg.Wait()
	return updateError(db.DB.Close())
}

func (db *Database) HealthCheck(context.Context) (interface{}, error) {
	if db.closed.Get() {
		return nil, database.ErrClosed
	}
	return nil, nil
}

// batch is a wrapper around a levelDB batch to contain sizes.
type batch struct {
	leveldb.Batch
	db   *Database
	size int
}

// Put the value into the batch for later writing
func (b *batch) Put(key, value []byte) error {
	b.Batch.Put(key, value)
	b.size += len(key) + len(value) + levelDBByteOverhead
	return nil
}

// Delete the key during writing
func (b *batch) Delete(key []byte) error {
	b.Batch.Delete(key)
	b.size += len(key) + levelDBByteOverhead
	return nil
}

// Size retrieves the amount of data queued up for writing.
func (b *batch) Size() int {
	return b.size
}
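
// For example, after b.Put([]byte("key"), []byte("value")), Size reports
// len("key") + len("value") + levelDBByteOverhead = 3 + 5 + 8 = 16 bytes.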

// Write flushes any accumulated data to disk.
func (b *batch) Write() error {
	return updateError(b.db.DB.Write(&b.Batch, nil))
}

// Reset resets the batch for reuse.
func (b *batch) Reset() {
	b.Batch.Reset()
	b.size = 0
}

// Replay the batch contents.
func (b *batch) Replay(w database.KeyValueWriterDeleter) error {
	replay := &replayer{writerDeleter: w}
	if err := b.Batch.Replay(replay); err != nil {
		// Never hit in practice: goleveldb's Batch.Replay always returns nil
		return err
	}
	return replay.err
}
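
// A hedged sketch of replaying a batch into any other
// database.KeyValueWriterDeleter; otherDB is an invented placeholder:
//
//	if err := b.Replay(otherDB); err != nil {
//		// a Put or Delete on otherDB failed
//	}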

// Inner returns itself
func (b *batch) Inner() database.Batch {
	return b
}

type replayer struct {
	writerDeleter database.KeyValueWriterDeleter
	err           error
}

func (r *replayer) Put(key, value []byte) {
	if r.err != nil {
		return
	}
	r.err = r.writerDeleter.Put(key, value)
}

func (r *replayer) Delete(key []byte) {
	if r.err != nil {
		return
	}
	r.err = r.writerDeleter.Delete(key)
}

type iter struct {
	db *Database
	iterator.Iterator

	key, val []byte
	err      error
}

func (it *iter) Next() bool {
	// Short-circuit and set an error if the underlying database has been closed.
	if it.db.closed.Get() {
		it.key = nil
		it.val = nil
		it.err = database.ErrClosed
		return false
	}

	hasNext := it.Iterator.Next()
	if hasNext {
		it.key = slices.Clone(it.Iterator.Key())
		it.val = slices.Clone(it.Iterator.Value())
	} else {
		it.key = nil
		it.val = nil
	}
	return hasNext
}

func (it *iter) Error() error {
	if it.err != nil {
		return it.err
	}
	return updateError(it.Iterator.Error())
}

func (it *iter) Key() []byte {
	return it.key
}

func (it *iter) Value() []byte {
	return it.val
}

// updateError maps goleveldb errors onto their database package equivalents
// so callers only need to handle one error set.
func updateError(err error) error {
	switch err {
	case leveldb.ErrClosed:
		return database.ErrClosed
	case leveldb.ErrNotFound:
		return database.ErrNotFound
	default:
		return err
	}
}