github.com/klaytn/klaytn@v1.12.1/storage/database/rocksdb_database.go (about)

     1  // Copyright 2023 The klaytn Authors
     2  // This file is part of the klaytn library.
     3  //
     4  // The klaytn library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The klaytn library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the klaytn library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  //go:build rocksdb
    18  // +build rocksdb
    19  
    20  package database
    21  
    22  import (
    23  	"fmt"
    24  	"os"
    25  	"strings"
    26  	"time"
    27  
    28  	"github.com/klaytn/klaytn/log"
    29  	klaytnmetrics "github.com/klaytn/klaytn/metrics"
    30  	metricutils "github.com/klaytn/klaytn/metrics/utils"
    31  	"github.com/linxGnu/grocksdb"
    32  	"github.com/rcrowley/go-metrics"
    33  )
    34  
    35  func filterPolicyStrToNative(t string) *grocksdb.NativeFilterPolicy {
    36  	switch t {
    37  	case "bloom":
    38  		return grocksdb.NewBloomFilter(defaultBitsPerKey)
    39  	case "ribbon":
    40  		return grocksdb.NewRibbonFilterPolicy(defaultBitsPerKey)
    41  	default:
    42  		return nil
    43  	}
    44  }
    45  
    46  func compressionStrToType(t string) grocksdb.CompressionType {
    47  	switch t {
    48  	case "snappy":
    49  		return grocksdb.SnappyCompression
    50  	case "zlib":
    51  		return grocksdb.ZLibCompression
    52  	case "bz2":
    53  		return grocksdb.Bz2Compression
    54  	case "lz4":
    55  		return grocksdb.LZ4Compression
    56  	case "lz4hc":
    57  		return grocksdb.LZ4HCCompression
    58  	case "xpress":
    59  		return grocksdb.XpressCompression
    60  	case "zstd":
    61  		return grocksdb.ZSTDCompression
    62  	default:
    63  		return grocksdb.NoCompression
    64  	}
    65  }
    66  
    67  type rocksDB struct {
    68  	config *RocksDBConfig
    69  	db     *grocksdb.DB // rocksDB instance
    70  
    71  	wo *grocksdb.WriteOptions
    72  	ro *grocksdb.ReadOptions
    73  
    74  	quitCh          chan struct{}
    75  	metrics         []metrics.Meter
    76  	getTimer        klaytnmetrics.HybridTimer
    77  	putTimer        klaytnmetrics.HybridTimer
    78  	batchWriteTimer klaytnmetrics.HybridTimer
    79  
    80  	prefix string
    81  	logger log.Logger
    82  }
    83  
    84  // openFile checks if the path is valid directory or not. If not exists, the path directory is created.
    85  func openFile(path string, needToMake bool) error {
    86  	if fi, err := os.Stat(path); err == nil {
    87  		if !fi.IsDir() {
    88  			return fmt.Errorf("rocksdb: open %s: not a directory", path)
    89  		}
    90  	} else if os.IsNotExist(err) && needToMake {
    91  		if err := os.MkdirAll(path, 0o755); err != nil {
    92  			return err
    93  		}
    94  	} else {
    95  		return err
    96  	}
    97  
    98  	return nil
    99  }
   100  
   101  func NewRocksDB(path string, config *RocksDBConfig) (*rocksDB, error) {
   102  	localLogger := logger.NewWith("path", path)
   103  
   104  	if err := openFile(path, !config.Secondary); err != nil {
   105  		return nil, err
   106  	}
   107  
   108  	// Ensure we have some minimal caching and file guarantees
   109  	if config.CacheSize < minCacheSizeForRocksDB {
   110  		logger.Warn("Cache size too small, increasing to minimum recommended", "oldCacheSize", config.CacheSize, "newCacheSize", minCacheSizeForRocksDB)
   111  		config.CacheSize = minCacheSizeForRocksDB
   112  	}
   113  	if config.MaxOpenFiles < minOpenFilesForRocksDB {
   114  		logger.Warn("Max open files too small, increasing to minimum recommended", "oldMaxOpenFiles", config.MaxOpenFiles, "newMaxOpenFiles", minOpenFilesForRocksDB)
   115  		config.MaxOpenFiles = minOpenFilesForRocksDB
   116  	}
   117  
   118  	blockCacheSize := config.CacheSize / 2 * 1024 * 1024 // half of cacheSize in MiB
   119  	bufferSize := config.CacheSize / 2 * 1024 * 1024     // half of cacheSize in MiB
   120  
   121  	bbto := grocksdb.NewDefaultBlockBasedTableOptions()
   122  	bbto.SetBlockCache(grocksdb.NewLRUCache(blockCacheSize))
   123  	if cacheIndexAndFilter := config.CacheIndexAndFilter; cacheIndexAndFilter {
   124  		bbto.SetCacheIndexAndFilterBlocks(cacheIndexAndFilter)
   125  		bbto.SetPinL0FilterAndIndexBlocksInCache(cacheIndexAndFilter)
   126  	}
   127  
   128  	policy := filterPolicyStrToNative(config.FilterPolicy)
   129  	if policy != nil {
   130  		bbto.SetFilterPolicy(policy)
   131  		bbto.SetOptimizeFiltersForMemory(true)
   132  	}
   133  
   134  	opts := grocksdb.NewDefaultOptions()
   135  	opts.SetBlockBasedTableFactory(bbto)
   136  	opts.SetCreateIfMissing(true)
   137  	opts.SetWriteBufferSize(bufferSize)
   138  	opts.SetDumpMallocStats(config.DumpMallocStat)
   139  	opts.SetCompression(compressionStrToType(config.CompressionType))
   140  	opts.SetBottommostCompression(compressionStrToType(config.BottommostCompressionType))
   141  	opts.SetMaxOpenFiles(config.MaxOpenFiles)
   142  
   143  	logger.Info("RocksDB configuration", "blockCacheSize", blockCacheSize, "bufferSize", bufferSize, "enableDumpMallocStat", config.DumpMallocStat, "compressionType", config.CompressionType, "bottommostCompressionType", config.BottommostCompressionType, "filterPolicy", config.FilterPolicy, "disableMetrics", config.DisableMetrics, "maxOpenFiles", config.MaxOpenFiles, "cacheIndexAndFilter", config.CacheIndexAndFilter)
   144  
   145  	var (
   146  		db  *grocksdb.DB
   147  		err error
   148  	)
   149  
   150  	if config.Secondary {
   151  		db, err = grocksdb.OpenDbAsSecondary(opts, path, path)
   152  	} else {
   153  		db, err = grocksdb.OpenDb(opts, path)
   154  	}
   155  	if err != nil {
   156  		return nil, err
   157  	}
   158  	return &rocksDB{
   159  		config: config,
   160  		db:     db,
   161  		wo:     grocksdb.NewDefaultWriteOptions(),
   162  		ro:     grocksdb.NewDefaultReadOptions(),
   163  		logger: localLogger,
   164  		quitCh: make(chan struct{}),
   165  	}, nil
   166  }
   167  
// Type returns RocksDB, the DBType value identifying this database backend.
func (db *rocksDB) Type() DBType {
	return RocksDB
}
   171  
   172  func (db *rocksDB) Put(key []byte, value []byte) error {
   173  	if db.config.Secondary {
   174  		return nil
   175  	}
   176  	if !db.config.DisableMetrics {
   177  		start := time.Now()
   178  		defer db.putTimer.Update(time.Since(start))
   179  	}
   180  	return db.db.Put(db.wo, key, value)
   181  }
   182  
   183  func (db *rocksDB) Has(key []byte) (bool, error) {
   184  	dat, err := db.db.GetBytes(db.ro, key)
   185  	if dat == nil || err != nil {
   186  		return false, err
   187  	}
   188  
   189  	return true, nil
   190  }
   191  
   192  func (db *rocksDB) Get(key []byte) ([]byte, error) {
   193  	if !db.config.DisableMetrics {
   194  		start := time.Now()
   195  		defer db.getTimer.Update(time.Since(start))
   196  	}
   197  	return db.get(key)
   198  }
   199  
   200  func (db *rocksDB) get(key []byte) ([]byte, error) {
   201  	dat, err := db.db.GetBytes(db.ro, key)
   202  	if dat == nil {
   203  		return nil, dataNotFoundErr
   204  	}
   205  
   206  	if err != nil {
   207  		return nil, err
   208  	}
   209  	return dat, nil
   210  }
   211  
   212  func (db *rocksDB) Delete(key []byte) error {
   213  	if db.config.Secondary {
   214  		return nil
   215  	}
   216  	return db.db.Delete(db.wo, key)
   217  }
   218  
// TryCatchUpWithPrimary forwards to the TryCatchUpWithPrimary method of the
// underlying grocksdb database and returns its error.
// NOTE(review): presumably only meaningful when the database was opened as a
// secondary instance — confirm against the grocksdb documentation.
func (db *rocksDB) TryCatchUpWithPrimary() error {
	return db.db.TryCatchUpWithPrimary()
}
   222  
// rdbIter iterates over the key/value pairs of a rocksDB that share a prefix.
type rdbIter struct {
	first  bool               // true until the first call to Next
	iter   *grocksdb.Iterator // underlying iterator, positioned by NewIterator
	prefix []byte             // key prefix an entry must have to be yielded
	db     *rocksDB           // database the iterator was created from
}
   229  
   230  // Next moves the iterator to the next key/value pair. It returns whether the
   231  // iterator is exhausted.
   232  func (i *rdbIter) Next() bool {
   233  	if i.first {
   234  		i.first = false
   235  	} else {
   236  		i.iter.Next()
   237  	}
   238  	return i.iter.ValidForPrefix(i.prefix)
   239  }
   240  
   241  // Error returns any accumulated error. Exhausting all the key/value pairs
   242  // is not considered to be an error.
   243  func (i *rdbIter) Error() error {
   244  	if i.first {
   245  		return nil
   246  	}
   247  	return i.iter.Err()
   248  }
   249  
   250  // Key returns the key of the current key/value pair, or nil if done. The caller
   251  // should not modify the contents of the returned slice, and its contents may
   252  // change on the next call to Next.
   253  func (i *rdbIter) Key() []byte {
   254  	if i.first {
   255  		return nil
   256  	}
   257  	key := i.iter.Key()
   258  	defer key.Free()
   259  	return key.Data()
   260  }
   261  
   262  // Value returns the value of the current key/value pair, or nil if done. The
   263  // caller should not modify the contents of the returned slice, and its contents
   264  // may change on the next call to Next.
   265  func (i *rdbIter) Value() []byte {
   266  	if i.first {
   267  		return nil
   268  	}
   269  	val := i.iter.Value()
   270  	defer val.Free()
   271  	return val.Data()
   272  }
   273  
   274  // Release releases associated resources. Release should always succeed and can
   275  // be called multiple times without causing error.
   276  func (i *rdbIter) Release() {
   277  	i.iter.Close()
   278  }
   279  
   280  // NewIterator creates a binary-alphabetical iterator over a subset
   281  // of database content with a particular key prefix, starting at a particular
   282  // initial key (or after, if it does not exist).
   283  func (db *rocksDB) NewIterator(prefix []byte, start []byte) Iterator {
   284  	iter := db.db.NewIterator(db.ro)
   285  	firstKey := append(prefix, start...)
   286  	iter.Seek(firstKey)
   287  	return &rdbIter{first: true, iter: iter, prefix: prefix, db: db}
   288  }
   289  
// Close shuts the database down: it closes quitCh (which makes the meter
// goroutine return), cancels RocksDB background work, closes the database and
// destroys the shared write and read options. Close must be called at most
// once, because closing quitCh a second time would panic.
func (db *rocksDB) Close() {
	close(db.quitCh)
	// NOTE(review): the true argument presumably asks RocksDB to wait for the
	// background work to finish — confirm against the grocksdb documentation.
	db.db.CancelAllBackgroundWork(true)
	db.db.Close()
	db.wo.Destroy()
	db.ro.Destroy()
	db.logger.Info("RocksDB is closed")
}
   298  
   299  func (db *rocksDB) updateMeter(name string, meter metrics.Meter) {
   300  	v, s := db.db.GetIntProperty(name)
   301  	if s {
   302  		meter.Mark(int64(v))
   303  	}
   304  }
   305  
   306  // Meter configures the database metrics collectors and
   307  func (db *rocksDB) Meter(prefix string) {
   308  	db.prefix = prefix
   309  
   310  	for _, property := range properties {
   311  		splited := strings.Split(property, ".")
   312  		name := strings.ReplaceAll(splited[1], "-", "/")
   313  		db.metrics = append(db.metrics, metrics.NewRegisteredMeter(prefix+name, nil))
   314  	}
   315  	db.getTimer = klaytnmetrics.NewRegisteredHybridTimer(prefix+"get/time", nil)
   316  	db.putTimer = klaytnmetrics.NewRegisteredHybridTimer(prefix+"put/time", nil)
   317  	db.batchWriteTimer = klaytnmetrics.NewRegisteredHybridTimer(prefix+"batchwrite/time", nil)
   318  
   319  	// Short circuit metering if the metrics system is disabled
   320  	// Above meters are initialized by NilMeter if metricutils.Enabled == false
   321  	if !metricutils.Enabled || db.config.DisableMetrics {
   322  		return
   323  	}
   324  
   325  	go db.meter(3 * time.Second)
   326  }
   327  
   328  func (db *rocksDB) meter(t time.Duration) {
   329  	ticker := time.NewTicker(t)
   330  	defer ticker.Stop()
   331  
   332  	for {
   333  		select {
   334  		case <-db.quitCh:
   335  			return
   336  		case <-ticker.C:
   337  			for idx, property := range properties {
   338  				db.updateMeter(property, db.metrics[idx])
   339  			}
   340  		}
   341  	}
   342  }
   343  
   344  func (db *rocksDB) NewBatch() Batch {
   345  	return &rdbBatch{b: grocksdb.NewWriteBatch(), db: db}
   346  }
   347  
// rdbBatch is a write-only rocksdb batch that commits changes to its host database
// when Write is called. A batch cannot be used concurrently.
type rdbBatch struct {
	b    *grocksdb.WriteBatch // underlying rocksdb write batch
	db   *rocksDB             // host database the batch is written to
	size int                  // grows by len(value) per Put and by 1 per Delete; zeroed by Reset
}
   355  
   356  // Put inserts the given value into the batch for later committing.
   357  func (b *rdbBatch) Put(key, value []byte) error {
   358  	if b.db.config.Secondary {
   359  		return nil
   360  	}
   361  	b.b.Put(key, value)
   362  	b.size += len(value)
   363  	return nil
   364  }
   365  
   366  // Delete inserts a key removal into the batch for later committing.
   367  func (b *rdbBatch) Delete(key []byte) error {
   368  	if b.db.config.Secondary {
   369  		return nil
   370  	}
   371  	b.b.Delete(key)
   372  	b.size++
   373  	return nil
   374  }
   375  
   376  // Write flushes any accumulated data to disk.
   377  func (b *rdbBatch) Write() error {
   378  	if b.db.config.Secondary {
   379  		return nil
   380  	}
   381  	if !b.db.config.DisableMetrics {
   382  		start := time.Now()
   383  		defer b.db.batchWriteTimer.Update(time.Since(start))
   384  	}
   385  	return b.write()
   386  }
   387  
// write commits the batch to the host database using the database's shared
// write options. It performs no secondary-instance check and records no
// metrics itself.
func (b *rdbBatch) write() error {
	return b.db.db.Write(b.db.wo, b.b)
}
   391  
// ValueSize retrieves the amount of data queued up for writing, as tracked in
// the batch's size counter since creation or the last Reset.
func (b *rdbBatch) ValueSize() int {
	return b.size
}
   396  
   397  // Reset resets the batch for reuse.
   398  func (b *rdbBatch) Reset() {
   399  	b.b.Clear()
   400  	b.size = 0
   401  }
   402  
// Release frees the memory allocated to the underlying rocksdb batch object by
// destroying it.
// NOTE(review): presumably the batch must not be used after Release — confirm
// against the grocksdb WriteBatch.Destroy documentation.
func (b *rdbBatch) Release() {
	b.b.Destroy()
}
   407  
// Replay is not implemented for rocksdb batches. It logs a critical message
// through the host database's logger, ignores w, and returns nil.
// NOTE(review): logger.Crit presumably terminates the process — confirm
// against the klaytn log package.
func (b *rdbBatch) Replay(w KeyValueWriter) error {
	b.db.logger.Crit("rocksdb batch does not implement Replay method")
	return nil
}
   412  }