github.com/klaytn/klaytn@v1.10.2/storage/database/sharded_database.go (about)

     1  // Copyright 2019 The klaytn Authors
     2  // This file is part of the klaytn library.
     3  //
     4  // The klaytn library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The klaytn library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the klaytn library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package database
    18  
    19  import (
    20  	"bytes"
    21  	"container/heap"
    22  	"context"
    23  	"fmt"
    24  	"path"
    25  	"strconv"
    26  	"sync"
    27  
    28  	"github.com/klaytn/klaytn/common"
    29  )
    30  
// errKeyLengthZero is returned when a zero-length key is used with the sharded
// database. The shard index is derived from the first byte of the key, so an
// empty key cannot be mapped to a shard.
var errKeyLengthZero = fmt.Errorf("database key for sharded database should be greater than 0")

// numShardsLimit is the largest number of shards accepted by newShardedDB.
// NOTE(review): presumably 256 because the shard index is taken from a single
// key byte - confirm.
const numShardsLimit = 256
    34  
// shardedDB spreads key/value entries over several underlying databases
// (shards). The shard of an entry is selected from the first byte of its key.
type shardedDB struct {
	// fn is the base directory (DBConfig.Dir) the shards were created under.
	fn string
	// shards holds the underlying databases, indexed by shard number.
	shards []Database
	// numShards is the number of shards the database was created with.
	numShards uint

	// sdbBatchTaskCh feeds batch write tasks to the batch write workers.
	sdbBatchTaskCh chan sdbBatchTask
}
    42  
// sdbBatchTask is a unit of work handed to a batch write worker: one batch to
// write, together with the channel on which the outcome must be reported.
type sdbBatchTask struct {
	batch    Batch               // A batch that each worker executes.
	index    int                 // Index of given batch.
	resultCh chan sdbBatchResult // Batch result channel for each shardedDBBatch.
}
    48  
// sdbBatchResult is what a batch write worker reports back after executing a
// sdbBatchTask.
type sdbBatchResult struct {
	index int   // Index of the batch result.
	err   error // Error from the batch write operation.
}
    53  
    54  // newShardedDB creates database with numShards shards, or partitions.
    55  // The type of database is specified DBConfig.DBType.
    56  func newShardedDB(dbc *DBConfig, et DBEntryType, numShards uint) (*shardedDB, error) {
    57  	if numShards == 0 {
    58  		logger.Crit("numShards should be greater than 0!")
    59  	}
    60  
    61  	if numShards > numShardsLimit {
    62  		logger.Crit(fmt.Sprintf("numShards should be equal to or smaller than %v, but it is %v.", numShardsLimit, numShards))
    63  	}
    64  
    65  	if !IsPow2(numShards) {
    66  		logger.Crit(fmt.Sprintf("numShards should be power of two, but it is %v", numShards))
    67  	}
    68  
    69  	shards := make([]Database, 0, numShards)
    70  	sdbBatchTaskCh := make(chan sdbBatchTask, numShards*2)
    71  	sdbLevelDBCacheSize := dbc.LevelDBCacheSize / int(numShards)
    72  	sdbOpenFilesLimit := dbc.OpenFilesLimit / int(numShards)
    73  	for i := 0; i < int(numShards); i++ {
    74  		copiedDBC := *dbc
    75  		copiedDBC.Dir = path.Join(copiedDBC.Dir, strconv.Itoa(i))
    76  		copiedDBC.LevelDBCacheSize = sdbLevelDBCacheSize
    77  		copiedDBC.OpenFilesLimit = sdbOpenFilesLimit
    78  
    79  		db, err := newDatabase(&copiedDBC, et)
    80  		if err != nil {
    81  			return nil, err
    82  		}
    83  		shards = append(shards, db)
    84  		go batchWriteWorker(sdbBatchTaskCh)
    85  	}
    86  
    87  	logger.Info("Created a sharded database", "dbType", et, "numShards", numShards)
    88  	return &shardedDB{
    89  		fn: dbc.Dir, shards: shards,
    90  		numShards: numShards, sdbBatchTaskCh: sdbBatchTaskCh,
    91  	}, nil
    92  }
    93  
    94  // batchWriteWorker executes passed batch tasks.
    95  func batchWriteWorker(batchTasks <-chan sdbBatchTask) {
    96  	for task := range batchTasks {
    97  		task.resultCh <- sdbBatchResult{task.index, task.batch.Write()}
    98  	}
    99  }
   100  
   101  // IsPow2 checks if the given number is power of two or not.
   102  func IsPow2(num uint) bool {
   103  	return (num & (num - 1)) == 0
   104  }
   105  
   106  // shardIndexByKey returns shard index derived from the given key.
   107  // If len(key) is zero, it returns errKeyLengthZero.
   108  func shardIndexByKey(key []byte, numShards uint) (int, error) {
   109  	if len(key) == 0 {
   110  		return 0, errKeyLengthZero
   111  	}
   112  
   113  	return int(key[0]) & (int(numShards) - 1), nil
   114  }
   115  
   116  // getShardByKey returns the shard corresponding to the given key.
   117  func (db *shardedDB) getShardByKey(key []byte) (Database, error) {
   118  	if shardIndex, err := shardIndexByKey(key, uint(db.numShards)); err != nil {
   119  		return nil, err
   120  	} else {
   121  		return db.shards[shardIndex], nil
   122  	}
   123  }
   124  
   125  func (db *shardedDB) Put(key []byte, value []byte) error {
   126  	if shard, err := db.getShardByKey(key); err != nil {
   127  		return err
   128  	} else {
   129  		return shard.Put(key, value)
   130  	}
   131  }
   132  
   133  func (db *shardedDB) Get(key []byte) ([]byte, error) {
   134  	if shard, err := db.getShardByKey(key); err != nil {
   135  		return nil, err
   136  	} else {
   137  		return shard.Get(key)
   138  	}
   139  }
   140  
   141  func (db *shardedDB) Has(key []byte) (bool, error) {
   142  	if shard, err := db.getShardByKey(key); err != nil {
   143  		return false, err
   144  	} else {
   145  		return shard.Has(key)
   146  	}
   147  }
   148  
   149  func (db *shardedDB) Delete(key []byte) error {
   150  	if shard, err := db.getShardByKey(key); err != nil {
   151  		return err
   152  	} else {
   153  		return shard.Delete(key)
   154  	}
   155  }
   156  
   157  func (db *shardedDB) Close() {
   158  	close(db.sdbBatchTaskCh)
   159  
   160  	for _, shard := range db.shards {
   161  		shard.Close()
   162  	}
   163  }
   164  
// Buffer sizes of the channels used by the sharded iterators.
// A channel that is too small slows down the iterator.
const shardedDBCombineChanSize = 1024 // Size of resultCh
const shardedDBSubChannelSize = 128   // Size of each sub-channel of resultChs
   168  
// shardedDBIterator iterates over all items of every shard DB.
// This is useful when you want to get items in serial in binary-alphabetical order.
type shardedDBIterator struct {
	// parallelIterator owns the per-shard iterators, their channels and the
	// context used to stop them.
	parallelIterator shardedDBParallelIterator

	// resultCh delivers the entries that Next() hands out.
	resultCh chan common.Entry
	key      []byte // current key
	value    []byte // current value
}
   178  
   179  // NewIterator creates a binary-alphabetical iterator over a subset
   180  // of database content with a particular key prefix, starting at a particular
   181  // initial key (or after, if it does not exist).
   182  func (db *shardedDB) NewIterator(prefix []byte, start []byte) Iterator {
   183  	it := &shardedDBIterator{
   184  		parallelIterator: db.NewParallelIterator(context.TODO(), prefix, start, nil),
   185  		resultCh:         make(chan common.Entry, shardedDBCombineChanSize),
   186  	}
   187  
   188  	go it.runCombineWorker()
   189  
   190  	return it
   191  }
   192  
   193  // NewIteratorUnsorted creates a iterator over the entire keyspace contained within
   194  // the key-value database. This is useful when you want to get items fast in serial.
   195  // If you want to get ordered items in serial, checkout shardedDB.NewIterator()
   196  // If you want to get items in parallel from channels, checkout shardedDB.NewParallelIterator()
   197  // IteratorUnsorted is a implementation of Iterator and data are accessed with
   198  // Next(), Key() and Value() methods. With ChanIterator, data can be accessed with
   199  // channels. The channels are gained with Channels() method.
   200  func (db *shardedDB) NewIteratorUnsorted(prefix []byte, start []byte) Iterator {
   201  	resultCh := make(chan common.Entry, shardedDBCombineChanSize)
   202  	return &shardedDBIterator{
   203  		parallelIterator: db.NewParallelIterator(context.TODO(), prefix, start, resultCh),
   204  		resultCh:         resultCh,
   205  	}
   206  }
   207  
   208  // runCombineWorker fetches any key/value from resultChs and put the data in resultCh
   209  // in binary-alphabetical order.
   210  func (it *shardedDBIterator) runCombineWorker() {
   211  	// creates min-priority queue smallest values from each iterators
   212  	entries := &entryHeap{}
   213  	heap.Init(entries)
   214  	for i, ch := range it.parallelIterator.resultChs {
   215  		if e, ok := <-ch; ok {
   216  			heap.Push(entries, entryWithShardNum{e, i})
   217  		}
   218  	}
   219  
   220  chanIter:
   221  	for len(*entries) != 0 {
   222  		// check if done
   223  		select {
   224  		case <-it.parallelIterator.ctx.Done():
   225  			logger.Trace("[shardedDBIterator] combine worker ends due to ctx")
   226  			break chanIter
   227  		default:
   228  		}
   229  
   230  		// look for smallest key
   231  		minEntry := heap.Pop(entries).(entryWithShardNum)
   232  
   233  		// fill resultCh with smallest key
   234  		it.resultCh <- minEntry.Entry
   235  
   236  		// fill used entry with new entry
   237  		// skip this if channel is closed
   238  		if e, ok := <-it.parallelIterator.resultChs[minEntry.shardNum]; ok {
   239  			heap.Push(entries, entryWithShardNum{e, minEntry.shardNum})
   240  		}
   241  	}
   242  	logger.Trace("[shardedDBIterator] combine worker finished")
   243  	close(it.resultCh)
   244  }
   245  
   246  // Next gets the next item from iterators.
   247  func (it *shardedDBIterator) Next() bool {
   248  	e, ok := <-it.resultCh
   249  	if !ok {
   250  		logger.Debug("[shardedDBIterator] Next is called on closed channel")
   251  		return false
   252  	}
   253  	it.key, it.value = e.Key, e.Val
   254  	return true
   255  }
   256  
   257  func (it *shardedDBIterator) Error() error {
   258  	for i, iter := range it.parallelIterator.iterators {
   259  		if iter.Error() != nil {
   260  			logger.Error("[shardedDBIterator] error from iterator",
   261  				"err", iter.Error(), "shardNum", i, "key", it.key, "val", it.value)
   262  			return iter.Error()
   263  		}
   264  	}
   265  	return nil
   266  }
   267  
// Key returns the key of the entry fetched by the last successful Next().
func (it *shardedDBIterator) Key() []byte {
	return it.key
}
   271  
// Value returns the value of the entry fetched by the last successful Next().
func (it *shardedDBIterator) Value() []byte {
	return it.value
}
   275  
// Release cancels the context of the underlying parallel iterator, which
// signals its workers to stop.
func (it *shardedDBIterator) Release() {
	it.parallelIterator.cancel()
}
   279  
// entryWithShardNum is an entry annotated with the number of the shard
// channel it was read from.
type entryWithShardNum struct {
	common.Entry
	shardNum int
}
   284  
   285  type entryHeap []entryWithShardNum
   286  
   287  func (e entryHeap) Len() int {
   288  	return len(e)
   289  }
   290  
   291  func (e entryHeap) Less(i, j int) bool {
   292  	return bytes.Compare(e[i].Key, e[j].Key) < 0
   293  }
   294  
   295  func (e entryHeap) Swap(i, j int) {
   296  	e[i], e[j] = e[j], e[i]
   297  }
   298  
   299  func (e *entryHeap) Push(x interface{}) {
   300  	*e = append(*e, x.(entryWithShardNum))
   301  }
   302  
   303  func (e *entryHeap) Pop() interface{} {
   304  	old := *e
   305  	n := len(old)
   306  	element := old[n-1]
   307  	*e = old[0 : n-1]
   308  	return element
   309  }
   310  
// shardedDBParallelIterator holds one iterator per shard DB.
// The channels fed by these iterators can be obtained with Channels().
// Each iterator fetches values in binary-alphabetical order.
// This is useful when you want to operate on the items in parallel.
type shardedDBParallelIterator struct {
	// ctx is checked by the workers; cancel cancels it.
	ctx    context.Context
	cancel context.CancelFunc

	// iterators holds the iterator of each shard, indexed by shard number.
	iterators []Iterator

	combinedChan bool // all workers put items to one resultChan
	shardNum     int  // num of shards left to iterate
	// shardNumMu guards shardNum.
	shardNumMu *sync.Mutex
	// resultChs holds the channel each shard worker writes to, indexed by
	// shard number.
	resultChs []chan common.Entry
}
   326  
   327  // NewParallelIterator creates iterators for each shard DB. This is useful when you
   328  // want to operate on each items in parallel.
   329  // If `resultCh` is given, all items are written to `resultCh`, unsorted with a
   330  // particular key prefix, starting at a particular initial key. If `resultCh`
   331  // is not given, new channels are created for each DB. Items are written to
   332  // corresponding channels in binary-alphabetical order. The channels can be
   333  // gained by calling `Channels()`.
   334  //
   335  // If you want to get ordered items in serial, checkout shardedDB.NewIterator()
   336  // If you want to get unordered items in serial with Iterator Interface,
   337  // checkout shardedDB.NewIteratorUnsorted().
   338  func (db *shardedDB) NewParallelIterator(ctx context.Context, prefix []byte, start []byte, resultCh chan common.Entry) shardedDBParallelIterator {
   339  	if ctx == nil {
   340  		ctx = context.TODO()
   341  	}
   342  
   343  	it := shardedDBParallelIterator{
   344  		ctx:          ctx,
   345  		cancel:       nil,
   346  		iterators:    make([]Iterator, len(db.shards)),
   347  		combinedChan: resultCh != nil,
   348  		shardNum:     len(db.shards),
   349  		shardNumMu:   &sync.Mutex{},
   350  		resultChs:    make([]chan common.Entry, len(db.shards)),
   351  	}
   352  	it.ctx, it.cancel = context.WithCancel(ctx)
   353  
   354  	for i, shard := range db.shards {
   355  		it.iterators[i] = shard.NewIterator(prefix, start)
   356  		if resultCh == nil {
   357  			it.resultChs[i] = make(chan common.Entry, shardedDBSubChannelSize)
   358  		} else {
   359  			it.resultChs[i] = resultCh
   360  		}
   361  		go it.runChanWorker(it.ctx, it.iterators[i], it.resultChs[i])
   362  	}
   363  
   364  	return it
   365  }
   366  
   367  // runChanWorker runs a worker. The worker gets key/value pair from
   368  // `it` and push the value to `resultCh`.
   369  // `iterator.Release()` is called after all iterating is finished.
   370  // `resultCh` is closed after the iterating is finished.
   371  func (sit *shardedDBParallelIterator) runChanWorker(ctx context.Context, it Iterator, resultCh chan common.Entry) {
   372  iter:
   373  	for it.Next() {
   374  		select {
   375  		case <-ctx.Done():
   376  			break iter
   377  		default:
   378  		}
   379  		key := make([]byte, len(it.Key()))
   380  		val := make([]byte, len(it.Value()))
   381  		copy(key, it.Key())
   382  		copy(val, it.Value())
   383  		resultCh <- common.Entry{Key: key, Val: val}
   384  	}
   385  	// Release the iterator. There is nothing to iterate anymore.
   386  	it.Release()
   387  	// Close `resultCh`. If it is `combinedChan`, the close only happens
   388  	// when this is the last living worker.
   389  	sit.shardNumMu.Lock()
   390  	defer sit.shardNumMu.Unlock()
   391  	if sit.shardNum--; sit.combinedChan && sit.shardNum > 0 {
   392  		return
   393  	}
   394  	close(resultCh)
   395  }
   396  
// Channels returns the channels the shard workers write to, indexed by shard
// number. When a combined result channel was given at creation, every element
// is that same channel.
func (it *shardedDBParallelIterator) Channels() []chan common.Entry {
	return it.resultChs
}
   401  
// Release cancels the context observed by the workers, asking all of them to
// stop. Even after Release() is called, there can be some items left in the
// channels.
// The Release() of each shard iterator is called in `runChanWorker`.
func (it *shardedDBParallelIterator) Release() {
	it.cancel()
}
   408  
   409  func (db *shardedDB) NewBatch() Batch {
   410  	batches := make([]Batch, 0, db.numShards)
   411  	for i := 0; i < int(db.numShards); i++ {
   412  		batches = append(batches, db.shards[i].NewBatch())
   413  	}
   414  
   415  	return &shardedDBBatch{
   416  		batches: batches, numBatches: db.numShards,
   417  		taskCh: db.sdbBatchTaskCh, resultCh: make(chan sdbBatchResult, db.numShards),
   418  	}
   419  }
   420  
// Type returns ShardedDB, the DBType of this database.
func (db *shardedDB) Type() DBType {
	return ShardedDB
}
   424  
   425  func (db *shardedDB) Meter(prefix string) {
   426  	for index, shard := range db.shards {
   427  		shard.Meter(prefix + strconv.Itoa(index))
   428  	}
   429  }
   430  
// shardedDBBatch is a batch for a sharded database. It keeps one batch per
// shard and routes every operation to the batch selected by the key.
type shardedDBBatch struct {
	// batches holds the batch of each shard, indexed by shard number.
	batches []Batch
	// numBatches is the number of shards, used to derive the index from a key.
	numBatches uint

	// taskCh is the channel on which write tasks are handed to the workers.
	taskCh chan sdbBatchTask
	// resultCh is the channel on which the workers report write results.
	resultCh chan sdbBatchResult
}
   438  
   439  func (sdbBatch *shardedDBBatch) Put(key []byte, value []byte) error {
   440  	if ShardIndex, err := shardIndexByKey(key, sdbBatch.numBatches); err != nil {
   441  		return err
   442  	} else {
   443  		return sdbBatch.batches[ShardIndex].Put(key, value)
   444  	}
   445  }
   446  
   447  func (sdbBatch *shardedDBBatch) Delete(key []byte) error {
   448  	if ShardIndex, err := shardIndexByKey(key, sdbBatch.numBatches); err != nil {
   449  		return err
   450  	} else {
   451  		return sdbBatch.batches[ShardIndex].Delete(key)
   452  	}
   453  }
   454  
   455  // ValueSize is called to determine whether to write batches when it exceeds
   456  // certain limit. shardedDB returns the largest size of its batches to
   457  // write all batches at once when one of batch exceeds the limit.
   458  func (sdbBatch *shardedDBBatch) ValueSize() int {
   459  	maxSize := 0
   460  	for _, batch := range sdbBatch.batches {
   461  		if batch.ValueSize() > maxSize {
   462  			maxSize = batch.ValueSize()
   463  		}
   464  	}
   465  	return maxSize
   466  }
   467  
   468  // Write passes the list of batch tasks to taskCh so batch can be processed
   469  // by underlying workers. Write waits until all workers return the result.
   470  func (sdbBatch *shardedDBBatch) Write() error {
   471  	for index, batch := range sdbBatch.batches {
   472  		sdbBatch.taskCh <- sdbBatchTask{batch, index, sdbBatch.resultCh}
   473  	}
   474  
   475  	var err error
   476  	for range sdbBatch.batches {
   477  		if batchResult := <-sdbBatch.resultCh; batchResult.err != nil {
   478  			logger.Error("Error while writing sharded batch", "index", batchResult.index, "err", batchResult.err)
   479  			err = batchResult.err
   480  		}
   481  	}
   482  	// Leave logs for each error but only return the last one.
   483  	return err
   484  }
   485  
   486  func (sdbBatch *shardedDBBatch) Reset() {
   487  	for _, batch := range sdbBatch.batches {
   488  		batch.Reset()
   489  	}
   490  }
   491  
   492  func (sdbBatch *shardedDBBatch) Replay(w KeyValueWriter) error {
   493  	for _, batch := range sdbBatch.batches {
   494  		if err := batch.Replay(w); err != nil {
   495  			return err
   496  		}
   497  	}
   498  	return nil
   499  }