github.com/ethereum/go-ethereum@v1.16.1/triedb/pathdb/buffer.go (about)

     1  // Copyright 2022 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package pathdb
    18  
    19  import (
    20  	"errors"
    21  	"fmt"
    22  	"time"
    23  
    24  	"github.com/VictoriaMetrics/fastcache"
    25  	"github.com/ethereum/go-ethereum/common"
    26  	"github.com/ethereum/go-ethereum/core/rawdb"
    27  	"github.com/ethereum/go-ethereum/ethdb"
    28  	"github.com/ethereum/go-ethereum/log"
    29  	"github.com/ethereum/go-ethereum/trie/trienode"
    30  )
    31  
    32  // buffer is a collection of modified states along with the modified trie nodes.
    33  // They are cached here to aggregate the disk write. The content of the buffer
    34  // must be checked before diving into disk (since it basically is not yet written
    35  // data).
    36  type buffer struct {
    37  	layers uint64    // The number of diff layers aggregated inside
    38  	limit  uint64    // The maximum memory allowance in bytes
    39  	nodes  *nodeSet  // Aggregated trie node set
    40  	states *stateSet // Aggregated state set
    41  
    42  	// done is the notifier whether the content in buffer has been flushed or not.
    43  	// This channel is nil if the buffer is not frozen.
    44  	done chan struct{}
    45  
    46  	// flushErr memorizes the error if any exception occurs during flushing
    47  	flushErr error
    48  }
    49  
    50  // newBuffer initializes the buffer with the provided states and trie nodes.
    51  func newBuffer(limit int, nodes *nodeSet, states *stateSet, layers uint64) *buffer {
    52  	// Don't panic for lazy users if any provided set is nil
    53  	if nodes == nil {
    54  		nodes = newNodeSet(nil)
    55  	}
    56  	if states == nil {
    57  		states = newStates(nil, nil, false)
    58  	}
    59  	return &buffer{
    60  		layers: layers,
    61  		limit:  uint64(limit),
    62  		nodes:  nodes,
    63  		states: states,
    64  	}
    65  }
    66  
    67  // account retrieves the account blob with account address hash.
    68  func (b *buffer) account(hash common.Hash) ([]byte, bool) {
    69  	return b.states.account(hash)
    70  }
    71  
    72  // storage retrieves the storage slot with account address hash and slot key hash.
    73  func (b *buffer) storage(addrHash common.Hash, storageHash common.Hash) ([]byte, bool) {
    74  	return b.states.storage(addrHash, storageHash)
    75  }
    76  
    77  // node retrieves the trie node with node path and its trie identifier.
    78  func (b *buffer) node(owner common.Hash, path []byte) (*trienode.Node, bool) {
    79  	return b.nodes.node(owner, path)
    80  }
    81  
    82  // commit merges the provided states and trie nodes into the buffer.
    83  func (b *buffer) commit(nodes *nodeSet, states *stateSet) *buffer {
    84  	b.layers++
    85  	b.nodes.merge(nodes)
    86  	b.states.merge(states)
    87  	return b
    88  }
    89  
    90  // revertTo is the reverse operation of commit. It also merges the provided states
    91  // and trie nodes into the buffer. The key difference is that the provided state
    92  // set should reverse the changes made by the most recent state transition.
    93  func (b *buffer) revertTo(db ethdb.KeyValueReader, nodes map[common.Hash]map[string]*trienode.Node, accounts map[common.Hash][]byte, storages map[common.Hash]map[common.Hash][]byte) error {
    94  	// Short circuit if no embedded state transition to revert
    95  	if b.layers == 0 {
    96  		return errStateUnrecoverable
    97  	}
    98  	b.layers--
    99  
   100  	// Reset the entire buffer if only a single transition left
   101  	if b.layers == 0 {
   102  		b.reset()
   103  		return nil
   104  	}
   105  	b.nodes.revertTo(db, nodes)
   106  	b.states.revertTo(accounts, storages)
   107  	return nil
   108  }
   109  
   110  // reset cleans up the disk cache.
   111  func (b *buffer) reset() {
   112  	b.layers = 0
   113  	b.nodes.reset()
   114  	b.states.reset()
   115  }
   116  
   117  // empty returns an indicator if buffer is empty.
   118  func (b *buffer) empty() bool {
   119  	return b.layers == 0
   120  }
   121  
   122  // full returns an indicator if the size of accumulated content exceeds the
   123  // configured threshold.
   124  func (b *buffer) full() bool {
   125  	return b.size() > b.limit
   126  }
   127  
   128  // size returns the approximate memory size of the held content.
   129  func (b *buffer) size() uint64 {
   130  	return b.states.size + b.nodes.size
   131  }
   132  
   133  // flush persists the in-memory dirty trie node into the disk if the configured
   134  // memory threshold is reached. Note, all data must be written atomically.
   135  func (b *buffer) flush(root common.Hash, db ethdb.KeyValueStore, freezer ethdb.AncientWriter, progress []byte, nodesCache, statesCache *fastcache.Cache, id uint64, postFlush func()) {
   136  	if b.done != nil {
   137  		panic("duplicated flush operation")
   138  	}
   139  	b.done = make(chan struct{}) // allocate the channel for notification
   140  
   141  	// Schedule the background thread to construct the batch, which usually
   142  	// take a few seconds.
   143  	go func() {
   144  		defer func() {
   145  			if postFlush != nil {
   146  				postFlush()
   147  			}
   148  			close(b.done)
   149  		}()
   150  
   151  		// Ensure the target state id is aligned with the internal counter.
   152  		head := rawdb.ReadPersistentStateID(db)
   153  		if head+b.layers != id {
   154  			b.flushErr = fmt.Errorf("buffer layers (%d) cannot be applied on top of persisted state id (%d) to reach requested state id (%d)", b.layers, head, id)
   155  			return
   156  		}
   157  
   158  		// Terminate the state snapshot generation if it's active
   159  		var (
   160  			start = time.Now()
   161  			batch = db.NewBatchWithSize((b.nodes.dbsize() + b.states.dbsize()) * 11 / 10) // extra 10% for potential pebble internal stuff
   162  		)
   163  		// Explicitly sync the state freezer to ensure all written data is persisted to disk
   164  		// before updating the key-value store.
   165  		//
   166  		// This step is crucial to guarantee that the corresponding state history remains
   167  		// available for state rollback.
   168  		if freezer != nil {
   169  			if err := freezer.SyncAncient(); err != nil {
   170  				b.flushErr = err
   171  				return
   172  			}
   173  		}
   174  		nodes := b.nodes.write(batch, nodesCache)
   175  		accounts, slots := b.states.write(batch, progress, statesCache)
   176  		rawdb.WritePersistentStateID(batch, id)
   177  		rawdb.WriteSnapshotRoot(batch, root)
   178  
   179  		// Flush all mutations in a single batch
   180  		size := batch.ValueSize()
   181  		if err := batch.Write(); err != nil {
   182  			b.flushErr = err
   183  			return
   184  		}
   185  		commitBytesMeter.Mark(int64(size))
   186  		commitNodesMeter.Mark(int64(nodes))
   187  		commitAccountsMeter.Mark(int64(accounts))
   188  		commitStoragesMeter.Mark(int64(slots))
   189  		commitTimeTimer.UpdateSince(start)
   190  
   191  		// The content in the frozen buffer is kept for consequent state access,
   192  		// TODO (rjl493456442) measure the gc overhead for holding this struct.
   193  		// TODO (rjl493456442) can we somehow get rid of it after flushing??
   194  		// TODO (rjl493456442) buffer itself is not thread-safe, add the lock
   195  		// protection if try to reset the buffer here.
   196  		// b.reset()
   197  		log.Debug("Persisted buffer content", "nodes", nodes, "accounts", accounts, "slots", slots, "bytes", common.StorageSize(size), "elapsed", common.PrettyDuration(time.Since(start)))
   198  	}()
   199  }
   200  
   201  // waitFlush blocks until the buffer has been fully flushed and returns any
   202  // stored errors that occurred during the process.
   203  func (b *buffer) waitFlush() error {
   204  	if b.done == nil {
   205  		return errors.New("the buffer is not frozen")
   206  	}
   207  	<-b.done
   208  	return b.flushErr
   209  }