github.com/MetalBlockchain/subnet-evm@v0.4.9/trie/database.go

// (c) 2020-2022, Ava Labs, Inc.
//
// This file is a derived work, based on the go-ethereum library whose original
// notices appear below.
//
// It is distributed under a license compatible with the licensing terms of the
// original code from which it is derived.
//
// Much love to the original authors for their work.
// **********
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package trie

import (
	"errors"
	"fmt"
	"io"
	"reflect"
	"sync"
	"time"

	"github.com/MetalBlockchain/subnet-evm/core/rawdb"
	"github.com/MetalBlockchain/subnet-evm/core/types"
	"github.com/MetalBlockchain/subnet-evm/ethdb"
	"github.com/MetalBlockchain/subnet-evm/metrics"
	"github.com/MetalBlockchain/subnet-evm/utils"
	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/rlp"
)

const (
	cacheStatsUpdateFrequency = 1000 // update trie cache stats once per 1000 ops
)

var (
	memcacheCleanHitMeter   = metrics.NewRegisteredMeter("trie/memcache/clean/hit", nil)
	memcacheCleanMissMeter  = metrics.NewRegisteredMeter("trie/memcache/clean/miss", nil)
	memcacheCleanReadMeter  = metrics.NewRegisteredMeter("trie/memcache/clean/read", nil)
	memcacheCleanWriteMeter = metrics.NewRegisteredMeter("trie/memcache/clean/write", nil)

	memcacheDirtyHitMeter       = metrics.NewRegisteredMeter("trie/memcache/dirty/hit", nil)
	memcacheDirtyMissMeter      = metrics.NewRegisteredMeter("trie/memcache/dirty/miss", nil)
	memcacheDirtyReadMeter      = metrics.NewRegisteredMeter("trie/memcache/dirty/read", nil)
	memcacheDirtyWriteMeter     = metrics.NewRegisteredMeter("trie/memcache/dirty/write", nil)
	memcacheDirtySizeGauge      = metrics.NewRegisteredGaugeFloat64("trie/memcache/dirty/size", nil)
	memcacheDirtyChildSizeGauge = metrics.NewRegisteredGaugeFloat64("trie/memcache/dirty/childsize", nil)
	memcacheDirtyNodesGauge     = metrics.NewRegisteredGauge("trie/memcache/dirty/nodes", nil)

	memcacheFlushMeter         = metrics.NewRegisteredMeter("trie/memcache/flush/count", nil)
	memcacheFlushTimeTimer     = metrics.NewRegisteredResettingTimer("trie/memcache/flush/time", nil)
	memcacheFlushLockTimeTimer = metrics.NewRegisteredResettingTimer("trie/memcache/flush/locktime", nil)
	memcacheFlushNodesMeter    = metrics.NewRegisteredMeter("trie/memcache/flush/nodes", nil)
	memcacheFlushSizeMeter     = metrics.NewRegisteredMeter("trie/memcache/flush/size", nil)

	memcacheGCTimeTimer  = metrics.NewRegisteredResettingTimer("trie/memcache/gc/time", nil)
	memcacheGCNodesMeter = metrics.NewRegisteredMeter("trie/memcache/gc/nodes", nil)
	memcacheGCSizeMeter  = metrics.NewRegisteredMeter("trie/memcache/gc/size", nil)

	memcacheCommitMeter         = metrics.NewRegisteredMeter("trie/memcache/commit/count", nil)
	memcacheCommitTimeTimer     = metrics.NewRegisteredResettingTimer("trie/memcache/commit/time", nil)
	memcacheCommitLockTimeTimer = metrics.NewRegisteredResettingTimer("trie/memcache/commit/locktime", nil)
	memcacheCommitNodesMeter    = metrics.NewRegisteredMeter("trie/memcache/commit/nodes", nil)
	memcacheCommitSizeMeter     = metrics.NewRegisteredMeter("trie/memcache/commit/size", nil)
)

// Database is an intermediate write layer between the trie data structures and
// the disk database. The aim is to accumulate trie writes in-memory and only
// periodically flush a couple of tries to disk, garbage collecting the remainder.
//
// The trie Database is thread-safe in its mutations and in providing individual,
// independent node access.
type Database struct {
	diskdb ethdb.KeyValueStore // Persistent storage for matured trie nodes

	cleans  *utils.MeteredCache         // GC friendly memory cache of clean node RLPs
	dirties map[common.Hash]*cachedNode // Data and references relationships of dirty trie nodes
	oldest  common.Hash                 // Oldest tracked node, flush-list head
	newest  common.Hash                 // Newest tracked node, flush-list tail

	gctime  time.Duration      // Time spent on garbage collection since last commit
	gcnodes uint64             // Nodes garbage collected since last commit
	gcsize  common.StorageSize // Data storage garbage collected since last commit

	flushtime  time.Duration      // Time spent on data flushing since last commit
	flushnodes uint64             // Nodes flushed since last commit
	flushsize  common.StorageSize // Data storage flushed since last commit

	dirtiesSize  common.StorageSize // Storage size of the dirty node cache (exc. metadata)
	childrenSize common.StorageSize // Storage size of the external children tracking
	preimages    *preimageStore     // The store for caching preimages

	lock sync.RWMutex
}
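
// The dirty nodes form a doubly-linked flush-list threaded through the
// flushPrev/flushNext fields of each cachedNode: [oldest] is the head (the
// first candidate to be written out by Cap) and [newest] is the tail, where
// insert appends newly added dirty nodes.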

// rawNode is a simple binary blob used to differentiate between collapsed trie
// nodes and already encoded RLP binary blobs (while at the same time storing
// them in the same cache fields).
type rawNode []byte

func (n rawNode) cache() (hashNode, bool)   { panic("this should never end up in a live trie") }
func (n rawNode) fstring(ind string) string { panic("this should never end up in a live trie") }

func (n rawNode) EncodeRLP(w io.Writer) error {
	_, err := w.Write(n)
	return err
}

// rawFullNode represents only the useful data content of a full node, with the
// caches and flags stripped out to minimize its data storage. This type honors
// the same RLP encoding as the original parent.
type rawFullNode [17]node

func (n rawFullNode) cache() (hashNode, bool)   { panic("this should never end up in a live trie") }
func (n rawFullNode) fstring(ind string) string { panic("this should never end up in a live trie") }

func (n rawFullNode) EncodeRLP(w io.Writer) error {
	eb := rlp.NewEncoderBuffer(w)
	n.encode(eb)
	return eb.Flush()
}

// rawShortNode represents only the useful data content of a short node, with the
// caches and flags stripped out to minimize its data storage. This type honors
// the same RLP encoding as the original parent.
type rawShortNode struct {
	Key []byte
	Val node
}

func (n rawShortNode) cache() (hashNode, bool)   { panic("this should never end up in a live trie") }
func (n rawShortNode) fstring(ind string) string { panic("this should never end up in a live trie") }

// cachedNode is all the information we know about a single cached trie node
// in the memory database write layer.
type cachedNode struct {
	node node   // Cached collapsed trie node, or raw rlp data
	size uint16 // Byte size of the useful cached data

	parents  uint32                 // Number of live nodes referencing this one
	children map[common.Hash]uint16 // External children referenced by this node

	flushPrev common.Hash // Previous node in the flush-list
	flushNext common.Hash // Next node in the flush-list
}

// cachedNodeSize is the raw size of a cachedNode data structure without any
// node data included. It's an approximate size, but should be a lot better
// than not counting them.
var cachedNodeSize = int(reflect.TypeOf(cachedNode{}).Size())

// cachedNodeChildrenSize is the raw size of an initialized but empty external
// reference map.
const cachedNodeChildrenSize = 48

// rlp returns the raw rlp encoded blob of the cached trie node, either directly
// from the cache, or by regenerating it from the collapsed node.
func (n *cachedNode) rlp() []byte {
	if node, ok := n.node.(rawNode); ok {
		return node
	}
	return nodeToBytes(n.node)
}

// obj returns the decoded and expanded trie node, either directly from the cache,
// or by regenerating it from the rlp encoded blob.
func (n *cachedNode) obj(hash common.Hash) node {
	if node, ok := n.node.(rawNode); ok {
		// Raw-blob format nodes are loaded either from the clean cache or the
		// database; each is its own copy, so it is safe to use the unsafe decoder.
		return mustDecodeNodeUnsafe(hash[:], node)
	}
	return expandNode(hash[:], n.node)
}

// forChilds invokes the callback for all the tracked children of this node,
// both the implicit ones from inside the node as well as the explicit ones
// from outside the node.
func (n *cachedNode) forChilds(onChild func(hash common.Hash)) {
	for child := range n.children {
		onChild(child)
	}
	if _, ok := n.node.(rawNode); !ok {
		forGatherChildren(n.node, onChild)
	}
}

// forGatherChildren traverses the node hierarchy of a collapsed storage node and
// invokes the callback for all the hashnode children.
func forGatherChildren(n node, onChild func(hash common.Hash)) {
	switch n := n.(type) {
	case *rawShortNode:
		forGatherChildren(n.Val, onChild)
	case rawFullNode:
		for i := 0; i < 16; i++ {
			forGatherChildren(n[i], onChild)
		}
	case hashNode:
		onChild(common.BytesToHash(n))
	case valueNode, nil, rawNode:
	default:
		panic(fmt.Sprintf("unknown node type: %T", n))
	}
}

// simplifyNode traverses the hierarchy of an expanded memory node and discards
// all the internal caches, returning a node that only contains the raw data.
func simplifyNode(n node) node {
	switch n := n.(type) {
	case *shortNode:
		// Short nodes discard the flags and cascade
		return &rawShortNode{Key: n.Key, Val: simplifyNode(n.Val)}

	case *fullNode:
		// Full nodes discard the flags and cascade
		node := rawFullNode(n.Children)
		for i := 0; i < len(node); i++ {
			if node[i] != nil {
				node[i] = simplifyNode(node[i])
			}
		}
		return node

	case valueNode, hashNode, rawNode:
		return n

	default:
		panic(fmt.Sprintf("unknown node type: %T", n))
	}
}

// expandNode traverses the node hierarchy of a collapsed storage node and converts
// all fields and keys into expanded memory form.
func expandNode(hash hashNode, n node) node {
	switch n := n.(type) {
	case *rawShortNode:
		// Short nodes need key and child expansion
		return &shortNode{
			Key: compactToHex(n.Key),
			Val: expandNode(nil, n.Val),
			flags: nodeFlag{
				hash: hash,
			},
		}

	case rawFullNode:
		// Full nodes need child expansion
		node := &fullNode{
			flags: nodeFlag{
				hash: hash,
			},
		}
		for i := 0; i < len(node.Children); i++ {
			if n[i] != nil {
				node.Children[i] = expandNode(nil, n[i])
			}
		}
		return node

	case valueNode, hashNode:
		return n

	default:
		panic(fmt.Sprintf("unknown node type: %T", n))
	}
}

// Config defines all necessary options for database.
type Config struct {
	Cache       int    // Memory allowance (MB) to use for caching trie nodes in memory
	Preimages   bool   // Flag whether the preimages of trie keys are recorded
	Journal     string // File location to load the trie clean cache from
	StatsPrefix string // Prefix for cache stats (disabled if empty)
}

// NewDatabase creates a new trie database to store ephemeral trie content before
// it's written out to disk or garbage collected. No read cache is created, so all
// data retrievals will hit the underlying disk database.
func NewDatabase(diskdb ethdb.KeyValueStore) *Database {
	return NewDatabaseWithConfig(diskdb, nil)
}

// NewDatabaseWithConfig creates a new trie database to store ephemeral trie content
// before it's written out to disk or garbage collected. It also acts as a read cache
// for nodes loaded from disk.
func NewDatabaseWithConfig(diskdb ethdb.KeyValueStore, config *Config) *Database {
	var cleans *utils.MeteredCache
	if config != nil && config.Cache > 0 {
		cleans = utils.NewMeteredCache(config.Cache*1024*1024, config.Journal, config.StatsPrefix, cacheStatsUpdateFrequency)
	}
	var preimage *preimageStore
	if config != nil && config.Preimages {
		preimage = newPreimageStore(diskdb)
	}
	db := &Database{
		diskdb: diskdb,
		cleans: cleans,
		dirties: map[common.Hash]*cachedNode{{}: {
			children: make(map[common.Hash]uint16),
		}},
		preimages: preimage,
	}
	return db
}
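
// A minimal usage sketch (illustrative, not part of the original file). The
// backing store is an assumption here; any ethdb.KeyValueStore works, and the
// fork's ethdb/memorydb package is a typical choice in tests.
//
//	db := NewDatabaseWithConfig(memorydb.New(), &Config{
//		Cache:     128,  // 128 MB of clean-node cache
//		Preimages: true, // record key preimages alongside nodes
//	})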

// DiskDB retrieves the persistent storage backing the trie database.
func (db *Database) DiskDB() ethdb.KeyValueStore {
	return db.diskdb
}

// insert inserts a simplified trie node into the memory database.
// All nodes inserted by this function will be reference tracked
// and in theory should only be used for **trie node** insertion.
func (db *Database) insert(hash common.Hash, size int, node node) {
	// If the node's already cached, skip
	if _, ok := db.dirties[hash]; ok {
		return
	}
	memcacheDirtyWriteMeter.Mark(int64(size))

	// Create the cached entry for this node
	entry := &cachedNode{
		node:      node,
		size:      uint16(size),
		flushPrev: db.newest,
	}
	entry.forChilds(func(child common.Hash) {
		if c := db.dirties[child]; c != nil {
			c.parents++
		}
	})
	db.dirties[hash] = entry

	// Update the flush-list endpoints
	if db.oldest == (common.Hash{}) {
		db.oldest, db.newest = hash, hash
	} else {
		db.dirties[db.newest].flushNext, db.newest = hash, hash
	}
	db.dirtiesSize += common.StorageSize(common.HashLength + entry.size)
}

// RawNode retrieves an encoded cached trie node from memory. If it cannot be found
// cached, the method queries the persistent database for the content. This function
// will not return the metaroot.
func (db *Database) RawNode(h common.Hash) ([]byte, error) {
	if h == (common.Hash{}) {
		return nil, errors.New("not found")
	}
	enc, cn, err := db.node(h)
	if err != nil {
		return nil, err
	}
	if len(enc) > 0 {
		return enc, nil
	}
	return cn.rlp(), nil
}

// EncodedNode returns a formatted [node] when given a node hash. If no node
// exists, nil is returned. This function will return the metaroot.
func (db *Database) EncodedNode(h common.Hash) node {
	enc, cn, err := db.node(h)
	if err != nil {
		return nil
	}
	if len(enc) > 0 {
		return mustDecodeNode(h[:], enc)
	}
	return cn.obj(h)
}

// node retrieves an encoded cached trie node from memory. If it cannot be found
// cached, the method queries the persistent database for the content.
//
// We do not return a single node representation to avoid useless
// encoding/decoding depending on the caller.
func (db *Database) node(hash common.Hash) ([]byte, *cachedNode, error) {
	// Retrieve the node from the clean cache if available
	if db.cleans != nil {
		k := hash[:]
		enc, found := db.cleans.HasGet(nil, k)
		if found {
			if len(enc) > 0 {
				memcacheCleanHitMeter.Mark(1)
				memcacheCleanReadMeter.Mark(int64(len(enc)))
				return enc, nil, nil
			} else {
				// Delete anything from cache that may have been added incorrectly
				//
				// This will prevent a panic as callers of this function assume the raw
				// or cached node is populated.
				log.Debug("removing empty value found in cleans cache", "k", k)
				db.cleans.Del(k)
			}
		}
	}
	// Retrieve the node from the dirty cache if available
	db.lock.RLock()
	dirty := db.dirties[hash]
	db.lock.RUnlock()

	if dirty != nil {
		memcacheDirtyHitMeter.Mark(1)
		memcacheDirtyReadMeter.Mark(int64(dirty.size))
		return nil, dirty, nil
	}
	memcacheDirtyMissMeter.Mark(1)

	// Content unavailable in memory, attempt to retrieve from disk
	enc := rawdb.ReadTrieNode(db.diskdb, hash)
	if len(enc) > 0 {
		if db.cleans != nil {
			db.cleans.Set(hash[:], enc)
			memcacheCleanMissMeter.Mark(1)
			memcacheCleanWriteMeter.Mark(int64(len(enc)))
		}
		return enc, nil, nil
	}
	return nil, nil, errors.New("not found")
}

// Nodes retrieves the hashes of all the nodes cached within the memory database.
// This method is extremely expensive and should only be used to validate internal
// states in test code.
func (db *Database) Nodes() []common.Hash {
	db.lock.RLock()
	defer db.lock.RUnlock()

	var hashes = make([]common.Hash, 0, len(db.dirties))
	for hash := range db.dirties {
		if hash != (common.Hash{}) { // Special case for "root" references/nodes
			hashes = append(hashes, hash)
		}
	}
	return hashes
}

// Reference adds a new reference from a parent node to a child node. This
// function is used to add a reference between an internal trie node and an
// external node (e.g. a storage trie root); all internal trie nodes are
// referenced together by the database itself.
func (db *Database) Reference(child common.Hash, parent common.Hash) {
	db.lock.Lock()
	defer db.lock.Unlock()

	db.reference(child, parent)
}

func (db *Database) reference(child common.Hash, parent common.Hash) {
	// If the node does not exist, it's a node pulled from disk, skip
	node, ok := db.dirties[child]
	if !ok {
		return
	}
	// If the reference already exists, only duplicate for roots
	if db.dirties[parent].children == nil {
		db.dirties[parent].children = make(map[common.Hash]uint16)
		db.childrenSize += cachedNodeChildrenSize
	} else if _, ok = db.dirties[parent].children[child]; ok && parent != (common.Hash{}) {
		return
	}
	node.parents++
	db.dirties[parent].children[child]++
	if db.dirties[parent].children[child] == 1 {
		db.childrenSize += common.HashLength + 2 // uint16 counter
	}
}

// Dereference removes an existing reference from a root node.
func (db *Database) Dereference(root common.Hash) {
	// Sanity check to ensure that the meta-root is not removed
	if root == (common.Hash{}) {
		log.Error("Attempted to dereference the trie cache meta root")
		return
	}

	db.lock.Lock()
	defer db.lock.Unlock()
	nodes, storage, start := len(db.dirties), db.dirtiesSize, time.Now()
	db.dereference(root, common.Hash{})

	db.gcnodes += uint64(nodes - len(db.dirties))
	db.gcsize += storage - db.dirtiesSize
	db.gctime += time.Since(start)

	memcacheDirtySizeGauge.Update(float64(db.dirtiesSize))
	memcacheDirtyChildSizeGauge.Update(float64(db.childrenSize))
	memcacheDirtyNodesGauge.Update(int64(len(db.dirties)))

	memcacheGCTimeTimer.Update(time.Since(start))
	memcacheGCSizeMeter.Mark(int64(storage - db.dirtiesSize))
	memcacheGCNodesMeter.Mark(int64(nodes - len(db.dirties)))

	log.Debug("Dereferenced trie from memory database", "nodes", nodes-len(db.dirties), "size", storage-db.dirtiesSize, "time", time.Since(start),
		"gcnodes", db.gcnodes, "gcsize", db.gcsize, "gctime", db.gctime, "livenodes", len(db.dirties), "livesize", db.dirtiesSize)
}
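
// Typical lifecycle (illustrative sketch): a fresh state root is referenced
// to pin it in memory, and roots that fall out of the kept window are later
// dereferenced so their nodes can be garbage collected. The root variables
// are hypothetical.
//
//	db.Reference(newRoot, common.Hash{}) // pin the fresh root to the metaroot
//	db.Dereference(staleRoot)            // release an old root and cascade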

// dereference is the private locked version of Dereference.
func (db *Database) dereference(child common.Hash, parent common.Hash) {
	// Dereference the parent-child
	node := db.dirties[parent]

	if node.children != nil && node.children[child] > 0 {
		node.children[child]--
		if node.children[child] == 0 {
			delete(node.children, child)
			db.childrenSize -= (common.HashLength + 2) // uint16 counter
		}
	}
	// If the child does not exist, it's a previously committed node.
	node, ok := db.dirties[child]
	if !ok {
		return
	}
	// If there are no more references to the child, delete it and cascade
	if node.parents > 0 {
		// This is a special cornercase where a node loaded from disk (i.e. not in the
		// memcache any more) gets reinjected as a new node (short node split into full,
		// then reverted into short), causing a cached node to have no parents. That is
		// no problem in itself, but don't make maxint parents out of it.
		node.parents--
	}
	if node.parents == 0 {
		// Remove the node from the flush-list
		switch child {
		case db.oldest:
			db.oldest = node.flushNext
			db.dirties[node.flushNext].flushPrev = common.Hash{}
		case db.newest:
			db.newest = node.flushPrev
			db.dirties[node.flushPrev].flushNext = common.Hash{}
		default:
			db.dirties[node.flushPrev].flushNext = node.flushNext
			db.dirties[node.flushNext].flushPrev = node.flushPrev
		}
		// Dereference all children and delete the node
		node.forChilds(func(hash common.Hash) {
			db.dereference(hash, child)
		})
		delete(db.dirties, child)
		db.dirtiesSize -= common.StorageSize(common.HashLength + int(node.size))
		if node.children != nil {
			db.childrenSize -= cachedNodeChildrenSize
		}
	}
}

// flushItem is used to track all [cachedNode]s that must be written to disk
type flushItem struct {
	hash common.Hash
	node *cachedNode
	rlp  []byte
}

// writeFlushItems writes all items in [toFlush] to disk in batches of
// [ethdb.IdealBatchSize]. This function does not access any mutable state
// inside of [Database] and does not need to be synchronized.
func (db *Database) writeFlushItems(toFlush []*flushItem) error {
	batch := db.diskdb.NewBatch()
	for _, item := range toFlush {
		rlp := item.node.rlp()
		item.rlp = rlp
		rawdb.WriteTrieNode(batch, item.hash, rlp)

		// If we exceeded the ideal batch size, commit and reset
		if batch.ValueSize() >= ethdb.IdealBatchSize {
			if err := batch.Write(); err != nil {
				log.Error("Failed to write flush list to disk", "err", err)
				return err
			}
			batch.Reset()
		}
	}

	// Flush out any remainder data from the last batch
	if err := batch.Write(); err != nil {
		log.Error("Failed to write flush list to disk", "err", err)
		return err
	}

	return nil
}

// Cap iteratively flushes old but still referenced trie nodes until the total
// memory usage goes below the given threshold.
func (db *Database) Cap(limit common.StorageSize) error {
	start := time.Now()
	// If the preimage cache got large enough, push to disk. If it's still small,
	// leave for later to deduplicate writes.
	if db.preimages != nil {
		if err := db.preimages.commit(false); err != nil {
			return err
		}
	}

	// It is important that outside code doesn't see an inconsistent state
	// (referenced data removed from memory cache during commit but not yet
	// in persistent storage). This is ensured by only uncaching existing
	// data when the database write finalizes.
	db.lock.RLock()
	lockStart := time.Now()
	nodes, storage := len(db.dirties), db.dirtiesSize

	// db.dirtiesSize only contains the useful data in the cache, but when reporting
	// the total memory consumption, the maintenance metadata also needs to be
	// counted.
	pendingSize := db.dirtiesSize + common.StorageSize((len(db.dirties)-1)*cachedNodeSize)
	pendingSize += db.childrenSize - common.StorageSize(len(db.dirties[common.Hash{}].children)*(common.HashLength+2))
	if pendingSize <= limit {
		db.lock.RUnlock()
		return nil
	}

	// Keep removing nodes from the flush-list until we're below allowance
	toFlush := make([]*flushItem, 0, 128)
	oldest := db.oldest
	for pendingSize > limit && oldest != (common.Hash{}) {
		// Fetch the oldest referenced node and push into the batch
		node := db.dirties[oldest]
		toFlush = append(toFlush, &flushItem{oldest, node, nil})

		// Iterate to the next flush item, or abort if the size cap was achieved. Size
		// is the total size, including the useful cached data (hash -> blob), the
		// cache item metadata, as well as external children mappings.
		pendingSize -= common.StorageSize(common.HashLength + int(node.size) + cachedNodeSize)
		if node.children != nil {
			pendingSize -= common.StorageSize(cachedNodeChildrenSize + len(node.children)*(common.HashLength+2))
		}
		oldest = node.flushNext
	}
	db.lock.RUnlock()
	lockTime := time.Since(lockStart)

	// Write nodes to disk
	if err := db.writeFlushItems(toFlush); err != nil {
		return err
	}

	// Flush all written items from dirties
	//
	// NOTE: The order of the flushlist may have changed while the lock was not
	// held, so we cannot just iterate to [oldest].
	db.lock.Lock()
	defer db.lock.Unlock()
	lockStart = time.Now()
	for _, item := range toFlush {
		// [item.rlp] is populated in [writeFlushItems]
		db.removeFromDirties(item.hash, item.rlp)
	}
	db.flushnodes += uint64(nodes - len(db.dirties))
	db.flushsize += storage - db.dirtiesSize
	db.flushtime += time.Since(start)

	memcacheDirtySizeGauge.Update(float64(db.dirtiesSize))
	memcacheDirtyChildSizeGauge.Update(float64(db.childrenSize))
	memcacheDirtyNodesGauge.Update(int64(len(db.dirties)))

	memcacheFlushMeter.Mark(1)
	memcacheFlushTimeTimer.Update(time.Since(start))
	memcacheFlushLockTimeTimer.Update(lockTime + time.Since(lockStart))
	memcacheFlushSizeMeter.Mark(int64(storage - db.dirtiesSize))
	memcacheFlushNodesMeter.Mark(int64(nodes - len(db.dirties)))

	log.Debug("Persisted nodes from memory database", "nodes", nodes-len(db.dirties), "size", storage-db.dirtiesSize, "time", time.Since(start),
		"flushnodes", db.flushnodes, "flushsize", db.flushsize, "flushtime", db.flushtime, "livenodes", len(db.dirties), "livesize", db.dirtiesSize)
	return nil
}
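
// A capping sketch (illustrative): callers typically invoke Cap periodically
// to bound the dirty layer's memory use; the 256 MB threshold below is an
// assumption for the example.
//
//	if err := db.Cap(256 * 1024 * 1024); err != nil {
//		log.Error("Failed to cap trie database", "err", err)
//	}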

// Commit iterates over all the children of a particular node, writes them out
// to disk, forcefully tearing down all references in both directions. As a side
// effect, all pre-images accumulated up to this point are also written.
func (db *Database) Commit(node common.Hash, report bool, callback func(common.Hash)) error {
	start := time.Now()
	if db.preimages != nil {
		if err := db.preimages.commit(true); err != nil {
			return err
		}
	}

	// It is important that outside code doesn't see an inconsistent state (referenced
	// data removed from memory cache during commit but not yet in persistent storage).
	// This is ensured by only uncaching existing data when the database write finalizes.
	db.lock.RLock()
	lockStart := time.Now()
	nodes, storage := len(db.dirties), db.dirtiesSize
	toFlush, err := db.commit(node, make([]*flushItem, 0, 128), callback)
	if err != nil {
		db.lock.RUnlock()
		log.Error("Failed to commit trie from trie database", "err", err)
		return err
	}
	db.lock.RUnlock()
	lockTime := time.Since(lockStart)

	// Write nodes to disk
	if err := db.writeFlushItems(toFlush); err != nil {
		return err
	}

	// Flush all written items from dirties
	db.lock.Lock()
	defer db.lock.Unlock()
	lockStart = time.Now()
	for _, item := range toFlush {
		// [item.rlp] is populated in [writeFlushItems]
		db.removeFromDirties(item.hash, item.rlp)
	}

	memcacheDirtySizeGauge.Update(float64(db.dirtiesSize))
	memcacheDirtyChildSizeGauge.Update(float64(db.childrenSize))
	memcacheDirtyNodesGauge.Update(int64(len(db.dirties)))

	memcacheCommitMeter.Mark(1)
	memcacheCommitTimeTimer.Update(time.Since(start))
	memcacheCommitLockTimeTimer.Update(lockTime + time.Since(lockStart))
	memcacheCommitSizeMeter.Mark(int64(storage - db.dirtiesSize))
	memcacheCommitNodesMeter.Mark(int64(nodes - len(db.dirties)))

	logger := log.Info
	if !report {
		logger = log.Debug
	}
	logger("Persisted trie from memory database", "nodes", nodes-len(db.dirties)+int(db.flushnodes), "size", storage-db.dirtiesSize+db.flushsize, "time", time.Since(start)+db.flushtime,
		"gcnodes", db.gcnodes, "gcsize", db.gcsize, "gctime", db.gctime, "livenodes", len(db.dirties), "livesize", db.dirtiesSize)

	// Reset the garbage collection statistics
	db.gcnodes, db.gcsize, db.gctime = 0, 0, 0
	db.flushnodes, db.flushsize, db.flushtime = 0, 0, 0
	return nil
}
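
// A commit sketch (illustrative): persisting everything reachable from a
// state root, e.g. at shutdown or on an accepted block. The root variable is
// hypothetical and the callback is optional.
//
//	if err := db.Commit(root, true, nil); err != nil {
//		log.Error("Failed to commit trie database", "err", err)
//	}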

// commit is the private locked version of Commit. This function does not
// mutate any data, rather it collects all data that should be committed.
//
// [callback] will be invoked as soon as it is determined a trie node will be
// flushed to disk (before it is actually written).
func (db *Database) commit(hash common.Hash, toFlush []*flushItem, callback func(common.Hash)) ([]*flushItem, error) {
	// If the node does not exist, it's a previously committed node
	node, ok := db.dirties[hash]
	if !ok {
		return toFlush, nil
	}
	var err error
	node.forChilds(func(child common.Hash) {
		if err == nil {
			toFlush, err = db.commit(child, toFlush, callback)
		}
	})
	if err != nil {
		return nil, err
	}
	// By processing the children of each node before the node itself, we ensure
	// that children are committed before their parents (an invariant of this
	// package).
	toFlush = append(toFlush, &flushItem{hash, node, nil})
	if callback != nil {
		callback(hash)
	}
	return toFlush, nil
}

// removeFromDirties is invoked after database writes and implements dirty data uncaching.
//
// This is the post-processing step of a commit operation where the already persisted trie is
// removed from the dirty cache and moved into the clean cache. The reason behind
// the two-phase commit is to ensure data availability while moving from memory
// to disk.
//
// It is assumed the caller holds [db.lock] for writing when this function is
// called.
func (db *Database) removeFromDirties(hash common.Hash, rlp []byte) {
	// If the node does not exist, we're done on this path. This could happen if
	// nodes are capped to disk while another thread is committing those same
	// nodes.
	node, ok := db.dirties[hash]
	if !ok {
		return
	}
	// Node still exists, remove it from the flush-list
	switch hash {
	case db.oldest:
		db.oldest = node.flushNext
		db.dirties[node.flushNext].flushPrev = common.Hash{}
	case db.newest:
		db.newest = node.flushPrev
		db.dirties[node.flushPrev].flushNext = common.Hash{}
	default:
		db.dirties[node.flushPrev].flushNext = node.flushNext
		db.dirties[node.flushNext].flushPrev = node.flushPrev
	}
	// Remove the node from the dirty cache
	delete(db.dirties, hash)
	db.dirtiesSize -= common.StorageSize(common.HashLength + int(node.size))
	if node.children != nil {
		db.childrenSize -= common.StorageSize(cachedNodeChildrenSize + len(node.children)*(common.HashLength+2))
	}
	// Move the flushed node into the clean cache to prevent insta-reloads
	if db.cleans != nil {
		db.cleans.Set(hash[:], rlp)
		memcacheCleanWriteMeter.Mark(int64(len(rlp)))
	}
}

// Update inserts the dirty nodes of the provided nodeset into the database and
// links the account trie with multiple storage tries if necessary.
func (db *Database) Update(nodes *MergedNodeSet) error {
	db.lock.Lock()
	defer db.lock.Unlock()

	return db.update(nodes)
}

// UpdateAndReferenceRoot inserts the dirty nodes of the provided nodeset into
// the database and links the account trie with multiple storage tries if
// necessary, then adds a reference from [root] to the metaroot while holding
// the db's lock.
func (db *Database) UpdateAndReferenceRoot(nodes *MergedNodeSet, root common.Hash) error {
	db.lock.Lock()
	defer db.lock.Unlock()

	if err := db.update(nodes); err != nil {
		return err
	}
	db.reference(root, common.Hash{})
	return nil
}
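
// An update sketch (illustrative): the trie Commit signature and the
// NewMergedNodeSet/Merge helpers are assumed to match the upstream go-ethereum
// API this file derives from, and tr is a hypothetical committed trie.
//
//	root, set, err := tr.Commit(true) // assumed API; collect leaves for linking
//	if err != nil {
//		return err
//	}
//	merged := NewMergedNodeSet()
//	if err := merged.Merge(set); err != nil {
//		return err
//	}
//	if err := db.UpdateAndReferenceRoot(merged, root); err != nil {
//		return err
//	}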

func (db *Database) update(nodes *MergedNodeSet) error {
	// Insert dirty nodes into the database. In the same tree, it must be
	// ensured that children are inserted first, then the parent, so that
	// children can be linked with their parent correctly.
	//
	// Note, the storage tries must be flushed before the account trie to
	// retain the invariant that children go into the dirty cache first.
	var order []common.Hash
	for owner := range nodes.sets {
		if owner == (common.Hash{}) {
			continue
		}
		order = append(order, owner)
	}
	if _, ok := nodes.sets[common.Hash{}]; ok {
		order = append(order, common.Hash{})
	}
	for _, owner := range order {
		subset := nodes.sets[owner]
		for _, path := range subset.paths {
			n, ok := subset.nodes[path]
			if !ok {
				return fmt.Errorf("missing node %x %v", owner, path)
			}
			db.insert(n.hash, int(n.size), n.node)
		}
	}
	// Link up the account trie and storage trie if the node points
	// to an account trie leaf.
	if set, present := nodes.sets[common.Hash{}]; present {
		for _, n := range set.leaves {
			var account types.StateAccount
			if err := rlp.DecodeBytes(n.blob, &account); err != nil {
				return err
			}
			if account.Root != emptyRoot {
				db.reference(account.Root, n.parent)
			}
		}
	}
	return nil
}

// Size returns the current storage size of the memory cache in front of the
// persistent database layer (dirty node cache plus maintenance metadata),
// together with the size of the cached preimages.
func (db *Database) Size() (common.StorageSize, common.StorageSize) {
	// db.dirtiesSize only contains the useful data in the cache, but when reporting
	// the total memory consumption, the maintenance metadata also needs to be
	// counted.
	db.lock.RLock()
	defer db.lock.RUnlock()
	var metadataSize = common.StorageSize((len(db.dirties) - 1) * cachedNodeSize)
	var metarootRefs = common.StorageSize(len(db.dirties[common.Hash{}].children) * (common.HashLength + 2))
	var preimageSize common.StorageSize
	if db.preimages != nil {
		preimageSize = db.preimages.size()
	}
	return db.dirtiesSize + db.childrenSize + metadataSize - metarootRefs, preimageSize
}

// CommitPreimages flushes the dangling preimages to disk. It is meant to be
// called when closing the blockchain object, so that preimages are persisted
// to the database.
func (db *Database) CommitPreimages() error {
	db.lock.Lock()
	defer db.lock.Unlock()

	if db.preimages == nil {
		return nil
	}
	return db.preimages.commit(true)
}

// saveCache saves the clean state cache to the given directory path, using
// the specified number of CPU cores.
func (db *Database) saveCache(dir string, threads int) error {
	if db.cleans == nil {
		return nil
	}
	log.Info("Writing clean trie cache to disk", "path", dir, "threads", threads)

	start := time.Now()
	err := db.cleans.SaveToFileConcurrent(dir, threads)
	if err != nil {
		log.Error("Failed to persist clean trie cache", "error", err)
		return err
	}
	log.Info("Persisted the clean trie cache", "path", dir, "elapsed", common.PrettyDuration(time.Since(start)))
	return nil
}

// SaveCachePeriodically atomically saves fast cache data to the given dir at
// the specified interval. All dump operations will use only a single CPU core.
func (db *Database) SaveCachePeriodically(dir string, interval time.Duration, stopCh <-chan struct{}) {
	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	for {
		select {
		case <-ticker.C:
			db.saveCache(dir, 1)
		case <-stopCh:
			return
		}
	}
}
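
// A shutdown-aware sketch (illustrative): running the periodic saver in the
// background and stopping it when the node shuts down. The stop channel,
// cache directory, and interval are assumptions for the example.
//
//	stop := make(chan struct{})
//	go db.SaveCachePeriodically("/tmp/triecache", 10*time.Minute, stop)
//	// ... later, on shutdown:
//	close(stop)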