github.com/googgoog/go-ethereum@v1.9.7/trie/database.go (about)

     1  // Copyright 2018 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package trie
    18  
    19  import (
    20  	"encoding/binary"
    21  	"errors"
    22  	"fmt"
    23  	"io"
    24  	"reflect"
    25  	"sync"
    26  	"time"
    27  
    28  	"github.com/allegro/bigcache"
    29  	"github.com/ethereum/go-ethereum/common"
    30  	"github.com/ethereum/go-ethereum/ethdb"
    31  	"github.com/ethereum/go-ethereum/log"
    32  	"github.com/ethereum/go-ethereum/metrics"
    33  	"github.com/ethereum/go-ethereum/rlp"
    34  )
    35  
// Metrics describing the behavior of the in-memory trie node caches.
var (
	// Clean cache: lookups served from / missed by the clean cache, and the
	// number of bytes read from and written into it.
	memcacheCleanHitMeter   = metrics.NewRegisteredMeter("trie/memcache/clean/hit", nil)
	memcacheCleanMissMeter  = metrics.NewRegisteredMeter("trie/memcache/clean/miss", nil)
	memcacheCleanReadMeter  = metrics.NewRegisteredMeter("trie/memcache/clean/read", nil)
	memcacheCleanWriteMeter = metrics.NewRegisteredMeter("trie/memcache/clean/write", nil)

	// Flushing: time, node count and data size pushed to disk by Cap.
	memcacheFlushTimeTimer  = metrics.NewRegisteredResettingTimer("trie/memcache/flush/time", nil)
	memcacheFlushNodesMeter = metrics.NewRegisteredMeter("trie/memcache/flush/nodes", nil)
	memcacheFlushSizeMeter  = metrics.NewRegisteredMeter("trie/memcache/flush/size", nil)

	// Garbage collection: time, node count and data size dropped by Dereference.
	memcacheGCTimeTimer  = metrics.NewRegisteredResettingTimer("trie/memcache/gc/time", nil)
	memcacheGCNodesMeter = metrics.NewRegisteredMeter("trie/memcache/gc/nodes", nil)
	memcacheGCSizeMeter  = metrics.NewRegisteredMeter("trie/memcache/gc/size", nil)

	// Committing: time, node count and data size persisted by Commit.
	memcacheCommitTimeTimer  = metrics.NewRegisteredResettingTimer("trie/memcache/commit/time", nil)
	memcacheCommitNodesMeter = metrics.NewRegisteredMeter("trie/memcache/commit/nodes", nil)
	memcacheCommitSizeMeter  = metrics.NewRegisteredMeter("trie/memcache/commit/size", nil)
)
    54  
// secureKeyPrefix is the database key prefix used to store trie node preimages.
var secureKeyPrefix = []byte("secure-key-")

// secureKeyLength is the length of the above prefix (11 bytes) plus a 32 byte
// hash. The prefix length is hard-coded here, so it has to be kept in sync with
// secureKeyPrefix by hand.
const secureKeyLength = 11 + 32
    60  
// Database is an intermediate write layer between the trie data structures and
// the disk database. The aim is to accumulate trie writes in-memory and only
// periodically flush a couple tries to disk, garbage collecting the remainder.
//
// Note, the trie Database is **not** thread safe in its mutations, but it **is**
// thread safe in providing individual, independent node access. The rationale
// behind this split design is to provide read access to RPC handlers and sync
// servers even while the trie is executing expensive garbage collection.
//
// The dirty nodes form a doubly linked "flush-list" in insertion order, threaded
// through the flushPrev/flushNext fields of the cached nodes, with oldest and
// newest as its endpoints. The zero hash is reserved as the key of a "metaroot"
// entry in dirties whose children map tracks the externally referenced roots.
type Database struct {
	diskdb ethdb.KeyValueStore // Persistent storage for matured trie nodes

	cleans  *bigcache.BigCache          // GC friendly memory cache of clean node RLPs (nil when disabled)
	dirties map[common.Hash]*cachedNode // Data and references relationships of dirty nodes
	oldest  common.Hash                 // Oldest tracked node, flush-list head
	newest  common.Hash                 // Newest tracked node, flush-list tail

	preimages map[common.Hash][]byte // Preimages of nodes from the secure trie
	seckeybuf [secureKeyLength]byte  // Ephemeral buffer for calculating preimage keys (shared, overwritten by every secureKey call)

	gctime  time.Duration      // Time spent on garbage collection since last commit
	gcnodes uint64             // Nodes garbage collected since last commit
	gcsize  common.StorageSize // Data storage garbage collected since last commit

	flushtime  time.Duration      // Time spent on data flushing since last commit
	flushnodes uint64             // Nodes flushed since last commit
	flushsize  common.StorageSize // Data storage flushed since last commit

	dirtiesSize   common.StorageSize // Storage size of the dirty node cache (exc. metadata)
	childrenSize  common.StorageSize // Storage size of the external children tracking
	preimagesSize common.StorageSize // Storage size of the preimages cache

	lock sync.RWMutex // Guards dirties and preimages against concurrent readers
}
    94  
// rawNode is a simple binary blob used to differentiate between collapsed trie
// nodes and already encoded RLP binary blobs (while at the same time store them
// in the same cache fields).
type rawNode []byte

// The methods below always panic: a rawNode only lives inside the memory
// database and must never be linked into a live trie. They presumably exist
// only so that rawNode satisfies the node interface declared elsewhere.
func (n rawNode) canUnload(uint16, uint16) bool { panic("this should never end up in a live trie") }
func (n rawNode) cache() (hashNode, bool)       { panic("this should never end up in a live trie") }
func (n rawNode) fstring(ind string) string     { panic("this should never end up in a live trie") }
   103  
   104  // rawFullNode represents only the useful data content of a full node, with the
   105  // caches and flags stripped out to minimize its data storage. This type honors
   106  // the same RLP encoding as the original parent.
   107  type rawFullNode [17]node
   108  
   109  func (n rawFullNode) canUnload(uint16, uint16) bool { panic("this should never end up in a live trie") }
   110  func (n rawFullNode) cache() (hashNode, bool)       { panic("this should never end up in a live trie") }
   111  func (n rawFullNode) fstring(ind string) string     { panic("this should never end up in a live trie") }
   112  
   113  func (n rawFullNode) EncodeRLP(w io.Writer) error {
   114  	var nodes [17]node
   115  
   116  	for i, child := range n {
   117  		if child != nil {
   118  			nodes[i] = child
   119  		} else {
   120  			nodes[i] = nilValueNode
   121  		}
   122  	}
   123  	return rlp.Encode(w, nodes)
   124  }
   125  
// rawShortNode represents only the useful data content of a short node, with the
// caches and flags stripped out to minimize its data storage. This type honors
// the same RLP encoding as the original parent.
type rawShortNode struct {
	Key []byte // Node key, copied verbatim from the short node by simplifyNode
	Val node   // Simplified child (value, hash reference or nested raw node)
}

// A rawShortNode must never be linked into a live trie, hence these methods
// unconditionally panic.
func (n rawShortNode) canUnload(uint16, uint16) bool { panic("this should never end up in a live trie") }
func (n rawShortNode) cache() (hashNode, bool)       { panic("this should never end up in a live trie") }
func (n rawShortNode) fstring(ind string) string     { panic("this should never end up in a live trie") }
   137  
// cachedNode is all the information we know about a single cached node in the
// memory database write layer.
//
// NOTE(review): size is a uint16 filled from len(blob) on insertion, so blobs
// larger than 65535 bytes would be accounted with a truncated size.
type cachedNode struct {
	node node   // Cached collapsed trie node, or raw rlp data
	size uint16 // Byte size of the useful cached data

	parents  uint32                 // Number of live nodes referencing this one
	children map[common.Hash]uint16 // External children referenced by this node (lazily allocated, value is the reference count)

	flushPrev common.Hash // Previous node in the flush-list
	flushNext common.Hash // Next node in the flush-list
}
   150  
// cachedNodeSize is the raw size of a cachedNode data structure without any
// node data included. It's an approximate size, but should be a lot better
// than not counting them. It is computed once via reflection on the struct
// type, so it tracks the field layout of cachedNode automatically.
var cachedNodeSize = int(reflect.TypeOf(cachedNode{}).Size())

// cachedNodeChildrenSize is the raw size of an initialized but empty external
// reference map. It is charged to childrenSize whenever a node's children map
// gets allocated and released again when that node is dropped.
const cachedNodeChildrenSize = 48
   159  
   160  // rlp returns the raw rlp encoded blob of the cached node, either directly from
   161  // the cache, or by regenerating it from the collapsed node.
   162  func (n *cachedNode) rlp() []byte {
   163  	if node, ok := n.node.(rawNode); ok {
   164  		return node
   165  	}
   166  	blob, err := rlp.EncodeToBytes(n.node)
   167  	if err != nil {
   168  		panic(err)
   169  	}
   170  	return blob
   171  }
   172  
   173  // obj returns the decoded and expanded trie node, either directly from the cache,
   174  // or by regenerating it from the rlp encoded blob.
   175  func (n *cachedNode) obj(hash common.Hash) node {
   176  	if node, ok := n.node.(rawNode); ok {
   177  		return mustDecodeNode(hash[:], node)
   178  	}
   179  	return expandNode(hash[:], n.node)
   180  }
   181  
   182  // childs returns all the tracked children of this node, both the implicit ones
   183  // from inside the node as well as the explicit ones from outside the node.
   184  func (n *cachedNode) childs() []common.Hash {
   185  	children := make([]common.Hash, 0, 16)
   186  	for child := range n.children {
   187  		children = append(children, child)
   188  	}
   189  	if _, ok := n.node.(rawNode); !ok {
   190  		gatherChildren(n.node, &children)
   191  	}
   192  	return children
   193  }
   194  
   195  // gatherChildren traverses the node hierarchy of a collapsed storage node and
   196  // retrieves all the hashnode children.
   197  func gatherChildren(n node, children *[]common.Hash) {
   198  	switch n := n.(type) {
   199  	case *rawShortNode:
   200  		gatherChildren(n.Val, children)
   201  
   202  	case rawFullNode:
   203  		for i := 0; i < 16; i++ {
   204  			gatherChildren(n[i], children)
   205  		}
   206  	case hashNode:
   207  		*children = append(*children, common.BytesToHash(n))
   208  
   209  	case valueNode, nil:
   210  
   211  	default:
   212  		panic(fmt.Sprintf("unknown node type: %T", n))
   213  	}
   214  }
   215  
   216  // simplifyNode traverses the hierarchy of an expanded memory node and discards
   217  // all the internal caches, returning a node that only contains the raw data.
   218  func simplifyNode(n node) node {
   219  	switch n := n.(type) {
   220  	case *shortNode:
   221  		// Short nodes discard the flags and cascade
   222  		return &rawShortNode{Key: n.Key, Val: simplifyNode(n.Val)}
   223  
   224  	case *fullNode:
   225  		// Full nodes discard the flags and cascade
   226  		node := rawFullNode(n.Children)
   227  		for i := 0; i < len(node); i++ {
   228  			if node[i] != nil {
   229  				node[i] = simplifyNode(node[i])
   230  			}
   231  		}
   232  		return node
   233  
   234  	case valueNode, hashNode, rawNode:
   235  		return n
   236  
   237  	default:
   238  		panic(fmt.Sprintf("unknown node type: %T", n))
   239  	}
   240  }
   241  
   242  // expandNode traverses the node hierarchy of a collapsed storage node and converts
   243  // all fields and keys into expanded memory form.
   244  func expandNode(hash hashNode, n node) node {
   245  	switch n := n.(type) {
   246  	case *rawShortNode:
   247  		// Short nodes need key and child expansion
   248  		return &shortNode{
   249  			Key: compactToHex(n.Key),
   250  			Val: expandNode(nil, n.Val),
   251  			flags: nodeFlag{
   252  				hash: hash,
   253  			},
   254  		}
   255  
   256  	case rawFullNode:
   257  		// Full nodes need child expansion
   258  		node := &fullNode{
   259  			flags: nodeFlag{
   260  				hash: hash,
   261  			},
   262  		}
   263  		for i := 0; i < len(node.Children); i++ {
   264  			if n[i] != nil {
   265  				node.Children[i] = expandNode(nil, n[i])
   266  			}
   267  		}
   268  		return node
   269  
   270  	case valueNode, hashNode:
   271  		return n
   272  
   273  	default:
   274  		panic(fmt.Sprintf("unknown node type: %T", n))
   275  	}
   276  }
   277  
   278  // trienodeHasher is a struct to be used with BigCache, which uses a Hasher to
   279  // determine which shard to place an entry into. It's not a cryptographic hash,
   280  // just to provide a bit of anti-collision (default is FNV64a).
   281  //
   282  // Since trie keys are already hashes, we can just use the key directly to
   283  // map shard id.
   284  type trienodeHasher struct{}
   285  
   286  // Sum64 implements the bigcache.Hasher interface.
   287  func (t trienodeHasher) Sum64(key string) uint64 {
   288  	return binary.BigEndian.Uint64([]byte(key))
   289  }
   290  
// NewDatabase creates a new trie database to store ephemeral trie content before
// its written out to disk or garbage collected. No read cache is created, so all
// data retrievals will hit the underlying disk database.
//
// It is shorthand for NewDatabaseWithCache with a cache size of zero.
func NewDatabase(diskdb ethdb.KeyValueStore) *Database {
	return NewDatabaseWithCache(diskdb, 0)
}
   297  
   298  // NewDatabaseWithCache creates a new trie database to store ephemeral trie content
   299  // before its written out to disk or garbage collected. It also acts as a read cache
   300  // for nodes loaded from disk.
   301  func NewDatabaseWithCache(diskdb ethdb.KeyValueStore, cache int) *Database {
   302  	var cleans *bigcache.BigCache
   303  	if cache > 0 {
   304  		cleans, _ = bigcache.NewBigCache(bigcache.Config{
   305  			Shards:             1024,
   306  			LifeWindow:         time.Hour,
   307  			MaxEntriesInWindow: cache * 1024,
   308  			MaxEntrySize:       512,
   309  			HardMaxCacheSize:   cache,
   310  			Hasher:             trienodeHasher{},
   311  		})
   312  	}
   313  	return &Database{
   314  		diskdb: diskdb,
   315  		cleans: cleans,
   316  		dirties: map[common.Hash]*cachedNode{{}: {
   317  			children: make(map[common.Hash]uint16),
   318  		}},
   319  		preimages: make(map[common.Hash][]byte),
   320  	}
   321  }
   322  
// DiskDB retrieves the persistent storage backing the trie database. The store
// is exposed through the read-only ethdb.KeyValueReader interface.
func (db *Database) DiskDB() ethdb.KeyValueReader {
	return db.diskdb
}
   327  
   328  // InsertBlob writes a new reference tracked blob to the memory database if it's
   329  // yet unknown. This method should only be used for non-trie nodes that require
   330  // reference counting, since trie nodes are garbage collected directly through
   331  // their embedded children.
   332  func (db *Database) InsertBlob(hash common.Hash, blob []byte) {
   333  	db.lock.Lock()
   334  	defer db.lock.Unlock()
   335  
   336  	db.insert(hash, blob, rawNode(blob))
   337  }
   338  
   339  // insert inserts a collapsed trie node into the memory database. This method is
   340  // a more generic version of InsertBlob, supporting both raw blob insertions as
   341  // well ex trie node insertions. The blob must always be specified to allow proper
   342  // size tracking.
   343  func (db *Database) insert(hash common.Hash, blob []byte, node node) {
   344  	// If the node's already cached, skip
   345  	if _, ok := db.dirties[hash]; ok {
   346  		return
   347  	}
   348  	// Create the cached entry for this node
   349  	entry := &cachedNode{
   350  		node:      simplifyNode(node),
   351  		size:      uint16(len(blob)),
   352  		flushPrev: db.newest,
   353  	}
   354  	for _, child := range entry.childs() {
   355  		if c := db.dirties[child]; c != nil {
   356  			c.parents++
   357  		}
   358  	}
   359  	db.dirties[hash] = entry
   360  
   361  	// Update the flush-list endpoints
   362  	if db.oldest == (common.Hash{}) {
   363  		db.oldest, db.newest = hash, hash
   364  	} else {
   365  		db.dirties[db.newest].flushNext, db.newest = hash, hash
   366  	}
   367  	db.dirtiesSize += common.StorageSize(common.HashLength + entry.size)
   368  }
   369  
   370  // insertPreimage writes a new trie node pre-image to the memory database if it's
   371  // yet unknown. The method will make a copy of the slice.
   372  //
   373  // Note, this method assumes that the database's lock is held!
   374  func (db *Database) insertPreimage(hash common.Hash, preimage []byte) {
   375  	if _, ok := db.preimages[hash]; ok {
   376  		return
   377  	}
   378  	db.preimages[hash] = common.CopyBytes(preimage)
   379  	db.preimagesSize += common.StorageSize(common.HashLength + len(preimage))
   380  }
   381  
   382  // node retrieves a cached trie node from memory, or returns nil if none can be
   383  // found in the memory cache.
   384  func (db *Database) node(hash common.Hash) node {
   385  	// Retrieve the node from the clean cache if available
   386  	if db.cleans != nil {
   387  		if enc, err := db.cleans.Get(string(hash[:])); err == nil && enc != nil {
   388  			memcacheCleanHitMeter.Mark(1)
   389  			memcacheCleanReadMeter.Mark(int64(len(enc)))
   390  			return mustDecodeNode(hash[:], enc)
   391  		}
   392  	}
   393  	// Retrieve the node from the dirty cache if available
   394  	db.lock.RLock()
   395  	dirty := db.dirties[hash]
   396  	db.lock.RUnlock()
   397  
   398  	if dirty != nil {
   399  		return dirty.obj(hash)
   400  	}
   401  	// Content unavailable in memory, attempt to retrieve from disk
   402  	enc, err := db.diskdb.Get(hash[:])
   403  	if err != nil || enc == nil {
   404  		return nil
   405  	}
   406  	if db.cleans != nil {
   407  		db.cleans.Set(string(hash[:]), enc)
   408  		memcacheCleanMissMeter.Mark(1)
   409  		memcacheCleanWriteMeter.Mark(int64(len(enc)))
   410  	}
   411  	return mustDecodeNode(hash[:], enc)
   412  }
   413  
   414  // Node retrieves an encoded cached trie node from memory. If it cannot be found
   415  // cached, the method queries the persistent database for the content.
   416  func (db *Database) Node(hash common.Hash) ([]byte, error) {
   417  	// It doens't make sense to retrieve the metaroot
   418  	if hash == (common.Hash{}) {
   419  		return nil, errors.New("not found")
   420  	}
   421  	// Retrieve the node from the clean cache if available
   422  	if db.cleans != nil {
   423  		if enc, err := db.cleans.Get(string(hash[:])); err == nil && enc != nil {
   424  			memcacheCleanHitMeter.Mark(1)
   425  			memcacheCleanReadMeter.Mark(int64(len(enc)))
   426  			return enc, nil
   427  		}
   428  	}
   429  	// Retrieve the node from the dirty cache if available
   430  	db.lock.RLock()
   431  	dirty := db.dirties[hash]
   432  	db.lock.RUnlock()
   433  
   434  	if dirty != nil {
   435  		return dirty.rlp(), nil
   436  	}
   437  	// Content unavailable in memory, attempt to retrieve from disk
   438  	enc, err := db.diskdb.Get(hash[:])
   439  	if err == nil && enc != nil {
   440  		if db.cleans != nil {
   441  			db.cleans.Set(string(hash[:]), enc)
   442  			memcacheCleanMissMeter.Mark(1)
   443  			memcacheCleanWriteMeter.Mark(int64(len(enc)))
   444  		}
   445  	}
   446  	return enc, err
   447  }
   448  
   449  // preimage retrieves a cached trie node pre-image from memory. If it cannot be
   450  // found cached, the method queries the persistent database for the content.
   451  func (db *Database) preimage(hash common.Hash) ([]byte, error) {
   452  	// Retrieve the node from cache if available
   453  	db.lock.RLock()
   454  	preimage := db.preimages[hash]
   455  	db.lock.RUnlock()
   456  
   457  	if preimage != nil {
   458  		return preimage, nil
   459  	}
   460  	// Content unavailable in memory, attempt to retrieve from disk
   461  	return db.diskdb.Get(db.secureKey(hash[:]))
   462  }
   463  
// secureKey returns the database key for the preimage of key, as an ephemeral
// buffer. The caller must not hold onto the return value because it will become
// invalid on the next call.
//
// The key is secureKeyPrefix followed by key, assembled inside the shared
// db.seckeybuf array (as long as the result fits into it). No lock is taken, so
// concurrent calls overwrite each other's result.
func (db *Database) secureKey(key []byte) []byte {
	buf := append(db.seckeybuf[:0], secureKeyPrefix...)
	buf = append(buf, key...)
	return buf
}
   472  
   473  // Nodes retrieves the hashes of all the nodes cached within the memory database.
   474  // This method is extremely expensive and should only be used to validate internal
   475  // states in test code.
   476  func (db *Database) Nodes() []common.Hash {
   477  	db.lock.RLock()
   478  	defer db.lock.RUnlock()
   479  
   480  	var hashes = make([]common.Hash, 0, len(db.dirties))
   481  	for hash := range db.dirties {
   482  		if hash != (common.Hash{}) { // Special case for "root" references/nodes
   483  			hashes = append(hashes, hash)
   484  		}
   485  	}
   486  	return hashes
   487  }
   488  
// Reference adds a new reference from a parent node to a child node. It takes
// the write lock and delegates to reference.
func (db *Database) Reference(child common.Hash, parent common.Hash) {
	db.lock.Lock()
	defer db.lock.Unlock()

	db.reference(child, parent)
}
   496  
   497  // reference is the private locked version of Reference.
   498  func (db *Database) reference(child common.Hash, parent common.Hash) {
   499  	// If the node does not exist, it's a node pulled from disk, skip
   500  	node, ok := db.dirties[child]
   501  	if !ok {
   502  		return
   503  	}
   504  	// If the reference already exists, only duplicate for roots
   505  	if db.dirties[parent].children == nil {
   506  		db.dirties[parent].children = make(map[common.Hash]uint16)
   507  		db.childrenSize += cachedNodeChildrenSize
   508  	} else if _, ok = db.dirties[parent].children[child]; ok && parent != (common.Hash{}) {
   509  		return
   510  	}
   511  	node.parents++
   512  	db.dirties[parent].children[child]++
   513  	if db.dirties[parent].children[child] == 1 {
   514  		db.childrenSize += common.HashLength + 2 // uint16 counter
   515  	}
   516  }
   517  
   518  // Dereference removes an existing reference from a root node.
   519  func (db *Database) Dereference(root common.Hash) {
   520  	// Sanity check to ensure that the meta-root is not removed
   521  	if root == (common.Hash{}) {
   522  		log.Error("Attempted to dereference the trie cache meta root")
   523  		return
   524  	}
   525  	db.lock.Lock()
   526  	defer db.lock.Unlock()
   527  
   528  	nodes, storage, start := len(db.dirties), db.dirtiesSize, time.Now()
   529  	db.dereference(root, common.Hash{})
   530  
   531  	db.gcnodes += uint64(nodes - len(db.dirties))
   532  	db.gcsize += storage - db.dirtiesSize
   533  	db.gctime += time.Since(start)
   534  
   535  	memcacheGCTimeTimer.Update(time.Since(start))
   536  	memcacheGCSizeMeter.Mark(int64(storage - db.dirtiesSize))
   537  	memcacheGCNodesMeter.Mark(int64(nodes - len(db.dirties)))
   538  
   539  	log.Debug("Dereferenced trie from memory database", "nodes", nodes-len(db.dirties), "size", storage-db.dirtiesSize, "time", time.Since(start),
   540  		"gcnodes", db.gcnodes, "gcsize", db.gcsize, "gctime", db.gctime, "livenodes", len(db.dirties), "livesize", db.dirtiesSize)
   541  }
   542  
// dereference is the private locked version of Dereference.
//
// It first drops one explicit parent->child reference (if one is tracked), then
// decrements the child's parent counter. Once that counter reaches zero the
// child is unlinked from the flush-list, all of its own children are
// dereferenced recursively, and the child is deleted from the dirty set with
// the size accounting adjusted accordingly. The parent must be present in the
// dirty set; a child that is not present is ignored.
func (db *Database) dereference(child common.Hash, parent common.Hash) {
	// Dereference the parent-child
	node := db.dirties[parent]

	if node.children != nil && node.children[child] > 0 {
		node.children[child]--
		if node.children[child] == 0 {
			delete(node.children, child)
			db.childrenSize -= (common.HashLength + 2) // uint16 counter
		}
	}
	// If the child does not exist, it's a previously committed node.
	node, ok := db.dirties[child]
	if !ok {
		return
	}
	// If there are no more references to the child, delete it and cascade
	if node.parents > 0 {
		// This is a special cornercase where a node loaded from disk (i.e. not in the
		// memcache any more) gets reinjected as a new node (short node split into full,
		// then reverted into short), causing a cached node to have no parents. That is
		// no problem in itself, but don't make maxint parents out of it.
		node.parents--
	}
	if node.parents == 0 {
		// Remove the node from the flush-list. When a neighbour link is the zero
		// hash, the write below lands on the metaroot entry stored under that key,
		// so no nil check is needed.
		//
		// NOTE(review): if the child is the only element of the list, the first
		// case matches and db.newest is left pointing at the deleted node; insert
		// resets both endpoints once db.oldest is the zero hash.
		switch child {
		case db.oldest:
			db.oldest = node.flushNext
			db.dirties[node.flushNext].flushPrev = common.Hash{}
		case db.newest:
			db.newest = node.flushPrev
			db.dirties[node.flushPrev].flushNext = common.Hash{}
		default:
			db.dirties[node.flushPrev].flushNext = node.flushNext
			db.dirties[node.flushNext].flushPrev = node.flushPrev
		}
		// Dereference all children and delete the node
		for _, hash := range node.childs() {
			db.dereference(hash, child)
		}
		delete(db.dirties, child)
		db.dirtiesSize -= common.StorageSize(common.HashLength + int(node.size))
		if node.children != nil {
			// Release the fixed overhead charged when the children map was allocated
			db.childrenSize -= cachedNodeChildrenSize
		}
	}
}
   592  
// Cap iteratively flushes old but still referenced trie nodes until the total
// memory usage goes below the given threshold.
//
// Nodes are written in flush-list order, starting from the oldest one. If the
// preimage cache has grown beyond 4MB, all preimages are written out as well.
// Flushed nodes are only removed from the dirty set after every batch write has
// succeeded; on any write error the method returns early and leaves the
// in-memory state untouched.
//
// Note, this method is a non-synchronized mutator. It is unsafe to call this
// concurrently with other mutators.
func (db *Database) Cap(limit common.StorageSize) error {
	// Create a database batch to flush persistent data out. It is important that
	// outside code doesn't see an inconsistent state (referenced data removed from
	// memory cache during commit but not yet in persistent storage). This is ensured
	// by only uncaching existing data when the database write finalizes.
	nodes, storage, start := len(db.dirties), db.dirtiesSize, time.Now()
	batch := db.diskdb.NewBatch()

	// db.dirtiesSize only contains the useful data in the cache, but when reporting
	// the total memory consumption, the maintenance metadata is also needed to be
	// counted. The metaroot entry and its children references are excluded.
	size := db.dirtiesSize + common.StorageSize((len(db.dirties)-1)*cachedNodeSize)
	size += db.childrenSize - common.StorageSize(len(db.dirties[common.Hash{}].children)*(common.HashLength+2))

	// If the preimage cache got large enough, push to disk. If it's still small
	// leave for later to deduplicate writes.
	flushPreimages := db.preimagesSize > 4*1024*1024
	if flushPreimages {
		for hash, preimage := range db.preimages {
			if err := batch.Put(db.secureKey(hash[:]), preimage); err != nil {
				log.Error("Failed to commit preimage from trie database", "err", err)
				return err
			}
			if batch.ValueSize() > ethdb.IdealBatchSize {
				if err := batch.Write(); err != nil {
					return err
				}
				batch.Reset()
			}
		}
	}
	// Keep committing nodes from the flush-list until we're below allowance.
	// The local cursor walks the list without modifying it; db.oldest is only
	// advanced once all writes went through.
	oldest := db.oldest
	for size > limit && oldest != (common.Hash{}) {
		// Fetch the oldest referenced node and push into the batch
		node := db.dirties[oldest]
		if err := batch.Put(oldest[:], node.rlp()); err != nil {
			return err
		}
		// If we exceeded the ideal batch size, commit and reset
		if batch.ValueSize() >= ethdb.IdealBatchSize {
			if err := batch.Write(); err != nil {
				log.Error("Failed to write flush list to disk", "err", err)
				return err
			}
			batch.Reset()
		}
		// Iterate to the next flush item, or abort if the size cap was achieved. Size
		// is the total size, including the useful cached data (hash -> blob), the
		// cache item metadata, as well as external children mappings.
		size -= common.StorageSize(common.HashLength + int(node.size) + cachedNodeSize)
		if node.children != nil {
			size -= common.StorageSize(cachedNodeChildrenSize + len(node.children)*(common.HashLength+2))
		}
		oldest = node.flushNext
	}
	// Flush out any remainder data from the last batch
	if err := batch.Write(); err != nil {
		log.Error("Failed to write flush list to disk", "err", err)
		return err
	}
	// Write successful, clear out the flushed data
	db.lock.Lock()
	defer db.lock.Unlock()

	if flushPreimages {
		db.preimages = make(map[common.Hash][]byte)
		db.preimagesSize = 0
	}
	// Drop every node between the old list head and the cursor reached above
	for db.oldest != oldest {
		node := db.dirties[db.oldest]
		delete(db.dirties, db.oldest)
		db.oldest = node.flushNext

		db.dirtiesSize -= common.StorageSize(common.HashLength + int(node.size))
		if node.children != nil {
			db.childrenSize -= common.StorageSize(cachedNodeChildrenSize + len(node.children)*(common.HashLength+2))
		}
	}
	// The new list head has no predecessor any more
	if db.oldest != (common.Hash{}) {
		db.dirties[db.oldest].flushPrev = common.Hash{}
	}
	db.flushnodes += uint64(nodes - len(db.dirties))
	db.flushsize += storage - db.dirtiesSize
	db.flushtime += time.Since(start)

	memcacheFlushTimeTimer.Update(time.Since(start))
	memcacheFlushSizeMeter.Mark(int64(storage - db.dirtiesSize))
	memcacheFlushNodesMeter.Mark(int64(nodes - len(db.dirties)))

	log.Debug("Persisted nodes from memory database", "nodes", nodes-len(db.dirties), "size", storage-db.dirtiesSize, "time", time.Since(start),
		"flushnodes", db.flushnodes, "flushsize", db.flushsize, "flushtime", db.flushtime, "livenodes", len(db.dirties), "livesize", db.dirtiesSize)

	return nil
}
   693  
// Commit iterates over all the children of a particular node, writes them out
// to disk, forcefully tearing down all references in both directions. As a side
// effect, all pre-images accumulated up to this point are also written.
//
// The report flag selects the level of the final log line: Info when set, Debug
// otherwise. On success the preimage cache as well as the garbage collection and
// flush statistics are reset. Any batch or commit error is returned to the
// caller as is.
//
// Note, this method is a non-synchronized mutator. It is unsafe to call this
// concurrently with other mutators.
func (db *Database) Commit(node common.Hash, report bool) error {
	// Create a database batch to flush persistent data out. It is important that
	// outside code doesn't see an inconsistent state (referenced data removed from
	// memory cache during commit but not yet in persistent storage). This is ensured
	// by only uncaching existing data when the database write finalizes.
	start := time.Now()
	batch := db.diskdb.NewBatch()

	// Move all of the accumulated preimages into a write batch
	for hash, preimage := range db.preimages {
		if err := batch.Put(db.secureKey(hash[:]), preimage); err != nil {
			log.Error("Failed to commit preimage from trie database", "err", err)
			return err
		}
		// If the batch is too large, flush to disk
		if batch.ValueSize() > ethdb.IdealBatchSize {
			if err := batch.Write(); err != nil {
				return err
			}
			batch.Reset()
		}
	}
	// Since we're going to replay trie node writes into the clean cache, flush out
	// any batched pre-images before continuing.
	if err := batch.Write(); err != nil {
		return err
	}
	batch.Reset()

	// Move the trie itself into the batch, flushing if enough data is accumulated
	nodes, storage := len(db.dirties), db.dirtiesSize

	uncacher := &cleaner{db}
	if err := db.commit(node, batch, uncacher); err != nil {
		log.Error("Failed to commit trie from trie database", "err", err)
		return err
	}
	// Trie mostly committed to disk, flush any batch leftovers
	if err := batch.Write(); err != nil {
		log.Error("Failed to write trie to disk", "err", err)
		return err
	}
	// Uncache any leftovers in the last batch
	db.lock.Lock()
	defer db.lock.Unlock()

	// NOTE(review): the value returned by Replay is not checked here.
	batch.Replay(uncacher)
	batch.Reset()

	// Reset the storage counters and bump metrics
	db.preimages = make(map[common.Hash][]byte)
	db.preimagesSize = 0

	memcacheCommitTimeTimer.Update(time.Since(start))
	memcacheCommitSizeMeter.Mark(int64(storage - db.dirtiesSize))
	memcacheCommitNodesMeter.Mark(int64(nodes - len(db.dirties)))

	logger := log.Info
	if !report {
		logger = log.Debug
	}
	// The reported totals include whatever Cap flushed since the last commit
	logger("Persisted trie from memory database", "nodes", nodes-len(db.dirties)+int(db.flushnodes), "size", storage-db.dirtiesSize+db.flushsize, "time", time.Since(start)+db.flushtime,
		"gcnodes", db.gcnodes, "gcsize", db.gcsize, "gctime", db.gctime, "livenodes", len(db.dirties), "livesize", db.dirtiesSize)

	// Reset the garbage collection statistics
	db.gcnodes, db.gcsize, db.gctime = 0, 0, 0
	db.flushnodes, db.flushsize, db.flushtime = 0, 0, 0

	return nil
}
   770  
   771  // commit is the private locked version of Commit.
   772  func (db *Database) commit(hash common.Hash, batch ethdb.Batch, uncacher *cleaner) error {
   773  	// If the node does not exist, it's a previously committed node
   774  	node, ok := db.dirties[hash]
   775  	if !ok {
   776  		return nil
   777  	}
   778  	for _, child := range node.childs() {
   779  		if err := db.commit(child, batch, uncacher); err != nil {
   780  			return err
   781  		}
   782  	}
   783  	if err := batch.Put(hash[:], node.rlp()); err != nil {
   784  		return err
   785  	}
   786  	// If we've reached an optimal batch size, commit and start over
   787  	if batch.ValueSize() >= ethdb.IdealBatchSize {
   788  		if err := batch.Write(); err != nil {
   789  			return err
   790  		}
   791  		db.lock.Lock()
   792  		batch.Replay(uncacher)
   793  		batch.Reset()
   794  		db.lock.Unlock()
   795  	}
   796  	return nil
   797  }
   798  
   799  // cleaner is a database batch replayer that takes a batch of write operations
   800  // and cleans up the trie database from anything written to disk.
   801  type cleaner struct {
   802  	db *Database
   803  }
   804  
   805  // Put reacts to database writes and implements dirty data uncaching. This is the
   806  // post-processing step of a commit operation where the already persisted trie is
   807  // removed from the dirty cache and moved into the clean cache. The reason behind
   808  // the two-phase commit is to ensure ensure data availability while moving from
   809  // memory to disk.
   810  func (c *cleaner) Put(key []byte, rlp []byte) error {
   811  	hash := common.BytesToHash(key)
   812  
   813  	// If the node does not exist, we're done on this path
   814  	node, ok := c.db.dirties[hash]
   815  	if !ok {
   816  		return nil
   817  	}
   818  	// Node still exists, remove it from the flush-list
   819  	switch hash {
   820  	case c.db.oldest:
   821  		c.db.oldest = node.flushNext
   822  		c.db.dirties[node.flushNext].flushPrev = common.Hash{}
   823  	case c.db.newest:
   824  		c.db.newest = node.flushPrev
   825  		c.db.dirties[node.flushPrev].flushNext = common.Hash{}
   826  	default:
   827  		c.db.dirties[node.flushPrev].flushNext = node.flushNext
   828  		c.db.dirties[node.flushNext].flushPrev = node.flushPrev
   829  	}
   830  	// Remove the node from the dirty cache
   831  	delete(c.db.dirties, hash)
   832  	c.db.dirtiesSize -= common.StorageSize(common.HashLength + int(node.size))
   833  	if node.children != nil {
   834  		c.db.dirtiesSize -= common.StorageSize(cachedNodeChildrenSize + len(node.children)*(common.HashLength+2))
   835  	}
   836  	// Move the flushed node into the clean cache to prevent insta-reloads
   837  	if c.db.cleans != nil {
   838  		c.db.cleans.Set(string(hash[:]), rlp)
   839  	}
   840  	return nil
   841  }
   842  
   843  func (c *cleaner) Delete(key []byte) error {
   844  	panic("Not implemented")
   845  }
   846  
   847  // Size returns the current storage size of the memory cache in front of the
   848  // persistent database layer.
   849  func (db *Database) Size() (common.StorageSize, common.StorageSize) {
   850  	db.lock.RLock()
   851  	defer db.lock.RUnlock()
   852  
   853  	// db.dirtiesSize only contains the useful data in the cache, but when reporting
   854  	// the total memory consumption, the maintenance metadata is also needed to be
   855  	// counted.
   856  	var metadataSize = common.StorageSize((len(db.dirties) - 1) * cachedNodeSize)
   857  	var metarootRefs = common.StorageSize(len(db.dirties[common.Hash{}].children) * (common.HashLength + 2))
   858  	return db.dirtiesSize + db.childrenSize + metadataSize - metarootRefs, db.preimagesSize
   859  }
   860  
   861  // verifyIntegrity is a debug method to iterate over the entire trie stored in
   862  // memory and check whether every node is reachable from the meta root. The goal
   863  // is to find any errors that might cause memory leaks and or trie nodes to go
   864  // missing.
   865  //
   866  // This method is extremely CPU and memory intensive, only use when must.
   867  func (db *Database) verifyIntegrity() {
   868  	// Iterate over all the cached nodes and accumulate them into a set
   869  	reachable := map[common.Hash]struct{}{{}: {}}
   870  
   871  	for child := range db.dirties[common.Hash{}].children {
   872  		db.accumulate(child, reachable)
   873  	}
   874  	// Find any unreachable but cached nodes
   875  	var unreachable []string
   876  	for hash, node := range db.dirties {
   877  		if _, ok := reachable[hash]; !ok {
   878  			unreachable = append(unreachable, fmt.Sprintf("%x: {Node: %v, Parents: %d, Prev: %x, Next: %x}",
   879  				hash, node.node, node.parents, node.flushPrev, node.flushNext))
   880  		}
   881  	}
   882  	if len(unreachable) != 0 {
   883  		panic(fmt.Sprintf("trie cache memory leak: %v", unreachable))
   884  	}
   885  }
   886  
   887  // accumulate iterates over the trie defined by hash and accumulates all the
   888  // cached children found in memory.
   889  func (db *Database) accumulate(hash common.Hash, reachable map[common.Hash]struct{}) {
   890  	// Mark the node reachable if present in the memory cache
   891  	node, ok := db.dirties[hash]
   892  	if !ok {
   893  		return
   894  	}
   895  	reachable[hash] = struct{}{}
   896  
   897  	// Iterate over all the children and accumulate them too
   898  	for _, child := range node.childs() {
   899  		db.accumulate(child, reachable)
   900  	}
   901  }