github.com/baptiste-b-pegasys/quorum/v22@v22.4.2/core/state/snapshot/snapshot.go (about)

     1  // Copyright 2019 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  // Package snapshot implements a journalled, dynamic state dump.
    18  package snapshot
    19  
    20  import (
    21  	"bytes"
    22  	"errors"
    23  	"fmt"
    24  	"sync"
    25  	"sync/atomic"
    26  
    27  	"github.com/ethereum/go-ethereum/common"
    28  	"github.com/ethereum/go-ethereum/core/rawdb"
    29  	"github.com/ethereum/go-ethereum/ethdb"
    30  	"github.com/ethereum/go-ethereum/log"
    31  	"github.com/ethereum/go-ethereum/metrics"
    32  	"github.com/ethereum/go-ethereum/rlp"
    33  	"github.com/ethereum/go-ethereum/trie"
    34  )
    35  
var (
	// Meters registered under the "state/snapshot/clean/account" namespace.
	// The write meter is marked by diffToDisk whenever an account blob is
	// pushed into the disk layer's cache.
	snapshotCleanAccountHitMeter   = metrics.NewRegisteredMeter("state/snapshot/clean/account/hit", nil)
	snapshotCleanAccountMissMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/account/miss", nil)
	snapshotCleanAccountInexMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/account/inex", nil)
	snapshotCleanAccountReadMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/account/read", nil)
	snapshotCleanAccountWriteMeter = metrics.NewRegisteredMeter("state/snapshot/clean/account/write", nil)

	// Meters registered under the "state/snapshot/clean/storage" namespace.
	// The write meter is marked by diffToDisk whenever a non-empty storage
	// slot is pushed into the disk layer's cache.
	snapshotCleanStorageHitMeter   = metrics.NewRegisteredMeter("state/snapshot/clean/storage/hit", nil)
	snapshotCleanStorageMissMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/storage/miss", nil)
	snapshotCleanStorageInexMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/storage/inex", nil)
	snapshotCleanStorageReadMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/storage/read", nil)
	snapshotCleanStorageWriteMeter = metrics.NewRegisteredMeter("state/snapshot/clean/storage/write", nil)

	// Meters registered under the "state/snapshot/dirty/account" namespace.
	snapshotDirtyAccountHitMeter   = metrics.NewRegisteredMeter("state/snapshot/dirty/account/hit", nil)
	snapshotDirtyAccountMissMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/account/miss", nil)
	snapshotDirtyAccountInexMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/account/inex", nil)
	snapshotDirtyAccountReadMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/account/read", nil)
	snapshotDirtyAccountWriteMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/account/write", nil)

	// Meters registered under the "state/snapshot/dirty/storage" namespace.
	snapshotDirtyStorageHitMeter   = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/hit", nil)
	snapshotDirtyStorageMissMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/miss", nil)
	snapshotDirtyStorageInexMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/inex", nil)
	snapshotDirtyStorageReadMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/read", nil)
	snapshotDirtyStorageWriteMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/write", nil)

	// Histograms of the "hit depth" for dirty account and storage lookups,
	// each backed by an exponentially decaying sample.
	snapshotDirtyAccountHitDepthHist = metrics.NewRegisteredHistogram("state/snapshot/dirty/account/hit/depth", nil, metrics.NewExpDecaySample(1028, 0.015))
	snapshotDirtyStorageHitDepthHist = metrics.NewRegisteredHistogram("state/snapshot/dirty/storage/hit/depth", nil, metrics.NewExpDecaySample(1028, 0.015))

	// Flush meters, marked by diffToDisk with the number of items and the
	// byte size of the data merged into the persistent disk layer.
	snapshotFlushAccountItemMeter = metrics.NewRegisteredMeter("state/snapshot/flush/account/item", nil)
	snapshotFlushAccountSizeMeter = metrics.NewRegisteredMeter("state/snapshot/flush/account/size", nil)
	snapshotFlushStorageItemMeter = metrics.NewRegisteredMeter("state/snapshot/flush/storage/item", nil)
	snapshotFlushStorageSizeMeter = metrics.NewRegisteredMeter("state/snapshot/flush/storage/size", nil)

	// Bloom filter metrics: indexing timer and error gauge.
	snapshotBloomIndexTimer = metrics.NewRegisteredResettingTimer("state/snapshot/bloom/index", nil)
	snapshotBloomErrorGauge = metrics.NewRegisteredGaugeFloat64("state/snapshot/bloom/error", nil)

	// Bloom filter outcome meters for account lookups.
	snapshotBloomAccountTrueHitMeter  = metrics.NewRegisteredMeter("state/snapshot/bloom/account/truehit", nil)
	snapshotBloomAccountFalseHitMeter = metrics.NewRegisteredMeter("state/snapshot/bloom/account/falsehit", nil)
	snapshotBloomAccountMissMeter     = metrics.NewRegisteredMeter("state/snapshot/bloom/account/miss", nil)

	// Bloom filter outcome meters for storage lookups.
	snapshotBloomStorageTrueHitMeter  = metrics.NewRegisteredMeter("state/snapshot/bloom/storage/truehit", nil)
	snapshotBloomStorageFalseHitMeter = metrics.NewRegisteredMeter("state/snapshot/bloom/storage/falsehit", nil)
	snapshotBloomStorageMissMeter     = metrics.NewRegisteredMeter("state/snapshot/bloom/storage/miss", nil)

	// ErrSnapshotStale is returned from data accessors if the underlying snapshot
	// layer had been invalidated due to the chain progressing forward far enough
	// to not maintain the layer's original state.
	ErrSnapshotStale = errors.New("snapshot stale")

	// ErrNotCoveredYet is returned from data accessors if the underlying snapshot
	// is being generated currently and the requested data item is not yet in the
	// range of accounts covered.
	ErrNotCoveredYet = errors.New("not covered yet")

	// ErrNotConstructed is returned if the callers want to iterate the snapshot
	// while the generation is not finished yet.
	ErrNotConstructed = errors.New("snapshot is not constructed")

	// errSnapshotCycle is returned if a snapshot is attempted to be inserted
	// that forms a cycle in the snapshot tree. Tree.Update returns it when the
	// block root equals the parent root.
	errSnapshotCycle = errors.New("snapshot cycle")
)
    98  
// Snapshot represents the functionality supported by a snapshot storage layer.
// It is the read-only view handed out to callers by Tree.Snapshot and
// Tree.Snapshots.
type Snapshot interface {
	// Root returns the root hash for which this snapshot was made.
	Root() common.Hash

	// Account directly retrieves the account associated with a particular hash in
	// the snapshot slim data format.
	Account(hash common.Hash) (*Account, error)

	// AccountRLP directly retrieves the account RLP associated with a particular
	// hash in the snapshot slim data format.
	AccountRLP(hash common.Hash) ([]byte, error)

	// Storage directly retrieves the storage data associated with a particular hash,
	// within a particular account.
	Storage(accountHash, storageHash common.Hash) ([]byte, error)
}
   116  
// snapshot is the internal version of the snapshot data layer that supports some
// additional methods compared to the public API. Both *diskLayer and *diffLayer
// are stored in the Tree under this interface.
type snapshot interface {
	Snapshot

	// Parent returns the subsequent layer of a snapshot, or nil if the base was
	// reached.
	//
	// Note, the method is an internal helper to avoid type switching between the
	// disk and diff layers. There is no locking involved.
	Parent() snapshot

	// Update creates a new layer on top of the existing snapshot diff tree with
	// the specified data items.
	//
	// Note, the maps are retained by the method to avoid copying everything.
	Update(blockRoot common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer

	// Journal commits an entire diff hierarchy to disk into a single journal entry.
	// This is meant to be used during shutdown to persist the snapshot without
	// flattening everything down (bad for reorgs).
	Journal(buffer *bytes.Buffer) (common.Hash, error)

	// LegacyJournal is basically identical to Journal. It's the legacy version for
	// flushing legacy journal. Now the only purpose of this function is for testing.
	LegacyJournal(buffer *bytes.Buffer) (common.Hash, error)

	// Stale returns whether this layer has become stale (was flattened across) or
	// if it's still live.
	Stale() bool

	// AccountIterator creates an account iterator over an arbitrary layer.
	AccountIterator(seek common.Hash) AccountIterator

	// StorageIterator creates a storage iterator over an arbitrary layer.
	//
	// NOTE(review): the meaning of the boolean result is not visible in this
	// file; confirm against the layer implementations.
	StorageIterator(account common.Hash, seek common.Hash) (StorageIterator, bool)
}
   154  
// Tree is an Ethereum state snapshot tree. It consists of one persistent
// base layer backed by a key-value store, on top of which arbitrarily many in-
// memory diff layers are topped. The memory diffs can form a tree with branching,
// but the disk layer is singleton and common to all. If a reorg goes deeper than
// the disk layer, everything needs to be deleted.
//
// The goal of a state snapshot is twofold: to allow direct access to account and
// storage data to avoid expensive multi-level trie lookups; and to allow sorted,
// cheap iteration of the account/storage tries for sync aid.
type Tree struct {
	diskdb ethdb.KeyValueStore      // Persistent database to store the snapshot
	triedb *trie.Database           // In-memory cache to access the trie through
	cache  int                      // Megabytes permitted to use for read caches
	layers map[common.Hash]snapshot // Collection of all known layers
	lock   sync.RWMutex             // Guards access to the layers map
}
   171  
   172  // New attempts to load an already existing snapshot from a persistent key-value
   173  // store (with a number of memory layers from a journal), ensuring that the head
   174  // of the snapshot matches the expected one.
   175  //
   176  // If the snapshot is missing or the disk layer is broken, the entire is deleted
   177  // and will be reconstructed from scratch based on the tries in the key-value
   178  // store, on a background thread. If the memory layers from the journal is not
   179  // continuous with disk layer or the journal is missing, all diffs will be discarded
   180  // iff it's in "recovery" mode, otherwise rebuild is mandatory.
   181  func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash, async bool, rebuild bool, recovery bool) (*Tree, error) {
   182  	// Create a new, empty snapshot tree
   183  	snap := &Tree{
   184  		diskdb: diskdb,
   185  		triedb: triedb,
   186  		cache:  cache,
   187  		layers: make(map[common.Hash]snapshot),
   188  	}
   189  	if !async {
   190  		defer snap.waitBuild()
   191  	}
   192  	// Attempt to load a previously persisted snapshot and rebuild one if failed
   193  	head, err := loadSnapshot(diskdb, triedb, cache, root, recovery)
   194  	if err != nil {
   195  		if rebuild {
   196  			log.Warn("Failed to load snapshot, regenerating", "err", err)
   197  			snap.Rebuild(root)
   198  			return snap, nil
   199  		}
   200  		return nil, err // Bail out the error, don't rebuild automatically.
   201  	}
   202  	// Existing snapshot loaded, seed all the layers
   203  	for head != nil {
   204  		snap.layers[head.Root()] = head
   205  		head = head.Parent()
   206  	}
   207  	return snap, nil
   208  }
   209  
   210  // waitBuild blocks until the snapshot finishes rebuilding. This method is meant
   211  // to be used by tests to ensure we're testing what we believe we are.
   212  func (t *Tree) waitBuild() {
   213  	// Find the rebuild termination channel
   214  	var done chan struct{}
   215  
   216  	t.lock.RLock()
   217  	for _, layer := range t.layers {
   218  		if layer, ok := layer.(*diskLayer); ok {
   219  			done = layer.genPending
   220  			break
   221  		}
   222  	}
   223  	t.lock.RUnlock()
   224  
   225  	// Wait until the snapshot is generated
   226  	if done != nil {
   227  		<-done
   228  	}
   229  }
   230  
   231  // Snapshot retrieves a snapshot belonging to the given block root, or nil if no
   232  // snapshot is maintained for that block.
   233  func (t *Tree) Snapshot(blockRoot common.Hash) Snapshot {
   234  	t.lock.RLock()
   235  	defer t.lock.RUnlock()
   236  
   237  	return t.layers[blockRoot]
   238  }
   239  
   240  // Snapshots returns all visited layers from the topmost layer with specific
   241  // root and traverses downward. The layer amount is limited by the given number.
   242  // If nodisk is set, then disk layer is excluded.
   243  func (t *Tree) Snapshots(root common.Hash, limits int, nodisk bool) []Snapshot {
   244  	t.lock.RLock()
   245  	defer t.lock.RUnlock()
   246  
   247  	if limits == 0 {
   248  		return nil
   249  	}
   250  	layer := t.layers[root]
   251  	if layer == nil {
   252  		return nil
   253  	}
   254  	var ret []Snapshot
   255  	for {
   256  		if _, isdisk := layer.(*diskLayer); isdisk && nodisk {
   257  			break
   258  		}
   259  		ret = append(ret, layer)
   260  		limits -= 1
   261  		if limits == 0 {
   262  			break
   263  		}
   264  		parent := layer.Parent()
   265  		if parent == nil {
   266  			break
   267  		}
   268  		layer = parent
   269  	}
   270  	return ret
   271  }
   272  
   273  // Update adds a new snapshot into the tree, if that can be linked to an existing
   274  // old parent. It is disallowed to insert a disk layer (the origin of all).
   275  func (t *Tree) Update(blockRoot common.Hash, parentRoot common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) error {
   276  	// Reject noop updates to avoid self-loops in the snapshot tree. This is a
   277  	// special case that can only happen for Clique networks where empty blocks
   278  	// don't modify the state (0 block subsidy).
   279  	//
   280  	// Although we could silently ignore this internally, it should be the caller's
   281  	// responsibility to avoid even attempting to insert such a snapshot.
   282  	if blockRoot == parentRoot {
   283  		return errSnapshotCycle
   284  	}
   285  	// Generate a new snapshot on top of the parent
   286  	parent := t.Snapshot(parentRoot).(snapshot)
   287  	if parent == nil {
   288  		return fmt.Errorf("parent [%#x] snapshot missing", parentRoot)
   289  	}
   290  	snap := parent.Update(blockRoot, destructs, accounts, storage)
   291  
   292  	// Save the new snapshot for later
   293  	t.lock.Lock()
   294  	defer t.lock.Unlock()
   295  
   296  	t.layers[snap.root] = snap
   297  	return nil
   298  }
   299  
// Cap traverses downwards the snapshot tree from a head block hash until the
// number of allowed layers are crossed. All layers beyond the permitted number
// are flattened downwards.
//
// Note, the final diff layer count in general will be one more than the amount
// requested. This happens because the bottom-most diff layer is the accumulator
// which may or may not overflow and cascade to disk. Since this last layer's
// survival is only known *after* capping, we need to omit it from the count if
// we want to ensure that *at least* the requested number of diff layers remain.
//
// An error is returned if no snapshot exists for root, or if the snapshot
// found is the disk layer rather than a diff layer. Passing layers == 0
// flattens everything and replaces the whole tree with a single disk layer.
func (t *Tree) Cap(root common.Hash, layers int) error {
	// Retrieve the head snapshot to cap from
	snap := t.Snapshot(root)
	if snap == nil {
		return fmt.Errorf("snapshot [%#x] missing", root)
	}
	diff, ok := snap.(*diffLayer)
	if !ok {
		return fmt.Errorf("snapshot [%#x] is disk layer", root)
	}
	// If the generator is still running, use a more aggressive cap
	diff.origin.lock.RLock()
	if diff.origin.genMarker != nil && layers > 8 {
		layers = 8
	}
	diff.origin.lock.RUnlock()

	// Run the internal capping and discard all stale layers
	t.lock.Lock()
	defer t.lock.Unlock()

	// Flattening the bottom-most diff layer requires special casing since there's
	// no child to rewire to the grandparent. In that case we can fake a temporary
	// child for the capping and then remove it.
	if layers == 0 {
		// If full commit was requested, flatten the diffs and merge onto disk
		diff.lock.RLock()
		base := diffToDisk(diff.flatten().(*diffLayer))
		diff.lock.RUnlock()

		// Replace the entire snapshot tree with the flat base
		t.layers = map[common.Hash]snapshot{base.root: base}
		return nil
	}
	// persisted is non-nil only if cap pushed diffs into a new disk layer
	persisted := t.cap(diff, layers)

	// Remove any layer that is stale or links into a stale layer. First build
	// a parent root -> child roots index over all the diff layers.
	children := make(map[common.Hash][]common.Hash)
	for root, snap := range t.layers {
		if diff, ok := snap.(*diffLayer); ok {
			parent := diff.parent.Root()
			children[parent] = append(children[parent], root)
		}
	}
	// remove drops a layer and, recursively, every layer built on top of it
	var remove func(root common.Hash)
	remove = func(root common.Hash) {
		delete(t.layers, root)
		for _, child := range children[root] {
			remove(child)
		}
		delete(children, root)
	}
	// Deleting entries from the map while ranging over it is permitted in Go
	for root, snap := range t.layers {
		if snap.Stale() {
			remove(root)
		}
	}
	// If the disk layer was modified, regenerate all the cumulative blooms
	if persisted != nil {
		// rebloom walks the surviving descendants of the new disk layer,
		// rebuilding the bloom of every diff layer encountered
		var rebloom func(root common.Hash)
		rebloom = func(root common.Hash) {
			if diff, ok := t.layers[root].(*diffLayer); ok {
				diff.rebloom(persisted)
			}
			for _, child := range children[root] {
				rebloom(child)
			}
		}
		rebloom(persisted.root)
	}
	return nil
}
   381  
// cap traverses downwards the diff tree until the number of allowed layers are
// crossed. All diffs beyond the permitted number are flattened downwards. If the
// layer limit is reached, memory cap is also enforced (but not before).
//
// The method returns the new disk layer if diffs were persisted into it, and
// nil otherwise. It writes to t.layers without taking t.lock itself; the
// visible caller (Cap) holds the tree write lock around the call.
//
// Note, the final diff layer count in general will be one more than the amount
// requested. This happens because the bottom-most diff layer is the accumulator
// which may or may not overflow and cascade to disk. Since this last layer's
// survival is only known *after* capping, we need to omit it from the count if
// we want to ensure that *at least* the requested number of diff layers remain.
func (t *Tree) cap(diff *diffLayer, layers int) *diskLayer {
	// Dive until we run out of layers or reach the persistent database
	for i := 0; i < layers-1; i++ {
		// If we still have diff layers below, continue down
		if parent, ok := diff.parent.(*diffLayer); ok {
			diff = parent
		} else {
			// Diff stack too shallow, return without modifications
			return nil
		}
	}
	// We're out of layers, flatten anything below, stopping if it's the disk or if
	// the memory limit is not yet exceeded.
	switch parent := diff.parent.(type) {
	case *diskLayer:
		return nil

	case *diffLayer:
		// Flatten the parent into the grandparent. The flattening internally obtains a
		// write lock on grandparent.
		flattened := parent.flatten().(*diffLayer)
		t.layers[flattened.root] = flattened

		// The deferred unlock runs at function return, so the write lock also
		// covers the final `diff.parent = base` assignment further below.
		diff.lock.Lock()
		defer diff.lock.Unlock()

		diff.parent = flattened
		if flattened.memory < aggregatorMemoryLimit {
			// Accumulator layer is smaller than the limit, so we can abort, unless
			// there's a snapshot being generated currently. In that case, the trie
			// will move from underneath the generator so we **must** merge all the
			// partial data down into the snapshot and restart the generation.
			if flattened.parent.(*diskLayer).genAbort == nil {
				return nil
			}
		}
	default:
		panic(fmt.Sprintf("unknown data layer: %T", parent))
	}
	// If the bottom-most layer is larger than our memory cap, persist to disk
	bottom := diff.parent.(*diffLayer)

	bottom.lock.RLock()
	base := diffToDisk(bottom)
	bottom.lock.RUnlock()

	// Register the new disk layer and rewire the lowest surviving diff onto it
	t.layers[base.root] = base
	diff.parent = base
	return base
}
   443  
// diffToDisk merges a bottom-most diff into the persistent disk layer underneath
// it. The method will panic if called onto a non-bottom-most diff layer.
//
// The disk layer persistence should be operated in an atomic way. All updates should
// be discarded if the whole transition is not finished.
//
// The old disk layer is marked stale and a new disk layer, rooted at the diff's
// root and sharing the old layer's cache and databases, is returned. If a
// snapshot generator was running on the old layer it is aborted and restarted
// on the new one.
func diffToDisk(bottom *diffLayer) *diskLayer {
	var (
		base  = bottom.parent.(*diskLayer)
		batch = base.diskdb.NewBatch()
		stats *generatorStats
	)
	// If the disk layer is running a snapshot generator, abort it. The
	// generator hands its progress stats back over the abort channel.
	if base.genAbort != nil {
		abort := make(chan *generatorStats)
		base.genAbort <- abort
		stats = <-abort
	}
	// Put the deletion in the batch writer, flush all updates in the final step.
	rawdb.DeleteSnapshotRoot(batch)

	// Mark the original base as stale as we're going to create a new wrapper
	base.lock.Lock()
	if base.stale {
		panic("parent disk layer is stale") // we've committed into the same base from two children, boo
	}
	base.stale = true
	base.lock.Unlock()

	// Destroy all the destructed accounts from the database
	for hash := range bottom.destructSet {
		// Skip any account not covered yet by the snapshot
		if base.genMarker != nil && bytes.Compare(hash[:], base.genMarker) > 0 {
			continue
		}
		// Remove the account itself along with all of its storage slots
		rawdb.DeleteAccountSnapshot(batch, hash)
		base.cache.Set(hash[:], nil)

		it := rawdb.IterateStorageSnapshots(base.diskdb, hash)
		for it.Next() {
			if key := it.Key(); len(key) == 65 { // TODO(karalabe): Yuck, we should move this into the iterator
				batch.Delete(key)
				// The cache key is the database key without its first byte
				base.cache.Del(key[1:])

				snapshotFlushStorageItemMeter.Mark(1)
			}
		}
		it.Release()
	}
	// Push all updated accounts into the database
	for hash, data := range bottom.accountData {
		// Skip any account not covered yet by the snapshot
		if base.genMarker != nil && bytes.Compare(hash[:], base.genMarker) > 0 {
			continue
		}
		// Push the account to disk
		rawdb.WriteAccountSnapshot(batch, hash, data)
		base.cache.Set(hash[:], data)
		snapshotCleanAccountWriteMeter.Mark(int64(len(data)))

		snapshotFlushAccountItemMeter.Mark(1)
		snapshotFlushAccountSizeMeter.Mark(int64(len(data)))
	}
	// Push all the storage slots into the database
	for accountHash, storage := range bottom.storageData {
		// Skip any account not covered yet by the snapshot
		if base.genMarker != nil && bytes.Compare(accountHash[:], base.genMarker) > 0 {
			continue
		}
		// Generation might be mid-account, track that case too
		midAccount := base.genMarker != nil && bytes.Equal(accountHash[:], base.genMarker[:common.HashLength])

		for storageHash, data := range storage {
			// Skip any slot not covered yet by the snapshot
			if midAccount && bytes.Compare(storageHash[:], base.genMarker[common.HashLength:]) > 0 {
				continue
			}
			// An empty value denotes a deleted slot, anything else is an update
			if len(data) > 0 {
				rawdb.WriteStorageSnapshot(batch, accountHash, storageHash, data)
				base.cache.Set(append(accountHash[:], storageHash[:]...), data)
				snapshotCleanStorageWriteMeter.Mark(int64(len(data)))
			} else {
				rawdb.DeleteStorageSnapshot(batch, accountHash, storageHash)
				base.cache.Set(append(accountHash[:], storageHash[:]...), nil)
			}
			snapshotFlushStorageItemMeter.Mark(1)
			snapshotFlushStorageSizeMeter.Mark(int64(len(data)))
		}
	}
	// Update the snapshot block marker and write any remainder data
	rawdb.WriteSnapshotRoot(batch, bottom.root)

	// Write out the generator progress marker and report
	journalProgress(batch, base.genMarker, stats)

	// Flush all the updates in the single db operation. Ensure the
	// disk layer transition is atomic.
	if err := batch.Write(); err != nil {
		log.Crit("Failed to write leftover snapshot", "err", err)
	}
	log.Debug("Journalled disk layer", "root", bottom.root, "complete", base.genMarker == nil)
	res := &diskLayer{
		root:       bottom.root,
		cache:      base.cache,
		diskdb:     base.diskdb,
		triedb:     base.triedb,
		genMarker:  base.genMarker,
		genPending: base.genPending,
	}
	// If snapshot generation hasn't finished yet, port over all the stats and
	// continue where the previous round left off.
	//
	// Note, the `base.genAbort` comparison is not used normally, it's checked
	// to allow the tests to play with the marker without triggering this path.
	if base.genMarker != nil && base.genAbort != nil {
		res.genMarker = base.genMarker
		res.genAbort = make(chan chan *generatorStats)
		go res.generate(stats)
	}
	return res
}
   565  
   566  // Journal commits an entire diff hierarchy to disk into a single journal entry.
   567  // This is meant to be used during shutdown to persist the snapshot without
   568  // flattening everything down (bad for reorgs).
   569  //
   570  // The method returns the root hash of the base layer that needs to be persisted
   571  // to disk as a trie too to allow continuing any pending generation op.
   572  func (t *Tree) Journal(root common.Hash) (common.Hash, error) {
   573  	// Retrieve the head snapshot to journal from var snap snapshot
   574  	snap := t.Snapshot(root)
   575  	if snap == nil {
   576  		return common.Hash{}, fmt.Errorf("snapshot [%#x] missing", root)
   577  	}
   578  	// Run the journaling
   579  	t.lock.Lock()
   580  	defer t.lock.Unlock()
   581  
   582  	// Firstly write out the metadata of journal
   583  	journal := new(bytes.Buffer)
   584  	if err := rlp.Encode(journal, journalVersion); err != nil {
   585  		return common.Hash{}, err
   586  	}
   587  	diskroot := t.diskRoot()
   588  	if diskroot == (common.Hash{}) {
   589  		return common.Hash{}, errors.New("invalid disk root")
   590  	}
   591  	// Secondly write out the disk layer root, ensure the
   592  	// diff journal is continuous with disk.
   593  	if err := rlp.Encode(journal, diskroot); err != nil {
   594  		return common.Hash{}, err
   595  	}
   596  	// Finally write out the journal of each layer in reverse order.
   597  	base, err := snap.(snapshot).Journal(journal)
   598  	if err != nil {
   599  		return common.Hash{}, err
   600  	}
   601  	// Store the journal into the database and return
   602  	rawdb.WriteSnapshotJournal(t.diskdb, journal.Bytes())
   603  	return base, nil
   604  }
   605  
   606  // LegacyJournal is basically identical to Journal. it's the legacy
   607  // version for flushing legacy journal. Now the only purpose of this
   608  // function is for testing.
   609  func (t *Tree) LegacyJournal(root common.Hash) (common.Hash, error) {
   610  	// Retrieve the head snapshot to journal from var snap snapshot
   611  	snap := t.Snapshot(root)
   612  	if snap == nil {
   613  		return common.Hash{}, fmt.Errorf("snapshot [%#x] missing", root)
   614  	}
   615  	// Run the journaling
   616  	t.lock.Lock()
   617  	defer t.lock.Unlock()
   618  
   619  	journal := new(bytes.Buffer)
   620  	base, err := snap.(snapshot).LegacyJournal(journal)
   621  	if err != nil {
   622  		return common.Hash{}, err
   623  	}
   624  	// Store the journal into the database and return
   625  	rawdb.WriteSnapshotJournal(t.diskdb, journal.Bytes())
   626  	return base, nil
   627  }
   628  
// Rebuild wipes all available snapshot data from the persistent database and
// discards all caches and diff layers. Afterwards, it starts a new snapshot
// generator with the given root hash.
//
// Every currently tracked layer is marked stale, and the layer set is replaced
// by the single layer returned from generateSnapshot.
func (t *Tree) Rebuild(root common.Hash) {
	t.lock.Lock()
	defer t.lock.Unlock()

	// Firstly delete any recovery flag in the database. Because now we are
	// building a brand new snapshot.
	rawdb.DeleteSnapshotRecoveryNumber(t.diskdb)

	// Track whether there's a wipe currently running and keep it alive if so
	var wiper chan struct{}

	// Iterate over and mark all layers stale
	for _, layer := range t.layers {
		switch layer := layer.(type) {
		case *diskLayer:
			// If the base layer is generating, abort it and save
			if layer.genAbort != nil {
				abort := make(chan *generatorStats)
				layer.genAbort <- abort

				// The generator replies with its stats; a non-nil reply carries
				// the channel of any wipe that is still in progress
				if stats := <-abort; stats != nil {
					wiper = stats.wiping
				}
			}
			// Layer should be inactive now, mark it as stale
			layer.lock.Lock()
			layer.stale = true
			layer.lock.Unlock()

		case *diffLayer:
			// If the layer is a simple diff, simply mark as stale
			layer.lock.Lock()
			atomic.StoreUint32(&layer.stale, 1)
			layer.lock.Unlock()

		default:
			panic(fmt.Sprintf("unknown layer type: %T", layer))
		}
	}
	// Start generating a new snapshot from scratch on a background thread. The
	// generator will run a wiper first if there's not one running right now.
	log.Info("Rebuilding state snapshot")
	t.layers = map[common.Hash]snapshot{
		root: generateSnapshot(t.diskdb, t.triedb, t.cache, root, wiper),
	}
}
   678  
   679  // AccountIterator creates a new account iterator for the specified root hash and
   680  // seeks to a starting account hash.
   681  func (t *Tree) AccountIterator(root common.Hash, seek common.Hash) (AccountIterator, error) {
   682  	ok, err := t.generating()
   683  	if err != nil {
   684  		return nil, err
   685  	}
   686  	if ok {
   687  		return nil, ErrNotConstructed
   688  	}
   689  	return newFastAccountIterator(t, root, seek)
   690  }
   691  
   692  // StorageIterator creates a new storage iterator for the specified root hash and
   693  // account. The iterator will be move to the specific start position.
   694  func (t *Tree) StorageIterator(root common.Hash, account common.Hash, seek common.Hash) (StorageIterator, error) {
   695  	ok, err := t.generating()
   696  	if err != nil {
   697  		return nil, err
   698  	}
   699  	if ok {
   700  		return nil, ErrNotConstructed
   701  	}
   702  	return newFastStorageIterator(t, root, account, seek)
   703  }
   704  
   705  // Verify iterates the whole state(all the accounts as well as the corresponding storages)
   706  // with the specific root and compares the re-computed hash with the original one.
   707  func (t *Tree) Verify(root common.Hash) error {
   708  	acctIt, err := t.AccountIterator(root, common.Hash{})
   709  	if err != nil {
   710  		return err
   711  	}
   712  	defer acctIt.Release()
   713  
   714  	got, err := generateTrieRoot(nil, acctIt, common.Hash{}, stackTrieGenerate, func(db ethdb.KeyValueWriter, accountHash, codeHash common.Hash, stat *generateStats) (common.Hash, error) {
   715  		storageIt, err := t.StorageIterator(root, accountHash, common.Hash{})
   716  		if err != nil {
   717  			return common.Hash{}, err
   718  		}
   719  		defer storageIt.Release()
   720  
   721  		hash, err := generateTrieRoot(nil, storageIt, accountHash, stackTrieGenerate, nil, stat, false)
   722  		if err != nil {
   723  			return common.Hash{}, err
   724  		}
   725  		return hash, nil
   726  	}, newGenerateStats(), true)
   727  
   728  	if err != nil {
   729  		return err
   730  	}
   731  	if got != root {
   732  		return fmt.Errorf("state root hash mismatch: got %x, want %x", got, root)
   733  	}
   734  	return nil
   735  }
   736  
   737  // disklayer is an internal helper function to return the disk layer.
   738  // The lock of snapTree is assumed to be held already.
   739  func (t *Tree) disklayer() *diskLayer {
   740  	var snap snapshot
   741  	for _, s := range t.layers {
   742  		snap = s
   743  		break
   744  	}
   745  	if snap == nil {
   746  		return nil
   747  	}
   748  	switch layer := snap.(type) {
   749  	case *diskLayer:
   750  		return layer
   751  	case *diffLayer:
   752  		return layer.origin
   753  	default:
   754  		panic(fmt.Sprintf("%T: undefined layer", snap))
   755  	}
   756  }
   757  
   758  // diskRoot is a internal helper function to return the disk layer root.
   759  // The lock of snapTree is assumed to be held already.
   760  func (t *Tree) diskRoot() common.Hash {
   761  	disklayer := t.disklayer()
   762  	if disklayer == nil {
   763  		return common.Hash{}
   764  	}
   765  	return disklayer.Root()
   766  }
   767  
   768  // generating is an internal helper function which reports whether the snapshot
   769  // is still under the construction.
   770  func (t *Tree) generating() (bool, error) {
   771  	t.lock.Lock()
   772  	defer t.lock.Unlock()
   773  
   774  	layer := t.disklayer()
   775  	if layer == nil {
   776  		return false, errors.New("disk layer is missing")
   777  	}
   778  	layer.lock.RLock()
   779  	defer layer.lock.RUnlock()
   780  	return layer.genMarker != nil, nil
   781  }
   782  
   783  // diskRoot is a external helper function to return the disk layer root.
   784  func (t *Tree) DiskRoot() common.Hash {
   785  	t.lock.Lock()
   786  	defer t.lock.Unlock()
   787  
   788  	return t.diskRoot()
   789  }