github.com/klaytn/klaytn@v1.10.2/snapshot/snapshot.go

github.com/klaytn/klaytn@v1.10.2/snapshot/snapshot.go (about)

     1  // Modifications Copyright 2021 The klaytn Authors
     2  // Copyright 2020 The go-ethereum Authors
     3  // This file is part of go-ethereum.
     4  //
     5  // go-ethereum is free software: you can redistribute it and/or modify
     6  // it under the terms of the GNU General Public License as published by
     7  // the Free Software Foundation, either version 3 of the License, or
     8  // (at your option) any later version.
     9  //
    10  // go-ethereum is distributed in the hope that it will be useful,
    11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    13  // GNU General Public License for more details.
    14  //
    15  // You should have received a copy of the GNU General Public License
    16  // along with go-ethereum. If not, see <http://www.gnu.org/licenses/>.
    17  //
    18  // This file is derived from core/state/snapshot/snapshot.go (2021/10/21).
    19  // Modified and improved for the klaytn development.
    20  
    21  package snapshot
    22  
    23  import (
    24  	"bytes"
    25  	"errors"
    26  	"fmt"
    27  	"sync"
    28  	"sync/atomic"
    29  
    30  	"github.com/klaytn/klaytn/blockchain/types/account"
    31  	"github.com/klaytn/klaytn/common"
    32  	"github.com/klaytn/klaytn/crypto"
    33  	"github.com/klaytn/klaytn/log"
    34  	"github.com/klaytn/klaytn/rlp"
    35  	"github.com/klaytn/klaytn/storage/database"
    36  	"github.com/klaytn/klaytn/storage/statedb"
    37  	"github.com/rcrowley/go-metrics"
    38  )
    39  
// logger is the package-level logger, created for the log.Snapshot module.
var logger = log.NewModuleLogger(log.Snapshot)
    41  
var (
	// emptyRoot is the known root hash of an empty trie.
	emptyRoot = common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421")

	// emptyCode is the known hash of the empty EVM bytecode, i.e. the
	// Keccak-256 hash computed over nil input.
	emptyCode = crypto.Keccak256Hash(nil)
)
    49  
var (
	// Meters registered under "state/snapshot/clean/account/...".
	// NOTE(review): "clean" presumably refers to the disk layer's read cache;
	// only the write meter's use (in diffToDisk) is visible here - confirm.
	snapshotCleanAccountHitMeter   = metrics.NewRegisteredMeter("state/snapshot/clean/account/hit", nil)
	snapshotCleanAccountMissMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/account/miss", nil)
	snapshotCleanAccountInexMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/account/inex", nil)
	snapshotCleanAccountReadMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/account/read", nil)
	snapshotCleanAccountWriteMeter = metrics.NewRegisteredMeter("state/snapshot/clean/account/write", nil)

	// Meters registered under "state/snapshot/clean/storage/...".
	snapshotCleanStorageHitMeter   = metrics.NewRegisteredMeter("state/snapshot/clean/storage/hit", nil)
	snapshotCleanStorageMissMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/storage/miss", nil)
	snapshotCleanStorageInexMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/storage/inex", nil)
	snapshotCleanStorageReadMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/storage/read", nil)
	snapshotCleanStorageWriteMeter = metrics.NewRegisteredMeter("state/snapshot/clean/storage/write", nil)

	// Meters registered under "state/snapshot/dirty/account/...".
	// NOTE(review): "dirty" presumably refers to the in-memory diff layers - confirm.
	snapshotDirtyAccountHitMeter   = metrics.NewRegisteredMeter("state/snapshot/dirty/account/hit", nil)
	snapshotDirtyAccountMissMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/account/miss", nil)
	snapshotDirtyAccountInexMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/account/inex", nil)
	snapshotDirtyAccountReadMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/account/read", nil)
	snapshotDirtyAccountWriteMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/account/write", nil)

	// Meters registered under "state/snapshot/dirty/storage/...".
	snapshotDirtyStorageHitMeter   = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/hit", nil)
	snapshotDirtyStorageMissMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/miss", nil)
	snapshotDirtyStorageInexMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/inex", nil)
	snapshotDirtyStorageReadMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/read", nil)
	snapshotDirtyStorageWriteMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/write", nil)

	// Histograms of the depth at which dirty hits occur, backed by an
	// exponentially decaying sample.
	snapshotDirtyAccountHitDepthHist = metrics.NewRegisteredHistogram("state/snapshot/dirty/account/hit/depth", nil, metrics.NewExpDecaySample(1028, 0.015))
	snapshotDirtyStorageHitDepthHist = metrics.NewRegisteredHistogram("state/snapshot/dirty/storage/hit/depth", nil, metrics.NewExpDecaySample(1028, 0.015))

	// Meters counting the items and bytes flushed to disk; marked by diffToDisk.
	snapshotFlushAccountItemMeter = metrics.NewRegisteredMeter("state/snapshot/flush/account/item", nil)
	snapshotFlushAccountSizeMeter = metrics.NewRegisteredMeter("state/snapshot/flush/account/size", nil)
	snapshotFlushStorageItemMeter = metrics.NewRegisteredMeter("state/snapshot/flush/storage/item", nil)
	snapshotFlushStorageSizeMeter = metrics.NewRegisteredMeter("state/snapshot/flush/storage/size", nil)

	// TODO-Klaytn-Snapshot update snapshotBloomIndexTimer
	// snapshotBloomIndexTimer = metrics.NewRegisteredResettingTimer("state/snapshot/bloom/index", nil)
	snapshotBloomErrorGauge = metrics.NewRegisteredGaugeFloat64("state/snapshot/bloom/error", nil)

	// Meters registered under "state/snapshot/bloom/account/...".
	snapshotBloomAccountTrueHitMeter  = metrics.NewRegisteredMeter("state/snapshot/bloom/account/truehit", nil)
	snapshotBloomAccountFalseHitMeter = metrics.NewRegisteredMeter("state/snapshot/bloom/account/falsehit", nil)
	snapshotBloomAccountMissMeter     = metrics.NewRegisteredMeter("state/snapshot/bloom/account/miss", nil)

	// Meters registered under "state/snapshot/bloom/storage/...".
	snapshotBloomStorageTrueHitMeter  = metrics.NewRegisteredMeter("state/snapshot/bloom/storage/truehit", nil)
	snapshotBloomStorageFalseHitMeter = metrics.NewRegisteredMeter("state/snapshot/bloom/storage/falsehit", nil)
	snapshotBloomStorageMissMeter     = metrics.NewRegisteredMeter("state/snapshot/bloom/storage/miss", nil)

	// ErrSnapshotStale is returned from data accessors if the underlying snapshot
	// layer had been invalidated due to the chain progressing forward far enough
	// to not maintain the layer's original state.
	ErrSnapshotStale = errors.New("snapshot stale")

	// ErrNotCoveredYet is returned from data accessors if the underlying snapshot
	// is being generated currently and the requested data item is not yet in the
	// range of accounts covered.
	ErrNotCoveredYet = errors.New("not covered yet")

	// ErrNotConstructed is returned if the callers want to iterate the snapshot
	// while the generation is not finished yet.
	ErrNotConstructed = errors.New("snapshot is not constructed")

	// errSnapshotCycle is returned if a snapshot is attempted to be inserted
	// that forms a cycle in the snapshot tree (see Tree.Update, which rejects
	// a block root equal to its parent root).
	errSnapshotCycle = errors.New("snapshot cycle")
)
   113  
// Snapshot represents the functionality supported by a snapshot storage layer.
// It is the read-only view handed out to callers by Tree.Snapshot and
// Tree.Snapshots.
type Snapshot interface {
	// Root returns the root hash for which this snapshot was made.
	Root() common.Hash

	// Account directly retrieves the account associated with a particular hash in
	// the snapshot slim data format.
	Account(hash common.Hash) (account.Account, error)

	// AccountRLP directly retrieves the account RLP associated with a particular
	// hash in the snapshot slim data format.
	AccountRLP(hash common.Hash) ([]byte, error)

	// Storage directly retrieves the storage data associated with a particular hash,
	// within a particular account.
	Storage(accountHash, storageHash common.Hash) ([]byte, error)
}
   131  
// snapshot is the internal version of the snapshot data layer that supports some
// additional methods compared to the public API. Within this file the concrete
// implementations type-switched on are *diskLayer and *diffLayer.
type snapshot interface {
	Snapshot

	// Parent returns the subsequent layer of a snapshot, or nil if the base was
	// reached.
	//
	// Note, the method is an internal helper to avoid type switching between the
	// disk and diff layers. There is no locking involved.
	Parent() snapshot

	// Update creates a new layer on top of the existing snapshot diff tree with
	// the specified data items.
	//
	// Note, the maps are retained by the method to avoid copying everything.
	Update(blockRoot common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer

	// Journal commits an entire diff hierarchy to disk into a single journal entry.
	// This is meant to be used during shutdown to persist the snapshot without
	// flattening everything down (bad for reorgs).
	Journal(buffer *bytes.Buffer) (common.Hash, error)

	// Stale returns whether this layer has become stale (was flattened across) or
	// if it's still live.
	Stale() bool

	// AccountIterator creates an account iterator over an arbitrary layer,
	// taking a seek hash.
	AccountIterator(seek common.Hash) AccountIterator

	// StorageIterator creates a storage iterator over an arbitrary layer for
	// the given account, taking a seek hash.
	// NOTE(review): the meaning of the boolean result is not visible in this
	// file - confirm against the layer implementations.
	StorageIterator(account common.Hash, seek common.Hash) (StorageIterator, bool)
}
   165  
// Tree is an Ethereum state snapshot tree. It consists of one persistent base
// layer backed by a key-value store, on top of which arbitrarily many in-memory
// diff layers are topped. The memory diffs can form a tree with branching, but
// the disk layer is singleton and common to all. If a reorg goes deeper than the
// disk layer, everything needs to be deleted.
//
// The goal of a state snapshot is twofold: to allow direct access to account and
// storage data to avoid expensive multi-level trie lookups; and to allow sorted,
// cheap iteration of the account/storage tries for sync aid.
type Tree struct {
	diskdb database.DBManager       // Persistent database to store the snapshot
	triedb *statedb.Database        // In-memory cache to access the trie through
	cache  int                      // Megabytes permitted to use for read caches
	layers map[common.Hash]snapshot // Collection of all known layers, keyed by layer root
	lock   sync.RWMutex             // Guards access to the layers map

	// Test hooks
	onFlatten func() // Hook invoked when the bottom most diff layers are flattened
}
   185  
   186  // New attempts to load an already existing snapshot from a persistent key-value
   187  // store (with a number of memory layers from a journal), ensuring that the head
   188  // of the snapshot matches the expected one.
   189  //
   190  // If the snapshot is missing or the disk layer is broken, the snapshot will be
   191  // reconstructed using both the existing data and the state trie.
   192  // The repair happens on a background thread.
   193  //
   194  // If the memory layers in the journal do not match the disk layer (e.g. there is
   195  // a gap) or the journal is missing, there are two repair cases:
   196  //
   197  // - if the 'recovery' parameter is true, all memory diff-layers will be discarded.
   198  //   This case happens when the snapshot is 'ahead' of the state trie.
   199  // - otherwise, the entire snapshot is considered invalid and will be recreated on
   200  //   a background thread.
   201  func New(diskdb database.DBManager, triedb *statedb.Database, cache int, root common.Hash, async bool, rebuild bool, recovery bool) (*Tree, error) {
   202  	// Create a new, empty snapshot tree
   203  	snap := &Tree{
   204  		diskdb: diskdb,
   205  		triedb: triedb,
   206  		cache:  cache,
   207  		layers: make(map[common.Hash]snapshot),
   208  	}
   209  	if !async {
   210  		defer snap.waitBuild()
   211  	}
   212  	// Attempt to load a previously persisted snapshot and rebuild one if failed
   213  	head, disabled, err := loadSnapshot(diskdb, triedb, cache, root, recovery)
   214  	if disabled {
   215  		logger.Warn("Snapshot maintenance disabled (syncing)")
   216  		return snap, nil
   217  	}
   218  	if err != nil {
   219  		if rebuild {
   220  			logger.Warn("Failed to load snapshot, regenerating", "err", err)
   221  			snap.Rebuild(root)
   222  			return snap, nil
   223  		}
   224  		return nil, err // Bail out the error, don't rebuild automatically.
   225  	}
   226  	// Existing snapshot loaded, seed all the layers
   227  	for head != nil {
   228  		snap.layers[head.Root()] = head
   229  		head = head.Parent()
   230  	}
   231  	return snap, nil
   232  }
   233  
   234  // waitBuild blocks until the snapshot finishes rebuilding. This method is meant
   235  // to be used by tests to ensure we're testing what we believe we are.
   236  func (t *Tree) waitBuild() {
   237  	// Find the rebuild termination channel
   238  	var done chan struct{}
   239  
   240  	t.lock.RLock()
   241  	for _, layer := range t.layers {
   242  		if layer, ok := layer.(*diskLayer); ok {
   243  			done = layer.genPending
   244  			break
   245  		}
   246  	}
   247  	t.lock.RUnlock()
   248  
   249  	// Wait until the snapshot is generated
   250  	if done != nil {
   251  		<-done
   252  	}
   253  }
   254  
   255  // Disable interrupts any pending snapshot generator, deletes all the snapshot
   256  // layers in memory and marks snapshots disabled globally. In order to resume
   257  // the snapshot functionality, the caller must invoke Rebuild.
   258  func (t *Tree) Disable() {
   259  	// Interrupt any live snapshot layers
   260  	t.lock.Lock()
   261  	defer t.lock.Unlock()
   262  
   263  	for _, layer := range t.layers {
   264  		switch layer := layer.(type) {
   265  		case *diskLayer:
   266  			// If the base layer is generating, abort it
   267  			if layer.genAbort != nil {
   268  				abort := make(chan *generatorStats)
   269  				layer.genAbort <- abort
   270  				<-abort
   271  			}
   272  			// Layer should be inactive now, mark it as stale
   273  			layer.lock.Lock()
   274  			layer.stale = true
   275  			layer.lock.Unlock()
   276  
   277  		case *diffLayer:
   278  			// If the layer is a simple diff, simply mark as stale
   279  			layer.lock.Lock()
   280  			atomic.StoreUint32(&layer.stale, 1)
   281  			layer.lock.Unlock()
   282  
   283  		default:
   284  			panic(fmt.Sprintf("unknown layer type: %T", layer))
   285  		}
   286  	}
   287  	t.layers = map[common.Hash]snapshot{}
   288  
   289  	// Delete all snapshot liveness information from the database
   290  	batch := t.diskdb.NewSnapshotDBBatch()
   291  
   292  	batch.WriteSnapshotDisabled()
   293  	batch.DeleteSnapshotRoot()
   294  	batch.DeleteSnapshotJournal()
   295  	batch.DeleteSnapshotGenerator()
   296  	batch.DeleteSnapshotRecoveryNumber()
   297  	// Note, we don't delete the sync progress
   298  
   299  	if err := batch.Write(); err != nil {
   300  		logger.Crit("Failed to disable snapshots", "err", err)
   301  	}
   302  }
   303  
   304  // Snapshot retrieves a snapshot belonging to the given block root, or nil if no
   305  // snapshot is maintained for that block.
   306  func (t *Tree) Snapshot(blockRoot common.Hash) Snapshot {
   307  	t.lock.RLock()
   308  	defer t.lock.RUnlock()
   309  
   310  	return t.layers[blockRoot]
   311  }
   312  
   313  // Snapshots returns all visited layers from the topmost layer with specific
   314  // root and traverses downward. The layer amount is limited by the given number.
   315  // If nodisk is set, then disk layer is excluded.
   316  func (t *Tree) Snapshots(root common.Hash, limits int, nodisk bool) []Snapshot {
   317  	t.lock.RLock()
   318  	defer t.lock.RUnlock()
   319  
   320  	if limits == 0 {
   321  		return nil
   322  	}
   323  	layer := t.layers[root]
   324  	if layer == nil {
   325  		return nil
   326  	}
   327  	var ret []Snapshot
   328  	for {
   329  		if _, isdisk := layer.(*diskLayer); isdisk && nodisk {
   330  			break
   331  		}
   332  		ret = append(ret, layer)
   333  		limits -= 1
   334  		if limits == 0 {
   335  			break
   336  		}
   337  		parent := layer.Parent()
   338  		if parent == nil {
   339  			break
   340  		}
   341  		layer = parent
   342  	}
   343  	return ret
   344  }
   345  
   346  // Update adds a new snapshot into the tree, if that can be linked to an existing
   347  // old parent. It is disallowed to insert a disk layer (the origin of all).
   348  func (t *Tree) Update(blockRoot common.Hash, parentRoot common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) error {
   349  	// Reject noop updates to avoid self-loops in the snapshot tree. This is a
   350  	// special case that can only happen for Clique networks where empty blocks
   351  	// don't modify the state (0 block subsidy).
   352  	//
   353  	// Although we could silently ignore this internally, it should be the caller's
   354  	// responsibility to avoid even attempting to insert such a snapshot.
   355  	if blockRoot == parentRoot {
   356  		return errSnapshotCycle
   357  	}
   358  	// Generate a new snapshot on top of the parent
   359  	parent := t.Snapshot(parentRoot)
   360  	if parent == nil {
   361  		return fmt.Errorf("parent [%#x] snapshot missing", parentRoot)
   362  	}
   363  	snap := parent.(snapshot).Update(blockRoot, destructs, accounts, storage)
   364  
   365  	// Save the new snapshot for later
   366  	t.lock.Lock()
   367  	defer t.lock.Unlock()
   368  
   369  	t.layers[snap.root] = snap
   370  	return nil
   371  }
   372  
// Cap traverses downwards the snapshot tree from a head block hash until the
// number of allowed layers are crossed. All layers beyond the permitted number
// are flattened downwards.
//
// It returns an error if no snapshot exists for root, or if that snapshot is
// the disk layer rather than a diff layer.
//
// Note, the final diff layer count in general will be one more than the amount
// requested. This happens because the bottom-most diff layer is the accumulator
// which may or may not overflow and cascade to disk. Since this last layer's
// survival is only known *after* capping, we need to omit it from the count if
// we want to ensure that *at least* the requested number of diff layers remain.
func (t *Tree) Cap(root common.Hash, layers int) error {
	// Retrieve the head snapshot to cap from
	snap := t.Snapshot(root)
	if snap == nil {
		return fmt.Errorf("snapshot [%#x] missing", root)
	}
	diff, ok := snap.(*diffLayer)
	if !ok {
		return fmt.Errorf("snapshot [%#x] is disk layer", root)
	}
	// If the generator is still running, use a more aggressive cap
	// (at most 8 retained diff layers).
	diff.origin.lock.RLock()
	if diff.origin.genMarker != nil && layers > 8 {
		layers = 8
	}
	diff.origin.lock.RUnlock()

	// Run the internal capping and discard all stale layers
	t.lock.Lock()
	defer t.lock.Unlock()

	// Flattening the bottom-most diff layer requires special casing since there's
	// no child to rewire to the grandparent. A request for zero layers is
	// therefore handled here directly instead of going through t.cap.
	if layers == 0 {
		// If full commit was requested, flatten the diffs and merge onto disk
		diff.lock.RLock()
		base := diffToDisk(diff.flatten().(*diffLayer))
		diff.lock.RUnlock()

		// Replace the entire snapshot tree with the flat base
		t.layers = map[common.Hash]snapshot{base.root: base}
		return nil
	}
	// persisted is the new disk layer, or nil if nothing was written to disk.
	persisted := t.cap(diff, layers)

	// Remove any layer that is stale or links into a stale layer.
	// First index the diff layers by their parent's root.
	children := make(map[common.Hash][]common.Hash)
	for root, snap := range t.layers {
		if diff, ok := snap.(*diffLayer); ok {
			parent := diff.parent.Root()
			children[parent] = append(children[parent], root)
		}
	}
	// remove drops a layer and, recursively, every layer built on top of it.
	var remove func(root common.Hash)
	remove = func(root common.Hash) {
		delete(t.layers, root)
		for _, child := range children[root] {
			remove(child)
		}
		delete(children, root)
	}
	for root, snap := range t.layers {
		if snap.Stale() {
			remove(root)
		}
	}
	// If the disk layer was modified, regenerate all the cumulative blooms
	if persisted != nil {
		// rebloom walks the surviving descendants of the new disk layer.
		var rebloom func(root common.Hash)
		rebloom = func(root common.Hash) {
			if diff, ok := t.layers[root].(*diffLayer); ok {
				diff.rebloom(persisted)
			}
			for _, child := range children[root] {
				rebloom(child)
			}
		}
		rebloom(persisted.root)
	}
	return nil
}
   454  
// cap traverses downwards the diff tree until the number of allowed layers are
// crossed. All diffs beyond the permitted number are flattened downwards. If the
// layer limit is reached, memory cap is also enforced (but not before).
//
// The method returns the new disk layer if diffs were persisted into it, and
// nil otherwise. It writes to t.layers without taking t.lock itself; Cap calls
// it while holding the write lock.
//
// Note, the final diff layer count in general will be one more than the amount
// requested. This happens because the bottom-most diff layer is the accumulator
// which may or may not overflow and cascade to disk. Since this last layer's
// survival is only known *after* capping, we need to omit it from the count if
// we want to ensure that *at least* the requested number of diff layers remain.
func (t *Tree) cap(diff *diffLayer, layers int) *diskLayer {
	// Dive until we run out of layers or reach the persistent database
	for i := 0; i < layers-1; i++ {
		// If we still have diff layers below, continue down
		if parent, ok := diff.parent.(*diffLayer); ok {
			diff = parent
		} else {
			// Diff stack too shallow, return without modifications
			return nil
		}
	}
	// We're out of layers, flatten anything below, stopping if it's the disk or if
	// the memory limit is not yet exceeded.
	switch parent := diff.parent.(type) {
	case *diskLayer:
		// Nothing sits between this diff and the disk layer; nothing to flatten.
		return nil

	case *diffLayer:
		// Hold the write lock until the flattened parent is linked correctly.
		// Otherwise, the stale layer may be accessed by external reads in the
		// meantime.
		diff.lock.Lock()
		defer diff.lock.Unlock()

		// Flatten the parent into the grandparent. The flattening internally obtains a
		// write lock on grandparent.
		flattened := parent.flatten().(*diffLayer)
		t.layers[flattened.root] = flattened

		// Invoke the hook if it's registered. Ugly hack.
		if t.onFlatten != nil {
			t.onFlatten()
		}
		diff.parent = flattened
		if flattened.memory < aggregatorMemoryLimit {
			// Accumulator layer is smaller than the limit, so we can abort, unless
			// there's a snapshot being generated currently. In that case, the trie
			// will move from underneath the generator so we **must** merge all the
			// partial data down into the snapshot and restart the generation.
			if flattened.parent.(*diskLayer).genAbort == nil {
				return nil
			}
		}
	default:
		panic(fmt.Sprintf("unknown data layer: %T", parent))
	}
	// If the bottom-most layer is larger than our memory cap, persist to disk
	bottom := diff.parent.(*diffLayer)

	bottom.lock.RLock()
	base := diffToDisk(bottom)
	bottom.lock.RUnlock()

	// Register the new disk layer and rewire the surviving diff onto it.
	t.layers[base.root] = base
	diff.parent = base
	return base
}
   523  
   524  // diffToDisk merges a bottom-most diff into the persistent disk layer underneath
   525  // it. The method will panic if called onto a non-bottom-most diff layer.
   526  //
   527  // The disk layer persistence should be operated in an atomic way. All updates should
   528  // be discarded if the whole transition if not finished.
   529  func diffToDisk(bottom *diffLayer) *diskLayer {
   530  	var (
   531  		base  = bottom.parent.(*diskLayer)
   532  		batch = base.diskdb.NewSnapshotDBBatch()
   533  		stats *generatorStats
   534  	)
   535  	// If the disk layer is running a snapshot generator, abort it
   536  	if base.genAbort != nil {
   537  		abort := make(chan *generatorStats)
   538  		base.genAbort <- abort
   539  		stats = <-abort
   540  	}
   541  	// Put the deletion in the batch writer, flush all updates in the final step.
   542  	batch.DeleteSnapshotRoot()
   543  
   544  	// Mark the original base as stale as we're going to create a new wrapper
   545  	base.lock.Lock()
   546  	if base.stale {
   547  		panic("parent disk layer is stale") // we've committed into the same base from two children, boo
   548  	}
   549  	base.stale = true
   550  	base.lock.Unlock()
   551  
   552  	// Destroy all the destructed accounts from the database
   553  	for hash := range bottom.destructSet {
   554  		// Skip any account not covered yet by the snapshot
   555  		if base.genMarker != nil && bytes.Compare(hash[:], base.genMarker) > 0 {
   556  			continue
   557  		}
   558  		// Remove all storage slots
   559  		batch.DeleteAccountSnapshot(hash)
   560  		base.cache.Set(hash[:], nil)
   561  
   562  		it := base.diskdb.NewSnapshotDBIterator(database.StorageSnapshotsKey(hash), nil)
   563  		for it.Next() {
   564  			if key := it.Key(); len(key) == 65 { // TODO(karalabe): Yuck, we should move this into the iterator
   565  				batch.Delete(key)
   566  				base.cache.Del(key[1:])
   567  				snapshotFlushStorageItemMeter.Mark(1)
   568  
   569  				// Ensure we don't delete too much data blindly (contract can be
   570  				// huge). It's ok to flush, the root will go missing in case of a
   571  				// crash and we'll detect and regenerate the snapshot.
   572  				if batch.ValueSize() > database.IdealBatchSize {
   573  					if err := batch.Write(); err != nil {
   574  						logger.Crit("Failed to write storage deletions", "err", err)
   575  					}
   576  					batch.Reset()
   577  				}
   578  			}
   579  		}
   580  		it.Release()
   581  	}
   582  	// Push all updated accounts into the database
   583  	for hash, data := range bottom.accountData {
   584  		// Skip any account not covered yet by the snapshot
   585  		if base.genMarker != nil && bytes.Compare(hash[:], base.genMarker) > 0 {
   586  			continue
   587  		}
   588  		// Push the account to disk
   589  		batch.WriteAccountSnapshot(hash, data)
   590  		base.cache.Set(hash[:], data)
   591  		snapshotCleanAccountWriteMeter.Mark(int64(len(data)))
   592  
   593  		snapshotFlushAccountItemMeter.Mark(1)
   594  		snapshotFlushAccountSizeMeter.Mark(int64(len(data)))
   595  
   596  		// Ensure we don't write too much data blindly. It's ok to flush, the
   597  		// root will go missing in case of a crash and we'll detect and regen
   598  		// the snapshot.
   599  		if batch.ValueSize() > database.IdealBatchSize {
   600  			if err := batch.Write(); err != nil {
   601  				logger.Crit("Failed to write storage deletions", "err", err)
   602  			}
   603  			batch.Reset()
   604  		}
   605  	}
   606  	// Push all the storage slots into the database
   607  	for accountHash, storage := range bottom.storageData {
   608  		// Skip any account not covered yet by the snapshot
   609  		if base.genMarker != nil && bytes.Compare(accountHash[:], base.genMarker) > 0 {
   610  			continue
   611  		}
   612  		// Generation might be mid-account, track that case too
   613  		midAccount := base.genMarker != nil && bytes.Equal(accountHash[:], base.genMarker[:common.HashLength])
   614  
   615  		for storageHash, data := range storage {
   616  			// Skip any slot not covered yet by the snapshot
   617  			if midAccount && bytes.Compare(storageHash[:], base.genMarker[common.HashLength:]) > 0 {
   618  				continue
   619  			}
   620  			if len(data) > 0 {
   621  				batch.WriteStorageSnapshot(accountHash, storageHash, data)
   622  				base.cache.Set(append(accountHash[:], storageHash[:]...), data)
   623  				snapshotCleanStorageWriteMeter.Mark(int64(len(data)))
   624  			} else {
   625  				batch.DeleteStorageSnapshot(accountHash, storageHash)
   626  				base.cache.Set(append(accountHash[:], storageHash[:]...), nil)
   627  			}
   628  			snapshotFlushStorageItemMeter.Mark(1)
   629  			snapshotFlushStorageSizeMeter.Mark(int64(len(data)))
   630  		}
   631  	}
   632  	// Update the snapshot block marker and write any remainder data
   633  	batch.WriteSnapshotRoot(bottom.root)
   634  
   635  	// Write out the generator progress marker and report
   636  	journalProgress(batch, base.genMarker, stats)
   637  
   638  	// Flush all the updates in the single db operation. Ensure the
   639  	// disk layer transition is atomic.
   640  	if err := batch.Write(); err != nil {
   641  		logger.Crit("Failed to write leftover snapshot", "err", err)
   642  	}
   643  	logger.Debug("Journalled disk layer", "root", bottom.root, "complete", base.genMarker == nil)
   644  	res := &diskLayer{
   645  		root:       bottom.root,
   646  		cache:      base.cache,
   647  		diskdb:     base.diskdb,
   648  		triedb:     base.triedb,
   649  		genMarker:  base.genMarker,
   650  		genPending: base.genPending,
   651  	}
   652  	// If snapshot generation hasn't finished yet, port over all the starts and
   653  	// continue where the previous round left off.
   654  	//
   655  	// Note, the `base.genAbort` comparison is not used normally, it's checked
   656  	// to allow the tests to play with the marker without triggering this path.
   657  	if base.genMarker != nil && base.genAbort != nil {
   658  		res.genMarker = base.genMarker
   659  		res.genAbort = make(chan chan *generatorStats)
   660  		go res.generate(stats)
   661  	}
   662  	return res
   663  }
   664  
   665  // Journal commits an entire diff hierarchy to disk into a single journal entry.
   666  // This is meant to be used during shutdown to persist the snapshot without
   667  // flattening everything down (bad for reorgs).
   668  //
   669  // The method returns the root hash of the base layer that needs to be persisted
   670  // to disk as a trie too to allow continuing any pending generation op.
   671  func (t *Tree) Journal(root common.Hash) (common.Hash, error) {
   672  	// Retrieve the head snapshot to journal from var snap snapshot
   673  	snap := t.Snapshot(root)
   674  	if snap == nil {
   675  		return common.Hash{}, fmt.Errorf("snapshot [%#x] missing", root)
   676  	}
   677  	// Run the journaling
   678  	t.lock.Lock()
   679  	defer t.lock.Unlock()
   680  
   681  	// Firstly write out the metadata of journal
   682  	journal := new(bytes.Buffer)
   683  	if err := rlp.Encode(journal, journalVersion); err != nil {
   684  		return common.Hash{}, err
   685  	}
   686  	diskroot := t.diskRoot()
   687  	if diskroot == (common.Hash{}) {
   688  		return common.Hash{}, errors.New("invalid disk root")
   689  	}
   690  	// Secondly write out the disk layer root, ensure the
   691  	// diff journal is continuous with disk.
   692  	if err := rlp.Encode(journal, diskroot); err != nil {
   693  		return common.Hash{}, err
   694  	}
   695  	// Finally write out the journal of each layer in reverse order.
   696  	base, err := snap.(snapshot).Journal(journal)
   697  	if err != nil {
   698  		return common.Hash{}, err
   699  	}
   700  	// Store the journal into the database and return
   701  	t.diskdb.WriteSnapshotJournal(journal.Bytes())
   702  	return base, nil
   703  }
   704  
   705  // Rebuild wipes all available snapshot data from the persistent database and
   706  // discard all caches and diff layers. Afterwards, it starts a new snapshot
   707  // generator with the given root hash.
   708  func (t *Tree) Rebuild(root common.Hash) {
   709  	t.lock.Lock()
   710  	defer t.lock.Unlock()
   711  
   712  	// Firstly delete any recovery flag in the database. Because now we are
   713  	// building a brand new snapshot. Also reenable the snapshot feature.
   714  	t.diskdb.DeleteSnapshotRecoveryNumber()
   715  	t.diskdb.DeleteSnapshotDisabled()
   716  
   717  	// Iterate over and mark all layers stale
   718  	for _, layer := range t.layers {
   719  		switch layer := layer.(type) {
   720  		case *diskLayer:
   721  			// If the base layer is generating, abort it and save
   722  			if layer.genAbort != nil {
   723  				abort := make(chan *generatorStats)
   724  				layer.genAbort <- abort
   725  				<-abort
   726  			}
   727  			// Layer should be inactive now, mark it as stale
   728  			layer.lock.Lock()
   729  			layer.stale = true
   730  			layer.lock.Unlock()
   731  
   732  		case *diffLayer:
   733  			// If the layer is a simple diff, simply mark as stale
   734  			layer.lock.Lock()
   735  			atomic.StoreUint32(&layer.stale, 1)
   736  			layer.lock.Unlock()
   737  
   738  		default:
   739  			panic(fmt.Sprintf("unknown layer type: %T", layer))
   740  		}
   741  	}
   742  	// Start generating a new snapshot from scratch on a background thread. The
   743  	// generator will run a wiper first if there's not one running right now.
   744  	logger.Info("Rebuilding state snapshot")
   745  	t.layers = map[common.Hash]snapshot{
   746  		root: generateSnapshot(t.diskdb, t.triedb, t.cache, root),
   747  	}
   748  }
   749  
   750  // AccountIterator creates a new account iterator for the specified root hash and
   751  // seeks to a starting account hash.
   752  func (t *Tree) AccountIterator(root common.Hash, seek common.Hash) (AccountIterator, error) {
   753  	ok, err := t.generating()
   754  	if err != nil {
   755  		return nil, err
   756  	}
   757  	if ok {
   758  		return nil, ErrNotConstructed
   759  	}
   760  	return newFastAccountIterator(t, root, seek)
   761  }
   762  
   763  // StorageIterator creates a new storage iterator for the specified root hash and
   764  // account. The iterator will be move to the specific start position.
   765  func (t *Tree) StorageIterator(root common.Hash, account common.Hash, seek common.Hash) (StorageIterator, error) {
   766  	ok, err := t.generating()
   767  	if err != nil {
   768  		return nil, err
   769  	}
   770  	if ok {
   771  		return nil, ErrNotConstructed
   772  	}
   773  	return newFastStorageIterator(t, root, account, seek)
   774  }
   775  
   776  // Verify iterates the whole state(all the accounts as well as the corresponding storages)
   777  // with the specific root and compares the re-computed hash with the original one.
   778  func (t *Tree) Verify(root common.Hash) error {
   779  	acctIt, err := t.AccountIterator(root, common.Hash{})
   780  	if err != nil {
   781  		return err
   782  	}
   783  	defer acctIt.Release()
   784  
   785  	got, err := generateTrieRoot(acctIt, common.Hash{}, trieGenerate, func(accountHash, codeHash common.Hash, stat *generateStats) (common.Hash, error) {
   786  		storageIt, err := t.StorageIterator(root, accountHash, common.Hash{})
   787  		if err != nil {
   788  			return common.Hash{}, err
   789  		}
   790  		defer storageIt.Release()
   791  
   792  		hash, err := generateTrieRoot(storageIt, accountHash, trieGenerate, nil, stat, false)
   793  		if err != nil {
   794  			return common.Hash{}, err
   795  		}
   796  		return hash, nil
   797  	}, newGenerateStats(), true)
   798  	if err != nil {
   799  		return err
   800  	}
   801  	if got != root {
   802  		return fmt.Errorf("state root hash mismatch: got %x, want %x", got, root)
   803  	}
   804  	return nil
   805  }
   806  
   807  // disklayer is an internal helper function to return the disk layer.
   808  // The lock of snapTree is assumed to be held already.
   809  func (t *Tree) disklayer() *diskLayer {
   810  	var snap snapshot
   811  	for _, s := range t.layers {
   812  		snap = s
   813  		break
   814  	}
   815  	if snap == nil {
   816  		return nil
   817  	}
   818  	switch layer := snap.(type) {
   819  	case *diskLayer:
   820  		return layer
   821  	case *diffLayer:
   822  		return layer.origin
   823  	default:
   824  		panic(fmt.Sprintf("%T: undefined layer", snap))
   825  	}
   826  }
   827  
   828  // diskRoot is a internal helper function to return the disk layer root.
   829  // The lock of snapTree is assumed to be held already.
   830  func (t *Tree) diskRoot() common.Hash {
   831  	disklayer := t.disklayer()
   832  	if disklayer == nil {
   833  		return common.Hash{}
   834  	}
   835  	return disklayer.Root()
   836  }
   837  
   838  // generating is an internal helper function which reports whether the snapshot
   839  // is still under the construction.
   840  func (t *Tree) generating() (bool, error) {
   841  	t.lock.Lock()
   842  	defer t.lock.Unlock()
   843  
   844  	layer := t.disklayer()
   845  	if layer == nil {
   846  		return false, errors.New("disk layer is missing")
   847  	}
   848  	layer.lock.RLock()
   849  	defer layer.lock.RUnlock()
   850  	return layer.genMarker != nil, nil
   851  }
   852  
   853  // diskRoot is a external helper function to return the disk layer root.
   854  func (t *Tree) DiskRoot() common.Hash {
   855  	t.lock.Lock()
   856  	defer t.lock.Unlock()
   857  
   858  	return t.diskRoot()
   859  }