github.com/ethereum/go-ethereum@v1.16.1/core/state/snapshot/snapshot.go

github.com/ethereum/go-ethereum@v1.16.1/core/state/snapshot/snapshot.go (about)

     1  // Copyright 2019 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  // Package snapshot implements a journalled, dynamic state dump.
    18  package snapshot
    19  
    20  import (
    21  	"bytes"
    22  	"errors"
    23  	"fmt"
    24  	"sync"
    25  
    26  	"github.com/ethereum/go-ethereum/common"
    27  	"github.com/ethereum/go-ethereum/core/rawdb"
    28  	"github.com/ethereum/go-ethereum/core/types"
    29  	"github.com/ethereum/go-ethereum/ethdb"
    30  	"github.com/ethereum/go-ethereum/log"
    31  	"github.com/ethereum/go-ethereum/metrics"
    32  	"github.com/ethereum/go-ethereum/rlp"
    33  	"github.com/ethereum/go-ethereum/triedb"
    34  )
    35  
var (
	// Meters registered under state/snapshot/clean/account. Within this file
	// only the write meter is used: diffToDisk marks it with the byte size of
	// every account blob it pushes into the disk layer cache.
	snapshotCleanAccountHitMeter   = metrics.NewRegisteredMeter("state/snapshot/clean/account/hit", nil)
	snapshotCleanAccountMissMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/account/miss", nil)
	snapshotCleanAccountInexMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/account/inex", nil)
	snapshotCleanAccountReadMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/account/read", nil)
	snapshotCleanAccountWriteMeter = metrics.NewRegisteredMeter("state/snapshot/clean/account/write", nil)

	// Meters registered under state/snapshot/clean/storage. Within this file
	// only the write meter is used: diffToDisk marks it with the byte size of
	// every storage slot it pushes into the disk layer cache.
	snapshotCleanStorageHitMeter   = metrics.NewRegisteredMeter("state/snapshot/clean/storage/hit", nil)
	snapshotCleanStorageMissMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/storage/miss", nil)
	snapshotCleanStorageInexMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/storage/inex", nil)
	snapshotCleanStorageReadMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/storage/read", nil)
	snapshotCleanStorageWriteMeter = metrics.NewRegisteredMeter("state/snapshot/clean/storage/write", nil)

	// Meters registered under state/snapshot/dirty/account. They are updated
	// elsewhere in the package (presumably by the diff layers; not visible here).
	snapshotDirtyAccountHitMeter   = metrics.NewRegisteredMeter("state/snapshot/dirty/account/hit", nil)
	snapshotDirtyAccountMissMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/account/miss", nil)
	snapshotDirtyAccountInexMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/account/inex", nil)
	snapshotDirtyAccountReadMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/account/read", nil)
	snapshotDirtyAccountWriteMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/account/write", nil)

	// Meters registered under state/snapshot/dirty/storage. They are updated
	// elsewhere in the package (presumably by the diff layers; not visible here).
	snapshotDirtyStorageHitMeter   = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/hit", nil)
	snapshotDirtyStorageMissMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/miss", nil)
	snapshotDirtyStorageInexMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/inex", nil)
	snapshotDirtyStorageReadMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/read", nil)
	snapshotDirtyStorageWriteMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/write", nil)

	// Histograms of the depth at which dirty hits occur, each backed by an
	// exponentially decaying sample.
	snapshotDirtyAccountHitDepthHist = metrics.NewRegisteredHistogram("state/snapshot/dirty/account/hit/depth", nil, metrics.NewExpDecaySample(1028, 0.015))
	snapshotDirtyStorageHitDepthHist = metrics.NewRegisteredHistogram("state/snapshot/dirty/storage/hit/depth", nil, metrics.NewExpDecaySample(1028, 0.015))

	// Flush meters, marked by diffToDisk for every account or storage entry it
	// processes: the item meters count entries, the size meters count the bytes
	// of the entry data (zero for deletions).
	snapshotFlushAccountItemMeter = metrics.NewRegisteredMeter("state/snapshot/flush/account/item", nil)
	snapshotFlushAccountSizeMeter = metrics.NewRegisteredMeter("state/snapshot/flush/account/size", nil)
	snapshotFlushStorageItemMeter = metrics.NewRegisteredMeter("state/snapshot/flush/storage/item", nil)
	snapshotFlushStorageSizeMeter = metrics.NewRegisteredMeter("state/snapshot/flush/storage/size", nil)

	// Bloom filter metrics registered under state/snapshot/bloom. They are
	// updated elsewhere in the package (not visible here).
	snapshotBloomIndexTimer = metrics.NewRegisteredResettingTimer("state/snapshot/bloom/index", nil)
	snapshotBloomErrorGauge = metrics.NewRegisteredGaugeFloat64("state/snapshot/bloom/error", nil)

	snapshotBloomAccountTrueHitMeter  = metrics.NewRegisteredMeter("state/snapshot/bloom/account/truehit", nil)
	snapshotBloomAccountFalseHitMeter = metrics.NewRegisteredMeter("state/snapshot/bloom/account/falsehit", nil)
	snapshotBloomAccountMissMeter     = metrics.NewRegisteredMeter("state/snapshot/bloom/account/miss", nil)

	snapshotBloomStorageTrueHitMeter  = metrics.NewRegisteredMeter("state/snapshot/bloom/storage/truehit", nil)
	snapshotBloomStorageFalseHitMeter = metrics.NewRegisteredMeter("state/snapshot/bloom/storage/falsehit", nil)
	snapshotBloomStorageMissMeter     = metrics.NewRegisteredMeter("state/snapshot/bloom/storage/miss", nil)

	// ErrSnapshotStale is returned from data accessors if the underlying snapshot
	// layer has been invalidated because the chain progressed far enough that
	// the layer's original state is no longer maintained.
	ErrSnapshotStale = errors.New("snapshot stale")

	// ErrNotCoveredYet is returned from data accessors if the underlying snapshot
	// is currently being generated and the requested data item is not yet within
	// the range of accounts covered.
	ErrNotCoveredYet = errors.New("not covered yet")

	// ErrNotConstructed is returned if a caller wants to iterate the snapshot
	// while its generation has not finished yet.
	ErrNotConstructed = errors.New("snapshot is not constructed")

	// errSnapshotCycle is returned if an attempt is made to insert a snapshot
	// that would form a cycle in the snapshot tree.
	errSnapshotCycle = errors.New("snapshot cycle")
)
    98  
// Snapshot represents the read-only functionality supported by a snapshot
// storage layer. It is the public view of a layer; Tree.Snapshot hands out
// values of this type.
type Snapshot interface {
	// Root returns the root hash for which this snapshot was made.
	Root() common.Hash

	// Account directly retrieves the account associated with a particular hash in
	// the snapshot slim data format.
	Account(hash common.Hash) (*types.SlimAccount, error)

	// AccountRLP directly retrieves the account RLP associated with a particular
	// hash in the snapshot slim data format.
	AccountRLP(hash common.Hash) ([]byte, error)

	// Storage directly retrieves the storage data associated with a particular
	// storage hash, within the account identified by accountHash.
	Storage(accountHash, storageHash common.Hash) ([]byte, error)
}
   116  
// snapshot is the internal version of the snapshot data layer that supports some
// additional methods compared to the public API. Every value stored in
// Tree.layers is of this type.
type snapshot interface {
	Snapshot

	// Parent returns the subsequent layer of a snapshot, or nil if the base was
	// reached.
	//
	// Note, the method is an internal helper to avoid type switching between the
	// disk and diff layers. There is no locking involved.
	Parent() snapshot

	// Update creates a new layer on top of the existing snapshot diff tree with
	// the specified data items.
	//
	// Note, the maps are retained by the method to avoid copying everything.
	Update(blockRoot common.Hash, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer

	// Journal commits an entire diff hierarchy to disk into a single journal entry.
	// This is meant to be used during shutdown to persist the snapshot without
	// flattening everything down (bad for reorgs).
	Journal(buffer *bytes.Buffer) (common.Hash, error)

	// Stale returns whether this layer has become stale (was flattened across) or
	// if it's still live.
	Stale() bool

	// AccountIterator creates an account iterator over an arbitrary layer,
	// positioned at the given seek hash.
	AccountIterator(seek common.Hash) AccountIterator

	// StorageIterator creates a storage iterator over an arbitrary layer for
	// the given account, positioned at the given seek hash.
	StorageIterator(account common.Hash, seek common.Hash) StorageIterator
}
   150  
// Config includes the configurations for snapshots.
//
// NoBuild and AsyncBuild interact in New: only when both are false does New
// block until the snapshot generation has finished.
type Config struct {
	CacheSize  int  // Megabytes permitted to use for read caches
	Recovery   bool // Whether the snapshot is in recovery mode
	NoBuild    bool // Whether snapshot generation is disallowed
	AsyncBuild bool // Whether the snapshot may be generated asynchronously
}
   158  
// Tree is an Ethereum state snapshot tree. It consists of one persistent base
// layer backed by a key-value store, on top of which arbitrarily many in-memory
// diff layers are stacked. The memory diffs can form a tree with branching, but
// the disk layer is a singleton and common to all. If a reorg goes deeper than
// the disk layer, everything needs to be deleted.
//
// The goal of a state snapshot is twofold: to allow direct access to account and
// storage data to avoid expensive multi-level trie lookups; and to allow sorted,
// cheap iteration of the account/storage tries for sync aid.
type Tree struct {
	config Config                   // Snapshot configuration
	diskdb ethdb.KeyValueStore      // Persistent database to store the snapshot
	triedb *triedb.Database         // In-memory cache to access the trie through
	layers map[common.Hash]snapshot // Collection of all known layers, keyed by root
	lock   sync.RWMutex             // Guards the layers map

	// Test hooks
	onFlatten func() // Hook invoked when the bottom most diff layers are flattened
}
   178  
   179  // New attempts to load an already existing snapshot from a persistent key-value
   180  // store (with a number of memory layers from a journal), ensuring that the head
   181  // of the snapshot matches the expected one.
   182  //
   183  // If the snapshot is missing or the disk layer is broken, the snapshot will be
   184  // reconstructed using both the existing data and the state trie.
   185  // The repair happens on a background thread.
   186  //
   187  // If the memory layers in the journal do not match the disk layer (e.g. there is
   188  // a gap) or the journal is missing, there are two repair cases:
   189  //
   190  //   - if the 'recovery' parameter is true, memory diff-layers and the disk-layer
   191  //     will all be kept. This case happens when the snapshot is 'ahead' of the
   192  //     state trie.
   193  //   - otherwise, the entire snapshot is considered invalid and will be recreated on
   194  //     a background thread.
   195  func New(config Config, diskdb ethdb.KeyValueStore, triedb *triedb.Database, root common.Hash) (*Tree, error) {
   196  	// Create a new, empty snapshot tree
   197  	snap := &Tree{
   198  		config: config,
   199  		diskdb: diskdb,
   200  		triedb: triedb,
   201  		layers: make(map[common.Hash]snapshot),
   202  	}
   203  	// Attempt to load a previously persisted snapshot and rebuild one if failed
   204  	head, disabled, err := loadSnapshot(diskdb, triedb, root, config.CacheSize, config.Recovery, config.NoBuild)
   205  	if disabled {
   206  		log.Warn("Snapshot maintenance disabled (syncing)")
   207  		return snap, nil
   208  	}
   209  	// Create the building waiter iff the background generation is allowed
   210  	if !config.NoBuild && !config.AsyncBuild {
   211  		defer snap.waitBuild()
   212  	}
   213  	if err != nil {
   214  		log.Warn("Failed to load snapshot", "err", err)
   215  		if !config.NoBuild {
   216  			snap.Rebuild(root)
   217  			return snap, nil
   218  		}
   219  		return nil, err // Bail out the error, don't rebuild automatically.
   220  	}
   221  	// Existing snapshot loaded, seed all the layers
   222  	for head != nil {
   223  		snap.layers[head.Root()] = head
   224  		head = head.Parent()
   225  	}
   226  	return snap, nil
   227  }
   228  
   229  // waitBuild blocks until the snapshot finishes rebuilding. This method is meant
   230  // to be used by tests to ensure we're testing what we believe we are.
   231  func (t *Tree) waitBuild() {
   232  	// Find the rebuild termination channel
   233  	var done chan struct{}
   234  
   235  	t.lock.RLock()
   236  	for _, layer := range t.layers {
   237  		if layer, ok := layer.(*diskLayer); ok {
   238  			done = layer.genPending
   239  			break
   240  		}
   241  	}
   242  	t.lock.RUnlock()
   243  
   244  	// Wait until the snapshot is generated
   245  	if done != nil {
   246  		<-done
   247  	}
   248  }
   249  
   250  // Disable interrupts any pending snapshot generator, deletes all the snapshot
   251  // layers in memory and marks snapshots disabled globally. In order to resume
   252  // the snapshot functionality, the caller must invoke Rebuild.
   253  func (t *Tree) Disable() {
   254  	// Interrupt any live snapshot layers
   255  	t.lock.Lock()
   256  	defer t.lock.Unlock()
   257  
   258  	for _, layer := range t.layers {
   259  		switch layer := layer.(type) {
   260  		case *diskLayer:
   261  			// TODO this function will hang if it's called twice. Will
   262  			// fix it in the following PRs.
   263  			layer.stopGeneration()
   264  			layer.markStale()
   265  			layer.Release()
   266  
   267  		case *diffLayer:
   268  			// If the layer is a simple diff, simply mark as stale
   269  			layer.lock.Lock()
   270  			layer.stale.Store(true)
   271  			layer.lock.Unlock()
   272  
   273  		default:
   274  			panic(fmt.Sprintf("unknown layer type: %T", layer))
   275  		}
   276  	}
   277  	t.layers = map[common.Hash]snapshot{}
   278  
   279  	// Delete all snapshot liveness information from the database
   280  	batch := t.diskdb.NewBatch()
   281  
   282  	rawdb.WriteSnapshotDisabled(batch)
   283  	rawdb.DeleteSnapshotRoot(batch)
   284  	rawdb.DeleteSnapshotJournal(batch)
   285  	rawdb.DeleteSnapshotGenerator(batch)
   286  	rawdb.DeleteSnapshotRecoveryNumber(batch)
   287  	// Note, we don't delete the sync progress
   288  
   289  	if err := batch.Write(); err != nil {
   290  		log.Crit("Failed to disable snapshots", "err", err)
   291  	}
   292  }
   293  
   294  // Snapshot retrieves a snapshot belonging to the given block root, or nil if no
   295  // snapshot is maintained for that block.
   296  func (t *Tree) Snapshot(blockRoot common.Hash) Snapshot {
   297  	t.lock.RLock()
   298  	defer t.lock.RUnlock()
   299  
   300  	return t.layers[blockRoot]
   301  }
   302  
   303  // Snapshots returns all visited layers from the topmost layer with specific
   304  // root and traverses downward. The layer amount is limited by the given number.
   305  // If nodisk is set, then disk layer is excluded.
   306  func (t *Tree) Snapshots(root common.Hash, limits int, nodisk bool) []Snapshot {
   307  	t.lock.RLock()
   308  	defer t.lock.RUnlock()
   309  
   310  	if limits == 0 {
   311  		return nil
   312  	}
   313  	layer := t.layers[root]
   314  	if layer == nil {
   315  		return nil
   316  	}
   317  	var ret []Snapshot
   318  	for {
   319  		if _, isdisk := layer.(*diskLayer); isdisk && nodisk {
   320  			break
   321  		}
   322  		ret = append(ret, layer)
   323  		limits -= 1
   324  		if limits == 0 {
   325  			break
   326  		}
   327  		parent := layer.Parent()
   328  		if parent == nil {
   329  			break
   330  		}
   331  		layer = parent
   332  	}
   333  	return ret
   334  }
   335  
   336  // Update adds a new snapshot into the tree, if that can be linked to an existing
   337  // old parent. It is disallowed to insert a disk layer (the origin of all).
   338  func (t *Tree) Update(blockRoot common.Hash, parentRoot common.Hash, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) error {
   339  	// Reject noop updates to avoid self-loops in the snapshot tree. This is a
   340  	// special case that can only happen for Clique networks where empty blocks
   341  	// don't modify the state (0 block subsidy).
   342  	//
   343  	// Although we could silently ignore this internally, it should be the caller's
   344  	// responsibility to avoid even attempting to insert such a snapshot.
   345  	if blockRoot == parentRoot {
   346  		return errSnapshotCycle
   347  	}
   348  	// Generate a new snapshot on top of the parent
   349  	parent := t.Snapshot(parentRoot)
   350  	if parent == nil {
   351  		return fmt.Errorf("parent [%#x] snapshot missing", parentRoot)
   352  	}
   353  	snap := parent.(snapshot).Update(blockRoot, accounts, storage)
   354  
   355  	// Save the new snapshot for later
   356  	t.lock.Lock()
   357  	defer t.lock.Unlock()
   358  
   359  	t.layers[snap.root] = snap
   360  	return nil
   361  }
   362  
   363  // Cap traverses downwards the snapshot tree from a head block hash until the
   364  // number of allowed layers are crossed. All layers beyond the permitted number
   365  // are flattened downwards.
   366  //
   367  // Note, the final diff layer count in general will be one more than the amount
   368  // requested. This happens because the bottom-most diff layer is the accumulator
   369  // which may or may not overflow and cascade to disk. Since this last layer's
   370  // survival is only known *after* capping, we need to omit it from the count if
   371  // we want to ensure that *at least* the requested number of diff layers remain.
   372  func (t *Tree) Cap(root common.Hash, layers int) error {
   373  	// Retrieve the head snapshot to cap from
   374  	snap := t.Snapshot(root)
   375  	if snap == nil {
   376  		return fmt.Errorf("snapshot [%#x] missing", root)
   377  	}
   378  	diff, ok := snap.(*diffLayer)
   379  	if !ok {
   380  		return fmt.Errorf("snapshot [%#x] is disk layer", root)
   381  	}
   382  	// If the generator is still running, use a more aggressive cap
   383  	diff.origin.lock.RLock()
   384  	if diff.origin.genMarker != nil && layers > 8 {
   385  		layers = 8
   386  	}
   387  	diff.origin.lock.RUnlock()
   388  
   389  	// Run the internal capping and discard all stale layers
   390  	t.lock.Lock()
   391  	defer t.lock.Unlock()
   392  
   393  	// Flattening the bottom-most diff layer requires special casing since there's
   394  	// no child to rewire to the grandparent. In that case we can fake a temporary
   395  	// child for the capping and then remove it.
   396  	if layers == 0 {
   397  		// If full commit was requested, flatten the diffs and merge onto disk
   398  		diff.lock.RLock()
   399  		base := diffToDisk(diff.flatten().(*diffLayer))
   400  		diff.lock.RUnlock()
   401  
   402  		// Replace the entire snapshot tree with the flat base
   403  		t.layers = map[common.Hash]snapshot{base.root: base}
   404  		return nil
   405  	}
   406  	persisted := t.cap(diff, layers)
   407  
   408  	// Remove any layer that is stale or links into a stale layer
   409  	children := make(map[common.Hash][]common.Hash)
   410  	for root, snap := range t.layers {
   411  		if diff, ok := snap.(*diffLayer); ok {
   412  			parent := diff.parent.Root()
   413  			children[parent] = append(children[parent], root)
   414  		}
   415  	}
   416  	var remove func(root common.Hash)
   417  	remove = func(root common.Hash) {
   418  		delete(t.layers, root)
   419  		for _, child := range children[root] {
   420  			remove(child)
   421  		}
   422  		delete(children, root)
   423  	}
   424  	for root, snap := range t.layers {
   425  		if snap.Stale() {
   426  			remove(root)
   427  		}
   428  	}
   429  	// If the disk layer was modified, regenerate all the cumulative blooms
   430  	if persisted != nil {
   431  		var rebloom func(root common.Hash)
   432  		rebloom = func(root common.Hash) {
   433  			if diff, ok := t.layers[root].(*diffLayer); ok {
   434  				diff.rebloom(persisted)
   435  			}
   436  			for _, child := range children[root] {
   437  				rebloom(child)
   438  			}
   439  		}
   440  		rebloom(persisted.root)
   441  	}
   442  	return nil
   443  }
   444  
// cap traverses downwards the diff tree until the number of allowed layers are
// crossed. All diffs beyond the permitted number are flattened downwards. If the
// layer limit is reached, memory cap is also enforced (but not before).
//
// The method returns the new disk layer if diffs were persisted into it, and
// nil if the disk layer was left untouched.
//
// The method writes to t.layers without taking t.lock itself; Cap invokes it
// with t.lock already held for writing.
//
// Note, the final diff layer count in general will be one more than the amount
// requested. This happens because the bottom-most diff layer is the accumulator
// which may or may not overflow and cascade to disk. Since this last layer's
// survival is only known *after* capping, we need to omit it from the count if
// we want to ensure that *at least* the requested number of diff layers remain.
func (t *Tree) cap(diff *diffLayer, layers int) *diskLayer {
	// Dive until we run out of layers or reach the persistent database
	for i := 0; i < layers-1; i++ {
		// If we still have diff layers below, continue down
		if parent, ok := diff.parent.(*diffLayer); ok {
			diff = parent
		} else {
			// Diff stack too shallow, return without modifications
			return nil
		}
	}
	// We're out of layers, flatten anything below, stopping if it's the disk or if
	// the memory limit is not yet exceeded.
	switch parent := diff.parent.(type) {
	case *diskLayer:
		// The lowest permitted diff already sits directly on disk, nothing
		// to flatten.
		return nil

	case *diffLayer:
		// Hold the write lock until the flattened parent is linked correctly.
		// Otherwise, the stale layer may be accessed by external reads in the
		// meantime.
		diff.lock.Lock()
		defer diff.lock.Unlock()

		// Flatten the parent into the grandparent. The flattening internally obtains a
		// write lock on grandparent.
		flattened := parent.flatten().(*diffLayer)
		t.layers[flattened.root] = flattened

		// Invoke the hook if it's registered. Ugly hack.
		if t.onFlatten != nil {
			t.onFlatten()
		}
		// Rewire the lowest surviving diff onto the flattened accumulator
		diff.parent = flattened
		if flattened.memory < aggregatorMemoryLimit {
			// Accumulator layer is smaller than the limit, so we can abort, unless
			// there's a snapshot being generated currently. In that case, the trie
			// will move from underneath the generator so we **must** merge all the
			// partial data down into the snapshot and restart the generation.
			if flattened.parent.(*diskLayer).genAbort == nil {
				return nil
			}
		}
	default:
		panic(fmt.Sprintf("unknown data layer: %T", parent))
	}
	// If the bottom-most layer is larger than our memory cap, persist to disk.
	// This point is also reached below the cap while a generator is running.
	bottom := diff.parent.(*diffLayer)

	bottom.lock.RLock()
	base := diffToDisk(bottom)
	bottom.lock.RUnlock()

	// Track the new disk layer and hang the lowest surviving diff onto it
	t.layers[base.root] = base
	diff.parent = base
	return base
}
   513  
// diffToDisk merges a bottom-most diff into the persistent disk layer underneath
// it. The method will panic if called onto a non-bottom-most diff layer, or if
// the disk layer underneath has already been marked stale.
//
// The disk layer persistence should be operated in an atomic way. All updates should
// be discarded if the whole transition is not finished. To that end the snapshot
// root is deleted in the first batch and only written back in the final one.
//
// The returned disk layer is a new wrapper that reuses the cache, the databases,
// the generator marker and the pending channel of the old base.
func diffToDisk(bottom *diffLayer) *diskLayer {
	var (
		base  = bottom.parent.(*diskLayer)
		batch = base.diskdb.NewBatch()
		stats *generatorStats
	)
	// If the disk layer is running a snapshot generator, abort it. A reply
	// channel is handed over and the generator stats are received back on it.
	if base.genAbort != nil {
		abort := make(chan *generatorStats)
		base.genAbort <- abort
		stats = <-abort
	}
	// Put the deletion in the batch writer, flush all updates in the final step.
	rawdb.DeleteSnapshotRoot(batch)

	// Mark the original base as stale as we're going to create a new wrapper
	base.lock.Lock()
	if base.stale {
		panic("parent disk layer is stale") // we've committed into the same base from two children, boo
	}
	base.stale = true
	base.lock.Unlock()

	// Push all updated accounts into the database
	for hash, data := range bottom.accountData {
		// Skip any account not covered yet by the snapshot
		if base.genMarker != nil && bytes.Compare(hash[:], base.genMarker) > 0 {
			continue
		}
		// Push the account to disk; an empty blob denotes a deletion
		if len(data) != 0 {
			rawdb.WriteAccountSnapshot(batch, hash, data)
			base.cache.Set(hash[:], data)
			snapshotCleanAccountWriteMeter.Mark(int64(len(data)))
		} else {
			rawdb.DeleteAccountSnapshot(batch, hash)
			base.cache.Set(hash[:], nil)
		}
		snapshotFlushAccountItemMeter.Mark(1)
		snapshotFlushAccountSizeMeter.Mark(int64(len(data)))

		// Ensure we don't write too much data blindly. It's ok to flush, the
		// root will go missing in case of a crash and we'll detect and regen
		// the snapshot.
		if batch.ValueSize() > 64*1024*1024 {
			if err := batch.Write(); err != nil {
				log.Crit("Failed to write state changes", "err", err)
			}
			batch.Reset()
		}
	}
	// Push all the storage slots into the database
	for accountHash, storage := range bottom.storageData {
		// Skip any account not covered yet by the snapshot
		if base.genMarker != nil && bytes.Compare(accountHash[:], base.genMarker) > 0 {
			continue
		}
		// Generation might be mid-account, track that case too
		midAccount := base.genMarker != nil && bytes.Equal(accountHash[:], base.genMarker[:common.HashLength])

		for storageHash, data := range storage {
			// Skip any slot not covered yet by the snapshot
			if midAccount && bytes.Compare(storageHash[:], base.genMarker[common.HashLength:]) > 0 {
				continue
			}
			// Push the slot to disk; an empty value denotes a deletion. The
			// cache key is the account hash followed by the storage hash.
			if len(data) > 0 {
				rawdb.WriteStorageSnapshot(batch, accountHash, storageHash, data)
				base.cache.Set(append(accountHash[:], storageHash[:]...), data)
				snapshotCleanStorageWriteMeter.Mark(int64(len(data)))
			} else {
				rawdb.DeleteStorageSnapshot(batch, accountHash, storageHash)
				base.cache.Set(append(accountHash[:], storageHash[:]...), nil)
			}
			snapshotFlushStorageItemMeter.Mark(1)
			snapshotFlushStorageSizeMeter.Mark(int64(len(data)))

			// Ensure we don't write too much data blindly. It's ok to flush, the
			// root will go missing in case of a crash and we'll detect and regen
			// the snapshot.
			if batch.ValueSize() > 64*1024*1024 {
				if err := batch.Write(); err != nil {
					log.Crit("Failed to write state changes", "err", err)
				}
				batch.Reset()
			}
		}
	}
	// Update the snapshot block marker and write any remainder data
	rawdb.WriteSnapshotRoot(batch, bottom.root)

	// Write out the generator progress marker and report
	journalProgress(batch, base.genMarker, stats)

	// Flush all the updates in the single db operation. Ensure the
	// disk layer transition is atomic.
	if err := batch.Write(); err != nil {
		log.Crit("Failed to write leftover snapshot", "err", err)
	}
	log.Debug("Journalled disk layer", "root", bottom.root, "complete", base.genMarker == nil)
	res := &diskLayer{
		root:       bottom.root,
		cache:      base.cache,
		diskdb:     base.diskdb,
		triedb:     base.triedb,
		genMarker:  base.genMarker,
		genPending: base.genPending,
	}
	// If snapshot generation hasn't finished yet, port over all the stats and
	// continue where the previous round left off.
	//
	// Note, the `base.genAbort` comparison is not used normally, it's checked
	// to allow the tests to play with the marker without triggering this path.
	if base.genMarker != nil && base.genAbort != nil {
		res.genMarker = base.genMarker
		res.genAbort = make(chan chan *generatorStats)
		go res.generate(stats)
	}
	return res
}
   638  
   639  // Release releases resources
   640  func (t *Tree) Release() {
   641  	t.lock.RLock()
   642  	defer t.lock.RUnlock()
   643  
   644  	if dl := t.disklayer(); dl != nil {
   645  		dl.Release()
   646  	}
   647  }
   648  
   649  // Journal commits an entire diff hierarchy to disk into a single journal entry.
   650  // This is meant to be used during shutdown to persist the snapshot without
   651  // flattening everything down (bad for reorgs).
   652  //
   653  // The method returns the root hash of the base layer that needs to be persisted
   654  // to disk as a trie too to allow continuing any pending generation op.
   655  func (t *Tree) Journal(root common.Hash) (common.Hash, error) {
   656  	// Retrieve the head snapshot to journal from var snap snapshot
   657  	snap := t.Snapshot(root)
   658  	if snap == nil {
   659  		return common.Hash{}, fmt.Errorf("snapshot [%#x] missing", root)
   660  	}
   661  	// Run the journaling
   662  	t.lock.Lock()
   663  	defer t.lock.Unlock()
   664  
   665  	// Firstly write out the metadata of journal
   666  	journal := new(bytes.Buffer)
   667  	if err := rlp.Encode(journal, journalCurrentVersion); err != nil {
   668  		return common.Hash{}, err
   669  	}
   670  	diskroot := t.diskRoot()
   671  	if diskroot == (common.Hash{}) {
   672  		return common.Hash{}, errors.New("invalid disk root")
   673  	}
   674  	// Secondly write out the disk layer root, ensure the
   675  	// diff journal is continuous with disk.
   676  	if err := rlp.Encode(journal, diskroot); err != nil {
   677  		return common.Hash{}, err
   678  	}
   679  	// Finally write out the journal of each layer in reverse order.
   680  	base, err := snap.(snapshot).Journal(journal)
   681  	if err != nil {
   682  		return common.Hash{}, err
   683  	}
   684  	// Store the journal into the database and return
   685  	rawdb.WriteSnapshotJournal(t.diskdb, journal.Bytes())
   686  	return base, nil
   687  }
   688  
   689  // Rebuild wipes all available snapshot data from the persistent database and
   690  // discard all caches and diff layers. Afterwards, it starts a new snapshot
   691  // generator with the given root hash.
   692  func (t *Tree) Rebuild(root common.Hash) {
   693  	t.lock.Lock()
   694  	defer t.lock.Unlock()
   695  
   696  	// Firstly delete any recovery flag in the database. Because now we are
   697  	// building a brand new snapshot. Also reenable the snapshot feature.
   698  	rawdb.DeleteSnapshotRecoveryNumber(t.diskdb)
   699  	rawdb.DeleteSnapshotDisabled(t.diskdb)
   700  
   701  	// Iterate over and mark all layers stale
   702  	for _, layer := range t.layers {
   703  		switch layer := layer.(type) {
   704  		case *diskLayer:
   705  			// TODO this function will hang if it's called twice. Will
   706  			// fix it in the following PRs.
   707  			layer.stopGeneration()
   708  			layer.markStale()
   709  			layer.Release()
   710  
   711  		case *diffLayer:
   712  			// If the layer is a simple diff, simply mark as stale
   713  			layer.lock.Lock()
   714  			layer.stale.Store(true)
   715  			layer.lock.Unlock()
   716  
   717  		default:
   718  			panic(fmt.Sprintf("unknown layer type: %T", layer))
   719  		}
   720  	}
   721  	// Start generating a new snapshot from scratch on a background thread. The
   722  	// generator will run a wiper first if there's not one running right now.
   723  	log.Info("Rebuilding state snapshot")
   724  	t.layers = map[common.Hash]snapshot{
   725  		root: generateSnapshot(t.diskdb, t.triedb, t.config.CacheSize, root),
   726  	}
   727  }
   728  
   729  // AccountIterator creates a new account iterator for the specified root hash and
   730  // seeks to a starting account hash.
   731  func (t *Tree) AccountIterator(root common.Hash, seek common.Hash) (AccountIterator, error) {
   732  	ok, err := t.generating()
   733  	if err != nil {
   734  		return nil, err
   735  	}
   736  	if ok {
   737  		return nil, ErrNotConstructed
   738  	}
   739  	return newFastAccountIterator(t, root, seek)
   740  }
   741  
   742  // StorageIterator creates a new storage iterator for the specified root hash and
   743  // account. The iterator will be move to the specific start position.
   744  func (t *Tree) StorageIterator(root common.Hash, account common.Hash, seek common.Hash) (StorageIterator, error) {
   745  	ok, err := t.generating()
   746  	if err != nil {
   747  		return nil, err
   748  	}
   749  	if ok {
   750  		return nil, ErrNotConstructed
   751  	}
   752  	return newFastStorageIterator(t, root, account, seek)
   753  }
   754  
   755  // Verify iterates the whole state(all the accounts as well as the corresponding storages)
   756  // with the specific root and compares the re-computed hash with the original one.
   757  func (t *Tree) Verify(root common.Hash) error {
   758  	acctIt, err := t.AccountIterator(root, common.Hash{})
   759  	if err != nil {
   760  		return err
   761  	}
   762  	defer acctIt.Release()
   763  
   764  	got, err := generateTrieRoot(nil, "", acctIt, common.Hash{}, stackTrieGenerate, func(db ethdb.KeyValueWriter, accountHash, codeHash common.Hash, stat *generateStats) (common.Hash, error) {
   765  		storageIt, err := t.StorageIterator(root, accountHash, common.Hash{})
   766  		if err != nil {
   767  			return common.Hash{}, err
   768  		}
   769  		defer storageIt.Release()
   770  
   771  		hash, err := generateTrieRoot(nil, "", storageIt, accountHash, stackTrieGenerate, nil, stat, false)
   772  		if err != nil {
   773  			return common.Hash{}, err
   774  		}
   775  		return hash, nil
   776  	}, newGenerateStats(), true)
   777  
   778  	if err != nil {
   779  		return err
   780  	}
   781  	if got != root {
   782  		return fmt.Errorf("state root hash mismatch: got %x, want %x", got, root)
   783  	}
   784  	return nil
   785  }
   786  
   787  // disklayer is an internal helper function to return the disk layer.
   788  // The lock of snapTree is assumed to be held already.
   789  func (t *Tree) disklayer() *diskLayer {
   790  	var snap snapshot
   791  	for _, s := range t.layers {
   792  		snap = s
   793  		break
   794  	}
   795  	if snap == nil {
   796  		return nil
   797  	}
   798  	switch layer := snap.(type) {
   799  	case *diskLayer:
   800  		return layer
   801  	case *diffLayer:
   802  		layer.lock.RLock()
   803  		defer layer.lock.RUnlock()
   804  		return layer.origin
   805  	default:
   806  		panic(fmt.Sprintf("%T: undefined layer", snap))
   807  	}
   808  }
   809  
   810  // diskRoot is an internal helper function to return the disk layer root.
   811  // The lock of snapTree is assumed to be held already.
   812  func (t *Tree) diskRoot() common.Hash {
   813  	disklayer := t.disklayer()
   814  	if disklayer == nil {
   815  		return common.Hash{}
   816  	}
   817  	return disklayer.Root()
   818  }
   819  
   820  // generating is an internal helper function which reports whether the snapshot
   821  // is still under the construction.
   822  func (t *Tree) generating() (bool, error) {
   823  	t.lock.RLock()
   824  	defer t.lock.RUnlock()
   825  
   826  	layer := t.disklayer()
   827  	if layer == nil {
   828  		return false, errors.New("disk layer is missing")
   829  	}
   830  	layer.lock.RLock()
   831  	defer layer.lock.RUnlock()
   832  	return layer.genMarker != nil, nil
   833  }
   834  
   835  // DiskRoot is an external helper function to return the disk layer root.
   836  func (t *Tree) DiskRoot() common.Hash {
   837  	t.lock.RLock()
   838  	defer t.lock.RUnlock()
   839  
   840  	return t.diskRoot()
   841  }
   842  
   843  // Size returns the memory usage of the diff layers above the disk layer and the
   844  // dirty nodes buffered in the disk layer. Currently, the implementation uses a
   845  // special diff layer (the first) as an aggregator simulating a dirty buffer, so
   846  // the second return will always be 0. However, this will be made consistent with
   847  // the pathdb, which will require a second return.
   848  func (t *Tree) Size() (diffs common.StorageSize, buf common.StorageSize) {
   849  	t.lock.RLock()
   850  	defer t.lock.RUnlock()
   851  
   852  	var size common.StorageSize
   853  	for _, layer := range t.layers {
   854  		if layer, ok := layer.(*diffLayer); ok {
   855  			size += common.StorageSize(layer.memory)
   856  		}
   857  	}
   858  	return size, 0
   859  }