github.com/ethereum/go-ethereum@v1.16.1/core/state/snapshot/conversion.go (about)

     1  // Copyright 2020 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package snapshot
    18  
    19  import (
    20  	"encoding/binary"
    21  	"errors"
    22  	"fmt"
    23  	"math"
    24  	"runtime"
    25  	"sync"
    26  	"time"
    27  
    28  	"github.com/ethereum/go-ethereum/common"
    29  	"github.com/ethereum/go-ethereum/core/rawdb"
    30  	"github.com/ethereum/go-ethereum/core/types"
    31  	"github.com/ethereum/go-ethereum/ethdb"
    32  	"github.com/ethereum/go-ethereum/log"
    33  	"github.com/ethereum/go-ethereum/rlp"
    34  	"github.com/ethereum/go-ethereum/trie"
    35  )
    36  
    37  // trieKV represents a trie key-value pair
    38  type trieKV struct {
    39  	key   common.Hash
    40  	value []byte
    41  }
    42  
    43  type (
    44  	// trieGeneratorFn is the interface of trie generation which can
    45  	// be implemented by different trie algorithm.
    46  	trieGeneratorFn func(db ethdb.KeyValueWriter, scheme string, owner common.Hash, in chan (trieKV), out chan (common.Hash))
    47  
    48  	// leafCallbackFn is the callback invoked at the leaves of the trie,
    49  	// returns the subtrie root with the specified subtrie identifier.
    50  	leafCallbackFn func(db ethdb.KeyValueWriter, accountHash, codeHash common.Hash, stat *generateStats) (common.Hash, error)
    51  )
    52  
    53  // GenerateTrie takes the whole snapshot tree as the input, traverses all the
    54  // accounts as well as the corresponding storages and regenerate the whole state
    55  // (account trie + all storage tries).
    56  func GenerateTrie(snaptree *Tree, root common.Hash, src ethdb.Database, dst ethdb.KeyValueWriter) error {
    57  	// Traverse all state by snapshot, re-generate the whole state trie
    58  	acctIt, err := snaptree.AccountIterator(root, common.Hash{})
    59  	if err != nil {
    60  		return err // The required snapshot might not exist.
    61  	}
    62  	defer acctIt.Release()
    63  
    64  	scheme := snaptree.triedb.Scheme()
    65  	got, err := generateTrieRoot(dst, scheme, acctIt, common.Hash{}, stackTrieGenerate, func(dst ethdb.KeyValueWriter, accountHash, codeHash common.Hash, stat *generateStats) (common.Hash, error) {
    66  		// Migrate the code first, commit the contract code into the tmp db.
    67  		if codeHash != types.EmptyCodeHash {
    68  			code := rawdb.ReadCode(src, codeHash)
    69  			if len(code) == 0 {
    70  				return common.Hash{}, errors.New("failed to read contract code")
    71  			}
    72  			rawdb.WriteCode(dst, codeHash, code)
    73  		}
    74  		// Then migrate all storage trie nodes into the tmp db.
    75  		storageIt, err := snaptree.StorageIterator(root, accountHash, common.Hash{})
    76  		if err != nil {
    77  			return common.Hash{}, err
    78  		}
    79  		defer storageIt.Release()
    80  
    81  		hash, err := generateTrieRoot(dst, scheme, storageIt, accountHash, stackTrieGenerate, nil, stat, false)
    82  		if err != nil {
    83  			return common.Hash{}, err
    84  		}
    85  		return hash, nil
    86  	}, newGenerateStats(), true)
    87  
    88  	if err != nil {
    89  		return err
    90  	}
    91  	if got != root {
    92  		return fmt.Errorf("state root hash mismatch: got %x, want %x", got, root)
    93  	}
    94  	return nil
    95  }
    96  
    97  // generateStats is a collection of statistics gathered by the trie generator
    98  // for logging purposes.
    99  type generateStats struct {
   100  	head  common.Hash
   101  	start time.Time
   102  
   103  	accounts uint64 // Number of accounts done (including those being crawled)
   104  	slots    uint64 // Number of storage slots done (including those being crawled)
   105  
   106  	slotsStart map[common.Hash]time.Time   // Start time for account slot crawling
   107  	slotsHead  map[common.Hash]common.Hash // Slot head for accounts being crawled
   108  
   109  	lock sync.RWMutex
   110  }
   111  
   112  // newGenerateStats creates a new generator stats.
   113  func newGenerateStats() *generateStats {
   114  	return &generateStats{
   115  		slotsStart: make(map[common.Hash]time.Time),
   116  		slotsHead:  make(map[common.Hash]common.Hash),
   117  		start:      time.Now(),
   118  	}
   119  }
   120  
   121  // progressAccounts updates the generator stats for the account range.
   122  func (stat *generateStats) progressAccounts(account common.Hash, done uint64) {
   123  	stat.lock.Lock()
   124  	defer stat.lock.Unlock()
   125  
   126  	stat.accounts += done
   127  	stat.head = account
   128  }
   129  
   130  // finishAccounts updates the generator stats for the finished account range.
   131  func (stat *generateStats) finishAccounts(done uint64) {
   132  	stat.lock.Lock()
   133  	defer stat.lock.Unlock()
   134  
   135  	stat.accounts += done
   136  }
   137  
   138  // progressContract updates the generator stats for a specific in-progress contract.
   139  func (stat *generateStats) progressContract(account common.Hash, slot common.Hash, done uint64) {
   140  	stat.lock.Lock()
   141  	defer stat.lock.Unlock()
   142  
   143  	stat.slots += done
   144  	stat.slotsHead[account] = slot
   145  	if _, ok := stat.slotsStart[account]; !ok {
   146  		stat.slotsStart[account] = time.Now()
   147  	}
   148  }
   149  
   150  // finishContract updates the generator stats for a specific just-finished contract.
   151  func (stat *generateStats) finishContract(account common.Hash, done uint64) {
   152  	stat.lock.Lock()
   153  	defer stat.lock.Unlock()
   154  
   155  	stat.slots += done
   156  	delete(stat.slotsHead, account)
   157  	delete(stat.slotsStart, account)
   158  }
   159  
   160  // report prints the cumulative progress statistic smartly.
   161  func (stat *generateStats) report() {
   162  	stat.lock.RLock()
   163  	defer stat.lock.RUnlock()
   164  
   165  	ctx := []interface{}{
   166  		"accounts", stat.accounts,
   167  		"slots", stat.slots,
   168  		"elapsed", common.PrettyDuration(time.Since(stat.start)),
   169  	}
   170  	if stat.accounts > 0 {
   171  		// If there's progress on the account trie, estimate the time to finish crawling it
   172  		if done := binary.BigEndian.Uint64(stat.head[:8]) / stat.accounts; done > 0 {
   173  			var (
   174  				left  = (math.MaxUint64 - binary.BigEndian.Uint64(stat.head[:8])) / stat.accounts
   175  				speed = done/uint64(time.Since(stat.start)/time.Millisecond+1) + 1 // +1s to avoid division by zero
   176  				eta   = time.Duration(left/speed) * time.Millisecond
   177  			)
   178  			// If there are large contract crawls in progress, estimate their finish time
   179  			for acc, head := range stat.slotsHead {
   180  				start := stat.slotsStart[acc]
   181  				if done := binary.BigEndian.Uint64(head[:8]); done > 0 {
   182  					var (
   183  						left  = math.MaxUint64 - binary.BigEndian.Uint64(head[:8])
   184  						speed = done/uint64(time.Since(start)/time.Millisecond+1) + 1 // +1s to avoid division by zero
   185  					)
   186  					// Override the ETA if larger than the largest until now
   187  					if slotETA := time.Duration(left/speed) * time.Millisecond; eta < slotETA {
   188  						eta = slotETA
   189  					}
   190  				}
   191  			}
   192  			ctx = append(ctx, []interface{}{
   193  				"eta", common.PrettyDuration(eta),
   194  			}...)
   195  		}
   196  	}
   197  	log.Info("Iterating state snapshot", ctx...)
   198  }
   199  
   200  // reportDone prints the last log when the whole generation is finished.
   201  func (stat *generateStats) reportDone() {
   202  	stat.lock.RLock()
   203  	defer stat.lock.RUnlock()
   204  
   205  	var ctx []interface{}
   206  	ctx = append(ctx, []interface{}{"accounts", stat.accounts}...)
   207  	if stat.slots != 0 {
   208  		ctx = append(ctx, []interface{}{"slots", stat.slots}...)
   209  	}
   210  	ctx = append(ctx, []interface{}{"elapsed", common.PrettyDuration(time.Since(stat.start))}...)
   211  	log.Info("Iterated snapshot", ctx...)
   212  }
   213  
   214  // runReport periodically prints the progress information.
   215  func runReport(stats *generateStats, stop chan bool) {
   216  	timer := time.NewTimer(0)
   217  	defer timer.Stop()
   218  
   219  	for {
   220  		select {
   221  		case <-timer.C:
   222  			stats.report()
   223  			timer.Reset(time.Second * 8)
   224  		case success := <-stop:
   225  			if success {
   226  				stats.reportDone()
   227  			}
   228  			return
   229  		}
   230  	}
   231  }
   232  
   233  // generateTrieRoot generates the trie hash based on the snapshot iterator.
   234  // It can be used for generating account trie, storage trie or even the
   235  // whole state which connects the accounts and the corresponding storages.
   236  func generateTrieRoot(db ethdb.KeyValueWriter, scheme string, it Iterator, account common.Hash, generatorFn trieGeneratorFn, leafCallback leafCallbackFn, stats *generateStats, report bool) (common.Hash, error) {
   237  	var (
   238  		in      = make(chan trieKV)         // chan to pass leaves
   239  		out     = make(chan common.Hash, 1) // chan to collect result
   240  		stoplog = make(chan bool, 1)        // 1-size buffer, works when logging is not enabled
   241  		wg      sync.WaitGroup
   242  	)
   243  	// Spin up a go-routine for trie hash re-generation
   244  	wg.Add(1)
   245  	go func() {
   246  		defer wg.Done()
   247  		generatorFn(db, scheme, account, in, out)
   248  	}()
   249  	// Spin up a go-routine for progress logging
   250  	if report && stats != nil {
   251  		wg.Add(1)
   252  		go func() {
   253  			defer wg.Done()
   254  			runReport(stats, stoplog)
   255  		}()
   256  	}
   257  	// Create a semaphore to assign tasks and collect results through. We'll pre-
   258  	// fill it with nils, thus using the same channel for both limiting concurrent
   259  	// processing and gathering results.
   260  	threads := runtime.NumCPU()
   261  	results := make(chan error, threads)
   262  	for i := 0; i < threads; i++ {
   263  		results <- nil // fill the semaphore
   264  	}
   265  	// stop is a helper function to shutdown the background threads
   266  	// and return the re-generated trie hash.
   267  	stop := func(fail error) (common.Hash, error) {
   268  		close(in)
   269  		result := <-out
   270  		for i := 0; i < threads; i++ {
   271  			if err := <-results; err != nil && fail == nil {
   272  				fail = err
   273  			}
   274  		}
   275  		stoplog <- fail == nil
   276  
   277  		wg.Wait()
   278  		return result, fail
   279  	}
   280  	var (
   281  		logged    = time.Now()
   282  		processed = uint64(0)
   283  		leaf      trieKV
   284  	)
   285  	// Start to feed leaves
   286  	for it.Next() {
   287  		if account == (common.Hash{}) {
   288  			var (
   289  				err      error
   290  				fullData []byte
   291  			)
   292  			if leafCallback == nil {
   293  				fullData, err = types.FullAccountRLP(it.(AccountIterator).Account())
   294  				if err != nil {
   295  					return stop(err)
   296  				}
   297  			} else {
   298  				// Wait until the semaphore allows us to continue, aborting if
   299  				// a sub-task failed
   300  				if err := <-results; err != nil {
   301  					results <- nil // stop will drain the results, add a noop back for this error we just consumed
   302  					return stop(err)
   303  				}
   304  				// Fetch the next account and process it concurrently
   305  				account, err := types.FullAccount(it.(AccountIterator).Account())
   306  				if err != nil {
   307  					return stop(err)
   308  				}
   309  				go func(hash common.Hash) {
   310  					subroot, err := leafCallback(db, hash, common.BytesToHash(account.CodeHash), stats)
   311  					if err != nil {
   312  						results <- err
   313  						return
   314  					}
   315  					if account.Root != subroot {
   316  						results <- fmt.Errorf("invalid subroot(path %x), want %x, have %x", hash, account.Root, subroot)
   317  						return
   318  					}
   319  					results <- nil
   320  				}(it.Hash())
   321  				fullData, err = rlp.EncodeToBytes(account)
   322  				if err != nil {
   323  					return stop(err)
   324  				}
   325  			}
   326  			leaf = trieKV{it.Hash(), fullData}
   327  		} else {
   328  			leaf = trieKV{it.Hash(), common.CopyBytes(it.(StorageIterator).Slot())}
   329  		}
   330  		in <- leaf
   331  
   332  		// Accumulate the generation statistic if it's required.
   333  		processed++
   334  		if time.Since(logged) > 3*time.Second && stats != nil {
   335  			if account == (common.Hash{}) {
   336  				stats.progressAccounts(it.Hash(), processed)
   337  			} else {
   338  				stats.progressContract(account, it.Hash(), processed)
   339  			}
   340  			logged, processed = time.Now(), 0
   341  		}
   342  	}
   343  	// Commit the last part statistic.
   344  	if processed > 0 && stats != nil {
   345  		if account == (common.Hash{}) {
   346  			stats.finishAccounts(processed)
   347  		} else {
   348  			stats.finishContract(account, processed)
   349  		}
   350  	}
   351  	return stop(nil)
   352  }
   353  
   354  func stackTrieGenerate(db ethdb.KeyValueWriter, scheme string, owner common.Hash, in chan trieKV, out chan common.Hash) {
   355  	var onTrieNode trie.OnTrieNode
   356  	if db != nil {
   357  		onTrieNode = func(path []byte, hash common.Hash, blob []byte) {
   358  			rawdb.WriteTrieNode(db, owner, path, hash, blob, scheme)
   359  		}
   360  	}
   361  	t := trie.NewStackTrie(onTrieNode)
   362  	for leaf := range in {
   363  		t.Update(leaf.key[:], leaf.value)
   364  	}
   365  	out <- t.Hash()
   366  }