github.com/MetalBlockchain/subnet-evm@v0.4.9/sync/statesync/state_syncer.go (about)

     1  // (c) 2021-2022, Ava Labs, Inc. All rights reserved.
     2  // See the file LICENSE for licensing terms.
     3  
     4  package statesync
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"sync"
    10  
    11  	"github.com/MetalBlockchain/subnet-evm/core/state/snapshot"
    12  	"github.com/MetalBlockchain/subnet-evm/ethdb"
    13  	syncclient "github.com/MetalBlockchain/subnet-evm/sync/client"
    14  	"github.com/MetalBlockchain/subnet-evm/trie"
    15  	"github.com/ethereum/go-ethereum/common"
    16  	"golang.org/x/sync/errgroup"
    17  )
    18  
// Tuning parameters for the state sync operation.
const (
	segmentThreshold       = 500_000 // if a trie is estimated to have more leafs than this, it is split into segments
	numStorageTrieSegments = 4       // multiplied by defaultNumThreads to size the segments channel in NewStateSyncer
	numMainTrieSegments    = 8       // NOTE(review): presumably the segment count used for the main account trie; not referenced in the code visible here — confirm
	defaultNumThreads      = 8       // capacity of the tries-in-progress semaphore and the thread count handed to the leaf syncer in Start
)
    25  
// StateSyncerConfig carries the inputs NewStateSyncer needs to build a state syncer.
type StateSyncerConfig struct {
	Root                     common.Hash       // root of the EVM state to sync to
	Client                   syncclient.Client // passed to the leaf syncer and the code syncer
	DB                       ethdb.Database    // database being synced; also backs the trie database, snapshot and trie queue
	BatchSize                int               // write batches when they reach this size
	MaxOutstandingCodeHashes int               // Maximum number of code hashes in the code syncer queue
	NumCodeFetchingWorkers   int               // Number of code syncing threads
}
    34  
// stateSync keeps the state of the entire state sync operation.
// It is created by NewStateSyncer, driven by Start, and reports its
// final result on the channel returned by Done.
type stateSync struct {
	db        ethdb.Database    // database we are syncing
	root      common.Hash       // root of the EVM state we are syncing to
	trieDB    *trie.Database    // trieDB on top of db we are syncing. used to restore any existing tries.
	snapshot  snapshot.Snapshot // used to access the database we are syncing as a snapshot.
	batchSize int               // write batches when they reach this size
	client    syncclient.Client // used to contact peers over the network

	segments   chan syncclient.LeafSyncTask   // channel of tasks to sync; closed by storageTrieProducer once no storage tries remain
	syncer     *syncclient.CallbackLeafSyncer // performs the sync, looping over each task's range and invoking specified callbacks
	codeSyncer *codeSyncer                    // manages the asynchronous download and batching of code hashes
	trieQueue  *trieQueue                     // manages a persistent list of storage tries we need to sync and any segments that are created for them

	// track the main account trie specifically to commit its root at the end of the operation
	mainTrie *trieToSync

	// track the tries currently being synced.
	// lock guards triesInProgress.
	lock            sync.RWMutex
	triesInProgress map[common.Hash]*trieToSync

	// track completion and progress of work
	mainTrieDone       chan struct{} // closed by onMainTrieFinished; storageTrieProducer waits on it before producing
	triesInProgressSem chan struct{} // semaphore: acquired per storage trie by storageTrieProducer, released in onStorageTrieFinished
	done               chan error    // receives the errgroup result from the goroutine spawned in Start
	stats              *trieSyncStats
}
    62  
    63  func NewStateSyncer(config *StateSyncerConfig) (*stateSync, error) {
    64  	ss := &stateSync{
    65  		batchSize:       config.BatchSize,
    66  		db:              config.DB,
    67  		client:          config.Client,
    68  		root:            config.Root,
    69  		trieDB:          trie.NewDatabase(config.DB),
    70  		snapshot:        snapshot.NewDiskLayer(config.DB),
    71  		stats:           newTrieSyncStats(),
    72  		triesInProgress: make(map[common.Hash]*trieToSync),
    73  
    74  		// [triesInProgressSem] is used to keep the number of tries syncing
    75  		// less than or equal to [defaultNumThreads].
    76  		triesInProgressSem: make(chan struct{}, defaultNumThreads),
    77  
    78  		// Each [trieToSync] will have a maximum of [numSegments] segments.
    79  		// We set the capacity of [segments] such that [defaultNumThreads]
    80  		// storage tries can sync concurrently.
    81  		segments:     make(chan syncclient.LeafSyncTask, defaultNumThreads*numStorageTrieSegments),
    82  		mainTrieDone: make(chan struct{}),
    83  		done:         make(chan error, 1),
    84  	}
    85  	ss.syncer = syncclient.NewCallbackLeafSyncer(config.Client, ss.segments)
    86  	ss.codeSyncer = newCodeSyncer(CodeSyncerConfig{
    87  		DB:                       config.DB,
    88  		Client:                   config.Client,
    89  		MaxOutstandingCodeHashes: config.MaxOutstandingCodeHashes,
    90  		NumCodeFetchingWorkers:   config.NumCodeFetchingWorkers,
    91  	})
    92  
    93  	ss.trieQueue = NewTrieQueue(config.DB)
    94  	if err := ss.trieQueue.clearIfRootDoesNotMatch(ss.root); err != nil {
    95  		return nil, err
    96  	}
    97  
    98  	// create a trieToSync for the main trie and mark it as in progress.
    99  	var err error
   100  	ss.mainTrie, err = NewTrieToSync(ss, ss.root, common.Hash{}, NewMainTrieTask(ss))
   101  	if err != nil {
   102  		return nil, err
   103  	}
   104  	ss.addTrieInProgress(ss.root, ss.mainTrie)
   105  	ss.mainTrie.startSyncing() // start syncing after tracking the trie as in progress
   106  	return ss, nil
   107  }
   108  
   109  // onStorageTrieFinished is called after a storage trie finishes syncing.
   110  func (t *stateSync) onStorageTrieFinished(root common.Hash) error {
   111  	<-t.triesInProgressSem // allow another trie to start (release the semaphore)
   112  	// mark the storage trie as done in trieQueue
   113  	if err := t.trieQueue.StorageTrieDone(root); err != nil {
   114  		return err
   115  	}
   116  	// track the completion of this storage trie
   117  	return t.removeTrieInProgress(root)
   118  }
   119  
   120  // onMainTrieFinishes is called after the main trie finishes syncing.
   121  func (t *stateSync) onMainTrieFinished() error {
   122  	t.codeSyncer.notifyAccountTrieCompleted()
   123  
   124  	// count the number of storage tries we need to sync for eta purposes.
   125  	numStorageTries, err := t.trieQueue.countTries()
   126  	if err != nil {
   127  		return err
   128  	}
   129  	t.stats.setTriesRemaining(numStorageTries)
   130  
   131  	// mark the main trie done
   132  	close(t.mainTrieDone)
   133  	return t.removeTrieInProgress(t.root)
   134  }
   135  
// onSyncComplete is called after the account trie and
// all storage tries have completed syncing (Start invokes it
// once the leaf syncer reports completion without error).
// We persist [mainTrie]'s batch last to avoid persisting the state
// root before all storage tries are done syncing.
// Returns the error from writing that batch, if any.
func (t *stateSync) onSyncComplete() error {
	return t.mainTrie.batch.Write()
}
   143  
   144  // storageTrieProducer waits for the main trie to finish
   145  // syncing then starts to add storage trie roots along
   146  // with their corresponding accounts to the segments channel.
   147  // returns nil if all storage tries were iterated and an
   148  // error if one occurred or the context expired.
   149  func (t *stateSync) storageTrieProducer(ctx context.Context) error {
   150  	// Wait for main trie to finish to ensure when this thread terminates
   151  	// there are no more storage tries to sync
   152  	select {
   153  	case <-t.mainTrieDone:
   154  	case <-ctx.Done():
   155  		return ctx.Err()
   156  	}
   157  
   158  	for {
   159  		// check ctx here to exit the loop early
   160  		if err := ctx.Err(); err != nil {
   161  			return err
   162  		}
   163  
   164  		root, accounts, more, err := t.trieQueue.getNextTrie()
   165  		if err != nil {
   166  			return err
   167  		}
   168  		// If there are no storage tries, then root will be the empty hash on the first pass.
   169  		if root != (common.Hash{}) {
   170  			// acquire semaphore (to keep number of tries in progress limited)
   171  			select {
   172  			case t.triesInProgressSem <- struct{}{}:
   173  			case <-ctx.Done():
   174  				return ctx.Err()
   175  			}
   176  
   177  			// Arbitrarily use the first account for making requests to the server.
   178  			// Note: getNextTrie guarantees that if a non-nil storage root is returned, then the
   179  			// slice of account hashes is non-empty.
   180  			syncAccount := accounts[0]
   181  			// create a trieToSync for the storage trie and mark it as in progress.
   182  			storageTrie, err := NewTrieToSync(t, root, syncAccount, NewStorageTrieTask(t, root, accounts))
   183  			if err != nil {
   184  				return err
   185  			}
   186  			t.addTrieInProgress(root, storageTrie)
   187  			storageTrie.startSyncing() // start syncing after tracking the trie as in progress
   188  		}
   189  		// if there are no more storage tries, close
   190  		// the task queue and exit the producer.
   191  		if !more {
   192  			close(t.segments)
   193  			return nil
   194  		}
   195  	}
   196  }
   197  
   198  func (t *stateSync) Start(ctx context.Context) error {
   199  	// Start the code syncer and leaf syncer.
   200  	eg, egCtx := errgroup.WithContext(ctx)
   201  	t.codeSyncer.start(egCtx) // start the code syncer first since the leaf syncer may add code tasks
   202  	t.syncer.Start(egCtx, defaultNumThreads, t.onSyncFailure)
   203  	eg.Go(func() error {
   204  		if err := <-t.syncer.Done(); err != nil {
   205  			return err
   206  		}
   207  		return t.onSyncComplete()
   208  	})
   209  	eg.Go(func() error {
   210  		err := <-t.codeSyncer.Done()
   211  		return err
   212  	})
   213  	eg.Go(func() error {
   214  		return t.storageTrieProducer(egCtx)
   215  	})
   216  
   217  	// The errgroup wait will take care of returning the first error that occurs, or returning
   218  	// nil if both finish without an error.
   219  	go func() {
   220  		t.done <- eg.Wait()
   221  	}()
   222  	return nil
   223  }
   224  
// Done returns the channel on which the result of the sync begun by Start
// is delivered: the first error from the errgroup, or nil on success.
func (t *stateSync) Done() <-chan error { return t.done }
   226  
// addTrieInProgress tracks the root as being currently synced.
// It stores [trie] under [root] in triesInProgress, replacing any
// entry already present for that root.
func (t *stateSync) addTrieInProgress(root common.Hash, trie *trieToSync) {
	// write lock: the map is mutated
	t.lock.Lock()
	defer t.lock.Unlock()

	t.triesInProgress[root] = trie
}
   234  
   235  // removeTrieInProgress removes root from the set of tracked
   236  // tries in progress and notifies the storage root producer
   237  // so it can continue in case it was paused due to the
   238  // maximum number of tries in progress being previously reached.
   239  func (t *stateSync) removeTrieInProgress(root common.Hash) error {
   240  	t.lock.Lock()
   241  	defer t.lock.Unlock()
   242  
   243  	t.stats.trieDone(root)
   244  	if _, ok := t.triesInProgress[root]; !ok {
   245  		return fmt.Errorf("removeTrieInProgress for unexpected root: %s", root)
   246  	}
   247  	delete(t.triesInProgress, root)
   248  	return nil
   249  }
   250  
   251  // onSyncFailure is called if the sync fails, this writes all
   252  // batches of in-progress trie segments to disk to have maximum
   253  // progress to restore.
   254  func (t *stateSync) onSyncFailure(error) error {
   255  	t.lock.RLock()
   256  	defer t.lock.RUnlock()
   257  
   258  	for _, trie := range t.triesInProgress {
   259  		for _, segment := range trie.segments {
   260  			if err := segment.batch.Write(); err != nil {
   261  				return err
   262  			}
   263  		}
   264  	}
   265  	return nil
   266  }