github.com/nspcc-dev/neo-go@v0.105.2-0.20240517133400-6be757af3eba/pkg/core/statesync/module.go

/*
Package statesync implements a module for the P2P state synchronisation process. The
module manages state synchronisation for non-archival nodes which are joining the
network and don't have the ability to resync from the genesis block.

Given the currently available state synchronisation point P, the state sync process
includes the following stages:

1. Fetching headers starting from height 0 up to P+1.
2. Fetching MPT nodes for height P starting from the corresponding state root.
3. Fetching blocks starting from height P-MaxTraceableBlocks (or 0) up to P.

Steps 2 and 3 are performed in parallel. Once all the data are collected
and stored in the db, an atomic state jump to the state sync point P is performed.
Further node operation is performed using the standard sync mechanism until
the node reaches the synchronised state.
*/
package statesync

import (
	"encoding/hex"
	"errors"
	"fmt"
	"sync"

	"github.com/nspcc-dev/neo-go/pkg/config"
	"github.com/nspcc-dev/neo-go/pkg/core/block"
	"github.com/nspcc-dev/neo-go/pkg/core/dao"
	"github.com/nspcc-dev/neo-go/pkg/core/mpt"
	"github.com/nspcc-dev/neo-go/pkg/core/stateroot"
	"github.com/nspcc-dev/neo-go/pkg/core/storage"
	"github.com/nspcc-dev/neo-go/pkg/io"
	"github.com/nspcc-dev/neo-go/pkg/util"
	"go.uber.org/zap"
)

// stateSyncStage is a type of state synchronisation stage.
type stateSyncStage uint8

const (
	// inactive means that state exchange is disabled by the protocol configuration.
	// Can't be combined with other states.
	inactive stateSyncStage = 1 << iota
	// none means that state exchange is enabled in the configuration, but
	// initialisation of the state sync module wasn't yet performed, i.e.
	// (*Module).Init wasn't called. Can't be combined with other states.
	none
	// initialized means that (*Module).Init was called, but other sync stages
	// are not yet reached (i.e. headers are requested, but not yet fetched).
	// Can't be combined with other states.
	initialized
	// headersSynced means that headers for the current state sync point are fetched.
	// May be combined with mptSynced and/or blocksSynced.
	headersSynced
	// mptSynced means that MPT nodes for the current state sync point are fetched.
	// Always combined with headersSynced; may be combined with blocksSynced.
	mptSynced
	// blocksSynced means that blocks up to the current state sync point are stored.
	// Always combined with headersSynced; may be combined with mptSynced.
	blocksSynced
)
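
// For illustration, the flags above combine as plain bit masks; a module that
// has fetched headers and blocks but still misses MPT nodes would satisfy:
//
//	stage := headersSynced | blocksSynced
//	needMPT := stage&mptSynced == 0 // true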

// Ledger is the interface required from Blockchain for Module to operate.
type Ledger interface {
	AddHeaders(...*block.Header) error
	BlockHeight() uint32
	GetConfig() config.Blockchain
	GetHeader(hash util.Uint256) (*block.Header, error)
	GetHeaderHash(uint32) util.Uint256
	HeaderHeight() uint32
}

// Module represents a state sync module; it's aimed at gathering state-related
// data to perform an atomic state jump.
type Module struct {
	lock sync.RWMutex
	log  *zap.Logger

	// syncPoint is the state synchronisation point P we're currently working against.
	syncPoint uint32
	// syncStage is the stage of the sync process.
	syncStage stateSyncStage
	// syncInterval is the delta between two adjacent state sync points.
	syncInterval uint32
	// blockHeight is the index of the latest stored block.
	blockHeight uint32

	dao      *dao.Simple
	bc       Ledger
	stateMod *stateroot.Module
	mptpool  *Pool

	billet *mpt.Billet

	jumpCallback func(p uint32) error
}

// NewModule returns a new instance of the statesync module.
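//
// A minimal usage sketch (bc, stateMod, logger and d are assumed to be
// created by the caller, and jumpToState is a hypothetical callback):
//
//	sm := statesync.NewModule(bc, stateMod, logger, d, func(p uint32) error {
//		return jumpToState(p)
//	})
//	if err := sm.Init(bc.BlockHeight()); err != nil {
//		// handle the error
//	}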
func NewModule(bc Ledger, stateMod *stateroot.Module, log *zap.Logger, s *dao.Simple, jumpCallback func(p uint32) error) *Module {
	if !(bc.GetConfig().P2PStateExchangeExtensions && bc.GetConfig().Ledger.RemoveUntraceableBlocks) {
		return &Module{
			dao:       s,
			bc:        bc,
			stateMod:  stateMod,
			syncStage: inactive,
		}
	}
	return &Module{
		dao:          s,
		bc:           bc,
		stateMod:     stateMod,
		log:          log,
		syncInterval: uint32(bc.GetConfig().StateSyncInterval),
		mptpool:      NewPool(),
		syncStage:    none,
		jumpCallback: jumpCallback,
	}
}

// Init initializes the state sync module for the current chain's height.
func (s *Module) Init(currChainHeight uint32) error {
	s.lock.Lock()
	defer s.lock.Unlock()

	if s.syncStage != none {
		return errors.New("already initialized or inactive")
	}

	p := (currChainHeight / s.syncInterval) * s.syncInterval
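	// For example, with a (hypothetical) StateSyncInterval of 40000 and
	// currChainHeight = 123456, the latest available sync point is
	// p = (123456 / 40000) * 40000 = 120000.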
	if p < 2*s.syncInterval {
		// The chain is too low to start the state exchange process, so use
		// the standard sync mechanism instead.
		s.syncStage = inactive
		return nil
	}
	pOld, err := s.dao.GetStateSyncPoint()
	if err == nil && pOld >= p-s.syncInterval {
		// The old point is still valid, so try to resync states for it.
		p = pOld
	} else {
		if s.bc.BlockHeight() > p-2*s.syncInterval {
			// The chain has already been synchronised up to the old state sync
			// point and regular block processing was started; the current block
			// height is enough to continue regular block processing.
			s.syncStage = inactive
			return nil
		}
		if err == nil {
			// pOld was found, it is outdated, and the chain wasn't completely synchronised for pOld. Need to drop the db.
			return fmt.Errorf("state sync point %d is found in the storage, "+
				"but sync process wasn't completed and the point is outdated. Please drop the database manually and restart the node to run the state sync process", pOld)
		}
		if s.bc.BlockHeight() != 0 {
			// pOld wasn't found, but block processing was started in a regular manner and the latest stored block is too outdated
			// to start regular block processing again. Need to drop the db.
			return fmt.Errorf("current chain's height is too low to start regular blocks processing from the oldest sync point %d. "+
				"Please drop the database manually and restart the node to run the state sync process", p-s.syncInterval)
		}

		// We've reached this point, so the chain has the genesis block only. Since we can't ruin
		// the current chain's state until the new state is completely fetched, outdated state-related data
		// will be removed from storage during (*Blockchain).jumpToState(...) execution.
		// All we need to do right now is to remove genesis-related MPT nodes.
		err = s.stateMod.CleanStorage()
		if err != nil {
			return fmt.Errorf("failed to remove outdated MPT data from storage: %w", err)
		}
	}

	s.syncPoint = p
	s.dao.PutStateSyncPoint(p)
	s.syncStage = initialized
	s.log.Info("try to sync state for the latest state synchronisation point",
		zap.Uint32("point", p),
		zap.Uint32("evaluated chain's blockHeight", currChainHeight))

	return s.defineSyncStage()
}

// TemporaryPrefix accepts the current storage prefix and returns the prefix
// to use for storing intermediate items during synchronization.
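//
// For illustration:
//
//	TemporaryPrefix(storage.STStorage)     // returns storage.STTempStorage
//	TemporaryPrefix(storage.STTempStorage) // returns storage.STStorage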
func TemporaryPrefix(currPrefix storage.KeyPrefix) storage.KeyPrefix {
	switch currPrefix {
	case storage.STStorage:
		return storage.STTempStorage
	case storage.STTempStorage:
		return storage.STStorage
	default:
		panic(fmt.Sprintf("invalid storage prefix: %x", currPrefix))
	}
}

// defineSyncStage sequentially checks and sets the sync process stage after Module
// initialization. It also performs initialization of the MPT billet if necessary.
func (s *Module) defineSyncStage() error {
	// check headers sync stage first
	ltstHeaderHeight := s.bc.HeaderHeight()
	if ltstHeaderHeight > s.syncPoint {
		s.syncStage = headersSynced
		s.log.Info("headers are in sync",
			zap.Uint32("headerHeight", ltstHeaderHeight))
	}

	// check blocks sync stage
	s.blockHeight = s.getLatestSavedBlock(s.syncPoint)
	if s.blockHeight >= s.syncPoint {
		s.syncStage |= blocksSynced
		s.log.Info("blocks are in sync",
			zap.Uint32("blockHeight", s.blockHeight))
	}

	// check MPT sync stage
	if s.blockHeight > s.syncPoint {
		s.syncStage |= mptSynced
		s.log.Info("MPT is in sync",
			zap.Uint32("stateroot height", s.stateMod.CurrentLocalHeight()))
	} else if s.syncStage&headersSynced != 0 {
		header, err := s.bc.GetHeader(s.bc.GetHeaderHash(s.syncPoint + 1))
		if err != nil {
			return fmt.Errorf("failed to get header to initialize MPT billet: %w", err)
		}
		var mode mpt.TrieMode
		// No need to enable GC here, the billet only keeps the latest state.
		if s.bc.GetConfig().Ledger.KeepOnlyLatestState || s.bc.GetConfig().Ledger.RemoveUntraceableBlocks {
			mode |= mpt.ModeLatest
		}
		s.billet = mpt.NewBillet(header.PrevStateRoot, mode,
			TemporaryPrefix(s.dao.Version.StoragePrefix), s.dao.Store)
		s.log.Info("MPT billet initialized",
			zap.Uint32("height", s.syncPoint),
			zap.String("state root", header.PrevStateRoot.StringBE()))
		pool := NewPool()
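		// The state root node is addressed by the empty path, so seed the
		// pool with it before traversing what's already in the storage.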
		pool.Add(header.PrevStateRoot, []byte{})
		err = s.billet.Traverse(func(_ []byte, n mpt.Node, _ []byte) bool {
			nPaths, ok := pool.TryGet(n.Hash())
			if !ok {
				// If this situation occurs, then it's a bug in the MPT pool or Traverse.
				panic("failed to get MPT node from the pool")
			}
			pool.Remove(n.Hash())
			childrenPaths := make(map[util.Uint256][][]byte)
			for _, path := range nPaths {
				nChildrenPaths := mpt.GetChildrenPaths(path, n)
				for hash, paths := range nChildrenPaths {
					childrenPaths[hash] = append(childrenPaths[hash], paths...) // it's OK to have duplicates, they'll be handled by the MPT pool
				}
			}
			pool.Update(nil, childrenPaths)
			return false
		}, true)
		if err != nil {
			return fmt.Errorf("failed to traverse MPT during initialization: %w", err)
		}
		s.mptpool.Update(nil, pool.GetAll())
		if s.mptpool.Count() == 0 {
			s.syncStage |= mptSynced
			s.log.Info("MPT is in sync",
				zap.Uint32("stateroot height", s.syncPoint))
		}
	}

	if s.syncStage == headersSynced|blocksSynced|mptSynced {
		s.log.Info("state is in sync, starting regular blocks processing")
		s.syncStage = inactive
	}
	return nil
}

// getLatestSavedBlock returns either the current block index (if it's still
// relevant for the state sync process to continue) or H-1, where H is the index
// of the earliest block that should be saved next.
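//
// For example, with p = 120000 and a hypothetical MaxTraceableBlocks of
// 100000, the function returns at least p - mtb = 20000 when no newer height
// is stored, so the first block to be fetched is 20001.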
func (s *Module) getLatestSavedBlock(p uint32) uint32 {
	var result uint32
	mtb := s.bc.GetConfig().MaxTraceableBlocks
	if p > mtb {
		result = p - mtb
	}
	storedH, err := s.dao.GetStateSyncCurrentBlockHeight()
	if err == nil && storedH > result {
		result = storedH
	}
	actualH := s.bc.BlockHeight()
	if actualH > result {
		result = actualH
	}
	return result
}

// AddHeaders validates and adds the specified headers to the chain.
func (s *Module) AddHeaders(hdrs ...*block.Header) error {
	s.lock.Lock()
	defer s.lock.Unlock()

	if s.syncStage != initialized {
		return errors.New("headers were not requested")
	}

	hdrsErr := s.bc.AddHeaders(hdrs...)
	if s.bc.HeaderHeight() > s.syncPoint {
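		// Headers have been fetched beyond the sync point; re-evaluating the
		// stage here also initializes the MPT billet for node requests.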
		err := s.defineSyncStage()
		if err != nil {
			return fmt.Errorf("failed to define current sync stage: %w", err)
		}
	}
	return hdrsErr
}

// AddBlock verifies and saves the given block skipping its executable scripts.
func (s *Module) AddBlock(block *block.Block) error {
	s.lock.Lock()
	defer s.lock.Unlock()

	if s.syncStage&headersSynced == 0 || s.syncStage&blocksSynced != 0 {
		return nil
	}

	if s.blockHeight == s.syncPoint {
		return nil
	}
	expectedHeight := s.blockHeight + 1
	if expectedHeight != block.Index {
		return fmt.Errorf("expected %d, got %d: invalid block index", expectedHeight, block.Index)
	}
	if s.bc.GetConfig().StateRootInHeader != block.StateRootEnabled {
		return fmt.Errorf("stateroot setting mismatch: %v != %v", s.bc.GetConfig().StateRootInHeader, block.StateRootEnabled)
	}
	if !s.bc.GetConfig().SkipBlockVerification {
		merkle := block.ComputeMerkleRoot()
		if !block.MerkleRoot.Equals(merkle) {
			return errors.New("invalid block: MerkleRoot mismatch")
		}
	}
	cache := s.dao.GetPrivate()
	if err := cache.StoreAsBlock(block, nil, nil); err != nil {
		return err
	}

	cache.PutStateSyncCurrentBlockHeight(block.Index)

	for _, tx := range block.Transactions {
		if err := cache.StoreAsTransaction(tx, block.Index, nil); err != nil {
			return err
		}
	}

	_, err := cache.Persist()
	if err != nil {
		return fmt.Errorf("failed to persist results: %w", err)
	}
	s.blockHeight = block.Index
	if s.blockHeight == s.syncPoint {
		s.syncStage |= blocksSynced
		s.log.Info("blocks are in sync",
			zap.Uint32("blockHeight", s.blockHeight))
		s.checkSyncIsCompleted()
	}
	return nil
}

// AddMPTNodes tries to add the provided set of MPT nodes to the MPT billet if
// they are not yet collected.
func (s *Module) AddMPTNodes(nodes [][]byte) error {
	s.lock.Lock()
	defer s.lock.Unlock()

	if s.syncStage&headersSynced == 0 || s.syncStage&mptSynced != 0 {
		return errors.New("MPT nodes were not requested")
	}

	for _, nBytes := range nodes {
		var n mpt.NodeObject
		r := io.NewBinReaderFromBuf(nBytes)
		n.DecodeBinary(r)
		if r.Err != nil {
			return fmt.Errorf("failed to decode MPT node: %w", r.Err)
		}
		err := s.restoreNode(n.Node)
		if err != nil {
			return err
		}
	}
	if s.mptpool.Count() == 0 {
		s.syncStage |= mptSynced
		s.log.Info("MPT is in sync",
			zap.Uint32("height", s.syncPoint))
		s.checkSyncIsCompleted()
	}
	return nil
}

func (s *Module) restoreNode(n mpt.Node) error {
	nPaths, ok := s.mptpool.TryGet(n.Hash())
	if !ok {
		// This can easily happen after receiving the same data from different peers.
		return nil
	}
	var childrenPaths = make(map[util.Uint256][][]byte)
	for _, path := range nPaths {
		// Must clone here in order to avoid future collapse collisions. If the node's refcount>1 then the MPT pool
		// will manage all paths for this node and call RestoreHashNode separately for each of the paths.
		err := s.billet.RestoreHashNode(path, n.Clone())
		if err != nil {
			return fmt.Errorf("failed to restore MPT node with hash %s and path %s: %w", n.Hash().StringBE(), hex.EncodeToString(path), err)
		}
		for h, paths := range mpt.GetChildrenPaths(path, n) {
			childrenPaths[h] = append(childrenPaths[h], paths...) // it's OK to have duplicates, they'll be handled by the MPT pool
		}
	}

	s.mptpool.Update(map[util.Uint256][][]byte{n.Hash(): nPaths}, childrenPaths)

	for h := range childrenPaths {
		if child, err := s.billet.GetFromStore(h); err == nil {
			// The child is already in the storage, so we don't need to request it one more time.
			err = s.restoreNode(child)
			if err != nil {
				return fmt.Errorf("unable to restore saved children: %w", err)
			}
		}
	}
	return nil
}

// checkSyncIsCompleted checks whether the state sync process is completed, i.e. headers up to P+1
// height are fetched, blocks up to P height are stored and MPT nodes for P height are stored.
// If so, then jumping to the P state sync point occurs. It is not protected by lock, thus the
// caller should take care of it.
func (s *Module) checkSyncIsCompleted() {
	if s.syncStage != headersSynced|mptSynced|blocksSynced {
		return
	}
	s.log.Info("state is in sync",
		zap.Uint32("state sync point", s.syncPoint))
	err := s.jumpCallback(s.syncPoint)
	if err != nil {
		s.log.Fatal("failed to jump to the latest state sync point", zap.Error(err))
	}
	s.syncStage = inactive
	s.dispose()
}

func (s *Module) dispose() {
	s.billet = nil
}

// BlockHeight returns the index of the latest stored block.
func (s *Module) BlockHeight() uint32 {
	s.lock.RLock()
	defer s.lock.RUnlock()

	return s.blockHeight
}

// IsActive tells whether the state sync module is on and still gathering state
// synchronisation data (headers, blocks or MPT nodes).
func (s *Module) IsActive() bool {
	s.lock.RLock()
	defer s.lock.RUnlock()

	return !(s.syncStage == inactive || (s.syncStage == headersSynced|mptSynced|blocksSynced))
}

// IsInitialized tells whether the state sync module does not require initialization.
// If `false` is returned, then Init can be safely called.
func (s *Module) IsInitialized() bool {
	s.lock.RLock()
	defer s.lock.RUnlock()

	return s.syncStage != none
}

// NeedHeaders tells whether the module hasn't yet completed headers synchronisation.
func (s *Module) NeedHeaders() bool {
	s.lock.RLock()
	defer s.lock.RUnlock()

	return s.syncStage == initialized
}

// NeedMPTNodes returns whether the module hasn't yet completed MPT synchronisation.
func (s *Module) NeedMPTNodes() bool {
	s.lock.RLock()
	defer s.lock.RUnlock()

	return s.syncStage&headersSynced != 0 && s.syncStage&mptSynced == 0
}

// Traverse traverses local MPT nodes starting from the specified root down to its
// children, calling `process` for each serialised node until the stop condition is satisfied.
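//
// A minimal usage sketch that counts serialised nodes under the given root
// (sm is a *Module and root is assumed to be a known state root hash):
//
//	var count int
//	err := sm.Traverse(root, func(_ mpt.Node, _ []byte) bool {
//		count++
//		return false // don't stop, traverse the whole subtree
//	})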
func (s *Module) Traverse(root util.Uint256, process func(node mpt.Node, nodeBytes []byte) bool) error {
	s.lock.RLock()
	defer s.lock.RUnlock()

	var mode mpt.TrieMode
	// GC must be turned off here to allow access to the archived nodes.
	if s.bc.GetConfig().Ledger.KeepOnlyLatestState || s.bc.GetConfig().Ledger.RemoveUntraceableBlocks {
		mode |= mpt.ModeLatest
	}
	b := mpt.NewBillet(root, mode, 0, storage.NewMemCachedStore(s.dao.Store))
	return b.Traverse(func(pathToNode []byte, node mpt.Node, nodeBytes []byte) bool {
		return process(node, nodeBytes)
	}, false)
}

// GetUnknownMPTNodesBatch returns a set of currently unknown MPT nodes (at most
// `limit` of them).
func (s *Module) GetUnknownMPTNodesBatch(limit int) []util.Uint256 {
	s.lock.RLock()
	defer s.lock.RUnlock()

	return s.mptpool.GetBatch(limit)
}
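
// For illustration, a network layer might drive MPT synchronisation with a
// loop along these lines (sm is a *Module and requestFromPeers is a
// hypothetical helper fetching serialised nodes from peers):
//
//	for sm.IsActive() && sm.NeedMPTNodes() {
//		hashes := sm.GetUnknownMPTNodesBatch(64)
//		if len(hashes) == 0 {
//			break
//		}
//		if err := sm.AddMPTNodes(requestFromPeers(hashes)); err != nil {
//			// handle the error
//		}
//	}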