github.com/unicornultrafoundation/go-u2u@v1.0.0-rc1.0.20240205080301-e74a83d3fadc/gossip/protocols/snap/snapstream/snapleecher/statesync.go

// Copyright 2017 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package snapleecher

import (
	"sync"
	"time"

	"github.com/unicornultrafoundation/go-u2u/common"
	"github.com/unicornultrafoundation/go-u2u/core/state"
	"github.com/unicornultrafoundation/go-u2u/crypto"
	"github.com/unicornultrafoundation/go-u2u/log"
	"github.com/unicornultrafoundation/go-u2u/trie"
	"golang.org/x/crypto/sha3"
)

// stateReq represents a batch of state fetch requests grouped together into
// a single data retrieval network packet.
type stateReq struct {
	nItems    uint16                    // Number of items requested for download (max is 384, so uint16 is sufficient)
	trieTasks map[common.Hash]*trieTask // Trie node download tasks to track previous attempts
	codeTasks map[common.Hash]*codeTask // Byte code download tasks to track previous attempts
	timeout   time.Duration             // Maximum round trip time for this to complete
	timer     *time.Timer               // Timer to fire when the RTT timeout expires
	peer      *peerConnection           // Peer that we're requesting from
	delivered time.Time                 // Time when the packet was delivered (independent of when we process it)
	response  [][]byte                  // Response data of the peer (nil for timeouts)
	dropped   bool                      // Flag whether the peer dropped off early
}

// timedOut returns whether this request timed out.
func (req *stateReq) timedOut() bool {
	return req.response == nil
}

// stateSyncStats is a collection of progress stats to report during a state trie
// sync to RPC requests as well as to display in user logs.
type stateSyncStats struct {
	processed  uint64 // Number of state entries processed
	duplicate  uint64 // Number of state entries downloaded twice
	unexpected uint64 // Number of non-requested state entries received
	pending    uint64 // Number of still pending state entries
}

// SyncState starts downloading state with the given root hash.
func (d *Leecher) SyncState(root common.Hash) *stateSync {
	// Create the state sync
	s := newStateSync(d, root)
	select {
	case d.stateSyncStart <- s:
		// If we tell the statesync to restart with a new root, we also need
		// to wait for it to actually start -- i.e. until old requests have
		// timed out or been delivered
		<-s.started
	case <-d.quitCh:
		s.err = errCancelStateFetch
		close(s.done)
	}
	return s
}
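
// Usage sketch (hypothetical caller; `root` would typically be the pivot
// block's state root, which is determined outside this file):
//
//	s := d.SyncState(root)
//	if err := s.Wait(); err != nil {
//		log.Error("State sync failed", "root", root, "err", err)
//	}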

// stateFetcher manages the active state sync and accepts requests
// on its behalf.
func (d *Leecher) stateFetcher() {
	for {
		select {
		case s := <-d.stateSyncStart:
			for next := s; next != nil; {
				next = d.runStateSync(next)
			}
		case <-d.stateCh:
			// Ignore state responses while no sync is running.
		case <-d.quitCh:
			return
		}
	}
}
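
// Note: stateFetcher is meant to run as a single long-lived goroutine for the
// leecher's lifetime (upstream go-ethereum spawns it when the downloader is
// constructed); a sketch, assuming an analogous construction site here:
//
//	go d.stateFetcher()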

// runStateSync runs a state synchronisation until it completes or another root
// hash is requested to be switched over to.
func (d *Leecher) runStateSync(s *stateSync) *stateSync {
	var (
		active   = make(map[string]*stateReq) // Currently in-flight requests
		finished []*stateReq                  // Completed or failed requests
		timeout  = make(chan *stateReq)       // Timed out active requests
	)
	log.Trace("State sync starting", "root", s.root)

	defer func() {
		// Cancel active request timers on exit. Also set peers to idle so they're
		// available for the next sync.
		for _, req := range active {
			req.timer.Stop()
			req.peer.SetNodeDataIdle(int(req.nItems), time.Now())
		}
	}()
	go s.run()
	defer s.Cancel()

	// Listen for peer departure events to cancel assigned tasks
	peerDrop := make(chan *peerConnection, 1024)
	peerSub := s.d.peers.SubscribePeerDrops(peerDrop)
	defer peerSub.Unsubscribe()

	for {
		// Enable sending of the first buffered element if there is one.
		var (
			deliverReq   *stateReq
			deliverReqCh chan *stateReq
		)
		if len(finished) > 0 {
			deliverReq = finished[0]
			deliverReqCh = s.deliver
		}

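		// A send on a nil channel blocks forever, so while 'finished' is empty
		// (deliverReqCh == nil) the delivery case below is effectively disabled
		// until a completed request becomes available.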
		select {
		// The stateSync lifecycle:
		case next := <-d.stateSyncStart:
			d.spindownStateSync(active, finished, timeout, peerDrop)
			return next

		case <-s.done:
			d.spindownStateSync(active, finished, timeout, peerDrop)
			return nil

		// Send the next finished request to the current sync:
		case deliverReqCh <- deliverReq:
			// Shift out the first request, but also set the emptied slot to nil for GC
			copy(finished, finished[1:])
			finished[len(finished)-1] = nil
			finished = finished[:len(finished)-1]

		// Handle incoming state packs:
		case pack := <-d.stateCh:
			// Discard any data not requested (or previously timed out)
			req := active[pack.PeerId()]
			if req == nil {
				log.Debug("Unrequested node data", "peer", pack.PeerId(), "len", pack.Items())
				continue
			}
			// Finalize the request and queue up for processing
			req.timer.Stop()
			req.response = pack.(*statePack).states
			req.delivered = time.Now()

			finished = append(finished, req)
			delete(active, pack.PeerId())

		// Handle dropped peer connections:
		case p := <-peerDrop:
			// Skip if no request is currently pending
			req := active[p.id]
			if req == nil {
				continue
			}
			// Finalize the request and queue up for processing
			req.timer.Stop()
			req.dropped = true
			req.delivered = time.Now()

			finished = append(finished, req)
			delete(active, p.id)

		// Handle timed-out requests:
		case req := <-timeout:
			// If the peer is already requesting something else, ignore the stale timeout.
			// This can happen when the timeout and the delivery happen simultaneously,
			// causing both pathways to trigger.
			if active[req.peer.id] != req {
				continue
			}
			req.delivered = time.Now()
			// Move the timed out data back into the download queue
			finished = append(finished, req)
			delete(active, req.peer.id)

		// Track outgoing state requests:
		case req := <-d.trackStateReq:
			// If an active request already exists for this peer, we have a problem. In
			// theory the trie node schedule must never assign two requests to the same
			// peer. In practice however, a peer might receive a request, disconnect and
			// immediately reconnect before the previous one times out. In this case the
			// first request is never honored, but we must not silently overwrite it, as
			// that causes valid requests to go missing and sync to get stuck.
			if old := active[req.peer.id]; old != nil {
				log.Warn("Busy peer assigned new state fetch", "peer", old.peer.id)
				// Move the previous request to the finished set
				old.timer.Stop()
				old.dropped = true
				old.delivered = time.Now()
				finished = append(finished, old)
			}
			// Start a timer to notify the sync loop if the peer stalled.
			req.timer = time.AfterFunc(req.timeout, func() {
				timeout <- req
			})
			active[req.peer.id] = req
		}
	}
}

// spindownStateSync 'drains' the outstanding requests; some will be delivered and
// others will time out. This is to ensure that when the next stateSync starts working,
// all peers are marked as idle and de facto _are_ idle.
func (d *Leecher) spindownStateSync(active map[string]*stateReq, finished []*stateReq, timeout chan *stateReq, peerDrop chan *peerConnection) {
	log.Trace("State sync spinning down", "active", len(active), "finished", len(finished))
	for len(active) > 0 {
		var (
			req    *stateReq
			reason string
		)
		select {
		// Handle (drop) incoming state packs:
		case pack := <-d.stateCh:
			req = active[pack.PeerId()]
			reason = "delivered"
		// Handle dropped peer connections:
		case p := <-peerDrop:
			req = active[p.id]
			reason = "peerdrop"
		// Handle timed-out requests:
		case req = <-timeout:
			reason = "timeout"
		}
		if req == nil {
			continue
		}
		req.peer.log.Trace("State peer marked idle (spindown)", "req.items", int(req.nItems), "reason", reason)
		req.timer.Stop()
		delete(active, req.peer.id)
		req.peer.SetNodeDataIdle(int(req.nItems), time.Now())
	}
	// The 'finished' set contains deliveries that we were going to pass to processing.
	// Those are now moot, but we still need to set those peers as idle, which would
	// otherwise have been done after processing.
	for _, req := range finished {
		req.peer.SetNodeDataIdle(int(req.nItems), time.Now())
	}
}

// stateSync schedules requests for downloading a particular state trie defined
// by a given state root.
type stateSync struct {
	d *Leecher // Leecher instance to access and manage the current peerset

	root   common.Hash        // State root currently being synced
	sched  *trie.Sync         // State trie sync scheduler defining the tasks
	keccak crypto.KeccakState // Keccak256 hasher to verify deliveries with

	trieTasks map[common.Hash]*trieTask // Set of trie node tasks currently queued for retrieval
	codeTasks map[common.Hash]*codeTask // Set of byte code tasks currently queued for retrieval

	numUncommitted   int
	bytesUncommitted int

	started chan struct{} // Signalled once the sync loop starts

	deliver    chan *stateReq // Delivery channel multiplexing peer responses
	cancel     chan struct{}  // Channel to signal a termination request
	cancelOnce sync.Once      // Ensures cancel only ever gets called once
	done       chan struct{}  // Channel to signal termination completion
	err        error          // Any error hit during sync (set before completion)
}

// trieTask represents a single trie node download task, containing the set of
// peers already attempted for retrieval, used to detect stalled syncs and abort.
type trieTask struct {
	path     [][]byte            // Trie node path, as provided by the sync scheduler
	attempts map[string]struct{} // Set of peer ids already tried (map used as a set)
}

// codeTask represents a single byte code download task, containing the set of
// peers already attempted for retrieval, used to detect stalled syncs and abort.
type codeTask struct {
	attempts map[string]struct{} // Set of peer ids already tried (map used as a set)
}

// newStateSync creates a new state trie download scheduler. This method does not
// yet start the sync. The caller needs to invoke run to initiate.
func newStateSync(d *Leecher, root common.Hash) *stateSync {
	return &stateSync{
		d:         d,
		root:      root,
		sched:     state.NewStateSync(root, d.stateDB, d.stateBloom, nil),
		keccak:    sha3.NewLegacyKeccak256().(crypto.KeccakState),
		trieTasks: make(map[common.Hash]*trieTask),
		codeTasks: make(map[common.Hash]*codeTask),
		deliver:   make(chan *stateReq),
		cancel:    make(chan struct{}),
		done:      make(chan struct{}),
		started:   make(chan struct{}),
	}
}
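
// Lifecycle sketch (illustrative only; in this file the hand-off is performed
// by SyncState/stateFetcher/runStateSync rather than by callers directly):
//
//	s := newStateSync(d, root)
//	go s.run()      // start the sync loop
//	<-s.started     // wait until the loop has actually started
//	err := s.Wait() // block until the sync finishes or is cancelled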

// run starts the task assignment and response processing loop, blocking until
// it finishes, and finally notifying any goroutines waiting for the loop to
// finish.
func (s *stateSync) run() {
	close(s.started)
	s.err = s.d.SnapSyncer.Sync(s.root, s.cancel)
	close(s.done)
}

// Wait blocks until the sync is done or canceled.
func (s *stateSync) Wait() error {
	<-s.done
	return s.err
}

// Cancel cancels the sync and waits until it has shut down.
func (s *stateSync) Cancel() error {
	s.cancelOnce.Do(func() {
		close(s.cancel)
	})
	return s.Wait()
}
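
// Cancellation sketch (hypothetical caller): cancelOnce makes Cancel safe to
// call from multiple goroutines, and closing s.cancel is observed by the
// underlying SnapSyncer.Sync call; abortCh below is an assumed signal source:
//
//	go func() {
//		<-abortCh
//		s.Cancel()
//	}()
//	if err := s.Wait(); err != nil {
//		log.Warn("State sync aborted", "err", err)
//	}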