github.com/luckypickle/go-ethereum-vet@v1.14.2/eth/downloader/statesync.go

github.com/luckypickle/go-ethereum-vet@v1.14.2/eth/downloader/statesync.go (about)

     1  // Copyright 2017 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package downloader
    18  
    19  import (
    20  	"fmt"
    21  	"hash"
    22  	"sync"
    23  	"time"
    24  
    25  	"github.com/luckypickle/go-ethereum-vet/common"
    26  	"github.com/luckypickle/go-ethereum-vet/core/rawdb"
    27  	"github.com/luckypickle/go-ethereum-vet/core/state"
    28  	"github.com/luckypickle/go-ethereum-vet/crypto/sha3"
    29  	"github.com/luckypickle/go-ethereum-vet/ethdb"
    30  	"github.com/luckypickle/go-ethereum-vet/log"
    31  	"github.com/luckypickle/go-ethereum-vet/trie"
    32  )
    33  
// stateReq represents a batch of state fetch requests grouped together into
// a single data retrieval network packet.
//
// A request is filled by the sync loop (fillTasks), tracked by runStateSync
// while it is in flight, and handed back to the sync loop over the deliver
// channel once it was answered, timed out or its peer dropped.
type stateReq struct {
	items    []common.Hash              // Hashes of the state items to download
	tasks    map[common.Hash]*stateTask // Download tasks to track previous attempts (keyed by item hash)
	timeout  time.Duration              // Maximum round trip time for this to complete
	timer    *time.Timer                // Timer to fire when the RTT timeout expires (set once the request is tracked)
	peer     *peerConnection            // Peer that we're requesting from
	response [][]byte                   // Response data of the peer (nil for timeouts and for dropped peers)
	dropped  bool                       // Flag whether the peer dropped off early (also set when the request is superseded)
}
    45  
// timedOut returns if this request timed out.
//
// The check is based solely on the absence of a response, so it also reports
// true for requests whose peer dropped before replying. Callers that need to
// tell the two cases apart must consult the dropped flag as well.
func (req *stateReq) timedOut() bool {
	return req.response == nil
}
    50  
// stateSyncStats is a collection of progress stats to report during a state trie
// sync to RPC requests as well as to display in user logs.
//
// The counters are maintained by stateSync.updateStats, which modifies them
// while holding the downloader's syncStatsLock.
type stateSyncStats struct {
	processed  uint64 // Number of state entries processed (written to the database)
	duplicate  uint64 // Number of state entries downloaded twice (already processed on delivery)
	unexpected uint64 // Number of non-requested state entries received
	pending    uint64 // Number of still pending state entries (as reported by the trie scheduler)
}
    59  
    60  // syncState starts downloading state with the given root hash.
    61  func (d *Downloader) syncState(root common.Hash) *stateSync {
    62  	s := newStateSync(d, root)
    63  	select {
    64  	case d.stateSyncStart <- s:
    65  	case <-d.quitCh:
    66  		s.err = errCancelStateFetch
    67  		close(s.done)
    68  	}
    69  	return s
    70  }
    71  
    72  // stateFetcher manages the active state sync and accepts requests
    73  // on its behalf.
    74  func (d *Downloader) stateFetcher() {
    75  	for {
    76  		select {
    77  		case s := <-d.stateSyncStart:
    78  			for next := s; next != nil; {
    79  				next = d.runStateSync(next)
    80  			}
    81  		case <-d.stateCh:
    82  			// Ignore state responses while no sync is running.
    83  		case <-d.quitCh:
    84  			return
    85  		}
    86  	}
    87  }
    88  
    89  // runStateSync runs a state synchronisation until it completes or another root
    90  // hash is requested to be switched over to.
    91  func (d *Downloader) runStateSync(s *stateSync) *stateSync {
    92  	var (
    93  		active   = make(map[string]*stateReq) // Currently in-flight requests
    94  		finished []*stateReq                  // Completed or failed requests
    95  		timeout  = make(chan *stateReq)       // Timed out active requests
    96  	)
    97  	defer func() {
    98  		// Cancel active request timers on exit. Also set peers to idle so they're
    99  		// available for the next sync.
   100  		for _, req := range active {
   101  			req.timer.Stop()
   102  			req.peer.SetNodeDataIdle(len(req.items))
   103  		}
   104  	}()
   105  	// Run the state sync.
   106  	go s.run()
   107  	defer s.Cancel()
   108  
   109  	// Listen for peer departure events to cancel assigned tasks
   110  	peerDrop := make(chan *peerConnection, 1024)
   111  	peerSub := s.d.peers.SubscribePeerDrops(peerDrop)
   112  	defer peerSub.Unsubscribe()
   113  
   114  	for {
   115  		// Enable sending of the first buffered element if there is one.
   116  		var (
   117  			deliverReq   *stateReq
   118  			deliverReqCh chan *stateReq
   119  		)
   120  		if len(finished) > 0 {
   121  			deliverReq = finished[0]
   122  			deliverReqCh = s.deliver
   123  		}
   124  
   125  		select {
   126  		// The stateSync lifecycle:
   127  		case next := <-d.stateSyncStart:
   128  			return next
   129  
   130  		case <-s.done:
   131  			return nil
   132  
   133  		// Send the next finished request to the current sync:
   134  		case deliverReqCh <- deliverReq:
   135  			// Shift out the first request, but also set the emptied slot to nil for GC
   136  			copy(finished, finished[1:])
   137  			finished[len(finished)-1] = nil
   138  			finished = finished[:len(finished)-1]
   139  
   140  		// Handle incoming state packs:
   141  		case pack := <-d.stateCh:
   142  			// Discard any data not requested (or previously timed out)
   143  			req := active[pack.PeerId()]
   144  			if req == nil {
   145  				log.Debug("Unrequested node data", "peer", pack.PeerId(), "len", pack.Items())
   146  				continue
   147  			}
   148  			// Finalize the request and queue up for processing
   149  			req.timer.Stop()
   150  			req.response = pack.(*statePack).states
   151  
   152  			finished = append(finished, req)
   153  			delete(active, pack.PeerId())
   154  
   155  			// Handle dropped peer connections:
   156  		case p := <-peerDrop:
   157  			// Skip if no request is currently pending
   158  			req := active[p.id]
   159  			if req == nil {
   160  				continue
   161  			}
   162  			// Finalize the request and queue up for processing
   163  			req.timer.Stop()
   164  			req.dropped = true
   165  
   166  			finished = append(finished, req)
   167  			delete(active, p.id)
   168  
   169  		// Handle timed-out requests:
   170  		case req := <-timeout:
   171  			// If the peer is already requesting something else, ignore the stale timeout.
   172  			// This can happen when the timeout and the delivery happens simultaneously,
   173  			// causing both pathways to trigger.
   174  			if active[req.peer.id] != req {
   175  				continue
   176  			}
   177  			// Move the timed out data back into the download queue
   178  			finished = append(finished, req)
   179  			delete(active, req.peer.id)
   180  
   181  		// Track outgoing state requests:
   182  		case req := <-d.trackStateReq:
   183  			// If an active request already exists for this peer, we have a problem. In
   184  			// theory the trie node schedule must never assign two requests to the same
   185  			// peer. In practice however, a peer might receive a request, disconnect and
   186  			// immediately reconnect before the previous times out. In this case the first
   187  			// request is never honored, alas we must not silently overwrite it, as that
   188  			// causes valid requests to go missing and sync to get stuck.
   189  			if old := active[req.peer.id]; old != nil {
   190  				log.Warn("Busy peer assigned new state fetch", "peer", old.peer.id)
   191  
   192  				// Make sure the previous one doesn't get siletly lost
   193  				old.timer.Stop()
   194  				old.dropped = true
   195  
   196  				finished = append(finished, old)
   197  			}
   198  			// Start a timer to notify the sync loop if the peer stalled.
   199  			req.timer = time.AfterFunc(req.timeout, func() {
   200  				select {
   201  				case timeout <- req:
   202  				case <-s.done:
   203  					// Prevent leaking of timer goroutines in the unlikely case where a
   204  					// timer is fired just before exiting runStateSync.
   205  				}
   206  			})
   207  			active[req.peer.id] = req
   208  		}
   209  	}
   210  }
   211  
// stateSync schedules requests for downloading a particular state trie defined
// by a given state root.
//
// An instance is created with newStateSync, executed with run (which closes
// done when finished) and can be aborted with Cancel.
type stateSync struct {
	d *Downloader // Downloader instance to access and manage current peerset

	sched  *trie.Sync                 // State trie sync scheduler defining the tasks
	keccak hash.Hash                  // Keccak256 hasher to verify deliveries with
	tasks  map[common.Hash]*stateTask // Set of tasks currently queued for retrieval

	numUncommitted   int // Number of state entries processed since the last database commit
	bytesUncommitted int // Total blob size of those entries, checked against ethdb.IdealBatchSize to trigger a commit

	deliver    chan *stateReq // Delivery channel multiplexing peer responses
	cancel     chan struct{}  // Channel to signal a termination request
	cancelOnce sync.Once      // Ensures cancel only ever gets called once
	done       chan struct{}  // Channel to signal termination completion
	err        error          // Any error hit during sync (set before completion)
}
   230  
// stateTask represents a single trie node download task, containing a set of
// peers already attempted retrieval from to detect stalled syncs and abort.
//
// NOTE: fillTasks builds this type with an unkeyed composite literal, so adding
// or reordering fields requires updating that call site.
type stateTask struct {
	attempts map[string]struct{} // Set of peer ids this task was already requested from
}
   236  
   237  // newStateSync creates a new state trie download scheduler. This method does not
   238  // yet start the sync. The user needs to call run to initiate.
   239  func newStateSync(d *Downloader, root common.Hash) *stateSync {
   240  	return &stateSync{
   241  		d:       d,
   242  		sched:   state.NewStateSync(root, d.stateDB),
   243  		keccak:  sha3.NewKeccak256(),
   244  		tasks:   make(map[common.Hash]*stateTask),
   245  		deliver: make(chan *stateReq),
   246  		cancel:  make(chan struct{}),
   247  		done:    make(chan struct{}),
   248  	}
   249  }
   250  
   251  // run starts the task assignment and response processing loop, blocking until
   252  // it finishes, and finally notifying any goroutines waiting for the loop to
   253  // finish.
   254  func (s *stateSync) run() {
   255  	s.err = s.loop()
   256  	close(s.done)
   257  }
   258  
// Wait blocks until the sync is done or canceled.
//
// It returns the error the sync terminated with. The error is read only after
// the done channel was closed, at which point it has already been set.
func (s *stateSync) Wait() error {
	<-s.done
	return s.err
}
   264  
   265  // Cancel cancels the sync and waits until it has shut down.
   266  func (s *stateSync) Cancel() error {
   267  	s.cancelOnce.Do(func() { close(s.cancel) })
   268  	return s.Wait()
   269  }
   270  
// loop is the main event loop of a state trie sync. It is responsible for the
// assignment of new tasks to peers (including sending it to them) as well as
// for the processing of inbound data. Note, that the loop does not directly
// receive data from peers, rather those are buffered up in the downloader and
// pushed here async. The reason is to decouple processing from data receipt
// and timeouts.
//
// It returns nil once the scheduler has no pending items left, errCancelStateFetch
// if the sync or the downloader was cancelled, or the first commit/processing
// error encountered.
func (s *stateSync) loop() (err error) {
	// Listen for new peer events to assign tasks to them
	newPeer := make(chan *peerConnection, 1024)
	peerSub := s.d.peers.SubscribeNewPeers(newPeer)
	defer peerSub.Unsubscribe()
	defer func() {
		// Always flush whatever is still uncommitted on exit. The commit error is
		// only reported if the loop itself was not already failing.
		cerr := s.commit(true)
		if err == nil {
			err = cerr
		}
	}()

	// Keep assigning new tasks until the sync completes or aborts
	for s.sched.Pending() > 0 {
		// Flush to disk if enough data accumulated (no-op otherwise)
		if err = s.commit(false); err != nil {
			return err
		}
		s.assignTasks()
		// Tasks assigned, wait for something to happen
		select {
		case <-newPeer:
			// New peer arrived, try to assign it download tasks

		case <-s.cancel:
			return errCancelStateFetch

		case <-s.d.cancelCh:
			return errCancelStateFetch

		case req := <-s.deliver:
			// Response, disconnect or timeout triggered, drop the peer if stalling
			log.Trace("Received node data response", "peer", req.peer.id, "count", len(req.response), "dropped", req.dropped, "timeout", !req.dropped && req.timedOut())
			if len(req.items) <= 2 && !req.dropped && req.timedOut() {
				// 2 items are the minimum requested, if even that times out, we've no use of
				// this peer at the moment.
				log.Warn("Stalling state sync, dropping peer", "peer", req.peer.id)
				s.d.dropPeer(req.peer.id)
			}
			// Process all the received blobs and check for stale delivery
			if err = s.process(req); err != nil {
				log.Warn("Node data write error", "err", err)
				return err
			}
			// Release the peer for new assignments, reporting how many items it delivered
			req.peer.SetNodeDataIdle(len(req.response))
		}
	}
	return nil
}
   325  
   326  func (s *stateSync) commit(force bool) error {
   327  	if !force && s.bytesUncommitted < ethdb.IdealBatchSize {
   328  		return nil
   329  	}
   330  	start := time.Now()
   331  	b := s.d.stateDB.NewBatch()
   332  	if written, err := s.sched.Commit(b); written == 0 || err != nil {
   333  		return err
   334  	}
   335  	if err := b.Write(); err != nil {
   336  		return fmt.Errorf("DB write error: %v", err)
   337  	}
   338  	s.updateStats(s.numUncommitted, 0, 0, time.Since(start))
   339  	s.numUncommitted = 0
   340  	s.bytesUncommitted = 0
   341  	return nil
   342  }
   343  
   344  // assignTasks attempts to assign new tasks to all idle peers, either from the
   345  // batch currently being retried, or fetching new data from the trie sync itself.
   346  func (s *stateSync) assignTasks() {
   347  	// Iterate over all idle peers and try to assign them state fetches
   348  	peers, _ := s.d.peers.NodeDataIdlePeers()
   349  	for _, p := range peers {
   350  		// Assign a batch of fetches proportional to the estimated latency/bandwidth
   351  		cap := p.NodeDataCapacity(s.d.requestRTT())
   352  		req := &stateReq{peer: p, timeout: s.d.requestTTL()}
   353  		s.fillTasks(cap, req)
   354  
   355  		// If the peer was assigned tasks to fetch, send the network request
   356  		if len(req.items) > 0 {
   357  			req.peer.log.Trace("Requesting new batch of data", "type", "state", "count", len(req.items))
   358  			select {
   359  			case s.d.trackStateReq <- req:
   360  				req.peer.FetchNodeData(req.items)
   361  			case <-s.cancel:
   362  			case <-s.d.cancelCh:
   363  			}
   364  		}
   365  	}
   366  }
   367  
   368  // fillTasks fills the given request object with a maximum of n state download
   369  // tasks to send to the remote peer.
   370  func (s *stateSync) fillTasks(n int, req *stateReq) {
   371  	// Refill available tasks from the scheduler.
   372  	if len(s.tasks) < n {
   373  		new := s.sched.Missing(n - len(s.tasks))
   374  		for _, hash := range new {
   375  			s.tasks[hash] = &stateTask{make(map[string]struct{})}
   376  		}
   377  	}
   378  	// Find tasks that haven't been tried with the request's peer.
   379  	req.items = make([]common.Hash, 0, n)
   380  	req.tasks = make(map[common.Hash]*stateTask, n)
   381  	for hash, t := range s.tasks {
   382  		// Stop when we've gathered enough requests
   383  		if len(req.items) == n {
   384  			break
   385  		}
   386  		// Skip any requests we've already tried from this peer
   387  		if _, ok := t.attempts[req.peer.id]; ok {
   388  			continue
   389  		}
   390  		// Assign the request to this peer
   391  		t.attempts[req.peer.id] = struct{}{}
   392  		req.items = append(req.items, hash)
   393  		req.tasks[hash] = t
   394  		delete(s.tasks, hash)
   395  	}
   396  }
   397  
   398  // process iterates over a batch of delivered state data, injecting each item
   399  // into a running state sync, re-queuing any items that were requested but not
   400  // delivered.
   401  func (s *stateSync) process(req *stateReq) error {
   402  	// Collect processing stats and update progress if valid data was received
   403  	duplicate, unexpected := 0, 0
   404  
   405  	defer func(start time.Time) {
   406  		if duplicate > 0 || unexpected > 0 {
   407  			s.updateStats(0, duplicate, unexpected, time.Since(start))
   408  		}
   409  	}(time.Now())
   410  
   411  	// Iterate over all the delivered data and inject one-by-one into the trie
   412  	progress := false
   413  
   414  	for _, blob := range req.response {
   415  		prog, hash, err := s.processNodeData(blob)
   416  		switch err {
   417  		case nil:
   418  			s.numUncommitted++
   419  			s.bytesUncommitted += len(blob)
   420  			progress = progress || prog
   421  		case trie.ErrNotRequested:
   422  			unexpected++
   423  		case trie.ErrAlreadyProcessed:
   424  			duplicate++
   425  		default:
   426  			return fmt.Errorf("invalid state node %s: %v", hash.TerminalString(), err)
   427  		}
   428  		if _, ok := req.tasks[hash]; ok {
   429  			delete(req.tasks, hash)
   430  		}
   431  	}
   432  	// Put unfulfilled tasks back into the retry queue
   433  	npeers := s.d.peers.Len()
   434  	for hash, task := range req.tasks {
   435  		// If the node did deliver something, missing items may be due to a protocol
   436  		// limit or a previous timeout + delayed delivery. Both cases should permit
   437  		// the node to retry the missing items (to avoid single-peer stalls).
   438  		if len(req.response) > 0 || req.timedOut() {
   439  			delete(task.attempts, req.peer.id)
   440  		}
   441  		// If we've requested the node too many times already, it may be a malicious
   442  		// sync where nobody has the right data. Abort.
   443  		if len(task.attempts) >= npeers {
   444  			return fmt.Errorf("state node %s failed with all peers (%d tries, %d peers)", hash.TerminalString(), len(task.attempts), npeers)
   445  		}
   446  		// Missing item, place into the retry queue.
   447  		s.tasks[hash] = task
   448  	}
   449  	return nil
   450  }
   451  
   452  // processNodeData tries to inject a trie node data blob delivered from a remote
   453  // peer into the state trie, returning whether anything useful was written or any
   454  // error occurred.
   455  func (s *stateSync) processNodeData(blob []byte) (bool, common.Hash, error) {
   456  	res := trie.SyncResult{Data: blob}
   457  	s.keccak.Reset()
   458  	s.keccak.Write(blob)
   459  	s.keccak.Sum(res.Hash[:0])
   460  	committed, _, err := s.sched.Process([]trie.SyncResult{res})
   461  	return committed, res.Hash, err
   462  }
   463  
   464  // updateStats bumps the various state sync progress counters and displays a log
   465  // message for the user to see.
   466  func (s *stateSync) updateStats(written, duplicate, unexpected int, duration time.Duration) {
   467  	s.d.syncStatsLock.Lock()
   468  	defer s.d.syncStatsLock.Unlock()
   469  
   470  	s.d.syncStatsState.pending = uint64(s.sched.Pending())
   471  	s.d.syncStatsState.processed += uint64(written)
   472  	s.d.syncStatsState.duplicate += uint64(duplicate)
   473  	s.d.syncStatsState.unexpected += uint64(unexpected)
   474  
   475  	if written > 0 || duplicate > 0 || unexpected > 0 {
   476  		log.Info("Imported new state entries", "count", written, "elapsed", common.PrettyDuration(duration), "processed", s.d.syncStatsState.processed, "pending", s.d.syncStatsState.pending, "retry", len(s.tasks), "duplicate", s.d.syncStatsState.duplicate, "unexpected", s.d.syncStatsState.unexpected)
   477  	}
   478  	if written > 0 {
   479  		rawdb.WriteFastTrieProgress(s.d.stateDB, s.d.syncStatsState.processed)
   480  	}
   481  }