github.com/neatlab/neatio@v1.7.3-0.20220425043230-d903e92fcc75/neatptc/downloader/statesync.go (about)

     1  package downloader
     2  
     3  import (
     4  	"fmt"
     5  	"hash"
     6  	"sync"
     7  	"time"
     8  
     9  	"github.com/neatlab/neatio/chain/core/rawdb"
    10  	"github.com/neatlab/neatio/chain/core/state"
    11  	"github.com/neatlab/neatio/chain/log"
    12  	"github.com/neatlab/neatio/chain/trie"
    13  	"github.com/neatlab/neatio/neatdb"
    14  	"github.com/neatlab/neatio/utilities/common"
    15  	"golang.org/x/crypto/sha3"
    16  )
    17  
    18  type stateReq struct {
    19  	items    []common.Hash
    20  	tasks    map[common.Hash]*stateTask
    21  	timeout  time.Duration
    22  	timer    *time.Timer
    23  	peer     *peerConnection
    24  	response [][]byte
    25  	dropped  bool
    26  }
    27  
    28  func (req *stateReq) timedOut() bool {
    29  	return req.response == nil
    30  }
    31  
    32  type stateSyncStats struct {
    33  	processed  uint64
    34  	duplicate  uint64
    35  	unexpected uint64
    36  	pending    uint64
    37  }
    38  
    39  func (d *Downloader) syncState(root common.Hash) *stateSync {
    40  	s := newStateSync(d, root)
    41  	select {
    42  	case d.stateSyncStart <- s:
    43  	case <-d.quitCh:
    44  		s.err = errCancelStateFetch
    45  		close(s.done)
    46  	}
    47  	return s
    48  }
    49  
    50  func (d *Downloader) stateFetcher() {
    51  	for {
    52  		select {
    53  		case s := <-d.stateSyncStart:
    54  			for next := s; next != nil; {
    55  				next = d.runStateSync(next)
    56  			}
    57  		case <-d.stateCh:
    58  
    59  		case <-d.quitCh:
    60  			return
    61  		}
    62  	}
    63  }
    64  
    65  func (d *Downloader) runStateSync(s *stateSync) *stateSync {
    66  	var (
    67  		active   = make(map[string]*stateReq)
    68  		finished []*stateReq
    69  		timeout  = make(chan *stateReq)
    70  	)
    71  	defer func() {
    72  
    73  		for _, req := range active {
    74  			req.timer.Stop()
    75  			req.peer.SetNodeDataIdle(len(req.items))
    76  		}
    77  	}()
    78  
    79  	go s.run()
    80  	defer s.Cancel()
    81  
    82  	peerDrop := make(chan *peerConnection, 1024)
    83  	peerSub := s.d.peers.SubscribePeerDrops(peerDrop)
    84  	defer peerSub.Unsubscribe()
    85  
    86  	for {
    87  
    88  		var (
    89  			deliverReq   *stateReq
    90  			deliverReqCh chan *stateReq
    91  		)
    92  		if len(finished) > 0 {
    93  			deliverReq = finished[0]
    94  			deliverReqCh = s.deliver
    95  		}
    96  
    97  		select {
    98  
    99  		case next := <-d.stateSyncStart:
   100  			return next
   101  
   102  		case <-s.done:
   103  			return nil
   104  
   105  		case deliverReqCh <- deliverReq:
   106  
   107  			copy(finished, finished[1:])
   108  			finished[len(finished)-1] = nil
   109  			finished = finished[:len(finished)-1]
   110  
   111  		case pack := <-d.stateCh:
   112  
   113  			req := active[pack.PeerId()]
   114  			if req == nil {
   115  				log.Debug("Unrequested node data", "peer", pack.PeerId(), "len", pack.Items())
   116  				continue
   117  			}
   118  
   119  			req.timer.Stop()
   120  			req.response = pack.(*statePack).states
   121  
   122  			finished = append(finished, req)
   123  			delete(active, pack.PeerId())
   124  
   125  		case p := <-peerDrop:
   126  
   127  			req := active[p.id]
   128  			if req == nil {
   129  				continue
   130  			}
   131  
   132  			req.timer.Stop()
   133  			req.dropped = true
   134  
   135  			finished = append(finished, req)
   136  			delete(active, p.id)
   137  
   138  		case req := <-timeout:
   139  
   140  			if active[req.peer.id] != req {
   141  				continue
   142  			}
   143  
   144  			finished = append(finished, req)
   145  			delete(active, req.peer.id)
   146  
   147  		case req := <-d.trackStateReq:
   148  
   149  			if old := active[req.peer.id]; old != nil {
   150  				log.Warn("Busy peer assigned new state fetch", "peer", old.peer.id)
   151  
   152  				old.timer.Stop()
   153  				old.dropped = true
   154  
   155  				finished = append(finished, old)
   156  			}
   157  
   158  			req.timer = time.AfterFunc(req.timeout, func() {
   159  				select {
   160  				case timeout <- req:
   161  				case <-s.done:
   162  
   163  				}
   164  			})
   165  			active[req.peer.id] = req
   166  		}
   167  	}
   168  }
   169  
   170  type stateSync struct {
   171  	d *Downloader
   172  
   173  	sched  *trie.Sync
   174  	keccak hash.Hash
   175  	tasks  map[common.Hash]*stateTask
   176  
   177  	numUncommitted   int
   178  	bytesUncommitted int
   179  
   180  	deliver    chan *stateReq
   181  	cancel     chan struct{}
   182  	cancelOnce sync.Once
   183  	done       chan struct{}
   184  	err        error
   185  }
   186  
   187  type stateTask struct {
   188  	attempts map[string]struct{}
   189  }
   190  
   191  func newStateSync(d *Downloader, root common.Hash) *stateSync {
   192  	return &stateSync{
   193  		d:       d,
   194  		sched:   state.NewStateSync(root, d.stateDB),
   195  		keccak:  sha3.NewLegacyKeccak256(),
   196  		tasks:   make(map[common.Hash]*stateTask),
   197  		deliver: make(chan *stateReq),
   198  		cancel:  make(chan struct{}),
   199  		done:    make(chan struct{}),
   200  	}
   201  }
   202  
   203  func (s *stateSync) run() {
   204  	s.err = s.loop()
   205  	close(s.done)
   206  }
   207  
   208  func (s *stateSync) Wait() error {
   209  	<-s.done
   210  	return s.err
   211  }
   212  
   213  func (s *stateSync) Cancel() error {
   214  	s.cancelOnce.Do(func() { close(s.cancel) })
   215  	return s.Wait()
   216  }
   217  
   218  func (s *stateSync) loop() error {
   219  
   220  	newPeer := make(chan *peerConnection, 1024)
   221  	peerSub := s.d.peers.SubscribeNewPeers(newPeer)
   222  	defer peerSub.Unsubscribe()
   223  
   224  	for s.sched.Pending() > 0 {
   225  		if err := s.commit(false); err != nil {
   226  			return err
   227  		}
   228  		s.assignTasks()
   229  
   230  		select {
   231  		case <-newPeer:
   232  
   233  		case <-s.cancel:
   234  			return errCancelStateFetch
   235  
   236  		case <-s.d.cancelCh:
   237  			return errCancelStateFetch
   238  
   239  		case req := <-s.deliver:
   240  
   241  			log.Trace("Received node data response", "peer", req.peer.id, "count", len(req.response), "dropped", req.dropped, "timeout", !req.dropped && req.timedOut())
   242  			if len(req.items) <= 2 && !req.dropped && req.timedOut() {
   243  
   244  				log.Warn("Stalling state sync, dropping peer", "peer", req.peer.id)
   245  				s.d.dropPeer(req.peer.id)
   246  			}
   247  
   248  			if err := s.process(req); err != nil {
   249  				log.Warn("Node data write error", "err", err)
   250  				return err
   251  			}
   252  			req.peer.SetNodeDataIdle(len(req.response))
   253  		}
   254  	}
   255  	return s.commit(true)
   256  }
   257  
   258  func (s *stateSync) commit(force bool) error {
   259  	if !force && s.bytesUncommitted < neatdb.IdealBatchSize {
   260  		return nil
   261  	}
   262  	start := time.Now()
   263  	b := s.d.stateDB.NewBatch()
   264  	s.sched.Commit(b)
   265  	if err := b.Write(); err != nil {
   266  		return fmt.Errorf("DB write error: %v", err)
   267  	}
   268  	s.updateStats(s.numUncommitted, 0, 0, time.Since(start))
   269  	s.numUncommitted = 0
   270  	s.bytesUncommitted = 0
   271  	return nil
   272  }
   273  
   274  func (s *stateSync) assignTasks() {
   275  
   276  	peers, _ := s.d.peers.NodeDataIdlePeers()
   277  	for _, p := range peers {
   278  
   279  		cap := p.NodeDataCapacity(s.d.requestRTT())
   280  		req := &stateReq{peer: p, timeout: s.d.requestTTL()}
   281  		s.fillTasks(cap, req)
   282  
   283  		if len(req.items) > 0 {
   284  			req.peer.log.Trace("Requesting new batch of data", "type", "state", "count", len(req.items))
   285  			select {
   286  			case s.d.trackStateReq <- req:
   287  				req.peer.FetchNodeData(req.items)
   288  			case <-s.cancel:
   289  			case <-s.d.cancelCh:
   290  			}
   291  		}
   292  	}
   293  }
   294  
   295  func (s *stateSync) fillTasks(n int, req *stateReq) {
   296  
   297  	if len(s.tasks) < n {
   298  		new := s.sched.Missing(n - len(s.tasks))
   299  		for _, hash := range new {
   300  			s.tasks[hash] = &stateTask{make(map[string]struct{})}
   301  		}
   302  	}
   303  
   304  	req.items = make([]common.Hash, 0, n)
   305  	req.tasks = make(map[common.Hash]*stateTask, n)
   306  	for hash, t := range s.tasks {
   307  
   308  		if len(req.items) == n {
   309  			break
   310  		}
   311  
   312  		if _, ok := t.attempts[req.peer.id]; ok {
   313  			continue
   314  		}
   315  
   316  		t.attempts[req.peer.id] = struct{}{}
   317  		req.items = append(req.items, hash)
   318  		req.tasks[hash] = t
   319  		delete(s.tasks, hash)
   320  	}
   321  }
   322  
   323  func (s *stateSync) process(req *stateReq) error {
   324  
   325  	duplicate, unexpected := 0, 0
   326  
   327  	defer func(start time.Time) {
   328  		if duplicate > 0 || unexpected > 0 {
   329  			s.updateStats(0, duplicate, unexpected, time.Since(start))
   330  		}
   331  	}(time.Now())
   332  
   333  	progress := false
   334  
   335  	for _, blob := range req.response {
   336  		prog, hash, err := s.processNodeData(blob)
   337  		switch err {
   338  		case nil:
   339  			s.numUncommitted++
   340  			s.bytesUncommitted += len(blob)
   341  			progress = progress || prog
   342  		case trie.ErrNotRequested:
   343  			unexpected++
   344  		case trie.ErrAlreadyProcessed:
   345  			duplicate++
   346  		default:
   347  			return fmt.Errorf("invalid state node %s: %v", hash.TerminalString(), err)
   348  		}
   349  		if _, ok := req.tasks[hash]; ok {
   350  			delete(req.tasks, hash)
   351  		}
   352  	}
   353  
   354  	npeers := s.d.peers.Len()
   355  	for hash, task := range req.tasks {
   356  
   357  		if len(req.response) > 0 || req.timedOut() {
   358  			delete(task.attempts, req.peer.id)
   359  		}
   360  
   361  		if len(task.attempts) >= npeers {
   362  			return fmt.Errorf("state node %s failed with all peers (%d tries, %d peers)", hash.TerminalString(), len(task.attempts), npeers)
   363  		}
   364  
   365  		s.tasks[hash] = task
   366  	}
   367  	return nil
   368  }
   369  
   370  func (s *stateSync) processNodeData(blob []byte) (bool, common.Hash, error) {
   371  	res := trie.SyncResult{Data: blob}
   372  	s.keccak.Reset()
   373  	s.keccak.Write(blob)
   374  	s.keccak.Sum(res.Hash[:0])
   375  	committed, _, err := s.sched.Process([]trie.SyncResult{res})
   376  	return committed, res.Hash, err
   377  }
   378  
   379  func (s *stateSync) updateStats(written, duplicate, unexpected int, duration time.Duration) {
   380  	s.d.syncStatsLock.Lock()
   381  	defer s.d.syncStatsLock.Unlock()
   382  
   383  	s.d.syncStatsState.pending = uint64(s.sched.Pending())
   384  	s.d.syncStatsState.processed += uint64(written)
   385  	s.d.syncStatsState.duplicate += uint64(duplicate)
   386  	s.d.syncStatsState.unexpected += uint64(unexpected)
   387  
   388  	if written > 0 || duplicate > 0 || unexpected > 0 {
   389  		log.Info("Imported new state entries", "count", written, "elapsed", common.PrettyDuration(duration), "processed", s.d.syncStatsState.processed, "pending", s.d.syncStatsState.pending, "retry", len(s.tasks), "duplicate", s.d.syncStatsState.duplicate, "unexpected", s.d.syncStatsState.unexpected)
   390  	}
   391  	if written > 0 {
   392  		rawdb.WriteFastTrieProgress(s.d.stateDB, s.d.syncStatsState.processed)
   393  	}
   394  }