github.com/number571/tendermint@v0.34.11-gost/internal/blockchain/v2/reactor.go

package v2

import (
	"errors"
	"fmt"
	"time"

	proto "github.com/gogo/protobuf/proto"

	bc "github.com/number571/tendermint/internal/blockchain"
	"github.com/number571/tendermint/internal/blockchain/v2/internal/behavior"
	cons "github.com/number571/tendermint/internal/consensus"
	tmsync "github.com/number571/tendermint/internal/libs/sync"
	"github.com/number571/tendermint/internal/p2p"
	"github.com/number571/tendermint/libs/log"
	"github.com/number571/tendermint/libs/sync"
	bcproto "github.com/number571/tendermint/proto/tendermint/blockchain"
	"github.com/number571/tendermint/state"
	"github.com/number571/tendermint/types"
)

const (
	// chBufferSize is the buffer size of all event channels.
	chBufferSize int = 1000
)

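// blockStore is the subset of the block store the reactor uses: loading and
// saving blocks, plus the base and current heights.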
type blockStore interface {
	LoadBlock(height int64) *types.Block
	SaveBlock(*types.Block, *types.PartSet, *types.Commit)
	Base() int64
	Height() int64
}

// BlockchainReactor handles the fast sync protocol.
type BlockchainReactor struct {
	p2p.BaseReactor

	fastSync    *sync.AtomicBool // enable fast sync on start when it's been Set
	stateSynced bool             // set to true when SwitchToFastSync is called by state sync
	scheduler   *Routine
	processor   *Routine
	logger      log.Logger

	mtx           tmsync.RWMutex
	maxPeerHeight int64
	syncHeight    int64
	events        chan Event // non-nil during a fast sync

	reporter behavior.Reporter
	io       iIO
	store    blockStore

	syncStartTime   time.Time
	syncStartHeight int64
	lastSyncRate    float64 // # blocks synced per sec, based on the last 100 blocks
}

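// blockApplier applies a verified block to the given state and returns the
// resulting state.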
type blockApplier interface {
	ApplyBlock(state state.State, blockID types.BlockID, block *types.Block) (state.State, error)
}

// XXX: unify naming in this package around tmState
func newReactor(state state.State, store blockStore, reporter behavior.Reporter,
	blockApplier blockApplier, fastSync bool, metrics *cons.Metrics) *BlockchainReactor {
	initHeight := state.LastBlockHeight + 1
	if initHeight == 1 {
		initHeight = state.InitialHeight
	}
	scheduler := newScheduler(initHeight, time.Now())
	pContext := newProcessorContext(store, blockApplier, state, metrics)
	// TODO: Fix naming to just newProcessor
	// newPcState requires a processorContext
	processor := newPcState(pContext)

	return &BlockchainReactor{
		scheduler:       newRoutine("scheduler", scheduler.handle, chBufferSize),
		processor:       newRoutine("processor", processor.handle, chBufferSize),
		store:           store,
		reporter:        reporter,
		logger:          log.NewNopLogger(),
		fastSync:        sync.NewBool(fastSync),
		syncStartHeight: initHeight,
		syncStartTime:   time.Time{},
		lastSyncRate:    0,
	}
}

// NewBlockchainReactor creates a new reactor instance.
func NewBlockchainReactor(
	state state.State,
	blockApplier blockApplier,
	store blockStore,
	fastSync bool,
	metrics *cons.Metrics) *BlockchainReactor {
	reporter := behavior.NewMockReporter()
	return newReactor(state, store, reporter, blockApplier, fastSync, metrics)
}

// SetSwitch implements Reactor interface.
func (r *BlockchainReactor) SetSwitch(sw *p2p.Switch) {
	r.Switch = sw
	if sw != nil {
		r.io = newSwitchIo(sw)
	} else {
		r.io = nil
	}
}

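// setMaxPeerHeight records the highest height reported by any peer, keeping
// the maximum seen so far.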
func (r *BlockchainReactor) setMaxPeerHeight(height int64) {
	r.mtx.Lock()
	defer r.mtx.Unlock()
	if height > r.maxPeerHeight {
		r.maxPeerHeight = height
	}
}

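// setSyncHeight records the height to which the reactor has synced.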
func (r *BlockchainReactor) setSyncHeight(height int64) {
	r.mtx.Lock()
	defer r.mtx.Unlock()
	r.syncHeight = height
}

// SyncHeight returns the height to which the BlockchainReactor has synced.
func (r *BlockchainReactor) SyncHeight() int64 {
	r.mtx.RLock()
	defer r.mtx.RUnlock()
	return r.syncHeight
}

// SetLogger sets the logger of the reactor.
func (r *BlockchainReactor) SetLogger(logger log.Logger) {
	r.logger = logger
	r.scheduler.setLogger(logger)
	r.processor.setLogger(logger)
}

// Start implements cmn.Service interface.
func (r *BlockchainReactor) Start() error {
	r.reporter = behavior.NewSwitchReporter(r.BaseReactor.Switch)
	if r.fastSync.IsSet() {
		err := r.startSync(nil)
		if err != nil {
			return fmt.Errorf("failed to start fast sync: %w", err)
		}
	}
	return nil
}

// startSync begins a fast sync, signaled by r.events being non-nil. If state is non-nil,
// the scheduler and processor are updated with this state on startup.
func (r *BlockchainReactor) startSync(state *state.State) error {
	r.mtx.Lock()
	defer r.mtx.Unlock()
	if r.events != nil {
		return errors.New("fast sync already in progress")
	}
	r.events = make(chan Event, chBufferSize)
	go r.scheduler.start()
	go r.processor.start()
	if state != nil {
		<-r.scheduler.ready()
		<-r.processor.ready()
		r.scheduler.send(bcResetState{state: *state})
		r.processor.send(bcResetState{state: *state})
	}
	go r.demux(r.events)
	return nil
}

// endSync ends a fast sync
func (r *BlockchainReactor) endSync() {
	r.mtx.Lock()
	defer r.mtx.Unlock()
	if r.events != nil {
		close(r.events)
	}
	r.events = nil
	r.scheduler.stop()
	r.processor.stop()
}

// SwitchToFastSync is called by the state sync reactor when switching to fast sync.
func (r *BlockchainReactor) SwitchToFastSync(state state.State) error {
	r.stateSynced = true
	state = state.Copy()

	err := r.startSync(&state)
	if err == nil {
		r.syncStartTime = time.Now()
	}

	return err
}

// reactor generated ticker events:
// ticker for cleaning peers
type rTryPrunePeer struct {
	priorityHigh
	time time.Time
}

func (e rTryPrunePeer) String() string {
	return fmt.Sprintf("rTryPrunePeer{%v}", e.time)
}

// ticker event for scheduling block requests
type rTrySchedule struct {
	priorityHigh
	time time.Time
}

func (e rTrySchedule) String() string {
	return fmt.Sprintf("rTrySchedule{%v}", e.time)
}

// ticker for block processing
type rProcessBlock struct {
	priorityNormal
}

func (e rProcessBlock) String() string {
	return "rProcessBlock"
}

// reactor generated events based on blockchain related messages from peers:
// blockResponse message received from a peer
type bcBlockResponse struct {
	priorityNormal
	time   time.Time
	peerID types.NodeID
	size   int64
	block  *types.Block
}

func (resp bcBlockResponse) String() string {
	return fmt.Sprintf("bcBlockResponse{%d#%X (size: %d bytes) from %v at %v}",
		resp.block.Height, resp.block.Hash(), resp.size, resp.peerID, resp.time)
}

// noBlockResponse message received from a peer
type bcNoBlockResponse struct {
	priorityNormal
	time   time.Time
	peerID types.NodeID
	height int64
}

func (resp bcNoBlockResponse) String() string {
	return fmt.Sprintf("bcNoBlockResponse{%v has no block at height %d at %v}",
		resp.peerID, resp.height, resp.time)
}

// statusResponse message received from a peer
type bcStatusResponse struct {
	priorityNormal
	time   time.Time
	peerID types.NodeID
	base   int64
	height int64
}

func (resp bcStatusResponse) String() string {
	return fmt.Sprintf("bcStatusResponse{%v is at height %d (base: %d) at %v}",
		resp.peerID, resp.height, resp.base, resp.time)
}

// new peer is connected
type bcAddNewPeer struct {
	priorityNormal
	peerID types.NodeID
}

func (resp bcAddNewPeer) String() string {
	return fmt.Sprintf("bcAddNewPeer{%v}", resp.peerID)
}

// existing peer is removed
type bcRemovePeer struct {
	priorityHigh
	peerID types.NodeID
	reason interface{}
}

func (resp bcRemovePeer) String() string {
	return fmt.Sprintf("bcRemovePeer{%v due to %v}", resp.peerID, resp.reason)
}

// resets the scheduler and processor state, e.g. following a switch from state syncing
type bcResetState struct {
	priorityHigh
	state state.State
}

func (e bcResetState) String() string {
	return fmt.Sprintf("bcResetState{%v}", e.state)
}

// Takes the channel as a parameter to avoid race conditions on r.events.
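// The loop pairs each periodic task with a ticker and a size-one "do" channel:
// ticks fill the channel without blocking, so a slow iteration coalesces
// pending work instead of queueing duplicate signals.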
func (r *BlockchainReactor) demux(events <-chan Event) {
	var lastHundred = time.Now()

	var (
		processBlockFreq = 20 * time.Millisecond
		doProcessBlockCh = make(chan struct{}, 1)
		doProcessBlockTk = time.NewTicker(processBlockFreq)
	)
	defer doProcessBlockTk.Stop()

	var (
		prunePeerFreq = 1 * time.Second
		doPrunePeerCh = make(chan struct{}, 1)
		doPrunePeerTk = time.NewTicker(prunePeerFreq)
	)
	defer doPrunePeerTk.Stop()

	var (
		scheduleFreq = 20 * time.Millisecond
		doScheduleCh = make(chan struct{}, 1)
		doScheduleTk = time.NewTicker(scheduleFreq)
	)
	defer doScheduleTk.Stop()

	var (
		statusFreq = 10 * time.Second
		doStatusCh = make(chan struct{}, 1)
		doStatusTk = time.NewTicker(statusFreq)
	)
	defer doStatusTk.Stop()
	doStatusCh <- struct{}{} // immediately broadcast to get status of existing peers

	// Memoize the scSchedulerFail error to avoid printing it every scheduleFreq.
	var scSchedulerFailErr error

	// XXX: Extract timers to make testing atemporal
	for {
		select {
		// Pacers: send at most per frequency but don't saturate
		case <-doProcessBlockTk.C:
			select {
			case doProcessBlockCh <- struct{}{}:
			default:
			}
		case <-doPrunePeerTk.C:
			select {
			case doPrunePeerCh <- struct{}{}:
			default:
			}
		case <-doScheduleTk.C:
			select {
			case doScheduleCh <- struct{}{}:
			default:
			}
		case <-doStatusTk.C:
			select {
			case doStatusCh <- struct{}{}:
			default:
			}

		// Tickers: perform tasks periodically
		case <-doScheduleCh:
			r.scheduler.send(rTrySchedule{time: time.Now()})
		case <-doPrunePeerCh:
			r.scheduler.send(rTryPrunePeer{time: time.Now()})
		case <-doProcessBlockCh:
			r.processor.send(rProcessBlock{})
		case <-doStatusCh:
			if err := r.io.broadcastStatusRequest(); err != nil {
				r.logger.Error("Error broadcasting status request", "err", err)
			}

		// Events from peers. Closing the channel signals event loop termination.
		case event, ok := <-events:
			if !ok {
				r.logger.Info("Stopping event processing")
				return
			}
			switch event := event.(type) {
			case bcStatusResponse:
				r.setMaxPeerHeight(event.height)
				r.scheduler.send(event)
			case bcAddNewPeer, bcRemovePeer, bcBlockResponse, bcNoBlockResponse:
				r.scheduler.send(event)
			default:
				r.logger.Error("Received unexpected event", "event", fmt.Sprintf("%T", event))
			}

		// Incremental events from scheduler
		case event := <-r.scheduler.next():
			switch event := event.(type) {
			case scBlockReceived:
				r.processor.send(event)
			case scPeerError:
				r.processor.send(event)
				if err := r.reporter.Report(behavior.BadMessage(event.peerID, "scPeerError")); err != nil {
					r.logger.Error("Error reporting peer", "err", err)
				}
			case scBlockRequest:
				peer := r.Switch.Peers().Get(event.peerID)
				if peer == nil {
					r.logger.Error("Wanted to send block request, but no such peer", "peerID", event.peerID)
					continue
				}
				if err := r.io.sendBlockRequest(peer, event.height); err != nil {
					r.logger.Error("Error sending block request", "err", err)
				}
			case scFinishedEv:
				r.processor.send(event)
				r.scheduler.stop()
			case scSchedulerFail:
				if scSchedulerFailErr != event.reason {
					r.logger.Error("Scheduler failure", "err", event.reason.Error())
					scSchedulerFailErr = event.reason
				}
			case scPeersPruned:
				// Remove peers from the processor.
				for _, peerID := range event.peers {
					r.processor.send(scPeerError{peerID: peerID, reason: errors.New("peer was pruned")})
				}
				r.logger.Debug("Pruned peers", "count", len(event.peers))
			case noOpEvent:
			default:
				r.logger.Error("Received unexpected scheduler event", "event", fmt.Sprintf("%T", event))
			}

		// Incremental events from processor
		case event := <-r.processor.next():
			switch event := event.(type) {
			case pcBlockProcessed:
				r.setSyncHeight(event.height)
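				// Every 100 blocks, compute the rate over the last window and
				// smooth it with an exponential moving average (90% old, 10% new).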
				if (r.syncHeight-r.syncStartHeight)%100 == 0 {
					newSyncRate := 100 / time.Since(lastHundred).Seconds()
					if r.lastSyncRate == 0 {
						r.lastSyncRate = newSyncRate
					} else {
						r.lastSyncRate = 0.9*r.lastSyncRate + 0.1*newSyncRate
					}
					r.logger.Info("Fast Sync Rate", "height", r.syncHeight,
						"max_peer_height", r.maxPeerHeight, "blocks/s", r.lastSyncRate)
					lastHundred = time.Now()
				}
				r.scheduler.send(event)
			case pcBlockVerificationFailure:
				r.scheduler.send(event)
			case pcFinished:
				r.logger.Info("Fast sync complete, switching to consensus")
				if !r.io.trySwitchToConsensus(event.tmState, event.blocksSynced > 0 || r.stateSynced) {
					r.logger.Error("Failed to switch to consensus reactor")
				}
				r.endSync()
				r.fastSync.UnSet()
				return
			case noOpEvent:
			default:
				r.logger.Error("Received unexpected processor event", "event", fmt.Sprintf("%T", event))
			}

		// Terminal event from scheduler
		case err := <-r.scheduler.final():
			switch err {
			case nil:
				r.logger.Info("Scheduler stopped")
			default:
				r.logger.Error("Scheduler aborted with error", "err", err)
			}

		// Terminal event from processor
		case err := <-r.processor.final():
			switch err {
			case nil:
				r.logger.Info("Processor stopped")
			default:
				r.logger.Error("Processor aborted with error", "err", err)
			}
		}
	}
}

// Stop implements cmn.Service interface.
func (r *BlockchainReactor) Stop() error {
	r.logger.Info("reactor stopping")
	r.endSync()
	r.logger.Info("reactor stopped")
	return nil
}

// Receive implements Reactor by handling different message types.
// XXX: do not call any methods that can block or incur heavy processing.
// https://github.com/number571/tendermint/issues/2888
func (r *BlockchainReactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) {
	logger := r.logger.With("src", src.ID(), "chID", chID)

	msgProto := new(bcproto.Message)

	if err := proto.Unmarshal(msgBytes, msgProto); err != nil {
		logger.Error("error decoding message", "err", err)
		_ = r.reporter.Report(behavior.BadMessage(src.ID(), err.Error()))
		return
	}

	if err := msgProto.Validate(); err != nil {
		logger.Error("peer sent us an invalid msg", "msg", msgProto, "err", err)
		_ = r.reporter.Report(behavior.BadMessage(src.ID(), err.Error()))
		return
	}

	r.logger.Debug("received", "msg", msgProto)

	switch msg := msgProto.Sum.(type) {
	case *bcproto.Message_StatusRequest:
		if err := r.io.sendStatusResponse(r.store.Base(), r.store.Height(), src); err != nil {
			logger.Error("Could not send status message to src peer", "err", err)
		}

	case *bcproto.Message_BlockRequest:
		block := r.store.LoadBlock(msg.BlockRequest.Height)
		if block != nil {
			if err := r.io.sendBlockToPeer(block, src); err != nil {
				logger.Error("Could not send block message to src peer", "err", err)
			}
		} else {
			logger.Info("peer asking for a block we don't have", "height", msg.BlockRequest.Height)
			if err := r.io.sendBlockNotFound(msg.BlockRequest.Height, src); err != nil {
				logger.Error("Couldn't send block not found msg", "err", err)
			}
		}

	case *bcproto.Message_StatusResponse:
		r.mtx.RLock()
		if r.events != nil {
			r.events <- bcStatusResponse{
				peerID: src.ID(),
				base:   msg.StatusResponse.Base,
				height: msg.StatusResponse.Height,
			}
		}
		r.mtx.RUnlock()

	case *bcproto.Message_BlockResponse:
		bi, err := types.BlockFromProto(msg.BlockResponse.Block)
		if err != nil {
			logger.Error("error transitioning block from protobuf", "err", err)
			_ = r.reporter.Report(behavior.BadMessage(src.ID(), err.Error()))
			return
		}
		r.mtx.RLock()
		if r.events != nil {
			r.events <- bcBlockResponse{
				peerID: src.ID(),
				block:  bi,
				size:   int64(len(msgBytes)),
				time:   time.Now(),
			}
		}
		r.mtx.RUnlock()

	case *bcproto.Message_NoBlockResponse:
		r.mtx.RLock()
		if r.events != nil {
			r.events <- bcNoBlockResponse{
				peerID: src.ID(),
				height: msg.NoBlockResponse.Height,
				time:   time.Now(),
			}
		}
		r.mtx.RUnlock()
	}
}

// AddPeer implements Reactor interface
func (r *BlockchainReactor) AddPeer(peer p2p.Peer) {
	err := r.io.sendStatusResponse(r.store.Base(), r.store.Height(), peer)
	if err != nil {
		r.logger.Error("could not send our status to the new peer", "peer", peer.ID(), "err", err)
	}

	err = r.io.sendStatusRequest(peer)
	if err != nil {
		r.logger.Error("could not send status request to the new peer", "peer", peer.ID(), "err", err)
	}

	r.mtx.RLock()
	defer r.mtx.RUnlock()
	if r.events != nil {
		r.events <- bcAddNewPeer{peerID: peer.ID()}
	}
}

// RemovePeer implements Reactor interface.
func (r *BlockchainReactor) RemovePeer(peer p2p.Peer, reason interface{}) {
	r.mtx.RLock()
	defer r.mtx.RUnlock()
	if r.events != nil {
		r.events <- bcRemovePeer{
			peerID: peer.ID(),
			reason: reason,
		}
	}
}

// GetChannels implements Reactor
func (r *BlockchainReactor) GetChannels() []*p2p.ChannelDescriptor {
	return []*p2p.ChannelDescriptor{
		{
			ID:                  BlockchainChannel,
			Priority:            5,
			SendQueueCapacity:   2000,
			RecvBufferCapacity:  1024,
			RecvMessageCapacity: bc.MaxMsgSize,
		},
	}
}

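// GetMaxPeerBlockHeight returns the highest block height reported by any peer.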
func (r *BlockchainReactor) GetMaxPeerBlockHeight() int64 {
	r.mtx.RLock()
	defer r.mtx.RUnlock()
	return r.maxPeerHeight
}

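// GetTotalSyncedTime returns how long the current fast sync has been running,
// or zero if fast sync is inactive or has not started.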
func (r *BlockchainReactor) GetTotalSyncedTime() time.Duration {
	if !r.fastSync.IsSet() || r.syncStartTime.IsZero() {
		return time.Duration(0)
	}
	return time.Since(r.syncStartTime)
}

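// GetRemainingSyncTime estimates the time needed to reach the maximum peer
// height, based on the smoothed sync rate in lastSyncRate.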
func (r *BlockchainReactor) GetRemainingSyncTime() time.Duration {
	if !r.fastSync.IsSet() {
		return time.Duration(0)
	}

	r.mtx.RLock()
	defer r.mtx.RUnlock()

	targetSyncs := r.maxPeerHeight - r.syncStartHeight
	currentSyncs := r.syncHeight - r.syncStartHeight + 1
	if currentSyncs < 0 || r.lastSyncRate < 0.001 {
		return time.Duration(0)
	}

	remain := float64(targetSyncs-currentSyncs) / r.lastSyncRate

	return time.Duration(int64(remain * float64(time.Second)))
}