github.com/koko1123/flow-go-1@v0.29.6/engine/common/synchronization/engine.go

// (c) 2019 Dapper Labs - ALL RIGHTS RESERVED

package synchronization

import (
	"fmt"
	"math/rand"
	"time"

	"github.com/hashicorp/go-multierror"
	"github.com/rs/zerolog"

	"github.com/koko1123/flow-go-1/engine"
	"github.com/koko1123/flow-go-1/engine/common/fifoqueue"
	"github.com/koko1123/flow-go-1/model/chainsync"
	"github.com/koko1123/flow-go-1/model/flow"
	"github.com/koko1123/flow-go-1/model/messages"
	"github.com/koko1123/flow-go-1/module"
	synccore "github.com/koko1123/flow-go-1/module/chainsync"
	"github.com/koko1123/flow-go-1/module/lifecycle"
	"github.com/koko1123/flow-go-1/module/metrics"
	"github.com/koko1123/flow-go-1/network"
	"github.com/koko1123/flow-go-1/network/channels"
	"github.com/koko1123/flow-go-1/storage"
)

// defaultSyncResponseQueueCapacity is the maximum capacity of the sync responses queue.
const defaultSyncResponseQueueCapacity = 500

// defaultBlockResponseQueueCapacity is the maximum capacity of the block responses queue.
const defaultBlockResponseQueueCapacity = 500

// Engine is the synchronization engine, responsible for synchronizing chain state.
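//
// At a high level, the engine:
//   - periodically multicasts sync requests to a random subset of participants (pollHeight),
//   - periodically scans the sync core for pending ranges and batches and requests them (sendRequests),
//   - queues incoming sync and block responses and processes them asynchronously, and
//   - forwards received block responses to the compliance engine for processing.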
type Engine struct {
	unit    *engine.Unit
	lm      *lifecycle.LifecycleManager
	log     zerolog.Logger
	metrics module.EngineMetrics
	me      module.Local
	con     network.Conduit
	blocks  storage.Blocks
	comp    network.Engine // compliance layer engine

	pollInterval         time.Duration
	scanInterval         time.Duration
	core                 module.SyncCore
	participantsProvider module.IdentifierProvider
	finalizedHeader      *FinalizedHeaderCache

	requestHandler *RequestHandler // component responsible for handling requests

	pendingSyncResponses   engine.MessageStore    // message store for *messages.SyncResponse
	pendingBlockResponses  engine.MessageStore    // message store for *messages.BlockResponse
	responseMessageHandler *engine.MessageHandler // message handler responsible for response processing
}

// New creates a new main chain synchronization engine.
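//
// A minimal wiring sketch; the variable names below are hypothetical
// placeholders for dependencies the caller is assumed to already have:
//
//	e, err := synchronization.New(log, metrics, net, me, blocks, comp, core, finalizedHeader, idProvider)
//	if err != nil {
//		return fmt.Errorf("could not create synchronization engine: %w", err)
//	}
//	<-e.Ready() // starts polling, scanning and response processing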
func New(
	log zerolog.Logger,
	metrics module.EngineMetrics,
	net network.Network,
	me module.Local,
	blocks storage.Blocks,
	comp network.Engine,
	core module.SyncCore,
	finalizedHeader *FinalizedHeaderCache,
	participantsProvider module.IdentifierProvider,
	opts ...OptionFunc,
) (*Engine, error) {

	opt := DefaultConfig()
	for _, f := range opts {
		f(opt)
	}

	if comp == nil {
		panic("must initialize synchronization engine with comp engine")
	}

	// initialize the synchronization engine with its dependencies
	e := &Engine{
		unit:                 engine.NewUnit(),
		lm:                   lifecycle.NewLifecycleManager(),
		log:                  log.With().Str("engine", "synchronization").Logger(),
		metrics:              metrics,
		me:                   me,
		blocks:               blocks,
		comp:                 comp,
		core:                 core,
		pollInterval:         opt.PollInterval,
		scanInterval:         opt.ScanInterval,
		finalizedHeader:      finalizedHeader,
		participantsProvider: participantsProvider,
	}

	err := e.setupResponseMessageHandler()
	if err != nil {
		return nil, fmt.Errorf("could not setup message handler: %w", err)
	}

	// register the engine with the network layer and store the conduit
	con, err := net.Register(channels.SyncCommittee, e)
	if err != nil {
		return nil, fmt.Errorf("could not register engine: %w", err)
	}
	e.con = con

	e.requestHandler = NewRequestHandler(log, metrics, NewResponseSender(con), me, blocks, core, finalizedHeader, true)

	return e, nil
}

// setupResponseMessageHandler initializes the inbound queues and the MessageHandler for UNTRUSTED responses.
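// Responses are buffered in bounded FIFO queues and consumed asynchronously by
// responseProcessingLoop; the Match functions below additionally record a
// received-message metric for each accepted message type.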
func (e *Engine) setupResponseMessageHandler() error {
	syncResponseQueue, err := fifoqueue.NewFifoQueue(defaultSyncResponseQueueCapacity)
	if err != nil {
		return fmt.Errorf("failed to create queue for sync responses: %w", err)
	}

	e.pendingSyncResponses = &engine.FifoMessageStore{
		FifoQueue: syncResponseQueue,
	}

	blockResponseQueue, err := fifoqueue.NewFifoQueue(defaultBlockResponseQueueCapacity)
	if err != nil {
		return fmt.Errorf("failed to create queue for block responses: %w", err)
	}

	e.pendingBlockResponses = &engine.FifoMessageStore{
		FifoQueue: blockResponseQueue,
	}

	// define message queueing behaviour
	e.responseMessageHandler = engine.NewMessageHandler(
		e.log,
		engine.NewNotifier(),
		engine.Pattern{
			Match: func(msg *engine.Message) bool {
				_, ok := msg.Payload.(*messages.SyncResponse)
				if ok {
					e.metrics.MessageReceived(metrics.EngineSynchronization, metrics.MessageSyncResponse)
				}
				return ok
			},
			Store: e.pendingSyncResponses,
		},
		engine.Pattern{
			Match: func(msg *engine.Message) bool {
				_, ok := msg.Payload.(*messages.BlockResponse)
				if ok {
					e.metrics.MessageReceived(metrics.EngineSynchronization, metrics.MessageBlockResponse)
				}
				return ok
			},
			Store: e.pendingBlockResponses,
		},
	)

	return nil
}

// Ready returns a ready channel that is closed once the engine has fully started.
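// The engine only reports ready once the finalized header cache and the
// request handler have both started.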
func (e *Engine) Ready() <-chan struct{} {
	e.lm.OnStart(func() {
		<-e.finalizedHeader.Ready()
		e.unit.Launch(e.checkLoop)
		e.unit.Launch(e.responseProcessingLoop)
		// wait for the request handler to start up
		<-e.requestHandler.Ready()
	})
	return e.lm.Started()
}

// Done returns a done channel that is closed once the engine has fully stopped.
func (e *Engine) Done() <-chan struct{} {
	e.lm.OnStop(func() {
		// signal the request handler to shut down
		requestHandlerDone := e.requestHandler.Done()
		// wait for request sending and response processing routines to exit
		<-e.unit.Done()
		// wait for request handler shutdown to complete
		<-requestHandlerDone
		<-e.finalizedHeader.Done()
	})
	return e.lm.Stopped()
}

// SubmitLocal submits an event originating on the local node.
func (e *Engine) SubmitLocal(event interface{}) {
	err := e.process(e.me.NodeID(), event)
	if err != nil {
		// receiving an input of incompatible type from a trusted internal component is fatal
		e.log.Fatal().Err(err).Msg("internal error processing event")
	}
}

// Submit submits the given event from the node with the given origin ID for
// processing in a non-blocking manner. It returns instantly; any unexpected
// processing error is logged as fatal.
func (e *Engine) Submit(channel channels.Channel, originID flow.Identifier, event interface{}) {
	err := e.Process(channel, originID, event)
	if err != nil {
		e.log.Fatal().Err(err).Msg("internal error processing event")
	}
}

// ProcessLocal processes an event originating on the local node.
func (e *Engine) ProcessLocal(event interface{}) error {
	return e.process(e.me.NodeID(), event)
}

// Process processes the given event from the node with the given origin ID in
// a blocking manner. It returns the potential processing error when done.
func (e *Engine) Process(channel channels.Channel, originID flow.Identifier, event interface{}) error {
	err := e.process(originID, event)
	if err != nil {
		if engine.IsIncompatibleInputTypeError(err) {
			e.log.Warn().Msgf("%v delivered unsupported message %T through %v", originID, event, channel)
			return nil
		}
		return fmt.Errorf("unexpected error while processing engine message: %w", err)
	}
	return nil
}

// process processes events for the synchronization engine.
// Error returns:
//   - IncompatibleInputTypeError if input has unexpected type
//   - All other errors are potential symptoms of internal state corruption or bugs (fatal).
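//
// Requests are routed to the RequestHandler, while responses are only queued
// here and processed asynchronously by responseProcessingLoop.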
func (e *Engine) process(originID flow.Identifier, event interface{}) error {
	switch event.(type) {
	case *messages.RangeRequest, *messages.BatchRequest, *messages.SyncRequest:
		return e.requestHandler.process(originID, event)
	case *messages.SyncResponse, *messages.BlockResponse:
		return e.responseMessageHandler.Process(originID, event)
	default:
		return fmt.Errorf("received input with type %T from %x: %w", event, originID[:], engine.IncompatibleInputTypeError)
	}
}

// responseProcessingLoop processes queued responses in a dedicated goroutine (launched from Ready).
func (e *Engine) responseProcessingLoop() {
	notifier := e.responseMessageHandler.GetNotifier()
	for {
		select {
		case <-e.unit.Quit():
			return
		case <-notifier:
			e.processAvailableResponses()
		}
	}
}

// processAvailableResponses drains the pending message stores, driving events
// from the networking layer into the engine's business logic.
func (e *Engine) processAvailableResponses() {
	for {
		select {
		case <-e.unit.Quit():
			return
		default:
		}

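		// sync responses are checked first, so they are drained with priority
		// over block responses, which carry full block payloads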
		msg, ok := e.pendingSyncResponses.Get()
		if ok {
			e.onSyncResponse(msg.OriginID, msg.Payload.(*messages.SyncResponse))
			e.metrics.MessageHandled(metrics.EngineSynchronization, metrics.MessageSyncResponse)
			continue
		}

		msg, ok = e.pendingBlockResponses.Get()
		if ok {
			e.onBlockResponse(msg.OriginID, msg.Payload.(*messages.BlockResponse))
			e.metrics.MessageHandled(metrics.EngineSynchronization, metrics.MessageBlockResponse)
			continue
		}

		// when there are no more messages in the queues, return to the
		// processing loop and wait for the next incoming message to arrive
		return
	}
}

// onSyncResponse processes a synchronization response.
func (e *Engine) onSyncResponse(originID flow.Identifier, res *messages.SyncResponse) {
	e.log.Debug().Str("origin_id", originID.String()).Msg("received sync response")
	final := e.finalizedHeader.Get()
	e.core.HandleHeight(final, res.Height)
}

// onBlockResponse processes a response containing a specifically requested block.
func (e *Engine) onBlockResponse(originID flow.Identifier, res *messages.BlockResponse) {
	if len(res.Blocks) == 0 {
		e.log.Debug().Msg("received empty block response")
		return
	}

	first := res.Blocks[0].Header.Height
	last := res.Blocks[len(res.Blocks)-1].Header.Height
	e.log.Debug().Uint64("first", first).Uint64("last", last).Msg("received block response")

	// hand the block headers to the sync core one by one
	for _, block := range res.Blocks {
		header := block.Header // copy to avoid passing a pointer to the loop variable
		if !e.core.HandleBlock(&header) {
			e.log.Debug().Uint64("height", header.Height).Msg("block handler rejected")
		}
	}

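	// regardless of whether individual headers were accepted by the sync core,
	// forward the full response to the compliance engine for processing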
	e.comp.SubmitLocal(res)
}

// checkLoop will regularly scan for items that need requesting.
func (e *Engine) checkLoop() {
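	// by default, pollChan never fires (a fresh channel without a sender never
	// delivers), which disables the poll case in the select below; it is only
	// wired to a ticker if a positive poll interval is configured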
	pollChan := make(<-chan time.Time)
	if e.pollInterval > 0 {
		poll := time.NewTicker(e.pollInterval)
		pollChan = poll.C
		defer poll.Stop()
	}
	scan := time.NewTicker(e.scanInterval)

CheckLoop:
	for {
		// give priority to the quit signal by checking it first
		select {
		case <-e.unit.Quit():
			break CheckLoop
		default:
		}

		select {
		case <-e.unit.Quit():
			break CheckLoop
		case <-pollChan:
			e.pollHeight()
		case <-scan.C:
			head := e.finalizedHeader.Get()
			participants := e.participantsProvider.Identifiers()
			ranges, batches := e.core.ScanPending(head)
			e.sendRequests(participants, ranges, batches)
		}
	}

	// some minor cleanup
	scan.Stop()
}

// pollHeight will send a synchronization request to a random subset of
// synccore.DefaultPollNodes participants.
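// Polling is best-effort: a failed multicast is only logged, and the next poll
// tick will retry with the then-current finalized height.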
func (e *Engine) pollHeight() {
	head := e.finalizedHeader.Get()
	participants := e.participantsProvider.Identifiers()

	// send the request for synchronization
	req := &messages.SyncRequest{
		Nonce:  rand.Uint64(),
		Height: head.Height,
	}
	e.log.Debug().
		Uint64("height", req.Height).
		Uint64("range_nonce", req.Nonce).
		Msg("sending sync request")
	err := e.con.Multicast(req, synccore.DefaultPollNodes, participants...)
	if err != nil {
		e.log.Warn().Err(err).Msg("sending sync request to poll heights failed")
		return
	}
	e.metrics.MessageSent(metrics.EngineSynchronization, metrics.MessageSyncRequest)
}

// sendRequests sends a request for each range and batch, using consensus participants from the last finalized snapshot.
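// Send failures are collected into a multierror and logged once at the end, so
// a failed range or batch request does not prevent the remaining requests from
// being sent.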
func (e *Engine) sendRequests(participants flow.IdentifierList, ranges []chainsync.Range, batches []chainsync.Batch) {
	var errs *multierror.Error

	for _, ran := range ranges {
		req := &messages.RangeRequest{
			Nonce:      rand.Uint64(),
			FromHeight: ran.From,
			ToHeight:   ran.To,
		}
		err := e.con.Multicast(req, synccore.DefaultBlockRequestNodes, participants...)
		if err != nil {
			errs = multierror.Append(errs, fmt.Errorf("could not submit range request: %w", err))
			continue
		}
		e.log.Info().
			Uint64("range_from", req.FromHeight).
			Uint64("range_to", req.ToHeight).
			Uint64("range_nonce", req.Nonce).
			Msg("range requested")
		e.core.RangeRequested(ran)
		e.metrics.MessageSent(metrics.EngineSynchronization, metrics.MessageRangeRequest)
	}

	for _, batch := range batches {
		req := &messages.BatchRequest{
			Nonce:    rand.Uint64(),
			BlockIDs: batch.BlockIDs,
		}
		err := e.con.Multicast(req, synccore.DefaultBlockRequestNodes, participants...)
		if err != nil {
			errs = multierror.Append(errs, fmt.Errorf("could not submit batch request: %w", err))
			continue
		}
		e.log.Debug().
			Strs("block_ids", flow.IdentifierList(batch.BlockIDs).Strings()).
			Uint64("range_nonce", req.Nonce).
			Msg("batch requested")
		e.core.BatchRequested(batch)
		e.metrics.MessageSent(metrics.EngineSynchronization, metrics.MessageBatchRequest)
	}

	if err := errs.ErrorOrNil(); err != nil {
		e.log.Warn().Err(err).Msg("sending range and batch requests failed")
	}
}