github.com/koko1123/flow-go-1@v0.29.6/consensus/hotstuff/eventloop/event_loop.go (about)

     1  package eventloop
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"time"
     7  
     8  	"github.com/rs/zerolog"
     9  
    10  	"github.com/koko1123/flow-go-1/consensus/hotstuff"
    11  	"github.com/koko1123/flow-go-1/consensus/hotstuff/model"
    12  	"github.com/koko1123/flow-go-1/model/flow"
    13  	"github.com/koko1123/flow-go-1/module"
    14  	"github.com/koko1123/flow-go-1/module/component"
    15  	"github.com/koko1123/flow-go-1/module/irrecoverable"
    16  	"github.com/koko1123/flow-go-1/module/metrics"
    17  )
    18  
    19  type proposalTask struct {
    20  	*model.Proposal
    21  	done chan struct{}
    22  }
    23  
    24  // EventLoop buffers all incoming events to the hotstuff EventHandler, and feeds EventHandler one event at a time.
    25  type EventLoop struct {
    26  	*component.ComponentManager
    27  	log                zerolog.Logger
    28  	eventHandler       hotstuff.EventHandler
    29  	metrics            module.HotstuffMetrics
    30  	proposals          chan *proposalTask
    31  	quorumCertificates chan *flow.QuorumCertificate
    32  	startTime          time.Time
    33  }
    34  
    35  var _ hotstuff.EventLoop = (*EventLoop)(nil)
    36  var _ component.Component = (*EventLoop)(nil)
    37  
    38  // NewEventLoop creates an instance of EventLoop.
    39  func NewEventLoop(log zerolog.Logger, metrics module.HotstuffMetrics, eventHandler hotstuff.EventHandler, startTime time.Time) (*EventLoop, error) {
    40  	proposals := make(chan *proposalTask)
    41  	quorumCertificates := make(chan *flow.QuorumCertificate, 1)
    42  
    43  	el := &EventLoop{
    44  		log:                log,
    45  		eventHandler:       eventHandler,
    46  		metrics:            metrics,
    47  		proposals:          proposals,
    48  		quorumCertificates: quorumCertificates,
    49  		startTime:          startTime,
    50  	}
    51  
    52  	componentBuilder := component.NewComponentManagerBuilder()
    53  	componentBuilder.AddWorker(func(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) {
    54  		ready()
    55  
    56  		// launch when scheduled by el.startTime
    57  		el.log.Info().Msgf("event loop will start at: %v", startTime)
    58  		select {
    59  		case <-ctx.Done():
    60  			return
    61  		case <-time.After(time.Until(startTime)):
    62  			el.log.Info().Msgf("starting event loop")
    63  			err := el.loop(ctx)
    64  			if err != nil {
    65  				el.log.Error().Err(err).Msg("irrecoverable event loop error")
    66  				ctx.Throw(err)
    67  			}
    68  		}
    69  	})
    70  	el.ComponentManager = componentBuilder.Build()
    71  
    72  	return el, nil
    73  }
    74  
    75  func (el *EventLoop) loop(ctx context.Context) error {
    76  
    77  	err := el.eventHandler.Start()
    78  	if err != nil {
    79  		return fmt.Errorf("could not start event handler: %w", err)
    80  	}
    81  
    82  	// hotstuff will run in an event loop to process all events synchronously. And this is what will happen when hitting errors:
    83  	// if hotstuff hits a known critical error, it will exit the loop (for instance, there is a conflicting block with a QC against finalized blocks
    84  	// if hotstuff hits a known error indicating some assumption between components is broken, it will exit the loop (for instance, hotstuff receives a block whose parent is missing)
    85  	// if hotstuff hits a known error that is safe to be ignored, it will not exit the loop (for instance, invalid proposal)
    86  	// if hotstuff hits any unknown error, it will exit the loop
    87  
    88  	shutdownSignaled := ctx.Done()
    89  	for {
    90  		// Giving timeout events the priority to be processed first
    91  		// This is to prevent attacks from malicious nodes that attempt
    92  		// to block honest nodes' pacemaker from progressing by sending
    93  		// other events.
    94  		timeoutChannel := el.eventHandler.TimeoutChannel()
    95  
    96  		// the first select makes sure we process timeouts with priority
    97  		select {
    98  
    99  		// if we receive the shutdown signal, exit the loop
   100  		case <-shutdownSignaled:
   101  			return nil
   102  
   103  		// if we receive a time out, process it and log errors
   104  		case <-timeoutChannel:
   105  
   106  			processStart := time.Now()
   107  
   108  			err := el.eventHandler.OnLocalTimeout()
   109  
   110  			// measure how long it takes for a timeout event to be processed
   111  			el.metrics.HotStuffBusyDuration(time.Since(processStart), metrics.HotstuffEventTypeTimeout)
   112  
   113  			if err != nil {
   114  				return fmt.Errorf("could not process timeout: %w", err)
   115  			}
   116  
   117  			// At this point, we have received and processed an event from the timeout channel.
   118  			// A timeout also means, we have made progress. A new timeout will have
   119  			// been started and el.eventHandler.TimeoutChannel() will be a NEW channel (for the just-started timeout)
   120  			// Very important to start the for loop from the beginning, to continue the with the new timeout channel!
   121  			continue
   122  
   123  		default:
   124  			// fall through to non-priority events
   125  		}
   126  
   127  		idleStart := time.Now()
   128  
   129  		// select for block headers/QCs here
   130  		select {
   131  
   132  		// same as before
   133  		case <-shutdownSignaled:
   134  			return nil
   135  
   136  		// same as before
   137  		case <-timeoutChannel:
   138  			// measure how long the event loop was idle waiting for an
   139  			// incoming event
   140  			el.metrics.HotStuffIdleDuration(time.Since(idleStart))
   141  
   142  			processStart := time.Now()
   143  
   144  			err := el.eventHandler.OnLocalTimeout()
   145  
   146  			// measure how long it takes for a timeout event to be processed
   147  			el.metrics.HotStuffBusyDuration(time.Since(processStart), metrics.HotstuffEventTypeTimeout)
   148  
   149  			if err != nil {
   150  				return fmt.Errorf("could not process timeout: %w", err)
   151  			}
   152  
   153  		// if we have a new proposal, process it
   154  		case p := <-el.proposals:
   155  			// measure how long the event loop was idle waiting for an
   156  			// incoming event
   157  			el.metrics.HotStuffIdleDuration(time.Since(idleStart))
   158  
   159  			processStart := time.Now()
   160  
   161  			err := el.eventHandler.OnReceiveProposal(p.Proposal)
   162  			// done processing the proposal, notify the caller (usually the compliance engine) that
   163  			// this block has been processed. If the block is valid, protocol state should have it stored.
   164  			// useful when the caller is processing a range of blocks, and waiting for the current block
   165  			// to be processed by hotstuff before processing the next block.
   166  			close(p.done)
   167  
   168  			// measure how long it takes for a proposal to be processed
   169  			el.metrics.HotStuffBusyDuration(time.Since(processStart), metrics.HotstuffEventTypeOnProposal)
   170  
   171  			if err != nil {
   172  				return fmt.Errorf("could not process proposal %v: %w", p.Block.BlockID, err)
   173  			}
   174  
   175  			el.log.Info().
   176  				Dur("dur_ms", time.Since(processStart)).
   177  				Uint64("view", p.Block.View).
   178  				Hex("block_id", p.Block.BlockID[:]).
   179  				Msg("block proposal has been processed successfully")
   180  
   181  		// if we have a new QC, process it
   182  		case qc := <-el.quorumCertificates:
   183  			// measure how long the event loop was idle waiting for an
   184  			// incoming event
   185  			el.metrics.HotStuffIdleDuration(time.Since(idleStart))
   186  
   187  			processStart := time.Now()
   188  
   189  			err := el.eventHandler.OnQCConstructed(qc)
   190  
   191  			// measure how long it takes for a QC to be processed
   192  			el.metrics.HotStuffBusyDuration(time.Since(processStart), metrics.HotstuffEventTypeOnQC)
   193  
   194  			if err != nil {
   195  				return fmt.Errorf("could not process QC: %w", err)
   196  			}
   197  		}
   198  	}
   199  }
   200  
   201  // SubmitProposal pushes the received block to the blockheader channel
   202  func (el *EventLoop) SubmitProposal(proposalHeader *flow.Header, parentView uint64) <-chan struct{} {
   203  	received := time.Now()
   204  
   205  	proposal := &proposalTask{
   206  		Proposal: model.ProposalFromFlow(proposalHeader, parentView),
   207  		done:     make(chan struct{}),
   208  	}
   209  
   210  	select {
   211  	case el.proposals <- proposal:
   212  	case <-el.ComponentManager.ShutdownSignal():
   213  		return proposal.done
   214  	}
   215  
   216  	// the wait duration is measured as how long it takes from a block being
   217  	// received to event handler commencing the processing of the block
   218  	el.metrics.HotStuffWaitDuration(time.Since(received), metrics.HotstuffEventTypeOnProposal)
   219  	return proposal.done
   220  }
   221  
   222  // SubmitTrustedQC pushes the received QC to the quorumCertificates channel
   223  func (el *EventLoop) SubmitTrustedQC(qc *flow.QuorumCertificate) {
   224  	received := time.Now()
   225  
   226  	select {
   227  	case el.quorumCertificates <- qc:
   228  	case <-el.ComponentManager.ShutdownSignal():
   229  		return
   230  	}
   231  
   232  	// the wait duration is measured as how long it takes from a qc being
   233  	// received to event handler commencing the processing of the qc
   234  	el.metrics.HotStuffWaitDuration(time.Since(received), metrics.HotstuffEventTypeOnQC)
   235  }