github.com/onflow/flow-go@v0.33.17/consensus/hotstuff/committees/consensus_committee.go (about)

     1  package committees
     2  
     3  import (
     4  	"fmt"
     5  	"sync"
     6  
     7  	"go.uber.org/atomic"
     8  
     9  	"github.com/onflow/flow-go/consensus/hotstuff"
    10  	"github.com/onflow/flow-go/consensus/hotstuff/committees/leader"
    11  	"github.com/onflow/flow-go/consensus/hotstuff/model"
    12  	"github.com/onflow/flow-go/model/flow"
    13  	"github.com/onflow/flow-go/model/flow/filter"
    14  	"github.com/onflow/flow-go/module/component"
    15  	"github.com/onflow/flow-go/module/irrecoverable"
    16  	"github.com/onflow/flow-go/state/protocol"
    17  	"github.com/onflow/flow-go/state/protocol/events"
    18  	"github.com/onflow/flow-go/state/protocol/prg"
    19  )
    20  
    21  // staticEpochInfo contains leader selection and the initial committee for one epoch.
    22  // This data structure must not be mutated after construction.
    23  type staticEpochInfo struct {
    24  	firstView    uint64                  // first view of the epoch (inclusive)
    25  	finalView    uint64                  // final view of the epoch (inclusive)
    26  	randomSource []byte                  // random source of epoch
    27  	leaders      *leader.LeaderSelection // pre-computed leader selection for the epoch
    28  	// TODO: should use identity skeleton https://github.com/dapperlabs/flow-go/issues/6232
    29  	initialCommittee     flow.IdentityList
    30  	initialCommitteeMap  map[flow.Identifier]*flow.Identity
    31  	weightThresholdForQC uint64 // computed based on initial committee weights
    32  	weightThresholdForTO uint64 // computed based on initial committee weights
    33  	dkg                  hotstuff.DKG
    34  }
    35  
    36  // newStaticEpochInfo returns the static epoch information from the epoch.
    37  // This can be cached and used for all by-view queries for this epoch.
    38  func newStaticEpochInfo(epoch protocol.Epoch) (*staticEpochInfo, error) {
    39  	firstView, err := epoch.FirstView()
    40  	if err != nil {
    41  		return nil, fmt.Errorf("could not get first view: %w", err)
    42  	}
    43  	finalView, err := epoch.FinalView()
    44  	if err != nil {
    45  		return nil, fmt.Errorf("could not get final view: %w", err)
    46  	}
    47  	randomSource, err := epoch.RandomSource()
    48  	if err != nil {
    49  		return nil, fmt.Errorf("could not get random source: %w", err)
    50  	}
    51  	leaders, err := leader.SelectionForConsensus(epoch)
    52  	if err != nil {
    53  		return nil, fmt.Errorf("could not get leader selection: %w", err)
    54  	}
    55  	initialIdentities, err := epoch.InitialIdentities()
    56  	if err != nil {
    57  		return nil, fmt.Errorf("could not initial identities: %w", err)
    58  	}
    59  	initialCommittee := initialIdentities.Filter(filter.IsVotingConsensusCommitteeMember)
    60  	dkg, err := epoch.DKG()
    61  	if err != nil {
    62  		return nil, fmt.Errorf("could not get dkg: %w", err)
    63  	}
    64  
    65  	totalWeight := initialCommittee.TotalWeight()
    66  	epochInfo := &staticEpochInfo{
    67  		firstView:            firstView,
    68  		finalView:            finalView,
    69  		randomSource:         randomSource,
    70  		leaders:              leaders,
    71  		initialCommittee:     initialCommittee,
    72  		initialCommitteeMap:  initialCommittee.Lookup(),
    73  		weightThresholdForQC: WeightThresholdToBuildQC(totalWeight),
    74  		weightThresholdForTO: WeightThresholdToTimeout(totalWeight),
    75  		dkg:                  dkg,
    76  	}
    77  	return epochInfo, nil
    78  }
    79  
    80  // newEmergencyFallbackEpoch creates an artificial fallback epoch generated from
    81  // the last committed epoch at the time epoch emergency fallback is triggered.
    82  // The fallback epoch:
    83  // * begins after the last committed epoch
    84  // * lasts until the next spork (estimated 6 months)
    85  // * has the same static committee as the last committed epoch
    86  func newEmergencyFallbackEpoch(lastCommittedEpoch *staticEpochInfo) (*staticEpochInfo, error) {
    87  
    88  	rng, err := prg.New(lastCommittedEpoch.randomSource, prg.ConsensusLeaderSelection, nil)
    89  	if err != nil {
    90  		return nil, fmt.Errorf("could not create rng from seed: %w", err)
    91  	}
    92  	leaders, err := leader.ComputeLeaderSelection(lastCommittedEpoch.finalView+1, rng, leader.EstimatedSixMonthOfViews, lastCommittedEpoch.initialCommittee)
    93  	if err != nil {
    94  		return nil, fmt.Errorf("could not compute leader selection for fallback epoch: %w", err)
    95  	}
    96  	epochInfo := &staticEpochInfo{
    97  		firstView:            lastCommittedEpoch.finalView + 1,
    98  		finalView:            lastCommittedEpoch.finalView + leader.EstimatedSixMonthOfViews,
    99  		randomSource:         lastCommittedEpoch.randomSource,
   100  		leaders:              leaders,
   101  		initialCommittee:     lastCommittedEpoch.initialCommittee,
   102  		initialCommitteeMap:  lastCommittedEpoch.initialCommitteeMap,
   103  		weightThresholdForQC: lastCommittedEpoch.weightThresholdForQC,
   104  		weightThresholdForTO: lastCommittedEpoch.weightThresholdForTO,
   105  		dkg:                  lastCommittedEpoch.dkg,
   106  	}
   107  	return epochInfo, nil
   108  }
   109  
   110  // Consensus represents the main committee for consensus nodes. The consensus
   111  // committee might be active for multiple successive epochs.
   112  type Consensus struct {
   113  	state                  protocol.State              // the protocol state
   114  	me                     flow.Identifier             // the node ID of this node
   115  	mu                     sync.RWMutex                // protects access to epochs
   116  	epochs                 map[uint64]*staticEpochInfo // cache of initial committee & leader selection per epoch
   117  	committedEpochsCh      chan *flow.Header           // protocol events for newly committed epochs (the first block of the epoch is passed over the channel)
   118  	epochEmergencyFallback chan struct{}               // protocol event for epoch emergency fallback
   119  	isEpochFallbackHandled *atomic.Bool                // ensure we only inject fallback epoch once
   120  	events.Noop                                        // implements protocol.Consumer
   121  	component.Component
   122  }
   123  
   124  var _ protocol.Consumer = (*Consensus)(nil)
   125  var _ hotstuff.Replicas = (*Consensus)(nil)
   126  var _ hotstuff.DynamicCommittee = (*Consensus)(nil)
   127  
   128  func NewConsensusCommittee(state protocol.State, me flow.Identifier) (*Consensus, error) {
   129  
   130  	com := &Consensus{
   131  		state:                  state,
   132  		me:                     me,
   133  		epochs:                 make(map[uint64]*staticEpochInfo),
   134  		committedEpochsCh:      make(chan *flow.Header, 1),
   135  		epochEmergencyFallback: make(chan struct{}, 1),
   136  		isEpochFallbackHandled: atomic.NewBool(false),
   137  	}
   138  
   139  	com.Component = component.NewComponentManagerBuilder().
   140  		AddWorker(com.handleProtocolEvents).
   141  		Build()
   142  
   143  	final := state.Final()
   144  
   145  	// pre-compute leader selection for all presently relevant committed epochs
   146  	epochs := make([]protocol.Epoch, 0, 3)
   147  	// we always prepare the current epoch
   148  	epochs = append(epochs, final.Epochs().Current())
   149  
   150  	// we prepare the previous epoch, if one exists
   151  	exists, err := protocol.PreviousEpochExists(final)
   152  	if err != nil {
   153  		return nil, fmt.Errorf("could not check previous epoch exists: %w", err)
   154  	}
   155  	if exists {
   156  		epochs = append(epochs, final.Epochs().Previous())
   157  	}
   158  
   159  	// we prepare the next epoch, if it is committed
   160  	phase, err := final.Phase()
   161  	if err != nil {
   162  		return nil, fmt.Errorf("could not check epoch phase: %w", err)
   163  	}
   164  	if phase == flow.EpochPhaseCommitted {
   165  		epochs = append(epochs, final.Epochs().Next())
   166  	}
   167  
   168  	for _, epoch := range epochs {
   169  		_, err = com.prepareEpoch(epoch)
   170  		if err != nil {
   171  			return nil, fmt.Errorf("could not prepare initial epochs: %w", err)
   172  		}
   173  	}
   174  
   175  	// if epoch emergency fallback was triggered, inject the fallback epoch
   176  	triggered, err := state.Params().EpochFallbackTriggered()
   177  	if err != nil {
   178  		return nil, fmt.Errorf("could not check epoch fallback: %w", err)
   179  	}
   180  	if triggered {
   181  		err = com.onEpochEmergencyFallbackTriggered()
   182  		if err != nil {
   183  			return nil, fmt.Errorf("could not prepare emergency fallback epoch: %w", err)
   184  		}
   185  	}
   186  
   187  	return com, nil
   188  }
   189  
   190  // IdentitiesByBlock returns the identities of all authorized consensus participants at the given block.
   191  // The order of the identities is the canonical order.
   192  // ERROR conditions:
   193  //   - state.ErrUnknownSnapshotReference if the blockID is for an unknown block
   194  func (c *Consensus) IdentitiesByBlock(blockID flow.Identifier) (flow.IdentityList, error) {
   195  	il, err := c.state.AtBlockID(blockID).Identities(filter.IsVotingConsensusCommitteeMember)
   196  	if err != nil {
   197  		return nil, fmt.Errorf("could not identities at block %x: %w", blockID, err) // state.ErrUnknownSnapshotReference or exception
   198  	}
   199  	return il, nil
   200  }
   201  
   202  // IdentityByBlock returns the identity of the node with the given node ID at the given block.
   203  // ERROR conditions:
   204  //   - model.InvalidSignerError if participantID does NOT correspond to an authorized HotStuff participant at the specified block.
   205  //   - state.ErrUnknownSnapshotReference if the blockID is for an unknown block
   206  func (c *Consensus) IdentityByBlock(blockID flow.Identifier, nodeID flow.Identifier) (*flow.Identity, error) {
   207  	identity, err := c.state.AtBlockID(blockID).Identity(nodeID)
   208  	if err != nil {
   209  		if protocol.IsIdentityNotFound(err) {
   210  			return nil, model.NewInvalidSignerErrorf("id %v is not a valid node id: %w", nodeID, err)
   211  		}
   212  		return nil, fmt.Errorf("could not get identity for node ID %x: %w", nodeID, err) // state.ErrUnknownSnapshotReference or exception
   213  	}
   214  	if !filter.IsVotingConsensusCommitteeMember(identity) {
   215  		return nil, model.NewInvalidSignerErrorf("node %v is not an authorized hotstuff voting participant", nodeID)
   216  	}
   217  	return identity, nil
   218  }
   219  
   220  // IdentitiesByEpoch returns the committee identities in the epoch which contains
   221  // the given view.
   222  // CAUTION: This method considers epochs outside of Previous, Current, Next, w.r.t. the
   223  // finalized block, to be unknown. https://github.com/onflow/flow-go/issues/4085
   224  //
   225  // Error returns:
   226  //   - model.ErrViewForUnknownEpoch if no committed epoch containing the given view is known.
   227  //     This is an expected error and must be handled.
   228  //   - unspecific error in case of unexpected problems and bugs
   229  func (c *Consensus) IdentitiesByEpoch(view uint64) (flow.IdentityList, error) {
   230  	epochInfo, err := c.staticEpochInfoByView(view)
   231  	if err != nil {
   232  		return nil, err
   233  	}
   234  	return epochInfo.initialCommittee, nil
   235  }
   236  
   237  // IdentityByEpoch returns the identity for the given node ID, in the epoch which
   238  // contains the given view.
   239  // CAUTION: This method considers epochs outside of Previous, Current, Next, w.r.t. the
   240  // finalized block, to be unknown. https://github.com/onflow/flow-go/issues/4085
   241  //
   242  // Error returns:
   243  //   - model.ErrViewForUnknownEpoch if no committed epoch containing the given view is known.
   244  //     This is an expected error and must be handled.
   245  //   - model.InvalidSignerError if nodeID was not listed by the Epoch Setup event as an
   246  //     authorized consensus participants.
   247  //   - unspecific error in case of unexpected problems and bugs
   248  func (c *Consensus) IdentityByEpoch(view uint64, nodeID flow.Identifier) (*flow.Identity, error) {
   249  	epochInfo, err := c.staticEpochInfoByView(view)
   250  	if err != nil {
   251  		return nil, err
   252  	}
   253  	identity, ok := epochInfo.initialCommitteeMap[nodeID]
   254  	if !ok {
   255  		return nil, model.NewInvalidSignerErrorf("id %v is not a valid node id", nodeID)
   256  	}
   257  	return identity, nil
   258  }
   259  
   260  // LeaderForView returns the node ID of the leader for the given view.
   261  //
   262  // Error returns:
   263  //   - model.ErrViewForUnknownEpoch if no committed epoch containing the given view is known.
   264  //     This is an expected error and must be handled.
   265  //   - unspecific error in case of unexpected problems and bugs
   266  func (c *Consensus) LeaderForView(view uint64) (flow.Identifier, error) {
   267  
   268  	epochInfo, err := c.staticEpochInfoByView(view)
   269  	if err != nil {
   270  		return flow.ZeroID, err
   271  	}
   272  	leaderID, err := epochInfo.leaders.LeaderForView(view)
   273  	if leader.IsInvalidViewError(err) {
   274  		// an invalid view error indicates that no leader was computed for this view
   275  		// this is a fatal internal error, because the view necessarily is within an
   276  		// epoch for which we have pre-computed leader selection
   277  		return flow.ZeroID, fmt.Errorf("unexpected inconsistency in epoch view spans for view %d: %v", view, err)
   278  	}
   279  	if err != nil {
   280  		return flow.ZeroID, err
   281  	}
   282  	return leaderID, nil
   283  }
   284  
   285  // QuorumThresholdForView returns the minimum weight required to build a valid
   286  // QC in the given view. The weight threshold only changes at epoch boundaries
   287  // and is computed based on the initial committee weights.
   288  //
   289  // Error returns:
   290  //   - model.ErrViewForUnknownEpoch if no committed epoch containing the given view is known.
   291  //     This is an expected error and must be handled.
   292  //   - unspecific error in case of unexpected problems and bugs
   293  func (c *Consensus) QuorumThresholdForView(view uint64) (uint64, error) {
   294  	epochInfo, err := c.staticEpochInfoByView(view)
   295  	if err != nil {
   296  		return 0, err
   297  	}
   298  	return epochInfo.weightThresholdForQC, nil
   299  }
   300  
// Self returns the node ID of this node, as supplied when the committee was constructed.
func (c *Consensus) Self() flow.Identifier {
	return c.me
}
   304  
   305  // TimeoutThresholdForView returns the minimum weight of observed timeout objects
   306  // to safely immediately timeout for the current view. The weight threshold only
   307  // changes at epoch boundaries and is computed based on the initial committee weights.
   308  func (c *Consensus) TimeoutThresholdForView(view uint64) (uint64, error) {
   309  	epochInfo, err := c.staticEpochInfoByView(view)
   310  	if err != nil {
   311  		return 0, err
   312  	}
   313  	return epochInfo.weightThresholdForTO, nil
   314  }
   315  
   316  // DKG returns the DKG for epoch which includes the given view.
   317  //
   318  // Error returns:
   319  //   - model.ErrViewForUnknownEpoch if no committed epoch containing the given view is known.
   320  //     This is an expected error and must be handled.
   321  //   - unspecific error in case of unexpected problems and bugs
   322  func (c *Consensus) DKG(view uint64) (hotstuff.DKG, error) {
   323  	epochInfo, err := c.staticEpochInfoByView(view)
   324  	if err != nil {
   325  		return nil, err
   326  	}
   327  	return epochInfo.dkg, nil
   328  }
   329  
   330  // handleProtocolEvents processes queued Epoch events `EpochCommittedPhaseStarted`
   331  // and `EpochEmergencyFallbackTriggered`. This function permanently utilizes a worker
   332  // routine until the `Component` terminates.
   333  // When we observe a new epoch being committed, we compute
   334  // the leader selection and cache static info for the epoch. When we observe
   335  // epoch emergency fallback being triggered, we inject a fallback epoch.
   336  func (c *Consensus) handleProtocolEvents(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) {
   337  	ready()
   338  
   339  	for {
   340  		select {
   341  		case <-ctx.Done():
   342  			return
   343  		case block := <-c.committedEpochsCh:
   344  			epoch := c.state.AtBlockID(block.ID()).Epochs().Next()
   345  			_, err := c.prepareEpoch(epoch)
   346  			if err != nil {
   347  				ctx.Throw(err)
   348  			}
   349  		case <-c.epochEmergencyFallback:
   350  			err := c.onEpochEmergencyFallbackTriggered()
   351  			if err != nil {
   352  				ctx.Throw(err)
   353  			}
   354  		}
   355  	}
   356  }
   357  
// EpochCommittedPhaseStarted informs the `committee.Consensus` that the block starting the Epoch Committed Phase has been finalized.
// The header is handed to the worker routine via committedEpochsCh; the first
// argument is ignored. The channel send blocks while the channel's buffer is full.
func (c *Consensus) EpochCommittedPhaseStarted(_ uint64, first *flow.Header) {
	c.committedEpochsCh <- first
}
   362  
// EpochEmergencyFallbackTriggered passes the protocol event to the worker thread
// via the epochEmergencyFallback channel. The channel send blocks while the
// channel's buffer is full.
func (c *Consensus) EpochEmergencyFallbackTriggered() {
	c.epochEmergencyFallback <- struct{}{}
}
   367  
   368  // onEpochEmergencyFallbackTriggered handles the protocol event for emergency epoch
   369  // fallback mode being triggered. When this occurs, we inject a fallback epoch
   370  // to the committee which extends the current epoch.
   371  // This method must also be called on initialization, if emergency fallback mode
   372  // was triggered in the past.
   373  // No errors are expected during normal operation.
   374  func (c *Consensus) onEpochEmergencyFallbackTriggered() error {
   375  
   376  	// we respond to epoch fallback being triggered at most once, therefore
   377  	// the core logic is protected by an atomic bool.
   378  	// although it is only valid for epoch fallback to be triggered once per spork,
   379  	// we must account for repeated delivery of protocol events.
   380  	if !c.isEpochFallbackHandled.CompareAndSwap(false, true) {
   381  		return nil
   382  	}
   383  
   384  	currentEpochCounter, err := c.state.Final().Epochs().Current().Counter()
   385  	if err != nil {
   386  		return fmt.Errorf("could not get current epoch counter: %w", err)
   387  	}
   388  
   389  	c.mu.RLock()
   390  	// sanity check: current epoch must be cached already
   391  	currentEpoch, ok := c.epochs[currentEpochCounter]
   392  	if !ok {
   393  		c.mu.RUnlock()
   394  		return fmt.Errorf("epoch fallback: could not find current epoch (counter=%d) info", currentEpochCounter)
   395  	}
   396  	// sanity check: next epoch must never be committed, therefore must not be cached
   397  	_, ok = c.epochs[currentEpochCounter+1]
   398  	c.mu.RUnlock()
   399  	if ok {
   400  		return fmt.Errorf("epoch fallback: next epoch (counter=%d) is cached contrary to expectation", currentEpochCounter+1)
   401  	}
   402  
   403  	fallbackEpoch, err := newEmergencyFallbackEpoch(currentEpoch)
   404  	if err != nil {
   405  		return fmt.Errorf("could not construct fallback epoch: %w", err)
   406  	}
   407  
   408  	// cache the epoch info
   409  	c.mu.Lock()
   410  	c.epochs[currentEpochCounter+1] = fallbackEpoch
   411  	c.mu.Unlock()
   412  
   413  	return nil
   414  }
   415  
   416  // staticEpochInfoByView retrieves the previously cached static epoch info for
   417  // the epoch which includes the given view. If no epoch is known for the given
   418  // view, we will attempt to cache the next epoch.
   419  //
   420  // Error returns:
   421  //   - model.ErrViewForUnknownEpoch if no committed epoch containing the given view is known
   422  //   - unspecific error in case of unexpected problems and bugs
   423  func (c *Consensus) staticEpochInfoByView(view uint64) (*staticEpochInfo, error) {
   424  
   425  	// look for an epoch matching this view for which we have already pre-computed
   426  	// leader selection. Epochs last ~500k views, so we find the epoch here 99.99%
   427  	// of the time. Since epochs are long-lived and we only cache the most recent 3,
   428  	// this linear map iteration is inexpensive.
   429  	c.mu.RLock()
   430  	for _, epoch := range c.epochs {
   431  		if epoch.firstView <= view && view <= epoch.finalView {
   432  			c.mu.RUnlock()
   433  			return epoch, nil
   434  		}
   435  	}
   436  	c.mu.RUnlock()
   437  
   438  	return nil, model.ErrViewForUnknownEpoch
   439  }
   440  
   441  // prepareEpoch pre-computes and stores the static epoch information for the
   442  // given epoch, including leader selection. Calling prepareEpoch multiple times
   443  // for the same epoch returns cached static epoch information.
   444  // Input must be a committed epoch.
   445  // No errors are expected during normal operation.
   446  func (c *Consensus) prepareEpoch(epoch protocol.Epoch) (*staticEpochInfo, error) {
   447  
   448  	counter, err := epoch.Counter()
   449  	if err != nil {
   450  		return nil, fmt.Errorf("could not get counter for epoch to prepare: %w", err)
   451  	}
   452  
   453  	// this is a no-op if we have already computed static info for this epoch
   454  	c.mu.RLock()
   455  	epochInfo, exists := c.epochs[counter]
   456  	c.mu.RUnlock()
   457  	if exists {
   458  		return epochInfo, nil
   459  	}
   460  
   461  	epochInfo, err = newStaticEpochInfo(epoch)
   462  	if err != nil {
   463  		return nil, fmt.Errorf("could not create static epoch info for epch %d: %w", counter, err)
   464  	}
   465  
   466  	// sanity check: ensure new epoch has contiguous views with the prior epoch
   467  	c.mu.RLock()
   468  	prevEpochInfo, exists := c.epochs[counter-1]
   469  	c.mu.RUnlock()
   470  	if exists {
   471  		if epochInfo.firstView != prevEpochInfo.finalView+1 {
   472  			return nil, fmt.Errorf("non-contiguous view ranges between consecutive epochs (epoch_%d=[%d,%d], epoch_%d=[%d,%d])",
   473  				counter-1, prevEpochInfo.firstView, prevEpochInfo.finalView,
   474  				counter, epochInfo.firstView, epochInfo.finalView)
   475  		}
   476  	}
   477  
   478  	// cache the epoch info
   479  	c.mu.Lock()
   480  	defer c.mu.Unlock()
   481  	c.epochs[counter] = epochInfo
   482  	// now prune any old epochs, if we have exceeded our maximum of 3
   483  	// if we have fewer than 3 epochs, this is a no-op
   484  	c.pruneEpochInfo()
   485  	return epochInfo, nil
   486  }
   487  
   488  // pruneEpochInfo removes any epochs older than the most recent 3.
   489  // NOTE: Not safe for concurrent use - the caller must first acquire the lock.
   490  func (c *Consensus) pruneEpochInfo() {
   491  	// find the maximum counter, including the epoch we just computed
   492  	max := uint64(0)
   493  	for counter := range c.epochs {
   494  		if counter > max {
   495  			max = counter
   496  		}
   497  	}
   498  
   499  	// remove any epochs which aren't within the most recent 3
   500  	for counter := range c.epochs {
   501  		if counter+3 <= max {
   502  			delete(c.epochs, counter)
   503  		}
   504  	}
   505  }