github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/consensus/hotstuff/committees/consensus_committee.go (about)

     1  package committees
     2  
     3  import (
     4  	"fmt"
     5  	"sync"
     6  
     7  	"go.uber.org/atomic"
     8  
     9  	"github.com/onflow/flow-go/consensus/hotstuff"
    10  	"github.com/onflow/flow-go/consensus/hotstuff/committees/leader"
    11  	"github.com/onflow/flow-go/consensus/hotstuff/model"
    12  	"github.com/onflow/flow-go/model/flow"
    13  	"github.com/onflow/flow-go/model/flow/filter"
    14  	"github.com/onflow/flow-go/module/component"
    15  	"github.com/onflow/flow-go/module/irrecoverable"
    16  	"github.com/onflow/flow-go/state/protocol"
    17  	"github.com/onflow/flow-go/state/protocol/events"
    18  	"github.com/onflow/flow-go/state/protocol/prg"
    19  )
    20  
// staticEpochInfo contains leader selection and the initial committee for one epoch.
// Instances are built by newStaticEpochInfo (for a regular epoch) or by
// newEmergencyFallbackEpoch (for the artificial fallback epoch) and are handed out
// to callers by pointer.
// This data structure must not be mutated after construction.
type staticEpochInfo struct {
	firstView            uint64                  // first view of the epoch (inclusive)
	finalView            uint64                  // final view of the epoch (inclusive)
	randomSource         []byte                  // random source of epoch
	leaders              *leader.LeaderSelection // pre-computed leader selection for the epoch
	// initialCommittee holds the epoch's initial identities, restricted to
	// consensus committee members and reduced to their skeletons.
	initialCommittee     flow.IdentitySkeletonList
	// initialCommitteeMap is the lookup table derived from initialCommittee,
	// keyed by node identifier.
	initialCommitteeMap  map[flow.Identifier]*flow.IdentitySkeleton
	weightThresholdForQC uint64 // computed based on initial committee weights
	weightThresholdForTO uint64 // computed based on initial committee weights
	// dkg is the DKG information reported by the epoch.
	dkg                  hotstuff.DKG
}
    34  
    35  // newStaticEpochInfo returns the static epoch information from the epoch.
    36  // This can be cached and used for all by-view queries for this epoch.
    37  func newStaticEpochInfo(epoch protocol.Epoch) (*staticEpochInfo, error) {
    38  	firstView, err := epoch.FirstView()
    39  	if err != nil {
    40  		return nil, fmt.Errorf("could not get first view: %w", err)
    41  	}
    42  	finalView, err := epoch.FinalView()
    43  	if err != nil {
    44  		return nil, fmt.Errorf("could not get final view: %w", err)
    45  	}
    46  	randomSource, err := epoch.RandomSource()
    47  	if err != nil {
    48  		return nil, fmt.Errorf("could not get random source: %w", err)
    49  	}
    50  	leaders, err := leader.SelectionForConsensus(epoch)
    51  	if err != nil {
    52  		return nil, fmt.Errorf("could not get leader selection: %w", err)
    53  	}
    54  	initialIdentities, err := epoch.InitialIdentities()
    55  	if err != nil {
    56  		return nil, fmt.Errorf("could not initial identities: %w", err)
    57  	}
    58  	initialCommittee := initialIdentities.Filter(filter.IsConsensusCommitteeMember).ToSkeleton()
    59  	dkg, err := epoch.DKG()
    60  	if err != nil {
    61  		return nil, fmt.Errorf("could not get dkg: %w", err)
    62  	}
    63  
    64  	totalWeight := initialCommittee.TotalWeight()
    65  	epochInfo := &staticEpochInfo{
    66  		firstView:            firstView,
    67  		finalView:            finalView,
    68  		randomSource:         randomSource,
    69  		leaders:              leaders,
    70  		initialCommittee:     initialCommittee,
    71  		initialCommitteeMap:  initialCommittee.Lookup(),
    72  		weightThresholdForQC: WeightThresholdToBuildQC(totalWeight),
    73  		weightThresholdForTO: WeightThresholdToTimeout(totalWeight),
    74  		dkg:                  dkg,
    75  	}
    76  	return epochInfo, nil
    77  }
    78  
    79  // newEmergencyFallbackEpoch creates an artificial fallback epoch generated from
    80  // the last committed epoch at the time epoch emergency fallback is triggered.
    81  // The fallback epoch:
    82  // * begins after the last committed epoch
    83  // * lasts until the next spork (estimated 6 months)
    84  // * has the same static committee as the last committed epoch
    85  func newEmergencyFallbackEpoch(lastCommittedEpoch *staticEpochInfo) (*staticEpochInfo, error) {
    86  	rng, err := prg.New(lastCommittedEpoch.randomSource, prg.ConsensusLeaderSelection, nil)
    87  	if err != nil {
    88  		return nil, fmt.Errorf("could not create rng from seed: %w", err)
    89  	}
    90  	leaders, err := leader.ComputeLeaderSelection(
    91  		lastCommittedEpoch.finalView+1,
    92  		rng,
    93  		leader.EstimatedSixMonthOfViews,
    94  		lastCommittedEpoch.initialCommittee,
    95  	)
    96  	if err != nil {
    97  		return nil, fmt.Errorf("could not compute leader selection for fallback epoch: %w", err)
    98  	}
    99  	epochInfo := &staticEpochInfo{
   100  		firstView:            lastCommittedEpoch.finalView + 1,
   101  		finalView:            lastCommittedEpoch.finalView + leader.EstimatedSixMonthOfViews,
   102  		randomSource:         lastCommittedEpoch.randomSource,
   103  		leaders:              leaders,
   104  		initialCommittee:     lastCommittedEpoch.initialCommittee,
   105  		initialCommitteeMap:  lastCommittedEpoch.initialCommitteeMap,
   106  		weightThresholdForQC: lastCommittedEpoch.weightThresholdForQC,
   107  		weightThresholdForTO: lastCommittedEpoch.weightThresholdForTO,
   108  		dkg:                  lastCommittedEpoch.dkg,
   109  	}
   110  	return epochInfo, nil
   111  }
   112  
// Consensus represents the main committee for consensus nodes. The consensus
// committee might be active for multiple successive epochs.
//
// Static per-epoch information is cached in `epochs`. The cache is filled on
// construction and afterwards by the worker routine `handleProtocolEvents`, which
// consumes the protocol events queued in the two channels below.
type Consensus struct {
	state                  protocol.State              // the protocol state
	me                     flow.Identifier             // the node ID of this node
	mu                     sync.RWMutex                // protects access to epochs
	epochs                 map[uint64]*staticEpochInfo // cache of initial committee & leader selection per epoch, keyed by epoch counter
	committedEpochsCh      chan *flow.Header           // protocol events for newly committed epochs (the first block of the epoch is passed over the channel)
	epochEmergencyFallback chan struct{}               // protocol event for epoch emergency fallback
	isEpochFallbackHandled *atomic.Bool                // ensure we only inject fallback epoch once
	events.Noop                                        // implements protocol.Consumer
	// Component is assembled in NewConsensusCommittee with handleProtocolEvents
	// as its worker.
	component.Component
}
   126  
   127  var _ protocol.Consumer = (*Consensus)(nil)
   128  var _ hotstuff.Replicas = (*Consensus)(nil)
   129  var _ hotstuff.DynamicCommittee = (*Consensus)(nil)
   130  
   131  func NewConsensusCommittee(state protocol.State, me flow.Identifier) (*Consensus, error) {
   132  	com := &Consensus{
   133  		state:                  state,
   134  		me:                     me,
   135  		epochs:                 make(map[uint64]*staticEpochInfo),
   136  		committedEpochsCh:      make(chan *flow.Header, 1),
   137  		epochEmergencyFallback: make(chan struct{}, 1),
   138  		isEpochFallbackHandled: atomic.NewBool(false),
   139  	}
   140  
   141  	com.Component = component.NewComponentManagerBuilder().
   142  		AddWorker(com.handleProtocolEvents).
   143  		Build()
   144  
   145  	final := state.Final()
   146  
   147  	// pre-compute leader selection for all presently relevant committed epochs
   148  	epochs := make([]protocol.Epoch, 0, 3)
   149  	// we always prepare the current epoch
   150  	epochs = append(epochs, final.Epochs().Current())
   151  
   152  	// we prepare the previous epoch, if one exists
   153  	exists, err := protocol.PreviousEpochExists(final)
   154  	if err != nil {
   155  		return nil, fmt.Errorf("could not check previous epoch exists: %w", err)
   156  	}
   157  	if exists {
   158  		epochs = append(epochs, final.Epochs().Previous())
   159  	}
   160  
   161  	// we prepare the next epoch, if it is committed
   162  	phase, err := final.Phase()
   163  	if err != nil {
   164  		return nil, fmt.Errorf("could not check epoch phase: %w", err)
   165  	}
   166  	if phase == flow.EpochPhaseCommitted {
   167  		epochs = append(epochs, final.Epochs().Next())
   168  	}
   169  
   170  	for _, epoch := range epochs {
   171  		_, err = com.prepareEpoch(epoch)
   172  		if err != nil {
   173  			return nil, fmt.Errorf("could not prepare initial epochs: %w", err)
   174  		}
   175  	}
   176  
   177  	// if epoch emergency fallback was triggered, inject the fallback epoch
   178  	triggered, err := state.Params().EpochFallbackTriggered()
   179  	if err != nil {
   180  		return nil, fmt.Errorf("could not check epoch fallback: %w", err)
   181  	}
   182  	if triggered {
   183  		err = com.onEpochEmergencyFallbackTriggered()
   184  		if err != nil {
   185  			return nil, fmt.Errorf("could not prepare emergency fallback epoch: %w", err)
   186  		}
   187  	}
   188  
   189  	return com, nil
   190  }
   191  
   192  // IdentitiesByBlock returns the identities of all authorized consensus participants at the given block.
   193  // The order of the identities is the canonical order.
   194  // ERROR conditions:
   195  //   - state.ErrUnknownSnapshotReference if the blockID is for an unknown block
   196  func (c *Consensus) IdentitiesByBlock(blockID flow.Identifier) (flow.IdentityList, error) {
   197  	il, err := c.state.AtBlockID(blockID).Identities(filter.IsVotingConsensusCommitteeMember)
   198  	if err != nil {
   199  		return nil, fmt.Errorf("could not identities at block %x: %w", blockID, err) // state.ErrUnknownSnapshotReference or exception
   200  	}
   201  	return il, nil
   202  }
   203  
   204  // IdentityByBlock returns the identity of the node with the given node ID at the given block.
   205  // ERROR conditions:
   206  //   - model.InvalidSignerError if participantID does NOT correspond to an authorized HotStuff participant at the specified block.
   207  //   - state.ErrUnknownSnapshotReference if the blockID is for an unknown block
   208  func (c *Consensus) IdentityByBlock(blockID flow.Identifier, nodeID flow.Identifier) (*flow.Identity, error) {
   209  	identity, err := c.state.AtBlockID(blockID).Identity(nodeID)
   210  	if err != nil {
   211  		if protocol.IsIdentityNotFound(err) {
   212  			return nil, model.NewInvalidSignerErrorf("id %v is not a valid node id: %w", nodeID, err)
   213  		}
   214  		return nil, fmt.Errorf("could not get identity for node ID %x: %w", nodeID, err) // state.ErrUnknownSnapshotReference or exception
   215  	}
   216  	if !filter.IsVotingConsensusCommitteeMember(identity) {
   217  		return nil, model.NewInvalidSignerErrorf("node %v is not an authorized hotstuff voting participant", nodeID)
   218  	}
   219  	return identity, nil
   220  }
   221  
   222  // IdentitiesByEpoch returns the committee identities in the epoch which contains
   223  // the given view.
   224  // CAUTION: This method considers epochs outside of Previous, Current, Next, w.r.t. the
   225  // finalized block, to be unknown. https://github.com/onflow/flow-go/issues/4085
   226  //
   227  // Error returns:
   228  //   - model.ErrViewForUnknownEpoch if no committed epoch containing the given view is known.
   229  //     This is an expected error and must be handled.
   230  //   - unspecific error in case of unexpected problems and bugs
   231  func (c *Consensus) IdentitiesByEpoch(view uint64) (flow.IdentitySkeletonList, error) {
   232  	epochInfo, err := c.staticEpochInfoByView(view)
   233  	if err != nil {
   234  		return nil, err
   235  	}
   236  	return epochInfo.initialCommittee, nil
   237  }
   238  
   239  // IdentityByEpoch returns the identity for the given node ID, in the epoch which
   240  // contains the given view.
   241  // CAUTION: This method considers epochs outside of Previous, Current, Next, w.r.t. the
   242  // finalized block, to be unknown. https://github.com/onflow/flow-go/issues/4085
   243  //
   244  // Error returns:
   245  //   - model.ErrViewForUnknownEpoch if no committed epoch containing the given view is known.
   246  //     This is an expected error and must be handled.
   247  //   - model.InvalidSignerError if nodeID was not listed by the Epoch Setup event as an
   248  //     authorized consensus participants.
   249  //   - unspecific error in case of unexpected problems and bugs
   250  func (c *Consensus) IdentityByEpoch(view uint64, participantID flow.Identifier) (*flow.IdentitySkeleton, error) {
   251  	epochInfo, err := c.staticEpochInfoByView(view)
   252  	if err != nil {
   253  		return nil, err
   254  	}
   255  	identity, ok := epochInfo.initialCommitteeMap[participantID]
   256  	if !ok {
   257  		return nil, model.NewInvalidSignerErrorf("id %v is not a valid node id", participantID)
   258  	}
   259  	return identity, nil
   260  }
   261  
   262  // LeaderForView returns the node ID of the leader for the given view.
   263  //
   264  // Error returns:
   265  //   - model.ErrViewForUnknownEpoch if no committed epoch containing the given view is known.
   266  //     This is an expected error and must be handled.
   267  //   - unspecific error in case of unexpected problems and bugs
   268  func (c *Consensus) LeaderForView(view uint64) (flow.Identifier, error) {
   269  
   270  	epochInfo, err := c.staticEpochInfoByView(view)
   271  	if err != nil {
   272  		return flow.ZeroID, err
   273  	}
   274  	leaderID, err := epochInfo.leaders.LeaderForView(view)
   275  	if leader.IsInvalidViewError(err) {
   276  		// an invalid view error indicates that no leader was computed for this view
   277  		// this is a fatal internal error, because the view necessarily is within an
   278  		// epoch for which we have pre-computed leader selection
   279  		return flow.ZeroID, fmt.Errorf("unexpected inconsistency in epoch view spans for view %d: %v", view, err)
   280  	}
   281  	if err != nil {
   282  		return flow.ZeroID, err
   283  	}
   284  	return leaderID, nil
   285  }
   286  
   287  // QuorumThresholdForView returns the minimum weight required to build a valid
   288  // QC in the given view. The weight threshold only changes at epoch boundaries
   289  // and is computed based on the initial committee weights.
   290  //
   291  // Error returns:
   292  //   - model.ErrViewForUnknownEpoch if no committed epoch containing the given view is known.
   293  //     This is an expected error and must be handled.
   294  //   - unspecific error in case of unexpected problems and bugs
   295  func (c *Consensus) QuorumThresholdForView(view uint64) (uint64, error) {
   296  	epochInfo, err := c.staticEpochInfoByView(view)
   297  	if err != nil {
   298  		return 0, err
   299  	}
   300  	return epochInfo.weightThresholdForQC, nil
   301  }
   302  
// Self returns the node ID of this node, as passed to NewConsensusCommittee.
func (c *Consensus) Self() flow.Identifier {
	return c.me
}
   306  
   307  // TimeoutThresholdForView returns the minimum weight of observed timeout objects
   308  // to safely immediately timeout for the current view. The weight threshold only
   309  // changes at epoch boundaries and is computed based on the initial committee weights.
   310  func (c *Consensus) TimeoutThresholdForView(view uint64) (uint64, error) {
   311  	epochInfo, err := c.staticEpochInfoByView(view)
   312  	if err != nil {
   313  		return 0, err
   314  	}
   315  	return epochInfo.weightThresholdForTO, nil
   316  }
   317  
   318  // DKG returns the DKG for epoch which includes the given view.
   319  //
   320  // Error returns:
   321  //   - model.ErrViewForUnknownEpoch if no committed epoch containing the given view is known.
   322  //     This is an expected error and must be handled.
   323  //   - unspecific error in case of unexpected problems and bugs
   324  func (c *Consensus) DKG(view uint64) (hotstuff.DKG, error) {
   325  	epochInfo, err := c.staticEpochInfoByView(view)
   326  	if err != nil {
   327  		return nil, err
   328  	}
   329  	return epochInfo.dkg, nil
   330  }
   331  
   332  // handleProtocolEvents processes queued Epoch events `EpochCommittedPhaseStarted`
   333  // and `EpochEmergencyFallbackTriggered`. This function permanently utilizes a worker
   334  // routine until the `Component` terminates.
   335  // When we observe a new epoch being committed, we compute
   336  // the leader selection and cache static info for the epoch. When we observe
   337  // epoch emergency fallback being triggered, we inject a fallback epoch.
   338  func (c *Consensus) handleProtocolEvents(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) {
   339  	ready()
   340  
   341  	for {
   342  		select {
   343  		case <-ctx.Done():
   344  			return
   345  		case block := <-c.committedEpochsCh:
   346  			epoch := c.state.AtBlockID(block.ID()).Epochs().Next()
   347  			_, err := c.prepareEpoch(epoch)
   348  			if err != nil {
   349  				ctx.Throw(err)
   350  			}
   351  		case <-c.epochEmergencyFallback:
   352  			err := c.onEpochEmergencyFallbackTriggered()
   353  			if err != nil {
   354  				ctx.Throw(err)
   355  			}
   356  		}
   357  	}
   358  }
   359  
// EpochCommittedPhaseStarted informs the `committee.Consensus` that the block starting the Epoch Committed Phase has been finalized.
// The header `first` is queued on committedEpochsCh for the worker routine; the
// first argument is ignored. This is a plain channel send, so the call blocks
// while the channel's buffer is full.
func (c *Consensus) EpochCommittedPhaseStarted(_ uint64, first *flow.Header) {
	c.committedEpochsCh <- first
}
   364  
// EpochEmergencyFallbackTriggered passes the protocol event to the worker thread.
// The event is queued on the epochEmergencyFallback channel. This is a plain
// channel send, so the call blocks while the channel's buffer is full.
func (c *Consensus) EpochEmergencyFallbackTriggered() {
	c.epochEmergencyFallback <- struct{}{}
}
   369  
   370  // onEpochEmergencyFallbackTriggered handles the protocol event for emergency epoch
   371  // fallback mode being triggered. When this occurs, we inject a fallback epoch
   372  // to the committee which extends the current epoch.
   373  // This method must also be called on initialization, if emergency fallback mode
   374  // was triggered in the past.
   375  // No errors are expected during normal operation.
   376  func (c *Consensus) onEpochEmergencyFallbackTriggered() error {
   377  
   378  	// we respond to epoch fallback being triggered at most once, therefore
   379  	// the core logic is protected by an atomic bool.
   380  	// although it is only valid for epoch fallback to be triggered once per spork,
   381  	// we must account for repeated delivery of protocol events.
   382  	if !c.isEpochFallbackHandled.CompareAndSwap(false, true) {
   383  		return nil
   384  	}
   385  
   386  	currentEpochCounter, err := c.state.Final().Epochs().Current().Counter()
   387  	if err != nil {
   388  		return fmt.Errorf("could not get current epoch counter: %w", err)
   389  	}
   390  
   391  	c.mu.RLock()
   392  	// sanity check: current epoch must be cached already
   393  	currentEpoch, ok := c.epochs[currentEpochCounter]
   394  	if !ok {
   395  		c.mu.RUnlock()
   396  		return fmt.Errorf("epoch fallback: could not find current epoch (counter=%d) info", currentEpochCounter)
   397  	}
   398  	// sanity check: next epoch must never be committed, therefore must not be cached
   399  	_, ok = c.epochs[currentEpochCounter+1]
   400  	c.mu.RUnlock()
   401  	if ok {
   402  		return fmt.Errorf("epoch fallback: next epoch (counter=%d) is cached contrary to expectation", currentEpochCounter+1)
   403  	}
   404  
   405  	fallbackEpoch, err := newEmergencyFallbackEpoch(currentEpoch)
   406  	if err != nil {
   407  		return fmt.Errorf("could not construct fallback epoch: %w", err)
   408  	}
   409  
   410  	// cache the epoch info
   411  	c.mu.Lock()
   412  	c.epochs[currentEpochCounter+1] = fallbackEpoch
   413  	c.mu.Unlock()
   414  
   415  	return nil
   416  }
   417  
   418  // staticEpochInfoByView retrieves the previously cached static epoch info for
   419  // the epoch which includes the given view. If no epoch is known for the given
   420  // view, we will attempt to cache the next epoch.
   421  //
   422  // Error returns:
   423  //   - model.ErrViewForUnknownEpoch if no committed epoch containing the given view is known
   424  //   - unspecific error in case of unexpected problems and bugs
   425  func (c *Consensus) staticEpochInfoByView(view uint64) (*staticEpochInfo, error) {
   426  
   427  	// look for an epoch matching this view for which we have already pre-computed
   428  	// leader selection. Epochs last ~500k views, so we find the epoch here 99.99%
   429  	// of the time. Since epochs are long-lived and we only cache the most recent 3,
   430  	// this linear map iteration is inexpensive.
   431  	c.mu.RLock()
   432  	for _, epoch := range c.epochs {
   433  		if epoch.firstView <= view && view <= epoch.finalView {
   434  			c.mu.RUnlock()
   435  			return epoch, nil
   436  		}
   437  	}
   438  	c.mu.RUnlock()
   439  
   440  	return nil, model.ErrViewForUnknownEpoch
   441  }
   442  
   443  // prepareEpoch pre-computes and stores the static epoch information for the
   444  // given epoch, including leader selection. Calling prepareEpoch multiple times
   445  // for the same epoch returns cached static epoch information.
   446  // Input must be a committed epoch.
   447  // No errors are expected during normal operation.
   448  func (c *Consensus) prepareEpoch(epoch protocol.Epoch) (*staticEpochInfo, error) {
   449  
   450  	counter, err := epoch.Counter()
   451  	if err != nil {
   452  		return nil, fmt.Errorf("could not get counter for epoch to prepare: %w", err)
   453  	}
   454  
   455  	// this is a no-op if we have already computed static info for this epoch
   456  	c.mu.RLock()
   457  	epochInfo, exists := c.epochs[counter]
   458  	c.mu.RUnlock()
   459  	if exists {
   460  		return epochInfo, nil
   461  	}
   462  
   463  	epochInfo, err = newStaticEpochInfo(epoch)
   464  	if err != nil {
   465  		return nil, fmt.Errorf("could not create static epoch info for epch %d: %w", counter, err)
   466  	}
   467  
   468  	// sanity check: ensure new epoch has contiguous views with the prior epoch
   469  	c.mu.RLock()
   470  	prevEpochInfo, exists := c.epochs[counter-1]
   471  	c.mu.RUnlock()
   472  	if exists {
   473  		if epochInfo.firstView != prevEpochInfo.finalView+1 {
   474  			return nil, fmt.Errorf("non-contiguous view ranges between consecutive epochs (epoch_%d=[%d,%d], epoch_%d=[%d,%d])",
   475  				counter-1, prevEpochInfo.firstView, prevEpochInfo.finalView,
   476  				counter, epochInfo.firstView, epochInfo.finalView)
   477  		}
   478  	}
   479  
   480  	// cache the epoch info
   481  	c.mu.Lock()
   482  	defer c.mu.Unlock()
   483  	c.epochs[counter] = epochInfo
   484  	// now prune any old epochs, if we have exceeded our maximum of 3
   485  	// if we have fewer than 3 epochs, this is a no-op
   486  	c.pruneEpochInfo()
   487  	return epochInfo, nil
   488  }
   489  
   490  // pruneEpochInfo removes any epochs older than the most recent 3.
   491  // NOTE: Not safe for concurrent use - the caller must first acquire the lock.
   492  func (c *Consensus) pruneEpochInfo() {
   493  	// find the maximum counter, including the epoch we just computed
   494  	max := uint64(0)
   495  	for counter := range c.epochs {
   496  		if counter > max {
   497  			max = counter
   498  		}
   499  	}
   500  
   501  	// remove any epochs which aren't within the most recent 3
   502  	for counter := range c.epochs {
   503  		if counter+3 <= max {
   504  			delete(c.epochs, counter)
   505  		}
   506  	}
   507  }