github.com/onflow/flow-go@v0.33.17/consensus/hotstuff/committees/consensus_committee.go (about) 1 package committees 2 3 import ( 4 "fmt" 5 "sync" 6 7 "go.uber.org/atomic" 8 9 "github.com/onflow/flow-go/consensus/hotstuff" 10 "github.com/onflow/flow-go/consensus/hotstuff/committees/leader" 11 "github.com/onflow/flow-go/consensus/hotstuff/model" 12 "github.com/onflow/flow-go/model/flow" 13 "github.com/onflow/flow-go/model/flow/filter" 14 "github.com/onflow/flow-go/module/component" 15 "github.com/onflow/flow-go/module/irrecoverable" 16 "github.com/onflow/flow-go/state/protocol" 17 "github.com/onflow/flow-go/state/protocol/events" 18 "github.com/onflow/flow-go/state/protocol/prg" 19 ) 20 21 // staticEpochInfo contains leader selection and the initial committee for one epoch. 22 // This data structure must not be mutated after construction. 23 type staticEpochInfo struct { 24 firstView uint64 // first view of the epoch (inclusive) 25 finalView uint64 // final view of the epoch (inclusive) 26 randomSource []byte // random source of epoch 27 leaders *leader.LeaderSelection // pre-computed leader selection for the epoch 28 // TODO: should use identity skeleton https://github.com/dapperlabs/flow-go/issues/6232 29 initialCommittee flow.IdentityList 30 initialCommitteeMap map[flow.Identifier]*flow.Identity 31 weightThresholdForQC uint64 // computed based on initial committee weights 32 weightThresholdForTO uint64 // computed based on initial committee weights 33 dkg hotstuff.DKG 34 } 35 36 // newStaticEpochInfo returns the static epoch information from the epoch. 37 // This can be cached and used for all by-view queries for this epoch. 38 func newStaticEpochInfo(epoch protocol.Epoch) (*staticEpochInfo, error) { 39 firstView, err := epoch.FirstView() 40 if err != nil { 41 return nil, fmt.Errorf("could not get first view: %w", err) 42 } 43 finalView, err := epoch.FinalView() 44 if err != nil { 45 return nil, fmt.Errorf("could not get final view: %w", err) 46 } 47 randomSource, err := epoch.RandomSource() 48 if err != nil { 49 return nil, fmt.Errorf("could not get random source: %w", err) 50 } 51 leaders, err := leader.SelectionForConsensus(epoch) 52 if err != nil { 53 return nil, fmt.Errorf("could not get leader selection: %w", err) 54 } 55 initialIdentities, err := epoch.InitialIdentities() 56 if err != nil { 57 return nil, fmt.Errorf("could not initial identities: %w", err) 58 } 59 initialCommittee := initialIdentities.Filter(filter.IsVotingConsensusCommitteeMember) 60 dkg, err := epoch.DKG() 61 if err != nil { 62 return nil, fmt.Errorf("could not get dkg: %w", err) 63 } 64 65 totalWeight := initialCommittee.TotalWeight() 66 epochInfo := &staticEpochInfo{ 67 firstView: firstView, 68 finalView: finalView, 69 randomSource: randomSource, 70 leaders: leaders, 71 initialCommittee: initialCommittee, 72 initialCommitteeMap: initialCommittee.Lookup(), 73 weightThresholdForQC: WeightThresholdToBuildQC(totalWeight), 74 weightThresholdForTO: WeightThresholdToTimeout(totalWeight), 75 dkg: dkg, 76 } 77 return epochInfo, nil 78 } 79 80 // newEmergencyFallbackEpoch creates an artificial fallback epoch generated from 81 // the last committed epoch at the time epoch emergency fallback is triggered. 82 // The fallback epoch: 83 // * begins after the last committed epoch 84 // * lasts until the next spork (estimated 6 months) 85 // * has the same static committee as the last committed epoch 86 func newEmergencyFallbackEpoch(lastCommittedEpoch *staticEpochInfo) (*staticEpochInfo, error) { 87 88 rng, err := prg.New(lastCommittedEpoch.randomSource, prg.ConsensusLeaderSelection, nil) 89 if err != nil { 90 return nil, fmt.Errorf("could not create rng from seed: %w", err) 91 } 92 leaders, err := leader.ComputeLeaderSelection(lastCommittedEpoch.finalView+1, rng, leader.EstimatedSixMonthOfViews, lastCommittedEpoch.initialCommittee) 93 if err != nil { 94 return nil, fmt.Errorf("could not compute leader selection for fallback epoch: %w", err) 95 } 96 epochInfo := &staticEpochInfo{ 97 firstView: lastCommittedEpoch.finalView + 1, 98 finalView: lastCommittedEpoch.finalView + leader.EstimatedSixMonthOfViews, 99 randomSource: lastCommittedEpoch.randomSource, 100 leaders: leaders, 101 initialCommittee: lastCommittedEpoch.initialCommittee, 102 initialCommitteeMap: lastCommittedEpoch.initialCommitteeMap, 103 weightThresholdForQC: lastCommittedEpoch.weightThresholdForQC, 104 weightThresholdForTO: lastCommittedEpoch.weightThresholdForTO, 105 dkg: lastCommittedEpoch.dkg, 106 } 107 return epochInfo, nil 108 } 109 110 // Consensus represents the main committee for consensus nodes. The consensus 111 // committee might be active for multiple successive epochs. 112 type Consensus struct { 113 state protocol.State // the protocol state 114 me flow.Identifier // the node ID of this node 115 mu sync.RWMutex // protects access to epochs 116 epochs map[uint64]*staticEpochInfo // cache of initial committee & leader selection per epoch 117 committedEpochsCh chan *flow.Header // protocol events for newly committed epochs (the first block of the epoch is passed over the channel) 118 epochEmergencyFallback chan struct{} // protocol event for epoch emergency fallback 119 isEpochFallbackHandled *atomic.Bool // ensure we only inject fallback epoch once 120 events.Noop // implements protocol.Consumer 121 component.Component 122 } 123 124 var _ protocol.Consumer = (*Consensus)(nil) 125 var _ hotstuff.Replicas = (*Consensus)(nil) 126 var _ hotstuff.DynamicCommittee = (*Consensus)(nil) 127 128 func NewConsensusCommittee(state protocol.State, me flow.Identifier) (*Consensus, error) { 129 130 com := &Consensus{ 131 state: state, 132 me: me, 133 epochs: make(map[uint64]*staticEpochInfo), 134 committedEpochsCh: make(chan *flow.Header, 1), 135 epochEmergencyFallback: make(chan struct{}, 1), 136 isEpochFallbackHandled: atomic.NewBool(false), 137 } 138 139 com.Component = component.NewComponentManagerBuilder(). 140 AddWorker(com.handleProtocolEvents). 141 Build() 142 143 final := state.Final() 144 145 // pre-compute leader selection for all presently relevant committed epochs 146 epochs := make([]protocol.Epoch, 0, 3) 147 // we always prepare the current epoch 148 epochs = append(epochs, final.Epochs().Current()) 149 150 // we prepare the previous epoch, if one exists 151 exists, err := protocol.PreviousEpochExists(final) 152 if err != nil { 153 return nil, fmt.Errorf("could not check previous epoch exists: %w", err) 154 } 155 if exists { 156 epochs = append(epochs, final.Epochs().Previous()) 157 } 158 159 // we prepare the next epoch, if it is committed 160 phase, err := final.Phase() 161 if err != nil { 162 return nil, fmt.Errorf("could not check epoch phase: %w", err) 163 } 164 if phase == flow.EpochPhaseCommitted { 165 epochs = append(epochs, final.Epochs().Next()) 166 } 167 168 for _, epoch := range epochs { 169 _, err = com.prepareEpoch(epoch) 170 if err != nil { 171 return nil, fmt.Errorf("could not prepare initial epochs: %w", err) 172 } 173 } 174 175 // if epoch emergency fallback was triggered, inject the fallback epoch 176 triggered, err := state.Params().EpochFallbackTriggered() 177 if err != nil { 178 return nil, fmt.Errorf("could not check epoch fallback: %w", err) 179 } 180 if triggered { 181 err = com.onEpochEmergencyFallbackTriggered() 182 if err != nil { 183 return nil, fmt.Errorf("could not prepare emergency fallback epoch: %w", err) 184 } 185 } 186 187 return com, nil 188 } 189 190 // IdentitiesByBlock returns the identities of all authorized consensus participants at the given block. 191 // The order of the identities is the canonical order. 192 // ERROR conditions: 193 // - state.ErrUnknownSnapshotReference if the blockID is for an unknown block 194 func (c *Consensus) IdentitiesByBlock(blockID flow.Identifier) (flow.IdentityList, error) { 195 il, err := c.state.AtBlockID(blockID).Identities(filter.IsVotingConsensusCommitteeMember) 196 if err != nil { 197 return nil, fmt.Errorf("could not identities at block %x: %w", blockID, err) // state.ErrUnknownSnapshotReference or exception 198 } 199 return il, nil 200 } 201 202 // IdentityByBlock returns the identity of the node with the given node ID at the given block. 203 // ERROR conditions: 204 // - model.InvalidSignerError if participantID does NOT correspond to an authorized HotStuff participant at the specified block. 205 // - state.ErrUnknownSnapshotReference if the blockID is for an unknown block 206 func (c *Consensus) IdentityByBlock(blockID flow.Identifier, nodeID flow.Identifier) (*flow.Identity, error) { 207 identity, err := c.state.AtBlockID(blockID).Identity(nodeID) 208 if err != nil { 209 if protocol.IsIdentityNotFound(err) { 210 return nil, model.NewInvalidSignerErrorf("id %v is not a valid node id: %w", nodeID, err) 211 } 212 return nil, fmt.Errorf("could not get identity for node ID %x: %w", nodeID, err) // state.ErrUnknownSnapshotReference or exception 213 } 214 if !filter.IsVotingConsensusCommitteeMember(identity) { 215 return nil, model.NewInvalidSignerErrorf("node %v is not an authorized hotstuff voting participant", nodeID) 216 } 217 return identity, nil 218 } 219 220 // IdentitiesByEpoch returns the committee identities in the epoch which contains 221 // the given view. 222 // CAUTION: This method considers epochs outside of Previous, Current, Next, w.r.t. the 223 // finalized block, to be unknown. https://github.com/onflow/flow-go/issues/4085 224 // 225 // Error returns: 226 // - model.ErrViewForUnknownEpoch if no committed epoch containing the given view is known. 227 // This is an expected error and must be handled. 228 // - unspecific error in case of unexpected problems and bugs 229 func (c *Consensus) IdentitiesByEpoch(view uint64) (flow.IdentityList, error) { 230 epochInfo, err := c.staticEpochInfoByView(view) 231 if err != nil { 232 return nil, err 233 } 234 return epochInfo.initialCommittee, nil 235 } 236 237 // IdentityByEpoch returns the identity for the given node ID, in the epoch which 238 // contains the given view. 239 // CAUTION: This method considers epochs outside of Previous, Current, Next, w.r.t. the 240 // finalized block, to be unknown. https://github.com/onflow/flow-go/issues/4085 241 // 242 // Error returns: 243 // - model.ErrViewForUnknownEpoch if no committed epoch containing the given view is known. 244 // This is an expected error and must be handled. 245 // - model.InvalidSignerError if nodeID was not listed by the Epoch Setup event as an 246 // authorized consensus participants. 247 // - unspecific error in case of unexpected problems and bugs 248 func (c *Consensus) IdentityByEpoch(view uint64, nodeID flow.Identifier) (*flow.Identity, error) { 249 epochInfo, err := c.staticEpochInfoByView(view) 250 if err != nil { 251 return nil, err 252 } 253 identity, ok := epochInfo.initialCommitteeMap[nodeID] 254 if !ok { 255 return nil, model.NewInvalidSignerErrorf("id %v is not a valid node id", nodeID) 256 } 257 return identity, nil 258 } 259 260 // LeaderForView returns the node ID of the leader for the given view. 261 // 262 // Error returns: 263 // - model.ErrViewForUnknownEpoch if no committed epoch containing the given view is known. 264 // This is an expected error and must be handled. 265 // - unspecific error in case of unexpected problems and bugs 266 func (c *Consensus) LeaderForView(view uint64) (flow.Identifier, error) { 267 268 epochInfo, err := c.staticEpochInfoByView(view) 269 if err != nil { 270 return flow.ZeroID, err 271 } 272 leaderID, err := epochInfo.leaders.LeaderForView(view) 273 if leader.IsInvalidViewError(err) { 274 // an invalid view error indicates that no leader was computed for this view 275 // this is a fatal internal error, because the view necessarily is within an 276 // epoch for which we have pre-computed leader selection 277 return flow.ZeroID, fmt.Errorf("unexpected inconsistency in epoch view spans for view %d: %v", view, err) 278 } 279 if err != nil { 280 return flow.ZeroID, err 281 } 282 return leaderID, nil 283 } 284 285 // QuorumThresholdForView returns the minimum weight required to build a valid 286 // QC in the given view. The weight threshold only changes at epoch boundaries 287 // and is computed based on the initial committee weights. 288 // 289 // Error returns: 290 // - model.ErrViewForUnknownEpoch if no committed epoch containing the given view is known. 291 // This is an expected error and must be handled. 292 // - unspecific error in case of unexpected problems and bugs 293 func (c *Consensus) QuorumThresholdForView(view uint64) (uint64, error) { 294 epochInfo, err := c.staticEpochInfoByView(view) 295 if err != nil { 296 return 0, err 297 } 298 return epochInfo.weightThresholdForQC, nil 299 } 300 301 func (c *Consensus) Self() flow.Identifier { 302 return c.me 303 } 304 305 // TimeoutThresholdForView returns the minimum weight of observed timeout objects 306 // to safely immediately timeout for the current view. The weight threshold only 307 // changes at epoch boundaries and is computed based on the initial committee weights. 308 func (c *Consensus) TimeoutThresholdForView(view uint64) (uint64, error) { 309 epochInfo, err := c.staticEpochInfoByView(view) 310 if err != nil { 311 return 0, err 312 } 313 return epochInfo.weightThresholdForTO, nil 314 } 315 316 // DKG returns the DKG for epoch which includes the given view. 317 // 318 // Error returns: 319 // - model.ErrViewForUnknownEpoch if no committed epoch containing the given view is known. 320 // This is an expected error and must be handled. 321 // - unspecific error in case of unexpected problems and bugs 322 func (c *Consensus) DKG(view uint64) (hotstuff.DKG, error) { 323 epochInfo, err := c.staticEpochInfoByView(view) 324 if err != nil { 325 return nil, err 326 } 327 return epochInfo.dkg, nil 328 } 329 330 // handleProtocolEvents processes queued Epoch events `EpochCommittedPhaseStarted` 331 // and `EpochEmergencyFallbackTriggered`. This function permanently utilizes a worker 332 // routine until the `Component` terminates. 333 // When we observe a new epoch being committed, we compute 334 // the leader selection and cache static info for the epoch. When we observe 335 // epoch emergency fallback being triggered, we inject a fallback epoch. 336 func (c *Consensus) handleProtocolEvents(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { 337 ready() 338 339 for { 340 select { 341 case <-ctx.Done(): 342 return 343 case block := <-c.committedEpochsCh: 344 epoch := c.state.AtBlockID(block.ID()).Epochs().Next() 345 _, err := c.prepareEpoch(epoch) 346 if err != nil { 347 ctx.Throw(err) 348 } 349 case <-c.epochEmergencyFallback: 350 err := c.onEpochEmergencyFallbackTriggered() 351 if err != nil { 352 ctx.Throw(err) 353 } 354 } 355 } 356 } 357 358 // EpochCommittedPhaseStarted informs the `committee.Consensus` that the block starting the Epoch Committed Phase has been finalized. 359 func (c *Consensus) EpochCommittedPhaseStarted(_ uint64, first *flow.Header) { 360 c.committedEpochsCh <- first 361 } 362 363 // EpochEmergencyFallbackTriggered passes the protocol event to the worker thread. 364 func (c *Consensus) EpochEmergencyFallbackTriggered() { 365 c.epochEmergencyFallback <- struct{}{} 366 } 367 368 // onEpochEmergencyFallbackTriggered handles the protocol event for emergency epoch 369 // fallback mode being triggered. When this occurs, we inject a fallback epoch 370 // to the committee which extends the current epoch. 371 // This method must also be called on initialization, if emergency fallback mode 372 // was triggered in the past. 373 // No errors are expected during normal operation. 374 func (c *Consensus) onEpochEmergencyFallbackTriggered() error { 375 376 // we respond to epoch fallback being triggered at most once, therefore 377 // the core logic is protected by an atomic bool. 378 // although it is only valid for epoch fallback to be triggered once per spork, 379 // we must account for repeated delivery of protocol events. 380 if !c.isEpochFallbackHandled.CompareAndSwap(false, true) { 381 return nil 382 } 383 384 currentEpochCounter, err := c.state.Final().Epochs().Current().Counter() 385 if err != nil { 386 return fmt.Errorf("could not get current epoch counter: %w", err) 387 } 388 389 c.mu.RLock() 390 // sanity check: current epoch must be cached already 391 currentEpoch, ok := c.epochs[currentEpochCounter] 392 if !ok { 393 c.mu.RUnlock() 394 return fmt.Errorf("epoch fallback: could not find current epoch (counter=%d) info", currentEpochCounter) 395 } 396 // sanity check: next epoch must never be committed, therefore must not be cached 397 _, ok = c.epochs[currentEpochCounter+1] 398 c.mu.RUnlock() 399 if ok { 400 return fmt.Errorf("epoch fallback: next epoch (counter=%d) is cached contrary to expectation", currentEpochCounter+1) 401 } 402 403 fallbackEpoch, err := newEmergencyFallbackEpoch(currentEpoch) 404 if err != nil { 405 return fmt.Errorf("could not construct fallback epoch: %w", err) 406 } 407 408 // cache the epoch info 409 c.mu.Lock() 410 c.epochs[currentEpochCounter+1] = fallbackEpoch 411 c.mu.Unlock() 412 413 return nil 414 } 415 416 // staticEpochInfoByView retrieves the previously cached static epoch info for 417 // the epoch which includes the given view. If no epoch is known for the given 418 // view, we will attempt to cache the next epoch. 419 // 420 // Error returns: 421 // - model.ErrViewForUnknownEpoch if no committed epoch containing the given view is known 422 // - unspecific error in case of unexpected problems and bugs 423 func (c *Consensus) staticEpochInfoByView(view uint64) (*staticEpochInfo, error) { 424 425 // look for an epoch matching this view for which we have already pre-computed 426 // leader selection. Epochs last ~500k views, so we find the epoch here 99.99% 427 // of the time. Since epochs are long-lived and we only cache the most recent 3, 428 // this linear map iteration is inexpensive. 429 c.mu.RLock() 430 for _, epoch := range c.epochs { 431 if epoch.firstView <= view && view <= epoch.finalView { 432 c.mu.RUnlock() 433 return epoch, nil 434 } 435 } 436 c.mu.RUnlock() 437 438 return nil, model.ErrViewForUnknownEpoch 439 } 440 441 // prepareEpoch pre-computes and stores the static epoch information for the 442 // given epoch, including leader selection. Calling prepareEpoch multiple times 443 // for the same epoch returns cached static epoch information. 444 // Input must be a committed epoch. 445 // No errors are expected during normal operation. 446 func (c *Consensus) prepareEpoch(epoch protocol.Epoch) (*staticEpochInfo, error) { 447 448 counter, err := epoch.Counter() 449 if err != nil { 450 return nil, fmt.Errorf("could not get counter for epoch to prepare: %w", err) 451 } 452 453 // this is a no-op if we have already computed static info for this epoch 454 c.mu.RLock() 455 epochInfo, exists := c.epochs[counter] 456 c.mu.RUnlock() 457 if exists { 458 return epochInfo, nil 459 } 460 461 epochInfo, err = newStaticEpochInfo(epoch) 462 if err != nil { 463 return nil, fmt.Errorf("could not create static epoch info for epch %d: %w", counter, err) 464 } 465 466 // sanity check: ensure new epoch has contiguous views with the prior epoch 467 c.mu.RLock() 468 prevEpochInfo, exists := c.epochs[counter-1] 469 c.mu.RUnlock() 470 if exists { 471 if epochInfo.firstView != prevEpochInfo.finalView+1 { 472 return nil, fmt.Errorf("non-contiguous view ranges between consecutive epochs (epoch_%d=[%d,%d], epoch_%d=[%d,%d])", 473 counter-1, prevEpochInfo.firstView, prevEpochInfo.finalView, 474 counter, epochInfo.firstView, epochInfo.finalView) 475 } 476 } 477 478 // cache the epoch info 479 c.mu.Lock() 480 defer c.mu.Unlock() 481 c.epochs[counter] = epochInfo 482 // now prune any old epochs, if we have exceeded our maximum of 3 483 // if we have fewer than 3 epochs, this is a no-op 484 c.pruneEpochInfo() 485 return epochInfo, nil 486 } 487 488 // pruneEpochInfo removes any epochs older than the most recent 3. 489 // NOTE: Not safe for concurrent use - the caller must first acquire the lock. 490 func (c *Consensus) pruneEpochInfo() { 491 // find the maximum counter, including the epoch we just computed 492 max := uint64(0) 493 for counter := range c.epochs { 494 if counter > max { 495 max = counter 496 } 497 } 498 499 // remove any epochs which aren't within the most recent 3 500 for counter := range c.epochs { 501 if counter+3 <= max { 502 delete(c.epochs, counter) 503 } 504 } 505 }