// Source: github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/consensus/hotstuff/committees/consensus_committee.go

package committees

import (
	"fmt"
	"sync"

	"go.uber.org/atomic"

	"github.com/onflow/flow-go/consensus/hotstuff"
	"github.com/onflow/flow-go/consensus/hotstuff/committees/leader"
	"github.com/onflow/flow-go/consensus/hotstuff/model"
	"github.com/onflow/flow-go/model/flow"
	"github.com/onflow/flow-go/model/flow/filter"
	"github.com/onflow/flow-go/module/component"
	"github.com/onflow/flow-go/module/irrecoverable"
	"github.com/onflow/flow-go/state/protocol"
	"github.com/onflow/flow-go/state/protocol/events"
	"github.com/onflow/flow-go/state/protocol/prg"
)

// staticEpochInfo contains leader selection and the initial committee for one epoch.
// This data structure must not be mutated after construction.
type staticEpochInfo struct {
	firstView            uint64                                     // first view of the epoch (inclusive)
	finalView            uint64                                     // final view of the epoch (inclusive)
	randomSource         []byte                                     // random source of epoch
	leaders              *leader.LeaderSelection                    // pre-computed leader selection for the epoch
	initialCommittee     flow.IdentitySkeletonList                  // consensus committee members among the epoch's initial identities
	initialCommitteeMap  map[flow.Identifier]*flow.IdentitySkeleton // lookup of initialCommittee members by node ID
	weightThresholdForQC uint64                                     // computed based on initial committee weights
	weightThresholdForTO uint64                                     // computed based on initial committee weights
	dkg                  hotstuff.DKG                               // DKG information as returned by the epoch
}

// newStaticEpochInfo returns the static epoch information from the epoch.
// This can be cached and used for all by-view queries for this epoch.
37 func newStaticEpochInfo(epoch protocol.Epoch) (*staticEpochInfo, error) { 38 firstView, err := epoch.FirstView() 39 if err != nil { 40 return nil, fmt.Errorf("could not get first view: %w", err) 41 } 42 finalView, err := epoch.FinalView() 43 if err != nil { 44 return nil, fmt.Errorf("could not get final view: %w", err) 45 } 46 randomSource, err := epoch.RandomSource() 47 if err != nil { 48 return nil, fmt.Errorf("could not get random source: %w", err) 49 } 50 leaders, err := leader.SelectionForConsensus(epoch) 51 if err != nil { 52 return nil, fmt.Errorf("could not get leader selection: %w", err) 53 } 54 initialIdentities, err := epoch.InitialIdentities() 55 if err != nil { 56 return nil, fmt.Errorf("could not initial identities: %w", err) 57 } 58 initialCommittee := initialIdentities.Filter(filter.IsConsensusCommitteeMember).ToSkeleton() 59 dkg, err := epoch.DKG() 60 if err != nil { 61 return nil, fmt.Errorf("could not get dkg: %w", err) 62 } 63 64 totalWeight := initialCommittee.TotalWeight() 65 epochInfo := &staticEpochInfo{ 66 firstView: firstView, 67 finalView: finalView, 68 randomSource: randomSource, 69 leaders: leaders, 70 initialCommittee: initialCommittee, 71 initialCommitteeMap: initialCommittee.Lookup(), 72 weightThresholdForQC: WeightThresholdToBuildQC(totalWeight), 73 weightThresholdForTO: WeightThresholdToTimeout(totalWeight), 74 dkg: dkg, 75 } 76 return epochInfo, nil 77 } 78 79 // newEmergencyFallbackEpoch creates an artificial fallback epoch generated from 80 // the last committed epoch at the time epoch emergency fallback is triggered. 
81 // The fallback epoch: 82 // * begins after the last committed epoch 83 // * lasts until the next spork (estimated 6 months) 84 // * has the same static committee as the last committed epoch 85 func newEmergencyFallbackEpoch(lastCommittedEpoch *staticEpochInfo) (*staticEpochInfo, error) { 86 rng, err := prg.New(lastCommittedEpoch.randomSource, prg.ConsensusLeaderSelection, nil) 87 if err != nil { 88 return nil, fmt.Errorf("could not create rng from seed: %w", err) 89 } 90 leaders, err := leader.ComputeLeaderSelection( 91 lastCommittedEpoch.finalView+1, 92 rng, 93 leader.EstimatedSixMonthOfViews, 94 lastCommittedEpoch.initialCommittee, 95 ) 96 if err != nil { 97 return nil, fmt.Errorf("could not compute leader selection for fallback epoch: %w", err) 98 } 99 epochInfo := &staticEpochInfo{ 100 firstView: lastCommittedEpoch.finalView + 1, 101 finalView: lastCommittedEpoch.finalView + leader.EstimatedSixMonthOfViews, 102 randomSource: lastCommittedEpoch.randomSource, 103 leaders: leaders, 104 initialCommittee: lastCommittedEpoch.initialCommittee, 105 initialCommitteeMap: lastCommittedEpoch.initialCommitteeMap, 106 weightThresholdForQC: lastCommittedEpoch.weightThresholdForQC, 107 weightThresholdForTO: lastCommittedEpoch.weightThresholdForTO, 108 dkg: lastCommittedEpoch.dkg, 109 } 110 return epochInfo, nil 111 } 112 113 // Consensus represents the main committee for consensus nodes. The consensus 114 // committee might be active for multiple successive epochs. 
115 type Consensus struct { 116 state protocol.State // the protocol state 117 me flow.Identifier // the node ID of this node 118 mu sync.RWMutex // protects access to epochs 119 epochs map[uint64]*staticEpochInfo // cache of initial committee & leader selection per epoch 120 committedEpochsCh chan *flow.Header // protocol events for newly committed epochs (the first block of the epoch is passed over the channel) 121 epochEmergencyFallback chan struct{} // protocol event for epoch emergency fallback 122 isEpochFallbackHandled *atomic.Bool // ensure we only inject fallback epoch once 123 events.Noop // implements protocol.Consumer 124 component.Component 125 } 126 127 var _ protocol.Consumer = (*Consensus)(nil) 128 var _ hotstuff.Replicas = (*Consensus)(nil) 129 var _ hotstuff.DynamicCommittee = (*Consensus)(nil) 130 131 func NewConsensusCommittee(state protocol.State, me flow.Identifier) (*Consensus, error) { 132 com := &Consensus{ 133 state: state, 134 me: me, 135 epochs: make(map[uint64]*staticEpochInfo), 136 committedEpochsCh: make(chan *flow.Header, 1), 137 epochEmergencyFallback: make(chan struct{}, 1), 138 isEpochFallbackHandled: atomic.NewBool(false), 139 } 140 141 com.Component = component.NewComponentManagerBuilder(). 142 AddWorker(com.handleProtocolEvents). 
143 Build() 144 145 final := state.Final() 146 147 // pre-compute leader selection for all presently relevant committed epochs 148 epochs := make([]protocol.Epoch, 0, 3) 149 // we always prepare the current epoch 150 epochs = append(epochs, final.Epochs().Current()) 151 152 // we prepare the previous epoch, if one exists 153 exists, err := protocol.PreviousEpochExists(final) 154 if err != nil { 155 return nil, fmt.Errorf("could not check previous epoch exists: %w", err) 156 } 157 if exists { 158 epochs = append(epochs, final.Epochs().Previous()) 159 } 160 161 // we prepare the next epoch, if it is committed 162 phase, err := final.Phase() 163 if err != nil { 164 return nil, fmt.Errorf("could not check epoch phase: %w", err) 165 } 166 if phase == flow.EpochPhaseCommitted { 167 epochs = append(epochs, final.Epochs().Next()) 168 } 169 170 for _, epoch := range epochs { 171 _, err = com.prepareEpoch(epoch) 172 if err != nil { 173 return nil, fmt.Errorf("could not prepare initial epochs: %w", err) 174 } 175 } 176 177 // if epoch emergency fallback was triggered, inject the fallback epoch 178 triggered, err := state.Params().EpochFallbackTriggered() 179 if err != nil { 180 return nil, fmt.Errorf("could not check epoch fallback: %w", err) 181 } 182 if triggered { 183 err = com.onEpochEmergencyFallbackTriggered() 184 if err != nil { 185 return nil, fmt.Errorf("could not prepare emergency fallback epoch: %w", err) 186 } 187 } 188 189 return com, nil 190 } 191 192 // IdentitiesByBlock returns the identities of all authorized consensus participants at the given block. 193 // The order of the identities is the canonical order. 
194 // ERROR conditions: 195 // - state.ErrUnknownSnapshotReference if the blockID is for an unknown block 196 func (c *Consensus) IdentitiesByBlock(blockID flow.Identifier) (flow.IdentityList, error) { 197 il, err := c.state.AtBlockID(blockID).Identities(filter.IsVotingConsensusCommitteeMember) 198 if err != nil { 199 return nil, fmt.Errorf("could not identities at block %x: %w", blockID, err) // state.ErrUnknownSnapshotReference or exception 200 } 201 return il, nil 202 } 203 204 // IdentityByBlock returns the identity of the node with the given node ID at the given block. 205 // ERROR conditions: 206 // - model.InvalidSignerError if participantID does NOT correspond to an authorized HotStuff participant at the specified block. 207 // - state.ErrUnknownSnapshotReference if the blockID is for an unknown block 208 func (c *Consensus) IdentityByBlock(blockID flow.Identifier, nodeID flow.Identifier) (*flow.Identity, error) { 209 identity, err := c.state.AtBlockID(blockID).Identity(nodeID) 210 if err != nil { 211 if protocol.IsIdentityNotFound(err) { 212 return nil, model.NewInvalidSignerErrorf("id %v is not a valid node id: %w", nodeID, err) 213 } 214 return nil, fmt.Errorf("could not get identity for node ID %x: %w", nodeID, err) // state.ErrUnknownSnapshotReference or exception 215 } 216 if !filter.IsVotingConsensusCommitteeMember(identity) { 217 return nil, model.NewInvalidSignerErrorf("node %v is not an authorized hotstuff voting participant", nodeID) 218 } 219 return identity, nil 220 } 221 222 // IdentitiesByEpoch returns the committee identities in the epoch which contains 223 // the given view. 224 // CAUTION: This method considers epochs outside of Previous, Current, Next, w.r.t. the 225 // finalized block, to be unknown. https://github.com/onflow/flow-go/issues/4085 226 // 227 // Error returns: 228 // - model.ErrViewForUnknownEpoch if no committed epoch containing the given view is known. 229 // This is an expected error and must be handled. 
230 // - unspecific error in case of unexpected problems and bugs 231 func (c *Consensus) IdentitiesByEpoch(view uint64) (flow.IdentitySkeletonList, error) { 232 epochInfo, err := c.staticEpochInfoByView(view) 233 if err != nil { 234 return nil, err 235 } 236 return epochInfo.initialCommittee, nil 237 } 238 239 // IdentityByEpoch returns the identity for the given node ID, in the epoch which 240 // contains the given view. 241 // CAUTION: This method considers epochs outside of Previous, Current, Next, w.r.t. the 242 // finalized block, to be unknown. https://github.com/onflow/flow-go/issues/4085 243 // 244 // Error returns: 245 // - model.ErrViewForUnknownEpoch if no committed epoch containing the given view is known. 246 // This is an expected error and must be handled. 247 // - model.InvalidSignerError if nodeID was not listed by the Epoch Setup event as an 248 // authorized consensus participants. 249 // - unspecific error in case of unexpected problems and bugs 250 func (c *Consensus) IdentityByEpoch(view uint64, participantID flow.Identifier) (*flow.IdentitySkeleton, error) { 251 epochInfo, err := c.staticEpochInfoByView(view) 252 if err != nil { 253 return nil, err 254 } 255 identity, ok := epochInfo.initialCommitteeMap[participantID] 256 if !ok { 257 return nil, model.NewInvalidSignerErrorf("id %v is not a valid node id", participantID) 258 } 259 return identity, nil 260 } 261 262 // LeaderForView returns the node ID of the leader for the given view. 263 // 264 // Error returns: 265 // - model.ErrViewForUnknownEpoch if no committed epoch containing the given view is known. 266 // This is an expected error and must be handled. 
267 // - unspecific error in case of unexpected problems and bugs 268 func (c *Consensus) LeaderForView(view uint64) (flow.Identifier, error) { 269 270 epochInfo, err := c.staticEpochInfoByView(view) 271 if err != nil { 272 return flow.ZeroID, err 273 } 274 leaderID, err := epochInfo.leaders.LeaderForView(view) 275 if leader.IsInvalidViewError(err) { 276 // an invalid view error indicates that no leader was computed for this view 277 // this is a fatal internal error, because the view necessarily is within an 278 // epoch for which we have pre-computed leader selection 279 return flow.ZeroID, fmt.Errorf("unexpected inconsistency in epoch view spans for view %d: %v", view, err) 280 } 281 if err != nil { 282 return flow.ZeroID, err 283 } 284 return leaderID, nil 285 } 286 287 // QuorumThresholdForView returns the minimum weight required to build a valid 288 // QC in the given view. The weight threshold only changes at epoch boundaries 289 // and is computed based on the initial committee weights. 290 // 291 // Error returns: 292 // - model.ErrViewForUnknownEpoch if no committed epoch containing the given view is known. 293 // This is an expected error and must be handled. 294 // - unspecific error in case of unexpected problems and bugs 295 func (c *Consensus) QuorumThresholdForView(view uint64) (uint64, error) { 296 epochInfo, err := c.staticEpochInfoByView(view) 297 if err != nil { 298 return 0, err 299 } 300 return epochInfo.weightThresholdForQC, nil 301 } 302 303 func (c *Consensus) Self() flow.Identifier { 304 return c.me 305 } 306 307 // TimeoutThresholdForView returns the minimum weight of observed timeout objects 308 // to safely immediately timeout for the current view. The weight threshold only 309 // changes at epoch boundaries and is computed based on the initial committee weights. 
310 func (c *Consensus) TimeoutThresholdForView(view uint64) (uint64, error) { 311 epochInfo, err := c.staticEpochInfoByView(view) 312 if err != nil { 313 return 0, err 314 } 315 return epochInfo.weightThresholdForTO, nil 316 } 317 318 // DKG returns the DKG for epoch which includes the given view. 319 // 320 // Error returns: 321 // - model.ErrViewForUnknownEpoch if no committed epoch containing the given view is known. 322 // This is an expected error and must be handled. 323 // - unspecific error in case of unexpected problems and bugs 324 func (c *Consensus) DKG(view uint64) (hotstuff.DKG, error) { 325 epochInfo, err := c.staticEpochInfoByView(view) 326 if err != nil { 327 return nil, err 328 } 329 return epochInfo.dkg, nil 330 } 331 332 // handleProtocolEvents processes queued Epoch events `EpochCommittedPhaseStarted` 333 // and `EpochEmergencyFallbackTriggered`. This function permanently utilizes a worker 334 // routine until the `Component` terminates. 335 // When we observe a new epoch being committed, we compute 336 // the leader selection and cache static info for the epoch. When we observe 337 // epoch emergency fallback being triggered, we inject a fallback epoch. 338 func (c *Consensus) handleProtocolEvents(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { 339 ready() 340 341 for { 342 select { 343 case <-ctx.Done(): 344 return 345 case block := <-c.committedEpochsCh: 346 epoch := c.state.AtBlockID(block.ID()).Epochs().Next() 347 _, err := c.prepareEpoch(epoch) 348 if err != nil { 349 ctx.Throw(err) 350 } 351 case <-c.epochEmergencyFallback: 352 err := c.onEpochEmergencyFallbackTriggered() 353 if err != nil { 354 ctx.Throw(err) 355 } 356 } 357 } 358 } 359 360 // EpochCommittedPhaseStarted informs the `committee.Consensus` that the block starting the Epoch Committed Phase has been finalized. 
361 func (c *Consensus) EpochCommittedPhaseStarted(_ uint64, first *flow.Header) { 362 c.committedEpochsCh <- first 363 } 364 365 // EpochEmergencyFallbackTriggered passes the protocol event to the worker thread. 366 func (c *Consensus) EpochEmergencyFallbackTriggered() { 367 c.epochEmergencyFallback <- struct{}{} 368 } 369 370 // onEpochEmergencyFallbackTriggered handles the protocol event for emergency epoch 371 // fallback mode being triggered. When this occurs, we inject a fallback epoch 372 // to the committee which extends the current epoch. 373 // This method must also be called on initialization, if emergency fallback mode 374 // was triggered in the past. 375 // No errors are expected during normal operation. 376 func (c *Consensus) onEpochEmergencyFallbackTriggered() error { 377 378 // we respond to epoch fallback being triggered at most once, therefore 379 // the core logic is protected by an atomic bool. 380 // although it is only valid for epoch fallback to be triggered once per spork, 381 // we must account for repeated delivery of protocol events. 
382 if !c.isEpochFallbackHandled.CompareAndSwap(false, true) { 383 return nil 384 } 385 386 currentEpochCounter, err := c.state.Final().Epochs().Current().Counter() 387 if err != nil { 388 return fmt.Errorf("could not get current epoch counter: %w", err) 389 } 390 391 c.mu.RLock() 392 // sanity check: current epoch must be cached already 393 currentEpoch, ok := c.epochs[currentEpochCounter] 394 if !ok { 395 c.mu.RUnlock() 396 return fmt.Errorf("epoch fallback: could not find current epoch (counter=%d) info", currentEpochCounter) 397 } 398 // sanity check: next epoch must never be committed, therefore must not be cached 399 _, ok = c.epochs[currentEpochCounter+1] 400 c.mu.RUnlock() 401 if ok { 402 return fmt.Errorf("epoch fallback: next epoch (counter=%d) is cached contrary to expectation", currentEpochCounter+1) 403 } 404 405 fallbackEpoch, err := newEmergencyFallbackEpoch(currentEpoch) 406 if err != nil { 407 return fmt.Errorf("could not construct fallback epoch: %w", err) 408 } 409 410 // cache the epoch info 411 c.mu.Lock() 412 c.epochs[currentEpochCounter+1] = fallbackEpoch 413 c.mu.Unlock() 414 415 return nil 416 } 417 418 // staticEpochInfoByView retrieves the previously cached static epoch info for 419 // the epoch which includes the given view. If no epoch is known for the given 420 // view, we will attempt to cache the next epoch. 421 // 422 // Error returns: 423 // - model.ErrViewForUnknownEpoch if no committed epoch containing the given view is known 424 // - unspecific error in case of unexpected problems and bugs 425 func (c *Consensus) staticEpochInfoByView(view uint64) (*staticEpochInfo, error) { 426 427 // look for an epoch matching this view for which we have already pre-computed 428 // leader selection. Epochs last ~500k views, so we find the epoch here 99.99% 429 // of the time. Since epochs are long-lived and we only cache the most recent 3, 430 // this linear map iteration is inexpensive. 
431 c.mu.RLock() 432 for _, epoch := range c.epochs { 433 if epoch.firstView <= view && view <= epoch.finalView { 434 c.mu.RUnlock() 435 return epoch, nil 436 } 437 } 438 c.mu.RUnlock() 439 440 return nil, model.ErrViewForUnknownEpoch 441 } 442 443 // prepareEpoch pre-computes and stores the static epoch information for the 444 // given epoch, including leader selection. Calling prepareEpoch multiple times 445 // for the same epoch returns cached static epoch information. 446 // Input must be a committed epoch. 447 // No errors are expected during normal operation. 448 func (c *Consensus) prepareEpoch(epoch protocol.Epoch) (*staticEpochInfo, error) { 449 450 counter, err := epoch.Counter() 451 if err != nil { 452 return nil, fmt.Errorf("could not get counter for epoch to prepare: %w", err) 453 } 454 455 // this is a no-op if we have already computed static info for this epoch 456 c.mu.RLock() 457 epochInfo, exists := c.epochs[counter] 458 c.mu.RUnlock() 459 if exists { 460 return epochInfo, nil 461 } 462 463 epochInfo, err = newStaticEpochInfo(epoch) 464 if err != nil { 465 return nil, fmt.Errorf("could not create static epoch info for epch %d: %w", counter, err) 466 } 467 468 // sanity check: ensure new epoch has contiguous views with the prior epoch 469 c.mu.RLock() 470 prevEpochInfo, exists := c.epochs[counter-1] 471 c.mu.RUnlock() 472 if exists { 473 if epochInfo.firstView != prevEpochInfo.finalView+1 { 474 return nil, fmt.Errorf("non-contiguous view ranges between consecutive epochs (epoch_%d=[%d,%d], epoch_%d=[%d,%d])", 475 counter-1, prevEpochInfo.firstView, prevEpochInfo.finalView, 476 counter, epochInfo.firstView, epochInfo.finalView) 477 } 478 } 479 480 // cache the epoch info 481 c.mu.Lock() 482 defer c.mu.Unlock() 483 c.epochs[counter] = epochInfo 484 // now prune any old epochs, if we have exceeded our maximum of 3 485 // if we have fewer than 3 epochs, this is a no-op 486 c.pruneEpochInfo() 487 return epochInfo, nil 488 } 489 490 // pruneEpochInfo 
removes any epochs older than the most recent 3. 491 // NOTE: Not safe for concurrent use - the caller must first acquire the lock. 492 func (c *Consensus) pruneEpochInfo() { 493 // find the maximum counter, including the epoch we just computed 494 max := uint64(0) 495 for counter := range c.epochs { 496 if counter > max { 497 max = counter 498 } 499 } 500 501 // remove any epochs which aren't within the most recent 3 502 for counter := range c.epochs { 503 if counter+3 <= max { 504 delete(c.epochs, counter) 505 } 506 } 507 }