github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/consensus/hotstuff/safetyrules/safety_rules.go (about) 1 package safetyrules 2 3 import ( 4 "fmt" 5 6 "github.com/onflow/flow-go/consensus/hotstuff" 7 "github.com/onflow/flow-go/consensus/hotstuff/model" 8 "github.com/onflow/flow-go/model/flow" 9 ) 10 11 // SafetyRules is a dedicated module that enforces consensus safety. This component has the sole authority to generate 12 // votes and timeouts. It follows voting and timeout rules for creating votes and timeouts respectively. 13 // Caller can be sure that created vote or timeout doesn't break safety and can be used in consensus process. 14 // SafetyRules relies on hotstuff.Persister to store latest state of hotstuff.SafetyData. 15 // 16 // The voting rules implemented by SafetyRules are: 17 // 1. Replicas vote strictly in increasing rounds 18 // 2. Each block has to include a TC or a QC from the previous round. 19 // a. [Happy path] If the previous round resulted in a QC then new QC should extend it. 20 // b. [Recovery path] If the previous round did *not* result in a QC, the leader of the 21 // subsequent round *must* include a valid TC for the previous round in its block. 22 // 23 // NOT safe for concurrent use. 24 type SafetyRules struct { 25 signer hotstuff.Signer 26 persist hotstuff.Persister 27 committee hotstuff.DynamicCommittee // only produce votes when we are valid committee members 28 safetyData *hotstuff.SafetyData 29 } 30 31 var _ hotstuff.SafetyRules = (*SafetyRules)(nil) 32 33 // New creates a new SafetyRules instance 34 func New( 35 signer hotstuff.Signer, 36 persist hotstuff.Persister, 37 committee hotstuff.DynamicCommittee, 38 ) (*SafetyRules, error) { 39 // get the last stored safety data 40 safetyData, err := persist.GetSafetyData() 41 if err != nil { 42 return nil, fmt.Errorf("could not recover safety data: %w", err) 43 } 44 45 return &SafetyRules{ 46 signer: signer, 47 persist: persist, 48 committee: committee, 49 safetyData: safetyData, 50 }, nil 51 } 52 53 // ProduceVote will make a decision on whether it will vote for the given proposal, the returned 54 // error indicates whether to vote or not. 55 // To ensure that only safe proposals are being voted on, we check that the proposer is a valid committee member and that the 56 // proposal complies with voting rules. 57 // We expect that only well-formed proposals with valid signatures are submitted for voting. 58 // The curView is taken as input to ensure SafetyRules will only vote for proposals at current view and prevent double voting. 59 // Returns: 60 // - (vote, nil): On the _first_ block for the current view that is safe to vote for. 61 // Subsequently, voter does _not_ vote for any other block with the same (or lower) view. 62 // - (nil, model.NoVoteError): If the voter decides that it does not want to vote for the given block. 63 // This is a sentinel error and _expected_ during normal operation. 64 // 65 // All other errors are unexpected and potential symptoms of uncovered edge cases or corrupted internal state (fatal). 66 func (r *SafetyRules) ProduceVote(proposal *model.Proposal, curView uint64) (*model.Vote, error) { 67 block := proposal.Block 68 // sanity checks: 69 if curView != block.View { 70 return nil, fmt.Errorf("expecting block for current view %d, but block's view is %d", curView, block.View) 71 } 72 73 err := r.IsSafeToVote(proposal) 74 if err != nil { 75 return nil, fmt.Errorf("not safe to vote for proposal %x: %w", proposal.Block.BlockID, err) 76 } 77 78 // we expect that only valid proposals are submitted for voting 79 // we need to make sure that proposer is not ejected to decide to vote or not 80 _, err = r.committee.IdentityByBlock(block.BlockID, block.ProposerID) 81 if model.IsInvalidSignerError(err) { 82 // the proposer must be ejected since the proposal has already been validated, 83 // which ensures that the proposer was a valid committee member at the start of the epoch 84 return nil, model.NewNoVoteErrorf("proposer ejected: %w", err) 85 } 86 if err != nil { 87 return nil, fmt.Errorf("internal error retrieving Identity of proposer %x at block %x: %w", block.ProposerID, block.BlockID, err) 88 } 89 90 // Do not produce a vote for blocks where we are not a valid committee member. 91 // HotStuff will ask for a vote for the first block of the next epoch, even if we 92 // have zero weight in the next epoch. Such vote can't be used to produce valid QCs. 93 _, err = r.committee.IdentityByBlock(block.BlockID, r.committee.Self()) 94 if model.IsInvalidSignerError(err) { 95 return nil, model.NewNoVoteErrorf("I am not authorized to vote for block %x: %w", block.BlockID, err) 96 } 97 if err != nil { 98 return nil, fmt.Errorf("could not get self identity: %w", err) 99 } 100 101 vote, err := r.signer.CreateVote(block) 102 if err != nil { 103 return nil, fmt.Errorf("could not vote for block: %w", err) 104 } 105 106 // vote for the current view has been produced, update safetyData 107 r.safetyData.HighestAcknowledgedView = curView 108 if r.safetyData.LockedOneChainView < block.QC.View { 109 r.safetyData.LockedOneChainView = block.QC.View 110 } 111 112 err = r.persist.PutSafetyData(r.safetyData) 113 if err != nil { 114 return nil, fmt.Errorf("could not persist safety data: %w", err) 115 } 116 117 return vote, nil 118 } 119 120 // ProduceTimeout takes current view, highest locally known QC and TC (optional, must be nil if and 121 // only if QC is for previous view) and decides whether to produce timeout for current view. 122 // Returns: 123 // - (timeout, nil): It is safe to timeout for current view using newestQC and lastViewTC. 124 // - (nil, model.NoTimeoutError): If replica is not part of the authorized consensus committee (anymore) and 125 // therefore is not authorized to produce a valid timeout object. This sentinel error is _expected_ during 126 // normal operation, e.g. during the grace-period after Epoch switchover or after the replica self-ejected. 127 // 128 // All other errors are unexpected and potential symptoms of uncovered edge cases or corrupted internal state (fatal). 129 func (r *SafetyRules) ProduceTimeout(curView uint64, newestQC *flow.QuorumCertificate, lastViewTC *flow.TimeoutCertificate) (*model.TimeoutObject, error) { 130 lastTimeout := r.safetyData.LastTimeout 131 if lastTimeout != nil && lastTimeout.View == curView { 132 // model.TimeoutObject are conceptually immutable, hence we create a shallow copy here, which allows us to increment TimeoutTick 133 updatedTimeout := *lastTimeout 134 updatedTimeout.TimeoutTick += 1 135 136 // persist updated TimeoutObject in `safetyData` and return it 137 r.safetyData.LastTimeout = &updatedTimeout 138 err := r.persist.PutSafetyData(r.safetyData) 139 if err != nil { 140 return nil, fmt.Errorf("could not persist safety data: %w", err) 141 } 142 return r.safetyData.LastTimeout, nil 143 } 144 145 err := r.IsSafeToTimeout(curView, newestQC, lastViewTC) 146 if err != nil { 147 return nil, fmt.Errorf("local, trusted inputs failed safety rules: %w", err) 148 } 149 150 // Do not produce a timeout for view where we are not a valid committee member. 151 _, err = r.committee.IdentityByEpoch(curView, r.committee.Self()) 152 if err != nil { 153 if model.IsInvalidSignerError(err) { 154 return nil, model.NewNoTimeoutErrorf("I am not authorized to timeout for view %d: %w", curView, err) 155 } 156 return nil, fmt.Errorf("could not get self identity: %w", err) 157 } 158 159 timeout, err := r.signer.CreateTimeout(curView, newestQC, lastViewTC) 160 if err != nil { 161 return nil, fmt.Errorf("could not create timeout at view %d: %w", curView, err) 162 } 163 164 r.safetyData.HighestAcknowledgedView = curView 165 r.safetyData.LastTimeout = timeout 166 167 err = r.persist.PutSafetyData(r.safetyData) 168 if err != nil { 169 return nil, fmt.Errorf("could not persist safety data: %w", err) 170 } 171 172 return timeout, nil 173 } 174 175 // IsSafeToVote checks if this proposal is valid in terms of voting rules, if voting for this proposal won't break safety rules. 176 // Expected errors during normal operations: 177 // - NoVoteError if replica already acted during this view (either voted or generated timeout) 178 func (r *SafetyRules) IsSafeToVote(proposal *model.Proposal) error { 179 blockView := proposal.Block.View 180 181 err := r.validateEvidenceForEnteringView(blockView, proposal.Block.QC, proposal.LastViewTC) 182 if err != nil { 183 // As we are expecting the blocks to be pre-validated, any failure here is a symptom of an internal bug. 184 return fmt.Errorf("proposal failed consensus validity check") 185 } 186 187 // This check satisfies voting rule 1 188 // 1. Replicas vote strictly in increasing rounds, 189 // block's view must be greater than the view that we have voted for 190 acView := r.safetyData.HighestAcknowledgedView 191 if blockView == acView { 192 return model.NewNoVoteErrorf("already voted or generated timeout in view %d", blockView) 193 } 194 if blockView < acView { 195 return fmt.Errorf("already acted during view %d but got proposal for lower view %d", acView, blockView) 196 } 197 198 return nil 199 } 200 201 // IsSafeToTimeout checks if it's safe to timeout with proposed data, i.e. timing out won't break safety. 202 // newestQC is the valid QC with the greatest view that we have observed. 203 // lastViewTC is the TC for the previous view (might be nil). 204 // 205 // When generating a timeout, the inputs are provided by node-internal components. Failure to comply with 206 // the protocol is a symptom of an internal bug. We don't expect any errors during normal operations. 207 func (r *SafetyRules) IsSafeToTimeout(curView uint64, newestQC *flow.QuorumCertificate, lastViewTC *flow.TimeoutCertificate) error { 208 err := r.validateEvidenceForEnteringView(curView, newestQC, lastViewTC) 209 if err != nil { 210 return fmt.Errorf("not safe to timeout: %w", err) 211 } 212 213 if newestQC.View < r.safetyData.LockedOneChainView { 214 return fmt.Errorf("have already seen QC for view %d, but newest QC is reported to be for view %d", r.safetyData.LockedOneChainView, newestQC.View) 215 } 216 if curView+1 <= r.safetyData.HighestAcknowledgedView { 217 return fmt.Errorf("cannot generate timeout for past view %d", curView) 218 } 219 // the logic for rejecting inputs with `curView <= newestQC.View` is already contained 220 // in `validateEvidenceForEnteringView(..)`, because it only passes if 221 // * either `curView == newestQC.View + 1` (condition 2) 222 // * or `curView > newestQC.View` (condition 4) 223 224 return nil 225 } 226 227 // validateEvidenceForEnteringView performs the following check that is fundamental for consensus safety: 228 // Whenever a replica acts within a view, it must prove that is has sufficient evidence to enter this view 229 // Specifically: 230 // 1. The replica must always provide a QC and optionally a TC. 231 // 2. [Happy Path] If the previous round (i.e. `view -1`) resulted in a QC, the replica is allowed to transition to `view`. 232 // The QC from the previous round provides sufficient evidence. Furthermore, to prevent resource-exhaustion attacks, 233 // we require that no TC is included as part of the proof. 234 // 3. Following the Happy Path has priority over following the Recovery Path (specified below). 235 // 4. [Recovery Path] If the previous round (i.e. `view -1`) did *not* result in a QC, a TC from the previous round 236 // is required to transition to `view`. The following additional consistency requirements have to be satisfied: 237 // (a) newestQC.View + 1 < view 238 // Otherwise, the replica has violated condition 3 (in case newestQC.View + 1 = view); or the replica 239 // failed to apply condition 2 (in case newestQC.View + 1 > view). 240 // (b) newestQC.View ≥ lastViewTC.NewestQC.View 241 // Otherwise, the replica has violated condition 3. 242 // 243 // SafetyRules has the sole signing authority and enforces adherence to these conditions. In order to generate valid 244 // consensus signatures, the replica must provide the respective evidence (required QC + optional TC) to its 245 // internal SafetyRules component for each consensus action that the replica wants to take: 246 // - primary signing its own proposal 247 // - replica voting for a block 248 // - replica generating a timeout message 249 // 250 // During normal operations, no errors are expected: 251 // - As we are expecting the blocks to be pre-validated, any failure here is a symptom of an internal bug. 252 // - When generating a timeout, the inputs are provided by node-internal components. Failure to comply with 253 // the protocol is a symptom of an internal bug. 254 func (r *SafetyRules) validateEvidenceForEnteringView(view uint64, newestQC *flow.QuorumCertificate, lastViewTC *flow.TimeoutCertificate) error { 255 // Condition 1: 256 if newestQC == nil { 257 return fmt.Errorf("missing the mandatory QC") 258 } 259 260 // Condition 2: 261 if newestQC.View+1 == view { 262 if lastViewTC != nil { 263 return fmt.Errorf("when QC is for prior round, no TC should be provided") 264 } 265 return nil 266 } 267 // Condition 3: if we reach the following lines, the happy path is not satisfied. 268 269 // Condition 4: 270 if lastViewTC == nil { 271 return fmt.Errorf("expecting TC because QC is not for prior view; but didn't get any TC") 272 } 273 if lastViewTC.View+1 != view { 274 return fmt.Errorf("neither QC (view %d) nor TC (view %d) allows to transition to view %d", newestQC.View, lastViewTC.View, view) 275 } 276 if newestQC.View >= view { 277 // Note: we need to enforce here that `newestQC.View + 1 < view`, i.e. we error for `newestQC.View+1 >= view` 278 // However, `newestQC.View+1 == view` is impossible, because otherwise we would have walked into condition 2. 279 // Hence, it suffices to error if `newestQC.View+1 > view`, which is identical to `newestQC.View >= view` 280 return fmt.Errorf("still at view %d, despite knowing a QC for view %d", view, newestQC.View) 281 } 282 if newestQC.View < lastViewTC.NewestQC.View { 283 return fmt.Errorf("failed to update newest QC (still at view %d) despite a newer QC (view %d) being included in TC", newestQC.View, lastViewTC.NewestQC.View) 284 } 285 286 return nil 287 }