github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/consensus/hotstuff/timeoutcollector/timeout_processor.go (about) 1 package timeoutcollector 2 3 import ( 4 "errors" 5 "fmt" 6 7 "github.com/rs/zerolog" 8 "go.uber.org/atomic" 9 "golang.org/x/exp/slices" 10 11 "github.com/onflow/flow-go/consensus/hotstuff" 12 "github.com/onflow/flow-go/consensus/hotstuff/model" 13 "github.com/onflow/flow-go/consensus/hotstuff/tracker" 14 "github.com/onflow/flow-go/model/flow" 15 "github.com/onflow/flow-go/module/signature" 16 ) 17 18 // accumulatedWeightTracker tracks one-time event of reaching required weight 19 // Uses atomic flag to guarantee concurrency safety. 20 type accumulatedWeightTracker struct { 21 minRequiredWeight uint64 22 done atomic.Bool 23 } 24 25 func (t *accumulatedWeightTracker) Done() bool { 26 return t.done.Load() 27 } 28 29 // Track returns true if `weight` reaches or exceeds `minRequiredWeight` for the _first time_. 30 // All subsequent calls of `Track` (with any value) return false. 31 func (t *accumulatedWeightTracker) Track(weight uint64) bool { 32 if weight < t.minRequiredWeight { 33 return false 34 } 35 return t.done.CompareAndSwap(false, true) 36 } 37 38 // TimeoutProcessor implements the hotstuff.TimeoutProcessor interface. 39 // It processes timeout objects broadcast by other replicas of the consensus committee. 40 // TimeoutProcessor collects TOs for one view, eventually when enough timeout objects are contributed 41 // TimeoutProcessor will create a timeout certificate which can be used to advance round. 42 // Concurrency safe. 43 type TimeoutProcessor struct { 44 log zerolog.Logger 45 view uint64 46 validator hotstuff.Validator 47 committee hotstuff.Replicas 48 sigAggregator hotstuff.TimeoutSignatureAggregator 49 notifier hotstuff.TimeoutCollectorConsumer 50 partialTCTracker accumulatedWeightTracker 51 tcTracker accumulatedWeightTracker 52 newestQCTracker *tracker.NewestQCTracker 53 } 54 55 var _ hotstuff.TimeoutProcessor = (*TimeoutProcessor)(nil) 56 57 // NewTimeoutProcessor creates new instance of TimeoutProcessor 58 // Returns the following expected errors for invalid inputs: 59 // - model.ErrViewForUnknownEpoch if no epoch containing the given view is known 60 // 61 // All other errors should be treated as exceptions. 62 func NewTimeoutProcessor(log zerolog.Logger, 63 committee hotstuff.Replicas, 64 validator hotstuff.Validator, 65 sigAggregator hotstuff.TimeoutSignatureAggregator, 66 notifier hotstuff.TimeoutCollectorConsumer, 67 ) (*TimeoutProcessor, error) { 68 view := sigAggregator.View() 69 qcThreshold, err := committee.QuorumThresholdForView(view) 70 if err != nil { 71 return nil, fmt.Errorf("could not retrieve QC weight threshold for view %d: %w", view, err) 72 } 73 timeoutThreshold, err := committee.TimeoutThresholdForView(view) 74 if err != nil { 75 return nil, fmt.Errorf("could not retrieve timeout weight threshold for view %d: %w", view, err) 76 } 77 return &TimeoutProcessor{ 78 log: log.With(). 79 Str("component", "hotstuff.timeout_processor"). 80 Uint64("view", view). 81 Logger(), 82 view: view, 83 committee: committee, 84 validator: validator, 85 notifier: notifier, 86 partialTCTracker: accumulatedWeightTracker{ 87 minRequiredWeight: timeoutThreshold, 88 done: *atomic.NewBool(false), 89 }, 90 tcTracker: accumulatedWeightTracker{ 91 minRequiredWeight: qcThreshold, 92 done: *atomic.NewBool(false), 93 }, 94 sigAggregator: sigAggregator, 95 newestQCTracker: tracker.NewNewestQCTracker(), 96 }, nil 97 } 98 99 // Process performs processing of timeout object in concurrent safe way. This 100 // function is implemented to be called by multiple goroutines at the same time. 101 // Design of this function is event driven, as soon as we collect enough weight 102 // to create a TC or a partial TC we will immediately do so and submit it 103 // via callback for further processing. 104 // Expected error returns during normal operations: 105 // - ErrTimeoutForIncompatibleView - submitted timeout for incompatible view 106 // - model.InvalidTimeoutError - submitted invalid timeout(invalid structure or invalid signature) 107 // - model.DuplicatedSignerError if a timeout from the same signer was previously already added 108 // It does _not necessarily_ imply that the timeout is invalid or the sender is equivocating. 109 // 110 // All other errors should be treated as exceptions. 111 func (p *TimeoutProcessor) Process(timeout *model.TimeoutObject) error { 112 if p.view != timeout.View { 113 return fmt.Errorf("received incompatible timeout, expected %d got %d: %w", p.view, timeout.View, ErrTimeoutForIncompatibleView) 114 } 115 116 if p.tcTracker.Done() { 117 return nil 118 } 119 120 err := p.validateTimeout(timeout) 121 if err != nil { 122 return fmt.Errorf("validating timeout failed: %w", err) 123 } 124 if p.tcTracker.Done() { 125 return nil 126 } 127 128 // CAUTION: for correctness it is critical that we update the `newestQCTracker` first, _before_ we add the 129 // TO's signature to `sigAggregator`. Reasoning: 130 // * For a valid TC, we require that the TC includes a QC with view ≥ max{TC.NewestQCViews}. 131 // * The `NewestQCViews` is maintained by `sigAggregator`. 132 // * Hence, for any view `v ∈ NewestQCViews` that `sigAggregator` knows, a QC with equal or larger view is 133 // known to `newestQCTracker`. This is guaranteed if and only if `newestQCTracker` is updated first. 134 p.newestQCTracker.Track(timeout.NewestQC) 135 136 totalWeight, err := p.sigAggregator.VerifyAndAdd(timeout.SignerID, timeout.SigData, timeout.NewestQC.View) 137 if err != nil { 138 if model.IsInvalidSignerError(err) { 139 return model.NewInvalidTimeoutErrorf(timeout, "invalid signer for timeout: %w", err) 140 } 141 if errors.Is(err, model.ErrInvalidSignature) { 142 return model.NewInvalidTimeoutErrorf(timeout, "timeout is from valid signer but has cryptographically invalid signature: %w", err) 143 } 144 // model.DuplicatedSignerError is an expected error and just bubbled up the call stack. 145 // It does _not necessarily_ imply that the timeout is invalid or the sender is equivocating. 146 return fmt.Errorf("adding signature to aggregator failed: %w", err) 147 } 148 p.log.Debug().Msgf("processed timeout, total weight=(%d), required=(%d)", totalWeight, p.tcTracker.minRequiredWeight) 149 150 if p.partialTCTracker.Track(totalWeight) { 151 p.notifier.OnPartialTcCreated(p.view, p.newestQCTracker.NewestQC(), timeout.LastViewTC) 152 } 153 154 // Checking of conditions for building TC are satisfied when willBuildTC is true. 155 // At this point, we have enough signatures to build a TC. Another routine 156 // might just be at this point. To avoid duplicate work, Track returns true only once. 157 willBuildTC := p.tcTracker.Track(totalWeight) 158 if !willBuildTC { 159 // either we do not have enough timeouts to build a TC, or another thread 160 // has already passed this gate and created a TC 161 return nil 162 } 163 164 tc, err := p.buildTC() 165 if err != nil { 166 return fmt.Errorf("internal error constructing TC: %w", err) 167 } 168 p.notifier.OnTcConstructedFromTimeouts(tc) 169 170 return nil 171 } 172 173 // validateTimeout performs validation of timeout object, verifies if timeout is correctly structured 174 // and included QC and TC is correctly structured and signed. 175 // ATTENTION: this function does _not_ check whether the TO's `SignerID` is an authorized node nor if 176 // the signature is valid. These checks happen in signature aggregator. 177 // Expected error returns during normal operations: 178 // * model.InvalidTimeoutError - submitted invalid timeout 179 // All other errors should be treated as exceptions. 180 func (p *TimeoutProcessor) validateTimeout(timeout *model.TimeoutObject) error { 181 // 1. check if it's correctly structured 182 // (a) Every TO must contain a QC 183 if timeout.NewestQC == nil { 184 return model.NewInvalidTimeoutErrorf(timeout, "TimeoutObject without QC is invalid") 185 } 186 187 if timeout.View <= timeout.NewestQC.View { 188 return model.NewInvalidTimeoutErrorf(timeout, "TO's QC %d cannot be newer than the TO's view %d", 189 timeout.NewestQC.View, timeout.View) 190 } 191 192 // (b) If a TC is included, the TC must be for the past round, no matter whether a QC 193 // for the last round is also included. In some edge cases, a node might observe 194 // _both_ QC and TC for the previous round, in which case it can include both. 195 if timeout.LastViewTC != nil { 196 if timeout.View != timeout.LastViewTC.View+1 { 197 return model.NewInvalidTimeoutErrorf(timeout, "invalid TC for non-previous view, expected view %d, got view %d", timeout.View-1, timeout.LastViewTC.View) 198 } 199 if timeout.NewestQC.View < timeout.LastViewTC.NewestQC.View { 200 return model.NewInvalidTimeoutErrorf(timeout, "timeout.NewestQC is older (view=%d) than the QC in timeout.LastViewTC (view=%d)", timeout.NewestQC.View, timeout.LastViewTC.NewestQC.View) 201 } 202 } 203 // (c) The TO must contain a proof that sender legitimately entered timeout.View. Transitioning 204 // to round timeout.View is possible either by observing a QC or a TC for the previous round. 205 // If no QC is included, we require a TC to be present, which by check (1b) must be for 206 // the previous round. 207 lastViewSuccessful := timeout.View == timeout.NewestQC.View+1 208 if !lastViewSuccessful { 209 // The TO's sender did _not_ observe a QC for round timeout.View-1. Hence, it should 210 // include a TC for the previous round. Otherwise, the TO is invalid. 211 if timeout.LastViewTC == nil { 212 return model.NewInvalidTimeoutErrorf(timeout, "timeout must include TC") 213 } 214 } 215 216 // 2. Check if QC is valid 217 err := p.validator.ValidateQC(timeout.NewestQC) 218 if err != nil { 219 if model.IsInvalidQCError(err) { 220 return model.NewInvalidTimeoutErrorf(timeout, "included QC is invalid: %w", err) 221 } 222 if errors.Is(err, model.ErrViewForUnknownEpoch) { 223 // We require each replica to be bootstrapped with a QC pointing to a finalized block. Therefore, we should know the 224 // Epoch for any QC.View and TC.View we encounter. Receiving a `model.ErrViewForUnknownEpoch` is conceptually impossible, 225 // i.e. a symptom of an internal bug or invalid bootstrapping information. 226 return fmt.Errorf("no Epoch information availalbe for QC that was included in TO; symptom of internal bug or invalid bootstrapping information: %s", err.Error()) 227 } 228 return fmt.Errorf("unexpected error when validating QC: %w", err) 229 } 230 231 // 3. If TC is included, it must be valid 232 if timeout.LastViewTC != nil { 233 err = p.validator.ValidateTC(timeout.LastViewTC) 234 if err != nil { 235 if model.IsInvalidTCError(err) { 236 return model.NewInvalidTimeoutErrorf(timeout, "included TC is invalid: %w", err) 237 } 238 if errors.Is(err, model.ErrViewForUnknownEpoch) { 239 // We require each replica to be bootstrapped with a QC pointing to a finalized block. Therefore, we should know the 240 // Epoch for any QC.View and TC.View we encounter. Receiving a `model.ErrViewForUnknownEpoch` is conceptually impossible, 241 // i.e. a symptom of an internal bug or invalid bootstrapping information. 242 return fmt.Errorf("no Epoch information availalbe for TC that was included in TO; symptom of internal bug or invalid bootstrapping information: %s", err.Error()) 243 } 244 return fmt.Errorf("unexpected error when validating TC: %w", err) 245 } 246 } 247 return nil 248 249 } 250 251 // buildTC performs aggregation of signatures when we have collected enough 252 // weight for building TC. This function is run only once by single worker. 253 // Any error should be treated as exception. 254 func (p *TimeoutProcessor) buildTC() (*flow.TimeoutCertificate, error) { 255 signersData, aggregatedSig, err := p.sigAggregator.Aggregate() 256 if err != nil { 257 return nil, fmt.Errorf("could not aggregate multi message signature: %w", err) 258 } 259 260 // IMPORTANT: To properly verify an aggregated signature included in TC we need to provide list of signers with corresponding 261 // messages(`TimeoutCertificate.NewestQCViews`) for each signer. If the one-to-once correspondence of view and signer is not maintained, 262 // it won't be possible to verify the aggregated signature. 263 // Aggregate returns an unordered set of signers together with additional data. 264 // Due to implementation specifics of signer indices, the decoding step results in canonically ordered signer ids, which means 265 // we need to canonically order the respective `newestQCView`, so we can properly map signer to `newestQCView` after decoding. 266 267 // sort data in canonical order 268 slices.SortFunc(signersData, func(lhs, rhs hotstuff.TimeoutSignerInfo) int { 269 return flow.IdentifierCanonical(lhs.Signer, rhs.Signer) 270 }) 271 272 // extract signers and data separately 273 signers := make([]flow.Identifier, 0, len(signersData)) 274 newestQCViews := make([]uint64, 0, len(signersData)) 275 for _, data := range signersData { 276 signers = append(signers, data.Signer) 277 newestQCViews = append(newestQCViews, data.NewestQCView) 278 } 279 280 signerIndices, err := p.signerIndicesFromIdentities(signers) 281 if err != nil { 282 return nil, fmt.Errorf("could not encode signer indices: %w", err) 283 } 284 285 // Note that `newestQC` can have a larger view than any of the views included in `newestQCViews`. 286 // This is because for a TO currently being processes following two operations are executed in separate steps: 287 // * updating the `newestQCTracker` with the QC from the TO 288 // * adding the TO's signature to `sigAggregator` 289 // Therefore, races are possible, where the `newestQCTracker` already knows of a QC with larger view 290 // than the data stored in `sigAggregator`. 291 newestQC := p.newestQCTracker.NewestQC() 292 293 return &flow.TimeoutCertificate{ 294 View: p.view, 295 NewestQCViews: newestQCViews, 296 NewestQC: newestQC, 297 SignerIndices: signerIndices, 298 SigData: aggregatedSig, 299 }, nil 300 } 301 302 // signerIndicesFromIdentities encodes identities into signer indices. 303 // Any error should be treated as exception. 304 func (p *TimeoutProcessor) signerIndicesFromIdentities(signerIDs flow.IdentifierList) ([]byte, error) { 305 allIdentities, err := p.committee.IdentitiesByEpoch(p.view) 306 if err != nil { 307 return nil, fmt.Errorf("could not retrieve identities for view %d: %w", p.view, err) 308 } 309 signerIndices, err := signature.EncodeSignersToIndices(allIdentities.NodeIDs(), signerIDs) 310 if err != nil { 311 return nil, fmt.Errorf("could not encode signer identifiers to indices: %w", err) 312 } 313 return signerIndices, nil 314 }