github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/engine/consensus/approvals/assignment_collector_statemachine.go (about) 1 package approvals 2 3 import ( 4 "errors" 5 "fmt" 6 "sync" 7 8 "go.uber.org/atomic" 9 10 "github.com/onflow/flow-go/engine" 11 "github.com/onflow/flow-go/engine/consensus" 12 "github.com/onflow/flow-go/model/flow" 13 ) 14 15 var ( 16 ErrInvalidCollectorStateTransition = errors.New("invalid state transition") 17 ErrDifferentCollectorState = errors.New("different state") 18 ) 19 20 // AssignmentCollectorStateMachine implements the `AssignmentCollector` interface. 21 // It wraps the current `AssignmentCollectorState` and provides logic for state transitions. 22 // Any state-specific logic is delegated to the state-specific instance. 23 // AssignmentCollectorStateMachine is fully concurrent. 24 // 25 // Comment on concurrency safety for state-specific business logic: 26 // - AssignmentCollectorStateMachine processes state updates concurrently with 27 // state-specific business logic. Hence, it can happen that we update a stale 28 // state. 29 // - To guarantee that we hand inputs to the latest state, we employ a 30 // "Compare And Repeat Pattern": we atomically read the state before and after the 31 // operation. If the state changed, we updated a stale state. We repeat until 32 // we confirm that the latest state was updated. 33 type AssignmentCollectorStateMachine struct { 34 AssignmentCollectorBase 35 36 // collector references the assignment collector in its current state. The value is 37 // frequently read, but infrequently updated. Reads are atomic and therefore concurrency 38 // safe. For state updates (write), we use a mutex to guarantee that a state update is 39 // always based on the most recent value. 40 collector atomic.Value 41 sync.Mutex 42 } 43 44 func (asm *AssignmentCollectorStateMachine) atomicLoadCollector() AssignmentCollectorState { 45 return asm.collector.Load().(*atomicValueWrapper).collector 46 } 47 48 // atomic.Value doesn't allow storing interfaces as atomic values, 49 // it requires that stored type is always the same so we need a wrapper that will mitigate this restriction 50 // https://github.com/golang/go/issues/22550 51 type atomicValueWrapper struct { 52 collector AssignmentCollectorState 53 } 54 55 func NewAssignmentCollectorStateMachine(collectorBase AssignmentCollectorBase) *AssignmentCollectorStateMachine { 56 sm := &AssignmentCollectorStateMachine{ 57 AssignmentCollectorBase: collectorBase, 58 } 59 60 // by default start with caching collector 61 sm.collector.Store(&atomicValueWrapper{ 62 collector: NewCachingAssignmentCollector(collectorBase), 63 }) 64 return sm 65 } 66 67 // ProcessIncorporatedResult starts tracking the approval for IncorporatedResult. 68 // Method is idempotent. 69 // Error Returns: 70 // - no errors expected during normal operation; 71 // errors might be symptoms of bugs or internal state corruption (fatal) 72 func (asm *AssignmentCollectorStateMachine) ProcessIncorporatedResult(incorporatedResult *flow.IncorporatedResult) error { 73 for { // Compare And Repeat if state update occurred concurrently 74 collector := asm.atomicLoadCollector() 75 currentState := collector.ProcessingStatus() 76 err := collector.ProcessIncorporatedResult(incorporatedResult) 77 if err != nil { 78 return fmt.Errorf("could not process incorporated result %v: %w", incorporatedResult.ID(), err) 79 } 80 if currentState != asm.ProcessingStatus() { 81 continue 82 } 83 return nil 84 } 85 } 86 87 // ProcessApproval ingests Result Approvals and triggers sealing of execution result 88 // when sufficient approvals have arrived. 89 // Error Returns: 90 // - nil in case of success (outdated approvals might be silently discarded) 91 // - engine.InvalidInputError if the result approval is invalid 92 // - any other errors might be symptoms of bugs or internal state corruption (fatal) 93 func (asm *AssignmentCollectorStateMachine) ProcessApproval(approval *flow.ResultApproval) error { 94 for { // Compare And Repeat if state update occurred concurrently 95 collector := asm.atomicLoadCollector() 96 currentState := collector.ProcessingStatus() 97 err := collector.ProcessApproval(approval) 98 if err != nil { 99 return fmt.Errorf("could not process approval %v: %w", approval.ID(), err) 100 } 101 if currentState != asm.ProcessingStatus() { 102 continue 103 } 104 return nil 105 } 106 } 107 108 // CheckEmergencySealing checks whether this AssignmentCollector can be emergency 109 // sealed. If this is the case, the AssignmentCollector produces a candidate seal 110 // as part of this method call. No errors are expected during normal operations. 111 func (asm *AssignmentCollectorStateMachine) CheckEmergencySealing(observer consensus.SealingObservation, finalizedBlockHeight uint64) error { 112 collector := asm.atomicLoadCollector() 113 return collector.CheckEmergencySealing(observer, finalizedBlockHeight) 114 } 115 116 // RequestMissingApprovals sends requests for missing approvals to the respective 117 // verification nodes. Returns number of requests made. No errors are expected 118 // during normal operations. 119 func (asm *AssignmentCollectorStateMachine) RequestMissingApprovals(observer consensus.SealingObservation, maxHeightForRequesting uint64) (uint, error) { 120 collector := asm.atomicLoadCollector() 121 return collector.RequestMissingApprovals(observer, maxHeightForRequesting) 122 } 123 124 // ProcessingStatus returns the AssignmentCollector's ProcessingStatus (state descriptor). 125 func (asm *AssignmentCollectorStateMachine) ProcessingStatus() ProcessingStatus { 126 collector := asm.atomicLoadCollector() 127 return collector.ProcessingStatus() 128 } 129 130 // ChangeProcessingStatus changes the AssignmentCollector's internal processing 131 // status. The operation is implemented as an atomic compare-and-swap, i.e. the 132 // state transition is only executed if AssignmentCollector's internal state is 133 // equal to `expectedValue`. The return indicates whether the state was updated. 134 // The implementation only allows the following transitions: 135 // 136 // CachingApprovals -> VerifyingApprovals 137 // CachingApprovals -> Orphaned 138 // VerifyingApprovals -> Orphaned 139 // 140 // Error returns: 141 // * nil if the state transition was successfully executed 142 // * ErrDifferentCollectorState if the AssignmentCollector's state is different than expectedCurrentStatus 143 // * ErrInvalidCollectorStateTransition if the given state transition is impossible 144 // * all other errors are unexpected and potential symptoms of internal bugs or state corruption (fatal) 145 func (asm *AssignmentCollectorStateMachine) ChangeProcessingStatus(expectedCurrentStatus, newStatus ProcessingStatus) error { 146 // don't transition between same states 147 if expectedCurrentStatus == newStatus { 148 return nil 149 } 150 151 // state transition: VerifyingApprovals -> Orphaned 152 if (expectedCurrentStatus == VerifyingApprovals) && (newStatus == Orphaned) { 153 _, err := asm.verifying2Orphaned() 154 if err != nil { 155 return fmt.Errorf("failed to transistion AssignmentCollector from %s to %s: %w", expectedCurrentStatus.String(), newStatus.String(), err) 156 } 157 return nil 158 } 159 160 // state transition: CachingApprovals -> Orphaned 161 if (expectedCurrentStatus == CachingApprovals) && (newStatus == Orphaned) { 162 _, err := asm.caching2Orphaned() 163 if err != nil { 164 return fmt.Errorf("failed to transistion AssignmentCollector from %s to %s: %w", expectedCurrentStatus.String(), newStatus.String(), err) 165 } 166 return nil 167 } 168 169 // state transition: CachingApprovals -> VerifyingApprovals 170 if (expectedCurrentStatus == CachingApprovals) && (newStatus == VerifyingApprovals) { 171 cachingCollector, err := asm.caching2Verifying() 172 if err != nil { 173 return fmt.Errorf("failed to transistion AssignmentCollector from %s to %s: %w", expectedCurrentStatus.String(), newStatus.String(), err) 174 } 175 176 // From this goroutine's perspective, the "Compare And Repeat Pattern" guarantees 177 // that any IncorporatedResult or ResultApproval is 178 // * either already stored in the old state (i.e. the CachingAssignmentCollector) 179 // when this goroutine retrieves it 180 // * or the incorporated result / approval is subsequently added to updated state 181 // (i.e. the VerifyingAssignmentCollector) after this goroutine stored it 182 // Hence, this goroutine only needs to hand the IncorporatedResults and ResultApprovals 183 // that are stored in the CachingAssignmentCollector to the VerifyingAssignmentCollector. 184 // 185 // Generally, we would like to process the cached data concurrently here, because 186 // sequential processing is too slow. However, we should only allocate limited resources 187 // to avoid other components being starved. Therefore, we use a WorkerPool to queue 188 // the processing tasks and work through a limited number of them concurrently. 189 for _, ir := range cachingCollector.GetIncorporatedResults() { 190 task := asm.reIngestIncorporatedResultTask(ir) 191 asm.workerPool.Submit(task) 192 } 193 for _, approval := range cachingCollector.GetApprovals() { 194 task := asm.reIngestApprovalTask(approval) 195 asm.workerPool.Submit(task) 196 } 197 return nil 198 } 199 200 return fmt.Errorf("cannot transition from %s to %s: %w", expectedCurrentStatus.String(), newStatus.String(), ErrInvalidCollectorStateTransition) 201 } 202 203 // caching2Orphaned ensures that the collector is currently in state `CachingApprovals` 204 // and replaces it by a newly-created OrphanAssignmentCollector. 205 // Returns: 206 // * CachingAssignmentCollector as of before the update 207 // * ErrDifferentCollectorState if the AssignmentCollector's state is _not_ `CachingApprovals` 208 // * all other errors are unexpected and potential symptoms of internal bugs or state corruption (fatal) 209 func (asm *AssignmentCollectorStateMachine) caching2Orphaned() (*CachingAssignmentCollector, error) { 210 asm.Lock() 211 defer asm.Unlock() 212 clr := asm.atomicLoadCollector() 213 cachingCollector, ok := clr.(*CachingAssignmentCollector) 214 if !ok { 215 return nil, fmt.Errorf("collector's current state is %s: %w", clr.ProcessingStatus().String(), ErrDifferentCollectorState) 216 } 217 asm.collector.Store(&atomicValueWrapper{collector: NewOrphanAssignmentCollector(asm.AssignmentCollectorBase)}) 218 return cachingCollector, nil 219 } 220 221 // verifying2Orphaned ensures that the collector is currently in state `VerifyingApprovals` 222 // and replaces it by a newly-created OrphanAssignmentCollector. 223 // Returns: 224 // * VerifyingAssignmentCollector as of before the update 225 // * ErrDifferentCollectorState if the AssignmentCollector's state is _not_ `VerifyingApprovals` 226 // * all other errors are unexpected and potential symptoms of internal bugs or state corruption (fatal) 227 func (asm *AssignmentCollectorStateMachine) verifying2Orphaned() (*VerifyingAssignmentCollector, error) { 228 asm.Lock() 229 defer asm.Unlock() 230 clr := asm.atomicLoadCollector() 231 verifyingCollector, ok := clr.(*VerifyingAssignmentCollector) 232 if !ok { 233 return nil, fmt.Errorf("collector's current state is %s: %w", clr.ProcessingStatus().String(), ErrDifferentCollectorState) 234 } 235 asm.collector.Store(&atomicValueWrapper{collector: NewOrphanAssignmentCollector(asm.AssignmentCollectorBase)}) 236 return verifyingCollector, nil 237 } 238 239 // caching2Verifying ensures that the collector is currently in state `CachingApprovals` 240 // and replaces it by a newly-created VerifyingAssignmentCollector. 241 // Returns: 242 // * CachingAssignmentCollector as of before the update 243 // * ErrDifferentCollectorState if the AssignmentCollector's state is _not_ `CachingApprovals` 244 // * all other errors are unexpected and potential symptoms of internal bugs or state corruption (fatal) 245 func (asm *AssignmentCollectorStateMachine) caching2Verifying() (*CachingAssignmentCollector, error) { 246 asm.Lock() 247 defer asm.Unlock() 248 clr := asm.atomicLoadCollector() 249 cachingCollector, ok := clr.(*CachingAssignmentCollector) 250 if !ok { 251 return nil, fmt.Errorf("collector's current state is %s: %w", clr.ProcessingStatus().String(), ErrDifferentCollectorState) 252 } 253 254 verifyingCollector, err := NewVerifyingAssignmentCollector(asm.AssignmentCollectorBase) 255 if err != nil { 256 return nil, fmt.Errorf("failed to instantiate VerifyingAssignmentCollector: %w", err) 257 } 258 asm.collector.Store(&atomicValueWrapper{collector: verifyingCollector}) 259 260 return cachingCollector, nil 261 } 262 263 // reIngestIncorporatedResultTask returns a functor for re-ingesting the specified 264 // IncorporatedResults; functor handles all potential business logic errors. 265 func (asm *AssignmentCollectorStateMachine) reIngestIncorporatedResultTask(incResult *flow.IncorporatedResult) func() { 266 task := func() { 267 err := asm.ProcessIncorporatedResult(incResult) 268 if err != nil { 269 asm.log.Fatal().Err(err). 270 Str("executed_block_id", incResult.Result.BlockID.String()). 271 Str("result_id", incResult.Result.ID().String()). 272 Str("incorporating_block_id", incResult.IncorporatedBlockID.String()). 273 Str("incorporated_result_id", incResult.ID().String()). 274 Msg("re-ingesting incorporated results failed") 275 } 276 } 277 return task 278 } 279 280 // reIngestApprovalTask returns a functor for re-ingesting the specified 281 // ResultApprovals; functor handles all potential business logic errors. 282 func (asm *AssignmentCollectorStateMachine) reIngestApprovalTask(approval *flow.ResultApproval) func() { 283 task := func() { 284 err := asm.ProcessApproval(approval) 285 if err != nil { 286 lg := asm.log.With().Err(err). 287 Str("approver_id", approval.Body.ApproverID.String()). 288 Str("executed_block_id", approval.Body.BlockID.String()). 289 Str("result_id", approval.Body.ExecutionResultID.String()). 290 Str("approval_id", approval.ID().String()). 291 Logger() 292 if engine.IsInvalidInputError(err) { 293 lg.Error().Msgf("received invalid approval") 294 return 295 } 296 asm.log.Fatal().Msg("unexpected error re-ingesting result approval") 297 } 298 } 299 return task 300 }