github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/engine/consensus/approvals/assignment_collector_statemachine.go (about)

     1  package approvals
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"sync"
     7  
     8  	"go.uber.org/atomic"
     9  
    10  	"github.com/onflow/flow-go/engine"
    11  	"github.com/onflow/flow-go/engine/consensus"
    12  	"github.com/onflow/flow-go/model/flow"
    13  )
    14  
var (
	// ErrInvalidCollectorStateTransition is returned (wrapped) by ChangeProcessingStatus
	// when the requested transition is not one of the supported state transitions.
	ErrInvalidCollectorStateTransition = errors.New("invalid state transition")
	// ErrDifferentCollectorState is returned (wrapped) by the state-transition helpers
	// when the collector's current state is not the state the transition starts from.
	ErrDifferentCollectorState         = errors.New("different state")
)
    19  
// AssignmentCollectorStateMachine implements the `AssignmentCollector` interface.
// It wraps the current `AssignmentCollectorState` and provides logic for state transitions.
// Any state-specific logic is delegated to the state-specific instance.
// AssignmentCollectorStateMachine is designed to be used concurrently.
//
// Comment on concurrency safety for state-specific business logic:
//   - AssignmentCollectorStateMachine processes state updates concurrently with
//     state-specific business logic. Hence, it can happen that an input is handed
//     to a state that has just become stale.
//   - To guarantee that inputs reach the latest state, we employ a
//     "Compare And Repeat Pattern": we atomically read the state before and after the
//     operation. If the state changed in between, the input was given to a stale state
//     and the operation is repeated until we confirm that the latest state was updated.
type AssignmentCollectorStateMachine struct {
	AssignmentCollectorBase

	// collector references the assignment collector in its current state; the stored
	// value is always a *atomicValueWrapper. The value is frequently read, but
	// infrequently updated. Reads are atomic and therefore concurrency safe. For state
	// updates (writes), we use a mutex to guarantee that a state update is always
	// based on the most recent value.
	collector atomic.Value
	// Mutex serializes the state transitions (caching2Orphaned, verifying2Orphaned,
	// caching2Verifying). It is embedded, so Lock and Unlock are promoted onto the
	// state machine itself.
	sync.Mutex
}
    43  
    44  func (asm *AssignmentCollectorStateMachine) atomicLoadCollector() AssignmentCollectorState {
    45  	return asm.collector.Load().(*atomicValueWrapper).collector
    46  }
    47  
// atomicValueWrapper is the single concrete type stored in the state machine's
// atomic.Value. atomic.Value requires that every stored value has the same concrete
// type, so the different AssignmentCollectorState implementations cannot be stored
// directly; wrapping them in this struct works around that restriction.
// See https://github.com/golang/go/issues/22550
type atomicValueWrapper struct {
	// collector is the state-specific collector that is currently active.
	collector AssignmentCollectorState
}
    54  
    55  func NewAssignmentCollectorStateMachine(collectorBase AssignmentCollectorBase) *AssignmentCollectorStateMachine {
    56  	sm := &AssignmentCollectorStateMachine{
    57  		AssignmentCollectorBase: collectorBase,
    58  	}
    59  
    60  	// by default start with caching collector
    61  	sm.collector.Store(&atomicValueWrapper{
    62  		collector: NewCachingAssignmentCollector(collectorBase),
    63  	})
    64  	return sm
    65  }
    66  
    67  // ProcessIncorporatedResult starts tracking the approval for IncorporatedResult.
    68  // Method is idempotent.
    69  // Error Returns:
    70  //   - no errors expected during normal operation;
    71  //     errors might be symptoms of bugs or internal state corruption (fatal)
    72  func (asm *AssignmentCollectorStateMachine) ProcessIncorporatedResult(incorporatedResult *flow.IncorporatedResult) error {
    73  	for { // Compare And Repeat if state update occurred concurrently
    74  		collector := asm.atomicLoadCollector()
    75  		currentState := collector.ProcessingStatus()
    76  		err := collector.ProcessIncorporatedResult(incorporatedResult)
    77  		if err != nil {
    78  			return fmt.Errorf("could not process incorporated result %v: %w", incorporatedResult.ID(), err)
    79  		}
    80  		if currentState != asm.ProcessingStatus() {
    81  			continue
    82  		}
    83  		return nil
    84  	}
    85  }
    86  
    87  // ProcessApproval ingests Result Approvals and triggers sealing of execution result
    88  // when sufficient approvals have arrived.
    89  // Error Returns:
    90  //   - nil in case of success (outdated approvals might be silently discarded)
    91  //   - engine.InvalidInputError if the result approval is invalid
    92  //   - any other errors might be symptoms of bugs or internal state corruption (fatal)
    93  func (asm *AssignmentCollectorStateMachine) ProcessApproval(approval *flow.ResultApproval) error {
    94  	for { // Compare And Repeat if state update occurred concurrently
    95  		collector := asm.atomicLoadCollector()
    96  		currentState := collector.ProcessingStatus()
    97  		err := collector.ProcessApproval(approval)
    98  		if err != nil {
    99  			return fmt.Errorf("could not process approval %v: %w", approval.ID(), err)
   100  		}
   101  		if currentState != asm.ProcessingStatus() {
   102  			continue
   103  		}
   104  		return nil
   105  	}
   106  }
   107  
   108  // CheckEmergencySealing checks whether this AssignmentCollector can be emergency
   109  // sealed. If this is the case, the AssignmentCollector produces a candidate seal
   110  // as part of this method call. No errors are expected during normal operations.
   111  func (asm *AssignmentCollectorStateMachine) CheckEmergencySealing(observer consensus.SealingObservation, finalizedBlockHeight uint64) error {
   112  	collector := asm.atomicLoadCollector()
   113  	return collector.CheckEmergencySealing(observer, finalizedBlockHeight)
   114  }
   115  
   116  // RequestMissingApprovals sends requests for missing approvals to the respective
   117  // verification nodes. Returns number of requests made. No errors are expected
   118  // during normal operations.
   119  func (asm *AssignmentCollectorStateMachine) RequestMissingApprovals(observer consensus.SealingObservation, maxHeightForRequesting uint64) (uint, error) {
   120  	collector := asm.atomicLoadCollector()
   121  	return collector.RequestMissingApprovals(observer, maxHeightForRequesting)
   122  }
   123  
   124  // ProcessingStatus returns the AssignmentCollector's ProcessingStatus (state descriptor).
   125  func (asm *AssignmentCollectorStateMachine) ProcessingStatus() ProcessingStatus {
   126  	collector := asm.atomicLoadCollector()
   127  	return collector.ProcessingStatus()
   128  }
   129  
   130  // ChangeProcessingStatus changes the AssignmentCollector's internal processing
   131  // status. The operation is implemented as an atomic compare-and-swap, i.e. the
   132  // state transition is only executed if AssignmentCollector's internal state is
   133  // equal to `expectedValue`. The return indicates whether the state was updated.
   134  // The implementation only allows the following transitions:
   135  //
   136  //	CachingApprovals   -> VerifyingApprovals
   137  //	CachingApprovals   -> Orphaned
   138  //	VerifyingApprovals -> Orphaned
   139  //
   140  // Error returns:
   141  // * nil if the state transition was successfully executed
   142  // * ErrDifferentCollectorState if the AssignmentCollector's state is different than expectedCurrentStatus
   143  // * ErrInvalidCollectorStateTransition if the given state transition is impossible
   144  // * all other errors are unexpected and potential symptoms of internal bugs or state corruption (fatal)
   145  func (asm *AssignmentCollectorStateMachine) ChangeProcessingStatus(expectedCurrentStatus, newStatus ProcessingStatus) error {
   146  	// don't transition between same states
   147  	if expectedCurrentStatus == newStatus {
   148  		return nil
   149  	}
   150  
   151  	// state transition: VerifyingApprovals -> Orphaned
   152  	if (expectedCurrentStatus == VerifyingApprovals) && (newStatus == Orphaned) {
   153  		_, err := asm.verifying2Orphaned()
   154  		if err != nil {
   155  			return fmt.Errorf("failed to transistion AssignmentCollector from %s to %s: %w", expectedCurrentStatus.String(), newStatus.String(), err)
   156  		}
   157  		return nil
   158  	}
   159  
   160  	// state transition: CachingApprovals -> Orphaned
   161  	if (expectedCurrentStatus == CachingApprovals) && (newStatus == Orphaned) {
   162  		_, err := asm.caching2Orphaned()
   163  		if err != nil {
   164  			return fmt.Errorf("failed to transistion AssignmentCollector from %s to %s: %w", expectedCurrentStatus.String(), newStatus.String(), err)
   165  		}
   166  		return nil
   167  	}
   168  
   169  	// state transition: CachingApprovals -> VerifyingApprovals
   170  	if (expectedCurrentStatus == CachingApprovals) && (newStatus == VerifyingApprovals) {
   171  		cachingCollector, err := asm.caching2Verifying()
   172  		if err != nil {
   173  			return fmt.Errorf("failed to transistion AssignmentCollector from %s to %s: %w", expectedCurrentStatus.String(), newStatus.String(), err)
   174  		}
   175  
   176  		// From this goroutine's perspective, the "Compare And Repeat Pattern" guarantees
   177  		// that any IncorporatedResult or ResultApproval is
   178  		//  * either already stored in the old state (i.e. the CachingAssignmentCollector)
   179  		//    when this goroutine retrieves it
   180  		//  * or the incorporated result / approval is subsequently added to updated state
   181  		//    (i.e. the VerifyingAssignmentCollector) after this goroutine stored it
   182  		// Hence, this goroutine only needs to hand the IncorporatedResults and ResultApprovals
   183  		// that are stored in the CachingAssignmentCollector to the VerifyingAssignmentCollector.
   184  		//
   185  		// Generally, we would like to process the cached data concurrently here, because
   186  		// sequential processing is too slow. However, we should only allocate limited resources
   187  		// to avoid other components being starved. Therefore, we use a WorkerPool to queue
   188  		// the processing tasks and work through a limited number of them concurrently.
   189  		for _, ir := range cachingCollector.GetIncorporatedResults() {
   190  			task := asm.reIngestIncorporatedResultTask(ir)
   191  			asm.workerPool.Submit(task)
   192  		}
   193  		for _, approval := range cachingCollector.GetApprovals() {
   194  			task := asm.reIngestApprovalTask(approval)
   195  			asm.workerPool.Submit(task)
   196  		}
   197  		return nil
   198  	}
   199  
   200  	return fmt.Errorf("cannot transition from %s to %s: %w", expectedCurrentStatus.String(), newStatus.String(), ErrInvalidCollectorStateTransition)
   201  }
   202  
   203  // caching2Orphaned ensures that the collector is currently in state `CachingApprovals`
   204  // and replaces it by a newly-created OrphanAssignmentCollector.
   205  // Returns:
   206  // * CachingAssignmentCollector as of before the update
   207  // * ErrDifferentCollectorState if the AssignmentCollector's state is _not_ `CachingApprovals`
   208  // * all other errors are unexpected and potential symptoms of internal bugs or state corruption (fatal)
   209  func (asm *AssignmentCollectorStateMachine) caching2Orphaned() (*CachingAssignmentCollector, error) {
   210  	asm.Lock()
   211  	defer asm.Unlock()
   212  	clr := asm.atomicLoadCollector()
   213  	cachingCollector, ok := clr.(*CachingAssignmentCollector)
   214  	if !ok {
   215  		return nil, fmt.Errorf("collector's current state is %s: %w", clr.ProcessingStatus().String(), ErrDifferentCollectorState)
   216  	}
   217  	asm.collector.Store(&atomicValueWrapper{collector: NewOrphanAssignmentCollector(asm.AssignmentCollectorBase)})
   218  	return cachingCollector, nil
   219  }
   220  
   221  // verifying2Orphaned ensures that the collector is currently in state `VerifyingApprovals`
   222  // and replaces it by a newly-created OrphanAssignmentCollector.
   223  // Returns:
   224  // * VerifyingAssignmentCollector as of before the update
   225  // * ErrDifferentCollectorState if the AssignmentCollector's state is _not_ `VerifyingApprovals`
   226  // * all other errors are unexpected and potential symptoms of internal bugs or state corruption (fatal)
   227  func (asm *AssignmentCollectorStateMachine) verifying2Orphaned() (*VerifyingAssignmentCollector, error) {
   228  	asm.Lock()
   229  	defer asm.Unlock()
   230  	clr := asm.atomicLoadCollector()
   231  	verifyingCollector, ok := clr.(*VerifyingAssignmentCollector)
   232  	if !ok {
   233  		return nil, fmt.Errorf("collector's current state is %s: %w", clr.ProcessingStatus().String(), ErrDifferentCollectorState)
   234  	}
   235  	asm.collector.Store(&atomicValueWrapper{collector: NewOrphanAssignmentCollector(asm.AssignmentCollectorBase)})
   236  	return verifyingCollector, nil
   237  }
   238  
   239  // caching2Verifying ensures that the collector is currently in state `CachingApprovals`
   240  // and replaces it by a newly-created VerifyingAssignmentCollector.
   241  // Returns:
   242  // * CachingAssignmentCollector as of before the update
   243  // * ErrDifferentCollectorState if the AssignmentCollector's state is _not_ `CachingApprovals`
   244  // * all other errors are unexpected and potential symptoms of internal bugs or state corruption (fatal)
   245  func (asm *AssignmentCollectorStateMachine) caching2Verifying() (*CachingAssignmentCollector, error) {
   246  	asm.Lock()
   247  	defer asm.Unlock()
   248  	clr := asm.atomicLoadCollector()
   249  	cachingCollector, ok := clr.(*CachingAssignmentCollector)
   250  	if !ok {
   251  		return nil, fmt.Errorf("collector's current state is %s: %w", clr.ProcessingStatus().String(), ErrDifferentCollectorState)
   252  	}
   253  
   254  	verifyingCollector, err := NewVerifyingAssignmentCollector(asm.AssignmentCollectorBase)
   255  	if err != nil {
   256  		return nil, fmt.Errorf("failed to instantiate VerifyingAssignmentCollector: %w", err)
   257  	}
   258  	asm.collector.Store(&atomicValueWrapper{collector: verifyingCollector})
   259  
   260  	return cachingCollector, nil
   261  }
   262  
   263  // reIngestIncorporatedResultTask returns a functor for re-ingesting the specified
   264  // IncorporatedResults; functor handles all potential business logic errors.
   265  func (asm *AssignmentCollectorStateMachine) reIngestIncorporatedResultTask(incResult *flow.IncorporatedResult) func() {
   266  	task := func() {
   267  		err := asm.ProcessIncorporatedResult(incResult)
   268  		if err != nil {
   269  			asm.log.Fatal().Err(err).
   270  				Str("executed_block_id", incResult.Result.BlockID.String()).
   271  				Str("result_id", incResult.Result.ID().String()).
   272  				Str("incorporating_block_id", incResult.IncorporatedBlockID.String()).
   273  				Str("incorporated_result_id", incResult.ID().String()).
   274  				Msg("re-ingesting incorporated results failed")
   275  		}
   276  	}
   277  	return task
   278  }
   279  
   280  // reIngestApprovalTask returns a functor for re-ingesting the specified
   281  // ResultApprovals; functor handles all potential business logic errors.
   282  func (asm *AssignmentCollectorStateMachine) reIngestApprovalTask(approval *flow.ResultApproval) func() {
   283  	task := func() {
   284  		err := asm.ProcessApproval(approval)
   285  		if err != nil {
   286  			lg := asm.log.With().Err(err).
   287  				Str("approver_id", approval.Body.ApproverID.String()).
   288  				Str("executed_block_id", approval.Body.BlockID.String()).
   289  				Str("result_id", approval.Body.ExecutionResultID.String()).
   290  				Str("approval_id", approval.ID().String()).
   291  				Logger()
   292  			if engine.IsInvalidInputError(err) {
   293  				lg.Error().Msgf("received invalid approval")
   294  				return
   295  			}
   296  			asm.log.Fatal().Msg("unexpected error re-ingesting result approval")
   297  		}
   298  	}
   299  	return task
   300  }