github.com/koko1123/flow-go-1@v0.29.6/module/mempool/consensus/exec_fork_suppressor.go

     1  package consensus
     2  
     3  import (
     4  	"encoding/json"
     5  	"errors"
     6  	"fmt"
     7  	"sync"
     8  
     9  	"github.com/dgraph-io/badger/v3"
    10  	"github.com/rs/zerolog"
    11  	"github.com/rs/zerolog/log"
    12  	"go.uber.org/atomic"
    13  
    14  	"github.com/koko1123/flow-go-1/engine"
    15  	"github.com/koko1123/flow-go-1/model/flow"
    16  	"github.com/koko1123/flow-go-1/module/mempool"
    17  	"github.com/koko1123/flow-go-1/storage"
    18  	"github.com/koko1123/flow-go-1/storage/badger/operation"
    19  )
    20  
    21  // ExecForkSuppressor is a wrapper around a conventional mempool.IncorporatedResultSeals
    22  // mempool. It implements the following mitigation strategy for execution forks:
    23  //   - In case two conflicting results are considered sealable for the same block,
    24  //     sealing should halt. Specifically, two results are considered conflicting
    25  //     if they differ in their start or end state.
    26  //   - Even after a restart, the sealing should not resume.
    27  //   - We rely on human intervention to resolve the conflict.
    28  //
    29  // The ExecForkSuppressor implements this mitigation strategy as follows:
    30  //   - For each candidate seal added to the mempool, the wrapper indexes the seal
    31  //     by its respective blockID, storing all seals in the internal map `sealsForBlock`.
    32  //   - Whenever a client performs any query, we check whether there are conflicting seals.
    33  //   - We pick the first seal available for a block and check whether it has
    34  //     the same state transition as the other seals included for the same block.
    35  //   - If conflicting state transitions for the same block are detected,
    36  //     ExecForkSuppressor sets an internal flag and thereafter
    37  //     reports the mempool as empty, which will lead to the respective
    38  //     consensus node not including any more seals.
    39  //   - Evidence for an execution fork is stored in a database (persisted across restarts).
    40  //
    41  // The implementation is concurrency safe.
    42  type ExecForkSuppressor struct {
    43  	mutex            sync.RWMutex
    44  	seals            mempool.IncorporatedResultSeals
    45  	sealsForBlock    map[flow.Identifier]sealSet             // map BlockID -> set of IncorporatedResultSeal
    46  	byHeight         map[uint64]map[flow.Identifier]struct{} // map height -> set of executed block IDs at height
    47  	lowestHeight     uint64
    48  	execForkDetected atomic.Bool
    49  	onExecFork       ExecForkActor
    50  	db               *badger.DB
    51  	log              zerolog.Logger
    52  }
    53  
    54  var _ mempool.IncorporatedResultSeals = (*ExecForkSuppressor)(nil)
    55  
    56  // sealSet is a set of seals; internally represented as a map from sealID to seal
    57  type sealSet map[flow.Identifier]*flow.IncorporatedResultSeal
    58  
    59  // sealsList is a list of seals
    60  type sealsList []*flow.IncorporatedResultSeal
    61  
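        // NewExecStateForkSuppressor wraps the given `seals` mempool. On construction, it checks the
        // database for previously persisted evidence of an execution fork; if such evidence is found,
        // the suppressor starts in the suppressed state and immediately notifies `onExecFork` with the
        // stored conflicting seals.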
    62  func NewExecStateForkSuppressor(seals mempool.IncorporatedResultSeals, onExecFork ExecForkActor, db *badger.DB, log zerolog.Logger) (*ExecForkSuppressor, error) {
    63  	conflictingSeals, err := checkExecutionForkEvidence(db)
    64  	if err != nil {
    65  		return nil, fmt.Errorf("failed to interface with storage: %w", err)
    66  	}
    67  	execForkDetectedFlag := len(conflictingSeals) != 0
    68  	if execForkDetectedFlag {
    69  		onExecFork(conflictingSeals)
    70  	}
    71  
    72  	wrapper := ExecForkSuppressor{
    73  		mutex:            sync.RWMutex{},
    74  		seals:            seals,
    75  		sealsForBlock:    make(map[flow.Identifier]sealSet),
    76  		byHeight:         make(map[uint64]map[flow.Identifier]struct{}),
    77  		execForkDetected: *atomic.NewBool(execForkDetectedFlag),
    78  		onExecFork:       onExecFork,
    79  		db:               db,
    80  		log:              log.With().Str("mempool", "ExecForkSuppressor").Logger(),
    81  	}
    82  
    83  	return &wrapper, nil
    84  }
    85  
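        // Example usage (illustrative sketch only, not part of this package's API): a consensus node
        // wraps its raw seals mempool once at startup and passes an actor that is invoked with the
        // conflicting seals when an execution fork is detected. The identifiers `rawSealsPool`,
        // `logger` and `db` below are hypothetical; `ExecForkActor` is assumed to be a function type
        // accepting the conflicting seals, as its use in this file suggests.
        //
        //	onFork := func(conflicting []*flow.IncorporatedResultSeal) {
        //		logger.Fatal().Int("conflicting_seals", len(conflicting)).Msg("execution fork detected - halting sealing")
        //	}
        //	sealsPool, err := NewExecStateForkSuppressor(rawSealsPool, onFork, db, logger)
        //	if err != nil {
        //		return fmt.Errorf("could not wrap seals mempool: %w", err)
        //	}
        //	// sealsPool can now be used wherever a mempool.IncorporatedResultSeals is expected
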
    86  // Add adds the given seal to the mempool. The return value indicates whether the seal was added.
    87  // Internally, every added seal is indexed by blockID. Expects that the underlying mempool never ejects items.
    88  // Error returns:
    89  //   - engine.InvalidInputError (sentinel error)
    90  //     In case a seal fails one of the required consistency checks;
    91  func (s *ExecForkSuppressor) Add(newSeal *flow.IncorporatedResultSeal) (bool, error) {
    92  	s.mutex.Lock()
    93  	defer s.mutex.Unlock()
    94  
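        	// while an execution fork has been detected, no further seals are accepted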
    95  	if s.execForkDetected.Load() {
    96  		return false, nil
    97  	}
    98  
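        	// drop seals for blocks that are already below the pruning horizon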
    99  	if newSeal.Header.Height < s.lowestHeight {
   100  		return false, nil
   101  	}
   102  
   103  	// STEP 1: ensure locally that newSeal has a non-zero number of chunks, which
   104  	// guarantees that the new seal contains start and end state values.
   105  	// This wrapper is a temporary safety layer; we check all conditions that are
   106  	// required for its correct functioning locally, to not delegate safety-critical
   107  	// implementation aspects to external components
   108  	err := s.enforceValidChunks(newSeal)
   109  	if err != nil {
   110  		return false, fmt.Errorf("invalid candidate seal: %w", err)
   111  	}
   112  	blockID := newSeal.Seal.BlockID
   113  
   114  	// This mempool allows adding multiple seals for the same blockID, even if they have different state transitions.
   115  	// When the builder logic queries such seals, we check whether we have an execution fork. The main reason for
   116  	// detecting forks at query time (not at insertion time) is the ability to add extra logic in the underlying
   117  	// mempools. For instance, we could filter seals coming from the underlying mempool by some criteria.
   118  
   119  	// STEP 2: add newSeal to the wrapped mempool
   120  	added, err := s.seals.Add(newSeal) // internally de-duplicates
   121  	if err != nil {
   122  		return added, fmt.Errorf("failed to add seal to wrapped mempool: %w", err)
   123  	}
   124  	if !added { // if underlying mempool did not accept the seal => nothing to do anymore
   125  		return false, nil
   126  	}
   127  
   128  	// STEP 3: add newSeal to secondary index of this wrapper
   129  	// CAUTION: We expect that the underlying mempool NEVER ejects seals, as ejection would break liveness.
   130  	blockSeals, found := s.sealsForBlock[blockID]
   131  	if !found {
   132  		// no other seal for this block was in mempool before => create a set for the seals for this block
   133  		blockSeals = make(sealSet)
   134  		s.sealsForBlock[blockID] = blockSeals
   135  	}
   136  	blockSeals[newSeal.ID()] = newSeal
   137  
   138  	// index the block's height, so the secondary index can be pruned by height later
   139  	blocksAtHeight, found := s.byHeight[newSeal.Header.Height]
   140  	if !found {
   141  		blocksAtHeight = make(map[flow.Identifier]struct{})
   142  		s.byHeight[newSeal.Header.Height] = blocksAtHeight
   143  	}
   144  	blocksAtHeight[blockID] = struct{}{}
   145  
   146  	return true, nil
   147  }
   148  
   149  // All returns all the IncorporatedResultSeals in the mempool.
   150  // Note: This call might crash if a block has multiple seals in the mempool for conflicting
   151  // incorporated results.
   152  func (s *ExecForkSuppressor) All() []*flow.IncorporatedResultSeal {
   153  	s.mutex.RLock()
   154  	seals := s.seals.All()
   155  	s.mutex.RUnlock()
   156  
   157  	// index seals retrieved from the underlying mempool by blockID to check
   158  	// for conflicting seals
   159  	sealsByBlockID := make(map[flow.Identifier]sealsList)
   160  	for _, seal := range seals {
   161  		sealsPerBlock := sealsByBlockID[seal.Seal.BlockID]
   162  		sealsByBlockID[seal.Seal.BlockID] = append(sealsPerBlock, seal)
   163  	}
   164  
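        	// Note: filterConflictingSeals must run without holding the read lock, because upon
        	// detecting an execution fork it calls s.Clear(), which acquires the write lock.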
   165  	// check for conflicting seals
   166  	return s.filterConflictingSeals(sealsByBlockID)
   167  }
   168  
   169  // ByID returns an IncorporatedResultSeal by its ID.
   170  // The IncorporatedResultSeal's ID is the same as IncorporatedResult's ID,
   171  // so this call essentially finds the seal for the given incorporated result in the mempool.
   172  // Note: This call might crash if the seal's block has multiple seals in the mempool for conflicting
   173  // incorporated results. Usually the builder will call this method to find a seal for an incorporated
   174  // result, so the builder might crash if multiple conflicting seals exist.
   175  func (s *ExecForkSuppressor) ByID(identifier flow.Identifier) (*flow.IncorporatedResultSeal, bool) {
   176  	s.mutex.RLock()
   177  	seal, found := s.seals.ByID(identifier)
   178  	// if we haven't found the seal in the underlying mempool - exit early
   179  	if !found {
   180  		s.mutex.RUnlock()
   181  		return seal, found
   182  	}
   183  	sealsForBlock := s.sealsForBlock[seal.Seal.BlockID]
   184  	// if this is the only seal previously seen for this block, there can be no execution fork
   185  	if len(sealsForBlock) == 1 {
   186  		s.mutex.RUnlock()
   187  		return seal, true
   188  	}
   189  	// convert map into list
   190  	var sealsPerBlock sealsList
   191  	for _, otherSeal := range sealsForBlock {
   192  		sealsPerBlock = append(sealsPerBlock, otherSeal)
   193  	}
   194  	s.mutex.RUnlock()
   195  
   196  	// check for conflicting seals
   197  	seals := s.filterConflictingSeals(map[flow.Identifier]sealsList{seal.Seal.BlockID: sealsPerBlock})
   198  	if len(seals) == 0 {
   199  		return nil, false
   200  	}
   201  	return seals[0], true
   202  }
   203  
   204  // Remove removes the IncorporatedResultSeal with id from the mempool
   205  func (s *ExecForkSuppressor) Remove(id flow.Identifier) bool {
   206  	s.mutex.Lock()
   207  	defer s.mutex.Unlock()
   208  
   209  	seal, found := s.seals.ByID(id)
   210  	if found {
   211  		s.seals.Remove(id)
   212  		set, found := s.sealsForBlock[seal.Seal.BlockID]
   213  		if !found {
   214  			// In the current implementation, this cannot happen, as every entity in the mempool is also contained in sealsForBlock.
   215  			// We nevertheless perform this sanity check here to catch future inconsistent code modifications.
   216  			s.log.Fatal().Msg("inconsistent state detected: seal not in secondary index")
   217  		}
   218  		if len(set) > 1 {
   219  			delete(set, id)
   220  		} else {
   221  			delete(s.sealsForBlock, seal.Seal.BlockID)
   222  		}
   223  	}
   224  	return found
   225  }
   226  
   227  // Size returns the number of items in the mempool
   228  func (s *ExecForkSuppressor) Size() uint {
   229  	s.mutex.RLock()
   230  	defer s.mutex.RUnlock()
   231  	return s.seals.Size()
   232  }
   233  
   234  // Limit returns the size limit of the mempool
   235  func (s *ExecForkSuppressor) Limit() uint {
   236  	s.mutex.RLock()
   237  	defer s.mutex.RUnlock()
   238  	return s.seals.Limit()
   239  }
   240  
   241  // Clear removes all entities from the pool.
   242  // The wrapper clears the wrapped mempool as well as its own secondary index.
   243  func (s *ExecForkSuppressor) Clear() {
   244  	s.mutex.Lock()
   245  	defer s.mutex.Unlock()
   246  	s.sealsForBlock = make(map[flow.Identifier]sealSet)
   247  	s.seals.Clear()
   248  }
   249  
   250  // PruneUpToHeight removes all seals for blocks whose height is strictly
   251  // smaller than the given height. Note: seals for blocks at the given height are retained.
   252  func (s *ExecForkSuppressor) PruneUpToHeight(height uint64) error {
   253  	err := s.seals.PruneUpToHeight(height)
   254  	if err != nil {
   255  		return err
   256  	}
   257  
   258  	s.mutex.Lock()
   259  	defer s.mutex.Unlock()
   260  
   261  	if len(s.sealsForBlock) == 0 {
   262  		s.lowestHeight = height
   263  		return nil
   264  	}
   265  
   266  	// Optimization: if fewer heights are indexed than the range of heights to prune,
   267  	// iterate over the indexed heights only.
   268  	// Otherwise, iterate over every height in the pruning range.
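        	// For example, with lowestHeight=100 and height=10000 but only 5 indexed heights,
        	// we visit the 5 indexed heights instead of iterating over ~9900 candidate heights.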
   269  	if uint64(len(s.byHeight)) < height-s.lowestHeight {
   270  		for h := range s.byHeight {
   271  			if h < height {
   272  				s.removeByHeight(h)
   273  			}
   274  		}
   275  	} else {
   276  		for h := s.lowestHeight; h < height; h++ {
   277  			s.removeByHeight(h)
   278  		}
   279  	}
   280  
   281  	return nil
   282  }
   283  
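        // removeByHeight drops all seals for blocks at the given height from the wrapper's secondary
        // indices (`sealsForBlock` and `byHeight`). The caller must hold the write lock.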
   284  func (s *ExecForkSuppressor) removeByHeight(height uint64) {
   285  	for blockID := range s.byHeight[height] {
   286  		delete(s.sealsForBlock, blockID)
   287  	}
   288  	delete(s.byHeight, height)
   289  }
   290  
   291  // enforceValidChunks checks that the seal has a valid, non-zero number of chunks.
   292  // In case a seal fails the check, a detailed error message is logged and an
   293  // engine.InvalidInputError (sentinel error) is returned.
   294  func (s *ExecForkSuppressor) enforceValidChunks(irSeal *flow.IncorporatedResultSeal) error {
   295  	result := irSeal.IncorporatedResult.Result
   296  
   297  	if !result.ValidateChunksLength() {
   298  		scjson, errjson := json.Marshal(irSeal)
   299  		if errjson != nil {
   300  			return errjson
   301  		}
   302  		s.log.Error().
   303  			Str("seal", string(scjson)).
   304  			Msg("seal's execution result has no chunks")
   305  		return engine.NewInvalidInputErrorf("seal's execution result has no chunks: %x", result.ID())
   306  	}
   307  	return nil
   308  }
   309  
   310  // hasConsistentStateTransitions checks whether the execution results in the two given seals
   311  // have matching state transitions, i.e. identical initial and final state commitments.
   312  // It returns true if the results are the same, or if they differ but their state transitions match;
   313  // otherwise it returns false. Handling of a detected execution fork (clearing the wrapped mempool,
   314  // setting the execForkDetected flag, persisting the evidence) is done by filterConflictingSeals.
   315  // The function assumes the execution results in the seals have a non-zero number of chunks.
   318  func hasConsistentStateTransitions(irSeal, irSeal2 *flow.IncorporatedResultSeal) bool {
   319  	if irSeal.IncorporatedResult.Result.ID() == irSeal2.IncorporatedResult.Result.ID() {
   320  		// happy case: candidate seals are for the same result
   321  		return true
   322  	}
   323  	// the results for the seals have different IDs (!)
   324  	// => check whether initial and final state match in both seals
   325  
   326  	// unsafe: we assume validity of chunks has been checked before
   327  	irSeal1InitialState, _ := irSeal.IncorporatedResult.Result.InitialStateCommit()
   328  	irSeal1FinalState, _ := irSeal.IncorporatedResult.Result.FinalStateCommitment()
   329  	irSeal2InitialState, _ := irSeal2.IncorporatedResult.Result.InitialStateCommit()
   330  	irSeal2FinalState, _ := irSeal2.IncorporatedResult.Result.FinalStateCommitment()
   331  
   332  	if irSeal1InitialState != irSeal2InitialState || irSeal1FinalState != irSeal2FinalState {
   333  		log.Error().Msg("inconsistent seals for the same block")
   334  		return false
   335  	}
   336  	log.Warn().Msg("seals with different ID but consistent state transition")
   337  	return true
   338  }
   339  
   340  // checkExecutionForkEvidence checks the database for stored evidence of an
   341  // execution fork. Returns the stored evidence, or nil if none is present.
   342  func checkExecutionForkEvidence(db *badger.DB) ([]*flow.IncorporatedResultSeal, error) {
   343  	var conflictingSeals []*flow.IncorporatedResultSeal
   344  	err := db.View(func(tx *badger.Txn) error {
   345  		err := operation.RetrieveExecutionForkEvidence(&conflictingSeals)(tx)
   346  		if errors.Is(err, storage.ErrNotFound) {
   347  			return nil // no evidence in the database; conflictingSeals is still a nil slice
   348  		}
   349  		if err != nil {
   350  			return fmt.Errorf("failed to load evidence whether or not an execution fork occurred: %w", err)
   351  		}
   352  		return nil
   353  	})
   354  	return conflictingSeals, err
   355  }
   356  
   357  // storeExecutionForkEvidence stores the provided seals in the database
   358  // as evidence for an execution fork.
   359  func storeExecutionForkEvidence(conflictingSeals []*flow.IncorporatedResultSeal, db *badger.DB) error {
   360  	err := operation.RetryOnConflict(db.Update, func(tx *badger.Txn) error {
   361  		err := operation.InsertExecutionForkEvidence(conflictingSeals)(tx)
   362  		if errors.Is(err, storage.ErrAlreadyExists) {
   363  			// some evidence about an execution fork is already stored;
   364  			// we only keep the first evidence => nothing more to do
   365  			return nil
   366  		}
   367  		if err != nil {
   368  			return fmt.Errorf("failed to store evidence about execution fork: %w", err)
   369  		}
   370  		return nil
   371  	})
   372  	return err
   373  }
   374  
   375  // filterConflictingSeals filters the provided seals by checking whether there are conflicting seals for the same block.
   376  // For every block, we check whether the first seal has the same state transition as the others. Multiple seals for the
   377  // same block are allowed, but their state transitions must be identical. Upon detecting a seal with an inconsistent
   378  // state transition, we clear our mempool, stop accepting and serving seals, and store the execution fork evidence in
   379  // the database. The creator of the mempool is notified via the onExecFork callback.
   380  func (s *ExecForkSuppressor) filterConflictingSeals(sealsByBlockID map[flow.Identifier]sealsList) sealsList {
   381  	var result sealsList
   382  	for _, sealsInBlock := range sealsByBlockID {
   383  		if len(sealsInBlock) > 1 {
   384  			// multiple seals for this block are in the mempool
   385  			// => enforce that their results' state transitions are consistent with the first seal
   386  			var conflictingSeals sealsList
   387  			candidateSeal := sealsInBlock[0]
   388  			for _, otherSeal := range sealsInBlock[1:] {
   389  				if !hasConsistentStateTransitions(candidateSeal, otherSeal) {
   390  					conflictingSeals = append(conflictingSeals, otherSeal)
   391  				}
   392  			}
   393  			// check if inconsistent state transition detected
   394  			if len(conflictingSeals) > 0 {
   395  				s.execForkDetected.Store(true)
   396  				s.Clear()
   397  				conflictingSeals = append(sealsList{candidateSeal}, conflictingSeals...)
   398  				err := storeExecutionForkEvidence(conflictingSeals, s.db)
   399  				if err != nil {
   400  					panic(fmt.Sprintf("failed to store execution fork evidence: %v", err))
   401  				}
   402  				s.onExecFork(conflictingSeals)
   403  				return nil
   404  			}
   405  		}
   406  		result = append(result, sealsInBlock...)
   407  	}
   408  	return result
   409  }