github.com/koko1123/flow-go-1@v0.29.6/engine/execution/ingestion/stop_control.go (about)

     1  package ingestion
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"sync"
     7  
     8  	"github.com/rs/zerolog"
     9  
    10  	"github.com/koko1123/flow-go-1/engine/execution/state"
    11  	"github.com/koko1123/flow-go-1/model/flow"
    12  )
    13  
    14  // StopControl is a specialized component used by ingestion.Engine to encapsulate
    15  // control of pausing/stopping blocks execution.
    16  // It is intended to work tightly with the Engine, not as a general mechanism or interface.
    17  // StopControl follows states described in StopState
    18  type StopControl struct {
    19  	sync.RWMutex
    20  	// desired stop height, the first value new version should be used, so this height WON'T
    21  	// be executed
    22  	height uint64
    23  
    24  	// if the node should crash or just pause after reaching stop height
    25  	crash              bool
    26  	stopAfterExecuting flow.Identifier
    27  
    28  	log   zerolog.Logger
    29  	state StopControlState
    30  
    31  	// used to prevent setting stop height to block which has already been executed
    32  	highestExecutingHeight uint64
    33  }
    34  
    35  type StopControlState byte
    36  
    37  const (
    38  	// StopControlOff default state, envisioned to be used most of the time. Stopping module is simply off,
    39  	// blocks will be processed "as usual".
    40  	StopControlOff StopControlState = iota
    41  
    42  	// StopControlSet means stop height is set but not reached yet, and nothing related to stopping happened yet.
    43  	// We could still go back to StopControlOff or progress to StopControlCommenced.
    44  	StopControlSet
    45  
    46  	// StopControlCommenced indicates that stopping process has commenced and no parameters can be changed anymore.
    47  	// For example, blocks at or above stop height has been received, but finalization didn't reach stop height yet.
    48  	// It can only progress to StopControlPaused
    49  	StopControlCommenced
    50  
    51  	// StopControlPaused means EN has stopped processing blocks. It can happen by reaching the set stopping `height`, or
    52  	// if the node was started in pause mode.
    53  	// It is a final state and cannot be changed
    54  	StopControlPaused
    55  )
    56  
    57  // NewStopControl creates new empty NewStopControl
    58  func NewStopControl(log zerolog.Logger, paused bool, lastExecutedHeight uint64) *StopControl {
    59  	state := StopControlOff
    60  	if paused {
    61  		state = StopControlPaused
    62  	}
    63  	log.Debug().Msgf("created StopControl module with paused = %t", paused)
    64  	return &StopControl{
    65  		log:                    log,
    66  		state:                  state,
    67  		highestExecutingHeight: lastExecutedHeight,
    68  	}
    69  }
    70  
    71  // GetState returns current state of StopControl module
    72  func (s *StopControl) GetState() StopControlState {
    73  	s.RLock()
    74  	defer s.RUnlock()
    75  	return s.state
    76  }
    77  
    78  // IsPaused returns true is block execution has been paused
    79  func (s *StopControl) IsPaused() bool {
    80  	s.RLock()
    81  	defer s.RUnlock()
    82  	return s.state == StopControlPaused
    83  }
    84  
    85  // SetStopHeight sets new stop height and crash mode, and return old values:
    86  //   - height
    87  //   - crash
    88  //
    89  // Returns error if the stopping process has already commenced, new values will be rejected.
    90  func (s *StopControl) SetStopHeight(height uint64, crash bool) (uint64, bool, error) {
    91  	s.Lock()
    92  	defer s.Unlock()
    93  
    94  	oldHeight := s.height
    95  	oldCrash := s.crash
    96  
    97  	if s.state == StopControlCommenced {
    98  		return oldHeight, oldCrash, fmt.Errorf("cannot update stop height, stopping commenced for height %d with crash=%t", oldHeight, oldCrash)
    99  	}
   100  
   101  	if s.state == StopControlPaused {
   102  		return oldHeight, oldCrash, fmt.Errorf("cannot update stop height, already paused")
   103  	}
   104  
   105  	// +1 because we track last executing height, so +1 is the lowest possible block to stop
   106  	if height <= s.highestExecutingHeight+1 {
   107  		return oldHeight, oldCrash, fmt.Errorf("cannot update stop height, given height %d at or below last executed %d", height, s.highestExecutingHeight)
   108  	}
   109  
   110  	s.log.Info().
   111  		Int8("previous_state", int8(s.state)).Int8("new_state", int8(StopControlSet)).
   112  		Uint64("height", height).Bool("crash", crash).
   113  		Uint64("old_height", oldHeight).Bool("old_crash", oldCrash).Msg("new stop height set")
   114  
   115  	s.state = StopControlSet
   116  
   117  	s.height = height
   118  	s.crash = crash
   119  	s.stopAfterExecuting = flow.ZeroID
   120  
   121  	return oldHeight, oldCrash, nil
   122  }
   123  
   124  // GetStopHeight returns:
   125  //   - height
   126  //   - crash
   127  //
   128  // Values are undefined if they were not previously set
   129  func (s *StopControl) GetStopHeight() (uint64, bool) {
   130  	s.RLock()
   131  	defer s.RUnlock()
   132  
   133  	return s.height, s.crash
   134  }
   135  
   136  // blockProcessable should be called when new block is processable.
   137  // It returns boolean indicating if the block should be processed.
   138  func (s *StopControl) blockProcessable(b *flow.Header) bool {
   139  
   140  	s.Lock()
   141  	defer s.Unlock()
   142  
   143  	if s.state == StopControlOff {
   144  		return true
   145  	}
   146  
   147  	if s.state == StopControlPaused {
   148  		return false
   149  	}
   150  
   151  	// skips blocks at or above requested stop height
   152  	if b.Height >= s.height {
   153  		s.log.Warn().Int8("previous_state", int8(s.state)).Int8("new_state", int8(StopControlCommenced)).Msgf("Skipping execution of %s at height %d because stop has been requested at height %d", b.ID(), b.Height, s.height)
   154  		s.state = StopControlCommenced // if block was skipped, move into commenced state
   155  		return false
   156  	}
   157  
   158  	return true
   159  }
   160  
   161  // blockFinalized should be called when a block is marked as finalized
   162  func (s *StopControl) blockFinalized(ctx context.Context, execState state.ReadOnlyExecutionState, h *flow.Header) {
   163  
   164  	s.Lock()
   165  	defer s.Unlock()
   166  
   167  	if s.state == StopControlOff || s.state == StopControlPaused {
   168  		return
   169  	}
   170  
   171  	// Once finalization reached stop height we can be sure no other fork will be valid at this height,
   172  	// if this block's parent has been executed, we are safe to stop or crash.
   173  	// This will happen during normal execution, where blocks are executed before they are finalized.
   174  	// However, it is possible that EN block computation progress can fall behind. In this case,
   175  	// we want to crash only after the execution reached the stop height.
   176  	if h.Height == s.height {
   177  
   178  		executed, err := state.IsBlockExecuted(ctx, execState, h.ParentID)
   179  		if err != nil {
   180  			// any error here would indicate unexpected storage error, so we crash the node
   181  			s.log.Fatal().Err(err).Str("block_id", h.ID().String()).Msg("failed to check if the block has been executed")
   182  			return
   183  		}
   184  
   185  		if executed {
   186  			s.stopExecution()
   187  		} else {
   188  			s.stopAfterExecuting = h.ParentID
   189  			s.log.Info().Msgf("Node scheduled to stop executing after executing block %s at height %d", s.stopAfterExecuting.String(), h.Height-1)
   190  		}
   191  
   192  	}
   193  
   194  }
   195  
   196  // blockExecuted should be called after a block has finished execution
   197  func (s *StopControl) blockExecuted(h *flow.Header) {
   198  	s.Lock()
   199  	defer s.Unlock()
   200  
   201  	if s.state == StopControlPaused || s.state == StopControlOff {
   202  		return
   203  	}
   204  
   205  	if s.stopAfterExecuting == h.ID() {
   206  		// double check. Even if requested stop height has been changed multiple times,
   207  		// as long as it matches this block we are safe to terminate
   208  
   209  		if h.Height == s.height-1 {
   210  			s.stopExecution()
   211  		} else {
   212  			s.log.Warn().Msgf("Inconsistent stopping state. Scheduled to stop after executing block ID %s and height %d, but this block has a height %d. ",
   213  				h.ID().String(), s.height-1, h.Height)
   214  		}
   215  	}
   216  }
   217  
   218  func (s *StopControl) stopExecution() {
   219  	if s.crash {
   220  		s.log.Fatal().Msgf("Crashing as finalization reached requested stop height %d and the highest executed block is (%d - 1)", s.height, s.height)
   221  	} else {
   222  		s.log.Debug().Int8("previous_state", int8(s.state)).Int8("new_state", int8(StopControlPaused)).Msg("StopControl state transition")
   223  		s.state = StopControlPaused
   224  		s.log.Warn().Msgf("Pausing execution as finalization reached requested stop height %d", s.height)
   225  	}
   226  }
   227  
   228  // executingBlockHeight should be called while execution of height starts, used for internal tracking of the minimum
   229  // possible value of height
   230  func (s *StopControl) executingBlockHeight(height uint64) {
   231  	if s.state == StopControlPaused {
   232  		return
   233  	}
   234  
   235  	// updating the highest executing height, which will be used to reject setting stop height that
   236  	// is too low.
   237  	if height > s.highestExecutingHeight {
   238  		s.highestExecutingHeight = height
   239  	}
   240  }