github.com/koko1123/flow-go-1@v0.29.6/engine/consensus/dkg/reactor_engine.go (about)

     1  package dkg
     2  
     3  import (
     4  	"crypto/rand"
     5  	"errors"
     6  	"fmt"
     7  
     8  	"github.com/rs/zerolog"
     9  
    10  	"github.com/koko1123/flow-go-1/engine"
    11  	"github.com/koko1123/flow-go-1/model/flow"
    12  	"github.com/koko1123/flow-go-1/model/flow/filter"
    13  	"github.com/koko1123/flow-go-1/module"
    14  	dkgmodule "github.com/koko1123/flow-go-1/module/dkg"
    15  	"github.com/koko1123/flow-go-1/state/protocol"
    16  	"github.com/koko1123/flow-go-1/state/protocol/events"
    17  	"github.com/koko1123/flow-go-1/storage"
    18  	"github.com/onflow/flow-go/crypto"
    19  )
    20  
    21  // DefaultPollStep specifies the default number of views that separate two calls
    22  // to the DKG smart-contract to read broadcast messages.
    23  const DefaultPollStep = 10
    24  
    25  // dkgInfo consolidates information about the current DKG protocol instance.
    26  type dkgInfo struct {
    27  	identities      flow.IdentityList
    28  	phase1FinalView uint64
    29  	phase2FinalView uint64
    30  	phase3FinalView uint64
    31  	// seed must be generated for each DKG instance, using a randomness source that is independent from all other nodes.
    32  	seed []byte
    33  }
    34  
    35  // ReactorEngine is an engine that reacts to chain events to start new DKG runs,
    36  // and manage subsequent phase transitions. Any unexpected error triggers a
    37  // panic as it would undermine the security of the protocol.
    38  type ReactorEngine struct {
    39  	events.Noop
    40  	unit              *engine.Unit
    41  	log               zerolog.Logger
    42  	me                module.Local
    43  	State             protocol.State
    44  	dkgState          storage.DKGState
    45  	controller        module.DKGController
    46  	controllerFactory module.DKGControllerFactory
    47  	viewEvents        events.Views
    48  	pollStep          uint64
    49  }
    50  
    51  // NewReactorEngine return a new ReactorEngine.
    52  func NewReactorEngine(
    53  	log zerolog.Logger,
    54  	me module.Local,
    55  	state protocol.State,
    56  	dkgState storage.DKGState,
    57  	controllerFactory module.DKGControllerFactory,
    58  	viewEvents events.Views,
    59  ) *ReactorEngine {
    60  
    61  	logger := log.With().
    62  		Str("engine", "dkg_reactor").
    63  		Logger()
    64  
    65  	return &ReactorEngine{
    66  		unit:              engine.NewUnit(),
    67  		log:               logger,
    68  		me:                me,
    69  		State:             state,
    70  		dkgState:          dkgState,
    71  		controllerFactory: controllerFactory,
    72  		viewEvents:        viewEvents,
    73  		pollStep:          DefaultPollStep,
    74  	}
    75  }
    76  
    77  // Ready implements the module ReadyDoneAware interface. It returns a channel
    78  // that will close when the engine has successfully
    79  // started.
    80  func (e *ReactorEngine) Ready() <-chan struct{} {
    81  	return e.unit.Ready(func() {
    82  		// If we are starting up in the EpochSetup phase, try to start the DKG.
    83  		// If the DKG for this epoch has been started previously, we will exit
    84  		// and fail this epoch's DKG.
    85  		snap := e.State.Final()
    86  
    87  		phase, err := snap.Phase()
    88  		if err != nil {
    89  			// unexpected storage-level error
    90  			e.log.Fatal().Err(err).Msg("failed to check epoch phase when starting DKG reactor engine")
    91  			return
    92  		}
    93  		if phase != flow.EpochPhaseSetup {
    94  			// start up in a non-setup phase - this is the typical path
    95  			return
    96  		}
    97  
    98  		currentCounter, err := snap.Epochs().Current().Counter()
    99  		if err != nil {
   100  			// unexpected storage-level error
   101  			e.log.Fatal().Err(err).Msg("failed to retrieve current epoch counter when starting DKG reactor engine")
   102  			return
   103  		}
   104  		first, err := snap.Head()
   105  		if err != nil {
   106  			// unexpected storage-level error
   107  			e.log.Fatal().Err(err).Msg("failed to retrieve finalized header when starting DKG reactor engine")
   108  			return
   109  		}
   110  
   111  		e.startDKGForEpoch(currentCounter, first)
   112  	})
   113  }
   114  
   115  // Done implements the module ReadyDoneAware interface. It returns a channel
   116  // that will close when the engine has successfully stopped.
   117  func (e *ReactorEngine) Done() <-chan struct{} {
   118  	return e.unit.Done()
   119  }
   120  
   121  // EpochSetupPhaseStarted handles the EpochSetupPhaseStarted protocol event by
   122  // starting the DKG process.
   123  func (e *ReactorEngine) EpochSetupPhaseStarted(currentEpochCounter uint64, first *flow.Header) {
   124  	e.startDKGForEpoch(currentEpochCounter, first)
   125  }
   126  
   127  // EpochCommittedPhaseStarted handles the EpochCommittedPhaseStarted protocol
   128  // event by checking the consistency of our locally computed key share.
   129  func (e *ReactorEngine) EpochCommittedPhaseStarted(currentEpochCounter uint64, first *flow.Header) {
   130  	e.handleEpochCommittedPhaseStarted(currentEpochCounter, first)
   131  }
   132  
   133  // startDKGForEpoch starts the DKG instance for the given epoch, only if we have
   134  // never started the DKG during setup phase for the given epoch. This allows consensus nodes which
   135  // boot from a state snapshot within the EpochSetup phase to run the DKG.
   136  //
   137  // It starts a new controller for the epoch and registers the triggers to regularly
   138  // query the DKG smart-contract and transition between phases at the specified views.
   139  func (e *ReactorEngine) startDKGForEpoch(currentEpochCounter uint64, first *flow.Header) {
   140  
   141  	firstID := first.ID()
   142  	nextEpochCounter := currentEpochCounter + 1
   143  	log := e.log.With().
   144  		Uint64("cur_epoch", currentEpochCounter). // the epoch we are in the middle of
   145  		Uint64("next_epoch", nextEpochCounter).   // the epoch we are running the DKG for
   146  		Uint64("first_block_view", first.View).   // view of first block in EpochSetup phase
   147  		Hex("first_block_id", firstID[:]).        // id of first block in EpochSetup phase
   148  		Logger()
   149  
   150  	// if we have started the dkg for this epoch already, exit
   151  	started, err := e.dkgState.GetDKGStarted(nextEpochCounter)
   152  	if err != nil {
   153  		// unexpected storage-level error
   154  		log.Fatal().Err(err).Msg("could not check whether DKG is started")
   155  	}
   156  	if started {
   157  		log.Warn().Msg("DKG started before, skipping starting the DKG for this epoch")
   158  		return
   159  	}
   160  
   161  	// flag that we are starting the dkg for this epoch
   162  	err = e.dkgState.SetDKGStarted(nextEpochCounter)
   163  	if err != nil {
   164  		// unexpected storage-level error
   165  		log.Fatal().Err(err).Msg("could not set dkg started")
   166  	}
   167  
   168  	curDKGInfo, err := e.getDKGInfo(firstID)
   169  	if err != nil {
   170  		// unexpected storage-level error
   171  		log.Fatal().Err(err).Msg("could not retrieve epoch info")
   172  	}
   173  
   174  	committee := curDKGInfo.identities.Filter(filter.IsVotingConsensusCommitteeMember)
   175  
   176  	log.Info().
   177  		Uint64("phase1", curDKGInfo.phase1FinalView).
   178  		Uint64("phase2", curDKGInfo.phase2FinalView).
   179  		Uint64("phase3", curDKGInfo.phase3FinalView).
   180  		Interface("members", committee.NodeIDs()).
   181  		Msg("epoch info")
   182  
   183  	if _, ok := committee.GetIndex(e.me.NodeID()); !ok {
   184  		// node not found in DKG committee bypass starting the DKG
   185  		log.Warn().Str("node_id", e.me.NodeID().String()).Msg("failed to find our node ID in the DKG committee skip starting DKG engine, this node will not participate in consensus after the next epoch starts")
   186  		return
   187  	}
   188  	controller, err := e.controllerFactory.Create(
   189  		dkgmodule.CanonicalInstanceID(first.ChainID, nextEpochCounter),
   190  		committee,
   191  		curDKGInfo.seed,
   192  	)
   193  	if err != nil {
   194  		// no expected errors in controller factory
   195  		log.Fatal().Err(err).Msg("could not create DKG controller")
   196  	}
   197  	e.controller = controller
   198  
   199  	e.unit.Launch(func() {
   200  		log.Info().Msg("DKG Run")
   201  		err := e.controller.Run()
   202  		if err != nil {
   203  			// TODO handle crypto sentinels and do not crash here
   204  			log.Fatal().Err(err).Msg("DKG Run error")
   205  		}
   206  	})
   207  
   208  	// NOTE:
   209  	// We register two callbacks for views that mark a state transition: one for
   210  	// polling broadcast messages, and one for triggering the phase transition.
   211  	// It is essential that all polled broadcast messages are processed before
   212  	// starting the phase transition. Here we register the polling callback
   213  	// before the phase transition, which guarantees that it will be called
   214  	// before because callbacks for the same views are executed on a FIFO basis.
   215  	// Moreover, the poll callback does not return until all received messages
   216  	// are processed by the underlying DKG controller (as guaranteed by the
   217  	// specifications and implementations of the DKGBroker and DKGController
   218  	// interfaces).
   219  
   220  	for view := curDKGInfo.phase1FinalView; view > first.View; view -= e.pollStep {
   221  		e.registerPoll(view)
   222  	}
   223  	e.registerPhaseTransition(curDKGInfo.phase1FinalView, dkgmodule.Phase1, e.controller.EndPhase1)
   224  
   225  	for view := curDKGInfo.phase2FinalView; view > curDKGInfo.phase1FinalView; view -= e.pollStep {
   226  		e.registerPoll(view)
   227  	}
   228  	e.registerPhaseTransition(curDKGInfo.phase2FinalView, dkgmodule.Phase2, e.controller.EndPhase2)
   229  
   230  	for view := curDKGInfo.phase3FinalView; view > curDKGInfo.phase2FinalView; view -= e.pollStep {
   231  		e.registerPoll(view)
   232  	}
   233  	e.registerPhaseTransition(curDKGInfo.phase3FinalView, dkgmodule.Phase3, e.end(nextEpochCounter))
   234  }
   235  
   236  // handleEpochCommittedPhaseStarted is invoked upon the transition to the EpochCommitted
   237  // phase, when the canonical beacon key vector is incorporated into the protocol state.
   238  //
   239  // This function checks that the local DKG completed and that our locally computed
   240  // key share is consistent with the canonical key vector. When this function returns,
   241  // an end state for the just-completed DKG is guaranteed to be stored (if not, the
   242  // program will crash). Since this function is invoked synchronously before the end
   243  // of the current epoch, this guarantees that when we reach the end of the current epoch
   244  // we will either have a usable beacon key (successful DKG) or a DKG failure end state
   245  // stored, so we can safely fall back to using our staking key.
   246  //
   247  // CAUTION: This function is not safe for concurrent use. This is not enforced within
   248  // the ReactorEngine - instead we rely on the protocol event emission being single-threaded
   249  func (e *ReactorEngine) handleEpochCommittedPhaseStarted(currentEpochCounter uint64, firstBlock *flow.Header) {
   250  
   251  	// the DKG we have just completed produces keys that we will use in the next epoch
   252  	nextEpochCounter := currentEpochCounter + 1
   253  
   254  	log := e.log.With().
   255  		Uint64("cur_epoch", currentEpochCounter). // the epoch we are in the middle of
   256  		Uint64("next_epoch", nextEpochCounter).   // the epoch the just-finished DKG was preparing for
   257  		Logger()
   258  
   259  	// Check whether we have already set the end state for this DKG.
   260  	// This can happen if the DKG failed locally, if we failed to generate
   261  	// a local private beacon key, or if we crashed while performing this
   262  	// check previously.
   263  	endState, err := e.dkgState.GetDKGEndState(nextEpochCounter)
   264  	if err == nil {
   265  		log.Warn().Msgf("checking beacon key consistency: exiting because dkg end state was already set: %s", endState.String())
   266  		return
   267  	}
   268  
   269  	// Since epoch phase transitions are emitted when the first block of the new
   270  	// phase is finalized, the block's snapshot is guaranteed to already be
   271  	// accessible in the protocol state at this point (even though the Badger
   272  	// transaction finalizing the block has not been committed yet).
   273  	nextDKG, err := e.State.AtBlockID(firstBlock.ID()).Epochs().Next().DKG()
   274  	if err != nil {
   275  		// CAUTION: this should never happen, indicates a storage failure or corruption
   276  		log.Fatal().Err(err).Msg("checking beacon key consistency: could not retrieve next DKG info")
   277  		return
   278  	}
   279  
   280  	myBeaconPrivKey, err := e.dkgState.RetrieveMyBeaconPrivateKey(nextEpochCounter)
   281  	if errors.Is(err, storage.ErrNotFound) {
   282  		log.Warn().Msg("checking beacon key consistency: no key found")
   283  		err := e.dkgState.SetDKGEndState(nextEpochCounter, flow.DKGEndStateNoKey)
   284  		if err != nil {
   285  			log.Fatal().Err(err).Msg("failed to set dkg end state")
   286  		}
   287  		return
   288  	} else if err != nil {
   289  		log.Fatal().Err(err).Msg("checking beacon key consistency: could not retrieve beacon private key for next epoch")
   290  		return
   291  	}
   292  
   293  	nextDKGPubKey, err := nextDKG.KeyShare(e.me.NodeID())
   294  	if err != nil {
   295  		log.Fatal().Err(err).Msg("checking beacon key consistency: could not retrieve my beacon public key for next epoch")
   296  		return
   297  	}
   298  	localPubKey := myBeaconPrivKey.PublicKey()
   299  
   300  	// we computed a local beacon key but it is inconsistent with our canonical
   301  	// public key - therefore it is unsafe for use
   302  	if !nextDKGPubKey.Equals(localPubKey) {
   303  		log.Warn().
   304  			Str("computed_beacon_pub_key", localPubKey.String()).
   305  			Str("canonical_beacon_pub_key", nextDKGPubKey.String()).
   306  			Msg("checking beacon key consistency: locally computed beacon public key does not match beacon public key for next epoch")
   307  		err := e.dkgState.SetDKGEndState(nextEpochCounter, flow.DKGEndStateInconsistentKey)
   308  		if err != nil {
   309  			log.Fatal().Err(err).Msg("failed to set dkg end state")
   310  		}
   311  		return
   312  	}
   313  
   314  	err = e.dkgState.SetDKGEndState(nextEpochCounter, flow.DKGEndStateSuccess)
   315  	if err != nil {
   316  		e.log.Fatal().Err(err).Msg("failed to set dkg")
   317  	}
   318  	log.Info().Msgf("successfully ended DKG, my beacon pub key for epoch %d is %s", nextEpochCounter, localPubKey)
   319  }
   320  
   321  func (e *ReactorEngine) getDKGInfo(firstBlockID flow.Identifier) (*dkgInfo, error) {
   322  	currEpoch := e.State.AtBlockID(firstBlockID).Epochs().Current()
   323  	nextEpoch := e.State.AtBlockID(firstBlockID).Epochs().Next()
   324  
   325  	identities, err := nextEpoch.InitialIdentities()
   326  	if err != nil {
   327  		return nil, fmt.Errorf("could not retrieve epoch identities: %w", err)
   328  	}
   329  	phase1Final, phase2Final, phase3Final, err := protocol.DKGPhaseViews(currEpoch)
   330  	if err != nil {
   331  		return nil, fmt.Errorf("could not retrieve epoch dkg final views: %w", err)
   332  	}
   333  	seed := make([]byte, crypto.SeedMinLenDKG)
   334  	_, err = rand.Read(seed)
   335  	if err != nil {
   336  		return nil, fmt.Errorf("could not generate random seed: %w", err)
   337  	}
   338  
   339  	info := &dkgInfo{
   340  		identities:      identities,
   341  		phase1FinalView: phase1Final,
   342  		phase2FinalView: phase2Final,
   343  		phase3FinalView: phase3Final,
   344  		seed:            seed,
   345  	}
   346  	return info, nil
   347  }
   348  
   349  // registerPoll instructs the engine to query the DKG smart-contract for new
   350  // broadcast messages at the specified view.
   351  func (e *ReactorEngine) registerPoll(view uint64) {
   352  	e.viewEvents.OnView(view, func(header *flow.Header) {
   353  		e.unit.Launch(func() {
   354  			e.unit.Lock()
   355  			defer e.unit.Unlock()
   356  
   357  			blockID := header.ID()
   358  			log := e.log.With().
   359  				Uint64("view", view).
   360  				Uint64("height", header.Height).
   361  				Hex("block_id", blockID[:]).
   362  				Logger()
   363  
   364  			log.Info().Msg("polling DKG smart-contract...")
   365  			err := e.controller.Poll(header.ID())
   366  			if err != nil {
   367  				log.Err(err).Msg("failed to poll DKG smart-contract")
   368  			}
   369  		})
   370  	})
   371  }
   372  
   373  // registerPhaseTransition instructs the engine to change phases at the
   374  // specified view.
   375  func (e *ReactorEngine) registerPhaseTransition(view uint64, fromState dkgmodule.State, phaseTransition func() error) {
   376  	e.viewEvents.OnView(view, func(header *flow.Header) {
   377  		e.unit.Launch(func() {
   378  			e.unit.Lock()
   379  			defer e.unit.Unlock()
   380  
   381  			blockID := header.ID()
   382  			log := e.log.With().
   383  				Uint64("view", view).
   384  				Hex("block_id", blockID[:]).
   385  				Logger()
   386  
   387  			log.Info().Msgf("ending %s...", fromState)
   388  			err := phaseTransition()
   389  			if err != nil {
   390  				log.Fatal().Err(err).Msgf("node failed to end %s", fromState)
   391  			}
   392  			log.Info().Msgf("ended %s successfully", fromState)
   393  		})
   394  	})
   395  }
   396  
   397  // end returns a callback that is used to end the DKG protocol, save the
   398  // resulting private key to storage, and publish the other results to the DKG
   399  // smart-contract.
   400  func (e *ReactorEngine) end(nextEpochCounter uint64) func() error {
   401  	return func() error {
   402  
   403  		err := e.controller.End()
   404  		if crypto.IsDKGFailureError(err) {
   405  			e.log.Warn().Err(err).Msgf("node %s with index %d failed DKG locally", e.me.NodeID(), e.controller.GetIndex())
   406  			err := e.dkgState.SetDKGEndState(nextEpochCounter, flow.DKGEndStateDKGFailure)
   407  			if err != nil {
   408  				return fmt.Errorf("failed to set dkg end state following dkg end error: %w", err)
   409  			}
   410  		} else if err != nil {
   411  			return fmt.Errorf("unknown error ending the dkg: %w", err)
   412  		}
   413  
   414  		privateShare, _, _ := e.controller.GetArtifacts()
   415  		if privateShare != nil {
   416  			// we only store our key if one was computed
   417  			err = e.dkgState.InsertMyBeaconPrivateKey(nextEpochCounter, privateShare)
   418  			if err != nil {
   419  				return fmt.Errorf("could not save beacon private key in db: %w", err)
   420  			}
   421  		}
   422  
   423  		err = e.controller.SubmitResult()
   424  		if err != nil {
   425  			return fmt.Errorf("couldn't publish DKG results: %w", err)
   426  		}
   427  
   428  		return nil
   429  	}
   430  }