github.com/koko1123/flow-go-1@v0.29.6/engine/consensus/dkg/reactor_engine.go (about) 1 package dkg 2 3 import ( 4 "crypto/rand" 5 "errors" 6 "fmt" 7 8 "github.com/rs/zerolog" 9 10 "github.com/koko1123/flow-go-1/engine" 11 "github.com/koko1123/flow-go-1/model/flow" 12 "github.com/koko1123/flow-go-1/model/flow/filter" 13 "github.com/koko1123/flow-go-1/module" 14 dkgmodule "github.com/koko1123/flow-go-1/module/dkg" 15 "github.com/koko1123/flow-go-1/state/protocol" 16 "github.com/koko1123/flow-go-1/state/protocol/events" 17 "github.com/koko1123/flow-go-1/storage" 18 "github.com/onflow/flow-go/crypto" 19 ) 20 21 // DefaultPollStep specifies the default number of views that separate two calls 22 // to the DKG smart-contract to read broadcast messages. 23 const DefaultPollStep = 10 24 25 // dkgInfo consolidates information about the current DKG protocol instance. 26 type dkgInfo struct { 27 identities flow.IdentityList 28 phase1FinalView uint64 29 phase2FinalView uint64 30 phase3FinalView uint64 31 // seed must be generated for each DKG instance, using a randomness source that is independent from all other nodes. 32 seed []byte 33 } 34 35 // ReactorEngine is an engine that reacts to chain events to start new DKG runs, 36 // and manage subsequent phase transitions. Any unexpected error triggers a 37 // panic as it would undermine the security of the protocol. 38 type ReactorEngine struct { 39 events.Noop 40 unit *engine.Unit 41 log zerolog.Logger 42 me module.Local 43 State protocol.State 44 dkgState storage.DKGState 45 controller module.DKGController 46 controllerFactory module.DKGControllerFactory 47 viewEvents events.Views 48 pollStep uint64 49 } 50 51 // NewReactorEngine return a new ReactorEngine. 52 func NewReactorEngine( 53 log zerolog.Logger, 54 me module.Local, 55 state protocol.State, 56 dkgState storage.DKGState, 57 controllerFactory module.DKGControllerFactory, 58 viewEvents events.Views, 59 ) *ReactorEngine { 60 61 logger := log.With(). 62 Str("engine", "dkg_reactor"). 63 Logger() 64 65 return &ReactorEngine{ 66 unit: engine.NewUnit(), 67 log: logger, 68 me: me, 69 State: state, 70 dkgState: dkgState, 71 controllerFactory: controllerFactory, 72 viewEvents: viewEvents, 73 pollStep: DefaultPollStep, 74 } 75 } 76 77 // Ready implements the module ReadyDoneAware interface. It returns a channel 78 // that will close when the engine has successfully 79 // started. 80 func (e *ReactorEngine) Ready() <-chan struct{} { 81 return e.unit.Ready(func() { 82 // If we are starting up in the EpochSetup phase, try to start the DKG. 83 // If the DKG for this epoch has been started previously, we will exit 84 // and fail this epoch's DKG. 85 snap := e.State.Final() 86 87 phase, err := snap.Phase() 88 if err != nil { 89 // unexpected storage-level error 90 e.log.Fatal().Err(err).Msg("failed to check epoch phase when starting DKG reactor engine") 91 return 92 } 93 if phase != flow.EpochPhaseSetup { 94 // start up in a non-setup phase - this is the typical path 95 return 96 } 97 98 currentCounter, err := snap.Epochs().Current().Counter() 99 if err != nil { 100 // unexpected storage-level error 101 e.log.Fatal().Err(err).Msg("failed to retrieve current epoch counter when starting DKG reactor engine") 102 return 103 } 104 first, err := snap.Head() 105 if err != nil { 106 // unexpected storage-level error 107 e.log.Fatal().Err(err).Msg("failed to retrieve finalized header when starting DKG reactor engine") 108 return 109 } 110 111 e.startDKGForEpoch(currentCounter, first) 112 }) 113 } 114 115 // Done implements the module ReadyDoneAware interface. It returns a channel 116 // that will close when the engine has successfully stopped. 117 func (e *ReactorEngine) Done() <-chan struct{} { 118 return e.unit.Done() 119 } 120 121 // EpochSetupPhaseStarted handles the EpochSetupPhaseStarted protocol event by 122 // starting the DKG process. 123 func (e *ReactorEngine) EpochSetupPhaseStarted(currentEpochCounter uint64, first *flow.Header) { 124 e.startDKGForEpoch(currentEpochCounter, first) 125 } 126 127 // EpochCommittedPhaseStarted handles the EpochCommittedPhaseStarted protocol 128 // event by checking the consistency of our locally computed key share. 129 func (e *ReactorEngine) EpochCommittedPhaseStarted(currentEpochCounter uint64, first *flow.Header) { 130 e.handleEpochCommittedPhaseStarted(currentEpochCounter, first) 131 } 132 133 // startDKGForEpoch starts the DKG instance for the given epoch, only if we have 134 // never started the DKG during setup phase for the given epoch. This allows consensus nodes which 135 // boot from a state snapshot within the EpochSetup phase to run the DKG. 136 // 137 // It starts a new controller for the epoch and registers the triggers to regularly 138 // query the DKG smart-contract and transition between phases at the specified views. 139 func (e *ReactorEngine) startDKGForEpoch(currentEpochCounter uint64, first *flow.Header) { 140 141 firstID := first.ID() 142 nextEpochCounter := currentEpochCounter + 1 143 log := e.log.With(). 144 Uint64("cur_epoch", currentEpochCounter). // the epoch we are in the middle of 145 Uint64("next_epoch", nextEpochCounter). // the epoch we are running the DKG for 146 Uint64("first_block_view", first.View). // view of first block in EpochSetup phase 147 Hex("first_block_id", firstID[:]). // id of first block in EpochSetup phase 148 Logger() 149 150 // if we have started the dkg for this epoch already, exit 151 started, err := e.dkgState.GetDKGStarted(nextEpochCounter) 152 if err != nil { 153 // unexpected storage-level error 154 log.Fatal().Err(err).Msg("could not check whether DKG is started") 155 } 156 if started { 157 log.Warn().Msg("DKG started before, skipping starting the DKG for this epoch") 158 return 159 } 160 161 // flag that we are starting the dkg for this epoch 162 err = e.dkgState.SetDKGStarted(nextEpochCounter) 163 if err != nil { 164 // unexpected storage-level error 165 log.Fatal().Err(err).Msg("could not set dkg started") 166 } 167 168 curDKGInfo, err := e.getDKGInfo(firstID) 169 if err != nil { 170 // unexpected storage-level error 171 log.Fatal().Err(err).Msg("could not retrieve epoch info") 172 } 173 174 committee := curDKGInfo.identities.Filter(filter.IsVotingConsensusCommitteeMember) 175 176 log.Info(). 177 Uint64("phase1", curDKGInfo.phase1FinalView). 178 Uint64("phase2", curDKGInfo.phase2FinalView). 179 Uint64("phase3", curDKGInfo.phase3FinalView). 180 Interface("members", committee.NodeIDs()). 181 Msg("epoch info") 182 183 if _, ok := committee.GetIndex(e.me.NodeID()); !ok { 184 // node not found in DKG committee bypass starting the DKG 185 log.Warn().Str("node_id", e.me.NodeID().String()).Msg("failed to find our node ID in the DKG committee skip starting DKG engine, this node will not participate in consensus after the next epoch starts") 186 return 187 } 188 controller, err := e.controllerFactory.Create( 189 dkgmodule.CanonicalInstanceID(first.ChainID, nextEpochCounter), 190 committee, 191 curDKGInfo.seed, 192 ) 193 if err != nil { 194 // no expected errors in controller factory 195 log.Fatal().Err(err).Msg("could not create DKG controller") 196 } 197 e.controller = controller 198 199 e.unit.Launch(func() { 200 log.Info().Msg("DKG Run") 201 err := e.controller.Run() 202 if err != nil { 203 // TODO handle crypto sentinels and do not crash here 204 log.Fatal().Err(err).Msg("DKG Run error") 205 } 206 }) 207 208 // NOTE: 209 // We register two callbacks for views that mark a state transition: one for 210 // polling broadcast messages, and one for triggering the phase transition. 211 // It is essential that all polled broadcast messages are processed before 212 // starting the phase transition. Here we register the polling callback 213 // before the phase transition, which guarantees that it will be called 214 // before because callbacks for the same views are executed on a FIFO basis. 215 // Moreover, the poll callback does not return until all received messages 216 // are processed by the underlying DKG controller (as guaranteed by the 217 // specifications and implementations of the DKGBroker and DKGController 218 // interfaces). 219 220 for view := curDKGInfo.phase1FinalView; view > first.View; view -= e.pollStep { 221 e.registerPoll(view) 222 } 223 e.registerPhaseTransition(curDKGInfo.phase1FinalView, dkgmodule.Phase1, e.controller.EndPhase1) 224 225 for view := curDKGInfo.phase2FinalView; view > curDKGInfo.phase1FinalView; view -= e.pollStep { 226 e.registerPoll(view) 227 } 228 e.registerPhaseTransition(curDKGInfo.phase2FinalView, dkgmodule.Phase2, e.controller.EndPhase2) 229 230 for view := curDKGInfo.phase3FinalView; view > curDKGInfo.phase2FinalView; view -= e.pollStep { 231 e.registerPoll(view) 232 } 233 e.registerPhaseTransition(curDKGInfo.phase3FinalView, dkgmodule.Phase3, e.end(nextEpochCounter)) 234 } 235 236 // handleEpochCommittedPhaseStarted is invoked upon the transition to the EpochCommitted 237 // phase, when the canonical beacon key vector is incorporated into the protocol state. 238 // 239 // This function checks that the local DKG completed and that our locally computed 240 // key share is consistent with the canonical key vector. When this function returns, 241 // an end state for the just-completed DKG is guaranteed to be stored (if not, the 242 // program will crash). Since this function is invoked synchronously before the end 243 // of the current epoch, this guarantees that when we reach the end of the current epoch 244 // we will either have a usable beacon key (successful DKG) or a DKG failure end state 245 // stored, so we can safely fall back to using our staking key. 246 // 247 // CAUTION: This function is not safe for concurrent use. This is not enforced within 248 // the ReactorEngine - instead we rely on the protocol event emission being single-threaded 249 func (e *ReactorEngine) handleEpochCommittedPhaseStarted(currentEpochCounter uint64, firstBlock *flow.Header) { 250 251 // the DKG we have just completed produces keys that we will use in the next epoch 252 nextEpochCounter := currentEpochCounter + 1 253 254 log := e.log.With(). 255 Uint64("cur_epoch", currentEpochCounter). // the epoch we are in the middle of 256 Uint64("next_epoch", nextEpochCounter). // the epoch the just-finished DKG was preparing for 257 Logger() 258 259 // Check whether we have already set the end state for this DKG. 260 // This can happen if the DKG failed locally, if we failed to generate 261 // a local private beacon key, or if we crashed while performing this 262 // check previously. 263 endState, err := e.dkgState.GetDKGEndState(nextEpochCounter) 264 if err == nil { 265 log.Warn().Msgf("checking beacon key consistency: exiting because dkg end state was already set: %s", endState.String()) 266 return 267 } 268 269 // Since epoch phase transitions are emitted when the first block of the new 270 // phase is finalized, the block's snapshot is guaranteed to already be 271 // accessible in the protocol state at this point (even though the Badger 272 // transaction finalizing the block has not been committed yet). 273 nextDKG, err := e.State.AtBlockID(firstBlock.ID()).Epochs().Next().DKG() 274 if err != nil { 275 // CAUTION: this should never happen, indicates a storage failure or corruption 276 log.Fatal().Err(err).Msg("checking beacon key consistency: could not retrieve next DKG info") 277 return 278 } 279 280 myBeaconPrivKey, err := e.dkgState.RetrieveMyBeaconPrivateKey(nextEpochCounter) 281 if errors.Is(err, storage.ErrNotFound) { 282 log.Warn().Msg("checking beacon key consistency: no key found") 283 err := e.dkgState.SetDKGEndState(nextEpochCounter, flow.DKGEndStateNoKey) 284 if err != nil { 285 log.Fatal().Err(err).Msg("failed to set dkg end state") 286 } 287 return 288 } else if err != nil { 289 log.Fatal().Err(err).Msg("checking beacon key consistency: could not retrieve beacon private key for next epoch") 290 return 291 } 292 293 nextDKGPubKey, err := nextDKG.KeyShare(e.me.NodeID()) 294 if err != nil { 295 log.Fatal().Err(err).Msg("checking beacon key consistency: could not retrieve my beacon public key for next epoch") 296 return 297 } 298 localPubKey := myBeaconPrivKey.PublicKey() 299 300 // we computed a local beacon key but it is inconsistent with our canonical 301 // public key - therefore it is unsafe for use 302 if !nextDKGPubKey.Equals(localPubKey) { 303 log.Warn(). 304 Str("computed_beacon_pub_key", localPubKey.String()). 305 Str("canonical_beacon_pub_key", nextDKGPubKey.String()). 306 Msg("checking beacon key consistency: locally computed beacon public key does not match beacon public key for next epoch") 307 err := e.dkgState.SetDKGEndState(nextEpochCounter, flow.DKGEndStateInconsistentKey) 308 if err != nil { 309 log.Fatal().Err(err).Msg("failed to set dkg end state") 310 } 311 return 312 } 313 314 err = e.dkgState.SetDKGEndState(nextEpochCounter, flow.DKGEndStateSuccess) 315 if err != nil { 316 e.log.Fatal().Err(err).Msg("failed to set dkg") 317 } 318 log.Info().Msgf("successfully ended DKG, my beacon pub key for epoch %d is %s", nextEpochCounter, localPubKey) 319 } 320 321 func (e *ReactorEngine) getDKGInfo(firstBlockID flow.Identifier) (*dkgInfo, error) { 322 currEpoch := e.State.AtBlockID(firstBlockID).Epochs().Current() 323 nextEpoch := e.State.AtBlockID(firstBlockID).Epochs().Next() 324 325 identities, err := nextEpoch.InitialIdentities() 326 if err != nil { 327 return nil, fmt.Errorf("could not retrieve epoch identities: %w", err) 328 } 329 phase1Final, phase2Final, phase3Final, err := protocol.DKGPhaseViews(currEpoch) 330 if err != nil { 331 return nil, fmt.Errorf("could not retrieve epoch dkg final views: %w", err) 332 } 333 seed := make([]byte, crypto.SeedMinLenDKG) 334 _, err = rand.Read(seed) 335 if err != nil { 336 return nil, fmt.Errorf("could not generate random seed: %w", err) 337 } 338 339 info := &dkgInfo{ 340 identities: identities, 341 phase1FinalView: phase1Final, 342 phase2FinalView: phase2Final, 343 phase3FinalView: phase3Final, 344 seed: seed, 345 } 346 return info, nil 347 } 348 349 // registerPoll instructs the engine to query the DKG smart-contract for new 350 // broadcast messages at the specified view. 351 func (e *ReactorEngine) registerPoll(view uint64) { 352 e.viewEvents.OnView(view, func(header *flow.Header) { 353 e.unit.Launch(func() { 354 e.unit.Lock() 355 defer e.unit.Unlock() 356 357 blockID := header.ID() 358 log := e.log.With(). 359 Uint64("view", view). 360 Uint64("height", header.Height). 361 Hex("block_id", blockID[:]). 362 Logger() 363 364 log.Info().Msg("polling DKG smart-contract...") 365 err := e.controller.Poll(header.ID()) 366 if err != nil { 367 log.Err(err).Msg("failed to poll DKG smart-contract") 368 } 369 }) 370 }) 371 } 372 373 // registerPhaseTransition instructs the engine to change phases at the 374 // specified view. 375 func (e *ReactorEngine) registerPhaseTransition(view uint64, fromState dkgmodule.State, phaseTransition func() error) { 376 e.viewEvents.OnView(view, func(header *flow.Header) { 377 e.unit.Launch(func() { 378 e.unit.Lock() 379 defer e.unit.Unlock() 380 381 blockID := header.ID() 382 log := e.log.With(). 383 Uint64("view", view). 384 Hex("block_id", blockID[:]). 385 Logger() 386 387 log.Info().Msgf("ending %s...", fromState) 388 err := phaseTransition() 389 if err != nil { 390 log.Fatal().Err(err).Msgf("node failed to end %s", fromState) 391 } 392 log.Info().Msgf("ended %s successfully", fromState) 393 }) 394 }) 395 } 396 397 // end returns a callback that is used to end the DKG protocol, save the 398 // resulting private key to storage, and publish the other results to the DKG 399 // smart-contract. 400 func (e *ReactorEngine) end(nextEpochCounter uint64) func() error { 401 return func() error { 402 403 err := e.controller.End() 404 if crypto.IsDKGFailureError(err) { 405 e.log.Warn().Err(err).Msgf("node %s with index %d failed DKG locally", e.me.NodeID(), e.controller.GetIndex()) 406 err := e.dkgState.SetDKGEndState(nextEpochCounter, flow.DKGEndStateDKGFailure) 407 if err != nil { 408 return fmt.Errorf("failed to set dkg end state following dkg end error: %w", err) 409 } 410 } else if err != nil { 411 return fmt.Errorf("unknown error ending the dkg: %w", err) 412 } 413 414 privateShare, _, _ := e.controller.GetArtifacts() 415 if privateShare != nil { 416 // we only store our key if one was computed 417 err = e.dkgState.InsertMyBeaconPrivateKey(nextEpochCounter, privateShare) 418 if err != nil { 419 return fmt.Errorf("could not save beacon private key in db: %w", err) 420 } 421 } 422 423 err = e.controller.SubmitResult() 424 if err != nil { 425 return fmt.Errorf("couldn't publish DKG results: %w", err) 426 } 427 428 return nil 429 } 430 }