github.com/koko1123/flow-go-1@v0.29.6/engine/consensus/dkg/messaging_engine.go (about) 1 package dkg 2 3 import ( 4 "context" 5 "fmt" 6 "time" 7 8 "github.com/rs/zerolog" 9 "github.com/sethvargo/go-retry" 10 11 "github.com/koko1123/flow-go-1/engine" 12 "github.com/koko1123/flow-go-1/model/flow" 13 msg "github.com/koko1123/flow-go-1/model/messages" 14 "github.com/koko1123/flow-go-1/module" 15 "github.com/koko1123/flow-go-1/module/dkg" 16 "github.com/koko1123/flow-go-1/network" 17 "github.com/koko1123/flow-go-1/network/channels" 18 ) 19 20 // retryMax is the maximum number of times the engine will attempt to forward 21 // a message before permanently giving up. 22 const retryMax = 9 23 24 // retryBaseWait is the duration to wait between the two first tries. 25 // With 9 attempts and exponential backoff, this will retry for about 26 // 8m before giving up. 27 const retryBaseWait = 1 * time.Second 28 29 // retryJitterPct is the percent jitter to add to each inter-retry wait. 30 const retryJitterPct = 25 31 32 // MessagingEngine is a network engine that enables DKG nodes to exchange 33 // private messages over the network. 34 type MessagingEngine struct { 35 unit *engine.Unit 36 log zerolog.Logger 37 me module.Local // local object to identify the node 38 conduit network.Conduit // network conduit for sending and receiving private messages 39 tunnel *dkg.BrokerTunnel // tunnel for relaying private messages to and from controllers 40 } 41 42 // NewMessagingEngine returns a new engine. 43 func NewMessagingEngine( 44 logger zerolog.Logger, 45 net network.Network, 46 me module.Local, 47 tunnel *dkg.BrokerTunnel) (*MessagingEngine, error) { 48 49 log := logger.With().Str("engine", "dkg-processor").Logger() 50 51 eng := MessagingEngine{ 52 unit: engine.NewUnit(), 53 log: log, 54 me: me, 55 tunnel: tunnel, 56 } 57 58 var err error 59 eng.conduit, err = net.Register(channels.DKGCommittee, &eng) 60 if err != nil { 61 return nil, fmt.Errorf("could not register dkg network engine: %w", err) 62 } 63 64 eng.unit.Launch(eng.forwardOutgoingMessages) 65 66 return &eng, nil 67 } 68 69 // Ready implements the module ReadyDoneAware interface. It returns a channel 70 // that will close when the engine has successfully 71 // started. 72 func (e *MessagingEngine) Ready() <-chan struct{} { 73 return e.unit.Ready() 74 } 75 76 // Done implements the module ReadyDoneAware interface. It returns a channel 77 // that will close when the engine has successfully stopped. 78 func (e *MessagingEngine) Done() <-chan struct{} { 79 return e.unit.Done() 80 } 81 82 // SubmitLocal implements the network Engine interface 83 func (e *MessagingEngine) SubmitLocal(event interface{}) { 84 e.unit.Launch(func() { 85 err := e.process(e.me.NodeID(), event) 86 if err != nil { 87 e.log.Fatal().Err(err).Str("origin", e.me.NodeID().String()).Msg("failed to submit local message") 88 } 89 }) 90 } 91 92 // Submit implements the network Engine interface 93 func (e *MessagingEngine) Submit(_ channels.Channel, originID flow.Identifier, event interface{}) { 94 e.unit.Launch(func() { 95 err := e.process(originID, event) 96 if engine.IsInvalidInputError(err) { 97 e.log.Error().Err(err).Str("origin", originID.String()).Msg("failed to submit dropping invalid input message") 98 } else if err != nil { 99 e.log.Fatal().Err(err).Str("origin", originID.String()).Msg("failed to submit message unknown error") 100 } 101 }) 102 } 103 104 // ProcessLocal implements the network Engine interface 105 func (e *MessagingEngine) ProcessLocal(event interface{}) error { 106 return e.unit.Do(func() error { 107 err := e.process(e.me.NodeID(), event) 108 if err != nil { 109 e.log.Fatal().Err(err).Str("origin", e.me.NodeID().String()).Msg("failed to process local message") 110 } 111 112 return nil 113 }) 114 } 115 116 // Process implements the network Engine interface 117 func (e *MessagingEngine) Process(_ channels.Channel, originID flow.Identifier, event interface{}) error { 118 return e.unit.Do(func() error { 119 return e.process(originID, event) 120 }) 121 } 122 123 func (e *MessagingEngine) process(originID flow.Identifier, event interface{}) error { 124 switch v := event.(type) { 125 case *msg.DKGMessage: 126 // messages are forwarded async rather than sync, because otherwise the message queue 127 // might get full when it's slow to process DKG messages synchronously and impact 128 // block rate. 129 e.forwardInboundMessageAsync(originID, v) 130 return nil 131 default: 132 return engine.NewInvalidInputErrorf("expecting input with type msg.DKGMessage, but got %T", event) 133 } 134 } 135 136 // forwardInboundMessageAsync forwards a private DKG message from another DKG 137 // participant to the DKG controller. 138 func (e *MessagingEngine) forwardInboundMessageAsync(originID flow.Identifier, message *msg.DKGMessage) { 139 e.unit.Launch(func() { 140 e.tunnel.SendIn( 141 msg.PrivDKGMessageIn{ 142 DKGMessage: *message, 143 OriginID: originID, 144 }, 145 ) 146 }) 147 } 148 149 func (e *MessagingEngine) forwardOutgoingMessages() { 150 for { 151 select { 152 case msg := <-e.tunnel.MsgChOut: 153 e.forwardOutboundMessageAsync(msg) 154 case <-e.unit.Quit(): 155 return 156 } 157 } 158 } 159 160 // forwardOutboundMessageAsync asynchronously attempts to forward a private 161 // DKG message to a single other DKG participant, on a best effort basis. 162 func (e *MessagingEngine) forwardOutboundMessageAsync(message msg.PrivDKGMessageOut) { 163 e.unit.Launch(func() { 164 backoff := retry.NewExponential(retryBaseWait) 165 backoff = retry.WithMaxRetries(retryMax, backoff) 166 backoff = retry.WithJitterPercent(retryJitterPct, backoff) 167 168 attempts := 1 169 err := retry.Do(e.unit.Ctx(), backoff, func(ctx context.Context) error { 170 err := e.conduit.Unicast(&message.DKGMessage, message.DestID) 171 if err != nil { 172 e.log.Warn().Err(err).Msgf("error sending dkg message retrying (%d)", attempts) 173 } 174 175 attempts++ 176 return retry.RetryableError(err) 177 }) 178 179 // Various network conditions can result in errors while forwarding outbound messages. 180 // Because the overall DKG is resilient to individual message failures most of time. 181 // it is acceptable to log the error and move on. 182 if err != nil { 183 e.log.Error().Err(err).Msgf("error sending private dkg message after %d attempts", attempts) 184 } 185 }) 186 }