github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/engine/consensus/dkg/messaging_engine.go (about) 1 package dkg 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "time" 8 9 "github.com/rs/zerolog" 10 "github.com/sethvargo/go-retry" 11 12 "github.com/onflow/flow-go/engine" 13 "github.com/onflow/flow-go/engine/common/fifoqueue" 14 "github.com/onflow/flow-go/model/flow" 15 msg "github.com/onflow/flow-go/model/messages" 16 "github.com/onflow/flow-go/module" 17 "github.com/onflow/flow-go/module/component" 18 "github.com/onflow/flow-go/module/dkg" 19 "github.com/onflow/flow-go/module/irrecoverable" 20 "github.com/onflow/flow-go/module/metrics" 21 "github.com/onflow/flow-go/network" 22 "github.com/onflow/flow-go/network/channels" 23 "github.com/onflow/flow-go/utils/logging" 24 ) 25 26 // MessagingEngineConfig configures outbound message submission. 27 type MessagingEngineConfig struct { 28 // RetryMax is the maximum number of times the engine will attempt to send 29 // an outbound message before permanently giving up. 30 RetryMax uint64 31 // RetryBaseWait is the duration to wait between the two first send attempts. 32 RetryBaseWait time.Duration 33 // RetryJitterPercent is the percent jitter to add to each inter-retry wait. 34 RetryJitterPercent uint64 35 } 36 37 // DefaultMessagingEngineConfig returns the config defaults. With 9 attempts and 38 // exponential backoff, this will retry for about 8m before giving up. 39 func DefaultMessagingEngineConfig() MessagingEngineConfig { 40 return MessagingEngineConfig{ 41 RetryMax: 9, 42 RetryBaseWait: time.Second, 43 RetryJitterPercent: 25, 44 } 45 } 46 47 // MessagingEngine is an engine which sends and receives all DKG private messages. 48 // The same engine instance is used for the lifetime of a node and will be used 49 // for different DKG instances. The ReactorEngine is responsible for the lifecycle 50 // of components which are scoped one DKG instance, for example the DKGController. 51 // The dkg.BrokerTunnel handles routing messages to/from the current DKG instance. 52 type MessagingEngine struct { 53 log zerolog.Logger 54 me module.Local // local object to identify the node 55 conduit network.Conduit // network conduit for sending and receiving private messages 56 tunnel *dkg.BrokerTunnel // tunnel for relaying private messages to and from controllers 57 config MessagingEngineConfig // config for outbound message transmission 58 59 messageHandler *engine.MessageHandler // encapsulates enqueueing messages from network 60 notifier engine.Notifier // notifies inbound messages available for forwarding 61 inbound *fifoqueue.FifoQueue // messages from the network, to be processed by DKG Controller 62 63 component.Component 64 cm *component.ComponentManager 65 } 66 67 var _ network.MessageProcessor = (*MessagingEngine)(nil) 68 var _ component.Component = (*MessagingEngine)(nil) 69 70 // NewMessagingEngine returns a new MessagingEngine. 71 func NewMessagingEngine( 72 log zerolog.Logger, 73 net network.EngineRegistry, 74 me module.Local, 75 tunnel *dkg.BrokerTunnel, 76 collector module.MempoolMetrics, 77 config MessagingEngineConfig, 78 ) (*MessagingEngine, error) { 79 log = log.With().Str("engine", "dkg_messaging").Logger() 80 81 inbound, err := fifoqueue.NewFifoQueue( 82 1000, 83 fifoqueue.WithLengthMetricObserver(metrics.ResourceDKGMessage, collector.MempoolEntries)) 84 if err != nil { 85 return nil, fmt.Errorf("could not create inbound fifoqueue: %w", err) 86 } 87 88 notifier := engine.NewNotifier() 89 messageHandler := engine.NewMessageHandler(log, notifier, engine.Pattern{ 90 Match: engine.MatchType[*msg.DKGMessage], 91 Store: &engine.FifoMessageStore{FifoQueue: inbound}, 92 }) 93 94 eng := MessagingEngine{ 95 log: log, 96 me: me, 97 tunnel: tunnel, 98 messageHandler: messageHandler, 99 notifier: notifier, 100 inbound: inbound, 101 config: config, 102 } 103 104 conduit, err := net.Register(channels.DKGCommittee, &eng) 105 if err != nil { 106 return nil, fmt.Errorf("could not register dkg network engine: %w", err) 107 } 108 eng.conduit = conduit 109 110 eng.cm = component.NewComponentManagerBuilder(). 111 AddWorker(eng.forwardInboundMessagesWorker). 112 AddWorker(eng.forwardOutboundMessagesWorker). 113 Build() 114 eng.Component = eng.cm 115 116 return &eng, nil 117 } 118 119 // Process processes messages from the networking layer. 120 // No errors are expected during normal operation. 121 func (e *MessagingEngine) Process(channel channels.Channel, originID flow.Identifier, message any) error { 122 err := e.messageHandler.Process(originID, message) 123 if err != nil { 124 if errors.Is(err, engine.IncompatibleInputTypeError) { 125 e.log.Warn().Bool(logging.KeySuspicious, true).Msgf("%v delivered unsupported message %T through %v", originID, message, channel) 126 return nil 127 } 128 return fmt.Errorf("unexpected failure to process inbound dkg message: %w", err) 129 } 130 return nil 131 } 132 133 // forwardInboundMessagesWorker reads queued inbound messages and forwards them 134 // through the broker tunnel to the DKG Controller for processing. 135 // This is a worker routine which runs for the lifetime of the engine. 136 func (e *MessagingEngine) forwardInboundMessagesWorker(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { 137 ready() 138 139 done := ctx.Done() 140 wake := e.notifier.Channel() 141 for { 142 select { 143 case <-done: 144 return 145 case <-wake: 146 e.forwardInboundMessagesWhileAvailable(ctx) 147 } 148 } 149 } 150 151 // popNextInboundMessage pops one message from the queue and returns it as the 152 // appropriate type expected by the DKG controller. 153 func (e *MessagingEngine) popNextInboundMessage() (msg.PrivDKGMessageIn, bool) { 154 nextMessage, ok := e.inbound.Pop() 155 if !ok { 156 return msg.PrivDKGMessageIn{}, false 157 } 158 asEngineWrapper := nextMessage.(*engine.Message) 159 asDKGMsg := asEngineWrapper.Payload.(*msg.DKGMessage) 160 originID := asEngineWrapper.OriginID 161 162 message := msg.PrivDKGMessageIn{ 163 DKGMessage: *asDKGMsg, 164 OriginID: originID, 165 } 166 return message, true 167 } 168 169 // forwardInboundMessagesWhileAvailable retrieves all inbound messages from the queue and 170 // sends to the DKG Controller over the broker tunnel. Exists when the queue is empty. 171 func (e *MessagingEngine) forwardInboundMessagesWhileAvailable(ctx context.Context) { 172 for { 173 started := time.Now() 174 message, ok := e.popNextInboundMessage() 175 if !ok { 176 return 177 } 178 179 select { 180 case <-ctx.Done(): 181 return 182 case e.tunnel.MsgChIn <- message: 183 e.log.Debug().Dur("waited", time.Since(started)).Msg("forwarded DKG message to Broker") 184 continue 185 } 186 } 187 } 188 189 // forwardOutboundMessagesWorker reads outbound DKG messages created by our DKG Controller 190 // and sends them to the appropriate other DKG participant. Each outbound message is sent 191 // async in an ad-hoc goroutine, which internally manages retry backoff for the message. 192 // This is a worker routine which runs for the lifetime of the engine. 193 func (e *MessagingEngine) forwardOutboundMessagesWorker(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { 194 ready() 195 196 done := ctx.Done() 197 for { 198 select { 199 case <-done: 200 return 201 case message := <-e.tunnel.MsgChOut: 202 go e.forwardOutboundMessage(ctx, message) 203 } 204 } 205 } 206 207 // forwardOutboundMessage transmits message to the target DKG participant. 208 // Upon any error from the Unicast, we will retry with an exponential backoff. 209 // After a limited number of attempts, we will log an error and exit. 210 // The DKG protocol tolerates a number of failed private messages - these will 211 // be resolved by broadcasting complaints in later phases. 212 // Must be invoked as a goroutine. 213 func (e *MessagingEngine) forwardOutboundMessage(ctx context.Context, message msg.PrivDKGMessageOut) { 214 backoff := retry.NewExponential(e.config.RetryBaseWait) 215 backoff = retry.WithMaxRetries(e.config.RetryMax, backoff) 216 backoff = retry.WithJitterPercent(e.config.RetryJitterPercent, backoff) 217 218 started := time.Now() 219 log := e.log.With().Str("target", message.DestID.String()).Logger() 220 221 attempts := 0 222 err := retry.Do(ctx, backoff, func(ctx context.Context) error { 223 attempts++ 224 err := e.conduit.Unicast(&message.DKGMessage, message.DestID) 225 // TODO Unicast does not document expected errors, therefore we treat all errors as benign networking failures here 226 if err != nil { 227 log.Warn(). 228 Err(err). 229 Int("attempt", attempts). 230 Dur("send_time", time.Since(started)). 231 Msgf("error sending dkg message on attempt %d - will retry...", attempts) 232 } 233 234 return retry.RetryableError(err) 235 }) 236 237 // TODO Unicast does not document expected errors, therefore we treat all errors as benign networking failures here 238 if err != nil { 239 log.Error(). 240 Err(err). 241 Int("total_attempts", attempts). 242 Dur("total_send_time", time.Since(started)). 243 Msgf("failed to send private dkg message after %d attempts - will not retry", attempts) 244 } 245 }