github.com/koko1123/flow-go-1@v0.29.6/engine/common/synchronization/engine.go

// (c) 2019 Dapper Labs - ALL RIGHTS RESERVED

package synchronization

import (
	"fmt"
	"math/rand"
	"time"

	"github.com/hashicorp/go-multierror"
	"github.com/rs/zerolog"

	"github.com/koko1123/flow-go-1/engine"
	"github.com/koko1123/flow-go-1/engine/common/fifoqueue"
	"github.com/koko1123/flow-go-1/model/chainsync"
	"github.com/koko1123/flow-go-1/model/flow"
	"github.com/koko1123/flow-go-1/model/messages"
	"github.com/koko1123/flow-go-1/module"
	synccore "github.com/koko1123/flow-go-1/module/chainsync"
	"github.com/koko1123/flow-go-1/module/lifecycle"
	"github.com/koko1123/flow-go-1/module/metrics"
	"github.com/koko1123/flow-go-1/network"
	"github.com/koko1123/flow-go-1/network/channels"
	"github.com/koko1123/flow-go-1/storage"
)

// defaultSyncResponseQueueCapacity is the maximum capacity of the sync responses queue.
const defaultSyncResponseQueueCapacity = 500

// defaultBlockResponseQueueCapacity is the maximum capacity of the block responses queue.
const defaultBlockResponseQueueCapacity = 500

// Engine is the synchronization engine, responsible for synchronizing chain state.
type Engine struct {
	unit    *engine.Unit
	lm      *lifecycle.LifecycleManager
	log     zerolog.Logger
	metrics module.EngineMetrics
	me      module.Local
	con     network.Conduit
	blocks  storage.Blocks
	comp    network.Engine // compliance layer engine

	pollInterval         time.Duration
	scanInterval         time.Duration
	core                 module.SyncCore
	participantsProvider module.IdentifierProvider
	finalizedHeader      *FinalizedHeaderCache

	requestHandler *RequestHandler // component responsible for handling requests

	pendingSyncResponses   engine.MessageStore    // message store for *messages.SyncResponse
	pendingBlockResponses  engine.MessageStore    // message store for *messages.BlockResponse
	responseMessageHandler *engine.MessageHandler // message handler responsible for response processing
}
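
// Illustrative wiring sketch (not part of this file's API): a caller that
// already holds the dependencies below would construct and start the engine
// roughly as follows. All identifiers in the snippet (logger, engineMetrics,
// networkLayer, myLocal, blocksStorage, complianceEngine, syncCore,
// finalizedHeaderCache, participantsProvider) are placeholders provided by the
// node setup, not names defined in this package.
//
//	syncEngine, err := synchronization.New(
//		logger,               // zerolog.Logger
//		engineMetrics,        // module.EngineMetrics
//		networkLayer,         // network.Network
//		myLocal,              // module.Local
//		blocksStorage,        // storage.Blocks
//		complianceEngine,     // network.Engine (compliance layer)
//		syncCore,             // module.SyncCore
//		finalizedHeaderCache, // *FinalizedHeaderCache
//		participantsProvider, // module.IdentifierProvider
//	)
//	if err != nil {
//		return fmt.Errorf("could not create synchronization engine: %w", err)
//	}
//	<-syncEngine.Ready() // starts the poll/scan loops and the request handler
//	// ... node runs ...
//	<-syncEngine.Done()  // blocks until the engine has fully shut down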

// New creates a new main chain synchronization engine.
func New(
	log zerolog.Logger,
	metrics module.EngineMetrics,
	net network.Network,
	me module.Local,
	blocks storage.Blocks,
	comp network.Engine,
	core module.SyncCore,
	finalizedHeader *FinalizedHeaderCache,
	participantsProvider module.IdentifierProvider,
	opts ...OptionFunc,
) (*Engine, error) {

	opt := DefaultConfig()
	for _, f := range opts {
		f(opt)
	}

	if comp == nil {
		panic("must initialize synchronization engine with comp engine")
	}

	// initialize the synchronization engine with its dependencies
	e := &Engine{
		unit:                 engine.NewUnit(),
		lm:                   lifecycle.NewLifecycleManager(),
		log:                  log.With().Str("engine", "synchronization").Logger(),
		metrics:              metrics,
		me:                   me,
		blocks:               blocks,
		comp:                 comp,
		core:                 core,
		pollInterval:         opt.PollInterval,
		scanInterval:         opt.ScanInterval,
		finalizedHeader:      finalizedHeader,
		participantsProvider: participantsProvider,
	}

	err := e.setupResponseMessageHandler()
	if err != nil {
		return nil, fmt.Errorf("could not setup message handler: %w", err)
	}

	// register the engine with the network layer and store the conduit
	con, err := net.Register(channels.SyncCommittee, e)
	if err != nil {
		return nil, fmt.Errorf("could not register engine: %w", err)
	}
	e.con = con

	e.requestHandler = NewRequestHandler(log, metrics, NewResponseSender(con), me, blocks, core, finalizedHeader, true)

	return e, nil
}

// setupResponseMessageHandler initializes the inbound queues and the MessageHandler for UNTRUSTED responses.
func (e *Engine) setupResponseMessageHandler() error {
	syncResponseQueue, err := fifoqueue.NewFifoQueue(defaultSyncResponseQueueCapacity)
	if err != nil {
		return fmt.Errorf("failed to create queue for sync responses: %w", err)
	}

	e.pendingSyncResponses = &engine.FifoMessageStore{
		FifoQueue: syncResponseQueue,
	}

	blockResponseQueue, err := fifoqueue.NewFifoQueue(defaultBlockResponseQueueCapacity)
	if err != nil {
		return fmt.Errorf("failed to create queue for block responses: %w", err)
	}

	e.pendingBlockResponses = &engine.FifoMessageStore{
		FifoQueue: blockResponseQueue,
	}

	// define message queueing behaviour
	e.responseMessageHandler = engine.NewMessageHandler(
		e.log,
		engine.NewNotifier(),
		engine.Pattern{
			Match: func(msg *engine.Message) bool {
				_, ok := msg.Payload.(*messages.SyncResponse)
				if ok {
					e.metrics.MessageReceived(metrics.EngineSynchronization, metrics.MessageSyncResponse)
				}
				return ok
			},
			Store: e.pendingSyncResponses,
		},
		engine.Pattern{
			Match: func(msg *engine.Message) bool {
				_, ok := msg.Payload.(*messages.BlockResponse)
				if ok {
					e.metrics.MessageReceived(metrics.EngineSynchronization, metrics.MessageBlockResponse)
				}
				return ok
			},
			Store: e.pendingBlockResponses,
		},
	)

	return nil
}

// Ready returns a ready channel that is closed once the engine has fully started.
func (e *Engine) Ready() <-chan struct{} {
	e.lm.OnStart(func() {
		<-e.finalizedHeader.Ready()
		e.unit.Launch(e.checkLoop)
		e.unit.Launch(e.responseProcessingLoop)
		// wait for the request handler to start up
		<-e.requestHandler.Ready()
	})
	return e.lm.Started()
}

// Done returns a done channel that is closed once the engine has fully stopped.
func (e *Engine) Done() <-chan struct{} {
	e.lm.OnStop(func() {
		// signal the request handler to shutdown
		requestHandlerDone := e.requestHandler.Done()
		// wait for request sending and response processing routines to exit
		<-e.unit.Done()
		// wait for request handler shutdown to complete
		<-requestHandlerDone
		<-e.finalizedHeader.Done()
	})
	return e.lm.Stopped()
}

// SubmitLocal submits an event originating on the local node.
func (e *Engine) SubmitLocal(event interface{}) {
	err := e.process(e.me.NodeID(), event)
	if err != nil {
		// receiving an input of incompatible type from a trusted internal component is fatal
		e.log.Fatal().Err(err).Msg("internal error processing event")
	}
}

// Submit submits the given event from the node with the given origin ID
// for processing in a non-blocking manner. It returns instantly and logs
// a potential processing error internally when done.
func (e *Engine) Submit(channel channels.Channel, originID flow.Identifier, event interface{}) {
	err := e.Process(channel, originID, event)
	if err != nil {
		e.log.Fatal().Err(err).Msg("internal error processing event")
	}
}

// ProcessLocal processes an event originating on the local node.
func (e *Engine) ProcessLocal(event interface{}) error {
	return e.process(e.me.NodeID(), event)
}

// Process processes the given event from the node with the given origin ID in
// a blocking manner. It returns the potential processing error when done.
func (e *Engine) Process(channel channels.Channel, originID flow.Identifier, event interface{}) error {
	err := e.process(originID, event)
	if err != nil {
		if engine.IsIncompatibleInputTypeError(err) {
			e.log.Warn().Msgf("%v delivered unsupported message %T through %v", originID, event, channel)
			return nil
		}
		return fmt.Errorf("unexpected error while processing engine message: %w", err)
	}
	return nil
}

// process processes events for the synchronization engine.
// Error returns:
//   - IncompatibleInputTypeError if input has unexpected type
//   - all other errors are potential symptoms of internal state corruption or bugs (fatal)
func (e *Engine) process(originID flow.Identifier, event interface{}) error {
	switch event.(type) {
	case *messages.RangeRequest, *messages.BatchRequest, *messages.SyncRequest:
		return e.requestHandler.process(originID, event)
	case *messages.SyncResponse, *messages.BlockResponse:
		return e.responseMessageHandler.Process(originID, event)
	default:
		return fmt.Errorf("received input with type %T from %x: %w", event, originID[:], engine.IncompatibleInputTypeError)
	}
}

// responseProcessingLoop is a separate goroutine that performs processing of queued responses
func (e *Engine) responseProcessingLoop() {
	notifier := e.responseMessageHandler.GetNotifier()
	for {
		select {
		case <-e.unit.Quit():
			return
		case <-notifier:
			e.processAvailableResponses()
		}
	}
}
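
// Note on the response path: the MessageHandler configured in
// setupResponseMessageHandler stores every accepted *messages.SyncResponse and
// *messages.BlockResponse in its corresponding FIFO message store and signals
// the notifier; responseProcessingLoop (above) wakes up on that signal and
// drains both queues via processAvailableResponses (below). Responses are thus
// handled asynchronously, off the networking layer's calling goroutine.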

// processAvailableResponses drains the pending response queues, driving events
// from the networking layer into the business logic.
func (e *Engine) processAvailableResponses() {
	for {
		select {
		case <-e.unit.Quit():
			return
		default:
		}

		msg, ok := e.pendingSyncResponses.Get()
		if ok {
			e.onSyncResponse(msg.OriginID, msg.Payload.(*messages.SyncResponse))
			e.metrics.MessageHandled(metrics.EngineSynchronization, metrics.MessageSyncResponse)
			continue
		}

		msg, ok = e.pendingBlockResponses.Get()
		if ok {
			e.onBlockResponse(msg.OriginID, msg.Payload.(*messages.BlockResponse))
			e.metrics.MessageHandled(metrics.EngineSynchronization, metrics.MessageBlockResponse)
			continue
		}

		// when there are no more messages in the queues, return to the outer loop
		// and wait for the next incoming message to arrive
		return
	}
}

// onSyncResponse processes a synchronization response.
func (e *Engine) onSyncResponse(originID flow.Identifier, res *messages.SyncResponse) {
	e.log.Debug().Str("origin_id", originID.String()).Msg("received sync response")
	final := e.finalizedHeader.Get()
	e.core.HandleHeight(final, res.Height)
}

// onBlockResponse processes a response containing a specifically requested block.
func (e *Engine) onBlockResponse(originID flow.Identifier, res *messages.BlockResponse) {
	// process the blocks one by one
	if len(res.Blocks) == 0 {
		e.log.Debug().Msg("received empty block response")
		return
	}

	first := res.Blocks[0].Header.Height
	last := res.Blocks[len(res.Blocks)-1].Header.Height
	e.log.Debug().Uint64("first", first).Uint64("last", last).Msg("received block response")

	for _, block := range res.Blocks {
		if !e.core.HandleBlock(&block.Header) {
			e.log.Debug().Uint64("height", block.Header.Height).Msg("block handler rejected")
			continue
		}
	}

	e.comp.SubmitLocal(res)
}

// checkLoop will regularly scan for items that need requesting.
func (e *Engine) checkLoop() {
	// when polling is disabled (pollInterval == 0), pollChan is a channel that
	// nothing ever sends on, so the polling case below simply never fires
	pollChan := make(<-chan time.Time)
	if e.pollInterval > 0 {
		poll := time.NewTicker(e.pollInterval)
		pollChan = poll.C
		defer poll.Stop()
	}
	scan := time.NewTicker(e.scanInterval)

CheckLoop:
	for {
		// give the quit channel a priority to be selected
		select {
		case <-e.unit.Quit():
			break CheckLoop
		default:
		}

		select {
		case <-e.unit.Quit():
			break CheckLoop
		case <-pollChan:
			e.pollHeight()
		case <-scan.C:
			head := e.finalizedHeader.Get()
			participants := e.participantsProvider.Identifiers()
			ranges, batches := e.core.ScanPending(head)
			e.sendRequests(participants, ranges, batches)
		}
	}

	// some minor cleanup
	scan.Stop()
}
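
// Request fan-out (informational note): pollHeight below multicasts each
// SyncRequest to synccore.DefaultPollNodes randomly selected participants,
// while sendRequests multicasts every RangeRequest and BatchRequest to
// synccore.DefaultBlockRequestNodes participants; both constants are defined
// in the module/chainsync package.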

// pollHeight will send a synchronization request to three random nodes.
func (e *Engine) pollHeight() {
	head := e.finalizedHeader.Get()
	participants := e.participantsProvider.Identifiers()

	// send the request for synchronization
	req := &messages.SyncRequest{
		Nonce:  rand.Uint64(),
		Height: head.Height,
	}
	e.log.Debug().
		Uint64("height", req.Height).
		Uint64("range_nonce", req.Nonce).
		Msg("sending sync request")
	err := e.con.Multicast(req, synccore.DefaultPollNodes, participants...)
	if err != nil {
		e.log.Warn().Err(err).Msg("sending sync request to poll heights failed")
		return
	}
	e.metrics.MessageSent(metrics.EngineSynchronization, metrics.MessageSyncRequest)
}

// sendRequests sends a request for each range and batch using consensus participants from last finalized snapshot.
func (e *Engine) sendRequests(participants flow.IdentifierList, ranges []chainsync.Range, batches []chainsync.Batch) {
	var errs *multierror.Error

	for _, ran := range ranges {
		req := &messages.RangeRequest{
			Nonce:      rand.Uint64(),
			FromHeight: ran.From,
			ToHeight:   ran.To,
		}
		err := e.con.Multicast(req, synccore.DefaultBlockRequestNodes, participants...)
		if err != nil {
			errs = multierror.Append(errs, fmt.Errorf("could not submit range request: %w", err))
			continue
		}
		e.log.Info().
			Uint64("range_from", req.FromHeight).
			Uint64("range_to", req.ToHeight).
			Uint64("range_nonce", req.Nonce).
			Msg("range requested")
		e.core.RangeRequested(ran)
		e.metrics.MessageSent(metrics.EngineSynchronization, metrics.MessageRangeRequest)
	}

	for _, batch := range batches {
		req := &messages.BatchRequest{
			Nonce:    rand.Uint64(),
			BlockIDs: batch.BlockIDs,
		}
		err := e.con.Multicast(req, synccore.DefaultBlockRequestNodes, participants...)
		if err != nil {
			errs = multierror.Append(errs, fmt.Errorf("could not submit batch request: %w", err))
			continue
		}
		e.log.Debug().
			Strs("block_ids", flow.IdentifierList(batch.BlockIDs).Strings()).
			Uint64("range_nonce", req.Nonce).
			Msg("batch requested")
		e.core.BatchRequested(batch)
		e.metrics.MessageSent(metrics.EngineSynchronization, metrics.MessageBatchRequest)
	}

	if err := errs.ErrorOrNil(); err != nil {
		e.log.Warn().Err(err).Msg("sending range and batch requests failed")
	}
}