github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/engine/collection/synchronization/request_handler.go

package synchronization

import (
	"errors"
	"fmt"

	"github.com/rs/zerolog"

	"github.com/onflow/flow-go/engine"
	commonsync "github.com/onflow/flow-go/engine/common/synchronization"
	"github.com/onflow/flow-go/model/flow"
	"github.com/onflow/flow-go/model/messages"
	"github.com/onflow/flow-go/module"
	"github.com/onflow/flow-go/module/chainsync"
	"github.com/onflow/flow-go/module/lifecycle"
	"github.com/onflow/flow-go/module/metrics"
	"github.com/onflow/flow-go/network"
	"github.com/onflow/flow-go/network/channels"
	"github.com/onflow/flow-go/state/cluster"
	"github.com/onflow/flow-go/storage"
)

// defaultSyncRequestQueueCapacity is the maximum capacity of the sync requests queue.
const defaultSyncRequestQueueCapacity = 500

// defaultRangeRequestQueueCapacity is the maximum capacity of the range requests queue.
const defaultRangeRequestQueueCapacity = 500

// defaultBatchRequestQueueCapacity is the maximum capacity of the batch requests queue.
const defaultBatchRequestQueueCapacity = 500

// defaultEngineRequestsWorkers is the number of workers that dispatch events for requests.
const defaultEngineRequestsWorkers = 8

type RequestHandlerEngine struct {
	unit *engine.Unit
	lm   *lifecycle.LifecycleManager

	me      module.Local
	log     zerolog.Logger
	metrics module.EngineMetrics

	blocks storage.ClusterBlocks
	core   module.SyncCore
	state  cluster.State
	con    network.Conduit // used for sending responses to requesters

	pendingSyncRequests   engine.MessageStore    // message store for *messages.SyncRequest
	pendingBatchRequests  engine.MessageStore    // message store for *messages.BatchRequest
	pendingRangeRequests  engine.MessageStore    // message store for *messages.RangeRequest
	requestMessageHandler *engine.MessageHandler // message handler responsible for request processing
}

func NewRequestHandlerEngine(
	log zerolog.Logger,
	metrics module.EngineMetrics,
	con network.Conduit,
	me module.Local,
	blocks storage.ClusterBlocks,
	core module.SyncCore,
	state cluster.State,
) *RequestHandlerEngine {
	r := &RequestHandlerEngine{
		unit:    engine.NewUnit(),
		lm:      lifecycle.NewLifecycleManager(),
		me:      me,
		log:     log.With().Str("engine", "cluster_synchronization").Logger(),
		metrics: metrics,
		blocks:  blocks,
		core:    core,
		state:   state,
		con:     con,
	}

	r.setupRequestMessageHandler()

	return r
}

// SubmitLocal submits an event originating on the local node.
func (r *RequestHandlerEngine) SubmitLocal(event interface{}) {
	err := r.ProcessLocal(event)
	if err != nil {
		r.log.Fatal().Err(err).Msg("internal error processing event")
	}
}

// Submit submits the given event from the node with the given origin ID
// for processing in a non-blocking manner. It returns instantly and logs
// a potential processing error internally when done.
func (r *RequestHandlerEngine) Submit(channel channels.Channel, originID flow.Identifier, event interface{}) {
	err := r.Process(channel, originID, event)
	if err != nil {
		r.log.Fatal().Err(err).Msg("internal error processing event")
	}
}
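// The snippet below is a minimal usage sketch, not part of the engine: the
// concrete log, metrics, con, me, blocks, core, and state values are assumed
// to be supplied by the node's component wiring.
//
//	handler := NewRequestHandlerEngine(log, metrics, con, me, blocks, core, state)
//	<-handler.Ready() // worker goroutines are now draining the request queues
//	// ... the networking layer delivers requests via handler.Process ...
//	<-handler.Done() // all workers have exited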
// ProcessLocal processes an event originating on the local node.
func (r *RequestHandlerEngine) ProcessLocal(event interface{}) error {
	return r.process(r.me.NodeID(), event)
}

// Process processes the given event from the node with the given origin ID in
// a blocking manner. It returns the potential processing error when done.
func (r *RequestHandlerEngine) Process(channel channels.Channel, originID flow.Identifier, event interface{}) error {
	err := r.process(originID, event)
	if err != nil {
		if engine.IsIncompatibleInputTypeError(err) {
			r.log.Warn().Msgf("%v delivered unsupported message %T through %v", originID, event, channel)
			return nil
		}
		return fmt.Errorf("unexpected error while processing engine message: %w", err)
	}
	return nil
}

// process processes events for the synchronization request handler engine.
// Error returns:
//   - IncompatibleInputTypeError if input has unexpected type
//   - All other errors are potential symptoms of internal state corruption or bugs (fatal).
func (r *RequestHandlerEngine) process(originID flow.Identifier, event interface{}) error {
	return r.requestMessageHandler.Process(originID, event)
}

// setupRequestMessageHandler initializes the inbound queues and the MessageHandler for UNTRUSTED requests.
func (r *RequestHandlerEngine) setupRequestMessageHandler() {
	// RequestHeap deduplicates requests by keeping only one sync request for each requester.
	r.pendingSyncRequests = commonsync.NewRequestHeap(defaultSyncRequestQueueCapacity)
	r.pendingRangeRequests = commonsync.NewRequestHeap(defaultRangeRequestQueueCapacity)
	r.pendingBatchRequests = commonsync.NewRequestHeap(defaultBatchRequestQueueCapacity)

	// define message queueing behaviour
	r.requestMessageHandler = engine.NewMessageHandler(
		r.log,
		engine.NewNotifier(),
		engine.Pattern{
			Match: func(msg *engine.Message) bool {
				_, ok := msg.Payload.(*messages.SyncRequest)
				if ok {
					r.metrics.MessageReceived(metrics.EngineClusterSynchronization, metrics.MessageSyncRequest)
				}
				return ok
			},
			Store: r.pendingSyncRequests,
		},
		engine.Pattern{
			Match: func(msg *engine.Message) bool {
				_, ok := msg.Payload.(*messages.RangeRequest)
				if ok {
					r.metrics.MessageReceived(metrics.EngineClusterSynchronization, metrics.MessageRangeRequest)
				}
				return ok
			},
			Store: r.pendingRangeRequests,
		},
		engine.Pattern{
			Match: func(msg *engine.Message) bool {
				_, ok := msg.Payload.(*messages.BatchRequest)
				if ok {
					r.metrics.MessageReceived(metrics.EngineClusterSynchronization, metrics.MessageBatchRequest)
				}
				return ok
			},
			Store: r.pendingBatchRequests,
		},
	)
}
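// To illustrate the routing above (hypothetical values; the behaviour follows
// from the Pattern definitions): an inbound message such as
//
//	msg := &engine.Message{
//		OriginID: originID,
//		Payload:  &messages.RangeRequest{Nonce: 42, FromHeight: 100, ToHeight: 110},
//	}
//
// matches the second Pattern, increments the range-request metric, and is
// stored in pendingRangeRequests; the handler's notifier then wakes a worker,
// which dequeues the message in processAvailableRequests.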
// onSyncRequest processes an inbound sync request (the sync handshake): if we
// have a higher finalized height, we inform the other node of it, so they can
// organize their block downloads. If we have a lower height, we add the
// difference to our own download queue.
func (r *RequestHandlerEngine) onSyncRequest(originID flow.Identifier, req *messages.SyncRequest) error {
	final, err := r.state.Final().Head()
	if err != nil {
		return fmt.Errorf("could not get last finalized header: %w", err)
	}

	// queue any missing heights as needed
	r.core.HandleHeight(final, req.Height)

	// don't bother sending a response if we're within tolerance or if we're
	// behind the requester
	if r.core.WithinTolerance(final, req.Height) || req.Height > final.Height {
		return nil
	}

	// if we're sufficiently ahead of the requester, send a response
	res := &messages.SyncResponse{
		Height: final.Height,
		Nonce:  req.Nonce,
	}
	err = r.con.Unicast(res, originID)
	if err != nil {
		r.log.Warn().Err(err).Msg("sending sync response failed")
		return nil
	}
	r.metrics.MessageSent(metrics.EngineClusterSynchronization, metrics.MessageSyncResponse)

	return nil
}

// onRangeRequest processes a request for a range of blocks by height.
func (r *RequestHandlerEngine) onRangeRequest(originID flow.Identifier, req *messages.RangeRequest) error {
	r.log.Debug().Str("origin_id", originID.String()).Msg("received new range request")
	// get the latest final state to know if we can fulfill the request
	head, err := r.state.Final().Head()
	if err != nil {
		return fmt.Errorf("could not get last finalized header: %w", err)
	}

	// if we don't have anything to send, we can bail right away
	if head.Height < req.FromHeight || req.FromHeight > req.ToHeight {
		return nil
	}

	// enforce client-side max request size
	var maxSize uint
	// TODO: clean up this logic
	if core, ok := r.core.(*chainsync.Core); ok {
		maxSize = core.Config.MaxSize
	} else {
		maxSize = chainsync.DefaultConfig().MaxSize
	}
	maxHeight := req.FromHeight + uint64(maxSize)
	if maxHeight < req.ToHeight {
		r.log.Warn().
			Uint64("from", req.FromHeight).
			Uint64("to", req.ToHeight).
			Uint64("size", (req.ToHeight-req.FromHeight)+1).
			Uint("max_size", maxSize).
			Msg("range request is too large")

		req.ToHeight = maxHeight
	}

	// get all of the blocks, one by one
	blocks := make([]messages.UntrustedClusterBlock, 0, req.ToHeight-req.FromHeight+1)
	for height := req.FromHeight; height <= req.ToHeight; height++ {
		block, err := r.blocks.ByHeight(height)
		if errors.Is(err, storage.ErrNotFound) {
			r.log.Error().Uint64("height", height).Msg("skipping unknown heights")
			break
		}
		if err != nil {
			return fmt.Errorf("could not get block for height (%d): %w", height, err)
		}
		blocks = append(blocks, messages.UntrustedClusterBlockFromInternal(block))
	}

	// if there are no blocks to send, skip network message
	if len(blocks) == 0 {
		r.log.Debug().Msg("skipping empty range response")
		return nil
	}

	// send the response
	res := &messages.ClusterBlockResponse{
		Nonce:  req.Nonce,
		Blocks: blocks,
	}
	err = r.con.Unicast(res, originID)
	if err != nil {
		r.log.Warn().Err(err).Hex("origin_id", originID[:]).Msg("sending range response failed")
		return nil
	}
	r.metrics.MessageSent(metrics.EngineClusterSynchronization, metrics.MessageBlockResponse)

	return nil
}
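// Worked example of the clamping above (hypothetical numbers): with
// FromHeight = 100, ToHeight = 300, and maxSize = 64, maxHeight is
// 100 + 64 = 164, which is below 300, so the request is truncated and only
// the range [100, 164] is fetched and returned.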
// onBatchRequest processes a request for a batch of specific blocks by block ID.
func (r *RequestHandlerEngine) onBatchRequest(originID flow.Identifier, req *messages.BatchRequest) error {
	r.log.Debug().Str("origin_id", originID.String()).Msg("received new batch request")
	// we should bail and send nothing on an empty request
	if len(req.BlockIDs) == 0 {
		return nil
	}

	// TODO: clean up this logic
	var maxSize uint
	if core, ok := r.core.(*chainsync.Core); ok {
		maxSize = core.Config.MaxSize
	} else {
		maxSize = chainsync.DefaultConfig().MaxSize
	}

	if len(req.BlockIDs) > int(maxSize) {
		r.log.Warn().
			Int("size", len(req.BlockIDs)).
			Uint("max_size", maxSize).
			Msg("batch request is too large")
	}

	// deduplicate the block IDs in the batch request
	blockIDs := make(map[flow.Identifier]struct{})
	for _, blockID := range req.BlockIDs {
		blockIDs[blockID] = struct{}{}

		// enforce client-side max request size
		if len(blockIDs) == int(maxSize) {
			break
		}
	}

	// try to get all the blocks by ID
	blocks := make([]messages.UntrustedClusterBlock, 0, len(blockIDs))
	for blockID := range blockIDs {
		block, err := r.blocks.ByID(blockID)
		if errors.Is(err, storage.ErrNotFound) {
			r.log.Debug().Hex("block_id", blockID[:]).Msg("skipping unknown block")
			continue
		}
		if err != nil {
			return fmt.Errorf("could not get block by ID (%s): %w", blockID, err)
		}
		blocks = append(blocks, messages.UntrustedClusterBlockFromInternal(block))
	}

	// if there are no blocks to send, skip network message
	if len(blocks) == 0 {
		r.log.Debug().Msg("skipping empty batch response")
		return nil
	}

	// send the response
	res := &messages.ClusterBlockResponse{
		Nonce:  req.Nonce,
		Blocks: blocks,
	}
	err := r.con.Unicast(res, originID)
	if err != nil {
		r.log.Warn().Err(err).Hex("origin_id", originID[:]).Msg("sending batch response failed")
		return nil
	}
	r.metrics.MessageSent(metrics.EngineClusterSynchronization, metrics.MessageBlockResponse)

	return nil
}

// processAvailableRequests drains the pending request queues, driving events
// from the networking layer to the business logic.
func (r *RequestHandlerEngine) processAvailableRequests() error {
	for {
		select {
		case <-r.unit.Quit():
			return nil
		default:
		}

		msg, ok := r.pendingSyncRequests.Get()
		if ok {
			err := r.onSyncRequest(msg.OriginID, msg.Payload.(*messages.SyncRequest))
			if err != nil {
				return fmt.Errorf("processing sync request failed: %w", err)
			}
			continue
		}

		msg, ok = r.pendingRangeRequests.Get()
		if ok {
			err := r.onRangeRequest(msg.OriginID, msg.Payload.(*messages.RangeRequest))
			if err != nil {
				return fmt.Errorf("processing range request failed: %w", err)
			}
			continue
		}

		msg, ok = r.pendingBatchRequests.Get()
		if ok {
			err := r.onBatchRequest(msg.OriginID, msg.Payload.(*messages.BatchRequest))
			if err != nil {
				return fmt.Errorf("processing batch request failed: %w", err)
			}
			continue
		}

		// when there are no more messages in the queue, return to the worker
		// loop and wait for the next incoming message to arrive
		return nil
	}
}
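// Note on draining order: each pass services sync requests first, then range
// requests, then batch requests, and every iteration re-checks the quit
// signal, so a shutdown request interrupts the drain between messages rather
// than only after the queues are empty.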
// requestProcessingLoop runs in a dedicated goroutine and processes queued
// requests whenever the message handler signals that new ones have arrived.
func (r *RequestHandlerEngine) requestProcessingLoop() {
	notifier := r.requestMessageHandler.GetNotifier()
	for {
		select {
		case <-r.unit.Quit():
			return
		case <-notifier:
			err := r.processAvailableRequests()
			if err != nil {
				r.log.Fatal().Err(err).Msg("internal error processing queued requests")
			}
		}
	}
}

// Ready returns a ready channel that is closed once the engine has fully started.
func (r *RequestHandlerEngine) Ready() <-chan struct{} {
	r.lm.OnStart(func() {
		for i := 0; i < defaultEngineRequestsWorkers; i++ {
			r.unit.Launch(r.requestProcessingLoop)
		}
	})
	return r.lm.Started()
}

// Done returns a done channel that is closed once the engine has fully stopped.
func (r *RequestHandlerEngine) Done() <-chan struct{} {
	r.lm.OnStop(func() {
		// wait for all request processing workers to exit
		<-r.unit.Done()
	})
	return r.lm.Stopped()
}
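// Shutdown, sketched (illustrative only; this assumes the engine.Unit
// contract that unit.Done() signals Quit and then waits for all launched
// goroutines, and handler is an assumed *RequestHandlerEngine variable):
//
//	done := handler.Done() // workers observe unit.Quit() and return
//	<-done                 // closed once every requestProcessingLoop has exited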