github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/engine/common/provider/engine.go (about) 1 package provider 2 3 import ( 4 "errors" 5 "fmt" 6 7 "github.com/rs/zerolog" 8 "github.com/vmihailenco/msgpack" 9 10 "github.com/onflow/flow-go/engine" 11 "github.com/onflow/flow-go/engine/common/provider/internal" 12 "github.com/onflow/flow-go/model/flow" 13 "github.com/onflow/flow-go/model/flow/filter" 14 "github.com/onflow/flow-go/model/messages" 15 "github.com/onflow/flow-go/module" 16 "github.com/onflow/flow-go/module/component" 17 "github.com/onflow/flow-go/module/irrecoverable" 18 "github.com/onflow/flow-go/module/metrics" 19 "github.com/onflow/flow-go/network" 20 "github.com/onflow/flow-go/network/channels" 21 "github.com/onflow/flow-go/state/protocol" 22 "github.com/onflow/flow-go/storage" 23 "github.com/onflow/flow-go/utils/logging" 24 ) 25 26 const ( 27 // DefaultRequestProviderWorkers is the default number of workers used to process entity requests. 28 DefaultRequestProviderWorkers = uint(5) 29 30 // DefaultEntityRequestCacheSize is the default max message queue size for the provider engine. 31 // This equates to ~5GB of memory usage with a full queue (10M*500) 32 DefaultEntityRequestCacheSize = 500 33 ) 34 35 // RetrieveFunc is a function provided to the provider engine upon construction. 36 // It is used by the engine when receiving requests in order to retrieve the 37 // related entities. It is important that the retrieve function return a 38 // `storage.ErrNotFound` error if the entity does not exist locally; otherwise, 39 // the logic will error and not send responses when failing to retrieve entities. 40 type RetrieveFunc func(flow.Identifier) (flow.Entity, error) 41 42 // Engine is a generic provider engine, handling the fulfillment of entity 43 // requests on the flow network. It is the `reply` part of the request-reply 44 // pattern provided by the pair of generic exchange engines. 45 type Engine struct { 46 component.Component 47 cm *component.ComponentManager 48 log zerolog.Logger 49 metrics module.EngineMetrics 50 state protocol.State 51 con network.Conduit 52 channel channels.Channel 53 requestHandler *engine.MessageHandler 54 requestQueue engine.MessageStore 55 selector flow.IdentityFilter[flow.Identity] 56 retrieve RetrieveFunc 57 // buffered channel for EntityRequest workers to pick and process. 58 requestChannel chan *internal.EntityRequest 59 } 60 61 var _ network.MessageProcessor = (*Engine)(nil) 62 63 // New creates a new provider engine, operating on the provided network channel, and accepting requests for entities 64 // from a node within the set obtained by applying the provided selector filter. It uses the injected retrieve function 65 // to manage the fullfilment of these requests. 66 func New( 67 log zerolog.Logger, 68 metrics module.EngineMetrics, 69 net network.EngineRegistry, 70 me module.Local, 71 state protocol.State, 72 requestQueue engine.MessageStore, 73 requestWorkers uint, 74 channel channels.Channel, 75 selector flow.IdentityFilter[flow.Identity], 76 retrieve RetrieveFunc) (*Engine, error) { 77 78 // make sure we don't respond to request sent by self or unauthorized nodes 79 selector = filter.And( 80 selector, 81 filter.Not(filter.HasNodeID[flow.Identity](me.NodeID())), 82 ) 83 84 handler := engine.NewMessageHandler( 85 log, 86 engine.NewNotifier(), 87 engine.Pattern{ 88 // Match is called on every new message coming to this engine. 89 // Provider engine only expects EntityRequest. 90 // Other message types are discarded by Match. 91 Match: func(message *engine.Message) bool { 92 _, ok := message.Payload.(*messages.EntityRequest) 93 return ok 94 }, 95 // Map is called on messages that are Match(ed) successfully, i.e., 96 // EntityRequest. 97 Map: func(message *engine.Message) (*engine.Message, bool) { 98 request, ok := message.Payload.(*messages.EntityRequest) 99 if !ok { 100 // should never happen, unless there is a bug. 101 log.Warn(). 102 Str("entity_ids", fmt.Sprintf("%v", request.EntityIDs)). 103 Hex("origin_id", logging.ID(message.OriginID)). 104 Msg("cannot match the payload to entity request") 105 return nil, false 106 } 107 108 message.Payload = *request // de-reference the pointer as HeroCache works with value. 109 110 return message, true 111 }, 112 Store: requestQueue, 113 }) 114 115 // initialize the propagation engine with its dependencies 116 e := &Engine{ 117 log: log.With().Str("engine", "provider").Logger(), 118 metrics: metrics, 119 state: state, 120 channel: channel, 121 selector: selector, 122 retrieve: retrieve, 123 requestHandler: handler, 124 requestQueue: requestQueue, 125 requestChannel: make(chan *internal.EntityRequest, requestWorkers), 126 } 127 128 // register the engine with the network layer and store the conduit 129 con, err := net.Register(channel, e) 130 if err != nil { 131 return nil, fmt.Errorf("could not register engine: %w", err) 132 } 133 e.con = con 134 135 cm := component.NewComponentManagerBuilder() 136 cm.AddWorker(e.processQueuedRequestsShovellerWorker) 137 for i := uint(0); i < requestWorkers; i++ { 138 cm.AddWorker(e.processEntityRequestWorker) 139 } 140 141 e.cm = cm.Build() 142 e.Component = e.cm 143 144 return e, nil 145 } 146 147 // Process processes the given message from the node with the given origin ID in 148 // a blocking manner. It returns the potential processing error when done. 149 func (e *Engine) Process(channel channels.Channel, originID flow.Identifier, event interface{}) error { 150 select { 151 case <-e.cm.ShutdownSignal(): 152 e.log.Warn(). 153 Hex("origin_id", logging.ID(originID)). 154 Msgf("received message after shutdown") 155 return nil 156 default: 157 } 158 159 e.metrics.MessageReceived(e.channel.String(), metrics.MessageEntityRequest) 160 161 err := e.requestHandler.Process(originID, event) 162 if err != nil { 163 if engine.IsIncompatibleInputTypeError(err) { 164 e.log.Warn(). 165 Hex("origin_id", logging.ID(originID)). 166 Str("channel", channel.String()). 167 Str("event", fmt.Sprintf("%+v", event)). 168 Bool(logging.KeySuspicious, true). 169 Msg("received unsupported message type") 170 return nil 171 } 172 return fmt.Errorf("unexpected error while processing engine event: %w", err) 173 } 174 175 return nil 176 } 177 178 // onEntityRequest processes an entity request message from a remote node. 179 // Error returns: 180 // * NetworkTransmissionError if there is a network error happens on transmitting the requested entities. 181 // * InvalidInputError if the list of requested entities is invalid (empty). 182 // * generic error in case of unexpected failure or implementation bug. 183 func (e *Engine) onEntityRequest(request *internal.EntityRequest) error { 184 defer e.metrics.MessageHandled(e.channel.String(), metrics.MessageEntityRequest) 185 186 lg := e.log.With(). 187 Str("origin_id", request.OriginId.String()). 188 Strs("entity_ids", flow.IdentifierList(request.EntityIds).Strings()). 189 Logger() 190 191 lg.Info(). 192 Uint64("nonce", request.Nonce). 193 Msg("entity request received") 194 195 // TODO: add reputation system to punish nodes for malicious behaviour (spam / repeated requests) 196 197 // then, we try to get the current identity of the requester and check it against the filter 198 // for the handler to make sure the requester is authorized for this resource 199 requesters, err := e.state.Final().Identities(filter.And( 200 e.selector, 201 filter.HasNodeID[flow.Identity](request.OriginId)), 202 ) 203 if err != nil { 204 return fmt.Errorf("could not get requesters: %w", err) 205 } 206 if len(requesters) == 0 { 207 return engine.NewInvalidInputErrorf("invalid requester origin (%x)", request.OriginId) 208 } 209 210 // try to retrieve each entity and skip missing ones 211 entities := make([]flow.Entity, 0, len(request.EntityIds)) 212 entityIDs := make([]flow.Identifier, 0, len(request.EntityIds)) 213 seen := make(map[flow.Identifier]struct{}) 214 for _, entityID := range request.EntityIds { 215 // skip requesting duplicate entity IDs 216 if _, ok := seen[entityID]; ok { 217 lg.Warn(). 218 Str("entity_id", entityID.String()). 219 Bool(logging.KeySuspicious, true). 220 Msg("duplicate entity ID in entity request") 221 continue 222 } 223 224 entity, err := e.retrieve(entityID) 225 if errors.Is(err, storage.ErrNotFound) { 226 lg.Debug(). 227 Str("entity_id", entityID.String()). 228 Msg("entity not found") 229 continue 230 } 231 if err != nil { 232 return fmt.Errorf("could not retrieve entity (%x): %w", entityID, err) 233 } 234 entities = append(entities, entity) 235 entityIDs = append(entityIDs, entityID) 236 seen[entityID] = struct{}{} 237 } 238 239 // encode all of the entities 240 blobs := make([][]byte, 0, len(entities)) 241 for _, entity := range entities { 242 blob, err := msgpack.Marshal(entity) 243 if err != nil { 244 return fmt.Errorf("could not encode entity (%x): %w", entity.ID(), err) 245 } 246 blobs = append(blobs, blob) 247 } 248 249 // NOTE: we do _NOT_ avoid sending empty responses, as this will allow 250 // the requester to know we don't have any of the requested entities, which 251 // allows him to retry them immediately, rather than waiting for the expiry 252 // of the retry interval 253 254 // send back the response 255 res := &messages.EntityResponse{ 256 Nonce: request.Nonce, 257 EntityIDs: entityIDs, 258 Blobs: blobs, 259 } 260 err = e.con.Unicast(res, request.OriginId) 261 if err != nil { 262 return engine.NewNetworkTransmissionErrorf("could not send entity response: %w", err) 263 } 264 265 e.metrics.MessageSent(e.channel.String(), metrics.MessageEntityResponse) 266 e.log.Info(). 267 Str("origin_id", request.OriginId.String()). 268 Strs("entity_ids", flow.IdentifierList(entityIDs).Strings()). 269 Uint64("nonce", request.Nonce). // to match with the the entity request received log 270 Msg("entity response sent") 271 272 return nil 273 } 274 275 func (e *Engine) processQueuedRequestsShovellerWorker(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { 276 ready() 277 278 e.log.Debug().Msg("process entity request shoveller worker started") 279 280 for { 281 select { 282 case <-e.requestHandler.GetNotifier(): 283 // there is at least a single request in the queue, so we try to process it. 284 e.processAvailableMessages(ctx) 285 case <-ctx.Done(): 286 // close the internal channel, the workers will drain the channel before exiting 287 close(e.requestChannel) 288 e.log.Trace().Msg("processing entity request worker terminated") 289 return 290 } 291 } 292 } 293 294 func (e *Engine) processAvailableMessages(ctx irrecoverable.SignalerContext) { 295 for { 296 select { 297 case <-ctx.Done(): 298 return 299 default: 300 } 301 302 msg, ok := e.requestQueue.Get() 303 if !ok { 304 // no more requests, return 305 return 306 } 307 308 requestEvent, ok := msg.Payload.(messages.EntityRequest) 309 if !ok { 310 // should never happen, as we only put EntityRequest in the queue, 311 // if it does happen, it means there is a bug in the queue implementation. 312 ctx.Throw(fmt.Errorf("invalid message type in entity request queue: %T", msg.Payload)) 313 } 314 315 req := &internal.EntityRequest{ 316 OriginId: msg.OriginID, 317 EntityIds: requestEvent.EntityIDs, 318 Nonce: requestEvent.Nonce, 319 } 320 321 lg := e.log.With(). 322 Hex("origin_id", logging.ID(req.OriginId)). 323 Str("requested_entity_ids", fmt.Sprintf("%v", req.EntityIds)).Logger() 324 325 lg.Trace().Msg("processor is queuing entity request for processing") 326 e.requestChannel <- req 327 lg.Trace().Msg("processor queued up entity request for processing") 328 } 329 } 330 331 func (e *Engine) processEntityRequestWorker(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { 332 ready() 333 334 for { 335 request, ok := <-e.requestChannel 336 if !ok { 337 e.log.Trace().Msg("processing entity request worker terminated") 338 return 339 } 340 lg := e.log.With(). 341 Hex("origin_id", logging.ID(request.OriginId)). 342 Str("requested_entity_ids", fmt.Sprintf("%v", request.EntityIds)).Logger() 343 lg.Trace().Msg("worker picked up entity request for processing") 344 err := e.onEntityRequest(request) 345 if err != nil { 346 if engine.IsInvalidInputError(err) || engine.IsNetworkTransmissionError(err) { 347 lg.Error().Err(err).Msg("worker could not process entity request") 348 } else { 349 // this is an unexpected error, we crash the node. 350 ctx.Throw(err) 351 } 352 } 353 lg.Trace().Msg("worker finished entity request processing") 354 } 355 }