github.com/koko1123/flow-go-1@v0.29.6/engine/common/provider/engine.go (about) 1 package provider 2 3 import ( 4 "errors" 5 "fmt" 6 7 "github.com/rs/zerolog" 8 "github.com/vmihailenco/msgpack" 9 10 "github.com/koko1123/flow-go-1/engine" 11 "github.com/koko1123/flow-go-1/engine/common/provider/internal" 12 "github.com/koko1123/flow-go-1/model/flow" 13 "github.com/koko1123/flow-go-1/model/flow/filter" 14 "github.com/koko1123/flow-go-1/model/messages" 15 "github.com/koko1123/flow-go-1/module" 16 "github.com/koko1123/flow-go-1/module/component" 17 "github.com/koko1123/flow-go-1/module/irrecoverable" 18 "github.com/koko1123/flow-go-1/module/metrics" 19 "github.com/koko1123/flow-go-1/network" 20 "github.com/koko1123/flow-go-1/network/channels" 21 "github.com/koko1123/flow-go-1/state/protocol" 22 "github.com/koko1123/flow-go-1/storage" 23 "github.com/koko1123/flow-go-1/utils/logging" 24 ) 25 26 const ( 27 // DefaultRequestProviderWorkers is the default number of workers used to process entity requests. 28 DefaultRequestProviderWorkers = uint(5) 29 30 // DefaultEntityRequestCacheSize is the default max message queue size for the provider engine. 31 // This equates to ~5GB of memory usage with a full queue (10M*500) 32 DefaultEntityRequestCacheSize = 500 33 ) 34 35 // RetrieveFunc is a function provided to the provider engine upon construction. 36 // It is used by the engine when receiving requests in order to retrieve the 37 // related entities. It is important that the retrieve function return a 38 // `storage.ErrNotFound` error if the entity does not exist locally; otherwise, 39 // the logic will error and not send responses when failing to retrieve entities. 40 type RetrieveFunc func(flow.Identifier) (flow.Entity, error) 41 42 // Engine is a generic provider engine, handling the fulfillment of entity 43 // requests on the flow network. It is the `reply` part of the request-reply 44 // pattern provided by the pair of generic exchange engines. 45 type Engine struct { 46 component.Component 47 cm *component.ComponentManager 48 log zerolog.Logger 49 metrics module.EngineMetrics 50 state protocol.State 51 con network.Conduit 52 channel channels.Channel 53 requestHandler *engine.MessageHandler 54 requestQueue engine.MessageStore 55 selector flow.IdentityFilter 56 retrieve RetrieveFunc 57 // buffered channel for EntityRequest workers to pick and process. 58 requestChannel chan *internal.EntityRequest 59 } 60 61 var _ network.MessageProcessor = (*Engine)(nil) 62 63 // New creates a new provider engine, operating on the provided network channel, and accepting requests for entities 64 // from a node within the set obtained by applying the provided selector filter. It uses the injected retrieve function 65 // to manage the fullfilment of these requests. 66 func New( 67 log zerolog.Logger, 68 metrics module.EngineMetrics, 69 net network.Network, 70 me module.Local, 71 state protocol.State, 72 requestQueue engine.MessageStore, 73 requestWorkers uint, 74 channel channels.Channel, 75 selector flow.IdentityFilter, 76 retrieve RetrieveFunc) (*Engine, error) { 77 78 // make sure we don't respond to request sent by self or unauthorized nodes 79 selector = filter.And( 80 selector, 81 filter.HasWeight(true), 82 filter.Not(filter.HasNodeID(me.NodeID())), 83 ) 84 85 handler := engine.NewMessageHandler( 86 log, 87 engine.NewNotifier(), 88 engine.Pattern{ 89 // Match is called on every new message coming to this engine. 90 // Provider engine only expects EntityRequest. 91 // Other message types are discarded by Match. 92 Match: func(message *engine.Message) bool { 93 _, ok := message.Payload.(*messages.EntityRequest) 94 return ok 95 }, 96 // Map is called on messages that are Match(ed) successfully, i.e., 97 // EntityRequest. 98 Map: func(message *engine.Message) (*engine.Message, bool) { 99 request, ok := message.Payload.(*messages.EntityRequest) 100 if !ok { 101 // should never happen, unless there is a bug. 102 log.Warn(). 103 Str("entity_ids", fmt.Sprintf("%v", request.EntityIDs)). 104 Hex("origin_id", logging.ID(message.OriginID)). 105 Msg("cannot match the payload to entity request") 106 return nil, false 107 } 108 109 message.Payload = *request // de-reference the pointer as HeroCache works with value. 110 111 return message, true 112 }, 113 Store: requestQueue, 114 }) 115 116 // initialize the propagation engine with its dependencies 117 e := &Engine{ 118 log: log.With().Str("engine", "provider").Logger(), 119 metrics: metrics, 120 state: state, 121 channel: channel, 122 selector: selector, 123 retrieve: retrieve, 124 requestHandler: handler, 125 requestQueue: requestQueue, 126 requestChannel: make(chan *internal.EntityRequest, requestWorkers), 127 } 128 129 // register the engine with the network layer and store the conduit 130 con, err := net.Register(channel, e) 131 if err != nil { 132 return nil, fmt.Errorf("could not register engine: %w", err) 133 } 134 e.con = con 135 136 cm := component.NewComponentManagerBuilder() 137 cm.AddWorker(e.processQueuedRequestsShovellerWorker) 138 for i := uint(0); i < requestWorkers; i++ { 139 cm.AddWorker(e.processEntityRequestWorker) 140 } 141 142 e.cm = cm.Build() 143 e.Component = e.cm 144 145 return e, nil 146 } 147 148 // Process processes the given message from the node with the given origin ID in 149 // a blocking manner. It returns the potential processing error when done. 150 func (e *Engine) Process(channel channels.Channel, originID flow.Identifier, event interface{}) error { 151 select { 152 case <-e.cm.ShutdownSignal(): 153 e.log.Warn(). 154 Hex("origin_id", logging.ID(originID)). 155 Msgf("received message after shutdown") 156 return nil 157 default: 158 } 159 160 e.metrics.MessageReceived(e.channel.String(), metrics.MessageEntityRequest) 161 162 err := e.requestHandler.Process(originID, event) 163 if err != nil { 164 if engine.IsIncompatibleInputTypeError(err) { 165 e.log.Warn(). 166 Hex("origin_id", logging.ID(originID)). 167 Str("channel", channel.String()). 168 Str("event", fmt.Sprintf("%+v", event)). 169 Bool(logging.KeySuspicious, true). 170 Msg("received unsupported message type") 171 return nil 172 } 173 return fmt.Errorf("unexpected error while processing engine event: %w", err) 174 } 175 176 return nil 177 } 178 179 // onEntityRequest processes an entity request message from a remote node. 180 // Error returns: 181 // * NetworkTransmissionError if there is a network error happens on transmitting the requested entities. 182 // * InvalidInputError if the list of requested entities is invalid (empty). 183 // * generic error in case of unexpected failure or implementation bug. 184 func (e *Engine) onEntityRequest(request *internal.EntityRequest) error { 185 defer e.metrics.MessageHandled(e.channel.String(), metrics.MessageEntityRequest) 186 187 lg := e.log.With(). 188 Str("origin_id", request.OriginId.String()). 189 Strs("entity_ids", flow.IdentifierList(request.EntityIds).Strings()). 190 Logger() 191 192 lg.Info(). 193 Uint64("nonce", request.Nonce). 194 Msg("entity request received") 195 196 // TODO: add reputation system to punish nodes for malicious behaviour (spam / repeated requests) 197 198 // then, we try to get the current identity of the requester and check it against the filter 199 // for the handler to make sure the requester is authorized for this resource 200 requesters, err := e.state.Final().Identities(filter.And( 201 e.selector, 202 filter.HasNodeID(request.OriginId)), 203 ) 204 if err != nil { 205 return fmt.Errorf("could not get requesters: %w", err) 206 } 207 if len(requesters) == 0 { 208 return engine.NewInvalidInputErrorf("invalid requester origin (%x)", request.OriginId) 209 } 210 211 // try to retrieve each entity and skip missing ones 212 entities := make([]flow.Entity, 0, len(request.EntityIds)) 213 entityIDs := make([]flow.Identifier, 0, len(request.EntityIds)) 214 seen := make(map[flow.Identifier]struct{}) 215 for _, entityID := range request.EntityIds { 216 // skip requesting duplicate entity IDs 217 if _, ok := seen[entityID]; ok { 218 lg.Warn(). 219 Str("entity_id", entityID.String()). 220 Bool(logging.KeySuspicious, true). 221 Msg("duplicate entity ID in entity request") 222 continue 223 } 224 225 entity, err := e.retrieve(entityID) 226 if errors.Is(err, storage.ErrNotFound) { 227 lg.Debug(). 228 Str("entity_id", entityID.String()). 229 Msg("entity not found") 230 continue 231 } 232 if err != nil { 233 return fmt.Errorf("could not retrieve entity (%x): %w", entityID, err) 234 } 235 entities = append(entities, entity) 236 entityIDs = append(entityIDs, entityID) 237 seen[entityID] = struct{}{} 238 } 239 240 // encode all of the entities 241 blobs := make([][]byte, 0, len(entities)) 242 for _, entity := range entities { 243 blob, err := msgpack.Marshal(entity) 244 if err != nil { 245 return fmt.Errorf("could not encode entity (%x): %w", entity.ID(), err) 246 } 247 blobs = append(blobs, blob) 248 } 249 250 // NOTE: we do _NOT_ avoid sending empty responses, as this will allow 251 // the requester to know we don't have any of the requested entities, which 252 // allows him to retry them immediately, rather than waiting for the expiry 253 // of the retry interval 254 255 // send back the response 256 res := &messages.EntityResponse{ 257 Nonce: request.Nonce, 258 EntityIDs: entityIDs, 259 Blobs: blobs, 260 } 261 err = e.con.Unicast(res, request.OriginId) 262 if err != nil { 263 return engine.NewNetworkTransmissionErrorf("could not send entity response: %w", err) 264 } 265 266 e.metrics.MessageSent(e.channel.String(), metrics.MessageEntityResponse) 267 e.log.Info(). 268 Str("origin_id", request.OriginId.String()). 269 Strs("entity_ids", flow.IdentifierList(entityIDs).Strings()). 270 Uint64("nonce", request.Nonce). // to match with the the entity request received log 271 Msg("entity response sent") 272 273 return nil 274 } 275 276 func (e *Engine) processQueuedRequestsShovellerWorker(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { 277 ready() 278 279 e.log.Debug().Msg("process entity request shoveller worker started") 280 281 for { 282 select { 283 case <-e.requestHandler.GetNotifier(): 284 // there is at least a single request in the queue, so we try to process it. 285 e.processAvailableMessages(ctx) 286 case <-ctx.Done(): 287 // close the internal channel, the workers will drain the channel before exiting 288 close(e.requestChannel) 289 e.log.Trace().Msg("processing entity request worker terminated") 290 return 291 } 292 } 293 } 294 295 func (e *Engine) processAvailableMessages(ctx irrecoverable.SignalerContext) { 296 for { 297 select { 298 case <-ctx.Done(): 299 return 300 default: 301 } 302 303 msg, ok := e.requestQueue.Get() 304 if !ok { 305 // no more requests, return 306 return 307 } 308 309 requestEvent, ok := msg.Payload.(messages.EntityRequest) 310 if !ok { 311 // should never happen, as we only put EntityRequest in the queue, 312 // if it does happen, it means there is a bug in the queue implementation. 313 ctx.Throw(fmt.Errorf("invalid message type in entity request queue: %T", msg.Payload)) 314 } 315 316 req := &internal.EntityRequest{ 317 OriginId: msg.OriginID, 318 EntityIds: requestEvent.EntityIDs, 319 Nonce: requestEvent.Nonce, 320 } 321 322 lg := e.log.With(). 323 Hex("origin_id", logging.ID(req.OriginId)). 324 Str("requested_entity_ids", fmt.Sprintf("%v", req.EntityIds)).Logger() 325 326 lg.Trace().Msg("processor is queuing entity request for processing") 327 e.requestChannel <- req 328 lg.Trace().Msg("processor queued up entity request for processing") 329 } 330 } 331 332 func (e *Engine) processEntityRequestWorker(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { 333 ready() 334 335 for { 336 request, ok := <-e.requestChannel 337 if !ok { 338 e.log.Trace().Msg("processing entity request worker terminated") 339 return 340 } 341 lg := e.log.With(). 342 Hex("origin_id", logging.ID(request.OriginId)). 343 Str("requested_entity_ids", fmt.Sprintf("%v", request.EntityIds)).Logger() 344 lg.Trace().Msg("worker picked up entity request for processing") 345 err := e.onEntityRequest(request) 346 if err != nil { 347 if engine.IsInvalidInputError(err) || engine.IsNetworkTransmissionError(err) { 348 lg.Error().Err(err).Msg("worker could not process entity request") 349 } else { 350 // this is an unexpected error, we crash the node. 351 ctx.Throw(err) 352 } 353 } 354 lg.Trace().Msg("worker finished entity request processing") 355 } 356 }