github.com/kaleido-io/firefly@v0.0.0-20210622132723-8b4b6aacb971/internal/events/aggregator.go (about) 1 // Copyright © 2021 Kaleido, Inc. 2 // 3 // SPDX-License-Identifier: Apache-2.0 4 // 5 // Licensed under the Apache License, Version 2.0 (the "License"); 6 // you may not use this file except in compliance with the License. 7 // You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 17 package events 18 19 import ( 20 "context" 21 "crypto/sha256" 22 "database/sql/driver" 23 "encoding/binary" 24 25 "github.com/kaleido-io/firefly/internal/broadcast" 26 "github.com/kaleido-io/firefly/internal/config" 27 "github.com/kaleido-io/firefly/internal/data" 28 "github.com/kaleido-io/firefly/internal/log" 29 "github.com/kaleido-io/firefly/internal/privatemessaging" 30 "github.com/kaleido-io/firefly/internal/retry" 31 "github.com/kaleido-io/firefly/pkg/database" 32 "github.com/kaleido-io/firefly/pkg/fftypes" 33 ) 34 35 const ( 36 aggregatorOffsetName = "ff_aggregator" 37 ) 38 39 type aggregator struct { 40 ctx context.Context 41 database database.Plugin 42 broadcast broadcast.Manager 43 messaging privatemessaging.Manager 44 data data.Manager 45 eventPoller *eventPoller 46 newPins chan int64 47 offchainBatches chan *fftypes.UUID 48 retry *retry.Retry 49 } 50 51 func newAggregator(ctx context.Context, di database.Plugin, bm broadcast.Manager, pm privatemessaging.Manager, dm data.Manager, en *eventNotifier) *aggregator { 52 batchSize := config.GetInt(config.EventAggregatorBatchSize) 53 ag := &aggregator{ 54 ctx: log.WithLogField(ctx, "role", "aggregator"), 55 database: di, 56 broadcast: bm, 57 messaging: pm, 58 data: dm, 59 newPins: make(chan int64), 60 offchainBatches: make(chan *fftypes.UUID, batchSize), 61 } 62 firstEvent := fftypes.SubOptsFirstEvent(config.GetString(config.EventAggregatorFirstEvent)) 63 ag.eventPoller = newEventPoller(ctx, di, en, &eventPollerConf{ 64 eventBatchSize: batchSize, 65 eventBatchTimeout: config.GetDuration(config.EventAggregatorBatchTimeout), 66 eventPollTimeout: config.GetDuration(config.EventAggregatorPollTimeout), 67 startupOffsetRetryAttempts: config.GetInt(config.OrchestratorStartupAttempts), 68 retry: retry.Retry{ 69 InitialDelay: config.GetDuration(config.EventAggregatorRetryInitDelay), 70 MaximumDelay: config.GetDuration(config.EventAggregatorRetryMaxDelay), 71 Factor: config.GetFloat64(config.EventAggregatorRetryFactor), 72 }, 73 firstEvent: &firstEvent, 74 offsetType: fftypes.OffsetTypeAggregator, 75 offsetNamespace: fftypes.SystemNamespace, 76 offsetName: aggregatorOffsetName, 77 newEventsHandler: ag.processPinsDBGroup, 78 getItems: ag.getPins, 79 queryFactory: database.PinQueryFactory, 80 addCriteria: func(af database.AndFilter) database.AndFilter { 81 return af.Condition(af.Builder().Eq("dispatched", false)) 82 }, 83 maybeRewind: ag.rewindOffchainBatches, 84 }) 85 ag.retry = &ag.eventPoller.conf.retry 86 return ag 87 } 88 89 func (ag *aggregator) start() error { 90 return ag.eventPoller.start() 91 } 92 93 func (ag *aggregator) rewindOffchainBatches() (rewind bool, offset int64) { 94 // Retry idefinitely for database errors (until the context closes) 95 _ = ag.retry.Do(ag.ctx, "check for off-chain batch deliveries", func(attempt int) (retry bool, err error) { 96 var batchIDs []driver.Value 97 draining := true 98 for draining { 99 select { 100 case batchID := <-ag.offchainBatches: 101 batchIDs = append(batchIDs, batchID) 102 default: 103 draining = false 104 } 105 } 106 if len(batchIDs) > 0 { 107 fb := database.PinQueryFactory.NewFilter(ag.ctx) 108 filter := fb.And( 109 fb.Eq("dispatched", false), 110 fb.In("batch", batchIDs), 111 ).Sort("sequence").Limit(1) // only need the one oldest sequence 112 sequences, err := ag.database.GetPins(ag.ctx, filter) 113 if err != nil { 114 return true, err 115 } 116 if len(sequences) > 0 { 117 rewind = true 118 offset = sequences[0].Sequence 119 log.L(ag.ctx).Debugf("Rewinding for off-chain data arrival. New local pin sequence %d", offset) 120 } 121 } 122 return false, nil 123 }) 124 return rewind, offset 125 } 126 127 func (ag *aggregator) processPinsDBGroup(items []fftypes.LocallySequenced) (repoll bool, err error) { 128 pins := make([]*fftypes.Pin, len(items)) 129 for i, item := range items { 130 pins[i] = item.(*fftypes.Pin) 131 } 132 err = ag.database.RunAsGroup(ag.ctx, func(ctx context.Context) (err error) { 133 err = ag.processPins(ctx, pins) 134 return err 135 }) 136 return false, err 137 } 138 139 func (ag *aggregator) getPins(ctx context.Context, filter database.Filter) ([]fftypes.LocallySequenced, error) { 140 pins, err := ag.database.GetPins(ctx, filter) 141 ls := make([]fftypes.LocallySequenced, len(pins)) 142 for i, p := range pins { 143 ls[i] = p 144 } 145 return ls, err 146 } 147 148 func (ag *aggregator) processPins(ctx context.Context, pins []*fftypes.Pin) (err error) { 149 l := log.L(ctx) 150 151 // Keep a batch cache for this list of pins 152 var batch *fftypes.Batch 153 // As messages can have multiple topics, we need to avoid processing the message twice in the same poll loop. 154 // We must check all the contexts in the message, and mark them dispatched together. 155 dupMsgCheck := make(map[fftypes.UUID]bool) 156 for _, pin := range pins { 157 l.Debugf("Aggregating pin %.10d batch=%s hash=%s masked=%t", pin.Sequence, pin.Batch, pin.Hash, pin.Masked) 158 159 if batch == nil || *batch.ID != *pin.Batch { 160 batch, err = ag.database.GetBatchByID(ctx, pin.Batch) 161 if err != nil { 162 return err 163 } 164 if batch == nil { 165 l.Debugf("Batch %s not available - pin %s is parked", pin.Batch, pin.Hash) 166 continue 167 } 168 } 169 170 // Extract the message from the batch - where the index is of a topic within a message 171 var msg *fftypes.Message 172 var i int64 = -1 173 for iM := 0; i < pin.Index && iM < len(batch.Payload.Messages); iM++ { 174 msg = batch.Payload.Messages[iM] 175 for iT := 0; i < pin.Index && iT < len(msg.Header.Topics); iT++ { 176 i++ 177 } 178 } 179 180 if i < pin.Index { 181 l.Errorf("Batch %s does not have message-topic index %d - pin %s is invalid", pin.Batch, pin.Index, pin.Hash) 182 continue 183 } 184 l.Tracef("Batch %s message %d: %+v", batch.ID, pin.Index, msg) 185 if msg == nil || msg.Header.ID == nil { 186 l.Errorf("null message entry %d in batch '%s'", pin.Index, batch.ID) 187 continue 188 } 189 if dupMsgCheck[*msg.Header.ID] { 190 continue 191 } 192 dupMsgCheck[*msg.Header.ID] = true 193 194 // Attempt to process the message (only returns errors for database persistence issues) 195 if err = ag.processMessage(ctx, batch, pin.Masked, pin.Sequence, msg); err != nil { 196 return err 197 } 198 } 199 200 err = ag.eventPoller.commitOffset(ctx, pins[len(pins)-1].Sequence) 201 return err 202 } 203 204 func (ag *aggregator) calcHash(topic string, groupID *fftypes.Bytes32, identity string, nonce int64) *fftypes.Bytes32 { 205 h := sha256.New() 206 h.Write([]byte(topic)) 207 h.Write((*groupID)[:]) 208 h.Write([]byte(identity)) 209 nonceBytes := make([]byte, 8) 210 binary.BigEndian.PutUint64(nonceBytes, uint64(nonce)) 211 h.Write(nonceBytes) 212 return fftypes.HashResult(h) 213 } 214 215 func (ag *aggregator) processMessage(ctx context.Context, batch *fftypes.Batch, masked bool, pinnedSequence int64, msg *fftypes.Message) (err error) { 216 l := log.L(ctx) 217 218 // Check if it's ready to be processed 219 nextPins := make([]*fftypes.NextPin, len(msg.Pins)) 220 if masked { 221 // Private messages have one or more masked "pin" hashes that allow us to work 222 // out if it's the next message in the sequence, given the previous messages 223 if msg.Header.Group == nil || len(msg.Pins) == 0 || len(msg.Header.Topics) != len(msg.Pins) { 224 log.L(ctx).Errorf("Message '%s' in batch '%s' has invalid pin data pins=%v topics=%v", msg.Header.ID, batch.ID, msg.Pins, msg.Header.Topics) 225 return nil 226 } 227 for i, pinStr := range msg.Pins { 228 var pin fftypes.Bytes32 229 err := pin.UnmarshalText([]byte(pinStr)) 230 if err != nil { 231 log.L(ctx).Errorf("Message '%s' in batch '%s' has invalid pin at index %d: '%s'", msg.Header.ID, batch.ID, i, pinStr) 232 return nil 233 } 234 nextPin, err := ag.checkMaskedContextReady(ctx, msg, msg.Header.Topics[i], pinnedSequence, &pin) 235 if err != nil || nextPin == nil { 236 return err 237 } 238 nextPins[i] = nextPin 239 } 240 } else { 241 // We just need to check there's no earlier sequences with the same unmasked context 242 unmaskedContexts := make([]driver.Value, len(msg.Header.Topics)) 243 for i, topic := range msg.Header.Topics { 244 h := sha256.New() 245 h.Write([]byte(topic)) 246 unmaskedContexts[i] = fftypes.HashResult(h) 247 } 248 fb := database.PinQueryFactory.NewFilter(ctx) 249 filter := fb.And( 250 fb.Eq("dispatched", false), 251 fb.In("hash", unmaskedContexts), 252 fb.Lt("sequence", pinnedSequence), 253 ) 254 earlier, err := ag.database.GetPins(ctx, filter) 255 if err != nil { 256 return err 257 } 258 if len(earlier) > 0 { 259 l.Debugf("Message %s pinned at sequence %d blocked by earlier context %s at sequence %d", msg.Header.ID, pinnedSequence, earlier[0].Hash, earlier[0].Sequence) 260 return nil 261 } 262 } 263 264 dispatched, err := ag.attemptMessageDispatch(ctx, msg) 265 if err != nil || !dispatched { 266 return err 267 } 268 269 // Move the nextPin forwards to the next sequence for this sender, on all 270 // topics associated with the message 271 if masked { 272 for i, nextPin := range nextPins { 273 nextPin.Nonce++ 274 nextPin.Hash = ag.calcHash(msg.Header.Topics[i], msg.Header.Group, nextPin.Identity, nextPin.Nonce) 275 if err = ag.database.UpdateNextPin(ctx, nextPin.Sequence, database.NextPinQueryFactory.NewUpdate(ctx). 276 Set("nonce", nextPin.Nonce). 277 Set("hash", nextPin.Hash), 278 ); err != nil { 279 return err 280 } 281 } 282 } 283 284 // Mark the pin dispatched 285 return ag.database.SetPinDispatched(ctx, pinnedSequence) 286 } 287 288 func (ag *aggregator) checkMaskedContextReady(ctx context.Context, msg *fftypes.Message, topic string, pinnedSequence int64, pin *fftypes.Bytes32) (*fftypes.NextPin, error) { 289 l := log.L(ctx) 290 291 // For masked pins, we can only process if: 292 // - it is the next sequence on this context for one of the members of the group 293 // - there are no undispatched messages on this context earlier in the stream 294 h := sha256.New() 295 h.Write([]byte(topic)) 296 h.Write((*msg.Header.Group)[:]) 297 contextUnmasked := fftypes.HashResult(h) 298 filter := database.NextPinQueryFactory.NewFilter(ctx).Eq("context", contextUnmasked) 299 nextPins, err := ag.database.GetNextPins(ctx, filter) 300 if err != nil { 301 return nil, err 302 } 303 l.Debugf("Group=%s Topic='%s' NextPins=%v Sequence=%d Pin=%s NextPins=%v", msg.Header.Group, topic, nextPins, pinnedSequence, pin, nextPins) 304 305 if len(nextPins) == 0 { 306 // If this is the first time we've seen the context, then this message is read as long as it is 307 // the first (nonce=0) message on the context, for one of the members, and there aren't any earlier 308 // messages that are nonce=0. 309 return ag.attemptContextInit(ctx, msg, topic, pinnedSequence, contextUnmasked, pin) 310 } 311 312 // This message must be the next hash for the author 313 var nextPin *fftypes.NextPin 314 for _, np := range nextPins { 315 if *np.Hash == *pin { 316 nextPin = np 317 break 318 } 319 } 320 if nextPin == nil || nextPin.Identity != msg.Header.Author { 321 l.Debugf("Mismatched nexthash or author group=%s topic=%s context=%s pin=%s nextHash=%+v", msg.Header.Group, topic, contextUnmasked, pin, nextPin) 322 return nil, nil 323 } 324 return nextPin, nil 325 } 326 327 func (ag *aggregator) attemptContextInit(ctx context.Context, msg *fftypes.Message, topic string, pinnedSequence int64, contextUnmasked, pin *fftypes.Bytes32) (*fftypes.NextPin, error) { 328 l := log.L(ctx) 329 330 // It might be the system topic/context initializing the group 331 group, err := ag.messaging.ResolveInitGroup(ctx, msg) 332 if err != nil || group == nil { 333 return nil, err 334 } 335 336 // Find the list of zerohashes for this context, and match this pin to one of them 337 zeroHashes := make([]driver.Value, len(group.Members)) 338 var nextPin *fftypes.NextPin 339 nextPins := make([]*fftypes.NextPin, len(group.Members)) 340 for i, member := range group.Members { 341 zeroHash := ag.calcHash(topic, msg.Header.Group, member.Identity, 0) 342 np := &fftypes.NextPin{ 343 Context: contextUnmasked, 344 Identity: member.Identity, 345 Hash: zeroHash, 346 Nonce: 0, 347 } 348 if *pin == *zeroHash { 349 if member.Identity != msg.Header.Author { 350 l.Warnf("Author mismatch for zerohash on context: group=%s topic=%s context=%s pin=%s", msg.Header.Group, topic, contextUnmasked, pin) 351 return nil, nil 352 } 353 nextPin = np 354 } 355 zeroHashes[i] = zeroHash 356 nextPins[i] = np 357 } 358 l.Debugf("Group=%s topic=%s context=%s zeroHashes=%v", msg.Header.Group, topic, contextUnmasked, zeroHashes) 359 if nextPin == nil { 360 l.Warnf("No match for zerohash on context: group=%s topic=%s context=%s pin=%s", msg.Header.Group, topic, contextUnmasked, pin) 361 return nil, nil 362 } 363 364 // Check none of the other zerohashes exist before us in the stream 365 fb := database.PinQueryFactory.NewFilter(ctx) 366 filter := fb.And( 367 fb.Eq("dispatched", false), 368 fb.In("hash", zeroHashes), 369 fb.Lt("sequence", pinnedSequence), 370 ) 371 earlier, err := ag.database.GetPins(ctx, filter) 372 if err != nil { 373 return nil, err 374 } 375 if len(earlier) > 0 { 376 l.Debugf("Group=%s topic=%s context=%s earlier=%v", msg.Header.Group, topic, contextUnmasked, earlier) 377 return nil, nil 378 } 379 380 // We're good to be the first message on this context. 381 // Initialize the nextpins on this context - this is safe to do even if we don't actually dispatch the message 382 for _, np := range nextPins { 383 if err = ag.database.InsertNextPin(ctx, np); err != nil { 384 return nil, err 385 } 386 } 387 return nextPin, err 388 } 389 390 func (ag *aggregator) attemptMessageDispatch(ctx context.Context, msg *fftypes.Message) (bool, error) { 391 392 // If we don't find all the data, then we don't dispatch 393 data, foundAll, err := ag.data.GetMessageData(ctx, msg, true) 394 if err != nil || !foundAll { 395 return false, err 396 } 397 398 // We're going to dispatch it at this point, but we need to validate the data first 399 valid := true 400 eventType := fftypes.EventTypeMessageConfirmed 401 if msg.Header.Namespace == fftypes.SystemNamespace { 402 // We handle system events in-line on the aggregator, as it would be confusing for apps to be 403 // dispatched subsequent events before we have processed the system events they depend on. 404 if valid, err = ag.broadcast.HandleSystemBroadcast(ctx, msg, data); err != nil { 405 // Should only return errors that are retryable 406 return false, err 407 } 408 } else if len(msg.Data) > 0 { 409 valid, err = ag.data.ValidateAll(ctx, data) 410 if err != nil { 411 return false, err 412 } 413 } 414 if valid { 415 // This message is now confirmed 416 setConfirmed := database.MessageQueryFactory.NewUpdate(ctx).Set("confirmed", fftypes.Now()) 417 err = ag.database.UpdateMessage(ctx, msg.Header.ID, setConfirmed) 418 if err != nil { 419 return false, err 420 } 421 } else { 422 // An message with invalid (but complete) data is still considered dispatched. 423 // However, we drive a different event to the applications. 424 eventType = fftypes.EventTypeMessageInvalid 425 } 426 427 // Generate the appropriate event 428 event := fftypes.NewEvent(eventType, msg.Header.Namespace, msg.Header.ID, msg.Header.Group) 429 if err = ag.database.UpsertEvent(ctx, event, false); err != nil { 430 return false, err 431 } 432 log.L(ctx).Infof("Emitting %s for message %s:%s", eventType, msg.Header.Namespace, msg.Header.ID) 433 434 return true, nil 435 }