github.com/kaleido-io/firefly@v0.0.0-20210622132723-8b4b6aacb971/internal/events/aggregator.go

// Copyright © 2021 Kaleido, Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package events

import (
	"context"
	"crypto/sha256"
	"database/sql/driver"
	"encoding/binary"

	"github.com/kaleido-io/firefly/internal/broadcast"
	"github.com/kaleido-io/firefly/internal/config"
	"github.com/kaleido-io/firefly/internal/data"
	"github.com/kaleido-io/firefly/internal/log"
	"github.com/kaleido-io/firefly/internal/privatemessaging"
	"github.com/kaleido-io/firefly/internal/retry"
	"github.com/kaleido-io/firefly/pkg/database"
	"github.com/kaleido-io/firefly/pkg/fftypes"
)

const (
	aggregatorOffsetName = "ff_aggregator"
)

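// aggregator consumes the database-sequenced stream of pins, determining when the
// messages within each batch are ready to be dispatched to applications as events.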
type aggregator struct {
	ctx             context.Context
	database        database.Plugin
	broadcast       broadcast.Manager
	messaging       privatemessaging.Manager
	data            data.Manager
	eventPoller     *eventPoller
	newPins         chan int64
	offchainBatches chan *fftypes.UUID
	retry           *retry.Retry
}

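// newAggregator constructs an aggregator backed by an eventPoller that reads
// undispatched pins from the database, using the batch, timeout and retry
// settings from the event aggregator configuration.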
func newAggregator(ctx context.Context, di database.Plugin, bm broadcast.Manager, pm privatemessaging.Manager, dm data.Manager, en *eventNotifier) *aggregator {
	batchSize := config.GetInt(config.EventAggregatorBatchSize)
	ag := &aggregator{
		ctx:             log.WithLogField(ctx, "role", "aggregator"),
		database:        di,
		broadcast:       bm,
		messaging:       pm,
		data:            dm,
		newPins:         make(chan int64),
		offchainBatches: make(chan *fftypes.UUID, batchSize),
	}
	firstEvent := fftypes.SubOptsFirstEvent(config.GetString(config.EventAggregatorFirstEvent))
	ag.eventPoller = newEventPoller(ctx, di, en, &eventPollerConf{
		eventBatchSize:             batchSize,
		eventBatchTimeout:          config.GetDuration(config.EventAggregatorBatchTimeout),
		eventPollTimeout:           config.GetDuration(config.EventAggregatorPollTimeout),
		startupOffsetRetryAttempts: config.GetInt(config.OrchestratorStartupAttempts),
		retry: retry.Retry{
			InitialDelay: config.GetDuration(config.EventAggregatorRetryInitDelay),
			MaximumDelay: config.GetDuration(config.EventAggregatorRetryMaxDelay),
			Factor:       config.GetFloat64(config.EventAggregatorRetryFactor),
		},
		firstEvent:       &firstEvent,
		offsetType:       fftypes.OffsetTypeAggregator,
		offsetNamespace:  fftypes.SystemNamespace,
		offsetName:       aggregatorOffsetName,
		newEventsHandler: ag.processPinsDBGroup,
		getItems:         ag.getPins,
		queryFactory:     database.PinQueryFactory,
		addCriteria: func(af database.AndFilter) database.AndFilter {
			return af.Condition(af.Builder().Eq("dispatched", false))
		},
		maybeRewind: ag.rewindOffchainBatches,
	})
	ag.retry = &ag.eventPoller.conf.retry
	return ag
}

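// start begins the aggregator's event polling loop.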
func (ag *aggregator) start() error {
	return ag.eventPoller.start()
}

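// rewindOffchainBatches is the poller's maybeRewind callback. It drains the
// offchainBatches channel and, if any of the newly arrived batches still have
// undispatched pins, returns the oldest such pin sequence as the rewind offset.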
func (ag *aggregator) rewindOffchainBatches() (rewind bool, offset int64) {
	// Retry indefinitely for database errors (until the context closes)
	_ = ag.retry.Do(ag.ctx, "check for off-chain batch deliveries", func(attempt int) (retry bool, err error) {
		var batchIDs []driver.Value
		draining := true
		for draining {
			select {
			case batchID := <-ag.offchainBatches:
				batchIDs = append(batchIDs, batchID)
			default:
				draining = false
			}
		}
		if len(batchIDs) > 0 {
			fb := database.PinQueryFactory.NewFilter(ag.ctx)
			filter := fb.And(
				fb.Eq("dispatched", false),
				fb.In("batch", batchIDs),
			).Sort("sequence").Limit(1) // only need the one oldest sequence
			sequences, err := ag.database.GetPins(ag.ctx, filter)
			if err != nil {
				return true, err
			}
			if len(sequences) > 0 {
				rewind = true
				offset = sequences[0].Sequence
				log.L(ag.ctx).Debugf("Rewinding for off-chain data arrival. New local pin sequence %d", offset)
			}
		}
		return false, nil
	})
	return rewind, offset
}

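// processPinsDBGroup is the poller's newEventsHandler: it casts the sequenced
// items back to pins, and processes them within a single database group.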
func (ag *aggregator) processPinsDBGroup(items []fftypes.LocallySequenced) (repoll bool, err error) {
	pins := make([]*fftypes.Pin, len(items))
	for i, item := range items {
		pins[i] = item.(*fftypes.Pin)
	}
	err = ag.database.RunAsGroup(ag.ctx, func(ctx context.Context) (err error) {
		err = ag.processPins(ctx, pins)
		return err
	})
	return false, err
}

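// getPins is the poller's getItems callback, wrapping GetPins to return the
// generic LocallySequenced interface.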
func (ag *aggregator) getPins(ctx context.Context, filter database.Filter) ([]fftypes.LocallySequenced, error) {
	pins, err := ag.database.GetPins(ctx, filter)
	ls := make([]fftypes.LocallySequenced, len(pins))
	for i, p := range pins {
		ls[i] = p
	}
	return ls, err
}

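// processPins walks a batch of pins in sequence order, loading the batch each
// pin refers to, locating the pinned message-topic entry within it, and
// attempting dispatch of the message, before committing the poller offset.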
func (ag *aggregator) processPins(ctx context.Context, pins []*fftypes.Pin) (err error) {
	l := log.L(ctx)

	// Keep a batch cache for this list of pins
	var batch *fftypes.Batch
	// As messages can have multiple topics, we need to avoid processing the message twice in the same poll loop.
	// We must check all the contexts in the message, and mark them dispatched together.
	dupMsgCheck := make(map[fftypes.UUID]bool)
	for _, pin := range pins {
		l.Debugf("Aggregating pin %.10d batch=%s hash=%s masked=%t", pin.Sequence, pin.Batch, pin.Hash, pin.Masked)

		if batch == nil || *batch.ID != *pin.Batch {
			batch, err = ag.database.GetBatchByID(ctx, pin.Batch)
			if err != nil {
				return err
			}
			if batch == nil {
				l.Debugf("Batch %s not available - pin %s is parked", pin.Batch, pin.Hash)
				continue
			}
		}

		// Extract the message from the batch - pin.Index is the index of a message-topic
		// pair within the batch, counting each topic of each message in order
		var msg *fftypes.Message
		var i int64 = -1
		for iM := 0; i < pin.Index && iM < len(batch.Payload.Messages); iM++ {
			msg = batch.Payload.Messages[iM]
			for iT := 0; i < pin.Index && iT < len(msg.Header.Topics); iT++ {
				i++
			}
		}

		if i < pin.Index {
			l.Errorf("Batch %s does not have message-topic index %d - pin %s is invalid", pin.Batch, pin.Index, pin.Hash)
			continue
		}
		l.Tracef("Batch %s message %d: %+v", batch.ID, pin.Index, msg)
		if msg == nil || msg.Header.ID == nil {
			l.Errorf("null message entry %d in batch '%s'", pin.Index, batch.ID)
			continue
		}
		if dupMsgCheck[*msg.Header.ID] {
			continue
		}
		dupMsgCheck[*msg.Header.ID] = true

		// Attempt to process the message (only returns errors for database persistence issues)
		if err = ag.processMessage(ctx, batch, pin.Masked, pin.Sequence, msg); err != nil {
			return err
		}
	}

	err = ag.eventPoller.commitOffset(ctx, pins[len(pins)-1].Sequence)
	return err
}

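// calcHash computes the masked pin hash for a topic, group, member identity and nonce.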
func (ag *aggregator) calcHash(topic string, groupID *fftypes.Bytes32, identity string, nonce int64) *fftypes.Bytes32 {
	h := sha256.New()
	h.Write([]byte(topic))
	h.Write((*groupID)[:])
	h.Write([]byte(identity))
	nonceBytes := make([]byte, 8)
	binary.BigEndian.PutUint64(nonceBytes, uint64(nonce))
	h.Write(nonceBytes)
	return fftypes.HashResult(h)
}

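// processMessage checks whether a single message is ready to dispatch - for masked
// (private) messages by walking the nextpin state on each topic, and for unmasked
// (broadcast) messages by checking for earlier undispatched pins on the same
// contexts - then dispatches it and records the updated pin state.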
func (ag *aggregator) processMessage(ctx context.Context, batch *fftypes.Batch, masked bool, pinnedSequence int64, msg *fftypes.Message) (err error) {
	l := log.L(ctx)

	// Check if it's ready to be processed
	nextPins := make([]*fftypes.NextPin, len(msg.Pins))
	if masked {
		// Private messages have one or more masked "pin" hashes that allow us to work
		// out if it's the next message in the sequence, given the previous messages
		if msg.Header.Group == nil || len(msg.Pins) == 0 || len(msg.Header.Topics) != len(msg.Pins) {
			log.L(ctx).Errorf("Message '%s' in batch '%s' has invalid pin data pins=%v topics=%v", msg.Header.ID, batch.ID, msg.Pins, msg.Header.Topics)
			return nil
		}
		for i, pinStr := range msg.Pins {
			var pin fftypes.Bytes32
			err := pin.UnmarshalText([]byte(pinStr))
			if err != nil {
				log.L(ctx).Errorf("Message '%s' in batch '%s' has invalid pin at index %d: '%s'", msg.Header.ID, batch.ID, i, pinStr)
				return nil
			}
			nextPin, err := ag.checkMaskedContextReady(ctx, msg, msg.Header.Topics[i], pinnedSequence, &pin)
			if err != nil || nextPin == nil {
				return err
			}
			nextPins[i] = nextPin
		}
	} else {
		// We just need to check there are no earlier sequences with the same unmasked context
		unmaskedContexts := make([]driver.Value, len(msg.Header.Topics))
		for i, topic := range msg.Header.Topics {
			h := sha256.New()
			h.Write([]byte(topic))
			unmaskedContexts[i] = fftypes.HashResult(h)
		}
		fb := database.PinQueryFactory.NewFilter(ctx)
		filter := fb.And(
			fb.Eq("dispatched", false),
			fb.In("hash", unmaskedContexts),
			fb.Lt("sequence", pinnedSequence),
		)
		earlier, err := ag.database.GetPins(ctx, filter)
		if err != nil {
			return err
		}
		if len(earlier) > 0 {
			l.Debugf("Message %s pinned at sequence %d blocked by earlier context %s at sequence %d", msg.Header.ID, pinnedSequence, earlier[0].Hash, earlier[0].Sequence)
			return nil
		}
	}

	dispatched, err := ag.attemptMessageDispatch(ctx, msg)
	if err != nil || !dispatched {
		return err
	}

	// Move the nextPin forwards to the next sequence for this sender, on all
	// topics associated with the message
	if masked {
		for i, nextPin := range nextPins {
			nextPin.Nonce++
			nextPin.Hash = ag.calcHash(msg.Header.Topics[i], msg.Header.Group, nextPin.Identity, nextPin.Nonce)
			if err = ag.database.UpdateNextPin(ctx, nextPin.Sequence, database.NextPinQueryFactory.NewUpdate(ctx).
				Set("nonce", nextPin.Nonce).
				Set("hash", nextPin.Hash),
			); err != nil {
				return err
			}
		}
	}

	// Mark the pin dispatched
	return ag.database.SetPinDispatched(ctx, pinnedSequence)
}

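// checkMaskedContextReady determines whether a masked pin is the next expected
// hash on its context for a member of the group, initializing the context via
// attemptContextInit if this is the first time the context has been seen.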
func (ag *aggregator) checkMaskedContextReady(ctx context.Context, msg *fftypes.Message, topic string, pinnedSequence int64, pin *fftypes.Bytes32) (*fftypes.NextPin, error) {
	l := log.L(ctx)

	// For masked pins, we can only process if:
	// - it is the next sequence on this context for one of the members of the group
	// - there are no undispatched messages on this context earlier in the stream
	h := sha256.New()
	h.Write([]byte(topic))
	h.Write((*msg.Header.Group)[:])
	contextUnmasked := fftypes.HashResult(h)
	filter := database.NextPinQueryFactory.NewFilter(ctx).Eq("context", contextUnmasked)
	nextPins, err := ag.database.GetNextPins(ctx, filter)
	if err != nil {
		return nil, err
	}
	l.Debugf("Group=%s Topic='%s' Sequence=%d Pin=%s NextPins=%v", msg.Header.Group, topic, pinnedSequence, pin, nextPins)

	if len(nextPins) == 0 {
		// If this is the first time we've seen the context, then this message is ready as long as it is
		// the first (nonce=0) message on the context, for one of the members, and there aren't any earlier
		// messages that are nonce=0.
		return ag.attemptContextInit(ctx, msg, topic, pinnedSequence, contextUnmasked, pin)
	}

	// This message must be the next hash for the author
	var nextPin *fftypes.NextPin
	for _, np := range nextPins {
		if *np.Hash == *pin {
			nextPin = np
			break
		}
	}
	if nextPin == nil || nextPin.Identity != msg.Header.Author {
		l.Debugf("Mismatched nexthash or author group=%s topic=%s context=%s pin=%s nextHash=%+v", msg.Header.Group, topic, contextUnmasked, pin, nextPin)
		return nil, nil
	}
	return nextPin, nil
}

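// attemptContextInit handles the first message seen on a masked context,
// matching the pin against the zerohash (nonce=0) of each group member and,
// if this message is entitled to go first, inserting the initial nextpin
// records for every member of the group.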
func (ag *aggregator) attemptContextInit(ctx context.Context, msg *fftypes.Message, topic string, pinnedSequence int64, contextUnmasked, pin *fftypes.Bytes32) (*fftypes.NextPin, error) {
	l := log.L(ctx)

	// It might be the system topic/context initializing the group
	group, err := ag.messaging.ResolveInitGroup(ctx, msg)
	if err != nil || group == nil {
		return nil, err
	}

	// Find the list of zerohashes for this context, and match this pin to one of them
	zeroHashes := make([]driver.Value, len(group.Members))
	var nextPin *fftypes.NextPin
	nextPins := make([]*fftypes.NextPin, len(group.Members))
	for i, member := range group.Members {
		zeroHash := ag.calcHash(topic, msg.Header.Group, member.Identity, 0)
		np := &fftypes.NextPin{
			Context:  contextUnmasked,
			Identity: member.Identity,
			Hash:     zeroHash,
			Nonce:    0,
		}
		if *pin == *zeroHash {
			if member.Identity != msg.Header.Author {
				l.Warnf("Author mismatch for zerohash on context: group=%s topic=%s context=%s pin=%s", msg.Header.Group, topic, contextUnmasked, pin)
				return nil, nil
			}
			nextPin = np
		}
		zeroHashes[i] = zeroHash
		nextPins[i] = np
	}
	l.Debugf("Group=%s topic=%s context=%s zeroHashes=%v", msg.Header.Group, topic, contextUnmasked, zeroHashes)
	if nextPin == nil {
		l.Warnf("No match for zerohash on context: group=%s topic=%s context=%s pin=%s", msg.Header.Group, topic, contextUnmasked, pin)
		return nil, nil
	}

	// Check none of the other zerohashes exist before us in the stream
	fb := database.PinQueryFactory.NewFilter(ctx)
	filter := fb.And(
		fb.Eq("dispatched", false),
		fb.In("hash", zeroHashes),
		fb.Lt("sequence", pinnedSequence),
	)
	earlier, err := ag.database.GetPins(ctx, filter)
	if err != nil {
		return nil, err
	}
	if len(earlier) > 0 {
		l.Debugf("Group=%s topic=%s context=%s earlier=%v", msg.Header.Group, topic, contextUnmasked, earlier)
		return nil, nil
	}

	// We're good to be the first message on this context.
	// Initialize the nextpins on this context - this is safe to do even if we don't actually dispatch the message
	for _, np := range nextPins {
		if err = ag.database.InsertNextPin(ctx, np); err != nil {
			return nil, err
		}
	}
	return nextPin, err
}

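// attemptMessageDispatch dispatches a message once all of its data is available,
// validating the data, marking the message confirmed, and emitting an
// EventTypeMessageConfirmed (or EventTypeMessageInvalid) event.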
func (ag *aggregator) attemptMessageDispatch(ctx context.Context, msg *fftypes.Message) (bool, error) {

	// If we don't find all the data, then we don't dispatch
	data, foundAll, err := ag.data.GetMessageData(ctx, msg, true)
	if err != nil || !foundAll {
		return false, err
	}

	// We're going to dispatch it at this point, but we need to validate the data first
	valid := true
	eventType := fftypes.EventTypeMessageConfirmed
	if msg.Header.Namespace == fftypes.SystemNamespace {
		// We handle system events in-line on the aggregator, as it would be confusing for apps to be
		// dispatched subsequent events before we have processed the system events they depend on.
		if valid, err = ag.broadcast.HandleSystemBroadcast(ctx, msg, data); err != nil {
			// Should only return errors that are retryable
			return false, err
		}
	} else if len(msg.Data) > 0 {
		valid, err = ag.data.ValidateAll(ctx, data)
		if err != nil {
			return false, err
		}
	}
	if valid {
		// This message is now confirmed
		setConfirmed := database.MessageQueryFactory.NewUpdate(ctx).Set("confirmed", fftypes.Now())
		err = ag.database.UpdateMessage(ctx, msg.Header.ID, setConfirmed)
		if err != nil {
			return false, err
		}
	} else {
		// A message with invalid (but complete) data is still considered dispatched.
		// However, we drive a different event to the applications.
		eventType = fftypes.EventTypeMessageInvalid
	}

	// Generate the appropriate event
	event := fftypes.NewEvent(eventType, msg.Header.Namespace, msg.Header.ID, msg.Header.Group)
	if err = ag.database.UpsertEvent(ctx, event, false); err != nil {
		return false, err
	}
	log.L(ctx).Infof("Emitting %s for message %s:%s", eventType, msg.Header.Namespace, msg.Header.ID)

	return true, nil
}