github.com/kaleido-io/firefly@v0.0.0-20210622132723-8b4b6aacb971/internal/batch/batch_processor.go

// Copyright © 2021 Kaleido, Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package batch

import (
	"context"
	"crypto/sha256"
	"database/sql/driver"
	"encoding/binary"
	"fmt"
	"time"

	"github.com/kaleido-io/firefly/internal/log"
	"github.com/kaleido-io/firefly/internal/retry"
	"github.com/kaleido-io/firefly/pkg/database"
	"github.com/kaleido-io/firefly/pkg/fftypes"
)

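// batchWork is a single unit of work accepted by the assembler: one message, its
// associated data records, and the channel to notify once the message has been
// persisted into a batch. Work marked abandoned is skipped by the assembler.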
type batchWork struct {
	msg        *fftypes.Message
	data       []*fftypes.Data
	dispatched chan *batchDispatch
	abandoned  bool
}

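// batchDispatch is sent back to the submitter of a piece of work, confirming the
// ID of the batch the message has been persisted into.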
type batchDispatch struct {
	msg     *fftypes.Message
	batchID *fftypes.UUID
}

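// batchProcessorConf carries the per-processor configuration: the namespace,
// author and (optional) group this processor assembles batches for, the dispatch
// handler to invoke once a batch is sealed, and a callback invoked when the
// processor starts quiescing.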
type batchProcessorConf struct {
	Options
	namespace          string
	author             string
	group              *fftypes.Bytes32
	dispatch           DispatchHandler
	processorQuiescing func()
}

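// batchProcessor assembles messages for a single namespace+author (and optional
// group) into batches. Work flows through two goroutines: the assemblyLoop, which
// accepts new work and decides when a batch is full or has timed out, and the
// persistenceLoop, which persists the batch to the database and dispatches it.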
type batchProcessor struct {
	ctx         context.Context
	database    database.Plugin
	name        string
	cancelCtx   func()
	closed      bool
	newWork     chan *batchWork
	persistWork chan *batchWork
	sealBatch   chan bool
	batchSealed chan bool
	retry       *retry.Retry
	conf        *batchProcessorConf
}

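// newBatchProcessor constructs a processor and starts its assembly and persistence
// goroutines. Illustrative construction only - the values below are hypothetical
// (processors are normally created on demand by the batch manager):
//
//	bp := newBatchProcessor(ctx, dbPlugin, &batchProcessorConf{
//		namespace:          "ns1",
//		author:             "0x12345",
//		dispatch:           myDispatchHandler, // hypothetical DispatchHandler
//		processorQuiescing: func() {},
//		Options: Options{
//			BatchMaxSize:   500,
//			BatchTimeout:   500 * time.Millisecond,
//			DisposeTimeout: 60 * time.Second,
//		},
//	}, retryInstance)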
func newBatchProcessor(ctx context.Context, di database.Plugin, conf *batchProcessorConf, retry *retry.Retry) *batchProcessor {
	pCtx := log.WithLogField(ctx, "role", fmt.Sprintf("batchproc-%s:%s", conf.namespace, conf.author))
	pCtx, cancelCtx := context.WithCancel(pCtx)
	bp := &batchProcessor{
		ctx:         pCtx,
		cancelCtx:   cancelCtx,
		database:    di,
		name:        fmt.Sprintf("%s:%s", conf.namespace, conf.author),
		newWork:     make(chan *batchWork),
		persistWork: make(chan *batchWork, conf.BatchMaxSize),
		sealBatch:   make(chan bool),
		batchSealed: make(chan bool),
		retry:       retry,
		conf:        conf,
	}
	go bp.assemblyLoop()
	go bp.persistenceLoop()
	return bp
}

// The assemblyLoop accepts work into the pipe as quickly as possible.
// It dispatches work asynchronously to the persistenceLoop, which is responsible for
// calling back each piece of work once it has been persisted into a batch
// (it does not wait until that batch is sealed/dispatched).
// The assemblyLoop seals batches when they are full, or when the batch timeout expires.
func (bp *batchProcessor) assemblyLoop() {
	defer bp.close()
	defer close(bp.sealBatch) // causes the persistenceLoop to exit when we do
	l := log.L(bp.ctx)
	var batchSize uint
	var lastBatchSealed = time.Now()
	var quiescing bool
	for {
		// We time out waiting at the point we think we're ready for disposal,
		// unless we've started a batch, in which case we wait for what's left
		// of the batch timeout
		timeToWait := bp.conf.DisposeTimeout
		if quiescing {
			timeToWait = 100 * time.Millisecond
		} else if batchSize > 0 {
			timeToWait = bp.conf.BatchTimeout - time.Since(lastBatchSealed)
		}
		timeout := time.NewTimer(timeToWait)

		// Wait for work, the timeout, or close
		var timedOut, closed bool
		select {
		case <-timeout.C:
			timedOut = true
		case work, ok := <-bp.newWork:
			if ok && !work.abandoned {
				batchSize++
				bp.persistWork <- work
			} else {
				closed = true
			}
		}

		// Don't include the sealing time in the duration
		batchFull := batchSize >= bp.conf.BatchMaxSize
		l.Debugf("Assembly batch loop: Size=%d Full=%t", batchSize, batchFull)

		batchDuration := time.Since(lastBatchSealed)
		if quiescing && batchSize == 0 {
			l.Debugf("Batch assembler disposed after %.2fs of inactivity", float64(batchDuration)/float64(time.Second))
			return
		}

		if closed || batchDuration > bp.conf.DisposeTimeout {
			bp.conf.processorQuiescing()
			quiescing = true
		}

		if (quiescing || timedOut || batchFull) && batchSize > 0 {
			bp.sealBatch <- true
			<-bp.batchSealed
			l.Debugf("Assembly batch sealed")
			lastBatchSealed = time.Now()
			batchSize = 0
		}

	}
}

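// createOrAddToBatch lazily creates the current batch on first use, appends the
// new work (messages and data) to its payload, and, when sealing, assigns a new
// batch-pin transaction reference and calculates the payload hash.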
func (bp *batchProcessor) createOrAddToBatch(batch *fftypes.Batch, newWork []*batchWork, seal bool) *fftypes.Batch {
	l := log.L(bp.ctx)
	if batch == nil {
		batchID := fftypes.NewUUID()
		l.Debugf("New batch %s", batchID)
		batch = &fftypes.Batch{
			ID:        batchID,
			Namespace: bp.conf.namespace,
			Author:    bp.conf.author,
			Group:     bp.conf.group,
			Payload:   fftypes.BatchPayload{},
			Created:   fftypes.Now(),
		}
	}
	for _, w := range newWork {
		if w.msg != nil {
			w.msg.BatchID = batch.ID
			w.msg.Local = false
			batch.Payload.Messages = append(batch.Payload.Messages, w.msg)
		}
		batch.Payload.Data = append(batch.Payload.Data, w.data...)
	}
	if seal {
		// Generate a new Transaction reference, which will be used to record status of the associated transaction as it happens
		batch.Payload.TX = fftypes.TransactionRef{
			Type: fftypes.TransactionTypeBatchPin,
			ID:   fftypes.NewUUID(),
		}
		batch.Hash = batch.Payload.Hash()
		l.Debugf("Batch %s sealed. Hash=%s", batch.ID, batch.Hash)
	}
	return batch
}

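// maskContext calculates the context (for broadcast) or pin (for private messages)
// for one topic of a message:
//
//	broadcast: sha256(topic)
//	private:   sha256(topic || group || author || nonce), where nonce is the next
//	           sequence number for this context, encoded as 8 big-endian bytes
//
// For private messages the nonce is allocated via UpsertNonceNext, as the sender
// is the authority for its own sequence on each context.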
func (bp *batchProcessor) maskContext(ctx context.Context, msg *fftypes.Message, topic string) (contextOrPin *fftypes.Bytes32, err error) {

	hashBuilder := sha256.New()
	hashBuilder.Write([]byte(topic))

	// For broadcast we do not need to mask the context, which is just the hash
	// of the topic. There would be no way to unmask it if we did, because we don't have
	// the full list of senders to know what their next hashes should be.
	if msg.Header.Group == nil {
		return fftypes.HashResult(hashBuilder), nil
	}

	// For private groups, we need to make the topic specific to the group (which is
	// a salt for the hash, as it is not on chain)
	hashBuilder.Write((*msg.Header.Group)[:])

	// The combination of the topic and group is the context
	contextHash := fftypes.HashResult(hashBuilder)

	// Get the next nonce for this context - we're the authority in the network on this,
	// as we are the sender.
	gc := &fftypes.Nonce{
		Context: contextHash,
		Group:   msg.Header.Group,
		Topic:   topic,
	}
	err = bp.database.UpsertNonceNext(ctx, gc)
	if err != nil {
		return nil, err
	}

	// Now combine our sending identity, and this nonce, to produce the hash that should
	// be expected by all members of the group as the next nonce from us on this topic.
	hashBuilder.Write([]byte(msg.Header.Author))
	nonceBytes := make([]byte, 8)
	binary.BigEndian.PutUint64(nonceBytes, uint64(gc.Nonce))
	hashBuilder.Write(nonceBytes)

	return fftypes.HashResult(hashBuilder), err
}

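// maskContexts calculates the context (or pin) for every topic of every message in
// the batch. For private messages the calculated pins are also recorded on the
// message itself.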
func (bp *batchProcessor) maskContexts(ctx context.Context, batch *fftypes.Batch) ([]*fftypes.Bytes32, error) {
	// Calculate the sequence hashes
	contextsOrPins := make([]*fftypes.Bytes32, 0, len(batch.Payload.Messages))
	for _, msg := range batch.Payload.Messages {
		for _, topic := range msg.Header.Topics {
			contextOrPin, err := bp.maskContext(ctx, msg, topic)
			if err != nil {
				return nil, err
			}
			contextsOrPins = append(contextsOrPins, contextOrPin)
			if msg.Header.Group != nil {
				msg.Pins = append(msg.Pins, contextOrPin.String())
			}
		}
	}
	return contextsOrPins, nil
}

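// dispatchBatch hands the sealed batch to the configured dispatch handler,
// retrying indefinitely until it succeeds or the processor is closed.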
func (bp *batchProcessor) dispatchBatch(batch *fftypes.Batch, pins []*fftypes.Bytes32) {
	// Call the dispatcher to do the heavy lifting - retries until success, and only
	// gives up early if we're closed
	_ = bp.retry.Do(bp.ctx, "batch dispatch", func(attempt int) (retry bool, err error) {
		err = bp.conf.dispatch(bp.ctx, batch, pins)
		if err != nil {
			return !bp.closed, err
		}
		return false, nil
	})
}

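// persistBatch writes the batch to the database as a single group of operations:
// updating all the messages with the batch and group they now belong to,
// calculating pins if we are sealing, and upserting the batch itself. It retries
// indefinitely unless the processor is closed.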
func (bp *batchProcessor) persistBatch(batch *fftypes.Batch, newWork []*batchWork, seal bool) (contexts []*fftypes.Bytes32, err error) {
	err = bp.retry.Do(bp.ctx, "batch persist", func(attempt int) (retry bool, err error) {
		err = bp.database.RunAsGroup(bp.ctx, func(ctx context.Context) (err error) {
			// Update all the messages in the batch with the batch ID
			if len(newWork) > 0 {
				msgIDs := make([]driver.Value, 0, len(newWork))
				for _, w := range newWork {
					if w.msg != nil {
						msgIDs = append(msgIDs, w.msg.Header.ID)
					}
				}
				filter := database.MessageQueryFactory.NewFilter(ctx).In("id", msgIDs)
				update := database.MessageQueryFactory.NewUpdate(ctx).
					Set("batch", batch.ID).
					Set("group", batch.Group)
				err = bp.database.UpdateMessages(ctx, filter, update)
			}
			if err == nil && seal {
				contexts, err = bp.maskContexts(bp.ctx, batch)
			}
			if err == nil {
				// Persist the batch itself
				err = bp.database.UpsertBatch(ctx, batch, true, seal /* we set the hash as it seals */)
			}
			return err
		})
		if err != nil {
			return !bp.closed, err
		}
		return false, nil
	})
	return contexts, err
}

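// The persistenceLoop receives work from the assemblyLoop, drains whatever else is
// already queued, persists it into the current batch, and calls back each piece of
// work with the batch ID it landed in. When the assembler requests a seal, it
// acknowledges the seal and then synchronously dispatches the completed batch.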
func (bp *batchProcessor) persistenceLoop() {
	defer close(bp.batchSealed)
	l := log.L(bp.ctx)
	var currentBatch *fftypes.Batch
	var batchSize = 0
	for !bp.closed {
		var seal bool
		newWork := make([]*batchWork, 0, bp.conf.BatchMaxSize)

		// Block waiting for work, or a batch sealing request
		select {
		case w := <-bp.persistWork:
			newWork = append(newWork, w)
		case <-bp.sealBatch:
			seal = true
		}

		// Drain everything currently in the pipe waiting for dispatch.
		// This means we batch the writing to the database, which has to happen before
		// we can call back the work with a persisted batch ID.
		// We drain both the message queue, and the seal, because there's no point
		// going round the loop (persisting twice) if the batch has just filled
		var drained bool
		for !drained {
			select {
			case _, ok := <-bp.sealBatch:
				seal = true
				if !ok {
					return // Closed by termination of assemblyLoop
				}
			case w := <-bp.persistWork:
				newWork = append(newWork, w)
			default:
				drained = true
			}
		}

		batchSize += len(newWork)
		currentBatch = bp.createOrAddToBatch(currentBatch, newWork, seal)
		l.Debugf("Adding %d entries to batch %s. Size=%d Seal=%t", len(newWork), currentBatch.ID, batchSize, seal)

		// Persist the batch - indefinite retry (unless we close, or context is cancelled)
		contexts, err := bp.persistBatch(currentBatch, newWork, seal)
		if err != nil {
			return
		}

		// Inform all the work in this batch of the batch it has been persisted
		// into. At this point the submitters can carry on processing, because we won't
		// lose the work - it's tracked in a batch ready to go
		for _, w := range newWork {
			w.dispatched <- &batchDispatch{
				w.msg,
				currentBatch.ID,
			}
		}

		if seal {
			// At this point the batch is sealed, and the assembler can start
			// queuing up the next batch. We only let them get one batch ahead
			// (due to the size of the channel being the maxBatchSize) before
			// they start blocking waiting for us to complete database persistence of
			// the current batch.
			bp.batchSealed <- true

			// Synchronously dispatch the batch. Must be the last thing we do in the loop, as we
			// will break out of the retry in the case that we close
			bp.dispatchBatch(currentBatch, contexts)

			// Move onto the next batch
			currentBatch = nil
			batchSize = 0
		}

	}
}

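// close stops the processor accepting new work, but deliberately leaves the context
// intact so the persistence loop can complete any in-flight database operations.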
func (bp *batchProcessor) close() {
	if !bp.closed {
		// We don't cancel the context here, as we use close during quiesce and don't want the
		// persistence loop to have its context cancelled, and fail to perform DB operations
		close(bp.newWork)
		bp.closed = true
	}
}

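// waitClosed blocks until both the assembly and persistence loops have exited, by
// waiting on the channels each loop closes on its way out.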
func (bp *batchProcessor) waitClosed() {
	<-bp.sealBatch
	<-bp.batchSealed
}