github.com/Jeffail/benthos/v3@v3.65.0/lib/input/aws_kinesis_record_batcher.go

package input

import (
	"context"
	"fmt"
	"sync"
	"time"

	"github.com/Jeffail/benthos/v3/internal/checkpoint"
	"github.com/Jeffail/benthos/v3/lib/message"
	"github.com/Jeffail/benthos/v3/lib/message/batch"
	"github.com/Jeffail/benthos/v3/lib/types"
	"github.com/aws/aws-sdk-go/service/kinesis"
)

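// awsKinesisRecordBatcher aggregates the records of a single Kinesis shard
// into batches according to the configured batch policy, and tracks
// acknowledgements through a capped checkpointer so that the latest sequence
// number that is safe to checkpoint can be queried at any time.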
type awsKinesisRecordBatcher struct {
	streamID string
	shardID  string

	batchPolicy  *batch.Policy
	checkpointer *checkpoint.Capped

	// A batch flushed from the policy that has not yet been dispatched
	// downstream, e.g. because the checkpointer was at capacity.
	flushedMessage types.Message

	// The sequence number of the most recently added record.
	batchedSequence string

	// The most recent sequence number to have been fully acknowledged.
	ackedSequence string
	ackedMut      sync.Mutex
	ackedWG       sync.WaitGroup
}

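// newAWSKinesisRecordBatcher constructs a batcher for a stream/shard pair,
// building the batch policy from the reader config and seeding the acked
// sequence with the sequence number the shard is being consumed from.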
func (k *kinesisReader) newAWSKinesisRecordBatcher(streamID, shardID, sequence string) (*awsKinesisRecordBatcher, error) {
	batchPolicy, err := batch.NewPolicy(k.conf.Batching, k.mgr, k.log, k.stats)
	if err != nil {
		return nil, fmt.Errorf("failed to initialize batch policy for shard consumer: %w", err)
	}

	return &awsKinesisRecordBatcher{
		streamID:      streamID,
		shardID:       shardID,
		batchPolicy:   batchPolicy,
		checkpointer:  checkpoint.NewCapped(int64(k.conf.CheckpointLimit)),
		ackedSequence: sequence,
	}, nil
}

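// AddRecord wraps a raw Kinesis record in a message part, enriches it with
// stream, shard, partition key and sequence number metadata, and adds it to
// the pending batch. It returns true when the batch is ready to be flushed.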
func (a *awsKinesisRecordBatcher) AddRecord(r *kinesis.Record) bool {
	p := message.NewPart(r.Data)
	met := p.Metadata()
	met.Set("kinesis_stream", a.streamID)
	met.Set("kinesis_shard", a.shardID)
	if r.PartitionKey != nil {
		met.Set("kinesis_partition_key", *r.PartitionKey)
	}
	met.Set("kinesis_sequence_number", *r.SequenceNumber)

	a.batchedSequence = *r.SequenceNumber
	if a.flushedMessage != nil {
		// Upstream shouldn't really be adding records if a prior flush was
		// unsuccessful. However, we can still accommodate this by appending it
		// to the flushed message.
		a.flushedMessage.Append(p)
		return true
	}
	return a.batchPolicy.Add(p)
}

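// HasPendingMessage returns true if a flushed batch is still awaiting
// dispatch, in which case it takes priority over building new batches.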
func (a *awsKinesisRecordBatcher) HasPendingMessage() bool {
	return a.flushedMessage != nil
}

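// FlushMessage attempts to flush the pending batch as an asyncMessage whose
// ack function resolves its position within the checkpointer. An empty
// asyncMessage and nil error are returned when there is nothing to flush, or
// when tracking the batch times out, in which case the batch is retained for
// a subsequent attempt.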
func (a *awsKinesisRecordBatcher) FlushMessage(ctx context.Context) (asyncMessage, error) {
	if a.flushedMessage == nil {
		if a.flushedMessage = a.batchPolicy.Flush(); a.flushedMessage == nil {
			return asyncMessage{}, nil
		}
	}

	// Register the batch with the checkpointer, which may block when the
	// number of unacknowledged messages has reached the configured cap.
	resolveFn, err := a.checkpointer.Track(ctx, a.batchedSequence, int64(a.flushedMessage.Len()))
	if err != nil {
		if err == types.ErrTimeout {
			// A timeout means the checkpointer is still at capacity. The batch
			// is kept so the flush can be retried, and no error is reported.
			err = nil
		}
		return asyncMessage{}, err
	}

	a.ackedWG.Add(1)
	aMsg := asyncMessage{
		msg: a.flushedMessage,
		ackFn: func(ctx context.Context, res types.Response) error {
			// Resolving yields the highest sequence number that is now fully
			// acknowledged, or nil if an earlier batch is still in flight.
			topSequence := resolveFn()
			if topSequence != nil {
				a.ackedMut.Lock()
				a.ackedSequence = topSequence.(string)
				a.ackedMut.Unlock()
			}
			a.ackedWG.Done()
			return err
		},
	}
	a.flushedMessage = nil
	return aMsg, nil
}

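// UntilNext returns the duration until the batch policy's next scheduled
// (period based) flush.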
func (a *awsKinesisRecordBatcher) UntilNext() time.Duration {
	return a.batchPolicy.UntilNext()
}

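// GetSequence returns the most recent sequence number to have been fully
// acknowledged, which callers can use as the position to checkpoint for this
// shard.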
func (a *awsKinesisRecordBatcher) GetSequence() string {
	a.ackedMut.Lock()
	seq := a.ackedSequence
	a.ackedMut.Unlock()
	return seq
}

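// Close terminates the batcher, optionally blocking until all dispatched
// messages have been acknowledged before shutting down the batch policy.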
func (a *awsKinesisRecordBatcher) Close(blocked bool) {
	if blocked {
		a.ackedWG.Wait()
	}
	a.batchPolicy.CloseAsync()
}