github.com/Jeffail/benthos/v3@v3.65.0/lib/input/reader/kinesis.go (about)

     1  package reader
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"time"
     8  
     9  	"github.com/Jeffail/benthos/v3/lib/log"
    10  	"github.com/Jeffail/benthos/v3/lib/message"
    11  	"github.com/Jeffail/benthos/v3/lib/message/batch"
    12  	"github.com/Jeffail/benthos/v3/lib/metrics"
    13  	"github.com/Jeffail/benthos/v3/lib/types"
    14  	sess "github.com/Jeffail/benthos/v3/lib/util/aws/session"
    15  	"github.com/aws/aws-sdk-go/aws"
    16  	"github.com/aws/aws-sdk-go/aws/awserr"
    17  	"github.com/aws/aws-sdk-go/aws/request"
    18  	"github.com/aws/aws-sdk-go/aws/session"
    19  	"github.com/aws/aws-sdk-go/service/dynamodb"
    20  	"github.com/aws/aws-sdk-go/service/kinesis"
    21  )
    22  
    23  //------------------------------------------------------------------------------
    24  
// KinesisConfig is configuration values for the input type.
//
// How each field is consumed by the Kinesis reader in this file:
//   - Config (embedded): AWS session settings; ConnectWithContext calls its
//     GetSession method to build the session.
//   - Limit: sent as the Limit of every GetRecords request.
//   - Stream, Shard: stream name and shard ID used for shard iterator
//     requests and attached to each message part as metadata.
//   - DynamoDBTable: when non-empty, the table the committed sequence number
//     is loaded from and written to.
//   - ClientID: joined with Stream ("<client_id>-<stream>") to form the
//     namespace key of the DynamoDB item.
//   - CommitPeriod: duration string; minimum time between DynamoDB commits.
//   - StartFromOldest: when no sequence is known, selects a TRIM_HORIZON
//     iterator (true) or a LATEST iterator (false).
//   - Timeout: duration string; applied as the response read timeout of each
//     AWS request.
//   - Batching: batch policy configuration; it is not referenced by the
//     reader code in this file (presumably consumed by the caller — verify).
type KinesisConfig struct {
	sess.Config     `json:",inline" yaml:",inline"`
	Limit           int64              `json:"limit" yaml:"limit"`
	Stream          string             `json:"stream" yaml:"stream"`
	Shard           string             `json:"shard" yaml:"shard"`
	DynamoDBTable   string             `json:"dynamodb_table" yaml:"dynamodb_table"`
	ClientID        string             `json:"client_id" yaml:"client_id"`
	CommitPeriod    string             `json:"commit_period" yaml:"commit_period"`
	StartFromOldest bool               `json:"start_from_oldest" yaml:"start_from_oldest"`
	Timeout         string             `json:"timeout" yaml:"timeout"`
	Batching        batch.PolicyConfig `json:"batching" yaml:"batching"`
}
    38  
    39  // NewKinesisConfig creates a new Config with default values.
    40  func NewKinesisConfig() KinesisConfig {
    41  	return KinesisConfig{
    42  		Config:          sess.NewConfig(),
    43  		Limit:           100,
    44  		Stream:          "",
    45  		Shard:           "0",
    46  		DynamoDBTable:   "",
    47  		ClientID:        "benthos_consumer",
    48  		CommitPeriod:    "1s",
    49  		StartFromOldest: true,
    50  		Timeout:         "5s",
    51  		Batching:        batch.NewPolicyConfig(),
    52  	}
    53  }
    54  
    55  //------------------------------------------------------------------------------
    56  
// Kinesis is a benthos reader.Type implementation that reads messages from an
// Amazon Kinesis stream.
type Kinesis struct {
	conf KinesisConfig

	// AWS session and service clients. They are set by ConnectWithContext and
	// reset to nil when the initial shard iterator cannot be obtained; a nil
	// session is what the read and commit paths treat as "not connected".
	session *session.Session
	kinesis *kinesis.Kinesis
	dynamo  *dynamodb.DynamoDB

	// Read position state:
	//   offsetLastCommitted - time of the last successful DynamoDB commit.
	//   sequenceCommit      - sequence number written on commit; updated from
	//                         sequence on a successful acknowledgement.
	//   sequence            - sequence number of the latest record read.
	//   sharditer           - current shard iterator; empty until obtained.
	//   namespace           - "<client_id>-<stream>", the DynamoDB item key.
	offsetLastCommitted time.Time
	sequenceCommit      string
	sequence            string
	sharditer           string
	namespace           string

	// Parsed forms of conf.CommitPeriod and conf.Timeout.
	commitPeriod time.Duration
	timeout      time.Duration

	log   log.Modular
	stats metrics.Type
}
    78  
    79  // NewKinesis creates a new Amazon Kinesis stream reader.Type.
    80  func NewKinesis(
    81  	conf KinesisConfig,
    82  	log log.Modular,
    83  	stats metrics.Type,
    84  ) (*Kinesis, error) {
    85  	var timeout, commitPeriod time.Duration
    86  	if tout := conf.Timeout; len(tout) > 0 {
    87  		var err error
    88  		if timeout, err = time.ParseDuration(tout); err != nil {
    89  			return nil, fmt.Errorf("failed to parse timeout string: %v", err)
    90  		}
    91  	}
    92  	if tout := conf.CommitPeriod; len(tout) > 0 {
    93  		var err error
    94  		if commitPeriod, err = time.ParseDuration(tout); err != nil {
    95  			return nil, fmt.Errorf("failed to parse commit period string: %v", err)
    96  		}
    97  	}
    98  	return &Kinesis{
    99  		conf:         conf,
   100  		log:          log,
   101  		timeout:      timeout,
   102  		commitPeriod: commitPeriod,
   103  		namespace:    fmt.Sprintf("%v-%v", conf.ClientID, conf.Stream),
   104  		stats:        stats,
   105  	}, nil
   106  }
   107  
   108  func (k *Kinesis) getIter() error {
   109  	if k.sequenceCommit == "" && k.conf.DynamoDBTable != "" {
   110  		resp, err := k.dynamo.GetItemWithContext(
   111  			aws.BackgroundContext(),
   112  			&dynamodb.GetItemInput{
   113  				TableName:      aws.String(k.conf.DynamoDBTable),
   114  				ConsistentRead: aws.Bool(true),
   115  				Key: map[string]*dynamodb.AttributeValue{
   116  					"namespace": {
   117  						S: aws.String(k.namespace),
   118  					},
   119  					"shard_id": {
   120  						S: aws.String(k.conf.Shard),
   121  					},
   122  				},
   123  			},
   124  			request.WithResponseReadTimeout(k.timeout),
   125  		)
   126  		if err != nil {
   127  			if err.Error() == request.ErrCodeResponseTimeout {
   128  				return types.ErrTimeout
   129  			}
   130  			return fmt.Errorf("failed to access dynamodb table '%s': %w", k.conf.DynamoDBTable, err)
   131  		}
   132  		if seqAttr := resp.Item["sequence"]; seqAttr != nil {
   133  			if seqAttr.S != nil {
   134  				k.sequenceCommit = *seqAttr.S
   135  				k.sequence = *seqAttr.S
   136  			}
   137  		}
   138  	}
   139  
   140  	if k.sharditer == "" && len(k.sequence) > 0 {
   141  		getShardIter := kinesis.GetShardIteratorInput{
   142  			ShardId:                &k.conf.Shard,
   143  			StreamName:             &k.conf.Stream,
   144  			StartingSequenceNumber: &k.sequence,
   145  			ShardIteratorType:      aws.String(kinesis.ShardIteratorTypeAfterSequenceNumber),
   146  		}
   147  		res, err := k.kinesis.GetShardIteratorWithContext(
   148  			aws.BackgroundContext(),
   149  			&getShardIter,
   150  			request.WithResponseReadTimeout(k.timeout),
   151  		)
   152  		if err != nil {
   153  			if err.Error() == request.ErrCodeResponseTimeout {
   154  				return types.ErrTimeout
   155  			} else if err.Error() == kinesis.ErrCodeInvalidArgumentException {
   156  				k.log.Errorf("Failed to receive iterator from sequence number: %v\n", err.Error())
   157  			} else {
   158  				return err
   159  			}
   160  		}
   161  		if res.ShardIterator != nil {
   162  			k.sharditer = *res.ShardIterator
   163  		}
   164  	}
   165  
   166  	if k.sharditer == "" {
   167  		// Otherwise start from somewhere
   168  		iterType := kinesis.ShardIteratorTypeTrimHorizon
   169  		if !k.conf.StartFromOldest {
   170  			iterType = kinesis.ShardIteratorTypeLatest
   171  		}
   172  		// If we failed to obtain from a sequence we start from beginning
   173  		if len(k.sequence) > 0 {
   174  			iterType = kinesis.ShardIteratorTypeTrimHorizon
   175  		}
   176  		getShardIter := kinesis.GetShardIteratorInput{
   177  			ShardId:           &k.conf.Shard,
   178  			StreamName:        &k.conf.Stream,
   179  			ShardIteratorType: &iterType,
   180  		}
   181  		res, err := k.kinesis.GetShardIteratorWithContext(
   182  			aws.BackgroundContext(),
   183  			&getShardIter,
   184  			request.WithResponseReadTimeout(k.timeout),
   185  		)
   186  		if err != nil {
   187  			if err.Error() == request.ErrCodeResponseTimeout {
   188  				return types.ErrTimeout
   189  			}
   190  			return err
   191  		}
   192  		if res.ShardIterator != nil {
   193  			k.sharditer = *res.ShardIterator
   194  		}
   195  	}
   196  
   197  	if k.sharditer == "" {
   198  		return errors.New("failed to obtain shard iterator")
   199  	}
   200  	return nil
   201  }
   202  
// Connect attempts to establish a connection to the target Kinesis shard. It
// calls ConnectWithContext with context.Background().
func (k *Kinesis) Connect() error {
	return k.ConnectWithContext(context.Background())
}
   207  
   208  // ConnectWithContext attempts to establish a connection to the target Kinesis
   209  // shard.
   210  func (k *Kinesis) ConnectWithContext(ctx context.Context) error {
   211  	if k.session != nil {
   212  		return nil
   213  	}
   214  
   215  	sess, err := k.conf.GetSession()
   216  	if err != nil {
   217  		return err
   218  	}
   219  
   220  	k.dynamo = dynamodb.New(sess)
   221  	k.kinesis = kinesis.New(sess)
   222  	k.session = sess
   223  
   224  	if err = k.getIter(); err != nil {
   225  		k.dynamo = nil
   226  		k.kinesis = nil
   227  		k.session = nil
   228  		return err
   229  	}
   230  
   231  	k.log.Infof("Receiving Amazon Kinesis messages from stream: %v\n", k.conf.Stream)
   232  	return nil
   233  }
   234  
// Read attempts to read a new message from the target Kinesis shard. It calls
// ReadNextWithContext with context.Background().
func (k *Kinesis) Read() (types.Message, error) {
	return k.ReadNextWithContext(context.Background())
}
   239  
   240  // ReadNextWithContext attempts to read a new message from the target Kinesis
   241  // shard.
   242  func (k *Kinesis) ReadNextWithContext(ctx context.Context) (types.Message, error) {
   243  	if k.session == nil {
   244  		return nil, types.ErrNotConnected
   245  	}
   246  	if k.sharditer == "" {
   247  		if err := k.getIter(); err != nil {
   248  			return nil, fmt.Errorf("failed to obtain iterator: %v", err)
   249  		}
   250  	}
   251  
   252  	getRecords := kinesis.GetRecordsInput{
   253  		Limit:         &k.conf.Limit,
   254  		ShardIterator: &k.sharditer,
   255  	}
   256  	res, err := k.kinesis.GetRecordsWithContext(
   257  		ctx,
   258  		&getRecords,
   259  		request.WithResponseReadTimeout(k.timeout),
   260  	)
   261  	if err != nil {
   262  		if err.Error() == request.ErrCodeResponseTimeout {
   263  			return nil, types.ErrTimeout
   264  		} else if aerr, ok := err.(awserr.Error); ok && aerr.Code() == request.CanceledErrorCode {
   265  			return nil, types.ErrTimeout
   266  		} else if err.Error() == kinesis.ErrCodeExpiredIteratorException {
   267  			k.log.Warnln("Shard iterator expired, attempting to refresh")
   268  			return nil, types.ErrTimeout
   269  		}
   270  		return nil, err
   271  	}
   272  
   273  	if res.NextShardIterator != nil {
   274  		k.sharditer = *res.NextShardIterator
   275  	}
   276  
   277  	if len(res.Records) == 0 {
   278  		return nil, types.ErrTimeout
   279  	}
   280  
   281  	msg := message.New(nil)
   282  	for _, rec := range res.Records {
   283  		if rec.Data == nil {
   284  			continue
   285  		}
   286  
   287  		part := message.NewPart(rec.Data)
   288  		part.Metadata().Set("kinesis_shard", k.conf.Shard)
   289  		part.Metadata().Set("kinesis_stream", k.conf.Stream)
   290  
   291  		msg.Append(part)
   292  		if rec.SequenceNumber != nil {
   293  			k.sequence = *rec.SequenceNumber
   294  		}
   295  	}
   296  
   297  	if msg.Len() == 0 {
   298  		return nil, types.ErrTimeout
   299  	}
   300  
   301  	return msg, nil
   302  }
   303  
   304  func (k *Kinesis) commit() error {
   305  	if k.session == nil {
   306  		return nil
   307  	}
   308  	if k.conf.DynamoDBTable != "" {
   309  		if _, err := k.dynamo.PutItemWithContext(
   310  			aws.BackgroundContext(),
   311  			&dynamodb.PutItemInput{
   312  				TableName: aws.String(k.conf.DynamoDBTable),
   313  				Item: map[string]*dynamodb.AttributeValue{
   314  					"namespace": {
   315  						S: aws.String(k.namespace),
   316  					},
   317  					"shard_id": {
   318  						S: aws.String(k.conf.Shard),
   319  					},
   320  					"sequence": {
   321  						S: aws.String(k.sequenceCommit),
   322  					},
   323  				},
   324  			},
   325  			request.WithResponseReadTimeout(k.timeout),
   326  		); err != nil {
   327  			return err
   328  		}
   329  		k.offsetLastCommitted = time.Now()
   330  	}
   331  	return nil
   332  }
   333  
// Acknowledge confirms whether or not our unacknowledged messages have been
// successfully propagated or not. It calls AcknowledgeWithContext with
// context.Background().
func (k *Kinesis) Acknowledge(err error) error {
	return k.AcknowledgeWithContext(context.Background(), err)
}
   339  
   340  // AcknowledgeWithContext confirms whether or not our unacknowledged messages
   341  // have been successfully propagated or not.
   342  func (k *Kinesis) AcknowledgeWithContext(ctx context.Context, err error) error {
   343  	if err == nil {
   344  		k.sequenceCommit = k.sequence
   345  	}
   346  
   347  	if time.Since(k.offsetLastCommitted) < k.commitPeriod {
   348  		return nil
   349  	}
   350  
   351  	return k.commit()
   352  }
   353  
   354  // CloseAsync begins cleaning up resources used by this reader asynchronously.
   355  func (k *Kinesis) CloseAsync() {
   356  	go k.commit()
   357  }
   358  
// WaitForClose will block until either the reader is closed or a specified
// timeout occurs.
//
// This implementation ignores the timeout and returns nil immediately; it
// does not wait for the commit goroutine started by CloseAsync.
func (k *Kinesis) WaitForClose(time.Duration) error {
	return nil
}
   364  
   365  //------------------------------------------------------------------------------