github.com/Jeffail/benthos/v3@v3.65.0/lib/output/writer/kinesis_firehose.go (about)

     1  package writer
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"time"
     7  
     8  	"github.com/Jeffail/benthos/v3/lib/log"
     9  	"github.com/Jeffail/benthos/v3/lib/message/batch"
    10  	"github.com/Jeffail/benthos/v3/lib/metrics"
    11  	"github.com/Jeffail/benthos/v3/lib/types"
    12  	sess "github.com/Jeffail/benthos/v3/lib/util/aws/session"
    13  	"github.com/Jeffail/benthos/v3/lib/util/retries"
    14  	"github.com/aws/aws-sdk-go/aws"
    15  	"github.com/aws/aws-sdk-go/aws/session"
    16  	"github.com/aws/aws-sdk-go/service/firehose"
    17  	"github.com/aws/aws-sdk-go/service/firehose/firehoseiface"
    18  	"github.com/cenkalti/backoff/v4"
    19  )
    20  
    21  //------------------------------------------------------------------------------
    22  
// KinesisFirehoseConfig contains configuration fields for the KinesisFirehose
// output type.
//
// Both embedded structs are tagged as inline, so their fields are expected to
// appear at the same level as Stream in the serialised config.
type KinesisFirehoseConfig struct {
	// AWS session settings; Connect calls GetSession on this config to obtain
	// the session used by the Firehose client.
	sessionConfig  `json:",inline" yaml:",inline"`
	// Stream is the name of the target Firehose delivery stream.
	Stream         string `json:"stream" yaml:"stream"`
	// MaxInFlight defaults to 1. It is not read anywhere in this file;
	// presumably it is consumed by the component that wraps this writer
	// (TODO confirm).
	MaxInFlight    int    `json:"max_in_flight" yaml:"max_in_flight"`
	// Retry/backoff settings; NewKinesisFirehose turns these into a backoff
	// constructor via GetCtor.
	retries.Config `json:",inline" yaml:",inline"`
	// Batching holds the batch policy configuration. It is not read in this
	// file; presumably applied by the wrapping output (TODO confirm).
	Batching       batch.PolicyConfig `json:"batching" yaml:"batching"`
}
    31  
    32  // NewKinesisFirehoseConfig creates a new Config with default values.
    33  func NewKinesisFirehoseConfig() KinesisFirehoseConfig {
    34  	rConf := retries.NewConfig()
    35  	rConf.Backoff.InitialInterval = "1s"
    36  	rConf.Backoff.MaxInterval = "5s"
    37  	rConf.Backoff.MaxElapsedTime = "30s"
    38  
    39  	return KinesisFirehoseConfig{
    40  		sessionConfig: sessionConfig{
    41  			Config: sess.NewConfig(),
    42  		},
    43  		Stream:      "",
    44  		MaxInFlight: 1,
    45  		Config:      rConf,
    46  		Batching:    batch.NewPolicyConfig(),
    47  	}
    48  }
    49  
    50  //------------------------------------------------------------------------------
    51  
// KinesisFirehose is a benthos writer.Type implementation that writes messages
// to an Amazon Kinesis Firehose destination.
type KinesisFirehose struct {
	// conf is the configuration the writer was constructed with.
	conf KinesisFirehoseConfig

	// session is nil until Connect has run; WriteWithContext uses it as the
	// "is connected" flag.
	session  *session.Session
	// firehose is the client used for DescribeDeliveryStream and
	// PutRecordBatch calls.
	firehose firehoseiface.FirehoseAPI

	// backoffCtor creates a fresh backoff policy for every write.
	backoffCtor func() backoff.BackOff
	// streamName is conf.Stream as a *string, the form the AWS SDK inputs take.
	streamName  *string

	log   log.Modular
	stats metrics.Type

	// mThrottled counts writes that had throttled records ("send.throttled");
	// mPartsThrottled counts the throttled records themselves
	// ("parts.send.throttled").
	// NOTE(review): mThrottledF and mPartsThrottledF are never assigned or
	// used in this file — confirm whether they are still needed.
	mThrottled       metrics.StatCounter
	mThrottledF      metrics.StatCounter
	mPartsThrottled  metrics.StatCounter
	mPartsThrottledF metrics.StatCounter
}
    71  
    72  // NewKinesisFirehose creates a new Amazon Kinesis Firehose writer.Type.
    73  func NewKinesisFirehose(
    74  	conf KinesisFirehoseConfig,
    75  	log log.Modular,
    76  	stats metrics.Type,
    77  ) (*KinesisFirehose, error) {
    78  	k := KinesisFirehose{
    79  		conf:            conf,
    80  		log:             log,
    81  		stats:           stats,
    82  		mPartsThrottled: stats.GetCounter("parts.send.throttled"),
    83  		mThrottled:      stats.GetCounter("send.throttled"),
    84  		streamName:      aws.String(conf.Stream),
    85  	}
    86  
    87  	var err error
    88  	if k.backoffCtor, err = conf.Config.GetCtor(); err != nil {
    89  		return nil, err
    90  	}
    91  	return &k, nil
    92  }
    93  
    94  //------------------------------------------------------------------------------
    95  
    96  // toRecords converts an individual benthos message into a slice of Kinesis Firehose
    97  // batch put entries by promoting each message part into a single part message
    98  // and passing each new message through the partition and hash key interpolation
    99  // process, allowing the user to define the partition and hash key per message
   100  // part.
   101  func (a *KinesisFirehose) toRecords(msg types.Message) ([]*firehose.Record, error) {
   102  	entries := make([]*firehose.Record, msg.Len())
   103  
   104  	err := msg.Iter(func(i int, p types.Part) error {
   105  		entry := firehose.Record{
   106  			Data: p.Get(),
   107  		}
   108  
   109  		if len(entry.Data) > mebibyte {
   110  			a.log.Errorf("part %d exceeds the maximum Kinesis Firehose payload limit of 1 MiB\n", i)
   111  			return types.ErrMessageTooLarge
   112  		}
   113  
   114  		entries[i] = &entry
   115  		return nil
   116  	})
   117  
   118  	return entries, err
   119  }
   120  
   121  //------------------------------------------------------------------------------
   122  
   123  // ConnectWithContext creates a new Kinesis Firehose client and ensures that the
   124  // target Kinesis Firehose delivery stream.
   125  func (a *KinesisFirehose) ConnectWithContext(ctx context.Context) error {
   126  	return a.Connect()
   127  }
   128  
   129  // Connect creates a new Kinesis Firehose client and ensures that the target
   130  // Kinesis Firehose delivery stream.
   131  func (a *KinesisFirehose) Connect() error {
   132  	if a.session != nil {
   133  		return nil
   134  	}
   135  
   136  	sess, err := a.conf.GetSession()
   137  	if err != nil {
   138  		return err
   139  	}
   140  
   141  	a.session = sess
   142  	a.firehose = firehose.New(sess)
   143  
   144  	if _, err := a.firehose.DescribeDeliveryStream(&firehose.DescribeDeliveryStreamInput{
   145  		DeliveryStreamName: a.streamName,
   146  	}); err != nil {
   147  		return err
   148  	}
   149  
   150  	a.log.Infof("Sending messages to Kinesis Firehose delivery stream: %v\n", a.conf.Stream)
   151  	return nil
   152  }
   153  
   154  // Write attempts to write message contents to a target Kinesis Firehose delivery
   155  // stream in batches of 500. If throttling is detected, failed messages are retried
   156  // according to the configurable backoff settings.
   157  func (a *KinesisFirehose) Write(msg types.Message) error {
   158  	return a.WriteWithContext(context.Background(), msg)
   159  }
   160  
   161  // WriteWithContext attempts to write message contents to a target Kinesis
   162  // Firehose delivery stream in batches of 500. If throttling is detected, failed
   163  // messages are retried according to the configurable backoff settings.
   164  func (a *KinesisFirehose) WriteWithContext(ctx context.Context, msg types.Message) error {
   165  	if a.session == nil {
   166  		return types.ErrNotConnected
   167  	}
   168  
   169  	backOff := a.backoffCtor()
   170  
   171  	records, err := a.toRecords(msg)
   172  	if err != nil {
   173  		return err
   174  	}
   175  
   176  	input := &firehose.PutRecordBatchInput{
   177  		Records:            records,
   178  		DeliveryStreamName: a.streamName,
   179  	}
   180  
   181  	// trim input record length to max kinesis firehose batch size
   182  	if len(records) > kinesisMaxRecordsCount {
   183  		input.Records, records = records[:kinesisMaxRecordsCount], records[kinesisMaxRecordsCount:]
   184  	} else {
   185  		records = nil
   186  	}
   187  
   188  	var failed []*firehose.Record
   189  	for len(input.Records) > 0 {
   190  		wait := backOff.NextBackOff()
   191  
   192  		// batch write to kinesis firehose
   193  		output, err := a.firehose.PutRecordBatch(input)
   194  		if err != nil {
   195  			a.log.Warnf("kinesis firehose error: %v\n", err)
   196  			// bail if a message is too large or all retry attempts expired
   197  			if wait == backoff.Stop {
   198  				return err
   199  			}
   200  			continue
   201  		}
   202  
   203  		// requeue any individual records that failed due to throttling
   204  		failed = nil
   205  		if output.FailedPutCount != nil {
   206  			for i, entry := range output.RequestResponses {
   207  				if entry.ErrorCode != nil {
   208  					failed = append(failed, input.Records[i])
   209  					if *entry.ErrorCode != firehose.ErrCodeServiceUnavailableException {
   210  						err = fmt.Errorf("record failed with code [%s] %s: %+v", *entry.ErrorCode, *entry.ErrorMessage, input.Records[i])
   211  						a.log.Errorf("kinesis firehose record error: %v\n", err)
   212  						return err
   213  					}
   214  				}
   215  			}
   216  		}
   217  		input.Records = failed
   218  
   219  		// if throttling errors detected, pause briefly
   220  		l := len(failed)
   221  		if l > 0 {
   222  			a.mThrottled.Incr(1)
   223  			a.mPartsThrottled.Incr(int64(l))
   224  			a.log.Warnf("scheduling retry of throttled records (%d)\n", l)
   225  			if wait == backoff.Stop {
   226  				return types.ErrTimeout
   227  			}
   228  			time.Sleep(wait)
   229  		}
   230  
   231  		// add remaining records to batch
   232  		if n := len(records); n > 0 && l < kinesisMaxRecordsCount {
   233  			if remaining := kinesisMaxRecordsCount - l; remaining < n {
   234  				input.Records, records = append(input.Records, records[:remaining]...), records[remaining:]
   235  			} else {
   236  				input.Records, records = append(input.Records, records...), nil
   237  			}
   238  		}
   239  	}
   240  	return err
   241  }
   242  
   243  // CloseAsync begins cleaning up resources used by this reader asynchronously.
   244  func (a *KinesisFirehose) CloseAsync() {
   245  }
   246  
   247  // WaitForClose will block until either the reader is closed or a specified
   248  // timeout occurs.
   249  func (a *KinesisFirehose) WaitForClose(time.Duration) error {
   250  	return nil
   251  }
   252  
   253  //------------------------------------------------------------------------------