github.com/Jeffail/benthos/v3@v3.65.0/lib/output/writer/dynamodb.go (about)

     1  package writer
     2  
     3  import (
     4  	"context"
     5  	"encoding/json"
     6  	"errors"
     7  	"fmt"
     8  	"strconv"
     9  	"sync"
    10  	"time"
    11  
    12  	batchInternal "github.com/Jeffail/benthos/v3/internal/batch"
    13  	"github.com/Jeffail/benthos/v3/internal/bloblang/field"
    14  	"github.com/Jeffail/benthos/v3/internal/interop"
    15  	"github.com/Jeffail/benthos/v3/lib/log"
    16  	"github.com/Jeffail/benthos/v3/lib/message/batch"
    17  	"github.com/Jeffail/benthos/v3/lib/metrics"
    18  	"github.com/Jeffail/benthos/v3/lib/types"
    19  	"github.com/Jeffail/benthos/v3/lib/util/aws/session"
    20  	"github.com/Jeffail/benthos/v3/lib/util/retries"
    21  	"github.com/Jeffail/gabs/v2"
    22  	"github.com/aws/aws-sdk-go/aws"
    23  	"github.com/aws/aws-sdk-go/service/dynamodb"
    24  	"github.com/aws/aws-sdk-go/service/dynamodb/dynamodbiface"
    25  	"github.com/cenkalti/backoff/v4"
    26  	"github.com/google/go-cmp/cmp"
    27  )
    28  
    29  //------------------------------------------------------------------------------
    30  
    31  // DynamoDBConfig contains config fields for the DynamoDB output type.
    32  type DynamoDBConfig struct {
    33  	sessionConfig  `json:",inline" yaml:",inline"`
    34  	Table          string            `json:"table" yaml:"table"`
    35  	StringColumns  map[string]string `json:"string_columns" yaml:"string_columns"`
    36  	JSONMapColumns map[string]string `json:"json_map_columns" yaml:"json_map_columns"`
    37  	TTL            string            `json:"ttl" yaml:"ttl"`
    38  	TTLKey         string            `json:"ttl_key" yaml:"ttl_key"`
    39  	MaxInFlight    int               `json:"max_in_flight" yaml:"max_in_flight"`
    40  	retries.Config `json:",inline" yaml:",inline"`
    41  	Batching       batch.PolicyConfig `json:"batching" yaml:"batching"`
    42  }
    43  
    44  // NewDynamoDBConfig creates a DynamoDBConfig populated with default values.
    45  func NewDynamoDBConfig() DynamoDBConfig {
    46  	rConf := retries.NewConfig()
    47  	rConf.MaxRetries = 3
    48  	rConf.Backoff.InitialInterval = "1s"
    49  	rConf.Backoff.MaxInterval = "5s"
    50  	rConf.Backoff.MaxElapsedTime = "30s"
    51  	return DynamoDBConfig{
    52  		sessionConfig: sessionConfig{
    53  			Config: session.NewConfig(),
    54  		},
    55  		Table:          "",
    56  		StringColumns:  map[string]string{},
    57  		JSONMapColumns: map[string]string{},
    58  		TTL:            "",
    59  		TTLKey:         "",
    60  		MaxInFlight:    1,
    61  		Config:         rConf,
    62  		Batching:       batch.NewPolicyConfig(),
    63  	}
    64  }
    65  
    66  //------------------------------------------------------------------------------
    67  
    68  // DynamoDB is a benthos writer.Type implementation that writes messages to an
    69  // Amazon SQS queue.
    70  type DynamoDB struct {
    71  	client dynamodbiface.DynamoDBAPI
    72  	conf   DynamoDBConfig
    73  	log    log.Modular
    74  	stats  metrics.Type
    75  
    76  	backoffCtor func() backoff.BackOff
    77  	boffPool    sync.Pool
    78  
    79  	table          *string
    80  	ttl            time.Duration
    81  	strColumns     map[string]*field.Expression
    82  	jsonMapColumns map[string]string
    83  }
    84  
    85  // NewDynamoDB creates a new Amazon SQS writer.Type.
    86  //
    87  // Deprecated: use the V2 API instead.
    88  func NewDynamoDB(
    89  	conf DynamoDBConfig,
    90  	log log.Modular,
    91  	stats metrics.Type,
    92  ) (*DynamoDB, error) {
    93  	return NewDynamoDBV2(conf, types.NoopMgr(), log, stats)
    94  }
    95  
    96  // NewDynamoDBV2 creates a new Amazon SQS writer.Type.
    97  func NewDynamoDBV2(
    98  	conf DynamoDBConfig,
    99  	mgr types.Manager,
   100  	log log.Modular,
   101  	stats metrics.Type,
   102  ) (*DynamoDB, error) {
   103  	db := &DynamoDB{
   104  		conf:           conf,
   105  		log:            log,
   106  		stats:          stats,
   107  		table:          aws.String(conf.Table),
   108  		strColumns:     map[string]*field.Expression{},
   109  		jsonMapColumns: map[string]string{},
   110  	}
   111  	if len(conf.StringColumns) == 0 && len(conf.JSONMapColumns) == 0 {
   112  		return nil, errors.New("you must provide at least one column")
   113  	}
   114  	var err error
   115  	for k, v := range conf.StringColumns {
   116  		if db.strColumns[k], err = interop.NewBloblangField(mgr, v); err != nil {
   117  			return nil, fmt.Errorf("failed to parse column '%v' expression: %v", k, err)
   118  		}
   119  	}
   120  	for k, v := range conf.JSONMapColumns {
   121  		if v == "." {
   122  			v = ""
   123  		}
   124  		db.jsonMapColumns[k] = v
   125  	}
   126  	if conf.TTL != "" {
   127  		ttl, err := time.ParseDuration(conf.TTL)
   128  		if err != nil {
   129  			return nil, fmt.Errorf("failed to parse TTL: %v", err)
   130  		}
   131  		db.ttl = ttl
   132  	}
   133  	if db.backoffCtor, err = conf.Config.GetCtor(); err != nil {
   134  		return nil, err
   135  	}
   136  	db.boffPool = sync.Pool{
   137  		New: func() interface{} {
   138  			return db.backoffCtor()
   139  		},
   140  	}
   141  	return db, nil
   142  }
   143  
   144  // Connect attempts to establish a connection to the target SQS queue.
   145  func (d *DynamoDB) Connect() error {
   146  	return d.ConnectWithContext(context.Background())
   147  }
   148  
   149  // ConnectWithContext attempts to establish a connection to the target DynamoDB
   150  // table.
   151  func (d *DynamoDB) ConnectWithContext(ctx context.Context) error {
   152  	if d.client != nil {
   153  		return nil
   154  	}
   155  
   156  	sess, err := d.conf.GetSession()
   157  	if err != nil {
   158  		return err
   159  	}
   160  
   161  	client := dynamodb.New(sess)
   162  	out, err := client.DescribeTable(&dynamodb.DescribeTableInput{
   163  		TableName: d.table,
   164  	})
   165  	if err != nil {
   166  		return err
   167  	} else if out == nil || out.Table == nil || out.Table.TableStatus == nil || *out.Table.TableStatus != dynamodb.TableStatusActive {
   168  		return fmt.Errorf("dynamodb table '%s' must be active", d.conf.Table)
   169  	}
   170  
   171  	d.client = client
   172  	d.log.Infof("Sending messages to DynamoDB table: %v\n", d.conf.Table)
   173  	return nil
   174  }
   175  
   176  func walkJSON(root interface{}) *dynamodb.AttributeValue {
   177  	switch v := root.(type) {
   178  	case map[string]interface{}:
   179  		m := make(map[string]*dynamodb.AttributeValue, len(v))
   180  		for k, v2 := range v {
   181  			m[k] = walkJSON(v2)
   182  		}
   183  		return &dynamodb.AttributeValue{
   184  			M: m,
   185  		}
   186  	case []interface{}:
   187  		l := make([]*dynamodb.AttributeValue, len(v))
   188  		for i, v2 := range v {
   189  			l[i] = walkJSON(v2)
   190  		}
   191  		return &dynamodb.AttributeValue{
   192  			L: l,
   193  		}
   194  	case string:
   195  		return &dynamodb.AttributeValue{
   196  			S: aws.String(v),
   197  		}
   198  	case json.Number:
   199  		return &dynamodb.AttributeValue{
   200  			N: aws.String(v.String()),
   201  		}
   202  	case float64:
   203  		return &dynamodb.AttributeValue{
   204  			N: aws.String(strconv.FormatFloat(v, 'f', -1, 64)),
   205  		}
   206  	case int:
   207  		return &dynamodb.AttributeValue{
   208  			N: aws.String(strconv.Itoa(v)),
   209  		}
   210  	case int64:
   211  		return &dynamodb.AttributeValue{
   212  			N: aws.String(strconv.Itoa(int(v))),
   213  		}
   214  	case bool:
   215  		return &dynamodb.AttributeValue{
   216  			BOOL: aws.Bool(v),
   217  		}
   218  	case nil:
   219  		return &dynamodb.AttributeValue{
   220  			NULL: aws.Bool(true),
   221  		}
   222  	}
   223  	return &dynamodb.AttributeValue{
   224  		S: aws.String(fmt.Sprintf("%v", root)),
   225  	}
   226  }
   227  
   228  func jsonToMap(path string, root interface{}) (*dynamodb.AttributeValue, error) {
   229  	gObj := gabs.Wrap(root)
   230  	if len(path) > 0 {
   231  		gObj = gObj.Path(path)
   232  	}
   233  	return walkJSON(gObj.Data()), nil
   234  }
   235  
   236  // Write attempts to write message contents to a target DynamoDB table.
   237  func (d *DynamoDB) Write(msg types.Message) error {
   238  	return d.WriteWithContext(context.Background(), msg)
   239  }
   240  
   241  // WriteWithContext attempts to write message contents to a target DynamoDB
   242  // table.
   243  func (d *DynamoDB) WriteWithContext(ctx context.Context, msg types.Message) error {
   244  	if d.client == nil {
   245  		return types.ErrNotConnected
   246  	}
   247  
   248  	boff := d.boffPool.Get().(backoff.BackOff)
   249  	defer func() {
   250  		boff.Reset()
   251  		d.boffPool.Put(boff)
   252  	}()
   253  
   254  	writeReqs := []*dynamodb.WriteRequest{}
   255  	msg.Iter(func(i int, p types.Part) error {
   256  		items := map[string]*dynamodb.AttributeValue{}
   257  		if d.ttl != 0 && d.conf.TTLKey != "" {
   258  			items[d.conf.TTLKey] = &dynamodb.AttributeValue{
   259  				N: aws.String(strconv.FormatInt(time.Now().Add(d.ttl).Unix(), 10)),
   260  			}
   261  		}
   262  		for k, v := range d.strColumns {
   263  			s := v.String(i, msg)
   264  			items[k] = &dynamodb.AttributeValue{
   265  				S: &s,
   266  			}
   267  		}
   268  		if len(d.jsonMapColumns) > 0 {
   269  			jRoot, err := p.JSON()
   270  			if err != nil {
   271  				d.log.Errorf("Failed to extract JSON maps from document: %v", err)
   272  			} else {
   273  				for k, v := range d.jsonMapColumns {
   274  					if attr, err := jsonToMap(v, jRoot); err == nil {
   275  						if k == "" {
   276  							for ak, av := range attr.M {
   277  								items[ak] = av
   278  							}
   279  						} else {
   280  							items[k] = attr
   281  						}
   282  					} else {
   283  						d.log.Warnf("Unable to extract JSON map path '%v' from document: %v", v, err)
   284  					}
   285  				}
   286  			}
   287  		}
   288  		writeReqs = append(writeReqs, &dynamodb.WriteRequest{
   289  			PutRequest: &dynamodb.PutRequest{
   290  				Item: items,
   291  			},
   292  		})
   293  		return nil
   294  	})
   295  
   296  	batchResult, err := d.client.BatchWriteItem(&dynamodb.BatchWriteItemInput{
   297  		RequestItems: map[string][]*dynamodb.WriteRequest{
   298  			*d.table: writeReqs,
   299  		},
   300  	})
   301  	if err != nil {
   302  		// None of the messages were successful, attempt to send individually
   303  	individualRequestsLoop:
   304  		for err != nil {
   305  			batchErr := batchInternal.NewError(msg, err)
   306  			for i, req := range writeReqs {
   307  				if req == nil {
   308  					continue
   309  				}
   310  				if _, iErr := d.client.PutItem(&dynamodb.PutItemInput{
   311  					TableName: d.table,
   312  					Item:      req.PutRequest.Item,
   313  				}); iErr != nil {
   314  					d.log.Errorf("Put error: %v\n", iErr)
   315  					wait := boff.NextBackOff()
   316  					if wait == backoff.Stop {
   317  						break individualRequestsLoop
   318  					}
   319  					select {
   320  					case <-time.After(wait):
   321  					case <-ctx.Done():
   322  						break individualRequestsLoop
   323  					}
   324  					batchErr.Failed(i, iErr)
   325  				} else {
   326  					writeReqs[i] = nil
   327  				}
   328  			}
   329  			if batchErr.IndexedErrors() == 0 {
   330  				err = nil
   331  			} else {
   332  				err = batchErr
   333  			}
   334  		}
   335  		return err
   336  	}
   337  
   338  	unproc := batchResult.UnprocessedItems[*d.table]
   339  unprocessedLoop:
   340  	for len(unproc) > 0 {
   341  		wait := boff.NextBackOff()
   342  		if wait == backoff.Stop {
   343  			break unprocessedLoop
   344  		}
   345  
   346  		select {
   347  		case <-time.After(wait):
   348  		case <-ctx.Done():
   349  			break unprocessedLoop
   350  		}
   351  		if batchResult, err = d.client.BatchWriteItem(&dynamodb.BatchWriteItemInput{
   352  			RequestItems: map[string][]*dynamodb.WriteRequest{
   353  				*d.table: unproc,
   354  			},
   355  		}); err != nil {
   356  			d.log.Errorf("Write multi error: %v\n", err)
   357  		} else if unproc = batchResult.UnprocessedItems[*d.table]; len(unproc) > 0 {
   358  			err = fmt.Errorf("failed to set %v items", len(unproc))
   359  		} else {
   360  			unproc = nil
   361  		}
   362  	}
   363  
   364  	if len(unproc) > 0 {
   365  		if err == nil {
   366  			err = errors.New("ran out of request retries")
   367  		}
   368  
   369  		// Sad, we have unprocessed messages, we need to map the requests back
   370  		// to the origin message index. The DynamoDB API doesn't make this easy.
   371  		batchErr := batchInternal.NewError(msg, err)
   372  
   373  	requestsLoop:
   374  		for _, req := range unproc {
   375  			for i, src := range writeReqs {
   376  				if cmp.Equal(req, src) {
   377  					batchErr.Failed(i, errors.New("failed to set item"))
   378  					continue requestsLoop
   379  				}
   380  			}
   381  			// If we're unable to map a single request to the origin message
   382  			// then we return a general error.
   383  			return err
   384  		}
   385  
   386  		err = batchErr
   387  	}
   388  
   389  	return err
   390  }
   391  
   392  // CloseAsync begins cleaning up resources used by this writer asynchronously.
   393  func (d *DynamoDB) CloseAsync() {
   394  }
   395  
   396  // WaitForClose will block until either the writer is closed or a specified
   397  // timeout occurs.
   398  func (d *DynamoDB) WaitForClose(time.Duration) error {
   399  	return nil
   400  }
   401  
   402  //------------------------------------------------------------------------------