github.com/Jeffail/benthos/v3@v3.65.0/lib/output/writer/dynamodb.go (about) 1 package writer 2 3 import ( 4 "context" 5 "encoding/json" 6 "errors" 7 "fmt" 8 "strconv" 9 "sync" 10 "time" 11 12 batchInternal "github.com/Jeffail/benthos/v3/internal/batch" 13 "github.com/Jeffail/benthos/v3/internal/bloblang/field" 14 "github.com/Jeffail/benthos/v3/internal/interop" 15 "github.com/Jeffail/benthos/v3/lib/log" 16 "github.com/Jeffail/benthos/v3/lib/message/batch" 17 "github.com/Jeffail/benthos/v3/lib/metrics" 18 "github.com/Jeffail/benthos/v3/lib/types" 19 "github.com/Jeffail/benthos/v3/lib/util/aws/session" 20 "github.com/Jeffail/benthos/v3/lib/util/retries" 21 "github.com/Jeffail/gabs/v2" 22 "github.com/aws/aws-sdk-go/aws" 23 "github.com/aws/aws-sdk-go/service/dynamodb" 24 "github.com/aws/aws-sdk-go/service/dynamodb/dynamodbiface" 25 "github.com/cenkalti/backoff/v4" 26 "github.com/google/go-cmp/cmp" 27 ) 28 29 //------------------------------------------------------------------------------ 30 31 // DynamoDBConfig contains config fields for the DynamoDB output type. 32 type DynamoDBConfig struct { 33 sessionConfig `json:",inline" yaml:",inline"` 34 Table string `json:"table" yaml:"table"` 35 StringColumns map[string]string `json:"string_columns" yaml:"string_columns"` 36 JSONMapColumns map[string]string `json:"json_map_columns" yaml:"json_map_columns"` 37 TTL string `json:"ttl" yaml:"ttl"` 38 TTLKey string `json:"ttl_key" yaml:"ttl_key"` 39 MaxInFlight int `json:"max_in_flight" yaml:"max_in_flight"` 40 retries.Config `json:",inline" yaml:",inline"` 41 Batching batch.PolicyConfig `json:"batching" yaml:"batching"` 42 } 43 44 // NewDynamoDBConfig creates a DynamoDBConfig populated with default values. 45 func NewDynamoDBConfig() DynamoDBConfig { 46 rConf := retries.NewConfig() 47 rConf.MaxRetries = 3 48 rConf.Backoff.InitialInterval = "1s" 49 rConf.Backoff.MaxInterval = "5s" 50 rConf.Backoff.MaxElapsedTime = "30s" 51 return DynamoDBConfig{ 52 sessionConfig: sessionConfig{ 53 Config: session.NewConfig(), 54 }, 55 Table: "", 56 StringColumns: map[string]string{}, 57 JSONMapColumns: map[string]string{}, 58 TTL: "", 59 TTLKey: "", 60 MaxInFlight: 1, 61 Config: rConf, 62 Batching: batch.NewPolicyConfig(), 63 } 64 } 65 66 //------------------------------------------------------------------------------ 67 68 // DynamoDB is a benthos writer.Type implementation that writes messages to an 69 // Amazon SQS queue. 70 type DynamoDB struct { 71 client dynamodbiface.DynamoDBAPI 72 conf DynamoDBConfig 73 log log.Modular 74 stats metrics.Type 75 76 backoffCtor func() backoff.BackOff 77 boffPool sync.Pool 78 79 table *string 80 ttl time.Duration 81 strColumns map[string]*field.Expression 82 jsonMapColumns map[string]string 83 } 84 85 // NewDynamoDB creates a new Amazon SQS writer.Type. 86 // 87 // Deprecated: use the V2 API instead. 88 func NewDynamoDB( 89 conf DynamoDBConfig, 90 log log.Modular, 91 stats metrics.Type, 92 ) (*DynamoDB, error) { 93 return NewDynamoDBV2(conf, types.NoopMgr(), log, stats) 94 } 95 96 // NewDynamoDBV2 creates a new Amazon SQS writer.Type. 97 func NewDynamoDBV2( 98 conf DynamoDBConfig, 99 mgr types.Manager, 100 log log.Modular, 101 stats metrics.Type, 102 ) (*DynamoDB, error) { 103 db := &DynamoDB{ 104 conf: conf, 105 log: log, 106 stats: stats, 107 table: aws.String(conf.Table), 108 strColumns: map[string]*field.Expression{}, 109 jsonMapColumns: map[string]string{}, 110 } 111 if len(conf.StringColumns) == 0 && len(conf.JSONMapColumns) == 0 { 112 return nil, errors.New("you must provide at least one column") 113 } 114 var err error 115 for k, v := range conf.StringColumns { 116 if db.strColumns[k], err = interop.NewBloblangField(mgr, v); err != nil { 117 return nil, fmt.Errorf("failed to parse column '%v' expression: %v", k, err) 118 } 119 } 120 for k, v := range conf.JSONMapColumns { 121 if v == "." { 122 v = "" 123 } 124 db.jsonMapColumns[k] = v 125 } 126 if conf.TTL != "" { 127 ttl, err := time.ParseDuration(conf.TTL) 128 if err != nil { 129 return nil, fmt.Errorf("failed to parse TTL: %v", err) 130 } 131 db.ttl = ttl 132 } 133 if db.backoffCtor, err = conf.Config.GetCtor(); err != nil { 134 return nil, err 135 } 136 db.boffPool = sync.Pool{ 137 New: func() interface{} { 138 return db.backoffCtor() 139 }, 140 } 141 return db, nil 142 } 143 144 // Connect attempts to establish a connection to the target SQS queue. 145 func (d *DynamoDB) Connect() error { 146 return d.ConnectWithContext(context.Background()) 147 } 148 149 // ConnectWithContext attempts to establish a connection to the target DynamoDB 150 // table. 151 func (d *DynamoDB) ConnectWithContext(ctx context.Context) error { 152 if d.client != nil { 153 return nil 154 } 155 156 sess, err := d.conf.GetSession() 157 if err != nil { 158 return err 159 } 160 161 client := dynamodb.New(sess) 162 out, err := client.DescribeTable(&dynamodb.DescribeTableInput{ 163 TableName: d.table, 164 }) 165 if err != nil { 166 return err 167 } else if out == nil || out.Table == nil || out.Table.TableStatus == nil || *out.Table.TableStatus != dynamodb.TableStatusActive { 168 return fmt.Errorf("dynamodb table '%s' must be active", d.conf.Table) 169 } 170 171 d.client = client 172 d.log.Infof("Sending messages to DynamoDB table: %v\n", d.conf.Table) 173 return nil 174 } 175 176 func walkJSON(root interface{}) *dynamodb.AttributeValue { 177 switch v := root.(type) { 178 case map[string]interface{}: 179 m := make(map[string]*dynamodb.AttributeValue, len(v)) 180 for k, v2 := range v { 181 m[k] = walkJSON(v2) 182 } 183 return &dynamodb.AttributeValue{ 184 M: m, 185 } 186 case []interface{}: 187 l := make([]*dynamodb.AttributeValue, len(v)) 188 for i, v2 := range v { 189 l[i] = walkJSON(v2) 190 } 191 return &dynamodb.AttributeValue{ 192 L: l, 193 } 194 case string: 195 return &dynamodb.AttributeValue{ 196 S: aws.String(v), 197 } 198 case json.Number: 199 return &dynamodb.AttributeValue{ 200 N: aws.String(v.String()), 201 } 202 case float64: 203 return &dynamodb.AttributeValue{ 204 N: aws.String(strconv.FormatFloat(v, 'f', -1, 64)), 205 } 206 case int: 207 return &dynamodb.AttributeValue{ 208 N: aws.String(strconv.Itoa(v)), 209 } 210 case int64: 211 return &dynamodb.AttributeValue{ 212 N: aws.String(strconv.Itoa(int(v))), 213 } 214 case bool: 215 return &dynamodb.AttributeValue{ 216 BOOL: aws.Bool(v), 217 } 218 case nil: 219 return &dynamodb.AttributeValue{ 220 NULL: aws.Bool(true), 221 } 222 } 223 return &dynamodb.AttributeValue{ 224 S: aws.String(fmt.Sprintf("%v", root)), 225 } 226 } 227 228 func jsonToMap(path string, root interface{}) (*dynamodb.AttributeValue, error) { 229 gObj := gabs.Wrap(root) 230 if len(path) > 0 { 231 gObj = gObj.Path(path) 232 } 233 return walkJSON(gObj.Data()), nil 234 } 235 236 // Write attempts to write message contents to a target DynamoDB table. 237 func (d *DynamoDB) Write(msg types.Message) error { 238 return d.WriteWithContext(context.Background(), msg) 239 } 240 241 // WriteWithContext attempts to write message contents to a target DynamoDB 242 // table. 243 func (d *DynamoDB) WriteWithContext(ctx context.Context, msg types.Message) error { 244 if d.client == nil { 245 return types.ErrNotConnected 246 } 247 248 boff := d.boffPool.Get().(backoff.BackOff) 249 defer func() { 250 boff.Reset() 251 d.boffPool.Put(boff) 252 }() 253 254 writeReqs := []*dynamodb.WriteRequest{} 255 msg.Iter(func(i int, p types.Part) error { 256 items := map[string]*dynamodb.AttributeValue{} 257 if d.ttl != 0 && d.conf.TTLKey != "" { 258 items[d.conf.TTLKey] = &dynamodb.AttributeValue{ 259 N: aws.String(strconv.FormatInt(time.Now().Add(d.ttl).Unix(), 10)), 260 } 261 } 262 for k, v := range d.strColumns { 263 s := v.String(i, msg) 264 items[k] = &dynamodb.AttributeValue{ 265 S: &s, 266 } 267 } 268 if len(d.jsonMapColumns) > 0 { 269 jRoot, err := p.JSON() 270 if err != nil { 271 d.log.Errorf("Failed to extract JSON maps from document: %v", err) 272 } else { 273 for k, v := range d.jsonMapColumns { 274 if attr, err := jsonToMap(v, jRoot); err == nil { 275 if k == "" { 276 for ak, av := range attr.M { 277 items[ak] = av 278 } 279 } else { 280 items[k] = attr 281 } 282 } else { 283 d.log.Warnf("Unable to extract JSON map path '%v' from document: %v", v, err) 284 } 285 } 286 } 287 } 288 writeReqs = append(writeReqs, &dynamodb.WriteRequest{ 289 PutRequest: &dynamodb.PutRequest{ 290 Item: items, 291 }, 292 }) 293 return nil 294 }) 295 296 batchResult, err := d.client.BatchWriteItem(&dynamodb.BatchWriteItemInput{ 297 RequestItems: map[string][]*dynamodb.WriteRequest{ 298 *d.table: writeReqs, 299 }, 300 }) 301 if err != nil { 302 // None of the messages were successful, attempt to send individually 303 individualRequestsLoop: 304 for err != nil { 305 batchErr := batchInternal.NewError(msg, err) 306 for i, req := range writeReqs { 307 if req == nil { 308 continue 309 } 310 if _, iErr := d.client.PutItem(&dynamodb.PutItemInput{ 311 TableName: d.table, 312 Item: req.PutRequest.Item, 313 }); iErr != nil { 314 d.log.Errorf("Put error: %v\n", iErr) 315 wait := boff.NextBackOff() 316 if wait == backoff.Stop { 317 break individualRequestsLoop 318 } 319 select { 320 case <-time.After(wait): 321 case <-ctx.Done(): 322 break individualRequestsLoop 323 } 324 batchErr.Failed(i, iErr) 325 } else { 326 writeReqs[i] = nil 327 } 328 } 329 if batchErr.IndexedErrors() == 0 { 330 err = nil 331 } else { 332 err = batchErr 333 } 334 } 335 return err 336 } 337 338 unproc := batchResult.UnprocessedItems[*d.table] 339 unprocessedLoop: 340 for len(unproc) > 0 { 341 wait := boff.NextBackOff() 342 if wait == backoff.Stop { 343 break unprocessedLoop 344 } 345 346 select { 347 case <-time.After(wait): 348 case <-ctx.Done(): 349 break unprocessedLoop 350 } 351 if batchResult, err = d.client.BatchWriteItem(&dynamodb.BatchWriteItemInput{ 352 RequestItems: map[string][]*dynamodb.WriteRequest{ 353 *d.table: unproc, 354 }, 355 }); err != nil { 356 d.log.Errorf("Write multi error: %v\n", err) 357 } else if unproc = batchResult.UnprocessedItems[*d.table]; len(unproc) > 0 { 358 err = fmt.Errorf("failed to set %v items", len(unproc)) 359 } else { 360 unproc = nil 361 } 362 } 363 364 if len(unproc) > 0 { 365 if err == nil { 366 err = errors.New("ran out of request retries") 367 } 368 369 // Sad, we have unprocessed messages, we need to map the requests back 370 // to the origin message index. The DynamoDB API doesn't make this easy. 371 batchErr := batchInternal.NewError(msg, err) 372 373 requestsLoop: 374 for _, req := range unproc { 375 for i, src := range writeReqs { 376 if cmp.Equal(req, src) { 377 batchErr.Failed(i, errors.New("failed to set item")) 378 continue requestsLoop 379 } 380 } 381 // If we're unable to map a single request to the origin message 382 // then we return a general error. 383 return err 384 } 385 386 err = batchErr 387 } 388 389 return err 390 } 391 392 // CloseAsync begins cleaning up resources used by this writer asynchronously. 393 func (d *DynamoDB) CloseAsync() { 394 } 395 396 // WaitForClose will block until either the writer is closed or a specified 397 // timeout occurs. 398 func (d *DynamoDB) WaitForClose(time.Duration) error { 399 return nil 400 } 401 402 //------------------------------------------------------------------------------