github.com/Jeffail/benthos/v3@v3.65.0/lib/input/aws_s3.go (about) 1 package input 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "io" 8 "net/url" 9 "strconv" 10 "strings" 11 "sync" 12 "sync/atomic" 13 "time" 14 15 "github.com/Jeffail/benthos/v3/internal/codec" 16 "github.com/Jeffail/benthos/v3/internal/docs" 17 "github.com/Jeffail/benthos/v3/lib/input/reader" 18 "github.com/Jeffail/benthos/v3/lib/log" 19 "github.com/Jeffail/benthos/v3/lib/message" 20 "github.com/Jeffail/benthos/v3/lib/metrics" 21 "github.com/Jeffail/benthos/v3/lib/types" 22 sess "github.com/Jeffail/benthos/v3/lib/util/aws/session" 23 "github.com/Jeffail/gabs/v2" 24 "github.com/aws/aws-sdk-go/aws" 25 "github.com/aws/aws-sdk-go/aws/session" 26 "github.com/aws/aws-sdk-go/service/s3" 27 "github.com/aws/aws-sdk-go/service/sqs" 28 ) 29 30 func init() { 31 Constructors[TypeAWSS3] = TypeSpec{ 32 constructor: fromSimpleConstructor(func(conf Config, mgr types.Manager, log log.Modular, stats metrics.Type) (Type, error) { 33 var r reader.Async 34 var err error 35 if r, err = newAmazonS3(conf.AWSS3, log, stats); err != nil { 36 return nil, err 37 } 38 // If we're not pulling events directly from an SQS queue then 39 // there's no concept of propagating nacks upstream, therefore wrap 40 // our reader within a preserver in order to retry indefinitely. 41 if conf.AWSS3.SQS.URL == "" { 42 r = reader.NewAsyncPreserver(r) 43 } 44 return NewAsyncReader(TypeAWSS3, false, r, log, stats) 45 }), 46 Status: docs.StatusStable, 47 Summary: ` 48 Downloads objects within an Amazon S3 bucket, optionally filtered by a prefix, either by walking the items in the bucket or by streaming upload notifications in realtime.`, 49 Description: ` 50 ## Streaming Objects on Upload with SQS 51 52 A common pattern for consuming S3 objects is to emit upload notification events from the bucket either directly to an SQS queue, or to an SNS topic that is consumed by an SQS queue, and then have your consumer listen for events which prompt it to download the newly uploaded objects. More information about this pattern and how to set it up can be found at: https://docs.aws.amazon.com/AmazonS3/latest/dev/ways-to-add-notification-config-to-bucket.html. 53 54 Benthos is able to follow this pattern when you configure an ` + "`sqs.url`" + `, where it consumes events from SQS and only downloads object keys received within those events. In order for this to work Benthos needs to know where within the event the key and bucket names can be found, specified as [dot paths](/docs/configuration/field_paths) with the fields ` + "`sqs.key_path` and `sqs.bucket_path`" + `. The default values for these fields should already be correct when following the guide above. 55 56 If your notification events are being routed to SQS via an SNS topic then the events will be enveloped by SNS, in which case you also need to specify the field ` + "`sqs.envelope_path`" + `, which in the case of SNS to SQS will usually be ` + "`Message`" + `. 57 58 When using SQS please make sure you have sensible values for ` + "`sqs.max_messages`" + ` and also the visibility timeout of the queue itself. When Benthos consumes an S3 object the SQS message that triggered it is not deleted until the S3 object has been sent onwards. This ensures at-least-once crash resiliency, but also means that if the S3 object takes longer to process than the visibility timeout of your queue then the same objects might be processed multiple times. 59 60 ## Downloading Large Files 61 62 When downloading large files it's often necessary to process it in streamed parts in order to avoid loading the entire file in memory at a given time. In order to do this a ` + "[`codec`](#codec)" + ` can be specified that determines how to break the input into smaller individual messages. 63 64 ## Credentials 65 66 By default Benthos will use a shared credentials file when connecting to AWS services. It's also possible to set them explicitly at the component level, allowing you to transfer data across accounts. You can find out more [in this document](/docs/guides/cloud/aws). 67 68 ## Metadata 69 70 This input adds the following metadata fields to each message: 71 72 ` + "```" + ` 73 - s3_key 74 - s3_bucket 75 - s3_last_modified_unix 76 - s3_last_modified (RFC3339) 77 - s3_content_type 78 - s3_content_encoding 79 - All user defined metadata 80 ` + "```" + ` 81 82 You can access these metadata fields using [function interpolation](/docs/configuration/interpolation#metadata). Note that user defined metadata is case insensitive within AWS, and it is likely that the keys will be received in a capitalized form, if you wish to make them consistent you can map all metadata keys to lower or uppercase using a Bloblang mapping such as ` + "`meta = meta().map_each_key(key -> key.lowercase())`" + `.`, 83 84 FieldSpecs: append( 85 append(docs.FieldSpecs{ 86 docs.FieldCommon("bucket", "The bucket to consume from. If the field `sqs.url` is specified this field is optional."), 87 docs.FieldCommon("prefix", "An optional path prefix, if set only objects with the prefix are consumed when walking a bucket."), 88 }, sess.FieldSpecs()...), 89 docs.FieldAdvanced("force_path_style_urls", "Forces the client API to use path style URLs for downloading keys, which is often required when connecting to custom endpoints."), 90 docs.FieldAdvanced("delete_objects", "Whether to delete downloaded objects from the bucket once they are processed."), 91 codec.ReaderDocs, 92 docs.FieldCommon("sqs", "Consume SQS messages in order to trigger key downloads.").WithChildren( 93 docs.FieldCommon("url", "An optional SQS URL to connect to. When specified this queue will control which objects are downloaded."), 94 docs.FieldAdvanced("endpoint", "A custom endpoint to use when connecting to SQS."), 95 docs.FieldCommon("key_path", "A [dot path](/docs/configuration/field_paths) whereby object keys are found in SQS messages."), 96 docs.FieldCommon("bucket_path", "A [dot path](/docs/configuration/field_paths) whereby the bucket name can be found in SQS messages."), 97 docs.FieldCommon("envelope_path", "A [dot path](/docs/configuration/field_paths) of a field to extract an enveloped JSON payload for further extracting the key and bucket from SQS messages. This is specifically useful when subscribing an SQS queue to an SNS topic that receives bucket events.", "Message"), 98 docs.FieldAdvanced( 99 "delay_period", 100 "An optional period of time to wait from when a notification was originally sent to when the target key download is attempted.", 101 "10s", "5m", 102 ), 103 docs.FieldAdvanced("max_messages", "The maximum number of SQS messages to consume from each request."), 104 ), 105 ), 106 Categories: []Category{ 107 CategoryServices, 108 CategoryAWS, 109 }, 110 } 111 } 112 113 //------------------------------------------------------------------------------ 114 115 // AWSS3SQSConfig contains configuration for hooking up the S3 input with an SQS queue. 116 type AWSS3SQSConfig struct { 117 URL string `json:"url" yaml:"url"` 118 Endpoint string `json:"endpoint" yaml:"endpoint"` 119 EnvelopePath string `json:"envelope_path" yaml:"envelope_path"` 120 KeyPath string `json:"key_path" yaml:"key_path"` 121 BucketPath string `json:"bucket_path" yaml:"bucket_path"` 122 DelayPeriod string `json:"delay_period" yaml:"delay_period"` 123 MaxMessages int64 `json:"max_messages" yaml:"max_messages"` 124 } 125 126 // NewAWSS3SQSConfig creates a new AWSS3SQSConfig with default values. 127 func NewAWSS3SQSConfig() AWSS3SQSConfig { 128 return AWSS3SQSConfig{ 129 URL: "", 130 Endpoint: "", 131 EnvelopePath: "", 132 KeyPath: "Records.*.s3.object.key", 133 BucketPath: "Records.*.s3.bucket.name", 134 DelayPeriod: "", 135 MaxMessages: 10, 136 } 137 } 138 139 // AWSS3Config contains configuration values for the aws_s3 input type. 140 type AWSS3Config struct { 141 sess.Config `json:",inline" yaml:",inline"` 142 Bucket string `json:"bucket" yaml:"bucket"` 143 Codec string `json:"codec" yaml:"codec"` 144 Prefix string `json:"prefix" yaml:"prefix"` 145 ForcePathStyleURLs bool `json:"force_path_style_urls" yaml:"force_path_style_urls"` 146 DeleteObjects bool `json:"delete_objects" yaml:"delete_objects"` 147 SQS AWSS3SQSConfig `json:"sqs" yaml:"sqs"` 148 } 149 150 // NewAWSS3Config creates a new AWSS3Config with default values. 151 func NewAWSS3Config() AWSS3Config { 152 return AWSS3Config{ 153 Config: sess.NewConfig(), 154 Bucket: "", 155 Prefix: "", 156 Codec: "all-bytes", 157 ForcePathStyleURLs: false, 158 DeleteObjects: false, 159 SQS: NewAWSS3SQSConfig(), 160 } 161 } 162 163 //------------------------------------------------------------------------------ 164 165 type s3ObjectTarget struct { 166 key string 167 bucket string 168 notificationAt time.Time 169 170 ackFn func(context.Context, error) error 171 } 172 173 func newS3ObjectTarget(key, bucket string, notificationAt time.Time, ackFn codec.ReaderAckFn) *s3ObjectTarget { 174 if ackFn == nil { 175 ackFn = func(context.Context, error) error { 176 return nil 177 } 178 } 179 return &s3ObjectTarget{key, bucket, notificationAt, ackFn} 180 } 181 182 type s3ObjectTargetReader interface { 183 Pop(ctx context.Context) (*s3ObjectTarget, error) 184 Close(ctx context.Context) error 185 } 186 187 //------------------------------------------------------------------------------ 188 189 func deleteS3ObjectAckFn( 190 s3Client *s3.S3, 191 bucket, key string, 192 del bool, 193 prev codec.ReaderAckFn, 194 ) codec.ReaderAckFn { 195 return func(ctx context.Context, err error) error { 196 if prev != nil { 197 if aerr := prev(ctx, err); aerr != nil { 198 return aerr 199 } 200 } 201 if !del || err != nil { 202 return nil 203 } 204 _, aerr := s3Client.DeleteObjectWithContext(ctx, &s3.DeleteObjectInput{ 205 Bucket: aws.String(bucket), 206 Key: aws.String(key), 207 }) 208 return aerr 209 } 210 } 211 212 //------------------------------------------------------------------------------ 213 214 type staticTargetReader struct { 215 pending []*s3ObjectTarget 216 s3 *s3.S3 217 conf AWSS3Config 218 startAfter *string 219 } 220 221 func newStaticTargetReader( 222 ctx context.Context, 223 conf AWSS3Config, 224 log log.Modular, 225 s3Client *s3.S3, 226 ) (*staticTargetReader, error) { 227 listInput := &s3.ListObjectsV2Input{ 228 Bucket: aws.String(conf.Bucket), 229 MaxKeys: aws.Int64(100), 230 } 231 if len(conf.Prefix) > 0 { 232 listInput.Prefix = aws.String(conf.Prefix) 233 } 234 output, err := s3Client.ListObjectsV2WithContext(ctx, listInput) 235 if err != nil { 236 return nil, fmt.Errorf("failed to list objects: %v", err) 237 } 238 staticKeys := staticTargetReader{ 239 s3: s3Client, 240 conf: conf, 241 } 242 for _, obj := range output.Contents { 243 ackFn := deleteS3ObjectAckFn(s3Client, conf.Bucket, *obj.Key, conf.DeleteObjects, nil) 244 staticKeys.pending = append(staticKeys.pending, newS3ObjectTarget(*obj.Key, conf.Bucket, time.Time{}, ackFn)) 245 } 246 if len(output.Contents) > 0 { 247 staticKeys.startAfter = output.Contents[len(output.Contents)-1].Key 248 } 249 return &staticKeys, nil 250 } 251 252 func (s *staticTargetReader) Pop(ctx context.Context) (*s3ObjectTarget, error) { 253 if len(s.pending) == 0 && s.startAfter != nil { 254 s.pending = nil 255 listInput := &s3.ListObjectsV2Input{ 256 Bucket: aws.String(s.conf.Bucket), 257 MaxKeys: aws.Int64(100), 258 StartAfter: s.startAfter, 259 } 260 if len(s.conf.Prefix) > 0 { 261 listInput.Prefix = aws.String(s.conf.Prefix) 262 } 263 output, err := s.s3.ListObjectsV2WithContext(ctx, listInput) 264 if err != nil { 265 return nil, fmt.Errorf("failed to list objects: %v", err) 266 } 267 for _, obj := range output.Contents { 268 ackFn := deleteS3ObjectAckFn(s.s3, s.conf.Bucket, *obj.Key, s.conf.DeleteObjects, nil) 269 s.pending = append(s.pending, newS3ObjectTarget(*obj.Key, s.conf.Bucket, time.Time{}, ackFn)) 270 } 271 if len(output.Contents) > 0 { 272 s.startAfter = output.Contents[len(output.Contents)-1].Key 273 } 274 } 275 if len(s.pending) == 0 { 276 return nil, io.EOF 277 } 278 obj := s.pending[0] 279 s.pending = s.pending[1:] 280 return obj, nil 281 } 282 283 func (s staticTargetReader) Close(context.Context) error { 284 return nil 285 } 286 287 //------------------------------------------------------------------------------ 288 289 type sqsTargetReader struct { 290 conf AWSS3Config 291 log log.Modular 292 sqs *sqs.SQS 293 s3 *s3.S3 294 295 nextRequest time.Time 296 297 pending []*s3ObjectTarget 298 } 299 300 func newSQSTargetReader( 301 conf AWSS3Config, 302 log log.Modular, 303 s3 *s3.S3, 304 sqs *sqs.SQS, 305 ) *sqsTargetReader { 306 return &sqsTargetReader{conf, log, sqs, s3, time.Time{}, nil} 307 } 308 309 func (s *sqsTargetReader) Pop(ctx context.Context) (*s3ObjectTarget, error) { 310 if len(s.pending) > 0 { 311 t := s.pending[0] 312 s.pending = s.pending[1:] 313 return t, nil 314 } 315 316 if !s.nextRequest.IsZero() { 317 if until := time.Until(s.nextRequest); until > 0 { 318 select { 319 case <-time.After(until): 320 case <-ctx.Done(): 321 return nil, ctx.Err() 322 } 323 } 324 } 325 326 var err error 327 if s.pending, err = s.readSQSEvents(ctx); err != nil { 328 return nil, err 329 } 330 if len(s.pending) == 0 { 331 s.nextRequest = time.Now().Add(time.Millisecond * 500) 332 return nil, types.ErrTimeout 333 } 334 s.nextRequest = time.Time{} 335 t := s.pending[0] 336 s.pending = s.pending[1:] 337 return t, nil 338 } 339 340 func (s *sqsTargetReader) Close(ctx context.Context) error { 341 var err error 342 for _, p := range s.pending { 343 if aerr := p.ackFn(ctx, errors.New("service shutting down")); aerr != nil { 344 err = aerr 345 } 346 } 347 return err 348 } 349 350 func digStrsFromSlices(slice []interface{}) []string { 351 var strs []string 352 for _, v := range slice { 353 switch t := v.(type) { 354 case []interface{}: 355 strs = append(strs, digStrsFromSlices(t)...) 356 case string: 357 strs = append(strs, t) 358 } 359 } 360 return strs 361 } 362 363 func (s *sqsTargetReader) parseObjectPaths(sqsMsg *string) ([]s3ObjectTarget, error) { 364 gObj, err := gabs.ParseJSON([]byte(*sqsMsg)) 365 if err != nil { 366 return nil, fmt.Errorf("failed to parse SQS message: %v", err) 367 } 368 369 if len(s.conf.SQS.EnvelopePath) > 0 { 370 d := gObj.Path(s.conf.SQS.EnvelopePath).Data() 371 if str, ok := d.(string); ok { 372 if gObj, err = gabs.ParseJSON([]byte(str)); err != nil { 373 return nil, fmt.Errorf("failed to parse enveloped message: %v", err) 374 } 375 } else { 376 return nil, fmt.Errorf("expected string at envelope path, found %T", d) 377 } 378 } 379 380 var keys []string 381 var buckets []string 382 383 switch t := gObj.Path(s.conf.SQS.KeyPath).Data().(type) { 384 case string: 385 keys = []string{t} 386 case []interface{}: 387 keys = digStrsFromSlices(t) 388 } 389 if len(s.conf.SQS.BucketPath) > 0 { 390 switch t := gObj.Path(s.conf.SQS.BucketPath).Data().(type) { 391 case string: 392 buckets = []string{t} 393 case []interface{}: 394 buckets = digStrsFromSlices(t) 395 } 396 } 397 398 objects := make([]s3ObjectTarget, 0, len(keys)) 399 for i, key := range keys { 400 if key, err = url.QueryUnescape(key); err != nil { 401 return nil, fmt.Errorf("failed to parse key from SQS message: %v", err) 402 } 403 bucket := s.conf.Bucket 404 if len(buckets) > i { 405 bucket = buckets[i] 406 } 407 if bucket == "" { 408 return nil, errors.New("required bucket was not found in SQS message") 409 } 410 objects = append(objects, s3ObjectTarget{ 411 key: key, 412 bucket: bucket, 413 }) 414 } 415 416 return objects, nil 417 } 418 419 func (s *sqsTargetReader) readSQSEvents(ctx context.Context) ([]*s3ObjectTarget, error) { 420 var dudMessageHandles []*sqs.ChangeMessageVisibilityBatchRequestEntry 421 addDudFn := func(m *sqs.Message) { 422 dudMessageHandles = append(dudMessageHandles, &sqs.ChangeMessageVisibilityBatchRequestEntry{ 423 Id: m.MessageId, 424 ReceiptHandle: m.ReceiptHandle, 425 VisibilityTimeout: aws.Int64(0), 426 }) 427 } 428 429 output, err := s.sqs.ReceiveMessageWithContext(ctx, &sqs.ReceiveMessageInput{ 430 QueueUrl: aws.String(s.conf.SQS.URL), 431 MaxNumberOfMessages: aws.Int64(s.conf.SQS.MaxMessages), 432 AttributeNames: []*string{ 433 aws.String("SentTimestamp"), 434 }, 435 }) 436 if err != nil { 437 return nil, err 438 } 439 440 var pendingObjects []*s3ObjectTarget 441 442 for _, sqsMsg := range output.Messages { 443 sqsMsg := sqsMsg 444 445 var notificationAt time.Time 446 if rcvd, ok := sqsMsg.Attributes["SentTimestamp"]; ok && rcvd != nil { 447 if millis, _ := strconv.Atoi(*rcvd); millis > 0 { 448 notificationAt = time.Unix(0, int64(millis*1e6)) 449 } 450 } 451 452 if sqsMsg.Body == nil { 453 addDudFn(sqsMsg) 454 s.log.Errorln("Received empty SQS message") 455 continue 456 } 457 458 objects, err := s.parseObjectPaths(sqsMsg.Body) 459 if err != nil { 460 addDudFn(sqsMsg) 461 s.log.Errorf("SQS extract key error: %v\n", err) 462 continue 463 } 464 if len(objects) == 0 { 465 addDudFn(sqsMsg) 466 s.log.Debugln("Extracted zero target keys from SQS message") 467 continue 468 } 469 470 pendingAcks := int32(len(objects)) 471 var nackOnce sync.Once 472 for _, object := range objects { 473 ackOnce := sync.Once{} 474 pendingObjects = append(pendingObjects, newS3ObjectTarget( 475 object.key, object.bucket, notificationAt, 476 deleteS3ObjectAckFn( 477 s.s3, object.bucket, object.key, s.conf.DeleteObjects, 478 func(ctx context.Context, err error) (aerr error) { 479 if err != nil { 480 nackOnce.Do(func() { 481 // Prevent future acks from triggering a delete. 482 atomic.StoreInt32(&pendingAcks, -1) 483 484 s.log.Debugf("Pushing SQS notification back into the queue due to error: %v\n", err) 485 486 // It's possible that this is called for one message 487 // at the _exact_ same time as another is acked, but 488 // if the acked message triggers a full ack of the 489 // origin message then even though it shouldn't be 490 // possible, it's also harmless. 491 aerr = s.nackSQSMessage(ctx, sqsMsg) 492 }) 493 } else { 494 ackOnce.Do(func() { 495 if atomic.AddInt32(&pendingAcks, -1) == 0 { 496 aerr = s.ackSQSMessage(ctx, sqsMsg) 497 } 498 }) 499 } 500 return 501 }, 502 ), 503 )) 504 } 505 } 506 507 // Discard any SQS messages not associated with a target file. 508 for len(dudMessageHandles) > 0 { 509 input := sqs.ChangeMessageVisibilityBatchInput{ 510 QueueUrl: aws.String(s.conf.SQS.URL), 511 Entries: dudMessageHandles, 512 } 513 514 // trim input entries to max size 515 if len(dudMessageHandles) > 10 { 516 input.Entries, dudMessageHandles = dudMessageHandles[:10], dudMessageHandles[10:] 517 } else { 518 dudMessageHandles = nil 519 } 520 s.sqs.ChangeMessageVisibilityBatch(&input) 521 } 522 523 return pendingObjects, nil 524 } 525 526 func (s *sqsTargetReader) nackSQSMessage(ctx context.Context, msg *sqs.Message) error { 527 _, err := s.sqs.ChangeMessageVisibilityWithContext(ctx, &sqs.ChangeMessageVisibilityInput{ 528 QueueUrl: aws.String(s.conf.SQS.URL), 529 ReceiptHandle: msg.ReceiptHandle, 530 VisibilityTimeout: aws.Int64(0), 531 }) 532 return err 533 } 534 535 func (s *sqsTargetReader) ackSQSMessage(ctx context.Context, msg *sqs.Message) error { 536 _, err := s.sqs.DeleteMessageWithContext(ctx, &sqs.DeleteMessageInput{ 537 QueueUrl: aws.String(s.conf.SQS.URL), 538 ReceiptHandle: msg.ReceiptHandle, 539 }) 540 return err 541 } 542 543 //------------------------------------------------------------------------------ 544 545 // AmazonS3 is a benthos reader.Type implementation that reads messages from an 546 // Amazon S3 bucket. 547 type awsS3 struct { 548 conf AWSS3Config 549 550 objectScannerCtor codec.ReaderConstructor 551 keyReader s3ObjectTargetReader 552 553 session *session.Session 554 s3 *s3.S3 555 sqs *sqs.SQS 556 557 gracePeriod time.Duration 558 559 objectMut sync.Mutex 560 object *s3PendingObject 561 562 log log.Modular 563 stats metrics.Type 564 } 565 566 type s3PendingObject struct { 567 target *s3ObjectTarget 568 obj *s3.GetObjectOutput 569 extracted int 570 scanner codec.Reader 571 } 572 573 // NewAmazonS3 creates a new Amazon S3 bucket reader.Type. 574 func newAmazonS3( 575 conf AWSS3Config, 576 log log.Modular, 577 stats metrics.Type, 578 ) (*awsS3, error) { 579 if conf.Bucket == "" && conf.SQS.URL == "" { 580 return nil, errors.New("either a bucket or an sqs.url must be specified") 581 } 582 if conf.Prefix != "" && conf.SQS.URL != "" { 583 return nil, errors.New("cannot specify both a prefix and sqs.url") 584 } 585 s := &awsS3{ 586 conf: conf, 587 log: log, 588 stats: stats, 589 } 590 var err error 591 if s.objectScannerCtor, err = codec.GetReader(conf.Codec, codec.NewReaderConfig()); err != nil { 592 return nil, err 593 } 594 if len(conf.SQS.DelayPeriod) > 0 { 595 if s.gracePeriod, err = time.ParseDuration(conf.SQS.DelayPeriod); err != nil { 596 return nil, fmt.Errorf("failed to parse grace period: %w", err) 597 } 598 } 599 return s, nil 600 } 601 602 func (a *awsS3) getTargetReader(ctx context.Context) (s3ObjectTargetReader, error) { 603 if a.sqs != nil { 604 return newSQSTargetReader(a.conf, a.log, a.s3, a.sqs), nil 605 } 606 return newStaticTargetReader(ctx, a.conf, a.log, a.s3) 607 } 608 609 // ConnectWithContext attempts to establish a connection to the target S3 bucket 610 // and any relevant queues used to traverse the objects (SQS, etc). 611 func (a *awsS3) ConnectWithContext(ctx context.Context) error { 612 if a.session != nil { 613 return nil 614 } 615 616 sess, err := a.conf.GetSession(func(c *aws.Config) { 617 c.S3ForcePathStyle = aws.Bool(a.conf.ForcePathStyleURLs) 618 }) 619 if err != nil { 620 return err 621 } 622 623 a.session = sess 624 a.s3 = s3.New(sess) 625 if a.conf.SQS.URL != "" { 626 sqsSess := sess.Copy() 627 if len(a.conf.SQS.Endpoint) > 0 { 628 sqsSess.Config.Endpoint = &a.conf.SQS.Endpoint 629 } 630 a.sqs = sqs.New(sqsSess) 631 } 632 633 if a.keyReader, err = a.getTargetReader(ctx); err != nil { 634 a.session = nil 635 a.s3 = nil 636 a.sqs = nil 637 return err 638 } 639 640 if a.conf.SQS.URL == "" { 641 a.log.Infof("Downloading S3 objects from bucket: %s\n", a.conf.Bucket) 642 } else { 643 a.log.Infof("Downloading S3 objects found in messages from SQS: %s\n", a.conf.SQS.URL) 644 } 645 return nil 646 } 647 648 func s3MsgFromParts(p *s3PendingObject, parts []types.Part) types.Message { 649 msg := message.New(nil) 650 msg.Append(parts...) 651 msg.Iter(func(_ int, part types.Part) error { 652 meta := part.Metadata() 653 meta.Set("s3_key", p.target.key) 654 meta.Set("s3_bucket", p.target.bucket) 655 if p.obj.LastModified != nil { 656 meta.Set("s3_last_modified", p.obj.LastModified.Format(time.RFC3339)) 657 meta.Set("s3_last_modified_unix", strconv.FormatInt(p.obj.LastModified.Unix(), 10)) 658 } 659 if p.obj.ContentType != nil { 660 meta.Set("s3_content_type", *p.obj.ContentType) 661 } 662 if p.obj.ContentEncoding != nil { 663 meta.Set("s3_content_encoding", *p.obj.ContentEncoding) 664 } 665 for k, v := range p.obj.Metadata { 666 if v != nil { 667 meta.Set(k, *v) 668 } 669 } 670 return nil 671 }) 672 return msg 673 } 674 675 func (a *awsS3) getObjectTarget(ctx context.Context) (*s3PendingObject, error) { 676 if a.object != nil { 677 return a.object, nil 678 } 679 680 target, err := a.keyReader.Pop(ctx) 681 if err != nil { 682 return nil, err 683 } 684 685 if a.gracePeriod > 0 && !target.notificationAt.IsZero() { 686 waitFor := a.gracePeriod - time.Since(target.notificationAt) 687 if waitFor > 0 && waitFor < a.gracePeriod { 688 select { 689 case <-time.After(waitFor): 690 case <-ctx.Done(): 691 return nil, ctx.Err() 692 } 693 } 694 } 695 696 obj, err := a.s3.GetObject(&s3.GetObjectInput{ 697 Bucket: aws.String(target.bucket), 698 Key: aws.String(target.key), 699 }) 700 if err != nil { 701 _ = target.ackFn(ctx, err) 702 return nil, err 703 } 704 705 object := &s3PendingObject{ 706 target: target, 707 obj: obj, 708 } 709 if object.scanner, err = a.objectScannerCtor(target.key, obj.Body, target.ackFn); err != nil { 710 _ = target.ackFn(ctx, err) 711 return nil, err 712 } 713 714 a.object = object 715 return object, nil 716 } 717 718 // ReadWithContext attempts to read a new message from the target S3 bucket. 719 func (a *awsS3) ReadWithContext(ctx context.Context) (msg types.Message, ackFn reader.AsyncAckFn, err error) { 720 a.objectMut.Lock() 721 defer a.objectMut.Unlock() 722 if a.session == nil { 723 return nil, nil, types.ErrNotConnected 724 } 725 726 defer func() { 727 if errors.Is(err, io.EOF) { 728 err = types.ErrTypeClosed 729 } else if errors.Is(err, context.Canceled) || 730 errors.Is(err, context.DeadlineExceeded) || 731 (err != nil && strings.HasSuffix(err.Error(), "context canceled")) { 732 err = types.ErrTimeout 733 } 734 }() 735 736 var object *s3PendingObject 737 if object, err = a.getObjectTarget(ctx); err != nil { 738 return 739 } 740 741 var parts []types.Part 742 var scnAckFn codec.ReaderAckFn 743 744 for { 745 if parts, scnAckFn, err = object.scanner.Next(ctx); err == nil { 746 object.extracted++ 747 break 748 } 749 a.object = nil 750 if err != io.EOF { 751 return 752 } 753 if err = object.scanner.Close(ctx); err != nil { 754 a.log.Warnf("Failed to close bucket object scanner cleanly: %v\n", err) 755 } 756 if object.extracted == 0 { 757 a.log.Debugf("Extracted zero messages from key %v\n", object.target.key) 758 } 759 if object, err = a.getObjectTarget(ctx); err != nil { 760 return 761 } 762 } 763 764 return s3MsgFromParts(object, parts), func(rctx context.Context, res types.Response) error { 765 return scnAckFn(rctx, res.Error()) 766 }, nil 767 } 768 769 // CloseAsync begins cleaning up resources used by this reader asynchronously. 770 func (a *awsS3) CloseAsync() { 771 go func() { 772 a.objectMut.Lock() 773 if a.object != nil { 774 a.object.scanner.Close(context.Background()) 775 a.object = nil 776 } 777 a.objectMut.Unlock() 778 }() 779 } 780 781 // WaitForClose will block until either the reader is closed or a specified 782 // timeout occurs. 783 func (a *awsS3) WaitForClose(time.Duration) error { 784 return nil 785 } 786 787 //------------------------------------------------------------------------------