github.com/Jeffail/benthos/v3@v3.65.0/lib/input/s3.go (about) 1 package input 2 3 import ( 4 "github.com/Jeffail/benthos/v3/internal/docs" 5 "github.com/Jeffail/benthos/v3/lib/input/reader" 6 "github.com/Jeffail/benthos/v3/lib/log" 7 "github.com/Jeffail/benthos/v3/lib/metrics" 8 "github.com/Jeffail/benthos/v3/lib/types" 9 "github.com/Jeffail/benthos/v3/lib/util/aws/session" 10 ) 11 12 //------------------------------------------------------------------------------ 13 14 func init() { 15 Constructors[TypeS3] = TypeSpec{ 16 constructor: fromSimpleConstructor(NewAmazonS3), 17 Status: docs.StatusDeprecated, 18 Summary: ` 19 Downloads objects within an Amazon S3 bucket, optionally filtered by a prefix. 20 If an SQS queue has been configured then only object keys read from the queue 21 will be downloaded.`, 22 Description: ` 23 ## Alternatives 24 25 This input is being replaced with the shiny new ` + "[`aws_s3` input](/docs/components/inputs/aws_s3)" + `, which has improved features, consider trying it out instead. 26 27 If an SQS queue is not specified the entire list of objects found when this 28 input starts will be consumed. Note that the prefix configuration is only used 29 when downloading objects without SQS configured. 30 31 If your bucket is configured to send events directly to an SQS queue then you 32 need to set the ` + "`sqs_body_path`" + ` field to a 33 [dot path](/docs/configuration/field_paths) where the object key is found in the payload. 34 However, it is also common practice to send bucket events to an SNS topic which 35 sends enveloped events to SQS, in which case you must also set the 36 ` + "`sqs_envelope_path`" + ` field to where the payload can be found. 37 38 When using SQS events it's also possible to extract target bucket names from the 39 events by specifying a path in the field ` + "`sqs_bucket_path`" + `. For each 40 SQS event, if that path exists and contains a string it will used as the bucket 41 of the download instead of the ` + "`bucket`" + ` field. 42 43 Here is a guide for setting up an SQS queue that receives events for new S3 44 bucket objects: 45 46 https://docs.aws.amazon.com/AmazonS3/latest/dev/ways-to-add-notification-config-to-bucket.html 47 48 WARNING: When using SQS please make sure you have sensible values for 49 ` + "`sqs_max_messages`" + ` and also the visibility timeout of the queue 50 itself. 51 52 When Benthos consumes an S3 item as a result of receiving an SQS message the 53 message is not deleted until the S3 item has been sent onwards. This ensures 54 at-least-once crash resiliency, but also means that if the S3 item takes longer 55 to process than the visibility timeout of your queue then the same items might 56 be processed multiple times. 57 58 ### Credentials 59 60 By default Benthos will use a shared credentials file when connecting to AWS 61 services. It's also possible to set them explicitly at the component level, 62 allowing you to transfer data across accounts. You can find out more 63 [in this document](/docs/guides/cloud/aws). 64 65 ### Metadata 66 67 This input adds the following metadata fields to each message: 68 69 ` + "```" + ` 70 - s3_key 71 - s3_bucket 72 - s3_last_modified_unix* 73 - s3_last_modified (RFC3339)* 74 - s3_content_type* 75 - s3_content_encoding* 76 - All user defined metadata* 77 78 * Only added when NOT using download manager 79 ` + "```" + ` 80 81 You can access these metadata fields using 82 [function interpolation](/docs/configuration/interpolation#metadata).`, 83 FieldSpecs: append( 84 append(docs.FieldSpecs{ 85 docs.FieldCommon("bucket", "The bucket to consume from. If `sqs_bucket_path` is set this field is still required as a fallback."), 86 docs.FieldCommon("prefix", "An optional path prefix, if set only objects with the prefix are consumed. This field is ignored when SQS is used."), 87 docs.FieldCommon("sqs_url", "An optional SQS URL to connect to. When specified this queue will control which objects are downloaded from the target bucket."), 88 docs.FieldCommon("sqs_body_path", "A [dot path](/docs/configuration/field_paths) whereby object keys are found in SQS messages, this field is only required when an `sqs_url` is specified."), 89 docs.FieldCommon("sqs_bucket_path", "An optional [dot path](/docs/configuration/field_paths) whereby the bucket of an object can be found in consumed SQS messages."), 90 docs.FieldCommon("sqs_envelope_path", "An optional [dot path](/docs/configuration/field_paths) of enveloped payloads to extract from SQS messages. This is required when pushing events from S3 to SNS to SQS."), 91 docs.FieldAdvanced("sqs_max_messages", "The maximum number of SQS messages to consume from each request."), 92 docs.FieldAdvanced("sqs_endpoint", "A custom endpoint to use when connecting to SQS."), 93 }, session.FieldSpecs()...), 94 docs.FieldAdvanced("retries", "The maximum number of times to attempt an object download."), 95 docs.FieldAdvanced("force_path_style_urls", "Forces the client API to use path style URLs, which helps when connecting to custom endpoints."), 96 docs.FieldAdvanced("delete_objects", "Whether to delete downloaded objects from the bucket."), 97 docs.FieldAdvanced("download_manager", "Controls if and how to use the download manager API. This can help speed up file downloads, but results in file metadata not being copied.").WithChildren( 98 docs.FieldCommon("enabled", "Whether to use to download manager API."), 99 ), 100 docs.FieldAdvanced("timeout", "The period of time to wait before abandoning a request and trying again."), 101 docs.FieldDeprecated("max_batch_count"), 102 ), 103 Categories: []Category{ 104 CategoryServices, 105 CategoryAWS, 106 }, 107 } 108 } 109 110 //------------------------------------------------------------------------------ 111 112 // NewAmazonS3 creates a new AWS S3 input type. 113 func NewAmazonS3(conf Config, mgr types.Manager, log log.Modular, stats metrics.Type) (Type, error) { 114 // TODO: V4 Remove this. 115 if conf.S3.MaxBatchCount > 1 { 116 log.Warnf("Field '%v.max_batch_count' is deprecated, use the batching methods outlined in https://benthos.dev/docs/configuration/batching instead.\n", conf.Type) 117 } 118 r, err := reader.NewAmazonS3(conf.S3, log, stats) 119 if err != nil { 120 return nil, err 121 } 122 return NewAsyncReader( 123 TypeS3, 124 true, 125 reader.NewAsyncBundleUnacks( 126 reader.NewAsyncPreserver(r), 127 ), 128 log, stats, 129 ) 130 } 131 132 //------------------------------------------------------------------------------