github.com/Jeffail/benthos/v3@v3.65.0/lib/input/s3.go

package input

import (
	"github.com/Jeffail/benthos/v3/internal/docs"
	"github.com/Jeffail/benthos/v3/lib/input/reader"
	"github.com/Jeffail/benthos/v3/lib/log"
	"github.com/Jeffail/benthos/v3/lib/metrics"
	"github.com/Jeffail/benthos/v3/lib/types"
	"github.com/Jeffail/benthos/v3/lib/util/aws/session"
)

//------------------------------------------------------------------------------

func init() {
	Constructors[TypeS3] = TypeSpec{
		constructor: fromSimpleConstructor(NewAmazonS3),
		Status:      docs.StatusDeprecated,
		Summary: `
Downloads objects within an Amazon S3 bucket, optionally filtered by a prefix.
If an SQS queue has been configured then only object keys read from the queue
will be downloaded.`,
		Description: `
## Alternatives

This input is being replaced with the shiny new ` + "[`aws_s3` input](/docs/components/inputs/aws_s3)" + `, which has improved features; consider trying it out instead.

If an SQS queue is not specified, the entire list of objects found when this
input starts will be consumed. Note that the prefix configuration is only used
when downloading objects without SQS configured.
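
For example, a minimal config that consumes the full listing of a bucket under
a prefix might look like this (the bucket and prefix values are placeholders):

` + "```yaml" + `
input:
  s3:
    bucket: example-bucket
    prefix: logs/
` + "```" + `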

If your bucket is configured to send events directly to an SQS queue then you
need to set the ` + "`sqs_body_path`" + ` field to a
[dot path](/docs/configuration/field_paths) where the object key is found in the payload.
However, it is also common practice to send bucket events to an SNS topic which
sends enveloped events to SQS, in which case you must also set the
` + "`sqs_envelope_path`" + ` field to where the payload can be found.

When using SQS events it's also possible to extract target bucket names from the
events by specifying a path in the field ` + "`sqs_bucket_path`" + `. For each
SQS event, if that path exists and contains a string it will be used as the bucket
of the download instead of the ` + "`bucket`" + ` field.
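
As a sketch, a config that reads object keys from SQS events might look like
the following. The queue URL and dot paths are illustrative only and must be
adjusted to match your actual event payloads:

` + "```yaml" + `
input:
  s3:
    bucket: example-bucket
    sqs_url: https://sqs.eu-west-1.amazonaws.com/123456789012/example-queue
    sqs_body_path: Records.*.s3.object.key
    sqs_bucket_path: Records.*.s3.bucket.name
    # When events are routed through SNS the original payload is enveloped,
    # typically under a "Message" field:
    # sqs_envelope_path: Message
` + "```" + `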

Here is a guide for setting up an SQS queue that receives events for new S3
bucket objects:

https://docs.aws.amazon.com/AmazonS3/latest/dev/ways-to-add-notification-config-to-bucket.html

WARNING: When using SQS please make sure you have sensible values for
` + "`sqs_max_messages`" + ` and also the visibility timeout of the queue
itself.

When Benthos consumes an S3 item as a result of receiving an SQS message, the
message is not deleted until the S3 item has been sent onwards. This ensures
at-least-once crash resiliency, but also means that if the S3 item takes longer
to process than the visibility timeout of your queue then the same items might
be processed multiple times.
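
For instance, with a queue visibility timeout of 300 seconds, each batch of
objects must be fully processed and acknowledged well within that window
(values here are illustrative only):

` + "```yaml" + `
input:
  s3:
    bucket: example-bucket
    sqs_url: https://sqs.eu-west-1.amazonaws.com/123456789012/example-queue
    sqs_body_path: Records.*.s3.object.key
    sqs_max_messages: 10
` + "```" + `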

### Credentials

By default Benthos will use a shared credentials file when connecting to AWS
services. It's also possible to set them explicitly at the component level,
allowing you to transfer data across accounts. You can find out more
[in this document](/docs/guides/cloud/aws).
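
As a sketch, explicit session settings might be supplied like this (the
region, profile, and bucket are placeholders):

` + "```yaml" + `
input:
  s3:
    bucket: example-bucket
    region: eu-west-1
    credentials:
      profile: example-profile
` + "```" + `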

### Metadata

This input adds the following metadata fields to each message:

` + "```" + `
- s3_key
- s3_bucket
- s3_last_modified_unix*
- s3_last_modified (RFC3339)*
- s3_content_type*
- s3_content_encoding*
- All user defined metadata*

* Only added when NOT using download manager
` + "```" + `

You can access these metadata fields using
[function interpolation](/docs/configuration/interpolation#metadata).
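
As a sketch, an output that re-uploads each object under its original key
(the target bucket is a placeholder):

` + "```yaml" + `
output:
  aws_s3:
    bucket: example-archive-bucket
    path: ${! meta("s3_key") }
` + "```",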
		FieldSpecs: append(
			append(docs.FieldSpecs{
				docs.FieldCommon("bucket", "The bucket to consume from. If `sqs_bucket_path` is set this field is still required as a fallback."),
				docs.FieldCommon("prefix", "An optional path prefix; if set, only objects with the prefix are consumed. This field is ignored when SQS is used."),
				docs.FieldCommon("sqs_url", "An optional SQS URL to connect to. When specified this queue will control which objects are downloaded from the target bucket."),
				docs.FieldCommon("sqs_body_path", "A [dot path](/docs/configuration/field_paths) whereby object keys are found in SQS messages. This field is only required when an `sqs_url` is specified."),
				docs.FieldCommon("sqs_bucket_path", "An optional [dot path](/docs/configuration/field_paths) whereby the bucket of an object can be found in consumed SQS messages."),
				docs.FieldCommon("sqs_envelope_path", "An optional [dot path](/docs/configuration/field_paths) of enveloped payloads to extract from SQS messages. This is required when pushing events from S3 to SNS to SQS."),
				docs.FieldAdvanced("sqs_max_messages", "The maximum number of SQS messages to consume from each request."),
				docs.FieldAdvanced("sqs_endpoint", "A custom endpoint to use when connecting to SQS."),
			}, session.FieldSpecs()...),
			docs.FieldAdvanced("retries", "The maximum number of times to attempt an object download."),
			docs.FieldAdvanced("force_path_style_urls", "Forces the client API to use path-style URLs, which helps when connecting to custom endpoints."),
			docs.FieldAdvanced("delete_objects", "Whether to delete downloaded objects from the bucket."),
			docs.FieldAdvanced("download_manager", "Controls if and how to use the download manager API. This can help speed up file downloads, but results in file metadata not being copied.").WithChildren(
				docs.FieldCommon("enabled", "Whether to use the download manager API."),
			),
			docs.FieldAdvanced("timeout", "The period of time to wait before abandoning a request and trying again."),
			docs.FieldDeprecated("max_batch_count"),
		),
		Categories: []Category{
			CategoryServices,
			CategoryAWS,
		},
	}
}

//------------------------------------------------------------------------------

// NewAmazonS3 creates a new AWS S3 input type.
func NewAmazonS3(conf Config, mgr types.Manager, log log.Modular, stats metrics.Type) (Type, error) {
	// TODO: V4 Remove this.
	if conf.S3.MaxBatchCount > 1 {
		log.Warnf("Field '%v.max_batch_count' is deprecated, use the batching methods outlined in https://benthos.dev/docs/configuration/batching instead.\n", conf.Type)
	}
	r, err := reader.NewAmazonS3(conf.S3, log, stats)
	if err != nil {
		return nil, err
	}
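	// Wrap the reader so that unacknowledged messages are preserved and
	// replayed after a reconnection, supporting the at-least-once delivery
	// behaviour described in the docs above.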
	return NewAsyncReader(
		TypeS3,
		true,
		reader.NewAsyncBundleUnacks(
			reader.NewAsyncPreserver(r),
		),
		log, stats,
	)
}

//------------------------------------------------------------------------------