github.com/Jeffail/benthos/v3@v3.65.0/lib/output/aws_s3.go (about)

     1  package output
     2  
     3  import (
     4  	"github.com/Jeffail/benthos/v3/internal/docs"
     5  	"github.com/Jeffail/benthos/v3/internal/metadata"
     6  	"github.com/Jeffail/benthos/v3/lib/log"
     7  	"github.com/Jeffail/benthos/v3/lib/message/batch"
     8  	"github.com/Jeffail/benthos/v3/lib/metrics"
     9  	"github.com/Jeffail/benthos/v3/lib/output/writer"
    10  	"github.com/Jeffail/benthos/v3/lib/types"
    11  	"github.com/Jeffail/benthos/v3/lib/util/aws/session"
    12  )
    13  
    14  //------------------------------------------------------------------------------
    15  
    16  func init() {
    17  	Constructors[TypeAWSS3] = TypeSpec{
    18  		constructor: fromSimpleConstructor(NewAWSS3),
    19  		Version:     "3.36.0",
    20  		Summary: `
    21  Sends message parts as objects to an Amazon S3 bucket. Each object is uploaded
    22  with the path specified with the ` + "`path`" + ` field.`,
    23  		Description: `
    24  In order to have a different path for each object you should use function
    25  interpolations described [here](/docs/configuration/interpolation#bloblang-queries), which are
    26  calculated per message of a batch.
    27  
    28  ### Metadata
    29  
    30  Metadata fields on messages will be sent as headers, in order to mutate these values (or remove them) check out the [metadata docs](/docs/configuration/metadata).
    31  
    32  ### Tags
    33  
    34  The tags field allows you to specify key/value pairs to attach to objects as tags, where the values support
    35  [interpolation functions](/docs/configuration/interpolation#bloblang-queries):
    36  
    37  ` + "```yaml" + `
    38  output:
    39    aws_s3:
    40      bucket: TODO
    41      path: ${!count("files")}-${!timestamp_unix_nano()}.tar.gz
    42      tags:
    43        Key1: Value1
    44        Timestamp: ${!meta("Timestamp")}
    45  ` + "```" + `
    46  
    47  ### Credentials
    48  
    49  By default Benthos will use a shared credentials file when connecting to AWS
    50  services. It's also possible to set them explicitly at the component level,
    51  allowing you to transfer data across accounts. You can find out more
    52  [in this document](/docs/guides/cloud/aws).
    53  
    54  ### Batching
    55  
    56  It's common to want to upload messages to S3 as batched archives, the easiest
    57  way to do this is to batch your messages at the output level and join the batch
    58  of messages with an
    59  ` + "[`archive`](/docs/components/processors/archive)" + ` and/or
    60  ` + "[`compress`](/docs/components/processors/compress)" + ` processor.
    61  
    62  For example, if we wished to upload messages as a .tar.gz archive of documents
    63  we could achieve that with the following config:
    64  
    65  ` + "```yaml" + `
    66  output:
    67    aws_s3:
    68      bucket: TODO
    69      path: ${!count("files")}-${!timestamp_unix_nano()}.tar.gz
    70      batching:
    71        count: 100
    72        period: 10s
    73        processors:
    74          - archive:
    75              format: tar
    76          - compress:
    77              algorithm: gzip
    78  ` + "```" + `
    79  
    80  Alternatively, if we wished to upload JSON documents as a single large document
    81  containing an array of objects we can do that with:
    82  
    83  ` + "```yaml" + `
    84  output:
    85    aws_s3:
    86      bucket: TODO
    87      path: ${!count("files")}-${!timestamp_unix_nano()}.json
    88      batching:
    89        count: 100
    90        processors:
    91          - archive:
    92              format: json_array
    93  ` + "```" + ``,
    94  		Async: true,
    95  		FieldSpecs: docs.FieldSpecs{
    96  			docs.FieldCommon("bucket", "The bucket to upload messages to."),
    97  			docs.FieldCommon(
    98  				"path", "The path of each message to upload.",
    99  				`${!count("files")}-${!timestamp_unix_nano()}.txt`,
   100  				`${!meta("kafka_key")}.json`,
   101  				`${!json("doc.namespace")}/${!json("doc.id")}.json`,
   102  			).IsInterpolated(),
   103  			docs.FieldString(
   104  				"tags", "Key/value pairs to store with the object as tags.",
   105  				map[string]string{
   106  					"Key1":      "Value1",
   107  					"Timestamp": `${!meta("Timestamp")}`,
   108  				},
   109  			).IsInterpolated().Map(),
   110  			docs.FieldCommon("content_type", "The content type to set for each object.").IsInterpolated(),
   111  			docs.FieldAdvanced("content_encoding", "An optional content encoding to set for each object.").IsInterpolated(),
   112  			docs.FieldString("cache_control", "The cache control to set for each object.").Advanced().IsInterpolated(),
   113  			docs.FieldString("content_disposition", "The content disposition to set for each object.").Advanced().IsInterpolated(),
   114  			docs.FieldString("content_language", "The content language to set for each object.").Advanced().IsInterpolated(),
   115  			docs.FieldString("website_redirect_location", "The website redirect location to set for each object.").Advanced().IsInterpolated(),
   116  			docs.FieldCommon("metadata", "Specify criteria for which metadata values are attached to objects as headers.").WithChildren(metadata.ExcludeFilterFields()...),
   117  			docs.FieldAdvanced("storage_class", "The storage class to set for each object.").HasOptions(
   118  				"STANDARD", "REDUCED_REDUNDANCY", "GLACIER", "STANDARD_IA", "ONEZONE_IA", "INTELLIGENT_TIERING", "DEEP_ARCHIVE",
   119  			).IsInterpolated(),
   120  			docs.FieldAdvanced("kms_key_id", "An optional server side encryption key."),
   121  			docs.FieldAdvanced("server_side_encryption", "An optional server side encryption algorithm.").AtVersion("3.63.0"),
   122  			docs.FieldAdvanced("force_path_style_urls", "Forces the client API to use path style URLs, which helps when connecting to custom endpoints."),
   123  			docs.FieldCommon("max_in_flight", "The maximum number of messages to have in flight at a given time. Increase this to improve throughput."),
   124  			docs.FieldAdvanced("timeout", "The maximum period to wait on an upload before abandoning it and reattempting."),
   125  			batch.FieldSpec(),
   126  		}.Merge(session.FieldSpecs()),
   127  		Categories: []Category{
   128  			CategoryServices,
   129  			CategoryAWS,
   130  		},
   131  	}
   132  
   133  	Constructors[TypeS3] = TypeSpec{
   134  		constructor: fromSimpleConstructor(NewAmazonS3),
   135  		Status:      docs.StatusDeprecated,
   136  		Summary: `
   137  Sends message parts as objects to an Amazon S3 bucket. Each object is uploaded
   138  with the path specified with the ` + "`path`" + ` field.`,
   139  		Description: `
   140  ## Alternatives
   141  
   142  This output has been renamed to ` + "[`aws_s3`](/docs/components/outputs/aws_s3)" + `.
   143  
   144  In order to have a different path for each object you should use function
   145  interpolations described [here](/docs/configuration/interpolation#bloblang-queries), which are
   146  calculated per message of a batch.
   147  
   148  ### Metadata
   149  
   150  Metadata fields on messages will be sent as headers, in order to mutate these values (or remove them) check out the [metadata docs](/docs/configuration/metadata).
   151  
   152  ### Tags
   153  
   154  The tags field allows you to specify key/value pairs to attach to objects as tags, where the values support
   155  [interpolation functions](/docs/configuration/interpolation#bloblang-queries):
   156  
   157  ` + "```yaml" + `
   158  output:
   159    aws_s3:
   160      bucket: TODO
   161      path: ${!count("files")}-${!timestamp_unix_nano()}.tar.gz
   162      tags:
   163        Key1: Value1
   164        Timestamp: ${!meta("Timestamp")}
   165  ` + "```" + `
   166  
   167  ### Credentials
   168  
   169  By default Benthos will use a shared credentials file when connecting to AWS
   170  services. It's also possible to set them explicitly at the component level,
   171  allowing you to transfer data across accounts. You can find out more
   172  [in this document](/docs/guides/cloud/aws).
   173  
   174  ### Batching
   175  
   176  It's common to want to upload messages to S3 as batched archives, the easiest
   177  way to do this is to batch your messages at the output level and join the batch
   178  of messages with an
   179  ` + "[`archive`](/docs/components/processors/archive)" + ` and/or
   180  ` + "[`compress`](/docs/components/processors/compress)" + ` processor.
   181  
   182  For example, if we wished to upload messages as a .tar.gz archive of documents
   183  we could achieve that with the following config:
   184  
   185  ` + "```yaml" + `
   186  output:
   187    s3:
   188      bucket: TODO
   189      path: ${!count("files")}-${!timestamp_unix_nano()}.tar.gz
   190      batching:
   191        count: 100
   192        period: 10s
   193        processors:
   194          - archive:
   195              format: tar
   196          - compress:
   197              algorithm: gzip
   198  ` + "```" + `
   199  
   200  Alternatively, if we wished to upload JSON documents as a single large document
   201  containing an array of objects we can do that with:
   202  
   203  ` + "```yaml" + `
   204  output:
   205    s3:
   206      bucket: TODO
   207      path: ${!count("files")}-${!timestamp_unix_nano()}.json
   208      batching:
   209        count: 100
   210        processors:
   211          - archive:
   212              format: json_array
   213  ` + "```" + ``,
   214  		Async: true,
   215  		FieldSpecs: docs.FieldSpecs{
   216  			docs.FieldCommon("bucket", "The bucket to upload messages to."),
   217  			docs.FieldCommon(
   218  				"path", "The path of each message to upload.",
   219  				`${!count("files")}-${!timestamp_unix_nano()}.txt`,
   220  				`${!meta("kafka_key")}.json`,
   221  				`${!json("doc.namespace")}/${!json("doc.id")}.json`,
   222  			).IsInterpolated(),
   223  			docs.FieldString(
   224  				"tags", "Key/value pairs to store with the object as tags.",
   225  				map[string]string{
   226  					"Key1":      "Value1",
   227  					"Timestamp": `${!meta("Timestamp")}`,
   228  				},
   229  			).IsInterpolated().Map(),
   230  			docs.FieldCommon("content_type", "The content type to set for each object.").IsInterpolated(),
   231  			docs.FieldAdvanced("content_encoding", "An optional content encoding to set for each object.").IsInterpolated(),
   232  			docs.FieldString("cache_control", "The cache control to set for each object.").Advanced().IsInterpolated(),
   233  			docs.FieldString("content_disposition", "The content disposition to set for each object.").Advanced().IsInterpolated(),
   234  			docs.FieldString("content_language", "The content language to set for each object.").Advanced().IsInterpolated(),
   235  			docs.FieldString("website_redirect_location", "The website redirect location to set for each object.").Advanced().IsInterpolated(),
   236  			docs.FieldCommon("metadata", "Specify criteria for which metadata values are attached to objects as headers.").WithChildren(metadata.ExcludeFilterFields()...),
   237  			docs.FieldAdvanced("storage_class", "The storage class to set for each object.").HasOptions(
   238  				"STANDARD", "REDUCED_REDUNDANCY", "GLACIER", "STANDARD_IA", "ONEZONE_IA", "INTELLIGENT_TIERING", "DEEP_ARCHIVE",
   239  			).IsInterpolated(),
   240  			docs.FieldAdvanced("kms_key_id", "An optional server side encryption key."),
   241  			docs.FieldAdvanced("server_side_encryption", "An optional server side encryption algorithm."),
   242  			docs.FieldAdvanced("force_path_style_urls", "Forces the client API to use path style URLs, which helps when connecting to custom endpoints."),
   243  			docs.FieldCommon("max_in_flight", "The maximum number of messages to have in flight at a given time. Increase this to improve throughput."),
   244  			docs.FieldAdvanced("timeout", "The maximum period to wait on an upload before abandoning it and reattempting."),
   245  			batch.FieldSpec(),
   246  		}.Merge(session.FieldSpecs()),
   247  		Categories: []Category{
   248  			CategoryServices,
   249  			CategoryAWS,
   250  		},
   251  	}
   252  }
   253  
   254  //------------------------------------------------------------------------------
   255  
   256  // NewAWSS3 creates a new AmazonS3 output type.
   257  func NewAWSS3(conf Config, mgr types.Manager, log log.Modular, stats metrics.Type) (Type, error) {
   258  	return newAmazonS3(TypeAWSS3, conf.AWSS3, mgr, log, stats)
   259  }
   260  
// NewAmazonS3 creates a new AmazonS3 output type, configured from the legacy
// s3 section of the provided config.
//
// Deprecated: This output has been renamed to aws_s3, use NewAWSS3 instead.
func NewAmazonS3(conf Config, mgr types.Manager, log log.Modular, stats metrics.Type) (Type, error) {
	return newAmazonS3(TypeS3, conf.S3, mgr, log, stats)
}
   265  
   266  func newAmazonS3(name string, conf writer.AmazonS3Config, mgr types.Manager, log log.Modular, stats metrics.Type) (Type, error) {
   267  	sthree, err := writer.NewAmazonS3V2(conf, mgr, log, stats)
   268  	if err != nil {
   269  		return nil, err
   270  	}
   271  
   272  	w, err := NewAsyncWriter(name, conf.MaxInFlight, sthree, log, stats)
   273  	if err != nil {
   274  		return nil, err
   275  	}
   276  	return NewBatcherFromConfig(conf.Batching, w, mgr, log, stats)
   277  }
   278  
   279  //------------------------------------------------------------------------------