package output

import (
	"github.com/Jeffail/benthos/v3/internal/docs"
	"github.com/Jeffail/benthos/v3/internal/metadata"
	"github.com/Jeffail/benthos/v3/lib/log"
	"github.com/Jeffail/benthos/v3/lib/message/batch"
	"github.com/Jeffail/benthos/v3/lib/metrics"
	"github.com/Jeffail/benthos/v3/lib/output/writer"
	"github.com/Jeffail/benthos/v3/lib/types"
	"github.com/Jeffail/benthos/v3/lib/util/aws/session"
)

//------------------------------------------------------------------------------

// init registers two output specs sharing one implementation: the current
// `aws_s3` output and its deprecated alias `s3`. The deprecated spec mirrors
// the aws_s3 fields but is marked docs.StatusDeprecated, carries no Version,
// and omits the AtVersion annotation on `server_side_encryption`.
func init() {
	Constructors[TypeAWSS3] = TypeSpec{
		constructor: fromSimpleConstructor(NewAWSS3),
		Version:     "3.36.0",
		Summary: `
Sends message parts as objects to an Amazon S3 bucket. Each object is uploaded
with the path specified with the ` + "`path`" + ` field.`,
		Description: `
In order to have a different path for each object you should use function
interpolations described [here](/docs/configuration/interpolation#bloblang-queries), which are
calculated per message of a batch.

### Metadata

Metadata fields on messages will be sent as headers, in order to mutate these values (or remove them) check out the [metadata docs](/docs/configuration/metadata).

### Tags

The tags field allows you to specify key/value pairs to attach to objects as tags, where the values support
[interpolation functions](/docs/configuration/interpolation#bloblang-queries):

` + "```yaml" + `
output:
  aws_s3:
    bucket: TODO
    path: ${!count("files")}-${!timestamp_unix_nano()}.tar.gz
    tags:
      Key1: Value1
      Timestamp: ${!meta("Timestamp")}
` + "```" + `

### Credentials

By default Benthos will use a shared credentials file when connecting to AWS
services. It's also possible to set them explicitly at the component level,
allowing you to transfer data across accounts. You can find out more
[in this document](/docs/guides/cloud/aws).

### Batching

It's common to want to upload messages to S3 as batched archives, the easiest
way to do this is to batch your messages at the output level and join the batch
of messages with an
` + "[`archive`](/docs/components/processors/archive)" + ` and/or
` + "[`compress`](/docs/components/processors/compress)" + ` processor.

For example, if we wished to upload messages as a .tar.gz archive of documents
we could achieve that with the following config:

` + "```yaml" + `
output:
  aws_s3:
    bucket: TODO
    path: ${!count("files")}-${!timestamp_unix_nano()}.tar.gz
    batching:
      count: 100
      period: 10s
      processors:
        - archive:
            format: tar
        - compress:
            algorithm: gzip
` + "```" + `

Alternatively, if we wished to upload JSON documents as a single large document
containing an array of objects we can do that with:

` + "```yaml" + `
output:
  aws_s3:
    bucket: TODO
    path: ${!count("files")}-${!timestamp_unix_nano()}.json
    batching:
      count: 100
      processors:
        - archive:
            format: json_array
` + "```" + ``,
		Async: true,
		FieldSpecs: docs.FieldSpecs{
			docs.FieldCommon("bucket", "The bucket to upload messages to."),
			docs.FieldCommon(
				"path", "The path of each message to upload.",
				`${!count("files")}-${!timestamp_unix_nano()}.txt`,
				`${!meta("kafka_key")}.json`,
				`${!json("doc.namespace")}/${!json("doc.id")}.json`,
			).IsInterpolated(),
			docs.FieldString(
				"tags", "Key/value pairs to store with the object as tags.",
				map[string]string{
					"Key1":      "Value1",
					"Timestamp": `${!meta("Timestamp")}`,
				},
			).IsInterpolated().Map(),
			docs.FieldCommon("content_type", "The content type to set for each object.").IsInterpolated(),
			docs.FieldAdvanced("content_encoding", "An optional content encoding to set for each object.").IsInterpolated(),
			docs.FieldString("cache_control", "The cache control to set for each object.").Advanced().IsInterpolated(),
			docs.FieldString("content_disposition", "The content disposition to set for each object.").Advanced().IsInterpolated(),
			docs.FieldString("content_language", "The content language to set for each object.").Advanced().IsInterpolated(),
			docs.FieldString("website_redirect_location", "The website redirect location to set for each object.").Advanced().IsInterpolated(),
			docs.FieldCommon("metadata", "Specify criteria for which metadata values are attached to objects as headers.").WithChildren(metadata.ExcludeFilterFields()...),
			docs.FieldAdvanced("storage_class", "The storage class to set for each object.").HasOptions(
				"STANDARD", "REDUCED_REDUNDANCY", "GLACIER", "STANDARD_IA", "ONEZONE_IA", "INTELLIGENT_TIERING", "DEEP_ARCHIVE",
			).IsInterpolated(),
			docs.FieldAdvanced("kms_key_id", "An optional server side encryption key."),
			// server_side_encryption gained docs support at 3.63.0; only
			// the non-deprecated spec advertises this via AtVersion.
			docs.FieldAdvanced("server_side_encryption", "An optional server side encryption algorithm.").AtVersion("3.63.0"),
			docs.FieldAdvanced("force_path_style_urls", "Forces the client API to use path style URLs, which helps when connecting to custom endpoints."),
			docs.FieldCommon("max_in_flight", "The maximum number of messages to have in flight at a given time. Increase this to improve throughput."),
			docs.FieldAdvanced("timeout", "The maximum period to wait on an upload before abandoning it and reattempting."),
			batch.FieldSpec(),
		}.Merge(session.FieldSpecs()),
		Categories: []Category{
			CategoryServices,
			CategoryAWS,
		},
	}

	// Deprecated `s3` alias: same implementation and fields as aws_s3 above,
	// with docs pointing users at the new name.
	Constructors[TypeS3] = TypeSpec{
		constructor: fromSimpleConstructor(NewAmazonS3),
		Status:      docs.StatusDeprecated,
		Summary: `
Sends message parts as objects to an Amazon S3 bucket. Each object is uploaded
with the path specified with the ` + "`path`" + ` field.`,
		Description: `
## Alternatives

This output has been renamed to ` + "[`aws_s3`](/docs/components/outputs/aws_s3)" + `.

In order to have a different path for each object you should use function
interpolations described [here](/docs/configuration/interpolation#bloblang-queries), which are
calculated per message of a batch.

### Metadata

Metadata fields on messages will be sent as headers, in order to mutate these values (or remove them) check out the [metadata docs](/docs/configuration/metadata).

### Tags

The tags field allows you to specify key/value pairs to attach to objects as tags, where the values support
[interpolation functions](/docs/configuration/interpolation#bloblang-queries):

` + "```yaml" + `
output:
  aws_s3:
    bucket: TODO
    path: ${!count("files")}-${!timestamp_unix_nano()}.tar.gz
    tags:
      Key1: Value1
      Timestamp: ${!meta("Timestamp")}
` + "```" + `

### Credentials

By default Benthos will use a shared credentials file when connecting to AWS
services. It's also possible to set them explicitly at the component level,
allowing you to transfer data across accounts. You can find out more
[in this document](/docs/guides/cloud/aws).

### Batching

It's common to want to upload messages to S3 as batched archives, the easiest
way to do this is to batch your messages at the output level and join the batch
of messages with an
` + "[`archive`](/docs/components/processors/archive)" + ` and/or
` + "[`compress`](/docs/components/processors/compress)" + ` processor.

For example, if we wished to upload messages as a .tar.gz archive of documents
we could achieve that with the following config:

` + "```yaml" + `
output:
  s3:
    bucket: TODO
    path: ${!count("files")}-${!timestamp_unix_nano()}.tar.gz
    batching:
      count: 100
      period: 10s
      processors:
        - archive:
            format: tar
        - compress:
            algorithm: gzip
` + "```" + `

Alternatively, if we wished to upload JSON documents as a single large document
containing an array of objects we can do that with:

` + "```yaml" + `
output:
  s3:
    bucket: TODO
    path: ${!count("files")}-${!timestamp_unix_nano()}.json
    batching:
      count: 100
      processors:
        - archive:
            format: json_array
` + "```" + ``,
		Async: true,
		FieldSpecs: docs.FieldSpecs{
			docs.FieldCommon("bucket", "The bucket to upload messages to."),
			docs.FieldCommon(
				"path", "The path of each message to upload.",
				`${!count("files")}-${!timestamp_unix_nano()}.txt`,
				`${!meta("kafka_key")}.json`,
				`${!json("doc.namespace")}/${!json("doc.id")}.json`,
			).IsInterpolated(),
			docs.FieldString(
				"tags", "Key/value pairs to store with the object as tags.",
				map[string]string{
					"Key1":      "Value1",
					"Timestamp": `${!meta("Timestamp")}`,
				},
			).IsInterpolated().Map(),
			docs.FieldCommon("content_type", "The content type to set for each object.").IsInterpolated(),
			docs.FieldAdvanced("content_encoding", "An optional content encoding to set for each object.").IsInterpolated(),
			docs.FieldString("cache_control", "The cache control to set for each object.").Advanced().IsInterpolated(),
			docs.FieldString("content_disposition", "The content disposition to set for each object.").Advanced().IsInterpolated(),
			docs.FieldString("content_language", "The content language to set for each object.").Advanced().IsInterpolated(),
			docs.FieldString("website_redirect_location", "The website redirect location to set for each object.").Advanced().IsInterpolated(),
			docs.FieldCommon("metadata", "Specify criteria for which metadata values are attached to objects as headers.").WithChildren(metadata.ExcludeFilterFields()...),
			docs.FieldAdvanced("storage_class", "The storage class to set for each object.").HasOptions(
				"STANDARD", "REDUCED_REDUNDANCY", "GLACIER", "STANDARD_IA", "ONEZONE_IA", "INTELLIGENT_TIERING", "DEEP_ARCHIVE",
			).IsInterpolated(),
			docs.FieldAdvanced("kms_key_id", "An optional server side encryption key."),
			docs.FieldAdvanced("server_side_encryption", "An optional server side encryption algorithm."),
			docs.FieldAdvanced("force_path_style_urls", "Forces the client API to use path style URLs, which helps when connecting to custom endpoints."),
			docs.FieldCommon("max_in_flight", "The maximum number of messages to have in flight at a given time. Increase this to improve throughput."),
			docs.FieldAdvanced("timeout", "The maximum period to wait on an upload before abandoning it and reattempting."),
			batch.FieldSpec(),
		}.Merge(session.FieldSpecs()),
		Categories: []Category{
			CategoryServices,
			CategoryAWS,
		},
	}
}

//------------------------------------------------------------------------------

// NewAWSS3 creates a new AmazonS3 output type.
func NewAWSS3(conf Config, mgr types.Manager, log log.Modular, stats metrics.Type) (Type, error) {
	return newAmazonS3(TypeAWSS3, conf.AWSS3, mgr, log, stats)
}

// NewAmazonS3 creates a new AmazonS3 output type.
262 func NewAmazonS3(conf Config, mgr types.Manager, log log.Modular, stats metrics.Type) (Type, error) { 263 return newAmazonS3(TypeS3, conf.S3, mgr, log, stats) 264 } 265 266 func newAmazonS3(name string, conf writer.AmazonS3Config, mgr types.Manager, log log.Modular, stats metrics.Type) (Type, error) { 267 sthree, err := writer.NewAmazonS3V2(conf, mgr, log, stats) 268 if err != nil { 269 return nil, err 270 } 271 272 w, err := NewAsyncWriter(name, conf.MaxInFlight, sthree, log, stats) 273 if err != nil { 274 return nil, err 275 } 276 return NewBatcherFromConfig(conf.Batching, w, mgr, log, stats) 277 } 278 279 //------------------------------------------------------------------------------