github.com/Jeffail/benthos/v3@v3.65.0/lib/output/writer/s3.go (about) 1 package writer 2 3 import ( 4 "bytes" 5 "context" 6 "fmt" 7 "net/url" 8 "sort" 9 "strings" 10 "time" 11 12 "github.com/Jeffail/benthos/v3/internal/bloblang/field" 13 "github.com/Jeffail/benthos/v3/internal/interop" 14 "github.com/Jeffail/benthos/v3/internal/metadata" 15 "github.com/Jeffail/benthos/v3/lib/log" 16 "github.com/Jeffail/benthos/v3/lib/message/batch" 17 "github.com/Jeffail/benthos/v3/lib/metrics" 18 "github.com/Jeffail/benthos/v3/lib/types" 19 sess "github.com/Jeffail/benthos/v3/lib/util/aws/session" 20 "github.com/aws/aws-sdk-go/aws" 21 "github.com/aws/aws-sdk-go/aws/session" 22 "github.com/aws/aws-sdk-go/service/s3/s3manager" 23 ) 24 25 //------------------------------------------------------------------------------ 26 27 // AmazonS3Config contains configuration fields for the AmazonS3 output type. 28 type AmazonS3Config struct { 29 sess.Config `json:",inline" yaml:",inline"` 30 Bucket string `json:"bucket" yaml:"bucket"` 31 ForcePathStyleURLs bool `json:"force_path_style_urls" yaml:"force_path_style_urls"` 32 Path string `json:"path" yaml:"path"` 33 Tags map[string]string `json:"tags" yaml:"tags"` 34 ContentType string `json:"content_type" yaml:"content_type"` 35 ContentEncoding string `json:"content_encoding" yaml:"content_encoding"` 36 CacheControl string `json:"cache_control" yaml:"cache_control"` 37 ContentDisposition string `json:"content_disposition" yaml:"content_disposition"` 38 ContentLanguage string `json:"content_language" yaml:"content_language"` 39 WebsiteRedirectLocation string `json:"website_redirect_location" yaml:"website_redirect_location"` 40 Metadata metadata.ExcludeFilterConfig `json:"metadata" yaml:"metadata"` 41 StorageClass string `json:"storage_class" yaml:"storage_class"` 42 Timeout string `json:"timeout" yaml:"timeout"` 43 KMSKeyID string `json:"kms_key_id" yaml:"kms_key_id"` 44 ServerSideEncryption string `json:"server_side_encryption" yaml:"server_side_encryption"` 45 MaxInFlight int `json:"max_in_flight" yaml:"max_in_flight"` 46 Batching batch.PolicyConfig `json:"batching" yaml:"batching"` 47 } 48 49 // NewAmazonS3Config creates a new Config with default values. 50 func NewAmazonS3Config() AmazonS3Config { 51 return AmazonS3Config{ 52 Config: sess.NewConfig(), 53 Bucket: "", 54 ForcePathStyleURLs: false, 55 Path: `${!count("files")}-${!timestamp_unix_nano()}.txt`, 56 Tags: map[string]string{}, 57 ContentType: "application/octet-stream", 58 ContentEncoding: "", 59 CacheControl: "", 60 ContentDisposition: "", 61 ContentLanguage: "", 62 WebsiteRedirectLocation: "", 63 Metadata: metadata.NewExcludeFilterConfig(), 64 StorageClass: "STANDARD", 65 Timeout: "5s", 66 KMSKeyID: "", 67 ServerSideEncryption: "", 68 MaxInFlight: 1, 69 Batching: batch.NewPolicyConfig(), 70 } 71 } 72 73 //------------------------------------------------------------------------------ 74 75 type s3TagPair struct { 76 key string 77 value *field.Expression 78 } 79 80 // AmazonS3 is a benthos writer.Type implementation that writes messages to an 81 // Amazon S3 bucket. 82 type AmazonS3 struct { 83 conf AmazonS3Config 84 85 path *field.Expression 86 tags []s3TagPair 87 contentType *field.Expression 88 contentEncoding *field.Expression 89 cacheControl *field.Expression 90 contentDisposition *field.Expression 91 contentLanguage *field.Expression 92 websiteRedirectLocation *field.Expression 93 storageClass *field.Expression 94 metaFilter *metadata.ExcludeFilter 95 96 session *session.Session 97 uploader *s3manager.Uploader 98 timeout time.Duration 99 100 log log.Modular 101 stats metrics.Type 102 } 103 104 // NewAmazonS3 creates a new Amazon S3 bucket writer.Type. 105 // 106 // Deprecated: use the V2 API instead. 107 func NewAmazonS3( 108 conf AmazonS3Config, 109 log log.Modular, 110 stats metrics.Type, 111 ) (*AmazonS3, error) { 112 return NewAmazonS3V2(conf, types.NoopMgr(), log, stats) 113 } 114 115 // NewAmazonS3V2 creates a new Amazon S3 bucket writer.Type. 116 func NewAmazonS3V2( 117 conf AmazonS3Config, 118 mgr types.Manager, 119 log log.Modular, 120 stats metrics.Type, 121 ) (*AmazonS3, error) { 122 var timeout time.Duration 123 if tout := conf.Timeout; len(tout) > 0 { 124 var err error 125 if timeout, err = time.ParseDuration(tout); err != nil { 126 return nil, fmt.Errorf("failed to parse timeout period string: %v", err) 127 } 128 } 129 a := &AmazonS3{ 130 conf: conf, 131 log: log, 132 stats: stats, 133 timeout: timeout, 134 } 135 var err error 136 if a.path, err = interop.NewBloblangField(mgr, conf.Path); err != nil { 137 return nil, fmt.Errorf("failed to parse path expression: %v", err) 138 } 139 if a.contentType, err = interop.NewBloblangField(mgr, conf.ContentType); err != nil { 140 return nil, fmt.Errorf("failed to parse content type expression: %v", err) 141 } 142 if a.contentEncoding, err = interop.NewBloblangField(mgr, conf.ContentEncoding); err != nil { 143 return nil, fmt.Errorf("failed to parse content encoding expression: %v", err) 144 } 145 if a.cacheControl, err = interop.NewBloblangField(mgr, conf.CacheControl); err != nil { 146 return nil, fmt.Errorf("failed to parse cache control expression: %v", err) 147 } 148 if a.contentDisposition, err = interop.NewBloblangField(mgr, conf.ContentDisposition); err != nil { 149 return nil, fmt.Errorf("failed to parse content disposition expression: %v", err) 150 } 151 if a.contentLanguage, err = interop.NewBloblangField(mgr, conf.ContentLanguage); err != nil { 152 return nil, fmt.Errorf("failed to parse content language expression: %v", err) 153 } 154 if a.websiteRedirectLocation, err = interop.NewBloblangField(mgr, conf.WebsiteRedirectLocation); err != nil { 155 return nil, fmt.Errorf("failed to parse website redirect location expression: %v", err) 156 } 157 158 if a.metaFilter, err = conf.Metadata.Filter(); err != nil { 159 return nil, fmt.Errorf("failed to construct metadata filter: %w", err) 160 } 161 if a.storageClass, err = interop.NewBloblangField(mgr, conf.StorageClass); err != nil { 162 return nil, fmt.Errorf("failed to parse storage class expression: %v", err) 163 } 164 165 a.tags = make([]s3TagPair, 0, len(conf.Tags)) 166 for k, v := range conf.Tags { 167 vExpr, err := interop.NewBloblangField(mgr, v) 168 if err != nil { 169 return nil, fmt.Errorf("failed to parse tag expression for key '%v': %v", k, err) 170 } 171 a.tags = append(a.tags, s3TagPair{ 172 key: k, 173 value: vExpr, 174 }) 175 } 176 sort.Slice(a.tags, func(i, j int) bool { 177 return a.tags[i].key < a.tags[j].key 178 }) 179 180 return a, nil 181 } 182 183 // ConnectWithContext attempts to establish a connection to the target S3 184 // bucket. 185 func (a *AmazonS3) ConnectWithContext(ctx context.Context) error { 186 return a.Connect() 187 } 188 189 // Connect attempts to establish a connection to the target S3 bucket. 190 func (a *AmazonS3) Connect() error { 191 if a.session != nil { 192 return nil 193 } 194 195 sess, err := a.conf.GetSession(func(c *aws.Config) { 196 c.S3ForcePathStyle = aws.Bool(a.conf.ForcePathStyleURLs) 197 }) 198 if err != nil { 199 return err 200 } 201 202 a.session = sess 203 a.uploader = s3manager.NewUploader(sess) 204 205 a.log.Infof("Uploading message parts as objects to Amazon S3 bucket: %v\n", a.conf.Bucket) 206 return nil 207 } 208 209 // Write attempts to write message contents to a target S3 bucket as files. 210 func (a *AmazonS3) Write(msg types.Message) error { 211 return a.WriteWithContext(context.Background(), msg) 212 } 213 214 // WriteWithContext attempts to write message contents to a target S3 bucket as 215 // files. 216 func (a *AmazonS3) WriteWithContext(wctx context.Context, msg types.Message) error { 217 if a.session == nil { 218 return types.ErrNotConnected 219 } 220 221 ctx, cancel := context.WithTimeout( 222 wctx, a.timeout, 223 ) 224 defer cancel() 225 226 return IterateBatchedSend(msg, func(i int, p types.Part) error { 227 metadata := map[string]*string{} 228 a.metaFilter.Iter(p.Metadata(), func(k, v string) error { 229 metadata[k] = aws.String(v) 230 return nil 231 }) 232 233 var contentEncoding *string 234 if ce := a.contentEncoding.String(i, msg); len(ce) > 0 { 235 contentEncoding = aws.String(ce) 236 } 237 var cacheControl *string 238 if ce := a.cacheControl.String(i, msg); len(ce) > 0 { 239 cacheControl = aws.String(ce) 240 } 241 var contentDisposition *string 242 if ce := a.contentDisposition.String(i, msg); len(ce) > 0 { 243 contentDisposition = aws.String(ce) 244 } 245 var contentLanguage *string 246 if ce := a.contentLanguage.String(i, msg); len(ce) > 0 { 247 contentLanguage = aws.String(ce) 248 } 249 var websiteRedirectLocation *string 250 if ce := a.websiteRedirectLocation.String(i, msg); len(ce) > 0 { 251 websiteRedirectLocation = aws.String(ce) 252 } 253 254 uploadInput := &s3manager.UploadInput{ 255 Bucket: &a.conf.Bucket, 256 Key: aws.String(a.path.String(i, msg)), 257 Body: bytes.NewReader(p.Get()), 258 ContentType: aws.String(a.contentType.String(i, msg)), 259 ContentEncoding: contentEncoding, 260 CacheControl: cacheControl, 261 ContentDisposition: contentDisposition, 262 ContentLanguage: contentLanguage, 263 WebsiteRedirectLocation: websiteRedirectLocation, 264 StorageClass: aws.String(a.storageClass.String(i, msg)), 265 Metadata: metadata, 266 } 267 268 // Prepare tags, escaping keys and values to ensure they're valid query string parameters. 269 if len(a.tags) > 0 { 270 tags := make([]string, len(a.tags)) 271 for j, pair := range a.tags { 272 tags[j] = url.QueryEscape(pair.key) + "=" + url.QueryEscape(pair.value.String(i, msg)) 273 } 274 uploadInput.Tagging = aws.String(strings.Join(tags, "&")) 275 } 276 277 if a.conf.KMSKeyID != "" { 278 uploadInput.ServerSideEncryption = aws.String("aws:kms") 279 uploadInput.SSEKMSKeyId = &a.conf.KMSKeyID 280 } 281 282 // NOTE: This overrides the ServerSideEncryption set above. We need this to preserve 283 // backwards compatibility, where it is allowed to only set kms_key_id in the config and 284 // the ServerSideEncryption value of "aws:kms" is implied. 285 if a.conf.ServerSideEncryption != "" { 286 uploadInput.ServerSideEncryption = &a.conf.ServerSideEncryption 287 } 288 289 if _, err := a.uploader.UploadWithContext(ctx, uploadInput); err != nil { 290 return err 291 } 292 return nil 293 }) 294 } 295 296 // CloseAsync begins cleaning up resources used by this reader asynchronously. 297 func (a *AmazonS3) CloseAsync() { 298 } 299 300 // WaitForClose will block until either the reader is closed or a specified 301 // timeout occurs. 302 func (a *AmazonS3) WaitForClose(time.Duration) error { 303 return nil 304 } 305 306 //------------------------------------------------------------------------------