github.com/Jeffail/benthos/v3@v3.65.0/lib/output/writer/s3.go (about)

     1  package writer
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"fmt"
     7  	"net/url"
     8  	"sort"
     9  	"strings"
    10  	"time"
    11  
    12  	"github.com/Jeffail/benthos/v3/internal/bloblang/field"
    13  	"github.com/Jeffail/benthos/v3/internal/interop"
    14  	"github.com/Jeffail/benthos/v3/internal/metadata"
    15  	"github.com/Jeffail/benthos/v3/lib/log"
    16  	"github.com/Jeffail/benthos/v3/lib/message/batch"
    17  	"github.com/Jeffail/benthos/v3/lib/metrics"
    18  	"github.com/Jeffail/benthos/v3/lib/types"
    19  	sess "github.com/Jeffail/benthos/v3/lib/util/aws/session"
    20  	"github.com/aws/aws-sdk-go/aws"
    21  	"github.com/aws/aws-sdk-go/aws/session"
    22  	"github.com/aws/aws-sdk-go/service/s3/s3manager"
    23  )
    24  
    25  //------------------------------------------------------------------------------
    26  
    27  // AmazonS3Config contains configuration fields for the AmazonS3 output type.
        //
        // Path, ContentType, ContentEncoding, CacheControl, ContentDisposition,
        // ContentLanguage, WebsiteRedirectLocation, StorageClass and the values of
        // Tags are compiled into interpolated field expressions by NewAmazonS3V2 and
        // resolved per message part at write time. Timeout is parsed with
        // time.ParseDuration. A non-empty KMSKeyID selects "aws:kms" server side
        // encryption unless ServerSideEncryption is also set, in which case the
        // explicit value wins. MaxInFlight and Batching are not read by the writer
        // code visible here; presumably they are consumed by the component that
        // wraps this writer (to be confirmed against the caller).
    28  type AmazonS3Config struct {
    29  	sess.Config             `json:",inline" yaml:",inline"`
    30  	Bucket                  string                       `json:"bucket" yaml:"bucket"`
    31  	ForcePathStyleURLs      bool                         `json:"force_path_style_urls" yaml:"force_path_style_urls"`
    32  	Path                    string                       `json:"path" yaml:"path"`
    33  	Tags                    map[string]string            `json:"tags" yaml:"tags"`
    34  	ContentType             string                       `json:"content_type" yaml:"content_type"`
    35  	ContentEncoding         string                       `json:"content_encoding" yaml:"content_encoding"`
    36  	CacheControl            string                       `json:"cache_control" yaml:"cache_control"`
    37  	ContentDisposition      string                       `json:"content_disposition" yaml:"content_disposition"`
    38  	ContentLanguage         string                       `json:"content_language" yaml:"content_language"`
    39  	WebsiteRedirectLocation string                       `json:"website_redirect_location" yaml:"website_redirect_location"`
    40  	Metadata                metadata.ExcludeFilterConfig `json:"metadata" yaml:"metadata"`
    41  	StorageClass            string                       `json:"storage_class" yaml:"storage_class"`
    42  	Timeout                 string                       `json:"timeout" yaml:"timeout"`
    43  	KMSKeyID                string                       `json:"kms_key_id" yaml:"kms_key_id"`
    44  	ServerSideEncryption    string                       `json:"server_side_encryption" yaml:"server_side_encryption"`
    45  	MaxInFlight             int                          `json:"max_in_flight" yaml:"max_in_flight"`
    46  	Batching                batch.PolicyConfig           `json:"batching" yaml:"batching"`
    47  }
    48  
    49  // NewAmazonS3Config creates a new Config with default values.
    50  func NewAmazonS3Config() AmazonS3Config {
    51  	return AmazonS3Config{
    52  		Config:                  sess.NewConfig(),
    53  		Bucket:                  "",
    54  		ForcePathStyleURLs:      false,
    55  		Path:                    `${!count("files")}-${!timestamp_unix_nano()}.txt`,
    56  		Tags:                    map[string]string{},
    57  		ContentType:             "application/octet-stream",
    58  		ContentEncoding:         "",
    59  		CacheControl:            "",
    60  		ContentDisposition:      "",
    61  		ContentLanguage:         "",
    62  		WebsiteRedirectLocation: "",
    63  		Metadata:                metadata.NewExcludeFilterConfig(),
    64  		StorageClass:            "STANDARD",
    65  		Timeout:                 "5s",
    66  		KMSKeyID:                "",
    67  		ServerSideEncryption:    "",
    68  		MaxInFlight:             1,
    69  		Batching:                batch.NewPolicyConfig(),
    70  	}
    71  }
    72  
    73  //------------------------------------------------------------------------------
    74  
        // s3TagPair couples an S3 object tag key with the interpolated expression
        // that yields the tag's value for a given message part.
    75  type s3TagPair struct {
    76  	key   string
    77  	value *field.Expression
    78  }
    79  
    80  // AmazonS3 is a benthos writer.Type implementation that writes messages to an
    81  // Amazon S3 bucket.
        //
        // Values are built by NewAmazonS3V2 and must be connected with Connect (or
        // ConnectWithContext) before WriteWithContext will accept messages.
    82  type AmazonS3 struct {
        	// conf is the configuration the writer was constructed with.
    83  	conf AmazonS3Config
    84  
        	// Expressions compiled from the matching config fields by NewAmazonS3V2
        	// and evaluated once per message part in WriteWithContext. tags is kept
        	// sorted by key. metaFilter selects which message metadata is attached
        	// to the uploaded object.
    85  	path                    *field.Expression
    86  	tags                    []s3TagPair
    87  	contentType             *field.Expression
    88  	contentEncoding         *field.Expression
    89  	cacheControl            *field.Expression
    90  	contentDisposition      *field.Expression
    91  	contentLanguage         *field.Expression
    92  	websiteRedirectLocation *field.Expression
    93  	storageClass            *field.Expression
    94  	metaFilter              *metadata.ExcludeFilter
    95  
        	// session and uploader stay nil until Connect succeeds. timeout is the
        	// parsed form of conf.Timeout, used as the deadline for a
        	// WriteWithContext call.
    96  	session  *session.Session
    97  	uploader *s3manager.Uploader
    98  	timeout  time.Duration
    99  
        	// log receives the informational message emitted by Connect. stats is
        	// stored at construction; no use of it is visible in this section.
   100  	log   log.Modular
   101  	stats metrics.Type
   102  }
   103  
   104  // NewAmazonS3 creates a new Amazon S3 bucket writer.Type.
   105  //
   106  // Deprecated: use the V2 API instead.
   107  func NewAmazonS3(
   108  	conf AmazonS3Config,
   109  	log log.Modular,
   110  	stats metrics.Type,
   111  ) (*AmazonS3, error) {
   112  	return NewAmazonS3V2(conf, types.NoopMgr(), log, stats)
   113  }
   114  
   115  // NewAmazonS3V2 creates a new Amazon S3 bucket writer.Type.
   116  func NewAmazonS3V2(
   117  	conf AmazonS3Config,
   118  	mgr types.Manager,
   119  	log log.Modular,
   120  	stats metrics.Type,
   121  ) (*AmazonS3, error) {
   122  	var timeout time.Duration
   123  	if tout := conf.Timeout; len(tout) > 0 {
   124  		var err error
   125  		if timeout, err = time.ParseDuration(tout); err != nil {
   126  			return nil, fmt.Errorf("failed to parse timeout period string: %v", err)
   127  		}
   128  	}
   129  	a := &AmazonS3{
   130  		conf:    conf,
   131  		log:     log,
   132  		stats:   stats,
   133  		timeout: timeout,
   134  	}
   135  	var err error
   136  	if a.path, err = interop.NewBloblangField(mgr, conf.Path); err != nil {
   137  		return nil, fmt.Errorf("failed to parse path expression: %v", err)
   138  	}
   139  	if a.contentType, err = interop.NewBloblangField(mgr, conf.ContentType); err != nil {
   140  		return nil, fmt.Errorf("failed to parse content type expression: %v", err)
   141  	}
   142  	if a.contentEncoding, err = interop.NewBloblangField(mgr, conf.ContentEncoding); err != nil {
   143  		return nil, fmt.Errorf("failed to parse content encoding expression: %v", err)
   144  	}
   145  	if a.cacheControl, err = interop.NewBloblangField(mgr, conf.CacheControl); err != nil {
   146  		return nil, fmt.Errorf("failed to parse cache control expression: %v", err)
   147  	}
   148  	if a.contentDisposition, err = interop.NewBloblangField(mgr, conf.ContentDisposition); err != nil {
   149  		return nil, fmt.Errorf("failed to parse content disposition expression: %v", err)
   150  	}
   151  	if a.contentLanguage, err = interop.NewBloblangField(mgr, conf.ContentLanguage); err != nil {
   152  		return nil, fmt.Errorf("failed to parse content language expression: %v", err)
   153  	}
   154  	if a.websiteRedirectLocation, err = interop.NewBloblangField(mgr, conf.WebsiteRedirectLocation); err != nil {
   155  		return nil, fmt.Errorf("failed to parse website redirect location expression: %v", err)
   156  	}
   157  
   158  	if a.metaFilter, err = conf.Metadata.Filter(); err != nil {
   159  		return nil, fmt.Errorf("failed to construct metadata filter: %w", err)
   160  	}
   161  	if a.storageClass, err = interop.NewBloblangField(mgr, conf.StorageClass); err != nil {
   162  		return nil, fmt.Errorf("failed to parse storage class expression: %v", err)
   163  	}
   164  
   165  	a.tags = make([]s3TagPair, 0, len(conf.Tags))
   166  	for k, v := range conf.Tags {
   167  		vExpr, err := interop.NewBloblangField(mgr, v)
   168  		if err != nil {
   169  			return nil, fmt.Errorf("failed to parse tag expression for key '%v': %v", k, err)
   170  		}
   171  		a.tags = append(a.tags, s3TagPair{
   172  			key:   k,
   173  			value: vExpr,
   174  		})
   175  	}
   176  	sort.Slice(a.tags, func(i, j int) bool {
   177  		return a.tags[i].key < a.tags[j].key
   178  	})
   179  
   180  	return a, nil
   181  }
   182  
   183  // ConnectWithContext attempts to establish a connection to the target S3
   184  // bucket.
   185  func (a *AmazonS3) ConnectWithContext(ctx context.Context) error {
   186  	return a.Connect()
   187  }
   188  
   189  // Connect attempts to establish a connection to the target S3 bucket.
   190  func (a *AmazonS3) Connect() error {
   191  	if a.session != nil {
   192  		return nil
   193  	}
   194  
   195  	sess, err := a.conf.GetSession(func(c *aws.Config) {
   196  		c.S3ForcePathStyle = aws.Bool(a.conf.ForcePathStyleURLs)
   197  	})
   198  	if err != nil {
   199  		return err
   200  	}
   201  
   202  	a.session = sess
   203  	a.uploader = s3manager.NewUploader(sess)
   204  
   205  	a.log.Infof("Uploading message parts as objects to Amazon S3 bucket: %v\n", a.conf.Bucket)
   206  	return nil
   207  }
   208  
   209  // Write attempts to write message contents to a target S3 bucket as files.
   210  func (a *AmazonS3) Write(msg types.Message) error {
   211  	return a.WriteWithContext(context.Background(), msg)
   212  }
   213  
   214  // WriteWithContext attempts to write message contents to a target S3 bucket as
   215  // files.
   216  func (a *AmazonS3) WriteWithContext(wctx context.Context, msg types.Message) error {
   217  	if a.session == nil {
   218  		return types.ErrNotConnected
   219  	}
   220  
   221  	ctx, cancel := context.WithTimeout(
   222  		wctx, a.timeout,
   223  	)
   224  	defer cancel()
   225  
   226  	return IterateBatchedSend(msg, func(i int, p types.Part) error {
   227  		metadata := map[string]*string{}
   228  		a.metaFilter.Iter(p.Metadata(), func(k, v string) error {
   229  			metadata[k] = aws.String(v)
   230  			return nil
   231  		})
   232  
   233  		var contentEncoding *string
   234  		if ce := a.contentEncoding.String(i, msg); len(ce) > 0 {
   235  			contentEncoding = aws.String(ce)
   236  		}
   237  		var cacheControl *string
   238  		if ce := a.cacheControl.String(i, msg); len(ce) > 0 {
   239  			cacheControl = aws.String(ce)
   240  		}
   241  		var contentDisposition *string
   242  		if ce := a.contentDisposition.String(i, msg); len(ce) > 0 {
   243  			contentDisposition = aws.String(ce)
   244  		}
   245  		var contentLanguage *string
   246  		if ce := a.contentLanguage.String(i, msg); len(ce) > 0 {
   247  			contentLanguage = aws.String(ce)
   248  		}
   249  		var websiteRedirectLocation *string
   250  		if ce := a.websiteRedirectLocation.String(i, msg); len(ce) > 0 {
   251  			websiteRedirectLocation = aws.String(ce)
   252  		}
   253  
   254  		uploadInput := &s3manager.UploadInput{
   255  			Bucket:                  &a.conf.Bucket,
   256  			Key:                     aws.String(a.path.String(i, msg)),
   257  			Body:                    bytes.NewReader(p.Get()),
   258  			ContentType:             aws.String(a.contentType.String(i, msg)),
   259  			ContentEncoding:         contentEncoding,
   260  			CacheControl:            cacheControl,
   261  			ContentDisposition:      contentDisposition,
   262  			ContentLanguage:         contentLanguage,
   263  			WebsiteRedirectLocation: websiteRedirectLocation,
   264  			StorageClass:            aws.String(a.storageClass.String(i, msg)),
   265  			Metadata:                metadata,
   266  		}
   267  
   268  		// Prepare tags, escaping keys and values to ensure they're valid query string parameters.
   269  		if len(a.tags) > 0 {
   270  			tags := make([]string, len(a.tags))
   271  			for j, pair := range a.tags {
   272  				tags[j] = url.QueryEscape(pair.key) + "=" + url.QueryEscape(pair.value.String(i, msg))
   273  			}
   274  			uploadInput.Tagging = aws.String(strings.Join(tags, "&"))
   275  		}
   276  
   277  		if a.conf.KMSKeyID != "" {
   278  			uploadInput.ServerSideEncryption = aws.String("aws:kms")
   279  			uploadInput.SSEKMSKeyId = &a.conf.KMSKeyID
   280  		}
   281  
   282  		// NOTE: This overrides the ServerSideEncryption set above. We need this to preserve
   283  		// backwards compatibility, where it is allowed to only set kms_key_id in the config and
   284  		// the ServerSideEncryption value of "aws:kms" is implied.
   285  		if a.conf.ServerSideEncryption != "" {
   286  			uploadInput.ServerSideEncryption = &a.conf.ServerSideEncryption
   287  		}
   288  
   289  		if _, err := a.uploader.UploadWithContext(ctx, uploadInput); err != nil {
   290  			return err
   291  		}
   292  		return nil
   293  	})
   294  }
   295  
   296  // CloseAsync begins cleaning up resources used by this reader asynchronously.
   297  func (a *AmazonS3) CloseAsync() {
   298  }
   299  
   300  // WaitForClose will block until either the reader is closed or a specified
   301  // timeout occurs.
   302  func (a *AmazonS3) WaitForClose(time.Duration) error {
   303  	return nil
   304  }
   305  
   306  //------------------------------------------------------------------------------