github.com/Jeffail/benthos/v3@v3.65.0/lib/output/writer/elasticsearch.go (about)

     1  package writer
     2  
     3  import (
     4  	"context"
     5  	"crypto/tls"
     6  	"fmt"
     7  	"net/http"
     8  	"strings"
     9  	"time"
    10  
    11  	"github.com/Jeffail/benthos/v3/internal/bloblang/field"
    12  	"github.com/Jeffail/benthos/v3/internal/interop"
    13  	"github.com/Jeffail/benthos/v3/lib/log"
    14  	"github.com/Jeffail/benthos/v3/lib/message/batch"
    15  	"github.com/Jeffail/benthos/v3/lib/metrics"
    16  	"github.com/Jeffail/benthos/v3/lib/types"
    17  	sess "github.com/Jeffail/benthos/v3/lib/util/aws/session"
    18  	"github.com/Jeffail/benthos/v3/lib/util/http/auth"
    19  	"github.com/Jeffail/benthos/v3/lib/util/retries"
    20  	btls "github.com/Jeffail/benthos/v3/lib/util/tls"
    21  	"github.com/cenkalti/backoff/v4"
    22  	"github.com/olivere/elastic/v7"
    23  	aws "github.com/olivere/elastic/v7/aws/v4"
    24  )
    25  
    26  //------------------------------------------------------------------------------
    27  
// OptionalAWSConfig contains config fields for AWS authentication with an
// enable flag.
type OptionalAWSConfig struct {
	// Enabled toggles AWS support; Connect only creates an AWS session and a
	// request-signing HTTP client when this is true.
	Enabled     bool `json:"enabled" yaml:"enabled"`
	// Config is the embedded AWS session configuration. The inline tags place
	// its fields directly alongside "enabled" when (un)marshalled.
	sess.Config `json:",inline" yaml:",inline"`
}
    34  
    35  //------------------------------------------------------------------------------
    36  
// ElasticsearchConfig contains configuration fields for the Elasticsearch
// output type.
type ElasticsearchConfig struct {
	// URLs lists the Elasticsearch base URLs. Each entry may itself contain
	// several comma-separated URLs, which the constructor splits apart.
	URLs            []string             `json:"urls" yaml:"urls"`
	// Sniff and Healthcheck are forwarded to the elastic client options
	// SetSniff and SetHealthcheck when connecting.
	Sniff           bool                 `json:"sniff" yaml:"sniff"`
	Healthcheck     bool                 `json:"healthcheck" yaml:"healthcheck"`
	// ID, Action, Index, Pipeline and Routing are parsed as Bloblang field
	// expressions by the constructor and resolved for every message part.
	// Action must resolve to "index", "update" or "delete"; Pipeline is only
	// applied to "index" requests.
	ID              string               `json:"id" yaml:"id"`
	Action          string               `json:"action" yaml:"action"`
	Index           string               `json:"index" yaml:"index"`
	Pipeline        string               `json:"pipeline" yaml:"pipeline"`
	Routing         string               `json:"routing" yaml:"routing"`
	// Type is passed verbatim as the document type of every bulk request.
	Type            string               `json:"type" yaml:"type"`
	// Timeout is a duration string in time.ParseDuration syntax that is used
	// as the HTTP client timeout. An empty string leaves the timeout at zero.
	Timeout         string               `json:"timeout" yaml:"timeout"`
	// TLS, when enabled, provides the TLS client configuration of the HTTP
	// transport.
	TLS             btls.Config          `json:"tls" yaml:"tls"`
	// Auth, when enabled, supplies the username and password for HTTP basic
	// authentication.
	Auth            auth.BasicAuthConfig `json:"basic_auth" yaml:"basic_auth"`
	// AWS, when enabled, makes the client sign requests with AWS V4 signing.
	AWS             OptionalAWSConfig    `json:"aws" yaml:"aws"`
	// GzipCompression enables gzip on the elastic client (SetGzip).
	GzipCompression bool                 `json:"gzip_compression" yaml:"gzip_compression"`
	// MaxInFlight is not read anywhere in this file.
	// NOTE(review): presumably consumed by the output wrapper to bound
	// concurrent writes - confirm.
	MaxInFlight     int                  `json:"max_in_flight" yaml:"max_in_flight"`
	// Config is the embedded retry configuration; its GetCtor result produces
	// the backoff used between bulk retry attempts.
	retries.Config  `json:",inline" yaml:",inline"`
	// Batching is not read anywhere in this file.
	// NOTE(review): presumably the batching policy applied by the output
	// wrapper - confirm.
	Batching        batch.PolicyConfig `json:"batching" yaml:"batching"`
}
    58  
    59  // NewElasticsearchConfig creates a new ElasticsearchConfig with default values.
    60  func NewElasticsearchConfig() ElasticsearchConfig {
    61  	rConf := retries.NewConfig()
    62  	rConf.Backoff.InitialInterval = "1s"
    63  	rConf.Backoff.MaxInterval = "5s"
    64  	rConf.Backoff.MaxElapsedTime = "30s"
    65  
    66  	return ElasticsearchConfig{
    67  		URLs:        []string{"http://localhost:9200"},
    68  		Sniff:       true,
    69  		Healthcheck: true,
    70  		Action:      "index",
    71  		ID:          `${!count("elastic_ids")}-${!timestamp_unix()}`,
    72  		Index:       "benthos_index",
    73  		Pipeline:    "",
    74  		Type:        "doc",
    75  		Routing:     "",
    76  		Timeout:     "5s",
    77  		TLS:         btls.NewConfig(),
    78  		Auth:        auth.NewBasicAuthConfig(),
    79  		AWS: OptionalAWSConfig{
    80  			Enabled: false,
    81  			Config:  sess.NewConfig(),
    82  		},
    83  		GzipCompression: false,
    84  		MaxInFlight:     1,
    85  		Config:          rConf,
    86  		Batching:        batch.NewPolicyConfig(),
    87  	}
    88  }
    89  
    90  //------------------------------------------------------------------------------
    91  
// Elasticsearch is a writer type that writes messages into elasticsearch.
type Elasticsearch struct {
	log   log.Modular
	stats metrics.Type

	// urls is the flattened list of endpoints obtained by splitting every
	// configured URL entry on commas.
	urls        []string
	// sniff and healthcheck mirror the config flags of the same name and are
	// handed to the elastic client when connecting.
	sniff       bool
	healthcheck bool
	// conf is the full configuration the writer was constructed with.
	conf        ElasticsearchConfig

	// backoffCtor creates a fresh backoff for each Write call.
	backoffCtor func() backoff.BackOff
	// timeout is the parsed HTTP client timeout (zero when not configured).
	timeout     time.Duration
	// tlsConf is only populated when TLS is enabled in the config.
	tlsConf     *tls.Config

	// Bloblang expressions resolved per message part when building bulk
	// requests.
	actionStr   *field.Expression
	idStr       *field.Expression
	indexStr    *field.Expression
	pipelineStr *field.Expression
	routingStr  *field.Expression

	// eJSONErr is the "error.json" counter, incremented when a message part
	// cannot be read as JSON.
	eJSONErr metrics.StatCounter

	// client is nil until Connect succeeds; Write reports
	// types.ErrNotConnected while it is nil.
	client *elastic.Client
}
   116  
// NewElasticsearch creates a new Elasticsearch writer type.
//
// It is a thin wrapper that calls NewElasticsearchV2 with the manager returned
// by types.NoopMgr() and returns its result unchanged.
//
// Deprecated: use the V2 API instead.
func NewElasticsearch(conf ElasticsearchConfig, log log.Modular, stats metrics.Type) (*Elasticsearch, error) {
	return NewElasticsearchV2(conf, types.NoopMgr(), log, stats)
}
   123  
   124  // NewElasticsearchV2 creates a new Elasticsearch writer type.
   125  func NewElasticsearchV2(conf ElasticsearchConfig, mgr types.Manager, log log.Modular, stats metrics.Type) (*Elasticsearch, error) {
   126  	e := Elasticsearch{
   127  		log:         log,
   128  		stats:       stats,
   129  		conf:        conf,
   130  		sniff:       conf.Sniff,
   131  		healthcheck: conf.Healthcheck,
   132  		eJSONErr:    stats.GetCounter("error.json"),
   133  	}
   134  
   135  	var err error
   136  	if e.actionStr, err = interop.NewBloblangField(mgr, conf.Action); err != nil {
   137  		return nil, fmt.Errorf("failed to parse action expression: %v", err)
   138  	}
   139  	if e.idStr, err = interop.NewBloblangField(mgr, conf.ID); err != nil {
   140  		return nil, fmt.Errorf("failed to parse id expression: %v", err)
   141  	}
   142  	if e.indexStr, err = interop.NewBloblangField(mgr, conf.Index); err != nil {
   143  		return nil, fmt.Errorf("failed to parse index expression: %v", err)
   144  	}
   145  	if e.pipelineStr, err = interop.NewBloblangField(mgr, conf.Pipeline); err != nil {
   146  		return nil, fmt.Errorf("failed to parse pipeline expression: %v", err)
   147  	}
   148  	if e.routingStr, err = interop.NewBloblangField(mgr, conf.Routing); err != nil {
   149  		return nil, fmt.Errorf("failed to parse routing key expression: %v", err)
   150  	}
   151  
   152  	for _, u := range conf.URLs {
   153  		for _, splitURL := range strings.Split(u, ",") {
   154  			if len(splitURL) > 0 {
   155  				e.urls = append(e.urls, splitURL)
   156  			}
   157  		}
   158  	}
   159  
   160  	if tout := conf.Timeout; len(tout) > 0 {
   161  		var err error
   162  		if e.timeout, err = time.ParseDuration(tout); err != nil {
   163  			return nil, fmt.Errorf("failed to parse timeout string: %v", err)
   164  		}
   165  	}
   166  
   167  	if e.backoffCtor, err = conf.Config.GetCtor(); err != nil {
   168  		return nil, err
   169  	}
   170  
   171  	if conf.TLS.Enabled {
   172  		var err error
   173  		if e.tlsConf, err = conf.TLS.Get(); err != nil {
   174  			return nil, err
   175  		}
   176  	}
   177  	return &e, nil
   178  }
   179  
   180  //------------------------------------------------------------------------------
   181  
// ConnectWithContext attempts to establish a connection to an Elasticsearch
// cluster.
//
// The ctx argument is currently ignored: the call is forwarded to Connect,
// so cancelling ctx does not interrupt the connection attempt.
func (e *Elasticsearch) ConnectWithContext(ctx context.Context) error {
	return e.Connect()
}
   187  
   188  // Connect attempts to establish a connection to a Elasticsearch broker.
   189  func (e *Elasticsearch) Connect() error {
   190  	if e.client != nil {
   191  		return nil
   192  	}
   193  
   194  	opts := []elastic.ClientOptionFunc{
   195  		elastic.SetURL(e.urls...),
   196  		elastic.SetSniff(e.sniff),
   197  		elastic.SetHealthcheck(e.healthcheck),
   198  	}
   199  
   200  	if e.conf.Auth.Enabled {
   201  		opts = append(opts, elastic.SetBasicAuth(
   202  			e.conf.Auth.Username, e.conf.Auth.Password,
   203  		))
   204  	}
   205  
   206  	if e.conf.TLS.Enabled {
   207  		opts = append(opts, elastic.SetHttpClient(&http.Client{
   208  			Transport: &http.Transport{
   209  				TLSClientConfig: e.tlsConf,
   210  			},
   211  			Timeout: e.timeout,
   212  		}))
   213  
   214  	} else {
   215  		opts = append(opts, elastic.SetHttpClient(&http.Client{
   216  			Timeout: e.timeout,
   217  		}))
   218  	}
   219  
   220  	if e.conf.AWS.Enabled {
   221  		tsess, err := e.conf.AWS.GetSession()
   222  		if err != nil {
   223  			return err
   224  		}
   225  		signingClient := aws.NewV4SigningClient(tsess.Config.Credentials, e.conf.AWS.Region)
   226  		opts = append(opts, elastic.SetHttpClient(signingClient))
   227  	}
   228  
   229  	if e.conf.GzipCompression {
   230  		opts = append(opts, elastic.SetGzip(true))
   231  	}
   232  
   233  	client, err := elastic.NewClient(opts...)
   234  	if err != nil {
   235  		return err
   236  	}
   237  
   238  	e.client = client
   239  	e.log.Infof("Sending messages to Elasticsearch index at urls: %s\n", e.urls)
   240  	return nil
   241  }
   242  
   243  func shouldRetry(s int) bool {
   244  	if s >= 500 && s <= 599 {
   245  		return true
   246  	}
   247  	return false
   248  }
   249  
// pendingBulkIndex holds the fully resolved parameters of one bulk operation
// for a single message part. Write keeps these values so that an item can be
// rebuilt and resent when Elasticsearch rejects it with a retryable status.
type pendingBulkIndex struct {
	// Action selects the request kind: "index", "update" or "delete".
	Action   string
	Index    string
	// Pipeline is only applied to "index" requests.
	Pipeline string
	Routing  string
	Type     string
	// Doc is the JSON document of the message part; it is not sent for
	// "delete" requests.
	Doc      interface{}
	ID       string
}
   259  
   260  // WriteWithContext will attempt to write a message to Elasticsearch, wait for
   261  // acknowledgement, and returns an error if applicable.
   262  func (e *Elasticsearch) WriteWithContext(ctx context.Context, msg types.Message) error {
   263  	return e.Write(msg)
   264  }
   265  
   266  // Write will attempt to write a message to Elasticsearch, wait for
   267  // acknowledgement, and returns an error if applicable.
   268  func (e *Elasticsearch) Write(msg types.Message) error {
   269  	if e.client == nil {
   270  		return types.ErrNotConnected
   271  	}
   272  
   273  	boff := e.backoffCtor()
   274  
   275  	requests := make([]*pendingBulkIndex, msg.Len())
   276  	if err := msg.Iter(func(i int, part types.Part) error {
   277  		jObj, ierr := part.JSON()
   278  		if ierr != nil {
   279  			e.eJSONErr.Incr(1)
   280  			e.log.Errorf("Failed to marshal message into JSON document: %v\n", ierr)
   281  			return fmt.Errorf("failed to marshal message into JSON document: %w", ierr)
   282  		}
   283  		requests[i] = &pendingBulkIndex{
   284  			Action:   e.actionStr.String(i, msg),
   285  			Index:    e.indexStr.String(i, msg),
   286  			Pipeline: e.pipelineStr.String(i, msg),
   287  			Routing:  e.routingStr.String(i, msg),
   288  			Type:     e.conf.Type,
   289  			Doc:      jObj,
   290  			ID:       e.idStr.String(i, msg),
   291  		}
   292  		return nil
   293  	}); err != nil {
   294  		return err
   295  	}
   296  
   297  	b := e.client.Bulk()
   298  	for _, v := range requests {
   299  		bulkReq, err := e.buildBulkableRequest(v)
   300  		if err != nil {
   301  			return err
   302  		}
   303  		b.Add(bulkReq)
   304  	}
   305  
   306  	lastErrReason := "no reason given"
   307  	for b.NumberOfActions() != 0 {
   308  		result, err := b.Do(context.Background())
   309  		if err != nil {
   310  			return err
   311  		}
   312  		if !result.Errors {
   313  			return nil
   314  		}
   315  
   316  		var newRequests []*pendingBulkIndex
   317  		for i, resp := range result.Items {
   318  			for _, item := range resp {
   319  				if item.Status >= 200 && item.Status <= 299 {
   320  					continue
   321  				}
   322  
   323  				reason := "no reason given"
   324  				if item.Error != nil {
   325  					reason = item.Error.Reason
   326  					lastErrReason = fmt.Sprintf("status [%v]: %v", item.Status, reason)
   327  				}
   328  
   329  				e.log.Errorf("Elasticsearch message '%v' rejected with status [%v]: %v\n", item.Id, item.Status, reason)
   330  				if !shouldRetry(item.Status) {
   331  					return fmt.Errorf("failed to send message '%v': %v", item.Id, reason)
   332  				}
   333  
   334  				// IMPORTANT: i exactly matches the index of our source requests
   335  				// and when we re-run our bulk request with errored requests
   336  				// that must remain true.
   337  				sourceReq := requests[i]
   338  				bulkReq, err := e.buildBulkableRequest(sourceReq)
   339  				if err != nil {
   340  					return err
   341  				}
   342  				b.Add(bulkReq)
   343  				newRequests = append(newRequests, sourceReq)
   344  			}
   345  		}
   346  		requests = newRequests
   347  
   348  		wait := boff.NextBackOff()
   349  		if wait == backoff.Stop {
   350  			return fmt.Errorf("retries exhausted for messages, aborting with last error reported as: %v", lastErrReason)
   351  		}
   352  		time.Sleep(wait)
   353  	}
   354  
   355  	return nil
   356  }
   357  
// CloseAsync shuts down the Elasticsearch writer and stops processing messages.
//
// The current implementation is a no-op: nothing is released and the client
// created by Connect is left untouched.
// NOTE(review): presumably the elastic client may keep background sniffing or
// health-check routines running - confirm whether it should be stopped here.
func (e *Elasticsearch) CloseAsync() {
}
   361  
// WaitForClose blocks until the Elasticsearch writer has closed down.
//
// CloseAsync performs no work, so there is nothing to wait for: the timeout
// argument is ignored and nil is returned immediately.
func (e *Elasticsearch) WaitForClose(timeout time.Duration) error {
	return nil
}
   366  
   367  // Build a bulkable request for a given pending bulk index item.
   368  func (e *Elasticsearch) buildBulkableRequest(p *pendingBulkIndex) (elastic.BulkableRequest, error) {
   369  	// TODO: V4 the type field should be optional and not used
   370  	switch p.Action {
   371  	case "update":
   372  		return elastic.NewBulkUpdateRequest().
   373  			Index(p.Index).
   374  			Routing(p.Routing).
   375  			Type(p.Type).
   376  			Id(p.ID).
   377  			Doc(p.Doc), nil
   378  	case "delete":
   379  		return elastic.NewBulkDeleteRequest().
   380  			Index(p.Index).
   381  			Routing(p.Routing).
   382  			Id(p.ID).
   383  			Type(p.Type), nil
   384  	case "index":
   385  		return elastic.NewBulkIndexRequest().
   386  			Index(p.Index).
   387  			Pipeline(p.Pipeline).
   388  			Routing(p.Routing).
   389  			Type(p.Type).
   390  			Id(p.ID).
   391  			Doc(p.Doc), nil
   392  	default:
   393  		return nil, fmt.Errorf("elasticsearch action '%s' is not allowed", p.Action)
   394  	}
   395  }
   396  
   397  //------------------------------------------------------------------------------