github.com/Jeffail/benthos/v3@v3.65.0/lib/output/writer/elasticsearch.go (about) 1 package writer 2 3 import ( 4 "context" 5 "crypto/tls" 6 "fmt" 7 "net/http" 8 "strings" 9 "time" 10 11 "github.com/Jeffail/benthos/v3/internal/bloblang/field" 12 "github.com/Jeffail/benthos/v3/internal/interop" 13 "github.com/Jeffail/benthos/v3/lib/log" 14 "github.com/Jeffail/benthos/v3/lib/message/batch" 15 "github.com/Jeffail/benthos/v3/lib/metrics" 16 "github.com/Jeffail/benthos/v3/lib/types" 17 sess "github.com/Jeffail/benthos/v3/lib/util/aws/session" 18 "github.com/Jeffail/benthos/v3/lib/util/http/auth" 19 "github.com/Jeffail/benthos/v3/lib/util/retries" 20 btls "github.com/Jeffail/benthos/v3/lib/util/tls" 21 "github.com/cenkalti/backoff/v4" 22 "github.com/olivere/elastic/v7" 23 aws "github.com/olivere/elastic/v7/aws/v4" 24 ) 25 26 //------------------------------------------------------------------------------ 27 28 // OptionalAWSConfig contains config fields for AWS authentication with an 29 // enable flag. 30 type OptionalAWSConfig struct { 31 Enabled bool `json:"enabled" yaml:"enabled"` 32 sess.Config `json:",inline" yaml:",inline"` 33 } 34 35 //------------------------------------------------------------------------------ 36 37 // ElasticsearchConfig contains configuration fields for the Elasticsearch 38 // output type. 39 type ElasticsearchConfig struct { 40 URLs []string `json:"urls" yaml:"urls"` 41 Sniff bool `json:"sniff" yaml:"sniff"` 42 Healthcheck bool `json:"healthcheck" yaml:"healthcheck"` 43 ID string `json:"id" yaml:"id"` 44 Action string `json:"action" yaml:"action"` 45 Index string `json:"index" yaml:"index"` 46 Pipeline string `json:"pipeline" yaml:"pipeline"` 47 Routing string `json:"routing" yaml:"routing"` 48 Type string `json:"type" yaml:"type"` 49 Timeout string `json:"timeout" yaml:"timeout"` 50 TLS btls.Config `json:"tls" yaml:"tls"` 51 Auth auth.BasicAuthConfig `json:"basic_auth" yaml:"basic_auth"` 52 AWS OptionalAWSConfig `json:"aws" yaml:"aws"` 53 GzipCompression bool `json:"gzip_compression" yaml:"gzip_compression"` 54 MaxInFlight int `json:"max_in_flight" yaml:"max_in_flight"` 55 retries.Config `json:",inline" yaml:",inline"` 56 Batching batch.PolicyConfig `json:"batching" yaml:"batching"` 57 } 58 59 // NewElasticsearchConfig creates a new ElasticsearchConfig with default values. 60 func NewElasticsearchConfig() ElasticsearchConfig { 61 rConf := retries.NewConfig() 62 rConf.Backoff.InitialInterval = "1s" 63 rConf.Backoff.MaxInterval = "5s" 64 rConf.Backoff.MaxElapsedTime = "30s" 65 66 return ElasticsearchConfig{ 67 URLs: []string{"http://localhost:9200"}, 68 Sniff: true, 69 Healthcheck: true, 70 Action: "index", 71 ID: `${!count("elastic_ids")}-${!timestamp_unix()}`, 72 Index: "benthos_index", 73 Pipeline: "", 74 Type: "doc", 75 Routing: "", 76 Timeout: "5s", 77 TLS: btls.NewConfig(), 78 Auth: auth.NewBasicAuthConfig(), 79 AWS: OptionalAWSConfig{ 80 Enabled: false, 81 Config: sess.NewConfig(), 82 }, 83 GzipCompression: false, 84 MaxInFlight: 1, 85 Config: rConf, 86 Batching: batch.NewPolicyConfig(), 87 } 88 } 89 90 //------------------------------------------------------------------------------ 91 92 // Elasticsearch is a writer type that writes messages into elasticsearch. 93 type Elasticsearch struct { 94 log log.Modular 95 stats metrics.Type 96 97 urls []string 98 sniff bool 99 healthcheck bool 100 conf ElasticsearchConfig 101 102 backoffCtor func() backoff.BackOff 103 timeout time.Duration 104 tlsConf *tls.Config 105 106 actionStr *field.Expression 107 idStr *field.Expression 108 indexStr *field.Expression 109 pipelineStr *field.Expression 110 routingStr *field.Expression 111 112 eJSONErr metrics.StatCounter 113 114 client *elastic.Client 115 } 116 117 // NewElasticsearch creates a new Elasticsearch writer type. 118 // 119 // Deprecated: use the V2 API instead. 120 func NewElasticsearch(conf ElasticsearchConfig, log log.Modular, stats metrics.Type) (*Elasticsearch, error) { 121 return NewElasticsearchV2(conf, types.NoopMgr(), log, stats) 122 } 123 124 // NewElasticsearchV2 creates a new Elasticsearch writer type. 125 func NewElasticsearchV2(conf ElasticsearchConfig, mgr types.Manager, log log.Modular, stats metrics.Type) (*Elasticsearch, error) { 126 e := Elasticsearch{ 127 log: log, 128 stats: stats, 129 conf: conf, 130 sniff: conf.Sniff, 131 healthcheck: conf.Healthcheck, 132 eJSONErr: stats.GetCounter("error.json"), 133 } 134 135 var err error 136 if e.actionStr, err = interop.NewBloblangField(mgr, conf.Action); err != nil { 137 return nil, fmt.Errorf("failed to parse action expression: %v", err) 138 } 139 if e.idStr, err = interop.NewBloblangField(mgr, conf.ID); err != nil { 140 return nil, fmt.Errorf("failed to parse id expression: %v", err) 141 } 142 if e.indexStr, err = interop.NewBloblangField(mgr, conf.Index); err != nil { 143 return nil, fmt.Errorf("failed to parse index expression: %v", err) 144 } 145 if e.pipelineStr, err = interop.NewBloblangField(mgr, conf.Pipeline); err != nil { 146 return nil, fmt.Errorf("failed to parse pipeline expression: %v", err) 147 } 148 if e.routingStr, err = interop.NewBloblangField(mgr, conf.Routing); err != nil { 149 return nil, fmt.Errorf("failed to parse routing key expression: %v", err) 150 } 151 152 for _, u := range conf.URLs { 153 for _, splitURL := range strings.Split(u, ",") { 154 if len(splitURL) > 0 { 155 e.urls = append(e.urls, splitURL) 156 } 157 } 158 } 159 160 if tout := conf.Timeout; len(tout) > 0 { 161 var err error 162 if e.timeout, err = time.ParseDuration(tout); err != nil { 163 return nil, fmt.Errorf("failed to parse timeout string: %v", err) 164 } 165 } 166 167 if e.backoffCtor, err = conf.Config.GetCtor(); err != nil { 168 return nil, err 169 } 170 171 if conf.TLS.Enabled { 172 var err error 173 if e.tlsConf, err = conf.TLS.Get(); err != nil { 174 return nil, err 175 } 176 } 177 return &e, nil 178 } 179 180 //------------------------------------------------------------------------------ 181 182 // ConnectWithContext attempts to establish a connection to a Elasticsearch 183 // broker. 184 func (e *Elasticsearch) ConnectWithContext(ctx context.Context) error { 185 return e.Connect() 186 } 187 188 // Connect attempts to establish a connection to a Elasticsearch broker. 189 func (e *Elasticsearch) Connect() error { 190 if e.client != nil { 191 return nil 192 } 193 194 opts := []elastic.ClientOptionFunc{ 195 elastic.SetURL(e.urls...), 196 elastic.SetSniff(e.sniff), 197 elastic.SetHealthcheck(e.healthcheck), 198 } 199 200 if e.conf.Auth.Enabled { 201 opts = append(opts, elastic.SetBasicAuth( 202 e.conf.Auth.Username, e.conf.Auth.Password, 203 )) 204 } 205 206 if e.conf.TLS.Enabled { 207 opts = append(opts, elastic.SetHttpClient(&http.Client{ 208 Transport: &http.Transport{ 209 TLSClientConfig: e.tlsConf, 210 }, 211 Timeout: e.timeout, 212 })) 213 214 } else { 215 opts = append(opts, elastic.SetHttpClient(&http.Client{ 216 Timeout: e.timeout, 217 })) 218 } 219 220 if e.conf.AWS.Enabled { 221 tsess, err := e.conf.AWS.GetSession() 222 if err != nil { 223 return err 224 } 225 signingClient := aws.NewV4SigningClient(tsess.Config.Credentials, e.conf.AWS.Region) 226 opts = append(opts, elastic.SetHttpClient(signingClient)) 227 } 228 229 if e.conf.GzipCompression { 230 opts = append(opts, elastic.SetGzip(true)) 231 } 232 233 client, err := elastic.NewClient(opts...) 234 if err != nil { 235 return err 236 } 237 238 e.client = client 239 e.log.Infof("Sending messages to Elasticsearch index at urls: %s\n", e.urls) 240 return nil 241 } 242 243 func shouldRetry(s int) bool { 244 if s >= 500 && s <= 599 { 245 return true 246 } 247 return false 248 } 249 250 type pendingBulkIndex struct { 251 Action string 252 Index string 253 Pipeline string 254 Routing string 255 Type string 256 Doc interface{} 257 ID string 258 } 259 260 // WriteWithContext will attempt to write a message to Elasticsearch, wait for 261 // acknowledgement, and returns an error if applicable. 262 func (e *Elasticsearch) WriteWithContext(ctx context.Context, msg types.Message) error { 263 return e.Write(msg) 264 } 265 266 // Write will attempt to write a message to Elasticsearch, wait for 267 // acknowledgement, and returns an error if applicable. 268 func (e *Elasticsearch) Write(msg types.Message) error { 269 if e.client == nil { 270 return types.ErrNotConnected 271 } 272 273 boff := e.backoffCtor() 274 275 requests := make([]*pendingBulkIndex, msg.Len()) 276 if err := msg.Iter(func(i int, part types.Part) error { 277 jObj, ierr := part.JSON() 278 if ierr != nil { 279 e.eJSONErr.Incr(1) 280 e.log.Errorf("Failed to marshal message into JSON document: %v\n", ierr) 281 return fmt.Errorf("failed to marshal message into JSON document: %w", ierr) 282 } 283 requests[i] = &pendingBulkIndex{ 284 Action: e.actionStr.String(i, msg), 285 Index: e.indexStr.String(i, msg), 286 Pipeline: e.pipelineStr.String(i, msg), 287 Routing: e.routingStr.String(i, msg), 288 Type: e.conf.Type, 289 Doc: jObj, 290 ID: e.idStr.String(i, msg), 291 } 292 return nil 293 }); err != nil { 294 return err 295 } 296 297 b := e.client.Bulk() 298 for _, v := range requests { 299 bulkReq, err := e.buildBulkableRequest(v) 300 if err != nil { 301 return err 302 } 303 b.Add(bulkReq) 304 } 305 306 lastErrReason := "no reason given" 307 for b.NumberOfActions() != 0 { 308 result, err := b.Do(context.Background()) 309 if err != nil { 310 return err 311 } 312 if !result.Errors { 313 return nil 314 } 315 316 var newRequests []*pendingBulkIndex 317 for i, resp := range result.Items { 318 for _, item := range resp { 319 if item.Status >= 200 && item.Status <= 299 { 320 continue 321 } 322 323 reason := "no reason given" 324 if item.Error != nil { 325 reason = item.Error.Reason 326 lastErrReason = fmt.Sprintf("status [%v]: %v", item.Status, reason) 327 } 328 329 e.log.Errorf("Elasticsearch message '%v' rejected with status [%v]: %v\n", item.Id, item.Status, reason) 330 if !shouldRetry(item.Status) { 331 return fmt.Errorf("failed to send message '%v': %v", item.Id, reason) 332 } 333 334 // IMPORTANT: i exactly matches the index of our source requests 335 // and when we re-run our bulk request with errored requests 336 // that must remain true. 337 sourceReq := requests[i] 338 bulkReq, err := e.buildBulkableRequest(sourceReq) 339 if err != nil { 340 return err 341 } 342 b.Add(bulkReq) 343 newRequests = append(newRequests, sourceReq) 344 } 345 } 346 requests = newRequests 347 348 wait := boff.NextBackOff() 349 if wait == backoff.Stop { 350 return fmt.Errorf("retries exhausted for messages, aborting with last error reported as: %v", lastErrReason) 351 } 352 time.Sleep(wait) 353 } 354 355 return nil 356 } 357 358 // CloseAsync shuts down the Elasticsearch writer and stops processing messages. 359 func (e *Elasticsearch) CloseAsync() { 360 } 361 362 // WaitForClose blocks until the Elasticsearch writer has closed down. 363 func (e *Elasticsearch) WaitForClose(timeout time.Duration) error { 364 return nil 365 } 366 367 // Build a bulkable request for a given pending bulk index item. 368 func (e *Elasticsearch) buildBulkableRequest(p *pendingBulkIndex) (elastic.BulkableRequest, error) { 369 // TODO: V4 the type field should be optional and not used 370 switch p.Action { 371 case "update": 372 return elastic.NewBulkUpdateRequest(). 373 Index(p.Index). 374 Routing(p.Routing). 375 Type(p.Type). 376 Id(p.ID). 377 Doc(p.Doc), nil 378 case "delete": 379 return elastic.NewBulkDeleteRequest(). 380 Index(p.Index). 381 Routing(p.Routing). 382 Id(p.ID). 383 Type(p.Type), nil 384 case "index": 385 return elastic.NewBulkIndexRequest(). 386 Index(p.Index). 387 Pipeline(p.Pipeline). 388 Routing(p.Routing). 389 Type(p.Type). 390 Id(p.ID). 391 Doc(p.Doc), nil 392 default: 393 return nil, fmt.Errorf("elasticsearch action '%s' is not allowed", p.Action) 394 } 395 } 396 397 //------------------------------------------------------------------------------