// Listing source: github.com/Jeffail/benthos/v3@v3.65.0/lib/processor/branch.go

package processor

import (
	"errors"
	"fmt"
	"sort"
	"time"

	"github.com/Jeffail/benthos/v3/internal/bloblang/mapping"
	"github.com/Jeffail/benthos/v3/internal/bloblang/query"
	"github.com/Jeffail/benthos/v3/internal/docs"
	"github.com/Jeffail/benthos/v3/internal/interop"
	"github.com/Jeffail/benthos/v3/internal/tracing"
	"github.com/Jeffail/benthos/v3/lib/log"
	"github.com/Jeffail/benthos/v3/lib/message"
	"github.com/Jeffail/benthos/v3/lib/metrics"
	"github.com/Jeffail/benthos/v3/lib/types"
)

//------------------------------------------------------------------------------

// branchFields documents the three configuration fields of the branch
// processor: the request mapping, the child processors and the result mapping.
// Each field declares a default so that it may be omitted from a config.
var branchFields = docs.FieldSpecs{
	// request_map: builds the payload handed to the child processors.
	docs.FieldBloblang(
		"request_map",
		"A [Bloblang mapping](/docs/guides/bloblang/about) that describes how to create a request payload suitable for the child processors of this branch. If left empty then the branch will begin with an exact copy of the origin message (including metadata).",
		`root = {
	"id": this.doc.id,
	"content": this.doc.body.text
}`,
		`root = if this.type == "foo" {
	this.foo.request
} else {
	deleted()
}`,
	).HasDefault(""),
	// processors: the child processors executed against the mapped requests.
	docs.FieldCommon(
		"processors",
		"A list of processors to apply to mapped requests. When processing message batches the resulting batch must match the size and ordering of the input batch, therefore filtering, grouping should not be performed within these processors.",
	).Array().HasType(docs.FieldTypeProcessor).HasDefault([]interface{}{}),
	// result_map: maps the branch output back onto the original message.
	docs.FieldBloblang(
		"result_map",
		"A [Bloblang mapping](/docs/guides/bloblang/about) that describes how the resulting messages from branched processing should be mapped back into the original payload. If left empty the origin message will remain unchanged (including metadata).",
		`meta foo_code = meta("code")
root.foo_result = this`,
		`meta = meta()
root.bar.body = this.body
root.bar.id = this.user.id`,
		`root.raw_result = content().string()`,
		`root.enrichments.foo = if errored() {
	throw(error())
} else {
	this
}`,
	).HasDefault(""),
}

// init registers the branch processor under TypeBranch in the package-level
// Constructors table, together with its user-facing documentation, annotated
// configuration examples and field specs.
func init() {
	Constructors[TypeBranch] = TypeSpec{
		Status:      docs.StatusStable,
		constructor: NewBranch,
		Categories: []Category{
			CategoryComposition,
		},
		Summary: `
The ` + "`branch`" + ` processor allows you to create a new request message via
a [Bloblang mapping](/docs/guides/bloblang/about), execute a list of processors
on the request messages, and, finally, map the result back into the source
message using another mapping.`,
		Description: `
This is useful for preserving the original message contents when using
processors that would otherwise replace the entire contents.

### Metadata

Metadata fields that are added to messages during branch processing will not be
automatically copied into the resulting message. In order to do this you should
explicitly declare in your ` + "`result_map`" + ` either a wholesale copy with
` + "`meta = meta()`" + `, or selective copies with
` + "`meta foo = meta(\"bar\")`" + ` and so on.

### Error Handling

If the ` + "`request_map`" + ` fails the child processors will not be executed.
If the child processors themselves result in an (uncaught) error then the
` + "`result_map`" + ` will not be executed. If the ` + "`result_map`" + ` fails
the message will remain unchanged. Under any of these conditions standard
[error handling methods](/docs/configuration/error_handling) can be used in
order to filter, DLQ or recover the failed messages.

### Conditional Branching

If the root of your request map is set to ` + "`deleted()`" + ` then the branch
processors are skipped for the given message, this allows you to conditionally
branch messages.`,
		Examples: []docs.AnnotatedExample{
			// Enrich a document with the result of an HTTP request.
			{
				Title: "HTTP Request",
				Summary: `
This example strips the request message into an empty body, grabs an HTTP
payload, and places the result back into the original message at the path
` + "`image.pull_count`" + `:`,
				Config: `
pipeline:
  processors:
    - branch:
        request_map: 'root = ""'
        processors:
          - http:
              url: https://hub.docker.com/v2/repositories/jeffail/benthos
              verb: GET
        result_map: root.image.pull_count = this.pull_count

# Example input: {"id":"foo","some":"pre-existing data"}
# Example output: {"id":"foo","some":"pre-existing data","image":{"pull_count":1234}}
`,
			},
			// Store the raw (non-JSON) branch output in a field.
			{
				Title: "Non Structured Results",
				Summary: `
When the result of your branch processors is unstructured and you wish to simply set a resulting field to the raw output use the content function to obtain the raw bytes of the resulting message and then coerce it into your value type of choice:`,
				Config: `
pipeline:
  processors:
    - branch:
        request_map: 'root = this.document.id'
        processors:
          - cache:
              resource: descriptions_cache
              key: ${! content() }
              operator: get
        result_map: root.document.description = content().string()

# Example input: {"document":{"id":"foo","content":"hello world"}}
# Example output: {"document":{"id":"foo","content":"hello world","description":"this is a cool doc"}}
`,
			},
			// Fire-and-forget: no result_map, so the original is untouched.
			{
				Title: "Lambda Function",
				Summary: `
This example maps a new payload for triggering a lambda function with an ID and
username from the original message, and the result of the lambda is discarded,
meaning the original message is unchanged.`,
				Config: `
pipeline:
  processors:
    - branch:
        request_map: '{"id":this.doc.id,"username":this.user.name}'
        processors:
          - aws_lambda:
              function: trigger_user_update

# Example input: {"doc":{"id":"foo","body":"hello world"},"user":{"name":"fooey"}}
# Output matches the input, which is unchanged
`,
			},
			// Conditional branching via deleted() in the request map.
			{
				Title: "Conditional Caching",
				Summary: `
This example caches a document by a message ID only when the type of the
document is a foo:`,
				Config: `
pipeline:
  processors:
    - branch:
        request_map: |
          meta id = this.id
          root = if this.type == "foo" {
            this.document
          } else {
            deleted()
          }
        processors:
          - cache:
              resource: TODO
              operator: set
              key: ${! meta("id") }
              value: ${! content() }
`,
			},
		},
		FieldSpecs: branchFields,
	}
}

//------------------------------------------------------------------------------

// BranchConfig contains configuration fields for the Branch processor.
type BranchConfig struct {
	// RequestMap is the Bloblang mapping source used to build branch requests;
	// an empty string means no request mapping is compiled.
	RequestMap string `json:"request_map" yaml:"request_map"`
	// Processors are the child processor configs executed within the branch.
	Processors []Config `json:"processors" yaml:"processors"`
	// ResultMap is the Bloblang mapping source used to map branch results back
	// onto the original message; an empty string means no result mapping is
	// compiled.
	ResultMap string `json:"result_map" yaml:"result_map"`
}

// NewBranchConfig returns a BranchConfig with default values.
195 func NewBranchConfig() BranchConfig { 196 return BranchConfig{ 197 RequestMap: "", 198 Processors: []Config{}, 199 ResultMap: "", 200 } 201 } 202 203 // Sanitise the configuration into a minimal structure that can be printed 204 // without changing the intent. 205 func (b BranchConfig) Sanitise() (map[string]interface{}, error) { 206 var err error 207 procConfs := make([]interface{}, len(b.Processors)) 208 for i, pConf := range b.Processors { 209 if procConfs[i], err = SanitiseConfig(pConf); err != nil { 210 return nil, err 211 } 212 } 213 return map[string]interface{}{ 214 "request_map": b.RequestMap, 215 "processors": procConfs, 216 "result_map": b.ResultMap, 217 }, nil 218 } 219 220 //------------------------------------------------------------------------------ 221 222 // Branch contains conditions and maps for transforming a batch of messages into 223 // a subset of request messages, and mapping results from those requests back 224 // into the original message batch. 225 type Branch struct { 226 log log.Modular 227 stats metrics.Type 228 229 requestMap *mapping.Executor 230 resultMap *mapping.Executor 231 children []types.Processor 232 233 // Metrics 234 mCount metrics.StatCounter 235 mErr metrics.StatCounter 236 mErrParts metrics.StatCounter 237 mErrProc metrics.StatCounter 238 mErrAlign metrics.StatCounter 239 mErrReq metrics.StatCounter 240 mErrRes metrics.StatCounter 241 mSent metrics.StatCounter 242 mBatchSent metrics.StatCounter 243 } 244 245 // NewBranch creates a new branch processor. 
246 func NewBranch( 247 conf Config, mgr types.Manager, log log.Modular, stats metrics.Type, 248 ) (Type, error) { 249 return newBranch(conf.Branch, mgr, log, stats) 250 } 251 252 func newBranch( 253 conf BranchConfig, mgr types.Manager, log log.Modular, stats metrics.Type, 254 ) (*Branch, error) { 255 children := make([]types.Processor, 0, len(conf.Processors)) 256 for i, pconf := range conf.Processors { 257 pMgr, pLog, pStats := interop.LabelChild(fmt.Sprintf("processor.%v", i), mgr, log, stats) 258 proc, err := New(pconf, pMgr, pLog, pStats) 259 if err != nil { 260 return nil, fmt.Errorf("failed to init processor %v: %w", i, err) 261 } 262 children = append(children, proc) 263 } 264 if len(children) == 0 { 265 return nil, errors.New("the branch processor requires at least one child processor") 266 } 267 268 b := &Branch{ 269 children: children, 270 log: log, 271 stats: stats, 272 273 mCount: stats.GetCounter("count"), 274 mErr: stats.GetCounter("error"), 275 mErrParts: stats.GetCounter("error_counts_diverged"), 276 mErrProc: stats.GetCounter("error_processors"), 277 mErrAlign: stats.GetCounter("error_result_alignment"), 278 mErrReq: stats.GetCounter("error_request_map"), 279 mErrRes: stats.GetCounter("error_result_map"), 280 mSent: stats.GetCounter("sent"), 281 mBatchSent: stats.GetCounter("batch.sent"), 282 } 283 284 var err error 285 if len(conf.RequestMap) > 0 { 286 if b.requestMap, err = interop.NewBloblangMapping(mgr, conf.RequestMap); err != nil { 287 return nil, fmt.Errorf("failed to parse request mapping: %w", err) 288 } 289 } 290 if len(conf.ResultMap) > 0 { 291 if b.resultMap, err = interop.NewBloblangMapping(mgr, conf.ResultMap); err != nil { 292 return nil, fmt.Errorf("failed to parse result mapping: %w", err) 293 } 294 } 295 296 return b, nil 297 } 298 299 //------------------------------------------------------------------------------ 300 301 // TargetsUsed returns a list of paths that this branch depends on. 
Each path is 302 // prefixed by a namespace `metadata` or `path` indicating the source. 303 func (b *Branch) targetsUsed() [][]string { 304 if b.requestMap == nil { 305 return nil 306 } 307 308 var paths [][]string 309 _, queryTargets := b.requestMap.QueryTargets(query.TargetsContext{}) 310 311 pathLoop: 312 for _, p := range queryTargets { 313 path := make([]string, 0, len(p.Path)+1) 314 switch p.Type { 315 case query.TargetValue: 316 path = append(path, "path") 317 case query.TargetMetadata: 318 path = append(path, "metadata") 319 default: 320 continue pathLoop 321 } 322 paths = append(paths, append(path, p.Path...)) 323 } 324 325 return paths 326 } 327 328 // TargetsProvided returns a list of paths that this branch provides. 329 func (b *Branch) targetsProvided() [][]string { 330 if b.resultMap == nil { 331 return nil 332 } 333 334 var paths [][]string 335 336 pathLoop: 337 for _, p := range b.resultMap.AssignmentTargets() { 338 path := make([]string, 0, len(p.Path)+1) 339 switch p.Type { 340 case mapping.TargetValue: 341 path = append(path, "path") 342 case mapping.TargetMetadata: 343 path = append(path, "metadata") 344 default: 345 continue pathLoop 346 } 347 paths = append(paths, append(path, p.Path...)) 348 } 349 350 return paths 351 } 352 353 //------------------------------------------------------------------------------ 354 355 // ProcessMessage applies the processor to a message, either creating >0 356 // resulting messages or a response to be sent back to the message source. 357 func (b *Branch) ProcessMessage(msg types.Message) ([]types.Message, types.Response) { 358 branchMsg, propSpans := tracing.WithChildSpans(TypeBranch, msg.Copy()) 359 defer func() { 360 for _, s := range propSpans { 361 s.Finish() 362 } 363 }() 364 365 parts := make([]types.Part, 0, branchMsg.Len()) 366 branchMsg.Iter(func(i int, p types.Part) error { 367 // Remove errors so that they aren't propagated into the branch. 
368 ClearFail(p) 369 parts = append(parts, p) 370 return nil 371 }) 372 373 resultParts, mapErrs, err := b.createResult(parts, msg) 374 if err != nil { 375 result := msg.Copy() 376 // Add general error to all messages. 377 result.Iter(func(i int, p types.Part) error { 378 FlagErr(p, err) 379 return nil 380 }) 381 // And override with mapping specific errors where appropriate. 382 for _, e := range mapErrs { 383 FlagErr(result.Get(e.index), e.err) 384 } 385 msgs := [1]types.Message{result} 386 return msgs[:], nil 387 } 388 389 result := msg.DeepCopy() 390 for _, e := range mapErrs { 391 FlagErr(result.Get(e.index), e.err) 392 b.log.Errorf("Branch error: %v", e.err) 393 } 394 395 if mapErrs, err = b.overlayResult(result, resultParts); err != nil { 396 result.Iter(func(i int, p types.Part) error { 397 FlagErr(p, err) 398 return nil 399 }) 400 msgs := [1]types.Message{result} 401 return msgs[:], nil 402 } 403 for _, e := range mapErrs { 404 FlagErr(result.Get(e.index), e.err) 405 b.log.Errorf("Branch error: %v", e.err) 406 } 407 408 return []types.Message{result}, nil 409 } 410 411 //------------------------------------------------------------------------------ 412 413 type branchMapError struct { 414 index int 415 err error 416 } 417 418 func newBranchMapError(index int, err error) branchMapError { 419 return branchMapError{index, err} 420 } 421 422 //------------------------------------------------------------------------------ 423 424 // createResult performs reduction and child processors to a payload. The size 425 // of the payload will remain unchanged, where reduced indexes are nil. This 426 // result can be overlayed onto the original message in order to complete the 427 // map. 
428 func (b *Branch) createResult(parts []types.Part, referenceMsg types.Message) ([]types.Part, []branchMapError, error) { 429 b.mCount.Incr(1) 430 431 originalLen := len(parts) 432 433 // Create request payloads 434 var skipped, failed []int 435 var mapErrs []branchMapError 436 437 newParts := make([]types.Part, 0, len(parts)) 438 for i := 0; i < len(parts); i++ { 439 if parts[i] == nil { 440 // Skip if the message part is nil. 441 skipped = append(skipped, i) 442 continue 443 } 444 if b.requestMap != nil { 445 _ = parts[i].Set(nil) 446 newPart, err := b.requestMap.MapOnto(parts[i], i, referenceMsg) 447 if err != nil { 448 b.mErrReq.Incr(1) 449 b.log.Debugf("Failed to map request '%v': %v\n", i, err) 450 451 // Skip if message part fails mapping. 452 failed = append(failed, i) 453 mapErrs = append(mapErrs, newBranchMapError(i, fmt.Errorf("request mapping failed: %w", err))) 454 } else if newPart == nil { 455 // Skip if the message part is deleted. 456 skipped = append(skipped, i) 457 } else { 458 newParts = append(newParts, newPart) 459 } 460 } else { 461 newParts = append(newParts, parts[i]) 462 } 463 } 464 parts = newParts 465 466 // Execute child processors 467 var procResults []types.Message 468 var err error 469 if len(parts) > 0 { 470 var res types.Response 471 msg := message.New(nil) 472 msg.SetAll(parts) 473 if procResults, res = ExecuteAll(b.children, msg); res != nil && res.Error() != nil { 474 err = fmt.Errorf("child processors failed: %v", res.Error()) 475 } 476 if len(procResults) == 0 { 477 err = errors.New("child processors resulted in zero messages") 478 } 479 if err != nil { 480 b.mErrProc.Incr(1) 481 b.mErr.Incr(1) 482 b.log.Errorf("Child processors failed: %v\n", err) 483 return nil, mapErrs, err 484 } 485 } 486 487 // Re-align processor results with original message indexes 488 var alignedResult []types.Part 489 if alignedResult, err = alignBranchResult(originalLen, skipped, failed, procResults); err != nil { 490 b.mErrAlign.Incr(1) 491 
b.mErr.Incr(1) 492 b.log.Errorf("Failed to align branch result: %v. Avoid using filters or archive/unarchive processors within your branch, or anything that increases or reduces the number of messages. These processors should instead be applied before or after the branch processor.\n", err) 493 return nil, mapErrs, err 494 } 495 496 for i, p := range alignedResult { 497 if p == nil { 498 continue 499 } 500 if fail := GetFail(p); len(fail) > 0 { 501 alignedResult[i] = nil 502 mapErrs = append(mapErrs, newBranchMapError(i, fmt.Errorf("processors failed: %v", fail))) 503 } 504 } 505 506 return alignedResult, mapErrs, nil 507 } 508 509 // overlayResult attempts to merge the result of a process_map with the original 510 // payload as per the map specified in the postmap and postmap_optional fields. 511 func (b *Branch) overlayResult(payload types.Message, results []types.Part) ([]branchMapError, error) { 512 if exp, act := payload.Len(), len(results); exp != act { 513 b.mErr.Incr(1) 514 return nil, fmt.Errorf( 515 "message count returned from branch has diverged from the request, started with %v messages, finished with %v", 516 act, exp, 517 ) 518 } 519 520 resultMsg := message.New(nil) 521 resultMsg.SetAll(results) 522 523 var failed []branchMapError 524 525 if b.resultMap != nil { 526 parts := make([]types.Part, payload.Len()) 527 payload.Iter(func(i int, p types.Part) error { 528 parts[i] = p 529 return nil 530 }) 531 532 for i, result := range results { 533 if result == nil { 534 continue 535 } 536 537 newPart, err := b.resultMap.MapOnto(payload.Get(i), i, resultMsg) 538 if err != nil { 539 b.mErrRes.Incr(1) 540 b.log.Debugf("Failed to map result '%v': %v\n", i, err) 541 542 failed = append(failed, newBranchMapError(i, fmt.Errorf("result mapping failed: %w", err))) 543 continue 544 } 545 546 // TODO: Allow filtering here? 
547 if newPart != nil { 548 parts[i] = newPart 549 } 550 } 551 552 payload.SetAll(parts) 553 } 554 555 b.mBatchSent.Incr(1) 556 b.mSent.Incr(int64(payload.Len())) 557 return failed, nil 558 } 559 560 func alignBranchResult(length int, skipped, failed []int, result []types.Message) ([]types.Part, error) { 561 resMsgParts := []types.Part{} 562 for _, m := range result { 563 m.Iter(func(i int, p types.Part) error { 564 resMsgParts = append(resMsgParts, p) 565 return nil 566 }) 567 } 568 569 skippedOrFailed := make([]int, len(skipped)+len(failed)) 570 i := copy(skippedOrFailed, skipped) 571 copy(skippedOrFailed[i:], failed) 572 573 sort.Ints(skippedOrFailed) 574 575 // Check that size of response is aligned with payload. 576 if rLen, pLen := len(resMsgParts)+len(skippedOrFailed), length; rLen != pLen { 577 return nil, fmt.Errorf( 578 "message count from branch processors does not match request, started with %v messages, finished with %v", 579 rLen, pLen, 580 ) 581 } 582 583 var resultParts []types.Part 584 if len(skippedOrFailed) == 0 { 585 resultParts = resMsgParts 586 } else { 587 // Remember to insert nil for each skipped part at the correct index. 588 resultParts = make([]types.Part, length) 589 sIndex := 0 590 for i = 0; i < len(resMsgParts); i++ { 591 for sIndex < len(skippedOrFailed) && skippedOrFailed[sIndex] == (i+sIndex) { 592 sIndex++ 593 } 594 resultParts[i+sIndex] = resMsgParts[i] 595 } 596 } 597 598 return resultParts, nil 599 } 600 601 // CloseAsync shuts down the processor and stops processing requests. 602 func (b *Branch) CloseAsync() { 603 for _, child := range b.children { 604 child.CloseAsync() 605 } 606 } 607 608 // WaitForClose blocks until the processor has closed down. 
609 func (b *Branch) WaitForClose(timeout time.Duration) error { 610 until := time.Now().Add(timeout) 611 for _, child := range b.children { 612 if err := child.WaitForClose(time.Until(until)); err != nil { 613 return err 614 } 615 } 616 return nil 617 } 618 619 //------------------------------------------------------------------------------