github.com/Jeffail/benthos/v3@v3.65.0/lib/processor/branch.go

github.com/Jeffail/benthos/v3@v3.65.0/lib/processor/branch.go (about)

     1  package processor
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"sort"
     7  	"time"
     8  
     9  	"github.com/Jeffail/benthos/v3/internal/bloblang/mapping"
    10  	"github.com/Jeffail/benthos/v3/internal/bloblang/query"
    11  	"github.com/Jeffail/benthos/v3/internal/docs"
    12  	"github.com/Jeffail/benthos/v3/internal/interop"
    13  	"github.com/Jeffail/benthos/v3/internal/tracing"
    14  	"github.com/Jeffail/benthos/v3/lib/log"
    15  	"github.com/Jeffail/benthos/v3/lib/message"
    16  	"github.com/Jeffail/benthos/v3/lib/metrics"
    17  	"github.com/Jeffail/benthos/v3/lib/types"
    18  )
    19  
    20  //------------------------------------------------------------------------------
    21  
// branchFields lists the documented configuration fields of the branch
// processor: request_map, processors and result_map. It is attached to the
// processor's TypeSpec as its FieldSpecs.
//
// NOTE(review): the string arguments following each Bloblang field description
// look like example mappings rendered in the docs — confirm against
// docs.FieldBloblang.
var branchFields = docs.FieldSpecs{
	docs.FieldBloblang(
		"request_map",
		"A [Bloblang mapping](/docs/guides/bloblang/about) that describes how to create a request payload suitable for the child processors of this branch. If left empty then the branch will begin with an exact copy of the origin message (including metadata).",
		`root = {
	"id": this.doc.id,
	"content": this.doc.body.text
}`,
		`root = if this.type == "foo" {
	this.foo.request
} else {
	deleted()
}`,
	).HasDefault(""),
	docs.FieldCommon(
		"processors",
		"A list of processors to apply to mapped requests. When processing message batches the resulting batch must match the size and ordering of the input batch, therefore filtering, grouping should not be performed within these processors.",
	).Array().HasType(docs.FieldTypeProcessor).HasDefault([]interface{}{}),
	docs.FieldBloblang(
		"result_map",
		"A [Bloblang mapping](/docs/guides/bloblang/about) that describes how the resulting messages from branched processing should be mapped back into the original payload. If left empty the origin message will remain unchanged (including metadata).",
		`meta foo_code = meta("code")
root.foo_result = this`,
		`meta = meta()
root.bar.body = this.body
root.bar.id = this.user.id`,
		`root.raw_result = content().string()`,
		`root.enrichments.foo = if errored() {
	throw(error())
} else {
	this
}`,
	).HasDefault(""),
}
    56  
// init registers the branch processor in the package-level Constructors map
// under TypeBranch. The registered TypeSpec carries the constructor
// (NewBranch), the summary and description text, a set of annotated
// configuration examples and the field specs declared in branchFields.
func init() {
	Constructors[TypeBranch] = TypeSpec{
		Status:      docs.StatusStable,
		constructor: NewBranch,
		Categories: []Category{
			CategoryComposition,
		},
		Summary: `
The ` + "`branch`" + ` processor allows you to create a new request message via
a [Bloblang mapping](/docs/guides/bloblang/about), execute a list of processors
on the request messages, and, finally, map the result back into the source
message using another mapping.`,
		Description: `
This is useful for preserving the original message contents when using
processors that would otherwise replace the entire contents.

### Metadata

Metadata fields that are added to messages during branch processing will not be
automatically copied into the resulting message. In order to do this you should
explicitly declare in your ` + "`result_map`" + ` either a wholesale copy with
` + "`meta = meta()`" + `, or selective copies with
` + "`meta foo = meta(\"bar\")`" + ` and so on.

### Error Handling

If the ` + "`request_map`" + ` fails the child processors will not be executed.
If the child processors themselves result in an (uncaught) error then the
` + "`result_map`" + ` will not be executed. If the ` + "`result_map`" + ` fails
the message will remain unchanged. Under any of these conditions standard
[error handling methods](/docs/configuration/error_handling) can be used in
order to filter, DLQ or recover the failed messages.

### Conditional Branching

If the root of your request map is set to ` + "`deleted()`" + ` then the branch
processors are skipped for the given message, this allows you to conditionally
branch messages.`,
		// Each example pairs a short explanation with a complete pipeline
		// config snippet.
		Examples: []docs.AnnotatedExample{
			{
				Title: "HTTP Request",
				Summary: `
This example strips the request message into an empty body, grabs an HTTP
payload, and places the result back into the original message at the path
` + "`image.pull_count`" + `:`,
				Config: `
pipeline:
  processors:
    - branch:
        request_map: 'root = ""'
        processors:
          - http:
              url: https://hub.docker.com/v2/repositories/jeffail/benthos
              verb: GET
        result_map: root.image.pull_count = this.pull_count

# Example input:  {"id":"foo","some":"pre-existing data"}
# Example output: {"id":"foo","some":"pre-existing data","image":{"pull_count":1234}}
`,
			},
			{
				Title: "Non Structured Results",
				Summary: `
When the result of your branch processors is unstructured and you wish to simply set a resulting field to the raw output use the content function to obtain the raw bytes of the resulting message and then coerce it into your value type of choice:`,
				Config: `
pipeline:
  processors:
    - branch:
        request_map: 'root = this.document.id'
        processors:
          - cache:
              resource: descriptions_cache
              key: ${! content() }
              operator: get
        result_map: root.document.description = content().string()

# Example input:  {"document":{"id":"foo","content":"hello world"}}
# Example output: {"document":{"id":"foo","content":"hello world","description":"this is a cool doc"}}
`,
			},
			{
				Title: "Lambda Function",
				Summary: `
This example maps a new payload for triggering a lambda function with an ID and
username from the original message, and the result of the lambda is discarded,
meaning the original message is unchanged.`,
				Config: `
pipeline:
  processors:
    - branch:
        request_map: '{"id":this.doc.id,"username":this.user.name}'
        processors:
          - aws_lambda:
              function: trigger_user_update

# Example input: {"doc":{"id":"foo","body":"hello world"},"user":{"name":"fooey"}}
# Output matches the input, which is unchanged
`,
			},
			{
				Title: "Conditional Caching",
				Summary: `
This example caches a document by a message ID only when the type of the
document is a foo:`,
				Config: `
pipeline:
  processors:
    - branch:
        request_map: |
          meta id = this.id
          root = if this.type == "foo" {
            this.document
          } else {
            deleted()
          }
        processors:
          - cache:
              resource: TODO
              operator: set
              key: ${! meta("id") }
              value: ${! content() }
`,
			},
		},
		FieldSpecs: branchFields,
	}
}
   184  
   185  //------------------------------------------------------------------------------
   186  
// BranchConfig contains configuration fields for the Branch processor.
type BranchConfig struct {
	// RequestMap is the Bloblang mapping used to build the request payload
	// handed to the child processors. When empty no request mapping is
	// applied.
	RequestMap string   `json:"request_map" yaml:"request_map"`
	// Processors holds the configs of the child processors that are run on
	// the mapped requests. At least one is required by the constructor.
	Processors []Config `json:"processors" yaml:"processors"`
	// ResultMap is the Bloblang mapping used to map the branch results back
	// onto the original message. When empty no result mapping is applied.
	ResultMap  string   `json:"result_map" yaml:"result_map"`
}
   193  
   194  // NewBranchConfig returns a BranchConfig with default values.
   195  func NewBranchConfig() BranchConfig {
   196  	return BranchConfig{
   197  		RequestMap: "",
   198  		Processors: []Config{},
   199  		ResultMap:  "",
   200  	}
   201  }
   202  
   203  // Sanitise the configuration into a minimal structure that can be printed
   204  // without changing the intent.
   205  func (b BranchConfig) Sanitise() (map[string]interface{}, error) {
   206  	var err error
   207  	procConfs := make([]interface{}, len(b.Processors))
   208  	for i, pConf := range b.Processors {
   209  		if procConfs[i], err = SanitiseConfig(pConf); err != nil {
   210  			return nil, err
   211  		}
   212  	}
   213  	return map[string]interface{}{
   214  		"request_map": b.RequestMap,
   215  		"processors":  procConfs,
   216  		"result_map":  b.ResultMap,
   217  	}, nil
   218  }
   219  
   220  //------------------------------------------------------------------------------
   221  
// Branch contains conditions and maps for transforming a batch of messages into
// a subset of request messages, and mapping results from those requests back
// into the original message batch.
type Branch struct {
	// Logger and metrics aggregator supplied at construction.
	log   log.Modular
	stats metrics.Type

	// requestMap builds the request payloads; nil when no request_map was
	// configured, in which case the parts are passed through as they are.
	requestMap *mapping.Executor
	// resultMap maps branch results back onto the original parts; nil when
	// no result_map was configured, in which case the originals are kept.
	resultMap  *mapping.Executor
	// children are the processors executed on the request batch, in
	// configuration order.
	children   []types.Processor

	// Metrics
	// mCount ("count") is incremented once per createResult call.
	mCount     metrics.StatCounter
	// mErr ("error") is incremented on child processor failures, alignment
	// failures and result count mismatches.
	mErr       metrics.StatCounter
	// mErrParts ("error_counts_diverged") is registered but not incremented
	// anywhere in this file.
	mErrParts  metrics.StatCounter
	// mErrProc ("error_processors") counts child processor failures.
	mErrProc   metrics.StatCounter
	// mErrAlign ("error_result_alignment") counts failures to re-align the
	// processor results with the original batch indexes.
	mErrAlign  metrics.StatCounter
	// mErrReq ("error_request_map") counts parts whose request mapping failed.
	mErrReq    metrics.StatCounter
	// mErrRes ("error_result_map") counts parts whose result mapping failed.
	mErrRes    metrics.StatCounter
	// mSent ("sent") is increased by the number of parts in each overlaid
	// payload; mBatchSent ("batch.sent") by one per overlaid payload.
	mSent      metrics.StatCounter
	mBatchSent metrics.StatCounter
}
   244  
   245  // NewBranch creates a new branch processor.
   246  func NewBranch(
   247  	conf Config, mgr types.Manager, log log.Modular, stats metrics.Type,
   248  ) (Type, error) {
   249  	return newBranch(conf.Branch, mgr, log, stats)
   250  }
   251  
   252  func newBranch(
   253  	conf BranchConfig, mgr types.Manager, log log.Modular, stats metrics.Type,
   254  ) (*Branch, error) {
   255  	children := make([]types.Processor, 0, len(conf.Processors))
   256  	for i, pconf := range conf.Processors {
   257  		pMgr, pLog, pStats := interop.LabelChild(fmt.Sprintf("processor.%v", i), mgr, log, stats)
   258  		proc, err := New(pconf, pMgr, pLog, pStats)
   259  		if err != nil {
   260  			return nil, fmt.Errorf("failed to init processor %v: %w", i, err)
   261  		}
   262  		children = append(children, proc)
   263  	}
   264  	if len(children) == 0 {
   265  		return nil, errors.New("the branch processor requires at least one child processor")
   266  	}
   267  
   268  	b := &Branch{
   269  		children: children,
   270  		log:      log,
   271  		stats:    stats,
   272  
   273  		mCount:     stats.GetCounter("count"),
   274  		mErr:       stats.GetCounter("error"),
   275  		mErrParts:  stats.GetCounter("error_counts_diverged"),
   276  		mErrProc:   stats.GetCounter("error_processors"),
   277  		mErrAlign:  stats.GetCounter("error_result_alignment"),
   278  		mErrReq:    stats.GetCounter("error_request_map"),
   279  		mErrRes:    stats.GetCounter("error_result_map"),
   280  		mSent:      stats.GetCounter("sent"),
   281  		mBatchSent: stats.GetCounter("batch.sent"),
   282  	}
   283  
   284  	var err error
   285  	if len(conf.RequestMap) > 0 {
   286  		if b.requestMap, err = interop.NewBloblangMapping(mgr, conf.RequestMap); err != nil {
   287  			return nil, fmt.Errorf("failed to parse request mapping: %w", err)
   288  		}
   289  	}
   290  	if len(conf.ResultMap) > 0 {
   291  		if b.resultMap, err = interop.NewBloblangMapping(mgr, conf.ResultMap); err != nil {
   292  			return nil, fmt.Errorf("failed to parse result mapping: %w", err)
   293  		}
   294  	}
   295  
   296  	return b, nil
   297  }
   298  
   299  //------------------------------------------------------------------------------
   300  
   301  // TargetsUsed returns a list of paths that this branch depends on. Each path is
   302  // prefixed by a namespace `metadata` or `path` indicating the source.
   303  func (b *Branch) targetsUsed() [][]string {
   304  	if b.requestMap == nil {
   305  		return nil
   306  	}
   307  
   308  	var paths [][]string
   309  	_, queryTargets := b.requestMap.QueryTargets(query.TargetsContext{})
   310  
   311  pathLoop:
   312  	for _, p := range queryTargets {
   313  		path := make([]string, 0, len(p.Path)+1)
   314  		switch p.Type {
   315  		case query.TargetValue:
   316  			path = append(path, "path")
   317  		case query.TargetMetadata:
   318  			path = append(path, "metadata")
   319  		default:
   320  			continue pathLoop
   321  		}
   322  		paths = append(paths, append(path, p.Path...))
   323  	}
   324  
   325  	return paths
   326  }
   327  
   328  // TargetsProvided returns a list of paths that this branch provides.
   329  func (b *Branch) targetsProvided() [][]string {
   330  	if b.resultMap == nil {
   331  		return nil
   332  	}
   333  
   334  	var paths [][]string
   335  
   336  pathLoop:
   337  	for _, p := range b.resultMap.AssignmentTargets() {
   338  		path := make([]string, 0, len(p.Path)+1)
   339  		switch p.Type {
   340  		case mapping.TargetValue:
   341  			path = append(path, "path")
   342  		case mapping.TargetMetadata:
   343  			path = append(path, "metadata")
   344  		default:
   345  			continue pathLoop
   346  		}
   347  		paths = append(paths, append(path, p.Path...))
   348  	}
   349  
   350  	return paths
   351  }
   352  
   353  //------------------------------------------------------------------------------
   354  
   355  // ProcessMessage applies the processor to a message, either creating >0
   356  // resulting messages or a response to be sent back to the message source.
   357  func (b *Branch) ProcessMessage(msg types.Message) ([]types.Message, types.Response) {
   358  	branchMsg, propSpans := tracing.WithChildSpans(TypeBranch, msg.Copy())
   359  	defer func() {
   360  		for _, s := range propSpans {
   361  			s.Finish()
   362  		}
   363  	}()
   364  
   365  	parts := make([]types.Part, 0, branchMsg.Len())
   366  	branchMsg.Iter(func(i int, p types.Part) error {
   367  		// Remove errors so that they aren't propagated into the branch.
   368  		ClearFail(p)
   369  		parts = append(parts, p)
   370  		return nil
   371  	})
   372  
   373  	resultParts, mapErrs, err := b.createResult(parts, msg)
   374  	if err != nil {
   375  		result := msg.Copy()
   376  		// Add general error to all messages.
   377  		result.Iter(func(i int, p types.Part) error {
   378  			FlagErr(p, err)
   379  			return nil
   380  		})
   381  		// And override with mapping specific errors where appropriate.
   382  		for _, e := range mapErrs {
   383  			FlagErr(result.Get(e.index), e.err)
   384  		}
   385  		msgs := [1]types.Message{result}
   386  		return msgs[:], nil
   387  	}
   388  
   389  	result := msg.DeepCopy()
   390  	for _, e := range mapErrs {
   391  		FlagErr(result.Get(e.index), e.err)
   392  		b.log.Errorf("Branch error: %v", e.err)
   393  	}
   394  
   395  	if mapErrs, err = b.overlayResult(result, resultParts); err != nil {
   396  		result.Iter(func(i int, p types.Part) error {
   397  			FlagErr(p, err)
   398  			return nil
   399  		})
   400  		msgs := [1]types.Message{result}
   401  		return msgs[:], nil
   402  	}
   403  	for _, e := range mapErrs {
   404  		FlagErr(result.Get(e.index), e.err)
   405  		b.log.Errorf("Branch error: %v", e.err)
   406  	}
   407  
   408  	return []types.Message{result}, nil
   409  }
   410  
   411  //------------------------------------------------------------------------------
   412  
   413  type branchMapError struct {
   414  	index int
   415  	err   error
   416  }
   417  
   418  func newBranchMapError(index int, err error) branchMapError {
   419  	return branchMapError{index, err}
   420  }
   421  
   422  //------------------------------------------------------------------------------
   423  
   424  // createResult performs reduction and child processors to a payload. The size
   425  // of the payload will remain unchanged, where reduced indexes are nil. This
   426  // result can be overlayed onto the original message in order to complete the
   427  // map.
   428  func (b *Branch) createResult(parts []types.Part, referenceMsg types.Message) ([]types.Part, []branchMapError, error) {
   429  	b.mCount.Incr(1)
   430  
   431  	originalLen := len(parts)
   432  
   433  	// Create request payloads
   434  	var skipped, failed []int
   435  	var mapErrs []branchMapError
   436  
   437  	newParts := make([]types.Part, 0, len(parts))
   438  	for i := 0; i < len(parts); i++ {
   439  		if parts[i] == nil {
   440  			// Skip if the message part is nil.
   441  			skipped = append(skipped, i)
   442  			continue
   443  		}
   444  		if b.requestMap != nil {
   445  			_ = parts[i].Set(nil)
   446  			newPart, err := b.requestMap.MapOnto(parts[i], i, referenceMsg)
   447  			if err != nil {
   448  				b.mErrReq.Incr(1)
   449  				b.log.Debugf("Failed to map request '%v': %v\n", i, err)
   450  
   451  				// Skip if message part fails mapping.
   452  				failed = append(failed, i)
   453  				mapErrs = append(mapErrs, newBranchMapError(i, fmt.Errorf("request mapping failed: %w", err)))
   454  			} else if newPart == nil {
   455  				// Skip if the message part is deleted.
   456  				skipped = append(skipped, i)
   457  			} else {
   458  				newParts = append(newParts, newPart)
   459  			}
   460  		} else {
   461  			newParts = append(newParts, parts[i])
   462  		}
   463  	}
   464  	parts = newParts
   465  
   466  	// Execute child processors
   467  	var procResults []types.Message
   468  	var err error
   469  	if len(parts) > 0 {
   470  		var res types.Response
   471  		msg := message.New(nil)
   472  		msg.SetAll(parts)
   473  		if procResults, res = ExecuteAll(b.children, msg); res != nil && res.Error() != nil {
   474  			err = fmt.Errorf("child processors failed: %v", res.Error())
   475  		}
   476  		if len(procResults) == 0 {
   477  			err = errors.New("child processors resulted in zero messages")
   478  		}
   479  		if err != nil {
   480  			b.mErrProc.Incr(1)
   481  			b.mErr.Incr(1)
   482  			b.log.Errorf("Child processors failed: %v\n", err)
   483  			return nil, mapErrs, err
   484  		}
   485  	}
   486  
   487  	// Re-align processor results with original message indexes
   488  	var alignedResult []types.Part
   489  	if alignedResult, err = alignBranchResult(originalLen, skipped, failed, procResults); err != nil {
   490  		b.mErrAlign.Incr(1)
   491  		b.mErr.Incr(1)
   492  		b.log.Errorf("Failed to align branch result: %v. Avoid using filters or archive/unarchive processors within your branch, or anything that increases or reduces the number of messages. These processors should instead be applied before or after the branch processor.\n", err)
   493  		return nil, mapErrs, err
   494  	}
   495  
   496  	for i, p := range alignedResult {
   497  		if p == nil {
   498  			continue
   499  		}
   500  		if fail := GetFail(p); len(fail) > 0 {
   501  			alignedResult[i] = nil
   502  			mapErrs = append(mapErrs, newBranchMapError(i, fmt.Errorf("processors failed: %v", fail)))
   503  		}
   504  	}
   505  
   506  	return alignedResult, mapErrs, nil
   507  }
   508  
   509  // overlayResult attempts to merge the result of a process_map with the original
   510  // payload as per the map specified in the postmap and postmap_optional fields.
   511  func (b *Branch) overlayResult(payload types.Message, results []types.Part) ([]branchMapError, error) {
   512  	if exp, act := payload.Len(), len(results); exp != act {
   513  		b.mErr.Incr(1)
   514  		return nil, fmt.Errorf(
   515  			"message count returned from branch has diverged from the request, started with %v messages, finished with %v",
   516  			act, exp,
   517  		)
   518  	}
   519  
   520  	resultMsg := message.New(nil)
   521  	resultMsg.SetAll(results)
   522  
   523  	var failed []branchMapError
   524  
   525  	if b.resultMap != nil {
   526  		parts := make([]types.Part, payload.Len())
   527  		payload.Iter(func(i int, p types.Part) error {
   528  			parts[i] = p
   529  			return nil
   530  		})
   531  
   532  		for i, result := range results {
   533  			if result == nil {
   534  				continue
   535  			}
   536  
   537  			newPart, err := b.resultMap.MapOnto(payload.Get(i), i, resultMsg)
   538  			if err != nil {
   539  				b.mErrRes.Incr(1)
   540  				b.log.Debugf("Failed to map result '%v': %v\n", i, err)
   541  
   542  				failed = append(failed, newBranchMapError(i, fmt.Errorf("result mapping failed: %w", err)))
   543  				continue
   544  			}
   545  
   546  			// TODO: Allow filtering here?
   547  			if newPart != nil {
   548  				parts[i] = newPart
   549  			}
   550  		}
   551  
   552  		payload.SetAll(parts)
   553  	}
   554  
   555  	b.mBatchSent.Incr(1)
   556  	b.mSent.Incr(int64(payload.Len()))
   557  	return failed, nil
   558  }
   559  
   560  func alignBranchResult(length int, skipped, failed []int, result []types.Message) ([]types.Part, error) {
   561  	resMsgParts := []types.Part{}
   562  	for _, m := range result {
   563  		m.Iter(func(i int, p types.Part) error {
   564  			resMsgParts = append(resMsgParts, p)
   565  			return nil
   566  		})
   567  	}
   568  
   569  	skippedOrFailed := make([]int, len(skipped)+len(failed))
   570  	i := copy(skippedOrFailed, skipped)
   571  	copy(skippedOrFailed[i:], failed)
   572  
   573  	sort.Ints(skippedOrFailed)
   574  
   575  	// Check that size of response is aligned with payload.
   576  	if rLen, pLen := len(resMsgParts)+len(skippedOrFailed), length; rLen != pLen {
   577  		return nil, fmt.Errorf(
   578  			"message count from branch processors does not match request, started with %v messages, finished with %v",
   579  			rLen, pLen,
   580  		)
   581  	}
   582  
   583  	var resultParts []types.Part
   584  	if len(skippedOrFailed) == 0 {
   585  		resultParts = resMsgParts
   586  	} else {
   587  		// Remember to insert nil for each skipped part at the correct index.
   588  		resultParts = make([]types.Part, length)
   589  		sIndex := 0
   590  		for i = 0; i < len(resMsgParts); i++ {
   591  			for sIndex < len(skippedOrFailed) && skippedOrFailed[sIndex] == (i+sIndex) {
   592  				sIndex++
   593  			}
   594  			resultParts[i+sIndex] = resMsgParts[i]
   595  		}
   596  	}
   597  
   598  	return resultParts, nil
   599  }
   600  
   601  // CloseAsync shuts down the processor and stops processing requests.
   602  func (b *Branch) CloseAsync() {
   603  	for _, child := range b.children {
   604  		child.CloseAsync()
   605  	}
   606  }
   607  
   608  // WaitForClose blocks until the processor has closed down.
   609  func (b *Branch) WaitForClose(timeout time.Duration) error {
   610  	until := time.Now().Add(timeout)
   611  	for _, child := range b.children {
   612  		if err := child.WaitForClose(time.Until(until)); err != nil {
   613  			return err
   614  		}
   615  	}
   616  	return nil
   617  }
   618  
   619  //------------------------------------------------------------------------------