github.com/Jeffail/benthos/v3@v3.65.0/lib/processor/process_map.go (about)

     1  package processor
     2  
     3  import (
     4  	"encoding/json"
     5  	"errors"
     6  	"fmt"
     7  	"time"
     8  
     9  	"github.com/Jeffail/benthos/v3/internal/docs"
    10  	"github.com/Jeffail/benthos/v3/internal/interop"
    11  	"github.com/Jeffail/benthos/v3/internal/tracing"
    12  	"github.com/Jeffail/benthos/v3/lib/condition"
    13  	"github.com/Jeffail/benthos/v3/lib/log"
    14  	"github.com/Jeffail/benthos/v3/lib/message/mapper"
    15  	"github.com/Jeffail/benthos/v3/lib/metrics"
    16  	"github.com/Jeffail/benthos/v3/lib/types"
    17  )
    18  
    19  //------------------------------------------------------------------------------
    20  
// processMapFields holds the documentation specs for every configuration field
// of the process_map processor: conditions, premap, premap_optional,
// processors, postmap, postmap_optional, and the shared PartsFieldSpec. The
// descriptions and examples below are rendered in generated documentation, so
// their text is part of the user-facing output.
var processMapFields = docs.FieldSpecs{
	// Conditions gate which messages get mapped and processed at all.
	docs.FieldCommon("conditions", "A list of [conditions](/docs/components/conditions/about) to test against messages. If any condition fails then the message will not be mapped and processed.",
		[]interface{}{
			map[string]interface{}{
				"bloblang": "document.urls.length() > 0",
			},
		},
	).Array().HasType(docs.FieldTypeCondition),
	// Mandatory request mapping; a missing source stops processing of the message.
	docs.FieldString(
		"premap", "A map of source to destination [paths](/docs/configuration/field_paths) used to create a new object from the original. An empty (or dot `.`) path indicates the root of the object. If a map source is not found then the message will not be processed, for optional sources use the field [`premap_optional`](#premap_optional).",
		map[string]string{
			".": "field.from.document",
		},
		map[string]string{
			"foo":     "root.body.foo",
			"bar.baz": "root.extra.baz",
		},
	).Map(),
	docs.FieldString("premap_optional", "A map of optional source to destination [paths](/docs/configuration/field_paths) used to create a new object from the original.").Map(),
	docs.FieldCommon("processors", "A list of processors to apply to mapped payloads.").Array().HasType(docs.FieldTypeProcessor),
	// Mandatory result mapping; a missing source abandons the mapping.
	docs.FieldString(
		"postmap", "A map of destination to source [paths](/docs/configuration/field_paths) used to map results from processing back into the original payload. An empty (or dot `.`) path indicates the root of the object. If a source is not found then the mapping is abandoned, for optional sources use the [`postmap_optional`](#postmap_optional) field.",
		map[string]string{
			"results.foo": ".",
		},
	).Map(),
	docs.FieldString("postmap_optional", "A map of optional destination to source [paths](/docs/configuration/field_paths) used to map results from processing back into the original payload.").Map(),
	PartsFieldSpec,
}
    50  
// init registers the process_map processor in the package-level Constructors
// map under TypeProcessMap. The registered TypeSpec bundles the constructor
// (which delegates to NewProcessMap using the ProcessMap section of the
// processor Config), the field specs, and the summary, description and
// footnote text used for documentation.
func init() {
	Constructors[TypeProcessMap] = TypeSpec{
		constructor: func(conf Config, mgr types.Manager, log log.Modular, stats metrics.Type) (Type, error) {
			return NewProcessMap(conf.ProcessMap, mgr, log, stats)
		},
		FieldSpecs: processMapFields,
		Summary: `
A processor that extracts and maps fields identified via
[dot path](/docs/configuration/field_paths) from the original payload into a new
object, applies a list of processors to the newly constructed object, and
finally maps the result back into the original payload.`,
		// Marked deprecated: the description below points users at the branch
		// processor as the replacement.
		Status: docs.StatusDeprecated,
		Description: `
## Alternatives

All functionality of this processor has been superseded by the
[branch](/docs/components/processors/branch) processor.

This processor is useful for performing processors on subsections of a payload.
For example, you could extract sections of a JSON object in order to construct
a reduced request object for an ` + "[`http`](/docs/components/processors/http)" + `
processor, then map the result back into a field within the original object.

The order of stages of this processor are as follows:

- [Conditions](#conditions) are tested (if specified) against each message,
  messages that do not pass will not be processed.
- Messages that are flagged for processing are mapped according to the
  [premap](#premap) fields, creating a new object. If the premap stage fails
  (targets are not found) the message will not be processed.
- Messages that are mapped are processed as a batch.
- After all child processors are applied to the mapped messages they are mapped
  back into the original messages they originated from following the
  [postmap](#postmap) fields. If the postmap stage fails the mapping is skipped
  and the message payload remains as it started.

If the premap is empty then the full payload is sent to the processors, if the
postmap is empty then the processed result replaces the original contents
entirely.

### Batch Ordering

This processor supports batched messages, but the list of processors to apply
must NOT change the ordering (or count) of the messages (do not use a
` + "`group_by`" + ` processor, for example).

### Error Handling

When premap, processing or postmap stages fail the underlying message will
remain unchanged, the errors are logged, and the message is flagged as having
failed, allowing you to use
[standard processor error handling patterns](/docs/configuration/error_handling)
for recovery.`,
		Footnotes: `
## Examples

Given a message payload of:

` + "```json" + `
{
  "doc": {
    "id": "foo",
    "title": "foo bar baz",
    "description": "here's a thing",
    "content": "this is a body"
  }
}
` + "```" + `

We might wish to perform language detection on the ` + "`doc.content`" + ` field
by sending it to a hypothetical HTTP service. We do not wish to overwrite the
original document with the result, and instead want to place it within the path
` + "`doc.language`" + `, and so this is a good use case for ` + "`process_map`" + `:

` + "```yaml" + `
pipeline:
  processors:
    - process_map:
        premap:
          content: doc.content
        processors:
          - http:
              url: http://localhost:1234
        postmap:
          doc.language: .
` + "```" + `

With the above config we would send our target HTTP service the payload
` + "`{\"content\":\"this is a body\"}`" + `, and whatever the service returns
will get mapped into our original document:

` + "```json" + `
{
  "doc": {
    "id": "foo",
    "title": "foo bar baz",
    "description": "here's a thing",
    "content": "this is a body",
    "language": {
      "code": "en",
      "certainty": 0.2
    }
  }
}
` + "```" + ``,
	}
}
   158  
   159  //------------------------------------------------------------------------------
   160  
// ProcessMapConfig is a config struct containing fields for the
// ProcessMap processor.
type ProcessMapConfig struct {
	// Parts selects the indexes of the message parts to process. When empty
	// every part is processed. Negative indexes are resolved relative to the
	// end of the batch and out-of-range indexes are ignored (see CreateResult).
	Parts           []int              `json:"parts" yaml:"parts"`
	// Conditions are tested against messages; messages failing any condition
	// are not mapped and processed.
	Conditions      []condition.Config `json:"conditions" yaml:"conditions"`
	// Premap is the mandatory source-to-destination path mapping used to build
	// the object handed to the child processors.
	Premap          map[string]string  `json:"premap" yaml:"premap"`
	// PremapOptional is the optional counterpart of Premap.
	PremapOptional  map[string]string  `json:"premap_optional" yaml:"premap_optional"`
	// Postmap is the mandatory destination-to-source path mapping used to map
	// processing results back into the original payload.
	Postmap         map[string]string  `json:"postmap" yaml:"postmap"`
	// PostmapOptional is the optional counterpart of Postmap.
	PostmapOptional map[string]string  `json:"postmap_optional" yaml:"postmap_optional"`
	// Processors are the child processors applied to the mapped payloads.
	Processors      []Config           `json:"processors" yaml:"processors"`
}
   172  
   173  // NewProcessMapConfig returns a default ProcessMapConfig.
   174  func NewProcessMapConfig() ProcessMapConfig {
   175  	return ProcessMapConfig{
   176  		Parts:           []int{},
   177  		Conditions:      []condition.Config{},
   178  		Premap:          map[string]string{},
   179  		PremapOptional:  map[string]string{},
   180  		Postmap:         map[string]string{},
   181  		PostmapOptional: map[string]string{},
   182  		Processors:      []Config{},
   183  	}
   184  }
   185  
   186  // Sanitise the configuration into a minimal structure that can be printed
   187  // without changing the intent.
   188  func (p ProcessMapConfig) Sanitise() (map[string]interface{}, error) {
   189  	var err error
   190  	condConfs := make([]interface{}, len(p.Conditions))
   191  	for i, cConf := range p.Conditions {
   192  		if condConfs[i], err = condition.SanitiseConfig(cConf); err != nil {
   193  			return nil, err
   194  		}
   195  	}
   196  	procConfs := make([]interface{}, len(p.Processors))
   197  	for i, pConf := range p.Processors {
   198  		if procConfs[i], err = SanitiseConfig(pConf); err != nil {
   199  			return nil, err
   200  		}
   201  	}
   202  	return map[string]interface{}{
   203  		"parts":            p.Parts,
   204  		"conditions":       condConfs,
   205  		"premap":           p.Premap,
   206  		"premap_optional":  p.PremapOptional,
   207  		"postmap":          p.Postmap,
   208  		"postmap_optional": p.PostmapOptional,
   209  		"processors":       procConfs,
   210  	}, nil
   211  }
   212  
   213  //------------------------------------------------------------------------------
   214  
   215  // UnmarshalJSON ensures that when parsing configs that are in a slice the
   216  // default values are still applied.
   217  func (p *ProcessMapConfig) UnmarshalJSON(bytes []byte) error {
   218  	type confAlias ProcessMapConfig
   219  	aliased := confAlias(NewProcessMapConfig())
   220  
   221  	if err := json.Unmarshal(bytes, &aliased); err != nil {
   222  		return err
   223  	}
   224  
   225  	*p = ProcessMapConfig(aliased)
   226  	return nil
   227  }
   228  
   229  // UnmarshalYAML ensures that when parsing configs that are in a slice the
   230  // default values are still applied.
   231  func (p *ProcessMapConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
   232  	type confAlias ProcessMapConfig
   233  	aliased := confAlias(NewProcessMapConfig())
   234  
   235  	if err := unmarshal(&aliased); err != nil {
   236  		return err
   237  	}
   238  
   239  	*p = ProcessMapConfig(aliased)
   240  	return nil
   241  }
   242  
   243  //------------------------------------------------------------------------------
   244  
   245  // ProcessMap is a processor that applies a list of child processors to a new
   246  // payload mapped from the original, and after processing attempts to overlay
   247  // the results back onto the original payloads according to more mappings.
   248  type ProcessMap struct {
   249  	parts []int
   250  
   251  	mapper   *mapper.Type
   252  	children []types.Processor
   253  
   254  	log log.Modular
   255  
   256  	mCount     metrics.StatCounter
   257  	mErr       metrics.StatCounter
   258  	mErrPre    metrics.StatCounter
   259  	mErrProc   metrics.StatCounter
   260  	mErrPost   metrics.StatCounter
   261  	mSent      metrics.StatCounter
   262  	mBatchSent metrics.StatCounter
   263  }
   264  
   265  // NewProcessMap returns a ProcessField processor.
   266  func NewProcessMap(
   267  	conf ProcessMapConfig, mgr types.Manager, log log.Modular, stats metrics.Type,
   268  ) (*ProcessMap, error) {
   269  	var children []types.Processor
   270  	for i, pconf := range conf.Processors {
   271  		pMgr, pLog, pStats := interop.LabelChild(fmt.Sprintf("processor.%v", i), mgr, log, stats)
   272  		proc, err := New(pconf, pMgr, pLog, pStats)
   273  		if err != nil {
   274  			return nil, err
   275  		}
   276  		children = append(children, proc)
   277  	}
   278  
   279  	var conditions []types.Condition
   280  	for i, cconf := range conf.Conditions {
   281  		cMgr, cLog, cStats := interop.LabelChild(fmt.Sprintf("condition.%v", i), mgr, log, stats)
   282  		cond, err := condition.New(cconf, cMgr, cLog, cStats)
   283  		if err != nil {
   284  			return nil, err
   285  		}
   286  		conditions = append(conditions, cond)
   287  	}
   288  
   289  	p := &ProcessMap{
   290  		parts: conf.Parts,
   291  
   292  		children: children,
   293  
   294  		log:        log,
   295  		mCount:     stats.GetCounter("count"),
   296  		mErr:       stats.GetCounter("error"),
   297  		mErrPre:    stats.GetCounter("error.premap"),
   298  		mErrProc:   stats.GetCounter("error.processors"),
   299  		mErrPost:   stats.GetCounter("error.postmap"),
   300  		mSent:      stats.GetCounter("sent"),
   301  		mBatchSent: stats.GetCounter("batch.sent"),
   302  	}
   303  
   304  	var err error
   305  	if p.mapper, err = mapper.New(
   306  		mapper.OptSetLogger(log),
   307  		mapper.OptSetStats(stats),
   308  		mapper.OptSetConditions(conditions),
   309  		mapper.OptSetReqMap(conf.Premap),
   310  		mapper.OptSetOptReqMap(conf.PremapOptional),
   311  		mapper.OptSetResMap(conf.Postmap),
   312  		mapper.OptSetOptResMap(conf.PostmapOptional),
   313  	); err != nil {
   314  		return nil, err
   315  	}
   316  
   317  	return p, nil
   318  }
   319  
   320  //------------------------------------------------------------------------------
   321  
   322  // ProcessMessage applies the processor to a message, either creating >0
   323  // resulting messages or a response to be sent back to the message source.
   324  func (p *ProcessMap) ProcessMessage(msg types.Message) ([]types.Message, types.Response) {
   325  	propMsg, propSpans := tracing.WithChildSpans(TypeProcessMap, msg.Copy())
   326  	defer func() {
   327  		for _, s := range propSpans {
   328  			s.Finish()
   329  		}
   330  	}()
   331  
   332  	result := msg.DeepCopy()
   333  	err := p.CreateResult(propMsg)
   334  	if err != nil {
   335  		result.Iter(func(i int, p types.Part) error {
   336  			FlagErr(p, err)
   337  			return nil
   338  		})
   339  		msgs := [1]types.Message{result}
   340  		return msgs[:], nil
   341  	}
   342  
   343  	var failed []int
   344  	if failed, err = p.OverlayResult(result, propMsg); err != nil {
   345  		result.Iter(func(i int, p types.Part) error {
   346  			FlagErr(p, err)
   347  			return nil
   348  		})
   349  		msgs := [1]types.Message{result}
   350  		return msgs[:], nil
   351  	}
   352  	for _, i := range failed {
   353  		FlagErr(result.Get(i), errors.New("failed to overlay result from map processors"))
   354  	}
   355  
   356  	msgs := [1]types.Message{result}
   357  	return msgs[:], nil
   358  }
   359  
   360  // TargetsUsed returns a list of target dependencies of this processor derived
   361  // from its premap and premap_optional fields.
   362  func (p *ProcessMap) TargetsUsed() []string {
   363  	return p.mapper.TargetsUsed()
   364  }
   365  
   366  // TargetsProvided returns a list of targets provided by this processor derived
   367  // from its postmap and postmap_optional fields.
   368  func (p *ProcessMap) TargetsProvided() []string {
   369  	return p.mapper.TargetsProvided()
   370  }
   371  
   372  // CreateResult performs reduction and child processors to a payload. The size
   373  // of the payload will remain unchanged, where reduced indexes are nil. This
   374  // result can be overlayed onto the original message in order to complete the
   375  // map.
   376  func (p *ProcessMap) CreateResult(msg types.Message) error {
   377  	p.mCount.Incr(1)
   378  
   379  	if len(p.parts) > 0 {
   380  		parts := make([]types.Part, msg.Len())
   381  		for _, sel := range p.parts {
   382  			index := sel
   383  			if index < 0 {
   384  				index = msg.Len() + index
   385  			}
   386  			if index < 0 || index >= msg.Len() {
   387  				continue
   388  			}
   389  			parts[index] = msg.Get(index)
   390  		}
   391  		msg.SetAll(parts)
   392  	}
   393  
   394  	originalLen := msg.Len()
   395  
   396  	skipped, failed := p.mapper.MapRequests(msg)
   397  	if msg.Len() == 0 {
   398  		msg.SetAll(make([]types.Part, originalLen))
   399  		errMapFailed := errors.New("mapping failed for this message")
   400  		for _, i := range failed {
   401  			FlagErr(msg.Get(i), errMapFailed)
   402  		}
   403  		return nil
   404  	}
   405  
   406  	procResults, err := processMap(msg, p.children)
   407  	if err != nil {
   408  		p.mErrProc.Incr(1)
   409  		p.mErr.Incr(1)
   410  		p.log.Errorf("Processors failed: %v\n", err)
   411  		return err
   412  	}
   413  
   414  	var alignedResult types.Message
   415  	if alignedResult, err = p.mapper.AlignResult(originalLen, skipped, failed, procResults); err != nil {
   416  		p.mErrPost.Incr(1)
   417  		p.mErr.Incr(1)
   418  		p.log.Errorf("Postmap failed: %v\n", err)
   419  		return err
   420  	}
   421  
   422  	for _, i := range failed {
   423  		FlagFail(alignedResult.Get(i))
   424  	}
   425  
   426  	alignedParts := make([]types.Part, alignedResult.Len())
   427  	for i := range alignedParts {
   428  		alignedParts[i] = alignedResult.Get(i)
   429  	}
   430  	msg.SetAll(alignedParts)
   431  	return nil
   432  }
   433  
   434  // OverlayResult attempts to merge the result of a process_map with the original
   435  //  payload as per the map specified in the postmap and postmap_optional fields.
   436  func (p *ProcessMap) OverlayResult(payload, response types.Message) ([]int, error) {
   437  	failed, err := p.mapper.MapResponses(payload, response)
   438  	if err != nil {
   439  		p.mErrPost.Incr(1)
   440  		p.mErr.Incr(1)
   441  		p.log.Errorf("Postmap failed: %v\n", err)
   442  		return nil, err
   443  	}
   444  
   445  	p.mBatchSent.Incr(1)
   446  	p.mSent.Incr(int64(payload.Len()))
   447  	return failed, nil
   448  }
   449  
   450  func processMap(mappedMsg types.Message, processors []types.Processor) ([]types.Message, error) {
   451  	requestMsgs, res := ExecuteAll(processors, mappedMsg)
   452  	if res != nil && res.Error() != nil {
   453  		return nil, res.Error()
   454  	}
   455  
   456  	if len(requestMsgs) == 0 {
   457  		return nil, errors.New("processors resulted in zero messages")
   458  	}
   459  
   460  	return requestMsgs, nil
   461  }
   462  
   463  // CloseAsync shuts down the processor and stops processing requests.
   464  func (p *ProcessMap) CloseAsync() {
   465  	for _, c := range p.children {
   466  		c.CloseAsync()
   467  	}
   468  }
   469  
   470  // WaitForClose blocks until the processor has closed down.
   471  func (p *ProcessMap) WaitForClose(timeout time.Duration) error {
   472  	stopBy := time.Now().Add(timeout)
   473  	for _, c := range p.children {
   474  		if err := c.WaitForClose(time.Until(stopBy)); err != nil {
   475  			return err
   476  		}
   477  	}
   478  	return nil
   479  }