github.com/Jeffail/benthos/v3@v3.65.0/lib/processor/process_field.go (about)

     1  package processor
     2  
     3  import (
     4  	"fmt"
     5  	"strconv"
     6  	"strings"
     7  	"time"
     8  
     9  	"github.com/Jeffail/benthos/v3/internal/docs"
    10  	"github.com/Jeffail/benthos/v3/internal/interop"
    11  	"github.com/Jeffail/benthos/v3/internal/tracing"
    12  	"github.com/Jeffail/benthos/v3/lib/log"
    13  	"github.com/Jeffail/benthos/v3/lib/message"
    14  	"github.com/Jeffail/benthos/v3/lib/metrics"
    15  	"github.com/Jeffail/benthos/v3/lib/types"
    16  	"github.com/Jeffail/gabs/v2"
    17  )
    18  
    19  //------------------------------------------------------------------------------
    20  
    21  func init() {
    22  	Constructors[TypeProcessField] = TypeSpec{
    23  		constructor: NewProcessField,
    24  		Summary: `
    25  A processor that extracts the value of a field [dot path](/docs/configuration/field_paths)
    26  within payloads according to a specified [codec](#codec), applies a list of
    27  processors to the extracted value and finally sets the field within the original
    28  payloads to the processed result.`,
    29  		Status: docs.StatusDeprecated,
    30  		FieldSpecs: docs.FieldSpecs{
    31  			docs.FieldCommon("codec", "A [codec](#codec) to use in order to extract (and set) the target field.").HasOptions("json", "metadata"),
    32  			docs.FieldCommon("path", "A [dot path](/docs/configuration/field_paths) pointing to the target field."),
    33  			docs.FieldCommon(
    34  				"result_type", "The final data type to marshal the processing result into. The `discard` type is a special case that discards the result of the processing steps entirely.",
    35  			).HasOptions("string", "int", "float", "bool", "object", "discard"),
    36  			docs.FieldCommon("processors", "A list of child processors to execute on the extracted value.").Array().HasType(docs.FieldTypeProcessor),
    37  			PartsFieldSpec,
    38  		},
    39  		Description: `
    40  The result can be marshalled into a specific data type with the field
    41  [` + "`result_type`" + `](#result_type).
    42  
    43  It's therefore possible to use this codec without any child processors as a way
    44  of casting string values into other types. For example, with an input JSON
    45  document ` + "`{\"foo\":\"10\"}`" + ` it's possible to cast the value of the
    46  field foo to an integer type with:
    47  
    48  ` + "```yaml" + `
    49  process_field:
    50    path: foo
    51    result_type: int
    52  ` + "```" + `
    53  
    54  ## Codecs
    55  
    56  ### ` + "`json`" + `
    57  
    58  Parses the payload as a JSON document, extracts and sets the field using a dot
    59  notation path.
    60  
    61  ### ` + "`metadata`" + `
    62  
    63  Extracts and sets a metadata value identified by the path field.`,
    64  		Footnotes: `
    65  ## Alternatives
    66  
    67  The ` + "[`branch` processor](/docs/components/processors/branch)" + ` offers a
    68  more flexible and robust way to perform the actions of this processor.`,
    69  	}
    70  }
    71  
    72  //------------------------------------------------------------------------------
    73  
// ProcessFieldConfig is a config struct containing fields for the ProcessField
// processor.
type ProcessFieldConfig struct {
	// Parts lists the indexes of the message parts to process. When empty,
	// every part of the message is targeted.
	Parts []int `json:"parts" yaml:"parts"`
	// Codec selects how the target value is extracted and set, either "json"
	// or "metadata".
	Codec string `json:"codec" yaml:"codec"`
	// Path identifies the target: a dot separated path for the json codec, or
	// the metadata key for the metadata codec.
	Path string `json:"path" yaml:"path"`
	// ResultType names the type the processed result is marshalled into
	// before being set, or "discard" to drop the result.
	ResultType string `json:"result_type" yaml:"result_type"`
	// Processors are the child processors applied to the extracted value.
	Processors []Config `json:"processors" yaml:"processors"`
}
    83  
    84  // NewProcessFieldConfig returns a default ProcessFieldConfig.
    85  func NewProcessFieldConfig() ProcessFieldConfig {
    86  	return ProcessFieldConfig{
    87  		Parts:      []int{},
    88  		Codec:      "json",
    89  		Path:       "",
    90  		ResultType: "string",
    91  		Processors: []Config{},
    92  	}
    93  }
    94  
    95  //------------------------------------------------------------------------------
    96  
// processFieldCodec abstracts how a target value is pulled out of a message
// part before processing and written back afterwards.
type processFieldCodec interface {
	// CreateRequest builds the part handed to the child processors from a
	// source part.
	CreateRequest(types.Part) (types.Part, error)
	// ExtractResult writes the processed value held by from into to.
	ExtractResult(from, to types.Part) error
	// Discard reports whether processing results should be dropped instead
	// of being written back with ExtractResult.
	Discard() bool
}
   102  
// ProcessField is a processor that applies a list of child processors to a
// field extracted from the original payload.
type ProcessField struct {
	// parts holds the configured part indexes; empty means all parts.
	parts []int
	// path is the configured path split on ".".
	path []string
	// children are the processors applied to the extracted values.
	children []types.Processor

	// codec extracts the target value and writes the result back.
	codec processFieldCodec

	log log.Modular

	// Counters registered in NewProcessField under the names "count",
	// "error", "error.parse", "error.misaligned",
	// "error.misaligned_messages", "sent" and "batch.sent" respectively.
	mCount              metrics.StatCounter
	mErr                metrics.StatCounter
	mErrParse           metrics.StatCounter
	mErrMisaligned      metrics.StatCounter
	mErrMisalignedBatch metrics.StatCounter
	mSent               metrics.StatCounter
	mBatchSent          metrics.StatCounter
}
   122  
   123  // NewProcessField returns a ProcessField processor.
   124  func NewProcessField(
   125  	conf Config, mgr types.Manager, log log.Modular, stats metrics.Type,
   126  ) (Type, error) {
   127  	var children []types.Processor
   128  	for i, pconf := range conf.ProcessField.Processors {
   129  		pMgr, pLog, pStats := interop.LabelChild(fmt.Sprintf("%v", i), mgr, log, stats)
   130  		proc, err := New(pconf, pMgr, pLog, pStats)
   131  		if err != nil {
   132  			return nil, err
   133  		}
   134  		children = append(children, proc)
   135  	}
   136  	codec, err := stringToProcessFieldCodec(conf.ProcessField.Path, conf.ProcessField.Codec, conf.ProcessField.ResultType)
   137  	if err != nil {
   138  		return nil, err
   139  	}
   140  	return &ProcessField{
   141  		parts:    conf.ProcessField.Parts,
   142  		path:     strings.Split(conf.ProcessField.Path, "."),
   143  		children: children,
   144  		codec:    codec,
   145  
   146  		log: log,
   147  
   148  		mCount:              stats.GetCounter("count"),
   149  		mErr:                stats.GetCounter("error"),
   150  		mErrParse:           stats.GetCounter("error.parse"),
   151  		mErrMisaligned:      stats.GetCounter("error.misaligned"),
   152  		mErrMisalignedBatch: stats.GetCounter("error.misaligned_messages"),
   153  		mSent:               stats.GetCounter("sent"),
   154  		mBatchSent:          stats.GetCounter("batch.sent"),
   155  	}, nil
   156  }
   157  
   158  //------------------------------------------------------------------------------
   159  
// processFieldJSONCodec is the processFieldCodec that targets a field within
// a JSON document.
type processFieldJSONCodec struct {
	// path is the sequence of keys leading to the target field.
	path []string
	// resultMarshaller converts a processed part into the value to set at
	// path. It is nil when results are to be discarded.
	resultMarshaller func(p types.Part) (interface{}, error)
}
   164  
   165  func newProcessFieldJSONCodec(path, resultStr string) (*processFieldJSONCodec, error) {
   166  	var resultMarshaller func(p types.Part) (interface{}, error)
   167  	switch resultStr {
   168  	case "string":
   169  		resultMarshaller = processFieldJSONResultStringMarshaller
   170  	case "int":
   171  		resultMarshaller = processFieldJSONResultIntMarshaller
   172  	case "float":
   173  		resultMarshaller = processFieldJSONResultFloatMarshaller
   174  	case "bool":
   175  		resultMarshaller = processFieldJSONResultBoolMarshaller
   176  	case "object":
   177  		resultMarshaller = processFieldJSONResultObjectMarshaller
   178  	case "array":
   179  		resultMarshaller = processFieldJSONResultArrayMarshaller
   180  	case "discard":
   181  		resultMarshaller = nil
   182  	default:
   183  		return nil, fmt.Errorf("unrecognised json codec result_type: %v", resultStr)
   184  	}
   185  	return &processFieldJSONCodec{
   186  		path:             strings.Split(path, "."),
   187  		resultMarshaller: resultMarshaller,
   188  	}, nil
   189  }
   190  
   191  func (p *processFieldJSONCodec) CreateRequest(source types.Part) (types.Part, error) {
   192  	reqPart := source.Copy()
   193  	jObj, err := reqPart.JSON()
   194  	if err != nil {
   195  		return nil, err
   196  	}
   197  	gObj := gabs.Wrap(jObj)
   198  	gTarget := gObj.S(p.path...)
   199  	switch t := gTarget.Data().(type) {
   200  	case string:
   201  		reqPart.Set([]byte(t))
   202  	default:
   203  		reqPart.SetJSON(gTarget.Data())
   204  	}
   205  	return reqPart, nil
   206  }
   207  
   208  func (p *processFieldJSONCodec) ExtractResult(from, to types.Part) error {
   209  	resVal, err := p.resultMarshaller(from)
   210  	if err != nil {
   211  		return err
   212  	}
   213  	jObj, err := to.JSON()
   214  	if err == nil {
   215  		jObj, err = message.CopyJSON(jObj)
   216  	}
   217  	if err != nil {
   218  		return err
   219  	}
   220  	gObj := gabs.Wrap(jObj)
   221  	gObj.Set(resVal, p.path...)
   222  	return to.SetJSON(gObj.Data())
   223  }
   224  
   225  func (p *processFieldJSONCodec) Discard() bool {
   226  	return p.resultMarshaller == nil
   227  }
   228  
   229  func processFieldJSONResultStringMarshaller(p types.Part) (interface{}, error) {
   230  	return string(p.Get()), nil
   231  }
   232  
   233  func processFieldJSONResultIntMarshaller(p types.Part) (interface{}, error) {
   234  	return strconv.Atoi(string(p.Get()))
   235  }
   236  
   237  func processFieldJSONResultFloatMarshaller(p types.Part) (interface{}, error) {
   238  	return strconv.ParseFloat(string(p.Get()), 64)
   239  }
   240  
   241  func processFieldJSONResultBoolMarshaller(p types.Part) (interface{}, error) {
   242  	str := string(p.Get())
   243  	if str == "true" {
   244  		return true, nil
   245  	}
   246  	if str == "false" {
   247  		return false, nil
   248  	}
   249  	return nil, fmt.Errorf("value '%v' could not be parsed as bool", str)
   250  }
   251  
   252  func processFieldJSONResultObjectMarshaller(p types.Part) (interface{}, error) {
   253  	jVal, err := p.JSON()
   254  	if err != nil {
   255  		return nil, err
   256  	}
   257  	// We consider null as an object
   258  	if jVal == nil {
   259  		return nil, nil
   260  	}
   261  	if jObj, ok := jVal.(map[string]interface{}); ok {
   262  		return jObj, nil
   263  	}
   264  	return nil, fmt.Errorf("failed to parse JSON type '%T' into object", jVal)
   265  }
   266  
   267  func processFieldJSONResultArrayMarshaller(p types.Part) (interface{}, error) {
   268  	jVal, err := p.JSON()
   269  	if err != nil {
   270  		return nil, err
   271  	}
   272  	if jArray, ok := jVal.([]interface{}); ok {
   273  		return jArray, nil
   274  	}
   275  	return nil, fmt.Errorf("failed to parse JSON type '%T' into array", jVal)
   276  }
   277  
   278  //------------------------------------------------------------------------------
   279  
// processFieldMetadataCodec is the processFieldCodec that targets a single
// metadata value of a message part.
type processFieldMetadataCodec struct {
	// key is the metadata key that is read and written.
	key string
	// discard is true when the configured result type is "discard".
	discard bool
}
   284  
   285  func newProcessFieldMetadataCodec(path, resultStr string) (*processFieldMetadataCodec, error) {
   286  	return &processFieldMetadataCodec{
   287  		key:     path,
   288  		discard: resultStr == "discard",
   289  	}, nil
   290  }
   291  
   292  func (p *processFieldMetadataCodec) CreateRequest(source types.Part) (types.Part, error) {
   293  	reqPart := source.Copy()
   294  	reqPart.Set([]byte(reqPart.Metadata().Get(p.key)))
   295  	return reqPart, nil
   296  }
   297  
   298  func (p *processFieldMetadataCodec) ExtractResult(from, to types.Part) error {
   299  	to.Metadata().Set(p.key, string(from.Get()))
   300  	return nil
   301  }
   302  
   303  func (p *processFieldMetadataCodec) Discard() bool {
   304  	return p.discard
   305  }
   306  
   307  //------------------------------------------------------------------------------
   308  
   309  func stringToProcessFieldCodec(path, codecStr, resultStr string) (processFieldCodec, error) {
   310  	switch codecStr {
   311  	case "json":
   312  		return newProcessFieldJSONCodec(path, resultStr)
   313  	case "metadata":
   314  		return newProcessFieldMetadataCodec(path, resultStr)
   315  	}
   316  	return nil, fmt.Errorf("unrecognised codec: %v", codecStr)
   317  }
   318  
   319  //------------------------------------------------------------------------------
   320  
   321  // ProcessMessage applies the processor to a message, either creating >0
   322  // resulting messages or a response to be sent back to the message source.
   323  func (p *ProcessField) ProcessMessage(msg types.Message) (msgs []types.Message, res types.Response) {
   324  	p.mCount.Incr(1)
   325  	payload := msg.Copy()
   326  	resMsgs := [1]types.Message{payload}
   327  	msgs = resMsgs[:]
   328  
   329  	targetParts := p.parts
   330  	if len(targetParts) == 0 {
   331  		targetParts = make([]int, payload.Len())
   332  		for i := range targetParts {
   333  			targetParts[i] = i
   334  		}
   335  	}
   336  
   337  	reqMsg := message.New(nil)
   338  	for _, index := range targetParts {
   339  		reqPart, err := p.codec.CreateRequest(payload.Get(index))
   340  		if err != nil {
   341  			p.mErrParse.Incr(1)
   342  			p.mErr.Incr(1)
   343  			p.log.Errorf("Failed to decode part: %v\n", err)
   344  			reqPart = payload.Get(index).Copy()
   345  			reqPart.Set(nil)
   346  			FlagErr(reqPart, err)
   347  		}
   348  		reqMsg.Append(reqPart)
   349  	}
   350  
   351  	propMsg, _ := tracing.WithChildSpans(TypeProcessField, reqMsg)
   352  	resultMsgs, _ := ExecuteAll(p.children, propMsg)
   353  	resMsg := message.New(nil)
   354  	for _, rMsg := range resultMsgs {
   355  		rMsg.Iter(func(i int, p types.Part) error {
   356  			resMsg.Append(p.Copy())
   357  			return nil
   358  		})
   359  	}
   360  	defer tracing.FinishSpans(propMsg)
   361  
   362  	if p.codec.Discard() {
   363  		// With no result codec, if our results are inline with our original
   364  		// batch we copy the metadata only.
   365  		if len(targetParts) == resMsg.Len() {
   366  			for i, index := range targetParts {
   367  				tPart := payload.Get(index)
   368  				tPartMeta := tPart.Metadata()
   369  				resMsg.Get(i).Metadata().Iter(func(k, v string) error {
   370  					tPartMeta.Set(k, v)
   371  					return nil
   372  				})
   373  			}
   374  		}
   375  		p.mBatchSent.Incr(1)
   376  		p.mSent.Incr(int64(payload.Len()))
   377  		return
   378  	}
   379  
   380  	if exp, act := len(targetParts), resMsg.Len(); exp != act {
   381  		p.mBatchSent.Incr(1)
   382  		p.mSent.Incr(int64(payload.Len()))
   383  		p.mErr.Incr(1)
   384  		p.mErrMisalignedBatch.Incr(1)
   385  		p.log.Errorf("Misaligned processor result batch. Expected %v messages, received %v\n", exp, act)
   386  		partsErr := fmt.Errorf("mismatched processor result, expected %v, received %v messages", exp, act)
   387  		payload.Iter(func(i int, p types.Part) error {
   388  			FlagErr(p, partsErr)
   389  			return nil
   390  		})
   391  		return
   392  	}
   393  
   394  	for i, index := range targetParts {
   395  		tPart := payload.Get(index)
   396  		tPartMeta := tPart.Metadata()
   397  		resMsg.Get(i).Metadata().Iter(func(k, v string) error {
   398  			tPartMeta.Set(k, v)
   399  			return nil
   400  		})
   401  		rErr := p.codec.ExtractResult(resMsg.Get(i), tPart)
   402  		if rErr != nil {
   403  			p.log.Errorf("Failed to marshal result: %v\n", rErr)
   404  			FlagErr(tPart, rErr)
   405  			continue
   406  		}
   407  	}
   408  
   409  	p.mBatchSent.Incr(1)
   410  	p.mSent.Incr(int64(payload.Len()))
   411  	return
   412  }
   413  
   414  // CloseAsync shuts down the processor and stops processing requests.
   415  func (p *ProcessField) CloseAsync() {
   416  	for _, c := range p.children {
   417  		c.CloseAsync()
   418  	}
   419  }
   420  
   421  // WaitForClose blocks until the processor has closed down.
   422  func (p *ProcessField) WaitForClose(timeout time.Duration) error {
   423  	stopBy := time.Now().Add(timeout)
   424  	for _, c := range p.children {
   425  		if err := c.WaitForClose(time.Until(stopBy)); err != nil {
   426  			return err
   427  		}
   428  	}
   429  	return nil
   430  }
   431  
   432  //------------------------------------------------------------------------------