github.com/Jeffail/benthos/v3@v3.65.0/lib/processor/jq.go (about)

     1  package processor
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/json"
     6  	"fmt"
     7  	"time"
     8  
     9  	"github.com/Jeffail/benthos/v3/internal/docs"
    10  	"github.com/Jeffail/benthos/v3/internal/tracing"
    11  	"github.com/Jeffail/benthos/v3/lib/log"
    12  	"github.com/Jeffail/benthos/v3/lib/message"
    13  	"github.com/Jeffail/benthos/v3/lib/metrics"
    14  	"github.com/Jeffail/benthos/v3/lib/response"
    15  	"github.com/Jeffail/benthos/v3/lib/types"
    16  	"github.com/itchyny/gojq"
    17  )
    18  
    19  func init() {
    20  	Constructors[TypeJQ] = TypeSpec{
    21  		constructor: NewJQ,
    22  		Status:      docs.StatusStable,
    23  		Categories: []Category{
    24  			CategoryMapping,
    25  		},
    26  		Summary: `
    27  Transforms and filters messages using jq queries.`,
    28  		Description: `
    29  :::note Try out Bloblang
    30  For better performance and improved capabilities try out native Benthos mapping with the [bloblang processor](/docs/components/processors/bloblang).
    31  :::
    32  
    33  The provided query is executed on each message, targeting either the contents
    34  as a structured JSON value or as a raw string using the field ` + "`raw`" + `,
    35  and the message is replaced with the query result.
    36  
    37  Message metadata is also accessible within the query from the variable
    38  ` + "`$metadata`" + `.
    39  
    40  This processor uses the [gojq library][gojq], and therefore does not require
    41  jq to be installed as a dependency. However, this also means there are some
    42  differences in how these queries are executed versus the jq cli which you can
    43  [read about here][gojq-difference].
    44  
    45  If the query does not emit any value then the message is filtered, if the query
    46  returns multiple values then the resulting message will be an array containing
    47  all values.
    48  
    49  The full query syntax is described in [jq's documentation][jq-docs].
    50  
    51  ## Error Handling
    52  
    53  Queries can fail, in which case the message remains unchanged, errors are
    54  logged, and the message is flagged as having failed, allowing you to use
    55  [standard processor error handling patterns](/docs/configuration/error_handling).`,
    56  		Footnotes: `
    57  [gojq]: https://github.com/itchyny/gojq
    58  [gojq-difference]: https://github.com/itchyny/gojq#difference-to-jq
    59  [jq-docs]: https://stedolan.github.io/jq/manual/`,
    60  		Examples: []docs.AnnotatedExample{
    61  			{
    62  				Title: "Mapping",
    63  				Summary: `
    64  When receiving JSON documents of the form:
    65  
    66  ` + "```json" + `
    67  {
    68    "locations": [
    69      {"name": "Seattle", "state": "WA"},
    70      {"name": "New York", "state": "NY"},
    71      {"name": "Bellevue", "state": "WA"},
    72      {"name": "Olympia", "state": "WA"}
    73    ]
    74  }
    75  ` + "```" + `
    76  
    77  We could collapse the location names from the state of Washington into a field ` + "`Cities`" + `:
    78  
    79  ` + "```json" + `
    80  {"Cities": "Bellevue, Olympia, Seattle"}
    81  ` + "```" + `
    82  
    83  With the following config:`,
    84  				Config: `
    85  pipeline:
    86    processors:
    87      - jq:
    88          query: '{Cities: .locations | map(select(.state == "WA").name) | sort | join(", ") }'
    89  `,
    90  			},
    91  		},
    92  		FieldSpecs: docs.FieldSpecs{
    93  			docs.FieldCommon("query", "The jq query to filter and transform messages with."),
    94  			docs.FieldAdvanced("raw", "Whether to process the input as a raw string instead of as JSON."),
    95  			docs.FieldAdvanced("output_raw", "Whether to output raw text (unquoted) instead of JSON strings when the emitted values are string types."),
    96  		},
    97  	}
    98  }
    99  
   100  //------------------------------------------------------------------------------
   101  
// JQConfig contains configuration fields for the JQ processor.
//
// Query is the jq query to filter and transform messages with. Raw selects
// whether the input is handed to the query as a raw string instead of as
// parsed JSON. OutputRaw selects whether emitted string values are written
// unquoted instead of as JSON strings.
type JQConfig struct {
	Query     string `json:"query" yaml:"query"`
	Raw       bool   `json:"raw" yaml:"raw"`
	OutputRaw bool   `json:"output_raw" yaml:"output_raw"`
}
   108  
   109  // NewJQConfig returns a JQConfig with default values.
   110  func NewJQConfig() JQConfig {
   111  	return JQConfig{
   112  		Query: ".",
   113  	}
   114  }
   115  
   116  //------------------------------------------------------------------------------
   117  
// jqCompileOptions are the compiler options applied to every query. They
// declare the $metadata variable so queries may reference it; the matching
// value is supplied positionally when the compiled code is run.
var jqCompileOptions = []gojq.CompilerOption{
	gojq.WithVariables([]string{"$metadata"}),
}
   121  
// JQ is a processor that passes messages through gojq.
type JQ struct {
	// Processor configuration, logger, metrics aggregator and the compiled
	// form of conf.Query (populated by NewJQ).
	conf  JQConfig
	log   log.Modular
	stats metrics.Type
	code  *gojq.Code

	// Metric counters, all obtained from stats in NewJQ.
	// NOTE(review): mCountParts is created but never incremented in the
	// visible code.
	mCount        metrics.StatCounter
	mCountParts   metrics.StatCounter
	mSent         metrics.StatCounter
	mBatchSent    metrics.StatCounter
	mDropped      metrics.StatCounter
	mDroppedParts metrics.StatCounter
	mErr          metrics.StatCounter
	mErrJSONParse metrics.StatCounter
	mErrJSONSet   metrics.StatCounter
	mErrQuery     metrics.StatCounter
}
   140  
   141  // NewJQ returns a JQ processor.
   142  func NewJQ(
   143  	conf Config, mgr types.Manager, log log.Modular, stats metrics.Type,
   144  ) (Type, error) {
   145  	j := &JQ{
   146  		conf:  conf.JQ,
   147  		stats: stats,
   148  		log:   log,
   149  
   150  		mCount:        stats.GetCounter("count"),
   151  		mCountParts:   stats.GetCounter("count_parts"),
   152  		mSent:         stats.GetCounter("sent"),
   153  		mBatchSent:    stats.GetCounter("batch.count"),
   154  		mDropped:      stats.GetCounter("dropped"),
   155  		mDroppedParts: stats.GetCounter("dropped_num_parts"),
   156  		mErr:          stats.GetCounter("error"),
   157  		mErrJSONParse: stats.GetCounter("error.json_parse"),
   158  		mErrJSONSet:   stats.GetCounter("error.json_set"),
   159  		mErrQuery:     stats.GetCounter("error.query"),
   160  	}
   161  
   162  	query, err := gojq.Parse(j.conf.Query)
   163  	if err != nil {
   164  		return nil, fmt.Errorf("error parsing jq query: %w", err)
   165  	}
   166  
   167  	j.code, err = gojq.Compile(query, jqCompileOptions...)
   168  	if err != nil {
   169  		return nil, fmt.Errorf("error compiling jq query: %w", err)
   170  	}
   171  
   172  	return j, nil
   173  }
   174  
   175  //------------------------------------------------------------------------------
   176  
   177  func (j *JQ) getPartMetadata(part types.Part) map[string]interface{} {
   178  	metadata := map[string]interface{}{}
   179  	part.Metadata().Iter(func(k, v string) error {
   180  		metadata[k] = v
   181  		return nil
   182  	})
   183  	return metadata
   184  }
   185  
   186  func (j *JQ) getPartValue(part types.Part, raw bool) (obj interface{}, err error) {
   187  	if raw {
   188  		return string(part.Get()), nil
   189  	}
   190  	obj, err = part.JSON()
   191  	if err == nil {
   192  		obj, err = message.CopyJSON(obj)
   193  	}
   194  	if err != nil {
   195  		j.mErrJSONParse.Incr(1)
   196  		j.log.Debugf("Failed to parse part into json: %v\n", err)
   197  		return nil, err
   198  	}
   199  	return obj, nil
   200  }
   201  
   202  // ProcessMessage applies the processor to a message, either creating >0
   203  // resulting messages or a response to be sent back to the message source.
   204  func (j *JQ) ProcessMessage(msg types.Message) ([]types.Message, types.Response) {
   205  	j.mCount.Incr(1)
   206  
   207  	newMsg := msg.Copy()
   208  	iteratePartsFilterableWithSpan(TypeJQ, nil, newMsg, func(index int, span *tracing.Span, part types.Part) (bool, error) {
   209  		in, err := j.getPartValue(part, j.conf.Raw)
   210  		if err != nil {
   211  			j.mErr.Incr(1)
   212  			return false, err
   213  		}
   214  		metadata := j.getPartMetadata(part)
   215  
   216  		var emitted []interface{}
   217  		iter := j.code.Run(in, metadata)
   218  		for {
   219  			out, ok := iter.Next()
   220  			if !ok {
   221  				break
   222  			}
   223  
   224  			if err, ok := out.(error); ok {
   225  				j.log.Debugf(err.Error())
   226  				j.mErr.Incr(1)
   227  				j.mErrQuery.Incr(1)
   228  				return false, err
   229  			}
   230  
   231  			j.mSent.Incr(1)
   232  			emitted = append(emitted, out)
   233  		}
   234  
   235  		if j.conf.OutputRaw {
   236  			raw, err := j.marshalRaw(emitted)
   237  			if err != nil {
   238  				j.log.Debugf("Failed to marshal raw text: %s", err)
   239  				j.mErr.Incr(1)
   240  				return false, err
   241  			}
   242  
   243  			// Sometimes the query result is an empty string. Example:
   244  			//    echo '{ "foo": "" }' | jq .foo
   245  			// In that case we want pass on the empty string instead of treating it as
   246  			// an empty message and dropping it
   247  			if len(raw) == 0 && len(emitted) == 0 {
   248  				j.mDroppedParts.Incr(1)
   249  				return false, nil
   250  			}
   251  
   252  			part.Set(raw)
   253  			return true, nil
   254  		} else if len(emitted) > 1 {
   255  			if err = part.SetJSON(emitted); err != nil {
   256  				j.log.Debugf("Failed to set part JSON: %v\n", err)
   257  				j.mErr.Incr(1)
   258  				j.mErrJSONSet.Incr(1)
   259  				return false, err
   260  			}
   261  		} else if len(emitted) == 1 {
   262  			if err = part.SetJSON(emitted[0]); err != nil {
   263  				j.log.Debugf("Failed to set part JSON: %v\n", err)
   264  				j.mErr.Incr(1)
   265  				j.mErrJSONSet.Incr(1)
   266  				return false, err
   267  			}
   268  		} else {
   269  			j.mDroppedParts.Incr(1)
   270  			return false, nil
   271  		}
   272  
   273  		return true, nil
   274  	})
   275  
   276  	if newMsg.Len() == 0 {
   277  		j.mDropped.Incr(1)
   278  		return nil, response.NewAck()
   279  	}
   280  
   281  	j.mBatchSent.Incr(1)
   282  	j.mSent.Incr(int64(newMsg.Len()))
   283  
   284  	return []types.Message{newMsg}, nil
   285  }
   286  
// CloseAsync shuts down the processor and stops processing requests. The
// method body is empty, so calling it has no effect for this processor.
func (*JQ) CloseAsync() {
}
   290  
// WaitForClose blocks until the processor has closed down. This implementation
// returns nil immediately and does not use the timeout argument.
func (*JQ) WaitForClose(timeout time.Duration) error {
	return nil
}
   295  
   296  func (j *JQ) marshalRaw(values []interface{}) ([]byte, error) {
   297  	buf := bytes.NewBufferString("")
   298  
   299  	for index, el := range values {
   300  		var rawResult []byte
   301  
   302  		val, isString := el.(string)
   303  		if isString {
   304  			rawResult = []byte(val)
   305  		} else {
   306  			marshalled, err := json.Marshal(el)
   307  			if err != nil {
   308  				return nil, fmt.Errorf("failed marshal JQ result at index %d: %w", index, err)
   309  			}
   310  
   311  			rawResult = marshalled
   312  		}
   313  
   314  		if _, err := buf.Write(rawResult); err != nil {
   315  			return nil, fmt.Errorf("failed to write JQ result at index %d: %w", index, err)
   316  		}
   317  	}
   318  
   319  	bs := buf.Bytes()
   320  	return bs, nil
   321  }