github.com/Jeffail/benthos/v3@v3.65.0/lib/processor/jq.go (about) 1 package processor 2 3 import ( 4 "bytes" 5 "encoding/json" 6 "fmt" 7 "time" 8 9 "github.com/Jeffail/benthos/v3/internal/docs" 10 "github.com/Jeffail/benthos/v3/internal/tracing" 11 "github.com/Jeffail/benthos/v3/lib/log" 12 "github.com/Jeffail/benthos/v3/lib/message" 13 "github.com/Jeffail/benthos/v3/lib/metrics" 14 "github.com/Jeffail/benthos/v3/lib/response" 15 "github.com/Jeffail/benthos/v3/lib/types" 16 "github.com/itchyny/gojq" 17 ) 18 19 func init() { 20 Constructors[TypeJQ] = TypeSpec{ 21 constructor: NewJQ, 22 Status: docs.StatusStable, 23 Categories: []Category{ 24 CategoryMapping, 25 }, 26 Summary: ` 27 Transforms and filters messages using jq queries.`, 28 Description: ` 29 :::note Try out Bloblang 30 For better performance and improved capabilities try out native Benthos mapping with the [bloblang processor](/docs/components/processors/bloblang). 31 ::: 32 33 The provided query is executed on each message, targeting either the contents 34 as a structured JSON value or as a raw string using the field ` + "`raw`" + `, 35 and the message is replaced with the query result. 36 37 Message metadata is also accessible within the query from the variable 38 ` + "`$metadata`" + `. 39 40 This processor uses the [gojq library][gojq], and therefore does not require 41 jq to be installed as a dependency. However, this also means there are some 42 differences in how these queries are executed versus the jq cli which you can 43 [read about here][gojq-difference]. 44 45 If the query does not emit any value then the message is filtered, if the query 46 returns multiple values then the resulting message will be an array containing 47 all values. 48 49 The full query syntax is described in [jq's documentation][jq-docs]. 50 51 ## Error Handling 52 53 Queries can fail, in which case the message remains unchanged, errors are 54 logged, and the message is flagged as having failed, allowing you to use 55 [standard processor error handling patterns](/docs/configuration/error_handling).`, 56 Footnotes: ` 57 [gojq]: https://github.com/itchyny/gojq 58 [gojq-difference]: https://github.com/itchyny/gojq#difference-to-jq 59 [jq-docs]: https://stedolan.github.io/jq/manual/`, 60 Examples: []docs.AnnotatedExample{ 61 { 62 Title: "Mapping", 63 Summary: ` 64 When receiving JSON documents of the form: 65 66 ` + "```json" + ` 67 { 68 "locations": [ 69 {"name": "Seattle", "state": "WA"}, 70 {"name": "New York", "state": "NY"}, 71 {"name": "Bellevue", "state": "WA"}, 72 {"name": "Olympia", "state": "WA"} 73 ] 74 } 75 ` + "```" + ` 76 77 We could collapse the location names from the state of Washington into a field ` + "`Cities`" + `: 78 79 ` + "```json" + ` 80 {"Cities": "Bellevue, Olympia, Seattle"} 81 ` + "```" + ` 82 83 With the following config:`, 84 Config: ` 85 pipeline: 86 processors: 87 - jq: 88 query: '{Cities: .locations | map(select(.state == "WA").name) | sort | join(", ") }' 89 `, 90 }, 91 }, 92 FieldSpecs: docs.FieldSpecs{ 93 docs.FieldCommon("query", "The jq query to filter and transform messages with."), 94 docs.FieldAdvanced("raw", "Whether to process the input as a raw string instead of as JSON."), 95 docs.FieldAdvanced("output_raw", "Whether to output raw text (unquoted) instead of JSON strings when the emitted values are string types."), 96 }, 97 } 98 } 99 100 //------------------------------------------------------------------------------ 101 102 // JQConfig contains configuration fields for the JQ processor. 103 type JQConfig struct { 104 Query string `json:"query" yaml:"query"` 105 Raw bool `json:"raw" yaml:"raw"` 106 OutputRaw bool `json:"output_raw" yaml:"output_raw"` 107 } 108 109 // NewJQConfig returns a JQConfig with default values. 110 func NewJQConfig() JQConfig { 111 return JQConfig{ 112 Query: ".", 113 } 114 } 115 116 //------------------------------------------------------------------------------ 117 118 var jqCompileOptions = []gojq.CompilerOption{ 119 gojq.WithVariables([]string{"$metadata"}), 120 } 121 122 // JQ is a processor that passes messages through gojq. 123 type JQ struct { 124 conf JQConfig 125 log log.Modular 126 stats metrics.Type 127 code *gojq.Code 128 129 mCount metrics.StatCounter 130 mCountParts metrics.StatCounter 131 mSent metrics.StatCounter 132 mBatchSent metrics.StatCounter 133 mDropped metrics.StatCounter 134 mDroppedParts metrics.StatCounter 135 mErr metrics.StatCounter 136 mErrJSONParse metrics.StatCounter 137 mErrJSONSet metrics.StatCounter 138 mErrQuery metrics.StatCounter 139 } 140 141 // NewJQ returns a JQ processor. 142 func NewJQ( 143 conf Config, mgr types.Manager, log log.Modular, stats metrics.Type, 144 ) (Type, error) { 145 j := &JQ{ 146 conf: conf.JQ, 147 stats: stats, 148 log: log, 149 150 mCount: stats.GetCounter("count"), 151 mCountParts: stats.GetCounter("count_parts"), 152 mSent: stats.GetCounter("sent"), 153 mBatchSent: stats.GetCounter("batch.count"), 154 mDropped: stats.GetCounter("dropped"), 155 mDroppedParts: stats.GetCounter("dropped_num_parts"), 156 mErr: stats.GetCounter("error"), 157 mErrJSONParse: stats.GetCounter("error.json_parse"), 158 mErrJSONSet: stats.GetCounter("error.json_set"), 159 mErrQuery: stats.GetCounter("error.query"), 160 } 161 162 query, err := gojq.Parse(j.conf.Query) 163 if err != nil { 164 return nil, fmt.Errorf("error parsing jq query: %w", err) 165 } 166 167 j.code, err = gojq.Compile(query, jqCompileOptions...) 168 if err != nil { 169 return nil, fmt.Errorf("error compiling jq query: %w", err) 170 } 171 172 return j, nil 173 } 174 175 //------------------------------------------------------------------------------ 176 177 func (j *JQ) getPartMetadata(part types.Part) map[string]interface{} { 178 metadata := map[string]interface{}{} 179 part.Metadata().Iter(func(k, v string) error { 180 metadata[k] = v 181 return nil 182 }) 183 return metadata 184 } 185 186 func (j *JQ) getPartValue(part types.Part, raw bool) (obj interface{}, err error) { 187 if raw { 188 return string(part.Get()), nil 189 } 190 obj, err = part.JSON() 191 if err == nil { 192 obj, err = message.CopyJSON(obj) 193 } 194 if err != nil { 195 j.mErrJSONParse.Incr(1) 196 j.log.Debugf("Failed to parse part into json: %v\n", err) 197 return nil, err 198 } 199 return obj, nil 200 } 201 202 // ProcessMessage applies the processor to a message, either creating >0 203 // resulting messages or a response to be sent back to the message source. 204 func (j *JQ) ProcessMessage(msg types.Message) ([]types.Message, types.Response) { 205 j.mCount.Incr(1) 206 207 newMsg := msg.Copy() 208 iteratePartsFilterableWithSpan(TypeJQ, nil, newMsg, func(index int, span *tracing.Span, part types.Part) (bool, error) { 209 in, err := j.getPartValue(part, j.conf.Raw) 210 if err != nil { 211 j.mErr.Incr(1) 212 return false, err 213 } 214 metadata := j.getPartMetadata(part) 215 216 var emitted []interface{} 217 iter := j.code.Run(in, metadata) 218 for { 219 out, ok := iter.Next() 220 if !ok { 221 break 222 } 223 224 if err, ok := out.(error); ok { 225 j.log.Debugf(err.Error()) 226 j.mErr.Incr(1) 227 j.mErrQuery.Incr(1) 228 return false, err 229 } 230 231 j.mSent.Incr(1) 232 emitted = append(emitted, out) 233 } 234 235 if j.conf.OutputRaw { 236 raw, err := j.marshalRaw(emitted) 237 if err != nil { 238 j.log.Debugf("Failed to marshal raw text: %s", err) 239 j.mErr.Incr(1) 240 return false, err 241 } 242 243 // Sometimes the query result is an empty string. Example: 244 // echo '{ "foo": "" }' | jq .foo 245 // In that case we want pass on the empty string instead of treating it as 246 // an empty message and dropping it 247 if len(raw) == 0 && len(emitted) == 0 { 248 j.mDroppedParts.Incr(1) 249 return false, nil 250 } 251 252 part.Set(raw) 253 return true, nil 254 } else if len(emitted) > 1 { 255 if err = part.SetJSON(emitted); err != nil { 256 j.log.Debugf("Failed to set part JSON: %v\n", err) 257 j.mErr.Incr(1) 258 j.mErrJSONSet.Incr(1) 259 return false, err 260 } 261 } else if len(emitted) == 1 { 262 if err = part.SetJSON(emitted[0]); err != nil { 263 j.log.Debugf("Failed to set part JSON: %v\n", err) 264 j.mErr.Incr(1) 265 j.mErrJSONSet.Incr(1) 266 return false, err 267 } 268 } else { 269 j.mDroppedParts.Incr(1) 270 return false, nil 271 } 272 273 return true, nil 274 }) 275 276 if newMsg.Len() == 0 { 277 j.mDropped.Incr(1) 278 return nil, response.NewAck() 279 } 280 281 j.mBatchSent.Incr(1) 282 j.mSent.Incr(int64(newMsg.Len())) 283 284 return []types.Message{newMsg}, nil 285 } 286 287 // CloseAsync shuts down the processor and stops processing requests. 288 func (*JQ) CloseAsync() { 289 } 290 291 // WaitForClose blocks until the processor has closed down. 292 func (*JQ) WaitForClose(timeout time.Duration) error { 293 return nil 294 } 295 296 func (j *JQ) marshalRaw(values []interface{}) ([]byte, error) { 297 buf := bytes.NewBufferString("") 298 299 for index, el := range values { 300 var rawResult []byte 301 302 val, isString := el.(string) 303 if isString { 304 rawResult = []byte(val) 305 } else { 306 marshalled, err := json.Marshal(el) 307 if err != nil { 308 return nil, fmt.Errorf("failed marshal JQ result at index %d: %w", index, err) 309 } 310 311 rawResult = marshalled 312 } 313 314 if _, err := buf.Write(rawResult); err != nil { 315 return nil, fmt.Errorf("failed to write JQ result at index %d: %w", index, err) 316 } 317 } 318 319 bs := buf.Bytes() 320 return bs, nil 321 }