github.com/Jeffail/benthos/v3@v3.65.0/lib/processor/process_field.go (about) 1 package processor 2 3 import ( 4 "fmt" 5 "strconv" 6 "strings" 7 "time" 8 9 "github.com/Jeffail/benthos/v3/internal/docs" 10 "github.com/Jeffail/benthos/v3/internal/interop" 11 "github.com/Jeffail/benthos/v3/internal/tracing" 12 "github.com/Jeffail/benthos/v3/lib/log" 13 "github.com/Jeffail/benthos/v3/lib/message" 14 "github.com/Jeffail/benthos/v3/lib/metrics" 15 "github.com/Jeffail/benthos/v3/lib/types" 16 "github.com/Jeffail/gabs/v2" 17 ) 18 19 //------------------------------------------------------------------------------ 20 21 func init() { 22 Constructors[TypeProcessField] = TypeSpec{ 23 constructor: NewProcessField, 24 Summary: ` 25 A processor that extracts the value of a field [dot path](/docs/configuration/field_paths) 26 within payloads according to a specified [codec](#codec), applies a list of 27 processors to the extracted value and finally sets the field within the original 28 payloads to the processed result.`, 29 Status: docs.StatusDeprecated, 30 FieldSpecs: docs.FieldSpecs{ 31 docs.FieldCommon("codec", "A [codec](#codec) to use in order to extract (and set) the target field.").HasOptions("json", "metadata"), 32 docs.FieldCommon("path", "A [dot path](/docs/configuration/field_paths) pointing to the target field."), 33 docs.FieldCommon( 34 "result_type", "The final data type to marshal the processing result into. The `discard` type is a special case that discards the result of the processing steps entirely.", 35 ).HasOptions("string", "int", "float", "bool", "object", "discard"), 36 docs.FieldCommon("processors", "A list of child processors to execute on the extracted value.").Array().HasType(docs.FieldTypeProcessor), 37 PartsFieldSpec, 38 }, 39 Description: ` 40 The result can be marshalled into a specific data type with the field 41 [` + "`result_type`" + `](#result_type). 42 43 It's therefore possible to use this codec without any child processors as a way 44 of casting string values into other types. For example, with an input JSON 45 document ` + "`{\"foo\":\"10\"}`" + ` it's possible to cast the value of the 46 field foo to an integer type with: 47 48 ` + "```yaml" + ` 49 process_field: 50 path: foo 51 result_type: int 52 ` + "```" + ` 53 54 ## Codecs 55 56 ### ` + "`json`" + ` 57 58 Parses the payload as a JSON document, extracts and sets the field using a dot 59 notation path. 60 61 ### ` + "`metadata`" + ` 62 63 Extracts and sets a metadata value identified by the path field.`, 64 Footnotes: ` 65 ## Alternatives 66 67 The ` + "[`branch` processor](/docs/components/processors/branch)" + ` offers a 68 more flexible and robust way to perform the actions of this processor.`, 69 } 70 } 71 72 //------------------------------------------------------------------------------ 73 74 // ProcessFieldConfig is a config struct containing fields for the ProcessField 75 // processor. 76 type ProcessFieldConfig struct { 77 Parts []int `json:"parts" yaml:"parts"` 78 Codec string `json:"codec" yaml:"codec"` 79 Path string `json:"path" yaml:"path"` 80 ResultType string `json:"result_type" yaml:"result_type"` 81 Processors []Config `json:"processors" yaml:"processors"` 82 } 83 84 // NewProcessFieldConfig returns a default ProcessFieldConfig. 85 func NewProcessFieldConfig() ProcessFieldConfig { 86 return ProcessFieldConfig{ 87 Parts: []int{}, 88 Codec: "json", 89 Path: "", 90 ResultType: "string", 91 Processors: []Config{}, 92 } 93 } 94 95 //------------------------------------------------------------------------------ 96 97 type processFieldCodec interface { 98 CreateRequest(types.Part) (types.Part, error) 99 ExtractResult(from, to types.Part) error 100 Discard() bool 101 } 102 103 // ProcessField is a processor that applies a list of child processors to a 104 // field extracted from the original payload. 105 type ProcessField struct { 106 parts []int 107 path []string 108 children []types.Processor 109 110 codec processFieldCodec 111 112 log log.Modular 113 114 mCount metrics.StatCounter 115 mErr metrics.StatCounter 116 mErrParse metrics.StatCounter 117 mErrMisaligned metrics.StatCounter 118 mErrMisalignedBatch metrics.StatCounter 119 mSent metrics.StatCounter 120 mBatchSent metrics.StatCounter 121 } 122 123 // NewProcessField returns a ProcessField processor. 124 func NewProcessField( 125 conf Config, mgr types.Manager, log log.Modular, stats metrics.Type, 126 ) (Type, error) { 127 var children []types.Processor 128 for i, pconf := range conf.ProcessField.Processors { 129 pMgr, pLog, pStats := interop.LabelChild(fmt.Sprintf("%v", i), mgr, log, stats) 130 proc, err := New(pconf, pMgr, pLog, pStats) 131 if err != nil { 132 return nil, err 133 } 134 children = append(children, proc) 135 } 136 codec, err := stringToProcessFieldCodec(conf.ProcessField.Path, conf.ProcessField.Codec, conf.ProcessField.ResultType) 137 if err != nil { 138 return nil, err 139 } 140 return &ProcessField{ 141 parts: conf.ProcessField.Parts, 142 path: strings.Split(conf.ProcessField.Path, "."), 143 children: children, 144 codec: codec, 145 146 log: log, 147 148 mCount: stats.GetCounter("count"), 149 mErr: stats.GetCounter("error"), 150 mErrParse: stats.GetCounter("error.parse"), 151 mErrMisaligned: stats.GetCounter("error.misaligned"), 152 mErrMisalignedBatch: stats.GetCounter("error.misaligned_messages"), 153 mSent: stats.GetCounter("sent"), 154 mBatchSent: stats.GetCounter("batch.sent"), 155 }, nil 156 } 157 158 //------------------------------------------------------------------------------ 159 160 type processFieldJSONCodec struct { 161 path []string 162 resultMarshaller func(p types.Part) (interface{}, error) 163 } 164 165 func newProcessFieldJSONCodec(path, resultStr string) (*processFieldJSONCodec, error) { 166 var resultMarshaller func(p types.Part) (interface{}, error) 167 switch resultStr { 168 case "string": 169 resultMarshaller = processFieldJSONResultStringMarshaller 170 case "int": 171 resultMarshaller = processFieldJSONResultIntMarshaller 172 case "float": 173 resultMarshaller = processFieldJSONResultFloatMarshaller 174 case "bool": 175 resultMarshaller = processFieldJSONResultBoolMarshaller 176 case "object": 177 resultMarshaller = processFieldJSONResultObjectMarshaller 178 case "array": 179 resultMarshaller = processFieldJSONResultArrayMarshaller 180 case "discard": 181 resultMarshaller = nil 182 default: 183 return nil, fmt.Errorf("unrecognised json codec result_type: %v", resultStr) 184 } 185 return &processFieldJSONCodec{ 186 path: strings.Split(path, "."), 187 resultMarshaller: resultMarshaller, 188 }, nil 189 } 190 191 func (p *processFieldJSONCodec) CreateRequest(source types.Part) (types.Part, error) { 192 reqPart := source.Copy() 193 jObj, err := reqPart.JSON() 194 if err != nil { 195 return nil, err 196 } 197 gObj := gabs.Wrap(jObj) 198 gTarget := gObj.S(p.path...) 199 switch t := gTarget.Data().(type) { 200 case string: 201 reqPart.Set([]byte(t)) 202 default: 203 reqPart.SetJSON(gTarget.Data()) 204 } 205 return reqPart, nil 206 } 207 208 func (p *processFieldJSONCodec) ExtractResult(from, to types.Part) error { 209 resVal, err := p.resultMarshaller(from) 210 if err != nil { 211 return err 212 } 213 jObj, err := to.JSON() 214 if err == nil { 215 jObj, err = message.CopyJSON(jObj) 216 } 217 if err != nil { 218 return err 219 } 220 gObj := gabs.Wrap(jObj) 221 gObj.Set(resVal, p.path...) 222 return to.SetJSON(gObj.Data()) 223 } 224 225 func (p *processFieldJSONCodec) Discard() bool { 226 return p.resultMarshaller == nil 227 } 228 229 func processFieldJSONResultStringMarshaller(p types.Part) (interface{}, error) { 230 return string(p.Get()), nil 231 } 232 233 func processFieldJSONResultIntMarshaller(p types.Part) (interface{}, error) { 234 return strconv.Atoi(string(p.Get())) 235 } 236 237 func processFieldJSONResultFloatMarshaller(p types.Part) (interface{}, error) { 238 return strconv.ParseFloat(string(p.Get()), 64) 239 } 240 241 func processFieldJSONResultBoolMarshaller(p types.Part) (interface{}, error) { 242 str := string(p.Get()) 243 if str == "true" { 244 return true, nil 245 } 246 if str == "false" { 247 return false, nil 248 } 249 return nil, fmt.Errorf("value '%v' could not be parsed as bool", str) 250 } 251 252 func processFieldJSONResultObjectMarshaller(p types.Part) (interface{}, error) { 253 jVal, err := p.JSON() 254 if err != nil { 255 return nil, err 256 } 257 // We consider null as an object 258 if jVal == nil { 259 return nil, nil 260 } 261 if jObj, ok := jVal.(map[string]interface{}); ok { 262 return jObj, nil 263 } 264 return nil, fmt.Errorf("failed to parse JSON type '%T' into object", jVal) 265 } 266 267 func processFieldJSONResultArrayMarshaller(p types.Part) (interface{}, error) { 268 jVal, err := p.JSON() 269 if err != nil { 270 return nil, err 271 } 272 if jArray, ok := jVal.([]interface{}); ok { 273 return jArray, nil 274 } 275 return nil, fmt.Errorf("failed to parse JSON type '%T' into array", jVal) 276 } 277 278 //------------------------------------------------------------------------------ 279 280 type processFieldMetadataCodec struct { 281 key string 282 discard bool 283 } 284 285 func newProcessFieldMetadataCodec(path, resultStr string) (*processFieldMetadataCodec, error) { 286 return &processFieldMetadataCodec{ 287 key: path, 288 discard: resultStr == "discard", 289 }, nil 290 } 291 292 func (p *processFieldMetadataCodec) CreateRequest(source types.Part) (types.Part, error) { 293 reqPart := source.Copy() 294 reqPart.Set([]byte(reqPart.Metadata().Get(p.key))) 295 return reqPart, nil 296 } 297 298 func (p *processFieldMetadataCodec) ExtractResult(from, to types.Part) error { 299 to.Metadata().Set(p.key, string(from.Get())) 300 return nil 301 } 302 303 func (p *processFieldMetadataCodec) Discard() bool { 304 return p.discard 305 } 306 307 //------------------------------------------------------------------------------ 308 309 func stringToProcessFieldCodec(path, codecStr, resultStr string) (processFieldCodec, error) { 310 switch codecStr { 311 case "json": 312 return newProcessFieldJSONCodec(path, resultStr) 313 case "metadata": 314 return newProcessFieldMetadataCodec(path, resultStr) 315 } 316 return nil, fmt.Errorf("unrecognised codec: %v", codecStr) 317 } 318 319 //------------------------------------------------------------------------------ 320 321 // ProcessMessage applies the processor to a message, either creating >0 322 // resulting messages or a response to be sent back to the message source. 323 func (p *ProcessField) ProcessMessage(msg types.Message) (msgs []types.Message, res types.Response) { 324 p.mCount.Incr(1) 325 payload := msg.Copy() 326 resMsgs := [1]types.Message{payload} 327 msgs = resMsgs[:] 328 329 targetParts := p.parts 330 if len(targetParts) == 0 { 331 targetParts = make([]int, payload.Len()) 332 for i := range targetParts { 333 targetParts[i] = i 334 } 335 } 336 337 reqMsg := message.New(nil) 338 for _, index := range targetParts { 339 reqPart, err := p.codec.CreateRequest(payload.Get(index)) 340 if err != nil { 341 p.mErrParse.Incr(1) 342 p.mErr.Incr(1) 343 p.log.Errorf("Failed to decode part: %v\n", err) 344 reqPart = payload.Get(index).Copy() 345 reqPart.Set(nil) 346 FlagErr(reqPart, err) 347 } 348 reqMsg.Append(reqPart) 349 } 350 351 propMsg, _ := tracing.WithChildSpans(TypeProcessField, reqMsg) 352 resultMsgs, _ := ExecuteAll(p.children, propMsg) 353 resMsg := message.New(nil) 354 for _, rMsg := range resultMsgs { 355 rMsg.Iter(func(i int, p types.Part) error { 356 resMsg.Append(p.Copy()) 357 return nil 358 }) 359 } 360 defer tracing.FinishSpans(propMsg) 361 362 if p.codec.Discard() { 363 // With no result codec, if our results are inline with our original 364 // batch we copy the metadata only. 365 if len(targetParts) == resMsg.Len() { 366 for i, index := range targetParts { 367 tPart := payload.Get(index) 368 tPartMeta := tPart.Metadata() 369 resMsg.Get(i).Metadata().Iter(func(k, v string) error { 370 tPartMeta.Set(k, v) 371 return nil 372 }) 373 } 374 } 375 p.mBatchSent.Incr(1) 376 p.mSent.Incr(int64(payload.Len())) 377 return 378 } 379 380 if exp, act := len(targetParts), resMsg.Len(); exp != act { 381 p.mBatchSent.Incr(1) 382 p.mSent.Incr(int64(payload.Len())) 383 p.mErr.Incr(1) 384 p.mErrMisalignedBatch.Incr(1) 385 p.log.Errorf("Misaligned processor result batch. Expected %v messages, received %v\n", exp, act) 386 partsErr := fmt.Errorf("mismatched processor result, expected %v, received %v messages", exp, act) 387 payload.Iter(func(i int, p types.Part) error { 388 FlagErr(p, partsErr) 389 return nil 390 }) 391 return 392 } 393 394 for i, index := range targetParts { 395 tPart := payload.Get(index) 396 tPartMeta := tPart.Metadata() 397 resMsg.Get(i).Metadata().Iter(func(k, v string) error { 398 tPartMeta.Set(k, v) 399 return nil 400 }) 401 rErr := p.codec.ExtractResult(resMsg.Get(i), tPart) 402 if rErr != nil { 403 p.log.Errorf("Failed to marshal result: %v\n", rErr) 404 FlagErr(tPart, rErr) 405 continue 406 } 407 } 408 409 p.mBatchSent.Incr(1) 410 p.mSent.Incr(int64(payload.Len())) 411 return 412 } 413 414 // CloseAsync shuts down the processor and stops processing requests. 415 func (p *ProcessField) CloseAsync() { 416 for _, c := range p.children { 417 c.CloseAsync() 418 } 419 } 420 421 // WaitForClose blocks until the processor has closed down. 422 func (p *ProcessField) WaitForClose(timeout time.Duration) error { 423 stopBy := time.Now().Add(timeout) 424 for _, c := range p.children { 425 if err := c.WaitForClose(time.Until(stopBy)); err != nil { 426 return err 427 } 428 } 429 return nil 430 } 431 432 //------------------------------------------------------------------------------