github.com/Jeffail/benthos/v3@v3.65.0/lib/processor/json.go (about) 1 package processor 2 3 import ( 4 "encoding/json" 5 "errors" 6 "fmt" 7 "strings" 8 "time" 9 10 "github.com/Jeffail/benthos/v3/internal/bloblang/field" 11 "github.com/Jeffail/benthos/v3/internal/docs" 12 "github.com/Jeffail/benthos/v3/internal/interop" 13 "github.com/Jeffail/benthos/v3/internal/tracing" 14 "github.com/Jeffail/benthos/v3/lib/log" 15 "github.com/Jeffail/benthos/v3/lib/message" 16 "github.com/Jeffail/benthos/v3/lib/metrics" 17 "github.com/Jeffail/benthos/v3/lib/types" 18 "github.com/Jeffail/gabs/v2" 19 ) 20 21 //------------------------------------------------------------------------------ 22 23 func init() { 24 Constructors[TypeJSON] = TypeSpec{ 25 constructor: NewJSON, 26 Status: docs.StatusDeprecated, 27 Footnotes: ` 28 ## Alternatives 29 30 All functionality of this processor has been superseded by the 31 [bloblang](/docs/components/processors/bloblang) processor.`, 32 FieldSpecs: docs.FieldSpecs{ 33 docs.FieldCommon("operator", "The [operator](#operators) to apply to messages.").HasOptions( 34 "append", "clean", "copy", "delete", "explode", "flatten", "flatten_array", "fold_number_array", 35 "fold_string_array", "move", "select", "set", "split", 36 ), 37 docs.FieldCommon("path", "A [dot path](/docs/configuration/field_paths) specifying the target within the document to the apply the chosen operator to.", "foo.bar", ".", "some_array.0.id"), 38 docs.FieldCommon( 39 "value", 40 "A value to use with the chosen operator (sometimes not applicable). This is a generic field that can be any type.", 41 "foo", "${!metadata:kafka_key}", false, 10, 42 map[string]interface{}{"topic": "${!metadata:kafka_topic}", "key": "${!metadata:kafka_key}"}, 43 ), 44 PartsFieldSpec, 45 }, 46 } 47 } 48 49 //------------------------------------------------------------------------------ 50 51 type rawJSONValue []byte 52 53 func (r *rawJSONValue) UnmarshalJSON(bytes []byte) error { 54 *r = append((*r)[0:0], bytes...) 55 return nil 56 } 57 58 func (r rawJSONValue) MarshalJSON() ([]byte, error) { 59 if r == nil { 60 return []byte("null"), nil 61 } 62 return r, nil 63 } 64 65 func (r *rawJSONValue) UnmarshalYAML(unmarshal func(interface{}) error) error { 66 var yamlObj interface{} 67 if err := unmarshal(&yamlObj); err != nil { 68 return err 69 } 70 71 var convertMap func(m map[interface{}]interface{}) map[string]interface{} 72 var convertArray func(a []interface{}) 73 convertMap = func(m map[interface{}]interface{}) map[string]interface{} { 74 newMap := map[string]interface{}{} 75 for k, v := range m { 76 keyStr, ok := k.(string) 77 if !ok { 78 continue 79 } 80 newVal := v 81 switch t := v.(type) { 82 case []interface{}: 83 convertArray(t) 84 case map[interface{}]interface{}: 85 newVal = convertMap(t) 86 } 87 newMap[keyStr] = newVal 88 } 89 return newMap 90 } 91 convertArray = func(a []interface{}) { 92 for i, v := range a { 93 newVal := v 94 switch t := v.(type) { 95 case []interface{}: 96 convertArray(t) 97 case map[interface{}]interface{}: 98 newVal = convertMap(t) 99 } 100 a[i] = newVal 101 } 102 } 103 switch t := yamlObj.(type) { 104 case []interface{}: 105 convertArray(t) 106 case map[interface{}]interface{}: 107 yamlObj = convertMap(t) 108 } 109 110 rawJSON, err := json.Marshal(yamlObj) 111 if err != nil { 112 return err 113 } 114 115 *r = append((*r)[0:0], rawJSON...) 116 return nil 117 } 118 119 func (r rawJSONValue) MarshalYAML() (interface{}, error) { 120 if r == nil { 121 return nil, nil 122 } 123 var val interface{} 124 if err := json.Unmarshal(r, &val); err != nil { 125 return nil, err 126 } 127 return val, nil 128 } 129 130 //------------------------------------------------------------------------------ 131 132 // JSONConfig contains configuration fields for the JSON processor. 133 type JSONConfig struct { 134 Parts []int `json:"parts" yaml:"parts"` 135 Operator string `json:"operator" yaml:"operator"` 136 Path string `json:"path" yaml:"path"` 137 Value rawJSONValue `json:"value" yaml:"value"` 138 } 139 140 // NewJSONConfig returns a JSONConfig with default values. 141 func NewJSONConfig() JSONConfig { 142 return JSONConfig{ 143 Parts: []int{}, 144 Operator: "clean", 145 Path: "", 146 Value: rawJSONValue(`""`), 147 } 148 } 149 150 //------------------------------------------------------------------------------ 151 152 type jsonOperator func(body interface{}, value json.RawMessage) (interface{}, error) 153 154 func newSetOperator(path []string) jsonOperator { 155 return func(body interface{}, value json.RawMessage) (interface{}, error) { 156 if len(path) == 0 { 157 var data interface{} 158 if value != nil { 159 if err := json.Unmarshal([]byte(value), &data); err != nil { 160 return nil, fmt.Errorf("failed to parse value: %v", err) 161 } 162 } 163 return data, nil 164 } 165 166 gPart := gabs.Wrap(body) 167 168 var data interface{} 169 if value != nil { 170 if err := json.Unmarshal([]byte(value), &data); err != nil { 171 return nil, fmt.Errorf("failed to parse value: %v", err) 172 } 173 } 174 175 gPart.Set(data, path...) 176 return gPart.Data(), nil 177 } 178 } 179 180 func newMoveOperator(srcPath, destPath []string) (jsonOperator, error) { 181 if len(srcPath) == 0 && len(destPath) == 0 { 182 return nil, errors.New("an empty source and destination path is not valid for the move operator") 183 } 184 return func(body interface{}, value json.RawMessage) (interface{}, error) { 185 var gPart *gabs.Container 186 var gSrc interface{} 187 if len(srcPath) > 0 { 188 gPart = gabs.Wrap(body) 189 gSrc = gPart.S(srcPath...).Data() 190 gPart.Delete(srcPath...) 191 } else { 192 gPart = gabs.New() 193 gSrc = body 194 } 195 if gSrc == nil { 196 return nil, fmt.Errorf("item not found at path '%v'", strings.Join(srcPath, ".")) 197 } 198 if len(destPath) == 0 { 199 return gSrc, nil 200 } 201 if _, err := gPart.Set(gSrc, destPath...); err != nil { 202 return nil, fmt.Errorf("failed to set destination path '%v': %v", strings.Join(destPath, "."), err) 203 } 204 return gPart.Data(), nil 205 }, nil 206 } 207 208 func newCopyOperator(srcPath, destPath []string) (jsonOperator, error) { 209 if len(srcPath) == 0 { 210 return nil, errors.New("an empty source path is not valid for the copy operator") 211 } 212 if len(destPath) == 0 { 213 return nil, errors.New("an empty destination path is not valid for the copy operator") 214 } 215 return func(body interface{}, value json.RawMessage) (interface{}, error) { 216 gPart := gabs.Wrap(body) 217 gSrc := gPart.S(srcPath...).Data() 218 if gSrc == nil { 219 return nil, fmt.Errorf("item not found at path '%v'", strings.Join(srcPath, ".")) 220 } 221 222 if _, err := gPart.Set(gSrc, destPath...); err != nil { 223 return nil, fmt.Errorf("failed to set destination path '%v': %v", strings.Join(destPath, "."), err) 224 } 225 return gPart.Data(), nil 226 }, nil 227 } 228 229 func newExplodeOperator(path []string) (jsonOperator, error) { 230 if len(path) == 0 { 231 return nil, errors.New("explode operator requires a target path") 232 } 233 return func(body interface{}, value json.RawMessage) (interface{}, error) { 234 target := gabs.Wrap(body).Search(path...) 235 236 switch t := target.Data().(type) { 237 case []interface{}: 238 result := make([]interface{}, len(t)) 239 for i, ele := range t { 240 exploded, err := message.CopyJSON(body) 241 if err != nil { 242 return nil, fmt.Errorf("failed to clone root object to explode: %v", err) 243 } 244 245 gExploded := gabs.Wrap(exploded) 246 gExploded.Set(ele, path...) 247 result[i] = gExploded.Data() 248 } 249 return result, nil 250 case map[string]interface{}: 251 result := make(map[string]interface{}) 252 for key, ele := range t { 253 exploded, err := message.CopyJSON(body) 254 if err != nil { 255 return nil, fmt.Errorf("failed to clone root object to explode: %v", err) 256 } 257 258 gExploded := gabs.Wrap(exploded) 259 gExploded.Set(ele, path...) 260 result[key] = gExploded.Data() 261 } 262 return result, nil 263 } 264 265 return nil, fmt.Errorf("target value was not an array or a map, found: %T", target.Data()) 266 }, nil 267 } 268 269 func foldStringArray(children []*gabs.Container, value json.RawMessage) (string, error) { 270 var delim string 271 if value != nil && len(value) > 0 { 272 if err := json.Unmarshal(value, &delim); err != nil { 273 return "", fmt.Errorf("failed to unmarshal json: %w", err) 274 } 275 } 276 var b strings.Builder 277 for i, child := range children { 278 switch t := child.Data().(type) { 279 case string: 280 if i > 0 && len(delim) > 0 { 281 b.WriteString(delim) 282 } 283 b.WriteString(t) 284 default: 285 return "", fmt.Errorf("mismatched types found in array, expected string, found: %T", t) 286 } 287 } 288 return b.String(), nil 289 } 290 291 func foldArrayArray(children []*gabs.Container) ([]interface{}, error) { 292 var b []interface{} 293 for _, child := range children { 294 switch t := child.Data().(type) { 295 case []interface{}: 296 b = append(b, t...) 297 default: 298 b = append(b, t) 299 } 300 } 301 return b, nil 302 } 303 304 func foldNumberArray(children []*gabs.Container) (float64, error) { 305 var b float64 306 for _, child := range children { 307 switch t := child.Data().(type) { 308 case int: 309 b += float64(t) 310 case int64: 311 b += float64(t) 312 case float64: 313 b += t 314 case json.Number: 315 f, err := t.Float64() 316 if err != nil { 317 i, _ := t.Int64() 318 f = float64(i) 319 } 320 b += f 321 default: 322 return 0, fmt.Errorf("mismatched types found in array, expected number, found: %T", t) 323 } 324 } 325 return b, nil 326 } 327 328 func newFlattenOperator(path []string) jsonOperator { 329 return func(body interface{}, value json.RawMessage) (interface{}, error) { 330 gPart := gabs.Wrap(body) 331 target := gPart 332 if len(path) > 0 { 333 target = gPart.Search(path...) 334 } 335 336 v, err := target.Flatten() 337 if err != nil { 338 return nil, err 339 } 340 341 gPart.Set(v, path...) 342 return gPart.Data(), nil 343 } 344 } 345 346 func newFlattenArrayOperator(path []string) jsonOperator { 347 return func(body interface{}, value json.RawMessage) (interface{}, error) { 348 gPart := gabs.Wrap(body) 349 target := gPart 350 if len(path) > 0 { 351 target = gPart.Search(path...) 352 } 353 354 if _, isArray := target.Data().([]interface{}); !isArray { 355 return nil, fmt.Errorf("non-array value found at path: %T", target.Data()) 356 } 357 358 children := target.Children() 359 if len(children) == 0 { 360 return body, nil 361 } 362 363 v, err := foldArrayArray(children) 364 if err != nil { 365 return nil, err 366 } 367 368 gPart.Set(v, path...) 369 return gPart.Data(), nil 370 } 371 } 372 373 func newFoldNumberArrayOperator(path []string) jsonOperator { 374 return func(body interface{}, value json.RawMessage) (interface{}, error) { 375 gPart := gabs.Wrap(body) 376 target := gPart 377 if len(path) > 0 { 378 target = gPart.Search(path...) 379 } 380 381 if _, isArray := target.Data().([]interface{}); !isArray { 382 return nil, fmt.Errorf("non-array value found at path: %T", target.Data()) 383 } 384 385 var v float64 386 var err error 387 388 children := target.Children() 389 if len(children) > 0 { 390 v, err = foldNumberArray(children) 391 } 392 if err != nil { 393 return nil, err 394 } 395 396 gPart.Set(v, path...) 397 return gPart.Data(), nil 398 } 399 } 400 401 func newFoldStringArrayOperator(path []string) jsonOperator { 402 return func(body interface{}, value json.RawMessage) (interface{}, error) { 403 gPart := gabs.Wrap(body) 404 target := gPart 405 if len(path) > 0 { 406 target = gPart.Search(path...) 407 } 408 409 if _, isArray := target.Data().([]interface{}); !isArray { 410 return nil, fmt.Errorf("non-array value found at path: %T", target.Data()) 411 } 412 413 var v string 414 var err error 415 416 children := target.Children() 417 if len(children) > 0 { 418 v, err = foldStringArray(children, value) 419 } 420 if err != nil { 421 return nil, err 422 } 423 424 gPart.Set(v, path...) 425 return gPart.Data(), nil 426 } 427 } 428 429 func newSelectOperator(path []string) jsonOperator { 430 return func(body interface{}, value json.RawMessage) (interface{}, error) { 431 gPart := gabs.Wrap(body) 432 target := gPart 433 if len(path) > 0 { 434 target = gPart.Search(path...) 435 } 436 437 switch t := target.Data().(type) { 438 case string: 439 return rawJSONValue(t), nil 440 case json.Number: 441 return rawJSONValue(t.String()), nil 442 } 443 444 return target.Data(), nil 445 } 446 } 447 448 func newDeleteOperator(path []string) jsonOperator { 449 return func(body interface{}, value json.RawMessage) (interface{}, error) { 450 if len(path) == 0 { 451 return nil, nil 452 } 453 454 gPart := gabs.Wrap(body) 455 if err := gPart.Delete(path...); err != nil { 456 return nil, err 457 } 458 return gPart.Data(), nil 459 } 460 } 461 462 func newCleanOperator(path []string) jsonOperator { 463 return func(body interface{}, value json.RawMessage) (interface{}, error) { 464 gRoot := gabs.Wrap(body) 465 466 var cleanValueFn func(g interface{}) interface{} 467 var cleanArrayFn func(g []interface{}) []interface{} 468 var cleanObjectFn func(g map[string]interface{}) map[string]interface{} 469 cleanValueFn = func(g interface{}) interface{} { 470 if g == nil { 471 return nil 472 } 473 switch t := g.(type) { 474 case map[string]interface{}: 475 if nv := cleanObjectFn(t); len(nv) > 0 { 476 return nv 477 } 478 return nil 479 case []interface{}: 480 if na := cleanArrayFn(t); len(na) > 0 { 481 return na 482 } 483 return nil 484 case string: 485 if len(t) > 0 { 486 return t 487 } 488 return nil 489 } 490 return g 491 } 492 cleanArrayFn = func(g []interface{}) []interface{} { 493 newArray := []interface{}{} 494 for _, v := range g { 495 if nv := cleanValueFn(v); nv != nil { 496 newArray = append(newArray, nv) 497 } 498 } 499 return newArray 500 } 501 cleanObjectFn = func(g map[string]interface{}) map[string]interface{} { 502 newObject := map[string]interface{}{} 503 for k, v := range g { 504 if nv := cleanValueFn(v); nv != nil { 505 newObject[k] = nv 506 } 507 } 508 return newObject 509 } 510 if val := cleanValueFn(gRoot.S(path...).Data()); val == nil { 511 if len(path) == 0 { 512 switch gRoot.Data().(type) { 513 case []interface{}: 514 return []interface{}{}, nil 515 case map[string]interface{}: 516 return map[string]interface{}{}, nil 517 } 518 return nil, nil 519 } 520 gRoot.Delete(path...) 521 } else { 522 gRoot.Set(val, path...) 523 } 524 525 return gRoot.Data(), nil 526 } 527 } 528 529 func newAppendOperator(path []string) jsonOperator { 530 return func(body interface{}, value json.RawMessage) (interface{}, error) { 531 gPart := gabs.Wrap(body) 532 var array []interface{} 533 534 var valueParsed interface{} 535 if value != nil { 536 if err := json.Unmarshal(value, &valueParsed); err != nil { 537 return nil, err 538 } 539 } 540 switch t := valueParsed.(type) { 541 case []interface{}: 542 array = t 543 default: 544 array = append(array, t) 545 } 546 547 if gTarget := gPart.S(path...); gTarget != nil { 548 switch t := gTarget.Data().(type) { 549 case []interface{}: 550 t = append(t, array...) 551 array = t 552 case nil: 553 array = append([]interface{}{t}, array...) 554 default: 555 array = append([]interface{}{t}, array...) 556 } 557 } 558 gPart.Set(array, path...) 559 560 return gPart.Data(), nil 561 } 562 } 563 564 func newSplitOperator(path []string) jsonOperator { 565 return func(body interface{}, value json.RawMessage) (interface{}, error) { 566 gPart := gabs.Wrap(body) 567 568 var valueParsed string 569 if value != nil { 570 if err := json.Unmarshal(value, &valueParsed); err != nil { 571 return nil, err 572 } 573 } 574 if valueParsed == "" { 575 return nil, errors.New("value field must be a non-empty string") 576 } 577 578 targetStr, ok := gPart.S(path...).Data().(string) 579 if !ok { 580 return nil, errors.New("path value must be a string") 581 } 582 583 var values []interface{} 584 for _, v := range strings.Split(targetStr, valueParsed) { 585 values = append(values, v) 586 } 587 588 gPart.Set(values, path...) 589 return gPart.Data(), nil 590 } 591 } 592 593 func getOperator(opStr string, path []string, value json.RawMessage) (jsonOperator, error) { 594 var destPath []string 595 if opStr == "move" || opStr == "copy" { 596 var destDotPath string 597 if err := json.Unmarshal(value, &destDotPath); err != nil { 598 return nil, fmt.Errorf("failed to parse destination path from value: %v", err) 599 } 600 if len(destDotPath) > 0 { 601 destPath = gabs.DotPathToSlice(destDotPath) 602 } 603 } 604 switch opStr { 605 case "set": 606 return newSetOperator(path), nil 607 case "flatten": 608 return newFlattenOperator(path), nil 609 case "flatten_array": 610 return newFlattenArrayOperator(path), nil 611 case "fold_number_array": 612 return newFoldNumberArrayOperator(path), nil 613 case "fold_string_array": 614 return newFoldStringArrayOperator(path), nil 615 case "select": 616 return newSelectOperator(path), nil 617 case "split": 618 return newSplitOperator(path), nil 619 case "copy": 620 return newCopyOperator(path, destPath) 621 case "move": 622 return newMoveOperator(path, destPath) 623 case "delete": 624 return newDeleteOperator(path), nil 625 case "append": 626 return newAppendOperator(path), nil 627 case "clean": 628 return newCleanOperator(path), nil 629 case "explode": 630 return newExplodeOperator(path) 631 } 632 return nil, fmt.Errorf("operator not recognised: %v", opStr) 633 } 634 635 //------------------------------------------------------------------------------ 636 637 // JSON is a processor that performs an operation on a JSON payload. 638 type JSON struct { 639 parts []int 640 641 value *field.Expression 642 operator jsonOperator 643 644 conf Config 645 log log.Modular 646 stats metrics.Type 647 648 mCount metrics.StatCounter 649 mErrJSONP metrics.StatCounter 650 mErrJSONS metrics.StatCounter 651 mErr metrics.StatCounter 652 mSent metrics.StatCounter 653 mBatchSent metrics.StatCounter 654 } 655 656 // NewJSON returns a JSON processor. 657 func NewJSON( 658 conf Config, mgr types.Manager, log log.Modular, stats metrics.Type, 659 ) (Type, error) { 660 value, err := interop.NewBloblangField(mgr, string(conf.JSON.Value)) 661 if err != nil { 662 return nil, fmt.Errorf("failed to parse value expression: %v", err) 663 } 664 665 j := &JSON{ 666 parts: conf.JSON.Parts, 667 conf: conf, 668 log: log, 669 stats: stats, 670 671 value: value, 672 673 mCount: stats.GetCounter("count"), 674 mErrJSONP: stats.GetCounter("error.json_parse"), 675 mErrJSONS: stats.GetCounter("error.json_set"), 676 mErr: stats.GetCounter("error"), 677 mSent: stats.GetCounter("sent"), 678 mBatchSent: stats.GetCounter("batch.sent"), 679 } 680 681 splitPath := gabs.DotPathToSlice(conf.JSON.Path) 682 if conf.JSON.Path == "" || conf.JSON.Path == "." { 683 splitPath = []string{} 684 } 685 686 if j.operator, err = getOperator(conf.JSON.Operator, splitPath, json.RawMessage(j.value.Bytes(0, message.New(nil)))); err != nil { 687 return nil, err 688 } 689 return j, nil 690 } 691 692 //------------------------------------------------------------------------------ 693 694 // ProcessMessage applies the processor to a message, either creating >0 695 // resulting messages or a response to be sent back to the message source. 696 func (p *JSON) ProcessMessage(msg types.Message) ([]types.Message, types.Response) { 697 p.mCount.Incr(1) 698 newMsg := msg.Copy() 699 700 proc := func(index int, span *tracing.Span, part types.Part) error { 701 valueBytes := p.value.BytesEscapedLegacy(index, newMsg) 702 jsonPart, err := part.JSON() 703 if err == nil { 704 jsonPart, err = message.CopyJSON(jsonPart) 705 } 706 if err != nil { 707 p.mErrJSONP.Incr(1) 708 p.mErr.Incr(1) 709 p.log.Debugf("Failed to parse part into json: %v\n", err) 710 return err 711 } 712 713 var data interface{} 714 if data, err = p.operator(jsonPart, json.RawMessage(valueBytes)); err != nil { 715 p.mErr.Incr(1) 716 p.log.Debugf("Failed to apply operator: %v\n", err) 717 return err 718 } 719 720 switch t := data.(type) { 721 case rawJSONValue: 722 newMsg.Get(index).Set([]byte(t)) 723 case []byte: 724 newMsg.Get(index).Set(t) 725 default: 726 if err = newMsg.Get(index).SetJSON(data); err != nil { 727 p.mErrJSONS.Incr(1) 728 p.mErr.Incr(1) 729 p.log.Debugf("Failed to convert json into part: %v\n", err) 730 return err 731 } 732 } 733 return nil 734 } 735 736 IteratePartsWithSpanV2(TypeJSON, p.parts, newMsg, proc) 737 738 msgs := [1]types.Message{newMsg} 739 740 p.mBatchSent.Incr(1) 741 p.mSent.Incr(int64(newMsg.Len())) 742 return msgs[:], nil 743 } 744 745 // CloseAsync shuts down the processor and stops processing requests. 746 func (p *JSON) CloseAsync() { 747 } 748 749 // WaitForClose blocks until the processor has closed down. 750 func (p *JSON) WaitForClose(timeout time.Duration) error { 751 return nil 752 } 753 754 //------------------------------------------------------------------------------