package processor

import (
	"bytes"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"regexp"
	"sync"
	"time"

	"github.com/Jeffail/benthos/v3/internal/docs"
	"github.com/Jeffail/benthos/v3/internal/tracing"
	"github.com/Jeffail/benthos/v3/lib/log"
	"github.com/Jeffail/benthos/v3/lib/message"
	"github.com/Jeffail/benthos/v3/lib/metrics"
	"github.com/Jeffail/benthos/v3/lib/types"
	"github.com/Jeffail/gabs/v2"
	"github.com/benhoyt/goawk/interp"
	"github.com/benhoyt/goawk/parser"
)

//------------------------------------------------------------------------------

// varInvalidRegexp matches any character that is not legal in an AWK variable
// name. Matches are replaced with underscores when flattened JSON paths and
// metadata keys are converted into AWK variable names.
var varInvalidRegexp *regexp.Regexp

// init registers the awk processor constructor along with its full
// documentation, field specs and config examples.
func init() {
	varInvalidRegexp = regexp.MustCompile(`[^a-zA-Z0-9_]`)

	Constructors[TypeAWK] = TypeSpec{
		constructor: NewAWK,
		Categories: []Category{
			CategoryMapping,
		},
		Summary: `
Executes an AWK program on messages. This processor is very powerful as it
offers a range of [custom functions](#awk-functions) for querying and mutating
message contents and metadata.`,
		Description: `
Works by feeding message contents as the program input based on a chosen
[codec](#codecs) and replaces the contents of each message with the result. If
the result is empty (nothing is printed by the program) then the original
message contents remain unchanged.

Comes with a wide range of [custom functions](#awk-functions) for accessing
message metadata, json fields, printing logs, etc. These functions can be
overridden by functions within the program.

Check out the [examples section](#examples) in order to see how this processor
can be used.

This processor uses [GoAWK][goawk], in order to understand the differences
in how the program works you can [read more about it here][goawk.differences].`,
		Footnotes: `
## Codecs

The chosen codec determines how the contents of the message are fed into the
program. Codecs only impact the input string and variables initialised for your
program, they do not change the range of custom functions available.

### ` + "`none`" + `

An empty string is fed into the program. Functions can still be used in order to
extract and mutate metadata and message contents.

This is useful for when your program only uses functions and doesn't need the
full text of the message to be parsed by the program, as it is significantly
faster.

### ` + "`text`" + `

The full contents of the message are fed into the program as a string, allowing
you to reference tokenised segments of the message with variables ($0, $1, etc).
Custom functions can still be used with this codec.

This is the default codec as it behaves most similar to typical usage of the awk
command line tool.

### ` + "`json`" + `

An empty string is fed into the program, and variables are automatically
initialised before execution of your program by walking the flattened JSON
structure. Each value is converted into a variable by taking its full path,
e.g. the object:

` + "``` json" + `
{
  "foo": {
    "bar": {
      "value": 10
    },
    "created_at": "2018-12-18T11:57:32"
  }
}
` + "```" + `

Would result in the following variable declarations:

` + "```" + `
foo_bar_value = 10
foo_created_at = "2018-12-18T11:57:32"
` + "```" + `

Custom functions can also still be used with this codec.

## AWK Functions

` + "### `json_get`" + `

Signature: ` + "`json_get(path)`" + `

Attempts to find a JSON value in the input message payload by a
[dot separated path](/docs/configuration/field_paths) and returns it as a string.

` + "### `json_set`" + `

Signature: ` + "`json_set(path, value)`" + `

Attempts to set a JSON value in the input message payload identified by a
[dot separated path](/docs/configuration/field_paths), the value argument will be interpreted
as a string.

In order to set non-string values use one of the following typed varieties:

` + "- `json_set_int(path, value)`" + `
` + "- `json_set_float(path, value)`" + `
` + "- `json_set_bool(path, value)`" + `

` + "### `json_append`" + `

Signature: ` + "`json_append(path, value)`" + `

Attempts to append a value to an array identified by a
[dot separated path](/docs/configuration/field_paths). If the target does not
exist it will be created. If the target exists but is not already an array then
it will be converted into one, with its original contents set to the first
element of the array.

The value argument will be interpreted as a string. In order to append
non-string values use one of the following typed varieties:

` + "- `json_append_int(path, value)`" + `
` + "- `json_append_float(path, value)`" + `
` + "- `json_append_bool(path, value)`" + `

` + "### `json_delete`" + `

Signature: ` + "`json_delete(path)`" + `

Attempts to delete a JSON field from the input message payload identified by a
[dot separated path](/docs/configuration/field_paths).

` + "### `json_length`" + `

Signature: ` + "`json_length(path)`" + `

Returns the size of the string or array value of JSON field from the input
message payload identified by a [dot separated path](/docs/configuration/field_paths).

If the target field does not exist, or is not a string or array type, then zero
is returned. In order to explicitly check the type of a field use ` + "`json_type`" + `.

` + "### `json_type`" + `

Signature: ` + "`json_type(path)`" + `

Returns the type of a JSON field from the input message payload identified by a
[dot separated path](/docs/configuration/field_paths).

Possible values are: "string", "int", "float", "bool", "undefined", "null",
"array", "object".

` + "### `create_json_object`" + `

Signature: ` + "`create_json_object(key1, val1, key2, val2, ...)`" + `

Generates a valid JSON object of key value pair arguments. The arguments are
variadic, meaning any number of pairs can be listed. The value will always
resolve to a string regardless of the value type. E.g. the following call:

` + "`create_json_object(\"a\", \"1\", \"b\", 2, \"c\", \"3\")`" + `

Would result in this string:

` + "`{\"a\":\"1\",\"b\":\"2\",\"c\":\"3\"}`" + `

` + "### `create_json_array`" + `

Signature: ` + "`create_json_array(val1, val2, ...)`" + `

Generates a valid JSON array of value arguments. The arguments are variadic,
meaning any number of values can be listed. The value will always resolve to a
string regardless of the value type. E.g. the following call:

` + "`create_json_array(\"1\", 2, \"3\")`" + `

Would result in this string:

` + "`[\"1\",\"2\",\"3\"]`" + `

` + "### `metadata_set`" + `

Signature: ` + "`metadata_set(key, value)`" + `

Set a metadata key for the message to a value. The value will always resolve to
a string regardless of the value type.

` + "### `metadata_get`" + `

Signature: ` + "`metadata_get(key) string`" + `

Get the value of a metadata key from the message.

` + "### `timestamp_unix`" + `

Signature: ` + "`timestamp_unix() int`" + `

Returns the current unix timestamp (the number of seconds since 01-01-1970).

` + "### `timestamp_unix`" + `

Signature: ` + "`timestamp_unix(date) int`" + `

Attempts to parse a date string by detecting its format and returns the
equivalent unix timestamp (the number of seconds since 01-01-1970).

` + "### `timestamp_unix`" + `

Signature: ` + "`timestamp_unix(date, format) int`" + `

Attempts to parse a date string according to a format and returns the equivalent
unix timestamp (the number of seconds since 01-01-1970).

The format is defined by showing how the reference time, defined to be
` + "`Mon Jan 2 15:04:05 -0700 MST 2006`" + ` would be displayed if it were the value.

` + "### `timestamp_unix_nano`" + `

Signature: ` + "`timestamp_unix_nano() int`" + `

Returns the current unix timestamp in nanoseconds (the number of nanoseconds
since 01-01-1970).

` + "### `timestamp_unix_nano`" + `

Signature: ` + "`timestamp_unix_nano(date) int`" + `

Attempts to parse a date string by detecting its format and returns the
equivalent unix timestamp in nanoseconds (the number of nanoseconds since
01-01-1970).

` + "### `timestamp_unix_nano`" + `

Signature: ` + "`timestamp_unix_nano(date, format) int`" + `

Attempts to parse a date string according to a format and returns the equivalent
unix timestamp in nanoseconds (the number of nanoseconds since 01-01-1970).

The format is defined by showing how the reference time, defined to be
` + "`Mon Jan 2 15:04:05 -0700 MST 2006`" + ` would be displayed if it were the value.

` + "### `timestamp_format`" + `

Signature: ` + "`timestamp_format(unix, format) string`" + `

Formats a unix timestamp. The format is defined by showing how the reference
time, defined to be ` + "`Mon Jan 2 15:04:05 -0700 MST 2006`" + ` would be displayed if it
were the value.

The format is optional, and if omitted RFC3339 (` + "`2006-01-02T15:04:05Z07:00`" + `)
will be used.

` + "### `timestamp_format_nano`" + `

Signature: ` + "`timestamp_format_nano(unixNano, format) string`" + `

Formats a unix timestamp in nanoseconds. The format is defined by showing how
the reference time, defined to be ` + "`Mon Jan 2 15:04:05 -0700 MST 2006`" + ` would be
displayed if it were the value.

The format is optional, and if omitted RFC3339 (` + "`2006-01-02T15:04:05Z07:00`" + `)
will be used.

` + "### `print_log`" + `

Signature: ` + "`print_log(message, level)`" + `

Prints a Benthos log message at a particular log level. The log level is
optional, and if omitted the level ` + "`INFO`" + ` will be used.

[goawk]: https://github.com/benhoyt/goawk
[goawk.differences]: https://github.com/benhoyt/goawk#differences-from-awk`,
		FieldSpecs: docs.FieldSpecs{
			docs.FieldCommon("codec", "A [codec](#codecs) defines how messages should be inserted into the AWK program as variables. The codec does not change which [custom Benthos functions](#awk-functions) are available. The `text` codec is the closest to a typical AWK use case.").HasOptions("none", "text", "json"),
			docs.FieldCommon("program", "An AWK program to execute"),
			PartsFieldSpec,
		},
		Examples: []docs.AnnotatedExample{
			{
				Title: "JSON Mapping and Arithmetic",
				Summary: `
Because AWK is a full programming language it's much easier to map documents and
perform arithmetic with it than with other Benthos processors. For example, if
we were expecting documents of the form:

` + "```json" + `
{"doc":{"val1":5,"val2":10},"id":"1","type":"add"}
{"doc":{"val1":5,"val2":10},"id":"2","type":"multiply"}
` + "```" + `

And we wished to perform the arithmetic specified in the ` + "`type`" + ` field,
on the values ` + "`val1` and `val2`" + ` and, finally, map the result into the
document, giving us the following resulting documents:

` + "```json" + `
{"doc":{"result":15,"val1":5,"val2":10},"id":"1","type":"add"}
{"doc":{"result":50,"val1":5,"val2":10},"id":"2","type":"multiply"}
` + "```" + `

We can do that with the following:`,
				Config: `
pipeline:
  processors:
    - awk:
        program: |
          function map_add_vals() {
            json_set_int("doc.result", json_get("doc.val1") + json_get("doc.val2"));
          }
          function map_multiply_vals() {
            json_set_int("doc.result", json_get("doc.val1") * json_get("doc.val2"));
          }
          function map_unknown(type) {
            json_set("error","unknown document type");
            print_log("Document type not recognised: " type, "ERROR");
          }
          {
            type = json_get("type");
            if (type == "add")
              map_add_vals();
            else if (type == "multiply")
              map_multiply_vals();
            else
              map_unknown(type);
          }
`,
			},
			{
				Title: "Stuff With Arrays",
				Summary: `
It's possible to iterate JSON arrays by appending an index value to the path,
this can be used to do things like removing duplicates from arrays. For example,
given the following input document:

` + "```json" + `
{"path":{"to":{"foos":["one","two","three","two","four"]}}}
` + "```" + `

We could create a new array ` + "`foos_unique` from `foos`" + ` giving us the result:

` + "```json" + `
{"path":{"to":{"foos":["one","two","three","two","four"],"foos_unique":["one","two","three","four"]}}}
` + "```" + `

With the following config:`,
				Config: `
pipeline:
  processors:
    - awk:
        program: |
          {
            array_path = "path.to.foos"
            array_len = json_length(array_path)

            for (i = 0; i < array_len; i++) {
              ele = json_get(array_path "." i)
              if ( ! ( ele in seen ) ) {
                json_append(array_path "_unique", ele)
                seen[ele] = 1
              }
            }
          }
`,
			},
		},
	}
}

//------------------------------------------------------------------------------

// AWKConfig contains configuration fields for the AWK processor.
type AWKConfig struct {
	// Parts lists the indexes of message parts to process; empty means all.
	Parts []int `json:"parts" yaml:"parts"`
	// Codec is one of "none", "text" or "json" (see the Codecs docs above).
	Codec string `json:"codec" yaml:"codec"`
	// Program is the AWK source to compile and execute per message.
	Program string `json:"program" yaml:"program"`
}

// NewAWKConfig returns a AWKConfig with default values.
func NewAWKConfig() AWKConfig {
	return AWKConfig{
		Parts:   []int{},
		Codec:   "text",
		Program: "BEGIN { x = 0 } { print $0, x; x++ }",
	}
}

//------------------------------------------------------------------------------

// AWK is a processor that executes AWK programs on a message part and replaces
// the contents with the result.
type AWK struct {
	parts   []int
	program *parser.Program // compiled once in NewAWK

	conf  AWKConfig
	log   log.Modular
	stats metrics.Type
	mut   sync.Mutex // guards functions during the copy in ProcessMessage

	// functions holds the base custom function implementations; message
	// scoped overrides are layered on top of a copy per ProcessMessage call.
	functions map[string]interface{}

	mCount     metrics.StatCounter
	mErr       metrics.StatCounter
	mSent      metrics.StatCounter
	mBatchSent metrics.StatCounter
}

// NewAWK returns a AWK processor.
429 func NewAWK( 430 conf Config, mgr types.Manager, log log.Modular, stats metrics.Type, 431 ) (Type, error) { 432 program, err := parser.ParseProgram([]byte(conf.AWK.Program), &parser.ParserConfig{ 433 Funcs: awkFunctionsMap, 434 }) 435 if err != nil { 436 return nil, fmt.Errorf("failed to compile AWK program: %v", err) 437 } 438 switch conf.AWK.Codec { 439 case "none": 440 case "text": 441 case "json": 442 default: 443 return nil, fmt.Errorf("unrecognised codec: %v", conf.AWK.Codec) 444 } 445 functionOverrides := make(map[string]interface{}, len(awkFunctionsMap)) 446 for k, v := range awkFunctionsMap { 447 functionOverrides[k] = v 448 } 449 functionOverrides["print_log"] = func(value, level string) { 450 switch level { 451 default: 452 fallthrough 453 case "", "INFO": 454 log.Infoln(value) 455 case "TRACE": 456 log.Traceln(value) 457 case "DEBUG": 458 log.Debugln(value) 459 case "WARN": 460 log.Warnln(value) 461 case "ERROR": 462 log.Errorln(value) 463 case "FATAL": 464 log.Fatalln(value) 465 } 466 } 467 a := &AWK{ 468 parts: conf.AWK.Parts, 469 program: program, 470 conf: conf.AWK, 471 log: log, 472 stats: stats, 473 474 functions: functionOverrides, 475 476 mCount: stats.GetCounter("count"), 477 mErr: stats.GetCounter("error"), 478 mSent: stats.GetCounter("sent"), 479 mBatchSent: stats.GetCounter("batch.sent"), 480 } 481 return a, nil 482 } 483 484 //------------------------------------------------------------------------------ 485 486 func getTime(dateStr, format string) (time.Time, error) { 487 if dateStr == "" { 488 return time.Now(), nil 489 } 490 if format == "" { 491 var err error 492 var parsed time.Time 493 for _, layout := range []string{ 494 time.RubyDate, 495 time.RFC1123Z, 496 time.RFC1123, 497 time.RFC3339, 498 time.RFC822, 499 time.RFC822Z, 500 "Mon, 2 Jan 2006 15:04:05 -0700", 501 "2006-01-02T15:04:05MST", 502 "2006-01-02T15:04:05", 503 "2006-01-02 15:04:05", 504 "2006-01-02T15:04:05Z0700", 505 "2006-01-02", 506 } { 507 if parsed, err = 
time.Parse(layout, dateStr); err == nil { 508 break 509 } 510 } 511 if err != nil { 512 return time.Time{}, fmt.Errorf("failed to detect datetime format of: %v", dateStr) 513 } 514 return parsed, nil 515 } 516 return time.Parse(format, dateStr) 517 } 518 519 var awkFunctionsMap = map[string]interface{}{ 520 "timestamp_unix": func(dateStr string, format string) (int64, error) { 521 ts, err := getTime(dateStr, format) 522 if err != nil { 523 return 0, err 524 } 525 return ts.Unix(), nil 526 }, 527 "timestamp_unix_nano": func(dateStr string, format string) (int64, error) { 528 ts, err := getTime(dateStr, format) 529 if err != nil { 530 return 0, err 531 } 532 return ts.UnixNano(), nil 533 }, 534 "timestamp_format": func(unix int64, formatArg string) string { 535 format := time.RFC3339 536 if len(formatArg) > 0 { 537 format = formatArg 538 } 539 t := time.Unix(unix, 0).In(time.UTC) 540 return t.Format(format) 541 }, 542 "timestamp_format_nano": func(unixNano int64, formatArg string) string { 543 format := time.RFC3339 544 if len(formatArg) > 0 { 545 format = formatArg 546 } 547 s := unixNano / 1000000000 548 ns := unixNano - (s * 1000000000) 549 t := time.Unix(s, ns).In(time.UTC) 550 return t.Format(format) 551 }, 552 "metadata_get": func(key string) string { 553 // Do nothing, this is a placeholder for compilation. 554 return "" 555 }, 556 "metadata_set": func(key, value string) { 557 // Do nothing, this is a placeholder for compilation. 558 }, 559 "json_get": func(path string) (string, error) { 560 // Do nothing, this is a placeholder for compilation. 561 return "", errors.New("not implemented") 562 }, 563 "json_set": func(path, value string) (int, error) { 564 // Do nothing, this is a placeholder for compilation. 565 return 0, errors.New("not implemented") 566 }, 567 "json_set_int": func(path string, value int) (int, error) { 568 // Do nothing, this is a placeholder for compilation. 
569 return 0, errors.New("not implemented") 570 }, 571 "json_set_float": func(path string, value float64) (int, error) { 572 // Do nothing, this is a placeholder for compilation. 573 return 0, errors.New("not implemented") 574 }, 575 "json_set_bool": func(path string, value bool) (int, error) { 576 // Do nothing, this is a placeholder for compilation. 577 return 0, errors.New("not implemented") 578 }, 579 "json_append": func(path, value string) (int, error) { 580 // Do nothing, this is a placeholder for compilation. 581 return 0, errors.New("not implemented") 582 }, 583 "json_append_int": func(path string, value int) (int, error) { 584 // Do nothing, this is a placeholder for compilation. 585 return 0, errors.New("not implemented") 586 }, 587 "json_append_float": func(path string, value float64) (int, error) { 588 // Do nothing, this is a placeholder for compilation. 589 return 0, errors.New("not implemented") 590 }, 591 "json_append_bool": func(path string, value bool) (int, error) { 592 // Do nothing, this is a placeholder for compilation. 593 return 0, errors.New("not implemented") 594 }, 595 "json_delete": func(path string) (int, error) { 596 // Do nothing, this is a placeholder for compilation. 597 return 0, errors.New("not implemented") 598 }, 599 "json_length": func(path string) (int, error) { 600 // Do nothing, this is a placeholder for compilation. 601 return 0, errors.New("not implemented") 602 }, 603 "json_type": func(path string) (string, error) { 604 // Do nothing, this is a placeholder for compilation. 
605 return "", errors.New("not implemented") 606 }, 607 "create_json_object": func(vals ...string) string { 608 pairs := map[string]string{} 609 for i := 0; i < len(vals)-1; i += 2 { 610 pairs[vals[i]] = vals[i+1] 611 } 612 bytes, _ := json.Marshal(pairs) 613 if len(bytes) == 0 { 614 return "{}" 615 } 616 return string(bytes) 617 }, 618 "create_json_array": func(vals ...string) string { 619 bytes, _ := json.Marshal(vals) 620 if len(bytes) == 0 { 621 return "[]" 622 } 623 return string(bytes) 624 }, 625 "print_log": func(value, level string) { 626 // Do nothing, this is a placeholder for compilation. 627 }, 628 } 629 630 //------------------------------------------------------------------------------ 631 632 func flattenForAWK(path string, data interface{}) map[string]string { 633 m := map[string]string{} 634 635 switch t := data.(type) { 636 case map[string]interface{}: 637 for k, v := range t { 638 newPath := k 639 if len(path) > 0 { 640 newPath = path + "." + k 641 } 642 for k2, v2 := range flattenForAWK(newPath, v) { 643 m[k2] = v2 644 } 645 } 646 case []interface{}: 647 for _, ele := range t { 648 for k, v := range flattenForAWK(path, ele) { 649 m[k] = v 650 } 651 } 652 default: 653 m[path] = fmt.Sprintf("%v", t) 654 } 655 656 return m 657 } 658 659 //------------------------------------------------------------------------------ 660 661 // ProcessMessage applies the processor to a message, either creating >0 662 // resulting messages or a response to be sent back to the message source. 
func (a *AWK) ProcessMessage(msg types.Message) ([]types.Message, types.Response) {
	a.mCount.Incr(1)
	newMsg := msg.Copy()
	// Lazily-populated cache of mutable JSON roots, one slot per part, so
	// repeated json_* calls against the same part share a single document.
	mutableJSONParts := make([]interface{}, newMsg.Len())

	// Take a snapshot of the base function set under lock; the copy is then
	// mutated freely below without further synchronisation.
	a.mut.Lock()
	customFuncs := make(map[string]interface{}, len(a.functions))
	for k, v := range a.functions {
		customFuncs[k] = v
	}
	a.mut.Unlock()

	// proc executes the compiled program against a single part, installing
	// part-scoped implementations of the custom functions first.
	// NOTE(review): customFuncs is a single map reassigned for each part, so
	// this assumes parts are processed sequentially by
	// IteratePartsWithSpanV2 — confirm before parallelising.
	proc := func(i int, span *tracing.Span, part types.Part) error {
		var outBuf, errBuf bytes.Buffer

		// Function overrides
		customFuncs["metadata_get"] = func(k string) string {
			return part.Metadata().Get(k)
		}
		customFuncs["metadata_set"] = func(k, v string) {
			part.Metadata().Set(k, v)
		}
		// json_get reads from the part directly (not the mutable cache), and
		// returns string values unquoted but all other types as raw JSON.
		customFuncs["json_get"] = func(path string) (string, error) {
			jsonPart, err := part.JSON()
			if err != nil {
				return "", fmt.Errorf("failed to parse message into json: %v", err)
			}
			gPart := gabs.Wrap(jsonPart)
			gTarget := gPart.Path(path)
			if gTarget.Data() == nil {
				return "null", nil
			}
			if str, isString := gTarget.Data().(string); isString {
				return str, nil
			}
			return gTarget.String(), nil
		}
		// getJSON returns a mutable copy of the part's JSON document, parsing
		// and deep-copying it on first use and caching it for later calls.
		getJSON := func() (*gabs.Container, error) {
			var err error
			jsonPart := mutableJSONParts[i]
			if jsonPart == nil {
				if jsonPart, err = part.JSON(); err == nil {
					jsonPart, err = message.CopyJSON(jsonPart)
				}
				if err == nil {
					mutableJSONParts[i] = jsonPart
				}
			}
			if err != nil {
				return nil, fmt.Errorf("failed to parse message into json: %v", err)
			}
			gPart := gabs.Wrap(jsonPart)
			return gPart, nil
		}
		// setJSON writes v at path and commits the document back to the part.
		// The int return exists only to satisfy the AWK function signature.
		setJSON := func(path string, v interface{}) (int, error) {
			gPart, err := getJSON()
			if err != nil {
				return 0, err
			}
			gPart.SetP(v, path)
			part.SetJSON(gPart.Data())
			return 0, nil
		}
		customFuncs["json_set"] = func(path, v string) (int, error) {
			return setJSON(path, v)
		}
		customFuncs["json_set_int"] = func(path string, v int) (int, error) {
			return setJSON(path, v)
		}
		customFuncs["json_set_float"] = func(path string, v float64) (int, error) {
			return setJSON(path, v)
		}
		customFuncs["json_set_bool"] = func(path string, v bool) (int, error) {
			return setJSON(path, v)
		}
		// arrayAppendJSON appends v to the array at path (gabs creates or
		// converts the target as needed) and commits back to the part.
		arrayAppendJSON := func(path string, v interface{}) (int, error) {
			gPart, err := getJSON()
			if err != nil {
				return 0, err
			}
			gPart.ArrayAppendP(v, path)
			part.SetJSON(gPart.Data())
			return 0, nil
		}
		customFuncs["json_append"] = func(path, v string) (int, error) {
			return arrayAppendJSON(path, v)
		}
		customFuncs["json_append_int"] = func(path string, v int) (int, error) {
			return arrayAppendJSON(path, v)
		}
		customFuncs["json_append_float"] = func(path string, v float64) (int, error) {
			return arrayAppendJSON(path, v)
		}
		customFuncs["json_append_bool"] = func(path string, v bool) (int, error) {
			return arrayAppendJSON(path, v)
		}
		customFuncs["json_delete"] = func(path string) (int, error) {
			gObj, err := getJSON()
			if err != nil {
				return 0, err
			}
			gObj.DeleteP(path)
			part.SetJSON(gObj.Data())
			return 0, nil
		}
		// json_length returns len() for string or array targets and zero for
		// anything else (including missing paths), as documented above.
		customFuncs["json_length"] = func(path string) (int, error) {
			gObj, err := getJSON()
			if err != nil {
				return 0, err
			}
			switch t := gObj.Path(path).Data().(type) {
			case string:
				return len(t), nil
			case []interface{}:
				return len(t), nil
			}
			return 0, nil
		}
		customFuncs["json_type"] = func(path string) (string, error) {
			gObj, err := getJSON()
			if err != nil {
				return "", err
			}
			if !gObj.ExistsP(path) {
				return "undefined", nil
			}
			switch t := gObj.Path(path).Data().(type) {
			case int:
				return "int", nil
			case float64:
				return "float", nil
			case json.Number:
				// Numbers parsed as json.Number are reported as "float".
				return "float", nil
			case string:
				return "string", nil
			case bool:
				return "bool", nil
			case []interface{}:
				return "array", nil
			case map[string]interface{}:
				return "object", nil
			case nil:
				return "null", nil
			default:
				return "", fmt.Errorf("type not recognised: %T", t)
			}
		}

		config := &interp.Config{
			Output: &outBuf,
			Error:  &errBuf,
			Funcs:  customFuncs,
		}

		// Feed stdin and variables according to the configured codec: "json"
		// flattens the document into variables, "text" pipes the raw part,
		// and "none" provides a single-space stdin with no variables.
		if a.conf.Codec == "json" {
			jsonPart, err := part.JSON()
			if err != nil {
				a.mErr.Incr(1)
				a.log.Errorf("Failed to parse part into json: %v\n", err)
				return err
			}

			for k, v := range flattenForAWK("", jsonPart) {
				config.Vars = append(config.Vars, varInvalidRegexp.ReplaceAllString(k, "_"), v)
			}
			config.Stdin = bytes.NewReader([]byte(" "))
		} else if a.conf.Codec == "text" {
			config.Stdin = bytes.NewReader(part.Get())
		} else {
			config.Stdin = bytes.NewReader([]byte(" "))
		}

		// Metadata is exposed as variables for every codec except "none".
		if a.conf.Codec != "none" {
			part.Metadata().Iter(func(k, v string) error {
				config.Vars = append(config.Vars, varInvalidRegexp.ReplaceAllString(k, "_"), v)
				return nil
			})
		}

		if exitStatus, err := interp.ExecProgram(a.program, config); err != nil {
			a.mErr.Incr(1)
			a.log.Errorf("Non-fatal execution error: %v\n", err)
			return err
		} else if exitStatus != 0 {
			a.mErr.Incr(1)
			err = fmt.Errorf(
				"non-fatal execution error: awk interpreter returned non-zero exit code: %d", exitStatus,
			)
			a.log.Errorf("AWK: %v\n", err)
			return err
		}

		// Anything written to the program's error stream is treated as a
		// processing failure for this part.
		if errMsg, err := io.ReadAll(&errBuf); err != nil {
			a.log.Errorf("Read err error: %v\n", err)
		} else if len(errMsg) > 0 {
			a.mErr.Incr(1)
			a.log.Errorf("Execution error: %s\n", errMsg)
			return errors.New(string(errMsg))
		}

		resMsg, err := io.ReadAll(&outBuf)
		if err != nil {
			a.mErr.Incr(1)
			a.log.Errorf("Read output error: %v\n", err)
			return err
		}

		// Only replace the part contents if the program printed something;
		// otherwise the original contents are left unchanged.
		if len(resMsg) > 0 {
			// Remove trailing line break
			if resMsg[len(resMsg)-1] == '\n' {
				resMsg = resMsg[:len(resMsg)-1]
			}
			part.Set(resMsg)
		}
		return nil
	}

	IteratePartsWithSpanV2(TypeAWK, a.parts, newMsg, proc)

	msgs := [1]types.Message{newMsg}

	a.mBatchSent.Incr(1)
	a.mSent.Incr(int64(newMsg.Len()))
	return msgs[:], nil
}

// CloseAsync shuts down the processor and stops processing requests.
func (a *AWK) CloseAsync() {
}

// WaitForClose blocks until the processor has closed down.
func (a *AWK) WaitForClose(timeout time.Duration) error {
	return nil
}

//------------------------------------------------------------------------------