github.com/Jeffail/benthos/v3@v3.65.0/lib/processor/awk.go (about)

     1  package processor
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/json"
     6  	"errors"
     7  	"fmt"
     8  	"io"
     9  	"regexp"
    10  	"sync"
    11  	"time"
    12  
    13  	"github.com/Jeffail/benthos/v3/internal/docs"
    14  	"github.com/Jeffail/benthos/v3/internal/tracing"
    15  	"github.com/Jeffail/benthos/v3/lib/log"
    16  	"github.com/Jeffail/benthos/v3/lib/message"
    17  	"github.com/Jeffail/benthos/v3/lib/metrics"
    18  	"github.com/Jeffail/benthos/v3/lib/types"
    19  	"github.com/Jeffail/gabs/v2"
    20  	"github.com/benhoyt/goawk/interp"
    21  	"github.com/benhoyt/goawk/parser"
    22  )
    23  
    24  //------------------------------------------------------------------------------
    25  
    26  var varInvalidRegexp *regexp.Regexp
    27  
    28  func init() {
    29  	varInvalidRegexp = regexp.MustCompile(`[^a-zA-Z0-9_]`)
    30  
    31  	Constructors[TypeAWK] = TypeSpec{
    32  		constructor: NewAWK,
    33  		Categories: []Category{
    34  			CategoryMapping,
    35  		},
    36  		Summary: `
    37  Executes an AWK program on messages. This processor is very powerful as it
    38  offers a range of [custom functions](#awk-functions) for querying and mutating
    39  message contents and metadata.`,
    40  		Description: `
    41  Works by feeding message contents as the program input based on a chosen
    42  [codec](#codecs) and replaces the contents of each message with the result. If
    43  the result is empty (nothing is printed by the program) then the original
    44  message contents remain unchanged.
    45  
    46  Comes with a wide range of [custom functions](#awk-functions) for accessing
    47  message metadata, json fields, printing logs, etc. These functions can be
    48  overridden by functions within the program.
    49  
    50  Check out the [examples section](#examples) in order to see how this processor
    51  can be used.
    52  
    53  This processor uses [GoAWK][goawk], in order to understand the differences
    54  in how the program works you can [read more about it here][goawk.differences].`,
    55  		Footnotes: `
    56  ## Codecs
    57  
    58  The chosen codec determines how the contents of the message are fed into the
    59  program. Codecs only impact the input string and variables initialised for your
    60  program, they do not change the range of custom functions available.
    61  
    62  ### ` + "`none`" + `
    63  
    64  An empty string is fed into the program. Functions can still be used in order to
    65  extract and mutate metadata and message contents.
    66  
    67  This is useful for when your program only uses functions and doesn't need the
    68  full text of the message to be parsed by the program, as it is significantly
    69  faster.
    70  
    71  ### ` + "`text`" + `
    72  
    73  The full contents of the message are fed into the program as a string, allowing
    74  you to reference tokenised segments of the message with variables ($0, $1, etc).
    75  Custom functions can still be used with this codec.
    76  
    77  This is the default codec as it behaves most similar to typical usage of the awk
    78  command line tool.
    79  
    80  ### ` + "`json`" + `
    81  
    82  An empty string is fed into the program, and variables are automatically
    83  initialised before execution of your program by walking the flattened JSON
    84  structure. Each value is converted into a variable by taking its full path,
    85  e.g. the object:
    86  
    87  ` + "``` json" + `
    88  {
    89  	"foo": {
    90  		"bar": {
    91  			"value": 10
    92  		},
    93  		"created_at": "2018-12-18T11:57:32"
    94  	}
    95  }
    96  ` + "```" + `
    97  
    98  Would result in the following variable declarations:
    99  
   100  ` + "```" + `
   101  foo_bar_value = 10
   102  foo_created_at = "2018-12-18T11:57:32"
   103  ` + "```" + `
   104  
   105  Custom functions can also still be used with this codec.
   106  
   107  ## AWK Functions
   108  
   109  ` + "### `json_get`" + `
   110  
   111  Signature: ` + "`json_get(path)`" + `
   112  
   113  Attempts to find a JSON value in the input message payload by a
   114  [dot separated path](/docs/configuration/field_paths) and returns it as a string.
   115  
   116  ` + "### `json_set`" + `
   117  
   118  Signature: ` + "`json_set(path, value)`" + `
   119  
   120  Attempts to set a JSON value in the input message payload identified by a
   121  [dot separated path](/docs/configuration/field_paths), the value argument will be interpreted
   122  as a string.
   123  
   124  In order to set non-string values use one of the following typed varieties:
   125  
   126  ` + "- `json_set_int(path, value)`" + `
   127  ` + "- `json_set_float(path, value)`" + `
   128  ` + "- `json_set_bool(path, value)`" + `
   129  
   130  ` + "### `json_append`" + `
   131  
   132  Signature: ` + "`json_append(path, value)`" + `
   133  
   134  Attempts to append a value to an array identified by a
   135  [dot separated path](/docs/configuration/field_paths). If the target does not
   136  exist it will be created. If the target exists but is not already an array then
   137  it will be converted into one, with its original contents set to the first
   138  element of the array.
   139  
   140  The value argument will be interpreted as a string. In order to append
   141  non-string values use one of the following typed varieties:
   142  
   143  ` + "- `json_append_int(path, value)`" + `
   144  ` + "- `json_append_float(path, value)`" + `
   145  ` + "- `json_append_bool(path, value)`" + `
   146  
   147  ` + "### `json_delete`" + `
   148  
   149  Signature: ` + "`json_delete(path)`" + `
   150  
   151  Attempts to delete a JSON field from the input message payload identified by a
   152  [dot separated path](/docs/configuration/field_paths).
   153  
   154  ` + "### `json_length`" + `
   155  
   156  Signature: ` + "`json_length(path)`" + `
   157  
   158  Returns the size of the string or array value of JSON field from the input
   159  message payload identified by a [dot separated path](/docs/configuration/field_paths).
   160  
   161  If the target field does not exist, or is not a string or array type, then zero
   162  is returned. In order to explicitly check the type of a field use ` + "`json_type`" + `.
   163  
   164  ` + "### `json_type`" + `
   165  
   166  Signature: ` + "`json_type(path)`" + `
   167  
   168  Returns the type of a JSON field from the input message payload identified by a
   169  [dot separated path](/docs/configuration/field_paths).
   170  
   171  Possible values are: "string", "int", "float", "bool", "undefined", "null",
   172  "array", "object".
   173  
   174  ` + "### `create_json_object`" + `
   175  
   176  Signature: ` + "`create_json_object(key1, val1, key2, val2, ...)`" + `
   177  
   178  Generates a valid JSON object of key value pair arguments. The arguments are
   179  variadic, meaning any number of pairs can be listed. The value will always
   180  resolve to a string regardless of the value type. E.g. the following call:
   181  
   182  ` + "`create_json_object(\"a\", \"1\", \"b\", 2, \"c\", \"3\")`" + `
   183  
   184  Would result in this string:
   185  
   186  ` + "`{\"a\":\"1\",\"b\":\"2\",\"c\":\"3\"}`" + `
   187  
   188  ` + "### `create_json_array`" + `
   189  
   190  Signature: ` + "`create_json_array(val1, val2, ...)`" + `
   191  
   192  Generates a valid JSON array of value arguments. The arguments are variadic,
   193  meaning any number of values can be listed. The value will always resolve to a
   194  string regardless of the value type. E.g. the following call:
   195  
   196  ` + "`create_json_array(\"1\", 2, \"3\")`" + `
   197  
   198  Would result in this string:
   199  
   200  ` + "`[\"1\",\"2\",\"3\"]`" + `
   201  
   202  ` + "### `metadata_set`" + `
   203  
   204  Signature: ` + "`metadata_set(key, value)`" + `
   205  
   206  Set a metadata key for the message to a value. The value will always resolve to
   207  a string regardless of the value type.
   208  
   209  ` + "### `metadata_get`" + `
   210  
   211  Signature: ` + "`metadata_get(key) string`" + `
   212  
   213  Get the value of a metadata key from the message.
   214  
   215  ` + "### `timestamp_unix`" + `
   216  
   217  Signature: ` + "`timestamp_unix() int`" + `
   218  
   219  Returns the current unix timestamp (the number of seconds since 01-01-1970).
   220  
   221  ` + "### `timestamp_unix`" + `
   222  
   223  Signature: ` + "`timestamp_unix(date) int`" + `
   224  
   225  Attempts to parse a date string by detecting its format and returns the
   226  equivalent unix timestamp (the number of seconds since 01-01-1970).
   227  
   228  ` + "### `timestamp_unix`" + `
   229  
   230  Signature: ` + "`timestamp_unix(date, format) int`" + `
   231  
   232  Attempts to parse a date string according to a format and returns the equivalent
   233  unix timestamp (the number of seconds since 01-01-1970).
   234  
   235  The format is defined by showing how the reference time, defined to be
   236  ` + "`Mon Jan 2 15:04:05 -0700 MST 2006`" + ` would be displayed if it were the value.
   237  
   238  ` + "### `timestamp_unix_nano`" + `
   239  
   240  Signature: ` + "`timestamp_unix_nano() int`" + `
   241  
   242  Returns the current unix timestamp in nanoseconds (the number of nanoseconds
   243  since 01-01-1970).
   244  
   245  ` + "### `timestamp_unix_nano`" + `
   246  
   247  Signature: ` + "`timestamp_unix_nano(date) int`" + `
   248  
   249  Attempts to parse a date string by detecting its format and returns the
   250  equivalent unix timestamp in nanoseconds (the number of nanoseconds since
   251  01-01-1970).
   252  
   253  ` + "### `timestamp_unix_nano`" + `
   254  
   255  Signature: ` + "`timestamp_unix_nano(date, format) int`" + `
   256  
   257  Attempts to parse a date string according to a format and returns the equivalent
   258  unix timestamp in nanoseconds (the number of nanoseconds since 01-01-1970).
   259  
   260  The format is defined by showing how the reference time, defined to be
   261  ` + "`Mon Jan 2 15:04:05 -0700 MST 2006`" + ` would be displayed if it were the value.
   262  
   263  ` + "### `timestamp_format`" + `
   264  
   265  Signature: ` + "`timestamp_format(unix, format) string`" + `
   266  
   267  Formats a unix timestamp. The format is defined by showing how the reference
   268  time, defined to be ` + "`Mon Jan 2 15:04:05 -0700 MST 2006`" + ` would be displayed if it
   269  were the value.
   270  
   271  The format is optional, and if omitted RFC3339 (` + "`2006-01-02T15:04:05Z07:00`" + `)
   272  will be used.
   273  
   274  ` + "### `timestamp_format_nano`" + `
   275  
   276  Signature: ` + "`timestamp_format_nano(unixNano, format) string`" + `
   277  
   278  Formats a unix timestamp in nanoseconds. The format is defined by showing how
   279  the reference time, defined to be ` + "`Mon Jan 2 15:04:05 -0700 MST 2006`" + ` would be
   280  displayed if it were the value.
   281  
   282  The format is optional, and if omitted RFC3339 (` + "`2006-01-02T15:04:05Z07:00`" + `)
   283  will be used.
   284  
   285  ` + "### `print_log`" + `
   286  
   287  Signature: ` + "`print_log(message, level)`" + `
   288  
   289  Prints a Benthos log message at a particular log level. The log level is
   290  optional, and if omitted the level ` + "`INFO`" + ` will be used.
   291  
   292  [goawk]: https://github.com/benhoyt/goawk
   293  [goawk.differences]: https://github.com/benhoyt/goawk#differences-from-awk`,
   294  		FieldSpecs: docs.FieldSpecs{
   295  			docs.FieldCommon("codec", "A [codec](#codecs) defines how messages should be inserted into the AWK program as variables. The codec does not change which [custom Benthos functions](#awk-functions) are available. The `text` codec is the closest to a typical AWK use case.").HasOptions("none", "text", "json"),
   296  			docs.FieldCommon("program", "An AWK program to execute"),
   297  			PartsFieldSpec,
   298  		},
   299  		Examples: []docs.AnnotatedExample{
   300  			{
   301  				Title: "JSON Mapping and Arithmetic",
   302  				Summary: `
   303  Because AWK is a full programming language it's much easier to map documents and
   304  perform arithmetic with it than with other Benthos processors. For example, if
   305  we were expecting documents of the form:
   306  
   307  ` + "```json" + `
   308  {"doc":{"val1":5,"val2":10},"id":"1","type":"add"}
   309  {"doc":{"val1":5,"val2":10},"id":"2","type":"multiply"}
   310  ` + "```" + `
   311  
   312  And we wished to perform the arithmetic specified in the ` + "`type`" + ` field,
   313  on the values ` + "`val1` and `val2`" + ` and, finally, map the result into the
   314  document, giving us the following resulting documents:
   315  
   316  ` + "```json" + `
   317  {"doc":{"result":15,"val1":5,"val2":10},"id":"1","type":"add"}
   318  {"doc":{"result":50,"val1":5,"val2":10},"id":"2","type":"multiply"}
   319  ` + "```" + `
   320  
   321  We can do that with the following:`,
   322  				Config: `
   323  pipeline:
   324    processors:
   325    - awk:
   326        program: |
   327          function map_add_vals() {
   328            json_set_int("doc.result", json_get("doc.val1") + json_get("doc.val2"));
   329          }
   330          function map_multiply_vals() {
   331            json_set_int("doc.result", json_get("doc.val1") * json_get("doc.val2"));
   332          }
   333          function map_unknown(type) {
   334            json_set("error","unknown document type");
   335            print_log("Document type not recognised: " type, "ERROR");
   336          }
   337          {
   338            type = json_get("type");
   339            if (type == "add")
   340              map_add_vals();
   341            else if (type == "multiply")
   342              map_multiply_vals();
   343            else
   344              map_unknown(type);
   345          }
   346  `,
   347  			},
   348  			{
   349  				Title: "Stuff With Arrays",
   350  				Summary: `
   351  It's possible to iterate JSON arrays by appending an index value to the path,
   352  this can be used to do things like removing duplicates from arrays. For example,
   353  given the following input document:
   354  
   355  ` + "```json" + `
   356  {"path":{"to":{"foos":["one","two","three","two","four"]}}}
   357  ` + "```" + `
   358  
   359  We could create a new array ` + "`foos_unique` from `foos`" + ` giving us the result:
   360  
   361  ` + "```json" + `
   362  {"path":{"to":{"foos":["one","two","three","two","four"],"foos_unique":["one","two","three","four"]}}}
   363  ` + "```" + `
   364  
   365  With the following config:`,
   366  				Config: `
   367  pipeline:
   368    processors:
   369    - awk:
   370        program: |
   371          {
   372            array_path = "path.to.foos"
   373            array_len = json_length(array_path)
   374  
   375            for (i = 0; i < array_len; i++) {
   376              ele = json_get(array_path "." i)
   377              if ( ! ( ele in seen ) ) {
   378                json_append(array_path "_unique", ele)
   379                seen[ele] = 1
   380              }
   381            }
   382          }
   383  `,
   384  			},
   385  		},
   386  	}
   387  }
   388  
   389  //------------------------------------------------------------------------------
   390  
// AWKConfig contains configuration fields for the AWK processor.
//
// Parts is the list of message part indexes handed to the part iterator when a
// message is processed, Codec names how each part is presented to the program
// (NewAWK accepts "none", "text" and "json" and rejects anything else), and
// Program is the AWK source text that NewAWK compiles.
type AWKConfig struct {
	Parts   []int  `json:"parts" yaml:"parts"`
	Codec   string `json:"codec" yaml:"codec"`
	Program string `json:"program" yaml:"program"`
}
   397  
   398  // NewAWKConfig returns a AWKConfig with default values.
   399  func NewAWKConfig() AWKConfig {
   400  	return AWKConfig{
   401  		Parts:   []int{},
   402  		Codec:   "text",
   403  		Program: "BEGIN { x = 0 } { print $0, x; x++ }",
   404  	}
   405  }
   406  
   407  //------------------------------------------------------------------------------
   408  
// AWK is a processor that executes AWK programs on a message part and replaces
// the contents with the result.
type AWK struct {
	// Part indexes taken from the config and the program compiled by NewAWK.
	parts   []int
	program *parser.Program

	// The originating config, the logger and metrics handles given to NewAWK,
	// and the mutex held by ProcessMessage while it copies functions.
	conf  AWKConfig
	log   log.Modular
	stats metrics.Type
	mut   sync.Mutex

	// Base table of custom AWK functions: a copy of awkFunctionsMap in which
	// print_log has been bound to this processor's logger.
	functions map[string]interface{}

	// Counters obtained from stats under the names "count", "error", "sent"
	// and "batch.sent" respectively.
	mCount     metrics.StatCounter
	mErr       metrics.StatCounter
	mSent      metrics.StatCounter
	mBatchSent metrics.StatCounter
}
   427  
   428  // NewAWK returns a AWK processor.
   429  func NewAWK(
   430  	conf Config, mgr types.Manager, log log.Modular, stats metrics.Type,
   431  ) (Type, error) {
   432  	program, err := parser.ParseProgram([]byte(conf.AWK.Program), &parser.ParserConfig{
   433  		Funcs: awkFunctionsMap,
   434  	})
   435  	if err != nil {
   436  		return nil, fmt.Errorf("failed to compile AWK program: %v", err)
   437  	}
   438  	switch conf.AWK.Codec {
   439  	case "none":
   440  	case "text":
   441  	case "json":
   442  	default:
   443  		return nil, fmt.Errorf("unrecognised codec: %v", conf.AWK.Codec)
   444  	}
   445  	functionOverrides := make(map[string]interface{}, len(awkFunctionsMap))
   446  	for k, v := range awkFunctionsMap {
   447  		functionOverrides[k] = v
   448  	}
   449  	functionOverrides["print_log"] = func(value, level string) {
   450  		switch level {
   451  		default:
   452  			fallthrough
   453  		case "", "INFO":
   454  			log.Infoln(value)
   455  		case "TRACE":
   456  			log.Traceln(value)
   457  		case "DEBUG":
   458  			log.Debugln(value)
   459  		case "WARN":
   460  			log.Warnln(value)
   461  		case "ERROR":
   462  			log.Errorln(value)
   463  		case "FATAL":
   464  			log.Fatalln(value)
   465  		}
   466  	}
   467  	a := &AWK{
   468  		parts:   conf.AWK.Parts,
   469  		program: program,
   470  		conf:    conf.AWK,
   471  		log:     log,
   472  		stats:   stats,
   473  
   474  		functions: functionOverrides,
   475  
   476  		mCount:     stats.GetCounter("count"),
   477  		mErr:       stats.GetCounter("error"),
   478  		mSent:      stats.GetCounter("sent"),
   479  		mBatchSent: stats.GetCounter("batch.sent"),
   480  	}
   481  	return a, nil
   482  }
   483  
   484  //------------------------------------------------------------------------------
   485  
   486  func getTime(dateStr, format string) (time.Time, error) {
   487  	if dateStr == "" {
   488  		return time.Now(), nil
   489  	}
   490  	if format == "" {
   491  		var err error
   492  		var parsed time.Time
   493  		for _, layout := range []string{
   494  			time.RubyDate,
   495  			time.RFC1123Z,
   496  			time.RFC1123,
   497  			time.RFC3339,
   498  			time.RFC822,
   499  			time.RFC822Z,
   500  			"Mon, 2 Jan 2006 15:04:05 -0700",
   501  			"2006-01-02T15:04:05MST",
   502  			"2006-01-02T15:04:05",
   503  			"2006-01-02 15:04:05",
   504  			"2006-01-02T15:04:05Z0700",
   505  			"2006-01-02",
   506  		} {
   507  			if parsed, err = time.Parse(layout, dateStr); err == nil {
   508  				break
   509  			}
   510  		}
   511  		if err != nil {
   512  			return time.Time{}, fmt.Errorf("failed to detect datetime format of: %v", dateStr)
   513  		}
   514  		return parsed, nil
   515  	}
   516  	return time.Parse(format, dateStr)
   517  }
   518  
   519  var awkFunctionsMap = map[string]interface{}{
   520  	"timestamp_unix": func(dateStr string, format string) (int64, error) {
   521  		ts, err := getTime(dateStr, format)
   522  		if err != nil {
   523  			return 0, err
   524  		}
   525  		return ts.Unix(), nil
   526  	},
   527  	"timestamp_unix_nano": func(dateStr string, format string) (int64, error) {
   528  		ts, err := getTime(dateStr, format)
   529  		if err != nil {
   530  			return 0, err
   531  		}
   532  		return ts.UnixNano(), nil
   533  	},
   534  	"timestamp_format": func(unix int64, formatArg string) string {
   535  		format := time.RFC3339
   536  		if len(formatArg) > 0 {
   537  			format = formatArg
   538  		}
   539  		t := time.Unix(unix, 0).In(time.UTC)
   540  		return t.Format(format)
   541  	},
   542  	"timestamp_format_nano": func(unixNano int64, formatArg string) string {
   543  		format := time.RFC3339
   544  		if len(formatArg) > 0 {
   545  			format = formatArg
   546  		}
   547  		s := unixNano / 1000000000
   548  		ns := unixNano - (s * 1000000000)
   549  		t := time.Unix(s, ns).In(time.UTC)
   550  		return t.Format(format)
   551  	},
   552  	"metadata_get": func(key string) string {
   553  		// Do nothing, this is a placeholder for compilation.
   554  		return ""
   555  	},
   556  	"metadata_set": func(key, value string) {
   557  		// Do nothing, this is a placeholder for compilation.
   558  	},
   559  	"json_get": func(path string) (string, error) {
   560  		// Do nothing, this is a placeholder for compilation.
   561  		return "", errors.New("not implemented")
   562  	},
   563  	"json_set": func(path, value string) (int, error) {
   564  		// Do nothing, this is a placeholder for compilation.
   565  		return 0, errors.New("not implemented")
   566  	},
   567  	"json_set_int": func(path string, value int) (int, error) {
   568  		// Do nothing, this is a placeholder for compilation.
   569  		return 0, errors.New("not implemented")
   570  	},
   571  	"json_set_float": func(path string, value float64) (int, error) {
   572  		// Do nothing, this is a placeholder for compilation.
   573  		return 0, errors.New("not implemented")
   574  	},
   575  	"json_set_bool": func(path string, value bool) (int, error) {
   576  		// Do nothing, this is a placeholder for compilation.
   577  		return 0, errors.New("not implemented")
   578  	},
   579  	"json_append": func(path, value string) (int, error) {
   580  		// Do nothing, this is a placeholder for compilation.
   581  		return 0, errors.New("not implemented")
   582  	},
   583  	"json_append_int": func(path string, value int) (int, error) {
   584  		// Do nothing, this is a placeholder for compilation.
   585  		return 0, errors.New("not implemented")
   586  	},
   587  	"json_append_float": func(path string, value float64) (int, error) {
   588  		// Do nothing, this is a placeholder for compilation.
   589  		return 0, errors.New("not implemented")
   590  	},
   591  	"json_append_bool": func(path string, value bool) (int, error) {
   592  		// Do nothing, this is a placeholder for compilation.
   593  		return 0, errors.New("not implemented")
   594  	},
   595  	"json_delete": func(path string) (int, error) {
   596  		// Do nothing, this is a placeholder for compilation.
   597  		return 0, errors.New("not implemented")
   598  	},
   599  	"json_length": func(path string) (int, error) {
   600  		// Do nothing, this is a placeholder for compilation.
   601  		return 0, errors.New("not implemented")
   602  	},
   603  	"json_type": func(path string) (string, error) {
   604  		// Do nothing, this is a placeholder for compilation.
   605  		return "", errors.New("not implemented")
   606  	},
   607  	"create_json_object": func(vals ...string) string {
   608  		pairs := map[string]string{}
   609  		for i := 0; i < len(vals)-1; i += 2 {
   610  			pairs[vals[i]] = vals[i+1]
   611  		}
   612  		bytes, _ := json.Marshal(pairs)
   613  		if len(bytes) == 0 {
   614  			return "{}"
   615  		}
   616  		return string(bytes)
   617  	},
   618  	"create_json_array": func(vals ...string) string {
   619  		bytes, _ := json.Marshal(vals)
   620  		if len(bytes) == 0 {
   621  			return "[]"
   622  		}
   623  		return string(bytes)
   624  	},
   625  	"print_log": func(value, level string) {
   626  		// Do nothing, this is a placeholder for compilation.
   627  	},
   628  }
   629  
   630  //------------------------------------------------------------------------------
   631  
   632  func flattenForAWK(path string, data interface{}) map[string]string {
   633  	m := map[string]string{}
   634  
   635  	switch t := data.(type) {
   636  	case map[string]interface{}:
   637  		for k, v := range t {
   638  			newPath := k
   639  			if len(path) > 0 {
   640  				newPath = path + "." + k
   641  			}
   642  			for k2, v2 := range flattenForAWK(newPath, v) {
   643  				m[k2] = v2
   644  			}
   645  		}
   646  	case []interface{}:
   647  		for _, ele := range t {
   648  			for k, v := range flattenForAWK(path, ele) {
   649  				m[k] = v
   650  			}
   651  		}
   652  	default:
   653  		m[path] = fmt.Sprintf("%v", t)
   654  	}
   655  
   656  	return m
   657  }
   658  
   659  //------------------------------------------------------------------------------
   660  
// ProcessMessage applies the processor to a message, either creating >0
// resulting messages or a response to be sent back to the message source.
//
// The compiled AWK program is run against parts of a copy of msg via
// IteratePartsWithSpanV2, using a.parts as the part selection. This method
// always returns exactly one message (the processed copy) and a nil response;
// per-part failures are logged, counted in the "error" metric and returned to
// the iterator rather than to the caller.
func (a *AWK) ProcessMessage(msg types.Message) ([]types.Message, types.Response) {
	a.mCount.Incr(1)
	newMsg := msg.Copy()
	// Per-part cache of JSON documents that have been copied for mutation,
	// filled lazily by getJSON below.
	mutableJSONParts := make([]interface{}, newMsg.Len())

	// Work on a private copy of the function table so that the per-part
	// overrides installed by proc never modify a.functions.
	a.mut.Lock()
	customFuncs := make(map[string]interface{}, len(a.functions))
	for k, v := range a.functions {
		customFuncs[k] = v
	}
	a.mut.Unlock()

	// proc runs the program for a single part. The same customFuncs map is
	// reused for every part and its part-bound entries are replaced on each
	// call.
	// NOTE(review): this relies on the iterator invoking proc sequentially -
	// confirm against IteratePartsWithSpanV2.
	proc := func(i int, span *tracing.Span, part types.Part) error {
		var outBuf, errBuf bytes.Buffer

		// Function overrides
		customFuncs["metadata_get"] = func(k string) string {
			return part.Metadata().Get(k)
		}
		customFuncs["metadata_set"] = func(k, v string) {
			part.Metadata().Set(k, v)
		}
		// json_get reads from the part's JSON document. A target whose data is
		// nil is reported as the string "null", string values are returned
		// as-is, and anything else is returned via the container's String().
		customFuncs["json_get"] = func(path string) (string, error) {
			jsonPart, err := part.JSON()
			if err != nil {
				return "", fmt.Errorf("failed to parse message into json: %v", err)
			}
			gPart := gabs.Wrap(jsonPart)
			gTarget := gPart.Path(path)
			if gTarget.Data() == nil {
				return "null", nil
			}
			if str, isString := gTarget.Data().(string); isString {
				return str, nil
			}
			return gTarget.String(), nil
		}
		// getJSON returns a wrapped, mutable copy of this part's JSON document.
		// The copy is made with message.CopyJSON on first use and cached in
		// mutableJSONParts[i] for subsequent calls.
		getJSON := func() (*gabs.Container, error) {
			var err error
			jsonPart := mutableJSONParts[i]
			if jsonPart == nil {
				if jsonPart, err = part.JSON(); err == nil {
					jsonPart, err = message.CopyJSON(jsonPart)
				}
				if err == nil {
					mutableJSONParts[i] = jsonPart
				}
			}
			if err != nil {
				return nil, fmt.Errorf("failed to parse message into json: %v", err)
			}
			gPart := gabs.Wrap(jsonPart)
			return gPart, nil
		}
		// setJSON writes v at path in the mutable document and stores the
		// document back on the part. The results of SetP and SetJSON are not
		// checked, so once the document is available this always returns
		// (0, nil).
		setJSON := func(path string, v interface{}) (int, error) {
			gPart, err := getJSON()
			if err != nil {
				return 0, err
			}
			gPart.SetP(v, path)
			part.SetJSON(gPart.Data())
			return 0, nil
		}
		// The typed variants only differ in the Go type of the value argument.
		customFuncs["json_set"] = func(path, v string) (int, error) {
			return setJSON(path, v)
		}
		customFuncs["json_set_int"] = func(path string, v int) (int, error) {
			return setJSON(path, v)
		}
		customFuncs["json_set_float"] = func(path string, v float64) (int, error) {
			return setJSON(path, v)
		}
		customFuncs["json_set_bool"] = func(path string, v bool) (int, error) {
			return setJSON(path, v)
		}
		// arrayAppendJSON appends v to the array at path in the mutable
		// document and stores the document back on the part. As with setJSON,
		// the results of ArrayAppendP and SetJSON are not checked.
		arrayAppendJSON := func(path string, v interface{}) (int, error) {
			gPart, err := getJSON()
			if err != nil {
				return 0, err
			}
			gPart.ArrayAppendP(v, path)
			part.SetJSON(gPart.Data())
			return 0, nil
		}
		customFuncs["json_append"] = func(path, v string) (int, error) {
			return arrayAppendJSON(path, v)
		}
		customFuncs["json_append_int"] = func(path string, v int) (int, error) {
			return arrayAppendJSON(path, v)
		}
		customFuncs["json_append_float"] = func(path string, v float64) (int, error) {
			return arrayAppendJSON(path, v)
		}
		customFuncs["json_append_bool"] = func(path string, v bool) (int, error) {
			return arrayAppendJSON(path, v)
		}
		// json_delete removes path from the mutable document; the result of
		// DeleteP is not checked.
		customFuncs["json_delete"] = func(path string) (int, error) {
			gObj, err := getJSON()
			if err != nil {
				return 0, err
			}
			gObj.DeleteP(path)
			part.SetJSON(gObj.Data())
			return 0, nil
		}
		// json_length returns len() of a string (a byte count) or of an array
		// at path, and zero for any other kind of target.
		customFuncs["json_length"] = func(path string) (int, error) {
			gObj, err := getJSON()
			if err != nil {
				return 0, err
			}
			switch t := gObj.Path(path).Data().(type) {
			case string:
				return len(t), nil
			case []interface{}:
				return len(t), nil
			}
			return 0, nil
		}
		// json_type names the Go type found at path: "undefined" when the path
		// does not exist, "float" for both float64 and json.Number, and an
		// error for any type that is not listed.
		customFuncs["json_type"] = func(path string) (string, error) {
			gObj, err := getJSON()
			if err != nil {
				return "", err
			}
			if !gObj.ExistsP(path) {
				return "undefined", nil
			}
			switch t := gObj.Path(path).Data().(type) {
			case int:
				return "int", nil
			case float64:
				return "float", nil
			case json.Number:
				return "float", nil
			case string:
				return "string", nil
			case bool:
				return "bool", nil
			case []interface{}:
				return "array", nil
			case map[string]interface{}:
				return "object", nil
			case nil:
				return "null", nil
			default:
				return "", fmt.Errorf("type not recognised: %T", t)
			}
		}

		config := &interp.Config{
			Output: &outBuf,
			Error:  &errBuf,
			Funcs:  customFuncs,
		}

		// Choose the program input according to the codec. "json" exposes the
		// flattened document as variables (characters outside [a-zA-Z0-9_] in
		// a path become "_") and feeds a single space as input, "text" feeds
		// the raw part contents, and any other codec feeds a single space.
		if a.conf.Codec == "json" {
			jsonPart, err := part.JSON()
			if err != nil {
				a.mErr.Incr(1)
				a.log.Errorf("Failed to parse part into json: %v\n", err)
				return err
			}

			for k, v := range flattenForAWK("", jsonPart) {
				config.Vars = append(config.Vars, varInvalidRegexp.ReplaceAllString(k, "_"), v)
			}
			config.Stdin = bytes.NewReader([]byte(" "))
		} else if a.conf.Codec == "text" {
			config.Stdin = bytes.NewReader(part.Get())
		} else {
			config.Stdin = bytes.NewReader([]byte(" "))
		}

		// Unless the codec is "none", every metadata key is also exposed as a
		// variable, with its name sanitised in the same way as JSON paths.
		if a.conf.Codec != "none" {
			part.Metadata().Iter(func(k, v string) error {
				config.Vars = append(config.Vars, varInvalidRegexp.ReplaceAllString(k, "_"), v)
				return nil
			})
		}

		// Both an interpreter error and a non-zero exit status fail the part.
		if exitStatus, err := interp.ExecProgram(a.program, config); err != nil {
			a.mErr.Incr(1)
			a.log.Errorf("Non-fatal execution error: %v\n", err)
			return err
		} else if exitStatus != 0 {
			a.mErr.Incr(1)
			err = fmt.Errorf(
				"non-fatal execution error: awk interpreter returned non-zero exit code: %d", exitStatus,
			)
			a.log.Errorf("AWK: %v\n", err)
			return err
		}

		// Anything the program wrote to its error stream fails the part. A
		// failure to read that buffer is only logged.
		if errMsg, err := io.ReadAll(&errBuf); err != nil {
			a.log.Errorf("Read err error: %v\n", err)
		} else if len(errMsg) > 0 {
			a.mErr.Incr(1)
			a.log.Errorf("Execution error: %s\n", errMsg)
			return errors.New(string(errMsg))
		}

		resMsg, err := io.ReadAll(&outBuf)
		if err != nil {
			a.mErr.Incr(1)
			a.log.Errorf("Read output error: %v\n", err)
			return err
		}

		// Only non-empty output replaces the part contents; when nothing was
		// printed the part is left as it is (including any changes made
		// through the json_* functions).
		if len(resMsg) > 0 {
			// Remove trailing line break
			if resMsg[len(resMsg)-1] == '\n' {
				resMsg = resMsg[:len(resMsg)-1]
			}
			part.Set(resMsg)
		}
		return nil
	}

	IteratePartsWithSpanV2(TypeAWK, a.parts, newMsg, proc)

	msgs := [1]types.Message{newMsg}

	a.mBatchSent.Incr(1)
	a.mSent.Incr(int64(newMsg.Len()))
	return msgs[:], nil
}
   888  
// CloseAsync shuts down the processor and stops processing requests. The body
// is empty, so calling it has no effect.
func (a *AWK) CloseAsync() {
}
   892  
// WaitForClose blocks until the processor has closed down. It returns nil
// immediately and never reads the timeout argument.
func (a *AWK) WaitForClose(timeout time.Duration) error {
	return nil
}
   897  
   898  //------------------------------------------------------------------------------