github.com/Jeffail/benthos/v3@v3.65.0/lib/processor/parse_log.go (about)

     1  package processor
     2  
     3  import (
     4  	"fmt"
     5  	"strconv"
     6  	"time"
     7  
     8  	"github.com/Jeffail/benthos/v3/internal/docs"
     9  	"github.com/Jeffail/benthos/v3/internal/tracing"
    10  	"github.com/Jeffail/benthos/v3/lib/log"
    11  	"github.com/Jeffail/benthos/v3/lib/metrics"
    12  	"github.com/Jeffail/benthos/v3/lib/types"
    13  	syslog "github.com/influxdata/go-syslog/v3"
    14  	"github.com/influxdata/go-syslog/v3/rfc3164"
    15  	"github.com/influxdata/go-syslog/v3/rfc5424"
    16  )
    17  
    18  func init() {
    19  	Constructors[TypeParseLog] = TypeSpec{
    20  		constructor: NewParseLog,
    21  		Categories: []Category{
    22  			CategoryParsing,
    23  		},
    24  		Summary: `
    25  Parses common log [formats](#formats) into [structured data](#codecs). This is
    26  easier and often much faster than ` + "[`grok`](/docs/components/processors/grok)" + `.`,
    27  		FieldSpecs: docs.FieldSpecs{
    28  			docs.FieldCommon("format", "A common log [format](#formats) to parse.").HasOptions(
    29  				"syslog_rfc5424", "syslog_rfc3164",
    30  			),
    31  			docs.FieldCommon("codec", "Specifies the structured format to parse a log into.").HasOptions(
    32  				"json",
    33  			),
    34  			docs.FieldAdvanced("best_effort", "Still returns partially parsed messages even if an error occurs."),
    35  			docs.FieldAdvanced("allow_rfc3339", "Also accept timestamps in rfc3339 format while parsing."+
    36  				" Applicable to format `syslog_rfc3164`."),
    37  			docs.FieldAdvanced("default_year", "Sets the strategy used to set the year for rfc3164 timestamps."+
    38  				" Applicable to format `syslog_rfc3164`. When set to `current` the current year will be set, when"+
    39  				" set to an integer that value will be used. Leave this field empty to not set a default year at all."),
    40  			docs.FieldAdvanced("default_timezone", "Sets the strategy to decide the timezone for rfc3164 timestamps."+
    41  				" Applicable to format `syslog_rfc3164`. This value should follow the [time.LoadLocation](https://golang.org/pkg/time/#LoadLocation) format."),
    42  
    43  			PartsFieldSpec,
    44  		},
    45  		Footnotes: `
    46  ## Codecs
    47  
    48  Currently the only supported structured data codec is ` + "`json`" + `.
    49  
    50  ## Formats
    51  
    52  ### ` + "`syslog_rfc5424`" + `
    53  
    54  Attempts to parse a log following the [Syslog rfc5424](https://tools.ietf.org/html/rfc5424)
    55  spec. The resulting structured document may contain any of the following fields:
    56  
    57  - ` + "`message`" + ` (string)
    58  - ` + "`timestamp`" + ` (string, RFC3339)
    59  - ` + "`facility`" + ` (int)
    60  - ` + "`severity`" + ` (int)
    61  - ` + "`priority`" + ` (int)
    62  - ` + "`version`" + ` (int)
    63  - ` + "`hostname`" + ` (string)
    64  - ` + "`procid`" + ` (string)
    65  - ` + "`appname`" + ` (string)
    66  - ` + "`msgid`" + ` (string)
    67  - ` + "`structureddata`" + ` (object)
    68  
    69  ### ` + "`syslog_rfc3164`" + `
    70  
    71  Attempts to parse a log following the [Syslog rfc3164](https://tools.ietf.org/html/rfc3164)
    72  spec. The resulting structured document may contain any of the following fields:
    73  
    74  - ` + "`message`" + ` (string)
    75  - ` + "`timestamp`" + ` (string, RFC3339)
    76  - ` + "`facility`" + ` (int)
    77  - ` + "`severity`" + ` (int)
    78  - ` + "`priority`" + ` (int)
    79  - ` + "`hostname`" + ` (string)
    80  - ` + "`procid`" + ` (string)
    81  - ` + "`appname`" + ` (string)
    82  - ` + "`msgid`" + ` (string)
    83  `,
    84  	}
    85  }
    86  
    87  //------------------------------------------------------------------------------
    88  
// ParseLogConfig contains configuration fields for the ParseLog processor.
// Defaults for every field are supplied by NewParseLogConfig.
type ParseLogConfig struct {
	// Parts is the message part selection for the processor (documented to
	// users via PartsFieldSpec).
	Parts []int `json:"parts" yaml:"parts"`
	// Format names the log format to parse: "syslog_rfc5424" or
	// "syslog_rfc3164".
	Format string `json:"format" yaml:"format"`
	// Codec names the structured output format; "json" is the only documented
	// option.
	Codec string `json:"codec" yaml:"codec"`
	// BestEffort enables the underlying syslog parser's best-effort mode.
	BestEffort bool `json:"best_effort" yaml:"best_effort"`
	// WithRFC3339 additionally accepts RFC 3339 timestamps (syslog_rfc3164
	// only).
	WithRFC3339 bool `json:"allow_rfc3339" yaml:"allow_rfc3339"`
	// WithYear is the default-year strategy for syslog_rfc3164 timestamps:
	// "current", an integer year, or empty for no default.
	WithYear string `json:"default_year" yaml:"default_year"`
	// WithTimezone is a time.LoadLocation name applied to syslog_rfc3164
	// timestamps; empty leaves the parser's timezone handling untouched.
	WithTimezone string `json:"default_timezone" yaml:"default_timezone"`
}
    99  
   100  // NewParseLogConfig returns a ParseLogConfig with default values.
   101  func NewParseLogConfig() ParseLogConfig {
   102  	return ParseLogConfig{
   103  		Parts:  []int{},
   104  		Format: "syslog_rfc5424",
   105  		Codec:  "json",
   106  
   107  		BestEffort:   true,
   108  		WithRFC3339:  true,
   109  		WithYear:     "current",
   110  		WithTimezone: "UTC",
   111  	}
   112  }
   113  
   114  //------------------------------------------------------------------------------
   115  
// parserFormat parses the raw bytes of one log entry and returns the extracted
// fields keyed by name, or an error when the entry could not be parsed.
type parserFormat func(body []byte) (map[string]interface{}, error)
   117  
   118  func parserRFC5424(bestEffort bool) parserFormat {
   119  	var opts []syslog.MachineOption
   120  	if bestEffort {
   121  		opts = append(opts, rfc5424.WithBestEffort())
   122  	}
   123  	p := rfc5424.NewParser(opts...)
   124  
   125  	return func(body []byte) (map[string]interface{}, error) {
   126  		resGen, err := p.Parse(body)
   127  		if err != nil {
   128  			return nil, err
   129  		}
   130  		res := resGen.(*rfc5424.SyslogMessage)
   131  
   132  		resMap := make(map[string]interface{})
   133  		if res.Message != nil {
   134  			resMap["message"] = *res.Message
   135  		}
   136  		if res.Timestamp != nil {
   137  			resMap["timestamp"] = res.Timestamp.Format(time.RFC3339Nano)
   138  		}
   139  		if res.Facility != nil {
   140  			resMap["facility"] = *res.Facility
   141  		}
   142  		if res.Severity != nil {
   143  			resMap["severity"] = *res.Severity
   144  		}
   145  		if res.Priority != nil {
   146  			resMap["priority"] = *res.Priority
   147  		}
   148  		if res.Version != 0 {
   149  			resMap["version"] = res.Version
   150  		}
   151  		if res.Hostname != nil {
   152  			resMap["hostname"] = *res.Hostname
   153  		}
   154  		if res.ProcID != nil {
   155  			resMap["procid"] = *res.ProcID
   156  		}
   157  		if res.Appname != nil {
   158  			resMap["appname"] = *res.Appname
   159  		}
   160  		if res.MsgID != nil {
   161  			resMap["msgid"] = *res.MsgID
   162  		}
   163  		if res.StructuredData != nil {
   164  			resMap["structureddata"] = *res.StructuredData
   165  		}
   166  
   167  		return resMap, nil
   168  	}
   169  }
   170  
   171  func parserRFC3164(bestEffort, wrfc3339 bool, year, tz string) (parserFormat, error) {
   172  	var opts []syslog.MachineOption
   173  	if bestEffort {
   174  		opts = append(opts, rfc3164.WithBestEffort())
   175  	}
   176  	if wrfc3339 {
   177  		opts = append(opts, rfc3164.WithRFC3339())
   178  	}
   179  	switch year {
   180  	case "current":
   181  		opts = append(opts, rfc3164.WithYear(rfc3164.CurrentYear{}))
   182  	case "":
   183  		// do nothing
   184  	default:
   185  		iYear, err := strconv.Atoi(year)
   186  		if err != nil {
   187  			return nil, fmt.Errorf("failed to convert year %s into integer:  %v", year, err)
   188  		}
   189  		opts = append(opts, rfc3164.WithYear(rfc3164.Year{YYYY: iYear}))
   190  	}
   191  	if tz != "" {
   192  		loc, err := time.LoadLocation(tz)
   193  		if err != nil {
   194  			return nil, fmt.Errorf("failed to lookup timezone %s - %v", loc, err)
   195  		}
   196  		opts = append(opts, rfc3164.WithTimezone(loc))
   197  	}
   198  
   199  	p := rfc3164.NewParser(opts...)
   200  
   201  	return func(body []byte) (map[string]interface{}, error) {
   202  		resGen, err := p.Parse(body)
   203  		if err != nil {
   204  			return nil, err
   205  		}
   206  		res := resGen.(*rfc3164.SyslogMessage)
   207  
   208  		resMap := make(map[string]interface{})
   209  		if res.Message != nil {
   210  			resMap["message"] = *res.Message
   211  		}
   212  		if res.Timestamp != nil {
   213  			resMap["timestamp"] = res.Timestamp.Format(time.RFC3339Nano)
   214  		}
   215  		if res.Facility != nil {
   216  			resMap["facility"] = *res.Facility
   217  		}
   218  		if res.Severity != nil {
   219  			resMap["severity"] = *res.Severity
   220  		}
   221  		if res.Priority != nil {
   222  			resMap["priority"] = *res.Priority
   223  		}
   224  		if res.Hostname != nil {
   225  			resMap["hostname"] = *res.Hostname
   226  		}
   227  		if res.ProcID != nil {
   228  			resMap["procid"] = *res.ProcID
   229  		}
   230  		if res.Appname != nil {
   231  			resMap["appname"] = *res.Appname
   232  		}
   233  		if res.MsgID != nil {
   234  			resMap["msgid"] = *res.MsgID
   235  		}
   236  
   237  		return resMap, nil
   238  	}, nil
   239  }
   240  
   241  func getParseFormat(parser string, bestEffort, rfc3339 bool, defYear, defTZ string) (parserFormat, error) {
   242  	switch parser {
   243  	case "syslog_rfc5424":
   244  		return parserRFC5424(bestEffort), nil
   245  	case "syslog_rfc3164":
   246  		return parserRFC3164(bestEffort, rfc3339, defYear, defTZ)
   247  	}
   248  	return nil, fmt.Errorf("format not recognised: %s", parser)
   249  }
   250  
   251  //------------------------------------------------------------------------------
   252  
   253  // ParseLog is a processor that parses properly formatted messages.
   254  type ParseLog struct {
   255  	parts  []int
   256  	format parserFormat
   257  
   258  	conf  Config
   259  	log   log.Modular
   260  	stats metrics.Type
   261  
   262  	mCount     metrics.StatCounter
   263  	mErr       metrics.StatCounter
   264  	mErrJSONS  metrics.StatCounter
   265  	mSent      metrics.StatCounter
   266  	mBatchSent metrics.StatCounter
   267  }
   268  
   269  // NewParseLog returns a ParseLog processor.
   270  func NewParseLog(
   271  	conf Config, mgr types.Manager, log log.Modular, stats metrics.Type,
   272  ) (Type, error) {
   273  	s := &ParseLog{
   274  		parts: conf.ParseLog.Parts,
   275  		conf:  conf,
   276  		log:   log,
   277  		stats: stats,
   278  
   279  		mCount:     stats.GetCounter("count"),
   280  		mErr:       stats.GetCounter("error"),
   281  		mSent:      stats.GetCounter("sent"),
   282  		mBatchSent: stats.GetCounter("batch.sent"),
   283  	}
   284  	var err error
   285  	if s.format, err = getParseFormat(conf.ParseLog.Format, conf.ParseLog.BestEffort, conf.ParseLog.WithRFC3339,
   286  		conf.ParseLog.WithYear, conf.ParseLog.WithTimezone); err != nil {
   287  		return nil, err
   288  	}
   289  	return s, nil
   290  }
   291  
   292  //------------------------------------------------------------------------------
   293  
   294  // ProcessMessage applies the processor to a message, either creating >0
   295  // resulting messages or a response to be sent back to the message source.
   296  func (s *ParseLog) ProcessMessage(msg types.Message) ([]types.Message, types.Response) {
   297  	s.mCount.Incr(1)
   298  	newMsg := msg.Copy()
   299  
   300  	proc := func(index int, span *tracing.Span, part types.Part) error {
   301  		dataMap, err := s.format(part.Get())
   302  		if err != nil {
   303  			s.mErr.Incr(1)
   304  			s.log.Debugf("Failed to parse message as %s: %v\n", s.conf.ParseLog.Format, err)
   305  			return err
   306  		}
   307  
   308  		if err := newMsg.Get(index).SetJSON(dataMap); err != nil {
   309  			s.mErrJSONS.Incr(1)
   310  			s.mErr.Incr(1)
   311  			s.log.Debugf("Failed to convert log format result into json: %v\n", err)
   312  			return err
   313  		}
   314  
   315  		return nil
   316  	}
   317  
   318  	IteratePartsWithSpanV2(TypeParseLog, s.parts, newMsg, proc)
   319  
   320  	s.mBatchSent.Incr(1)
   321  	s.mSent.Incr(int64(newMsg.Len()))
   322  	return []types.Message{newMsg}, nil
   323  }
   324  
// CloseAsync shuts down the processor and stops processing requests. It is a
// no-op for ParseLog: the method body is empty.
func (s *ParseLog) CloseAsync() {
}
   328  
// WaitForClose blocks until the processor has closed down. For ParseLog it
// returns nil immediately and the timeout argument is ignored.
func (s *ParseLog) WaitForClose(timeout time.Duration) error {
	return nil
}
   333  
   334  //------------------------------------------------------------------------------