
     1  package processor
     3  import (
     4  	"fmt"
     5  	"strconv"
     6  	"time"
     8  	""
     9  	""
    10  	""
    11  	""
    12  	""
    13  	syslog ""
    14  	""
    15  	""
    16  )
    18  func init() {
    19  	Constructors[TypeParseLog] = TypeSpec{
    20  		constructor: NewParseLog,
    21  		Categories: []Category{
    22  			CategoryParsing,
    23  		},
    24  		Summary: `
    25  Parses common log [formats](#formats) into [structured data](#codecs). This is
    26  easier and often much faster than ` + "[`grok`](/docs/components/processors/grok)" + `.`,
    27  		FieldSpecs: docs.FieldSpecs{
    28  			docs.FieldCommon("format", "A common log [format](#formats) to parse.").HasOptions(
    29  				"syslog_rfc5424", "syslog_rfc3164",
    30  			),
    31  			docs.FieldCommon("codec", "Specifies the structured format to parse a log into.").HasOptions(
    32  				"json",
    33  			),
    34  			docs.FieldAdvanced("best_effort", "Still returns partially parsed messages even if an error occurs."),
    35  			docs.FieldAdvanced("allow_rfc3339", "Also accept timestamps in rfc3339 format while parsing."+
    36  				" Applicable to format `syslog_rfc3164`."),
    37  			docs.FieldAdvanced("default_year", "Sets the strategy used to set the year for rfc3164 timestamps."+
    38  				" Applicable to format `syslog_rfc3164`. When set to `current` the current year will be set, when"+
    39  				" set to an integer that value will be used. Leave this field empty to not set a default year at all."),
    40  			docs.FieldAdvanced("default_timezone", "Sets the strategy to decide the timezone for rfc3164 timestamps."+
    41  				" Applicable to format `syslog_rfc3164`. This value should follow the [time.LoadLocation]( format."),
    43  			PartsFieldSpec,
    44  		},
    45  		Footnotes: `
    46  ## Codecs
    48  Currently the only supported structured data codec is ` + "`json`" + `.
    50  ## Formats
    52  ### ` + "`syslog_rfc5424`" + `
    54  Attempts to parse a log following the [Syslog rfc5424](
    55  spec. The resulting structured document may contain any of the following fields:
    57  - ` + "`message`" + ` (string)
    58  - ` + "`timestamp`" + ` (string, RFC3339)
    59  - ` + "`facility`" + ` (int)
    60  - ` + "`severity`" + ` (int)
    61  - ` + "`priority`" + ` (int)
    62  - ` + "`version`" + ` (int)
    63  - ` + "`hostname`" + ` (string)
    64  - ` + "`procid`" + ` (string)
    65  - ` + "`appname`" + ` (string)
    66  - ` + "`msgid`" + ` (string)
    67  - ` + "`structureddata`" + ` (object)
    69  ### ` + "`syslog_rfc3164`" + `
    71  Attempts to parse a log following the [Syslog rfc3164](
    72  spec. The resulting structured document may contain any of the following fields:
    74  - ` + "`message`" + ` (string)
    75  - ` + "`timestamp`" + ` (string, RFC3339)
    76  - ` + "`facility`" + ` (int)
    77  - ` + "`severity`" + ` (int)
    78  - ` + "`priority`" + ` (int)
    79  - ` + "`hostname`" + ` (string)
    80  - ` + "`procid`" + ` (string)
    81  - ` + "`appname`" + ` (string)
    82  - ` + "`msgid`" + ` (string)
    83  `,
    84  	}
    85  }
    87  //------------------------------------------------------------------------------
// ParseLogConfig contains configuration fields for the ParseLog processor.
// Defaults for every field are provided by NewParseLogConfig.
type ParseLogConfig struct {
	// Parts is copied into the processor by NewParseLog; presumably the
	// indexes of the message parts to process (see PartsFieldSpec) — confirm.
	Parts        []int  `json:"parts" yaml:"parts"`
	// Format names the log format to parse. getParseFormat recognises
	// "syslog_rfc5424" and "syslog_rfc3164" and rejects anything else.
	Format       string `json:"format" yaml:"format"`
	// Codec names the structured format a log is parsed into; the documented
	// option is "json". It is not read by the code visible in this file.
	Codec        string `json:"codec" yaml:"codec"`
	// BestEffort enables the underlying parser's best-effort option.
	BestEffort   bool   `json:"best_effort" yaml:"best_effort"`
	// WithRFC3339 enables the rfc3164 parser's RFC3339 timestamp option. It
	// only applies to format "syslog_rfc3164".
	WithRFC3339  bool   `json:"allow_rfc3339" yaml:"allow_rfc3339"`
	// WithYear selects the year used for rfc3164 timestamps: "current" for
	// the current year, an integer string for a fixed year, or empty to set
	// no default year. It only applies to format "syslog_rfc3164".
	WithYear     string `json:"default_year" yaml:"default_year"`
	// WithTimezone is a location name resolved with time.LoadLocation and
	// applied to rfc3164 timestamps; leave empty to set no timezone. It only
	// applies to format "syslog_rfc3164".
	WithTimezone string `json:"default_timezone" yaml:"default_timezone"`
}
   100  // NewParseLogConfig returns a ParseLogConfig with default values.
   101  func NewParseLogConfig() ParseLogConfig {
   102  	return ParseLogConfig{
   103  		Parts:  []int{},
   104  		Format: "syslog_rfc5424",
   105  		Codec:  "json",
   107  		BestEffort:   true,
   108  		WithRFC3339:  true,
   109  		WithYear:     "current",
   110  		WithTimezone: "UTC",
   111  	}
   112  }
   114  //------------------------------------------------------------------------------
// parserFormat is the function signature shared by the log parsers in this
// file: it receives the raw bytes of a log entry and returns the extracted
// fields keyed by name, or an error when the entry could not be parsed.
type parserFormat func(body []byte) (map[string]interface{}, error)
   118  func parserRFC5424(bestEffort bool) parserFormat {
   119  	var opts []syslog.MachineOption
   120  	if bestEffort {
   121  		opts = append(opts, rfc5424.WithBestEffort())
   122  	}
   123  	p := rfc5424.NewParser(opts...)
   125  	return func(body []byte) (map[string]interface{}, error) {
   126  		resGen, err := p.Parse(body)
   127  		if err != nil {
   128  			return nil, err
   129  		}
   130  		res := resGen.(*rfc5424.SyslogMessage)
   132  		resMap := make(map[string]interface{})
   133  		if res.Message != nil {
   134  			resMap["message"] = *res.Message
   135  		}
   136  		if res.Timestamp != nil {
   137  			resMap["timestamp"] = res.Timestamp.Format(time.RFC3339Nano)
   138  		}
   139  		if res.Facility != nil {
   140  			resMap["facility"] = *res.Facility
   141  		}
   142  		if res.Severity != nil {
   143  			resMap["severity"] = *res.Severity
   144  		}
   145  		if res.Priority != nil {
   146  			resMap["priority"] = *res.Priority
   147  		}
   148  		if res.Version != 0 {
   149  			resMap["version"] = res.Version
   150  		}
   151  		if res.Hostname != nil {
   152  			resMap["hostname"] = *res.Hostname
   153  		}
   154  		if res.ProcID != nil {
   155  			resMap["procid"] = *res.ProcID
   156  		}
   157  		if res.Appname != nil {
   158  			resMap["appname"] = *res.Appname
   159  		}
   160  		if res.MsgID != nil {
   161  			resMap["msgid"] = *res.MsgID
   162  		}
   163  		if res.StructuredData != nil {
   164  			resMap["structureddata"] = *res.StructuredData
   165  		}
   167  		return resMap, nil
   168  	}
   169  }
   171  func parserRFC3164(bestEffort, wrfc3339 bool, year, tz string) (parserFormat, error) {
   172  	var opts []syslog.MachineOption
   173  	if bestEffort {
   174  		opts = append(opts, rfc3164.WithBestEffort())
   175  	}
   176  	if wrfc3339 {
   177  		opts = append(opts, rfc3164.WithRFC3339())
   178  	}
   179  	switch year {
   180  	case "current":
   181  		opts = append(opts, rfc3164.WithYear(rfc3164.CurrentYear{}))
   182  	case "":
   183  		// do nothing
   184  	default:
   185  		iYear, err := strconv.Atoi(year)
   186  		if err != nil {
   187  			return nil, fmt.Errorf("failed to convert year %s into integer:  %v", year, err)
   188  		}
   189  		opts = append(opts, rfc3164.WithYear(rfc3164.Year{YYYY: iYear}))
   190  	}
   191  	if tz != "" {
   192  		loc, err := time.LoadLocation(tz)
   193  		if err != nil {
   194  			return nil, fmt.Errorf("failed to lookup timezone %s - %v", loc, err)
   195  		}
   196  		opts = append(opts, rfc3164.WithTimezone(loc))
   197  	}
   199  	p := rfc3164.NewParser(opts...)
   201  	return func(body []byte) (map[string]interface{}, error) {
   202  		resGen, err := p.Parse(body)
   203  		if err != nil {
   204  			return nil, err
   205  		}
   206  		res := resGen.(*rfc3164.SyslogMessage)
   208  		resMap := make(map[string]interface{})
   209  		if res.Message != nil {
   210  			resMap["message"] = *res.Message
   211  		}
   212  		if res.Timestamp != nil {
   213  			resMap["timestamp"] = res.Timestamp.Format(time.RFC3339Nano)
   214  		}
   215  		if res.Facility != nil {
   216  			resMap["facility"] = *res.Facility
   217  		}
   218  		if res.Severity != nil {
   219  			resMap["severity"] = *res.Severity
   220  		}
   221  		if res.Priority != nil {
   222  			resMap["priority"] = *res.Priority
   223  		}
   224  		if res.Hostname != nil {
   225  			resMap["hostname"] = *res.Hostname
   226  		}
   227  		if res.ProcID != nil {
   228  			resMap["procid"] = *res.ProcID
   229  		}
   230  		if res.Appname != nil {
   231  			resMap["appname"] = *res.Appname
   232  		}
   233  		if res.MsgID != nil {
   234  			resMap["msgid"] = *res.MsgID
   235  		}
   237  		return resMap, nil
   238  	}, nil
   239  }
   241  func getParseFormat(parser string, bestEffort, rfc3339 bool, defYear, defTZ string) (parserFormat, error) {
   242  	switch parser {
   243  	case "syslog_rfc5424":
   244  		return parserRFC5424(bestEffort), nil
   245  	case "syslog_rfc3164":
   246  		return parserRFC3164(bestEffort, rfc3339, defYear, defTZ)
   247  	}
   248  	return nil, fmt.Errorf("format not recognised: %s", parser)
   249  }
   251  //------------------------------------------------------------------------------
// ParseLog is a processor that parses properly formatted messages.
type ParseLog struct {
	// parts is taken from the ParseLog.Parts configuration field.
	parts  []int
	// format is the parser chosen by getParseFormat for the configured format.
	format parserFormat

	// conf is the full processor configuration; the configured format name is
	// read from it when logging parse failures.
	conf  Config
	// log receives debug output for parse and JSON conversion failures.
	log   log.Modular
	// stats is the metrics source passed to the constructor.
	stats metrics.Type

	// mCount is incremented once per ProcessMessage call.
	mCount     metrics.StatCounter
	// mErr is incremented for every part that fails to parse or to be set as JSON.
	mErr       metrics.StatCounter
	// mErrJSONS is incremented when setting the parsed result as JSON fails.
	mErrJSONS  metrics.StatCounter
	// mSent is incremented by the number of parts in the resulting message.
	mSent      metrics.StatCounter
	// mBatchSent is incremented once per ProcessMessage call.
	mBatchSent metrics.StatCounter
}
   269  // NewParseLog returns a ParseLog processor.
   270  func NewParseLog(
   271  	conf Config, mgr types.Manager, log log.Modular, stats metrics.Type,
   272  ) (Type, error) {
   273  	s := &ParseLog{
   274  		parts: conf.ParseLog.Parts,
   275  		conf:  conf,
   276  		log:   log,
   277  		stats: stats,
   279  		mCount:     stats.GetCounter("count"),
   280  		mErr:       stats.GetCounter("error"),
   281  		mSent:      stats.GetCounter("sent"),
   282  		mBatchSent: stats.GetCounter("batch.sent"),
   283  	}
   284  	var err error
   285  	if s.format, err = getParseFormat(conf.ParseLog.Format, conf.ParseLog.BestEffort, conf.ParseLog.WithRFC3339,
   286  		conf.ParseLog.WithYear, conf.ParseLog.WithTimezone); err != nil {
   287  		return nil, err
   288  	}
   289  	return s, nil
   290  }
   292  //------------------------------------------------------------------------------
   294  // ProcessMessage applies the processor to a message, either creating >0
   295  // resulting messages or a response to be sent back to the message source.
   296  func (s *ParseLog) ProcessMessage(msg types.Message) ([]types.Message, types.Response) {
   297  	s.mCount.Incr(1)
   298  	newMsg := msg.Copy()
   300  	proc := func(index int, span *tracing.Span, part types.Part) error {
   301  		dataMap, err := s.format(part.Get())
   302  		if err != nil {
   303  			s.mErr.Incr(1)
   304  			s.log.Debugf("Failed to parse message as %s: %v\n", s.conf.ParseLog.Format, err)
   305  			return err
   306  		}
   308  		if err := newMsg.Get(index).SetJSON(dataMap); err != nil {
   309  			s.mErrJSONS.Incr(1)
   310  			s.mErr.Incr(1)
   311  			s.log.Debugf("Failed to convert log format result into json: %v\n", err)
   312  			return err
   313  		}
   315  		return nil
   316  	}
   318  	IteratePartsWithSpanV2(TypeParseLog,, newMsg, proc)
   320  	s.mBatchSent.Incr(1)
   321  	s.mSent.Incr(int64(newMsg.Len()))
   322  	return []types.Message{newMsg}, nil
   323  }
// CloseAsync shuts down the processor and stops processing requests.
// This implementation is a no-op: the method body is empty.
func (s *ParseLog) CloseAsync() {
}
// WaitForClose blocks until the processor has closed down.
// This implementation returns nil immediately and ignores timeout.
func (s *ParseLog) WaitForClose(timeout time.Duration) error {
	return nil
}
   334  //------------------------------------------------------------------------------