github.com/Jeffail/benthos/v3@v3.65.0/lib/processor/parse_log.go (about) 1 package processor 2 3 import ( 4 "fmt" 5 "strconv" 6 "time" 7 8 "github.com/Jeffail/benthos/v3/internal/docs" 9 "github.com/Jeffail/benthos/v3/internal/tracing" 10 "github.com/Jeffail/benthos/v3/lib/log" 11 "github.com/Jeffail/benthos/v3/lib/metrics" 12 "github.com/Jeffail/benthos/v3/lib/types" 13 syslog "github.com/influxdata/go-syslog/v3" 14 "github.com/influxdata/go-syslog/v3/rfc3164" 15 "github.com/influxdata/go-syslog/v3/rfc5424" 16 ) 17 18 func init() { 19 Constructors[TypeParseLog] = TypeSpec{ 20 constructor: NewParseLog, 21 Categories: []Category{ 22 CategoryParsing, 23 }, 24 Summary: ` 25 Parses common log [formats](#formats) into [structured data](#codecs). This is 26 easier and often much faster than ` + "[`grok`](/docs/components/processors/grok)" + `.`, 27 FieldSpecs: docs.FieldSpecs{ 28 docs.FieldCommon("format", "A common log [format](#formats) to parse.").HasOptions( 29 "syslog_rfc5424", "syslog_rfc3164", 30 ), 31 docs.FieldCommon("codec", "Specifies the structured format to parse a log into.").HasOptions( 32 "json", 33 ), 34 docs.FieldAdvanced("best_effort", "Still returns partially parsed messages even if an error occurs."), 35 docs.FieldAdvanced("allow_rfc3339", "Also accept timestamps in rfc3339 format while parsing."+ 36 " Applicable to format `syslog_rfc3164`."), 37 docs.FieldAdvanced("default_year", "Sets the strategy used to set the year for rfc3164 timestamps."+ 38 " Applicable to format `syslog_rfc3164`. When set to `current` the current year will be set, when"+ 39 " set to an integer that value will be used. Leave this field empty to not set a default year at all."), 40 docs.FieldAdvanced("default_timezone", "Sets the strategy to decide the timezone for rfc3164 timestamps."+ 41 " Applicable to format `syslog_rfc3164`. This value should follow the [time.LoadLocation](https://golang.org/pkg/time/#LoadLocation) format."), 42 43 PartsFieldSpec, 44 }, 45 Footnotes: ` 46 ## Codecs 47 48 Currently the only supported structured data codec is ` + "`json`" + `. 49 50 ## Formats 51 52 ### ` + "`syslog_rfc5424`" + ` 53 54 Attempts to parse a log following the [Syslog rfc5424](https://tools.ietf.org/html/rfc5424) 55 spec. The resulting structured document may contain any of the following fields: 56 57 - ` + "`message`" + ` (string) 58 - ` + "`timestamp`" + ` (string, RFC3339) 59 - ` + "`facility`" + ` (int) 60 - ` + "`severity`" + ` (int) 61 - ` + "`priority`" + ` (int) 62 - ` + "`version`" + ` (int) 63 - ` + "`hostname`" + ` (string) 64 - ` + "`procid`" + ` (string) 65 - ` + "`appname`" + ` (string) 66 - ` + "`msgid`" + ` (string) 67 - ` + "`structureddata`" + ` (object) 68 69 ### ` + "`syslog_rfc3164`" + ` 70 71 Attempts to parse a log following the [Syslog rfc3164](https://tools.ietf.org/html/rfc3164) 72 spec. The resulting structured document may contain any of the following fields: 73 74 - ` + "`message`" + ` (string) 75 - ` + "`timestamp`" + ` (string, RFC3339) 76 - ` + "`facility`" + ` (int) 77 - ` + "`severity`" + ` (int) 78 - ` + "`priority`" + ` (int) 79 - ` + "`hostname`" + ` (string) 80 - ` + "`procid`" + ` (string) 81 - ` + "`appname`" + ` (string) 82 - ` + "`msgid`" + ` (string) 83 `, 84 } 85 } 86 87 //------------------------------------------------------------------------------ 88 89 // ParseLogConfig contains configuration fields for the ParseLog processor. 90 type ParseLogConfig struct { 91 Parts []int `json:"parts" yaml:"parts"` 92 Format string `json:"format" yaml:"format"` 93 Codec string `json:"codec" yaml:"codec"` 94 BestEffort bool `json:"best_effort" yaml:"best_effort"` 95 WithRFC3339 bool `json:"allow_rfc3339" yaml:"allow_rfc3339"` 96 WithYear string `json:"default_year" yaml:"default_year"` 97 WithTimezone string `json:"default_timezone" yaml:"default_timezone"` 98 } 99 100 // NewParseLogConfig returns a ParseLogConfig with default values. 101 func NewParseLogConfig() ParseLogConfig { 102 return ParseLogConfig{ 103 Parts: []int{}, 104 Format: "syslog_rfc5424", 105 Codec: "json", 106 107 BestEffort: true, 108 WithRFC3339: true, 109 WithYear: "current", 110 WithTimezone: "UTC", 111 } 112 } 113 114 //------------------------------------------------------------------------------ 115 116 type parserFormat func(body []byte) (map[string]interface{}, error) 117 118 func parserRFC5424(bestEffort bool) parserFormat { 119 var opts []syslog.MachineOption 120 if bestEffort { 121 opts = append(opts, rfc5424.WithBestEffort()) 122 } 123 p := rfc5424.NewParser(opts...) 124 125 return func(body []byte) (map[string]interface{}, error) { 126 resGen, err := p.Parse(body) 127 if err != nil { 128 return nil, err 129 } 130 res := resGen.(*rfc5424.SyslogMessage) 131 132 resMap := make(map[string]interface{}) 133 if res.Message != nil { 134 resMap["message"] = *res.Message 135 } 136 if res.Timestamp != nil { 137 resMap["timestamp"] = res.Timestamp.Format(time.RFC3339Nano) 138 } 139 if res.Facility != nil { 140 resMap["facility"] = *res.Facility 141 } 142 if res.Severity != nil { 143 resMap["severity"] = *res.Severity 144 } 145 if res.Priority != nil { 146 resMap["priority"] = *res.Priority 147 } 148 if res.Version != 0 { 149 resMap["version"] = res.Version 150 } 151 if res.Hostname != nil { 152 resMap["hostname"] = *res.Hostname 153 } 154 if res.ProcID != nil { 155 resMap["procid"] = *res.ProcID 156 } 157 if res.Appname != nil { 158 resMap["appname"] = *res.Appname 159 } 160 if res.MsgID != nil { 161 resMap["msgid"] = *res.MsgID 162 } 163 if res.StructuredData != nil { 164 resMap["structureddata"] = *res.StructuredData 165 } 166 167 return resMap, nil 168 } 169 } 170 171 func parserRFC3164(bestEffort, wrfc3339 bool, year, tz string) (parserFormat, error) { 172 var opts []syslog.MachineOption 173 if bestEffort { 174 opts = append(opts, rfc3164.WithBestEffort()) 175 } 176 if wrfc3339 { 177 opts = append(opts, rfc3164.WithRFC3339()) 178 } 179 switch year { 180 case "current": 181 opts = append(opts, rfc3164.WithYear(rfc3164.CurrentYear{})) 182 case "": 183 // do nothing 184 default: 185 iYear, err := strconv.Atoi(year) 186 if err != nil { 187 return nil, fmt.Errorf("failed to convert year %s into integer: %v", year, err) 188 } 189 opts = append(opts, rfc3164.WithYear(rfc3164.Year{YYYY: iYear})) 190 } 191 if tz != "" { 192 loc, err := time.LoadLocation(tz) 193 if err != nil { 194 return nil, fmt.Errorf("failed to lookup timezone %s - %v", loc, err) 195 } 196 opts = append(opts, rfc3164.WithTimezone(loc)) 197 } 198 199 p := rfc3164.NewParser(opts...) 200 201 return func(body []byte) (map[string]interface{}, error) { 202 resGen, err := p.Parse(body) 203 if err != nil { 204 return nil, err 205 } 206 res := resGen.(*rfc3164.SyslogMessage) 207 208 resMap := make(map[string]interface{}) 209 if res.Message != nil { 210 resMap["message"] = *res.Message 211 } 212 if res.Timestamp != nil { 213 resMap["timestamp"] = res.Timestamp.Format(time.RFC3339Nano) 214 } 215 if res.Facility != nil { 216 resMap["facility"] = *res.Facility 217 } 218 if res.Severity != nil { 219 resMap["severity"] = *res.Severity 220 } 221 if res.Priority != nil { 222 resMap["priority"] = *res.Priority 223 } 224 if res.Hostname != nil { 225 resMap["hostname"] = *res.Hostname 226 } 227 if res.ProcID != nil { 228 resMap["procid"] = *res.ProcID 229 } 230 if res.Appname != nil { 231 resMap["appname"] = *res.Appname 232 } 233 if res.MsgID != nil { 234 resMap["msgid"] = *res.MsgID 235 } 236 237 return resMap, nil 238 }, nil 239 } 240 241 func getParseFormat(parser string, bestEffort, rfc3339 bool, defYear, defTZ string) (parserFormat, error) { 242 switch parser { 243 case "syslog_rfc5424": 244 return parserRFC5424(bestEffort), nil 245 case "syslog_rfc3164": 246 return parserRFC3164(bestEffort, rfc3339, defYear, defTZ) 247 } 248 return nil, fmt.Errorf("format not recognised: %s", parser) 249 } 250 251 //------------------------------------------------------------------------------ 252 253 // ParseLog is a processor that parses properly formatted messages. 254 type ParseLog struct { 255 parts []int 256 format parserFormat 257 258 conf Config 259 log log.Modular 260 stats metrics.Type 261 262 mCount metrics.StatCounter 263 mErr metrics.StatCounter 264 mErrJSONS metrics.StatCounter 265 mSent metrics.StatCounter 266 mBatchSent metrics.StatCounter 267 } 268 269 // NewParseLog returns a ParseLog processor. 270 func NewParseLog( 271 conf Config, mgr types.Manager, log log.Modular, stats metrics.Type, 272 ) (Type, error) { 273 s := &ParseLog{ 274 parts: conf.ParseLog.Parts, 275 conf: conf, 276 log: log, 277 stats: stats, 278 279 mCount: stats.GetCounter("count"), 280 mErr: stats.GetCounter("error"), 281 mSent: stats.GetCounter("sent"), 282 mBatchSent: stats.GetCounter("batch.sent"), 283 } 284 var err error 285 if s.format, err = getParseFormat(conf.ParseLog.Format, conf.ParseLog.BestEffort, conf.ParseLog.WithRFC3339, 286 conf.ParseLog.WithYear, conf.ParseLog.WithTimezone); err != nil { 287 return nil, err 288 } 289 return s, nil 290 } 291 292 //------------------------------------------------------------------------------ 293 294 // ProcessMessage applies the processor to a message, either creating >0 295 // resulting messages or a response to be sent back to the message source. 296 func (s *ParseLog) ProcessMessage(msg types.Message) ([]types.Message, types.Response) { 297 s.mCount.Incr(1) 298 newMsg := msg.Copy() 299 300 proc := func(index int, span *tracing.Span, part types.Part) error { 301 dataMap, err := s.format(part.Get()) 302 if err != nil { 303 s.mErr.Incr(1) 304 s.log.Debugf("Failed to parse message as %s: %v\n", s.conf.ParseLog.Format, err) 305 return err 306 } 307 308 if err := newMsg.Get(index).SetJSON(dataMap); err != nil { 309 s.mErrJSONS.Incr(1) 310 s.mErr.Incr(1) 311 s.log.Debugf("Failed to convert log format result into json: %v\n", err) 312 return err 313 } 314 315 return nil 316 } 317 318 IteratePartsWithSpanV2(TypeParseLog, s.parts, newMsg, proc) 319 320 s.mBatchSent.Incr(1) 321 s.mSent.Incr(int64(newMsg.Len())) 322 return []types.Message{newMsg}, nil 323 } 324 325 // CloseAsync shuts down the processor and stops processing requests. 326 func (s *ParseLog) CloseAsync() { 327 } 328 329 // WaitForClose blocks until the processor has closed down. 330 func (s *ParseLog) WaitForClose(timeout time.Duration) error { 331 return nil 332 } 333 334 //------------------------------------------------------------------------------