github.com/honeycombio/honeytail@v1.9.0/parsers/regex/regex.go (about) 1 // Package regex consumes logs given user-defined regex format for lines 2 3 // RE2 regex syntax reference: https://github.com/google/re2/wiki/Syntax 4 // Example format for a named capture group: `(?P<name>re)` 5 6 package regex 7 8 import ( 9 "errors" 10 "fmt" 11 "regexp" 12 "strings" 13 "sync" 14 15 "github.com/sirupsen/logrus" 16 17 "github.com/honeycombio/honeytail/event" 18 "github.com/honeycombio/honeytail/httime" 19 "github.com/honeycombio/honeytail/parsers" 20 ) 21 22 type Options struct { 23 // Note: `LineRegex` and `line_regex` are named as singular so that 24 // it's less confusing to users to input them. 25 // Might be worth making this consistent across the entire repo 26 LineRegex []string `long:"line_regex" description:"Regular expression with named capture groups representing the fields you want parsed (RE2 syntax). You can enter multiple regexes to match (--regex.line_regex=\"(?P<foo>re)\" --regex.line_regex=\"(?P<bar>...)\"). Parses using the first regex to match a line, so list them in most-to-least-specific order." yaml:"line_regex,omitempty"` 27 TimeFieldName string `long:"timefield" description:"Name of the field that contains a timestamp" yaml:"timefield,omitempty"` 28 TimeFieldFormat string `long:"time_format" description:"Timestamp format to use (strftime and Golang time.Parse supported)" yaml:"time_format,omitempty"` 29 NumParsers int `hidden:"true" description:"number of regex parsers to spin up" yaml:"-"` 30 } 31 32 type Parser struct { 33 conf Options 34 lineParser parsers.LineParser 35 } 36 37 func (p *Parser) Init(options interface{}) error { 38 p.conf = *options.(*Options) 39 if len(p.conf.LineRegex) == 0 { 40 return errors.New("Must provide at least one regex for parsing log lines; use `--regex.line_regex` flag.") 41 } 42 lineParser, err := NewRegexLineParser(p.conf.LineRegex) 43 if err != nil { 44 return err 45 } 46 p.lineParser = lineParser 47 return nil 48 } 49 50 // Compile multiple log line regexes 51 func ParseLineRegexes(regexStrs []string) ([]*regexp.Regexp, error) { 52 regexes := make([]*regexp.Regexp, 0) 53 for _, regexStr := range regexStrs { 54 regex, err := ParseLineRegex(regexStr) 55 if err != nil { 56 return regexes, err 57 } 58 regexes = append(regexes, regex) 59 } 60 return regexes, nil 61 } 62 63 // Compile a regex & validate expectations for log line parsing 64 func ParseLineRegex(regexStr string) (*regexp.Regexp, error) { 65 // Regex can't be blank 66 if regexStr == "" { 67 logrus.Debug("LineRegex is blank; required field") 68 return nil, errors.New("Must provide a regex for parsing log lines; use `--regex.line_regex` flag.") 69 } 70 71 // Compile regex 72 lineRegex, err := regexp.Compile(regexStr) 73 if err != nil { 74 logrus.WithFields(logrus.Fields{ 75 "lineRegex": regexStr, 76 }).Error("Could not compile line regex") 77 return nil, err 78 } 79 80 // Require at least one named group 81 var numNamedGroups int 82 for _, groupName := range lineRegex.SubexpNames() { 83 if groupName != "" { 84 numNamedGroups++ 85 } 86 } 87 if numNamedGroups == 0 { 88 logrus.WithFields(logrus.Fields{ 89 "LineRegex": regexStr, 90 }).Error("No named capture groups") 91 return nil, errors.New(fmt.Sprintf("No named capture groups found in regex: '%s'. Must provide at least one named group with line regex. Example: `(?P<name>re)`", regexStr)) 92 } 93 94 return lineRegex, nil 95 } 96 97 /* LineParser for regexes */ 98 99 type RegexLineParser struct { 100 lineRegexes []*regexp.Regexp 101 } 102 103 // NewRegexLineParser factory 104 func NewRegexLineParser(regexStrs []string) (*RegexLineParser, error) { 105 lineRegexes, err := ParseLineRegexes(regexStrs) 106 if err != nil { 107 return nil, err 108 } 109 logrus.WithFields(logrus.Fields{ 110 "lineRegexes": lineRegexes, 111 }).Debug("Compiled line regexes") 112 return &RegexLineParser{lineRegexes}, nil 113 } 114 115 func (p *RegexLineParser) ParseLine(line string) (map[string]interface{}, error) { 116 for _, lineRegex := range p.lineRegexes { 117 parsed := make(map[string]interface{}) 118 match := lineRegex.FindAllStringSubmatch(line, -1) 119 if match == nil || len(match) == 0 { 120 logrus.WithFields(logrus.Fields{ 121 "line": line, 122 "lineRegex": lineRegex, 123 }).Debug("No matches for regex log line") 124 continue // No matches found, skip to next regex 125 } 126 127 // Map capture groups 128 var firstMatch []string = match[0] // We only care about the first full lineRegex match 129 for i, name := range lineRegex.SubexpNames() { 130 if i != 0 && i < len(firstMatch) { 131 parsed[name] = firstMatch[i] 132 } 133 } 134 logrus.WithFields(logrus.Fields{ 135 "parsed": parsed, 136 "line": line, 137 "lineRegex": lineRegex, 138 }).Debug("Regex parsing log line") 139 140 return parsed, nil 141 } 142 return make(map[string]interface{}), nil 143 } 144 145 func (p *Parser) ProcessLines(lines <-chan string, send chan<- event.Event, prefixRegex *parsers.ExtRegexp) { 146 // parse lines one by one 147 wg := sync.WaitGroup{} 148 numParsers := 1 149 if p.conf.NumParsers > 0 { 150 numParsers = p.conf.NumParsers 151 } 152 for i := 0; i < numParsers; i++ { 153 wg.Add(1) 154 go func() { 155 for line := range lines { 156 logrus.WithFields(logrus.Fields{ 157 "line": line, 158 }).Debug("Attempting to process regex log line") 159 160 // take care of any headers on the line 161 var prefixFields map[string]string 162 if prefixRegex != nil { 163 var prefix string 164 prefix, prefixFields = prefixRegex.FindStringSubmatchMap(line) 165 line = strings.TrimPrefix(line, prefix) 166 } 167 168 parsedLine, err := p.lineParser.ParseLine(line) 169 if err != nil { 170 continue 171 } 172 173 // merge the prefix fields and the parsed line contents 174 for k, v := range prefixFields { 175 parsedLine[k] = v 176 } 177 178 if len(parsedLine) == 0 { 179 logrus.WithFields(logrus.Fields{ 180 "line": line, 181 }).Debug("Skipping line; no capture groups found") 182 continue 183 } 184 185 // look for the timestamp in any of the prefix fields or regular content 186 timestamp := httime.GetTimestamp(parsedLine, p.conf.TimeFieldName, p.conf.TimeFieldFormat) 187 188 // send an event to Transmission 189 e := event.Event{ 190 Timestamp: timestamp, 191 Data: parsedLine, 192 } 193 send <- e 194 } 195 wg.Done() 196 }() 197 } 198 wg.Wait() 199 logrus.Debug("lines channel is closed, ending regex processor") 200 }