// Source: github.com/honeycombio/honeytail@v1.9.0/parsers/arangodb/arangodb.go

// Package arangodb is a parser for ArangoDB logs.
package arangodb

import (
	"errors"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/honeycombio/honeytail/event"
	"github.com/honeycombio/honeytail/httime"
	"github.com/honeycombio/honeytail/parsers"
	"github.com/sirupsen/logrus"
)

// defaultNumParsers is the number of parsing goroutines ProcessLines starts
// when Options.numParsers is not a positive number.
const defaultNumParsers = 20

const (
	// Layouts accepted for the leading timestamp of a log line: one with a
	// literal trailing "Z" and one without any zone suffix.
	iso8601UTCTimeFormat   = "2006-01-02T15:04:05Z"
	iso8601LocalTimeFormat = "2006-01-02T15:04:05"

	// Keys under which ParseLine stores the parsed fields in its result map.
	timestampFieldName  = "timestamp"
	pidFieldName        = "pid"
	logLevelFieldName   = "logLevel"
	logTopicFieldName   = "logTopic"
	idFieldName         = "id"
	sourceIPFieldName   = "sourceIP"
	methodFieldName     = "method"
	protocolFieldName   = "protocol"
	resCodeFieldName    = "responseCode"
	reqBodyLenFieldName = "reqBodyLen"
	resBodyLenFieldName = "resBodyLen"
	fullURLFieldName    = "fullURL"
	totalTimeFieldName  = "totalTime"
)

// timestampFormats lists the layouts parseTimestamp tries, in order, until
// one of them parses the timestamp of a log line.
var timestampFormats = []string{
	iso8601UTCTimeFormat,
	iso8601LocalTimeFormat,
}

// Options holds the configuration of the parser. Its only field, numParsers,
// is unexported and therefore can only be set from inside this package; a
// value that is not positive makes ProcessLines use defaultNumParsers.
type Options struct {
	numParsers int
}

// Parser for log lines. It combines the configuration passed to Init with the
// line parser that ProcessLines applies to every incoming line.
type Parser struct {
	conf       Options
	lineParser parsers.LineParser
}

// ArangoLineParser is a LineParser for ArangoDB log files.
55 type ArangoLineParser struct { 56 } 57 58 func firstWord(line *string) (word string, abort bool) { 59 var pos = strings.IndexByte(*line, ' ') 60 if pos < 0 { 61 return "", true 62 } 63 word = (*line)[:pos] 64 *line = (*line)[pos+1:] 65 abort = false 66 return 67 } 68 69 func removeBrackets(word string) string { 70 var l = len(word) 71 if l < 2 { 72 return word 73 } 74 if word[0] == '(' && word[l-1] == ')' { 75 return word[1 : l-1] 76 } 77 if word[0] == '[' && word[l-1] == ']' { 78 return word[1 : l-1] 79 } 80 if word[0] == '{' && word[l-1] == '}' { 81 return word[1 : l-1] 82 } 83 return word 84 } 85 86 func removeQuotes(word string) string { 87 if len(word) == 0 { 88 return word 89 } 90 if word[0] == '"' { 91 word = word[1:] 92 } 93 if len(word) > 0 && word[len(word)-1] == '"' { 94 word = word[:len(word)-1] 95 } 96 return word 97 } 98 99 // ParseLine method for an ArangoLineParser implementing LineParser. 100 func (m *ArangoLineParser) ParseLine(line string) (_ map[string]interface{}, err error) { 101 // Do the actual work here, we look for log lines in the log topic "requests", 102 // there are two types, one is a DEBUG line (could be switched off) containing 103 // the request body, the other is the INFO line marking the end of the 104 // request. 
105 var v = make(map[string]interface{}) 106 err = errors.New("Line is not a request log line.") 107 var abort bool 108 var s string 109 110 v[timestampFieldName], abort = firstWord(&line) 111 if abort { 112 return 113 } 114 115 s, abort = firstWord(&line) 116 if abort { 117 return 118 } 119 v[pidFieldName] = removeBrackets(s) 120 121 v[logLevelFieldName], abort = firstWord(&line) 122 if abort { 123 return 124 } 125 126 s, abort = firstWord(&line) 127 if abort { 128 return 129 } 130 v[logTopicFieldName] = s 131 132 if s != "{requests}" { 133 return 134 } 135 136 var fields = strings.Split(line, ",") 137 if v[logLevelFieldName] == "DEBUG" { 138 if len(fields) != 6 { 139 return 140 } 141 v[idFieldName] = removeQuotes(fields[1]) 142 v[sourceIPFieldName] = removeQuotes(fields[2]) 143 v[methodFieldName] = removeQuotes(fields[3]) 144 v[protocolFieldName] = removeQuotes(fields[4]) 145 v[fullURLFieldName] = removeQuotes(fields[5]) 146 } else { 147 if len(fields) != 10 { 148 return 149 } 150 v[idFieldName] = removeQuotes(fields[1]) 151 v[sourceIPFieldName] = removeQuotes(fields[2]) 152 v[methodFieldName] = removeQuotes(fields[3]) 153 v[protocolFieldName] = removeQuotes(fields[4]) 154 v[resCodeFieldName], _ = strconv.ParseInt(fields[5], 10, 32) 155 v[reqBodyLenFieldName], _ = strconv.ParseInt(fields[6], 10, 64) 156 v[resBodyLenFieldName], _ = strconv.ParseInt(fields[7], 10, 64) 157 v[fullURLFieldName] = removeQuotes(fields[8]) 158 v[totalTimeFieldName], _ = strconv.ParseFloat(fields[9], 64) 159 } 160 return v, nil 161 } 162 163 // Init method for parser object. 164 func (p *Parser) Init(options interface{}) error { 165 p.conf = *options.(*Options) 166 p.lineParser = &ArangoLineParser{} 167 return nil 168 } 169 170 // ProcessLines method for Parser. 
171 func (p *Parser) ProcessLines(lines <-chan string, send chan<- event.Event, prefixRegex *parsers.ExtRegexp) { 172 wg := sync.WaitGroup{} 173 numParsers := defaultNumParsers 174 if p.conf.numParsers > 0 { 175 numParsers = p.conf.numParsers 176 } 177 for i := 0; i < numParsers; i++ { 178 wg.Add(1) 179 go func() { 180 for line := range lines { 181 line = strings.TrimSpace(line) 182 // take care of any headers on the line 183 var prefixFields map[string]string 184 if prefixRegex != nil { 185 var prefix string 186 prefix, prefixFields = prefixRegex.FindStringSubmatchMap(line) 187 line = strings.TrimPrefix(line, prefix) 188 } 189 190 values, err := p.lineParser.ParseLine(line) 191 // we get a bunch of errors from the parser on ArangoDB logs, skip em 192 if err == nil { 193 timestamp, err := p.parseTimestamp(values) 194 if err != nil { 195 logSkipped(line, "couldn't parse logline timestamp, skipping") 196 continue 197 } 198 199 // merge the prefix fields and the parsed line contents 200 for k, v := range prefixFields { 201 values[k] = v 202 } 203 204 logrus.WithFields(logrus.Fields{ 205 "line": line, 206 "values": values, 207 }).Debug("Successfully parsed line") 208 209 // we'll be putting the timestamp in the Event 210 // itself, no need to also have it in the Data 211 delete(values, timestampFieldName) 212 213 send <- event.Event{ 214 Timestamp: timestamp, 215 Data: values, 216 } 217 } else { 218 logSkipped(line, "logline didn't parse, skipping.") 219 } 220 } 221 wg.Done() 222 }() 223 } 224 wg.Wait() 225 logrus.Debug("lines channel is closed, ending arangodb processor") 226 } 227 228 func (p *Parser) parseTimestamp(values map[string]interface{}) (time.Time, error) { 229 timestampValue, ok := values[timestampFieldName].(string) 230 if ok { 231 var err error 232 for _, f := range timestampFormats { 233 var timestamp time.Time 234 timestamp, err = httime.Parse(f, timestampValue) 235 if err == nil { 236 return timestamp, nil 237 } 238 } 239 return time.Time{}, err 240 } 
241 242 return time.Time{}, errors.New("timestamp missing from logline") 243 } 244 245 func logSkipped(line string, msg string) { 246 logrus.WithFields(logrus.Fields{"line": line}).Debugln(msg) 247 }