github.com/honeycombio/honeytail@v1.9.0/parsers/mongodb/mongodb.go (about) 1 // Package mongodb is a parser for mongodb logs 2 package mongodb 3 4 import ( 5 "errors" 6 "strings" 7 "sync" 8 "time" 9 10 "github.com/sirupsen/logrus" 11 12 "github.com/honeycombio/honeytail/event" 13 "github.com/honeycombio/honeytail/httime" 14 "github.com/honeycombio/honeytail/parsers" 15 "github.com/honeycombio/honeytail/parsers/mongodb/logparser" 16 "github.com/honeycombio/honeytail/parsers/mongodb/queryshape" 17 ) 18 19 const ( 20 // https://github.com/rueckstiess/mongodb-log-spec#timestamps 21 ctimeNoMSTimeFormat = "Mon Jan _2 15:04:05" 22 ctimeTimeFormat = "Mon Jan _2 15:04:05.000" 23 iso8601UTCTimeFormat = "2006-01-02T15:04:05.000Z" 24 iso8601LocalTimeFormat = "2006-01-02T15:04:05.000-0700" 25 26 timestampFieldName = "timestamp" 27 namespaceFieldName = "namespace" 28 databaseFieldName = "database" 29 collectionFieldName = "collection" 30 locksFieldName = "locks" 31 locksMicrosFieldName = "locks(micros)" 32 33 shardingChangelogFieldName = "sharding_changelog" 34 changelogWhatFieldName = "changelog_what" 35 changelogChangeIDFieldName = "changelog_changeid" 36 changelogPrimaryFieldName = "changelog_primary" 37 changelogServerFieldName = "changelog_server" 38 changelogClientAddrFieldName = "changelog_client_addr" 39 changelogTimeFieldName = "changelog_time" 40 changelogDetailsFieldName = "changelog_details" 41 ) 42 43 var timestampFormats = []string{ 44 iso8601LocalTimeFormat, 45 iso8601UTCTimeFormat, 46 ctimeTimeFormat, 47 ctimeNoMSTimeFormat, 48 } 49 50 type Options struct { 51 LogPartials bool `long:"log_partials" description:"Send what was successfully parsed from a line (only if the error occurred in the log line's message)." yaml:"log_partials,omitempty"` 52 53 NumParsers int `hidden:"true" description:"number of mongo parsers to spin up" yaml:"-"` 54 } 55 56 type Parser struct { 57 conf Options 58 59 lock sync.RWMutex 60 currentReplicaSet string 61 } 62 63 type MongoLineParser struct { 64 } 65 66 func (m *MongoLineParser) ParseLine(line string) (map[string]interface{}, error) { 67 return logparser.ParseLogLine(line) 68 } 69 70 func (p *Parser) Init(options interface{}) error { 71 p.conf = *options.(*Options) 72 return nil 73 } 74 75 func (p *Parser) ProcessLines(lines <-chan string, send chan<- event.Event, prefixRegex *parsers.ExtRegexp) { 76 wg := sync.WaitGroup{} 77 numParsers := 1 78 if p.conf.NumParsers > 0 { 79 numParsers = p.conf.NumParsers 80 } 81 for i := 0; i < numParsers; i++ { 82 wg.Add(1) 83 go func() { 84 lineParser := &MongoLineParser{} 85 for line := range lines { 86 line = strings.TrimSpace(line) 87 // take care of any headers on the line 88 var prefixFields map[string]string 89 if prefixRegex != nil { 90 var prefix string 91 prefix, prefixFields = prefixRegex.FindStringSubmatchMap(line) 92 line = strings.TrimPrefix(line, prefix) 93 } 94 values, err := lineParser.ParseLine(line) 95 // we get a bunch of errors from the parser on mongo logs, skip em 96 if err == nil || (p.conf.LogPartials && logparser.IsPartialLogLine(err)) { 97 timestamp, err := p.parseTimestamp(values) 98 if err != nil { 99 logFailure(line, err, "couldn't parse logline timestamp, skipping") 100 continue 101 } 102 if err = p.decomposeSharding(values); err != nil { 103 logFailure(line, err, "couldn't decompose sharding changelog, skipping") 104 continue 105 } 106 if err = p.decomposeNamespace(values); err != nil { 107 logFailure(line, err, "couldn't decompose logline namespace, skipping") 108 continue 109 } 110 if err = p.decomposeLocks(values); err != nil { 111 logFailure(line, err, "couldn't decompose logline locks, skipping") 112 continue 113 } 114 if err = p.decomposeLocksMicros(values); err != nil { 115 logFailure(line, err, "couldn't decompose logline locks(micros), skipping") 116 continue 117 } 118 119 p.getCommandQuery(values) 120 121 if q, ok := values["query"].(map[string]interface{}); ok { 122 if _, ok = values["normalized_query"]; !ok { 123 // also calculate the query_shape if we can 124 values["normalized_query"] = queryshape.GetQueryShape(q) 125 } 126 } 127 128 if ns, ok := values["namespace"].(string); ok && ns == "admin.$cmd" { 129 if cmdType, ok := values["command_type"]; ok && cmdType == "replSetHeartbeat" { 130 if cmd, ok := values["command"].(map[string]interface{}); ok { 131 if replicaSet, ok := cmd["replSetHeartbeat"].(string); ok { 132 p.lock.Lock() 133 p.currentReplicaSet = replicaSet 134 p.lock.Unlock() 135 } 136 } 137 } 138 } 139 140 p.lock.RLock() 141 if p.currentReplicaSet != "" { 142 values["replica_set"] = p.currentReplicaSet 143 } 144 p.lock.RUnlock() 145 146 // merge the prefix fields and the parsed line contents 147 for k, v := range prefixFields { 148 values[k] = v 149 } 150 151 logrus.WithFields(logrus.Fields{ 152 "line": line, 153 "values": values, 154 }).Debug("Successfully parsed line") 155 156 // we'll be putting the timestamp in the Event 157 // itself, no need to also have it in the Data 158 delete(values, timestampFieldName) 159 160 send <- event.Event{ 161 Timestamp: timestamp, 162 Data: values, 163 } 164 } else { 165 logFailure(line, err, "logline didn't parse, skipping.") 166 } 167 } 168 wg.Done() 169 }() 170 } 171 wg.Wait() 172 logrus.Debug("lines channel is closed, ending mongo processor") 173 } 174 175 func (p *Parser) parseTimestamp(values map[string]interface{}) (time.Time, error) { 176 now := httime.Now() 177 timestamp_value, ok := values[timestampFieldName].(string) 178 if ok { 179 var err error 180 for _, f := range timestampFormats { 181 var timestamp time.Time 182 timestamp, err = httime.Parse(f, timestamp_value) 183 if err == nil { 184 if f == ctimeTimeFormat || f == ctimeNoMSTimeFormat { 185 // these formats lacks the year, so we check 186 // if adding Now().Year causes the date to be 187 // after today. if it's after today, we 188 // decrement year by 1. if it's not after, we 189 // use it. 190 ts := timestamp.AddDate(now.Year(), 0, 0) 191 if now.After(ts) { 192 return ts, nil 193 } 194 195 return timestamp.AddDate(now.Year()-1, 0, 0), nil 196 } 197 return timestamp, nil 198 } 199 } 200 return time.Time{}, err 201 } 202 203 return time.Time{}, errors.New("timestamp missing from logline") 204 } 205 206 func (p *Parser) decomposeSharding(values map[string]interface{}) error { 207 clValue, ok := values[shardingChangelogFieldName] 208 if !ok { 209 return nil 210 } 211 clMap, ok := clValue.(map[string]interface{}) 212 if !ok { 213 return nil 214 } 215 216 var val interface{} 217 if val, ok = clMap["ns"]; ok { 218 values[namespaceFieldName] = val 219 } 220 if val, ok = clMap["_id"]; ok { 221 values[changelogChangeIDFieldName] = val 222 } 223 if val, ok = clMap["server"]; ok { 224 values[changelogServerFieldName] = val 225 } 226 if val, ok = clMap["clientAddr"]; ok { 227 values[changelogClientAddrFieldName] = val 228 } 229 if val, ok = clMap["time"]; ok { 230 values[changelogTimeFieldName] = val 231 } 232 if val, ok = clMap["what"]; ok { 233 values[changelogWhatFieldName] = val 234 } 235 detailsMap, ok := clMap["details"].(map[string]interface{}) 236 if ok { 237 values[changelogDetailsFieldName] = detailsMap 238 values[changelogPrimaryFieldName] = detailsMap["primary"] 239 } 240 241 delete(values, shardingChangelogFieldName) 242 return nil 243 } 244 245 func (p *Parser) decomposeNamespace(values map[string]interface{}) error { 246 ns_value, ok := values[namespaceFieldName] 247 if !ok { 248 return nil 249 } 250 251 decomposed := strings.SplitN(ns_value.(string), ".", 2) 252 if len(decomposed) < 2 { 253 return nil 254 } 255 values[databaseFieldName] = decomposed[0] 256 values[collectionFieldName] = decomposed[1] 257 return nil 258 } 259 260 func (p *Parser) decomposeLocks(values map[string]interface{}) error { 261 locks_value, ok := values[locksFieldName] 262 if !ok { 263 return nil 264 } 265 locks_map, ok := locks_value.(map[string]interface{}) 266 if !ok { 267 return nil 268 } 269 for scope, v := range locks_map { 270 v_map, ok := v.(map[string]interface{}) 271 if !ok { 272 continue 273 } 274 for attrKey, attrVal := range v_map { 275 attrVal_map, ok := attrVal.(map[string]interface{}) 276 if !ok { 277 continue 278 } 279 for lockType, lockCount := range attrVal_map { 280 if lockType == "r" { 281 lockType = "read" 282 } else if lockType == "R" { 283 lockType = "Read" 284 } else if lockType == "w" { 285 lockType = "write" 286 } else if lockType == "w" { 287 lockType = "Write" 288 } 289 290 if attrKey == "acquireCount" { 291 values[strings.ToLower(scope)+"_"+lockType+"_lock"] = lockCount 292 } else if attrKey == "acquireWaitCount" { 293 values[strings.ToLower(scope)+"_"+lockType+"_lock_wait"] = lockCount 294 } else if attrKey == "timeAcquiringMicros" { 295 values[strings.ToLower(scope)+"_"+lockType+"_lock_wait_us"] = lockCount 296 } 297 } 298 } 299 } 300 delete(values, locksFieldName) 301 return nil 302 } 303 304 func (p *Parser) decomposeLocksMicros(values map[string]interface{}) error { 305 locks_value, ok := values[locksMicrosFieldName] 306 if !ok { 307 return nil 308 } 309 locks_map, ok := locks_value.(map[string]int64) 310 if !ok { 311 return nil 312 } 313 for lockType, lockDuration := range locks_map { 314 if lockType == "r" { 315 lockType = "read" 316 } else if lockType == "R" { 317 lockType = "Read" 318 } else if lockType == "w" { 319 lockType = "write" 320 } else if lockType == "w" { 321 lockType = "Write" 322 } 323 324 values[lockType+"_lock_held_us"] = lockDuration 325 } 326 delete(values, locksMicrosFieldName) 327 return nil 328 } 329 330 func (p *Parser) getCommandQuery(values map[string]interface{}) { 331 if commandType, ok := values["command_type"]; ok { 332 if cmd, ok := values["command"].(map[string]interface{}); ok { 333 switch commandType { 334 case "find": 335 q, ok := cmd["filter"].(map[string]interface{}) 336 if ok { 337 // skip the $where queries, since those are 338 // strings with embedded javascript expressions 339 if _, ok = q["$where"]; !ok { 340 values["query"] = q 341 } 342 } 343 break 344 case "findAndModify": 345 q, ok := cmd["query"] 346 if ok { 347 values["query"] = q 348 } 349 break 350 case "update": 351 // update is special in that each update log can contain multiple update statements. 352 // we build up a synthetic query that includes the entirety of the update list (with 353 // modifications so that the normalizer will include more info.) 354 updates, ok := cmd["updates"].([]interface{}) 355 if ok { 356 fakeQuery := make(map[string]interface{}) 357 var newUpdates []interface{} 358 for _, _update := range updates { 359 update, ok := _update.(map[string]interface{}) 360 if !ok { 361 continue 362 } 363 364 newU := make(map[string]interface{}) 365 if q, ok := update["q"]; ok { 366 newU["$query"] = q 367 } 368 if u, ok := update["u"]; ok { 369 newU["$update"] = u 370 } 371 if setOnInsert, ok := update["$setOnInsert"]; ok { 372 newU["$setOnInsert"] = setOnInsert 373 } 374 375 newUpdates = append(newUpdates, newU) 376 } 377 fakeQuery["updates"] = newUpdates 378 values["query"] = fakeQuery 379 } 380 break 381 case "delete": 382 // same treatment as with update above 383 deletes, ok := cmd["deletes"].([]interface{}) 384 if ok { 385 fakeQuery := make(map[string]interface{}) 386 var newDeletes []interface{} 387 for _, _del := range deletes { 388 del, ok := _del.(map[string]interface{}) 389 if !ok { 390 continue 391 } 392 393 newD := make(map[string]interface{}) 394 if q, ok := del["q"]; ok { 395 newD["$query"] = q 396 } 397 if lim, ok := del["limit"]; ok { 398 newD["$limit"] = lim 399 } 400 401 newDeletes = append(newDeletes, newD) 402 } 403 fakeQuery["deletes"] = newDeletes 404 values["query"] = fakeQuery 405 } 406 break 407 } 408 409 } 410 } 411 } 412 413 func logFailure(line string, err error, msg string) { 414 logrus.WithFields(logrus.Fields{"line": line}).WithError(err).Debugln(msg) 415 }