github.com/honeycombio/honeytail@v1.9.0/parsers/mongodb/mongodb.go (about)

     1  // Package mongodb is a parser for mongodb logs
     2  package mongodb
     3  
     4  import (
     5  	"errors"
     6  	"strings"
     7  	"sync"
     8  	"time"
     9  
    10  	"github.com/sirupsen/logrus"
    11  
    12  	"github.com/honeycombio/honeytail/event"
    13  	"github.com/honeycombio/honeytail/httime"
    14  	"github.com/honeycombio/honeytail/parsers"
    15  	"github.com/honeycombio/honeytail/parsers/mongodb/logparser"
    16  	"github.com/honeycombio/honeytail/parsers/mongodb/queryshape"
    17  )
    18  
    19  const (
    20  	// https://github.com/rueckstiess/mongodb-log-spec#timestamps
    21  	ctimeNoMSTimeFormat    = "Mon Jan _2 15:04:05"
    22  	ctimeTimeFormat        = "Mon Jan _2 15:04:05.000"
    23  	iso8601UTCTimeFormat   = "2006-01-02T15:04:05.000Z"
    24  	iso8601LocalTimeFormat = "2006-01-02T15:04:05.000-0700"
    25  
    26  	timestampFieldName   = "timestamp"
    27  	namespaceFieldName   = "namespace"
    28  	databaseFieldName    = "database"
    29  	collectionFieldName  = "collection"
    30  	locksFieldName       = "locks"
    31  	locksMicrosFieldName = "locks(micros)"
    32  
    33  	shardingChangelogFieldName   = "sharding_changelog"
    34  	changelogWhatFieldName       = "changelog_what"
    35  	changelogChangeIDFieldName   = "changelog_changeid"
    36  	changelogPrimaryFieldName    = "changelog_primary"
    37  	changelogServerFieldName     = "changelog_server"
    38  	changelogClientAddrFieldName = "changelog_client_addr"
    39  	changelogTimeFieldName       = "changelog_time"
    40  	changelogDetailsFieldName    = "changelog_details"
    41  )
    42  
    43  var timestampFormats = []string{
    44  	iso8601LocalTimeFormat,
    45  	iso8601UTCTimeFormat,
    46  	ctimeTimeFormat,
    47  	ctimeNoMSTimeFormat,
    48  }
    49  
    50  type Options struct {
    51  	LogPartials bool `long:"log_partials" description:"Send what was successfully parsed from a line (only if the error occurred in the log line's message)." yaml:"log_partials,omitempty"`
    52  
    53  	NumParsers int `hidden:"true" description:"number of mongo parsers to spin up" yaml:"-"`
    54  }
    55  
    56  type Parser struct {
    57  	conf Options
    58  
    59  	lock              sync.RWMutex
    60  	currentReplicaSet string
    61  }
    62  
    63  type MongoLineParser struct {
    64  }
    65  
    66  func (m *MongoLineParser) ParseLine(line string) (map[string]interface{}, error) {
    67  	return logparser.ParseLogLine(line)
    68  }
    69  
    70  func (p *Parser) Init(options interface{}) error {
    71  	p.conf = *options.(*Options)
    72  	return nil
    73  }
    74  
    75  func (p *Parser) ProcessLines(lines <-chan string, send chan<- event.Event, prefixRegex *parsers.ExtRegexp) {
    76  	wg := sync.WaitGroup{}
    77  	numParsers := 1
    78  	if p.conf.NumParsers > 0 {
    79  		numParsers = p.conf.NumParsers
    80  	}
    81  	for i := 0; i < numParsers; i++ {
    82  		wg.Add(1)
    83  		go func() {
    84  			lineParser := &MongoLineParser{}
    85  			for line := range lines {
    86  				line = strings.TrimSpace(line)
    87  				// take care of any headers on the line
    88  				var prefixFields map[string]string
    89  				if prefixRegex != nil {
    90  					var prefix string
    91  					prefix, prefixFields = prefixRegex.FindStringSubmatchMap(line)
    92  					line = strings.TrimPrefix(line, prefix)
    93  				}
    94  				values, err := lineParser.ParseLine(line)
    95  				// we get a bunch of errors from the parser on mongo logs, skip em
    96  				if err == nil || (p.conf.LogPartials && logparser.IsPartialLogLine(err)) {
    97  					timestamp, err := p.parseTimestamp(values)
    98  					if err != nil {
    99  						logFailure(line, err, "couldn't parse logline timestamp, skipping")
   100  						continue
   101  					}
   102  					if err = p.decomposeSharding(values); err != nil {
   103  						logFailure(line, err, "couldn't decompose sharding changelog, skipping")
   104  						continue
   105  					}
   106  					if err = p.decomposeNamespace(values); err != nil {
   107  						logFailure(line, err, "couldn't decompose logline namespace, skipping")
   108  						continue
   109  					}
   110  					if err = p.decomposeLocks(values); err != nil {
   111  						logFailure(line, err, "couldn't decompose logline locks, skipping")
   112  						continue
   113  					}
   114  					if err = p.decomposeLocksMicros(values); err != nil {
   115  						logFailure(line, err, "couldn't decompose logline locks(micros), skipping")
   116  						continue
   117  					}
   118  
   119  					p.getCommandQuery(values)
   120  
   121  					if q, ok := values["query"].(map[string]interface{}); ok {
   122  						if _, ok = values["normalized_query"]; !ok {
   123  							// also calculate the query_shape if we can
   124  							values["normalized_query"] = queryshape.GetQueryShape(q)
   125  						}
   126  					}
   127  
   128  					if ns, ok := values["namespace"].(string); ok && ns == "admin.$cmd" {
   129  						if cmdType, ok := values["command_type"]; ok && cmdType == "replSetHeartbeat" {
   130  							if cmd, ok := values["command"].(map[string]interface{}); ok {
   131  								if replicaSet, ok := cmd["replSetHeartbeat"].(string); ok {
   132  									p.lock.Lock()
   133  									p.currentReplicaSet = replicaSet
   134  									p.lock.Unlock()
   135  								}
   136  							}
   137  						}
   138  					}
   139  
   140  					p.lock.RLock()
   141  					if p.currentReplicaSet != "" {
   142  						values["replica_set"] = p.currentReplicaSet
   143  					}
   144  					p.lock.RUnlock()
   145  
   146  					// merge the prefix fields and the parsed line contents
   147  					for k, v := range prefixFields {
   148  						values[k] = v
   149  					}
   150  
   151  					logrus.WithFields(logrus.Fields{
   152  						"line":   line,
   153  						"values": values,
   154  					}).Debug("Successfully parsed line")
   155  
   156  					// we'll be putting the timestamp in the Event
   157  					// itself, no need to also have it in the Data
   158  					delete(values, timestampFieldName)
   159  
   160  					send <- event.Event{
   161  						Timestamp: timestamp,
   162  						Data:      values,
   163  					}
   164  				} else {
   165  					logFailure(line, err, "logline didn't parse, skipping.")
   166  				}
   167  			}
   168  			wg.Done()
   169  		}()
   170  	}
   171  	wg.Wait()
   172  	logrus.Debug("lines channel is closed, ending mongo processor")
   173  }
   174  
   175  func (p *Parser) parseTimestamp(values map[string]interface{}) (time.Time, error) {
   176  	now := httime.Now()
   177  	timestamp_value, ok := values[timestampFieldName].(string)
   178  	if ok {
   179  		var err error
   180  		for _, f := range timestampFormats {
   181  			var timestamp time.Time
   182  			timestamp, err = httime.Parse(f, timestamp_value)
   183  			if err == nil {
   184  				if f == ctimeTimeFormat || f == ctimeNoMSTimeFormat {
   185  					// these formats lacks the year, so we check
   186  					// if adding Now().Year causes the date to be
   187  					// after today.  if it's after today, we
   188  					// decrement year by 1.  if it's not after, we
   189  					// use it.
   190  					ts := timestamp.AddDate(now.Year(), 0, 0)
   191  					if now.After(ts) {
   192  						return ts, nil
   193  					}
   194  
   195  					return timestamp.AddDate(now.Year()-1, 0, 0), nil
   196  				}
   197  				return timestamp, nil
   198  			}
   199  		}
   200  		return time.Time{}, err
   201  	}
   202  
   203  	return time.Time{}, errors.New("timestamp missing from logline")
   204  }
   205  
   206  func (p *Parser) decomposeSharding(values map[string]interface{}) error {
   207  	clValue, ok := values[shardingChangelogFieldName]
   208  	if !ok {
   209  		return nil
   210  	}
   211  	clMap, ok := clValue.(map[string]interface{})
   212  	if !ok {
   213  		return nil
   214  	}
   215  
   216  	var val interface{}
   217  	if val, ok = clMap["ns"]; ok {
   218  		values[namespaceFieldName] = val
   219  	}
   220  	if val, ok = clMap["_id"]; ok {
   221  		values[changelogChangeIDFieldName] = val
   222  	}
   223  	if val, ok = clMap["server"]; ok {
   224  		values[changelogServerFieldName] = val
   225  	}
   226  	if val, ok = clMap["clientAddr"]; ok {
   227  		values[changelogClientAddrFieldName] = val
   228  	}
   229  	if val, ok = clMap["time"]; ok {
   230  		values[changelogTimeFieldName] = val
   231  	}
   232  	if val, ok = clMap["what"]; ok {
   233  		values[changelogWhatFieldName] = val
   234  	}
   235  	detailsMap, ok := clMap["details"].(map[string]interface{})
   236  	if ok {
   237  		values[changelogDetailsFieldName] = detailsMap
   238  		values[changelogPrimaryFieldName] = detailsMap["primary"]
   239  	}
   240  
   241  	delete(values, shardingChangelogFieldName)
   242  	return nil
   243  }
   244  
   245  func (p *Parser) decomposeNamespace(values map[string]interface{}) error {
   246  	ns_value, ok := values[namespaceFieldName]
   247  	if !ok {
   248  		return nil
   249  	}
   250  
   251  	decomposed := strings.SplitN(ns_value.(string), ".", 2)
   252  	if len(decomposed) < 2 {
   253  		return nil
   254  	}
   255  	values[databaseFieldName] = decomposed[0]
   256  	values[collectionFieldName] = decomposed[1]
   257  	return nil
   258  }
   259  
   260  func (p *Parser) decomposeLocks(values map[string]interface{}) error {
   261  	locks_value, ok := values[locksFieldName]
   262  	if !ok {
   263  		return nil
   264  	}
   265  	locks_map, ok := locks_value.(map[string]interface{})
   266  	if !ok {
   267  		return nil
   268  	}
   269  	for scope, v := range locks_map {
   270  		v_map, ok := v.(map[string]interface{})
   271  		if !ok {
   272  			continue
   273  		}
   274  		for attrKey, attrVal := range v_map {
   275  			attrVal_map, ok := attrVal.(map[string]interface{})
   276  			if !ok {
   277  				continue
   278  			}
   279  			for lockType, lockCount := range attrVal_map {
   280  				if lockType == "r" {
   281  					lockType = "read"
   282  				} else if lockType == "R" {
   283  					lockType = "Read"
   284  				} else if lockType == "w" {
   285  					lockType = "write"
   286  				} else if lockType == "w" {
   287  					lockType = "Write"
   288  				}
   289  
   290  				if attrKey == "acquireCount" {
   291  					values[strings.ToLower(scope)+"_"+lockType+"_lock"] = lockCount
   292  				} else if attrKey == "acquireWaitCount" {
   293  					values[strings.ToLower(scope)+"_"+lockType+"_lock_wait"] = lockCount
   294  				} else if attrKey == "timeAcquiringMicros" {
   295  					values[strings.ToLower(scope)+"_"+lockType+"_lock_wait_us"] = lockCount
   296  				}
   297  			}
   298  		}
   299  	}
   300  	delete(values, locksFieldName)
   301  	return nil
   302  }
   303  
   304  func (p *Parser) decomposeLocksMicros(values map[string]interface{}) error {
   305  	locks_value, ok := values[locksMicrosFieldName]
   306  	if !ok {
   307  		return nil
   308  	}
   309  	locks_map, ok := locks_value.(map[string]int64)
   310  	if !ok {
   311  		return nil
   312  	}
   313  	for lockType, lockDuration := range locks_map {
   314  		if lockType == "r" {
   315  			lockType = "read"
   316  		} else if lockType == "R" {
   317  			lockType = "Read"
   318  		} else if lockType == "w" {
   319  			lockType = "write"
   320  		} else if lockType == "w" {
   321  			lockType = "Write"
   322  		}
   323  
   324  		values[lockType+"_lock_held_us"] = lockDuration
   325  	}
   326  	delete(values, locksMicrosFieldName)
   327  	return nil
   328  }
   329  
   330  func (p *Parser) getCommandQuery(values map[string]interface{}) {
   331  	if commandType, ok := values["command_type"]; ok {
   332  		if cmd, ok := values["command"].(map[string]interface{}); ok {
   333  			switch commandType {
   334  			case "find":
   335  				q, ok := cmd["filter"].(map[string]interface{})
   336  				if ok {
   337  					// skip the $where queries, since those are
   338  					// strings with embedded javascript expressions
   339  					if _, ok = q["$where"]; !ok {
   340  						values["query"] = q
   341  					}
   342  				}
   343  				break
   344  			case "findAndModify":
   345  				q, ok := cmd["query"]
   346  				if ok {
   347  					values["query"] = q
   348  				}
   349  				break
   350  			case "update":
   351  				// update is special in that each update log can contain multiple update statements.
   352  				// we build up a synthetic query that includes the entirety of the update list (with
   353  				// modifications so that the normalizer will include more info.)
   354  				updates, ok := cmd["updates"].([]interface{})
   355  				if ok {
   356  					fakeQuery := make(map[string]interface{})
   357  					var newUpdates []interface{}
   358  					for _, _update := range updates {
   359  						update, ok := _update.(map[string]interface{})
   360  						if !ok {
   361  							continue
   362  						}
   363  
   364  						newU := make(map[string]interface{})
   365  						if q, ok := update["q"]; ok {
   366  							newU["$query"] = q
   367  						}
   368  						if u, ok := update["u"]; ok {
   369  							newU["$update"] = u
   370  						}
   371  						if setOnInsert, ok := update["$setOnInsert"]; ok {
   372  							newU["$setOnInsert"] = setOnInsert
   373  						}
   374  
   375  						newUpdates = append(newUpdates, newU)
   376  					}
   377  					fakeQuery["updates"] = newUpdates
   378  					values["query"] = fakeQuery
   379  				}
   380  				break
   381  			case "delete":
   382  				// same treatment as with update above
   383  				deletes, ok := cmd["deletes"].([]interface{})
   384  				if ok {
   385  					fakeQuery := make(map[string]interface{})
   386  					var newDeletes []interface{}
   387  					for _, _del := range deletes {
   388  						del, ok := _del.(map[string]interface{})
   389  						if !ok {
   390  							continue
   391  						}
   392  
   393  						newD := make(map[string]interface{})
   394  						if q, ok := del["q"]; ok {
   395  							newD["$query"] = q
   396  						}
   397  						if lim, ok := del["limit"]; ok {
   398  							newD["$limit"] = lim
   399  						}
   400  
   401  						newDeletes = append(newDeletes, newD)
   402  					}
   403  					fakeQuery["deletes"] = newDeletes
   404  					values["query"] = fakeQuery
   405  				}
   406  				break
   407  			}
   408  
   409  		}
   410  	}
   411  }
   412  
   413  func logFailure(line string, err error, msg string) {
   414  	logrus.WithFields(logrus.Fields{"line": line}).WithError(err).Debugln(msg)
   415  }