github.com/honeycombio/honeytail@v1.9.0/parsers/arangodb/arangodb.go (about)

     1  // Package arangodb is a parser for ArangoDB logs
     2  package arangodb
     3  
     4  import (
     5  	"errors"
     6  	"strconv"
     7  	"strings"
     8  	"sync"
     9  	"time"
    10  
    11  	"github.com/honeycombio/honeytail/event"
    12  	"github.com/honeycombio/honeytail/httime"
    13  	"github.com/honeycombio/honeytail/parsers"
    14  	"github.com/sirupsen/logrus"
    15  )
    16  
// defaultNumParsers is the number of worker goroutines ProcessLines starts
// when Options.numParsers is not a positive value.
const defaultNumParsers = 20
    18  
const (
	// Time layouts that parseTimestamp tries against the first word of a
	// log line: one with a literal trailing "Z" and one without any zone
	// designator.
	iso8601UTCTimeFormat   = "2006-01-02T15:04:05Z"
	iso8601LocalTimeFormat = "2006-01-02T15:04:05"

	// Keys under which ParseLine stores the extracted fields in the map
	// it returns.
	timestampFieldName  = "timestamp"
	pidFieldName        = "pid"
	logLevelFieldName   = "logLevel"
	logTopicFieldName   = "logTopic"
	idFieldName         = "id"
	sourceIPFieldName   = "sourceIP"
	methodFieldName     = "method"
	protocolFieldName   = "protocol"
	resCodeFieldName    = "responseCode"
	reqBodyLenFieldName = "reqBodyLen"
	resBodyLenFieldName = "resBodyLen"
	fullURLFieldName    = "fullURL"
	totalTimeFieldName  = "totalTime"
)
    37  
// timestampFormats lists the layouts parseTimestamp attempts, in order; the
// first layout that parses successfully wins.
var timestampFormats = []string{
	iso8601UTCTimeFormat,
	iso8601LocalTimeFormat,
}
    42  
// Options holds the configuration for the ArangoDB parser.
type Options struct {
	// numParsers is the number of concurrent worker goroutines that
	// ProcessLines starts. Values <= 0 select defaultNumParsers. The field
	// is unexported, so it can only be set from inside this package.
	numParsers int
}
    47  
// Parser turns ArangoDB log lines into events. Init must be called before
// ProcessLines so that conf and lineParser are populated.
type Parser struct {
	// conf is the copy of the Options passed to Init.
	conf Options
	// lineParser converts one log line into a field map; Init sets it to
	// an *ArangoLineParser.
	lineParser parsers.LineParser
}
    53  
// ArangoLineParser is a LineParser for ArangoDB log files. It carries no
// state; all of the work happens in ParseLine.
type ArangoLineParser struct {
}
    57  
    58  func firstWord(line *string) (word string, abort bool) {
    59  	var pos = strings.IndexByte(*line, ' ')
    60  	if pos < 0 {
    61  		return "", true
    62  	}
    63  	word = (*line)[:pos]
    64  	*line = (*line)[pos+1:]
    65  	abort = false
    66  	return
    67  }
    68  
    69  func removeBrackets(word string) string {
    70  	var l = len(word)
    71  	if l < 2 {
    72  		return word
    73  	}
    74  	if word[0] == '(' && word[l-1] == ')' {
    75  		return word[1 : l-1]
    76  	}
    77  	if word[0] == '[' && word[l-1] == ']' {
    78  		return word[1 : l-1]
    79  	}
    80  	if word[0] == '{' && word[l-1] == '}' {
    81  		return word[1 : l-1]
    82  	}
    83  	return word
    84  }
    85  
    86  func removeQuotes(word string) string {
    87  	if len(word) == 0 {
    88  		return word
    89  	}
    90  	if word[0] == '"' {
    91  		word = word[1:]
    92  	}
    93  	if len(word) > 0 && word[len(word)-1] == '"' {
    94  		word = word[:len(word)-1]
    95  	}
    96  	return word
    97  }
    98  
    99  // ParseLine method for an ArangoLineParser implementing LineParser.
   100  func (m *ArangoLineParser) ParseLine(line string) (_ map[string]interface{}, err error) {
   101  	// Do the actual work here, we look for log lines in the log topic "requests",
   102  	// there are two types, one is a DEBUG line (could be switched off) containing
   103  	// the request body, the other is the INFO line marking the end of the
   104  	// request.
   105  	var v = make(map[string]interface{})
   106  	err = errors.New("Line is not a request log line.")
   107  	var abort bool
   108  	var s string
   109  
   110  	v[timestampFieldName], abort = firstWord(&line)
   111  	if abort {
   112  		return
   113  	}
   114  
   115  	s, abort = firstWord(&line)
   116  	if abort {
   117  		return
   118  	}
   119  	v[pidFieldName] = removeBrackets(s)
   120  
   121  	v[logLevelFieldName], abort = firstWord(&line)
   122  	if abort {
   123  		return
   124  	}
   125  
   126  	s, abort = firstWord(&line)
   127  	if abort {
   128  		return
   129  	}
   130  	v[logTopicFieldName] = s
   131  
   132  	if s != "{requests}" {
   133  		return
   134  	}
   135  
   136  	var fields = strings.Split(line, ",")
   137  	if v[logLevelFieldName] == "DEBUG" {
   138  		if len(fields) != 6 {
   139  			return
   140  		}
   141  		v[idFieldName] = removeQuotes(fields[1])
   142  		v[sourceIPFieldName] = removeQuotes(fields[2])
   143  		v[methodFieldName] = removeQuotes(fields[3])
   144  		v[protocolFieldName] = removeQuotes(fields[4])
   145  		v[fullURLFieldName] = removeQuotes(fields[5])
   146  	} else {
   147  		if len(fields) != 10 {
   148  			return
   149  		}
   150  		v[idFieldName] = removeQuotes(fields[1])
   151  		v[sourceIPFieldName] = removeQuotes(fields[2])
   152  		v[methodFieldName] = removeQuotes(fields[3])
   153  		v[protocolFieldName] = removeQuotes(fields[4])
   154  		v[resCodeFieldName], _ = strconv.ParseInt(fields[5], 10, 32)
   155  		v[reqBodyLenFieldName], _ = strconv.ParseInt(fields[6], 10, 64)
   156  		v[resBodyLenFieldName], _ = strconv.ParseInt(fields[7], 10, 64)
   157  		v[fullURLFieldName] = removeQuotes(fields[8])
   158  		v[totalTimeFieldName], _ = strconv.ParseFloat(fields[9], 64)
   159  	}
   160  	return v, nil
   161  }
   162  
   163  // Init method for parser object.
   164  func (p *Parser) Init(options interface{}) error {
   165  	p.conf = *options.(*Options)
   166  	p.lineParser = &ArangoLineParser{}
   167  	return nil
   168  }
   169  
   170  // ProcessLines method for Parser.
   171  func (p *Parser) ProcessLines(lines <-chan string, send chan<- event.Event, prefixRegex *parsers.ExtRegexp) {
   172  	wg := sync.WaitGroup{}
   173  	numParsers := defaultNumParsers
   174  	if p.conf.numParsers > 0 {
   175  		numParsers = p.conf.numParsers
   176  	}
   177  	for i := 0; i < numParsers; i++ {
   178  		wg.Add(1)
   179  		go func() {
   180  			for line := range lines {
   181  				line = strings.TrimSpace(line)
   182  				// take care of any headers on the line
   183  				var prefixFields map[string]string
   184  				if prefixRegex != nil {
   185  					var prefix string
   186  					prefix, prefixFields = prefixRegex.FindStringSubmatchMap(line)
   187  					line = strings.TrimPrefix(line, prefix)
   188  				}
   189  
   190  				values, err := p.lineParser.ParseLine(line)
   191  				// we get a bunch of errors from the parser on ArangoDB logs, skip em
   192  				if err == nil {
   193  					timestamp, err := p.parseTimestamp(values)
   194  					if err != nil {
   195  						logSkipped(line, "couldn't parse logline timestamp, skipping")
   196  						continue
   197  					}
   198  
   199  					// merge the prefix fields and the parsed line contents
   200  					for k, v := range prefixFields {
   201  						values[k] = v
   202  					}
   203  
   204  					logrus.WithFields(logrus.Fields{
   205  						"line":   line,
   206  						"values": values,
   207  					}).Debug("Successfully parsed line")
   208  
   209  					// we'll be putting the timestamp in the Event
   210  					// itself, no need to also have it in the Data
   211  					delete(values, timestampFieldName)
   212  
   213  					send <- event.Event{
   214  						Timestamp: timestamp,
   215  						Data:      values,
   216  					}
   217  				} else {
   218  					logSkipped(line, "logline didn't parse, skipping.")
   219  				}
   220  			}
   221  			wg.Done()
   222  		}()
   223  	}
   224  	wg.Wait()
   225  	logrus.Debug("lines channel is closed, ending arangodb processor")
   226  }
   227  
   228  func (p *Parser) parseTimestamp(values map[string]interface{}) (time.Time, error) {
   229  	timestampValue, ok := values[timestampFieldName].(string)
   230  	if ok {
   231  		var err error
   232  		for _, f := range timestampFormats {
   233  			var timestamp time.Time
   234  			timestamp, err = httime.Parse(f, timestampValue)
   235  			if err == nil {
   236  				return timestamp, nil
   237  			}
   238  		}
   239  		return time.Time{}, err
   240  	}
   241  
   242  	return time.Time{}, errors.New("timestamp missing from logline")
   243  }
   244  
   245  func logSkipped(line string, msg string) {
   246  	logrus.WithFields(logrus.Fields{"line": line}).Debugln(msg)
   247  }