github.com/honeycombio/honeytail@v1.9.0/parsers/keyval/keyval.go (about)

     1  // Package keyval parses logs whose format is many key=val pairs
     2  package keyval
     3  
import (
	"fmt"
	"regexp"
	"strconv"
	"strings"
	"sync"

	"github.com/kr/logfmt"
	"github.com/sirupsen/logrus"

	"github.com/honeycombio/honeytail/event"
	"github.com/honeycombio/honeytail/httime"
	"github.com/honeycombio/honeytail/parsers"
)
    17  
    18  type Options struct {
    19  	TimeFieldName   string `long:"timefield" description:"Name of the field that contains a timestamp" yaml:"timefield,omitempty"`
    20  	TimeFieldFormat string `long:"format" description:"Format of the timestamp found in timefield (supports strftime and Golang time formats)" yaml:"format,omitempty"`
    21  	FilterRegex     string `long:"filter_regex" description:"a regular expression that will filter the input stream and only parse lines that match" yaml:"filter_regex,omitempty"`
    22  	InvertFilter    bool   `long:"invert_filter" description:"change the filter_regex to only process lines that do *not* match" yaml:"invert_filter,omitempty"`
    23  
    24  	NumParsers int `hidden:"true" description:"number of keyval parsers to spin up" yaml:"-"`
    25  }
    26  
    27  type Parser struct {
    28  	conf        Options
    29  	lineParser  parsers.LineParser
    30  	filterRegex *regexp.Regexp
    31  
    32  	warnedAboutTime bool
    33  }
    34  
    35  func (p *Parser) Init(options interface{}) error {
    36  	p.conf = *options.(*Options)
    37  	if p.conf.FilterRegex != "" {
    38  		var err error
    39  		if p.filterRegex, err = regexp.Compile(p.conf.FilterRegex); err != nil {
    40  			return err
    41  		}
    42  	}
    43  
    44  	p.lineParser = &KeyValLineParser{}
    45  	return nil
    46  }
    47  
    48  type KeyValLineParser struct {
    49  }
    50  
    51  func (j *KeyValLineParser) ParseLine(line string) (map[string]interface{}, error) {
    52  	parsed := make(map[string]interface{})
    53  	f := func(key, val []byte) error {
    54  		keyStr := string(key)
    55  		valStr := string(val)
    56  		if b, err := strconv.ParseBool(valStr); err == nil {
    57  			parsed[keyStr] = b
    58  			return nil
    59  		}
    60  		if i, err := strconv.Atoi(valStr); err == nil {
    61  			parsed[keyStr] = i
    62  			return nil
    63  		}
    64  		if f, err := strconv.ParseFloat(valStr, 64); err == nil {
    65  			parsed[keyStr] = f
    66  			return nil
    67  		}
    68  		parsed[keyStr] = valStr
    69  		return nil
    70  	}
    71  	err := logfmt.Unmarshal([]byte(line), logfmt.HandlerFunc(f))
    72  	return parsed, err
    73  }
    74  
    75  func (p *Parser) ProcessLines(lines <-chan string, send chan<- event.Event, prefixRegex *parsers.ExtRegexp) {
    76  	wg := sync.WaitGroup{}
    77  	numParsers := 1
    78  	if p.conf.NumParsers > 0 {
    79  		numParsers = p.conf.NumParsers
    80  	}
    81  	for i := 0; i < numParsers; i++ {
    82  		wg.Add(1)
    83  		go func() {
    84  			for line := range lines {
    85  				line = strings.TrimSpace(line)
    86  				logrus.WithFields(logrus.Fields{
    87  					"line": line,
    88  				}).Debug("Attempting to process keyval log line")
    89  
    90  				// if matching regex is set, filter lines here
    91  				if p.filterRegex != nil {
    92  					matched := p.filterRegex.MatchString(line)
    93  					// if both are true or both are false, skip. else continue
    94  					if matched == p.conf.InvertFilter {
    95  						logrus.WithFields(logrus.Fields{
    96  							"line":    line,
    97  							"matched": matched,
    98  						}).Debug("skipping line due to FilterMatch.")
    99  						continue
   100  					}
   101  				}
   102  
   103  				// take care of any headers on the line
   104  				var prefixFields map[string]string
   105  				if prefixRegex != nil {
   106  					var prefix string
   107  					prefix, prefixFields = prefixRegex.FindStringSubmatchMap(line)
   108  					line = strings.TrimPrefix(line, prefix)
   109  				}
   110  
   111  				parsedLine, err := p.lineParser.ParseLine(line)
   112  				if err != nil {
   113  					// skip lines that won't parse
   114  					logrus.WithFields(logrus.Fields{
   115  						"line":  line,
   116  						"error": err,
   117  					}).Debug("skipping line; failed to parse.")
   118  					continue
   119  				}
   120  				if len(parsedLine) == 0 {
   121  					// skip empty lines, as determined by the parser
   122  					logrus.WithFields(logrus.Fields{
   123  						"line":  line,
   124  						"error": err,
   125  					}).Debug("skipping line; no key/val pairs found.")
   126  					continue
   127  				}
   128  				if allEmpty(parsedLine) {
   129  					// skip events for which all fields are the empty string, because that's
   130  					// probably broken
   131  					logrus.WithFields(logrus.Fields{
   132  						"line":  line,
   133  						"error": err,
   134  					}).Debug("skipping line; all values are the empty string.")
   135  					continue
   136  				}
   137  				// merge the prefix fields and the parsed line contents
   138  				for k, v := range prefixFields {
   139  					parsedLine[k] = v
   140  				}
   141  
   142  				// look for the timestamp in any of the prefix fields or regular content
   143  				timestamp := httime.GetTimestamp(parsedLine, p.conf.TimeFieldName, p.conf.TimeFieldFormat)
   144  
   145  				// send an event to Transmission
   146  				e := event.Event{
   147  					Timestamp: timestamp,
   148  					Data:      parsedLine,
   149  				}
   150  				send <- e
   151  			}
   152  			wg.Done()
   153  		}()
   154  	}
   155  	wg.Wait()
   156  	logrus.Debug("lines channel is closed, ending keyval processor")
   157  }
   158  
   159  // allEmpty returns true if all values in the map are the empty string
   160  // TODO move this into the main honeytail loop instead of the keyval parser
   161  func allEmpty(pl map[string]interface{}) bool {
   162  	for _, v := range pl {
   163  		vStr, ok := v.(string)
   164  		if !ok {
   165  			// wouldn't coerce to string, so it must have something that's not an
   166  			// empty string
   167  			return false
   168  		}
   169  		if vStr != "" {
   170  			return false
   171  		}
   172  	}
   173  	// we've gone through the entire map and every field value has matched ""
   174  	return true
   175  }