github.com/honeycombio/honeytail@v1.9.0/parsers/regex/regex.go (about)

// Package regex parses log lines using user-defined regular expressions.
     2  
     3  // RE2 regex syntax reference: https://github.com/google/re2/wiki/Syntax
     4  // Example format for a named capture group: `(?P<name>re)`
     5  
     6  package regex
     7  
     8  import (
     9  	"errors"
    10  	"fmt"
    11  	"regexp"
    12  	"strings"
    13  	"sync"
    14  
    15  	"github.com/sirupsen/logrus"
    16  
    17  	"github.com/honeycombio/honeytail/event"
    18  	"github.com/honeycombio/honeytail/httime"
    19  	"github.com/honeycombio/honeytail/parsers"
    20  )
    21  
// Options holds the configuration for the regex parser.
//
// LineRegex lists the patterns to try against each log line; per its flag
// description, the first pattern that matches a line is the one used.
// TimeFieldName and TimeFieldFormat are passed to httime.GetTimestamp when an
// event is built from a parsed line. NumParsers is the number of goroutines
// ProcessLines starts; values of zero or less result in a single goroutine.
type Options struct {
	// Note: `LineRegex` and `line_regex` are named as singular so that
	// it's less confusing to users to input them.
	// Might be worth making this consistent across the entire repo
	LineRegex       []string `long:"line_regex" description:"Regular expression with named capture groups representing the fields you want parsed (RE2 syntax). You can enter multiple regexes to match (--regex.line_regex=\"(?P<foo>re)\" --regex.line_regex=\"(?P<bar>...)\"). Parses using the first regex to match a line, so list them in most-to-least-specific order." yaml:"line_regex,omitempty"`
	TimeFieldName   string   `long:"timefield" description:"Name of the field that contains a timestamp" yaml:"timefield,omitempty"`
	TimeFieldFormat string   `long:"time_format" description:"Timestamp format to use (strftime and Golang time.Parse supported)" yaml:"time_format,omitempty"`
	NumParsers      int      `hidden:"true" description:"number of regex parsers to spin up" yaml:"-"`
}
    31  
// Parser turns raw log lines into events using regular expressions.
//
// conf is a copy of the Options handed to Init, and lineParser is the line
// parser Init builds from conf.LineRegex. ProcessLines reads both, so Init
// must have succeeded before ProcessLines is called.
type Parser struct {
	conf       Options
	lineParser parsers.LineParser
}
    36  
    37  func (p *Parser) Init(options interface{}) error {
    38  	p.conf = *options.(*Options)
    39  	if len(p.conf.LineRegex) == 0 {
    40  		return errors.New("Must provide at least one regex for parsing log lines; use `--regex.line_regex` flag.")
    41  	}
    42  	lineParser, err := NewRegexLineParser(p.conf.LineRegex)
    43  	if err != nil {
    44  		return err
    45  	}
    46  	p.lineParser = lineParser
    47  	return nil
    48  }
    49  
    50  // Compile multiple log line regexes
    51  func ParseLineRegexes(regexStrs []string) ([]*regexp.Regexp, error) {
    52  	regexes := make([]*regexp.Regexp, 0)
    53  	for _, regexStr := range regexStrs {
    54  		regex, err := ParseLineRegex(regexStr)
    55  		if err != nil {
    56  			return regexes, err
    57  		}
    58  		regexes = append(regexes, regex)
    59  	}
    60  	return regexes, nil
    61  }
    62  
    63  // Compile a regex & validate expectations for log line parsing
    64  func ParseLineRegex(regexStr string) (*regexp.Regexp, error) {
    65  	// Regex can't be blank
    66  	if regexStr == "" {
    67  		logrus.Debug("LineRegex is blank; required field")
    68  		return nil, errors.New("Must provide a regex for parsing log lines; use `--regex.line_regex` flag.")
    69  	}
    70  
    71  	// Compile regex
    72  	lineRegex, err := regexp.Compile(regexStr)
    73  	if err != nil {
    74  		logrus.WithFields(logrus.Fields{
    75  			"lineRegex": regexStr,
    76  		}).Error("Could not compile line regex")
    77  		return nil, err
    78  	}
    79  
    80  	// Require at least one named group
    81  	var numNamedGroups int
    82  	for _, groupName := range lineRegex.SubexpNames() {
    83  		if groupName != "" {
    84  			numNamedGroups++
    85  		}
    86  	}
    87  	if numNamedGroups == 0 {
    88  		logrus.WithFields(logrus.Fields{
    89  			"LineRegex": regexStr,
    90  		}).Error("No named capture groups")
    91  		return nil, errors.New(fmt.Sprintf("No named capture groups found in regex: '%s'. Must provide at least one named group with line regex. Example: `(?P<name>re)`", regexStr))
    92  	}
    93  
    94  	return lineRegex, nil
    95  }
    96  
    97  /* LineParser for regexes */
    98  
// RegexLineParser parses a single log line by trying a list of compiled
// regexes against it. lineRegexes holds those regexes in the order ParseLine
// tries them.
type RegexLineParser struct {
	lineRegexes []*regexp.Regexp
}
   102  
   103  // NewRegexLineParser factory
   104  func NewRegexLineParser(regexStrs []string) (*RegexLineParser, error) {
   105  	lineRegexes, err := ParseLineRegexes(regexStrs)
   106  	if err != nil {
   107  		return nil, err
   108  	}
   109  	logrus.WithFields(logrus.Fields{
   110  		"lineRegexes": lineRegexes,
   111  	}).Debug("Compiled line regexes")
   112  	return &RegexLineParser{lineRegexes}, nil
   113  }
   114  
   115  func (p *RegexLineParser) ParseLine(line string) (map[string]interface{}, error) {
   116  	for _, lineRegex := range p.lineRegexes {
   117  		parsed := make(map[string]interface{})
   118  		match := lineRegex.FindAllStringSubmatch(line, -1)
   119  		if match == nil || len(match) == 0 {
   120  			logrus.WithFields(logrus.Fields{
   121  				"line":      line,
   122  				"lineRegex": lineRegex,
   123  			}).Debug("No matches for regex log line")
   124  			continue // No matches found, skip to next regex
   125  		}
   126  
   127  		// Map capture groups
   128  		var firstMatch []string = match[0] // We only care about the first full lineRegex match
   129  		for i, name := range lineRegex.SubexpNames() {
   130  			if i != 0 && i < len(firstMatch) {
   131  				parsed[name] = firstMatch[i]
   132  			}
   133  		}
   134  		logrus.WithFields(logrus.Fields{
   135  			"parsed":    parsed,
   136  			"line":      line,
   137  			"lineRegex": lineRegex,
   138  		}).Debug("Regex parsing log line")
   139  
   140  		return parsed, nil
   141  	}
   142  	return make(map[string]interface{}), nil
   143  }
   144  
   145  func (p *Parser) ProcessLines(lines <-chan string, send chan<- event.Event, prefixRegex *parsers.ExtRegexp) {
   146  	// parse lines one by one
   147  	wg := sync.WaitGroup{}
   148  	numParsers := 1
   149  	if p.conf.NumParsers > 0 {
   150  		numParsers = p.conf.NumParsers
   151  	}
   152  	for i := 0; i < numParsers; i++ {
   153  		wg.Add(1)
   154  		go func() {
   155  			for line := range lines {
   156  				logrus.WithFields(logrus.Fields{
   157  					"line": line,
   158  				}).Debug("Attempting to process regex log line")
   159  
   160  				// take care of any headers on the line
   161  				var prefixFields map[string]string
   162  				if prefixRegex != nil {
   163  					var prefix string
   164  					prefix, prefixFields = prefixRegex.FindStringSubmatchMap(line)
   165  					line = strings.TrimPrefix(line, prefix)
   166  				}
   167  
   168  				parsedLine, err := p.lineParser.ParseLine(line)
   169  				if err != nil {
   170  					continue
   171  				}
   172  
   173  				// merge the prefix fields and the parsed line contents
   174  				for k, v := range prefixFields {
   175  					parsedLine[k] = v
   176  				}
   177  
   178  				if len(parsedLine) == 0 {
   179  					logrus.WithFields(logrus.Fields{
   180  						"line": line,
   181  					}).Debug("Skipping line; no capture groups found")
   182  					continue
   183  				}
   184  
   185  				// look for the timestamp in any of the prefix fields or regular content
   186  				timestamp := httime.GetTimestamp(parsedLine, p.conf.TimeFieldName, p.conf.TimeFieldFormat)
   187  
   188  				// send an event to Transmission
   189  				e := event.Event{
   190  					Timestamp: timestamp,
   191  					Data:      parsedLine,
   192  				}
   193  				send <- e
   194  			}
   195  			wg.Done()
   196  		}()
   197  	}
   198  	wg.Wait()
   199  	logrus.Debug("lines channel is closed, ending regex processor")
   200  }