github.com/honeycombio/honeytail@v1.9.0/parsers/regex/regex_test.go (about)

     1  package regex
     2  
     3  import (
     4  	"reflect"
     5  	"regexp"
     6  	"testing"
     7  	"time"
     8  
     9  	"github.com/stretchr/testify/assert"
    10  
    11  	"github.com/honeycombio/honeytail/event"
    12  	"github.com/honeycombio/honeytail/parsers"
    13  )
    14  
    15  const (
    16  	commonLogFormatTimeLayout = "02/Jan/2006:15:04:05 -0700"
    17  	iso8601TimeLayout         = "2006-01-02T15:04:05-07:00"
    18  )
    19  
    20  // Test Init(...) success/fail
    21  
    22  type testInitMap struct {
    23  	options      *Options
    24  	expectedPass bool
    25  }
    26  
    27  var testInitCases = []testInitMap{
    28  	{
    29  		expectedPass: true,
    30  		options: &Options{
    31  			NumParsers:      5,
    32  			TimeFieldName:   "local_time",
    33  			TimeFieldFormat: "%d/%b/%Y:%H:%M:%S %z",
    34  			LineRegex:       []string{`(?P<foo>[A-Za-z]+)`},
    35  		},
    36  	},
    37  	{
    38  		expectedPass: false,
    39  		options: &Options{
    40  			NumParsers:      5,
    41  			TimeFieldName:   "local_time",
    42  			TimeFieldFormat: "%d/%b/%Y:%H:%M:%S %z",
    43  			LineRegex:       []string{``}, // Empty regex should fail
    44  		},
    45  	},
    46  	{
    47  		expectedPass: false,
    48  		options: &Options{
    49  			NumParsers:      5,
    50  			TimeFieldName:   "local_time",
    51  			TimeFieldFormat: "%d/%b/%Y:%H:%M:%S %z",
    52  			LineRegex:       []string{`(?P<foo>[A-Za-`}, // Broken regex should fail
    53  		},
    54  	},
    55  	{
    56  		expectedPass: false,
    57  		options: &Options{
    58  			NumParsers: 5,
    59  			LineRegex:  []string{`[a-z]+`}, // Require at least one named group
    60  		},
    61  	},
    62  	{
    63  		expectedPass: false,
    64  		options: &Options{
    65  			NumParsers: 5,
    66  			LineRegex:  []string{`(?P[a-z]+)`}, // Require at least one named group
    67  		},
    68  	},
    69  	{
    70  		expectedPass: true,
    71  		options: &Options{
    72  			NumParsers: 5,
    73  			LineRegex: []string{ // Take in multiple regexes
    74  				`\[(?P<word1>\w+)\]`,
    75  				`(?P<dummy>banana)`,
    76  			},
    77  		},
    78  	},
    79  	{
    80  		expectedPass: false,
    81  		options: &Options{
    82  			NumParsers: 5,
    83  			LineRegex: []string{
    84  				`[(?P<word1>\w+)]`, // Invalid -- brackets need to be escaped
    85  			},
    86  		},
    87  	},
    88  }
    89  
    90  func TestInit(t *testing.T) {
    91  	for _, testCase := range testInitCases {
    92  		p := &Parser{}
    93  		err := p.Init(testCase.options)
    94  		if (err == nil) != testCase.expectedPass {
    95  			if err == nil {
    96  				t.Error("Parser Init(...) passed; expected it to fail.")
    97  			} else {
    98  				t.Error("Parser Init(...) failed; expected it to pass. Error:", err)
    99  			}
   100  		} else {
   101  			t.Logf("Init pass status is %t as expected", (err == nil))
   102  		}
   103  	}
   104  }
   105  
   106  // Test cases for RegexLineParser.ParseLine
   107  
   108  type testLineMap struct {
   109  	lineRegexes []string
   110  	input       string
   111  	expected    map[string]interface{}
   112  }
   113  
   114  var tlms = []testLineMap{
   115  	{
   116  		// Simple word parsing
   117  		lineRegexes: []string{
   118  			`(?P<word1>\w+) (?P<word2>\w+) (?P<word3>\w+)`,
   119  		},
   120  		input: `apple banana orange`,
   121  		expected: map[string]interface{}{
   122  			"word1": "apple",
   123  			"word2": "banana",
   124  			"word3": "orange",
   125  		},
   126  	},
   127  	{
   128  		// Matches no lines
   129  		lineRegexes: []string{
   130  			`(?P<word1>[a-zA-Z]+)`,
   131  		},
   132  		input:    `123456 654321`,
   133  		expected: map[string]interface{}{},
   134  	},
   135  	{
   136  		// Simple time parsing
   137  		lineRegexes: []string{
   138  			`(?P<Year>\d{4})-(?P<Month>\d{2})-(?P<Day>\d{2})`,
   139  		},
   140  		input: `2017-01-30 1980-01-02`, // Ignore the second date
   141  		expected: map[string]interface{}{
   142  			"Year":  "2017",
   143  			"Month": "01",
   144  			"Day":   "30",
   145  		},
   146  	},
   147  	{
   148  		// Fields containing whitespace
   149  		lineRegexes: []string{
   150  			`\[(?P<BracketedField>[0-9A-Za-z\s]+)\] (?P<UnbracketedField>[0-9A-Za-z]+)`,
   151  		},
   152  		input: `[some value] unbracketed`,
   153  		expected: map[string]interface{}{
   154  			"BracketedField":   "some value",
   155  			"UnbracketedField": "unbracketed",
   156  		},
   157  	},
   158  	{
   159  		// Nested regex grouping
   160  		lineRegexes: []string{
   161  			`(?P<outer>[^ ]* (?P<inner1>[^ ]*) (?P<inner2>[^ ]*))`,
   162  		},
   163  		input: `foo bar baz`,
   164  		expected: map[string]interface{}{
   165  			"outer":  "foo bar baz",
   166  			"inner1": "bar",
   167  			"inner2": "baz",
   168  		},
   169  	},
   170  	{
   171  		// Sample nginx error log line
   172  		lineRegexes: []string{
   173  			`(?P<time>\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}) \[(?P<status>.*)\].* request: "(?P<request>[^"]*)"`,
   174  		},
   175  		input: `2017/11/07 22:59:46 [error] 5812#0: *777536449 connect() failed (111: Connection refused) while connecting to upstream, client: 127.0.0.1, server: localhost, request: "GET /isbns HTTP/1.1", upstream: "http://127.0.0.1:8080/isbns", host: "localhost"`,
   176  		expected: map[string]interface{}{
   177  			"time":    "2017/11/07 22:59:46",
   178  			"status":  "error",
   179  			"request": "GET /isbns HTTP/1.1",
   180  		},
   181  	},
   182  	{
   183  		// Multi-regex parsing
   184  		lineRegexes: []string{
   185  			`(?P<word1>\w+) (?P<word2>\w+) (?P<word3>\w+)`,
   186  			`(?P<dummy>banana)`,
   187  		},
   188  		input: `apple banana orange`, // Should match to first regex
   189  		expected: map[string]interface{}{
   190  			"word1": "apple",
   191  			"word2": "banana",
   192  			"word3": "orange",
   193  		},
   194  	},
   195  	{
   196  		// Multi-regex parsing
   197  		lineRegexes: []string{
   198  			`\[(?P<word1>\w+)\]`,
   199  			`(?P<dummy>banana)`,
   200  		},
   201  		input: `apple banana orange`, // Should match to second regex
   202  		expected: map[string]interface{}{
   203  			"dummy": "banana",
   204  		},
   205  	},
   206  }
   207  
   208  func TestParseLine(t *testing.T) {
   209  	for _, tlm := range tlms {
   210  		p := &Parser{}
   211  		err := p.Init(&Options{
   212  			NumParsers: 5,
   213  			LineRegex:  tlm.lineRegexes,
   214  		})
   215  		assert.NoError(t, err, "Could not instantiate parser with regexes: %v", tlm.lineRegexes)
   216  		resp, err := p.lineParser.ParseLine(tlm.input)
   217  		t.Logf("%+v", resp)
   218  		assert.NoError(t, err, "p.ParseLine unexpectedly returned error %v", err)
   219  		if !reflect.DeepEqual(resp, tlm.expected) {
   220  			t.Errorf("response %+v didn't match expected %+v", resp, tlm.expected)
   221  		}
   222  	}
   223  }
   224  
   225  type testLineMaps struct {
   226  	line        string
   227  	trimmedLine string
   228  	resp        map[string]interface{}
   229  	typedResp   map[string]interface{}
   230  	ev          event.Event
   231  }
   232  
   233  // Test event emitted from ProcessLines
   234  func TestProcessLines(t *testing.T) {
   235  	t1, _ := time.ParseInLocation(commonLogFormatTimeLayout, "08/Oct/2015:00:26:26 -0000", time.UTC)
   236  	preReg := &parsers.ExtRegexp{regexp.MustCompile("^.*:..:.. (?P<pre_hostname>[a-zA-Z-.]+): ")}
   237  	tlm := []testLineMaps{
   238  		{
   239  			line: "https - 10.252.4.24 - - [08/Oct/2015:00:26:26 +0000] 200 174 0.099",
   240  			ev: event.Event{
   241  				Timestamp: t1,
   242  				Data: map[string]interface{}{
   243  					"http_x_forwarded_proto": "https",
   244  					"remote_addr":            "10.252.4.24",
   245  				},
   246  			},
   247  		},
   248  	}
   249  	p := &Parser{}
   250  	err := p.Init(&Options{
   251  		NumParsers:      5,
   252  		TimeFieldName:   "local_time",
   253  		TimeFieldFormat: "%d/%b/%Y:%H:%M:%S %z",
   254  		LineRegex: []string{
   255  			`(?P<http_x_forwarded_proto>\w+) - (?P<remote_addr>\d{1,4}\.\d{1,4}\.\d{1,4}\.\d{1,4}) - - \[(?P<local_time>\d{2}\/[A-Za-z]+\/\d{4}:\d{2}:\d{2}:\d{2}.*)\]`,
   256  		},
   257  	})
   258  	assert.NoError(t, err, "Couldn't instantiate Parser")
   259  
   260  	lines := make(chan string)
   261  	send := make(chan event.Event)
   262  	go func() {
   263  		for _, pair := range tlm {
   264  			lines <- pair.line
   265  		}
   266  		close(lines)
   267  	}()
   268  	go p.ProcessLines(lines, send, preReg)
   269  	for _, pair := range tlm {
   270  		resp := <-send
   271  		if !reflect.DeepEqual(resp, pair.ev) {
   272  			t.Fatalf("line resp didn't match up for %s. Expected: %+v, actual: %+v",
   273  				pair.line, pair.ev, resp)
   274  		}
   275  	}
   276  }