github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/logql/log/parser_test.go (about)

     1  package log
     2  
     3  import (
     4  	"fmt"
     5  	"sort"
     6  	"testing"
     7  
     8  	"github.com/prometheus/prometheus/model/labels"
     9  	"github.com/stretchr/testify/require"
    10  
    11  	"github.com/grafana/loki/pkg/logqlmodel"
    12  )
    13  
    14  func Test_jsonParser_Parse(t *testing.T) {
    15  	tests := []struct {
    16  		name string
    17  		line []byte
    18  		lbs  labels.Labels
    19  		want labels.Labels
    20  	}{
    21  		{
    22  			"multi depth",
    23  			[]byte(`{"app":"foo","namespace":"prod","pod":{"uuid":"foo","deployment":{"ref":"foobar"}}}`),
    24  			labels.Labels{},
    25  			labels.Labels{
    26  				{Name: "app", Value: "foo"},
    27  				{Name: "namespace", Value: "prod"},
    28  				{Name: "pod_uuid", Value: "foo"},
    29  				{Name: "pod_deployment_ref", Value: "foobar"},
    30  			},
    31  		},
    32  		{
    33  			"numeric",
    34  			[]byte(`{"counter":1, "price": {"_net_":5.56909}}`),
    35  			labels.Labels{},
    36  			labels.Labels{
    37  				{Name: "counter", Value: "1"},
    38  				{Name: "price__net_", Value: "5.56909"},
    39  			},
    40  		},
    41  		{
    42  			"escaped",
    43  			[]byte(`{"counter":1,"foo":"foo\\\"bar", "price": {"_net_":5.56909}}`),
    44  			labels.Labels{},
    45  			labels.Labels{
    46  				{Name: "counter", Value: "1"},
    47  				{Name: "price__net_", Value: "5.56909"},
    48  				{Name: "foo", Value: `foo\"bar`},
    49  			},
    50  		},
    51  		{
    52  			"utf8 error rune",
    53  			[]byte(`{"counter":1,"foo":"�", "price": {"_net_":5.56909}}`),
    54  			labels.Labels{},
    55  			labels.Labels{
    56  				{Name: "counter", Value: "1"},
    57  				{Name: "price__net_", Value: "5.56909"},
    58  				{Name: "foo", Value: ""},
    59  			},
    60  		},
    61  		{
    62  			"skip arrays",
    63  			[]byte(`{"counter":1, "price": {"net_":["10","20"]}}`),
    64  			labels.Labels{},
    65  			labels.Labels{
    66  				{Name: "counter", Value: "1"},
    67  			},
    68  		},
    69  		{
    70  			"bad key replaced",
    71  			[]byte(`{"cou-nter":1}`),
    72  			labels.Labels{},
    73  			labels.Labels{
    74  				{Name: "cou_nter", Value: "1"},
    75  			},
    76  		},
    77  		{
    78  			"errors",
    79  			[]byte(`{n}`),
    80  			labels.Labels{},
    81  			labels.Labels{
    82  				{Name: "__error__", Value: "JSONParserErr"},
    83  				{Name: "__error_details__", Value: "ReadMapCB: expect \" after {, but found n, error found in #2 byte of ...|{n}|..., bigger context ...|{n}|..."},
    84  			},
    85  		},
    86  		{
    87  			"duplicate extraction",
    88  			[]byte(`{"app":"foo","namespace":"prod","pod":{"uuid":"foo","deployment":{"ref":"foobar"}},"next":{"err":false}}`),
    89  			labels.Labels{
    90  				{Name: "app", Value: "bar"},
    91  			},
    92  			labels.Labels{
    93  				{Name: "app", Value: "bar"},
    94  				{Name: "app_extracted", Value: "foo"},
    95  				{Name: "namespace", Value: "prod"},
    96  				{Name: "pod_uuid", Value: "foo"},
    97  				{Name: "next_err", Value: "false"},
    98  				{Name: "pod_deployment_ref", Value: "foobar"},
    99  			},
   100  		},
   101  	}
   102  	for _, tt := range tests {
   103  		j := NewJSONParser()
   104  		t.Run(tt.name, func(t *testing.T) {
   105  			b := NewBaseLabelsBuilder().ForLabels(tt.lbs, tt.lbs.Hash())
   106  			b.Reset()
   107  			_, _ = j.Process(0, tt.line, b)
   108  			sort.Sort(tt.want)
   109  			require.Equal(t, tt.want, b.LabelsResult().Labels())
   110  		})
   111  	}
   112  }
   113  
   114  func TestJSONExpressionParser(t *testing.T) {
   115  	testLine := []byte(`{"app":"foo","field with space":"value","field with ÜFT8👌":"value","null_field":null,"bool_field":false,"namespace":"prod","pod":{"uuid":"foo","deployment":{"ref":"foobar", "params": [1,2,3]}}}`)
   116  
   117  	tests := []struct {
   118  		name        string
   119  		line        []byte
   120  		expressions []JSONExpression
   121  		lbs         labels.Labels
   122  		want        labels.Labels
   123  	}{
   124  		{
   125  			"single field",
   126  			testLine,
   127  			[]JSONExpression{
   128  				NewJSONExpr("app", "app"),
   129  			},
   130  			labels.Labels{},
   131  			labels.Labels{
   132  				{Name: "app", Value: "foo"},
   133  			},
   134  		},
   135  		{
   136  			"alternate syntax",
   137  			testLine,
   138  			[]JSONExpression{
   139  				NewJSONExpr("test", `["field with space"]`),
   140  			},
   141  			labels.Labels{},
   142  			labels.Labels{
   143  				{Name: "test", Value: "value"},
   144  			},
   145  		},
   146  		{
   147  			"multiple fields",
   148  			testLine,
   149  			[]JSONExpression{
   150  				NewJSONExpr("app", "app"),
   151  				NewJSONExpr("namespace", "namespace"),
   152  			},
   153  			labels.Labels{},
   154  			labels.Labels{
   155  				{Name: "app", Value: "foo"},
   156  				{Name: "namespace", Value: "prod"},
   157  			},
   158  		},
   159  		{
   160  			"utf8",
   161  			testLine,
   162  			[]JSONExpression{
   163  				NewJSONExpr("utf8", `["field with ÜFT8👌"]`),
   164  			},
   165  			labels.Labels{},
   166  			labels.Labels{
   167  				{Name: "utf8", Value: "value"},
   168  			},
   169  		},
   170  		{
   171  			"nested field",
   172  			testLine,
   173  			[]JSONExpression{
   174  				NewJSONExpr("uuid", "pod.uuid"),
   175  			},
   176  			labels.Labels{},
   177  			labels.Labels{
   178  				{Name: "uuid", Value: "foo"},
   179  			},
   180  		},
   181  		{
   182  			"nested field alternate syntax",
   183  			testLine,
   184  			[]JSONExpression{
   185  				NewJSONExpr("uuid", `pod["uuid"]`),
   186  			},
   187  			labels.Labels{},
   188  			labels.Labels{
   189  				{Name: "uuid", Value: "foo"},
   190  			},
   191  		},
   192  		{
   193  			"nested field alternate syntax 2",
   194  			testLine,
   195  			[]JSONExpression{
   196  				NewJSONExpr("uuid", `["pod"]["uuid"]`),
   197  			},
   198  			labels.Labels{},
   199  			labels.Labels{
   200  				{Name: "uuid", Value: "foo"},
   201  			},
   202  		},
   203  		{
   204  			"nested field alternate syntax 3",
   205  			testLine,
   206  			[]JSONExpression{
   207  				NewJSONExpr("uuid", `["pod"].uuid`),
   208  			},
   209  			labels.Labels{},
   210  			labels.Labels{
   211  				{Name: "uuid", Value: "foo"},
   212  			},
   213  		},
   214  		{
   215  			"array element",
   216  			testLine,
   217  			[]JSONExpression{
   218  				NewJSONExpr("param", `pod.deployment.params[0]`),
   219  			},
   220  			labels.Labels{},
   221  			labels.Labels{
   222  				{Name: "param", Value: "1"},
   223  			},
   224  		},
   225  		{
   226  			"full array",
   227  			testLine,
   228  			[]JSONExpression{
   229  				NewJSONExpr("params", `pod.deployment.params`),
   230  			},
   231  			labels.Labels{},
   232  			labels.Labels{
   233  				{Name: "params", Value: "[1,2,3]"},
   234  			},
   235  		},
   236  		{
   237  			"full object",
   238  			testLine,
   239  			[]JSONExpression{
   240  				NewJSONExpr("deployment", `pod.deployment`),
   241  			},
   242  			labels.Labels{},
   243  			labels.Labels{
   244  				{Name: "deployment", Value: `{"ref":"foobar", "params": [1,2,3]}`},
   245  			},
   246  		},
   247  		{
   248  			"expression matching nothing",
   249  			testLine,
   250  			[]JSONExpression{
   251  				NewJSONExpr("nope", `pod.nope`),
   252  			},
   253  			labels.Labels{},
   254  			labels.Labels{
   255  				labels.Label{Name: "nope", Value: ""},
   256  			},
   257  		},
   258  		{
   259  			"null field",
   260  			testLine,
   261  			[]JSONExpression{
   262  				NewJSONExpr("nf", `null_field`),
   263  			},
   264  			labels.Labels{},
   265  			labels.Labels{
   266  				labels.Label{Name: "nf", Value: ""}, // null is coerced to an empty string
   267  			},
   268  		},
   269  		{
   270  			"boolean field",
   271  			testLine,
   272  			[]JSONExpression{
   273  				NewJSONExpr("bool", `bool_field`),
   274  			},
   275  			labels.Labels{},
   276  			labels.Labels{
   277  				{Name: "bool", Value: `false`},
   278  			},
   279  		},
   280  		{
   281  			"label override",
   282  			testLine,
   283  			[]JSONExpression{
   284  				NewJSONExpr("uuid", `pod.uuid`),
   285  			},
   286  			labels.Labels{
   287  				{Name: "uuid", Value: "bar"},
   288  			},
   289  			labels.Labels{
   290  				{Name: "uuid", Value: "bar"},
   291  				{Name: "uuid_extracted", Value: "foo"},
   292  			},
   293  		},
   294  		{
   295  			"non-matching expression",
   296  			testLine,
   297  			[]JSONExpression{
   298  				NewJSONExpr("request_size", `request.size.invalid`),
   299  			},
   300  			labels.Labels{
   301  				{Name: "uuid", Value: "bar"},
   302  			},
   303  			labels.Labels{
   304  				{Name: "uuid", Value: "bar"},
   305  				{Name: "request_size", Value: ""},
   306  			},
   307  		},
   308  		{
   309  			"empty line",
   310  			[]byte("{}"),
   311  			[]JSONExpression{
   312  				NewJSONExpr("uuid", `pod.uuid`),
   313  			},
   314  			labels.Labels{},
   315  			labels.Labels{
   316  				labels.Label{Name: "uuid", Value: ""},
   317  			},
   318  		},
   319  		{
   320  			"existing labels are not affected",
   321  			testLine,
   322  			[]JSONExpression{
   323  				NewJSONExpr("uuid", `will.not.work`),
   324  			},
   325  			labels.Labels{
   326  				{Name: "foo", Value: "bar"},
   327  			},
   328  			labels.Labels{
   329  				{Name: "foo", Value: "bar"},
   330  				{Name: "uuid", Value: ""},
   331  			},
   332  		},
   333  		{
   334  			"invalid JSON line",
   335  			[]byte(`invalid json`),
   336  			[]JSONExpression{
   337  				NewJSONExpr("uuid", `will.not.work`),
   338  			},
   339  			labels.Labels{
   340  				{Name: "foo", Value: "bar"},
   341  			},
   342  			labels.Labels{
   343  				{Name: "foo", Value: "bar"},
   344  				{Name: logqlmodel.ErrorLabel, Value: errJSON},
   345  			},
   346  		},
   347  	}
   348  	for _, tt := range tests {
   349  		j, err := NewJSONExpressionParser(tt.expressions)
   350  		if err != nil {
   351  			t.Fatalf("cannot create JSON expression parser: %s", err.Error())
   352  		}
   353  
   354  		t.Run(tt.name, func(t *testing.T) {
   355  			b := NewBaseLabelsBuilder().ForLabels(tt.lbs, tt.lbs.Hash())
   356  			b.Reset()
   357  			_, _ = j.Process(0, tt.line, b)
   358  			sort.Sort(tt.want)
   359  			require.Equal(t, tt.want, b.LabelsResult().Labels())
   360  		})
   361  	}
   362  }
   363  
   364  func TestJSONExpressionParserFailures(t *testing.T) {
   365  	tests := []struct {
   366  		name       string
   367  		expression JSONExpression
   368  		error      string
   369  	}{
   370  		{
   371  			"invalid field name",
   372  			NewJSONExpr("app", `field with space`),
   373  			"unexpected FIELD",
   374  		},
   375  		{
   376  			"missing opening square bracket",
   377  			NewJSONExpr("app", `"pod"]`),
   378  			"unexpected STRING, expecting LSB or FIELD",
   379  		},
   380  		{
   381  			"missing closing square bracket",
   382  			NewJSONExpr("app", `["pod"`),
   383  			"unexpected $end, expecting RSB",
   384  		},
   385  		{
   386  			"missing closing square bracket",
   387  			NewJSONExpr("app", `["pod""uuid"]`),
   388  			"unexpected STRING, expecting RSB",
   389  		},
   390  		{
   391  			"invalid nesting",
   392  			NewJSONExpr("app", `pod..uuid`),
   393  			"unexpected DOT, expecting FIELD",
   394  		},
   395  	}
   396  	for _, tt := range tests {
   397  		t.Run(tt.name, func(t *testing.T) {
   398  			_, err := NewJSONExpressionParser([]JSONExpression{tt.expression})
   399  
   400  			require.NotNil(t, err)
   401  			require.Equal(t, err.Error(), fmt.Sprintf("cannot parse expression [%s]: syntax error: %s", tt.expression.Expression, tt.error))
   402  		})
   403  	}
   404  }
   405  
   406  func Benchmark_Parser(b *testing.B) {
   407  	lbs := labels.Labels{
   408  		{Name: "cluster", Value: "qa-us-central1"},
   409  		{Name: "namespace", Value: "qa"},
   410  		{Name: "filename", Value: "/var/log/pods/ingress-nginx_nginx-ingress-controller-7745855568-blq6t_1f8962ef-f858-4188-a573-ba276a3cacc3/ingress-nginx/0.log"},
   411  		{Name: "job", Value: "ingress-nginx/nginx-ingress-controller"},
   412  		{Name: "name", Value: "nginx-ingress-controller"},
   413  		{Name: "pod", Value: "nginx-ingress-controller-7745855568-blq6t"},
   414  		{Name: "pod_template_hash", Value: "7745855568"},
   415  		{Name: "stream", Value: "stdout"},
   416  	}
   417  
   418  	jsonLine := `{"invalid":"a\\xc5z","proxy_protocol_addr": "","remote_addr": "3.112.221.14","remote_user": "","upstream_addr": "10.12.15.234:5000","the_real_ip": "3.112.221.14","timestamp": "2020-12-11T16:20:07+00:00","protocol": "HTTP/1.1","upstream_name": "hosted-grafana-hosted-grafana-api-80","request": {"id": "c8eacb6053552c0cd1ae443bc660e140","time": "0.001","method" : "GET","host": "hg-api-qa-us-central1.grafana.net","uri": "/","size" : "128","user_agent": "worldping-api-","referer": ""},"response": {"status": 200,"upstream_status": "200","size": "1155","size_sent": "265","latency_seconds": "0.001"}}`
   419  	logfmtLine := `level=info ts=2020-12-14T21:25:20.947307459Z caller=metrics.go:83 org_id=29 traceID=c80e691e8db08e2 latency=fast query="sum by (object_name) (rate(({container=\"metrictank\", cluster=\"hm-us-east2\"} |= \"PANIC\")[5m]))" query_type=metric range_type=range length=5m0s step=15s duration=322.623724ms status=200 throughput=1.2GB total_bytes=375MB`
   420  	nginxline := `10.1.0.88 - - [14/Dec/2020:22:56:24 +0000] "GET /static/img/about/bob.jpg HTTP/1.1" 200 60755 "https://grafana.com/go/observabilitycon/grafana-the-open-and-composable-observability-platform/?tech=ggl-o&pg=oss-graf&plcmt=hero-txt" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.1 Safari/605.1.15" "123.123.123.123, 35.35.122.223" "TLSv1.3"`
   421  	packedLike := `{"job":"123","pod":"someuid123","app":"foo","_entry":"10.1.0.88 - - [14/Dec/2020:22:56:24 +0000] "GET /static/img/about/bob.jpg HTTP/1.1"}`
   422  
   423  	for _, tt := range []struct {
   424  		name            string
   425  		line            string
   426  		s               Stage
   427  		LabelParseHints []string //  hints to reduce label extractions.
   428  	}{
   429  		{"json", jsonLine, NewJSONParser(), []string{"response_latency_seconds"}},
   430  		{"jsonParser-not json line", nginxline, NewJSONParser(), []string{"response_latency_seconds"}},
   431  		{"unpack", packedLike, NewUnpackParser(), []string{"pod"}},
   432  		{"unpack-not json line", nginxline, NewUnpackParser(), []string{"pod"}},
   433  		{"logfmt", logfmtLine, NewLogfmtParser(), []string{"info", "throughput", "org_id"}},
   434  		{"regex greedy", nginxline, mustStage(NewRegexpParser(`GET (?P<path>.*?)/\?`)), []string{"path"}},
   435  		{"regex status digits", nginxline, mustStage(NewRegexpParser(`HTTP/1.1" (?P<statuscode>\d{3}) `)), []string{"statuscode"}},
   436  		{"pattern", nginxline, mustStage(NewPatternParser(`<_> "<method> <path> <_>"<_>`)), []string{"path"}},
   437  	} {
   438  		b.Run(tt.name, func(b *testing.B) {
   439  			line := []byte(tt.line)
   440  			b.Run("no labels hints", func(b *testing.B) {
   441  				builder := NewBaseLabelsBuilder().ForLabels(lbs, lbs.Hash())
   442  				for n := 0; n < b.N; n++ {
   443  					builder.Reset()
   444  					_, _ = tt.s.Process(0, line, builder)
   445  				}
   446  			})
   447  
   448  			b.Run("labels hints", func(b *testing.B) {
   449  				builder := NewBaseLabelsBuilder().ForLabels(lbs, lbs.Hash())
   450  				builder.parserKeyHints = newParserHint(tt.LabelParseHints, tt.LabelParseHints, false, false, "")
   451  				for n := 0; n < b.N; n++ {
   452  					builder.Reset()
   453  					_, _ = tt.s.Process(0, line, builder)
   454  				}
   455  			})
   456  		})
   457  	}
   458  }
   459  
   460  func mustStage(s Stage, err error) Stage {
   461  	if err != nil {
   462  		panic(err)
   463  	}
   464  	return s
   465  }
   466  
   467  func TestNewRegexpParser(t *testing.T) {
   468  	tests := []struct {
   469  		name    string
   470  		re      string
   471  		wantErr bool
   472  	}{
   473  		{"no sub", "w.*", true},
   474  		{"sub but not named", "f(.*) (foo|bar|buzz)", true},
   475  		{"named and unamed", "blah (.*) (?P<foo>)", false},
   476  		{"named", "blah (.*) (?P<foo>foo)(?P<bar>barr)", false},
   477  		{"invalid name", "blah (.*) (?P<foo$>foo)(?P<bar>barr)", true},
   478  		{"duplicate", "blah (.*) (?P<foo>foo)(?P<foo>barr)", true},
   479  	}
   480  	for _, tt := range tests {
   481  		t.Run(tt.name, func(t *testing.T) {
   482  			_, err := NewRegexpParser(tt.re)
   483  			if (err != nil) != tt.wantErr {
   484  				t.Errorf("NewRegexpParser() error = %v, wantErr %v", err, tt.wantErr)
   485  				return
   486  			}
   487  		})
   488  	}
   489  }
   490  
   491  func Test_regexpParser_Parse(t *testing.T) {
   492  	tests := []struct {
   493  		name   string
   494  		parser Stage
   495  		line   []byte
   496  		lbs    labels.Labels
   497  		want   labels.Labels
   498  	}{
   499  		{
   500  			"no matches",
   501  			mustStage(NewRegexpParser("(?P<foo>foo|bar)buzz")),
   502  			[]byte("blah"),
   503  			labels.Labels{
   504  				{Name: "app", Value: "foo"},
   505  			},
   506  			labels.Labels{
   507  				{Name: "app", Value: "foo"},
   508  			},
   509  		},
   510  		{
   511  			"double matches",
   512  			mustStage(NewRegexpParser("(?P<foo>.*)buzz")),
   513  			[]byte("matchebuzz barbuzz"),
   514  			labels.Labels{
   515  				{Name: "app", Value: "bar"},
   516  			},
   517  			labels.Labels{
   518  				{Name: "app", Value: "bar"},
   519  				{Name: "foo", Value: "matchebuzz bar"},
   520  			},
   521  		},
   522  		{
   523  			"duplicate labels",
   524  			mustStage(NewRegexpParser("(?P<bar>bar)buzz")),
   525  			[]byte("barbuzz"),
   526  			labels.Labels{
   527  				{Name: "bar", Value: "foo"},
   528  			},
   529  			labels.Labels{
   530  				{Name: "bar", Value: "foo"},
   531  				{Name: "bar_extracted", Value: "bar"},
   532  			},
   533  		},
   534  		{
   535  			"multiple labels extracted",
   536  			mustStage(NewRegexpParser("status=(?P<status>\\w+),latency=(?P<latency>\\w+)(ms|ns)")),
   537  			[]byte("status=200,latency=500ms"),
   538  			labels.Labels{
   539  				{Name: "app", Value: "foo"},
   540  			},
   541  			labels.Labels{
   542  				{Name: "app", Value: "foo"},
   543  				{Name: "status", Value: "200"},
   544  				{Name: "latency", Value: "500"},
   545  			},
   546  		},
   547  	}
   548  	for _, tt := range tests {
   549  		t.Run(tt.name, func(t *testing.T) {
   550  			b := NewBaseLabelsBuilder().ForLabels(tt.lbs, tt.lbs.Hash())
   551  			b.Reset()
   552  			_, _ = tt.parser.Process(0, tt.line, b)
   553  			sort.Sort(tt.want)
   554  			require.Equal(t, tt.want, b.LabelsResult().Labels())
   555  		})
   556  	}
   557  }
   558  
   559  func Test_logfmtParser_Parse(t *testing.T) {
   560  	tests := []struct {
   561  		name string
   562  		line []byte
   563  		lbs  labels.Labels
   564  		want labels.Labels
   565  	}{
   566  		{
   567  			"not logfmt",
   568  			[]byte("foobar====wqe=sdad1r"),
   569  			labels.Labels{
   570  				{Name: "foo", Value: "bar"},
   571  			},
   572  			labels.Labels{
   573  				{Name: "foo", Value: "bar"},
   574  				{Name: "__error__", Value: "LogfmtParserErr"},
   575  				{Name: "__error_details__", Value: "logfmt syntax error at pos 8 : unexpected '='"},
   576  			},
   577  		},
   578  		{
   579  			"utf8 error rune",
   580  			[]byte(`buzz=foo bar=�f`),
   581  			labels.Labels{},
   582  			labels.Labels{
   583  				{Name: "buzz", Value: "foo"},
   584  				{Name: "bar", Value: ""},
   585  			},
   586  		},
   587  		{
   588  			"key alone logfmt",
   589  			[]byte("buzz bar=foo"),
   590  			labels.Labels{
   591  				{Name: "foo", Value: "bar"},
   592  			},
   593  			labels.Labels{
   594  				{Name: "foo", Value: "bar"},
   595  				{Name: "bar", Value: "foo"},
   596  				{Name: "buzz", Value: ""},
   597  			},
   598  		},
   599  		{
   600  			"quoted logfmt",
   601  			[]byte(`foobar="foo bar"`),
   602  			labels.Labels{
   603  				{Name: "foo", Value: "bar"},
   604  			},
   605  			labels.Labels{
   606  				{Name: "foo", Value: "bar"},
   607  				{Name: "foobar", Value: "foo bar"},
   608  			},
   609  		},
   610  		{
   611  			"escaped control chars in logfmt",
   612  			[]byte(`foobar="foo\nbar\tbaz"`),
   613  			labels.Labels{
   614  				{Name: "a", Value: "b"},
   615  			},
   616  			labels.Labels{
   617  				{Name: "a", Value: "b"},
   618  				{Name: "foobar", Value: "foo\nbar\tbaz"},
   619  			},
   620  		},
   621  		{
   622  			"literal control chars in logfmt",
   623  			[]byte("foobar=\"foo\nbar\tbaz\""),
   624  			labels.Labels{
   625  				{Name: "a", Value: "b"},
   626  			},
   627  			labels.Labels{
   628  				{Name: "a", Value: "b"},
   629  				{Name: "foobar", Value: "foo\nbar\tbaz"},
   630  			},
   631  		},
   632  		{
   633  			"escaped slash logfmt",
   634  			[]byte(`foobar="foo ba\\r baz"`),
   635  			labels.Labels{
   636  				{Name: "a", Value: "b"},
   637  			},
   638  			labels.Labels{
   639  				{Name: "a", Value: "b"},
   640  				{Name: "foobar", Value: `foo ba\r baz`},
   641  			},
   642  		},
   643  		{
   644  			"literal newline and escaped slash logfmt",
   645  			[]byte("foobar=\"foo bar\nb\\\\az\""),
   646  			labels.Labels{
   647  				{Name: "a", Value: "b"},
   648  			},
   649  			labels.Labels{
   650  				{Name: "a", Value: "b"},
   651  				{Name: "foobar", Value: "foo bar\nb\\az"},
   652  			},
   653  		},
   654  		{
   655  			"double property logfmt",
   656  			[]byte(`foobar="foo bar" latency=10ms`),
   657  			labels.Labels{
   658  				{Name: "foo", Value: "bar"},
   659  			},
   660  			labels.Labels{
   661  				{Name: "foo", Value: "bar"},
   662  				{Name: "foobar", Value: "foo bar"},
   663  				{Name: "latency", Value: "10ms"},
   664  			},
   665  		},
   666  		{
   667  			"duplicate from line property",
   668  			[]byte(`foobar="foo bar" foobar=10ms`),
   669  			labels.Labels{
   670  				{Name: "foo", Value: "bar"},
   671  			},
   672  			labels.Labels{
   673  				{Name: "foo", Value: "bar"},
   674  				{Name: "foobar", Value: "10ms"},
   675  			},
   676  		},
   677  		{
   678  			"duplicate property",
   679  			[]byte(`foo="foo bar" foobar=10ms`),
   680  			labels.Labels{
   681  				{Name: "foo", Value: "bar"},
   682  			},
   683  			labels.Labels{
   684  				{Name: "foo", Value: "bar"},
   685  				{Name: "foo_extracted", Value: "foo bar"},
   686  				{Name: "foobar", Value: "10ms"},
   687  			},
   688  		},
   689  		{
   690  			"invalid key names",
   691  			[]byte(`foo="foo bar" foo.bar=10ms test-dash=foo`),
   692  			labels.Labels{
   693  				{Name: "foo", Value: "bar"},
   694  			},
   695  			labels.Labels{
   696  				{Name: "foo", Value: "bar"},
   697  				{Name: "foo_extracted", Value: "foo bar"},
   698  				{Name: "foo_bar", Value: "10ms"},
   699  				{Name: "test_dash", Value: "foo"},
   700  			},
   701  		},
   702  		{
   703  			"nil",
   704  			nil,
   705  			labels.Labels{
   706  				{Name: "foo", Value: "bar"},
   707  			},
   708  			labels.Labels{
   709  				{Name: "foo", Value: "bar"},
   710  			},
   711  		},
   712  	}
   713  	p := NewLogfmtParser()
   714  	for _, tt := range tests {
   715  		t.Run(tt.name, func(t *testing.T) {
   716  			b := NewBaseLabelsBuilder().ForLabels(tt.lbs, tt.lbs.Hash())
   717  			b.Reset()
   718  			_, _ = p.Process(0, tt.line, b)
   719  			sort.Sort(tt.want)
   720  			require.Equal(t, tt.want, b.LabelsResult().Labels())
   721  		})
   722  	}
   723  }
   724  
   725  func Test_unpackParser_Parse(t *testing.T) {
   726  	tests := []struct {
   727  		name string
   728  		line []byte
   729  		lbs  labels.Labels
   730  
   731  		wantLbs  labels.Labels
   732  		wantLine []byte
   733  	}{
   734  		{
   735  			"should extract only map[string]string",
   736  			[]byte(`{"bar":1,"app":"foo","namespace":"prod","_entry":"some message","pod":{"uid":"1"}}`),
   737  			labels.Labels{{Name: "cluster", Value: "us-central1"}},
   738  			labels.Labels{
   739  				{Name: "app", Value: "foo"},
   740  				{Name: "namespace", Value: "prod"},
   741  				{Name: "cluster", Value: "us-central1"},
   742  			},
   743  			[]byte(`some message`),
   744  		},
   745  		{
   746  			"wrong json",
   747  			[]byte(`"app":"foo","namespace":"prod","_entry":"some message","pod":{"uid":"1"}`),
   748  			labels.Labels{},
   749  			labels.Labels{
   750  				{Name: "__error__", Value: "JSONParserErr"},
   751  				{Name: "__error_details__", Value: "expecting json object(6), but it is not"},
   752  			},
   753  			[]byte(`"app":"foo","namespace":"prod","_entry":"some message","pod":{"uid":"1"}`),
   754  		},
   755  		{
   756  			"not a map",
   757  			[]byte(`["foo","bar"]`),
   758  			labels.Labels{{Name: "cluster", Value: "us-central1"}},
   759  			labels.Labels{
   760  				{Name: "__error__", Value: "JSONParserErr"},
   761  				{Name: "__error_details__", Value: "expecting json object(6), but it is not"},
   762  				{Name: "cluster", Value: "us-central1"},
   763  			},
   764  			[]byte(`["foo","bar"]`),
   765  		},
   766  		{
   767  			"should rename",
   768  			[]byte(`{"bar":1,"app":"foo","namespace":"prod","_entry":"some message","pod":{"uid":"1"}}`),
   769  			labels.Labels{
   770  				{Name: "cluster", Value: "us-central1"},
   771  				{Name: "app", Value: "bar"},
   772  			},
   773  			labels.Labels{
   774  				{Name: "app", Value: "bar"},
   775  				{Name: "app_extracted", Value: "foo"},
   776  				{Name: "namespace", Value: "prod"},
   777  				{Name: "cluster", Value: "us-central1"},
   778  			},
   779  			[]byte(`some message`),
   780  		},
   781  		{
   782  			"should not change log and labels if no packed entry",
   783  			[]byte(`{"bar":1,"app":"foo","namespace":"prod","pod":{"uid":"1"}}`),
   784  			labels.Labels{
   785  				{Name: "app", Value: "bar"},
   786  				{Name: "cluster", Value: "us-central1"},
   787  			},
   788  			labels.Labels{
   789  				{Name: "app", Value: "bar"},
   790  				{Name: "cluster", Value: "us-central1"},
   791  			},
   792  			[]byte(`{"bar":1,"app":"foo","namespace":"prod","pod":{"uid":"1"}}`),
   793  		},
   794  		{
   795  			"non json with escaped quotes",
   796  			[]byte(`{"_entry":"I0303 17:49:45.976518    1526 kubelet_getters.go:178] \"Pod status updated\" pod=\"openshift-etcd/etcd-ip-10-0-150-50.us-east-2.compute.internal\" status=Running"}`),
   797  			labels.Labels{
   798  				{Name: "app", Value: "bar"},
   799  				{Name: "cluster", Value: "us-central1"},
   800  			},
   801  			labels.Labels{
   802  				{Name: "app", Value: "bar"},
   803  				{Name: "cluster", Value: "us-central1"},
   804  			},
   805  			[]byte(`I0303 17:49:45.976518    1526 kubelet_getters.go:178] "Pod status updated" pod="openshift-etcd/etcd-ip-10-0-150-50.us-east-2.compute.internal" status=Running`),
   806  		},
   807  	}
   808  	for _, tt := range tests {
   809  		j := NewUnpackParser()
   810  		t.Run(tt.name, func(t *testing.T) {
   811  			b := NewBaseLabelsBuilder().ForLabels(tt.lbs, tt.lbs.Hash())
   812  			b.Reset()
   813  			copy := string(tt.line)
   814  			l, _ := j.Process(0, tt.line, b)
   815  			sort.Sort(tt.wantLbs)
   816  			require.Equal(t, tt.wantLbs, b.LabelsResult().Labels())
   817  			require.Equal(t, tt.wantLine, l)
   818  			require.Equal(t, string(tt.wantLine), string(l))
   819  			require.Equal(t, copy, string(tt.line), "the original log line should not be mutated")
   820  		})
   821  	}
   822  }
   823  
   824  func Test_PatternParser(t *testing.T) {
   825  	tests := []struct {
   826  		pattern string
   827  		line    []byte
   828  		lbs     labels.Labels
   829  		want    labels.Labels
   830  	}{
   831  		{
   832  			`<ip> <userid> <user> [<_>] "<method> <path> <_>" <status> <size>`,
   833  			[]byte(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`),
   834  			labels.Labels{
   835  				{Name: "foo", Value: "bar"},
   836  			},
   837  			labels.Labels{
   838  				{Name: "foo", Value: "bar"},
   839  				{Name: "ip", Value: "127.0.0.1"},
   840  				{Name: "userid", Value: "user-identifier"},
   841  				{Name: "user", Value: "frank"},
   842  				{Name: "method", Value: "GET"},
   843  				{Name: "path", Value: "/apache_pb.gif"},
   844  				{Name: "status", Value: "200"},
   845  				{Name: "size", Value: "2326"},
   846  			},
   847  		},
   848  		{
   849  			`<_> msg="<method> <path> (<status>) <duration>"`,
   850  			[]byte(`level=debug ts=2021-05-19T07:54:26.864644382Z caller=logging.go:66 traceID=7fbb92fd0eb9c65d msg="POST /loki/api/v1/push (204) 1.238734ms"`),
   851  			labels.Labels{
   852  				{Name: "method", Value: "bar"},
   853  			},
   854  			labels.Labels{
   855  				{Name: "method", Value: "bar"},
   856  				{Name: "method_extracted", Value: "POST"},
   857  				{Name: "path", Value: "/loki/api/v1/push"},
   858  				{Name: "status", Value: "204"},
   859  				{Name: "duration", Value: "1.238734ms"},
   860  			},
   861  		},
   862  		{
   863  			`foo <f>"`,
   864  			[]byte(`bar`),
   865  			labels.Labels{
   866  				{Name: "method", Value: "bar"},
   867  			},
   868  			labels.Labels{
   869  				{Name: "method", Value: "bar"},
   870  			},
   871  		},
   872  	}
   873  
   874  	for _, tt := range tests {
   875  		tt := tt
   876  		t.Run(tt.pattern, func(t *testing.T) {
   877  			t.Parallel()
   878  			b := NewBaseLabelsBuilder().ForLabels(tt.lbs, tt.lbs.Hash())
   879  			b.Reset()
   880  			pp, err := NewPatternParser(tt.pattern)
   881  			require.NoError(t, err)
   882  			_, _ = pp.Process(0, tt.line, b)
   883  			sort.Sort(tt.want)
   884  			require.Equal(t, tt.want, b.LabelsResult().Labels())
   885  		})
   886  	}
   887  }