github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/logql/log/parser.go (about)

     1  package log
     2  
     3  import (
     4  	"bytes"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"strings"
     9  	"unicode/utf8"
    10  
    11  	"github.com/grafana/loki/pkg/logql/log/jsonexpr"
    12  	"github.com/grafana/loki/pkg/logql/log/logfmt"
    13  	"github.com/grafana/loki/pkg/logql/log/pattern"
    14  	"github.com/grafana/loki/pkg/logqlmodel"
    15  
    16  	"github.com/grafana/regexp"
    17  	jsoniter "github.com/json-iterator/go"
    18  	"github.com/prometheus/common/model"
    19  )
    20  
// Constants shared by the parsers in this file.
const (
	// jsonSpacer is the byte placed between a parent key prefix and a nested
	// field name when a flattened JSON label key is built.
	jsonSpacer      = '_'
	// duplicateSuffix is appended to an extracted label name when the
	// LabelsBuilder reports (via BaseHas) that the name already exists.
	duplicateSuffix = "_extracted"
	// trueString and falseString are the label values used for JSON booleans.
	trueString      = "true"
	falseString     = "false"
)
    27  
var (
	// Compile-time assertions that these parsers satisfy the Stage interface.
	_ Stage = &JSONParser{}
	_ Stage = &RegexpParser{}
	_ Stage = &LogfmtParser{}

	// errUnexpectedJSONObject is returned when the next JSON value to read is
	// expected to be an object but is of another type.
	errUnexpectedJSONObject = fmt.Errorf("expecting json object(%d), but it is not", jsoniter.ObjectValue)
	// errMissingCapture is returned by NewRegexpParser when the expression
	// does not declare any named capture group.
	errMissingCapture       = errors.New("at least one named capture must be supplied")
)
    36  
// JSONParser is a log stage that extracts the scalar properties of a JSON
// object log line as labels. Keys of nested objects are flattened by joining
// the path segments with jsonSpacer.
type JSONParser struct {
	buf []byte // scratch buffer reused to build flattened json keys
	// lbs is the builder of the line currently being processed; it is
	// reassigned at the start of every Process call.
	lbs *LabelsBuilder

	// keys caches, per raw key, the label name to use (or the decision to
	// skip it) so it is computed only once.
	keys internedStringSet
}
    43  
    44  // NewJSONParser creates a log stage that can parse a json log line and add properties as labels.
    45  func NewJSONParser() *JSONParser {
    46  	return &JSONParser{
    47  		buf:  make([]byte, 0, 1024),
    48  		keys: internedStringSet{},
    49  	}
    50  }
    51  
    52  func (j *JSONParser) Process(_ int64, line []byte, lbs *LabelsBuilder) ([]byte, bool) {
    53  	if lbs.ParserLabelHints().NoLabels() {
    54  		return line, true
    55  	}
    56  	it := jsoniter.ConfigFastest.BorrowIterator(line)
    57  	defer jsoniter.ConfigFastest.ReturnIterator(it)
    58  
    59  	// reset the state.
    60  	j.buf = j.buf[:0]
    61  	j.lbs = lbs
    62  
    63  	if err := j.readObject(it); err != nil {
    64  		lbs.SetErr(errJSON)
    65  		lbs.SetErrorDetails(err.Error())
    66  		return line, true
    67  	}
    68  	return line, true
    69  }
    70  
    71  func (j *JSONParser) readObject(it *jsoniter.Iterator) error {
    72  	// we only care about object and values.
    73  	if nextType := it.WhatIsNext(); nextType != jsoniter.ObjectValue {
    74  		return errUnexpectedJSONObject
    75  	}
    76  	_ = it.ReadMapCB(j.parseMap(""))
    77  	if it.Error != nil && it.Error != io.EOF {
    78  		return it.Error
    79  	}
    80  	return nil
    81  }
    82  
    83  func (j *JSONParser) parseMap(prefix string) func(iter *jsoniter.Iterator, field string) bool {
    84  	return func(iter *jsoniter.Iterator, field string) bool {
    85  		switch iter.WhatIsNext() {
    86  		// are we looking at a value that needs to be added ?
    87  		case jsoniter.StringValue, jsoniter.NumberValue, jsoniter.BoolValue:
    88  			j.parseLabelValue(iter, prefix, field)
    89  		// Or another new object based on a prefix.
    90  		case jsoniter.ObjectValue:
    91  			if key, ok := j.nextKeyPrefix(prefix, field); ok {
    92  				return iter.ReadMapCB(j.parseMap(key))
    93  			}
    94  			// If this keys is not expected we skip the object
    95  			iter.Skip()
    96  		default:
    97  			iter.Skip()
    98  		}
    99  		return true
   100  	}
   101  }
   102  
   103  func (j *JSONParser) nextKeyPrefix(prefix, field string) (string, bool) {
   104  	// first time we add return the field as prefix.
   105  	if len(prefix) == 0 {
   106  		field = sanitizeLabelKey(field, true)
   107  		if j.lbs.ParserLabelHints().ShouldExtractPrefix(field) {
   108  			return field, true
   109  		}
   110  		return "", false
   111  	}
   112  	// otherwise we build the prefix and check using the buffer
   113  	j.buf = j.buf[:0]
   114  	j.buf = append(j.buf, prefix...)
   115  	j.buf = append(j.buf, byte(jsonSpacer))
   116  	j.buf = append(j.buf, sanitizeLabelKey(field, false)...)
   117  	// if matches keep going
   118  	if j.lbs.ParserLabelHints().ShouldExtractPrefix(unsafeGetString(j.buf)) {
   119  		return string(j.buf), true
   120  	}
   121  	return "", false
   122  }
   123  
   124  func (j *JSONParser) parseLabelValue(iter *jsoniter.Iterator, prefix, field string) {
   125  	// the first time we use the field as label key.
   126  	if len(prefix) == 0 {
   127  		key, ok := j.keys.Get(unsafeGetBytes(field), func() (string, bool) {
   128  			field = sanitizeLabelKey(field, true)
   129  			if !j.lbs.ParserLabelHints().ShouldExtract(field) {
   130  				return "", false
   131  			}
   132  			if j.lbs.BaseHas(field) {
   133  				field = field + duplicateSuffix
   134  			}
   135  			return field, true
   136  		})
   137  		if !ok {
   138  			iter.Skip()
   139  			return
   140  		}
   141  		j.lbs.Set(key, readValue(iter))
   142  		return
   143  
   144  	}
   145  	// otherwise we build the label key using the buffer
   146  	j.buf = j.buf[:0]
   147  	j.buf = append(j.buf, prefix...)
   148  	j.buf = append(j.buf, byte(jsonSpacer))
   149  	j.buf = append(j.buf, sanitizeLabelKey(field, false)...)
   150  	key, ok := j.keys.Get(j.buf, func() (string, bool) {
   151  		if j.lbs.BaseHas(string(j.buf)) {
   152  			j.buf = append(j.buf, duplicateSuffix...)
   153  		}
   154  		if !j.lbs.ParserLabelHints().ShouldExtract(string(j.buf)) {
   155  			return "", false
   156  		}
   157  		return string(j.buf), true
   158  	})
   159  	if !ok {
   160  		iter.Skip()
   161  		return
   162  	}
   163  	j.lbs.Set(key, readValue(iter))
   164  }
   165  
   166  func (j *JSONParser) RequiredLabelNames() []string { return []string{} }
   167  
   168  func readValue(iter *jsoniter.Iterator) string {
   169  	switch iter.WhatIsNext() {
   170  	case jsoniter.StringValue:
   171  		v := iter.ReadString()
   172  		// the rune error replacement is rejected by Prometheus, so we skip it.
   173  		if strings.ContainsRune(v, utf8.RuneError) {
   174  			return ""
   175  		}
   176  		return v
   177  	case jsoniter.NumberValue:
   178  		return iter.ReadNumber().String()
   179  	case jsoniter.BoolValue:
   180  		if iter.ReadBool() {
   181  			return trueString
   182  		}
   183  		return falseString
   184  	default:
   185  		iter.Skip()
   186  		return ""
   187  	}
   188  }
   189  
// RegexpParser is a log stage that extracts labels from a log line using the
// named capture groups of a regular expression.
type RegexpParser struct {
	// regex is the compiled expression applied to every line.
	regex     *regexp.Regexp
	// nameIndex maps a submatch index to the name of its capture group;
	// unnamed groups have no entry.
	nameIndex map[int]string

	// keys caches, per capture name, the label name to use (or the decision
	// to skip it).
	keys internedStringSet
}
   196  
   197  // NewRegexpParser creates a new log stage that can extract labels from a log line using a regex expression.
   198  // The regex expression must contains at least one named match. If the regex doesn't match the line is not filtered out.
   199  func NewRegexpParser(re string) (*RegexpParser, error) {
   200  	regex, err := regexp.Compile(re)
   201  	if err != nil {
   202  		return nil, err
   203  	}
   204  	if regex.NumSubexp() == 0 {
   205  		return nil, errMissingCapture
   206  	}
   207  	nameIndex := map[int]string{}
   208  	uniqueNames := map[string]struct{}{}
   209  	for i, n := range regex.SubexpNames() {
   210  		if n != "" {
   211  			if !model.LabelName(n).IsValid() {
   212  				return nil, fmt.Errorf("invalid extracted label name '%s'", n)
   213  			}
   214  			if _, ok := uniqueNames[n]; ok {
   215  				return nil, fmt.Errorf("duplicate extracted label name '%s'", n)
   216  			}
   217  			nameIndex[i] = n
   218  			uniqueNames[n] = struct{}{}
   219  		}
   220  	}
   221  	if len(nameIndex) == 0 {
   222  		return nil, errMissingCapture
   223  	}
   224  	return &RegexpParser{
   225  		regex:     regex,
   226  		nameIndex: nameIndex,
   227  		keys:      internedStringSet{},
   228  	}, nil
   229  }
   230  
   231  func (r *RegexpParser) Process(_ int64, line []byte, lbs *LabelsBuilder) ([]byte, bool) {
   232  	for i, value := range r.regex.FindSubmatch(line) {
   233  		if name, ok := r.nameIndex[i]; ok {
   234  			key, ok := r.keys.Get(unsafeGetBytes(name), func() (string, bool) {
   235  				sanitize := sanitizeLabelKey(name, true)
   236  				if len(sanitize) == 0 {
   237  					return "", false
   238  				}
   239  				if lbs.BaseHas(sanitize) {
   240  					sanitize = fmt.Sprintf("%s%s", sanitize, duplicateSuffix)
   241  				}
   242  				return sanitize, true
   243  			})
   244  			if !ok {
   245  				continue
   246  			}
   247  			lbs.Set(key, string(value))
   248  		}
   249  	}
   250  	return line, true
   251  }
   252  
   253  func (r *RegexpParser) RequiredLabelNames() []string { return []string{} }
   254  
// LogfmtParser is a log stage that extracts the key/value pairs of a logfmt
// log line as labels.
type LogfmtParser struct {
	// dec is a decoder reused across lines; it is reset with each new line.
	dec  *logfmt.Decoder
	// keys caches, per raw key, the label name to use (or the decision to
	// skip it).
	keys internedStringSet
}
   259  
   260  // NewLogfmtParser creates a parser that can extract labels from a logfmt log line.
   261  // Each keyval is extracted into a respective label.
   262  func NewLogfmtParser() *LogfmtParser {
   263  	return &LogfmtParser{
   264  		dec:  logfmt.NewDecoder(nil),
   265  		keys: internedStringSet{},
   266  	}
   267  }
   268  
   269  func (l *LogfmtParser) Process(_ int64, line []byte, lbs *LabelsBuilder) ([]byte, bool) {
   270  	if lbs.ParserLabelHints().NoLabels() {
   271  		return line, true
   272  	}
   273  	l.dec.Reset(line)
   274  	for l.dec.ScanKeyval() {
   275  		key, ok := l.keys.Get(l.dec.Key(), func() (string, bool) {
   276  			sanitized := sanitizeLabelKey(string(l.dec.Key()), true)
   277  			if !lbs.ParserLabelHints().ShouldExtract(sanitized) {
   278  				return "", false
   279  			}
   280  			if len(sanitized) == 0 {
   281  				return "", false
   282  			}
   283  			if lbs.BaseHas(sanitized) {
   284  				sanitized = fmt.Sprintf("%s%s", sanitized, duplicateSuffix)
   285  			}
   286  			return sanitized, true
   287  		})
   288  		if !ok {
   289  			continue
   290  		}
   291  		val := l.dec.Value()
   292  		// the rune error replacement is rejected by Prometheus, so we skip it.
   293  		if bytes.ContainsRune(val, utf8.RuneError) {
   294  			val = nil
   295  		}
   296  		lbs.Set(key, string(val))
   297  	}
   298  	if l.dec.Err() != nil {
   299  		lbs.SetErr(errLogfmt)
   300  		lbs.SetErrorDetails(l.dec.Err().Error())
   301  		return line, true
   302  	}
   303  	return line, true
   304  }
   305  
   306  func (l *LogfmtParser) RequiredLabelNames() []string { return []string{} }
   307  
// PatternParser is a log stage that extracts labels from a log line using a
// pattern expression with named captures.
type PatternParser struct {
	// matcher is the compiled pattern applied to every line.
	matcher pattern.Matcher
	// names holds the capture names reported by the matcher; the i-th match
	// is stored under the i-th name.
	names   []string
}
   312  
   313  func NewPatternParser(pn string) (*PatternParser, error) {
   314  	m, err := pattern.New(pn)
   315  	if err != nil {
   316  		return nil, err
   317  	}
   318  	for _, name := range m.Names() {
   319  		if !model.LabelName(name).IsValid() {
   320  			return nil, fmt.Errorf("invalid capture label name '%s'", name)
   321  		}
   322  	}
   323  	return &PatternParser{
   324  		matcher: m,
   325  		names:   m.Names(),
   326  	}, nil
   327  }
   328  
   329  func (l *PatternParser) Process(_ int64, line []byte, lbs *LabelsBuilder) ([]byte, bool) {
   330  	if lbs.ParserLabelHints().NoLabels() {
   331  		return line, true
   332  	}
   333  	matches := l.matcher.Matches(line)
   334  	names := l.names[:len(matches)]
   335  	for i, m := range matches {
   336  		name := names[i]
   337  		if !lbs.parserKeyHints.ShouldExtract(name) {
   338  			continue
   339  		}
   340  		if lbs.BaseHas(name) {
   341  			name = name + duplicateSuffix
   342  		}
   343  
   344  		lbs.Set(name, string(m))
   345  	}
   346  	return line, true
   347  }
   348  
   349  func (l *PatternParser) RequiredLabelNames() []string { return []string{} }
   350  
// JSONExpressionParser is a log stage that extracts labels from a JSON log
// line by evaluating a fixed set of JSON path expressions.
type JSONExpressionParser struct {
	// expressions maps a label identifier to the parsed path used to look up
	// its value in the JSON document.
	expressions map[string][]interface{}

	// keys caches, per identifier, the label name to use.
	keys internedStringSet
}
   356  
   357  func NewJSONExpressionParser(expressions []JSONExpression) (*JSONExpressionParser, error) {
   358  	paths := make(map[string][]interface{})
   359  
   360  	for _, exp := range expressions {
   361  		path, err := jsonexpr.Parse(exp.Expression, false)
   362  		if err != nil {
   363  			return nil, fmt.Errorf("cannot parse expression [%s]: %w", exp.Expression, err)
   364  		}
   365  
   366  		if !model.LabelName(exp.Identifier).IsValid() {
   367  			return nil, fmt.Errorf("invalid extracted label name '%s'", exp.Identifier)
   368  		}
   369  
   370  		paths[exp.Identifier] = path
   371  	}
   372  
   373  	return &JSONExpressionParser{
   374  		expressions: paths,
   375  		keys:        internedStringSet{},
   376  	}, nil
   377  }
   378  
   379  func (j *JSONExpressionParser) Process(_ int64, line []byte, lbs *LabelsBuilder) ([]byte, bool) {
   380  	if lbs.ParserLabelHints().NoLabels() {
   381  		return line, true
   382  	}
   383  
   384  	if !jsoniter.ConfigFastest.Valid(line) {
   385  		lbs.SetErr(errJSON)
   386  		return line, true
   387  	}
   388  
   389  	for identifier, paths := range j.expressions {
   390  		result := jsoniter.ConfigFastest.Get(line, paths...).ToString()
   391  		key, _ := j.keys.Get(unsafeGetBytes(identifier), func() (string, bool) {
   392  			if lbs.BaseHas(identifier) {
   393  				identifier = identifier + duplicateSuffix
   394  			}
   395  			return identifier, true
   396  		})
   397  
   398  		lbs.Set(key, result)
   399  	}
   400  
   401  	return line, true
   402  }
   403  
   404  func (j *JSONExpressionParser) RequiredLabelNames() []string { return []string{} }
   405  
// UnpackParser is a log stage that unpacks a JSON log line made of string
// properties into labels and, when present, the packed original entry.
type UnpackParser struct {
	// lbsBuffer accumulates alternating label names and values for the line
	// being processed; they are applied only once a packed entry is found.
	lbsBuffer []string

	// keys caches, per raw key, the label name to use (or the decision to
	// skip it).
	keys internedStringSet
}
   411  
   412  // NewUnpackParser creates a new unpack stage.
   413  // The unpack stage will parse a json log line as map[string]string where each key will be translated into labels.
   414  // A special key _entry will also be used to replace the original log line. This is to be used in conjunction with Promtail pack stage.
   415  // see https://grafana.com/docs/loki/latest/clients/promtail/stages/pack/
   416  func NewUnpackParser() *UnpackParser {
   417  	return &UnpackParser{
   418  		lbsBuffer: make([]string, 0, 16),
   419  		keys:      internedStringSet{},
   420  	}
   421  }
   422  
   423  func (UnpackParser) RequiredLabelNames() []string { return []string{} }
   424  
   425  func (u *UnpackParser) Process(_ int64, line []byte, lbs *LabelsBuilder) ([]byte, bool) {
   426  	if lbs.ParserLabelHints().NoLabels() {
   427  		return line, true
   428  	}
   429  	u.lbsBuffer = u.lbsBuffer[:0]
   430  	it := jsoniter.ConfigFastest.BorrowIterator(line)
   431  	defer jsoniter.ConfigFastest.ReturnIterator(it)
   432  
   433  	entry, err := u.unpack(it, line, lbs)
   434  	if err != nil {
   435  		lbs.SetErr(errJSON)
   436  		lbs.SetErrorDetails(err.Error())
   437  		return line, true
   438  	}
   439  	return entry, true
   440  }
   441  
   442  func (u *UnpackParser) unpack(it *jsoniter.Iterator, entry []byte, lbs *LabelsBuilder) ([]byte, error) {
   443  	// we only care about object and values.
   444  	if nextType := it.WhatIsNext(); nextType != jsoniter.ObjectValue {
   445  		return nil, errUnexpectedJSONObject
   446  	}
   447  	var isPacked bool
   448  	_ = it.ReadMapCB(func(iter *jsoniter.Iterator, field string) bool {
   449  		switch iter.WhatIsNext() {
   450  		case jsoniter.StringValue:
   451  			// we only unpack map[string]string. Anything else is skipped.
   452  			if field == logqlmodel.PackedEntryKey {
   453  				// todo(ctovena): we should just reslice the original line since the property is contiguous
   454  				// but jsoniter doesn't allow us to do this right now.
   455  				// https://github.com/buger/jsonparser might do a better job at this.
   456  				entry = []byte(iter.ReadString())
   457  				isPacked = true
   458  				return true
   459  			}
   460  			key, ok := u.keys.Get(unsafeGetBytes(field), func() (string, bool) {
   461  				if !lbs.ParserLabelHints().ShouldExtract(field) {
   462  					return "", false
   463  				}
   464  				if lbs.BaseHas(field) {
   465  					field = field + duplicateSuffix
   466  				}
   467  				return field, true
   468  			})
   469  			if !ok {
   470  				iter.Skip()
   471  				return true
   472  			}
   473  
   474  			// append to the buffer of labels
   475  			u.lbsBuffer = append(u.lbsBuffer, key, iter.ReadString())
   476  		default:
   477  			iter.Skip()
   478  		}
   479  		return true
   480  	})
   481  	if it.Error != nil && it.Error != io.EOF {
   482  		return nil, it.Error
   483  	}
   484  	// flush the buffer if we found a packed entry.
   485  	if isPacked {
   486  		for i := 0; i < len(u.lbsBuffer); i = i + 2 {
   487  			lbs.Set(u.lbsBuffer[i], u.lbsBuffer[i+1])
   488  		}
   489  	}
   490  	return entry, nil
   491  }