github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/logql/log/parser.go (about) 1 package log 2 3 import ( 4 "bytes" 5 "errors" 6 "fmt" 7 "io" 8 "strings" 9 "unicode/utf8" 10 11 "github.com/grafana/loki/pkg/logql/log/jsonexpr" 12 "github.com/grafana/loki/pkg/logql/log/logfmt" 13 "github.com/grafana/loki/pkg/logql/log/pattern" 14 "github.com/grafana/loki/pkg/logqlmodel" 15 16 "github.com/grafana/regexp" 17 jsoniter "github.com/json-iterator/go" 18 "github.com/prometheus/common/model" 19 ) 20 21 const ( 22 jsonSpacer = '_' 23 duplicateSuffix = "_extracted" 24 trueString = "true" 25 falseString = "false" 26 ) 27 28 var ( 29 _ Stage = &JSONParser{} 30 _ Stage = &RegexpParser{} 31 _ Stage = &LogfmtParser{} 32 33 errUnexpectedJSONObject = fmt.Errorf("expecting json object(%d), but it is not", jsoniter.ObjectValue) 34 errMissingCapture = errors.New("at least one named capture must be supplied") 35 ) 36 37 type JSONParser struct { 38 buf []byte // buffer used to build json keys 39 lbs *LabelsBuilder 40 41 keys internedStringSet 42 } 43 44 // NewJSONParser creates a log stage that can parse a json log line and add properties as labels. 45 func NewJSONParser() *JSONParser { 46 return &JSONParser{ 47 buf: make([]byte, 0, 1024), 48 keys: internedStringSet{}, 49 } 50 } 51 52 func (j *JSONParser) Process(_ int64, line []byte, lbs *LabelsBuilder) ([]byte, bool) { 53 if lbs.ParserLabelHints().NoLabels() { 54 return line, true 55 } 56 it := jsoniter.ConfigFastest.BorrowIterator(line) 57 defer jsoniter.ConfigFastest.ReturnIterator(it) 58 59 // reset the state. 60 j.buf = j.buf[:0] 61 j.lbs = lbs 62 63 if err := j.readObject(it); err != nil { 64 lbs.SetErr(errJSON) 65 lbs.SetErrorDetails(err.Error()) 66 return line, true 67 } 68 return line, true 69 } 70 71 func (j *JSONParser) readObject(it *jsoniter.Iterator) error { 72 // we only care about object and values. 73 if nextType := it.WhatIsNext(); nextType != jsoniter.ObjectValue { 74 return errUnexpectedJSONObject 75 } 76 _ = it.ReadMapCB(j.parseMap("")) 77 if it.Error != nil && it.Error != io.EOF { 78 return it.Error 79 } 80 return nil 81 } 82 83 func (j *JSONParser) parseMap(prefix string) func(iter *jsoniter.Iterator, field string) bool { 84 return func(iter *jsoniter.Iterator, field string) bool { 85 switch iter.WhatIsNext() { 86 // are we looking at a value that needs to be added ? 87 case jsoniter.StringValue, jsoniter.NumberValue, jsoniter.BoolValue: 88 j.parseLabelValue(iter, prefix, field) 89 // Or another new object based on a prefix. 90 case jsoniter.ObjectValue: 91 if key, ok := j.nextKeyPrefix(prefix, field); ok { 92 return iter.ReadMapCB(j.parseMap(key)) 93 } 94 // If this keys is not expected we skip the object 95 iter.Skip() 96 default: 97 iter.Skip() 98 } 99 return true 100 } 101 } 102 103 func (j *JSONParser) nextKeyPrefix(prefix, field string) (string, bool) { 104 // first time we add return the field as prefix. 105 if len(prefix) == 0 { 106 field = sanitizeLabelKey(field, true) 107 if j.lbs.ParserLabelHints().ShouldExtractPrefix(field) { 108 return field, true 109 } 110 return "", false 111 } 112 // otherwise we build the prefix and check using the buffer 113 j.buf = j.buf[:0] 114 j.buf = append(j.buf, prefix...) 115 j.buf = append(j.buf, byte(jsonSpacer)) 116 j.buf = append(j.buf, sanitizeLabelKey(field, false)...) 117 // if matches keep going 118 if j.lbs.ParserLabelHints().ShouldExtractPrefix(unsafeGetString(j.buf)) { 119 return string(j.buf), true 120 } 121 return "", false 122 } 123 124 func (j *JSONParser) parseLabelValue(iter *jsoniter.Iterator, prefix, field string) { 125 // the first time we use the field as label key. 126 if len(prefix) == 0 { 127 key, ok := j.keys.Get(unsafeGetBytes(field), func() (string, bool) { 128 field = sanitizeLabelKey(field, true) 129 if !j.lbs.ParserLabelHints().ShouldExtract(field) { 130 return "", false 131 } 132 if j.lbs.BaseHas(field) { 133 field = field + duplicateSuffix 134 } 135 return field, true 136 }) 137 if !ok { 138 iter.Skip() 139 return 140 } 141 j.lbs.Set(key, readValue(iter)) 142 return 143 144 } 145 // otherwise we build the label key using the buffer 146 j.buf = j.buf[:0] 147 j.buf = append(j.buf, prefix...) 148 j.buf = append(j.buf, byte(jsonSpacer)) 149 j.buf = append(j.buf, sanitizeLabelKey(field, false)...) 150 key, ok := j.keys.Get(j.buf, func() (string, bool) { 151 if j.lbs.BaseHas(string(j.buf)) { 152 j.buf = append(j.buf, duplicateSuffix...) 153 } 154 if !j.lbs.ParserLabelHints().ShouldExtract(string(j.buf)) { 155 return "", false 156 } 157 return string(j.buf), true 158 }) 159 if !ok { 160 iter.Skip() 161 return 162 } 163 j.lbs.Set(key, readValue(iter)) 164 } 165 166 func (j *JSONParser) RequiredLabelNames() []string { return []string{} } 167 168 func readValue(iter *jsoniter.Iterator) string { 169 switch iter.WhatIsNext() { 170 case jsoniter.StringValue: 171 v := iter.ReadString() 172 // the rune error replacement is rejected by Prometheus, so we skip it. 173 if strings.ContainsRune(v, utf8.RuneError) { 174 return "" 175 } 176 return v 177 case jsoniter.NumberValue: 178 return iter.ReadNumber().String() 179 case jsoniter.BoolValue: 180 if iter.ReadBool() { 181 return trueString 182 } 183 return falseString 184 default: 185 iter.Skip() 186 return "" 187 } 188 } 189 190 type RegexpParser struct { 191 regex *regexp.Regexp 192 nameIndex map[int]string 193 194 keys internedStringSet 195 } 196 197 // NewRegexpParser creates a new log stage that can extract labels from a log line using a regex expression. 198 // The regex expression must contains at least one named match. If the regex doesn't match the line is not filtered out. 199 func NewRegexpParser(re string) (*RegexpParser, error) { 200 regex, err := regexp.Compile(re) 201 if err != nil { 202 return nil, err 203 } 204 if regex.NumSubexp() == 0 { 205 return nil, errMissingCapture 206 } 207 nameIndex := map[int]string{} 208 uniqueNames := map[string]struct{}{} 209 for i, n := range regex.SubexpNames() { 210 if n != "" { 211 if !model.LabelName(n).IsValid() { 212 return nil, fmt.Errorf("invalid extracted label name '%s'", n) 213 } 214 if _, ok := uniqueNames[n]; ok { 215 return nil, fmt.Errorf("duplicate extracted label name '%s'", n) 216 } 217 nameIndex[i] = n 218 uniqueNames[n] = struct{}{} 219 } 220 } 221 if len(nameIndex) == 0 { 222 return nil, errMissingCapture 223 } 224 return &RegexpParser{ 225 regex: regex, 226 nameIndex: nameIndex, 227 keys: internedStringSet{}, 228 }, nil 229 } 230 231 func (r *RegexpParser) Process(_ int64, line []byte, lbs *LabelsBuilder) ([]byte, bool) { 232 for i, value := range r.regex.FindSubmatch(line) { 233 if name, ok := r.nameIndex[i]; ok { 234 key, ok := r.keys.Get(unsafeGetBytes(name), func() (string, bool) { 235 sanitize := sanitizeLabelKey(name, true) 236 if len(sanitize) == 0 { 237 return "", false 238 } 239 if lbs.BaseHas(sanitize) { 240 sanitize = fmt.Sprintf("%s%s", sanitize, duplicateSuffix) 241 } 242 return sanitize, true 243 }) 244 if !ok { 245 continue 246 } 247 lbs.Set(key, string(value)) 248 } 249 } 250 return line, true 251 } 252 253 func (r *RegexpParser) RequiredLabelNames() []string { return []string{} } 254 255 type LogfmtParser struct { 256 dec *logfmt.Decoder 257 keys internedStringSet 258 } 259 260 // NewLogfmtParser creates a parser that can extract labels from a logfmt log line. 261 // Each keyval is extracted into a respective label. 262 func NewLogfmtParser() *LogfmtParser { 263 return &LogfmtParser{ 264 dec: logfmt.NewDecoder(nil), 265 keys: internedStringSet{}, 266 } 267 } 268 269 func (l *LogfmtParser) Process(_ int64, line []byte, lbs *LabelsBuilder) ([]byte, bool) { 270 if lbs.ParserLabelHints().NoLabels() { 271 return line, true 272 } 273 l.dec.Reset(line) 274 for l.dec.ScanKeyval() { 275 key, ok := l.keys.Get(l.dec.Key(), func() (string, bool) { 276 sanitized := sanitizeLabelKey(string(l.dec.Key()), true) 277 if !lbs.ParserLabelHints().ShouldExtract(sanitized) { 278 return "", false 279 } 280 if len(sanitized) == 0 { 281 return "", false 282 } 283 if lbs.BaseHas(sanitized) { 284 sanitized = fmt.Sprintf("%s%s", sanitized, duplicateSuffix) 285 } 286 return sanitized, true 287 }) 288 if !ok { 289 continue 290 } 291 val := l.dec.Value() 292 // the rune error replacement is rejected by Prometheus, so we skip it. 293 if bytes.ContainsRune(val, utf8.RuneError) { 294 val = nil 295 } 296 lbs.Set(key, string(val)) 297 } 298 if l.dec.Err() != nil { 299 lbs.SetErr(errLogfmt) 300 lbs.SetErrorDetails(l.dec.Err().Error()) 301 return line, true 302 } 303 return line, true 304 } 305 306 func (l *LogfmtParser) RequiredLabelNames() []string { return []string{} } 307 308 type PatternParser struct { 309 matcher pattern.Matcher 310 names []string 311 } 312 313 func NewPatternParser(pn string) (*PatternParser, error) { 314 m, err := pattern.New(pn) 315 if err != nil { 316 return nil, err 317 } 318 for _, name := range m.Names() { 319 if !model.LabelName(name).IsValid() { 320 return nil, fmt.Errorf("invalid capture label name '%s'", name) 321 } 322 } 323 return &PatternParser{ 324 matcher: m, 325 names: m.Names(), 326 }, nil 327 } 328 329 func (l *PatternParser) Process(_ int64, line []byte, lbs *LabelsBuilder) ([]byte, bool) { 330 if lbs.ParserLabelHints().NoLabels() { 331 return line, true 332 } 333 matches := l.matcher.Matches(line) 334 names := l.names[:len(matches)] 335 for i, m := range matches { 336 name := names[i] 337 if !lbs.parserKeyHints.ShouldExtract(name) { 338 continue 339 } 340 if lbs.BaseHas(name) { 341 name = name + duplicateSuffix 342 } 343 344 lbs.Set(name, string(m)) 345 } 346 return line, true 347 } 348 349 func (l *PatternParser) RequiredLabelNames() []string { return []string{} } 350 351 type JSONExpressionParser struct { 352 expressions map[string][]interface{} 353 354 keys internedStringSet 355 } 356 357 func NewJSONExpressionParser(expressions []JSONExpression) (*JSONExpressionParser, error) { 358 paths := make(map[string][]interface{}) 359 360 for _, exp := range expressions { 361 path, err := jsonexpr.Parse(exp.Expression, false) 362 if err != nil { 363 return nil, fmt.Errorf("cannot parse expression [%s]: %w", exp.Expression, err) 364 } 365 366 if !model.LabelName(exp.Identifier).IsValid() { 367 return nil, fmt.Errorf("invalid extracted label name '%s'", exp.Identifier) 368 } 369 370 paths[exp.Identifier] = path 371 } 372 373 return &JSONExpressionParser{ 374 expressions: paths, 375 keys: internedStringSet{}, 376 }, nil 377 } 378 379 func (j *JSONExpressionParser) Process(_ int64, line []byte, lbs *LabelsBuilder) ([]byte, bool) { 380 if lbs.ParserLabelHints().NoLabels() { 381 return line, true 382 } 383 384 if !jsoniter.ConfigFastest.Valid(line) { 385 lbs.SetErr(errJSON) 386 return line, true 387 } 388 389 for identifier, paths := range j.expressions { 390 result := jsoniter.ConfigFastest.Get(line, paths...).ToString() 391 key, _ := j.keys.Get(unsafeGetBytes(identifier), func() (string, bool) { 392 if lbs.BaseHas(identifier) { 393 identifier = identifier + duplicateSuffix 394 } 395 return identifier, true 396 }) 397 398 lbs.Set(key, result) 399 } 400 401 return line, true 402 } 403 404 func (j *JSONExpressionParser) RequiredLabelNames() []string { return []string{} } 405 406 type UnpackParser struct { 407 lbsBuffer []string 408 409 keys internedStringSet 410 } 411 412 // NewUnpackParser creates a new unpack stage. 413 // The unpack stage will parse a json log line as map[string]string where each key will be translated into labels. 414 // A special key _entry will also be used to replace the original log line. This is to be used in conjunction with Promtail pack stage. 415 // see https://grafana.com/docs/loki/latest/clients/promtail/stages/pack/ 416 func NewUnpackParser() *UnpackParser { 417 return &UnpackParser{ 418 lbsBuffer: make([]string, 0, 16), 419 keys: internedStringSet{}, 420 } 421 } 422 423 func (UnpackParser) RequiredLabelNames() []string { return []string{} } 424 425 func (u *UnpackParser) Process(_ int64, line []byte, lbs *LabelsBuilder) ([]byte, bool) { 426 if lbs.ParserLabelHints().NoLabels() { 427 return line, true 428 } 429 u.lbsBuffer = u.lbsBuffer[:0] 430 it := jsoniter.ConfigFastest.BorrowIterator(line) 431 defer jsoniter.ConfigFastest.ReturnIterator(it) 432 433 entry, err := u.unpack(it, line, lbs) 434 if err != nil { 435 lbs.SetErr(errJSON) 436 lbs.SetErrorDetails(err.Error()) 437 return line, true 438 } 439 return entry, true 440 } 441 442 func (u *UnpackParser) unpack(it *jsoniter.Iterator, entry []byte, lbs *LabelsBuilder) ([]byte, error) { 443 // we only care about object and values. 444 if nextType := it.WhatIsNext(); nextType != jsoniter.ObjectValue { 445 return nil, errUnexpectedJSONObject 446 } 447 var isPacked bool 448 _ = it.ReadMapCB(func(iter *jsoniter.Iterator, field string) bool { 449 switch iter.WhatIsNext() { 450 case jsoniter.StringValue: 451 // we only unpack map[string]string. Anything else is skipped. 452 if field == logqlmodel.PackedEntryKey { 453 // todo(ctovena): we should just reslice the original line since the property is contiguous 454 // but jsoniter doesn't allow us to do this right now. 455 // https://github.com/buger/jsonparser might do a better job at this. 456 entry = []byte(iter.ReadString()) 457 isPacked = true 458 return true 459 } 460 key, ok := u.keys.Get(unsafeGetBytes(field), func() (string, bool) { 461 if !lbs.ParserLabelHints().ShouldExtract(field) { 462 return "", false 463 } 464 if lbs.BaseHas(field) { 465 field = field + duplicateSuffix 466 } 467 return field, true 468 }) 469 if !ok { 470 iter.Skip() 471 return true 472 } 473 474 // append to the buffer of labels 475 u.lbsBuffer = append(u.lbsBuffer, key, iter.ReadString()) 476 default: 477 iter.Skip() 478 } 479 return true 480 }) 481 if it.Error != nil && it.Error != io.EOF { 482 return nil, it.Error 483 } 484 // flush the buffer if we found a packed entry. 485 if isPacked { 486 for i := 0; i < len(u.lbsBuffer); i = i + 2 { 487 lbs.Set(u.lbsBuffer[i], u.lbsBuffer[i+1]) 488 } 489 } 490 return entry, nil 491 }