github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/logql/syntax/lex.go

package syntax

import (
	"strings"
	"text/scanner"
	"time"
	"unicode"
	"unicode/utf8"

	"github.com/dustin/go-humanize"
	"github.com/prometheus/common/model"
	"github.com/prometheus/prometheus/util/strutil"

	"github.com/grafana/loki/pkg/logqlmodel"
)

var tokens = map[string]int{
	",":            COMMA,
	".":            DOT,
	"{":            OPEN_BRACE,
	"}":            CLOSE_BRACE,
	"=":            EQ,
	OpTypeNEQ:      NEQ,
	"=~":           RE,
	"!~":           NRE,
	"|=":           PIPE_EXACT,
	"|~":           PIPE_MATCH,
	OpPipe:         PIPE,
	OpUnwrap:       UNWRAP,
	"(":            OPEN_PARENTHESIS,
	")":            CLOSE_PARENTHESIS,
	"by":           BY,
	"without":      WITHOUT,
	"bool":         BOOL,
	"[":            OPEN_BRACKET,
	"]":            CLOSE_BRACKET,
	OpLabelReplace: LABEL_REPLACE,
	OpOffset:       OFFSET,
	OpOn:           ON,
	OpIgnoring:     IGNORING,
	OpGroupLeft:    GROUP_LEFT,
	OpGroupRight:   GROUP_RIGHT,

	// binops
	OpTypeOr:     OR,
	OpTypeAnd:    AND,
	OpTypeUnless: UNLESS,
	OpTypeAdd:    ADD,
	OpTypeSub:    SUB,
	OpTypeMul:    MUL,
	OpTypeDiv:    DIV,
	OpTypeMod:    MOD,
	OpTypePow:    POW,
	// comparison binops
	OpTypeCmpEQ: CMP_EQ,
	OpTypeGT:    GT,
	OpTypeGTE:   GTE,
	OpTypeLT:    LT,
	OpTypeLTE:   LTE,

	// parsers
	OpParserTypeJSON:    JSON,
	OpParserTypeRegexp:  REGEXP,
	OpParserTypeLogfmt:  LOGFMT,
	OpParserTypeUnpack:  UNPACK,
	OpParserTypePattern: PATTERN,

	// fmt
	OpFmtLabel: LABEL_FMT,
	OpFmtLine:  LINE_FMT,

	// filter functions
	OpFilterIP: IP,
}

// functionTokens are tokens that need to be suffixed with parentheses.
var functionTokens = map[string]int{
	// range vec ops
	OpRangeTypeRate:        RATE,
	OpRangeTypeRateCounter: RATE_COUNTER,
	OpRangeTypeCount:       COUNT_OVER_TIME,
	OpRangeTypeBytesRate:   BYTES_RATE,
	OpRangeTypeBytes:       BYTES_OVER_TIME,
	OpRangeTypeAvg:         AVG_OVER_TIME,
	OpRangeTypeSum:         SUM_OVER_TIME,
	OpRangeTypeMin:         MIN_OVER_TIME,
	OpRangeTypeMax:         MAX_OVER_TIME,
	OpRangeTypeStdvar:      STDVAR_OVER_TIME,
	OpRangeTypeStddev:      STDDEV_OVER_TIME,
	OpRangeTypeQuantile:    QUANTILE_OVER_TIME,
	OpRangeTypeFirst:       FIRST_OVER_TIME,
	OpRangeTypeLast:        LAST_OVER_TIME,
	OpRangeTypeAbsent:      ABSENT_OVER_TIME,

	// vec ops
	OpTypeSum:      SUM,
	OpTypeAvg:      AVG,
	OpTypeMax:      MAX,
	OpTypeMin:      MIN,
	OpTypeCount:    COUNT,
	OpTypeStddev:   STDDEV,
	OpTypeStdvar:   STDVAR,
	OpTypeBottomK:  BOTTOMK,
	OpTypeTopK:     TOPK,
	OpLabelReplace: LABEL_REPLACE,

	// conversion Op
	OpConvBytes:           BYTES_CONV,
	OpConvDuration:        DURATION_CONV,
	OpConvDurationSeconds: DURATION_SECONDS_CONV,

	// filterOp
	OpFilterIP: IP,
}

// lexer tokenizes LogQL expressions, collecting any parse errors in errs.
type lexer struct {
	scanner.Scanner
	errs    []logqlmodel.ParseError
	builder strings.Builder
}

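// Lex returns the next token for the LogQL parser, storing literal values
// (strings, numbers, durations, byte sizes) in lval; returning 0 signals end
// of input or an unrecoverable error. '#' comments are skipped, and numeric
// tokens are re-examined to decide whether they are plain numbers, durations,
// or byte sizes.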
func (l *lexer) Lex(lval *exprSymType) int {
	r := l.Scan()

	switch r {
	case '#':
		// Scan until a newline or EOF is encountered
		for next := l.Peek(); !(next == '\n' || next == scanner.EOF); next = l.Next() {
		}

		return l.Lex(lval)

	case scanner.EOF:
		return 0

	case scanner.Int, scanner.Float:
		numberText := l.TokenText()

		duration, ok := tryScanDuration(numberText, &l.Scanner)
		if ok {
			lval.duration = duration
			return DURATION
		}

		bytes, ok := tryScanBytes(numberText, &l.Scanner)
		if ok {
			lval.bytes = bytes
			return BYTES
		}

		lval.str = numberText
		return NUMBER

	case scanner.String, scanner.RawString:
		var err error
		tokenText := l.TokenText()
		if !utf8.ValidString(tokenText) {
			l.Error("invalid UTF-8 rune")
			return 0
		}
		lval.str, err = strutil.Unquote(tokenText)
		if err != nil {
			l.Error(err.Error())
			return 0
		}
		return STRING
	}

	// scanning duration tokens
	if r == '[' {
		l.builder.Reset()
		for r := l.Next(); r != scanner.EOF; r = l.Next() {
			if r == ']' {
				i, err := model.ParseDuration(l.builder.String())
				if err != nil {
					l.Error(err.Error())
					return 0
				}
				lval.duration = time.Duration(i)
				return RANGE
			}
			_, _ = l.builder.WriteRune(r)
		}
		l.Error("missing closing ']' in duration")
		return 0
	}

	tokenText := l.TokenText()
	tokenNext := tokenText + string(l.Peek())
	if tok, ok := functionTokens[tokenNext]; ok {
		// copy the scanner so we can advance past the entire token while testing for a function suffix
		sc := l.Scanner
		sc.Next()
		if isFunction(sc) {
			l.Next()
			return tok
		}
	}

	if tok, ok := functionTokens[tokenText]; ok {
		if !isFunction(l.Scanner) {
			lval.str = tokenText
			return IDENTIFIER
		}
		return tok
	}

	if tok, ok := tokens[tokenNext]; ok {
		l.Next()
		return tok
	}

	if tok, ok := tokens[tokenText]; ok {
		return tok
	}

	lval.str = tokenText
	return IDENTIFIER
}

// Error records a parse error at the lexer's current line and column.
func (l *lexer) Error(msg string) {
	l.errs = append(l.errs, logqlmodel.NewParseError(msg, l.Line, l.Column))
}

// tryScanDuration checks whether the number that was just scanned is followed by
// duration units; if the combined text parses as a duration, it consumes those
// runes from the scanner and returns the duration.
func tryScanDuration(number string, l *scanner.Scanner) (time.Duration, bool) {
	var sb strings.Builder
	sb.WriteString(number)
	// copy the scanner to avoid advancing it in case it's not a duration.
	s := *l
	consumed := 0
	for r := s.Peek(); r != scanner.EOF && !unicode.IsSpace(r); r = s.Peek() {
		if !unicode.IsNumber(r) && !isDurationRune(r) && r != '.' {
			break
		}
		_, _ = sb.WriteRune(r)
		_ = s.Next()
		consumed++
	}

	if consumed == 0 {
		return 0, false
	}
	// we've found more characters before a whitespace or the end
	durationString := sb.String()
	duration, err := parseDuration(durationString)
	if err != nil {
		return 0, false
	}

	// we need to consume the scanner, now that we know this is a duration.
	for i := 0; i < consumed; i++ {
		_ = l.Next()
	}

	return duration, true
}

func parseDuration(d string) (time.Duration, error) {
	var duration time.Duration
	// Try to parse promql style durations first, to ensure that we support the same duration
	// units as promql
	prometheusDuration, err := model.ParseDuration(d)
	if err != nil {
		// Fall back to the standard library's time.ParseDuration if a promql style
		// duration couldn't be parsed.
		duration, err = time.ParseDuration(d)
		if err != nil {
			return 0, err
		}
	} else {
		duration = time.Duration(prometheusDuration)
	}

	return duration, nil
}

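// Example (illustrative, not from the upstream file): parseDuration accepts both
// PromQL-style and Go-style duration strings, e.g.
//
//	parseDuration("1d")    // 24h0m0s, via model.ParseDuration (PromQL units)
//	parseDuration("1.5h")  // 1h30m0s, via the time.ParseDuration fallback,
//	                       // since PromQL durations do not allow decimals
//	parseDuration("150ns") // 150ns, also via the fallback; PromQL's smallest unit is ms
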
func isDurationRune(r rune) bool {
	// "ns", "us" (or "µs"), "ms", "s", "m", "h", "d", "w", "y".
	switch r {
	case 'n', 'u', 'µ', 'm', 's', 'h', 'd', 'w', 'y':
		return true
	default:
		return false
	}
}

// tryScanBytes is the byte-size counterpart of tryScanDuration: it checks whether
// the number that was just scanned is followed by size units understood by
// humanize.ParseBytes, and only then consumes those runes from the scanner.
func tryScanBytes(number string, l *scanner.Scanner) (uint64, bool) {
	var sb strings.Builder
	sb.WriteString(number)
	// copy the scanner to avoid advancing it in case it's not a byte size.
	s := *l
	consumed := 0
	for r := s.Peek(); r != scanner.EOF && !unicode.IsSpace(r); r = s.Peek() {
		if !unicode.IsNumber(r) && !isBytesSizeRune(r) && r != '.' {
			break
		}
		_, _ = sb.WriteRune(r)
		_ = s.Next()
		consumed++
	}

	if consumed == 0 {
		return 0, false
	}
	// we've found more characters before a whitespace or the end
	b, err := humanize.ParseBytes(sb.String())
	if err != nil {
		return 0, false
	}
	// we need to consume the scanner, now that we know this is a byte size.
	for i := 0; i < consumed; i++ {
		_ = l.Next()
	}
	return b, true
}

func isBytesSizeRune(r rune) bool {
	// Accept: B, kB, MB, GB, TB, PB, KB, KiB, MiB, GiB, TiB, PiB
	// Do not accept: EB, ZB, YB, EiB, ZiB and YiB. They are not supported since the value might not be representable in a uint64.
	switch r {
	case 'B', 'i', 'k', 'K', 'M', 'G', 'T', 'P':
		return true
	default:
		return false
	}
}

// isFunction checks whether the next runes are either an open parenthesis or a
// by/without clause followed by one. This allows functions and identifiers to be
// distinguished correctly.
func isFunction(sc scanner.Scanner) bool {
	var sb strings.Builder
	sc = trimSpace(sc)
	for r := sc.Next(); r != scanner.EOF; r = sc.Next() {
		sb.WriteRune(r)
		switch sb.String() {
		case "(":
			return true
		case "by", "without":
			sc = trimSpace(sc)
			return sc.Next() == '('
		}
	}
	return false
}

// trimSpace returns a copy of the scanner advanced past any leading whitespace.
func trimSpace(l scanner.Scanner) scanner.Scanner {
	for n := l.Peek(); n != scanner.EOF; n = l.Peek() {
		if unicode.IsSpace(n) {
			l.Next()
			continue
		}
		return l
	}
	return l
}
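
// Example (illustrative, not from the upstream file): the functionTokens lookup
// combined with isFunction means a word such as "rate" is only emitted as a
// function token when it is followed by "(" (possibly via a by/without clause);
// otherwise it falls through and is returned as IDENTIFIER, so label names that
// collide with function names keep working, e.g.
//
//	rate({app="foo"}[5m])    // "rate" lexes as the RATE function token
//	{app="foo"} | rate="1"   // "rate" lexes as an IDENTIFIER (a label name)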