github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/configs/legacy_promql/lex.go (about) 1 // Copyright 2015 The Prometheus Authors 2 // Licensed under the Apache License, Version 2.0 (the "License"); 3 // you may not use this file except in compliance with the License. 4 // You may obtain a copy of the License at 5 // 6 // http://www.apache.org/licenses/LICENSE-2.0 7 // 8 // Unless required by applicable law or agreed to in writing, software 9 // distributed under the License is distributed on an "AS IS" BASIS, 10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package promql 15 16 import ( 17 "fmt" 18 "strings" 19 "unicode" 20 "unicode/utf8" 21 ) 22 23 // item represents a token or text string returned from the scanner. 24 type item struct { 25 typ ItemType // The type of this item. 26 pos Pos // The starting position, in bytes, of this item in the input string. 27 val string // The value of this item. 28 } 29 30 // String returns a descriptive string for the item. 31 func (i item) String() string { 32 switch { 33 case i.typ == itemEOF: 34 return "EOF" 35 case i.typ == itemError: 36 return i.val 37 case i.typ == itemIdentifier || i.typ == itemMetricIdentifier: 38 return fmt.Sprintf("%q", i.val) 39 case i.typ.isKeyword(): 40 return fmt.Sprintf("<%s>", i.val) 41 case i.typ.isOperator(): 42 return fmt.Sprintf("<op:%s>", i.val) 43 case i.typ.isAggregator(): 44 return fmt.Sprintf("<aggr:%s>", i.val) 45 case len(i.val) > 10: 46 return fmt.Sprintf("%.10q...", i.val) 47 } 48 return fmt.Sprintf("%q", i.val) 49 } 50 51 // isOperator returns true if the item corresponds to a arithmetic or set operator. 52 // Returns false otherwise. 53 func (i ItemType) isOperator() bool { return i > operatorsStart && i < operatorsEnd } 54 55 // isAggregator returns true if the item belongs to the aggregator functions. 56 // Returns false otherwise 57 func (i ItemType) isAggregator() bool { return i > aggregatorsStart && i < aggregatorsEnd } 58 59 // isAggregator returns true if the item is an aggregator that takes a parameter. 60 // Returns false otherwise 61 func (i ItemType) isAggregatorWithParam() bool { 62 return i == itemTopK || i == itemBottomK || i == itemCountValues || i == itemQuantile 63 } 64 65 // isKeyword returns true if the item corresponds to a keyword. 66 // Returns false otherwise. 67 func (i ItemType) isKeyword() bool { return i > keywordsStart && i < keywordsEnd } 68 69 // isCompairsonOperator returns true if the item corresponds to a comparison operator. 70 // Returns false otherwise. 71 func (i ItemType) isComparisonOperator() bool { 72 switch i { 73 case itemEQL, itemNEQ, itemLTE, itemLSS, itemGTE, itemGTR: 74 return true 75 default: 76 return false 77 } 78 } 79 80 // isSetOperator returns whether the item corresponds to a set operator. 81 func (i ItemType) isSetOperator() bool { 82 switch i { 83 case itemLAND, itemLOR, itemLUnless: 84 return true 85 } 86 return false 87 } 88 89 // LowestPrec is a constant for operator precedence in expressions. 90 const LowestPrec = 0 // Non-operators. 91 92 // Precedence returns the operator precedence of the binary 93 // operator op. If op is not a binary operator, the result 94 // is LowestPrec. 95 func (i ItemType) precedence() int { 96 switch i { 97 case itemLOR: 98 return 1 99 case itemLAND, itemLUnless: 100 return 2 101 case itemEQL, itemNEQ, itemLTE, itemLSS, itemGTE, itemGTR: 102 return 3 103 case itemADD, itemSUB: 104 return 4 105 case itemMUL, itemDIV, itemMOD: 106 return 5 107 case itemPOW: 108 return 6 109 default: 110 return LowestPrec 111 } 112 } 113 114 func (i ItemType) isRightAssociative() bool { 115 switch i { 116 case itemPOW: 117 return true 118 default: 119 return false 120 } 121 122 } 123 124 type ItemType int 125 126 const ( 127 itemError ItemType = iota // Error occurred, value is error message 128 itemEOF 129 itemComment 130 itemIdentifier 131 itemMetricIdentifier 132 itemLeftParen 133 itemRightParen 134 itemLeftBrace 135 itemRightBrace 136 itemLeftBracket 137 itemRightBracket 138 itemComma 139 itemAssign 140 itemSemicolon 141 itemString 142 itemNumber 143 itemDuration 144 itemBlank 145 itemTimes 146 147 operatorsStart 148 // Operators. 149 itemSUB 150 itemADD 151 itemMUL 152 itemMOD 153 itemDIV 154 itemLAND 155 itemLOR 156 itemLUnless 157 itemEQL 158 itemNEQ 159 itemLTE 160 itemLSS 161 itemGTE 162 itemGTR 163 itemEQLRegex 164 itemNEQRegex 165 itemPOW 166 operatorsEnd 167 168 aggregatorsStart 169 // Aggregators. 170 itemAvg 171 itemCount 172 itemSum 173 itemMin 174 itemMax 175 itemStddev 176 itemStdvar 177 itemTopK 178 itemBottomK 179 itemCountValues 180 itemQuantile 181 aggregatorsEnd 182 183 keywordsStart 184 // Keywords. 185 itemAlert 186 itemIf 187 itemFor 188 itemLabels 189 itemAnnotations 190 itemOffset 191 itemBy 192 itemWithout 193 itemOn 194 itemIgnoring 195 itemGroupLeft 196 itemGroupRight 197 itemBool 198 keywordsEnd 199 ) 200 201 var key = map[string]ItemType{ 202 // Operators. 203 "and": itemLAND, 204 "or": itemLOR, 205 "unless": itemLUnless, 206 207 // Aggregators. 208 "sum": itemSum, 209 "avg": itemAvg, 210 "count": itemCount, 211 "min": itemMin, 212 "max": itemMax, 213 "stddev": itemStddev, 214 "stdvar": itemStdvar, 215 "topk": itemTopK, 216 "bottomk": itemBottomK, 217 "count_values": itemCountValues, 218 "quantile": itemQuantile, 219 220 // Keywords. 221 "alert": itemAlert, 222 "if": itemIf, 223 "for": itemFor, 224 "labels": itemLabels, 225 "annotations": itemAnnotations, 226 "offset": itemOffset, 227 "by": itemBy, 228 "without": itemWithout, 229 "on": itemOn, 230 "ignoring": itemIgnoring, 231 "group_left": itemGroupLeft, 232 "group_right": itemGroupRight, 233 "bool": itemBool, 234 } 235 236 // These are the default string representations for common items. It does not 237 // imply that those are the only character sequences that can be lexed to such an item. 238 var itemTypeStr = map[ItemType]string{ 239 itemLeftParen: "(", 240 itemRightParen: ")", 241 itemLeftBrace: "{", 242 itemRightBrace: "}", 243 itemLeftBracket: "[", 244 itemRightBracket: "]", 245 itemComma: ",", 246 itemAssign: "=", 247 itemSemicolon: ";", 248 itemBlank: "_", 249 itemTimes: "x", 250 251 itemSUB: "-", 252 itemADD: "+", 253 itemMUL: "*", 254 itemMOD: "%", 255 itemDIV: "/", 256 itemEQL: "==", 257 itemNEQ: "!=", 258 itemLTE: "<=", 259 itemLSS: "<", 260 itemGTE: ">=", 261 itemGTR: ">", 262 itemEQLRegex: "=~", 263 itemNEQRegex: "!~", 264 itemPOW: "^", 265 } 266 267 func init() { 268 // Add keywords to item type strings. 269 for s, ty := range key { 270 itemTypeStr[ty] = s 271 } 272 // Special numbers. 273 key["inf"] = itemNumber 274 key["nan"] = itemNumber 275 } 276 277 func (i ItemType) String() string { 278 if s, ok := itemTypeStr[i]; ok { 279 return s 280 } 281 return fmt.Sprintf("<item %d>", i) 282 } 283 284 func (i item) desc() string { 285 if _, ok := itemTypeStr[i.typ]; ok { 286 return i.String() 287 } 288 if i.typ == itemEOF { 289 return i.typ.desc() 290 } 291 return fmt.Sprintf("%s %s", i.typ.desc(), i) 292 } 293 294 func (i ItemType) desc() string { 295 switch i { 296 case itemError: 297 return "error" 298 case itemEOF: 299 return "end of input" 300 case itemComment: 301 return "comment" 302 case itemIdentifier: 303 return "identifier" 304 case itemMetricIdentifier: 305 return "metric identifier" 306 case itemString: 307 return "string" 308 case itemNumber: 309 return "number" 310 case itemDuration: 311 return "duration" 312 } 313 return fmt.Sprintf("%q", i) 314 } 315 316 const eof = -1 317 318 // stateFn represents the state of the scanner as a function that returns the next state. 319 type stateFn func(*lexer) stateFn 320 321 // Pos is the position in a string. 322 type Pos int 323 324 // lexer holds the state of the scanner. 325 type lexer struct { 326 input string // The string being scanned. 327 state stateFn // The next lexing function to enter. 328 pos Pos // Current position in the input. 329 start Pos // Start position of this item. 330 width Pos // Width of last rune read from input. 331 lastPos Pos // Position of most recent item returned by nextItem. 332 items chan item // Channel of scanned items. 333 334 parenDepth int // Nesting depth of ( ) exprs. 335 braceOpen bool // Whether a { is opened. 336 bracketOpen bool // Whether a [ is opened. 337 stringOpen rune // Quote rune of the string currently being read. 338 339 // seriesDesc is set when a series description for the testing 340 // language is lexed. 341 seriesDesc bool 342 } 343 344 // next returns the next rune in the input. 345 func (l *lexer) next() rune { 346 if int(l.pos) >= len(l.input) { 347 l.width = 0 348 return eof 349 } 350 r, w := utf8.DecodeRuneInString(l.input[l.pos:]) 351 l.width = Pos(w) 352 l.pos += l.width 353 return r 354 } 355 356 // peek returns but does not consume the next rune in the input. 357 func (l *lexer) peek() rune { 358 r := l.next() 359 l.backup() 360 return r 361 } 362 363 // backup steps back one rune. Can only be called once per call of next. 364 func (l *lexer) backup() { 365 l.pos -= l.width 366 } 367 368 // emit passes an item back to the client. 369 func (l *lexer) emit(t ItemType) { 370 l.items <- item{t, l.start, l.input[l.start:l.pos]} 371 l.start = l.pos 372 } 373 374 // ignore skips over the pending input before this point. 375 func (l *lexer) ignore() { 376 l.start = l.pos 377 } 378 379 // accept consumes the next rune if it's from the valid set. 380 func (l *lexer) accept(valid string) bool { 381 if strings.ContainsRune(valid, l.next()) { 382 return true 383 } 384 l.backup() 385 return false 386 } 387 388 // acceptRun consumes a run of runes from the valid set. 389 func (l *lexer) acceptRun(valid string) { 390 for strings.ContainsRune(valid, l.next()) { 391 // consume 392 } 393 l.backup() 394 } 395 396 // lineNumber reports which line we're on, based on the position of 397 // the previous item returned by nextItem. Doing it this way 398 // means we don't have to worry about peek double counting. 399 func (l *lexer) lineNumber() int { 400 return 1 + strings.Count(l.input[:l.lastPos], "\n") 401 } 402 403 // linePosition reports at which character in the current line 404 // we are on. 405 func (l *lexer) linePosition() int { 406 lb := strings.LastIndex(l.input[:l.lastPos], "\n") 407 if lb == -1 { 408 return 1 + int(l.lastPos) 409 } 410 return 1 + int(l.lastPos) - lb 411 } 412 413 // errorf returns an error token and terminates the scan by passing 414 // back a nil pointer that will be the next state, terminating l.nextItem. 415 func (l *lexer) errorf(format string, args ...interface{}) stateFn { 416 l.items <- item{itemError, l.start, fmt.Sprintf(format, args...)} 417 return nil 418 } 419 420 // nextItem returns the next item from the input. 421 func (l *lexer) nextItem() item { 422 item := <-l.items 423 l.lastPos = item.pos 424 return item 425 } 426 427 // lex creates a new scanner for the input string. 428 func lex(input string) *lexer { 429 l := &lexer{ 430 input: input, 431 items: make(chan item), 432 } 433 go l.run() 434 return l 435 } 436 437 // run runs the state machine for the lexer. 438 func (l *lexer) run() { 439 for l.state = lexStatements; l.state != nil; { 440 l.state = l.state(l) 441 } 442 close(l.items) 443 } 444 445 // lineComment is the character that starts a line comment. 446 const lineComment = "#" 447 448 // lexStatements is the top-level state for lexing. 449 func lexStatements(l *lexer) stateFn { 450 if l.braceOpen { 451 return lexInsideBraces 452 } 453 if strings.HasPrefix(l.input[l.pos:], lineComment) { 454 return lexLineComment 455 } 456 457 switch r := l.next(); { 458 case r == eof: 459 if l.parenDepth != 0 { 460 return l.errorf("unclosed left parenthesis") 461 } else if l.bracketOpen { 462 return l.errorf("unclosed left bracket") 463 } 464 l.emit(itemEOF) 465 return nil 466 case r == ',': 467 l.emit(itemComma) 468 case isSpace(r): 469 return lexSpace 470 case r == '*': 471 l.emit(itemMUL) 472 case r == '/': 473 l.emit(itemDIV) 474 case r == '%': 475 l.emit(itemMOD) 476 case r == '+': 477 l.emit(itemADD) 478 case r == '-': 479 l.emit(itemSUB) 480 case r == '^': 481 l.emit(itemPOW) 482 case r == '=': 483 if t := l.peek(); t == '=' { 484 l.next() 485 l.emit(itemEQL) 486 } else if t == '~' { 487 return l.errorf("unexpected character after '=': %q", t) 488 } else { 489 l.emit(itemAssign) 490 } 491 case r == '!': 492 if t := l.next(); t == '=' { 493 l.emit(itemNEQ) 494 } else { 495 return l.errorf("unexpected character after '!': %q", t) 496 } 497 case r == '<': 498 if t := l.peek(); t == '=' { 499 l.next() 500 l.emit(itemLTE) 501 } else { 502 l.emit(itemLSS) 503 } 504 case r == '>': 505 if t := l.peek(); t == '=' { 506 l.next() 507 l.emit(itemGTE) 508 } else { 509 l.emit(itemGTR) 510 } 511 case isDigit(r) || (r == '.' && isDigit(l.peek())): 512 l.backup() 513 return lexNumberOrDuration 514 case r == '"' || r == '\'': 515 l.stringOpen = r 516 return lexString 517 case r == '`': 518 l.stringOpen = r 519 return lexRawString 520 case isAlpha(r) || r == ':': 521 l.backup() 522 return lexKeywordOrIdentifier 523 case r == '(': 524 l.emit(itemLeftParen) 525 l.parenDepth++ 526 return lexStatements 527 case r == ')': 528 l.emit(itemRightParen) 529 l.parenDepth-- 530 if l.parenDepth < 0 { 531 return l.errorf("unexpected right parenthesis %q", r) 532 } 533 return lexStatements 534 case r == '{': 535 l.emit(itemLeftBrace) 536 l.braceOpen = true 537 return lexInsideBraces(l) 538 case r == '[': 539 if l.bracketOpen { 540 return l.errorf("unexpected left bracket %q", r) 541 } 542 l.emit(itemLeftBracket) 543 l.bracketOpen = true 544 return lexDuration 545 case r == ']': 546 if !l.bracketOpen { 547 return l.errorf("unexpected right bracket %q", r) 548 } 549 l.emit(itemRightBracket) 550 l.bracketOpen = false 551 552 default: 553 return l.errorf("unexpected character: %q", r) 554 } 555 return lexStatements 556 } 557 558 // lexInsideBraces scans the inside of a vector selector. Keywords are ignored and 559 // scanned as identifiers. 560 func lexInsideBraces(l *lexer) stateFn { 561 if strings.HasPrefix(l.input[l.pos:], lineComment) { 562 return lexLineComment 563 } 564 565 switch r := l.next(); { 566 case r == eof: 567 return l.errorf("unexpected end of input inside braces") 568 case isSpace(r): 569 return lexSpace 570 case isAlpha(r): 571 l.backup() 572 return lexIdentifier 573 case r == ',': 574 l.emit(itemComma) 575 case r == '"' || r == '\'': 576 l.stringOpen = r 577 return lexString 578 case r == '`': 579 l.stringOpen = r 580 return lexRawString 581 case r == '=': 582 if l.next() == '~' { 583 l.emit(itemEQLRegex) 584 break 585 } 586 l.backup() 587 l.emit(itemEQL) 588 case r == '!': 589 switch nr := l.next(); { 590 case nr == '~': 591 l.emit(itemNEQRegex) 592 case nr == '=': 593 l.emit(itemNEQ) 594 default: 595 return l.errorf("unexpected character after '!' inside braces: %q", nr) 596 } 597 case r == '{': 598 return l.errorf("unexpected left brace %q", r) 599 case r == '}': 600 l.emit(itemRightBrace) 601 l.braceOpen = false 602 603 if l.seriesDesc { 604 return lexValueSequence 605 } 606 return lexStatements 607 default: 608 return l.errorf("unexpected character inside braces: %q", r) 609 } 610 return lexInsideBraces 611 } 612 613 // lexValueSequence scans a value sequence of a series description. 614 func lexValueSequence(l *lexer) stateFn { 615 switch r := l.next(); { 616 case r == eof: 617 return lexStatements 618 case isSpace(r): 619 lexSpace(l) 620 case r == '+': 621 l.emit(itemADD) 622 case r == '-': 623 l.emit(itemSUB) 624 case r == 'x': 625 l.emit(itemTimes) 626 case r == '_': 627 l.emit(itemBlank) 628 case isDigit(r) || (r == '.' && isDigit(l.peek())): 629 l.backup() 630 lexNumber(l) 631 case isAlpha(r): 632 l.backup() 633 // We might lex invalid items here but this will be caught by the parser. 634 return lexKeywordOrIdentifier 635 default: 636 return l.errorf("unexpected character in series sequence: %q", r) 637 } 638 return lexValueSequence 639 } 640 641 // lexEscape scans a string escape sequence. The initial escaping character (\) 642 // has already been seen. 643 // 644 // NOTE: This function as well as the helper function digitVal() and associated 645 // tests have been adapted from the corresponding functions in the "go/scanner" 646 // package of the Go standard library to work for Prometheus-style strings. 647 // None of the actual escaping/quoting logic was changed in this function - it 648 // was only modified to integrate with our lexer. 649 func lexEscape(l *lexer) { 650 var n int 651 var base, max uint32 652 653 ch := l.next() 654 switch ch { 655 case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', l.stringOpen: 656 return 657 case '0', '1', '2', '3', '4', '5', '6', '7': 658 n, base, max = 3, 8, 255 659 case 'x': 660 ch = l.next() 661 n, base, max = 2, 16, 255 662 case 'u': 663 ch = l.next() 664 n, base, max = 4, 16, unicode.MaxRune 665 case 'U': 666 ch = l.next() 667 n, base, max = 8, 16, unicode.MaxRune 668 case eof: 669 l.errorf("escape sequence not terminated") 670 default: 671 l.errorf("unknown escape sequence %#U", ch) 672 } 673 674 var x uint32 675 for n > 0 { 676 d := uint32(digitVal(ch)) 677 if d >= base { 678 if ch == eof { 679 l.errorf("escape sequence not terminated") 680 } 681 l.errorf("illegal character %#U in escape sequence", ch) 682 } 683 x = x*base + d 684 ch = l.next() 685 n-- 686 } 687 688 if x > max || 0xD800 <= x && x < 0xE000 { 689 l.errorf("escape sequence is an invalid Unicode code point") 690 } 691 } 692 693 // digitVal returns the digit value of a rune or 16 in case the rune does not 694 // represent a valid digit. 695 func digitVal(ch rune) int { 696 switch { 697 case '0' <= ch && ch <= '9': 698 return int(ch - '0') 699 case 'a' <= ch && ch <= 'f': 700 return int(ch - 'a' + 10) 701 case 'A' <= ch && ch <= 'F': 702 return int(ch - 'A' + 10) 703 } 704 return 16 // Larger than any legal digit val. 705 } 706 707 // lexString scans a quoted string. The initial quote has already been seen. 708 func lexString(l *lexer) stateFn { 709 Loop: 710 for { 711 switch l.next() { 712 case '\\': 713 lexEscape(l) 714 case utf8.RuneError: 715 return l.errorf("invalid UTF-8 rune") 716 case eof, '\n': 717 return l.errorf("unterminated quoted string") 718 case l.stringOpen: 719 break Loop 720 } 721 } 722 l.emit(itemString) 723 return lexStatements 724 } 725 726 // lexRawString scans a raw quoted string. The initial quote has already been seen. 727 func lexRawString(l *lexer) stateFn { 728 Loop: 729 for { 730 switch l.next() { 731 case utf8.RuneError: 732 return l.errorf("invalid UTF-8 rune") 733 case eof: 734 return l.errorf("unterminated raw string") 735 case l.stringOpen: 736 break Loop 737 } 738 } 739 l.emit(itemString) 740 return lexStatements 741 } 742 743 // lexSpace scans a run of space characters. One space has already been seen. 744 func lexSpace(l *lexer) stateFn { 745 for isSpace(l.peek()) { 746 l.next() 747 } 748 l.ignore() 749 return lexStatements 750 } 751 752 // lexLineComment scans a line comment. Left comment marker is known to be present. 753 func lexLineComment(l *lexer) stateFn { 754 l.pos += Pos(len(lineComment)) 755 for r := l.next(); !isEndOfLine(r) && r != eof; { 756 r = l.next() 757 } 758 l.backup() 759 l.emit(itemComment) 760 return lexStatements 761 } 762 763 func lexDuration(l *lexer) stateFn { 764 if l.scanNumber() { 765 return l.errorf("missing unit character in duration") 766 } 767 // Next two chars must be a valid unit and a non-alphanumeric. 768 if l.accept("smhdwy") { 769 if isAlphaNumeric(l.next()) { 770 return l.errorf("bad duration syntax: %q", l.input[l.start:l.pos]) 771 } 772 l.backup() 773 l.emit(itemDuration) 774 return lexStatements 775 } 776 return l.errorf("bad duration syntax: %q", l.input[l.start:l.pos]) 777 } 778 779 // lexNumber scans a number: decimal, hex, oct or float. 780 func lexNumber(l *lexer) stateFn { 781 if !l.scanNumber() { 782 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos]) 783 } 784 l.emit(itemNumber) 785 return lexStatements 786 } 787 788 // lexNumberOrDuration scans a number or a duration item. 789 func lexNumberOrDuration(l *lexer) stateFn { 790 if l.scanNumber() { 791 l.emit(itemNumber) 792 return lexStatements 793 } 794 // Next two chars must be a valid unit and a non-alphanumeric. 795 if l.accept("smhdwy") { 796 if isAlphaNumeric(l.next()) { 797 return l.errorf("bad number or duration syntax: %q", l.input[l.start:l.pos]) 798 } 799 l.backup() 800 l.emit(itemDuration) 801 return lexStatements 802 } 803 return l.errorf("bad number or duration syntax: %q", l.input[l.start:l.pos]) 804 } 805 806 // scanNumber scans numbers of different formats. The scanned item is 807 // not necessarily a valid number. This case is caught by the parser. 808 func (l *lexer) scanNumber() bool { 809 digits := "0123456789" 810 // Disallow hexadecimal in series descriptions as the syntax is ambiguous. 811 if !l.seriesDesc && l.accept("0") && l.accept("xX") { 812 digits = "0123456789abcdefABCDEF" 813 } 814 l.acceptRun(digits) 815 if l.accept(".") { 816 l.acceptRun(digits) 817 } 818 if l.accept("eE") { 819 l.accept("+-") 820 l.acceptRun("0123456789") 821 } 822 // Next thing must not be alphanumeric unless it's the times token 823 // for series repetitions. 824 if r := l.peek(); (l.seriesDesc && r == 'x') || !isAlphaNumeric(r) { 825 return true 826 } 827 return false 828 } 829 830 // lexIdentifier scans an alphanumeric identifier. The next character 831 // is known to be a letter. 832 func lexIdentifier(l *lexer) stateFn { 833 for isAlphaNumeric(l.next()) { 834 // absorb 835 } 836 l.backup() 837 l.emit(itemIdentifier) 838 return lexStatements 839 } 840 841 // lexKeywordOrIdentifier scans an alphanumeric identifier which may contain 842 // a colon rune. If the identifier is a keyword the respective keyword item 843 // is scanned. 844 func lexKeywordOrIdentifier(l *lexer) stateFn { 845 Loop: 846 for { 847 switch r := l.next(); { 848 case isAlphaNumeric(r) || r == ':': 849 // absorb. 850 default: 851 l.backup() 852 word := l.input[l.start:l.pos] 853 if kw, ok := key[strings.ToLower(word)]; ok { 854 l.emit(kw) 855 } else if !strings.Contains(word, ":") { 856 l.emit(itemIdentifier) 857 } else { 858 l.emit(itemMetricIdentifier) 859 } 860 break Loop 861 } 862 } 863 if l.seriesDesc && l.peek() != '{' { 864 return lexValueSequence 865 } 866 return lexStatements 867 } 868 869 func isSpace(r rune) bool { 870 return r == ' ' || r == '\t' || r == '\n' || r == '\r' 871 } 872 873 // isEndOfLine reports whether r is an end-of-line character. 874 func isEndOfLine(r rune) bool { 875 return r == '\r' || r == '\n' 876 } 877 878 // isAlphaNumeric reports whether r is an alphabetic, digit, or underscore. 879 func isAlphaNumeric(r rune) bool { 880 return isAlpha(r) || isDigit(r) 881 } 882 883 // isDigit reports whether r is a digit. Note: we cannot use unicode.IsDigit() 884 // instead because that also classifies non-Latin digits as digits. See 885 // https://github.com/prometheus/prometheus/issues/939. 886 func isDigit(r rune) bool { 887 return '0' <= r && r <= '9' 888 } 889 890 // isAlpha reports whether r is an alphabetic or underscore. 891 func isAlpha(r rune) bool { 892 return r == '_' || ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z') 893 } 894 895 // isLabel reports whether the string can be used as label. 896 func isLabel(s string) bool { 897 if len(s) == 0 || !isAlpha(rune(s[0])) { 898 return false 899 } 900 for _, c := range s[1:] { 901 if !isAlphaNumeric(c) { 902 return false 903 } 904 } 905 return true 906 }