github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/configs/legacy_promql/lex.go (about)

     1  // Copyright 2015 The Prometheus Authors
     2  // Licensed under the Apache License, Version 2.0 (the "License");
     3  // you may not use this file except in compliance with the License.
     4  // You may obtain a copy of the License at
     5  //
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package promql
    15  
    16  import (
    17  	"fmt"
    18  	"strings"
    19  	"unicode"
    20  	"unicode/utf8"
    21  )
    22  
// item represents a token or text string returned from the scanner.
//
// The lexer constructs items with positional composite literals, so the
// order of the fields below must not be changed.
type item struct {
	typ ItemType // The type of this item.
	pos Pos      // The starting position, in bytes, of this item in the input string.
	val string   // The value of this item.
}
    29  
    30  // String returns a descriptive string for the item.
    31  func (i item) String() string {
    32  	switch {
    33  	case i.typ == itemEOF:
    34  		return "EOF"
    35  	case i.typ == itemError:
    36  		return i.val
    37  	case i.typ == itemIdentifier || i.typ == itemMetricIdentifier:
    38  		return fmt.Sprintf("%q", i.val)
    39  	case i.typ.isKeyword():
    40  		return fmt.Sprintf("<%s>", i.val)
    41  	case i.typ.isOperator():
    42  		return fmt.Sprintf("<op:%s>", i.val)
    43  	case i.typ.isAggregator():
    44  		return fmt.Sprintf("<aggr:%s>", i.val)
    45  	case len(i.val) > 10:
    46  		return fmt.Sprintf("%.10q...", i.val)
    47  	}
    48  	return fmt.Sprintf("%q", i.val)
    49  }
    50  
    51  // isOperator returns true if the item corresponds to a arithmetic or set operator.
    52  // Returns false otherwise.
    53  func (i ItemType) isOperator() bool { return i > operatorsStart && i < operatorsEnd }
    54  
    55  // isAggregator returns true if the item belongs to the aggregator functions.
    56  // Returns false otherwise
    57  func (i ItemType) isAggregator() bool { return i > aggregatorsStart && i < aggregatorsEnd }
    58  
    59  // isAggregator returns true if the item is an aggregator that takes a parameter.
    60  // Returns false otherwise
    61  func (i ItemType) isAggregatorWithParam() bool {
    62  	return i == itemTopK || i == itemBottomK || i == itemCountValues || i == itemQuantile
    63  }
    64  
    65  // isKeyword returns true if the item corresponds to a keyword.
    66  // Returns false otherwise.
    67  func (i ItemType) isKeyword() bool { return i > keywordsStart && i < keywordsEnd }
    68  
    69  // isCompairsonOperator returns true if the item corresponds to a comparison operator.
    70  // Returns false otherwise.
    71  func (i ItemType) isComparisonOperator() bool {
    72  	switch i {
    73  	case itemEQL, itemNEQ, itemLTE, itemLSS, itemGTE, itemGTR:
    74  		return true
    75  	default:
    76  		return false
    77  	}
    78  }
    79  
    80  // isSetOperator returns whether the item corresponds to a set operator.
    81  func (i ItemType) isSetOperator() bool {
    82  	switch i {
    83  	case itemLAND, itemLOR, itemLUnless:
    84  		return true
    85  	}
    86  	return false
    87  }
    88  
// LowestPrec is the precedence reported for item types that are not binary
// operators. Every binary operator has a strictly higher precedence.
const LowestPrec = 0 // Non-operators.
    91  
    92  // Precedence returns the operator precedence of the binary
    93  // operator op. If op is not a binary operator, the result
    94  // is LowestPrec.
    95  func (i ItemType) precedence() int {
    96  	switch i {
    97  	case itemLOR:
    98  		return 1
    99  	case itemLAND, itemLUnless:
   100  		return 2
   101  	case itemEQL, itemNEQ, itemLTE, itemLSS, itemGTE, itemGTR:
   102  		return 3
   103  	case itemADD, itemSUB:
   104  		return 4
   105  	case itemMUL, itemDIV, itemMOD:
   106  		return 5
   107  	case itemPOW:
   108  		return 6
   109  	default:
   110  		return LowestPrec
   111  	}
   112  }
   113  
   114  func (i ItemType) isRightAssociative() bool {
   115  	switch i {
   116  	case itemPOW:
   117  		return true
   118  	default:
   119  		return false
   120  	}
   121  
   122  }
   123  
// ItemType identifies the kind of an item produced by the lexer.
type ItemType int

// The item types. The values are assigned by iota, so the declaration order
// is significant: isOperator, isAggregator and isKeyword classify a type by
// checking that it lies strictly between the matching *Start and *End marker.
const (
	itemError ItemType = iota // Error occurred, value is error message
	itemEOF
	itemComment
	itemIdentifier
	itemMetricIdentifier
	itemLeftParen
	itemRightParen
	itemLeftBrace
	itemRightBrace
	itemLeftBracket
	itemRightBracket
	itemComma
	itemAssign
	itemSemicolon
	itemString
	itemNumber
	itemDuration
	itemBlank
	itemTimes

	// operatorsStart and operatorsEnd are range markers, not real items.
	operatorsStart
	// Operators.
	itemSUB
	itemADD
	itemMUL
	itemMOD
	itemDIV
	itemLAND
	itemLOR
	itemLUnless
	itemEQL
	itemNEQ
	itemLTE
	itemLSS
	itemGTE
	itemGTR
	itemEQLRegex
	itemNEQRegex
	itemPOW
	operatorsEnd

	// aggregatorsStart and aggregatorsEnd are range markers, not real items.
	aggregatorsStart
	// Aggregators.
	itemAvg
	itemCount
	itemSum
	itemMin
	itemMax
	itemStddev
	itemStdvar
	itemTopK
	itemBottomK
	itemCountValues
	itemQuantile
	aggregatorsEnd

	// keywordsStart and keywordsEnd are range markers, not real items.
	keywordsStart
	// Keywords.
	itemAlert
	itemIf
	itemFor
	itemLabels
	itemAnnotations
	itemOffset
	itemBy
	itemWithout
	itemOn
	itemIgnoring
	itemGroupLeft
	itemGroupRight
	itemBool
	keywordsEnd
)
   200  
// key maps the lower-case spelling of every reserved word to its item type.
// lexKeywordOrIdentifier lower-cases a scanned word before looking it up
// here, so reserved words are matched case-insensitively. init adds the
// special number spellings "inf" and "nan" to this map.
var key = map[string]ItemType{
	// Operators.
	"and":    itemLAND,
	"or":     itemLOR,
	"unless": itemLUnless,

	// Aggregators.
	"sum":          itemSum,
	"avg":          itemAvg,
	"count":        itemCount,
	"min":          itemMin,
	"max":          itemMax,
	"stddev":       itemStddev,
	"stdvar":       itemStdvar,
	"topk":         itemTopK,
	"bottomk":      itemBottomK,
	"count_values": itemCountValues,
	"quantile":     itemQuantile,

	// Keywords.
	"alert":       itemAlert,
	"if":          itemIf,
	"for":         itemFor,
	"labels":      itemLabels,
	"annotations": itemAnnotations,
	"offset":      itemOffset,
	"by":          itemBy,
	"without":     itemWithout,
	"on":          itemOn,
	"ignoring":    itemIgnoring,
	"group_left":  itemGroupLeft,
	"group_right": itemGroupRight,
	"bool":        itemBool,
}
   235  
// These are the default string representations for common items. It does not
// imply that those are the only character sequences that can be lexed to such an item.
//
// init extends this map with the spelling of every entry in key. Besides
// backing ItemType.String, membership in this map decides whether item.desc
// prints an item without a leading type description.
var itemTypeStr = map[ItemType]string{
	itemLeftParen:    "(",
	itemRightParen:   ")",
	itemLeftBrace:    "{",
	itemRightBrace:   "}",
	itemLeftBracket:  "[",
	itemRightBracket: "]",
	itemComma:        ",",
	itemAssign:       "=",
	itemSemicolon:    ";",
	itemBlank:        "_",
	itemTimes:        "x",

	itemSUB:      "-",
	itemADD:      "+",
	itemMUL:      "*",
	itemMOD:      "%",
	itemDIV:      "/",
	itemEQL:      "==",
	itemNEQ:      "!=",
	itemLTE:      "<=",
	itemLSS:      "<",
	itemGTE:      ">=",
	itemGTR:      ">",
	itemEQLRegex: "=~",
	itemNEQRegex: "!~",
	itemPOW:      "^",
}
   266  
   267  func init() {
   268  	// Add keywords to item type strings.
   269  	for s, ty := range key {
   270  		itemTypeStr[ty] = s
   271  	}
   272  	// Special numbers.
   273  	key["inf"] = itemNumber
   274  	key["nan"] = itemNumber
   275  }
   276  
   277  func (i ItemType) String() string {
   278  	if s, ok := itemTypeStr[i]; ok {
   279  		return s
   280  	}
   281  	return fmt.Sprintf("<item %d>", i)
   282  }
   283  
   284  func (i item) desc() string {
   285  	if _, ok := itemTypeStr[i.typ]; ok {
   286  		return i.String()
   287  	}
   288  	if i.typ == itemEOF {
   289  		return i.typ.desc()
   290  	}
   291  	return fmt.Sprintf("%s %s", i.typ.desc(), i)
   292  }
   293  
   294  func (i ItemType) desc() string {
   295  	switch i {
   296  	case itemError:
   297  		return "error"
   298  	case itemEOF:
   299  		return "end of input"
   300  	case itemComment:
   301  		return "comment"
   302  	case itemIdentifier:
   303  		return "identifier"
   304  	case itemMetricIdentifier:
   305  		return "metric identifier"
   306  	case itemString:
   307  		return "string"
   308  	case itemNumber:
   309  		return "number"
   310  	case itemDuration:
   311  		return "duration"
   312  	}
   313  	return fmt.Sprintf("%q", i)
   314  }
   315  
// eof is the sentinel that lexer.next returns once the input is exhausted.
// It is negative and therefore cannot collide with a decoded rune.
const eof = -1
   317  
// stateFn represents the state of the scanner as a function that returns the next state.
// A nil stateFn ends the state machine driven by lexer.run.
type stateFn func(*lexer) stateFn
   320  
// Pos is the position in a string, counted in bytes from its start.
type Pos int
   323  
// lexer holds the state of the scanner.
//
// The state functions run in the goroutine started by lex and hand their
// items to the consumer through the items channel; nextItem is the consumer
// side of that channel.
type lexer struct {
	input   string    // The string being scanned.
	state   stateFn   // The next lexing function to enter.
	pos     Pos       // Current position in the input.
	start   Pos       // Start position of this item.
	width   Pos       // Width of last rune read from input.
	lastPos Pos       // Position of most recent item returned by nextItem.
	items   chan item // Channel of scanned items.

	parenDepth  int  // Nesting depth of ( ) exprs.
	braceOpen   bool // Whether a { is opened.
	bracketOpen bool // Whether a [ is opened.
	stringOpen  rune // Quote rune of the string currently being read.

	// seriesDesc is set when a series description for the testing
	// language is lexed.
	seriesDesc bool
}
   343  
   344  // next returns the next rune in the input.
   345  func (l *lexer) next() rune {
   346  	if int(l.pos) >= len(l.input) {
   347  		l.width = 0
   348  		return eof
   349  	}
   350  	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
   351  	l.width = Pos(w)
   352  	l.pos += l.width
   353  	return r
   354  }
   355  
   356  // peek returns but does not consume the next rune in the input.
   357  func (l *lexer) peek() rune {
   358  	r := l.next()
   359  	l.backup()
   360  	return r
   361  }
   362  
   363  // backup steps back one rune. Can only be called once per call of next.
   364  func (l *lexer) backup() {
   365  	l.pos -= l.width
   366  }
   367  
   368  // emit passes an item back to the client.
   369  func (l *lexer) emit(t ItemType) {
   370  	l.items <- item{t, l.start, l.input[l.start:l.pos]}
   371  	l.start = l.pos
   372  }
   373  
// ignore skips over the pending input before this point: the next item
// starts at the current position and nothing is emitted for the skipped text.
func (l *lexer) ignore() {
	l.start = l.pos
}
   378  
   379  // accept consumes the next rune if it's from the valid set.
   380  func (l *lexer) accept(valid string) bool {
   381  	if strings.ContainsRune(valid, l.next()) {
   382  		return true
   383  	}
   384  	l.backup()
   385  	return false
   386  }
   387  
   388  // acceptRun consumes a run of runes from the valid set.
   389  func (l *lexer) acceptRun(valid string) {
   390  	for strings.ContainsRune(valid, l.next()) {
   391  		// consume
   392  	}
   393  	l.backup()
   394  }
   395  
   396  // lineNumber reports which line we're on, based on the position of
   397  // the previous item returned by nextItem. Doing it this way
   398  // means we don't have to worry about peek double counting.
   399  func (l *lexer) lineNumber() int {
   400  	return 1 + strings.Count(l.input[:l.lastPos], "\n")
   401  }
   402  
   403  // linePosition reports at which character in the current line
   404  // we are on.
   405  func (l *lexer) linePosition() int {
   406  	lb := strings.LastIndex(l.input[:l.lastPos], "\n")
   407  	if lb == -1 {
   408  		return 1 + int(l.lastPos)
   409  	}
   410  	return 1 + int(l.lastPos) - lb
   411  }
   412  
   413  // errorf returns an error token and terminates the scan by passing
   414  // back a nil pointer that will be the next state, terminating l.nextItem.
   415  func (l *lexer) errorf(format string, args ...interface{}) stateFn {
   416  	l.items <- item{itemError, l.start, fmt.Sprintf(format, args...)}
   417  	return nil
   418  }
   419  
   420  // nextItem returns the next item from the input.
   421  func (l *lexer) nextItem() item {
   422  	item := <-l.items
   423  	l.lastPos = item.pos
   424  	return item
   425  }
   426  
   427  // lex creates a new scanner for the input string.
   428  func lex(input string) *lexer {
   429  	l := &lexer{
   430  		input: input,
   431  		items: make(chan item),
   432  	}
   433  	go l.run()
   434  	return l
   435  }
   436  
   437  // run runs the state machine for the lexer.
   438  func (l *lexer) run() {
   439  	for l.state = lexStatements; l.state != nil; {
   440  		l.state = l.state(l)
   441  	}
   442  	close(l.items)
   443  }
   444  
// lineComment is the character that starts a line comment. The comment runs
// up to, but does not include, the next end-of-line character.
const lineComment = "#"
   447  
// lexStatements is the top-level state for lexing.
//
// It dispatches on the next rune: single-rune tokens are emitted directly,
// while strings, numbers, identifiers, whitespace, comments and brace
// contents are handed to their dedicated state functions. Those functions
// return here, so while a brace is open this state immediately defers to
// lexInsideBraces. Returning nil (after EOF or an error) ends the scan.
func lexStatements(l *lexer) stateFn {
	if l.braceOpen {
		return lexInsideBraces
	}
	if strings.HasPrefix(l.input[l.pos:], lineComment) {
		return lexLineComment
	}

	switch r := l.next(); {
	case r == eof:
		// The input may only end with all parentheses and brackets closed.
		if l.parenDepth != 0 {
			return l.errorf("unclosed left parenthesis")
		} else if l.bracketOpen {
			return l.errorf("unclosed left bracket")
		}
		l.emit(itemEOF)
		return nil
	case r == ',':
		l.emit(itemComma)
	case isSpace(r):
		return lexSpace
	case r == '*':
		l.emit(itemMUL)
	case r == '/':
		l.emit(itemDIV)
	case r == '%':
		l.emit(itemMOD)
	case r == '+':
		l.emit(itemADD)
	case r == '-':
		l.emit(itemSUB)
	case r == '^':
		l.emit(itemPOW)
	case r == '=':
		// "==" is the equality operator and a lone "=" an assignment; the
		// regex matcher "=~" is rejected outside of braces.
		if t := l.peek(); t == '=' {
			l.next()
			l.emit(itemEQL)
		} else if t == '~' {
			return l.errorf("unexpected character after '=': %q", t)
		} else {
			l.emit(itemAssign)
		}
	case r == '!':
		// Outside of braces "!" is only valid as part of "!=".
		if t := l.next(); t == '=' {
			l.emit(itemNEQ)
		} else {
			return l.errorf("unexpected character after '!': %q", t)
		}
	case r == '<':
		if t := l.peek(); t == '=' {
			l.next()
			l.emit(itemLTE)
		} else {
			l.emit(itemLSS)
		}
	case r == '>':
		if t := l.peek(); t == '=' {
			l.next()
			l.emit(itemGTE)
		} else {
			l.emit(itemGTR)
		}
	case isDigit(r) || (r == '.' && isDigit(l.peek())):
		// Un-read the first rune so the number scanner sees the whole token.
		l.backup()
		return lexNumberOrDuration
	case r == '"' || r == '\'':
		l.stringOpen = r
		return lexString
	case r == '`':
		l.stringOpen = r
		return lexRawString
	case isAlpha(r) || r == ':':
		l.backup()
		return lexKeywordOrIdentifier
	case r == '(':
		l.emit(itemLeftParen)
		l.parenDepth++
		return lexStatements
	case r == ')':
		// The parenthesis item is emitted before the balance check, so an
		// unmatched ")" yields the item followed by an error.
		l.emit(itemRightParen)
		l.parenDepth--
		if l.parenDepth < 0 {
			return l.errorf("unexpected right parenthesis %q", r)
		}
		return lexStatements
	case r == '{':
		l.emit(itemLeftBrace)
		l.braceOpen = true
		// Runs the first brace state right away and returns its successor.
		return lexInsideBraces(l)
	case r == '[':
		// Brackets do not nest.
		if l.bracketOpen {
			return l.errorf("unexpected left bracket %q", r)
		}
		l.emit(itemLeftBracket)
		l.bracketOpen = true
		return lexDuration
	case r == ']':
		if !l.bracketOpen {
			return l.errorf("unexpected right bracket %q", r)
		}
		l.emit(itemRightBracket)
		l.bracketOpen = false

	default:
		return l.errorf("unexpected character: %q", r)
	}
	return lexStatements
}
   557  
// lexInsideBraces scans the inside of a vector selector. Keywords are ignored and
// scanned as identifiers.
//
// The helper states used here (lexSpace, lexString, lexRawString,
// lexIdentifier, lexLineComment) return lexStatements, which routes back to
// this function for as long as braceOpen is set.
func lexInsideBraces(l *lexer) stateFn {
	if strings.HasPrefix(l.input[l.pos:], lineComment) {
		return lexLineComment
	}

	switch r := l.next(); {
	case r == eof:
		return l.errorf("unexpected end of input inside braces")
	case isSpace(r):
		return lexSpace
	case isAlpha(r):
		l.backup()
		return lexIdentifier
	case r == ',':
		l.emit(itemComma)
	case r == '"' || r == '\'':
		l.stringOpen = r
		return lexString
	case r == '`':
		l.stringOpen = r
		return lexRawString
	case r == '=':
		// "=~" is the regex matcher; a lone "=" is emitted as itemEQL here,
		// not as itemAssign.
		if l.next() == '~' {
			l.emit(itemEQLRegex)
			break
		}
		l.backup()
		l.emit(itemEQL)
	case r == '!':
		switch nr := l.next(); {
		case nr == '~':
			l.emit(itemNEQRegex)
		case nr == '=':
			l.emit(itemNEQ)
		default:
			return l.errorf("unexpected character after '!' inside braces: %q", nr)
		}
	case r == '{':
		// Braces do not nest.
		return l.errorf("unexpected left brace %q", r)
	case r == '}':
		l.emit(itemRightBrace)
		l.braceOpen = false

		// In a series description the value sequence follows the selector.
		if l.seriesDesc {
			return lexValueSequence
		}
		return lexStatements
	default:
		return l.errorf("unexpected character inside braces: %q", r)
	}
	return lexInsideBraces
}
   612  
   613  // lexValueSequence scans a value sequence of a series description.
   614  func lexValueSequence(l *lexer) stateFn {
   615  	switch r := l.next(); {
   616  	case r == eof:
   617  		return lexStatements
   618  	case isSpace(r):
   619  		lexSpace(l)
   620  	case r == '+':
   621  		l.emit(itemADD)
   622  	case r == '-':
   623  		l.emit(itemSUB)
   624  	case r == 'x':
   625  		l.emit(itemTimes)
   626  	case r == '_':
   627  		l.emit(itemBlank)
   628  	case isDigit(r) || (r == '.' && isDigit(l.peek())):
   629  		l.backup()
   630  		lexNumber(l)
   631  	case isAlpha(r):
   632  		l.backup()
   633  		// We might lex invalid items here but this will be caught by the parser.
   634  		return lexKeywordOrIdentifier
   635  	default:
   636  		return l.errorf("unexpected character in series sequence: %q", r)
   637  	}
   638  	return lexValueSequence
   639  }
   640  
   641  // lexEscape scans a string escape sequence. The initial escaping character (\)
   642  // has already been seen.
   643  //
   644  // NOTE: This function as well as the helper function digitVal() and associated
   645  // tests have been adapted from the corresponding functions in the "go/scanner"
   646  // package of the Go standard library to work for Prometheus-style strings.
   647  // None of the actual escaping/quoting logic was changed in this function - it
   648  // was only modified to integrate with our lexer.
   649  func lexEscape(l *lexer) {
   650  	var n int
   651  	var base, max uint32
   652  
   653  	ch := l.next()
   654  	switch ch {
   655  	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', l.stringOpen:
   656  		return
   657  	case '0', '1', '2', '3', '4', '5', '6', '7':
   658  		n, base, max = 3, 8, 255
   659  	case 'x':
   660  		ch = l.next()
   661  		n, base, max = 2, 16, 255
   662  	case 'u':
   663  		ch = l.next()
   664  		n, base, max = 4, 16, unicode.MaxRune
   665  	case 'U':
   666  		ch = l.next()
   667  		n, base, max = 8, 16, unicode.MaxRune
   668  	case eof:
   669  		l.errorf("escape sequence not terminated")
   670  	default:
   671  		l.errorf("unknown escape sequence %#U", ch)
   672  	}
   673  
   674  	var x uint32
   675  	for n > 0 {
   676  		d := uint32(digitVal(ch))
   677  		if d >= base {
   678  			if ch == eof {
   679  				l.errorf("escape sequence not terminated")
   680  			}
   681  			l.errorf("illegal character %#U in escape sequence", ch)
   682  		}
   683  		x = x*base + d
   684  		ch = l.next()
   685  		n--
   686  	}
   687  
   688  	if x > max || 0xD800 <= x && x < 0xE000 {
   689  		l.errorf("escape sequence is an invalid Unicode code point")
   690  	}
   691  }
   692  
   693  // digitVal returns the digit value of a rune or 16 in case the rune does not
   694  // represent a valid digit.
   695  func digitVal(ch rune) int {
   696  	switch {
   697  	case '0' <= ch && ch <= '9':
   698  		return int(ch - '0')
   699  	case 'a' <= ch && ch <= 'f':
   700  		return int(ch - 'a' + 10)
   701  	case 'A' <= ch && ch <= 'F':
   702  		return int(ch - 'A' + 10)
   703  	}
   704  	return 16 // Larger than any legal digit val.
   705  }
   706  
   707  // lexString scans a quoted string. The initial quote has already been seen.
   708  func lexString(l *lexer) stateFn {
   709  Loop:
   710  	for {
   711  		switch l.next() {
   712  		case '\\':
   713  			lexEscape(l)
   714  		case utf8.RuneError:
   715  			return l.errorf("invalid UTF-8 rune")
   716  		case eof, '\n':
   717  			return l.errorf("unterminated quoted string")
   718  		case l.stringOpen:
   719  			break Loop
   720  		}
   721  	}
   722  	l.emit(itemString)
   723  	return lexStatements
   724  }
   725  
   726  // lexRawString scans a raw quoted string. The initial quote has already been seen.
   727  func lexRawString(l *lexer) stateFn {
   728  Loop:
   729  	for {
   730  		switch l.next() {
   731  		case utf8.RuneError:
   732  			return l.errorf("invalid UTF-8 rune")
   733  		case eof:
   734  			return l.errorf("unterminated raw string")
   735  		case l.stringOpen:
   736  			break Loop
   737  		}
   738  	}
   739  	l.emit(itemString)
   740  	return lexStatements
   741  }
   742  
   743  // lexSpace scans a run of space characters. One space has already been seen.
   744  func lexSpace(l *lexer) stateFn {
   745  	for isSpace(l.peek()) {
   746  		l.next()
   747  	}
   748  	l.ignore()
   749  	return lexStatements
   750  }
   751  
   752  // lexLineComment scans a line comment. Left comment marker is known to be present.
   753  func lexLineComment(l *lexer) stateFn {
   754  	l.pos += Pos(len(lineComment))
   755  	for r := l.next(); !isEndOfLine(r) && r != eof; {
   756  		r = l.next()
   757  	}
   758  	l.backup()
   759  	l.emit(itemComment)
   760  	return lexStatements
   761  }
   762  
   763  func lexDuration(l *lexer) stateFn {
   764  	if l.scanNumber() {
   765  		return l.errorf("missing unit character in duration")
   766  	}
   767  	// Next two chars must be a valid unit and a non-alphanumeric.
   768  	if l.accept("smhdwy") {
   769  		if isAlphaNumeric(l.next()) {
   770  			return l.errorf("bad duration syntax: %q", l.input[l.start:l.pos])
   771  		}
   772  		l.backup()
   773  		l.emit(itemDuration)
   774  		return lexStatements
   775  	}
   776  	return l.errorf("bad duration syntax: %q", l.input[l.start:l.pos])
   777  }
   778  
   779  // lexNumber scans a number: decimal, hex, oct or float.
   780  func lexNumber(l *lexer) stateFn {
   781  	if !l.scanNumber() {
   782  		return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
   783  	}
   784  	l.emit(itemNumber)
   785  	return lexStatements
   786  }
   787  
   788  // lexNumberOrDuration scans a number or a duration item.
   789  func lexNumberOrDuration(l *lexer) stateFn {
   790  	if l.scanNumber() {
   791  		l.emit(itemNumber)
   792  		return lexStatements
   793  	}
   794  	// Next two chars must be a valid unit and a non-alphanumeric.
   795  	if l.accept("smhdwy") {
   796  		if isAlphaNumeric(l.next()) {
   797  			return l.errorf("bad number or duration syntax: %q", l.input[l.start:l.pos])
   798  		}
   799  		l.backup()
   800  		l.emit(itemDuration)
   801  		return lexStatements
   802  	}
   803  	return l.errorf("bad number or duration syntax: %q", l.input[l.start:l.pos])
   804  }
   805  
   806  // scanNumber scans numbers of different formats. The scanned item is
   807  // not necessarily a valid number. This case is caught by the parser.
   808  func (l *lexer) scanNumber() bool {
   809  	digits := "0123456789"
   810  	// Disallow hexadecimal in series descriptions as the syntax is ambiguous.
   811  	if !l.seriesDesc && l.accept("0") && l.accept("xX") {
   812  		digits = "0123456789abcdefABCDEF"
   813  	}
   814  	l.acceptRun(digits)
   815  	if l.accept(".") {
   816  		l.acceptRun(digits)
   817  	}
   818  	if l.accept("eE") {
   819  		l.accept("+-")
   820  		l.acceptRun("0123456789")
   821  	}
   822  	// Next thing must not be alphanumeric unless it's the times token
   823  	// for series repetitions.
   824  	if r := l.peek(); (l.seriesDesc && r == 'x') || !isAlphaNumeric(r) {
   825  		return true
   826  	}
   827  	return false
   828  }
   829  
   830  // lexIdentifier scans an alphanumeric identifier. The next character
   831  // is known to be a letter.
   832  func lexIdentifier(l *lexer) stateFn {
   833  	for isAlphaNumeric(l.next()) {
   834  		// absorb
   835  	}
   836  	l.backup()
   837  	l.emit(itemIdentifier)
   838  	return lexStatements
   839  }
   840  
   841  // lexKeywordOrIdentifier scans an alphanumeric identifier which may contain
   842  // a colon rune. If the identifier is a keyword the respective keyword item
   843  // is scanned.
   844  func lexKeywordOrIdentifier(l *lexer) stateFn {
   845  Loop:
   846  	for {
   847  		switch r := l.next(); {
   848  		case isAlphaNumeric(r) || r == ':':
   849  			// absorb.
   850  		default:
   851  			l.backup()
   852  			word := l.input[l.start:l.pos]
   853  			if kw, ok := key[strings.ToLower(word)]; ok {
   854  				l.emit(kw)
   855  			} else if !strings.Contains(word, ":") {
   856  				l.emit(itemIdentifier)
   857  			} else {
   858  				l.emit(itemMetricIdentifier)
   859  			}
   860  			break Loop
   861  		}
   862  	}
   863  	if l.seriesDesc && l.peek() != '{' {
   864  		return lexValueSequence
   865  	}
   866  	return lexStatements
   867  }
   868  
   869  func isSpace(r rune) bool {
   870  	return r == ' ' || r == '\t' || r == '\n' || r == '\r'
   871  }
   872  
   873  // isEndOfLine reports whether r is an end-of-line character.
   874  func isEndOfLine(r rune) bool {
   875  	return r == '\r' || r == '\n'
   876  }
   877  
   878  // isAlphaNumeric reports whether r is an alphabetic, digit, or underscore.
   879  func isAlphaNumeric(r rune) bool {
   880  	return isAlpha(r) || isDigit(r)
   881  }
   882  
   883  // isDigit reports whether r is a digit. Note: we cannot use unicode.IsDigit()
   884  // instead because that also classifies non-Latin digits as digits. See
   885  // https://github.com/prometheus/prometheus/issues/939.
   886  func isDigit(r rune) bool {
   887  	return '0' <= r && r <= '9'
   888  }
   889  
   890  // isAlpha reports whether r is an alphabetic or underscore.
   891  func isAlpha(r rune) bool {
   892  	return r == '_' || ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z')
   893  }
   894  
   895  // isLabel reports whether the string can be used as label.
   896  func isLabel(s string) bool {
   897  	if len(s) == 0 || !isAlpha(rune(s[0])) {
   898  		return false
   899  	}
   900  	for _, c := range s[1:] {
   901  		if !isAlphaNumeric(c) {
   902  			return false
   903  		}
   904  	}
   905  	return true
   906  }