bosun.org@v0.0.0-20210513094433-e25bc3e69a1f/cmd/bosun/expr/parse/lex.go (about)

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package parse
     6  
     7  import (
     8  	"fmt"
     9  	"strings"
    10  	"unicode"
    11  	"unicode/utf8"
    12  )
    13  
// item is a single lexical token handed from the lexer to its consumer.
//
// The lexer builds items with positional composite literals, so the field
// order (typ, pos, val) is significant and must not be rearranged.
type item struct {
	typ itemType // The type of this item.
	pos Pos      // The starting position, in bytes, of this item in the input string.
	val string   // The value of this item.
}
    20  
    21  func (i item) String() string {
    22  	switch {
    23  	case i.typ == itemEOF:
    24  		return "EOF"
    25  	case i.typ == itemError:
    26  		return i.val
    27  	case len(i.val) > 10:
    28  		return fmt.Sprintf("%.10q...", i.val)
    29  	}
    30  	return fmt.Sprintf("%q", i.val)
    31  }
    32  
// itemType identifies the type of lex items. Its zero value is itemError,
// the first constant of the iota block that enumerates the item types.
type itemType int
    35  
// The kinds of item the lexer emits. The values are assigned by iota, so
// itemError is the zero value of itemType and the declaration order fixes
// every other value.
//
// Entries without a trailing comment: itemEOF marks the end of the input;
// itemComma, itemLeftParen and itemRightParen are the ',', '(' and ')'
// punctuation; itemString is a double-quoted string and
// itemTripleQuotedString is a string delimited by three single quotes (in
// both cases the delimiters are part of the item's value); itemFunc is a run
// of letters, except for the word "expr", which is emitted as itemExpr.
const (
	itemError itemType = iota // error occurred; value is text of error
	itemEOF
	itemNot       // '!'
	itemAnd       // '&&'
	itemOr        // '||'
	itemGreater   // '>'
	itemLess      // '<'
	itemGreaterEq // '>='
	itemLessEq    // '<='
	itemEq        // '=='
	itemNotEq     // '!='
	itemPlus      // '+'
	itemMinus     // '-'
	itemMult      // '*'
	itemDiv       // '/'
	itemMod       // '%'
	itemNumber    // simple number
	itemComma
	itemLeftParen
	itemRightParen
	itemString
	itemFunc
	itemTripleQuotedString
	itemPow // '**'
	itemExpr
	itemPrefix // [prefix]
)
    64  
// eof is the sentinel that next returns once the input is exhausted. It is
// negative, so it cannot be confused with a rune decoded from the input.
const eof = -1
    66  
// stateFn represents the state of the scanner as a function that returns the
// next state. Returning nil stops the state machine driven by run.
type stateFn func(*lexer) stateFn
    69  
// lexer holds the state of the scanner.
//
// The pending item is always the text input[start:pos]: next advances pos,
// backup retracts it by width, and emit or ignore move start up to pos.
// Scanned items are delivered to the consumer over the items channel.
type lexer struct {
	input   string    // the string being scanned
	state   stateFn   // the next lexing function to enter
	pos     Pos       // current position in the input
	start   Pos       // start position of this item
	width   Pos       // width of last rune read from input
	lastPos Pos       // position of most recent item returned by nextItem
	items   chan item // channel of scanned items
}
    80  
    81  // next returns the next rune in the input.
    82  func (l *lexer) next() rune {
    83  	if int(l.pos) >= len(l.input) {
    84  		l.width = 0
    85  		return eof
    86  	}
    87  	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
    88  	l.width = Pos(w)
    89  	l.pos += l.width
    90  	return r
    91  }
    92  
    93  // peek returns but does not consume the next rune in the input.
    94  func (l *lexer) peek() rune {
    95  	r := l.next()
    96  	l.backup()
    97  	return r
    98  }
    99  
   100  // backup steps back one rune. Can only be called once per call of next.
   101  func (l *lexer) backup() {
   102  	l.pos -= l.width
   103  }
   104  
   105  // emit passes an item back to the client.
   106  func (l *lexer) emit(t itemType) {
   107  	l.items <- item{t, l.start, l.input[l.start:l.pos]}
   108  	l.start = l.pos
   109  }
   110  
   111  // accept consumes the next rune if it's from the valid set.
   112  func (l *lexer) accept(valid string) bool {
   113  	if strings.IndexRune(valid, l.next()) >= 0 {
   114  		return true
   115  	}
   116  	l.backup()
   117  	return false
   118  }
   119  
   120  // acceptRun consumes a run of runes from the valid set.
   121  func (l *lexer) acceptRun(valid string) {
   122  	for strings.IndexRune(valid, l.next()) >= 0 {
   123  	}
   124  	l.backup()
   125  }
   126  
// ignore skips over the pending input before this point: it moves the start
// of the pending item up to the current position without emitting anything,
// so the skipped text is not part of any item.
func (l *lexer) ignore() {
	l.start = l.pos
}
   131  
   132  // lineNumber reports which line we're on, based on the position of
   133  // the previous item returned by nextItem. Doing it this way
   134  // means we don't have to worry about peek double counting.
   135  func (l *lexer) lineNumber() int {
   136  	return 1 + strings.Count(l.input[:l.lastPos], "\n")
   137  }
   138  
   139  // errorf returns an error token and terminates the scan by passing
   140  // back a nil pointer that will be the next state, terminating l.nextItem.
   141  func (l *lexer) errorf(format string, args ...interface{}) stateFn {
   142  	l.items <- item{itemError, l.start, fmt.Sprintf(format, args...)}
   143  	return nil
   144  }
   145  
   146  // nextItem returns the next item from the input.
   147  func (l *lexer) nextItem() item {
   148  	item := <-l.items
   149  	l.lastPos = item.pos
   150  	return item
   151  }
   152  
   153  // lex creates a new scanner for the input string.
   154  func lex(input string) *lexer {
   155  	l := &lexer{
   156  		input: input,
   157  		items: make(chan item),
   158  	}
   159  	go l.run()
   160  	return l
   161  }
   162  
   163  // run runs the state machine for the lexer.
   164  func (l *lexer) run() {
   165  	for l.state = lexItem; l.state != nil; {
   166  		l.state = l.state(l)
   167  	}
   168  }
   169  
   170  // state functions
   171  
   172  func lexItem(l *lexer) stateFn {
   173  Loop:
   174  	for {
   175  		switch r := l.next(); {
   176  		case isSymbol(r):
   177  			return lexSymbol
   178  		case isNumber(r):
   179  			l.backup()
   180  			return lexNumber
   181  		case unicode.IsLetter(r):
   182  			return lexFunc
   183  		case r == '(':
   184  			l.emit(itemLeftParen)
   185  		case r == ')':
   186  			l.emit(itemRightParen)
   187  		case r == '[':
   188  			return lexPrefixBegin
   189  		case r == '"':
   190  			return lexString
   191  		case r == '\'':
   192  			return lexStringTripleBegin
   193  		case r == ',':
   194  			l.emit(itemComma)
   195  		case isSpace(r):
   196  			l.ignore()
   197  		case r == eof:
   198  			l.emit(itemEOF)
   199  			break Loop
   200  		default:
   201  			return l.errorf("invalid character: %s", string(r))
   202  		}
   203  	}
   204  	return nil
   205  }
   206  
   207  // lexNumber scans a number: decimal, octal, hex, float, or imaginary. This
   208  // isn't a perfect number scanner - for instance it accepts "." and "0x0.2"
   209  // and "089" - but when it's wrong the input is invalid and the parser (via
   210  // strconv) will notice.
   211  func lexNumber(l *lexer) stateFn {
   212  	if !l.scanNumber() {
   213  		return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
   214  	}
   215  	l.emit(itemNumber)
   216  	return lexItem
   217  }
   218  
   219  func (l *lexer) scanNumber() bool {
   220  	// Is it hex?
   221  	digits := "0123456789"
   222  	if l.accept("0") && l.accept("xX") {
   223  		digits = "0123456789abcdefABCDEF"
   224  	}
   225  	l.acceptRun(digits)
   226  	if l.accept(".") {
   227  		l.acceptRun(digits)
   228  	}
   229  	if l.accept("eE") {
   230  		l.accept("+-")
   231  		l.acceptRun("0123456789")
   232  	}
   233  	return true
   234  }
   235  
// symbols lists every character that can appear in an operator. isSymbol
// tests membership in it and lexSymbol consumes a run of these characters as
// a single operator.
const symbols = "!<>=&|+-*/%"
   237  
   238  func lexSymbol(l *lexer) stateFn {
   239  	l.acceptRun(symbols)
   240  	s := l.input[l.start:l.pos]
   241  	switch s {
   242  	case "!":
   243  		l.emit(itemNot)
   244  	case "&&":
   245  		l.emit(itemAnd)
   246  	case "||":
   247  		l.emit(itemOr)
   248  	case ">":
   249  		l.emit(itemGreater)
   250  	case "<":
   251  		l.emit(itemLess)
   252  	case ">=":
   253  		l.emit(itemGreaterEq)
   254  	case "<=":
   255  		l.emit(itemLessEq)
   256  	case "==":
   257  		l.emit(itemEq)
   258  	case "!=":
   259  		l.emit(itemNotEq)
   260  	case "+":
   261  		l.emit(itemPlus)
   262  	case "-":
   263  		l.emit(itemMinus)
   264  	case "*":
   265  		l.emit(itemMult)
   266  	case "**":
   267  		l.emit(itemPow)
   268  	case "/":
   269  		l.emit(itemDiv)
   270  	case "%":
   271  		l.emit(itemMod)
   272  	default:
   273  		l.emit(itemError)
   274  	}
   275  	return lexItem
   276  }
   277  
   278  func lexFunc(l *lexer) stateFn {
   279  	for {
   280  		switch r := l.next(); {
   281  		case unicode.IsLetter(r):
   282  			// absorb
   283  		default:
   284  			l.backup()
   285  			if l.input[l.start:l.pos] == "expr" {
   286  				l.emit(itemExpr)
   287  				return lexItem
   288  			}
   289  			l.emit(itemFunc)
   290  			return lexItem
   291  		}
   292  	}
   293  }
   294  
   295  func lexString(l *lexer) stateFn {
   296  	for {
   297  		switch l.next() {
   298  		case '"':
   299  			l.emit(itemString)
   300  			return lexItem
   301  		case eof:
   302  			return l.errorf("unterminated string")
   303  		}
   304  	}
   305  }
   306  
   307  func lexPrefixBegin(l *lexer) stateFn {
   308  	for {
   309  		switch l.next() {
   310  		case '"':
   311  			return lexPrefixEnd
   312  		case eof:
   313  			return l.errorf("unterminated prefix string, must use double quotes e.g [\"foo\"]")
   314  		}
   315  	}
   316  }
   317  
   318  func lexPrefixEnd(l *lexer) stateFn {
   319  	for {
   320  		switch l.next() {
   321  		case '"':
   322  			if l.next() == ']' {
   323  				l.emit(itemPrefix)
   324  				return lexItem
   325  			}
   326  		case eof:
   327  			return l.errorf("unterminated prefix string, must use double quotes e.g [\"foo\"]")
   328  		}
   329  	}
   330  }
   331  
   332  func lexStringTripleBegin(l *lexer) stateFn {
   333  	for {
   334  		switch l.next() {
   335  		case '\'':
   336  			//Check for triple quoted string
   337  			if l.next() == '\'' {
   338  				return lexStringTripleEnd
   339  			} else {
   340  				l.backup()
   341  			}
   342  			return l.errorf("invalid start of string, must use double qutoes or triple single quotes")
   343  		case eof:
   344  			return l.errorf("unterminated string")
   345  		}
   346  	}
   347  }
   348  
   349  func lexStringTripleEnd(l *lexer) stateFn {
   350  	count := 0
   351  	for {
   352  		switch l.next() {
   353  		case '\'':
   354  			count++
   355  			if count == 3 {
   356  				l.emit(itemTripleQuotedString)
   357  				return lexItem
   358  			}
   359  		case eof:
   360  			return l.errorf("unterminated string")
   361  		default:
   362  			count = 0
   363  		}
   364  	}
   365  }
   366  
// isSpace reports whether r is a space character, as defined by
// unicode.IsSpace.
func isSpace(r rune) bool {
	return unicode.IsSpace(r)
}
   371  
   372  func isVarchar(r rune) bool {
   373  	return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
   374  }
   375  
   376  func isSymbol(r rune) bool {
   377  	return strings.IndexRune(symbols, r) != -1
   378  }
   379  
   380  func isNumber(r rune) bool {
   381  	return unicode.IsDigit(r) || r == '.'
   382  }