bosun.org@v0.0.0-20210513094433-e25bc3e69a1f/cmd/bosun/expr/parse/lex.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package parse 6 7 import ( 8 "fmt" 9 "strings" 10 "unicode" 11 "unicode/utf8" 12 ) 13 14 // item represents a token or text string returned from the scanner. 15 type item struct { 16 typ itemType // The type of this item. 17 pos Pos // The starting position, in bytes, of this item in the input string. 18 val string // The value of this item. 19 } 20 21 func (i item) String() string { 22 switch { 23 case i.typ == itemEOF: 24 return "EOF" 25 case i.typ == itemError: 26 return i.val 27 case len(i.val) > 10: 28 return fmt.Sprintf("%.10q...", i.val) 29 } 30 return fmt.Sprintf("%q", i.val) 31 } 32 33 // itemType identifies the type of lex items. 34 type itemType int 35 36 const ( 37 itemError itemType = iota // error occurred; value is text of error 38 itemEOF 39 itemNot // '!' 40 itemAnd // '&&' 41 itemOr // '||' 42 itemGreater // '>' 43 itemLess // '<' 44 itemGreaterEq // '>=' 45 itemLessEq // '<=' 46 itemEq // '==' 47 itemNotEq // '!=' 48 itemPlus // '+' 49 itemMinus // '-' 50 itemMult // '*' 51 itemDiv // '/' 52 itemMod // '%' 53 itemNumber // simple number 54 itemComma 55 itemLeftParen 56 itemRightParen 57 itemString 58 itemFunc 59 itemTripleQuotedString 60 itemPow // '**' 61 itemExpr 62 itemPrefix // [prefix] 63 ) 64 65 const eof = -1 66 67 // stateFn represents the state of the scanner as a function that returns the next state. 68 type stateFn func(*lexer) stateFn 69 70 // lexer holds the state of the scanner. 71 type lexer struct { 72 input string // the string being scanned 73 state stateFn // the next lexing function to enter 74 pos Pos // current position in the input 75 start Pos // start position of this item 76 width Pos // width of last rune read from input 77 lastPos Pos // position of most recent item returned by nextItem 78 items chan item // channel of scanned items 79 } 80 81 // next returns the next rune in the input. 82 func (l *lexer) next() rune { 83 if int(l.pos) >= len(l.input) { 84 l.width = 0 85 return eof 86 } 87 r, w := utf8.DecodeRuneInString(l.input[l.pos:]) 88 l.width = Pos(w) 89 l.pos += l.width 90 return r 91 } 92 93 // peek returns but does not consume the next rune in the input. 94 func (l *lexer) peek() rune { 95 r := l.next() 96 l.backup() 97 return r 98 } 99 100 // backup steps back one rune. Can only be called once per call of next. 101 func (l *lexer) backup() { 102 l.pos -= l.width 103 } 104 105 // emit passes an item back to the client. 106 func (l *lexer) emit(t itemType) { 107 l.items <- item{t, l.start, l.input[l.start:l.pos]} 108 l.start = l.pos 109 } 110 111 // accept consumes the next rune if it's from the valid set. 112 func (l *lexer) accept(valid string) bool { 113 if strings.IndexRune(valid, l.next()) >= 0 { 114 return true 115 } 116 l.backup() 117 return false 118 } 119 120 // acceptRun consumes a run of runes from the valid set. 121 func (l *lexer) acceptRun(valid string) { 122 for strings.IndexRune(valid, l.next()) >= 0 { 123 } 124 l.backup() 125 } 126 127 // ignore skips over the pending input before this point. 128 func (l *lexer) ignore() { 129 l.start = l.pos 130 } 131 132 // lineNumber reports which line we're on, based on the position of 133 // the previous item returned by nextItem. Doing it this way 134 // means we don't have to worry about peek double counting. 135 func (l *lexer) lineNumber() int { 136 return 1 + strings.Count(l.input[:l.lastPos], "\n") 137 } 138 139 // errorf returns an error token and terminates the scan by passing 140 // back a nil pointer that will be the next state, terminating l.nextItem. 141 func (l *lexer) errorf(format string, args ...interface{}) stateFn { 142 l.items <- item{itemError, l.start, fmt.Sprintf(format, args...)} 143 return nil 144 } 145 146 // nextItem returns the next item from the input. 147 func (l *lexer) nextItem() item { 148 item := <-l.items 149 l.lastPos = item.pos 150 return item 151 } 152 153 // lex creates a new scanner for the input string. 154 func lex(input string) *lexer { 155 l := &lexer{ 156 input: input, 157 items: make(chan item), 158 } 159 go l.run() 160 return l 161 } 162 163 // run runs the state machine for the lexer. 164 func (l *lexer) run() { 165 for l.state = lexItem; l.state != nil; { 166 l.state = l.state(l) 167 } 168 } 169 170 // state functions 171 172 func lexItem(l *lexer) stateFn { 173 Loop: 174 for { 175 switch r := l.next(); { 176 case isSymbol(r): 177 return lexSymbol 178 case isNumber(r): 179 l.backup() 180 return lexNumber 181 case unicode.IsLetter(r): 182 return lexFunc 183 case r == '(': 184 l.emit(itemLeftParen) 185 case r == ')': 186 l.emit(itemRightParen) 187 case r == '[': 188 return lexPrefixBegin 189 case r == '"': 190 return lexString 191 case r == '\'': 192 return lexStringTripleBegin 193 case r == ',': 194 l.emit(itemComma) 195 case isSpace(r): 196 l.ignore() 197 case r == eof: 198 l.emit(itemEOF) 199 break Loop 200 default: 201 return l.errorf("invalid character: %s", string(r)) 202 } 203 } 204 return nil 205 } 206 207 // lexNumber scans a number: decimal, octal, hex, float, or imaginary. This 208 // isn't a perfect number scanner - for instance it accepts "." and "0x0.2" 209 // and "089" - but when it's wrong the input is invalid and the parser (via 210 // strconv) will notice. 211 func lexNumber(l *lexer) stateFn { 212 if !l.scanNumber() { 213 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos]) 214 } 215 l.emit(itemNumber) 216 return lexItem 217 } 218 219 func (l *lexer) scanNumber() bool { 220 // Is it hex? 221 digits := "0123456789" 222 if l.accept("0") && l.accept("xX") { 223 digits = "0123456789abcdefABCDEF" 224 } 225 l.acceptRun(digits) 226 if l.accept(".") { 227 l.acceptRun(digits) 228 } 229 if l.accept("eE") { 230 l.accept("+-") 231 l.acceptRun("0123456789") 232 } 233 return true 234 } 235 236 const symbols = "!<>=&|+-*/%" 237 238 func lexSymbol(l *lexer) stateFn { 239 l.acceptRun(symbols) 240 s := l.input[l.start:l.pos] 241 switch s { 242 case "!": 243 l.emit(itemNot) 244 case "&&": 245 l.emit(itemAnd) 246 case "||": 247 l.emit(itemOr) 248 case ">": 249 l.emit(itemGreater) 250 case "<": 251 l.emit(itemLess) 252 case ">=": 253 l.emit(itemGreaterEq) 254 case "<=": 255 l.emit(itemLessEq) 256 case "==": 257 l.emit(itemEq) 258 case "!=": 259 l.emit(itemNotEq) 260 case "+": 261 l.emit(itemPlus) 262 case "-": 263 l.emit(itemMinus) 264 case "*": 265 l.emit(itemMult) 266 case "**": 267 l.emit(itemPow) 268 case "/": 269 l.emit(itemDiv) 270 case "%": 271 l.emit(itemMod) 272 default: 273 l.emit(itemError) 274 } 275 return lexItem 276 } 277 278 func lexFunc(l *lexer) stateFn { 279 for { 280 switch r := l.next(); { 281 case unicode.IsLetter(r): 282 // absorb 283 default: 284 l.backup() 285 if l.input[l.start:l.pos] == "expr" { 286 l.emit(itemExpr) 287 return lexItem 288 } 289 l.emit(itemFunc) 290 return lexItem 291 } 292 } 293 } 294 295 func lexString(l *lexer) stateFn { 296 for { 297 switch l.next() { 298 case '"': 299 l.emit(itemString) 300 return lexItem 301 case eof: 302 return l.errorf("unterminated string") 303 } 304 } 305 } 306 307 func lexPrefixBegin(l *lexer) stateFn { 308 for { 309 switch l.next() { 310 case '"': 311 return lexPrefixEnd 312 case eof: 313 return l.errorf("unterminated prefix string, must use double quotes e.g [\"foo\"]") 314 } 315 } 316 } 317 318 func lexPrefixEnd(l *lexer) stateFn { 319 for { 320 switch l.next() { 321 case '"': 322 if l.next() == ']' { 323 l.emit(itemPrefix) 324 return lexItem 325 } 326 case eof: 327 return l.errorf("unterminated prefix string, must use double quotes e.g [\"foo\"]") 328 } 329 } 330 } 331 332 func lexStringTripleBegin(l *lexer) stateFn { 333 for { 334 switch l.next() { 335 case '\'': 336 //Check for triple quoted string 337 if l.next() == '\'' { 338 return lexStringTripleEnd 339 } else { 340 l.backup() 341 } 342 return l.errorf("invalid start of string, must use double qutoes or triple single quotes") 343 case eof: 344 return l.errorf("unterminated string") 345 } 346 } 347 } 348 349 func lexStringTripleEnd(l *lexer) stateFn { 350 count := 0 351 for { 352 switch l.next() { 353 case '\'': 354 count++ 355 if count == 3 { 356 l.emit(itemTripleQuotedString) 357 return lexItem 358 } 359 case eof: 360 return l.errorf("unterminated string") 361 default: 362 count = 0 363 } 364 } 365 } 366 367 // isSpace reports whether r is a space character. 368 func isSpace(r rune) bool { 369 return unicode.IsSpace(r) 370 } 371 372 func isVarchar(r rune) bool { 373 return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r) 374 } 375 376 func isSymbol(r rune) bool { 377 return strings.IndexRune(symbols, r) != -1 378 } 379 380 func isNumber(r rune) bool { 381 return unicode.IsDigit(r) || r == '.' 382 }