github.com/huandu/go@v0.0.0-20151114150818-04e615e41150/src/text/template/parse/lex.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package parse 6 7 import ( 8 "fmt" 9 "strings" 10 "unicode" 11 "unicode/utf8" 12 ) 13 14 // item represents a token or text string returned from the scanner. 15 type item struct { 16 typ itemType // The type of this item. 17 pos Pos // The starting position, in bytes, of this item in the input string. 18 val string // The value of this item. 19 } 20 21 func (i item) String() string { 22 switch { 23 case i.typ == itemEOF: 24 return "EOF" 25 case i.typ == itemError: 26 return i.val 27 case i.typ > itemKeyword: 28 return fmt.Sprintf("<%s>", i.val) 29 case len(i.val) > 10: 30 return fmt.Sprintf("%.10q...", i.val) 31 } 32 return fmt.Sprintf("%q", i.val) 33 } 34 35 // itemType identifies the type of lex items. 36 type itemType int 37 38 const ( 39 itemError itemType = iota // error occurred; value is text of error 40 itemBool // boolean constant 41 itemChar // printable ASCII character; grab bag for comma etc. 42 itemCharConstant // character constant 43 itemComplex // complex constant (1+2i); imaginary is just a number 44 itemColonEquals // colon-equals (':=') introducing a declaration 45 itemEOF 46 itemField // alphanumeric identifier starting with '.' 47 itemIdentifier // alphanumeric identifier not starting with '.' 48 itemLeftDelim // left action delimiter 49 itemLeftParen // '(' inside action 50 itemNumber // simple number, including imaginary 51 itemPipe // pipe symbol 52 itemRawString // raw quoted string (includes quotes) 53 itemRightDelim // right action delimiter 54 itemRightParen // ')' inside action 55 itemSpace // run of spaces separating arguments 56 itemString // quoted string (includes quotes) 57 itemText // plain text 58 itemVariable // variable starting with '$', such as '$' or '$1' or '$hello' 59 // Keywords appear after all the rest. 60 itemKeyword // used only to delimit the keywords 61 itemDot // the cursor, spelled '.' 62 itemDefine // define keyword 63 itemElse // else keyword 64 itemEnd // end keyword 65 itemIf // if keyword 66 itemNil // the untyped nil constant, easiest to treat as a keyword 67 itemRange // range keyword 68 itemTemplate // template keyword 69 itemWith // with keyword 70 ) 71 72 var key = map[string]itemType{ 73 ".": itemDot, 74 "define": itemDefine, 75 "else": itemElse, 76 "end": itemEnd, 77 "if": itemIf, 78 "range": itemRange, 79 "nil": itemNil, 80 "template": itemTemplate, 81 "with": itemWith, 82 } 83 84 const eof = -1 85 86 // stateFn represents the state of the scanner as a function that returns the next state. 87 type stateFn func(*lexer) stateFn 88 89 // lexer holds the state of the scanner. 90 type lexer struct { 91 name string // the name of the input; used only for error reports 92 input string // the string being scanned 93 leftDelim string // start of action 94 rightDelim string // end of action 95 state stateFn // the next lexing function to enter 96 pos Pos // current position in the input 97 start Pos // start position of this item 98 width Pos // width of last rune read from input 99 lastPos Pos // position of most recent item returned by nextItem 100 items chan item // channel of scanned items 101 parenDepth int // nesting depth of ( ) exprs 102 } 103 104 // next returns the next rune in the input. 105 func (l *lexer) next() rune { 106 if int(l.pos) >= len(l.input) { 107 l.width = 0 108 return eof 109 } 110 r, w := utf8.DecodeRuneInString(l.input[l.pos:]) 111 l.width = Pos(w) 112 l.pos += l.width 113 return r 114 } 115 116 // peek returns but does not consume the next rune in the input. 117 func (l *lexer) peek() rune { 118 r := l.next() 119 l.backup() 120 return r 121 } 122 123 // backup steps back one rune. Can only be called once per call of next. 124 func (l *lexer) backup() { 125 l.pos -= l.width 126 } 127 128 // emit passes an item back to the client. 129 func (l *lexer) emit(t itemType) { 130 l.items <- item{t, l.start, l.input[l.start:l.pos]} 131 l.start = l.pos 132 } 133 134 // ignore skips over the pending input before this point. 135 func (l *lexer) ignore() { 136 l.start = l.pos 137 } 138 139 // accept consumes the next rune if it's from the valid set. 140 func (l *lexer) accept(valid string) bool { 141 if strings.IndexRune(valid, l.next()) >= 0 { 142 return true 143 } 144 l.backup() 145 return false 146 } 147 148 // acceptRun consumes a run of runes from the valid set. 149 func (l *lexer) acceptRun(valid string) { 150 for strings.IndexRune(valid, l.next()) >= 0 { 151 } 152 l.backup() 153 } 154 155 // lineNumber reports which line we're on, based on the position of 156 // the previous item returned by nextItem. Doing it this way 157 // means we don't have to worry about peek double counting. 158 func (l *lexer) lineNumber() int { 159 return 1 + strings.Count(l.input[:l.lastPos], "\n") 160 } 161 162 // errorf returns an error token and terminates the scan by passing 163 // back a nil pointer that will be the next state, terminating l.nextItem. 164 func (l *lexer) errorf(format string, args ...interface{}) stateFn { 165 l.items <- item{itemError, l.start, fmt.Sprintf(format, args...)} 166 return nil 167 } 168 169 // nextItem returns the next item from the input. 170 // Called by the parser, not in the lexing goroutine. 171 func (l *lexer) nextItem() item { 172 item := <-l.items 173 l.lastPos = item.pos 174 return item 175 } 176 177 // drain drains the output so the lexing goroutine will exit. 178 // Called by the parser, not in the lexing goroutine. 179 func (l *lexer) drain() { 180 for range l.items { 181 } 182 } 183 184 // lex creates a new scanner for the input string. 185 func lex(name, input, left, right string) *lexer { 186 if left == "" { 187 left = leftDelim 188 } 189 if right == "" { 190 right = rightDelim 191 } 192 l := &lexer{ 193 name: name, 194 input: input, 195 leftDelim: left, 196 rightDelim: right, 197 items: make(chan item), 198 } 199 go l.run() 200 return l 201 } 202 203 // run runs the state machine for the lexer. 204 func (l *lexer) run() { 205 for l.state = lexText; l.state != nil; { 206 l.state = l.state(l) 207 } 208 close(l.items) 209 } 210 211 // state functions 212 213 const ( 214 leftDelim = "{{" 215 rightDelim = "}}" 216 leftComment = "/*" 217 rightComment = "*/" 218 ) 219 220 // lexText scans until an opening action delimiter, "{{". 221 func lexText(l *lexer) stateFn { 222 for { 223 if strings.HasPrefix(l.input[l.pos:], l.leftDelim) { 224 if l.pos > l.start { 225 l.emit(itemText) 226 } 227 return lexLeftDelim 228 } 229 if l.next() == eof { 230 break 231 } 232 } 233 // Correctly reached EOF. 234 if l.pos > l.start { 235 l.emit(itemText) 236 } 237 l.emit(itemEOF) 238 return nil 239 } 240 241 // lexLeftDelim scans the left delimiter, which is known to be present. 242 func lexLeftDelim(l *lexer) stateFn { 243 l.pos += Pos(len(l.leftDelim)) 244 if strings.HasPrefix(l.input[l.pos:], leftComment) { 245 return lexComment 246 } 247 l.emit(itemLeftDelim) 248 l.parenDepth = 0 249 return lexInsideAction 250 } 251 252 // lexComment scans a comment. The left comment marker is known to be present. 253 func lexComment(l *lexer) stateFn { 254 l.pos += Pos(len(leftComment)) 255 i := strings.Index(l.input[l.pos:], rightComment) 256 if i < 0 { 257 return l.errorf("unclosed comment") 258 } 259 l.pos += Pos(i + len(rightComment)) 260 if !strings.HasPrefix(l.input[l.pos:], l.rightDelim) { 261 return l.errorf("comment ends before closing delimiter") 262 263 } 264 l.pos += Pos(len(l.rightDelim)) 265 l.ignore() 266 return lexText 267 } 268 269 // lexRightDelim scans the right delimiter, which is known to be present. 270 func lexRightDelim(l *lexer) stateFn { 271 l.pos += Pos(len(l.rightDelim)) 272 l.emit(itemRightDelim) 273 return lexText 274 } 275 276 // lexInsideAction scans the elements inside action delimiters. 277 func lexInsideAction(l *lexer) stateFn { 278 // Either number, quoted string, or identifier. 279 // Spaces separate arguments; runs of spaces turn into itemSpace. 280 // Pipe symbols separate and are emitted. 281 if strings.HasPrefix(l.input[l.pos:], l.rightDelim) { 282 if l.parenDepth == 0 { 283 return lexRightDelim 284 } 285 return l.errorf("unclosed left paren") 286 } 287 switch r := l.next(); { 288 case r == eof || isEndOfLine(r): 289 return l.errorf("unclosed action") 290 case isSpace(r): 291 return lexSpace 292 case r == ':': 293 if l.next() != '=' { 294 return l.errorf("expected :=") 295 } 296 l.emit(itemColonEquals) 297 case r == '|': 298 l.emit(itemPipe) 299 case r == '"': 300 return lexQuote 301 case r == '`': 302 return lexRawQuote 303 case r == '$': 304 return lexVariable 305 case r == '\'': 306 return lexChar 307 case r == '.': 308 // special look-ahead for ".field" so we don't break l.backup(). 309 if l.pos < Pos(len(l.input)) { 310 r := l.input[l.pos] 311 if r < '0' || '9' < r { 312 return lexField 313 } 314 } 315 fallthrough // '.' can start a number. 316 case r == '+' || r == '-' || ('0' <= r && r <= '9'): 317 l.backup() 318 return lexNumber 319 case isAlphaNumeric(r): 320 l.backup() 321 return lexIdentifier 322 case r == '(': 323 l.emit(itemLeftParen) 324 l.parenDepth++ 325 case r == ')': 326 l.emit(itemRightParen) 327 l.parenDepth-- 328 if l.parenDepth < 0 { 329 return l.errorf("unexpected right paren %#U", r) 330 } 331 case r <= unicode.MaxASCII && unicode.IsPrint(r): 332 l.emit(itemChar) 333 return lexInsideAction 334 default: 335 return l.errorf("unrecognized character in action: %#U", r) 336 } 337 return lexInsideAction 338 } 339 340 // lexSpace scans a run of space characters. 341 // One space has already been seen. 342 func lexSpace(l *lexer) stateFn { 343 for isSpace(l.peek()) { 344 l.next() 345 } 346 l.emit(itemSpace) 347 return lexInsideAction 348 } 349 350 // lexIdentifier scans an alphanumeric. 351 func lexIdentifier(l *lexer) stateFn { 352 Loop: 353 for { 354 switch r := l.next(); { 355 case isAlphaNumeric(r): 356 // absorb. 357 default: 358 l.backup() 359 word := l.input[l.start:l.pos] 360 if !l.atTerminator() { 361 return l.errorf("bad character %#U", r) 362 } 363 switch { 364 case key[word] > itemKeyword: 365 l.emit(key[word]) 366 case word[0] == '.': 367 l.emit(itemField) 368 case word == "true", word == "false": 369 l.emit(itemBool) 370 default: 371 l.emit(itemIdentifier) 372 } 373 break Loop 374 } 375 } 376 return lexInsideAction 377 } 378 379 // lexField scans a field: .Alphanumeric. 380 // The . has been scanned. 381 func lexField(l *lexer) stateFn { 382 return lexFieldOrVariable(l, itemField) 383 } 384 385 // lexVariable scans a Variable: $Alphanumeric. 386 // The $ has been scanned. 387 func lexVariable(l *lexer) stateFn { 388 if l.atTerminator() { // Nothing interesting follows -> "$". 389 l.emit(itemVariable) 390 return lexInsideAction 391 } 392 return lexFieldOrVariable(l, itemVariable) 393 } 394 395 // lexVariable scans a field or variable: [.$]Alphanumeric. 396 // The . or $ has been scanned. 397 func lexFieldOrVariable(l *lexer, typ itemType) stateFn { 398 if l.atTerminator() { // Nothing interesting follows -> "." or "$". 399 if typ == itemVariable { 400 l.emit(itemVariable) 401 } else { 402 l.emit(itemDot) 403 } 404 return lexInsideAction 405 } 406 var r rune 407 for { 408 r = l.next() 409 if !isAlphaNumeric(r) { 410 l.backup() 411 break 412 } 413 } 414 if !l.atTerminator() { 415 return l.errorf("bad character %#U", r) 416 } 417 l.emit(typ) 418 return lexInsideAction 419 } 420 421 // atTerminator reports whether the input is at valid termination character to 422 // appear after an identifier. Breaks .X.Y into two pieces. Also catches cases 423 // like "$x+2" not being acceptable without a space, in case we decide one 424 // day to implement arithmetic. 425 func (l *lexer) atTerminator() bool { 426 r := l.peek() 427 if isSpace(r) || isEndOfLine(r) { 428 return true 429 } 430 switch r { 431 case eof, '.', ',', '|', ':', ')', '(': 432 return true 433 } 434 // Does r start the delimiter? This can be ambiguous (with delim=="//", $x/2 will 435 // succeed but should fail) but only in extremely rare cases caused by willfully 436 // bad choice of delimiter. 437 if rd, _ := utf8.DecodeRuneInString(l.rightDelim); rd == r { 438 return true 439 } 440 return false 441 } 442 443 // lexChar scans a character constant. The initial quote is already 444 // scanned. Syntax checking is done by the parser. 445 func lexChar(l *lexer) stateFn { 446 Loop: 447 for { 448 switch l.next() { 449 case '\\': 450 if r := l.next(); r != eof && r != '\n' { 451 break 452 } 453 fallthrough 454 case eof, '\n': 455 return l.errorf("unterminated character constant") 456 case '\'': 457 break Loop 458 } 459 } 460 l.emit(itemCharConstant) 461 return lexInsideAction 462 } 463 464 // lexNumber scans a number: decimal, octal, hex, float, or imaginary. This 465 // isn't a perfect number scanner - for instance it accepts "." and "0x0.2" 466 // and "089" - but when it's wrong the input is invalid and the parser (via 467 // strconv) will notice. 468 func lexNumber(l *lexer) stateFn { 469 if !l.scanNumber() { 470 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos]) 471 } 472 if sign := l.peek(); sign == '+' || sign == '-' { 473 // Complex: 1+2i. No spaces, must end in 'i'. 474 if !l.scanNumber() || l.input[l.pos-1] != 'i' { 475 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos]) 476 } 477 l.emit(itemComplex) 478 } else { 479 l.emit(itemNumber) 480 } 481 return lexInsideAction 482 } 483 484 func (l *lexer) scanNumber() bool { 485 // Optional leading sign. 486 l.accept("+-") 487 // Is it hex? 488 digits := "0123456789" 489 if l.accept("0") && l.accept("xX") { 490 digits = "0123456789abcdefABCDEF" 491 } 492 l.acceptRun(digits) 493 if l.accept(".") { 494 l.acceptRun(digits) 495 } 496 if l.accept("eE") { 497 l.accept("+-") 498 l.acceptRun("0123456789") 499 } 500 // Is it imaginary? 501 l.accept("i") 502 // Next thing mustn't be alphanumeric. 503 if isAlphaNumeric(l.peek()) { 504 l.next() 505 return false 506 } 507 return true 508 } 509 510 // lexQuote scans a quoted string. 511 func lexQuote(l *lexer) stateFn { 512 Loop: 513 for { 514 switch l.next() { 515 case '\\': 516 if r := l.next(); r != eof && r != '\n' { 517 break 518 } 519 fallthrough 520 case eof, '\n': 521 return l.errorf("unterminated quoted string") 522 case '"': 523 break Loop 524 } 525 } 526 l.emit(itemString) 527 return lexInsideAction 528 } 529 530 // lexRawQuote scans a raw quoted string. 531 func lexRawQuote(l *lexer) stateFn { 532 Loop: 533 for { 534 switch l.next() { 535 case eof: 536 return l.errorf("unterminated raw quoted string") 537 case '`': 538 break Loop 539 } 540 } 541 l.emit(itemRawString) 542 return lexInsideAction 543 } 544 545 // isSpace reports whether r is a space character. 546 func isSpace(r rune) bool { 547 return r == ' ' || r == '\t' 548 } 549 550 // isEndOfLine reports whether r is an end-of-line character. 551 func isEndOfLine(r rune) bool { 552 return r == '\r' || r == '\n' 553 } 554 555 // isAlphaNumeric reports whether r is an alphabetic, digit, or underscore. 556 func isAlphaNumeric(r rune) bool { 557 return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r) 558 }