github.com/panjjo/go@v0.0.0-20161104043856-d62b31386338/src/text/template/parse/lex.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package parse 6 7 import ( 8 "fmt" 9 "strings" 10 "unicode" 11 "unicode/utf8" 12 ) 13 14 // item represents a token or text string returned from the scanner. 15 type item struct { 16 typ itemType // The type of this item. 17 pos Pos // The starting position, in bytes, of this item in the input string. 18 val string // The value of this item. 19 } 20 21 func (i item) String() string { 22 switch { 23 case i.typ == itemEOF: 24 return "EOF" 25 case i.typ == itemError: 26 return i.val 27 case i.typ > itemKeyword: 28 return fmt.Sprintf("<%s>", i.val) 29 case len(i.val) > 10: 30 return fmt.Sprintf("%.10q...", i.val) 31 } 32 return fmt.Sprintf("%q", i.val) 33 } 34 35 // itemType identifies the type of lex items. 36 type itemType int 37 38 const ( 39 itemError itemType = iota // error occurred; value is text of error 40 itemBool // boolean constant 41 itemChar // printable ASCII character; grab bag for comma etc. 42 itemCharConstant // character constant 43 itemComplex // complex constant (1+2i); imaginary is just a number 44 itemColonEquals // colon-equals (':=') introducing a declaration 45 itemEOF 46 itemField // alphanumeric identifier starting with '.' 47 itemIdentifier // alphanumeric identifier not starting with '.' 48 itemLeftDelim // left action delimiter 49 itemLeftParen // '(' inside action 50 itemNumber // simple number, including imaginary 51 itemPipe // pipe symbol 52 itemRawString // raw quoted string (includes quotes) 53 itemRightDelim // right action delimiter 54 itemRightParen // ')' inside action 55 itemSpace // run of spaces separating arguments 56 itemString // quoted string (includes quotes) 57 itemText // plain text 58 itemVariable // variable starting with '$', such as '$' or '$1' or '$hello' 59 // Keywords appear after all the rest. 60 itemKeyword // used only to delimit the keywords 61 itemBlock // block keyword 62 itemDot // the cursor, spelled '.' 63 itemDefine // define keyword 64 itemElse // else keyword 65 itemEnd // end keyword 66 itemIf // if keyword 67 itemNil // the untyped nil constant, easiest to treat as a keyword 68 itemRange // range keyword 69 itemTemplate // template keyword 70 itemWith // with keyword 71 ) 72 73 var key = map[string]itemType{ 74 ".": itemDot, 75 "block": itemBlock, 76 "define": itemDefine, 77 "else": itemElse, 78 "end": itemEnd, 79 "if": itemIf, 80 "range": itemRange, 81 "nil": itemNil, 82 "template": itemTemplate, 83 "with": itemWith, 84 } 85 86 const eof = -1 87 88 // Trimming spaces. 89 // If the action begins "{{- " rather than "{{", then all space/tab/newlines 90 // preceding the action are trimmed; conversely if it ends " -}}" the 91 // leading spaces are trimmed. This is done entirely in the lexer; the 92 // parser never sees it happen. We require an ASCII space to be 93 // present to avoid ambiguity with things like "{{-3}}". It reads 94 // better with the space present anyway. For simplicity, only ASCII 95 // space does the job. 96 const ( 97 spaceChars = " \t\r\n" // These are the space characters defined by Go itself. 98 leftTrimMarker = "- " // Attached to left delimiter, trims trailing spaces from preceding text. 99 rightTrimMarker = " -" // Attached to right delimiter, trims leading spaces from following text. 100 trimMarkerLen = Pos(len(leftTrimMarker)) 101 ) 102 103 // stateFn represents the state of the scanner as a function that returns the next state. 104 type stateFn func(*lexer) stateFn 105 106 // lexer holds the state of the scanner. 107 type lexer struct { 108 name string // the name of the input; used only for error reports 109 input string // the string being scanned 110 leftDelim string // start of action 111 rightDelim string // end of action 112 state stateFn // the next lexing function to enter 113 pos Pos // current position in the input 114 start Pos // start position of this item 115 width Pos // width of last rune read from input 116 lastPos Pos // position of most recent item returned by nextItem 117 items chan item // channel of scanned items 118 parenDepth int // nesting depth of ( ) exprs 119 } 120 121 // next returns the next rune in the input. 122 func (l *lexer) next() rune { 123 if int(l.pos) >= len(l.input) { 124 l.width = 0 125 return eof 126 } 127 r, w := utf8.DecodeRuneInString(l.input[l.pos:]) 128 l.width = Pos(w) 129 l.pos += l.width 130 return r 131 } 132 133 // peek returns but does not consume the next rune in the input. 134 func (l *lexer) peek() rune { 135 r := l.next() 136 l.backup() 137 return r 138 } 139 140 // backup steps back one rune. Can only be called once per call of next. 141 func (l *lexer) backup() { 142 l.pos -= l.width 143 } 144 145 // emit passes an item back to the client. 146 func (l *lexer) emit(t itemType) { 147 l.items <- item{t, l.start, l.input[l.start:l.pos]} 148 l.start = l.pos 149 } 150 151 // ignore skips over the pending input before this point. 152 func (l *lexer) ignore() { 153 l.start = l.pos 154 } 155 156 // accept consumes the next rune if it's from the valid set. 157 func (l *lexer) accept(valid string) bool { 158 if strings.ContainsRune(valid, l.next()) { 159 return true 160 } 161 l.backup() 162 return false 163 } 164 165 // acceptRun consumes a run of runes from the valid set. 166 func (l *lexer) acceptRun(valid string) { 167 for strings.ContainsRune(valid, l.next()) { 168 } 169 l.backup() 170 } 171 172 // lineNumber reports which line we're on, based on the position of 173 // the previous item returned by nextItem. Doing it this way 174 // means we don't have to worry about peek double counting. 175 func (l *lexer) lineNumber() int { 176 return 1 + strings.Count(l.input[:l.lastPos], "\n") 177 } 178 179 // errorf returns an error token and terminates the scan by passing 180 // back a nil pointer that will be the next state, terminating l.nextItem. 181 func (l *lexer) errorf(format string, args ...interface{}) stateFn { 182 l.items <- item{itemError, l.start, fmt.Sprintf(format, args...)} 183 return nil 184 } 185 186 // nextItem returns the next item from the input. 187 // Called by the parser, not in the lexing goroutine. 188 func (l *lexer) nextItem() item { 189 item := <-l.items 190 l.lastPos = item.pos 191 return item 192 } 193 194 // drain drains the output so the lexing goroutine will exit. 195 // Called by the parser, not in the lexing goroutine. 196 func (l *lexer) drain() { 197 for range l.items { 198 } 199 } 200 201 // lex creates a new scanner for the input string. 202 func lex(name, input, left, right string) *lexer { 203 if left == "" { 204 left = leftDelim 205 } 206 if right == "" { 207 right = rightDelim 208 } 209 l := &lexer{ 210 name: name, 211 input: input, 212 leftDelim: left, 213 rightDelim: right, 214 items: make(chan item), 215 } 216 go l.run() 217 return l 218 } 219 220 // run runs the state machine for the lexer. 221 func (l *lexer) run() { 222 for l.state = lexText; l.state != nil; { 223 l.state = l.state(l) 224 } 225 close(l.items) 226 } 227 228 // state functions 229 230 const ( 231 leftDelim = "{{" 232 rightDelim = "}}" 233 leftComment = "/*" 234 rightComment = "*/" 235 ) 236 237 // lexText scans until an opening action delimiter, "{{". 238 func lexText(l *lexer) stateFn { 239 l.width = 0 240 if x := strings.Index(l.input[l.pos:], l.leftDelim); x >= 0 { 241 ldn := Pos(len(l.leftDelim)) 242 l.pos += Pos(x) 243 trimLength := Pos(0) 244 if strings.HasPrefix(l.input[l.pos+ldn:], leftTrimMarker) { 245 trimLength = rightTrimLength(l.input[l.start:l.pos]) 246 } 247 l.pos -= trimLength 248 if l.pos > l.start { 249 l.emit(itemText) 250 } 251 l.pos += trimLength 252 l.ignore() 253 return lexLeftDelim 254 } else { 255 l.pos = Pos(len(l.input)) 256 } 257 // Correctly reached EOF. 258 if l.pos > l.start { 259 l.emit(itemText) 260 } 261 l.emit(itemEOF) 262 return nil 263 } 264 265 // rightTrimLength returns the length of the spaces at the end of the string. 266 func rightTrimLength(s string) Pos { 267 return Pos(len(s) - len(strings.TrimRight(s, spaceChars))) 268 } 269 270 // atRightDelim reports whether the lexer is at a right delimiter, possibly preceded by a trim marker. 271 func (l *lexer) atRightDelim() (delim, trimSpaces bool) { 272 if strings.HasPrefix(l.input[l.pos:], l.rightDelim) { 273 return true, false 274 } 275 // The right delim might have the marker before. 276 if strings.HasPrefix(l.input[l.pos:], rightTrimMarker) { 277 if strings.HasPrefix(l.input[l.pos+trimMarkerLen:], l.rightDelim) { 278 return true, true 279 } 280 } 281 return false, false 282 } 283 284 // leftTrimLength returns the length of the spaces at the beginning of the string. 285 func leftTrimLength(s string) Pos { 286 return Pos(len(s) - len(strings.TrimLeft(s, spaceChars))) 287 } 288 289 // lexLeftDelim scans the left delimiter, which is known to be present, possibly with a trim marker. 290 func lexLeftDelim(l *lexer) stateFn { 291 l.pos += Pos(len(l.leftDelim)) 292 trimSpace := strings.HasPrefix(l.input[l.pos:], leftTrimMarker) 293 afterMarker := Pos(0) 294 if trimSpace { 295 afterMarker = trimMarkerLen 296 } 297 if strings.HasPrefix(l.input[l.pos+afterMarker:], leftComment) { 298 l.pos += afterMarker 299 l.ignore() 300 return lexComment 301 } 302 l.emit(itemLeftDelim) 303 l.pos += afterMarker 304 l.ignore() 305 l.parenDepth = 0 306 return lexInsideAction 307 } 308 309 // lexComment scans a comment. The left comment marker is known to be present. 310 func lexComment(l *lexer) stateFn { 311 l.pos += Pos(len(leftComment)) 312 i := strings.Index(l.input[l.pos:], rightComment) 313 if i < 0 { 314 return l.errorf("unclosed comment") 315 } 316 l.pos += Pos(i + len(rightComment)) 317 delim, trimSpace := l.atRightDelim() 318 if !delim { 319 return l.errorf("comment ends before closing delimiter") 320 } 321 if trimSpace { 322 l.pos += trimMarkerLen 323 } 324 l.pos += Pos(len(l.rightDelim)) 325 if trimSpace { 326 l.pos += leftTrimLength(l.input[l.pos:]) 327 } 328 l.ignore() 329 return lexText 330 } 331 332 // lexRightDelim scans the right delimiter, which is known to be present, possibly with a trim marker. 333 func lexRightDelim(l *lexer) stateFn { 334 trimSpace := strings.HasPrefix(l.input[l.pos:], rightTrimMarker) 335 if trimSpace { 336 l.pos += trimMarkerLen 337 l.ignore() 338 } 339 l.pos += Pos(len(l.rightDelim)) 340 l.emit(itemRightDelim) 341 if trimSpace { 342 l.pos += leftTrimLength(l.input[l.pos:]) 343 l.ignore() 344 } 345 return lexText 346 } 347 348 // lexInsideAction scans the elements inside action delimiters. 349 func lexInsideAction(l *lexer) stateFn { 350 // Either number, quoted string, or identifier. 351 // Spaces separate arguments; runs of spaces turn into itemSpace. 352 // Pipe symbols separate and are emitted. 353 delim, _ := l.atRightDelim() 354 if delim { 355 if l.parenDepth == 0 { 356 return lexRightDelim 357 } 358 return l.errorf("unclosed left paren") 359 } 360 switch r := l.next(); { 361 case r == eof || isEndOfLine(r): 362 return l.errorf("unclosed action") 363 case isSpace(r): 364 return lexSpace 365 case r == ':': 366 if l.next() != '=' { 367 return l.errorf("expected :=") 368 } 369 l.emit(itemColonEquals) 370 case r == '|': 371 l.emit(itemPipe) 372 case r == '"': 373 return lexQuote 374 case r == '`': 375 return lexRawQuote 376 case r == '$': 377 return lexVariable 378 case r == '\'': 379 return lexChar 380 case r == '.': 381 // special look-ahead for ".field" so we don't break l.backup(). 382 if l.pos < Pos(len(l.input)) { 383 r := l.input[l.pos] 384 if r < '0' || '9' < r { 385 return lexField 386 } 387 } 388 fallthrough // '.' can start a number. 389 case r == '+' || r == '-' || ('0' <= r && r <= '9'): 390 l.backup() 391 return lexNumber 392 case isAlphaNumeric(r): 393 l.backup() 394 return lexIdentifier 395 case r == '(': 396 l.emit(itemLeftParen) 397 l.parenDepth++ 398 case r == ')': 399 l.emit(itemRightParen) 400 l.parenDepth-- 401 if l.parenDepth < 0 { 402 return l.errorf("unexpected right paren %#U", r) 403 } 404 case r <= unicode.MaxASCII && unicode.IsPrint(r): 405 l.emit(itemChar) 406 return lexInsideAction 407 default: 408 return l.errorf("unrecognized character in action: %#U", r) 409 } 410 return lexInsideAction 411 } 412 413 // lexSpace scans a run of space characters. 414 // One space has already been seen. 415 func lexSpace(l *lexer) stateFn { 416 for isSpace(l.peek()) { 417 l.next() 418 } 419 l.emit(itemSpace) 420 return lexInsideAction 421 } 422 423 // lexIdentifier scans an alphanumeric. 424 func lexIdentifier(l *lexer) stateFn { 425 Loop: 426 for { 427 switch r := l.next(); { 428 case isAlphaNumeric(r): 429 // absorb. 430 default: 431 l.backup() 432 word := l.input[l.start:l.pos] 433 if !l.atTerminator() { 434 return l.errorf("bad character %#U", r) 435 } 436 switch { 437 case key[word] > itemKeyword: 438 l.emit(key[word]) 439 case word[0] == '.': 440 l.emit(itemField) 441 case word == "true", word == "false": 442 l.emit(itemBool) 443 default: 444 l.emit(itemIdentifier) 445 } 446 break Loop 447 } 448 } 449 return lexInsideAction 450 } 451 452 // lexField scans a field: .Alphanumeric. 453 // The . has been scanned. 454 func lexField(l *lexer) stateFn { 455 return lexFieldOrVariable(l, itemField) 456 } 457 458 // lexVariable scans a Variable: $Alphanumeric. 459 // The $ has been scanned. 460 func lexVariable(l *lexer) stateFn { 461 if l.atTerminator() { // Nothing interesting follows -> "$". 462 l.emit(itemVariable) 463 return lexInsideAction 464 } 465 return lexFieldOrVariable(l, itemVariable) 466 } 467 468 // lexVariable scans a field or variable: [.$]Alphanumeric. 469 // The . or $ has been scanned. 470 func lexFieldOrVariable(l *lexer, typ itemType) stateFn { 471 if l.atTerminator() { // Nothing interesting follows -> "." or "$". 472 if typ == itemVariable { 473 l.emit(itemVariable) 474 } else { 475 l.emit(itemDot) 476 } 477 return lexInsideAction 478 } 479 var r rune 480 for { 481 r = l.next() 482 if !isAlphaNumeric(r) { 483 l.backup() 484 break 485 } 486 } 487 if !l.atTerminator() { 488 return l.errorf("bad character %#U", r) 489 } 490 l.emit(typ) 491 return lexInsideAction 492 } 493 494 // atTerminator reports whether the input is at valid termination character to 495 // appear after an identifier. Breaks .X.Y into two pieces. Also catches cases 496 // like "$x+2" not being acceptable without a space, in case we decide one 497 // day to implement arithmetic. 498 func (l *lexer) atTerminator() bool { 499 r := l.peek() 500 if isSpace(r) || isEndOfLine(r) { 501 return true 502 } 503 switch r { 504 case eof, '.', ',', '|', ':', ')', '(': 505 return true 506 } 507 // Does r start the delimiter? This can be ambiguous (with delim=="//", $x/2 will 508 // succeed but should fail) but only in extremely rare cases caused by willfully 509 // bad choice of delimiter. 510 if rd, _ := utf8.DecodeRuneInString(l.rightDelim); rd == r { 511 return true 512 } 513 return false 514 } 515 516 // lexChar scans a character constant. The initial quote is already 517 // scanned. Syntax checking is done by the parser. 518 func lexChar(l *lexer) stateFn { 519 Loop: 520 for { 521 switch l.next() { 522 case '\\': 523 if r := l.next(); r != eof && r != '\n' { 524 break 525 } 526 fallthrough 527 case eof, '\n': 528 return l.errorf("unterminated character constant") 529 case '\'': 530 break Loop 531 } 532 } 533 l.emit(itemCharConstant) 534 return lexInsideAction 535 } 536 537 // lexNumber scans a number: decimal, octal, hex, float, or imaginary. This 538 // isn't a perfect number scanner - for instance it accepts "." and "0x0.2" 539 // and "089" - but when it's wrong the input is invalid and the parser (via 540 // strconv) will notice. 541 func lexNumber(l *lexer) stateFn { 542 if !l.scanNumber() { 543 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos]) 544 } 545 if sign := l.peek(); sign == '+' || sign == '-' { 546 // Complex: 1+2i. No spaces, must end in 'i'. 547 if !l.scanNumber() || l.input[l.pos-1] != 'i' { 548 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos]) 549 } 550 l.emit(itemComplex) 551 } else { 552 l.emit(itemNumber) 553 } 554 return lexInsideAction 555 } 556 557 func (l *lexer) scanNumber() bool { 558 // Optional leading sign. 559 l.accept("+-") 560 // Is it hex? 561 digits := "0123456789" 562 if l.accept("0") && l.accept("xX") { 563 digits = "0123456789abcdefABCDEF" 564 } 565 l.acceptRun(digits) 566 if l.accept(".") { 567 l.acceptRun(digits) 568 } 569 if l.accept("eE") { 570 l.accept("+-") 571 l.acceptRun("0123456789") 572 } 573 // Is it imaginary? 574 l.accept("i") 575 // Next thing mustn't be alphanumeric. 576 if isAlphaNumeric(l.peek()) { 577 l.next() 578 return false 579 } 580 return true 581 } 582 583 // lexQuote scans a quoted string. 584 func lexQuote(l *lexer) stateFn { 585 Loop: 586 for { 587 switch l.next() { 588 case '\\': 589 if r := l.next(); r != eof && r != '\n' { 590 break 591 } 592 fallthrough 593 case eof, '\n': 594 return l.errorf("unterminated quoted string") 595 case '"': 596 break Loop 597 } 598 } 599 l.emit(itemString) 600 return lexInsideAction 601 } 602 603 // lexRawQuote scans a raw quoted string. 604 func lexRawQuote(l *lexer) stateFn { 605 Loop: 606 for { 607 switch l.next() { 608 case eof: 609 return l.errorf("unterminated raw quoted string") 610 case '`': 611 break Loop 612 } 613 } 614 l.emit(itemRawString) 615 return lexInsideAction 616 } 617 618 // isSpace reports whether r is a space character. 619 func isSpace(r rune) bool { 620 return r == ' ' || r == '\t' 621 } 622 623 // isEndOfLine reports whether r is an end-of-line character. 624 func isEndOfLine(r rune) bool { 625 return r == '\r' || r == '\n' 626 } 627 628 // isAlphaNumeric reports whether r is an alphabetic, digit, or underscore. 629 func isAlphaNumeric(r rune) bool { 630 return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r) 631 }