github.com/geraldss/go/src@v0.0.0-20210511222824-ac7d0ebfc235/text/template/parse/lex.go

// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package parse

import (
	"fmt"
	"strings"
	"unicode"
	"unicode/utf8"
)

// item represents a token or text string returned from the scanner.
type item struct {
	typ  itemType // The type of this item.
	pos  Pos      // The starting position, in bytes, of this item in the input string.
	val  string   // The value of this item.
	line int      // The line number at the start of this item.
}

func (i item) String() string {
	switch {
	case i.typ == itemEOF:
		return "EOF"
	case i.typ == itemError:
		return i.val
	case i.typ > itemKeyword:
		return fmt.Sprintf("<%s>", i.val)
	case len(i.val) > 10:
		return fmt.Sprintf("%.10q...", i.val)
	}
	return fmt.Sprintf("%q", i.val)
}

// itemType identifies the type of lex items.
type itemType int

const (
	itemError        itemType = iota // error occurred; value is text of error
	itemBool                         // boolean constant
	itemChar                         // printable ASCII character; grab bag for comma etc.
	itemCharConstant                 // character constant
	itemComment                      // comment text
	itemComplex                      // complex constant (1+2i); imaginary is just a number
	itemAssign                       // equals ('=') introducing an assignment
	itemDeclare                      // colon-equals (':=') introducing a declaration
	itemEOF
	itemField      // alphanumeric identifier starting with '.'
	itemIdentifier // alphanumeric identifier not starting with '.'
	itemLeftDelim  // left action delimiter
	itemLeftParen  // '(' inside action
	itemNumber     // simple number, including imaginary
	itemPipe       // pipe symbol
	itemRawString  // raw quoted string (includes quotes)
	itemRightDelim // right action delimiter
	itemRightParen // ')' inside action
	itemSpace      // run of spaces separating arguments
	itemString     // quoted string (includes quotes)
	itemText       // plain text
	itemVariable   // variable starting with '$', such as '$' or '$1' or '$hello'
	// Keywords appear after all the rest.
	itemKeyword  // used only to delimit the keywords
	itemBlock    // block keyword
	itemDot      // the cursor, spelled '.'
	itemDefine   // define keyword
	itemElse     // else keyword
	itemEnd      // end keyword
	itemIf       // if keyword
	itemNil      // the untyped nil constant, easiest to treat as a keyword
	itemRange    // range keyword
	itemTemplate // template keyword
	itemWith     // with keyword
)

var key = map[string]itemType{
	".":        itemDot,
	"block":    itemBlock,
	"define":   itemDefine,
	"else":     itemElse,
	"end":      itemEnd,
	"if":       itemIf,
	"range":    itemRange,
	"nil":      itemNil,
	"template": itemTemplate,
	"with":     itemWith,
}

const eof = -1

// Trimming spaces.
// If the action begins "{{- " rather than "{{", then all space/tab/newlines
// preceding the action are trimmed; conversely, if it ends " -}}", all
// space/tab/newlines following the action are trimmed. This is done entirely
// in the lexer; the parser never sees it happen. We require an ASCII space
// character (' ', \t, \r, \n) to be present to avoid ambiguity with things
// like "{{-3}}". It reads better with the space present anyway. For
// simplicity, only ASCII counts as space here.
const (
	spaceChars = " \t\r\n" // These are the space characters defined by Go itself.
	trimMarker    = '-'        // Attached to left/right delimiter, trims trailing spaces from preceding/following text.
	trimMarkerLen = Pos(1 + 1) // marker plus space before or after
)

// stateFn represents the state of the scanner as a function that returns the next state.
type stateFn func(*lexer) stateFn

// lexer holds the state of the scanner.
type lexer struct {
	name        string    // the name of the input; used only for error reports
	input       string    // the string being scanned
	leftDelim   string    // start of action
	rightDelim  string    // end of action
	emitComment bool      // emit itemComment tokens.
	pos         Pos       // current position in the input
	start       Pos       // start position of this item
	width       Pos       // width of last rune read from input
	items       chan item // channel of scanned items
	parenDepth  int       // nesting depth of ( ) exprs
	line        int       // 1+number of newlines seen
	startLine   int       // start line of this item
}

// next returns the next rune in the input.
func (l *lexer) next() rune {
	if int(l.pos) >= len(l.input) {
		l.width = 0
		return eof
	}
	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
	l.width = Pos(w)
	l.pos += l.width
	if r == '\n' {
		l.line++
	}
	return r
}

// peek returns but does not consume the next rune in the input.
func (l *lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}

// backup steps back one rune. Can only be called once per call of next.
func (l *lexer) backup() {
	l.pos -= l.width
	// Correct newline count.
	if l.width == 1 && l.input[l.pos] == '\n' {
		l.line--
	}
}

// emit passes an item back to the client.
func (l *lexer) emit(t itemType) {
	l.items <- item{t, l.start, l.input[l.start:l.pos], l.startLine}
	l.start = l.pos
	l.startLine = l.line
}

// ignore skips over the pending input before this point.
func (l *lexer) ignore() {
	l.line += strings.Count(l.input[l.start:l.pos], "\n")
	l.start = l.pos
	l.startLine = l.line
}

// accept consumes the next rune if it's from the valid set.
func (l *lexer) accept(valid string) bool {
	if strings.ContainsRune(valid, l.next()) {
		return true
	}
	l.backup()
	return false
}

// acceptRun consumes a run of runes from the valid set.
func (l *lexer) acceptRun(valid string) {
	for strings.ContainsRune(valid, l.next()) {
	}
	l.backup()
}

// errorf returns an error token and terminates the scan by passing
// back a nil pointer that will be the next state, terminating l.nextItem.
func (l *lexer) errorf(format string, args ...interface{}) stateFn {
	l.items <- item{itemError, l.start, fmt.Sprintf(format, args...), l.startLine}
	return nil
}

// nextItem returns the next item from the input.
// Called by the parser, not in the lexing goroutine.
func (l *lexer) nextItem() item {
	return <-l.items
}

// drain drains the output so the lexing goroutine will exit.
// Called by the parser, not in the lexing goroutine.
func (l *lexer) drain() {
	for range l.items {
	}
}

// lex creates a new scanner for the input string.
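// If left or right is empty, the default "{{" or "}}" delimiter is used.
// Lexing runs in its own goroutine; items are delivered to the caller
// (normally the parser) over the items channel via nextItem.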
func lex(name, input, left, right string, emitComment bool) *lexer {
	if left == "" {
		left = leftDelim
	}
	if right == "" {
		right = rightDelim
	}
	l := &lexer{
		name:        name,
		input:       input,
		leftDelim:   left,
		rightDelim:  right,
		emitComment: emitComment,
		items:       make(chan item),
		line:        1,
		startLine:   1,
	}
	go l.run()
	return l
}

// run runs the state machine for the lexer.
func (l *lexer) run() {
	for state := lexText; state != nil; {
		state = state(l)
	}
	close(l.items)
}

// state functions

const (
	leftDelim    = "{{"
	rightDelim   = "}}"
	leftComment  = "/*"
	rightComment = "*/"
)

// lexText scans until an opening action delimiter, "{{".
func lexText(l *lexer) stateFn {
	l.width = 0
	if x := strings.Index(l.input[l.pos:], l.leftDelim); x >= 0 {
		ldn := Pos(len(l.leftDelim))
		l.pos += Pos(x)
		trimLength := Pos(0)
		if hasLeftTrimMarker(l.input[l.pos+ldn:]) {
			trimLength = rightTrimLength(l.input[l.start:l.pos])
		}
		l.pos -= trimLength
		if l.pos > l.start {
			l.line += strings.Count(l.input[l.start:l.pos], "\n")
			l.emit(itemText)
		}
		l.pos += trimLength
		l.ignore()
		return lexLeftDelim
	}
	l.pos = Pos(len(l.input))
	// Correctly reached EOF.
	if l.pos > l.start {
		l.line += strings.Count(l.input[l.start:l.pos], "\n")
		l.emit(itemText)
	}
	l.emit(itemEOF)
	return nil
}

// rightTrimLength returns the length of the spaces at the end of the string.
func rightTrimLength(s string) Pos {
	return Pos(len(s) - len(strings.TrimRight(s, spaceChars)))
}

// atRightDelim reports whether the lexer is at a right delimiter, possibly preceded by a trim marker.
func (l *lexer) atRightDelim() (delim, trimSpaces bool) {
	if hasRightTrimMarker(l.input[l.pos:]) && strings.HasPrefix(l.input[l.pos+trimMarkerLen:], l.rightDelim) { // With trim marker.
		return true, true
	}
	if strings.HasPrefix(l.input[l.pos:], l.rightDelim) { // Without trim marker.
		return true, false
	}
	return false, false
}

// leftTrimLength returns the length of the spaces at the beginning of the string.
func leftTrimLength(s string) Pos {
	return Pos(len(s) - len(strings.TrimLeft(s, spaceChars)))
}

// lexLeftDelim scans the left delimiter, which is known to be present, possibly with a trim marker.
func lexLeftDelim(l *lexer) stateFn {
	l.pos += Pos(len(l.leftDelim))
	trimSpace := hasLeftTrimMarker(l.input[l.pos:])
	afterMarker := Pos(0)
	if trimSpace {
		afterMarker = trimMarkerLen
	}
	if strings.HasPrefix(l.input[l.pos+afterMarker:], leftComment) {
		l.pos += afterMarker
		l.ignore()
		return lexComment
	}
	l.emit(itemLeftDelim)
	l.pos += afterMarker
	l.ignore()
	l.parenDepth = 0
	return lexInsideAction
}

// lexComment scans a comment. The left comment marker is known to be present.
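// The comment runs to the right comment marker and must be immediately
// followed by the closing action delimiter (possibly trim-marked);
// otherwise an error item is emitted.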
func lexComment(l *lexer) stateFn {
	l.pos += Pos(len(leftComment))
	i := strings.Index(l.input[l.pos:], rightComment)
	if i < 0 {
		return l.errorf("unclosed comment")
	}
	l.pos += Pos(i + len(rightComment))
	delim, trimSpace := l.atRightDelim()
	if !delim {
		return l.errorf("comment ends before closing delimiter")
	}
	if l.emitComment {
		l.emit(itemComment)
	}
	if trimSpace {
		l.pos += trimMarkerLen
	}
	l.pos += Pos(len(l.rightDelim))
	if trimSpace {
		l.pos += leftTrimLength(l.input[l.pos:])
	}
	l.ignore()
	return lexText
}

// lexRightDelim scans the right delimiter, which is known to be present, possibly with a trim marker.
func lexRightDelim(l *lexer) stateFn {
	trimSpace := hasRightTrimMarker(l.input[l.pos:])
	if trimSpace {
		l.pos += trimMarkerLen
		l.ignore()
	}
	l.pos += Pos(len(l.rightDelim))
	l.emit(itemRightDelim)
	if trimSpace {
		l.pos += leftTrimLength(l.input[l.pos:])
		l.ignore()
	}
	return lexText
}

// lexInsideAction scans the elements inside action delimiters.
func lexInsideAction(l *lexer) stateFn {
	// Either number, quoted string, or identifier.
	// Spaces separate arguments; runs of spaces turn into itemSpace.
	// Pipe symbols separate and are emitted.
	delim, _ := l.atRightDelim()
	if delim {
		if l.parenDepth == 0 {
			return lexRightDelim
		}
		return l.errorf("unclosed left paren")
	}
	switch r := l.next(); {
	case r == eof:
		return l.errorf("unclosed action")
	case isSpace(r):
		l.backup() // Put space back in case we have " -}}".
		return lexSpace
	case r == '=':
		l.emit(itemAssign)
	case r == ':':
		if l.next() != '=' {
			return l.errorf("expected :=")
		}
		l.emit(itemDeclare)
	case r == '|':
		l.emit(itemPipe)
	case r == '"':
		return lexQuote
	case r == '`':
		return lexRawQuote
	case r == '$':
		return lexVariable
	case r == '\'':
		return lexChar
	case r == '.':
		// special look-ahead for ".field" so we don't break l.backup().
		if l.pos < Pos(len(l.input)) {
			r := l.input[l.pos]
			if r < '0' || '9' < r {
				return lexField
			}
		}
		fallthrough // '.' can start a number.
	case r == '+' || r == '-' || ('0' <= r && r <= '9'):
		l.backup()
		return lexNumber
	case isAlphaNumeric(r):
		l.backup()
		return lexIdentifier
	case r == '(':
		l.emit(itemLeftParen)
		l.parenDepth++
	case r == ')':
		l.emit(itemRightParen)
		l.parenDepth--
		if l.parenDepth < 0 {
			return l.errorf("unexpected right paren %#U", r)
		}
	case r <= unicode.MaxASCII && unicode.IsPrint(r):
		l.emit(itemChar)
	default:
		return l.errorf("unrecognized character in action: %#U", r)
	}
	return lexInsideAction
}

// lexSpace scans a run of space characters.
// We have not consumed the first space, which is known to be present.
// Take care if there is a trim-marked right delimiter, which starts with a space.
func lexSpace(l *lexer) stateFn {
	var r rune
	var numSpaces int
	for {
		r = l.peek()
		if !isSpace(r) {
			break
		}
		l.next()
		numSpaces++
	}
	// Be careful about a trim-marked closing delimiter, which has a minus
	// after a space. We know there is a space, so check for the '-' that might follow.
	if hasRightTrimMarker(l.input[l.pos-1:]) && strings.HasPrefix(l.input[l.pos-1+trimMarkerLen:], l.rightDelim) {
		l.backup() // Before the space.
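		// Only that last space belongs to the trim marker. With more than one
		// space, fall through and emit the earlier run as itemSpace; the
		// delimiter is then picked up on the next lexInsideAction pass.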
		if numSpaces == 1 {
			return lexRightDelim // On the delim, so go right to that.
		}
	}
	l.emit(itemSpace)
	return lexInsideAction
}

// lexIdentifier scans an alphanumeric.
func lexIdentifier(l *lexer) stateFn {
Loop:
	for {
		switch r := l.next(); {
		case isAlphaNumeric(r):
			// absorb.
		default:
			l.backup()
			word := l.input[l.start:l.pos]
			if !l.atTerminator() {
				return l.errorf("bad character %#U", r)
			}
			switch {
			case key[word] > itemKeyword:
				l.emit(key[word])
			case word[0] == '.':
				l.emit(itemField)
			case word == "true", word == "false":
				l.emit(itemBool)
			default:
				l.emit(itemIdentifier)
			}
			break Loop
		}
	}
	return lexInsideAction
}

// lexField scans a field: .Alphanumeric.
// The . has been scanned.
func lexField(l *lexer) stateFn {
	return lexFieldOrVariable(l, itemField)
}

// lexVariable scans a variable: $Alphanumeric.
// The $ has been scanned.
func lexVariable(l *lexer) stateFn {
	if l.atTerminator() { // Nothing interesting follows -> "$".
		l.emit(itemVariable)
		return lexInsideAction
	}
	return lexFieldOrVariable(l, itemVariable)
}

// lexFieldOrVariable scans a field or variable: [.$]Alphanumeric.
// The . or $ has been scanned.
func lexFieldOrVariable(l *lexer, typ itemType) stateFn {
	if l.atTerminator() { // Nothing interesting follows -> "." or "$".
		if typ == itemVariable {
			l.emit(itemVariable)
		} else {
			l.emit(itemDot)
		}
		return lexInsideAction
	}
	var r rune
	for {
		r = l.next()
		if !isAlphaNumeric(r) {
			l.backup()
			break
		}
	}
	if !l.atTerminator() {
		return l.errorf("bad character %#U", r)
	}
	l.emit(typ)
	return lexInsideAction
}

// atTerminator reports whether the input is at a valid termination character to
// appear after an identifier. Breaks .X.Y into two pieces. Also catches cases
// like "$x+2" not being acceptable without a space, in case we decide one
// day to implement arithmetic.
func (l *lexer) atTerminator() bool {
	r := l.peek()
	if isSpace(r) {
		return true
	}
	switch r {
	case eof, '.', ',', '|', ':', ')', '(':
		return true
	}
	// Does r start the delimiter? This can be ambiguous (with delim=="//", $x/2 will
	// succeed but should fail) but only in extremely rare cases caused by willfully
	// bad choice of delimiter.
	if rd, _ := utf8.DecodeRuneInString(l.rightDelim); rd == r {
		return true
	}
	return false
}

// lexChar scans a character constant. The initial quote is already
// scanned. Syntax checking is done by the parser.
func lexChar(l *lexer) stateFn {
Loop:
	for {
		switch l.next() {
		case '\\':
			if r := l.next(); r != eof && r != '\n' {
				break
			}
			fallthrough
		case eof, '\n':
			return l.errorf("unterminated character constant")
		case '\'':
			break Loop
		}
	}
	l.emit(itemCharConstant)
	return lexInsideAction
}

// lexNumber scans a number: decimal, octal, hex, float, or imaginary. This
// isn't a perfect number scanner - for instance it accepts "." and "0x0.2"
// and "089" - but when it's wrong the input is invalid and the parser (via
// strconv) will notice.
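// A '+' or '-' immediately after the first number starts the imaginary part
// of a complex constant such as 1+2i, which must end in 'i'.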
func lexNumber(l *lexer) stateFn {
	if !l.scanNumber() {
		return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
	}
	if sign := l.peek(); sign == '+' || sign == '-' {
		// Complex: 1+2i. No spaces, must end in 'i'.
		if !l.scanNumber() || l.input[l.pos-1] != 'i' {
			return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
		}
		l.emit(itemComplex)
	} else {
		l.emit(itemNumber)
	}
	return lexInsideAction
}

func (l *lexer) scanNumber() bool {
	// Optional leading sign.
	l.accept("+-")
	// Is it hex, octal, or binary?
	digits := "0123456789_"
	if l.accept("0") {
		// Note: Leading 0 does not mean octal in floats.
		if l.accept("xX") {
			digits = "0123456789abcdefABCDEF_"
		} else if l.accept("oO") {
			digits = "01234567_"
		} else if l.accept("bB") {
			digits = "01_"
		}
	}
	l.acceptRun(digits)
	if l.accept(".") {
		l.acceptRun(digits)
	}
	if len(digits) == 10+1 && l.accept("eE") {
		l.accept("+-")
		l.acceptRun("0123456789_")
	}
	if len(digits) == 16+6+1 && l.accept("pP") {
		l.accept("+-")
		l.acceptRun("0123456789_")
	}
	// Is it imaginary?
	l.accept("i")
	// Next thing mustn't be alphanumeric.
	if isAlphaNumeric(l.peek()) {
		l.next()
		return false
	}
	return true
}

// lexQuote scans a quoted string.
func lexQuote(l *lexer) stateFn {
Loop:
	for {
		switch l.next() {
		case '\\':
			if r := l.next(); r != eof && r != '\n' {
				break
			}
			fallthrough
		case eof, '\n':
			return l.errorf("unterminated quoted string")
		case '"':
			break Loop
		}
	}
	l.emit(itemString)
	return lexInsideAction
}

// lexRawQuote scans a raw quoted string.
func lexRawQuote(l *lexer) stateFn {
Loop:
	for {
		switch l.next() {
		case eof:
			return l.errorf("unterminated raw quoted string")
		case '`':
			break Loop
		}
	}
	l.emit(itemRawString)
	return lexInsideAction
}

// isSpace reports whether r is a space character.
func isSpace(r rune) bool {
	return r == ' ' || r == '\t' || r == '\r' || r == '\n'
}

// isAlphaNumeric reports whether r is an alphabetic, digit, or underscore.
func isAlphaNumeric(r rune) bool {
	return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
}

func hasLeftTrimMarker(s string) bool {
	return len(s) >= 2 && s[0] == trimMarker && isSpace(rune(s[1]))
}

func hasRightTrimMarker(s string) bool {
	return len(s) >= 2 && isSpace(rune(s[0])) && s[1] == trimMarker
}
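
// collectItems is not part of the original file; it is a minimal sketch of
// how a caller (normally the parser) might drive this lexer: create it with
// lex, then pull items with nextItem until itemEOF or itemError. The name
// collectItems, the input name "sketch", and the use of the default
// delimiters (empty left/right arguments) are assumptions for illustration
// only.
func collectItems(input string) []item {
	l := lex("sketch", input, "", "", false) // default "{{" and "}}", no comment items
	var items []item
	for {
		it := l.nextItem()
		items = append(items, it)
		// itemEOF or itemError is always the last item; the lexing
		// goroutine exits and closes l.items after sending it.
		if it.typ == itemEOF || it.typ == itemError {
			break
		}
	}
	return items
}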