github.com/yukk001/go1.10.8@v0.0.0-20190813125351-6df2d3982e20/src/text/template/parse/lex.go

// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package parse

import (
	"fmt"
	"strings"
	"unicode"
	"unicode/utf8"
)

// item represents a token or text string returned from the scanner.
type item struct {
	typ  itemType // The type of this item.
	pos  Pos      // The starting position, in bytes, of this item in the input string.
	val  string   // The value of this item.
	line int      // The line number at the start of this item.
}

func (i item) String() string {
	switch {
	case i.typ == itemEOF:
		return "EOF"
	case i.typ == itemError:
		return i.val
	case i.typ > itemKeyword:
		return fmt.Sprintf("<%s>", i.val)
	case len(i.val) > 10:
		return fmt.Sprintf("%.10q...", i.val)
	}
	return fmt.Sprintf("%q", i.val)
}

// itemType identifies the type of lex items.
type itemType int

const (
	itemError        itemType = iota // error occurred; value is text of error
	itemBool                         // boolean constant
	itemChar                         // printable ASCII character; grab bag for comma etc.
	itemCharConstant                 // character constant
	itemComplex                      // complex constant (1+2i); imaginary is just a number
	itemColonEquals                  // colon-equals (':=') introducing a declaration
	itemEOF
	itemField      // alphanumeric identifier starting with '.'
	itemIdentifier // alphanumeric identifier not starting with '.'
	itemLeftDelim  // left action delimiter
	itemLeftParen  // '(' inside action
	itemNumber     // simple number, including imaginary
	itemPipe       // pipe symbol
	itemRawString  // raw quoted string (includes quotes)
	itemRightDelim // right action delimiter
	itemRightParen // ')' inside action
	itemSpace      // run of spaces separating arguments
	itemString     // quoted string (includes quotes)
	itemText       // plain text
	itemVariable   // variable starting with '$', such as '$' or '$1' or '$hello'
	// Keywords appear after all the rest.
	itemKeyword  // used only to delimit the keywords
	itemBlock    // block keyword
	itemDot      // the cursor, spelled '.'
	itemDefine   // define keyword
	itemElse     // else keyword
	itemEnd      // end keyword
	itemIf       // if keyword
	itemNil      // the untyped nil constant, easiest to treat as a keyword
	itemRange    // range keyword
	itemTemplate // template keyword
	itemWith     // with keyword
)

var key = map[string]itemType{
	".":        itemDot,
	"block":    itemBlock,
	"define":   itemDefine,
	"else":     itemElse,
	"end":      itemEnd,
	"if":       itemIf,
	"range":    itemRange,
	"nil":      itemNil,
	"template": itemTemplate,
	"with":     itemWith,
}

const eof = -1

// Trimming spaces.
// If the action begins "{{- " rather than "{{", then all space/tab/newlines
// preceding the action are trimmed; conversely, if it ends " -}}", all
// space/tab/newlines following the action are trimmed. This is done entirely
// in the lexer; the parser never sees it happen. We require an ASCII space to
// be present to avoid ambiguity with things like "{{-3}}". It reads better
// with the space present anyway. For simplicity, only ASCII space does the job.
const (
	spaceChars      = " \t\r\n" // These are the space characters defined by Go itself.
	leftTrimMarker  = "- "      // Attached to left delimiter, trims trailing spaces from preceding text.
	rightTrimMarker = " -"      // Attached to right delimiter, trims leading spaces from following text.
	trimMarkerLen   = Pos(len(leftTrimMarker))
)
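
// Illustrative note (not in the upstream file): given the input
// "hello   {{- 23 -}}   world", the lexer emits itemText "hello" with its
// trailing spaces trimmed, then itemLeftDelim, itemNumber "23", and
// itemRightDelim, then itemText "world" with its leading spaces trimmed.
// Without the ASCII space after the delimiter, "{{-3}}" instead lexes "-3"
// as a number inside the action.
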
// stateFn represents the state of the scanner as a function that returns the next state.
type stateFn func(*lexer) stateFn

// lexer holds the state of the scanner.
type lexer struct {
	name       string    // the name of the input; used only for error reports
	input      string    // the string being scanned
	leftDelim  string    // start of action
	rightDelim string    // end of action
	pos        Pos       // current position in the input
	start      Pos       // start position of this item
	width      Pos       // width of last rune read from input
	items      chan item // channel of scanned items
	parenDepth int       // nesting depth of ( ) exprs
	line       int       // 1+number of newlines seen
}

// next returns the next rune in the input.
func (l *lexer) next() rune {
	if int(l.pos) >= len(l.input) {
		l.width = 0
		return eof
	}
	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
	l.width = Pos(w)
	l.pos += l.width
	if r == '\n' {
		l.line++
	}
	return r
}

// peek returns but does not consume the next rune in the input.
func (l *lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}

// backup steps back one rune. Can only be called once per call of next.
func (l *lexer) backup() {
	l.pos -= l.width
	// Correct newline count.
	if l.width == 1 && l.input[l.pos] == '\n' {
		l.line--
	}
}

// emit passes an item back to the client.
func (l *lexer) emit(t itemType) {
	l.items <- item{t, l.start, l.input[l.start:l.pos], l.line}
	// Some items contain text internally. If so, count their newlines.
	switch t {
	case itemText, itemRawString, itemLeftDelim, itemRightDelim:
		l.line += strings.Count(l.input[l.start:l.pos], "\n")
	}
	l.start = l.pos
}

// ignore skips over the pending input before this point.
func (l *lexer) ignore() {
	l.line += strings.Count(l.input[l.start:l.pos], "\n")
	l.start = l.pos
}

// accept consumes the next rune if it's from the valid set.
func (l *lexer) accept(valid string) bool {
	if strings.ContainsRune(valid, l.next()) {
		return true
	}
	l.backup()
	return false
}

// acceptRun consumes a run of runes from the valid set.
func (l *lexer) acceptRun(valid string) {
	for strings.ContainsRune(valid, l.next()) {
	}
	l.backup()
}

// errorf returns an error token and terminates the scan by passing
// back a nil pointer that will be the next state, terminating l.nextItem.
func (l *lexer) errorf(format string, args ...interface{}) stateFn {
	l.items <- item{itemError, l.start, fmt.Sprintf(format, args...), l.line}
	return nil
}

// nextItem returns the next item from the input.
// Called by the parser, not in the lexing goroutine.
func (l *lexer) nextItem() item {
	return <-l.items
}

// drain drains the output so the lexing goroutine will exit.
// Called by the parser, not in the lexing goroutine.
func (l *lexer) drain() {
	for range l.items {
	}
}
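
// collectItems is an illustrative sketch and is not part of the upstream file.
// It shows the intended parser-side use of this API: lex starts the scanning
// goroutine, and nextItem is called until itemEOF or itemError arrives, after
// which run closes the items channel.
func collectItems(name, input string) []item {
	l := lex(name, input, "", "") // "" selects the default "{{" and "}}" delimiters
	var items []item
	for {
		it := l.nextItem()
		items = append(items, it)
		if it.typ == itemEOF || it.typ == itemError {
			return items
		}
	}
}
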
// lex creates a new scanner for the input string.
func lex(name, input, left, right string) *lexer {
	if left == "" {
		left = leftDelim
	}
	if right == "" {
		right = rightDelim
	}
	l := &lexer{
		name:       name,
		input:      input,
		leftDelim:  left,
		rightDelim: right,
		items:      make(chan item),
		line:       1,
	}
	go l.run()
	return l
}

// run runs the state machine for the lexer.
func (l *lexer) run() {
	for state := lexText; state != nil; {
		state = state(l)
	}
	close(l.items)
}

// state functions

const (
	leftDelim    = "{{"
	rightDelim   = "}}"
	leftComment  = "/*"
	rightComment = "*/"
)

// lexText scans until an opening action delimiter, "{{".
func lexText(l *lexer) stateFn {
	l.width = 0
	if x := strings.Index(l.input[l.pos:], l.leftDelim); x >= 0 {
		ldn := Pos(len(l.leftDelim))
		l.pos += Pos(x)
		trimLength := Pos(0)
		if strings.HasPrefix(l.input[l.pos+ldn:], leftTrimMarker) {
			trimLength = rightTrimLength(l.input[l.start:l.pos])
		}
		l.pos -= trimLength
		if l.pos > l.start {
			l.emit(itemText)
		}
		l.pos += trimLength
		l.ignore()
		return lexLeftDelim
	} else {
		l.pos = Pos(len(l.input))
	}
	// Correctly reached EOF.
	if l.pos > l.start {
		l.emit(itemText)
	}
	l.emit(itemEOF)
	return nil
}

// rightTrimLength returns the length of the spaces at the end of the string.
func rightTrimLength(s string) Pos {
	return Pos(len(s) - len(strings.TrimRight(s, spaceChars)))
}

// atRightDelim reports whether the lexer is at a right delimiter, possibly preceded by a trim marker.
func (l *lexer) atRightDelim() (delim, trimSpaces bool) {
	if strings.HasPrefix(l.input[l.pos:], l.rightDelim) {
		return true, false
	}
	// The right delim might have the marker before.
	if strings.HasPrefix(l.input[l.pos:], rightTrimMarker) &&
		strings.HasPrefix(l.input[l.pos+trimMarkerLen:], l.rightDelim) {
		return true, true
	}
	return false, false
}

// leftTrimLength returns the length of the spaces at the beginning of the string.
func leftTrimLength(s string) Pos {
	return Pos(len(s) - len(strings.TrimLeft(s, spaceChars)))
}

// lexLeftDelim scans the left delimiter, which is known to be present, possibly with a trim marker.
func lexLeftDelim(l *lexer) stateFn {
	l.pos += Pos(len(l.leftDelim))
	trimSpace := strings.HasPrefix(l.input[l.pos:], leftTrimMarker)
	afterMarker := Pos(0)
	if trimSpace {
		afterMarker = trimMarkerLen
	}
	if strings.HasPrefix(l.input[l.pos+afterMarker:], leftComment) {
		l.pos += afterMarker
		l.ignore()
		return lexComment
	}
	l.emit(itemLeftDelim)
	l.pos += afterMarker
	l.ignore()
	l.parenDepth = 0
	return lexInsideAction
}
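
// commentElision is an illustrative sketch and is not part of the upstream
// file. Comments are handled entirely inside the lexer: lexLeftDelim never
// emits itemLeftDelim for "{{/*", and lexComment (below) discards everything
// through the closing "*/}}", so an input that is nothing but a comment
// produces only itemEOF.
func commentElision() bool {
	l := lex("comment", "{{/* ignored */}}", "", "")
	return l.nextItem().typ == itemEOF
}
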
// lexComment scans a comment. The left comment marker is known to be present.
func lexComment(l *lexer) stateFn {
	l.pos += Pos(len(leftComment))
	i := strings.Index(l.input[l.pos:], rightComment)
	if i < 0 {
		return l.errorf("unclosed comment")
	}
	l.pos += Pos(i + len(rightComment))
	delim, trimSpace := l.atRightDelim()
	if !delim {
		return l.errorf("comment ends before closing delimiter")
	}
	if trimSpace {
		l.pos += trimMarkerLen
	}
	l.pos += Pos(len(l.rightDelim))
	if trimSpace {
		l.pos += leftTrimLength(l.input[l.pos:])
	}
	l.ignore()
	return lexText
}

// lexRightDelim scans the right delimiter, which is known to be present, possibly with a trim marker.
func lexRightDelim(l *lexer) stateFn {
	trimSpace := strings.HasPrefix(l.input[l.pos:], rightTrimMarker)
	if trimSpace {
		l.pos += trimMarkerLen
		l.ignore()
	}
	l.pos += Pos(len(l.rightDelim))
	l.emit(itemRightDelim)
	if trimSpace {
		l.pos += leftTrimLength(l.input[l.pos:])
		l.ignore()
	}
	return lexText
}

// lexInsideAction scans the elements inside action delimiters.
func lexInsideAction(l *lexer) stateFn {
	// Either number, quoted string, or identifier.
	// Spaces separate arguments; runs of spaces turn into itemSpace.
	// Pipe symbols separate and are emitted.
	delim, _ := l.atRightDelim()
	if delim {
		if l.parenDepth == 0 {
			return lexRightDelim
		}
		return l.errorf("unclosed left paren")
	}
	switch r := l.next(); {
	case r == eof || isEndOfLine(r):
		return l.errorf("unclosed action")
	case isSpace(r):
		return lexSpace
	case r == ':':
		if l.next() != '=' {
			return l.errorf("expected :=")
		}
		l.emit(itemColonEquals)
	case r == '|':
		l.emit(itemPipe)
	case r == '"':
		return lexQuote
	case r == '`':
		return lexRawQuote
	case r == '$':
		return lexVariable
	case r == '\'':
		return lexChar
	case r == '.':
		// special look-ahead for ".field" so we don't break l.backup().
		if l.pos < Pos(len(l.input)) {
			r := l.input[l.pos]
			if r < '0' || '9' < r {
				return lexField
			}
		}
		fallthrough // '.' can start a number.
	case r == '+' || r == '-' || ('0' <= r && r <= '9'):
		l.backup()
		return lexNumber
	case isAlphaNumeric(r):
		l.backup()
		return lexIdentifier
	case r == '(':
		l.emit(itemLeftParen)
		l.parenDepth++
	case r == ')':
		l.emit(itemRightParen)
		l.parenDepth--
		if l.parenDepth < 0 {
			return l.errorf("unexpected right paren %#U", r)
		}
	case r <= unicode.MaxASCII && unicode.IsPrint(r):
		l.emit(itemChar)
		return lexInsideAction
	default:
		return l.errorf("unrecognized character in action: %#U", r)
	}
	return lexInsideAction
}

// lexSpace scans a run of space characters.
// One space has already been seen.
func lexSpace(l *lexer) stateFn {
	for isSpace(l.peek()) {
		l.next()
	}
	l.emit(itemSpace)
	return lexInsideAction
}

// lexIdentifier scans an alphanumeric.
func lexIdentifier(l *lexer) stateFn {
Loop:
	for {
		switch r := l.next(); {
		case isAlphaNumeric(r):
			// absorb.
		default:
			l.backup()
			word := l.input[l.start:l.pos]
			if !l.atTerminator() {
				return l.errorf("bad character %#U", r)
			}
			switch {
			case key[word] > itemKeyword:
				l.emit(key[word])
			case word[0] == '.':
				l.emit(itemField)
			case word == "true", word == "false":
				l.emit(itemBool)
			default:
				l.emit(itemIdentifier)
			}
			break Loop
		}
	}
	return lexInsideAction
}
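
// Illustrative note (not in the upstream file): the test key[word] > itemKeyword
// in lexIdentifier relies on the ordering of the itemType constants, where every
// keyword type (itemBlock through itemWith) is declared after the itemKeyword
// marker. A word missing from the map yields the zero value itemError, which
// fails the comparison and falls through to itemBool or itemIdentifier.
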
// lexField scans a field: .Alphanumeric.
// The . has been scanned.
func lexField(l *lexer) stateFn {
	return lexFieldOrVariable(l, itemField)
}

// lexVariable scans a Variable: $Alphanumeric.
// The $ has been scanned.
func lexVariable(l *lexer) stateFn {
	if l.atTerminator() { // Nothing interesting follows -> "$".
		l.emit(itemVariable)
		return lexInsideAction
	}
	return lexFieldOrVariable(l, itemVariable)
}

// lexFieldOrVariable scans a field or variable: [.$]Alphanumeric.
// The . or $ has been scanned.
func lexFieldOrVariable(l *lexer, typ itemType) stateFn {
	if l.atTerminator() { // Nothing interesting follows -> "." or "$".
		if typ == itemVariable {
			l.emit(itemVariable)
		} else {
			l.emit(itemDot)
		}
		return lexInsideAction
	}
	var r rune
	for {
		r = l.next()
		if !isAlphaNumeric(r) {
			l.backup()
			break
		}
	}
	if !l.atTerminator() {
		return l.errorf("bad character %#U", r)
	}
	l.emit(typ)
	return lexInsideAction
}

// atTerminator reports whether the input is at a valid termination character to
// appear after an identifier. Breaks .X.Y into two pieces. Also catches cases
// like "$x+2" not being acceptable without a space, in case we decide one
// day to implement arithmetic.
func (l *lexer) atTerminator() bool {
	r := l.peek()
	if isSpace(r) || isEndOfLine(r) {
		return true
	}
	switch r {
	case eof, '.', ',', '|', ':', ')', '(':
		return true
	}
	// Does r start the delimiter? This can be ambiguous (with delim=="//", $x/2 will
	// succeed but should fail) but only in extremely rare cases caused by willfully
	// bad choice of delimiter.
	if rd, _ := utf8.DecodeRuneInString(l.rightDelim); rd == r {
		return true
	}
	return false
}

// lexChar scans a character constant. The initial quote is already
// scanned. Syntax checking is done by the parser.
func lexChar(l *lexer) stateFn {
Loop:
	for {
		switch l.next() {
		case '\\':
			if r := l.next(); r != eof && r != '\n' {
				break
			}
			fallthrough
		case eof, '\n':
			return l.errorf("unterminated character constant")
		case '\'':
			break Loop
		}
	}
	l.emit(itemCharConstant)
	return lexInsideAction
}
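
// Illustrative note (not in the upstream file): for the input "{{if .Ready}}",
// the states above cooperate to emit itemLeftDelim "{{", itemIf "if",
// itemSpace " ", itemField ".Ready", itemRightDelim "}}", and finally itemEOF.
// The keyword lookup happens in lexIdentifier, while ".Ready" takes the
// lexField path via the '.' look-ahead in lexInsideAction.
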
// lexNumber scans a number: decimal, octal, hex, float, or imaginary. This
// isn't a perfect number scanner - for instance it accepts "." and "0x0.2"
// and "089" - but when it's wrong the input is invalid and the parser (via
// strconv) will notice.
func lexNumber(l *lexer) stateFn {
	if !l.scanNumber() {
		return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
	}
	if sign := l.peek(); sign == '+' || sign == '-' {
		// Complex: 1+2i. No spaces, must end in 'i'.
		if !l.scanNumber() || l.input[l.pos-1] != 'i' {
			return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
		}
		l.emit(itemComplex)
	} else {
		l.emit(itemNumber)
	}
	return lexInsideAction
}

func (l *lexer) scanNumber() bool {
	// Optional leading sign.
	l.accept("+-")
	// Is it hex?
	digits := "0123456789"
	if l.accept("0") && l.accept("xX") {
		digits = "0123456789abcdefABCDEF"
	}
	l.acceptRun(digits)
	if l.accept(".") {
		l.acceptRun(digits)
	}
	if l.accept("eE") {
		l.accept("+-")
		l.acceptRun("0123456789")
	}
	// Is it imaginary?
	l.accept("i")
	// Next thing mustn't be alphanumeric.
	if isAlphaNumeric(l.peek()) {
		l.next()
		return false
	}
	return true
}

// lexQuote scans a quoted string.
func lexQuote(l *lexer) stateFn {
Loop:
	for {
		switch l.next() {
		case '\\':
			if r := l.next(); r != eof && r != '\n' {
				break
			}
			fallthrough
		case eof, '\n':
			return l.errorf("unterminated quoted string")
		case '"':
			break Loop
		}
	}
	l.emit(itemString)
	return lexInsideAction
}

// lexRawQuote scans a raw quoted string.
func lexRawQuote(l *lexer) stateFn {
	startLine := l.line
Loop:
	for {
		switch l.next() {
		case eof:
			// Restore line number to location of opening quote.
			// We will error out so it's ok just to overwrite the field.
			l.line = startLine
			return l.errorf("unterminated raw quoted string")
		case '`':
			break Loop
		}
	}
	l.emit(itemRawString)
	return lexInsideAction
}

// isSpace reports whether r is a space character.
func isSpace(r rune) bool {
	return r == ' ' || r == '\t'
}

// isEndOfLine reports whether r is an end-of-line character.
func isEndOfLine(r rune) bool {
	return r == '\r' || r == '\n'
}

// isAlphaNumeric reports whether r is an alphabetic, digit, or underscore.
func isAlphaNumeric(r rune) bool {
	return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
}
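
// permissiveNumbers is an illustrative sketch and is not part of the upstream
// file. It exercises the deliberately loose scanning described in the comment
// above lexNumber: "089" is accepted here as itemNumber and only rejected
// later, when the parser hands it to strconv. It also shows drain letting the
// lexing goroutine finish once the caller stops reading.
func permissiveNumbers() bool {
	l := lex("numbers", "{{089}}", "", "")
	l.nextItem() // itemLeftDelim "{{"
	ok := l.nextItem().typ == itemNumber
	l.drain() // consume the remaining items so run can close the channel
	return ok
}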