// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package parse

import (
	"fmt"
	"strings"
	"unicode"
	"unicode/utf8"
)

// item represents a token or text string returned from the scanner.
type item struct {
	typ  itemType // The type of this item.
	pos  Pos      // The starting position, in bytes, of this item in the input string.
	val  string   // The value of this item.
	line int      // The line number at the start of this item.
}

func (i item) String() string {
	switch {
	case i.typ == itemEOF:
		return "EOF"
	case i.typ == itemError:
		return i.val
	case i.typ > itemKeyword:
		return fmt.Sprintf("<%s>", i.val)
	case len(i.val) > 10:
		return fmt.Sprintf("%.10q...", i.val)
	}
	return fmt.Sprintf("%q", i.val)
}

// itemType identifies the type of lex items.
type itemType int

const (
	itemError        itemType = iota // error occurred; value is text of error
	itemBool                         // boolean constant
	itemChar                         // printable ASCII character; grab bag for comma etc.
	itemCharConstant                 // character constant
	itemComplex                      // complex constant (1+2i); imaginary is just a number
	itemColonEquals                  // colon-equals (':=') introducing a declaration
	itemEOF
	itemField      // alphanumeric identifier starting with '.'
	itemIdentifier // alphanumeric identifier not starting with '.'
	itemLeftDelim  // left action delimiter
	itemLeftParen  // '(' inside action
	itemNumber     // simple number, including imaginary
	itemPipe       // pipe symbol
	itemRawString  // raw quoted string (includes quotes)
	itemRightDelim // right action delimiter
	itemRightParen // ')' inside action
	itemSpace      // run of spaces separating arguments
	itemString     // quoted string (includes quotes)
	itemText       // plain text
	itemVariable   // variable starting with '$', such as '$' or '$1' or '$hello'
	// Keywords appear after all the rest.
	itemKeyword  // used only to delimit the keywords
	itemBlock    // block keyword
	itemDot      // the cursor, spelled '.'
	itemDefine   // define keyword
	itemElse     // else keyword
	itemEnd      // end keyword
	itemIf       // if keyword
	itemNil      // the untyped nil constant, easiest to treat as a keyword
	itemRange    // range keyword
	itemTemplate // template keyword
	itemWith     // with keyword
)

var key = map[string]itemType{
	".":        itemDot,
	"block":    itemBlock,
	"define":   itemDefine,
	"else":     itemElse,
	"end":      itemEnd,
	"if":       itemIf,
	"range":    itemRange,
	"nil":      itemNil,
	"template": itemTemplate,
	"with":     itemWith,
}

const eof = -1

// Trimming spaces.
// If the action begins "{{- " rather than "{{", then all space/tab/newlines
// preceding the action are trimmed; conversely if it ends " -}}" the
// leading spaces are trimmed. This is done entirely in the lexer; the
// parser never sees it happen. We require an ASCII space to be
// present to avoid ambiguity with things like "{{-3}}". It reads
// better with the space present anyway. For simplicity, only ASCII
// space does the job.
const (
	spaceChars      = " \t\r\n" // These are the space characters defined by Go itself.
	leftTrimMarker  = "- "      // Attached to left delimiter, trims trailing spaces from preceding text.
	rightTrimMarker = " -"      // Attached to right delimiter, trims leading spaces from following text.
	trimMarkerLen   = Pos(len(leftTrimMarker))
)
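
// Illustrative example (not part of the original file): with the default
// delimiters, the source "{{23 -}} < {{- 45}}" lexes to the items
// "{{", "23", "}}", "<", "{{", "45", "}}", EOF; the spaces around "<"
// are trimmed away, so the template renders as "23<45".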

// stateFn represents the state of the scanner as a function that returns the next state.
type stateFn func(*lexer) stateFn

// lexer holds the state of the scanner.
type lexer struct {
	name       string    // the name of the input; used only for error reports
	input      string    // the string being scanned
	leftDelim  string    // start of action
	rightDelim string    // end of action
	state      stateFn   // the next lexing function to enter
	pos        Pos       // current position in the input
	start      Pos       // start position of this item
	width      Pos       // width of last rune read from input
	lastPos    Pos       // position of most recent item returned by nextItem
	items      chan item // channel of scanned items
	parenDepth int       // nesting depth of ( ) exprs
	line       int       // 1+number of newlines seen
}

// next returns the next rune in the input.
func (l *lexer) next() rune {
	if int(l.pos) >= len(l.input) {
		l.width = 0
		return eof
	}
	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
	l.width = Pos(w)
	l.pos += l.width
	if r == '\n' {
		l.line++
	}
	return r
}

// peek returns but does not consume the next rune in the input.
func (l *lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}

// backup steps back one rune. Can only be called once per call of next.
func (l *lexer) backup() {
	l.pos -= l.width
	// Correct newline count.
	if l.width == 1 && l.input[l.pos] == '\n' {
		l.line--
	}
}

// emit passes an item back to the client.
func (l *lexer) emit(t itemType) {
	l.items <- item{t, l.start, l.input[l.start:l.pos], l.line}
	// Some items contain text internally. If so, count their newlines.
	switch t {
	case itemText, itemRawString, itemLeftDelim, itemRightDelim:
		l.line += strings.Count(l.input[l.start:l.pos], "\n")
	}
	l.start = l.pos
}

// ignore skips over the pending input before this point.
func (l *lexer) ignore() {
	l.start = l.pos
}

// accept consumes the next rune if it's from the valid set.
func (l *lexer) accept(valid string) bool {
	if strings.ContainsRune(valid, l.next()) {
		return true
	}
	l.backup()
	return false
}

// acceptRun consumes a run of runes from the valid set.
func (l *lexer) acceptRun(valid string) {
	for strings.ContainsRune(valid, l.next()) {
	}
	l.backup()
}

// errorf returns an error token and terminates the scan by passing
// back a nil pointer that will be the next state, terminating l.nextItem.
func (l *lexer) errorf(format string, args ...interface{}) stateFn {
	l.items <- item{itemError, l.start, fmt.Sprintf(format, args...), l.line}
	return nil
}

// nextItem returns the next item from the input.
// Called by the parser, not in the lexing goroutine.
func (l *lexer) nextItem() item {
	item := <-l.items
	l.lastPos = item.pos
	return item
}

// drain drains the output so the lexing goroutine will exit.
// Called by the parser, not in the lexing goroutine.
func (l *lexer) drain() {
	for range l.items {
	}
}
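
// Design note (illustrative, not in the original source): items is an
// unbuffered channel, so the lexing goroutine started by lex blocks on every
// emit until the parser calls nextItem. A parser that stops early must
// therefore call drain, or that goroutine would block forever on its next send.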

// lex creates a new scanner for the input string.
func lex(name, input, left, right string) *lexer {
	if left == "" {
		left = leftDelim
	}
	if right == "" {
		right = rightDelim
	}
	l := &lexer{
		name:       name,
		input:      input,
		leftDelim:  left,
		rightDelim: right,
		items:      make(chan item),
		line:       1,
	}
	go l.run()
	return l
}

// run runs the state machine for the lexer.
func (l *lexer) run() {
	for l.state = lexText; l.state != nil; {
		l.state = l.state(l)
	}
	close(l.items)
}

// state functions

const (
	leftDelim    = "{{"
	rightDelim   = "}}"
	leftComment  = "/*"
	rightComment = "*/"
)

// lexText scans until an opening action delimiter, "{{".
func lexText(l *lexer) stateFn {
	l.width = 0
	if x := strings.Index(l.input[l.pos:], l.leftDelim); x >= 0 {
		ldn := Pos(len(l.leftDelim))
		l.pos += Pos(x)
		trimLength := Pos(0)
		if strings.HasPrefix(l.input[l.pos+ldn:], leftTrimMarker) {
			trimLength = rightTrimLength(l.input[l.start:l.pos])
		}
		l.pos -= trimLength
		if l.pos > l.start {
			l.emit(itemText)
		}
		l.pos += trimLength
		l.ignore()
		return lexLeftDelim
	} else {
		l.pos = Pos(len(l.input))
	}
	// Correctly reached EOF.
	if l.pos > l.start {
		l.emit(itemText)
	}
	l.emit(itemEOF)
	return nil
}

// rightTrimLength returns the length of the spaces at the end of the string.
func rightTrimLength(s string) Pos {
	return Pos(len(s) - len(strings.TrimRight(s, spaceChars)))
}

// atRightDelim reports whether the lexer is at a right delimiter, possibly preceded by a trim marker.
func (l *lexer) atRightDelim() (delim, trimSpaces bool) {
	if strings.HasPrefix(l.input[l.pos:], l.rightDelim) {
		return true, false
	}
	// The right delim might have the marker before.
	if strings.HasPrefix(l.input[l.pos:], rightTrimMarker) {
		if strings.HasPrefix(l.input[l.pos+trimMarkerLen:], l.rightDelim) {
			return true, true
		}
	}
	return false, false
}

// leftTrimLength returns the length of the spaces at the beginning of the string.
func leftTrimLength(s string) Pos {
	return Pos(len(s) - len(strings.TrimLeft(s, spaceChars)))
}

// lexLeftDelim scans the left delimiter, which is known to be present, possibly with a trim marker.
func lexLeftDelim(l *lexer) stateFn {
	l.pos += Pos(len(l.leftDelim))
	trimSpace := strings.HasPrefix(l.input[l.pos:], leftTrimMarker)
	afterMarker := Pos(0)
	if trimSpace {
		afterMarker = trimMarkerLen
	}
	if strings.HasPrefix(l.input[l.pos+afterMarker:], leftComment) {
		l.pos += afterMarker
		l.ignore()
		return lexComment
	}
	l.emit(itemLeftDelim)
	l.pos += afterMarker
	l.ignore()
	l.parenDepth = 0
	return lexInsideAction
}
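
// Illustrative note (not in the original source): as lexLeftDelim and
// lexComment are written, a comment must abut the delimiters, as in
// "{{/* a comment */}}"; with trim markers it is written
// "{{- /* a comment */ -}}", which also trims the surrounding spaces.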

// lexComment scans a comment. The left comment marker is known to be present.
func lexComment(l *lexer) stateFn {
	l.pos += Pos(len(leftComment))
	i := strings.Index(l.input[l.pos:], rightComment)
	if i < 0 {
		return l.errorf("unclosed comment")
	}
	l.pos += Pos(i + len(rightComment))
	delim, trimSpace := l.atRightDelim()
	if !delim {
		return l.errorf("comment ends before closing delimiter")
	}
	if trimSpace {
		l.pos += trimMarkerLen
	}
	l.pos += Pos(len(l.rightDelim))
	if trimSpace {
		l.pos += leftTrimLength(l.input[l.pos:])
	}
	l.ignore()
	return lexText
}

// lexRightDelim scans the right delimiter, which is known to be present, possibly with a trim marker.
func lexRightDelim(l *lexer) stateFn {
	trimSpace := strings.HasPrefix(l.input[l.pos:], rightTrimMarker)
	if trimSpace {
		l.pos += trimMarkerLen
		l.ignore()
	}
	l.pos += Pos(len(l.rightDelim))
	l.emit(itemRightDelim)
	if trimSpace {
		l.pos += leftTrimLength(l.input[l.pos:])
		l.ignore()
	}
	return lexText
}

// lexInsideAction scans the elements inside action delimiters.
func lexInsideAction(l *lexer) stateFn {
	// Either number, quoted string, or identifier.
	// Spaces separate arguments; runs of spaces turn into itemSpace.
	// Pipe symbols separate and are emitted.
	delim, _ := l.atRightDelim()
	if delim {
		if l.parenDepth == 0 {
			return lexRightDelim
		}
		return l.errorf("unclosed left paren")
	}
	switch r := l.next(); {
	case r == eof || isEndOfLine(r):
		return l.errorf("unclosed action")
	case isSpace(r):
		return lexSpace
	case r == ':':
		if l.next() != '=' {
			return l.errorf("expected :=")
		}
		l.emit(itemColonEquals)
	case r == '|':
		l.emit(itemPipe)
	case r == '"':
		return lexQuote
	case r == '`':
		return lexRawQuote
	case r == '$':
		return lexVariable
	case r == '\'':
		return lexChar
	case r == '.':
		// special look-ahead for ".field" so we don't break l.backup().
		if l.pos < Pos(len(l.input)) {
			r := l.input[l.pos]
			if r < '0' || '9' < r {
				return lexField
			}
		}
		fallthrough // '.' can start a number.
	case r == '+' || r == '-' || ('0' <= r && r <= '9'):
		l.backup()
		return lexNumber
	case isAlphaNumeric(r):
		l.backup()
		return lexIdentifier
	case r == '(':
		l.emit(itemLeftParen)
		l.parenDepth++
	case r == ')':
		l.emit(itemRightParen)
		l.parenDepth--
		if l.parenDepth < 0 {
			return l.errorf("unexpected right paren %#U", r)
		}
	case r <= unicode.MaxASCII && unicode.IsPrint(r):
		l.emit(itemChar)
		return lexInsideAction
	default:
		return l.errorf("unrecognized character in action: %#U", r)
	}
	return lexInsideAction
}

// lexSpace scans a run of space characters.
// One space has already been seen.
func lexSpace(l *lexer) stateFn {
	for isSpace(l.peek()) {
		l.next()
	}
	l.emit(itemSpace)
	return lexInsideAction
}
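
// Illustrative example (not in the original source): inside an action such as
// {{.Name | printf "%q"}}, the states above emit itemLeftDelim "{{",
// itemField ".Name", itemSpace, itemPipe, itemSpace, itemIdentifier "printf",
// itemSpace, itemString "\"%q\"", and itemRightDelim "}}".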

// lexIdentifier scans an alphanumeric.
func lexIdentifier(l *lexer) stateFn {
Loop:
	for {
		switch r := l.next(); {
		case isAlphaNumeric(r):
			// absorb.
		default:
			l.backup()
			word := l.input[l.start:l.pos]
			if !l.atTerminator() {
				return l.errorf("bad character %#U", r)
			}
			switch {
			case key[word] > itemKeyword:
				l.emit(key[word])
			case word[0] == '.':
				l.emit(itemField)
			case word == "true", word == "false":
				l.emit(itemBool)
			default:
				l.emit(itemIdentifier)
			}
			break Loop
		}
	}
	return lexInsideAction
}

// lexField scans a field: .Alphanumeric.
// The . has been scanned.
func lexField(l *lexer) stateFn {
	return lexFieldOrVariable(l, itemField)
}

// lexVariable scans a variable: $Alphanumeric.
// The $ has been scanned.
func lexVariable(l *lexer) stateFn {
	if l.atTerminator() { // Nothing interesting follows -> "$".
		l.emit(itemVariable)
		return lexInsideAction
	}
	return lexFieldOrVariable(l, itemVariable)
}

// lexFieldOrVariable scans a field or variable: [.$]Alphanumeric.
// The . or $ has been scanned.
func lexFieldOrVariable(l *lexer, typ itemType) stateFn {
	if l.atTerminator() { // Nothing interesting follows -> "." or "$".
		if typ == itemVariable {
			l.emit(itemVariable)
		} else {
			l.emit(itemDot)
		}
		return lexInsideAction
	}
	var r rune
	for {
		r = l.next()
		if !isAlphaNumeric(r) {
			l.backup()
			break
		}
	}
	if !l.atTerminator() {
		return l.errorf("bad character %#U", r)
	}
	l.emit(typ)
	return lexInsideAction
}

// atTerminator reports whether the input is at a valid termination character to
// appear after an identifier. Breaks .X.Y into two pieces. Also catches cases
// like "$x+2" not being acceptable without a space, in case we decide one
// day to implement arithmetic.
func (l *lexer) atTerminator() bool {
	r := l.peek()
	if isSpace(r) || isEndOfLine(r) {
		return true
	}
	switch r {
	case eof, '.', ',', '|', ':', ')', '(':
		return true
	}
	// Does r start the delimiter? This can be ambiguous (with delim=="//", $x/2 will
	// succeed but should fail) but only in extremely rare cases caused by willfully
	// bad choice of delimiter.
	if rd, _ := utf8.DecodeRuneInString(l.rightDelim); rd == r {
		return true
	}
	return false
}

// lexChar scans a character constant. The initial quote is already
// scanned. Syntax checking is done by the parser.
func lexChar(l *lexer) stateFn {
Loop:
	for {
		switch l.next() {
		case '\\':
			if r := l.next(); r != eof && r != '\n' {
				break
			}
			fallthrough
		case eof, '\n':
			return l.errorf("unterminated character constant")
		case '\'':
			break Loop
		}
	}
	l.emit(itemCharConstant)
	return lexInsideAction
}
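
// Illustrative note (not in the original source): the number scanner below
// accepts forms such as 7, -2, 0x7f, .5, 1e3, 2i, and the complex 1+2i;
// anything it accepts that is not a valid Go constant is rejected later by
// the parser via strconv, as its doc comment explains.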

// lexNumber scans a number: decimal, octal, hex, float, or imaginary. This
// isn't a perfect number scanner - for instance it accepts "." and "0x0.2"
// and "089" - but when it's wrong the input is invalid and the parser (via
// strconv) will notice.
func lexNumber(l *lexer) stateFn {
	if !l.scanNumber() {
		return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
	}
	if sign := l.peek(); sign == '+' || sign == '-' {
		// Complex: 1+2i. No spaces, must end in 'i'.
		if !l.scanNumber() || l.input[l.pos-1] != 'i' {
			return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
		}
		l.emit(itemComplex)
	} else {
		l.emit(itemNumber)
	}
	return lexInsideAction
}

func (l *lexer) scanNumber() bool {
	// Optional leading sign.
	l.accept("+-")
	// Is it hex?
	digits := "0123456789"
	if l.accept("0") && l.accept("xX") {
		digits = "0123456789abcdefABCDEF"
	}
	l.acceptRun(digits)
	if l.accept(".") {
		l.acceptRun(digits)
	}
	if l.accept("eE") {
		l.accept("+-")
		l.acceptRun("0123456789")
	}
	// Is it imaginary?
	l.accept("i")
	// Next thing mustn't be alphanumeric.
	if isAlphaNumeric(l.peek()) {
		l.next()
		return false
	}
	return true
}

// lexQuote scans a quoted string.
func lexQuote(l *lexer) stateFn {
Loop:
	for {
		switch l.next() {
		case '\\':
			if r := l.next(); r != eof && r != '\n' {
				break
			}
			fallthrough
		case eof, '\n':
			return l.errorf("unterminated quoted string")
		case '"':
			break Loop
		}
	}
	l.emit(itemString)
	return lexInsideAction
}

// lexRawQuote scans a raw quoted string.
func lexRawQuote(l *lexer) stateFn {
	startLine := l.line
Loop:
	for {
		switch l.next() {
		case eof:
			// Restore line number to location of opening quote.
			// We will error out so it's ok just to overwrite the field.
			l.line = startLine
			return l.errorf("unterminated raw quoted string")
		case '`':
			break Loop
		}
	}
	l.emit(itemRawString)
	return lexInsideAction
}

// isSpace reports whether r is a space character.
func isSpace(r rune) bool {
	return r == ' ' || r == '\t'
}

// isEndOfLine reports whether r is an end-of-line character.
func isEndOfLine(r rune) bool {
	return r == '\r' || r == '\n'
}

// isAlphaNumeric reports whether r is an alphabetic, digit, or underscore.
func isAlphaNumeric(r rune) bool {
	return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
}
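
// collectItems is an illustrative helper and is not part of the original
// file: the name and function are an editorial sketch showing how the item
// stream produced above is typically consumed. It lexes input with the
// default delimiters and gathers every item until itemEOF or itemError.
func collectItems(name, input string) []item {
	l := lex(name, input, "", "")
	var items []item
	for {
		it := l.nextItem()
		items = append(items, it)
		if it.typ == itemEOF || it.typ == itemError {
			break
		}
	}
	return items
}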