github.com/mdempsky/go@v0.0.0-20151201204031-5dd372bd1e70/src/text/template/parse/lex.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package parse 6 7 import ( 8 "fmt" 9 "strings" 10 "unicode" 11 "unicode/utf8" 12 ) 13 14 // item represents a token or text string returned from the scanner. 15 type item struct { 16 typ itemType // The type of this item. 17 pos Pos // The starting position, in bytes, of this item in the input string. 18 val string // The value of this item. 19 } 20 21 func (i item) String() string { 22 switch { 23 case i.typ == itemEOF: 24 return "EOF" 25 case i.typ == itemError: 26 return i.val 27 case i.typ > itemKeyword: 28 return fmt.Sprintf("<%s>", i.val) 29 case len(i.val) > 10: 30 return fmt.Sprintf("%.10q...", i.val) 31 } 32 return fmt.Sprintf("%q", i.val) 33 } 34 35 // itemType identifies the type of lex items. 36 type itemType int 37 38 const ( 39 itemError itemType = iota // error occurred; value is text of error 40 itemBool // boolean constant 41 itemChar // printable ASCII character; grab bag for comma etc. 42 itemCharConstant // character constant 43 itemComplex // complex constant (1+2i); imaginary is just a number 44 itemColonEquals // colon-equals (':=') introducing a declaration 45 itemEOF 46 itemField // alphanumeric identifier starting with '.' 47 itemIdentifier // alphanumeric identifier not starting with '.' 48 itemLeftDelim // left action delimiter 49 itemLeftParen // '(' inside action 50 itemNumber // simple number, including imaginary 51 itemPipe // pipe symbol 52 itemRawString // raw quoted string (includes quotes) 53 itemRightDelim // right action delimiter 54 itemRightParen // ')' inside action 55 itemSpace // run of spaces separating arguments 56 itemString // quoted string (includes quotes) 57 itemText // plain text 58 itemVariable // variable starting with '$', such as '$' or '$1' or '$hello' 59 // Keywords appear after all the rest. 60 itemKeyword // used only to delimit the keywords 61 itemBlock // block keyword 62 itemDot // the cursor, spelled '.' 63 itemDefine // define keyword 64 itemElse // else keyword 65 itemEnd // end keyword 66 itemIf // if keyword 67 itemNil // the untyped nil constant, easiest to treat as a keyword 68 itemRange // range keyword 69 itemTemplate // template keyword 70 itemWith // with keyword 71 ) 72 73 var key = map[string]itemType{ 74 ".": itemDot, 75 "block": itemBlock, 76 "define": itemDefine, 77 "else": itemElse, 78 "end": itemEnd, 79 "if": itemIf, 80 "range": itemRange, 81 "nil": itemNil, 82 "template": itemTemplate, 83 "with": itemWith, 84 } 85 86 const eof = -1 87 88 // Trimming spaces. 89 // If the action begins "{{- " rather than "{{", then all space/tab/newlines 90 // preceding the action are trimmed; conversely if it ends " -}}" the 91 // leading spaces are trimmed. This is done entirely in the lexer; the 92 // parser never sees it happen. We require an ASCII space to be 93 // present to avoid ambiguity with things like "{{-3}}". It reads 94 // better with the space present anyway. For simplicity, only ASCII 95 // space does the job. 96 const ( 97 spaceChars = " \t\r\n" // These are the space characters defined by Go itself. 98 leftTrimMarker = "- " // Attached to left delimiter, trims trailing spaces from preceding text. 99 rightTrimMarker = " -" // Attached to right delimiter, trims leading spaces from following text. 100 trimMarkerLen = Pos(len(leftTrimMarker)) 101 ) 102 103 // stateFn represents the state of the scanner as a function that returns the next state. 104 type stateFn func(*lexer) stateFn 105 106 // lexer holds the state of the scanner. 107 type lexer struct { 108 name string // the name of the input; used only for error reports 109 input string // the string being scanned 110 leftDelim string // start of action 111 rightDelim string // end of action 112 state stateFn // the next lexing function to enter 113 pos Pos // current position in the input 114 start Pos // start position of this item 115 width Pos // width of last rune read from input 116 lastPos Pos // position of most recent item returned by nextItem 117 items chan item // channel of scanned items 118 parenDepth int // nesting depth of ( ) exprs 119 } 120 121 // next returns the next rune in the input. 122 func (l *lexer) next() rune { 123 if int(l.pos) >= len(l.input) { 124 l.width = 0 125 return eof 126 } 127 r, w := utf8.DecodeRuneInString(l.input[l.pos:]) 128 l.width = Pos(w) 129 l.pos += l.width 130 return r 131 } 132 133 // peek returns but does not consume the next rune in the input. 134 func (l *lexer) peek() rune { 135 r := l.next() 136 l.backup() 137 return r 138 } 139 140 // backup steps back one rune. Can only be called once per call of next. 141 func (l *lexer) backup() { 142 l.pos -= l.width 143 } 144 145 // emit passes an item back to the client. 146 func (l *lexer) emit(t itemType) { 147 l.items <- item{t, l.start, l.input[l.start:l.pos]} 148 l.start = l.pos 149 } 150 151 // ignore skips over the pending input before this point. 152 func (l *lexer) ignore() { 153 l.start = l.pos 154 } 155 156 // accept consumes the next rune if it's from the valid set. 157 func (l *lexer) accept(valid string) bool { 158 if strings.IndexRune(valid, l.next()) >= 0 { 159 return true 160 } 161 l.backup() 162 return false 163 } 164 165 // acceptRun consumes a run of runes from the valid set. 166 func (l *lexer) acceptRun(valid string) { 167 for strings.IndexRune(valid, l.next()) >= 0 { 168 } 169 l.backup() 170 } 171 172 // lineNumber reports which line we're on, based on the position of 173 // the previous item returned by nextItem. Doing it this way 174 // means we don't have to worry about peek double counting. 175 func (l *lexer) lineNumber() int { 176 return 1 + strings.Count(l.input[:l.lastPos], "\n") 177 } 178 179 // errorf returns an error token and terminates the scan by passing 180 // back a nil pointer that will be the next state, terminating l.nextItem. 181 func (l *lexer) errorf(format string, args ...interface{}) stateFn { 182 l.items <- item{itemError, l.start, fmt.Sprintf(format, args...)} 183 return nil 184 } 185 186 // nextItem returns the next item from the input. 187 // Called by the parser, not in the lexing goroutine. 188 func (l *lexer) nextItem() item { 189 item := <-l.items 190 l.lastPos = item.pos 191 return item 192 } 193 194 // drain drains the output so the lexing goroutine will exit. 195 // Called by the parser, not in the lexing goroutine. 196 func (l *lexer) drain() { 197 for range l.items { 198 } 199 } 200 201 // lex creates a new scanner for the input string. 202 func lex(name, input, left, right string) *lexer { 203 if left == "" { 204 left = leftDelim 205 } 206 if right == "" { 207 right = rightDelim 208 } 209 l := &lexer{ 210 name: name, 211 input: input, 212 leftDelim: left, 213 rightDelim: right, 214 items: make(chan item), 215 } 216 go l.run() 217 return l 218 } 219 220 // run runs the state machine for the lexer. 221 func (l *lexer) run() { 222 for l.state = lexText; l.state != nil; { 223 l.state = l.state(l) 224 } 225 close(l.items) 226 } 227 228 // state functions 229 230 const ( 231 leftDelim = "{{" 232 rightDelim = "}}" 233 leftComment = "/*" 234 rightComment = "*/" 235 ) 236 237 // lexText scans until an opening action delimiter, "{{". 238 func lexText(l *lexer) stateFn { 239 for { 240 delim, trimSpace := l.atLeftDelim() 241 if delim { 242 trimLength := Pos(0) 243 if trimSpace { 244 trimLength = rightTrimLength(l.input[l.start:l.pos]) 245 } 246 l.pos -= trimLength 247 if l.pos > l.start { 248 l.emit(itemText) 249 } 250 l.pos += trimLength 251 l.ignore() 252 return lexLeftDelim 253 } 254 if l.next() == eof { 255 break 256 } 257 } 258 // Correctly reached EOF. 259 if l.pos > l.start { 260 l.emit(itemText) 261 } 262 l.emit(itemEOF) 263 return nil 264 } 265 266 // atLeftDelim reports whether the lexer is at a left delimiter, possibly followed by a trim marker. 267 func (l *lexer) atLeftDelim() (delim, trimSpaces bool) { 268 if !strings.HasPrefix(l.input[l.pos:], l.leftDelim) { 269 return false, false 270 } 271 // The left delim might have the marker afterwards. 272 trimSpaces = strings.HasPrefix(l.input[l.pos+Pos(len(l.leftDelim)):], leftTrimMarker) 273 return true, trimSpaces 274 } 275 276 // rightTrimLength returns the length of the spaces at the end of the string. 277 func rightTrimLength(s string) Pos { 278 return Pos(len(s) - len(strings.TrimRight(s, spaceChars))) 279 } 280 281 // atRightDelim reports whether the lexer is at a right delimiter, possibly preceded by a trim marker. 282 func (l *lexer) atRightDelim() (delim, trimSpaces bool) { 283 if strings.HasPrefix(l.input[l.pos:], l.rightDelim) { 284 return true, false 285 } 286 // The right delim might have the marker before. 287 if strings.HasPrefix(l.input[l.pos:], rightTrimMarker) { 288 if strings.HasPrefix(l.input[l.pos+trimMarkerLen:], l.rightDelim) { 289 return true, true 290 } 291 } 292 return false, false 293 } 294 295 // leftTrimLength returns the length of the spaces at the beginning of the string. 296 func leftTrimLength(s string) Pos { 297 return Pos(len(s) - len(strings.TrimLeft(s, spaceChars))) 298 } 299 300 // lexLeftDelim scans the left delimiter, which is known to be present, possibly with a trim marker. 301 func lexLeftDelim(l *lexer) stateFn { 302 l.pos += Pos(len(l.leftDelim)) 303 trimSpace := strings.HasPrefix(l.input[l.pos:], leftTrimMarker) 304 afterMarker := Pos(0) 305 if trimSpace { 306 afterMarker = trimMarkerLen 307 } 308 if strings.HasPrefix(l.input[l.pos+afterMarker:], leftComment) { 309 l.pos += afterMarker 310 l.ignore() 311 return lexComment 312 } 313 l.emit(itemLeftDelim) 314 l.pos += afterMarker 315 l.ignore() 316 l.parenDepth = 0 317 return lexInsideAction 318 } 319 320 // lexComment scans a comment. The left comment marker is known to be present. 321 func lexComment(l *lexer) stateFn { 322 l.pos += Pos(len(leftComment)) 323 i := strings.Index(l.input[l.pos:], rightComment) 324 if i < 0 { 325 return l.errorf("unclosed comment") 326 } 327 l.pos += Pos(i + len(rightComment)) 328 delim, trimSpace := l.atRightDelim() 329 if !delim { 330 return l.errorf("comment ends before closing delimiter") 331 } 332 if trimSpace { 333 l.pos += trimMarkerLen 334 } 335 l.pos += Pos(len(l.rightDelim)) 336 if trimSpace { 337 l.pos += leftTrimLength(l.input[l.pos:]) 338 } 339 l.ignore() 340 return lexText 341 } 342 343 // lexRightDelim scans the right delimiter, which is known to be present, possibly with a trim marker. 344 func lexRightDelim(l *lexer) stateFn { 345 trimSpace := strings.HasPrefix(l.input[l.pos:], rightTrimMarker) 346 if trimSpace { 347 l.pos += trimMarkerLen 348 l.ignore() 349 } 350 l.pos += Pos(len(l.rightDelim)) 351 l.emit(itemRightDelim) 352 if trimSpace { 353 l.pos += leftTrimLength(l.input[l.pos:]) 354 l.ignore() 355 } 356 return lexText 357 } 358 359 // lexInsideAction scans the elements inside action delimiters. 360 func lexInsideAction(l *lexer) stateFn { 361 // Either number, quoted string, or identifier. 362 // Spaces separate arguments; runs of spaces turn into itemSpace. 363 // Pipe symbols separate and are emitted. 364 delim, _ := l.atRightDelim() 365 if delim { 366 if l.parenDepth == 0 { 367 return lexRightDelim 368 } 369 return l.errorf("unclosed left paren") 370 } 371 switch r := l.next(); { 372 case r == eof || isEndOfLine(r): 373 return l.errorf("unclosed action") 374 case isSpace(r): 375 return lexSpace 376 case r == ':': 377 if l.next() != '=' { 378 return l.errorf("expected :=") 379 } 380 l.emit(itemColonEquals) 381 case r == '|': 382 l.emit(itemPipe) 383 case r == '"': 384 return lexQuote 385 case r == '`': 386 return lexRawQuote 387 case r == '$': 388 return lexVariable 389 case r == '\'': 390 return lexChar 391 case r == '.': 392 // special look-ahead for ".field" so we don't break l.backup(). 393 if l.pos < Pos(len(l.input)) { 394 r := l.input[l.pos] 395 if r < '0' || '9' < r { 396 return lexField 397 } 398 } 399 fallthrough // '.' can start a number. 400 case r == '+' || r == '-' || ('0' <= r && r <= '9'): 401 l.backup() 402 return lexNumber 403 case isAlphaNumeric(r): 404 l.backup() 405 return lexIdentifier 406 case r == '(': 407 l.emit(itemLeftParen) 408 l.parenDepth++ 409 case r == ')': 410 l.emit(itemRightParen) 411 l.parenDepth-- 412 if l.parenDepth < 0 { 413 return l.errorf("unexpected right paren %#U", r) 414 } 415 case r <= unicode.MaxASCII && unicode.IsPrint(r): 416 l.emit(itemChar) 417 return lexInsideAction 418 default: 419 return l.errorf("unrecognized character in action: %#U", r) 420 } 421 return lexInsideAction 422 } 423 424 // lexSpace scans a run of space characters. 425 // One space has already been seen. 426 func lexSpace(l *lexer) stateFn { 427 for isSpace(l.peek()) { 428 l.next() 429 } 430 l.emit(itemSpace) 431 return lexInsideAction 432 } 433 434 // lexIdentifier scans an alphanumeric. 435 func lexIdentifier(l *lexer) stateFn { 436 Loop: 437 for { 438 switch r := l.next(); { 439 case isAlphaNumeric(r): 440 // absorb. 441 default: 442 l.backup() 443 word := l.input[l.start:l.pos] 444 if !l.atTerminator() { 445 return l.errorf("bad character %#U", r) 446 } 447 switch { 448 case key[word] > itemKeyword: 449 l.emit(key[word]) 450 case word[0] == '.': 451 l.emit(itemField) 452 case word == "true", word == "false": 453 l.emit(itemBool) 454 default: 455 l.emit(itemIdentifier) 456 } 457 break Loop 458 } 459 } 460 return lexInsideAction 461 } 462 463 // lexField scans a field: .Alphanumeric. 464 // The . has been scanned. 465 func lexField(l *lexer) stateFn { 466 return lexFieldOrVariable(l, itemField) 467 } 468 469 // lexVariable scans a Variable: $Alphanumeric. 470 // The $ has been scanned. 471 func lexVariable(l *lexer) stateFn { 472 if l.atTerminator() { // Nothing interesting follows -> "$". 473 l.emit(itemVariable) 474 return lexInsideAction 475 } 476 return lexFieldOrVariable(l, itemVariable) 477 } 478 479 // lexVariable scans a field or variable: [.$]Alphanumeric. 480 // The . or $ has been scanned. 481 func lexFieldOrVariable(l *lexer, typ itemType) stateFn { 482 if l.atTerminator() { // Nothing interesting follows -> "." or "$". 483 if typ == itemVariable { 484 l.emit(itemVariable) 485 } else { 486 l.emit(itemDot) 487 } 488 return lexInsideAction 489 } 490 var r rune 491 for { 492 r = l.next() 493 if !isAlphaNumeric(r) { 494 l.backup() 495 break 496 } 497 } 498 if !l.atTerminator() { 499 return l.errorf("bad character %#U", r) 500 } 501 l.emit(typ) 502 return lexInsideAction 503 } 504 505 // atTerminator reports whether the input is at valid termination character to 506 // appear after an identifier. Breaks .X.Y into two pieces. Also catches cases 507 // like "$x+2" not being acceptable without a space, in case we decide one 508 // day to implement arithmetic. 509 func (l *lexer) atTerminator() bool { 510 r := l.peek() 511 if isSpace(r) || isEndOfLine(r) { 512 return true 513 } 514 switch r { 515 case eof, '.', ',', '|', ':', ')', '(': 516 return true 517 } 518 // Does r start the delimiter? This can be ambiguous (with delim=="//", $x/2 will 519 // succeed but should fail) but only in extremely rare cases caused by willfully 520 // bad choice of delimiter. 521 if rd, _ := utf8.DecodeRuneInString(l.rightDelim); rd == r { 522 return true 523 } 524 return false 525 } 526 527 // lexChar scans a character constant. The initial quote is already 528 // scanned. Syntax checking is done by the parser. 529 func lexChar(l *lexer) stateFn { 530 Loop: 531 for { 532 switch l.next() { 533 case '\\': 534 if r := l.next(); r != eof && r != '\n' { 535 break 536 } 537 fallthrough 538 case eof, '\n': 539 return l.errorf("unterminated character constant") 540 case '\'': 541 break Loop 542 } 543 } 544 l.emit(itemCharConstant) 545 return lexInsideAction 546 } 547 548 // lexNumber scans a number: decimal, octal, hex, float, or imaginary. This 549 // isn't a perfect number scanner - for instance it accepts "." and "0x0.2" 550 // and "089" - but when it's wrong the input is invalid and the parser (via 551 // strconv) will notice. 552 func lexNumber(l *lexer) stateFn { 553 if !l.scanNumber() { 554 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos]) 555 } 556 if sign := l.peek(); sign == '+' || sign == '-' { 557 // Complex: 1+2i. No spaces, must end in 'i'. 558 if !l.scanNumber() || l.input[l.pos-1] != 'i' { 559 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos]) 560 } 561 l.emit(itemComplex) 562 } else { 563 l.emit(itemNumber) 564 } 565 return lexInsideAction 566 } 567 568 func (l *lexer) scanNumber() bool { 569 // Optional leading sign. 570 l.accept("+-") 571 // Is it hex? 572 digits := "0123456789" 573 if l.accept("0") && l.accept("xX") { 574 digits = "0123456789abcdefABCDEF" 575 } 576 l.acceptRun(digits) 577 if l.accept(".") { 578 l.acceptRun(digits) 579 } 580 if l.accept("eE") { 581 l.accept("+-") 582 l.acceptRun("0123456789") 583 } 584 // Is it imaginary? 585 l.accept("i") 586 // Next thing mustn't be alphanumeric. 587 if isAlphaNumeric(l.peek()) { 588 l.next() 589 return false 590 } 591 return true 592 } 593 594 // lexQuote scans a quoted string. 595 func lexQuote(l *lexer) stateFn { 596 Loop: 597 for { 598 switch l.next() { 599 case '\\': 600 if r := l.next(); r != eof && r != '\n' { 601 break 602 } 603 fallthrough 604 case eof, '\n': 605 return l.errorf("unterminated quoted string") 606 case '"': 607 break Loop 608 } 609 } 610 l.emit(itemString) 611 return lexInsideAction 612 } 613 614 // lexRawQuote scans a raw quoted string. 615 func lexRawQuote(l *lexer) stateFn { 616 Loop: 617 for { 618 switch l.next() { 619 case eof: 620 return l.errorf("unterminated raw quoted string") 621 case '`': 622 break Loop 623 } 624 } 625 l.emit(itemRawString) 626 return lexInsideAction 627 } 628 629 // isSpace reports whether r is a space character. 630 func isSpace(r rune) bool { 631 return r == ' ' || r == '\t' 632 } 633 634 // isEndOfLine reports whether r is an end-of-line character. 635 func isEndOfLine(r rune) bool { 636 return r == '\r' || r == '\n' 637 } 638 639 // isAlphaNumeric reports whether r is an alphabetic, digit, or underscore. 640 func isAlphaNumeric(r rune) bool { 641 return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r) 642 }