github.com/megatontech/mynoteforgo@v0.0.0-20200507084910-5d0c6ea6e890/源码/text/template/parse/lex.go

// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package parse

import (
	"fmt"
	"strings"
	"unicode"
	"unicode/utf8"
)

// item represents a token or text string returned from the scanner.
type item struct {
	typ  itemType // The type of this item.
	pos  Pos      // The starting position, in bytes, of this item in the input string.
	val  string   // The value of this item.
	line int      // The line number at the start of this item.
}

func (i item) String() string {
	switch {
	case i.typ == itemEOF:
		return "EOF"
	case i.typ == itemError:
		return i.val
	case i.typ > itemKeyword:
		return fmt.Sprintf("<%s>", i.val)
	case len(i.val) > 10:
		return fmt.Sprintf("%.10q...", i.val)
	}
	return fmt.Sprintf("%q", i.val)
}

// itemType identifies the type of lex items.
type itemType int

const (
	itemError        itemType = iota // error occurred; value is text of error
	itemBool                         // boolean constant
	itemChar                         // printable ASCII character; grab bag for comma etc.
	itemCharConstant                 // character constant
	itemComplex                      // complex constant (1+2i); imaginary is just a number
	itemAssign                       // equals ('=') introducing an assignment
	itemDeclare                      // colon-equals (':=') introducing a declaration
	itemEOF
	itemField      // alphanumeric identifier starting with '.'
	itemIdentifier // alphanumeric identifier not starting with '.'
	itemLeftDelim  // left action delimiter
	itemLeftParen  // '(' inside action
	itemNumber     // simple number, including imaginary
	itemPipe       // pipe symbol
	itemRawString  // raw quoted string (includes quotes)
	itemRightDelim // right action delimiter
	itemRightParen // ')' inside action
	itemSpace      // run of spaces separating arguments
	itemString     // quoted string (includes quotes)
	itemText       // plain text
	itemVariable   // variable starting with '$', such as '$' or '$1' or '$hello'
	// Keywords appear after all the rest.
	itemKeyword  // used only to delimit the keywords
	itemBlock    // block keyword
	itemDot      // the cursor, spelled '.'
	itemDefine   // define keyword
	itemElse     // else keyword
	itemEnd      // end keyword
	itemIf       // if keyword
	itemNil      // the untyped nil constant, easiest to treat as a keyword
	itemRange    // range keyword
	itemTemplate // template keyword
	itemWith     // with keyword
)

var key = map[string]itemType{
	".":        itemDot,
	"block":    itemBlock,
	"define":   itemDefine,
	"else":     itemElse,
	"end":      itemEnd,
	"if":       itemIf,
	"range":    itemRange,
	"nil":      itemNil,
	"template": itemTemplate,
	"with":     itemWith,
}

const eof = -1

// Trimming spaces.
// If the action begins "{{- " rather than "{{", then all space/tab/newlines
// preceding the action are trimmed; conversely if it ends " -}}" the
// leading spaces are trimmed. This is done entirely in the lexer; the
// parser never sees it happen. We require an ASCII space to be
// present to avoid ambiguity with things like "{{-3}}". It reads
// better with the space present anyway. For simplicity, only ASCII
// space does the job.
const (
	spaceChars      = " \t\r\n" // These are the space characters defined by Go itself.
	leftTrimMarker  = "- "      // Attached to left delimiter, trims trailing spaces from preceding text.
	rightTrimMarker = " -"      // Attached to right delimiter, trims leading spaces from following text.
	trimMarkerLen   = Pos(len(leftTrimMarker))
)
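
// For example: with the default delimiters, the input "hello  {{- 3 -}}  world"
// yields the text "hello" with its trailing spaces trimmed, a left delimiter,
// the number 3, and a right delimiter, followed by the text "world" with its
// leading spaces trimmed. By contrast, "{{-3}}" has no ASCII space after the
// '-', so nothing is trimmed and the action simply contains the number -3.
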
// stateFn represents the state of the scanner as a function that returns the next state.
type stateFn func(*lexer) stateFn

// lexer holds the state of the scanner.
type lexer struct {
	name       string    // the name of the input; used only for error reports
	input      string    // the string being scanned
	leftDelim  string    // start of action
	rightDelim string    // end of action
	pos        Pos       // current position in the input
	start      Pos       // start position of this item
	width      Pos       // width of last rune read from input
	items      chan item // channel of scanned items
	parenDepth int       // nesting depth of ( ) exprs
	line       int       // 1+number of newlines seen
	startLine  int       // start line of this item
}

// next returns the next rune in the input.
func (l *lexer) next() rune {
	if int(l.pos) >= len(l.input) {
		l.width = 0
		return eof
	}
	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
	l.width = Pos(w)
	l.pos += l.width
	if r == '\n' {
		l.line++
	}
	return r
}

// peek returns but does not consume the next rune in the input.
func (l *lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}

// backup steps back one rune. Can only be called once per call of next.
func (l *lexer) backup() {
	l.pos -= l.width
	// Correct newline count.
	if l.width == 1 && l.input[l.pos] == '\n' {
		l.line--
	}
}

// emit passes an item back to the client.
func (l *lexer) emit(t itemType) {
	l.items <- item{t, l.start, l.input[l.start:l.pos], l.startLine}
	l.start = l.pos
	l.startLine = l.line
}

// ignore skips over the pending input before this point.
func (l *lexer) ignore() {
	l.line += strings.Count(l.input[l.start:l.pos], "\n")
	l.start = l.pos
	l.startLine = l.line
}

// accept consumes the next rune if it's from the valid set.
func (l *lexer) accept(valid string) bool {
	if strings.ContainsRune(valid, l.next()) {
		return true
	}
	l.backup()
	return false
}

// acceptRun consumes a run of runes from the valid set.
func (l *lexer) acceptRun(valid string) {
	for strings.ContainsRune(valid, l.next()) {
	}
	l.backup()
}

// errorf returns an error token and terminates the scan by passing
// back a nil pointer that will be the next state, terminating l.nextItem.
func (l *lexer) errorf(format string, args ...interface{}) stateFn {
	l.items <- item{itemError, l.start, fmt.Sprintf(format, args...), l.startLine}
	return nil
}

// nextItem returns the next item from the input.
// Called by the parser, not in the lexing goroutine.
func (l *lexer) nextItem() item {
	return <-l.items
}

// drain drains the output so the lexing goroutine will exit.
// Called by the parser, not in the lexing goroutine.
func (l *lexer) drain() {
	for range l.items {
	}
}
// lex creates a new scanner for the input string.
func lex(name, input, left, right string) *lexer {
	if left == "" {
		left = leftDelim
	}
	if right == "" {
		right = rightDelim
	}
	l := &lexer{
		name:       name,
		input:      input,
		leftDelim:  left,
		rightDelim: right,
		items:      make(chan item),
		line:       1,
		startLine:  1,
	}
	go l.run()
	return l
}

// run runs the state machine for the lexer.
func (l *lexer) run() {
	for state := lexText; state != nil; {
		state = state(l)
	}
	close(l.items)
}

// state functions

const (
	leftDelim    = "{{"
	rightDelim   = "}}"
	leftComment  = "/*"
	rightComment = "*/"
)

// lexText scans until an opening action delimiter, "{{".
func lexText(l *lexer) stateFn {
	l.width = 0
	if x := strings.Index(l.input[l.pos:], l.leftDelim); x >= 0 {
		ldn := Pos(len(l.leftDelim))
		l.pos += Pos(x)
		trimLength := Pos(0)
		if strings.HasPrefix(l.input[l.pos+ldn:], leftTrimMarker) {
			trimLength = rightTrimLength(l.input[l.start:l.pos])
		}
		l.pos -= trimLength
		if l.pos > l.start {
			l.line += strings.Count(l.input[l.start:l.pos], "\n")
			l.emit(itemText)
		}
		l.pos += trimLength
		l.ignore()
		return lexLeftDelim
	}
	l.pos = Pos(len(l.input))
	// Correctly reached EOF.
	if l.pos > l.start {
		l.line += strings.Count(l.input[l.start:l.pos], "\n")
		l.emit(itemText)
	}
	l.emit(itemEOF)
	return nil
}

// rightTrimLength returns the length of the spaces at the end of the string.
func rightTrimLength(s string) Pos {
	return Pos(len(s) - len(strings.TrimRight(s, spaceChars)))
}

// atRightDelim reports whether the lexer is at a right delimiter, possibly preceded by a trim marker.
func (l *lexer) atRightDelim() (delim, trimSpaces bool) {
	if strings.HasPrefix(l.input[l.pos:], l.rightDelim) {
		return true, false
	}
	// The right delim might have the marker before.
	if strings.HasPrefix(l.input[l.pos:], rightTrimMarker) &&
		strings.HasPrefix(l.input[l.pos+trimMarkerLen:], l.rightDelim) {
		return true, true
	}
	return false, false
}

// leftTrimLength returns the length of the spaces at the beginning of the string.
func leftTrimLength(s string) Pos {
	return Pos(len(s) - len(strings.TrimLeft(s, spaceChars)))
}

// lexLeftDelim scans the left delimiter, which is known to be present, possibly with a trim marker.
func lexLeftDelim(l *lexer) stateFn {
	l.pos += Pos(len(l.leftDelim))
	trimSpace := strings.HasPrefix(l.input[l.pos:], leftTrimMarker)
	afterMarker := Pos(0)
	if trimSpace {
		afterMarker = trimMarkerLen
	}
	if strings.HasPrefix(l.input[l.pos+afterMarker:], leftComment) {
		l.pos += afterMarker
		l.ignore()
		return lexComment
	}
	l.emit(itemLeftDelim)
	l.pos += afterMarker
	l.ignore()
	l.parenDepth = 0
	return lexInsideAction
}
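
// For example, the action "{{/* a comment */}}" (or "{{- /* a comment */ -}}"
// with trim markers) is consumed entirely by lexComment below and emits no
// items; because the closing "*/" must be followed immediately by the right
// delimiter (or its trim marker), input such as "{{/* c */ extra}}" fails with
// "comment ends before closing delimiter".
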
// lexComment scans a comment. The left comment marker is known to be present.
func lexComment(l *lexer) stateFn {
	l.pos += Pos(len(leftComment))
	i := strings.Index(l.input[l.pos:], rightComment)
	if i < 0 {
		return l.errorf("unclosed comment")
	}
	l.pos += Pos(i + len(rightComment))
	delim, trimSpace := l.atRightDelim()
	if !delim {
		return l.errorf("comment ends before closing delimiter")
	}
	if trimSpace {
		l.pos += trimMarkerLen
	}
	l.pos += Pos(len(l.rightDelim))
	if trimSpace {
		l.pos += leftTrimLength(l.input[l.pos:])
	}
	l.ignore()
	return lexText
}

// lexRightDelim scans the right delimiter, which is known to be present, possibly with a trim marker.
func lexRightDelim(l *lexer) stateFn {
	trimSpace := strings.HasPrefix(l.input[l.pos:], rightTrimMarker)
	if trimSpace {
		l.pos += trimMarkerLen
		l.ignore()
	}
	l.pos += Pos(len(l.rightDelim))
	l.emit(itemRightDelim)
	if trimSpace {
		l.pos += leftTrimLength(l.input[l.pos:])
		l.ignore()
	}
	return lexText
}

// lexInsideAction scans the elements inside action delimiters.
func lexInsideAction(l *lexer) stateFn {
	// Either number, quoted string, or identifier.
	// Spaces separate arguments; runs of spaces turn into itemSpace.
	// Pipe symbols separate and are emitted.
	delim, _ := l.atRightDelim()
	if delim {
		if l.parenDepth == 0 {
			return lexRightDelim
		}
		return l.errorf("unclosed left paren")
	}
	switch r := l.next(); {
	case r == eof || isEndOfLine(r):
		return l.errorf("unclosed action")
	case isSpace(r):
		return lexSpace
	case r == '=':
		l.emit(itemAssign)
	case r == ':':
		if l.next() != '=' {
			return l.errorf("expected :=")
		}
		l.emit(itemDeclare)
	case r == '|':
		l.emit(itemPipe)
	case r == '"':
		return lexQuote
	case r == '`':
		return lexRawQuote
	case r == '$':
		return lexVariable
	case r == '\'':
		return lexChar
	case r == '.':
		// special look-ahead for ".field" so we don't break l.backup().
		if l.pos < Pos(len(l.input)) {
			r := l.input[l.pos]
			if r < '0' || '9' < r {
				return lexField
			}
		}
		fallthrough // '.' can start a number.
	case r == '+' || r == '-' || ('0' <= r && r <= '9'):
		l.backup()
		return lexNumber
	case isAlphaNumeric(r):
		l.backup()
		return lexIdentifier
	case r == '(':
		l.emit(itemLeftParen)
		l.parenDepth++
	case r == ')':
		l.emit(itemRightParen)
		l.parenDepth--
		if l.parenDepth < 0 {
			return l.errorf("unexpected right paren %#U", r)
		}
	case r <= unicode.MaxASCII && unicode.IsPrint(r):
		l.emit(itemChar)
		return lexInsideAction
	default:
		return l.errorf("unrecognized character in action: %#U", r)
	}
	return lexInsideAction
}

// lexSpace scans a run of space characters.
// One space has already been seen.
func lexSpace(l *lexer) stateFn {
	for isSpace(l.peek()) {
		l.next()
	}
	l.emit(itemSpace)
	return lexInsideAction
}

// lexIdentifier scans an alphanumeric.
func lexIdentifier(l *lexer) stateFn {
Loop:
	for {
		switch r := l.next(); {
		case isAlphaNumeric(r):
			// absorb.
		default:
			l.backup()
			word := l.input[l.start:l.pos]
			if !l.atTerminator() {
				return l.errorf("bad character %#U", r)
			}
			switch {
			case key[word] > itemKeyword:
				l.emit(key[word])
			case word[0] == '.':
				l.emit(itemField)
			case word == "true", word == "false":
				l.emit(itemBool)
			default:
				l.emit(itemIdentifier)
			}
			break Loop
		}
	}
	return lexInsideAction
}

// lexField scans a field: .Alphanumeric.
// The . has been scanned.
func lexField(l *lexer) stateFn {
	return lexFieldOrVariable(l, itemField)
}

// lexVariable scans a Variable: $Alphanumeric.
// The $ has been scanned.
func lexVariable(l *lexer) stateFn {
	if l.atTerminator() { // Nothing interesting follows -> "$".
		l.emit(itemVariable)
		return lexInsideAction
	}
	return lexFieldOrVariable(l, itemVariable)
}

// lexFieldOrVariable scans a field or variable: [.$]Alphanumeric.
// The . or $ has been scanned.
func lexFieldOrVariable(l *lexer, typ itemType) stateFn {
	if l.atTerminator() { // Nothing interesting follows -> "." or "$".
		if typ == itemVariable {
			l.emit(itemVariable)
		} else {
			l.emit(itemDot)
		}
		return lexInsideAction
	}
	var r rune
	for {
		r = l.next()
		if !isAlphaNumeric(r) {
			l.backup()
			break
		}
	}
	if !l.atTerminator() {
		return l.errorf("bad character %#U", r)
	}
	l.emit(typ)
	return lexInsideAction
}

// atTerminator reports whether the input is at a valid termination character to
// appear after an identifier. Breaks .X.Y into two pieces. Also catches cases
// like "$x+2" not being acceptable without a space, in case we decide one
// day to implement arithmetic.
func (l *lexer) atTerminator() bool {
	r := l.peek()
	if isSpace(r) || isEndOfLine(r) {
		return true
	}
	switch r {
	case eof, '.', ',', '|', ':', ')', '(':
		return true
	}
	// Does r start the delimiter? This can be ambiguous (with delim=="//", $x/2 will
	// succeed but should fail) but only in extremely rare cases caused by willfully
	// bad choice of delimiter.
	if rd, _ := utf8.DecodeRuneInString(l.rightDelim); rd == r {
		return true
	}
	return false
}

// lexChar scans a character constant. The initial quote is already
// scanned. Syntax checking is done by the parser.
func lexChar(l *lexer) stateFn {
Loop:
	for {
		switch l.next() {
		case '\\':
			if r := l.next(); r != eof && r != '\n' {
				break
			}
			fallthrough
		case eof, '\n':
			return l.errorf("unterminated character constant")
		case '\'':
			break Loop
		}
	}
	l.emit(itemCharConstant)
	return lexInsideAction
}

// lexNumber scans a number: decimal, octal, hex, float, or imaginary. This
// isn't a perfect number scanner - for instance it accepts "." and "0x0.2"
// and "089" - but when it's wrong the input is invalid and the parser (via
// strconv) will notice.
func lexNumber(l *lexer) stateFn {
	if !l.scanNumber() {
		return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
	}
	if sign := l.peek(); sign == '+' || sign == '-' {
		// Complex: 1+2i. No spaces, must end in 'i'.
		if !l.scanNumber() || l.input[l.pos-1] != 'i' {
			return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
		}
		l.emit(itemComplex)
	} else {
		l.emit(itemNumber)
	}
	return lexInsideAction
}

func (l *lexer) scanNumber() bool {
	// Optional leading sign.
	l.accept("+-")
	// Is it hex?
	digits := "0123456789"
	if l.accept("0") && l.accept("xX") {
		digits = "0123456789abcdefABCDEF"
	}
	l.acceptRun(digits)
	if l.accept(".") {
		l.acceptRun(digits)
	}
	if l.accept("eE") {
		l.accept("+-")
		l.acceptRun("0123456789")
	}
	// Is it imaginary?
	l.accept("i")
	// Next thing mustn't be alphanumeric.
	if isAlphaNumeric(l.peek()) {
		l.next()
		return false
	}
	return true
}

// lexQuote scans a quoted string.
func lexQuote(l *lexer) stateFn {
Loop:
	for {
		switch l.next() {
		case '\\':
			if r := l.next(); r != eof && r != '\n' {
				break
			}
			fallthrough
		case eof, '\n':
			return l.errorf("unterminated quoted string")
		case '"':
			break Loop
		}
	}
	l.emit(itemString)
	return lexInsideAction
}

// lexRawQuote scans a raw quoted string.
func lexRawQuote(l *lexer) stateFn {
Loop:
	for {
		switch l.next() {
		case eof:
			return l.errorf("unterminated raw quoted string")
		case '`':
			break Loop
		}
	}
	l.emit(itemRawString)
	return lexInsideAction
}

// isSpace reports whether r is a space character.
func isSpace(r rune) bool {
	return r == ' ' || r == '\t'
}

// isEndOfLine reports whether r is an end-of-line character.
func isEndOfLine(r rune) bool {
	return r == '\r' || r == '\n'
}

// isAlphaNumeric reports whether r is an alphabetic, digit, or underscore.
func isAlphaNumeric(r rune) bool {
	return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
}
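
// lexDriveExample is a minimal illustrative sketch of how a caller such as the
// parser in this package might drive the lexer; it is not used by the package
// itself, and the function name and sample input are assumptions made for the
// example. lex starts the scanning goroutine with the default "{{" and "}}"
// delimiters, nextItem pulls one token at a time, and the loop stops at the
// itemEOF or itemError token that always terminates the stream.
func lexDriveExample() {
	l := lex("example", `Hello, {{printf "%q" .Name}}!`, "", "")
	for {
		it := l.nextItem()
		fmt.Printf("line %d, byte %d: %s\n", it.line, int(it.pos), it)
		if it.typ == itemEOF || it.typ == itemError {
			break
		}
	}
}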