github.com/ice-blockchain/go/src@v0.0.0-20240403114104-1564d284e521/text/template/parse/lex.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package parse 6 7 import ( 8 "fmt" 9 "strings" 10 "unicode" 11 "unicode/utf8" 12 ) 13 14 // item represents a token or text string returned from the scanner. 15 type item struct { 16 typ itemType // The type of this item. 17 pos Pos // The starting position, in bytes, of this item in the input string. 18 val string // The value of this item. 19 line int // The line number at the start of this item. 20 } 21 22 func (i item) String() string { 23 switch { 24 case i.typ == itemEOF: 25 return "EOF" 26 case i.typ == itemError: 27 return i.val 28 case i.typ > itemKeyword: 29 return fmt.Sprintf("<%s>", i.val) 30 case len(i.val) > 10: 31 return fmt.Sprintf("%.10q...", i.val) 32 } 33 return fmt.Sprintf("%q", i.val) 34 } 35 36 // itemType identifies the type of lex items. 37 type itemType int 38 39 const ( 40 itemError itemType = iota // error occurred; value is text of error 41 itemBool // boolean constant 42 itemChar // printable ASCII character; grab bag for comma etc. 43 itemCharConstant // character constant 44 itemComment // comment text 45 itemComplex // complex constant (1+2i); imaginary is just a number 46 itemAssign // equals ('=') introducing an assignment 47 itemDeclare // colon-equals (':=') introducing a declaration 48 itemEOF 49 itemField // alphanumeric identifier starting with '.' 50 itemIdentifier // alphanumeric identifier not starting with '.' 51 itemLeftDelim // left action delimiter 52 itemLeftParen // '(' inside action 53 itemNumber // simple number, including imaginary 54 itemPipe // pipe symbol 55 itemRawString // raw quoted string (includes quotes) 56 itemRightDelim // right action delimiter 57 itemRightParen // ')' inside action 58 itemSpace // run of spaces separating arguments 59 itemString // quoted string (includes quotes) 60 itemText // plain text 61 itemVariable // variable starting with '$', such as '$' or '$1' or '$hello' 62 // Keywords appear after all the rest. 63 itemKeyword // used only to delimit the keywords 64 itemBlock // block keyword 65 itemBreak // break keyword 66 itemContinue // continue keyword 67 itemDot // the cursor, spelled '.' 68 itemDefine // define keyword 69 itemElse // else keyword 70 itemEnd // end keyword 71 itemIf // if keyword 72 itemNil // the untyped nil constant, easiest to treat as a keyword 73 itemRange // range keyword 74 itemTemplate // template keyword 75 itemWith // with keyword 76 ) 77 78 var key = map[string]itemType{ 79 ".": itemDot, 80 "block": itemBlock, 81 "break": itemBreak, 82 "continue": itemContinue, 83 "define": itemDefine, 84 "else": itemElse, 85 "end": itemEnd, 86 "if": itemIf, 87 "range": itemRange, 88 "nil": itemNil, 89 "template": itemTemplate, 90 "with": itemWith, 91 } 92 93 const eof = -1 94 95 // Trimming spaces. 96 // If the action begins "{{- " rather than "{{", then all space/tab/newlines 97 // preceding the action are trimmed; conversely if it ends " -}}" the 98 // leading spaces are trimmed. This is done entirely in the lexer; the 99 // parser never sees it happen. We require an ASCII space (' ', \t, \r, \n) 100 // to be present to avoid ambiguity with things like "{{-3}}". It reads 101 // better with the space present anyway. For simplicity, only ASCII 102 // does the job. 103 const ( 104 spaceChars = " \t\r\n" // These are the space characters defined by Go itself. 105 trimMarker = '-' // Attached to left/right delimiter, trims trailing spaces from preceding/following text. 106 trimMarkerLen = Pos(1 + 1) // marker plus space before or after 107 ) 108 109 // stateFn represents the state of the scanner as a function that returns the next state. 110 type stateFn func(*lexer) stateFn 111 112 // lexer holds the state of the scanner. 113 type lexer struct { 114 name string // the name of the input; used only for error reports 115 input string // the string being scanned 116 leftDelim string // start of action marker 117 rightDelim string // end of action marker 118 pos Pos // current position in the input 119 start Pos // start position of this item 120 atEOF bool // we have hit the end of input and returned eof 121 parenDepth int // nesting depth of ( ) exprs 122 line int // 1+number of newlines seen 123 startLine int // start line of this item 124 item item // item to return to parser 125 insideAction bool // are we inside an action? 126 options lexOptions 127 } 128 129 // lexOptions control behavior of the lexer. All default to false. 130 type lexOptions struct { 131 emitComment bool // emit itemComment tokens. 132 breakOK bool // break keyword allowed 133 continueOK bool // continue keyword allowed 134 } 135 136 // next returns the next rune in the input. 137 func (l *lexer) next() rune { 138 if int(l.pos) >= len(l.input) { 139 l.atEOF = true 140 return eof 141 } 142 r, w := utf8.DecodeRuneInString(l.input[l.pos:]) 143 l.pos += Pos(w) 144 if r == '\n' { 145 l.line++ 146 } 147 return r 148 } 149 150 // peek returns but does not consume the next rune in the input. 151 func (l *lexer) peek() rune { 152 r := l.next() 153 l.backup() 154 return r 155 } 156 157 // backup steps back one rune. 158 func (l *lexer) backup() { 159 if !l.atEOF && l.pos > 0 { 160 r, w := utf8.DecodeLastRuneInString(l.input[:l.pos]) 161 l.pos -= Pos(w) 162 // Correct newline count. 163 if r == '\n' { 164 l.line-- 165 } 166 } 167 } 168 169 // thisItem returns the item at the current input point with the specified type 170 // and advances the input. 171 func (l *lexer) thisItem(t itemType) item { 172 i := item{t, l.start, l.input[l.start:l.pos], l.startLine} 173 l.start = l.pos 174 l.startLine = l.line 175 return i 176 } 177 178 // emit passes the trailing text as an item back to the parser. 179 func (l *lexer) emit(t itemType) stateFn { 180 return l.emitItem(l.thisItem(t)) 181 } 182 183 // emitItem passes the specified item to the parser. 184 func (l *lexer) emitItem(i item) stateFn { 185 l.item = i 186 return nil 187 } 188 189 // ignore skips over the pending input before this point. 190 // It tracks newlines in the ignored text, so use it only 191 // for text that is skipped without calling l.next. 192 func (l *lexer) ignore() { 193 l.line += strings.Count(l.input[l.start:l.pos], "\n") 194 l.start = l.pos 195 l.startLine = l.line 196 } 197 198 // accept consumes the next rune if it's from the valid set. 199 func (l *lexer) accept(valid string) bool { 200 if strings.ContainsRune(valid, l.next()) { 201 return true 202 } 203 l.backup() 204 return false 205 } 206 207 // acceptRun consumes a run of runes from the valid set. 208 func (l *lexer) acceptRun(valid string) { 209 for strings.ContainsRune(valid, l.next()) { 210 } 211 l.backup() 212 } 213 214 // errorf returns an error token and terminates the scan by passing 215 // back a nil pointer that will be the next state, terminating l.nextItem. 216 func (l *lexer) errorf(format string, args ...any) stateFn { 217 l.item = item{itemError, l.start, fmt.Sprintf(format, args...), l.startLine} 218 l.start = 0 219 l.pos = 0 220 l.input = l.input[:0] 221 return nil 222 } 223 224 // nextItem returns the next item from the input. 225 // Called by the parser, not in the lexing goroutine. 226 func (l *lexer) nextItem() item { 227 l.item = item{itemEOF, l.pos, "EOF", l.startLine} 228 state := lexText 229 if l.insideAction { 230 state = lexInsideAction 231 } 232 for { 233 state = state(l) 234 if state == nil { 235 return l.item 236 } 237 } 238 } 239 240 // lex creates a new scanner for the input string. 241 func lex(name, input, left, right string) *lexer { 242 if left == "" { 243 left = leftDelim 244 } 245 if right == "" { 246 right = rightDelim 247 } 248 l := &lexer{ 249 name: name, 250 input: input, 251 leftDelim: left, 252 rightDelim: right, 253 line: 1, 254 startLine: 1, 255 insideAction: false, 256 } 257 return l 258 } 259 260 // state functions 261 262 const ( 263 leftDelim = "{{" 264 rightDelim = "}}" 265 leftComment = "/*" 266 rightComment = "*/" 267 ) 268 269 // lexText scans until an opening action delimiter, "{{". 270 func lexText(l *lexer) stateFn { 271 if x := strings.Index(l.input[l.pos:], l.leftDelim); x >= 0 { 272 if x > 0 { 273 l.pos += Pos(x) 274 // Do we trim any trailing space? 275 trimLength := Pos(0) 276 delimEnd := l.pos + Pos(len(l.leftDelim)) 277 if hasLeftTrimMarker(l.input[delimEnd:]) { 278 trimLength = rightTrimLength(l.input[l.start:l.pos]) 279 } 280 l.pos -= trimLength 281 l.line += strings.Count(l.input[l.start:l.pos], "\n") 282 i := l.thisItem(itemText) 283 l.pos += trimLength 284 l.ignore() 285 if len(i.val) > 0 { 286 return l.emitItem(i) 287 } 288 } 289 return lexLeftDelim 290 } 291 l.pos = Pos(len(l.input)) 292 // Correctly reached EOF. 293 if l.pos > l.start { 294 l.line += strings.Count(l.input[l.start:l.pos], "\n") 295 return l.emit(itemText) 296 } 297 return l.emit(itemEOF) 298 } 299 300 // rightTrimLength returns the length of the spaces at the end of the string. 301 func rightTrimLength(s string) Pos { 302 return Pos(len(s) - len(strings.TrimRight(s, spaceChars))) 303 } 304 305 // atRightDelim reports whether the lexer is at a right delimiter, possibly preceded by a trim marker. 306 func (l *lexer) atRightDelim() (delim, trimSpaces bool) { 307 if hasRightTrimMarker(l.input[l.pos:]) && strings.HasPrefix(l.input[l.pos+trimMarkerLen:], l.rightDelim) { // With trim marker. 308 return true, true 309 } 310 if strings.HasPrefix(l.input[l.pos:], l.rightDelim) { // Without trim marker. 311 return true, false 312 } 313 return false, false 314 } 315 316 // leftTrimLength returns the length of the spaces at the beginning of the string. 317 func leftTrimLength(s string) Pos { 318 return Pos(len(s) - len(strings.TrimLeft(s, spaceChars))) 319 } 320 321 // lexLeftDelim scans the left delimiter, which is known to be present, possibly with a trim marker. 322 // (The text to be trimmed has already been emitted.) 323 func lexLeftDelim(l *lexer) stateFn { 324 l.pos += Pos(len(l.leftDelim)) 325 trimSpace := hasLeftTrimMarker(l.input[l.pos:]) 326 afterMarker := Pos(0) 327 if trimSpace { 328 afterMarker = trimMarkerLen 329 } 330 if strings.HasPrefix(l.input[l.pos+afterMarker:], leftComment) { 331 l.pos += afterMarker 332 l.ignore() 333 return lexComment 334 } 335 i := l.thisItem(itemLeftDelim) 336 l.insideAction = true 337 l.pos += afterMarker 338 l.ignore() 339 l.parenDepth = 0 340 return l.emitItem(i) 341 } 342 343 // lexComment scans a comment. The left comment marker is known to be present. 344 func lexComment(l *lexer) stateFn { 345 l.pos += Pos(len(leftComment)) 346 x := strings.Index(l.input[l.pos:], rightComment) 347 if x < 0 { 348 return l.errorf("unclosed comment") 349 } 350 l.pos += Pos(x + len(rightComment)) 351 delim, trimSpace := l.atRightDelim() 352 if !delim { 353 return l.errorf("comment ends before closing delimiter") 354 } 355 i := l.thisItem(itemComment) 356 if trimSpace { 357 l.pos += trimMarkerLen 358 } 359 l.pos += Pos(len(l.rightDelim)) 360 if trimSpace { 361 l.pos += leftTrimLength(l.input[l.pos:]) 362 } 363 l.ignore() 364 if l.options.emitComment { 365 return l.emitItem(i) 366 } 367 return lexText 368 } 369 370 // lexRightDelim scans the right delimiter, which is known to be present, possibly with a trim marker. 371 func lexRightDelim(l *lexer) stateFn { 372 _, trimSpace := l.atRightDelim() 373 if trimSpace { 374 l.pos += trimMarkerLen 375 l.ignore() 376 } 377 l.pos += Pos(len(l.rightDelim)) 378 i := l.thisItem(itemRightDelim) 379 if trimSpace { 380 l.pos += leftTrimLength(l.input[l.pos:]) 381 l.ignore() 382 } 383 l.insideAction = false 384 return l.emitItem(i) 385 } 386 387 // lexInsideAction scans the elements inside action delimiters. 388 func lexInsideAction(l *lexer) stateFn { 389 // Either number, quoted string, or identifier. 390 // Spaces separate arguments; runs of spaces turn into itemSpace. 391 // Pipe symbols separate and are emitted. 392 delim, _ := l.atRightDelim() 393 if delim { 394 if l.parenDepth == 0 { 395 return lexRightDelim 396 } 397 return l.errorf("unclosed left paren") 398 } 399 switch r := l.next(); { 400 case r == eof: 401 return l.errorf("unclosed action") 402 case isSpace(r): 403 l.backup() // Put space back in case we have " -}}". 404 return lexSpace 405 case r == '=': 406 return l.emit(itemAssign) 407 case r == ':': 408 if l.next() != '=' { 409 return l.errorf("expected :=") 410 } 411 return l.emit(itemDeclare) 412 case r == '|': 413 return l.emit(itemPipe) 414 case r == '"': 415 return lexQuote 416 case r == '`': 417 return lexRawQuote 418 case r == '$': 419 return lexVariable 420 case r == '\'': 421 return lexChar 422 case r == '.': 423 // special look-ahead for ".field" so we don't break l.backup(). 424 if l.pos < Pos(len(l.input)) { 425 r := l.input[l.pos] 426 if r < '0' || '9' < r { 427 return lexField 428 } 429 } 430 fallthrough // '.' can start a number. 431 case r == '+' || r == '-' || ('0' <= r && r <= '9'): 432 l.backup() 433 return lexNumber 434 case isAlphaNumeric(r): 435 l.backup() 436 return lexIdentifier 437 case r == '(': 438 l.parenDepth++ 439 return l.emit(itemLeftParen) 440 case r == ')': 441 l.parenDepth-- 442 if l.parenDepth < 0 { 443 return l.errorf("unexpected right paren") 444 } 445 return l.emit(itemRightParen) 446 case r <= unicode.MaxASCII && unicode.IsPrint(r): 447 return l.emit(itemChar) 448 default: 449 return l.errorf("unrecognized character in action: %#U", r) 450 } 451 } 452 453 // lexSpace scans a run of space characters. 454 // We have not consumed the first space, which is known to be present. 455 // Take care if there is a trim-marked right delimiter, which starts with a space. 456 func lexSpace(l *lexer) stateFn { 457 var r rune 458 var numSpaces int 459 for { 460 r = l.peek() 461 if !isSpace(r) { 462 break 463 } 464 l.next() 465 numSpaces++ 466 } 467 // Be careful about a trim-marked closing delimiter, which has a minus 468 // after a space. We know there is a space, so check for the '-' that might follow. 469 if hasRightTrimMarker(l.input[l.pos-1:]) && strings.HasPrefix(l.input[l.pos-1+trimMarkerLen:], l.rightDelim) { 470 l.backup() // Before the space. 471 if numSpaces == 1 { 472 return lexRightDelim // On the delim, so go right to that. 473 } 474 } 475 return l.emit(itemSpace) 476 } 477 478 // lexIdentifier scans an alphanumeric. 479 func lexIdentifier(l *lexer) stateFn { 480 for { 481 switch r := l.next(); { 482 case isAlphaNumeric(r): 483 // absorb. 484 default: 485 l.backup() 486 word := l.input[l.start:l.pos] 487 if !l.atTerminator() { 488 return l.errorf("bad character %#U", r) 489 } 490 switch { 491 case key[word] > itemKeyword: 492 item := key[word] 493 if item == itemBreak && !l.options.breakOK || item == itemContinue && !l.options.continueOK { 494 return l.emit(itemIdentifier) 495 } 496 return l.emit(item) 497 case word[0] == '.': 498 return l.emit(itemField) 499 case word == "true", word == "false": 500 return l.emit(itemBool) 501 default: 502 return l.emit(itemIdentifier) 503 } 504 } 505 } 506 } 507 508 // lexField scans a field: .Alphanumeric. 509 // The . has been scanned. 510 func lexField(l *lexer) stateFn { 511 return lexFieldOrVariable(l, itemField) 512 } 513 514 // lexVariable scans a Variable: $Alphanumeric. 515 // The $ has been scanned. 516 func lexVariable(l *lexer) stateFn { 517 if l.atTerminator() { // Nothing interesting follows -> "$". 518 return l.emit(itemVariable) 519 } 520 return lexFieldOrVariable(l, itemVariable) 521 } 522 523 // lexFieldOrVariable scans a field or variable: [.$]Alphanumeric. 524 // The . or $ has been scanned. 525 func lexFieldOrVariable(l *lexer, typ itemType) stateFn { 526 if l.atTerminator() { // Nothing interesting follows -> "." or "$". 527 if typ == itemVariable { 528 return l.emit(itemVariable) 529 } 530 return l.emit(itemDot) 531 } 532 var r rune 533 for { 534 r = l.next() 535 if !isAlphaNumeric(r) { 536 l.backup() 537 break 538 } 539 } 540 if !l.atTerminator() { 541 return l.errorf("bad character %#U", r) 542 } 543 return l.emit(typ) 544 } 545 546 // atTerminator reports whether the input is at valid termination character to 547 // appear after an identifier. Breaks .X.Y into two pieces. Also catches cases 548 // like "$x+2" not being acceptable without a space, in case we decide one 549 // day to implement arithmetic. 550 func (l *lexer) atTerminator() bool { 551 r := l.peek() 552 if isSpace(r) { 553 return true 554 } 555 switch r { 556 case eof, '.', ',', '|', ':', ')', '(': 557 return true 558 } 559 return strings.HasPrefix(l.input[l.pos:], l.rightDelim) 560 } 561 562 // lexChar scans a character constant. The initial quote is already 563 // scanned. Syntax checking is done by the parser. 564 func lexChar(l *lexer) stateFn { 565 Loop: 566 for { 567 switch l.next() { 568 case '\\': 569 if r := l.next(); r != eof && r != '\n' { 570 break 571 } 572 fallthrough 573 case eof, '\n': 574 return l.errorf("unterminated character constant") 575 case '\'': 576 break Loop 577 } 578 } 579 return l.emit(itemCharConstant) 580 } 581 582 // lexNumber scans a number: decimal, octal, hex, float, or imaginary. This 583 // isn't a perfect number scanner - for instance it accepts "." and "0x0.2" 584 // and "089" - but when it's wrong the input is invalid and the parser (via 585 // strconv) will notice. 586 func lexNumber(l *lexer) stateFn { 587 if !l.scanNumber() { 588 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos]) 589 } 590 if sign := l.peek(); sign == '+' || sign == '-' { 591 // Complex: 1+2i. No spaces, must end in 'i'. 592 if !l.scanNumber() || l.input[l.pos-1] != 'i' { 593 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos]) 594 } 595 return l.emit(itemComplex) 596 } 597 return l.emit(itemNumber) 598 } 599 600 func (l *lexer) scanNumber() bool { 601 // Optional leading sign. 602 l.accept("+-") 603 // Is it hex? 604 digits := "0123456789_" 605 if l.accept("0") { 606 // Note: Leading 0 does not mean octal in floats. 607 if l.accept("xX") { 608 digits = "0123456789abcdefABCDEF_" 609 } else if l.accept("oO") { 610 digits = "01234567_" 611 } else if l.accept("bB") { 612 digits = "01_" 613 } 614 } 615 l.acceptRun(digits) 616 if l.accept(".") { 617 l.acceptRun(digits) 618 } 619 if len(digits) == 10+1 && l.accept("eE") { 620 l.accept("+-") 621 l.acceptRun("0123456789_") 622 } 623 if len(digits) == 16+6+1 && l.accept("pP") { 624 l.accept("+-") 625 l.acceptRun("0123456789_") 626 } 627 // Is it imaginary? 628 l.accept("i") 629 // Next thing mustn't be alphanumeric. 630 if isAlphaNumeric(l.peek()) { 631 l.next() 632 return false 633 } 634 return true 635 } 636 637 // lexQuote scans a quoted string. 638 func lexQuote(l *lexer) stateFn { 639 Loop: 640 for { 641 switch l.next() { 642 case '\\': 643 if r := l.next(); r != eof && r != '\n' { 644 break 645 } 646 fallthrough 647 case eof, '\n': 648 return l.errorf("unterminated quoted string") 649 case '"': 650 break Loop 651 } 652 } 653 return l.emit(itemString) 654 } 655 656 // lexRawQuote scans a raw quoted string. 657 func lexRawQuote(l *lexer) stateFn { 658 Loop: 659 for { 660 switch l.next() { 661 case eof: 662 return l.errorf("unterminated raw quoted string") 663 case '`': 664 break Loop 665 } 666 } 667 return l.emit(itemRawString) 668 } 669 670 // isSpace reports whether r is a space character. 671 func isSpace(r rune) bool { 672 return r == ' ' || r == '\t' || r == '\r' || r == '\n' 673 } 674 675 // isAlphaNumeric reports whether r is an alphabetic, digit, or underscore. 676 func isAlphaNumeric(r rune) bool { 677 return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r) 678 } 679 680 func hasLeftTrimMarker(s string) bool { 681 return len(s) >= 2 && s[0] == trimMarker && isSpace(rune(s[1])) 682 } 683 684 func hasRightTrimMarker(s string) bool { 685 return len(s) >= 2 && isSpace(rune(s[0])) && s[1] == trimMarker 686 }