gitee.com/lonely0422/gometalinter.git@v3.0.1-0.20190307123442-32416ab75314+incompatible/_linters/src/github.com/BurntSushi/toml/lex.go (about) 1 package toml 2 3 import ( 4 "fmt" 5 "strings" 6 "unicode" 7 "unicode/utf8" 8 ) 9 10 type itemType int 11 12 const ( 13 itemError itemType = iota 14 itemNIL // used in the parser to indicate no type 15 itemEOF 16 itemText 17 itemString 18 itemRawString 19 itemMultilineString 20 itemRawMultilineString 21 itemBool 22 itemInteger 23 itemFloat 24 itemDatetime 25 itemArray // the start of an array 26 itemArrayEnd 27 itemTableStart 28 itemTableEnd 29 itemArrayTableStart 30 itemArrayTableEnd 31 itemKeyStart 32 itemCommentStart 33 itemInlineTableStart 34 itemInlineTableEnd 35 ) 36 37 const ( 38 eof = 0 39 comma = ',' 40 tableStart = '[' 41 tableEnd = ']' 42 arrayTableStart = '[' 43 arrayTableEnd = ']' 44 tableSep = '.' 45 keySep = '=' 46 arrayStart = '[' 47 arrayEnd = ']' 48 commentStart = '#' 49 stringStart = '"' 50 stringEnd = '"' 51 rawStringStart = '\'' 52 rawStringEnd = '\'' 53 inlineTableStart = '{' 54 inlineTableEnd = '}' 55 ) 56 57 type stateFn func(lx *lexer) stateFn 58 59 type lexer struct { 60 input string 61 start int 62 pos int 63 line int 64 state stateFn 65 items chan item 66 67 // Allow for backing up up to three runes. 68 // This is necessary because TOML contains 3-rune tokens (""" and '''). 69 prevWidths [3]int 70 nprev int // how many of prevWidths are in use 71 // If we emit an eof, we can still back up, but it is not OK to call 72 // next again. 73 atEOF bool 74 75 // A stack of state functions used to maintain context. 76 // The idea is to reuse parts of the state machine in various places. 77 // For example, values can appear at the top level or within arbitrarily 78 // nested arrays. The last state on the stack is used after a value has 79 // been lexed. Similarly for comments. 80 stack []stateFn 81 } 82 83 type item struct { 84 typ itemType 85 val string 86 line int 87 } 88 89 func (lx *lexer) nextItem() item { 90 for { 91 select { 92 case item := <-lx.items: 93 return item 94 default: 95 lx.state = lx.state(lx) 96 } 97 } 98 } 99 100 func lex(input string) *lexer { 101 lx := &lexer{ 102 input: input, 103 state: lexTop, 104 line: 1, 105 items: make(chan item, 10), 106 stack: make([]stateFn, 0, 10), 107 } 108 return lx 109 } 110 111 func (lx *lexer) push(state stateFn) { 112 lx.stack = append(lx.stack, state) 113 } 114 115 func (lx *lexer) pop() stateFn { 116 if len(lx.stack) == 0 { 117 return lx.errorf("BUG in lexer: no states to pop") 118 } 119 last := lx.stack[len(lx.stack)-1] 120 lx.stack = lx.stack[0 : len(lx.stack)-1] 121 return last 122 } 123 124 func (lx *lexer) current() string { 125 return lx.input[lx.start:lx.pos] 126 } 127 128 func (lx *lexer) emit(typ itemType) { 129 lx.items <- item{typ, lx.current(), lx.line} 130 lx.start = lx.pos 131 } 132 133 func (lx *lexer) emitTrim(typ itemType) { 134 lx.items <- item{typ, strings.TrimSpace(lx.current()), lx.line} 135 lx.start = lx.pos 136 } 137 138 func (lx *lexer) next() (r rune) { 139 if lx.atEOF { 140 panic("next called after EOF") 141 } 142 if lx.pos >= len(lx.input) { 143 lx.atEOF = true 144 return eof 145 } 146 147 if lx.input[lx.pos] == '\n' { 148 lx.line++ 149 } 150 lx.prevWidths[2] = lx.prevWidths[1] 151 lx.prevWidths[1] = lx.prevWidths[0] 152 if lx.nprev < 3 { 153 lx.nprev++ 154 } 155 r, w := utf8.DecodeRuneInString(lx.input[lx.pos:]) 156 lx.prevWidths[0] = w 157 lx.pos += w 158 return r 159 } 160 161 // ignore skips over the pending input before this point. 162 func (lx *lexer) ignore() { 163 lx.start = lx.pos 164 } 165 166 // backup steps back one rune. Can be called only twice between calls to next. 167 func (lx *lexer) backup() { 168 if lx.atEOF { 169 lx.atEOF = false 170 return 171 } 172 if lx.nprev < 1 { 173 panic("backed up too far") 174 } 175 w := lx.prevWidths[0] 176 lx.prevWidths[0] = lx.prevWidths[1] 177 lx.prevWidths[1] = lx.prevWidths[2] 178 lx.nprev-- 179 lx.pos -= w 180 if lx.pos < len(lx.input) && lx.input[lx.pos] == '\n' { 181 lx.line-- 182 } 183 } 184 185 // accept consumes the next rune if it's equal to `valid`. 186 func (lx *lexer) accept(valid rune) bool { 187 if lx.next() == valid { 188 return true 189 } 190 lx.backup() 191 return false 192 } 193 194 // peek returns but does not consume the next rune in the input. 195 func (lx *lexer) peek() rune { 196 r := lx.next() 197 lx.backup() 198 return r 199 } 200 201 // skip ignores all input that matches the given predicate. 202 func (lx *lexer) skip(pred func(rune) bool) { 203 for { 204 r := lx.next() 205 if pred(r) { 206 continue 207 } 208 lx.backup() 209 lx.ignore() 210 return 211 } 212 } 213 214 // errorf stops all lexing by emitting an error and returning `nil`. 215 // Note that any value that is a character is escaped if it's a special 216 // character (newlines, tabs, etc.). 217 func (lx *lexer) errorf(format string, values ...interface{}) stateFn { 218 lx.items <- item{ 219 itemError, 220 fmt.Sprintf(format, values...), 221 lx.line, 222 } 223 return nil 224 } 225 226 // lexTop consumes elements at the top level of TOML data. 227 func lexTop(lx *lexer) stateFn { 228 r := lx.next() 229 if isWhitespace(r) || isNL(r) { 230 return lexSkip(lx, lexTop) 231 } 232 switch r { 233 case commentStart: 234 lx.push(lexTop) 235 return lexCommentStart 236 case tableStart: 237 return lexTableStart 238 case eof: 239 if lx.pos > lx.start { 240 return lx.errorf("unexpected EOF") 241 } 242 lx.emit(itemEOF) 243 return nil 244 } 245 246 // At this point, the only valid item can be a key, so we back up 247 // and let the key lexer do the rest. 248 lx.backup() 249 lx.push(lexTopEnd) 250 return lexKeyStart 251 } 252 253 // lexTopEnd is entered whenever a top-level item has been consumed. (A value 254 // or a table.) It must see only whitespace, and will turn back to lexTop 255 // upon a newline. If it sees EOF, it will quit the lexer successfully. 256 func lexTopEnd(lx *lexer) stateFn { 257 r := lx.next() 258 switch { 259 case r == commentStart: 260 // a comment will read to a newline for us. 261 lx.push(lexTop) 262 return lexCommentStart 263 case isWhitespace(r): 264 return lexTopEnd 265 case isNL(r): 266 lx.ignore() 267 return lexTop 268 case r == eof: 269 lx.emit(itemEOF) 270 return nil 271 } 272 return lx.errorf("expected a top-level item to end with a newline, "+ 273 "comment, or EOF, but got %q instead", r) 274 } 275 276 // lexTable lexes the beginning of a table. Namely, it makes sure that 277 // it starts with a character other than '.' and ']'. 278 // It assumes that '[' has already been consumed. 279 // It also handles the case that this is an item in an array of tables. 280 // e.g., '[[name]]'. 281 func lexTableStart(lx *lexer) stateFn { 282 if lx.peek() == arrayTableStart { 283 lx.next() 284 lx.emit(itemArrayTableStart) 285 lx.push(lexArrayTableEnd) 286 } else { 287 lx.emit(itemTableStart) 288 lx.push(lexTableEnd) 289 } 290 return lexTableNameStart 291 } 292 293 func lexTableEnd(lx *lexer) stateFn { 294 lx.emit(itemTableEnd) 295 return lexTopEnd 296 } 297 298 func lexArrayTableEnd(lx *lexer) stateFn { 299 if r := lx.next(); r != arrayTableEnd { 300 return lx.errorf("expected end of table array name delimiter %q, "+ 301 "but got %q instead", arrayTableEnd, r) 302 } 303 lx.emit(itemArrayTableEnd) 304 return lexTopEnd 305 } 306 307 func lexTableNameStart(lx *lexer) stateFn { 308 lx.skip(isWhitespace) 309 switch r := lx.peek(); { 310 case r == tableEnd || r == eof: 311 return lx.errorf("unexpected end of table name " + 312 "(table names cannot be empty)") 313 case r == tableSep: 314 return lx.errorf("unexpected table separator " + 315 "(table names cannot be empty)") 316 case r == stringStart || r == rawStringStart: 317 lx.ignore() 318 lx.push(lexTableNameEnd) 319 return lexValue // reuse string lexing 320 default: 321 return lexBareTableName 322 } 323 } 324 325 // lexBareTableName lexes the name of a table. It assumes that at least one 326 // valid character for the table has already been read. 327 func lexBareTableName(lx *lexer) stateFn { 328 r := lx.next() 329 if isBareKeyChar(r) { 330 return lexBareTableName 331 } 332 lx.backup() 333 lx.emit(itemText) 334 return lexTableNameEnd 335 } 336 337 // lexTableNameEnd reads the end of a piece of a table name, optionally 338 // consuming whitespace. 339 func lexTableNameEnd(lx *lexer) stateFn { 340 lx.skip(isWhitespace) 341 switch r := lx.next(); { 342 case isWhitespace(r): 343 return lexTableNameEnd 344 case r == tableSep: 345 lx.ignore() 346 return lexTableNameStart 347 case r == tableEnd: 348 return lx.pop() 349 default: 350 return lx.errorf("expected '.' or ']' to end table name, "+ 351 "but got %q instead", r) 352 } 353 } 354 355 // lexKeyStart consumes a key name up until the first non-whitespace character. 356 // lexKeyStart will ignore whitespace. 357 func lexKeyStart(lx *lexer) stateFn { 358 r := lx.peek() 359 switch { 360 case r == keySep: 361 return lx.errorf("unexpected key separator %q", keySep) 362 case isWhitespace(r) || isNL(r): 363 lx.next() 364 return lexSkip(lx, lexKeyStart) 365 case r == stringStart || r == rawStringStart: 366 lx.ignore() 367 lx.emit(itemKeyStart) 368 lx.push(lexKeyEnd) 369 return lexValue // reuse string lexing 370 default: 371 lx.ignore() 372 lx.emit(itemKeyStart) 373 return lexBareKey 374 } 375 } 376 377 // lexBareKey consumes the text of a bare key. Assumes that the first character 378 // (which is not whitespace) has not yet been consumed. 379 func lexBareKey(lx *lexer) stateFn { 380 switch r := lx.next(); { 381 case isBareKeyChar(r): 382 return lexBareKey 383 case isWhitespace(r): 384 lx.backup() 385 lx.emit(itemText) 386 return lexKeyEnd 387 case r == keySep: 388 lx.backup() 389 lx.emit(itemText) 390 return lexKeyEnd 391 default: 392 return lx.errorf("bare keys cannot contain %q", r) 393 } 394 } 395 396 // lexKeyEnd consumes the end of a key and trims whitespace (up to the key 397 // separator). 398 func lexKeyEnd(lx *lexer) stateFn { 399 switch r := lx.next(); { 400 case r == keySep: 401 return lexSkip(lx, lexValue) 402 case isWhitespace(r): 403 return lexSkip(lx, lexKeyEnd) 404 default: 405 return lx.errorf("expected key separator %q, but got %q instead", 406 keySep, r) 407 } 408 } 409 410 // lexValue starts the consumption of a value anywhere a value is expected. 411 // lexValue will ignore whitespace. 412 // After a value is lexed, the last state on the next is popped and returned. 413 func lexValue(lx *lexer) stateFn { 414 // We allow whitespace to precede a value, but NOT newlines. 415 // In array syntax, the array states are responsible for ignoring newlines. 416 r := lx.next() 417 switch { 418 case isWhitespace(r): 419 return lexSkip(lx, lexValue) 420 case isDigit(r): 421 lx.backup() // avoid an extra state and use the same as above 422 return lexNumberOrDateStart 423 } 424 switch r { 425 case arrayStart: 426 lx.ignore() 427 lx.emit(itemArray) 428 return lexArrayValue 429 case inlineTableStart: 430 lx.ignore() 431 lx.emit(itemInlineTableStart) 432 return lexInlineTableValue 433 case stringStart: 434 if lx.accept(stringStart) { 435 if lx.accept(stringStart) { 436 lx.ignore() // Ignore """ 437 return lexMultilineString 438 } 439 lx.backup() 440 } 441 lx.ignore() // ignore the '"' 442 return lexString 443 case rawStringStart: 444 if lx.accept(rawStringStart) { 445 if lx.accept(rawStringStart) { 446 lx.ignore() // Ignore """ 447 return lexMultilineRawString 448 } 449 lx.backup() 450 } 451 lx.ignore() // ignore the "'" 452 return lexRawString 453 case '+', '-': 454 return lexNumberStart 455 case '.': // special error case, be kind to users 456 return lx.errorf("floats must start with a digit, not '.'") 457 } 458 if unicode.IsLetter(r) { 459 // Be permissive here; lexBool will give a nice error if the 460 // user wrote something like 461 // x = foo 462 // (i.e. not 'true' or 'false' but is something else word-like.) 463 lx.backup() 464 return lexBool 465 } 466 return lx.errorf("expected value but found %q instead", r) 467 } 468 469 // lexArrayValue consumes one value in an array. It assumes that '[' or ',' 470 // have already been consumed. All whitespace and newlines are ignored. 471 func lexArrayValue(lx *lexer) stateFn { 472 r := lx.next() 473 switch { 474 case isWhitespace(r) || isNL(r): 475 return lexSkip(lx, lexArrayValue) 476 case r == commentStart: 477 lx.push(lexArrayValue) 478 return lexCommentStart 479 case r == comma: 480 return lx.errorf("unexpected comma") 481 case r == arrayEnd: 482 // NOTE(caleb): The spec isn't clear about whether you can have 483 // a trailing comma or not, so we'll allow it. 484 return lexArrayEnd 485 } 486 487 lx.backup() 488 lx.push(lexArrayValueEnd) 489 return lexValue 490 } 491 492 // lexArrayValueEnd consumes everything between the end of an array value and 493 // the next value (or the end of the array): it ignores whitespace and newlines 494 // and expects either a ',' or a ']'. 495 func lexArrayValueEnd(lx *lexer) stateFn { 496 r := lx.next() 497 switch { 498 case isWhitespace(r) || isNL(r): 499 return lexSkip(lx, lexArrayValueEnd) 500 case r == commentStart: 501 lx.push(lexArrayValueEnd) 502 return lexCommentStart 503 case r == comma: 504 lx.ignore() 505 return lexArrayValue // move on to the next value 506 case r == arrayEnd: 507 return lexArrayEnd 508 } 509 return lx.errorf( 510 "expected a comma or array terminator %q, but got %q instead", 511 arrayEnd, r, 512 ) 513 } 514 515 // lexArrayEnd finishes the lexing of an array. 516 // It assumes that a ']' has just been consumed. 517 func lexArrayEnd(lx *lexer) stateFn { 518 lx.ignore() 519 lx.emit(itemArrayEnd) 520 return lx.pop() 521 } 522 523 // lexInlineTableValue consumes one key/value pair in an inline table. 524 // It assumes that '{' or ',' have already been consumed. Whitespace is ignored. 525 func lexInlineTableValue(lx *lexer) stateFn { 526 r := lx.next() 527 switch { 528 case isWhitespace(r): 529 return lexSkip(lx, lexInlineTableValue) 530 case isNL(r): 531 return lx.errorf("newlines not allowed within inline tables") 532 case r == commentStart: 533 lx.push(lexInlineTableValue) 534 return lexCommentStart 535 case r == comma: 536 return lx.errorf("unexpected comma") 537 case r == inlineTableEnd: 538 return lexInlineTableEnd 539 } 540 lx.backup() 541 lx.push(lexInlineTableValueEnd) 542 return lexKeyStart 543 } 544 545 // lexInlineTableValueEnd consumes everything between the end of an inline table 546 // key/value pair and the next pair (or the end of the table): 547 // it ignores whitespace and expects either a ',' or a '}'. 548 func lexInlineTableValueEnd(lx *lexer) stateFn { 549 r := lx.next() 550 switch { 551 case isWhitespace(r): 552 return lexSkip(lx, lexInlineTableValueEnd) 553 case isNL(r): 554 return lx.errorf("newlines not allowed within inline tables") 555 case r == commentStart: 556 lx.push(lexInlineTableValueEnd) 557 return lexCommentStart 558 case r == comma: 559 lx.ignore() 560 return lexInlineTableValue 561 case r == inlineTableEnd: 562 return lexInlineTableEnd 563 } 564 return lx.errorf("expected a comma or an inline table terminator %q, "+ 565 "but got %q instead", inlineTableEnd, r) 566 } 567 568 // lexInlineTableEnd finishes the lexing of an inline table. 569 // It assumes that a '}' has just been consumed. 570 func lexInlineTableEnd(lx *lexer) stateFn { 571 lx.ignore() 572 lx.emit(itemInlineTableEnd) 573 return lx.pop() 574 } 575 576 // lexString consumes the inner contents of a string. It assumes that the 577 // beginning '"' has already been consumed and ignored. 578 func lexString(lx *lexer) stateFn { 579 r := lx.next() 580 switch { 581 case r == eof: 582 return lx.errorf("unexpected EOF") 583 case isNL(r): 584 return lx.errorf("strings cannot contain newlines") 585 case r == '\\': 586 lx.push(lexString) 587 return lexStringEscape 588 case r == stringEnd: 589 lx.backup() 590 lx.emit(itemString) 591 lx.next() 592 lx.ignore() 593 return lx.pop() 594 } 595 return lexString 596 } 597 598 // lexMultilineString consumes the inner contents of a string. It assumes that 599 // the beginning '"""' has already been consumed and ignored. 600 func lexMultilineString(lx *lexer) stateFn { 601 switch lx.next() { 602 case eof: 603 return lx.errorf("unexpected EOF") 604 case '\\': 605 return lexMultilineStringEscape 606 case stringEnd: 607 if lx.accept(stringEnd) { 608 if lx.accept(stringEnd) { 609 lx.backup() 610 lx.backup() 611 lx.backup() 612 lx.emit(itemMultilineString) 613 lx.next() 614 lx.next() 615 lx.next() 616 lx.ignore() 617 return lx.pop() 618 } 619 lx.backup() 620 } 621 } 622 return lexMultilineString 623 } 624 625 // lexRawString consumes a raw string. Nothing can be escaped in such a string. 626 // It assumes that the beginning "'" has already been consumed and ignored. 627 func lexRawString(lx *lexer) stateFn { 628 r := lx.next() 629 switch { 630 case r == eof: 631 return lx.errorf("unexpected EOF") 632 case isNL(r): 633 return lx.errorf("strings cannot contain newlines") 634 case r == rawStringEnd: 635 lx.backup() 636 lx.emit(itemRawString) 637 lx.next() 638 lx.ignore() 639 return lx.pop() 640 } 641 return lexRawString 642 } 643 644 // lexMultilineRawString consumes a raw string. Nothing can be escaped in such 645 // a string. It assumes that the beginning "'''" has already been consumed and 646 // ignored. 647 func lexMultilineRawString(lx *lexer) stateFn { 648 switch lx.next() { 649 case eof: 650 return lx.errorf("unexpected EOF") 651 case rawStringEnd: 652 if lx.accept(rawStringEnd) { 653 if lx.accept(rawStringEnd) { 654 lx.backup() 655 lx.backup() 656 lx.backup() 657 lx.emit(itemRawMultilineString) 658 lx.next() 659 lx.next() 660 lx.next() 661 lx.ignore() 662 return lx.pop() 663 } 664 lx.backup() 665 } 666 } 667 return lexMultilineRawString 668 } 669 670 // lexMultilineStringEscape consumes an escaped character. It assumes that the 671 // preceding '\\' has already been consumed. 672 func lexMultilineStringEscape(lx *lexer) stateFn { 673 // Handle the special case first: 674 if isNL(lx.next()) { 675 return lexMultilineString 676 } 677 lx.backup() 678 lx.push(lexMultilineString) 679 return lexStringEscape(lx) 680 } 681 682 func lexStringEscape(lx *lexer) stateFn { 683 r := lx.next() 684 switch r { 685 case 'b': 686 fallthrough 687 case 't': 688 fallthrough 689 case 'n': 690 fallthrough 691 case 'f': 692 fallthrough 693 case 'r': 694 fallthrough 695 case '"': 696 fallthrough 697 case '\\': 698 return lx.pop() 699 case 'u': 700 return lexShortUnicodeEscape 701 case 'U': 702 return lexLongUnicodeEscape 703 } 704 return lx.errorf("invalid escape character %q; only the following "+ 705 "escape characters are allowed: "+ 706 `\b, \t, \n, \f, \r, \", \\, \uXXXX, and \UXXXXXXXX`, r) 707 } 708 709 func lexShortUnicodeEscape(lx *lexer) stateFn { 710 var r rune 711 for i := 0; i < 4; i++ { 712 r = lx.next() 713 if !isHexadecimal(r) { 714 return lx.errorf(`expected four hexadecimal digits after '\u', `+ 715 "but got %q instead", lx.current()) 716 } 717 } 718 return lx.pop() 719 } 720 721 func lexLongUnicodeEscape(lx *lexer) stateFn { 722 var r rune 723 for i := 0; i < 8; i++ { 724 r = lx.next() 725 if !isHexadecimal(r) { 726 return lx.errorf(`expected eight hexadecimal digits after '\U', `+ 727 "but got %q instead", lx.current()) 728 } 729 } 730 return lx.pop() 731 } 732 733 // lexNumberOrDateStart consumes either an integer, a float, or datetime. 734 func lexNumberOrDateStart(lx *lexer) stateFn { 735 r := lx.next() 736 if isDigit(r) { 737 return lexNumberOrDate 738 } 739 switch r { 740 case '_': 741 return lexNumber 742 case 'e', 'E': 743 return lexFloat 744 case '.': 745 return lx.errorf("floats must start with a digit, not '.'") 746 } 747 return lx.errorf("expected a digit but got %q", r) 748 } 749 750 // lexNumberOrDate consumes either an integer, float or datetime. 751 func lexNumberOrDate(lx *lexer) stateFn { 752 r := lx.next() 753 if isDigit(r) { 754 return lexNumberOrDate 755 } 756 switch r { 757 case '-': 758 return lexDatetime 759 case '_': 760 return lexNumber 761 case '.', 'e', 'E': 762 return lexFloat 763 } 764 765 lx.backup() 766 lx.emit(itemInteger) 767 return lx.pop() 768 } 769 770 // lexDatetime consumes a Datetime, to a first approximation. 771 // The parser validates that it matches one of the accepted formats. 772 func lexDatetime(lx *lexer) stateFn { 773 r := lx.next() 774 if isDigit(r) { 775 return lexDatetime 776 } 777 switch r { 778 case '-', 'T', ':', '.', 'Z', '+': 779 return lexDatetime 780 } 781 782 lx.backup() 783 lx.emit(itemDatetime) 784 return lx.pop() 785 } 786 787 // lexNumberStart consumes either an integer or a float. It assumes that a sign 788 // has already been read, but that *no* digits have been consumed. 789 // lexNumberStart will move to the appropriate integer or float states. 790 func lexNumberStart(lx *lexer) stateFn { 791 // We MUST see a digit. Even floats have to start with a digit. 792 r := lx.next() 793 if !isDigit(r) { 794 if r == '.' { 795 return lx.errorf("floats must start with a digit, not '.'") 796 } 797 return lx.errorf("expected a digit but got %q", r) 798 } 799 return lexNumber 800 } 801 802 // lexNumber consumes an integer or a float after seeing the first digit. 803 func lexNumber(lx *lexer) stateFn { 804 r := lx.next() 805 if isDigit(r) { 806 return lexNumber 807 } 808 switch r { 809 case '_': 810 return lexNumber 811 case '.', 'e', 'E': 812 return lexFloat 813 } 814 815 lx.backup() 816 lx.emit(itemInteger) 817 return lx.pop() 818 } 819 820 // lexFloat consumes the elements of a float. It allows any sequence of 821 // float-like characters, so floats emitted by the lexer are only a first 822 // approximation and must be validated by the parser. 823 func lexFloat(lx *lexer) stateFn { 824 r := lx.next() 825 if isDigit(r) { 826 return lexFloat 827 } 828 switch r { 829 case '_', '.', '-', '+', 'e', 'E': 830 return lexFloat 831 } 832 833 lx.backup() 834 lx.emit(itemFloat) 835 return lx.pop() 836 } 837 838 // lexBool consumes a bool string: 'true' or 'false. 839 func lexBool(lx *lexer) stateFn { 840 var rs []rune 841 for { 842 r := lx.next() 843 if !unicode.IsLetter(r) { 844 lx.backup() 845 break 846 } 847 rs = append(rs, r) 848 } 849 s := string(rs) 850 switch s { 851 case "true", "false": 852 lx.emit(itemBool) 853 return lx.pop() 854 } 855 return lx.errorf("expected value but found %q instead", s) 856 } 857 858 // lexCommentStart begins the lexing of a comment. It will emit 859 // itemCommentStart and consume no characters, passing control to lexComment. 860 func lexCommentStart(lx *lexer) stateFn { 861 lx.ignore() 862 lx.emit(itemCommentStart) 863 return lexComment 864 } 865 866 // lexComment lexes an entire comment. It assumes that '#' has been consumed. 867 // It will consume *up to* the first newline character, and pass control 868 // back to the last state on the stack. 869 func lexComment(lx *lexer) stateFn { 870 r := lx.peek() 871 if isNL(r) || r == eof { 872 lx.emit(itemText) 873 return lx.pop() 874 } 875 lx.next() 876 return lexComment 877 } 878 879 // lexSkip ignores all slurped input and moves on to the next state. 880 func lexSkip(lx *lexer, nextState stateFn) stateFn { 881 return func(lx *lexer) stateFn { 882 lx.ignore() 883 return nextState 884 } 885 } 886 887 // isWhitespace returns true if `r` is a whitespace character according 888 // to the spec. 889 func isWhitespace(r rune) bool { 890 return r == '\t' || r == ' ' 891 } 892 893 func isNL(r rune) bool { 894 return r == '\n' || r == '\r' 895 } 896 897 func isDigit(r rune) bool { 898 return r >= '0' && r <= '9' 899 } 900 901 func isHexadecimal(r rune) bool { 902 return (r >= '0' && r <= '9') || 903 (r >= 'a' && r <= 'f') || 904 (r >= 'A' && r <= 'F') 905 } 906 907 func isBareKeyChar(r rune) bool { 908 return (r >= 'A' && r <= 'Z') || 909 (r >= 'a' && r <= 'z') || 910 (r >= '0' && r <= '9') || 911 r == '_' || 912 r == '-' 913 } 914 915 func (itype itemType) String() string { 916 switch itype { 917 case itemError: 918 return "Error" 919 case itemNIL: 920 return "NIL" 921 case itemEOF: 922 return "EOF" 923 case itemText: 924 return "Text" 925 case itemString, itemRawString, itemMultilineString, itemRawMultilineString: 926 return "String" 927 case itemBool: 928 return "Bool" 929 case itemInteger: 930 return "Integer" 931 case itemFloat: 932 return "Float" 933 case itemDatetime: 934 return "DateTime" 935 case itemTableStart: 936 return "TableStart" 937 case itemTableEnd: 938 return "TableEnd" 939 case itemKeyStart: 940 return "KeyStart" 941 case itemArray: 942 return "Array" 943 case itemArrayEnd: 944 return "ArrayEnd" 945 case itemCommentStart: 946 return "CommentStart" 947 } 948 panic(fmt.Sprintf("BUG: Unknown type '%d'.", int(itype))) 949 } 950 951 func (item item) String() string { 952 return fmt.Sprintf("(%s, %s)", item.typ.String(), item.val) 953 }