github.com/google/skylark@v0.0.0-20181101142754-a5f7082aabed/syntax/scan.go (about) 1 // Copyright 2017 The Bazel Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package syntax 6 7 // A lexical scanner for Skylark. 8 9 import ( 10 "fmt" 11 "io" 12 "io/ioutil" 13 "log" 14 "math/big" 15 "strconv" 16 "strings" 17 "unicode" 18 "unicode/utf8" 19 ) 20 21 // A Token represents a Skylark lexical token. 22 type Token int8 23 24 const ( 25 ILLEGAL Token = iota 26 EOF 27 28 NEWLINE 29 INDENT 30 OUTDENT 31 32 // Tokens with values 33 IDENT // x 34 INT // 123 35 FLOAT // 1.23e45 36 STRING // "foo" or 'foo' or '''foo''' or r'foo' or r"foo" 37 38 // Punctuation 39 PLUS // + 40 MINUS // - 41 STAR // * 42 SLASH // / 43 SLASHSLASH // // 44 PERCENT // % 45 AMP // & 46 PIPE // | 47 CIRCUMFLEX // ^ 48 LTLT // << 49 GTGT // >> 50 TILDE // ~ 51 DOT // . 52 COMMA // , 53 EQ // = 54 SEMI // ; 55 COLON // : 56 LPAREN // ( 57 RPAREN // ) 58 LBRACK // [ 59 RBRACK // ] 60 LBRACE // { 61 RBRACE // } 62 LT // < 63 GT // > 64 GE // >= 65 LE // <= 66 EQL // == 67 NEQ // != 68 PLUS_EQ // += (keep order consistent with PLUS..GTGT) 69 MINUS_EQ // -= 70 STAR_EQ // *= 71 SLASH_EQ // /= 72 SLASHSLASH_EQ // //= 73 PERCENT_EQ // %= 74 AMP_EQ // &= 75 PIPE_EQ // |= 76 CIRCUMFLEX_EQ // ^= 77 LTLT_EQ // <<= 78 GTGT_EQ // >>= 79 STARSTAR // ** 80 81 // Keywords 82 AND 83 BREAK 84 CONTINUE 85 DEF 86 ELIF 87 ELSE 88 FOR 89 IF 90 IN 91 LAMBDA 92 LOAD 93 NOT 94 NOT_IN // synthesized by parser from NOT IN 95 OR 96 PASS 97 RETURN 98 99 maxToken 100 ) 101 102 func (tok Token) String() string { return tokenNames[tok] } 103 104 // GoString is like String but quotes punctuation tokens. 105 // Use Sprintf("%#v", tok) when constructing error messages. 106 func (tok Token) GoString() string { 107 if tok >= PLUS && tok <= STARSTAR { 108 return "'" + tokenNames[tok] + "'" 109 } 110 return tokenNames[tok] 111 } 112 113 var tokenNames = [...]string{ 114 ILLEGAL: "illegal token", 115 EOF: "end of file", 116 NEWLINE: "newline", 117 INDENT: "indent", 118 OUTDENT: "outdent", 119 IDENT: "identifier", 120 INT: "int literal", 121 FLOAT: "float literal", 122 STRING: "string literal", 123 PLUS: "+", 124 MINUS: "-", 125 STAR: "*", 126 SLASH: "/", 127 SLASHSLASH: "//", 128 PERCENT: "%", 129 AMP: "&", 130 PIPE: "|", 131 CIRCUMFLEX: "^", 132 LTLT: "<<", 133 GTGT: ">>", 134 TILDE: "~", 135 DOT: ".", 136 COMMA: ",", 137 EQ: "=", 138 SEMI: ";", 139 COLON: ":", 140 LPAREN: "(", 141 RPAREN: ")", 142 LBRACK: "[", 143 RBRACK: "]", 144 LBRACE: "{", 145 RBRACE: "]", 146 LT: "<", 147 GT: ">", 148 GE: ">=", 149 LE: "<=", 150 EQL: "==", 151 NEQ: "!=", 152 PLUS_EQ: "+=", 153 MINUS_EQ: "-=", 154 STAR_EQ: "*=", 155 SLASH_EQ: "/=", 156 SLASHSLASH_EQ: "//=", 157 PERCENT_EQ: "%=", 158 AMP_EQ: "&=", 159 PIPE_EQ: "|=", 160 CIRCUMFLEX_EQ: "^=", 161 LTLT_EQ: "<<=", 162 GTGT_EQ: ">>=", 163 STARSTAR: "**", 164 AND: "and", 165 BREAK: "break", 166 CONTINUE: "continue", 167 DEF: "def", 168 ELIF: "elif", 169 ELSE: "else", 170 FOR: "for", 171 IF: "if", 172 IN: "in", 173 LAMBDA: "lambda", 174 LOAD: "load", 175 NOT: "not", 176 NOT_IN: "not in", 177 OR: "or", 178 PASS: "pass", 179 RETURN: "return", 180 } 181 182 // A Position describes the location of a rune of input. 183 type Position struct { 184 file *string // filename (indirect for compactness) 185 Line int32 // 1-based line number 186 Col int32 // 1-based column number (strictly: rune) 187 } 188 189 // IsValid reports whether the position is valid. 190 func (p Position) IsValid() bool { 191 return p.Line >= 1 192 } 193 194 // Filename returns the name of the file containing this position. 195 func (p Position) Filename() string { 196 if p.file != nil { 197 return *p.file 198 } 199 return "<unknown>" 200 } 201 202 // MakePosition returns position with the specified components. 203 func MakePosition(file *string, line, col int32) Position { return Position{file, line, col} } 204 205 // add returns the position at the end of s, assuming it starts at p. 206 func (p Position) add(s string) Position { 207 if n := strings.Count(s, "\n"); n > 0 { 208 p.Line += int32(n) 209 s = s[strings.LastIndex(s, "\n")+1:] 210 p.Col = 1 211 } 212 p.Col += int32(utf8.RuneCountInString(s)) 213 return p 214 } 215 216 func (p Position) String() string { 217 if p.Col > 0 { 218 return fmt.Sprintf("%s:%d:%d", p.Filename(), p.Line, p.Col) 219 } 220 return fmt.Sprintf("%s:%d", p.Filename(), p.Line) 221 } 222 223 func (p Position) isBefore(q Position) bool { 224 if p.Line != q.Line { 225 return p.Line < q.Line 226 } 227 return p.Col < q.Col 228 } 229 230 // An scanner represents a single input file being parsed. 231 type scanner struct { 232 complete []byte // entire input 233 rest []byte // rest of input 234 token []byte // token being scanned 235 pos Position // current input position 236 depth int // nesting of [ ] { } ( ) 237 indentstk []int // stack of indentation levels 238 dents int // number of saved INDENT (>0) or OUTDENT (<0) tokens to return 239 lineStart bool // after NEWLINE; convert spaces to indentation tokens 240 keepComments bool // accumulate comments in slice 241 lineComments []Comment // list of full line comments (if keepComments) 242 suffixComments []Comment // list of suffix comments (if keepComments) 243 } 244 245 func newScanner(filename string, src interface{}, keepComments bool) (*scanner, error) { 246 data, err := readSource(filename, src) 247 if err != nil { 248 return nil, err 249 } 250 return &scanner{ 251 complete: data, 252 rest: data, 253 pos: Position{file: &filename, Line: 1, Col: 1}, 254 indentstk: make([]int, 1, 10), // []int{0} + spare capacity 255 lineStart: true, 256 keepComments: keepComments, 257 }, nil 258 } 259 260 func readSource(filename string, src interface{}) (data []byte, err error) { 261 switch src := src.(type) { 262 case string: 263 data = []byte(src) 264 case []byte: 265 data = src 266 case io.Reader: 267 data, err = ioutil.ReadAll(src) 268 case nil: 269 data, err = ioutil.ReadFile(filename) 270 default: 271 return nil, fmt.Errorf("invalid source: %T", src) 272 } 273 if err != nil { 274 return nil, fmt.Errorf("reading %s: %s", filename, err) 275 } 276 return data, nil 277 } 278 279 // An Error describes the nature and position of a scanner or parser error. 280 type Error struct { 281 Pos Position 282 Msg string 283 } 284 285 func (e Error) Error() string { return e.Pos.String() + ": " + e.Msg } 286 287 // errorf is called to report an error. 288 // errorf does not return: it panics. 289 func (sc *scanner) error(pos Position, s string) { 290 panic(Error{pos, s}) 291 } 292 293 func (sc *scanner) errorf(pos Position, format string, args ...interface{}) { 294 sc.error(pos, fmt.Sprintf(format, args...)) 295 } 296 297 func (sc *scanner) recover(err *error) { 298 // The scanner and parser panic both for routine errors like 299 // syntax errors and for programmer bugs like array index 300 // errors. Turn both into error returns. Catching bug panics 301 // is especially important when processing many files. 302 switch e := recover().(type) { 303 case nil: 304 // no panic 305 case Error: 306 *err = e 307 default: 308 *err = Error{sc.pos, fmt.Sprintf("internal error: %v", e)} 309 if debug { 310 log.Fatal(*err) 311 } 312 } 313 } 314 315 // eof reports whether the input has reached end of file. 316 func (sc *scanner) eof() bool { 317 return len(sc.rest) == 0 318 } 319 320 // peekRune returns the next rune in the input without consuming it. 321 // Newlines in Unix, DOS, or Mac format are treated as one rune, '\n'. 322 func (sc *scanner) peekRune() rune { 323 if len(sc.rest) == 0 { 324 return 0 325 } 326 327 // fast path: ASCII 328 if b := sc.rest[0]; b < utf8.RuneSelf { 329 if b == '\r' { 330 return '\n' 331 } 332 return rune(b) 333 } 334 335 r, _ := utf8.DecodeRune(sc.rest) 336 return r 337 } 338 339 // readRune consumes and returns the next rune in the input. 340 // Newlines in Unix, DOS, or Mac format are treated as one rune, '\n'. 341 func (sc *scanner) readRune() rune { 342 if len(sc.rest) == 0 { 343 sc.error(sc.pos, "internal scanner error: readRune at EOF") 344 return 0 // unreachable but eliminates bounds-check below 345 } 346 347 // fast path: ASCII 348 if b := sc.rest[0]; b < utf8.RuneSelf { 349 r := rune(b) 350 sc.rest = sc.rest[1:] 351 if r == '\r' { 352 if len(sc.rest) > 0 && sc.rest[0] == '\n' { 353 sc.rest = sc.rest[1:] 354 } 355 r = '\n' 356 } 357 if r == '\n' { 358 sc.pos.Line++ 359 sc.pos.Col = 1 360 } else { 361 sc.pos.Col++ 362 } 363 return r 364 } 365 366 r, size := utf8.DecodeRune(sc.rest) 367 sc.rest = sc.rest[size:] 368 sc.pos.Col++ 369 return r 370 } 371 372 // tokenValue records the position and value associated with each token. 373 type tokenValue struct { 374 raw string // raw text of token 375 int int64 // decoded int 376 bigInt *big.Int // decoded integers > int64 377 float float64 // decoded float 378 string string // decoded string 379 pos Position // start position of token 380 triple bool // was string triple quoted? 381 } 382 383 // startToken marks the beginning of the next input token. 384 // It must be followed by a call to endToken once the token has 385 // been consumed using readRune. 386 func (sc *scanner) startToken(val *tokenValue) { 387 sc.token = sc.rest 388 val.raw = "" 389 val.pos = sc.pos 390 } 391 392 // endToken marks the end of an input token. 393 // It records the actual token string in val.raw if the caller 394 // has not done that already. 395 func (sc *scanner) endToken(val *tokenValue) { 396 if val.raw == "" { 397 val.raw = string(sc.token[:len(sc.token)-len(sc.rest)]) 398 } 399 } 400 401 // nextToken is called by the parser to obtain the next input token. 402 // It returns the token value and sets val to the data associated with 403 // the token. 404 // 405 // For all our input tokens, the associated data is val.pos (the 406 // position where the token begins), val.raw (the input string 407 // corresponding to the token). For string and int tokens, the string 408 // and int fields additionally contain the token's interpreted value. 409 func (sc *scanner) nextToken(val *tokenValue) Token { 410 411 // The following distribution of tokens guides case ordering: 412 // 413 // COMMA 27 % 414 // STRING 23 % 415 // IDENT 15 % 416 // EQL 11 % 417 // LBRACK 5.5 % 418 // RBRACK 5.5 % 419 // NEWLINE 3 % 420 // LPAREN 2.9 % 421 // RPAREN 2.9 % 422 // INT 2 % 423 // others < 1 % 424 // 425 // Although NEWLINE tokens are infrequent, and lineStart is 426 // usually (~97%) false on entry, skipped newlines account for 427 // about 50% of all iterations of the 'start' loop. 428 429 start: 430 var c rune 431 432 // Deal with leading spaces and indentation. 433 blank := false 434 savedLineStart := sc.lineStart 435 if sc.lineStart { 436 sc.lineStart = false 437 col := 0 438 for { 439 c = sc.peekRune() 440 if c == ' ' { 441 col++ 442 sc.readRune() 443 } else if c == '\t' { 444 const tab = 8 445 col += int(tab - (sc.pos.Col-1)%tab) 446 sc.readRune() 447 } else { 448 break 449 } 450 } 451 // The third clause is "trailing spaces without newline at EOF". 452 if c == '#' || c == '\n' || c == 0 && col > 0 { 453 blank = true 454 } 455 456 // Compute indentation level for non-blank lines not 457 // inside an expression. This is not the common case. 458 if !blank && sc.depth == 0 { 459 cur := sc.indentstk[len(sc.indentstk)-1] 460 if col > cur { 461 // indent 462 sc.dents++ 463 sc.indentstk = append(sc.indentstk, col) 464 } else if col < cur { 465 // dedent(s) 466 for len(sc.indentstk) > 0 && col < sc.indentstk[len(sc.indentstk)-1] { 467 sc.dents-- 468 sc.indentstk = sc.indentstk[:len(sc.indentstk)-1] // pop 469 } 470 if col != sc.indentstk[len(sc.indentstk)-1] { 471 sc.error(sc.pos, "unindent does not match any outer indentation level") 472 } 473 } 474 } 475 } 476 477 // Return saved indentation tokens. 478 if sc.dents != 0 { 479 sc.startToken(val) 480 sc.endToken(val) 481 if sc.dents < 0 { 482 sc.dents++ 483 return OUTDENT 484 } else { 485 sc.dents-- 486 return INDENT 487 } 488 } 489 490 // start of line proper 491 c = sc.peekRune() 492 493 // Skip spaces. 494 for c == ' ' || c == '\t' { 495 sc.readRune() 496 c = sc.peekRune() 497 } 498 499 // comment 500 if c == '#' { 501 if sc.keepComments { 502 sc.startToken(val) 503 } 504 // Consume up to newline (included). 505 for c != 0 && c != '\n' { 506 sc.readRune() 507 c = sc.peekRune() 508 } 509 if sc.keepComments { 510 sc.endToken(val) 511 if blank { 512 sc.lineComments = append(sc.lineComments, Comment{val.pos, val.raw}) 513 } else { 514 sc.suffixComments = append(sc.suffixComments, Comment{val.pos, val.raw}) 515 } 516 } 517 } 518 519 // newline 520 if c == '\n' { 521 sc.lineStart = true 522 if blank || sc.depth > 0 { 523 // Ignore blank lines, or newlines within expressions (common case). 524 sc.readRune() 525 goto start 526 } 527 // At top-level (not in an expression). 528 sc.startToken(val) 529 sc.readRune() 530 val.raw = "\n" 531 return NEWLINE 532 } 533 534 // end of file 535 if c == 0 { 536 // Emit OUTDENTs for unfinished indentation, 537 // preceded by a NEWLINE if we haven't just emitted one. 538 if len(sc.indentstk) > 1 { 539 if savedLineStart { 540 sc.dents = 1 - len(sc.indentstk) 541 sc.indentstk = sc.indentstk[1:] 542 goto start 543 } else { 544 sc.lineStart = true 545 sc.startToken(val) 546 val.raw = "\n" 547 return NEWLINE 548 } 549 } 550 551 sc.startToken(val) 552 sc.endToken(val) 553 return EOF 554 } 555 556 // line continuation 557 if c == '\\' { 558 sc.readRune() 559 if sc.peekRune() != '\n' { 560 sc.errorf(sc.pos, "stray backslash in program") 561 } 562 sc.readRune() 563 goto start 564 } 565 566 // start of the next token 567 sc.startToken(val) 568 569 // comma (common case) 570 if c == ',' { 571 sc.readRune() 572 sc.endToken(val) 573 return COMMA 574 } 575 576 // string literal 577 if c == '"' || c == '\'' { 578 return sc.scanString(val, c) 579 } 580 581 // identifier or keyword 582 if isIdentStart(c) { 583 // raw string literal 584 if c == 'r' && len(sc.rest) > 1 && (sc.rest[1] == '"' || sc.rest[1] == '\'') { 585 sc.readRune() 586 c = sc.peekRune() 587 return sc.scanString(val, c) 588 } 589 590 for isIdent(c) { 591 sc.readRune() 592 c = sc.peekRune() 593 } 594 sc.endToken(val) 595 if k, ok := keywordToken[val.raw]; ok { 596 return k 597 } 598 599 return IDENT 600 } 601 602 // brackets 603 switch c { 604 case '[', '(', '{': 605 sc.depth++ 606 sc.readRune() 607 sc.endToken(val) 608 switch c { 609 case '[': 610 return LBRACK 611 case '(': 612 return LPAREN 613 case '{': 614 return LBRACE 615 } 616 panic("unreachable") 617 618 case ']', ')', '}': 619 if sc.depth == 0 { 620 sc.error(sc.pos, "indentation error") 621 } else { 622 sc.depth-- 623 } 624 sc.readRune() 625 sc.endToken(val) 626 switch c { 627 case ']': 628 return RBRACK 629 case ')': 630 return RPAREN 631 case '}': 632 return RBRACE 633 } 634 panic("unreachable") 635 } 636 637 // int or float literal, or period 638 if isdigit(c) || c == '.' { 639 return sc.scanNumber(val, c) 640 } 641 642 // other punctuation 643 defer sc.endToken(val) 644 switch c { 645 case '=', '<', '>', '!', '+', '-', '%', '/', '&', '|', '^', '~': // possibly followed by '=' 646 start := sc.pos 647 sc.readRune() 648 if sc.peekRune() == '=' { 649 sc.readRune() 650 switch c { 651 case '<': 652 return LE 653 case '>': 654 return GE 655 case '=': 656 return EQL 657 case '!': 658 return NEQ 659 case '+': 660 return PLUS_EQ 661 case '-': 662 return MINUS_EQ 663 case '/': 664 return SLASH_EQ 665 case '%': 666 return PERCENT_EQ 667 case '&': 668 return AMP_EQ 669 case '|': 670 return PIPE_EQ 671 case '^': 672 return CIRCUMFLEX_EQ 673 } 674 } 675 switch c { 676 case '=': 677 return EQ 678 case '<': 679 if sc.peekRune() == '<' { 680 sc.readRune() 681 if sc.peekRune() == '=' { 682 sc.readRune() 683 return LTLT_EQ 684 } else { 685 return LTLT 686 } 687 } 688 return LT 689 case '>': 690 if sc.peekRune() == '>' { 691 sc.readRune() 692 if sc.peekRune() == '=' { 693 sc.readRune() 694 return GTGT_EQ 695 } else { 696 return GTGT 697 } 698 } 699 return GT 700 case '!': 701 sc.error(start, "unexpected input character '!'") 702 case '+': 703 return PLUS 704 case '-': 705 return MINUS 706 case '/': 707 if sc.peekRune() == '/' { 708 sc.readRune() 709 if sc.peekRune() == '=' { 710 sc.readRune() 711 return SLASHSLASH_EQ 712 } else { 713 return SLASHSLASH 714 } 715 } 716 return SLASH 717 case '%': 718 return PERCENT 719 case '&': 720 return AMP 721 case '|': 722 return PIPE 723 case '^': 724 return CIRCUMFLEX 725 case '~': 726 return TILDE 727 } 728 panic("unreachable") 729 730 case ':', ';': // single-char tokens (except comma) 731 sc.readRune() 732 switch c { 733 case ':': 734 return COLON 735 case ';': 736 return SEMI 737 } 738 panic("unreachable") 739 740 case '*': // possibly followed by '*' or '=' 741 sc.readRune() 742 switch sc.peekRune() { 743 case '*': 744 sc.readRune() 745 return STARSTAR 746 case '=': 747 sc.readRune() 748 return STAR_EQ 749 } 750 return STAR 751 } 752 753 sc.errorf(sc.pos, "unexpected input character %#q", c) 754 panic("unreachable") 755 } 756 757 func (sc *scanner) scanString(val *tokenValue, quote rune) Token { 758 start := sc.pos 759 triple := len(sc.rest) >= 3 && sc.rest[0] == byte(quote) && sc.rest[1] == byte(quote) && sc.rest[2] == byte(quote) 760 sc.readRune() 761 if triple { 762 sc.readRune() 763 sc.readRune() 764 } 765 766 quoteCount := 0 767 for { 768 if sc.eof() { 769 sc.error(val.pos, "unexpected EOF in string") 770 } 771 c := sc.readRune() 772 if c == '\n' && !triple { 773 sc.error(val.pos, "unexpected newline in string") 774 } 775 if c == quote { 776 quoteCount++ 777 if !triple || quoteCount == 3 { 778 break 779 } 780 } else { 781 quoteCount = 0 782 } 783 if c == '\\' { 784 if sc.eof() { 785 sc.error(val.pos, "unexpected EOF in string") 786 } 787 sc.readRune() 788 } 789 } 790 791 sc.endToken(val) 792 s, _, err := unquote(val.raw) 793 if err != nil { 794 sc.error(start, err.Error()) 795 } 796 val.string = s 797 return STRING 798 } 799 800 func (sc *scanner) scanNumber(val *tokenValue, c rune) Token { 801 // https://github.com/google/skylark/blob/master/doc/spec.md#lexical-elements 802 // 803 // Python features not supported: 804 // - integer literals of >64 bits of precision 805 // - 123L or 123l long suffix 806 // - traditional octal: 0755 807 // https://docs.python.org/2/reference/lexical_analysis.html#integer-and-long-integer-literals 808 809 start := sc.pos 810 fraction, exponent := false, false 811 812 if c == '.' { 813 // dot or start of fraction 814 sc.readRune() 815 c = sc.peekRune() 816 if !isdigit(c) { 817 sc.endToken(val) 818 return DOT 819 } 820 fraction = true 821 } else if c == '0' { 822 // hex, octal, binary or float 823 sc.readRune() 824 c = sc.peekRune() 825 826 if c == '.' { 827 fraction = true 828 } else if c == 'x' || c == 'X' { 829 // hex 830 sc.readRune() 831 c = sc.peekRune() 832 if !isxdigit(c) { 833 sc.error(start, "invalid hex literal") 834 } 835 for isxdigit(c) { 836 sc.readRune() 837 c = sc.peekRune() 838 } 839 } else if c == 'o' || c == 'O' { 840 // octal 841 sc.readRune() 842 c = sc.peekRune() 843 if !isodigit(c) { 844 sc.error(sc.pos, "invalid octal literal") 845 } 846 for isodigit(c) { 847 sc.readRune() 848 c = sc.peekRune() 849 } 850 } else if c == 'b' || c == 'B' { 851 // binary 852 sc.readRune() 853 c = sc.peekRune() 854 if !isbdigit(c) { 855 sc.error(sc.pos, "invalid binary literal") 856 } 857 for isbdigit(c) { 858 sc.readRune() 859 c = sc.peekRune() 860 } 861 } else { 862 // float (or obsolete octal "0755") 863 allzeros, octal := true, true 864 for isdigit(c) { 865 if c != '0' { 866 allzeros = false 867 } 868 if c > '7' { 869 octal = false 870 } 871 sc.readRune() 872 c = sc.peekRune() 873 } 874 if c == '.' { 875 fraction = true 876 } else if c == 'e' || c == 'E' { 877 exponent = true 878 } else if octal && !allzeros { 879 // We must support old octal until the Java 880 // implementation groks the new one. 881 // TODO(adonovan): reenable the check. 882 if false { 883 sc.endToken(val) 884 sc.errorf(sc.pos, "obsolete form of octal literal; use 0o%s", val.raw[1:]) 885 } 886 } 887 } 888 } else { 889 // decimal 890 for isdigit(c) { 891 sc.readRune() 892 c = sc.peekRune() 893 } 894 895 if c == '.' { 896 fraction = true 897 } else if c == 'e' || c == 'E' { 898 exponent = true 899 } 900 } 901 902 if fraction { 903 sc.readRune() // consume '.' 904 c = sc.peekRune() 905 for isdigit(c) { 906 sc.readRune() 907 c = sc.peekRune() 908 } 909 910 if c == 'e' || c == 'E' { 911 exponent = true 912 } 913 } 914 915 if exponent { 916 sc.readRune() // consume [eE] 917 c = sc.peekRune() 918 if c == '+' || c == '-' { 919 sc.readRune() 920 c = sc.peekRune() 921 if !isdigit(c) { 922 sc.error(sc.pos, "invalid float literal") 923 } 924 } 925 for isdigit(c) { 926 sc.readRune() 927 c = sc.peekRune() 928 } 929 } 930 931 sc.endToken(val) 932 if fraction || exponent { 933 var err error 934 val.float, err = strconv.ParseFloat(val.raw, 64) 935 if err != nil { 936 sc.error(sc.pos, "invalid float literal") 937 } 938 return FLOAT 939 } else { 940 var err error 941 s := val.raw 942 val.bigInt = nil 943 if len(s) > 2 && s[0] == '0' && (s[1] == 'o' || s[1] == 'O') { 944 val.int, err = strconv.ParseInt(s[2:], 8, 64) 945 } else if len(s) > 2 && s[0] == '0' && (s[1] == 'b' || s[1] == 'B') { 946 val.int, err = strconv.ParseInt(s[2:], 2, 64) 947 } else { 948 val.int, err = strconv.ParseInt(s, 0, 64) 949 if err != nil { 950 num := new(big.Int) 951 var ok bool = true 952 val.bigInt, ok = num.SetString(s, 0) 953 if ok { 954 err = nil 955 } 956 } 957 } 958 if err != nil { 959 sc.error(start, "invalid int literal") 960 } 961 return INT 962 } 963 } 964 965 // isIdent reports whether c is an identifier rune. 966 func isIdent(c rune) bool { 967 return isdigit(c) || isIdentStart(c) 968 } 969 970 func isIdentStart(c rune) bool { 971 return 'a' <= c && c <= 'z' || 972 'A' <= c && c <= 'Z' || 973 c == '_' || 974 unicode.IsLetter(c) 975 } 976 977 func isdigit(c rune) bool { return '0' <= c && c <= '9' } 978 func isodigit(c rune) bool { return '0' <= c && c <= '7' } 979 func isxdigit(c rune) bool { return isdigit(c) || 'A' <= c && c <= 'F' || 'a' <= c && c <= 'f' } 980 func isbdigit(c rune) bool { return '0' == c || c == '1' } 981 982 // keywordToken records the special tokens for 983 // strings that should not be treated as ordinary identifiers. 984 var keywordToken = map[string]Token{ 985 "and": AND, 986 "break": BREAK, 987 "continue": CONTINUE, 988 "def": DEF, 989 "elif": ELIF, 990 "else": ELSE, 991 "for": FOR, 992 "if": IF, 993 "in": IN, 994 "lambda": LAMBDA, 995 "load": LOAD, 996 "not": NOT, 997 "or": OR, 998 "pass": PASS, 999 "return": RETURN, 1000 1001 // reserved words: 1002 "as": ILLEGAL, 1003 // "assert": ILLEGAL, // heavily used by our tests 1004 "class": ILLEGAL, 1005 "del": ILLEGAL, 1006 "except": ILLEGAL, 1007 "finally": ILLEGAL, 1008 "from": ILLEGAL, 1009 "global": ILLEGAL, 1010 "import": ILLEGAL, 1011 "is": ILLEGAL, 1012 "nonlocal": ILLEGAL, 1013 "raise": ILLEGAL, 1014 "try": ILLEGAL, 1015 "while": ILLEGAL, 1016 "with": ILLEGAL, 1017 "yield": ILLEGAL, 1018 }