github.com/google/skylark@v0.0.0-20181101142754-a5f7082aabed/syntax/scan.go (about)

     1  // Copyright 2017 The Bazel Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package syntax
     6  
     7  // A lexical scanner for Skylark.
     8  
     9  import (
    10  	"fmt"
    11  	"io"
    12  	"io/ioutil"
    13  	"log"
    14  	"math/big"
    15  	"strconv"
    16  	"strings"
    17  	"unicode"
    18  	"unicode/utf8"
    19  )
    20  
// A Token represents a Skylark lexical token.
// Its String and GoString methods look the token's name up in the
// tokenNames table.
type Token int8

const (
	// ILLEGAL is the zero Token. nextToken also returns it for reserved
	// words, which keywordToken maps to ILLEGAL.
	ILLEGAL Token = iota
	// EOF is returned once the input is exhausted.
	EOF

	// NEWLINE ends a logical line; INDENT and OUTDENT report a change in
	// the indentation level of a line that is not inside brackets.
	NEWLINE
	INDENT
	OUTDENT

	// Tokens with values
	IDENT  // x
	INT    // 123
	FLOAT  // 1.23e45
	STRING // "foo" or 'foo' or '''foo''' or r'foo' or r"foo"

	// Punctuation
	// (GoString quotes the names of all tokens in the range PLUS..STARSTAR.)
	PLUS          // +
	MINUS         // -
	STAR          // *
	SLASH         // /
	SLASHSLASH    // //
	PERCENT       // %
	AMP           // &
	PIPE          // |
	CIRCUMFLEX    // ^
	LTLT          // <<
	GTGT          // >>
	TILDE         // ~
	DOT           // .
	COMMA         // ,
	EQ            // =
	SEMI          // ;
	COLON         // :
	LPAREN        // (
	RPAREN        // )
	LBRACK        // [
	RBRACK        // ]
	LBRACE        // {
	RBRACE        // }
	LT            // <
	GT            // >
	GE            // >=
	LE            // <=
	EQL           // ==
	NEQ           // !=
	PLUS_EQ       // +=    (keep order consistent with PLUS..GTGT)
	MINUS_EQ      // -=
	STAR_EQ       // *=
	SLASH_EQ      // /=
	SLASHSLASH_EQ // //=
	PERCENT_EQ    // %=
	AMP_EQ        // &=
	PIPE_EQ       // |=
	CIRCUMFLEX_EQ // ^=
	LTLT_EQ       // <<=
	GTGT_EQ       // >>=
	STARSTAR      // **

	// Keywords
	AND
	BREAK
	CONTINUE
	DEF
	ELIF
	ELSE
	FOR
	IF
	IN
	LAMBDA
	LOAD
	NOT
	NOT_IN // synthesized by parser from NOT IN
	OR
	PASS
	RETURN

	// maxToken is one greater than the last token above.
	// NOTE(review): presumably used to size per-token tables; no use is
	// visible in this file.
	maxToken
)
   101  
   102  func (tok Token) String() string { return tokenNames[tok] }
   103  
   104  // GoString is like String but quotes punctuation tokens.
   105  // Use Sprintf("%#v", tok) when constructing error messages.
   106  func (tok Token) GoString() string {
   107  	if tok >= PLUS && tok <= STARSTAR {
   108  		return "'" + tokenNames[tok] + "'"
   109  	}
   110  	return tokenNames[tok]
   111  }
   112  
   113  var tokenNames = [...]string{
   114  	ILLEGAL:       "illegal token",
   115  	EOF:           "end of file",
   116  	NEWLINE:       "newline",
   117  	INDENT:        "indent",
   118  	OUTDENT:       "outdent",
   119  	IDENT:         "identifier",
   120  	INT:           "int literal",
   121  	FLOAT:         "float literal",
   122  	STRING:        "string literal",
   123  	PLUS:          "+",
   124  	MINUS:         "-",
   125  	STAR:          "*",
   126  	SLASH:         "/",
   127  	SLASHSLASH:    "//",
   128  	PERCENT:       "%",
   129  	AMP:           "&",
   130  	PIPE:          "|",
   131  	CIRCUMFLEX:    "^",
   132  	LTLT:          "<<",
   133  	GTGT:          ">>",
   134  	TILDE:         "~",
   135  	DOT:           ".",
   136  	COMMA:         ",",
   137  	EQ:            "=",
   138  	SEMI:          ";",
   139  	COLON:         ":",
   140  	LPAREN:        "(",
   141  	RPAREN:        ")",
   142  	LBRACK:        "[",
   143  	RBRACK:        "]",
   144  	LBRACE:        "{",
   145  	RBRACE:        "]",
   146  	LT:            "<",
   147  	GT:            ">",
   148  	GE:            ">=",
   149  	LE:            "<=",
   150  	EQL:           "==",
   151  	NEQ:           "!=",
   152  	PLUS_EQ:       "+=",
   153  	MINUS_EQ:      "-=",
   154  	STAR_EQ:       "*=",
   155  	SLASH_EQ:      "/=",
   156  	SLASHSLASH_EQ: "//=",
   157  	PERCENT_EQ:    "%=",
   158  	AMP_EQ:        "&=",
   159  	PIPE_EQ:       "|=",
   160  	CIRCUMFLEX_EQ: "^=",
   161  	LTLT_EQ:       "<<=",
   162  	GTGT_EQ:       ">>=",
   163  	STARSTAR:      "**",
   164  	AND:           "and",
   165  	BREAK:         "break",
   166  	CONTINUE:      "continue",
   167  	DEF:           "def",
   168  	ELIF:          "elif",
   169  	ELSE:          "else",
   170  	FOR:           "for",
   171  	IF:            "if",
   172  	IN:            "in",
   173  	LAMBDA:        "lambda",
   174  	LOAD:          "load",
   175  	NOT:           "not",
   176  	NOT_IN:        "not in",
   177  	OR:            "or",
   178  	PASS:          "pass",
   179  	RETURN:        "return",
   180  }
   181  
// A Position describes the location of a rune of input.
// A Position whose Line is less than 1 is reported as invalid by IsValid,
// and one whose Col is not positive is printed without a column by String.
type Position struct {
	file *string // filename (indirect for compactness)
	Line int32   // 1-based line number
	Col  int32   // 1-based column number (strictly: rune)
}
   188  
   189  // IsValid reports whether the position is valid.
   190  func (p Position) IsValid() bool {
   191  	return p.Line >= 1
   192  }
   193  
   194  // Filename returns the name of the file containing this position.
   195  func (p Position) Filename() string {
   196  	if p.file != nil {
   197  		return *p.file
   198  	}
   199  	return "<unknown>"
   200  }
   201  
   202  // MakePosition returns position with the specified components.
   203  func MakePosition(file *string, line, col int32) Position { return Position{file, line, col} }
   204  
   205  // add returns the position at the end of s, assuming it starts at p.
   206  func (p Position) add(s string) Position {
   207  	if n := strings.Count(s, "\n"); n > 0 {
   208  		p.Line += int32(n)
   209  		s = s[strings.LastIndex(s, "\n")+1:]
   210  		p.Col = 1
   211  	}
   212  	p.Col += int32(utf8.RuneCountInString(s))
   213  	return p
   214  }
   215  
   216  func (p Position) String() string {
   217  	if p.Col > 0 {
   218  		return fmt.Sprintf("%s:%d:%d", p.Filename(), p.Line, p.Col)
   219  	}
   220  	return fmt.Sprintf("%s:%d", p.Filename(), p.Line)
   221  }
   222  
   223  func (p Position) isBefore(q Position) bool {
   224  	if p.Line != q.Line {
   225  		return p.Line < q.Line
   226  	}
   227  	return p.Col < q.Col
   228  }
   229  
// A scanner represents a single input file being parsed.
// It holds the unread input, the current position, and the state that
// nextToken needs to turn leading whitespace into INDENT and OUTDENT
// tokens and to ignore newlines inside brackets.
type scanner struct {
	complete       []byte    // entire input
	rest           []byte    // rest of input
	token          []byte    // token being scanned
	pos            Position  // current input position
	depth          int       // nesting of [ ] { } ( )
	indentstk      []int     // stack of indentation levels
	dents          int       // number of saved INDENT (>0) or OUTDENT (<0) tokens to return
	lineStart      bool      // after NEWLINE; convert spaces to indentation tokens
	keepComments   bool      // accumulate comments in slice
	lineComments   []Comment // list of full line comments (if keepComments)
	suffixComments []Comment // list of suffix comments (if keepComments)
}
   244  
   245  func newScanner(filename string, src interface{}, keepComments bool) (*scanner, error) {
   246  	data, err := readSource(filename, src)
   247  	if err != nil {
   248  		return nil, err
   249  	}
   250  	return &scanner{
   251  		complete:     data,
   252  		rest:         data,
   253  		pos:          Position{file: &filename, Line: 1, Col: 1},
   254  		indentstk:    make([]int, 1, 10), // []int{0} + spare capacity
   255  		lineStart:    true,
   256  		keepComments: keepComments,
   257  	}, nil
   258  }
   259  
   260  func readSource(filename string, src interface{}) (data []byte, err error) {
   261  	switch src := src.(type) {
   262  	case string:
   263  		data = []byte(src)
   264  	case []byte:
   265  		data = src
   266  	case io.Reader:
   267  		data, err = ioutil.ReadAll(src)
   268  	case nil:
   269  		data, err = ioutil.ReadFile(filename)
   270  	default:
   271  		return nil, fmt.Errorf("invalid source: %T", src)
   272  	}
   273  	if err != nil {
   274  		return nil, fmt.Errorf("reading %s: %s", filename, err)
   275  	}
   276  	return data, nil
   277  }
   278  
// An Error describes the nature and position of a scanner or parser error.
// The scanner reports errors by panicking with an Error value (see
// scanner.error); scanner.recover turns such a panic back into an
// ordinary error result.
type Error struct {
	Pos Position
	Msg string
}
   284  
   285  func (e Error) Error() string { return e.Pos.String() + ": " + e.Msg }
   286  
   287  // errorf is called to report an error.
   288  // errorf does not return: it panics.
   289  func (sc *scanner) error(pos Position, s string) {
   290  	panic(Error{pos, s})
   291  }
   292  
   293  func (sc *scanner) errorf(pos Position, format string, args ...interface{}) {
   294  	sc.error(pos, fmt.Sprintf(format, args...))
   295  }
   296  
   297  func (sc *scanner) recover(err *error) {
   298  	// The scanner and parser panic both for routine errors like
   299  	// syntax errors and for programmer bugs like array index
   300  	// errors.  Turn both into error returns.  Catching bug panics
   301  	// is especially important when processing many files.
   302  	switch e := recover().(type) {
   303  	case nil:
   304  		// no panic
   305  	case Error:
   306  		*err = e
   307  	default:
   308  		*err = Error{sc.pos, fmt.Sprintf("internal error: %v", e)}
   309  		if debug {
   310  			log.Fatal(*err)
   311  		}
   312  	}
   313  }
   314  
   315  // eof reports whether the input has reached end of file.
   316  func (sc *scanner) eof() bool {
   317  	return len(sc.rest) == 0
   318  }
   319  
   320  // peekRune returns the next rune in the input without consuming it.
   321  // Newlines in Unix, DOS, or Mac format are treated as one rune, '\n'.
   322  func (sc *scanner) peekRune() rune {
   323  	if len(sc.rest) == 0 {
   324  		return 0
   325  	}
   326  
   327  	// fast path: ASCII
   328  	if b := sc.rest[0]; b < utf8.RuneSelf {
   329  		if b == '\r' {
   330  			return '\n'
   331  		}
   332  		return rune(b)
   333  	}
   334  
   335  	r, _ := utf8.DecodeRune(sc.rest)
   336  	return r
   337  }
   338  
   339  // readRune consumes and returns the next rune in the input.
   340  // Newlines in Unix, DOS, or Mac format are treated as one rune, '\n'.
   341  func (sc *scanner) readRune() rune {
   342  	if len(sc.rest) == 0 {
   343  		sc.error(sc.pos, "internal scanner error: readRune at EOF")
   344  		return 0 // unreachable but eliminates bounds-check below
   345  	}
   346  
   347  	// fast path: ASCII
   348  	if b := sc.rest[0]; b < utf8.RuneSelf {
   349  		r := rune(b)
   350  		sc.rest = sc.rest[1:]
   351  		if r == '\r' {
   352  			if len(sc.rest) > 0 && sc.rest[0] == '\n' {
   353  				sc.rest = sc.rest[1:]
   354  			}
   355  			r = '\n'
   356  		}
   357  		if r == '\n' {
   358  			sc.pos.Line++
   359  			sc.pos.Col = 1
   360  		} else {
   361  			sc.pos.Col++
   362  		}
   363  		return r
   364  	}
   365  
   366  	r, size := utf8.DecodeRune(sc.rest)
   367  	sc.rest = sc.rest[size:]
   368  	sc.pos.Col++
   369  	return r
   370  }
   371  
// tokenValue records the position and value associated with each token.
// startToken and endToken maintain pos and raw for every token; scanNumber
// additionally sets int, bigInt or float, and scanString sets string.
type tokenValue struct {
	raw    string   // raw text of token
	int    int64    // decoded int
	bigInt *big.Int // decoded integers > int64
	float  float64  // decoded float
	string string   // decoded string
	pos    Position // start position of token
	triple bool     // was string triple quoted? NOTE(review): no code visible in this file assigns it
}
   382  
   383  // startToken marks the beginning of the next input token.
   384  // It must be followed by a call to endToken once the token has
   385  // been consumed using readRune.
   386  func (sc *scanner) startToken(val *tokenValue) {
   387  	sc.token = sc.rest
   388  	val.raw = ""
   389  	val.pos = sc.pos
   390  }
   391  
   392  // endToken marks the end of an input token.
   393  // It records the actual token string in val.raw if the caller
   394  // has not done that already.
   395  func (sc *scanner) endToken(val *tokenValue) {
   396  	if val.raw == "" {
   397  		val.raw = string(sc.token[:len(sc.token)-len(sc.rest)])
   398  	}
   399  }
   400  
   401  // nextToken is called by the parser to obtain the next input token.
   402  // It returns the token value and sets val to the data associated with
   403  // the token.
   404  //
   405  // For all our input tokens, the associated data is val.pos (the
   406  // position where the token begins), val.raw (the input string
   407  // corresponding to the token).  For string and int tokens, the string
   408  // and int fields additionally contain the token's interpreted value.
   409  func (sc *scanner) nextToken(val *tokenValue) Token {
   410  
   411  	// The following distribution of tokens guides case ordering:
   412  	//
   413  	//      COMMA          27   %
   414  	//      STRING         23   %
   415  	//      IDENT          15   %
   416  	//      EQL            11   %
   417  	//      LBRACK          5.5 %
   418  	//      RBRACK          5.5 %
   419  	//      NEWLINE         3   %
   420  	//      LPAREN          2.9 %
   421  	//      RPAREN          2.9 %
   422  	//      INT             2   %
   423  	//      others        < 1   %
   424  	//
   425  	// Although NEWLINE tokens are infrequent, and lineStart is
   426  	// usually (~97%) false on entry, skipped newlines account for
   427  	// about 50% of all iterations of the 'start' loop.
   428  
   429  start:
   430  	var c rune
   431  
   432  	// Deal with leading spaces and indentation.
   433  	blank := false
   434  	savedLineStart := sc.lineStart
   435  	if sc.lineStart {
   436  		sc.lineStart = false
   437  		col := 0
   438  		for {
   439  			c = sc.peekRune()
   440  			if c == ' ' {
   441  				col++
   442  				sc.readRune()
   443  			} else if c == '\t' {
   444  				const tab = 8
   445  				col += int(tab - (sc.pos.Col-1)%tab)
   446  				sc.readRune()
   447  			} else {
   448  				break
   449  			}
   450  		}
   451  		// The third clause is "trailing spaces without newline at EOF".
   452  		if c == '#' || c == '\n' || c == 0 && col > 0 {
   453  			blank = true
   454  		}
   455  
   456  		// Compute indentation level for non-blank lines not
   457  		// inside an expression.  This is not the common case.
   458  		if !blank && sc.depth == 0 {
   459  			cur := sc.indentstk[len(sc.indentstk)-1]
   460  			if col > cur {
   461  				// indent
   462  				sc.dents++
   463  				sc.indentstk = append(sc.indentstk, col)
   464  			} else if col < cur {
   465  				// dedent(s)
   466  				for len(sc.indentstk) > 0 && col < sc.indentstk[len(sc.indentstk)-1] {
   467  					sc.dents--
   468  					sc.indentstk = sc.indentstk[:len(sc.indentstk)-1] // pop
   469  				}
   470  				if col != sc.indentstk[len(sc.indentstk)-1] {
   471  					sc.error(sc.pos, "unindent does not match any outer indentation level")
   472  				}
   473  			}
   474  		}
   475  	}
   476  
   477  	// Return saved indentation tokens.
   478  	if sc.dents != 0 {
   479  		sc.startToken(val)
   480  		sc.endToken(val)
   481  		if sc.dents < 0 {
   482  			sc.dents++
   483  			return OUTDENT
   484  		} else {
   485  			sc.dents--
   486  			return INDENT
   487  		}
   488  	}
   489  
   490  	// start of line proper
   491  	c = sc.peekRune()
   492  
   493  	// Skip spaces.
   494  	for c == ' ' || c == '\t' {
   495  		sc.readRune()
   496  		c = sc.peekRune()
   497  	}
   498  
   499  	// comment
   500  	if c == '#' {
   501  		if sc.keepComments {
   502  			sc.startToken(val)
   503  		}
   504  		// Consume up to newline (included).
   505  		for c != 0 && c != '\n' {
   506  			sc.readRune()
   507  			c = sc.peekRune()
   508  		}
   509  		if sc.keepComments {
   510  			sc.endToken(val)
   511  			if blank {
   512  				sc.lineComments = append(sc.lineComments, Comment{val.pos, val.raw})
   513  			} else {
   514  				sc.suffixComments = append(sc.suffixComments, Comment{val.pos, val.raw})
   515  			}
   516  		}
   517  	}
   518  
   519  	// newline
   520  	if c == '\n' {
   521  		sc.lineStart = true
   522  		if blank || sc.depth > 0 {
   523  			// Ignore blank lines, or newlines within expressions (common case).
   524  			sc.readRune()
   525  			goto start
   526  		}
   527  		// At top-level (not in an expression).
   528  		sc.startToken(val)
   529  		sc.readRune()
   530  		val.raw = "\n"
   531  		return NEWLINE
   532  	}
   533  
   534  	// end of file
   535  	if c == 0 {
   536  		// Emit OUTDENTs for unfinished indentation,
   537  		// preceded by a NEWLINE if we haven't just emitted one.
   538  		if len(sc.indentstk) > 1 {
   539  			if savedLineStart {
   540  				sc.dents = 1 - len(sc.indentstk)
   541  				sc.indentstk = sc.indentstk[1:]
   542  				goto start
   543  			} else {
   544  				sc.lineStart = true
   545  				sc.startToken(val)
   546  				val.raw = "\n"
   547  				return NEWLINE
   548  			}
   549  		}
   550  
   551  		sc.startToken(val)
   552  		sc.endToken(val)
   553  		return EOF
   554  	}
   555  
   556  	// line continuation
   557  	if c == '\\' {
   558  		sc.readRune()
   559  		if sc.peekRune() != '\n' {
   560  			sc.errorf(sc.pos, "stray backslash in program")
   561  		}
   562  		sc.readRune()
   563  		goto start
   564  	}
   565  
   566  	// start of the next token
   567  	sc.startToken(val)
   568  
   569  	// comma (common case)
   570  	if c == ',' {
   571  		sc.readRune()
   572  		sc.endToken(val)
   573  		return COMMA
   574  	}
   575  
   576  	// string literal
   577  	if c == '"' || c == '\'' {
   578  		return sc.scanString(val, c)
   579  	}
   580  
   581  	// identifier or keyword
   582  	if isIdentStart(c) {
   583  		// raw string literal
   584  		if c == 'r' && len(sc.rest) > 1 && (sc.rest[1] == '"' || sc.rest[1] == '\'') {
   585  			sc.readRune()
   586  			c = sc.peekRune()
   587  			return sc.scanString(val, c)
   588  		}
   589  
   590  		for isIdent(c) {
   591  			sc.readRune()
   592  			c = sc.peekRune()
   593  		}
   594  		sc.endToken(val)
   595  		if k, ok := keywordToken[val.raw]; ok {
   596  			return k
   597  		}
   598  
   599  		return IDENT
   600  	}
   601  
   602  	// brackets
   603  	switch c {
   604  	case '[', '(', '{':
   605  		sc.depth++
   606  		sc.readRune()
   607  		sc.endToken(val)
   608  		switch c {
   609  		case '[':
   610  			return LBRACK
   611  		case '(':
   612  			return LPAREN
   613  		case '{':
   614  			return LBRACE
   615  		}
   616  		panic("unreachable")
   617  
   618  	case ']', ')', '}':
   619  		if sc.depth == 0 {
   620  			sc.error(sc.pos, "indentation error")
   621  		} else {
   622  			sc.depth--
   623  		}
   624  		sc.readRune()
   625  		sc.endToken(val)
   626  		switch c {
   627  		case ']':
   628  			return RBRACK
   629  		case ')':
   630  			return RPAREN
   631  		case '}':
   632  			return RBRACE
   633  		}
   634  		panic("unreachable")
   635  	}
   636  
   637  	// int or float literal, or period
   638  	if isdigit(c) || c == '.' {
   639  		return sc.scanNumber(val, c)
   640  	}
   641  
   642  	// other punctuation
   643  	defer sc.endToken(val)
   644  	switch c {
   645  	case '=', '<', '>', '!', '+', '-', '%', '/', '&', '|', '^', '~': // possibly followed by '='
   646  		start := sc.pos
   647  		sc.readRune()
   648  		if sc.peekRune() == '=' {
   649  			sc.readRune()
   650  			switch c {
   651  			case '<':
   652  				return LE
   653  			case '>':
   654  				return GE
   655  			case '=':
   656  				return EQL
   657  			case '!':
   658  				return NEQ
   659  			case '+':
   660  				return PLUS_EQ
   661  			case '-':
   662  				return MINUS_EQ
   663  			case '/':
   664  				return SLASH_EQ
   665  			case '%':
   666  				return PERCENT_EQ
   667  			case '&':
   668  				return AMP_EQ
   669  			case '|':
   670  				return PIPE_EQ
   671  			case '^':
   672  				return CIRCUMFLEX_EQ
   673  			}
   674  		}
   675  		switch c {
   676  		case '=':
   677  			return EQ
   678  		case '<':
   679  			if sc.peekRune() == '<' {
   680  				sc.readRune()
   681  				if sc.peekRune() == '=' {
   682  					sc.readRune()
   683  					return LTLT_EQ
   684  				} else {
   685  					return LTLT
   686  				}
   687  			}
   688  			return LT
   689  		case '>':
   690  			if sc.peekRune() == '>' {
   691  				sc.readRune()
   692  				if sc.peekRune() == '=' {
   693  					sc.readRune()
   694  					return GTGT_EQ
   695  				} else {
   696  					return GTGT
   697  				}
   698  			}
   699  			return GT
   700  		case '!':
   701  			sc.error(start, "unexpected input character '!'")
   702  		case '+':
   703  			return PLUS
   704  		case '-':
   705  			return MINUS
   706  		case '/':
   707  			if sc.peekRune() == '/' {
   708  				sc.readRune()
   709  				if sc.peekRune() == '=' {
   710  					sc.readRune()
   711  					return SLASHSLASH_EQ
   712  				} else {
   713  					return SLASHSLASH
   714  				}
   715  			}
   716  			return SLASH
   717  		case '%':
   718  			return PERCENT
   719  		case '&':
   720  			return AMP
   721  		case '|':
   722  			return PIPE
   723  		case '^':
   724  			return CIRCUMFLEX
   725  		case '~':
   726  			return TILDE
   727  		}
   728  		panic("unreachable")
   729  
   730  	case ':', ';': // single-char tokens (except comma)
   731  		sc.readRune()
   732  		switch c {
   733  		case ':':
   734  			return COLON
   735  		case ';':
   736  			return SEMI
   737  		}
   738  		panic("unreachable")
   739  
   740  	case '*': // possibly followed by '*' or '='
   741  		sc.readRune()
   742  		switch sc.peekRune() {
   743  		case '*':
   744  			sc.readRune()
   745  			return STARSTAR
   746  		case '=':
   747  			sc.readRune()
   748  			return STAR_EQ
   749  		}
   750  		return STAR
   751  	}
   752  
   753  	sc.errorf(sc.pos, "unexpected input character %#q", c)
   754  	panic("unreachable")
   755  }
   756  
   757  func (sc *scanner) scanString(val *tokenValue, quote rune) Token {
   758  	start := sc.pos
   759  	triple := len(sc.rest) >= 3 && sc.rest[0] == byte(quote) && sc.rest[1] == byte(quote) && sc.rest[2] == byte(quote)
   760  	sc.readRune()
   761  	if triple {
   762  		sc.readRune()
   763  		sc.readRune()
   764  	}
   765  
   766  	quoteCount := 0
   767  	for {
   768  		if sc.eof() {
   769  			sc.error(val.pos, "unexpected EOF in string")
   770  		}
   771  		c := sc.readRune()
   772  		if c == '\n' && !triple {
   773  			sc.error(val.pos, "unexpected newline in string")
   774  		}
   775  		if c == quote {
   776  			quoteCount++
   777  			if !triple || quoteCount == 3 {
   778  				break
   779  			}
   780  		} else {
   781  			quoteCount = 0
   782  		}
   783  		if c == '\\' {
   784  			if sc.eof() {
   785  				sc.error(val.pos, "unexpected EOF in string")
   786  			}
   787  			sc.readRune()
   788  		}
   789  	}
   790  
   791  	sc.endToken(val)
   792  	s, _, err := unquote(val.raw)
   793  	if err != nil {
   794  		sc.error(start, err.Error())
   795  	}
   796  	val.string = s
   797  	return STRING
   798  }
   799  
   800  func (sc *scanner) scanNumber(val *tokenValue, c rune) Token {
   801  	// https://github.com/google/skylark/blob/master/doc/spec.md#lexical-elements
   802  	//
   803  	// Python features not supported:
   804  	// - integer literals of >64 bits of precision
   805  	// - 123L or 123l long suffix
   806  	// - traditional octal: 0755
   807  	// https://docs.python.org/2/reference/lexical_analysis.html#integer-and-long-integer-literals
   808  
   809  	start := sc.pos
   810  	fraction, exponent := false, false
   811  
   812  	if c == '.' {
   813  		// dot or start of fraction
   814  		sc.readRune()
   815  		c = sc.peekRune()
   816  		if !isdigit(c) {
   817  			sc.endToken(val)
   818  			return DOT
   819  		}
   820  		fraction = true
   821  	} else if c == '0' {
   822  		// hex, octal, binary or float
   823  		sc.readRune()
   824  		c = sc.peekRune()
   825  
   826  		if c == '.' {
   827  			fraction = true
   828  		} else if c == 'x' || c == 'X' {
   829  			// hex
   830  			sc.readRune()
   831  			c = sc.peekRune()
   832  			if !isxdigit(c) {
   833  				sc.error(start, "invalid hex literal")
   834  			}
   835  			for isxdigit(c) {
   836  				sc.readRune()
   837  				c = sc.peekRune()
   838  			}
   839  		} else if c == 'o' || c == 'O' {
   840  			// octal
   841  			sc.readRune()
   842  			c = sc.peekRune()
   843  			if !isodigit(c) {
   844  				sc.error(sc.pos, "invalid octal literal")
   845  			}
   846  			for isodigit(c) {
   847  				sc.readRune()
   848  				c = sc.peekRune()
   849  			}
   850  		} else if c == 'b' || c == 'B' {
   851  			// binary
   852  			sc.readRune()
   853  			c = sc.peekRune()
   854  			if !isbdigit(c) {
   855  				sc.error(sc.pos, "invalid binary literal")
   856  			}
   857  			for isbdigit(c) {
   858  				sc.readRune()
   859  				c = sc.peekRune()
   860  			}
   861  		} else {
   862  			// float (or obsolete octal "0755")
   863  			allzeros, octal := true, true
   864  			for isdigit(c) {
   865  				if c != '0' {
   866  					allzeros = false
   867  				}
   868  				if c > '7' {
   869  					octal = false
   870  				}
   871  				sc.readRune()
   872  				c = sc.peekRune()
   873  			}
   874  			if c == '.' {
   875  				fraction = true
   876  			} else if c == 'e' || c == 'E' {
   877  				exponent = true
   878  			} else if octal && !allzeros {
   879  				// We must support old octal until the Java
   880  				// implementation groks the new one.
   881  				// TODO(adonovan): reenable the check.
   882  				if false {
   883  					sc.endToken(val)
   884  					sc.errorf(sc.pos, "obsolete form of octal literal; use 0o%s", val.raw[1:])
   885  				}
   886  			}
   887  		}
   888  	} else {
   889  		// decimal
   890  		for isdigit(c) {
   891  			sc.readRune()
   892  			c = sc.peekRune()
   893  		}
   894  
   895  		if c == '.' {
   896  			fraction = true
   897  		} else if c == 'e' || c == 'E' {
   898  			exponent = true
   899  		}
   900  	}
   901  
   902  	if fraction {
   903  		sc.readRune() // consume '.'
   904  		c = sc.peekRune()
   905  		for isdigit(c) {
   906  			sc.readRune()
   907  			c = sc.peekRune()
   908  		}
   909  
   910  		if c == 'e' || c == 'E' {
   911  			exponent = true
   912  		}
   913  	}
   914  
   915  	if exponent {
   916  		sc.readRune() // consume [eE]
   917  		c = sc.peekRune()
   918  		if c == '+' || c == '-' {
   919  			sc.readRune()
   920  			c = sc.peekRune()
   921  			if !isdigit(c) {
   922  				sc.error(sc.pos, "invalid float literal")
   923  			}
   924  		}
   925  		for isdigit(c) {
   926  			sc.readRune()
   927  			c = sc.peekRune()
   928  		}
   929  	}
   930  
   931  	sc.endToken(val)
   932  	if fraction || exponent {
   933  		var err error
   934  		val.float, err = strconv.ParseFloat(val.raw, 64)
   935  		if err != nil {
   936  			sc.error(sc.pos, "invalid float literal")
   937  		}
   938  		return FLOAT
   939  	} else {
   940  		var err error
   941  		s := val.raw
   942  		val.bigInt = nil
   943  		if len(s) > 2 && s[0] == '0' && (s[1] == 'o' || s[1] == 'O') {
   944  			val.int, err = strconv.ParseInt(s[2:], 8, 64)
   945  		} else if len(s) > 2 && s[0] == '0' && (s[1] == 'b' || s[1] == 'B') {
   946  			val.int, err = strconv.ParseInt(s[2:], 2, 64)
   947  		} else {
   948  			val.int, err = strconv.ParseInt(s, 0, 64)
   949  			if err != nil {
   950  				num := new(big.Int)
   951  				var ok bool = true
   952  				val.bigInt, ok = num.SetString(s, 0)
   953  				if ok {
   954  					err = nil
   955  				}
   956  			}
   957  		}
   958  		if err != nil {
   959  			sc.error(start, "invalid int literal")
   960  		}
   961  		return INT
   962  	}
   963  }
   964  
   965  // isIdent reports whether c is an identifier rune.
   966  func isIdent(c rune) bool {
   967  	return isdigit(c) || isIdentStart(c)
   968  }
   969  
   970  func isIdentStart(c rune) bool {
   971  	return 'a' <= c && c <= 'z' ||
   972  		'A' <= c && c <= 'Z' ||
   973  		c == '_' ||
   974  		unicode.IsLetter(c)
   975  }
   976  
   977  func isdigit(c rune) bool  { return '0' <= c && c <= '9' }
   978  func isodigit(c rune) bool { return '0' <= c && c <= '7' }
   979  func isxdigit(c rune) bool { return isdigit(c) || 'A' <= c && c <= 'F' || 'a' <= c && c <= 'f' }
   980  func isbdigit(c rune) bool { return '0' == c || c == '1' }
   981  
   982  // keywordToken records the special tokens for
   983  // strings that should not be treated as ordinary identifiers.
   984  var keywordToken = map[string]Token{
   985  	"and":      AND,
   986  	"break":    BREAK,
   987  	"continue": CONTINUE,
   988  	"def":      DEF,
   989  	"elif":     ELIF,
   990  	"else":     ELSE,
   991  	"for":      FOR,
   992  	"if":       IF,
   993  	"in":       IN,
   994  	"lambda":   LAMBDA,
   995  	"load":     LOAD,
   996  	"not":      NOT,
   997  	"or":       OR,
   998  	"pass":     PASS,
   999  	"return":   RETURN,
  1000  
  1001  	// reserved words:
  1002  	"as": ILLEGAL,
  1003  	// "assert":   ILLEGAL, // heavily used by our tests
  1004  	"class":    ILLEGAL,
  1005  	"del":      ILLEGAL,
  1006  	"except":   ILLEGAL,
  1007  	"finally":  ILLEGAL,
  1008  	"from":     ILLEGAL,
  1009  	"global":   ILLEGAL,
  1010  	"import":   ILLEGAL,
  1011  	"is":       ILLEGAL,
  1012  	"nonlocal": ILLEGAL,
  1013  	"raise":    ILLEGAL,
  1014  	"try":      ILLEGAL,
  1015  	"while":    ILLEGAL,
  1016  	"with":     ILLEGAL,
  1017  	"yield":    ILLEGAL,
  1018  }