
     1  // Copyright 2017 The Bazel Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     5  package syntax
     7  // A lexical scanner for exprcore.
     9  import (
    10  	"fmt"
    11  	"io"
    12  	"io/ioutil"
    13  	"log"
    14  	"math/big"
    15  	"os"
    16  	"strconv"
    17  	"strings"
    18  	"unicode"
    19  	"unicode/utf8"
    20  )
    22  // A Token represents a exprcore lexical token.
    23  type Token int8
    25  const (
    26  	ILLEGAL Token = iota
    27  	EOF
    29  	NEWLINE
    30  	INDENT
    31  	OUTDENT
    33  	// Tokens with values
    34  	IDENT  // x
    35  	INT    // 123
    36  	FLOAT  // 1.23e45
    37  	STRING // "foo" or 'foo' or '''foo''' or r'foo' or r"foo"
    39  	SHELL        //  $ foo bar
    40  	DSHELL_START // the start of a shell expression with expandation
    41  	DSHELL_PART  // an expandation within a shell expression
    42  	DSHELL_END   // the end of a DSHELL
    44  	// Punctuation
    45  	PLUS          // +
    46  	MINUS         // -
    47  	STAR          // *
    48  	SLASH         // /
    49  	SLASHSLASH    // //
    50  	PERCENT       // %
    51  	AMP           // &
    52  	PIPE          // |
    53  	CIRCUMFLEX    // ^
    54  	LTLT          // <<
    55  	GTGT          // >>
    56  	TILDE         // ~
    57  	DOT           // .
    58  	COMMA         // ,
    59  	EQ            // =
    60  	SEMI          // ;
    61  	COLON         // :
    62  	LPAREN        // (
    63  	RPAREN        // )
    64  	LBRACK        // [
    65  	RBRACK        // ]
    66  	LBRACE        // {
    67  	RBRACE        // }
    68  	LT            // <
    69  	GT            // >
    70  	GE            // >=
    71  	LE            // <=
    72  	EQL           // ==
    73  	NEQ           // !=
    74  	PLUS_EQ       // +=    (keep order consistent with PLUS..GTGT)
    75  	MINUS_EQ      // -=
    76  	STAR_EQ       // *=
    77  	SLASH_EQ      // /=
    78  	SLASHSLASH_EQ // //=
    79  	PERCENT_EQ    // %=
    80  	AMP_EQ        // &=
    81  	PIPE_EQ       // |=
    82  	CIRCUMFLEX_EQ // ^=
    83  	LTLT_EQ       // <<=
    84  	GTGT_EQ       // >>=
    85  	ARROW         // =>
    86  	AT            // @
    87  	PERCENT_BRACE // %{
    88  	STARSTAR      // **
    90  	// Keywords
    91  	AND
    92  	BREAK
    93  	CONTINUE
    94  	DEF
    95  	ELIF
    96  	ELSE
    97  	FOR
    98  	IF
    99  	IN
   100  	LAMBDA
   101  	LOAD
   102  	IMPORT
   103  	AS
   104  	USING
   105  	NOT
   106  	NOT_IN // synthesized by parser from NOT IN
   107  	OR
   108  	PASS
   109  	RETURN
   110  	WHILE
   112  	maxToken
   113  )
   115  func (tok Token) String() string { return tokenNames[tok] }
   117  // GoString is like String but quotes punctuation tokens.
   118  // Use Sprintf("%#v", tok) when constructing error messages.
   119  func (tok Token) GoString() string {
   120  	if tok >= PLUS && tok <= STARSTAR {
   121  		return "'" + tokenNames[tok] + "'"
   122  	}
   123  	return tokenNames[tok]
   124  }
   126  var tokenNames = [...]string{
   127  	ILLEGAL:       "illegal token",
   128  	EOF:           "end of file",
   129  	NEWLINE:       "newline",
   130  	INDENT:        "indent",
   131  	OUTDENT:       "outdent",
   132  	IDENT:         "identifier",
   133  	INT:           "int literal",
   134  	FLOAT:         "float literal",
   135  	STRING:        "string literal",
   136  	SHELL:         "a shell expression",
   137  	DSHELL_START:  "the start of a dynamic shell expression",
   138  	DSHELL_PART:   "part of a dynamic shell expression",
   139  	DSHELL_END:    "the end of a dynamic shell expression",
   140  	PLUS:          "+",
   141  	MINUS:         "-",
   142  	STAR:          "*",
   143  	SLASH:         "/",
   144  	SLASHSLASH:    "//",
   145  	PERCENT:       "%",
   146  	PERCENT_BRACE: "%{",
   147  	AMP:           "&",
   148  	PIPE:          "|",
   149  	CIRCUMFLEX:    "^",
   150  	LTLT:          "<<",
   151  	GTGT:          ">>",
   152  	TILDE:         "~",
   153  	DOT:           ".",
   154  	COMMA:         ",",
   155  	EQ:            "=",
   156  	SEMI:          ";",
   157  	COLON:         ":",
   158  	LPAREN:        "(",
   159  	RPAREN:        ")",
   160  	LBRACK:        "[",
   161  	RBRACK:        "]",
   162  	LBRACE:        "{",
   163  	RBRACE:        "}",
   164  	LT:            "<",
   165  	GT:            ">",
   166  	GE:            ">=",
   167  	LE:            "<=",
   168  	EQL:           "==",
   169  	NEQ:           "!=",
   170  	PLUS_EQ:       "+=",
   171  	MINUS_EQ:      "-=",
   172  	STAR_EQ:       "*=",
   173  	SLASH_EQ:      "/=",
   174  	SLASHSLASH_EQ: "//=",
   175  	PERCENT_EQ:    "%=",
   176  	AMP_EQ:        "&=",
   177  	PIPE_EQ:       "|=",
   178  	CIRCUMFLEX_EQ: "^=",
   179  	LTLT_EQ:       "<<=",
   180  	GTGT_EQ:       ">>=",
   181  	STARSTAR:      "**",
   182  	ARROW:         "=>",
   183  	AT:            "@",
   184  	AND:           "and",
   185  	BREAK:         "break",
   186  	CONTINUE:      "continue",
   187  	DEF:           "def",
   188  	ELIF:          "elif",
   189  	ELSE:          "else",
   190  	FOR:           "for",
   191  	IF:            "if",
   192  	IN:            "in",
   193  	LAMBDA:        "lambda",
   194  	LOAD:          "load",
   195  	IMPORT:        "import",
   196  	AS:            "as",
   197  	USING:         "using",
   198  	NOT:           "not",
   199  	NOT_IN:        "not in",
   200  	OR:            "or",
   201  	PASS:          "pass",
   202  	RETURN:        "return",
   203  	WHILE:         "while",
   204  }
   206  // A Position describes the location of a rune of input.
   207  type Position struct {
   208  	file *string // filename (indirect for compactness)
   209  	Line int32   // 1-based line number; 0 if line unknown
   210  	Col  int32   // 1-based column (rune) number; 0 if column unknown
   211  }
   213  // IsValid reports whether the position is valid.
   214  func (p Position) IsValid() bool { return p.file != nil }
   216  // Filename returns the name of the file containing this position.
   217  func (p Position) Filename() string {
   218  	if p.file != nil {
   219  		return *p.file
   220  	}
   221  	return "<invalid>"
   222  }
   224  // MakePosition returns position with the specified components.
   225  func MakePosition(file *string, line, col int32) Position { return Position{file, line, col} }
   227  // add returns the position at the end of s, assuming it starts at p.
   228  func (p Position) add(s string) Position {
   229  	if n := strings.Count(s, "\n"); n > 0 {
   230  		p.Line += int32(n)
   231  		s = s[strings.LastIndex(s, "\n")+1:]
   232  		p.Col = 1
   233  	}
   234  	p.Col += int32(utf8.RuneCountInString(s))
   235  	return p
   236  }
   238  func (p Position) String() string {
   239  	file := p.Filename()
   240  	if p.Line > 0 {
   241  		if p.Col > 0 {
   242  			return fmt.Sprintf("%s:%d:%d", file, p.Line, p.Col)
   243  		}
   244  		return fmt.Sprintf("%s:%d", file, p.Line)
   245  	}
   246  	return file
   247  }
   249  func (p Position) isBefore(q Position) bool {
   250  	if p.Line != q.Line {
   251  		return p.Line < q.Line
   252  	}
   253  	return p.Col < q.Col
   254  }
// A scanner represents a single input file being parsed.
// It holds the unread input, the current position, and the state
// that nextToken carries from one token to the next.
type scanner struct {
	rest           []byte    // rest of input (in REPL, a line of input)
	token          []byte    // token being scanned
	pos            Position  // current input position
	depth          int       // nesting of [ ] { } ( )
	indentstk      []int     // stack of indentation levels
	dents          int       // number of saved INDENT (>0) or OUTDENT (<0) tokens to return
	lineStart      bool      // after NEWLINE; convert spaces to indentation tokens
	keepComments   bool      // accumulate comments in slice
	lineComments   []Comment // list of full line comments (if keepComments)
	suffixComments []Comment // list of suffix comments (if keepComments)

	insertSemi bool // insert a semicolon before next newline

	interpDepth     int // number of open ${ expansions inside "$ ..." shell lines
	interpExprDepth int // number of open ${ expansions inside backquoted shell expressions

	readline func() ([]byte, error) // read next line of input (REPL only)
}
   277  func newScanner(filename string, src interface{}, keepComments bool) (*scanner, error) {
   278  	sc := &scanner{
   279  		pos:          Position{file: &filename, Line: 1, Col: 1},
   280  		indentstk:    make([]int, 1, 10), // []int{0} + spare capacity
   281  		lineStart:    true,
   282  		keepComments: keepComments,
   283  	}
   284  	sc.readline, _ = src.(func() ([]byte, error)) // REPL only
   285  	if sc.readline == nil {
   286  		data, err := readSource(filename, src)
   287  		if err != nil {
   288  			return nil, err
   289  		}
   290 = data
   291  	}
   292  	return sc, nil
   293  }
   295  func readSource(filename string, src interface{}) ([]byte, error) {
   296  	switch src := src.(type) {
   297  	case string:
   298  		return []byte(src), nil
   299  	case []byte:
   300  		return src, nil
   301  	case io.Reader:
   302  		data, err := ioutil.ReadAll(src)
   303  		if err != nil {
   304  			err = &os.PathError{Op: "read", Path: filename, Err: err}
   305  			return nil, err
   306  		}
   307  		return data, nil
   308  	case nil:
   309  		return ioutil.ReadFile(filename)
   310  	default:
   311  		return nil, fmt.Errorf("invalid source: %T", src)
   312  	}
   313  }
   315  // An Error describes the nature and position of a scanner or parser error.
   316  type Error struct {
   317  	Pos Position
   318  	Msg string
   319  }
   321  func (e Error) Error() string { return e.Pos.String() + ": " + e.Msg }
   323  // errorf is called to report an error.
   324  // errorf does not return: it panics.
   325  func (sc *scanner) error(pos Position, s string) {
   326  	panic(Error{pos, s})
   327  }
   329  func (sc *scanner) errorf(pos Position, format string, args ...interface{}) {
   330  	sc.error(pos, fmt.Sprintf(format, args...))
   331  }
   333  func (sc *scanner) recover(err *error) {
   334  	// The scanner and parser panic both for routine errors like
   335  	// syntax errors and for programmer bugs like array index
   336  	// errors.  Turn both into error returns.  Catching bug panics
   337  	// is especially important when processing many files.
   338  	switch e := recover().(type) {
   339  	case nil:
   340  		// no panic
   341  	case Error:
   342  		*err = e
   343  	default:
   344  		*err = Error{sc.pos, fmt.Sprintf("internal error: %v", e)}
   345  		if debug {
   346  			log.Fatal(*err)
   347  		}
   348  	}
   349  }
   351  // eof reports whether the input has reached end of file.
   352  func (sc *scanner) eof() bool {
   353  	return len( == 0 && !sc.readLine()
   354  }
   356  // readLine attempts to read another line of input.
   357  // Precondition: len(
   358  func (sc *scanner) readLine() bool {
   359  	if sc.readline != nil {
   360  		var err error
   361, err = sc.readline()
   362  		if err != nil {
   363  			sc.errorf(sc.pos, "%v", err) // EOF or ErrInterrupt
   364  		}
   365  		return len( > 0
   366  	}
   367  	return false
   368  }
   370  // peekRune returns the next rune in the input without consuming it.
   371  // Newlines in Unix, DOS, or Mac format are treated as one rune, '\n'.
   372  func (sc *scanner) peekRune() rune {
   373  	// TODO(adonovan): opt: measure and perhaps inline eof.
   374  	if sc.eof() {
   375  		return 0
   376  	}
   378  	// fast path: ASCII
   379  	if b :=[0]; b < utf8.RuneSelf {
   380  		if b == '\r' {
   381  			return '\n'
   382  		}
   383  		return rune(b)
   384  	}
   386  	r, _ := utf8.DecodeRune(
   387  	return r
   388  }
   390  // readRune consumes and returns the next rune in the input.
   391  // Newlines in Unix, DOS, or Mac format are treated as one rune, '\n'.
   392  func (sc *scanner) readRune() rune {
   393  	// eof() has been inlined here, both to avoid a call
   394  	// and to establish len(rest)>0 to avoid a bounds check.
   395  	if len( == 0 {
   396  		if !sc.readLine() {
   397  			sc.error(sc.pos, "internal scanner error: readRune at EOF")
   398  		}
   399  		// Redundant, but eliminates the bounds-check below.
   400  		if len( == 0 {
   401  			return 0
   402  		}
   403  	}
   405  	// fast path: ASCII
   406  	if b :=[0]; b < utf8.RuneSelf {
   407  		r := rune(b)
   408 =[1:]
   409  		if r == '\r' {
   410  			if len( > 0 &&[0] == '\n' {
   411 =[1:]
   412  			}
   413  			r = '\n'
   414  		}
   415  		if r == '\n' {
   416  			sc.pos.Line++
   417  			sc.pos.Col = 1
   418  		} else {
   419  			sc.pos.Col++
   420  		}
   421  		return r
   422  	}
   424  	r, size := utf8.DecodeRune(
   425 =[size:]
   426  	sc.pos.Col++
   427  	return r
   428  }
// tokenValue records the position and value associated with each token.
// The scanner fills in pos and raw for every token; the remaining
// fields are only meaningful for the token kinds that decode a value.
type tokenValue struct {
	raw    string   // raw text of token
	int    int64    // decoded int
	bigInt *big.Int // decoded integers > int64
	float  float64  // decoded float
	string string   // decoded string
	pos    Position // start position of token
}
   440  // startToken marks the beginning of the next input token.
   441  // It must be followed by a call to endToken once the token has
   442  // been consumed using readRune.
   443  func (sc *scanner) startToken(val *tokenValue) {
   444  	sc.token =
   445  	val.raw = ""
   446  	val.pos = sc.pos
   447  }
   449  // endToken marks the end of an input token.
   450  // It records the actual token string in val.raw if the caller
   451  // has not done that already.
   452  func (sc *scanner) endToken(val *tokenValue) {
   453  	if val.raw == "" {
   454  		val.raw = string(sc.token[:len(sc.token)-len(])
   455  	}
   456  }
   458  // nextToken is called by the parser to obtain the next input token.
   459  // It returns the token value and sets val to the data associated with
   460  // the token.
   461  //
   462  // For all our input tokens, the associated data is val.pos (the
   463  // position where the token begins), val.raw (the input string
   464  // corresponding to the token).  For string and int tokens, the string
   465  // and int fields additionally contain the token's interpreted value.
   466  func (sc *scanner) nextToken(val *tokenValue) Token {
   468  	// The following distribution of tokens guides case ordering:
   469  	//
   470  	//      COMMA          27   %
   471  	//      STRING         23   %
   472  	//      IDENT          15   %
   473  	//      EQL            11   %
   474  	//      LBRACK          5.5 %
   475  	//      RBRACK          5.5 %
   476  	//      NEWLINE         3   %
   477  	//      LPAREN          2.9 %
   478  	//      RPAREN          2.9 %
   479  	//      INT             2   %
   480  	//      others        < 1   %
   481  	//
   482  	// Although NEWLINE tokens are infrequent, and lineStart is
   483  	// usually (~97%) false on entry, skipped newlines account for
   484  	// about 50% of all iterations of the 'start' loop.
   486  	insertSemi := false
   488  	// Replace the value with the updated on on every time through
   489  	defer func() {
   490  		sc.insertSemi = insertSemi
   491  	}()
   493  start:
   494  	var c rune
   496  	// Deal with leading spaces and indentation.
   497  	blank := false
   498  	/*
   499  		savedLineStart := sc.lineStart
   500  		if sc.lineStart {
   501  			sc.lineStart = false
   502  			col := 0
   503  			for {
   504  				c = sc.peekRune()
   505  				if c == ' ' {
   506  					col++
   507  					sc.readRune()
   508  				} else if c == '\t' {
   509  					const tab = 8
   510  					col += int(tab - (sc.pos.Col-1)%tab)
   511  					sc.readRune()
   512  				} else {
   513  					break
   514  				}
   515  			}
   517  			// The third clause matches EOF.
   518  			if c == '#' || c == '\n' || c == 0 {
   519  				blank = true
   520  			}
   522  			// Compute indentation level for non-blank lines not
   523  			// inside an expression.  This is not the common case.
   524  			if false { // !blank { //  && sc.depth == 0 {
   525  				cur := sc.indentstk[len(sc.indentstk)-1]
   526  				if col > cur {
   527  					// indent
   528  					sc.dents++
   529  					sc.indentstk = append(sc.indentstk, col)
   530  				} else if col < cur {
   531  					// outdent(s)
   532  					for len(sc.indentstk) > 0 && col < sc.indentstk[len(sc.indentstk)-1] {
   533  						sc.dents--
   534  						sc.indentstk = sc.indentstk[:len(sc.indentstk)-1] // pop
   535  					}
   536  					if col != sc.indentstk[len(sc.indentstk)-1] {
   537  						sc.error(sc.pos, "unindent does not match any outer indentation level")
   538  					}
   539  				}
   540  			}
   541  	*/
   543  	// Return saved indentation tokens.
   544  	/*
   545  		if sc.dents != 0 {
   546  			sc.startToken(val)
   547  			sc.endToken(val)
   548  			if sc.dents < 0 {
   549  				sc.dents++
   550  				return OUTDENT
   551  			} else {
   552  				sc.dents--
   553  				return INDENT
   554  			}
   555  		}
   556  	*/
   558  	// start of line proper
   559  	c = sc.peekRune()
   561  	// Skip spaces.
   562  	for c == ' ' || c == '\t' || (c == '\n' && !sc.insertSemi) || c == '\r' {
   563  		sc.readRune()
   564  		c = sc.peekRune()
   565  	}
   567  	// comment
   568  	if c == '#' {
   569  		if sc.keepComments {
   570  			sc.startToken(val)
   571  		}
   572  		// Consume up to newline (included).
   573  		for c != 0 && c != '\n' {
   574  			sc.readRune()
   575  			c = sc.peekRune()
   576  		}
   578  		if sc.keepComments {
   579  			sc.endToken(val)
   580  			if blank {
   581  				sc.lineComments = append(sc.lineComments, Comment{val.pos, val.raw})
   582  			} else {
   583  				sc.suffixComments = append(sc.suffixComments, Comment{val.pos, val.raw})
   584  			}
   585  		}
   587  		goto start
   588  	}
   590  	// newline
   591  	if c == '\n' {
   592  		// Only seen if insertSemi was true because otherwise the loop above will eat newlines
   593  		sc.lineStart = true
   595  		// Ignore newlines within expressions (common case).
   596  		/*
   597  			if sc.depth > 0 {
   598  				sc.readRune()
   599  				goto start
   600  			}
   601  		*/
   603  		// Ignore blank lines, except in the REPL,
   604  		// where they emit OUTDENTs and NEWLINE.
   605  		/*
   606  			if blank {
   607  				if sc.readline == nil {
   608  					sc.readRune()
   609  					goto start
   610  				} else if len(sc.indentstk) > 1 {
   611  					sc.dents = 1 - len(sc.indentstk)
   612  					sc.indentstk = sc.indentstk[:1]
   613  					goto start
   614  				}
   615  			}
   616  		*/
   618  		// At top-level (not in an expression).
   619  		sc.startToken(val)
   620  		sc.readRune()
   621  		val.raw = "\n"
   622  		return SEMI
   623  	}
   625  	// end of file
   626  	if c == 0 {
   627  		// Emit OUTDENTs for unfinished indentation,
   628  		// preceded by a NEWLINE if we haven't just emitted one.
   629  		/*
   630  			if len(sc.indentstk) > 1 {
   631  				if savedLineStart {
   632  					sc.dents = 1 - len(sc.indentstk)
   633  					sc.indentstk = sc.indentstk[:1]
   634  					goto start
   635  				} else {
   636  					sc.lineStart = true
   637  					sc.startToken(val)
   638  					val.raw = "\n"
   639  					return NEWLINE
   640  				}
   641  			}
   642  		*/
   644  		if sc.insertSemi {
   645  			sc.startToken(val)
   646  			sc.endToken(val)
   647  			return SEMI
   648  		}
   650  		sc.startToken(val)
   651  		sc.endToken(val)
   652  		return EOF
   653  	}
   655  	// line continuation
   656  	if c == '\\' {
   657  		sc.readRune()
   658  		if sc.peekRune() != '\n' {
   659  			sc.errorf(sc.pos, "stray backslash in program")
   660  		}
   661  		sc.readRune()
   662  		goto start
   663  	}
   665  	// start of the next token
   666  	sc.startToken(val)
   668  	// comma (common case)
   669  	if c == ',' {
   670  		sc.readRune()
   671  		sc.endToken(val)
   672  		return COMMA
   673  	}
   675  	// string literal
   676  	if c == '"' || c == '\'' {
   677  		insertSemi = true
   678  		return sc.scanString(val, c)
   679  	}
   681  	if c == '`' {
   682  		tok := sc.scanShellExpr(val)
   683  		if tok == SHELL {
   684  			insertSemi = true
   685  		}
   687  		return tok
   688  	}
   690  	if c == '$' {
   691  		tok := sc.scanShell(val)
   692  		if tok == SHELL {
   693  			insertSemi = true
   694  		}
   696  		return tok
   697  	}
   699  	// identifier or keyword
   700  	if isIdentStart(c) {
   701  		// raw string literal
   702  		if c == 'r' && len( > 1 && ([1] == '"' ||[1] == '\'') {
   703  			insertSemi = true
   704  			sc.readRune()
   705  			c = sc.peekRune()
   706  			return sc.scanString(val, c)
   707  		}
   709  		for isIdent(c) {
   710  			sc.readRune()
   711  			c = sc.peekRune()
   712  		}
   713  		sc.endToken(val)
   714  		if k, ok := keywordToken[val.raw]; ok {
   715  			switch k {
   716  			case BREAK, CONTINUE, PASS, RETURN:
   717  				insertSemi = true
   718  			}
   720  			return k
   721  		}
   723  		insertSemi = true
   724  		return IDENT
   725  	}
   727  	// brackets
   728  	switch c {
   729  	case '[', '(', '{':
   730  		sc.depth++
   731  		sc.readRune()
   732  		sc.endToken(val)
   733  		switch c {
   734  		case '[':
   735  			return LBRACK
   736  		case '(':
   737  			return LPAREN
   738  		case '{':
   739  			return LBRACE
   740  		}
   741  		panic("unreachable")
   743  	case ']', ')', '}':
   744  		if c == '}' {
   745  			if sc.interpDepth > 0 {
   746  				tok := sc.scanMoreShell(val)
   747  				if tok == DSHELL_END {
   748  					insertSemi = true
   749  				}
   751  				return tok
   752  			}
   754  			if sc.interpExprDepth > 0 {
   755  				tok := sc.scanMoreShellExpr(val)
   756  				if tok == DSHELL_END {
   757  					insertSemi = true
   758  				}
   760  				return tok
   761  			}
   762  		}
   764  		if sc.depth == 0 {
   765  			sc.errorf(sc.pos, "unexpected %q", c)
   766  		} else {
   767  			sc.depth--
   768  		}
   769  		sc.readRune()
   770  		sc.endToken(val)
   772  		insertSemi = true
   773  		switch c {
   774  		case ']':
   775  			return RBRACK
   776  		case ')':
   777  			return RPAREN
   778  		case '}':
   779  			return RBRACE
   780  		}
   781  		panic("unreachable")
   782  	}
   784  	// int or float literal, or period
   785  	if isdigit(c) || c == '.' {
   786  		insertSemi = true
   787  		return sc.scanNumber(val, c)
   788  	}
   790  	// other punctuation
   791  	defer sc.endToken(val)
   792  	switch c {
   793  	case '=', '<', '>', '!', '+', '-', '%', '/', '&', '|', '^': // possibly followed by '='
   794  		start := sc.pos
   795  		sc.readRune()
   796  		if sc.peekRune() == '=' {
   797  			sc.readRune()
   798  			switch c {
   799  			case '<':
   800  				return LE
   801  			case '>':
   802  				return GE
   803  			case '=':
   804  				return EQL
   805  			case '!':
   806  				return NEQ
   807  			case '+':
   808  				return PLUS_EQ
   809  			case '-':
   810  				return MINUS_EQ
   811  			case '/':
   812  				return SLASH_EQ
   813  			case '%':
   814  				return PERCENT_EQ
   815  			case '&':
   816  				return AMP_EQ
   817  			case '|':
   818  				return PIPE_EQ
   819  			case '^':
   820  				return CIRCUMFLEX_EQ
   821  			}
   822  		}
   823  		switch c {
   824  		case '=':
   825  			if sc.peekRune() == '>' {
   826  				sc.readRune()
   827  				return ARROW
   828  			}
   830  			return EQ
   831  		case '<':
   832  			if sc.peekRune() == '<' {
   833  				sc.readRune()
   834  				if sc.peekRune() == '=' {
   835  					sc.readRune()
   836  					return LTLT_EQ
   837  				} else {
   838  					return LTLT
   839  				}
   840  			}
   841  			return LT
   842  		case '>':
   843  			if sc.peekRune() == '>' {
   844  				sc.readRune()
   845  				if sc.peekRune() == '=' {
   846  					sc.readRune()
   847  					return GTGT_EQ
   848  				} else {
   849  					return GTGT
   850  				}
   851  			}
   852  			return GT
   853  		case '!':
   854  			sc.error(start, "unexpected input character '!'")
   855  		case '+':
   856  			return PLUS
   857  		case '-':
   858  			return MINUS
   859  		case '/':
   860  			if sc.peekRune() == '/' {
   861  				sc.readRune()
   862  				if sc.peekRune() == '=' {
   863  					sc.readRune()
   864  					return SLASHSLASH_EQ
   865  				} else {
   866  					return SLASHSLASH
   867  				}
   868  			}
   869  			return SLASH
   870  		case '%':
   871  			if sc.peekRune() == '{' {
   872  				sc.readRune()
   873  				sc.depth++
   874  				return PERCENT_BRACE
   875  			}
   877  			return PERCENT
   878  		case '&':
   879  			return AMP
   880  		case '|':
   881  			return PIPE
   882  		case '^':
   883  			return CIRCUMFLEX
   884  		}
   885  		panic("unreachable")
   887  	case ':', ';', '~', '@': // single-char tokens (except comma)
   888  		sc.readRune()
   889  		switch c {
   890  		case ':':
   891  			return COLON
   892  		case ';':
   893  			return SEMI
   894  		case '~':
   895  			return TILDE
   896  		case '@':
   897  			return AT
   898  		}
   899  		panic("unreachable")
   901  	case '*': // possibly followed by '*' or '='
   902  		sc.readRune()
   903  		switch sc.peekRune() {
   904  		case '*':
   905  			sc.readRune()
   906  			return STARSTAR
   907  		case '=':
   908  			sc.readRune()
   909  			return STAR_EQ
   910  		}
   911  		return STAR
   912  	}
   914  	sc.errorf(sc.pos, "unexpected input character %#q", c)
   915  	panic("unreachable")
   916  }
   918  func (sc *scanner) scanShellExpr(val *tokenValue) Token {
   919  	sc.readRune()
   921  	var (
   922  		raw       strings.Builder
   923  		hasExpand bool
   924  	)
   926  	for sc.peekRune() == ' ' {
   927  		sc.readRune()
   928  	}
   930  	for {
   931  		if sc.eof() {
   932  			break
   933  		}
   935  		c := sc.readRune()
   936  		if c == '`' {
   937  			break
   938  		}
   940  		if c == '$' {
   941  			nc := sc.peekRune()
   942  			if nc == '{' {
   943  				sc.readRune()
   944  				sc.interpExprDepth++
   945  				hasExpand = true
   946  				break
   947  			}
   948  		} else if c == '\\' {
   949  			if sc.eof() {
   950  				sc.error(val.pos, "unexpected EOF in string")
   951  			}
   952  			c = sc.readRune()
   953  		}
   955  		raw.WriteRune(c)
   956  	}
   958  	val.string = raw.String()
   960  	if hasExpand {
   961  		return DSHELL_START
   962  	} else {
   963  		return SHELL
   964  	}
   965  }
   967  func (sc *scanner) scanShell(val *tokenValue) Token {
   968  	sc.readRune()
   970  	var (
   971  		raw       strings.Builder
   972  		hasExpand bool
   973  	)
   975  	for sc.peekRune() == ' ' {
   976  		sc.readRune()
   977  	}
   979  	for {
   980  		if sc.eof() {
   981  			break
   982  		}
   984  		c := sc.peekRune()
   985  		if c == '\n' {
   986  			break
   987  		}
   989  		sc.readRune()
   990  		if c == '$' {
   991  			nc := sc.peekRune()
   992  			if nc == '{' {
   993  				sc.readRune()
   994  				sc.interpDepth++
   995  				hasExpand = true
   996  				break
   997  			}
   998  		} else if c == '\\' {
   999  			if sc.eof() {
  1000  				sc.error(val.pos, "unexpected EOF in string")
  1001  			}
  1002  			c = sc.readRune()
  1003  		}
  1005  		raw.WriteRune(c)
  1006  	}
  1008  	val.string = raw.String()
  1010  	if hasExpand {
  1011  		return DSHELL_START
  1012  	} else {
  1013  		return SHELL
  1014  	}
  1015  }
  1017  func (sc *scanner) scanMoreShell(val *tokenValue) Token {
  1018  	sc.interpDepth--
  1020  	sc.readRune()
  1022  	var (
  1023  		raw       strings.Builder
  1024  		hasExpand bool
  1025  	)
  1027  	for {
  1028  		if sc.eof() {
  1029  			break
  1030  		}
  1032  		c := sc.peekRune()
  1033  		if c == '\n' {
  1034  			break
  1035  		}
  1037  		sc.readRune()
  1039  		if c == '$' {
  1040  			nc := sc.peekRune()
  1041  			if nc == '{' {
  1042  				sc.readRune()
  1043  				sc.interpDepth++
  1044  				hasExpand = true
  1045  				break
  1046  			}
  1047  		} else if c == '\\' {
  1048  			if sc.eof() {
  1049  				sc.error(val.pos, "unexpected EOF in string")
  1050  			}
  1051  			c = sc.readRune()
  1052  		}
  1054  		raw.WriteRune(c)
  1055  	}
  1057  	val.string = raw.String()
  1059  	if !hasExpand {
  1060  		return DSHELL_END
  1061  	} else {
  1062  		return DSHELL_PART
  1063  	}
  1064  }
  1066  func (sc *scanner) scanMoreShellExpr(val *tokenValue) Token {
  1067  	sc.interpDepth--
  1069  	sc.readRune()
  1071  	var (
  1072  		raw       strings.Builder
  1073  		hasExpand bool
  1074  	)
  1076  	for {
  1077  		if sc.eof() {
  1078  			sc.error(val.pos, "unexpected EOF in string")
  1079  		}
  1081  		c := sc.readRune()
  1082  		if c == '`' {
  1083  			break
  1084  		}
  1086  		if c == '$' {
  1087  			nc := sc.peekRune()
  1088  			if nc == '{' {
  1089  				sc.readRune()
  1090  				sc.interpExprDepth++
  1091  				hasExpand = true
  1092  				break
  1093  			}
  1094  		} else if c == '\\' {
  1095  			if sc.eof() {
  1096  				sc.error(val.pos, "unexpected EOF in string")
  1097  			}
  1098  			c = sc.readRune()
  1099  		}
  1101  		raw.WriteRune(c)
  1102  	}
  1104  	val.string = raw.String()
  1106  	if !hasExpand {
  1107  		return DSHELL_END
  1108  	} else {
  1109  		return DSHELL_PART
  1110  	}
  1111  }
  1113  func (sc *scanner) scanString(val *tokenValue, quote rune) Token {
  1114  	start := sc.pos
  1115  	triple := len( >= 3 &&[0] == byte(quote) &&[1] == byte(quote) &&[2] == byte(quote)
  1116  	sc.readRune()
  1118  	// String literals may contain escaped or unescaped newlines,
  1119  	// causing them to span multiple lines (gulps) of REPL input;
  1120  	// they are the only such token. Thus we cannot call endToken,
  1121  	// as it assumes is unchanged since startToken.
  1122  	// Instead, buffer the token here.
  1123  	// TODO(adonovan): opt: buffer only if we encounter a newline.
  1124  	raw := new(strings.Builder)
  1126  	// Copy the prefix, e.g. r' or " (see startToken).
  1127  	raw.Write(sc.token[:len(sc.token)-len(])
  1129  	if !triple {
  1130  		// single-quoted string literal
  1131  		for {
  1132  			if sc.eof() {
  1133  				sc.error(val.pos, "unexpected EOF in string")
  1134  			}
  1135  			c := sc.readRune()
  1136  			raw.WriteRune(c)
  1137  			if c == quote {
  1138  				break
  1139  			}
  1140  			if c == '\n' {
  1141  				sc.error(val.pos, "unexpected newline in string")
  1142  			}
  1143  			if c == '\\' {
  1144  				if sc.eof() {
  1145  					sc.error(val.pos, "unexpected EOF in string")
  1146  				}
  1147  				c = sc.readRune()
  1148  				raw.WriteRune(c)
  1149  			}
  1150  		}
  1151  	} else {
  1152  		// triple-quoted string literal
  1153  		sc.readRune()
  1154  		raw.WriteRune(quote)
  1155  		sc.readRune()
  1156  		raw.WriteRune(quote)
  1158  		quoteCount := 0
  1159  		for {
  1160  			if sc.eof() {
  1161  				sc.error(val.pos, "unexpected EOF in string")
  1162  			}
  1163  			c := sc.readRune()
  1164  			raw.WriteRune(c)
  1165  			if c == quote {
  1166  				quoteCount++
  1167  				if quoteCount == 3 {
  1168  					break
  1169  				}
  1170  			} else {
  1171  				quoteCount = 0
  1172  			}
  1173  			if c == '\\' {
  1174  				if sc.eof() {
  1175  					sc.error(val.pos, "unexpected EOF in string")
  1176  				}
  1177  				c = sc.readRune()
  1178  				raw.WriteRune(c)
  1179  			}
  1180  		}
  1181  	}
  1182  	val.raw = raw.String()
  1184  	s, _, err := unquote(val.raw)
  1185  	if err != nil {
  1186  		sc.error(start, err.Error())
  1187  	}
  1188  	val.string = s
  1189  	return STRING
  1190  }
  1192  func (sc *scanner) scanNumber(val *tokenValue, c rune) Token {
  1193  	//
  1194  	//
  1195  	// Python features not supported:
  1196  	// - integer literals of >64 bits of precision
  1197  	// - 123L or 123l long suffix
  1198  	// - traditional octal: 0755
  1199  	//
  1201  	start := sc.pos
  1202  	fraction, exponent := false, false
  1204  	if c == '.' {
  1205  		// dot or start of fraction
  1206  		sc.readRune()
  1207  		c = sc.peekRune()
  1208  		if !isdigit(c) {
  1209  			sc.endToken(val)
  1210  			return DOT
  1211  		}
  1212  		fraction = true
  1213  	} else if c == '0' {
  1214  		// hex, octal, binary or float
  1215  		sc.readRune()
  1216  		c = sc.peekRune()
  1218  		if c == '.' {
  1219  			fraction = true
  1220  		} else if c == 'x' || c == 'X' {
  1221  			// hex
  1222  			sc.readRune()
  1223  			c = sc.peekRune()
  1224  			if !isxdigit(c) {
  1225  				sc.error(start, "invalid hex literal")
  1226  			}
  1227  			for isxdigit(c) {
  1228  				sc.readRune()
  1229  				c = sc.peekRune()
  1230  			}
  1231  		} else if c == 'o' || c == 'O' {
  1232  			// octal
  1233  			sc.readRune()
  1234  			c = sc.peekRune()
  1235  			if !isodigit(c) {
  1236  				sc.error(sc.pos, "invalid octal literal")
  1237  			}
  1238  			for isodigit(c) {
  1239  				sc.readRune()
  1240  				c = sc.peekRune()
  1241  			}
  1242  		} else if c == 'b' || c == 'B' {
  1243  			// binary
  1244  			sc.readRune()
  1245  			c = sc.peekRune()
  1246  			if !isbdigit(c) {
  1247  				sc.error(sc.pos, "invalid binary literal")
  1248  			}
  1249  			for isbdigit(c) {
  1250  				sc.readRune()
  1251  				c = sc.peekRune()
  1252  			}
  1253  		} else {
  1254  			// float (or obsolete octal "0755")
  1255  			allzeros, octal := true, true
  1256  			for isdigit(c) {
  1257  				if c != '0' {
  1258  					allzeros = false
  1259  				}
  1260  				if c > '7' {
  1261  					octal = false
  1262  				}
  1263  				sc.readRune()
  1264  				c = sc.peekRune()
  1265  			}
  1266  			if c == '.' {
  1267  				fraction = true
  1268  			} else if c == 'e' || c == 'E' {
  1269  				exponent = true
  1270  			} else if octal && !allzeros {
  1271  				sc.endToken(val)
  1272  				sc.errorf(sc.pos, "obsolete form of octal literal; use 0o%s", val.raw[1:])
  1273  			}
  1274  		}
  1275  	} else {
  1276  		// decimal
  1277  		for isdigit(c) {
  1278  			sc.readRune()
  1279  			c = sc.peekRune()
  1280  		}
  1282  		if c == '.' {
  1283  			fraction = true
  1284  		} else if c == 'e' || c == 'E' {
  1285  			exponent = true
  1286  		}
  1287  	}
  1289  	if fraction {
  1290  		sc.readRune() // consume '.'
  1291  		c = sc.peekRune()
  1292  		for isdigit(c) {
  1293  			sc.readRune()
  1294  			c = sc.peekRune()
  1295  		}
  1297  		if c == 'e' || c == 'E' {
  1298  			exponent = true
  1299  		}
  1300  	}
  1302  	if exponent {
  1303  		sc.readRune() // consume [eE]
  1304  		c = sc.peekRune()
  1305  		if c == '+' || c == '-' {
  1306  			sc.readRune()
  1307  			c = sc.peekRune()
  1308  			if !isdigit(c) {
  1309  				sc.error(sc.pos, "invalid float literal")
  1310  			}
  1311  		}
  1312  		for isdigit(c) {
  1313  			sc.readRune()
  1314  			c = sc.peekRune()
  1315  		}
  1316  	}
  1318  	sc.endToken(val)
  1319  	if fraction || exponent {
  1320  		var err error
  1321  		val.float, err = strconv.ParseFloat(val.raw, 64)
  1322  		if err != nil {
  1323  			sc.error(sc.pos, "invalid float literal")
  1324  		}
  1325  		return FLOAT
  1326  	} else {
  1327  		var err error
  1328  		s := val.raw
  1329  		val.bigInt = nil
  1330  		if len(s) > 2 && s[0] == '0' && (s[1] == 'o' || s[1] == 'O') {
  1331, err = strconv.ParseInt(s[2:], 8, 64)
  1332  		} else if len(s) > 2 && s[0] == '0' && (s[1] == 'b' || s[1] == 'B') {
  1333, err = strconv.ParseInt(s[2:], 2, 64)
  1334  		} else {
  1335, err = strconv.ParseInt(s, 0, 64)
  1336  			if err != nil {
  1337  				num := new(big.Int)
  1338  				var ok bool
  1339  				val.bigInt, ok = num.SetString(s, 0)
  1340  				if ok {
  1341  					err = nil
  1342  				}
  1343  			}
  1344  		}
  1345  		if err != nil {
  1346  			sc.error(start, "invalid int literal")
  1347  		}
  1348  		return INT
  1349  	}
  1350  }
  1352  // isIdent reports whether c is an identifier rune.
  1353  func isIdent(c rune) bool {
  1354  	return isdigit(c) || isIdentStart(c)
  1355  }
  1357  func isIdentStart(c rune) bool {
  1358  	return 'a' <= c && c <= 'z' ||
  1359  		'A' <= c && c <= 'Z' ||
  1360  		c == '_' ||
  1361  		unicode.IsLetter(c)
  1362  }
  1364  func isdigit(c rune) bool  { return '0' <= c && c <= '9' }
  1365  func isodigit(c rune) bool { return '0' <= c && c <= '7' }
  1366  func isxdigit(c rune) bool { return isdigit(c) || 'A' <= c && c <= 'F' || 'a' <= c && c <= 'f' }
  1367  func isbdigit(c rune) bool { return '0' == c || c == '1' }
  1369  // keywordToken records the special tokens for
  1370  // strings that should not be treated as ordinary identifiers.
  1371  var keywordToken = map[string]Token{
  1372  	"and":      AND,
  1373  	"break":    BREAK,
  1374  	"continue": CONTINUE,
  1375  	"def":      DEF,
  1376  	"elif":     ELIF,
  1377  	"else":     ELSE,
  1378  	"for":      FOR,
  1379  	"if":       IF,
  1380  	"in":       IN,
  1381  	"lambda":   LAMBDA,
  1382  	"load":     LOAD,
  1383  	"import":   IMPORT,
  1384  	"as":       AS,
  1385  	"using":    USING,
  1386  	"not":      NOT,
  1387  	"or":       OR,
  1388  	"pass":     PASS,
  1389  	"return":   RETURN,
  1390  	"while":    WHILE,
  1392  	// reserved words:
  1393  	// "assert":   ILLEGAL, // heavily used by our tests
  1394  	"class":    ILLEGAL,
  1395  	"del":      ILLEGAL,
  1396  	"except":   ILLEGAL,
  1397  	"finally":  ILLEGAL,
  1398  	"from":     ILLEGAL,
  1399  	"global":   ILLEGAL,
  1400  	"is":       ILLEGAL,
  1401  	"nonlocal": ILLEGAL,
  1402  	"raise":    ILLEGAL,
  1403  	"try":      ILLEGAL,
  1404  	"with":     ILLEGAL,
  1405  	"yield":    ILLEGAL,
  1406  }