github.com/corona10/go@v0.0.0-20180224231303-7a218942be57/src/go/scanner/scanner.go

github.com/corona10/go@v0.0.0-20180224231303-7a218942be57/src/go/scanner/scanner.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package scanner implements a scanner for Go source text.
     6  // It takes a []byte as source which can then be tokenized
     7  // through repeated calls to the Scan method.
     8  //
     9  package scanner
    10  
    11  import (
    12  	"bytes"
    13  	"fmt"
    14  	"go/token"
    15  	"path/filepath"
    16  	"strconv"
    17  	"unicode"
    18  	"unicode/utf8"
    19  )
    20  
// An ErrorHandler may be provided to Scanner.Init. If a syntax error is
// encountered and a handler was installed, the handler is called with the
// resolved source position the error was reported at and an error message.
// The position usually points to the beginning of the offending token or
// character.
//
// The Scanner's ErrorCount is incremented for every error, whether or not
// a handler is installed.
//
type ErrorHandler func(pos token.Position, msg string)
    27  
// A Scanner holds the scanner's internal state while processing
// a given text. It can be allocated as part of another data
// structure but must be initialized via Init before use.
//
// All fields except ErrorCount are set by Init and maintained by the
// scanning methods.
//
type Scanner struct {
	// immutable state (assigned once by Init)
	file *token.File  // source file handle; receives line information and resolves offsets
	dir  string       // directory portion of file.Name()
	src  []byte       // source
	err  ErrorHandler // error reporting; or nil
	mode Mode         // scanning mode

	// scanning state
	ch         rune // current character; < 0 means end-of-file
	offset     int  // character offset (byte offset of ch in src)
	rdOffset   int  // reading offset (position after current character)
	lineOffset int  // current line offset (byte offset at which the current line starts)
	insertSemi bool // insert a semicolon before next newline

	// public state - ok to modify
	ErrorCount int // number of errors encountered
}
    50  
// bom is the Unicode byte order mark (U+FEFF). Init skips it when it is the
// first character of the source; next reports it as an error at any other
// offset.
const bom = 0xFEFF // byte order mark, only permitted as very first character
    52  
    53  // Read the next Unicode char into s.ch.
    54  // s.ch < 0 means end-of-file.
    55  //
    56  func (s *Scanner) next() {
    57  	if s.rdOffset < len(s.src) {
    58  		s.offset = s.rdOffset
    59  		if s.ch == '\n' {
    60  			s.lineOffset = s.offset
    61  			s.file.AddLine(s.offset)
    62  		}
    63  		r, w := rune(s.src[s.rdOffset]), 1
    64  		switch {
    65  		case r == 0:
    66  			s.error(s.offset, "illegal character NUL")
    67  		case r >= utf8.RuneSelf:
    68  			// not ASCII
    69  			r, w = utf8.DecodeRune(s.src[s.rdOffset:])
    70  			if r == utf8.RuneError && w == 1 {
    71  				s.error(s.offset, "illegal UTF-8 encoding")
    72  			} else if r == bom && s.offset > 0 {
    73  				s.error(s.offset, "illegal byte order mark")
    74  			}
    75  		}
    76  		s.rdOffset += w
    77  		s.ch = r
    78  	} else {
    79  		s.offset = len(s.src)
    80  		if s.ch == '\n' {
    81  			s.lineOffset = s.offset
    82  			s.file.AddLine(s.offset)
    83  		}
    84  		s.ch = -1 // eof
    85  	}
    86  }
    87  
// A Mode value is a set of flags (or 0).
// They control scanner behavior and are passed to Scanner.Init.
//
type Mode uint

// Flags that may be combined into a Mode value.
const (
	ScanComments    Mode = 1 << iota // return comments as COMMENT tokens
	dontInsertSemis                  // do not automatically insert semicolons - for testing only
)
    97  
    98  // Init prepares the scanner s to tokenize the text src by setting the
    99  // scanner at the beginning of src. The scanner uses the file set file
   100  // for position information and it adds line information for each line.
   101  // It is ok to re-use the same file when re-scanning the same file as
   102  // line information which is already present is ignored. Init causes a
   103  // panic if the file size does not match the src size.
   104  //
   105  // Calls to Scan will invoke the error handler err if they encounter a
   106  // syntax error and err is not nil. Also, for each error encountered,
   107  // the Scanner field ErrorCount is incremented by one. The mode parameter
   108  // determines how comments are handled.
   109  //
   110  // Note that Init may call err if there is an error in the first character
   111  // of the file.
   112  //
   113  func (s *Scanner) Init(file *token.File, src []byte, err ErrorHandler, mode Mode) {
   114  	// Explicitly initialize all fields since a scanner may be reused.
   115  	if file.Size() != len(src) {
   116  		panic(fmt.Sprintf("file size (%d) does not match src len (%d)", file.Size(), len(src)))
   117  	}
   118  	s.file = file
   119  	s.dir, _ = filepath.Split(file.Name())
   120  	s.src = src
   121  	s.err = err
   122  	s.mode = mode
   123  
   124  	s.ch = ' '
   125  	s.offset = 0
   126  	s.rdOffset = 0
   127  	s.lineOffset = 0
   128  	s.insertSemi = false
   129  	s.ErrorCount = 0
   130  
   131  	s.next()
   132  	if s.ch == bom {
   133  		s.next() // ignore BOM at file beginning
   134  	}
   135  }
   136  
   137  func (s *Scanner) error(offs int, msg string) {
   138  	if s.err != nil {
   139  		s.err(s.file.Position(s.file.Pos(offs)), msg)
   140  	}
   141  	s.ErrorCount++
   142  }
   143  
// prefix is the leading text interpretLineComment requires before it
// treats a //-style comment as a "//line filename:line" directive.
var prefix = []byte("//line ")
   145  
   146  func (s *Scanner) interpretLineComment(text []byte) {
   147  	if bytes.HasPrefix(text, prefix) {
   148  		// get filename and line number, if any
   149  		if i := bytes.LastIndex(text, []byte{':'}); i > 0 {
   150  			if line, err := strconv.Atoi(string(text[i+1:])); err == nil && line > 0 {
   151  				// valid //line filename:line comment
   152  				filename := string(bytes.TrimSpace(text[len(prefix):i]))
   153  				if filename != "" {
   154  					filename = filepath.Clean(filename)
   155  					if !filepath.IsAbs(filename) {
   156  						// make filename relative to current directory
   157  						filename = filepath.Join(s.dir, filename)
   158  					}
   159  				}
   160  				// update scanner position
   161  				s.file.AddLineInfo(s.lineOffset+len(text)+1, filename, line) // +len(text)+1 since comment applies to next line
   162  			}
   163  		}
   164  	}
   165  }
   166  
   167  func (s *Scanner) scanComment() string {
   168  	// initial '/' already consumed; s.ch == '/' || s.ch == '*'
   169  	offs := s.offset - 1 // position of initial '/'
   170  	hasCR := false
   171  
   172  	if s.ch == '/' {
   173  		//-style comment
   174  		s.next()
   175  		for s.ch != '\n' && s.ch >= 0 {
   176  			if s.ch == '\r' {
   177  				hasCR = true
   178  			}
   179  			s.next()
   180  		}
   181  		if offs == s.lineOffset {
   182  			// comment starts at the beginning of the current line
   183  			s.interpretLineComment(s.src[offs:s.offset])
   184  		}
   185  		goto exit
   186  	}
   187  
   188  	/*-style comment */
   189  	s.next()
   190  	for s.ch >= 0 {
   191  		ch := s.ch
   192  		if ch == '\r' {
   193  			hasCR = true
   194  		}
   195  		s.next()
   196  		if ch == '*' && s.ch == '/' {
   197  			s.next()
   198  			goto exit
   199  		}
   200  	}
   201  
   202  	s.error(offs, "comment not terminated")
   203  
   204  exit:
   205  	lit := s.src[offs:s.offset]
   206  	if hasCR {
   207  		lit = stripCR(lit, lit[1] == '*')
   208  	}
   209  
   210  	return string(lit)
   211  }
   212  
   213  func (s *Scanner) findLineEnd() bool {
   214  	// initial '/' already consumed
   215  
   216  	defer func(offs int) {
   217  		// reset scanner state to where it was upon calling findLineEnd
   218  		s.ch = '/'
   219  		s.offset = offs
   220  		s.rdOffset = offs + 1
   221  		s.next() // consume initial '/' again
   222  	}(s.offset - 1)
   223  
   224  	// read ahead until a newline, EOF, or non-comment token is found
   225  	for s.ch == '/' || s.ch == '*' {
   226  		if s.ch == '/' {
   227  			//-style comment always contains a newline
   228  			return true
   229  		}
   230  		/*-style comment: look for newline */
   231  		s.next()
   232  		for s.ch >= 0 {
   233  			ch := s.ch
   234  			if ch == '\n' {
   235  				return true
   236  			}
   237  			s.next()
   238  			if ch == '*' && s.ch == '/' {
   239  				s.next()
   240  				break
   241  			}
   242  		}
   243  		s.skipWhitespace() // s.insertSemi is set
   244  		if s.ch < 0 || s.ch == '\n' {
   245  			return true
   246  		}
   247  		if s.ch != '/' {
   248  			// non-comment token
   249  			return false
   250  		}
   251  		s.next() // consume '/'
   252  	}
   253  
   254  	return false
   255  }
   256  
   257  func isLetter(ch rune) bool {
   258  	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= utf8.RuneSelf && unicode.IsLetter(ch)
   259  }
   260  
   261  func isDigit(ch rune) bool {
   262  	return '0' <= ch && ch <= '9' || ch >= utf8.RuneSelf && unicode.IsDigit(ch)
   263  }
   264  
   265  func (s *Scanner) scanIdentifier() string {
   266  	offs := s.offset
   267  	for isLetter(s.ch) || isDigit(s.ch) {
   268  		s.next()
   269  	}
   270  	return string(s.src[offs:s.offset])
   271  }
   272  
   273  func digitVal(ch rune) int {
   274  	switch {
   275  	case '0' <= ch && ch <= '9':
   276  		return int(ch - '0')
   277  	case 'a' <= ch && ch <= 'f':
   278  		return int(ch - 'a' + 10)
   279  	case 'A' <= ch && ch <= 'F':
   280  		return int(ch - 'A' + 10)
   281  	}
   282  	return 16 // larger than any legal digit val
   283  }
   284  
   285  func (s *Scanner) scanMantissa(base int) {
   286  	for digitVal(s.ch) < base {
   287  		s.next()
   288  	}
   289  }
   290  
   291  func (s *Scanner) scanNumber(seenDecimalPoint bool) (token.Token, string) {
   292  	// digitVal(s.ch) < 10
   293  	offs := s.offset
   294  	tok := token.INT
   295  
   296  	if seenDecimalPoint {
   297  		offs--
   298  		tok = token.FLOAT
   299  		s.scanMantissa(10)
   300  		goto exponent
   301  	}
   302  
   303  	if s.ch == '0' {
   304  		// int or float
   305  		offs := s.offset
   306  		s.next()
   307  		if s.ch == 'x' || s.ch == 'X' {
   308  			// hexadecimal int
   309  			s.next()
   310  			s.scanMantissa(16)
   311  			if s.offset-offs <= 2 {
   312  				// only scanned "0x" or "0X"
   313  				s.error(offs, "illegal hexadecimal number")
   314  			}
   315  		} else {
   316  			// octal int or float
   317  			seenDecimalDigit := false
   318  			s.scanMantissa(8)
   319  			if s.ch == '8' || s.ch == '9' {
   320  				// illegal octal int or float
   321  				seenDecimalDigit = true
   322  				s.scanMantissa(10)
   323  			}
   324  			if s.ch == '.' || s.ch == 'e' || s.ch == 'E' || s.ch == 'i' {
   325  				goto fraction
   326  			}
   327  			// octal int
   328  			if seenDecimalDigit {
   329  				s.error(offs, "illegal octal number")
   330  			}
   331  		}
   332  		goto exit
   333  	}
   334  
   335  	// decimal int or float
   336  	s.scanMantissa(10)
   337  
   338  fraction:
   339  	if s.ch == '.' {
   340  		tok = token.FLOAT
   341  		s.next()
   342  		s.scanMantissa(10)
   343  	}
   344  
   345  exponent:
   346  	if s.ch == 'e' || s.ch == 'E' {
   347  		tok = token.FLOAT
   348  		s.next()
   349  		if s.ch == '-' || s.ch == '+' {
   350  			s.next()
   351  		}
   352  		if digitVal(s.ch) < 10 {
   353  			s.scanMantissa(10)
   354  		} else {
   355  			s.error(offs, "illegal floating-point exponent")
   356  		}
   357  	}
   358  
   359  	if s.ch == 'i' {
   360  		tok = token.IMAG
   361  		s.next()
   362  	}
   363  
   364  exit:
   365  	return tok, string(s.src[offs:s.offset])
   366  }
   367  
   368  // scanEscape parses an escape sequence where rune is the accepted
   369  // escaped quote. In case of a syntax error, it stops at the offending
   370  // character (without consuming it) and returns false. Otherwise
   371  // it returns true.
   372  func (s *Scanner) scanEscape(quote rune) bool {
   373  	offs := s.offset
   374  
   375  	var n int
   376  	var base, max uint32
   377  	switch s.ch {
   378  	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
   379  		s.next()
   380  		return true
   381  	case '0', '1', '2', '3', '4', '5', '6', '7':
   382  		n, base, max = 3, 8, 255
   383  	case 'x':
   384  		s.next()
   385  		n, base, max = 2, 16, 255
   386  	case 'u':
   387  		s.next()
   388  		n, base, max = 4, 16, unicode.MaxRune
   389  	case 'U':
   390  		s.next()
   391  		n, base, max = 8, 16, unicode.MaxRune
   392  	default:
   393  		msg := "unknown escape sequence"
   394  		if s.ch < 0 {
   395  			msg = "escape sequence not terminated"
   396  		}
   397  		s.error(offs, msg)
   398  		return false
   399  	}
   400  
   401  	var x uint32
   402  	for n > 0 {
   403  		d := uint32(digitVal(s.ch))
   404  		if d >= base {
   405  			msg := fmt.Sprintf("illegal character %#U in escape sequence", s.ch)
   406  			if s.ch < 0 {
   407  				msg = "escape sequence not terminated"
   408  			}
   409  			s.error(s.offset, msg)
   410  			return false
   411  		}
   412  		x = x*base + d
   413  		s.next()
   414  		n--
   415  	}
   416  
   417  	if x > max || 0xD800 <= x && x < 0xE000 {
   418  		s.error(offs, "escape sequence is invalid Unicode code point")
   419  		return false
   420  	}
   421  
   422  	return true
   423  }
   424  
   425  func (s *Scanner) scanRune() string {
   426  	// '\'' opening already consumed
   427  	offs := s.offset - 1
   428  
   429  	valid := true
   430  	n := 0
   431  	for {
   432  		ch := s.ch
   433  		if ch == '\n' || ch < 0 {
   434  			// only report error if we don't have one already
   435  			if valid {
   436  				s.error(offs, "rune literal not terminated")
   437  				valid = false
   438  			}
   439  			break
   440  		}
   441  		s.next()
   442  		if ch == '\'' {
   443  			break
   444  		}
   445  		n++
   446  		if ch == '\\' {
   447  			if !s.scanEscape('\'') {
   448  				valid = false
   449  			}
   450  			// continue to read to closing quote
   451  		}
   452  	}
   453  
   454  	if valid && n != 1 {
   455  		s.error(offs, "illegal rune literal")
   456  	}
   457  
   458  	return string(s.src[offs:s.offset])
   459  }
   460  
   461  func (s *Scanner) scanString() string {
   462  	// '"' opening already consumed
   463  	offs := s.offset - 1
   464  
   465  	for {
   466  		ch := s.ch
   467  		if ch == '\n' || ch < 0 {
   468  			s.error(offs, "string literal not terminated")
   469  			break
   470  		}
   471  		s.next()
   472  		if ch == '"' {
   473  			break
   474  		}
   475  		if ch == '\\' {
   476  			s.scanEscape('"')
   477  		}
   478  	}
   479  
   480  	return string(s.src[offs:s.offset])
   481  }
   482  
   483  func stripCR(b []byte, comment bool) []byte {
   484  	c := make([]byte, len(b))
   485  	i := 0
   486  	for j, ch := range b {
   487  		// In a /*-style comment, don't strip \r from *\r/ (incl.
   488  		// sequences of \r from *\r\r...\r/) since the resulting
   489  		// */ would terminate the comment too early unless the \r
   490  		// is immediately following the opening /* in which case
   491  		// it's ok because /*/ is not closed yet (issue #11151).
   492  		if ch != '\r' || comment && i > len("/*") && c[i-1] == '*' && j+1 < len(b) && b[j+1] == '/' {
   493  			c[i] = ch
   494  			i++
   495  		}
   496  	}
   497  	return c[:i]
   498  }
   499  
   500  func (s *Scanner) scanRawString() string {
   501  	// '`' opening already consumed
   502  	offs := s.offset - 1
   503  
   504  	hasCR := false
   505  	for {
   506  		ch := s.ch
   507  		if ch < 0 {
   508  			s.error(offs, "raw string literal not terminated")
   509  			break
   510  		}
   511  		s.next()
   512  		if ch == '`' {
   513  			break
   514  		}
   515  		if ch == '\r' {
   516  			hasCR = true
   517  		}
   518  	}
   519  
   520  	lit := s.src[offs:s.offset]
   521  	if hasCR {
   522  		lit = stripCR(lit, false)
   523  	}
   524  
   525  	return string(lit)
   526  }
   527  
   528  func (s *Scanner) skipWhitespace() {
   529  	for s.ch == ' ' || s.ch == '\t' || s.ch == '\n' && !s.insertSemi || s.ch == '\r' {
   530  		s.next()
   531  	}
   532  }
   533  
   534  // Helper functions for scanning multi-byte tokens such as >> += >>= .
   535  // Different routines recognize different length tok_i based on matches
   536  // of ch_i. If a token ends in '=', the result is tok1 or tok3
   537  // respectively. Otherwise, the result is tok0 if there was no other
   538  // matching character, or tok2 if the matching character was ch2.
   539  
   540  func (s *Scanner) switch2(tok0, tok1 token.Token) token.Token {
   541  	if s.ch == '=' {
   542  		s.next()
   543  		return tok1
   544  	}
   545  	return tok0
   546  }
   547  
   548  func (s *Scanner) switch3(tok0, tok1 token.Token, ch2 rune, tok2 token.Token) token.Token {
   549  	if s.ch == '=' {
   550  		s.next()
   551  		return tok1
   552  	}
   553  	if s.ch == ch2 {
   554  		s.next()
   555  		return tok2
   556  	}
   557  	return tok0
   558  }
   559  
   560  func (s *Scanner) switch4(tok0, tok1 token.Token, ch2 rune, tok2, tok3 token.Token) token.Token {
   561  	if s.ch == '=' {
   562  		s.next()
   563  		return tok1
   564  	}
   565  	if s.ch == ch2 {
   566  		s.next()
   567  		if s.ch == '=' {
   568  			s.next()
   569  			return tok3
   570  		}
   571  		return tok2
   572  	}
   573  	return tok0
   574  }
   575  
   576  // Scan scans the next token and returns the token position, the token,
   577  // and its literal string if applicable. The source end is indicated by
   578  // token.EOF.
   579  //
   580  // If the returned token is a literal (token.IDENT, token.INT, token.FLOAT,
   581  // token.IMAG, token.CHAR, token.STRING) or token.COMMENT, the literal string
   582  // has the corresponding value.
   583  //
   584  // If the returned token is a keyword, the literal string is the keyword.
   585  //
   586  // If the returned token is token.SEMICOLON, the corresponding
   587  // literal string is ";" if the semicolon was present in the source,
   588  // and "\n" if the semicolon was inserted because of a newline or
   589  // at EOF.
   590  //
   591  // If the returned token is token.ILLEGAL, the literal string is the
   592  // offending character.
   593  //
   594  // In all other cases, Scan returns an empty literal string.
   595  //
   596  // For more tolerant parsing, Scan will return a valid token if
   597  // possible even if a syntax error was encountered. Thus, even
   598  // if the resulting token sequence contains no illegal tokens,
   599  // a client may not assume that no error occurred. Instead it
   600  // must check the scanner's ErrorCount or the number of calls
   601  // of the error handler, if there was one installed.
   602  //
   603  // Scan adds line information to the file added to the file
   604  // set with Init. Token positions are relative to that file
   605  // and thus relative to the file set.
   606  //
   607  func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) {
   608  scanAgain:
   609  	s.skipWhitespace()
   610  
   611  	// current token start
   612  	pos = s.file.Pos(s.offset)
   613  
   614  	// determine token value
   615  	insertSemi := false
   616  	switch ch := s.ch; {
   617  	case isLetter(ch):
   618  		lit = s.scanIdentifier()
   619  		if len(lit) > 1 {
   620  			// keywords are longer than one letter - avoid lookup otherwise
   621  			tok = token.Lookup(lit)
   622  			switch tok {
   623  			case token.IDENT, token.BREAK, token.CONTINUE, token.FALLTHROUGH, token.RETURN:
   624  				insertSemi = true
   625  			}
   626  		} else {
   627  			insertSemi = true
   628  			tok = token.IDENT
   629  		}
   630  	case '0' <= ch && ch <= '9':
   631  		insertSemi = true
   632  		tok, lit = s.scanNumber(false)
   633  	default:
   634  		s.next() // always make progress
   635  		switch ch {
   636  		case -1:
   637  			if s.insertSemi {
   638  				s.insertSemi = false // EOF consumed
   639  				return pos, token.SEMICOLON, "\n"
   640  			}
   641  			tok = token.EOF
   642  		case '\n':
   643  			// we only reach here if s.insertSemi was
   644  			// set in the first place and exited early
   645  			// from s.skipWhitespace()
   646  			s.insertSemi = false // newline consumed
   647  			return pos, token.SEMICOLON, "\n"
   648  		case '"':
   649  			insertSemi = true
   650  			tok = token.STRING
   651  			lit = s.scanString()
   652  		case '\'':
   653  			insertSemi = true
   654  			tok = token.CHAR
   655  			lit = s.scanRune()
   656  		case '`':
   657  			insertSemi = true
   658  			tok = token.STRING
   659  			lit = s.scanRawString()
   660  		case ':':
   661  			tok = s.switch2(token.COLON, token.DEFINE)
   662  		case '.':
   663  			if '0' <= s.ch && s.ch <= '9' {
   664  				insertSemi = true
   665  				tok, lit = s.scanNumber(true)
   666  			} else if s.ch == '.' {
   667  				s.next()
   668  				if s.ch == '.' {
   669  					s.next()
   670  					tok = token.ELLIPSIS
   671  				}
   672  			} else {
   673  				tok = token.PERIOD
   674  			}
   675  		case ',':
   676  			tok = token.COMMA
   677  		case ';':
   678  			tok = token.SEMICOLON
   679  			lit = ";"
   680  		case '(':
   681  			tok = token.LPAREN
   682  		case ')':
   683  			insertSemi = true
   684  			tok = token.RPAREN
   685  		case '[':
   686  			tok = token.LBRACK
   687  		case ']':
   688  			insertSemi = true
   689  			tok = token.RBRACK
   690  		case '{':
   691  			tok = token.LBRACE
   692  		case '}':
   693  			insertSemi = true
   694  			tok = token.RBRACE
   695  		case '+':
   696  			tok = s.switch3(token.ADD, token.ADD_ASSIGN, '+', token.INC)
   697  			if tok == token.INC {
   698  				insertSemi = true
   699  			}
   700  		case '-':
   701  			tok = s.switch3(token.SUB, token.SUB_ASSIGN, '-', token.DEC)
   702  			if tok == token.DEC {
   703  				insertSemi = true
   704  			}
   705  		case '*':
   706  			tok = s.switch2(token.MUL, token.MUL_ASSIGN)
   707  		case '/':
   708  			if s.ch == '/' || s.ch == '*' {
   709  				// comment
   710  				if s.insertSemi && s.findLineEnd() {
   711  					// reset position to the beginning of the comment
   712  					s.ch = '/'
   713  					s.offset = s.file.Offset(pos)
   714  					s.rdOffset = s.offset + 1
   715  					s.insertSemi = false // newline consumed
   716  					return pos, token.SEMICOLON, "\n"
   717  				}
   718  				comment := s.scanComment()
   719  				if s.mode&ScanComments == 0 {
   720  					// skip comment
   721  					s.insertSemi = false // newline consumed
   722  					goto scanAgain
   723  				}
   724  				tok = token.COMMENT
   725  				lit = comment
   726  			} else {
   727  				tok = s.switch2(token.QUO, token.QUO_ASSIGN)
   728  			}
   729  		case '%':
   730  			tok = s.switch2(token.REM, token.REM_ASSIGN)
   731  		case '^':
   732  			tok = s.switch2(token.XOR, token.XOR_ASSIGN)
   733  		case '<':
   734  			if s.ch == '-' {
   735  				s.next()
   736  				tok = token.ARROW
   737  			} else {
   738  				tok = s.switch4(token.LSS, token.LEQ, '<', token.SHL, token.SHL_ASSIGN)
   739  			}
   740  		case '>':
   741  			tok = s.switch4(token.GTR, token.GEQ, '>', token.SHR, token.SHR_ASSIGN)
   742  		case '=':
   743  			tok = s.switch2(token.ASSIGN, token.EQL)
   744  		case '!':
   745  			tok = s.switch2(token.NOT, token.NEQ)
   746  		case '&':
   747  			if s.ch == '^' {
   748  				s.next()
   749  				tok = s.switch2(token.AND_NOT, token.AND_NOT_ASSIGN)
   750  			} else {
   751  				tok = s.switch3(token.AND, token.AND_ASSIGN, '&', token.LAND)
   752  			}
   753  		case '|':
   754  			tok = s.switch3(token.OR, token.OR_ASSIGN, '|', token.LOR)
   755  		default:
   756  			// next reports unexpected BOMs - don't repeat
   757  			if ch != bom {
   758  				s.error(s.file.Offset(pos), fmt.Sprintf("illegal character %#U", ch))
   759  			}
   760  			insertSemi = s.insertSemi // preserve insertSemi info
   761  			tok = token.ILLEGAL
   762  			lit = string(ch)
   763  		}
   764  	}
   765  	if s.mode&dontInsertSemis == 0 {
   766  		s.insertSemi = insertSemi
   767  	}
   768  
   769  	return
   770  }