github.com/zebozhuang/go@v0.0.0-20200207033046-f8a98f6f5c5d/src/go/scanner/scanner.go

github.com/zebozhuang/go@v0.0.0-20200207033046-f8a98f6f5c5d/src/go/scanner/scanner.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package scanner implements a scanner for Go source text.
     6  // It takes a []byte as source which can then be tokenized
     7  // through repeated calls to the Scan method.
     8  //
     9  package scanner
    10  
    11  import (
    12  	"bytes"
    13  	"fmt"
    14  	"go/token"
    15  	"path/filepath"
    16  	"strconv"
    17  	"unicode"
    18  	"unicode/utf8"
    19  )
    20  
// An ErrorHandler may be provided to Scanner.Init. If a syntax error is
// encountered and a handler was installed, the handler is called with a
// position and an error message. The position points to the beginning of
// the offending token.
//
// ErrorHandler is the callback type used for error reporting.
type ErrorHandler func(pos token.Position, msg string)
    28  
// A Scanner holds the scanner's internal state while processing
// a given text. It can be allocated as part of another data
// structure but must be initialized via Init before use.
//
// The fields fall into three groups: state that is set once by Init,
// state that changes as characters are consumed, and the public error
// counter.
type Scanner struct {
	// immutable state
	file *token.File  // source file handle
	dir  string       // directory portion of file.Name()
	src  []byte       // source
	err  ErrorHandler // error reporting; or nil
	mode Mode         // scanning mode

	// scanning state
	ch         rune // current character
	offset     int  // character offset
	rdOffset   int  // reading offset (position after current character)
	lineOffset int  // current line offset
	insertSemi bool // insert a semicolon before next newline

	// public state - ok to modify
	ErrorCount int // number of errors encountered
}
    51  
// bom is the code point U+FEFF. Init skips it when it is the first
// character of the source; next reports it as an error at any later offset.
const bom = 0xFEFF // byte order mark, only permitted as very first character
    53  
    54  // Read the next Unicode char into s.ch.
    55  // s.ch < 0 means end-of-file.
    56  //
    57  func (s *Scanner) next() {
    58  	if s.rdOffset < len(s.src) {
    59  		s.offset = s.rdOffset
    60  		if s.ch == '\n' {
    61  			s.lineOffset = s.offset
    62  			s.file.AddLine(s.offset)
    63  		}
    64  		r, w := rune(s.src[s.rdOffset]), 1
    65  		switch {
    66  		case r == 0:
    67  			s.error(s.offset, "illegal character NUL")
    68  		case r >= utf8.RuneSelf:
    69  			// not ASCII
    70  			r, w = utf8.DecodeRune(s.src[s.rdOffset:])
    71  			if r == utf8.RuneError && w == 1 {
    72  				s.error(s.offset, "illegal UTF-8 encoding")
    73  			} else if r == bom && s.offset > 0 {
    74  				s.error(s.offset, "illegal byte order mark")
    75  			}
    76  		}
    77  		s.rdOffset += w
    78  		s.ch = r
    79  	} else {
    80  		s.offset = len(s.src)
    81  		if s.ch == '\n' {
    82  			s.lineOffset = s.offset
    83  			s.file.AddLine(s.offset)
    84  		}
    85  		s.ch = -1 // eof
    86  	}
    87  }
    88  
// A Mode value is a set of flags (or 0).
// The flags control scanner behavior and are passed to Scanner.Init.
//
type Mode uint

// Flags that may be combined into a Mode value.
const (
	ScanComments    Mode = 1 << iota // return comments as COMMENT tokens
	dontInsertSemis                  // do not automatically insert semicolons - for testing only
)
    98  
    99  // Init prepares the scanner s to tokenize the text src by setting the
   100  // scanner at the beginning of src. The scanner uses the file set file
   101  // for position information and it adds line information for each line.
   102  // It is ok to re-use the same file when re-scanning the same file as
   103  // line information which is already present is ignored. Init causes a
   104  // panic if the file size does not match the src size.
   105  //
   106  // Calls to Scan will invoke the error handler err if they encounter a
   107  // syntax error and err is not nil. Also, for each error encountered,
   108  // the Scanner field ErrorCount is incremented by one. The mode parameter
   109  // determines how comments are handled.
   110  //
   111  // Note that Init may call err if there is an error in the first character
   112  // of the file.
   113  //
   114  func (s *Scanner) Init(file *token.File, src []byte, err ErrorHandler, mode Mode) {
   115  	// Explicitly initialize all fields since a scanner may be reused.
   116  	if file.Size() != len(src) {
   117  		panic(fmt.Sprintf("file size (%d) does not match src len (%d)", file.Size(), len(src)))
   118  	}
   119  	s.file = file
   120  	s.dir, _ = filepath.Split(file.Name())
   121  	s.src = src
   122  	s.err = err
   123  	s.mode = mode
   124  
   125  	s.ch = ' '
   126  	s.offset = 0
   127  	s.rdOffset = 0
   128  	s.lineOffset = 0
   129  	s.insertSemi = false
   130  	s.ErrorCount = 0
   131  
   132  	s.next()
   133  	if s.ch == bom {
   134  		s.next() // ignore BOM at file beginning
   135  	}
   136  }
   137  
   138  func (s *Scanner) error(offs int, msg string) {
   139  	if s.err != nil {
   140  		s.err(s.file.Position(s.file.Pos(offs)), msg)
   141  	}
   142  	s.ErrorCount++
   143  }
   144  
   145  var prefix = []byte("//line ")
   146  
   147  func (s *Scanner) interpretLineComment(text []byte) {
   148  	if bytes.HasPrefix(text, prefix) {
   149  		// get filename and line number, if any
   150  		if i := bytes.LastIndex(text, []byte{':'}); i > 0 {
   151  			if line, err := strconv.Atoi(string(text[i+1:])); err == nil && line > 0 {
   152  				// valid //line filename:line comment
   153  				filename := string(bytes.TrimSpace(text[len(prefix):i]))
   154  				if filename != "" {
   155  					filename = filepath.Clean(filename)
   156  					if !filepath.IsAbs(filename) {
   157  						// make filename relative to current directory
   158  						filename = filepath.Join(s.dir, filename)
   159  					}
   160  				}
   161  				// update scanner position
   162  				s.file.AddLineInfo(s.lineOffset+len(text)+1, filename, line) // +len(text)+1 since comment applies to next line
   163  			}
   164  		}
   165  	}
   166  }
   167  
   168  func (s *Scanner) scanComment() string {
   169  	// initial '/' already consumed; s.ch == '/' || s.ch == '*'
   170  	offs := s.offset - 1 // position of initial '/'
   171  	hasCR := false
   172  
   173  	if s.ch == '/' {
   174  		//-style comment
   175  		s.next()
   176  		for s.ch != '\n' && s.ch >= 0 {
   177  			if s.ch == '\r' {
   178  				hasCR = true
   179  			}
   180  			s.next()
   181  		}
   182  		if offs == s.lineOffset {
   183  			// comment starts at the beginning of the current line
   184  			s.interpretLineComment(s.src[offs:s.offset])
   185  		}
   186  		goto exit
   187  	}
   188  
   189  	/*-style comment */
   190  	s.next()
   191  	for s.ch >= 0 {
   192  		ch := s.ch
   193  		if ch == '\r' {
   194  			hasCR = true
   195  		}
   196  		s.next()
   197  		if ch == '*' && s.ch == '/' {
   198  			s.next()
   199  			goto exit
   200  		}
   201  	}
   202  
   203  	s.error(offs, "comment not terminated")
   204  
   205  exit:
   206  	lit := s.src[offs:s.offset]
   207  	if hasCR {
   208  		lit = stripCR(lit)
   209  	}
   210  
   211  	return string(lit)
   212  }
   213  
// findLineEnd looks ahead from a comment whose initial '/' has already been
// consumed and reports whether a newline or the end of the source is
// reached before any non-comment token. It returns true as soon as it sees
// a //-style comment, a newline inside or after a /*-style comment, or the
// end of the source; it returns false when a non-comment token follows on
// the same line.
//
// The character position (s.ch, s.offset, s.rdOffset) is restored before
// returning, so the caller can scan the comment again.
func (s *Scanner) findLineEnd() bool {
	// initial '/' already consumed

	// The argument s.offset - 1 is evaluated now, i.e. it is the offset of
	// the initial '/' at the time of the call.
	defer func(offs int) {
		// reset scanner state to where it was upon calling findLineEnd
		s.ch = '/'
		s.offset = offs
		s.rdOffset = offs + 1
		s.next() // consume initial '/' again
	}(s.offset - 1)

	// read ahead until a newline, EOF, or non-comment token is found
	for s.ch == '/' || s.ch == '*' {
		if s.ch == '/' {
			//-style comment always contains a newline
			return true
		}
		/*-style comment: look for newline */
		s.next()
		for s.ch >= 0 {
			ch := s.ch
			if ch == '\n' {
				return true
			}
			s.next()
			if ch == '*' && s.ch == '/' {
				// end of this comment
				s.next()
				break
			}
		}
		// Because s.insertSemi is set, skipWhitespace stops at a newline.
		s.skipWhitespace() // s.insertSemi is set
		if s.ch < 0 || s.ch == '\n' {
			return true
		}
		if s.ch != '/' {
			// non-comment token
			return false
		}
		// Possibly another comment; the loop condition checks what follows.
		s.next() // consume '/'
	}

	return false
}
   257  
   258  func isLetter(ch rune) bool {
   259  	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= utf8.RuneSelf && unicode.IsLetter(ch)
   260  }
   261  
   262  func isDigit(ch rune) bool {
   263  	return '0' <= ch && ch <= '9' || ch >= utf8.RuneSelf && unicode.IsDigit(ch)
   264  }
   265  
   266  func (s *Scanner) scanIdentifier() string {
   267  	offs := s.offset
   268  	for isLetter(s.ch) || isDigit(s.ch) {
   269  		s.next()
   270  	}
   271  	return string(s.src[offs:s.offset])
   272  }
   273  
   274  func digitVal(ch rune) int {
   275  	switch {
   276  	case '0' <= ch && ch <= '9':
   277  		return int(ch - '0')
   278  	case 'a' <= ch && ch <= 'f':
   279  		return int(ch - 'a' + 10)
   280  	case 'A' <= ch && ch <= 'F':
   281  		return int(ch - 'A' + 10)
   282  	}
   283  	return 16 // larger than any legal digit val
   284  }
   285  
   286  func (s *Scanner) scanMantissa(base int) {
   287  	for digitVal(s.ch) < base {
   288  		s.next()
   289  	}
   290  }
   291  
// scanNumber scans a numeric literal and returns its token kind
// (token.INT, token.FLOAT, or token.IMAG) together with the literal text.
//
// If seenDecimalPoint is set, the caller has already consumed a leading
// '.', and the literal is scanned as a float whose text starts at that
// '.'. Otherwise s.ch must be a decimal digit on entry.
//
// Errors are reported for a hexadecimal prefix without digits, for an
// octal integer containing the digits 8 or 9, and for an exponent without
// digits; a token and literal are returned even then.
func (s *Scanner) scanNumber(seenDecimalPoint bool) (token.Token, string) {
	// digitVal(s.ch) < 10
	offs := s.offset
	tok := token.INT

	if seenDecimalPoint {
		offs-- // include the '.' that was already consumed
		tok = token.FLOAT
		s.scanMantissa(10)
		goto exponent
	}

	if s.ch == '0' {
		// int or float
		// NOTE: this declaration shadows the outer offs; nothing has been
		// consumed since the outer one was set, so both hold the same value.
		offs := s.offset
		s.next()
		if s.ch == 'x' || s.ch == 'X' {
			// hexadecimal int
			s.next()
			s.scanMantissa(16)
			if s.offset-offs <= 2 {
				// only scanned "0x" or "0X"
				s.error(offs, "illegal hexadecimal number")
			}
		} else {
			// octal int or float
			seenDecimalDigit := false
			s.scanMantissa(8)
			if s.ch == '8' || s.ch == '9' {
				// illegal octal int or float
				seenDecimalDigit = true
				s.scanMantissa(10)
			}
			// The digits 8 and 9 are only an error if this turns out to
			// be an integer; a float or imaginary literal may contain them.
			if s.ch == '.' || s.ch == 'e' || s.ch == 'E' || s.ch == 'i' {
				goto fraction
			}
			// octal int
			if seenDecimalDigit {
				s.error(offs, "illegal octal number")
			}
		}
		goto exit
	}

	// decimal int or float
	s.scanMantissa(10)

fraction:
	if s.ch == '.' {
		tok = token.FLOAT
		s.next()
		s.scanMantissa(10)
	}

exponent:
	if s.ch == 'e' || s.ch == 'E' {
		tok = token.FLOAT
		s.next()
		if s.ch == '-' || s.ch == '+' {
			s.next()
		}
		if digitVal(s.ch) < 10 {
			s.scanMantissa(10)
		} else {
			s.error(offs, "illegal floating-point exponent")
		}
	}

	// A trailing 'i' turns an int or float into an imaginary literal.
	if s.ch == 'i' {
		tok = token.IMAG
		s.next()
	}

exit:
	return tok, string(s.src[offs:s.offset])
}
   368  
   369  // scanEscape parses an escape sequence where rune is the accepted
   370  // escaped quote. In case of a syntax error, it stops at the offending
   371  // character (without consuming it) and returns false. Otherwise
   372  // it returns true.
   373  func (s *Scanner) scanEscape(quote rune) bool {
   374  	offs := s.offset
   375  
   376  	var n int
   377  	var base, max uint32
   378  	switch s.ch {
   379  	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
   380  		s.next()
   381  		return true
   382  	case '0', '1', '2', '3', '4', '5', '6', '7':
   383  		n, base, max = 3, 8, 255
   384  	case 'x':
   385  		s.next()
   386  		n, base, max = 2, 16, 255
   387  	case 'u':
   388  		s.next()
   389  		n, base, max = 4, 16, unicode.MaxRune
   390  	case 'U':
   391  		s.next()
   392  		n, base, max = 8, 16, unicode.MaxRune
   393  	default:
   394  		msg := "unknown escape sequence"
   395  		if s.ch < 0 {
   396  			msg = "escape sequence not terminated"
   397  		}
   398  		s.error(offs, msg)
   399  		return false
   400  	}
   401  
   402  	var x uint32
   403  	for n > 0 {
   404  		d := uint32(digitVal(s.ch))
   405  		if d >= base {
   406  			msg := fmt.Sprintf("illegal character %#U in escape sequence", s.ch)
   407  			if s.ch < 0 {
   408  				msg = "escape sequence not terminated"
   409  			}
   410  			s.error(s.offset, msg)
   411  			return false
   412  		}
   413  		x = x*base + d
   414  		s.next()
   415  		n--
   416  	}
   417  
   418  	if x > max || 0xD800 <= x && x < 0xE000 {
   419  		s.error(offs, "escape sequence is invalid Unicode code point")
   420  		return false
   421  	}
   422  
   423  	return true
   424  }
   425  
   426  func (s *Scanner) scanRune() string {
   427  	// '\'' opening already consumed
   428  	offs := s.offset - 1
   429  
   430  	valid := true
   431  	n := 0
   432  	for {
   433  		ch := s.ch
   434  		if ch == '\n' || ch < 0 {
   435  			// only report error if we don't have one already
   436  			if valid {
   437  				s.error(offs, "rune literal not terminated")
   438  				valid = false
   439  			}
   440  			break
   441  		}
   442  		s.next()
   443  		if ch == '\'' {
   444  			break
   445  		}
   446  		n++
   447  		if ch == '\\' {
   448  			if !s.scanEscape('\'') {
   449  				valid = false
   450  			}
   451  			// continue to read to closing quote
   452  		}
   453  	}
   454  
   455  	if valid && n != 1 {
   456  		s.error(offs, "illegal rune literal")
   457  	}
   458  
   459  	return string(s.src[offs:s.offset])
   460  }
   461  
   462  func (s *Scanner) scanString() string {
   463  	// '"' opening already consumed
   464  	offs := s.offset - 1
   465  
   466  	for {
   467  		ch := s.ch
   468  		if ch == '\n' || ch < 0 {
   469  			s.error(offs, "string literal not terminated")
   470  			break
   471  		}
   472  		s.next()
   473  		if ch == '"' {
   474  			break
   475  		}
   476  		if ch == '\\' {
   477  			s.scanEscape('"')
   478  		}
   479  	}
   480  
   481  	return string(s.src[offs:s.offset])
   482  }
   483  
   484  func stripCR(b []byte) []byte {
   485  	c := make([]byte, len(b))
   486  	i := 0
   487  	for _, ch := range b {
   488  		if ch != '\r' {
   489  			c[i] = ch
   490  			i++
   491  		}
   492  	}
   493  	return c[:i]
   494  }
   495  
   496  func (s *Scanner) scanRawString() string {
   497  	// '`' opening already consumed
   498  	offs := s.offset - 1
   499  
   500  	hasCR := false
   501  	for {
   502  		ch := s.ch
   503  		if ch < 0 {
   504  			s.error(offs, "raw string literal not terminated")
   505  			break
   506  		}
   507  		s.next()
   508  		if ch == '`' {
   509  			break
   510  		}
   511  		if ch == '\r' {
   512  			hasCR = true
   513  		}
   514  	}
   515  
   516  	lit := s.src[offs:s.offset]
   517  	if hasCR {
   518  		lit = stripCR(lit)
   519  	}
   520  
   521  	return string(lit)
   522  }
   523  
   524  func (s *Scanner) skipWhitespace() {
   525  	for s.ch == ' ' || s.ch == '\t' || s.ch == '\n' && !s.insertSemi || s.ch == '\r' {
   526  		s.next()
   527  	}
   528  }
   529  
   530  // Helper functions for scanning multi-byte tokens such as >> += >>= .
   531  // Different routines recognize different length tok_i based on matches
   532  // of ch_i. If a token ends in '=', the result is tok1 or tok3
   533  // respectively. Otherwise, the result is tok0 if there was no other
   534  // matching character, or tok2 if the matching character was ch2.
   535  
   536  func (s *Scanner) switch2(tok0, tok1 token.Token) token.Token {
   537  	if s.ch == '=' {
   538  		s.next()
   539  		return tok1
   540  	}
   541  	return tok0
   542  }
   543  
   544  func (s *Scanner) switch3(tok0, tok1 token.Token, ch2 rune, tok2 token.Token) token.Token {
   545  	if s.ch == '=' {
   546  		s.next()
   547  		return tok1
   548  	}
   549  	if s.ch == ch2 {
   550  		s.next()
   551  		return tok2
   552  	}
   553  	return tok0
   554  }
   555  
   556  func (s *Scanner) switch4(tok0, tok1 token.Token, ch2 rune, tok2, tok3 token.Token) token.Token {
   557  	if s.ch == '=' {
   558  		s.next()
   559  		return tok1
   560  	}
   561  	if s.ch == ch2 {
   562  		s.next()
   563  		if s.ch == '=' {
   564  			s.next()
   565  			return tok3
   566  		}
   567  		return tok2
   568  	}
   569  	return tok0
   570  }
   571  
   572  // Scan scans the next token and returns the token position, the token,
   573  // and its literal string if applicable. The source end is indicated by
   574  // token.EOF.
   575  //
   576  // If the returned token is a literal (token.IDENT, token.INT, token.FLOAT,
   577  // token.IMAG, token.CHAR, token.STRING) or token.COMMENT, the literal string
   578  // has the corresponding value.
   579  //
   580  // If the returned token is a keyword, the literal string is the keyword.
   581  //
   582  // If the returned token is token.SEMICOLON, the corresponding
   583  // literal string is ";" if the semicolon was present in the source,
   584  // and "\n" if the semicolon was inserted because of a newline or
   585  // at EOF.
   586  //
   587  // If the returned token is token.ILLEGAL, the literal string is the
   588  // offending character.
   589  //
   590  // In all other cases, Scan returns an empty literal string.
   591  //
   592  // For more tolerant parsing, Scan will return a valid token if
   593  // possible even if a syntax error was encountered. Thus, even
   594  // if the resulting token sequence contains no illegal tokens,
   595  // a client may not assume that no error occurred. Instead it
   596  // must check the scanner's ErrorCount or the number of calls
   597  // of the error handler, if there was one installed.
   598  //
   599  // Scan adds line information to the file added to the file
   600  // set with Init. Token positions are relative to that file
   601  // and thus relative to the file set.
   602  //
   603  func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) {
   604  scanAgain:
   605  	s.skipWhitespace()
   606  
   607  	// current token start
   608  	pos = s.file.Pos(s.offset)
   609  
   610  	// determine token value
   611  	insertSemi := false
   612  	switch ch := s.ch; {
   613  	case isLetter(ch):
   614  		lit = s.scanIdentifier()
   615  		if len(lit) > 1 {
   616  			// keywords are longer than one letter - avoid lookup otherwise
   617  			tok = token.Lookup(lit)
   618  			switch tok {
   619  			case token.IDENT, token.BREAK, token.CONTINUE, token.FALLTHROUGH, token.RETURN:
   620  				insertSemi = true
   621  			}
   622  		} else {
   623  			insertSemi = true
   624  			tok = token.IDENT
   625  		}
   626  	case '0' <= ch && ch <= '9':
   627  		insertSemi = true
   628  		tok, lit = s.scanNumber(false)
   629  	default:
   630  		s.next() // always make progress
   631  		switch ch {
   632  		case -1:
   633  			if s.insertSemi {
   634  				s.insertSemi = false // EOF consumed
   635  				return pos, token.SEMICOLON, "\n"
   636  			}
   637  			tok = token.EOF
   638  		case '\n':
   639  			// we only reach here if s.insertSemi was
   640  			// set in the first place and exited early
   641  			// from s.skipWhitespace()
   642  			s.insertSemi = false // newline consumed
   643  			return pos, token.SEMICOLON, "\n"
   644  		case '"':
   645  			insertSemi = true
   646  			tok = token.STRING
   647  			lit = s.scanString()
   648  		case '\'':
   649  			insertSemi = true
   650  			tok = token.CHAR
   651  			lit = s.scanRune()
   652  		case '`':
   653  			insertSemi = true
   654  			tok = token.STRING
   655  			lit = s.scanRawString()
   656  		case ':':
   657  			tok = s.switch2(token.COLON, token.DEFINE)
   658  		case '.':
   659  			if '0' <= s.ch && s.ch <= '9' {
   660  				insertSemi = true
   661  				tok, lit = s.scanNumber(true)
   662  			} else if s.ch == '.' {
   663  				s.next()
   664  				if s.ch == '.' {
   665  					s.next()
   666  					tok = token.ELLIPSIS
   667  				}
   668  			} else {
   669  				tok = token.PERIOD
   670  			}
   671  		case ',':
   672  			tok = token.COMMA
   673  		case ';':
   674  			tok = token.SEMICOLON
   675  			lit = ";"
   676  		case '(':
   677  			tok = token.LPAREN
   678  		case ')':
   679  			insertSemi = true
   680  			tok = token.RPAREN
   681  		case '[':
   682  			tok = token.LBRACK
   683  		case ']':
   684  			insertSemi = true
   685  			tok = token.RBRACK
   686  		case '{':
   687  			tok = token.LBRACE
   688  		case '}':
   689  			insertSemi = true
   690  			tok = token.RBRACE
   691  		case '+':
   692  			tok = s.switch3(token.ADD, token.ADD_ASSIGN, '+', token.INC)
   693  			if tok == token.INC {
   694  				insertSemi = true
   695  			}
   696  		case '-':
   697  			tok = s.switch3(token.SUB, token.SUB_ASSIGN, '-', token.DEC)
   698  			if tok == token.DEC {
   699  				insertSemi = true
   700  			}
   701  		case '*':
   702  			tok = s.switch2(token.MUL, token.MUL_ASSIGN)
   703  		case '/':
   704  			if s.ch == '/' || s.ch == '*' {
   705  				// comment
   706  				if s.insertSemi && s.findLineEnd() {
   707  					// reset position to the beginning of the comment
   708  					s.ch = '/'
   709  					s.offset = s.file.Offset(pos)
   710  					s.rdOffset = s.offset + 1
   711  					s.insertSemi = false // newline consumed
   712  					return pos, token.SEMICOLON, "\n"
   713  				}
   714  				comment := s.scanComment()
   715  				if s.mode&ScanComments == 0 {
   716  					// skip comment
   717  					s.insertSemi = false // newline consumed
   718  					goto scanAgain
   719  				}
   720  				tok = token.COMMENT
   721  				lit = comment
   722  			} else {
   723  				tok = s.switch2(token.QUO, token.QUO_ASSIGN)
   724  			}
   725  		case '%':
   726  			tok = s.switch2(token.REM, token.REM_ASSIGN)
   727  		case '^':
   728  			tok = s.switch2(token.XOR, token.XOR_ASSIGN)
   729  		case '<':
   730  			if s.ch == '-' {
   731  				s.next()
   732  				tok = token.ARROW
   733  			} else {
   734  				tok = s.switch4(token.LSS, token.LEQ, '<', token.SHL, token.SHL_ASSIGN)
   735  			}
   736  		case '>':
   737  			tok = s.switch4(token.GTR, token.GEQ, '>', token.SHR, token.SHR_ASSIGN)
   738  		case '=':
   739  			tok = s.switch2(token.ASSIGN, token.EQL)
   740  		case '!':
   741  			tok = s.switch2(token.NOT, token.NEQ)
   742  		case '&':
   743  			if s.ch == '^' {
   744  				s.next()
   745  				tok = s.switch2(token.AND_NOT, token.AND_NOT_ASSIGN)
   746  			} else {
   747  				tok = s.switch3(token.AND, token.AND_ASSIGN, '&', token.LAND)
   748  			}
   749  		case '|':
   750  			tok = s.switch3(token.OR, token.OR_ASSIGN, '|', token.LOR)
   751  		default:
   752  			// next reports unexpected BOMs - don't repeat
   753  			if ch != bom {
   754  				s.error(s.file.Offset(pos), fmt.Sprintf("illegal character %#U", ch))
   755  			}
   756  			insertSemi = s.insertSemi // preserve insertSemi info
   757  			tok = token.ILLEGAL
   758  			lit = string(ch)
   759  		}
   760  	}
   761  	if s.mode&dontInsertSemis == 0 {
   762  		s.insertSemi = insertSemi
   763  	}
   764  
   765  	return
   766  }