github.com/spotify/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/pkg/go/scanner/scanner.go

github.com/spotify/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/pkg/go/scanner/scanner.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package scanner implements a scanner for Go source text.
     6  // It takes a []byte as source which can then be tokenized
     7  // through repeated calls to the Scan method.
     8  //
     9  package scanner
    10  
    11  import (
    12  	"bytes"
    13  	"fmt"
    14  	"go/token"
    15  	"path/filepath"
    16  	"strconv"
    17  	"unicode"
    18  	"unicode/utf8"
    19  )
    20  
// An ErrorHandler may be provided to Scanner.Init. If a syntax error is
// encountered and a handler was installed, the handler is called with a
// position and an error message. The position is computed from a byte
// offset into the source: for most errors it is the beginning of the
// offending token, for some (e.g. an illegal character inside an escape
// sequence) it is the offending character itself.
//
type ErrorHandler func(pos token.Position, msg string)
    27  
// A Scanner holds the scanner's internal state while processing
// a given text.  It can be allocated as part of another data
// structure but must be initialized via Init before use.
//
type Scanner struct {
	// immutable state (set by Init, not changed while scanning)
	file *token.File  // source file handle; receives line information
	dir  string       // directory portion of file.Name()
	src  []byte       // source
	err  ErrorHandler // error reporting; or nil
	mode Mode         // scanning mode

	// scanning state
	ch         rune // current character; -1 at end of file
	offset     int  // byte offset of ch in src
	rdOffset   int  // reading offset (position after current character)
	lineOffset int  // offset at which the current line starts
	insertSemi bool // insert a semicolon before next newline

	// public state - ok to modify
	ErrorCount int // number of errors encountered
}
    50  
// bom is the Unicode byte order mark. Init skips it when it is the very
// first character of the source; next reports it as an error at any
// other offset.
const bom = 0xFEFF // byte order mark, only permitted as very first character
    52  
    53  // Read the next Unicode char into s.ch.
    54  // s.ch < 0 means end-of-file.
    55  //
    56  func (s *Scanner) next() {
    57  	if s.rdOffset < len(s.src) {
    58  		s.offset = s.rdOffset
    59  		if s.ch == '\n' {
    60  			s.lineOffset = s.offset
    61  			s.file.AddLine(s.offset)
    62  		}
    63  		r, w := rune(s.src[s.rdOffset]), 1
    64  		switch {
    65  		case r == 0:
    66  			s.error(s.offset, "illegal character NUL")
    67  		case r >= 0x80:
    68  			// not ASCII
    69  			r, w = utf8.DecodeRune(s.src[s.rdOffset:])
    70  			if r == utf8.RuneError && w == 1 {
    71  				s.error(s.offset, "illegal UTF-8 encoding")
    72  			} else if r == bom && s.offset > 0 {
    73  				s.error(s.offset, "illegal byte order mark")
    74  			}
    75  		}
    76  		s.rdOffset += w
    77  		s.ch = r
    78  	} else {
    79  		s.offset = len(s.src)
    80  		if s.ch == '\n' {
    81  			s.lineOffset = s.offset
    82  			s.file.AddLine(s.offset)
    83  		}
    84  		s.ch = -1 // eof
    85  	}
    86  }
    87  
// A Mode value is a set of flags (or 0).
// They control scanner behavior.
//
type Mode uint

const (
	ScanComments    Mode = 1 << iota // return comments as COMMENT tokens (otherwise Scan skips them)
	dontInsertSemis                  // do not automatically insert semicolons - for testing only
)
    97  
    98  // Init prepares the scanner s to tokenize the text src by setting the
    99  // scanner at the beginning of src. The scanner uses the file set file
   100  // for position information and it adds line information for each line.
   101  // It is ok to re-use the same file when re-scanning the same file as
   102  // line information which is already present is ignored. Init causes a
   103  // panic if the file size does not match the src size.
   104  //
   105  // Calls to Scan will invoke the error handler err if they encounter a
   106  // syntax error and err is not nil. Also, for each error encountered,
   107  // the Scanner field ErrorCount is incremented by one. The mode parameter
   108  // determines how comments are handled.
   109  //
   110  // Note that Init may call err if there is an error in the first character
   111  // of the file.
   112  //
   113  func (s *Scanner) Init(file *token.File, src []byte, err ErrorHandler, mode Mode) {
   114  	// Explicitly initialize all fields since a scanner may be reused.
   115  	if file.Size() != len(src) {
   116  		panic(fmt.Sprintf("file size (%d) does not match src len (%d)", file.Size(), len(src)))
   117  	}
   118  	s.file = file
   119  	s.dir, _ = filepath.Split(file.Name())
   120  	s.src = src
   121  	s.err = err
   122  	s.mode = mode
   123  
   124  	s.ch = ' '
   125  	s.offset = 0
   126  	s.rdOffset = 0
   127  	s.lineOffset = 0
   128  	s.insertSemi = false
   129  	s.ErrorCount = 0
   130  
   131  	s.next()
   132  	if s.ch == bom {
   133  		s.next() // ignore BOM at file beginning
   134  	}
   135  }
   136  
   137  func (s *Scanner) error(offs int, msg string) {
   138  	if s.err != nil {
   139  		s.err(s.file.Position(s.file.Pos(offs)), msg)
   140  	}
   141  	s.ErrorCount++
   142  }
   143  
   144  var prefix = []byte("//line ")
   145  
   146  func (s *Scanner) interpretLineComment(text []byte) {
   147  	if bytes.HasPrefix(text, prefix) {
   148  		// get filename and line number, if any
   149  		if i := bytes.LastIndex(text, []byte{':'}); i > 0 {
   150  			if line, err := strconv.Atoi(string(text[i+1:])); err == nil && line > 0 {
   151  				// valid //line filename:line comment;
   152  				filename := filepath.Clean(string(text[len(prefix):i]))
   153  				if !filepath.IsAbs(filename) {
   154  					// make filename relative to current directory
   155  					filename = filepath.Join(s.dir, filename)
   156  				}
   157  				// update scanner position
   158  				s.file.AddLineInfo(s.lineOffset+len(text)+1, filename, line) // +len(text)+1 since comment applies to next line
   159  			}
   160  		}
   161  	}
   162  }
   163  
   164  func (s *Scanner) scanComment() string {
   165  	// initial '/' already consumed; s.ch == '/' || s.ch == '*'
   166  	offs := s.offset - 1 // position of initial '/'
   167  	hasCR := false
   168  
   169  	if s.ch == '/' {
   170  		//-style comment
   171  		s.next()
   172  		for s.ch != '\n' && s.ch >= 0 {
   173  			if s.ch == '\r' {
   174  				hasCR = true
   175  			}
   176  			s.next()
   177  		}
   178  		if offs == s.lineOffset {
   179  			// comment starts at the beginning of the current line
   180  			s.interpretLineComment(s.src[offs:s.offset])
   181  		}
   182  		goto exit
   183  	}
   184  
   185  	/*-style comment */
   186  	s.next()
   187  	for s.ch >= 0 {
   188  		ch := s.ch
   189  		if ch == '\r' {
   190  			hasCR = true
   191  		}
   192  		s.next()
   193  		if ch == '*' && s.ch == '/' {
   194  			s.next()
   195  			goto exit
   196  		}
   197  	}
   198  
   199  	s.error(offs, "comment not terminated")
   200  
   201  exit:
   202  	lit := s.src[offs:s.offset]
   203  	if hasCR {
   204  		lit = stripCR(lit)
   205  	}
   206  
   207  	return string(lit)
   208  }
   209  
   210  func (s *Scanner) findLineEnd() bool {
   211  	// initial '/' already consumed
   212  
   213  	defer func(offs int) {
   214  		// reset scanner state to where it was upon calling findLineEnd
   215  		s.ch = '/'
   216  		s.offset = offs
   217  		s.rdOffset = offs + 1
   218  		s.next() // consume initial '/' again
   219  	}(s.offset - 1)
   220  
   221  	// read ahead until a newline, EOF, or non-comment token is found
   222  	for s.ch == '/' || s.ch == '*' {
   223  		if s.ch == '/' {
   224  			//-style comment always contains a newline
   225  			return true
   226  		}
   227  		/*-style comment: look for newline */
   228  		s.next()
   229  		for s.ch >= 0 {
   230  			ch := s.ch
   231  			if ch == '\n' {
   232  				return true
   233  			}
   234  			s.next()
   235  			if ch == '*' && s.ch == '/' {
   236  				s.next()
   237  				break
   238  			}
   239  		}
   240  		s.skipWhitespace() // s.insertSemi is set
   241  		if s.ch < 0 || s.ch == '\n' {
   242  			return true
   243  		}
   244  		if s.ch != '/' {
   245  			// non-comment token
   246  			return false
   247  		}
   248  		s.next() // consume '/'
   249  	}
   250  
   251  	return false
   252  }
   253  
   254  func isLetter(ch rune) bool {
   255  	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch)
   256  }
   257  
   258  func isDigit(ch rune) bool {
   259  	return '0' <= ch && ch <= '9' || ch >= 0x80 && unicode.IsDigit(ch)
   260  }
   261  
   262  func (s *Scanner) scanIdentifier() string {
   263  	offs := s.offset
   264  	for isLetter(s.ch) || isDigit(s.ch) {
   265  		s.next()
   266  	}
   267  	return string(s.src[offs:s.offset])
   268  }
   269  
   270  func digitVal(ch rune) int {
   271  	switch {
   272  	case '0' <= ch && ch <= '9':
   273  		return int(ch - '0')
   274  	case 'a' <= ch && ch <= 'f':
   275  		return int(ch - 'a' + 10)
   276  	case 'A' <= ch && ch <= 'F':
   277  		return int(ch - 'A' + 10)
   278  	}
   279  	return 16 // larger than any legal digit val
   280  }
   281  
   282  func (s *Scanner) scanMantissa(base int) {
   283  	for digitVal(s.ch) < base {
   284  		s.next()
   285  	}
   286  }
   287  
   288  func (s *Scanner) scanNumber(seenDecimalPoint bool) (token.Token, string) {
   289  	// digitVal(s.ch) < 10
   290  	offs := s.offset
   291  	tok := token.INT
   292  
   293  	if seenDecimalPoint {
   294  		offs--
   295  		tok = token.FLOAT
   296  		s.scanMantissa(10)
   297  		goto exponent
   298  	}
   299  
   300  	if s.ch == '0' {
   301  		// int or float
   302  		offs := s.offset
   303  		s.next()
   304  		if s.ch == 'x' || s.ch == 'X' {
   305  			// hexadecimal int
   306  			s.next()
   307  			s.scanMantissa(16)
   308  			if s.offset-offs <= 2 {
   309  				// only scanned "0x" or "0X"
   310  				s.error(offs, "illegal hexadecimal number")
   311  			}
   312  		} else {
   313  			// octal int or float
   314  			seenDecimalDigit := false
   315  			s.scanMantissa(8)
   316  			if s.ch == '8' || s.ch == '9' {
   317  				// illegal octal int or float
   318  				seenDecimalDigit = true
   319  				s.scanMantissa(10)
   320  			}
   321  			if s.ch == '.' || s.ch == 'e' || s.ch == 'E' || s.ch == 'i' {
   322  				goto fraction
   323  			}
   324  			// octal int
   325  			if seenDecimalDigit {
   326  				s.error(offs, "illegal octal number")
   327  			}
   328  		}
   329  		goto exit
   330  	}
   331  
   332  	// decimal int or float
   333  	s.scanMantissa(10)
   334  
   335  fraction:
   336  	if s.ch == '.' {
   337  		tok = token.FLOAT
   338  		s.next()
   339  		s.scanMantissa(10)
   340  	}
   341  
   342  exponent:
   343  	if s.ch == 'e' || s.ch == 'E' {
   344  		tok = token.FLOAT
   345  		s.next()
   346  		if s.ch == '-' || s.ch == '+' {
   347  			s.next()
   348  		}
   349  		s.scanMantissa(10)
   350  	}
   351  
   352  	if s.ch == 'i' {
   353  		tok = token.IMAG
   354  		s.next()
   355  	}
   356  
   357  exit:
   358  	return tok, string(s.src[offs:s.offset])
   359  }
   360  
   361  func (s *Scanner) scanEscape(quote rune) {
   362  	offs := s.offset
   363  
   364  	var i, base, max uint32
   365  	switch s.ch {
   366  	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
   367  		s.next()
   368  		return
   369  	case '0', '1', '2', '3', '4', '5', '6', '7':
   370  		i, base, max = 3, 8, 255
   371  	case 'x':
   372  		s.next()
   373  		i, base, max = 2, 16, 255
   374  	case 'u':
   375  		s.next()
   376  		i, base, max = 4, 16, unicode.MaxRune
   377  	case 'U':
   378  		s.next()
   379  		i, base, max = 8, 16, unicode.MaxRune
   380  	default:
   381  		s.next() // always make progress
   382  		s.error(offs, "unknown escape sequence")
   383  		return
   384  	}
   385  
   386  	var x uint32
   387  	for ; i > 0 && s.ch != quote && s.ch >= 0; i-- {
   388  		d := uint32(digitVal(s.ch))
   389  		if d >= base {
   390  			s.error(s.offset, "illegal character in escape sequence")
   391  			break
   392  		}
   393  		x = x*base + d
   394  		s.next()
   395  	}
   396  	// in case of an error, consume remaining chars
   397  	for ; i > 0 && s.ch != quote && s.ch >= 0; i-- {
   398  		s.next()
   399  	}
   400  	if x > max || 0xD800 <= x && x < 0xE000 {
   401  		s.error(offs, "escape sequence is invalid Unicode code point")
   402  	}
   403  }
   404  
   405  func (s *Scanner) scanChar() string {
   406  	// '\'' opening already consumed
   407  	offs := s.offset - 1
   408  
   409  	n := 0
   410  	for s.ch != '\'' {
   411  		ch := s.ch
   412  		n++
   413  		s.next()
   414  		if ch == '\n' || ch < 0 {
   415  			s.error(offs, "character literal not terminated")
   416  			n = 1
   417  			break
   418  		}
   419  		if ch == '\\' {
   420  			s.scanEscape('\'')
   421  		}
   422  	}
   423  
   424  	s.next()
   425  
   426  	if n != 1 {
   427  		s.error(offs, "illegal character literal")
   428  	}
   429  
   430  	return string(s.src[offs:s.offset])
   431  }
   432  
   433  func (s *Scanner) scanString() string {
   434  	// '"' opening already consumed
   435  	offs := s.offset - 1
   436  
   437  	for s.ch != '"' {
   438  		ch := s.ch
   439  		s.next()
   440  		if ch == '\n' || ch < 0 {
   441  			s.error(offs, "string not terminated")
   442  			break
   443  		}
   444  		if ch == '\\' {
   445  			s.scanEscape('"')
   446  		}
   447  	}
   448  
   449  	s.next()
   450  
   451  	return string(s.src[offs:s.offset])
   452  }
   453  
   454  func stripCR(b []byte) []byte {
   455  	c := make([]byte, len(b))
   456  	i := 0
   457  	for _, ch := range b {
   458  		if ch != '\r' {
   459  			c[i] = ch
   460  			i++
   461  		}
   462  	}
   463  	return c[:i]
   464  }
   465  
   466  func (s *Scanner) scanRawString() string {
   467  	// '`' opening already consumed
   468  	offs := s.offset - 1
   469  
   470  	hasCR := false
   471  	for s.ch != '`' {
   472  		ch := s.ch
   473  		s.next()
   474  		if ch == '\r' {
   475  			hasCR = true
   476  		}
   477  		if ch < 0 {
   478  			s.error(offs, "string not terminated")
   479  			break
   480  		}
   481  	}
   482  
   483  	s.next()
   484  
   485  	lit := s.src[offs:s.offset]
   486  	if hasCR {
   487  		lit = stripCR(lit)
   488  	}
   489  
   490  	return string(lit)
   491  }
   492  
   493  func (s *Scanner) skipWhitespace() {
   494  	for s.ch == ' ' || s.ch == '\t' || s.ch == '\n' && !s.insertSemi || s.ch == '\r' {
   495  		s.next()
   496  	}
   497  }
   498  
   499  // Helper functions for scanning multi-byte tokens such as >> += >>= .
   500  // Different routines recognize different length tok_i based on matches
   501  // of ch_i. If a token ends in '=', the result is tok1 or tok3
   502  // respectively. Otherwise, the result is tok0 if there was no other
   503  // matching character, or tok2 if the matching character was ch2.
   504  
   505  func (s *Scanner) switch2(tok0, tok1 token.Token) token.Token {
   506  	if s.ch == '=' {
   507  		s.next()
   508  		return tok1
   509  	}
   510  	return tok0
   511  }
   512  
   513  func (s *Scanner) switch3(tok0, tok1 token.Token, ch2 rune, tok2 token.Token) token.Token {
   514  	if s.ch == '=' {
   515  		s.next()
   516  		return tok1
   517  	}
   518  	if s.ch == ch2 {
   519  		s.next()
   520  		return tok2
   521  	}
   522  	return tok0
   523  }
   524  
   525  func (s *Scanner) switch4(tok0, tok1 token.Token, ch2 rune, tok2, tok3 token.Token) token.Token {
   526  	if s.ch == '=' {
   527  		s.next()
   528  		return tok1
   529  	}
   530  	if s.ch == ch2 {
   531  		s.next()
   532  		if s.ch == '=' {
   533  			s.next()
   534  			return tok3
   535  		}
   536  		return tok2
   537  	}
   538  	return tok0
   539  }
   540  
   541  // Scan scans the next token and returns the token position, the token,
   542  // and its literal string if applicable. The source end is indicated by
   543  // token.EOF.
   544  //
   545  // If the returned token is a literal (token.IDENT, token.INT, token.FLOAT,
   546  // token.IMAG, token.CHAR, token.STRING) or token.COMMENT, the literal string
   547  // has the corresponding value.
   548  //
   549  // If the returned token is a keyword, the literal string is the keyword.
   550  //
   551  // If the returned token is token.SEMICOLON, the corresponding
   552  // literal string is ";" if the semicolon was present in the source,
   553  // and "\n" if the semicolon was inserted because of a newline or
   554  // at EOF.
   555  //
   556  // If the returned token is token.ILLEGAL, the literal string is the
   557  // offending character.
   558  //
   559  // In all other cases, Scan returns an empty literal string.
   560  //
   561  // For more tolerant parsing, Scan will return a valid token if
   562  // possible even if a syntax error was encountered. Thus, even
   563  // if the resulting token sequence contains no illegal tokens,
   564  // a client may not assume that no error occurred. Instead it
   565  // must check the scanner's ErrorCount or the number of calls
   566  // of the error handler, if there was one installed.
   567  //
   568  // Scan adds line information to the file added to the file
   569  // set with Init. Token positions are relative to that file
   570  // and thus relative to the file set.
   571  //
   572  func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) {
   573  scanAgain:
   574  	s.skipWhitespace()
   575  
   576  	// current token start
   577  	pos = s.file.Pos(s.offset)
   578  
   579  	// determine token value
   580  	insertSemi := false
   581  	switch ch := s.ch; {
   582  	case isLetter(ch):
   583  		lit = s.scanIdentifier()
   584  		if len(lit) > 1 {
   585  			// keywords are longer than one letter - avoid lookup otherwise
   586  			tok = token.Lookup(lit)
   587  			switch tok {
   588  			case token.IDENT, token.BREAK, token.CONTINUE, token.FALLTHROUGH, token.RETURN:
   589  				insertSemi = true
   590  			}
   591  		} else {
   592  			insertSemi = true
   593  			tok = token.IDENT
   594  		}
   595  	case '0' <= ch && ch <= '9':
   596  		insertSemi = true
   597  		tok, lit = s.scanNumber(false)
   598  	default:
   599  		s.next() // always make progress
   600  		switch ch {
   601  		case -1:
   602  			if s.insertSemi {
   603  				s.insertSemi = false // EOF consumed
   604  				return pos, token.SEMICOLON, "\n"
   605  			}
   606  			tok = token.EOF
   607  		case '\n':
   608  			// we only reach here if s.insertSemi was
   609  			// set in the first place and exited early
   610  			// from s.skipWhitespace()
   611  			s.insertSemi = false // newline consumed
   612  			return pos, token.SEMICOLON, "\n"
   613  		case '"':
   614  			insertSemi = true
   615  			tok = token.STRING
   616  			lit = s.scanString()
   617  		case '\'':
   618  			insertSemi = true
   619  			tok = token.CHAR
   620  			lit = s.scanChar()
   621  		case '`':
   622  			insertSemi = true
   623  			tok = token.STRING
   624  			lit = s.scanRawString()
   625  		case ':':
   626  			tok = s.switch2(token.COLON, token.DEFINE)
   627  		case '.':
   628  			if '0' <= s.ch && s.ch <= '9' {
   629  				insertSemi = true
   630  				tok, lit = s.scanNumber(true)
   631  			} else if s.ch == '.' {
   632  				s.next()
   633  				if s.ch == '.' {
   634  					s.next()
   635  					tok = token.ELLIPSIS
   636  				}
   637  			} else {
   638  				tok = token.PERIOD
   639  			}
   640  		case ',':
   641  			tok = token.COMMA
   642  		case ';':
   643  			tok = token.SEMICOLON
   644  			lit = ";"
   645  		case '(':
   646  			tok = token.LPAREN
   647  		case ')':
   648  			insertSemi = true
   649  			tok = token.RPAREN
   650  		case '[':
   651  			tok = token.LBRACK
   652  		case ']':
   653  			insertSemi = true
   654  			tok = token.RBRACK
   655  		case '{':
   656  			tok = token.LBRACE
   657  		case '}':
   658  			insertSemi = true
   659  			tok = token.RBRACE
   660  		case '+':
   661  			tok = s.switch3(token.ADD, token.ADD_ASSIGN, '+', token.INC)
   662  			if tok == token.INC {
   663  				insertSemi = true
   664  			}
   665  		case '-':
   666  			tok = s.switch3(token.SUB, token.SUB_ASSIGN, '-', token.DEC)
   667  			if tok == token.DEC {
   668  				insertSemi = true
   669  			}
   670  		case '*':
   671  			tok = s.switch2(token.MUL, token.MUL_ASSIGN)
   672  		case '/':
   673  			if s.ch == '/' || s.ch == '*' {
   674  				// comment
   675  				if s.insertSemi && s.findLineEnd() {
   676  					// reset position to the beginning of the comment
   677  					s.ch = '/'
   678  					s.offset = s.file.Offset(pos)
   679  					s.rdOffset = s.offset + 1
   680  					s.insertSemi = false // newline consumed
   681  					return pos, token.SEMICOLON, "\n"
   682  				}
   683  				lit = s.scanComment()
   684  				if s.mode&ScanComments == 0 {
   685  					// skip comment
   686  					s.insertSemi = false // newline consumed
   687  					goto scanAgain
   688  				}
   689  				tok = token.COMMENT
   690  			} else {
   691  				tok = s.switch2(token.QUO, token.QUO_ASSIGN)
   692  			}
   693  		case '%':
   694  			tok = s.switch2(token.REM, token.REM_ASSIGN)
   695  		case '^':
   696  			tok = s.switch2(token.XOR, token.XOR_ASSIGN)
   697  		case '<':
   698  			if s.ch == '-' {
   699  				s.next()
   700  				tok = token.ARROW
   701  			} else {
   702  				tok = s.switch4(token.LSS, token.LEQ, '<', token.SHL, token.SHL_ASSIGN)
   703  			}
   704  		case '>':
   705  			tok = s.switch4(token.GTR, token.GEQ, '>', token.SHR, token.SHR_ASSIGN)
   706  		case '=':
   707  			tok = s.switch2(token.ASSIGN, token.EQL)
   708  		case '!':
   709  			tok = s.switch2(token.NOT, token.NEQ)
   710  		case '&':
   711  			if s.ch == '^' {
   712  				s.next()
   713  				tok = s.switch2(token.AND_NOT, token.AND_NOT_ASSIGN)
   714  			} else {
   715  				tok = s.switch3(token.AND, token.AND_ASSIGN, '&', token.LAND)
   716  			}
   717  		case '|':
   718  			tok = s.switch3(token.OR, token.OR_ASSIGN, '|', token.LOR)
   719  		default:
   720  			// next reports unexpected BOMs - don't repeat
   721  			if ch != bom {
   722  				s.error(s.file.Offset(pos), fmt.Sprintf("illegal character %#U", ch))
   723  			}
   724  			insertSemi = s.insertSemi // preserve insertSemi info
   725  			tok = token.ILLEGAL
   726  			lit = string(ch)
   727  		}
   728  	}
   729  	if s.mode&dontInsertSemis == 0 {
   730  		s.insertSemi = insertSemi
   731  	}
   732  
   733  	return
   734  }