github.com/XiaoMi/Gaea@v1.2.5/parser/lexer.go (about)

     1  // Copyright 2016 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package parser
    15  
    16  import (
    17  	"bytes"
    18  	"fmt"
    19  	"strconv"
    20  	"strings"
    21  	"unicode"
    22  	"unicode/utf8"
    23  
    24  	"github.com/XiaoMi/Gaea/mysql"
    25  )
    26  
// Compile-time assertion that *Scanner can be used as a yyLexer.
var _ = yyLexer(&Scanner{})
    28  
    29  // Pos represents the position of a token.
    30  type Pos struct {
    31  	Line   int
    32  	Col    int
    33  	Offset int
    34  }
    35  
// Scanner implements the yyLexer interface.
// It walks a SQL string through its reader and hands tokens to the parser
// via Lex.
type Scanner struct {
	// r is the cursor over the SQL text being scanned.
	r   reader
	// buf is scratch space reused while building literals whose value
	// differs from the raw input (see scanString and scanQuotedIdent).
	buf bytes.Buffer

	// errs and warns collect the problems found while scanning; both are
	// handed back by Errors.
	errs         []error
	warns        []error
	// stmtStartPos is the byte offset at which the text returned by the
	// next stmtText call begins.
	stmtStartPos int

	// For scanning such kind of comment: /*! MySQL-specific code */ or /*+ optimizer hint */
	// While non-nil, scan() takes its tokens from this nested scanner.
	specialComment specialCommentScanner

	// sqlMode is the SQL mode consulted by Lex and scanString.
	sqlMode mysql.SQLMode

	// If the lexer should recognize keywords for window function.
	// It may break the compatibility when support those keywords,
	// because some application may already use them as identifiers.
	supportWindowFunc bool

	// lastScanOffset indicates last offset returned by scan().
	// It's used to substring sql in syntax error message.
	lastScanOffset int
}
    59  
// specialCommentScanner is the token source used while the Scanner is inside
// a special comment. scan has the same result shape as Scanner.scan: the
// token id, its position and its literal text. Scanner.scan leaves the
// special-comment mode once this scan reports token 0.
type specialCommentScanner interface {
	scan() (tok int, pos Pos, lit string)
}
    63  
    64  type mysqlSpecificCodeScanner struct {
    65  	*Scanner
    66  	Pos
    67  }
    68  
    69  func (s *mysqlSpecificCodeScanner) scan() (tok int, pos Pos, lit string) {
    70  	tok, pos, lit = s.Scanner.scan()
    71  	pos.Line += s.Pos.Line
    72  	pos.Col += s.Pos.Col
    73  	pos.Offset += s.Pos.Offset
    74  	return
    75  }
    76  
    77  type optimizerHintScanner struct {
    78  	*Scanner
    79  	Pos
    80  	end bool
    81  }
    82  
    83  func (s *optimizerHintScanner) scan() (tok int, pos Pos, lit string) {
    84  	tok, pos, lit = s.Scanner.scan()
    85  	pos.Line += s.Pos.Line
    86  	pos.Col += s.Pos.Col
    87  	pos.Offset += s.Pos.Offset
    88  	if tok == 0 {
    89  		if !s.end {
    90  			tok = hintEnd
    91  			s.end = true
    92  		}
    93  	}
    94  	return
    95  }
    96  
    97  // Errors returns the errors and warns during a scan.
    98  func (s *Scanner) Errors() (warns []error, errs []error) {
    99  	return s.warns, s.errs
   100  }
   101  
   102  // reset resets the sql string to be scanned.
   103  func (s *Scanner) reset(sql string) {
   104  	s.r = reader{s: sql, p: Pos{Line: 1}}
   105  	s.buf.Reset()
   106  	s.errs = s.errs[:0]
   107  	s.warns = s.warns[:0]
   108  	s.stmtStartPos = 0
   109  	s.specialComment = nil
   110  }
   111  
   112  func (s *Scanner) stmtText() string {
   113  	endPos := s.r.pos().Offset
   114  	if s.r.s[endPos-1] == '\n' {
   115  		endPos = endPos - 1 // trim new line
   116  	}
   117  	if s.r.s[s.stmtStartPos] == '\n' {
   118  		s.stmtStartPos++
   119  	}
   120  
   121  	text := s.r.s[s.stmtStartPos:endPos]
   122  
   123  	s.stmtStartPos = endPos
   124  	return text
   125  }
   126  
   127  // Errorf tells scanner something is wrong.
   128  // Scanner satisfies yyLexer interface which need this function.
   129  func (s *Scanner) Errorf(format string, a ...interface{}) {
   130  	str := fmt.Sprintf(format, a...)
   131  	val := s.r.s[s.lastScanOffset:]
   132  	var lenStr = ""
   133  	if len(val) > 2048 {
   134  		lenStr = "(total length " + strconv.Itoa(len(val)) + ")"
   135  		val = val[:2048]
   136  	}
   137  	err := fmt.Errorf("line %d column %d near \"%s\"%s %s",
   138  		s.r.p.Line, s.r.p.Col, val, str, lenStr)
   139  	s.errs = append(s.errs, err)
   140  }
   141  
   142  // Lex returns a token and store the token value in v.
   143  // Scanner satisfies yyLexer interface.
   144  // 0 and invalid are special token id this function would return:
   145  // return 0 tells parser that scanner meets EOF,
   146  // return invalid tells parser that scanner meets illegal character.
   147  func (s *Scanner) Lex(v *yySymType) int {
   148  	tok, pos, lit := s.scan()
   149  	s.lastScanOffset = pos.Offset
   150  	v.offset = pos.Offset
   151  	v.ident = lit
   152  	if tok == identifier {
   153  		tok = handleIdent(v)
   154  	}
   155  	if tok == identifier {
   156  		if tok1 := s.isTokenIdentifier(lit, pos.Offset); tok1 != 0 {
   157  			tok = tok1
   158  		}
   159  	}
   160  	if s.sqlMode.HasANSIQuotesMode() &&
   161  		tok == stringLit &&
   162  		s.r.s[v.offset] == '"' {
   163  		tok = identifier
   164  	}
   165  
   166  	if tok == pipes && !(s.sqlMode.HasPipesAsConcatMode()) {
   167  		return pipesAsOr
   168  	}
   169  
   170  	if tok == not && s.sqlMode.HasHighNotPrecedenceMode() {
   171  		return not2
   172  	}
   173  
   174  	switch tok {
   175  	case intLit:
   176  		return toInt(s, v, lit)
   177  	case floatLit:
   178  		return toFloat(s, v, lit)
   179  	case decLit:
   180  		return toDecimal(s, v, lit)
   181  	case hexLit:
   182  		return toHex(s, v, lit)
   183  	case bitLit:
   184  		return toBit(s, v, lit)
   185  	case singleAtIdentifier, doubleAtIdentifier, cast, extract:
   186  		v.item = lit
   187  		return tok
   188  	case null:
   189  		v.item = nil
   190  	case quotedIdentifier:
   191  		tok = identifier
   192  	}
   193  	if tok == unicode.ReplacementChar && s.r.eof() {
   194  		return 0
   195  	}
   196  	return tok
   197  }
   198  
// SetSQLMode sets the SQL mode for scanner.
// The mode is stored in sqlMode, which Lex and scanString consult.
func (s *Scanner) SetSQLMode(mode mysql.SQLMode) {
	s.sqlMode = mode
}
   203  
// GetSQLMode returns the SQL mode currently stored in the scanner.
func (s *Scanner) GetSQLMode() mysql.SQLMode {
	return s.sqlMode
}
   208  
// EnableWindowFunc controls whether the scanner recognizes the keywords of
// window functions; val is stored in supportWindowFunc.
func (s *Scanner) EnableWindowFunc(val bool) {
	s.supportWindowFunc = val
}
   213  
// NewScanner returns a new scanner object over s.
// Only the reader's text is set: its position is the zero Pos (Line 0,
// unlike reset, which starts at Line 1) and every other field keeps its zero
// value, including the SQL mode.
func NewScanner(s string) *Scanner {
	return &Scanner{r: reader{s: s}}
}
   218  
// skipWhitespace advances the reader past every consecutive rune for which
// unicode.IsSpace is true and returns the first rune that is not whitespace
// (the reader's end-of-input marker when the text ends first).
func (s *Scanner) skipWhitespace() rune {
	return s.r.incAsLongAs(unicode.IsSpace)
}
   222  
   223  func (s *Scanner) scan() (tok int, pos Pos, lit string) {
   224  	if s.specialComment != nil {
   225  		// Enter specialComment scan mode.
   226  		// for scanning such kind of comment: /*! MySQL-specific code */
   227  		specialComment := s.specialComment
   228  		tok, pos, lit = specialComment.scan()
   229  		if tok != 0 {
   230  			// return the specialComment scan result as the result
   231  			return
   232  		}
   233  		// leave specialComment scan mode after all stream consumed.
   234  		s.specialComment = nil
   235  	}
   236  
   237  	ch0 := s.r.peek()
   238  	if unicode.IsSpace(ch0) {
   239  		ch0 = s.skipWhitespace()
   240  	}
   241  	pos = s.r.pos()
   242  	if s.r.eof() {
   243  		// when scanner meets EOF, the returned token should be 0,
   244  		// because 0 is a special token id to remind the parser that stream is end.
   245  		return 0, pos, ""
   246  	}
   247  
   248  	if !s.r.eof() && isIdentExtend(ch0) {
   249  		return scanIdentifier(s)
   250  	}
   251  
   252  	// search a trie to get a token.
   253  	node := &ruleTable
   254  	for ch0 >= 0 && ch0 <= 255 {
   255  		if node.childs[ch0] == nil || s.r.eof() {
   256  			break
   257  		}
   258  		node = node.childs[ch0]
   259  		if node.fn != nil {
   260  			return node.fn(s)
   261  		}
   262  		s.r.inc()
   263  		ch0 = s.r.peek()
   264  	}
   265  
   266  	tok, lit = node.token, s.r.data(&pos)
   267  	return
   268  }
   269  
   270  func startWithXx(s *Scanner) (tok int, pos Pos, lit string) {
   271  	pos = s.r.pos()
   272  	s.r.inc()
   273  	if s.r.peek() == '\'' {
   274  		s.r.inc()
   275  		s.scanHex()
   276  		if s.r.peek() == '\'' {
   277  			s.r.inc()
   278  			tok, lit = hexLit, s.r.data(&pos)
   279  		} else {
   280  			tok = unicode.ReplacementChar
   281  		}
   282  		return
   283  	}
   284  	s.r.incAsLongAs(isIdentChar)
   285  	tok, lit = identifier, s.r.data(&pos)
   286  	return
   287  }
   288  
   289  func startWithNn(s *Scanner) (tok int, pos Pos, lit string) {
   290  	tok, pos, lit = scanIdentifier(s)
   291  	// The National Character Set, N'some text' or n'some test'.
   292  	// See https://dev.mysql.com/doc/refman/5.7/en/string-literals.html
   293  	// and https://dev.mysql.com/doc/refman/5.7/en/charset-national.html
   294  	if lit == "N" || lit == "n" {
   295  		if s.r.peek() == '\'' {
   296  			tok = underscoreCS
   297  			lit = "utf8"
   298  		}
   299  	}
   300  	return
   301  }
   302  
   303  func startWithBb(s *Scanner) (tok int, pos Pos, lit string) {
   304  	pos = s.r.pos()
   305  	s.r.inc()
   306  	if s.r.peek() == '\'' {
   307  		s.r.inc()
   308  		s.scanBit()
   309  		if s.r.peek() == '\'' {
   310  			s.r.inc()
   311  			tok, lit = bitLit, s.r.data(&pos)
   312  		} else {
   313  			tok = unicode.ReplacementChar
   314  		}
   315  		return
   316  	}
   317  	s.r.incAsLongAs(isIdentChar)
   318  	tok, lit = identifier, s.r.data(&pos)
   319  	return
   320  }
   321  
   322  func startWithSharp(s *Scanner) (tok int, pos Pos, lit string) {
   323  	s.r.incAsLongAs(func(ch rune) bool {
   324  		return ch != '\n'
   325  	})
   326  	return s.scan()
   327  }
   328  
   329  func startWithDash(s *Scanner) (tok int, pos Pos, lit string) {
   330  	pos = s.r.pos()
   331  	if strings.HasPrefix(s.r.s[pos.Offset:], "--") {
   332  		remainLen := len(s.r.s[pos.Offset:])
   333  		if remainLen == 2 || (remainLen > 2 && unicode.IsSpace(rune(s.r.s[pos.Offset+2]))) {
   334  			s.r.incAsLongAs(func(ch rune) bool {
   335  				return ch != '\n'
   336  			})
   337  			return s.scan()
   338  		}
   339  	}
   340  	if strings.HasPrefix(s.r.s[pos.Offset:], "->>") {
   341  		tok = juss
   342  		s.r.incN(3)
   343  		return
   344  	}
   345  	if strings.HasPrefix(s.r.s[pos.Offset:], "->") {
   346  		tok = jss
   347  		s.r.incN(2)
   348  		return
   349  	}
   350  	tok = int('-')
   351  	lit = "-"
   352  	s.r.inc()
   353  	return
   354  }
   355  
   356  func startWithSlash(s *Scanner) (tok int, pos Pos, lit string) {
   357  	pos = s.r.pos()
   358  	s.r.inc()
   359  	ch0 := s.r.peek()
   360  	if ch0 == '*' {
   361  		s.r.inc()
   362  		startWithAsterisk := false
   363  		for {
   364  			ch0 = s.r.readByte()
   365  			if startWithAsterisk && ch0 == '/' {
   366  				// Meets */, means comment end.
   367  				break
   368  			} else if ch0 == '*' {
   369  				startWithAsterisk = true
   370  			} else {
   371  				startWithAsterisk = false
   372  			}
   373  
   374  			if ch0 == unicode.ReplacementChar && s.r.eof() {
   375  				// unclosed comment
   376  				s.errs = append(s.errs, ParseErrorWith(s.r.data(&pos), s.r.p.Line))
   377  				return
   378  			}
   379  
   380  		}
   381  
   382  		comment := s.r.data(&pos)
   383  
   384  		// See https://dev.mysql.com/doc/refman/5.7/en/optimizer-hints.html
   385  		if strings.HasPrefix(comment, "/*+") {
   386  			begin := sqlOffsetInComment(comment)
   387  			end := len(comment) - 2
   388  			sql := comment[begin:end]
   389  			s.specialComment = &optimizerHintScanner{
   390  				Scanner: NewScanner(sql),
   391  				Pos: Pos{
   392  					pos.Line,
   393  					pos.Col,
   394  					pos.Offset + begin,
   395  				},
   396  			}
   397  
   398  			tok = hintBegin
   399  			return
   400  		}
   401  
   402  		// See http://dev.mysql.com/doc/refman/5.7/en/comments.html
   403  		// Convert "/*!VersionNumber MySQL-specific-code */" to "MySQL-specific-code".
   404  		if strings.HasPrefix(comment, "/*!") {
   405  			sql := specCodePattern.ReplaceAllStringFunc(comment, TrimComment)
   406  			s.specialComment = &mysqlSpecificCodeScanner{
   407  				Scanner: NewScanner(sql),
   408  				Pos: Pos{
   409  					pos.Line,
   410  					pos.Col,
   411  					pos.Offset + sqlOffsetInComment(comment),
   412  				},
   413  			}
   414  		}
   415  
   416  		return s.scan()
   417  	}
   418  	tok = int('/')
   419  	return
   420  }
   421  
   422  func sqlOffsetInComment(comment string) int {
   423  	// find the first SQL token offset in pattern like "/*!40101 mysql specific code */"
   424  	offset := 0
   425  	for i := 0; i < len(comment); i++ {
   426  		if unicode.IsSpace(rune(comment[i])) {
   427  			offset = i
   428  			break
   429  		}
   430  	}
   431  	for offset < len(comment) {
   432  		offset++
   433  		if !unicode.IsSpace(rune(comment[offset])) {
   434  			break
   435  		}
   436  	}
   437  	return offset
   438  }
   439  
   440  func startWithAt(s *Scanner) (tok int, pos Pos, lit string) {
   441  	pos = s.r.pos()
   442  	s.r.inc()
   443  
   444  	tok, lit = scanIdentifierOrString(s)
   445  	switch tok {
   446  	case '@':
   447  		s.r.inc()
   448  		stream := s.r.s[pos.Offset+2:]
   449  		var prefix string
   450  		for _, v := range []string{"global.", "session.", "local."} {
   451  			if len(v) > len(stream) {
   452  				continue
   453  			}
   454  			if strings.EqualFold(stream[:len(v)], v) {
   455  				prefix = v
   456  				s.r.incN(len(v))
   457  				break
   458  			}
   459  		}
   460  		tok, lit = scanIdentifierOrString(s)
   461  		switch tok {
   462  		case stringLit, quotedIdentifier:
   463  			tok, lit = doubleAtIdentifier, "@@"+prefix+lit
   464  		case identifier:
   465  			tok, lit = doubleAtIdentifier, s.r.data(&pos)
   466  		}
   467  	case unicode.ReplacementChar:
   468  		break
   469  	default:
   470  		tok = singleAtIdentifier
   471  	}
   472  
   473  	return
   474  }
   475  
   476  func scanIdentifier(s *Scanner) (int, Pos, string) {
   477  	pos := s.r.pos()
   478  	s.r.inc()
   479  	s.r.incAsLongAs(isIdentChar)
   480  	return identifier, pos, s.r.data(&pos)
   481  }
   482  
   483  func scanIdentifierOrString(s *Scanner) (tok int, lit string) {
   484  	ch1 := s.r.peek()
   485  	switch ch1 {
   486  	case '\'', '"':
   487  		tok, _, lit = startString(s)
   488  	case '`':
   489  		tok, _, lit = scanQuotedIdent(s)
   490  	default:
   491  		if isUserVarChar(ch1) {
   492  			pos := s.r.pos()
   493  			s.r.incAsLongAs(isUserVarChar)
   494  			tok, lit = identifier, s.r.data(&pos)
   495  		} else {
   496  			tok = int(ch1)
   497  		}
   498  	}
   499  	return
   500  }
   501  
var (
	// quotedIdentifier is the negated identifier token. scanQuotedIdent
	// returns it for backquoted names so that the identifier handling in Lex
	// does not treat them as possible keywords; Lex turns it into identifier
	// just before returning.
	quotedIdentifier = -identifier
)
   505  
   506  func scanQuotedIdent(s *Scanner) (tok int, pos Pos, lit string) {
   507  	pos = s.r.pos()
   508  	s.r.inc()
   509  	s.buf.Reset()
   510  	for {
   511  		ch := s.r.readByte()
   512  		if ch == unicode.ReplacementChar && s.r.eof() {
   513  			tok = unicode.ReplacementChar
   514  			return
   515  		}
   516  		if ch == '`' {
   517  			if s.r.peek() != '`' {
   518  				// don't return identifier in case that it's interpreted as keyword token later.
   519  				tok, lit = quotedIdentifier, s.buf.String()
   520  				return
   521  			}
   522  			s.r.inc()
   523  		}
   524  		s.buf.WriteRune(ch)
   525  	}
   526  }
   527  
// startString scans a quoted string literal by delegating to
// Scanner.scanString; it gives scanString the same function shape as the
// other start* scanning functions.
func startString(s *Scanner) (tok int, pos Pos, lit string) {
	return s.scanString()
}
   531  
   532  // lazyBuf is used to avoid allocation if possible.
   533  // it has a useBuf field indicates whether bytes.Buffer is necessary. if
   534  // useBuf is false, we can avoid calling bytes.Buffer.String(), which
   535  // make a copy of data and cause allocation.
   536  type lazyBuf struct {
   537  	useBuf bool
   538  	r      *reader
   539  	b      *bytes.Buffer
   540  	p      *Pos
   541  }
   542  
   543  func (mb *lazyBuf) setUseBuf(str string) {
   544  	if !mb.useBuf {
   545  		mb.useBuf = true
   546  		mb.b.Reset()
   547  		mb.b.WriteString(str)
   548  	}
   549  }
   550  
   551  func (mb *lazyBuf) writeRune(r rune, w int) {
   552  	if mb.useBuf {
   553  		if w > 1 {
   554  			mb.b.WriteRune(r)
   555  		} else {
   556  			mb.b.WriteByte(byte(r))
   557  		}
   558  	}
   559  }
   560  
   561  func (mb *lazyBuf) data() string {
   562  	var lit string
   563  	if mb.useBuf {
   564  		lit = mb.b.String()
   565  	} else {
   566  		lit = mb.r.data(mb.p)
   567  		lit = lit[1 : len(lit)-1]
   568  	}
   569  	return lit
   570  }
   571  
// scanString scans a quoted string literal starting at the reader's current
// position. The first character read is taken as the quote character. It
// returns stringLit with the unquoted, unescaped value, or
// unicode.ReplacementChar when the input ends before the closing quote.
//
// The value is a plain sub-string of the input until the first doubled quote
// or backslash escape; from then on it is assembled in s.buf (see lazyBuf).
func (s *Scanner) scanString() (tok int, pos Pos, lit string) {
	tok, pos = stringLit, s.r.pos()
	mb := lazyBuf{false, &s.r, &s.buf, &pos}
	// ending is the opening quote; the same character closes the literal.
	ending := s.r.readByte()
	ch0 := s.r.peek()
	for !s.r.eof() {
		if ch0 == ending {
			s.r.inc()
			if s.r.peek() != ending {
				// A single quote character: the literal is complete.
				lit = mb.data()
				return
			}
			// Doubled quote: it stands for one quote character. Seed the
			// buffer with the text so far, minus the opening quote and the
			// quote just consumed; ch0 is written below and the second quote
			// is skipped by the inc at the bottom of the loop.
			str := mb.r.data(&pos)
			mb.setUseBuf(str[1 : len(str)-1])
		} else if ch0 == '\\' && !s.sqlMode.HasNoBackslashEscapesMode() {
			// Backslash escape: seed the buffer with the text so far (minus
			// the opening quote) and let handleEscape pick the rune to store.
			mb.setUseBuf(mb.r.data(&pos)[1:])
			ch0 = handleEscape(s)
		}
		// No-op until the buffer is in use.
		mb.writeRune(ch0, s.r.w)
		if !s.r.eof() {
			s.r.inc()
			ch0 = s.r.peek()
		}
	}

	// Reached the end of input without a closing quote.
	tok = unicode.ReplacementChar
	return
}
   600  
   601  // handleEscape handles the case in scanString when previous char is '\'.
   602  func handleEscape(s *Scanner) rune {
   603  	s.r.inc()
   604  	ch0 := s.r.peek()
   605  	/*
   606  		\" \' \\ \n \0 \b \Z \r \t ==> escape to one char
   607  		\% \_ ==> preserve both char
   608  		other ==> remove \
   609  	*/
   610  	switch ch0 {
   611  	case 'n':
   612  		ch0 = '\n'
   613  	case '0':
   614  		ch0 = 0
   615  	case 'b':
   616  		ch0 = 8
   617  	case 'Z':
   618  		ch0 = 26
   619  	case 'r':
   620  		ch0 = '\r'
   621  	case 't':
   622  		ch0 = '\t'
   623  	case '%', '_':
   624  		s.buf.WriteByte('\\')
   625  	}
   626  	return ch0
   627  }
   628  
   629  func startWithNumber(s *Scanner) (tok int, pos Pos, lit string) {
   630  	pos = s.r.pos()
   631  	tok = intLit
   632  	ch0 := s.r.readByte()
   633  	if ch0 == '0' {
   634  		tok = intLit
   635  		ch1 := s.r.peek()
   636  		switch {
   637  		case ch1 >= '0' && ch1 <= '7':
   638  			s.r.inc()
   639  			s.scanOct()
   640  		case ch1 == 'x' || ch1 == 'X':
   641  			s.r.inc()
   642  			p1 := s.r.pos()
   643  			s.scanHex()
   644  			p2 := s.r.pos()
   645  			// 0x, 0x7fz3 are identifier
   646  			if p1 == p2 || isDigit(s.r.peek()) {
   647  				s.r.incAsLongAs(isIdentChar)
   648  				return identifier, pos, s.r.data(&pos)
   649  			}
   650  			tok = hexLit
   651  		case ch1 == 'b':
   652  			s.r.inc()
   653  			p1 := s.r.pos()
   654  			s.scanBit()
   655  			p2 := s.r.pos()
   656  			// 0b, 0b123, 0b1ab are identifier
   657  			if p1 == p2 || isDigit(s.r.peek()) {
   658  				s.r.incAsLongAs(isIdentChar)
   659  				return identifier, pos, s.r.data(&pos)
   660  			}
   661  			tok = bitLit
   662  		case ch1 == '.':
   663  			return s.scanFloat(&pos)
   664  		case ch1 == 'B':
   665  			s.r.incAsLongAs(isIdentChar)
   666  			return identifier, pos, s.r.data(&pos)
   667  		}
   668  	}
   669  
   670  	s.scanDigits()
   671  	ch0 = s.r.peek()
   672  	if ch0 == '.' || ch0 == 'e' || ch0 == 'E' {
   673  		return s.scanFloat(&pos)
   674  	}
   675  
   676  	// Identifiers may begin with a digit but unless quoted may not consist solely of digits.
   677  	if !s.r.eof() && isIdentChar(ch0) {
   678  		s.r.incAsLongAs(isIdentChar)
   679  		return identifier, pos, s.r.data(&pos)
   680  	}
   681  	lit = s.r.data(&pos)
   682  	return
   683  }
   684  
   685  func startWithDot(s *Scanner) (tok int, pos Pos, lit string) {
   686  	pos = s.r.pos()
   687  	s.r.inc()
   688  	save := s.r.pos()
   689  	if isDigit(s.r.peek()) {
   690  		tok, _, lit = s.scanFloat(&pos)
   691  		if s.r.eof() || !isIdentChar(s.r.peek()) {
   692  			return
   693  		}
   694  		// Fail to parse a float, reset to dot.
   695  		s.r.p = save
   696  	}
   697  	tok, lit = int('.'), "."
   698  	return
   699  }
   700  
   701  func (s *Scanner) scanOct() {
   702  	s.r.incAsLongAs(func(ch rune) bool {
   703  		return ch >= '0' && ch <= '7'
   704  	})
   705  }
   706  
   707  func (s *Scanner) scanHex() {
   708  	s.r.incAsLongAs(func(ch rune) bool {
   709  		return ch >= '0' && ch <= '9' ||
   710  			ch >= 'a' && ch <= 'f' ||
   711  			ch >= 'A' && ch <= 'F'
   712  	})
   713  }
   714  
   715  func (s *Scanner) scanBit() {
   716  	s.r.incAsLongAs(func(ch rune) bool {
   717  		return ch == '0' || ch == '1'
   718  	})
   719  }
   720  
   721  func (s *Scanner) scanFloat(beg *Pos) (tok int, pos Pos, lit string) {
   722  	s.r.p = *beg
   723  	// float = D1 . D2 e D3
   724  	s.scanDigits()
   725  	ch0 := s.r.peek()
   726  	if ch0 == '.' {
   727  		s.r.inc()
   728  		s.scanDigits()
   729  		ch0 = s.r.peek()
   730  	}
   731  	if ch0 == 'e' || ch0 == 'E' {
   732  		s.r.inc()
   733  		ch0 = s.r.peek()
   734  		if ch0 == '-' || ch0 == '+' || isDigit(ch0) {
   735  			s.r.inc()
   736  			s.scanDigits()
   737  			tok = floatLit
   738  		} else {
   739  			// D1 . D2 e XX when XX is not D3, parse the result to an identifier.
   740  			// 9e9e = 9e9(float) + e(identifier)
   741  			// 9est = 9est(identifier)
   742  			s.r.incAsLongAs(isIdentChar)
   743  			tok = identifier
   744  		}
   745  	} else {
   746  		tok = decLit
   747  	}
   748  	pos, lit = *beg, s.r.data(beg)
   749  	return
   750  }
   751  
   752  func (s *Scanner) scanDigits() string {
   753  	pos := s.r.pos()
   754  	s.r.incAsLongAs(isDigit)
   755  	return s.r.data(&pos)
   756  }
   757  
   758  type reader struct {
   759  	s string
   760  	p Pos
   761  	w int
   762  }
   763  
   764  var eof = Pos{-1, -1, -1}
   765  
   766  func (r *reader) eof() bool {
   767  	return r.p.Offset >= len(r.s)
   768  }
   769  
   770  // peek() peeks a rune from underlying reader.
   771  // if reader meets EOF, it will return unicode.ReplacementChar. to distinguish from
   772  // the real unicode.ReplacementChar, the caller should call r.eof() again to check.
   773  func (r *reader) peek() rune {
   774  	if r.eof() {
   775  		return unicode.ReplacementChar
   776  	}
   777  	v, w := rune(r.s[r.p.Offset]), 1
   778  	switch {
   779  	case v == 0:
   780  		r.w = w
   781  		return v // illegal UTF-8 encoding
   782  	case v >= 0x80:
   783  		v, w = utf8.DecodeRuneInString(r.s[r.p.Offset:])
   784  		if v == utf8.RuneError && w == 1 {
   785  			v = rune(r.s[r.p.Offset]) // illegal UTF-8 encoding
   786  		}
   787  	}
   788  	r.w = w
   789  	return v
   790  }
   791  
   792  // inc increase the position offset of the reader.
   793  // peek must be called before calling inc!
   794  func (r *reader) inc() {
   795  	if r.s[r.p.Offset] == '\n' {
   796  		r.p.Line++
   797  		r.p.Col = 0
   798  	}
   799  	r.p.Offset += r.w
   800  	r.p.Col++
   801  }
   802  
   803  func (r *reader) incN(n int) {
   804  	for i := 0; i < n; i++ {
   805  		r.inc()
   806  	}
   807  }
   808  
   809  func (r *reader) readByte() (ch rune) {
   810  	ch = r.peek()
   811  	if ch == unicode.ReplacementChar && r.eof() {
   812  		return
   813  	}
   814  	r.inc()
   815  	return
   816  }
   817  
   818  func (r *reader) pos() Pos {
   819  	return r.p
   820  }
   821  
   822  func (r *reader) data(from *Pos) string {
   823  	return r.s[from.Offset:r.p.Offset]
   824  }
   825  
   826  func (r *reader) incAsLongAs(fn func(rune) bool) rune {
   827  	for {
   828  		ch := r.peek()
   829  		if !fn(ch) {
   830  			return ch
   831  		}
   832  		if ch == unicode.ReplacementChar && r.eof() {
   833  			return 0
   834  		}
   835  		r.inc()
   836  	}
   837  }