github.com/hirochachacha/plua@v0.0.0-20170217012138-c82f520cc725/compiler/scanner/scanner.go

github.com/hirochachacha/plua@v0.0.0-20170217012138-c82f520cc725/compiler/scanner/scanner.go (about)

     1  // Original: src/go/scanner/scanner.go
     2  //
     3  // Copyright 2009 The Go Authors. All rights reserved.
     4  // Portions Copyright 2016 Hiroshi Ioka. All rights reserved.
     5  //
     6  // Redistribution and use in source and binary forms, with or without
     7  // modification, are permitted provided that the following conditions are
     8  // met:
     9  //
    10  //    * Redistributions of source code must retain the above copyright
    11  // notice, this list of conditions and the following disclaimer.
    12  //    * Redistributions in binary form must reproduce the above
    13  // copyright notice, this list of conditions and the following disclaimer
    14  // in the documentation and/or other materials provided with the
    15  // distribution.
    16  //    * Neither the name of Google Inc. nor the names of its
    17  // contributors may be used to endorse or promote products derived from
    18  // this software without specific prior written permission.
    19  //
    20  // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    21  // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    22  // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    23  // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
    24  // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    25  // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    26  // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    27  // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    28  // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    29  // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    30  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    31  
    32  package scanner
    33  
    34  import (
    35  	"bytes"
    36  	"errors"
    37  	"fmt"
    38  	"io"
    39  	"unicode"
    40  
    41  	"github.com/hirochachacha/plua/compiler/token"
    42  	"github.com/hirochachacha/plua/position"
    43  )
    44  
// Scanner tuning constant and the byte order marks recognized at the start
// of the input.
//
// maxConsecutiveEmptyReads is the number of Read attempts fill makes before
// it reports io.ErrNoProgress. bom and utf8bom are the byte sequences that
// skipBom discards; bom1 and utf8bom1 are their respective first bytes, which
// are compared against the current byte before any further bytes are peeked.
const (
	maxConsecutiveEmptyReads = 100
	bom1                     = 0xFE
	bom                      = "\xFE\xFF"
	utf8bom1                 = 0xEF
	utf8bom                  = "\xEF\xBB\xBF"
)
    52  
// Sentinel errors reported by the scanner. They are wrapped together with a
// source position by (*ScanState).error before being returned from Token.
var (
	errInvalidLongStringDelimiter       = errors.New("invalid long string delimiter")
	errIllegalHexadecimalNumber         = errors.New("illegal hexadecimal number")
	errIllegalNumber                    = errors.New("illegal number")
	errInvalidEscapeSequence            = errors.New("escape sequence is invalid Unicode code point")
	errUnknownEscapeSequence            = errors.New("unknown escape sequence")
	errMissingBracketInEscapeSequence   = errors.New("missing bracket in escape sequence")
	errIllegalCharacterInEscapeSequence = errors.New("illegal character in escape sequence")
	errUnterminatedString               = errors.New("unterminated string literal")
	errUnterminatedLongString           = errors.New("unterminated long string literal")
)
    64  
// Mode is a set of bit flags that configure the behavior of a ScanState.
type Mode uint

// Mode flags.
//
// ScanComments makes Token return comments as token.COMMENT tokens; when the
// bit is not set, comments are scanned and silently skipped.
const (
	ScanComments = 1 << iota
)
    70  
// ScanState holds the state of a scanner that reads bytes from an io.Reader
// and splits them into tokens. Create one with Scan, or reuse an existing
// one with Reset.
type ScanState struct {
	// sourceName is copied into the position of every reported error;
	// shebang is the first input line when that line starts with '#'.
	sourceName string
	shebang    string

	// mode holds the Mode flags passed to Scan or Reset.
	mode Mode

	// r is the input being scanned.
	r io.Reader

	// buf[start:end] is the unread input; buf[start] is the current byte.
	// _mark is the index in buf where the literal being scanned begins, or
	// -1 when no literal is being recorded. filled is set once r has
	// returned io.EOF.
	buf    []byte
	start  int
	end    int
	_mark  int
	filled bool

	// clip collects the part of a marked literal that had to be moved out
	// of buf when the buffer was refilled.
	clip bytes.Buffer

	// ch is the current byte, or -1 at the end of the input.
	ch int

	// offset is the number of bytes consumed so far, lineOffset is the
	// offset of the most recently consumed '\n' (-1 before the first one)
	// and line is the current line number, starting at 1.
	offset     int
	lineOffset int
	line       int

	// err is the error recorded by error() until Token hands it to the caller.
	err error
}
    95  
// bailout is the panic value used by (*ScanState).error to abort scanning of
// the current token; Token recovers it and returns the recorded error.
type bailout struct{}
    97  
    98  func Scan(r io.Reader, srcname string, mode Mode) *ScanState {
    99  	s := &ScanState{
   100  		r:          r,
   101  		sourceName: srcname,
   102  		buf:        make([]byte, 4096),
   103  		mode:       mode,
   104  		_mark:      -1,
   105  		lineOffset: -1,
   106  		line:       1,
   107  	}
   108  
   109  	return s
   110  }
   111  
   112  func (s *ScanState) Reset(r io.Reader, srcname string, mode Mode) {
   113  	s.sourceName = srcname
   114  	s.shebang = ""
   115  
   116  	s.mode = mode
   117  	s.r = r
   118  
   119  	s.start = 0
   120  	s.end = 0
   121  	s._mark = -1
   122  	s.filled = false
   123  
   124  	s.clip.Reset()
   125  
   126  	s.offset = 0
   127  	s.lineOffset = -1
   128  	s.line = 1
   129  
   130  	s.err = nil
   131  }
   132  
// SourceName returns the source name that was passed to Scan or Reset.
func (s *ScanState) SourceName() string {
	return s.sourceName
}
   136  
// Shebang returns the first line of the input if that line starts with '#',
// without its line terminator. It is empty until the first call to Token and
// when the input has no such line.
func (s *ScanState) Shebang() string {
	return s.shebang
}
   140  
   141  func (s *ScanState) Token() (tok token.Token, err error) {
   142  	var typ token.Type
   143  	var pos position.Position
   144  	var lit string
   145  
   146  	defer func() {
   147  		if r := recover(); r != nil {
   148  			_ = r.(bailout)
   149  
   150  			err = s.err
   151  			tok = token.Token{Type: typ, Pos: pos, Lit: lit}
   152  			s.err = nil
   153  			s._mark = -1
   154  		}
   155  	}()
   156  
   157  	if s.offset == 0 {
   158  		s.init()
   159  
   160  		if s.ch == bom1 || s.ch == utf8bom1 {
   161  			s.skipBom()
   162  		}
   163  
   164  		if s.ch == '#' {
   165  			s.shebang = s.scanSheBang()
   166  		}
   167  	}
   168  
   169  scanAgain:
   170  	s.skipSpace()
   171  
   172  	pos = s.pos()
   173  
   174  	switch ch := s.ch; {
   175  	case isLetter(ch):
   176  		lit = s.scanIdentifier()
   177  		if len(lit) > 1 {
   178  			// keywords are longer than one letter - avoid lookup otherwise
   179  			typ = token.Lookup(lit)
   180  		} else {
   181  			typ = token.NAME
   182  		}
   183  	case isDigit(ch):
   184  		typ, lit = s.scanNumber(false)
   185  	default:
   186  		switch ch {
   187  		case -1:
   188  			typ = token.EOF
   189  		case '"', '\'':
   190  			typ = token.STRING
   191  			lit = s.scanString(ch)
   192  		case ':':
   193  			s.next()
   194  
   195  			if s.ch == ':' {
   196  				s.next()
   197  				typ = token.LABEL
   198  			} else {
   199  				typ = token.COLON
   200  			}
   201  		case '.':
   202  			switch p := s.peek(2); p {
   203  			case "..":
   204  				s.next()
   205  				s.next()
   206  				if s.ch == '.' {
   207  					s.next()
   208  					typ = token.ELLIPSIS
   209  				} else {
   210  					typ = token.CONCAT
   211  				}
   212  			default:
   213  				if len(p) == 2 && '0' <= p[1] && p[1] <= '9' {
   214  					typ, lit = s.scanNumber(true)
   215  				} else {
   216  					s.next()
   217  					typ = token.PERIOD
   218  				}
   219  			}
   220  		case ',':
   221  			s.next()
   222  
   223  			typ = token.COMMA
   224  		case ';':
   225  			s.next()
   226  
   227  			typ = token.SEMICOLON
   228  		case '(':
   229  			s.next()
   230  
   231  			typ = token.LPAREN
   232  		case ')':
   233  			s.next()
   234  
   235  			typ = token.RPAREN
   236  		case '{':
   237  			s.next()
   238  
   239  			typ = token.LBRACE
   240  		case '}':
   241  			s.next()
   242  
   243  			typ = token.RBRACE
   244  		case '[':
   245  			switch s.peek(2) {
   246  			case "[[":
   247  				typ = token.STRING
   248  				lit = s.scanLongString(true)
   249  			case "[=":
   250  				typ = token.STRING
   251  				lit = s.scanLongString(false)
   252  			default:
   253  				s.next()
   254  
   255  				typ = token.LBRACK
   256  			}
   257  		case ']':
   258  			s.next()
   259  
   260  			typ = token.RBRACK
   261  		case '+':
   262  			s.next()
   263  
   264  			typ = token.ADD
   265  		case '-':
   266  			if s.peek(2) == "--" {
   267  				typ = token.COMMENT
   268  
   269  				lit = s.scanComment()
   270  
   271  				if s.mode&ScanComments == 0 {
   272  					goto scanAgain
   273  				}
   274  			} else {
   275  				s.next()
   276  
   277  				typ = token.SUB
   278  			}
   279  		case '*':
   280  			s.next()
   281  
   282  			typ = token.MUL
   283  		case '%':
   284  			s.next()
   285  
   286  			typ = token.MOD
   287  		case '^':
   288  			s.next()
   289  
   290  			typ = token.POW
   291  		case '/':
   292  			s.next()
   293  
   294  			if s.ch == '/' {
   295  				s.next()
   296  				typ = token.IDIV
   297  			} else {
   298  				typ = token.DIV
   299  			}
   300  		case '&':
   301  			s.next()
   302  
   303  			typ = token.BAND
   304  		case '|':
   305  			s.next()
   306  
   307  			typ = token.BOR
   308  		case '~':
   309  			s.next()
   310  
   311  			if s.ch == '=' {
   312  				s.next()
   313  				typ = token.NE
   314  			} else {
   315  				typ = token.BXOR
   316  			}
   317  		case '<':
   318  			s.next()
   319  
   320  			switch s.ch {
   321  			case '<':
   322  				s.next()
   323  				typ = token.SHL
   324  			case '=':
   325  				s.next()
   326  				typ = token.LE
   327  			default:
   328  				typ = token.LT
   329  			}
   330  		case '>':
   331  			s.next()
   332  
   333  			switch s.ch {
   334  			case '>':
   335  				s.next()
   336  				typ = token.SHR
   337  			case '=':
   338  				s.next()
   339  				typ = token.GE
   340  			default:
   341  				typ = token.GT
   342  			}
   343  		case '=':
   344  			s.next()
   345  
   346  			if s.ch == '=' {
   347  				s.next()
   348  				typ = token.EQ
   349  			} else {
   350  				typ = token.ASSIGN
   351  			}
   352  		case '#':
   353  			s.next()
   354  
   355  			typ = token.LEN
   356  		default:
   357  			s.next()
   358  			s.error(pos, fmt.Errorf("illegal character %c", ch))
   359  			typ = token.ILLEGAL
   360  			lit = string(ch)
   361  		}
   362  	}
   363  
   364  	tok = token.Token{Type: typ, Pos: pos, Lit: lit}
   365  
   366  	return
   367  }
   368  
   369  func (s *ScanState) skipBom() {
   370  	switch {
   371  	case s.ch == bom1 && s.peek(2) == bom:
   372  		s.next()
   373  		s.next()
   374  	case s.ch == utf8bom1 && s.peek(3) == utf8bom:
   375  		s.next()
   376  		s.next()
   377  		s.next()
   378  	}
   379  }
   380  
   381  func trimRightCR(s string) string {
   382  	if len(s) > 0 && s[len(s)-1] == '\r' {
   383  		s = s[:len(s)-1]
   384  	}
   385  	return s
   386  }
   387  
   388  func (s *ScanState) scanSheBang() (shebang string) {
   389  	s.mark()
   390  
   391  	s.next()
   392  	for s.ch != '\n' {
   393  		if s.ch == -1 {
   394  			return trimRightCR(s.capture())
   395  		}
   396  		s.next()
   397  	}
   398  
   399  	shebang = trimRightCR(s.capture())
   400  
   401  	s.next()
   402  
   403  	return
   404  }
   405  
   406  func (s *ScanState) scanComment() (lit string) {
   407  	var err error
   408  
   409  	s.mark()
   410  
   411  	s.next() // skip '-'
   412  	s.next() // skip '-'
   413  
   414  	if s.ch == '[' {
   415  		s.next()
   416  		switch s.ch {
   417  		case '[':
   418  			err = s.skipLongString(true, true)
   419  			if err != nil {
   420  				s.error(s.pos(), err)
   421  			}
   422  
   423  			lit = s.capture()
   424  
   425  			return
   426  		case '=':
   427  			err = s.skipLongString(true, false)
   428  			if err != nil {
   429  				s.error(s.pos(), err)
   430  			}
   431  
   432  			lit = s.capture()
   433  
   434  			return
   435  		}
   436  	}
   437  
   438  	for s.ch != '\n' && s.ch >= 0 {
   439  		s.next()
   440  	}
   441  
   442  	lit = trimRightCR(s.capture())
   443  
   444  	return
   445  }
   446  
   447  func (s *ScanState) scanIdentifier() (lit string) {
   448  	s.mark()
   449  
   450  	s.next()
   451  
   452  	for isLetter(s.ch) || isDigit(s.ch) {
   453  		s.next()
   454  	}
   455  
   456  	return s.capture()
   457  }
   458  
   459  func (s *ScanState) skipMantissa(base int) {
   460  	for digitVal(s.ch) < base {
   461  		s.next()
   462  	}
   463  }
   464  
   465  func (s *ScanState) scanNumber(seenDecimalPoint bool) (tok token.Type, lit string) {
   466  	s.mark()
   467  
   468  	tok = token.INT
   469  
   470  	base := 10
   471  
   472  	ioff := s.offset
   473  	ipos := s.pos()
   474  
   475  	if seenDecimalPoint {
   476  		s.next() // skip .
   477  		tok = token.FLOAT
   478  
   479  		if s.ch == '.' {
   480  			s.error(s.pos(), errIllegalNumber)
   481  		}
   482  
   483  		s.skipMantissa(base)
   484  
   485  		goto exponent
   486  	}
   487  
   488  	if s.ch == '0' {
   489  		// int or float
   490  		s.next()
   491  
   492  		// hexadecimal int or float
   493  		if s.ch == 'x' || s.ch == 'X' {
   494  			s.next()
   495  
   496  			base = 16
   497  		}
   498  	}
   499  
   500  	s.skipMantissa(base)
   501  
   502  	if s.ch == '.' {
   503  		tok = token.FLOAT
   504  		s.next()
   505  
   506  		if s.ch == '.' {
   507  			s.error(s.pos(), errIllegalNumber)
   508  		}
   509  
   510  		s.skipMantissa(base)
   511  	}
   512  
   513  exponent:
   514  	if base == 16 {
   515  		if s.offset-ioff <= 2 {
   516  			// only scanned "0x" or "0X"
   517  			s.error(ipos, errIllegalHexadecimalNumber)
   518  		}
   519  
   520  		if s.ch == 'p' || s.ch == 'P' {
   521  			tok = token.FLOAT
   522  			s.next()
   523  
   524  			if s.ch == '-' || s.ch == '+' {
   525  				s.next()
   526  			}
   527  
   528  			poff := s.offset
   529  
   530  			s.skipMantissa(10)
   531  
   532  			if s.offset-poff == 0 {
   533  				// only scanned "p"
   534  				s.error(s.pos(), errIllegalHexadecimalNumber)
   535  			}
   536  		}
   537  	} else {
   538  		if s.ch == 'e' || s.ch == 'E' {
   539  			tok = token.FLOAT
   540  			s.next()
   541  
   542  			if s.ch == '-' || s.ch == '+' {
   543  				s.next()
   544  			}
   545  
   546  			poff := s.offset
   547  
   548  			s.skipMantissa(base)
   549  
   550  			if s.offset-poff == 0 {
   551  				// only scanned "e"
   552  				s.error(s.pos(), errIllegalNumber)
   553  			}
   554  		}
   555  	}
   556  
   557  	lit = s.capture()
   558  
   559  	return
   560  }
   561  
   562  func (s *ScanState) scanString(quote int) (lit string) {
   563  	s.mark()
   564  
   565  	s.next()
   566  
   567  	for s.ch != quote {
   568  		if s.ch == '\n' || s.ch == '\r' || s.ch < 0 {
   569  			lit = s.capture()
   570  
   571  			s.error(s.pos(), errUnterminatedString)
   572  
   573  			return
   574  		}
   575  
   576  		if s.ch == '\\' {
   577  			s.skipEscape(quote)
   578  		} else {
   579  			s.next()
   580  		}
   581  	}
   582  
   583  	s.next()
   584  
   585  	lit = s.capture()
   586  
   587  	return
   588  }
   589  
   590  func (s *ScanState) skipEscape(quote int) {
   591  	s.next()
   592  
   593  	pos := s.pos()
   594  
   595  	var pred func(int) bool
   596  	var i, base, max uint32
   597  
   598  	switch s.ch {
   599  	case '\r':
   600  		s.next()
   601  		if s.ch == '\n' { // CRLN
   602  			s.next()
   603  		}
   604  	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '\n', '\'', '"':
   605  		s.next()
   606  		return
   607  	case 'z':
   608  		s.next()
   609  		s.skipSpace()
   610  		return
   611  	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
   612  		i, base, max = 3, 10, 255
   613  		pred = isDigit
   614  	case 'x':
   615  		s.next()
   616  		i, base, max = 2, 16, 255
   617  		pred = isXdigit
   618  	case 'u':
   619  		s.next()
   620  
   621  		if s.ch != '{' {
   622  			s.error(pos, errMissingBracketInEscapeSequence)
   623  
   624  			return
   625  		}
   626  
   627  		s.next()
   628  
   629  		i, base, max = 8, 16, unicode.MaxRune
   630  		pred = isXdigit
   631  	default:
   632  		ch := s.ch
   633  		s.next() // always make progress
   634  		s.error(pos, fmt.Errorf("unknown escape sequence %c", ch))
   635  
   636  		return
   637  	}
   638  
   639  	var x uint32
   640  
   641  	j := i
   642  	for ; j > 0 && s.ch != quote && pred(s.ch); j-- {
   643  		d := uint32(digitVal(s.ch))
   644  		if d >= base {
   645  			// if not unicode
   646  			if max != unicode.MaxRune {
   647  				s.error(pos, fmt.Errorf("illegal character %c in escape sequence", s.ch))
   648  			}
   649  
   650  			break
   651  		}
   652  
   653  		// check overflow
   654  		if x > (unicode.MaxRune-d)/base {
   655  			s.error(pos, fmt.Errorf("escape sequence is invalid Unicode code point %c", s.ch))
   656  
   657  			return
   658  		}
   659  
   660  		x = x*base + d
   661  
   662  		s.next()
   663  	}
   664  
   665  	// hex
   666  	if i == 2 {
   667  		if j > 0 {
   668  			s.error(pos, errUnknownEscapeSequence)
   669  
   670  			return
   671  		}
   672  	}
   673  
   674  	// unicode
   675  	if max == unicode.MaxRune {
   676  		if s.ch != '}' {
   677  			s.error(pos, errMissingBracketInEscapeSequence)
   678  
   679  			return
   680  		}
   681  
   682  		s.next()
   683  
   684  		if 0xD800 <= x && x < 0xE000 {
   685  			s.error(pos, fmt.Errorf("escape sequence is invalid Unicode code point %c", s.ch))
   686  		}
   687  
   688  		return
   689  	}
   690  
   691  	if x > max {
   692  		s.error(pos, errInvalidEscapeSequence)
   693  	}
   694  }
   695  
   696  func (s *ScanState) scanLongString(simple bool) (lit string) {
   697  	var err error
   698  
   699  	s.mark()
   700  
   701  	s.next()
   702  
   703  	err = s.skipLongString(false, simple)
   704  	if err != nil {
   705  		s.error(s.pos(), err)
   706  	}
   707  
   708  	lit = s.capture()
   709  
   710  	return
   711  }
   712  
   713  func (s *ScanState) skipLongString(comment bool, simple bool) (err error) {
   714  	s.next()
   715  
   716  	if simple {
   717  		for {
   718  			for s.ch != ']' {
   719  				if s.ch < 0 {
   720  					err = errUnterminatedLongString
   721  
   722  					return
   723  				}
   724  				s.next()
   725  			}
   726  
   727  			s.next()
   728  
   729  			if s.ch == ']' {
   730  				s.next()
   731  				break
   732  			}
   733  		}
   734  
   735  		return
   736  	}
   737  
   738  	depth := 1
   739  
   740  	for s.ch == '=' {
   741  		depth++
   742  		s.next()
   743  	}
   744  
   745  	if s.ch != '[' {
   746  		if comment {
   747  			for s.ch != '\n' && s.ch != '\r' && s.ch >= 0 {
   748  				s.next()
   749  			}
   750  			return
   751  		}
   752  
   753  		err = errInvalidLongStringDelimiter
   754  
   755  		return
   756  	}
   757  
   758  	s.next()
   759  
   760  	for {
   761  		_depth := depth
   762  		for s.ch != ']' {
   763  			if s.ch < 0 {
   764  				err = errUnterminatedLongString
   765  
   766  				return
   767  			}
   768  			s.next()
   769  		}
   770  
   771  		s.next()
   772  
   773  		for s.ch == '=' {
   774  			_depth--
   775  			s.next()
   776  		}
   777  
   778  		if _depth != 0 {
   779  			continue
   780  		}
   781  
   782  		if s.ch == ']' {
   783  			s.next()
   784  			break
   785  		}
   786  	}
   787  
   788  	return
   789  }
   790  
   791  func (s *ScanState) skipSpace() {
   792  	for isSpace(s.ch) {
   793  		s.next()
   794  	}
   795  }
   796  
   797  func (s *ScanState) error(pos position.Position, err error) {
   798  	pos.SourceName = s.sourceName
   799  
   800  	s.err = &Error{
   801  		Pos: pos,
   802  		Err: err,
   803  	}
   804  
   805  	panic(bailout{})
   806  }
   807  
   808  func (s *ScanState) pos() position.Position {
   809  	return position.Position{
   810  		Line:   s.line,
   811  		Column: s.offset - s.lineOffset,
   812  	}
   813  }
   814  
   815  func (s *ScanState) mark() {
   816  	if s._mark != -1 {
   817  		panic("mark twice")
   818  	}
   819  
   820  	s._mark = s.start
   821  }
   822  
   823  func (s *ScanState) capture() string {
   824  	if s._mark == -1 {
   825  		panic("no mark")
   826  	}
   827  
   828  	buf := s.buf[s._mark:s.start]
   829  
   830  	s._mark = -1
   831  
   832  	if s.clip.Len() > 0 {
   833  		s.clip.Write(buf)
   834  		buf = s.clip.Bytes()
   835  		s.clip.Reset()
   836  	}
   837  
   838  	return string(buf)
   839  }
   840  
   841  func (s *ScanState) init() {
   842  	s.fill()
   843  
   844  	if s.start == s.end {
   845  		s.ch = -1
   846  		s.start = 0
   847  		s.end = 0
   848  
   849  		return
   850  	}
   851  
   852  	s.ch = int(s.buf[s.start])
   853  }
   854  
   855  func (s *ScanState) next() {
   856  	if s.ch == -1 {
   857  		return
   858  	}
   859  
   860  	if s.ch == '\n' {
   861  		s.lineOffset = s.offset
   862  		s.line++
   863  	}
   864  
   865  	s.start++
   866  	s.offset++
   867  
   868  	if s.start == s.end {
   869  		s.fill()
   870  		if s.start == s.end {
   871  			s.ch = -1
   872  			s.start = 0
   873  			s.end = 0
   874  
   875  			return
   876  		}
   877  	}
   878  
   879  	s.ch = int(s.buf[s.start])
   880  }
   881  
   882  func (s *ScanState) peek(n int) string {
   883  	if n > s.end-s.start {
   884  		s.fill()
   885  		if n > s.end-s.start {
   886  			return string(s.buf[s.start:s.end])
   887  		}
   888  	}
   889  
   890  	return string(s.buf[s.start : s.start+n])
   891  }
   892  
   893  func (s *ScanState) fill() {
   894  	if s.filled {
   895  		return
   896  	}
   897  
   898  	if s.start > 0 {
   899  		if s._mark != -1 {
   900  			s.clip.Write(s.buf[s._mark:s.start])
   901  
   902  			s._mark = 0
   903  		}
   904  
   905  		copy(s.buf, s.buf[s.start:s.end])
   906  		s.end -= s.start
   907  		s.start = 0
   908  	}
   909  
   910  	for i := maxConsecutiveEmptyReads; i > 0; i-- {
   911  		n, err := s.r.Read(s.buf[s.end:])
   912  		if err == io.EOF {
   913  			s.filled = true
   914  
   915  			return
   916  		}
   917  		if n < 0 {
   918  			panic("reader returned negative count from Read")
   919  		}
   920  		s.end += n
   921  		if err != nil {
   922  			s.error(position.NoPos, err)
   923  			return
   924  		}
   925  
   926  		if n > 0 {
   927  			return
   928  		}
   929  	}
   930  	s.error(position.NoPos, io.ErrNoProgress)
   931  }
   932  
   933  func digitVal(ch int) int {
   934  	switch {
   935  	case uint(ch)-'0' < 10:
   936  		return int(ch - '0')
   937  	case uint(ch)-'a' < 6:
   938  		return int(ch - 'a' + 10)
   939  	case uint(ch)-'A' < 6:
   940  		return int(ch - 'A' + 10)
   941  	}
   942  
   943  	return 16 // larger than any legal digit val
   944  }
   945  
   946  func isSpace(ch int) bool {
   947  	return ch == ' ' || uint(ch)-'\t' < 5
   948  }
   949  
   950  func isLetter(ch int) bool {
   951  	return ch == '_' || (uint(ch)|32)-'a' < 26
   952  }
   953  
   954  func isDigit(ch int) bool {
   955  	return uint(ch)-'0' < 10
   956  }
   957  
   958  func isXdigit(ch int) bool {
   959  	return uint(ch)-'0' < 10 || (uint(ch)|32)-'a' < 6
   960  }