github.com/gagliardetto/golang-go@v0.0.0-20201020153340-53909ea70814/cmd/compile/internal/syntax/scanner.go

github.com/gagliardetto/golang-go@v0.0.0-20201020153340-53909ea70814/cmd/compile/internal/syntax/scanner.go (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // This file implements scanner, a lexical tokenizer for
     6  // Go source. After initialization, consecutive calls of
     7  // next advance the scanner one token at a time.
     8  //
     9  // This file, source.go, and tokens.go are self-contained
    10  // (go tool compile scanner.go source.go tokens.go compiles)
    11  // and thus could be made into its own package.
    12  
    13  package syntax
    14  
    15  import (
    16  	"fmt"
    17  	"io"
    18  	"unicode"
    19  	"unicode/utf8"
    20  )
    21  
    22  // The mode flags below control which comments are reported
    23  // by calling the error handler. If no flag is set, comments
    24  // are ignored.
    25  const (
    26  	comments   uint = 1 << iota // call handler for all comments
    27  	directives                  // call handler for directives only
    28  )
    29  
    30  type scanner struct {
    31  	source
    32  	mode   uint
    33  	nlsemi bool // if set '\n' and EOF translate to ';'
    34  
    35  	// current token, valid after calling next()
    36  	line, col uint
    37  	tok       token
    38  	lit       string   // valid if tok is _Name, _Literal, or _Semi ("semicolon", "newline", or "EOF"); may be malformed if bad is true
    39  	bad       bool     // valid if tok is _Literal, true if a syntax error occurred, lit may be malformed
    40  	kind      LitKind  // valid if tok is _Literal
    41  	op        Operator // valid if tok is _Operator, _AssignOp, or _IncOp
    42  	prec      int      // valid if tok is _Operator, _AssignOp, or _IncOp
    43  }
    44  
    45  func (s *scanner) init(src io.Reader, errh func(line, col uint, msg string), mode uint) {
    46  	s.source.init(src, errh)
    47  	s.mode = mode
    48  	s.nlsemi = false
    49  }
    50  
    51  // errorf reports an error at the most recently read character position.
    52  func (s *scanner) errorf(format string, args ...interface{}) {
    53  	s.bad = true
    54  	s.error(fmt.Sprintf(format, args...))
    55  }
    56  
    57  // errorAtf reports an error at a byte column offset relative to the current token start.
    58  func (s *scanner) errorAtf(offset int, format string, args ...interface{}) {
    59  	s.bad = true
    60  	s.errh(s.line, s.col+uint(offset), fmt.Sprintf(format, args...))
    61  }
    62  
// next advances the scanner by reading the next token.
//
// If a read, source encoding, or lexical error occurs, next calls
// the installed error handler with the respective error position
// and message. The error message is guaranteed to be non-empty and
// never starts with a '/'. The error handler must exist.
//
// If the scanner mode includes the comments flag and a comment
// (including comments containing directives) is encountered, the
// error handler is also called with each comment position and text
// (including opening /* or // and closing */, but without a newline
// at the end of line comments). Comment text always starts with a /
// which can be used to distinguish these handler calls from errors.
//
// If the scanner mode includes the directives (but not the comments)
// flag, only comments containing a //line, /*line, or //go: directive
// are reported, in the same way as regular comments. Directives in
// //-style comments are only recognized if they are at the beginning
// of a line.
//
// On return, s.line and s.col hold the token start and s.tok the token
// class; s.lit, s.kind, s.op, and s.prec are set for the token classes
// listed on the respective scanner fields.
func (s *scanner) next() {
	// Remember whether the previous token asked for semicolon insertion
	// and clear the flag; the token scanned below sets it again if needed.
	nlsemi := s.nlsemi
	s.nlsemi = false

redo:
	// skip white space
	// (a '\n' is white space only if it need not be turned into a ';')
	c := s.getr()
	for c == ' ' || c == '\t' || c == '\n' && !nlsemi || c == '\r' {
		c = s.getr()
	}

	// token start
	s.line, s.col = s.source.line0, s.source.col0

	// Identifiers and keywords. Non-ASCII runes are classified (and
	// possibly reported as errors) by isIdentRune.
	if isLetter(c) || c >= utf8.RuneSelf && s.isIdentRune(c, true) {
		s.ident()
		return
	}

	switch c {
	case -1:
		// end of input; it becomes a ';' if one is pending
		if nlsemi {
			s.lit = "EOF"
			s.tok = _Semi
			break
		}
		s.tok = _EOF

	case '\n':
		// only reached if nlsemi is set (otherwise skipped as white space)
		s.lit = "newline"
		s.tok = _Semi

	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		s.number(c)

	case '"':
		s.stdString()

	case '`':
		s.rawString()

	case '\'':
		s.rune()

	case '(':
		s.tok = _Lparen

	case '[':
		s.tok = _Lbrack

	case '{':
		s.tok = _Lbrace

	case ',':
		s.tok = _Comma

	case ';':
		s.lit = "semicolon"
		s.tok = _Semi

	// A closing parenthesis, bracket, or brace enables semicolon
	// insertion for a following newline or EOF.
	case ')':
		s.nlsemi = true
		s.tok = _Rparen

	case ']':
		s.nlsemi = true
		s.tok = _Rbrack

	case '}':
		s.nlsemi = true
		s.tok = _Rbrace

	case ':':
		if s.getr() == '=' {
			s.tok = _Define
			break
		}
		s.ungetr()
		s.tok = _Colon

	case '.':
		c = s.getr()
		if isDecimal(c) {
			// a number starting with '.', e.g. ".5"
			s.ungetr()
			s.unread(1) // correct position of '.' (needed by startLit in number)
			s.number('.')
			break
		}
		if c == '.' {
			c = s.getr()
			if c == '.' {
				s.tok = _DotDotDot
				break
			}
			// only two dots: give back the extra one and report a single '.'
			s.unread(1)
		}
		s.ungetr()
		s.tok = _Dot

	case '+':
		s.op, s.prec = Add, precAdd
		c = s.getr()
		if c != '+' {
			goto assignop
		}
		// "++"
		s.nlsemi = true
		s.tok = _IncOp

	case '-':
		s.op, s.prec = Sub, precAdd
		c = s.getr()
		if c != '-' {
			goto assignop
		}
		// "--"
		s.nlsemi = true
		s.tok = _IncOp

	case '*':
		s.op, s.prec = Mul, precMul
		// don't goto assignop - want _Star token
		if s.getr() == '=' {
			s.tok = _AssignOp
			break
		}
		s.ungetr()
		s.tok = _Star

	case '/':
		c = s.getr()
		if c == '/' {
			// line comment: skip it and scan the token that follows
			s.lineComment()
			goto redo
		}
		if c == '*' {
			s.fullComment()
			if s.source.line > s.line && nlsemi {
				// A multi-line comment acts like a newline;
				// it translates to a ';' if nlsemi is set.
				s.lit = "newline"
				s.tok = _Semi
				break
			}
			goto redo
		}
		s.op, s.prec = Div, precMul
		goto assignop

	case '%':
		s.op, s.prec = Rem, precMul
		c = s.getr()
		goto assignop

	case '&':
		c = s.getr()
		if c == '&' {
			s.op, s.prec = AndAnd, precAndAnd
			s.tok = _Operator
			break
		}
		s.op, s.prec = And, precMul
		if c == '^' {
			// "&^"
			s.op = AndNot
			c = s.getr()
		}
		goto assignop

	case '|':
		c = s.getr()
		if c == '|' {
			s.op, s.prec = OrOr, precOrOr
			s.tok = _Operator
			break
		}
		s.op, s.prec = Or, precAdd
		goto assignop

	case '^':
		s.op, s.prec = Xor, precAdd
		c = s.getr()
		goto assignop

	case '<':
		c = s.getr()
		if c == '=' {
			s.op, s.prec = Leq, precCmp
			s.tok = _Operator
			break
		}
		if c == '<' {
			s.op, s.prec = Shl, precMul
			c = s.getr()
			goto assignop
		}
		if c == '-' {
			s.tok = _Arrow
			break
		}
		s.ungetr()
		s.op, s.prec = Lss, precCmp
		s.tok = _Operator

	case '>':
		c = s.getr()
		if c == '=' {
			s.op, s.prec = Geq, precCmp
			s.tok = _Operator
			break
		}
		if c == '>' {
			s.op, s.prec = Shr, precMul
			c = s.getr()
			goto assignop
		}
		s.ungetr()
		s.op, s.prec = Gtr, precCmp
		s.tok = _Operator

	case '=':
		if s.getr() == '=' {
			s.op, s.prec = Eql, precCmp
			s.tok = _Operator
			break
		}
		s.ungetr()
		s.tok = _Assign

	case '!':
		if s.getr() == '=' {
			s.op, s.prec = Neq, precCmp
			s.tok = _Operator
			break
		}
		s.ungetr()
		s.op, s.prec = Not, 0
		s.tok = _Operator

	default:
		// report the offending character and continue with the next one
		s.tok = 0
		s.errorf("invalid character %#U", c)
		goto redo
	}

	return

assignop:
	// s.op and s.prec are set and c is the character following the
	// operator: an '=' makes it an assignment operation, anything else
	// is given back and the plain operator is reported.
	if c == '=' {
		s.tok = _AssignOp
		return
	}
	s.ungetr()
	s.tok = _Operator
}
   335  
   336  func isLetter(c rune) bool {
   337  	return 'a' <= lower(c) && lower(c) <= 'z' || c == '_'
   338  }
   339  
   340  func (s *scanner) ident() {
   341  	s.startLit()
   342  
   343  	// accelerate common case (7bit ASCII)
   344  	c := s.getr()
   345  	for isLetter(c) || isDecimal(c) {
   346  		c = s.getr()
   347  	}
   348  
   349  	// general case
   350  	if c >= utf8.RuneSelf {
   351  		for s.isIdentRune(c, false) {
   352  			c = s.getr()
   353  		}
   354  	}
   355  	s.ungetr()
   356  
   357  	lit := s.stopLit()
   358  
   359  	// possibly a keyword
   360  	if len(lit) >= 2 {
   361  		if tok := keywordMap[hash(lit)]; tok != 0 && tokStrFast(tok) == string(lit) {
   362  			s.nlsemi = contains(1<<_Break|1<<_Continue|1<<_Fallthrough|1<<_Return, tok)
   363  			s.tok = tok
   364  			return
   365  		}
   366  	}
   367  
   368  	s.nlsemi = true
   369  	s.lit = string(lit)
   370  	s.tok = _Name
   371  }
   372  
   373  // tokStrFast is a faster version of token.String, which assumes that tok
   374  // is one of the valid tokens - and can thus skip bounds checks.
   375  func tokStrFast(tok token) string {
   376  	return _token_name[_token_index[tok-1]:_token_index[tok]]
   377  }
   378  
   379  func (s *scanner) isIdentRune(c rune, first bool) bool {
   380  	switch {
   381  	case unicode.IsLetter(c) || c == '_':
   382  		// ok
   383  	case unicode.IsDigit(c):
   384  		if first {
   385  			s.errorf("identifier cannot begin with digit %#U", c)
   386  		}
   387  	case c >= utf8.RuneSelf:
   388  		s.errorf("invalid identifier character %#U", c)
   389  	default:
   390  		return false
   391  	}
   392  	return true
   393  }
   394  
   395  // hash is a perfect hash function for keywords.
   396  // It assumes that s has at least length 2.
   397  func hash(s []byte) uint {
   398  	return (uint(s[0])<<4 ^ uint(s[1]) + uint(len(s))) & uint(len(keywordMap)-1)
   399  }
   400  
   401  var keywordMap [1 << 6]token // size must be power of two
   402  
   403  func init() {
   404  	// populate keywordMap
   405  	for tok := _Break; tok <= _Var; tok++ {
   406  		h := hash([]byte(tok.String()))
   407  		if keywordMap[h] != 0 {
   408  			panic("imperfect hash")
   409  		}
   410  		keywordMap[h] = tok
   411  	}
   412  }
   413  
   414  func lower(c rune) rune     { return ('a' - 'A') | c } // returns lower-case c iff c is ASCII letter
   415  func isDecimal(c rune) bool { return '0' <= c && c <= '9' }
   416  func isHex(c rune) bool     { return '0' <= c && c <= '9' || 'a' <= lower(c) && lower(c) <= 'f' }
   417  
   418  // digits accepts the sequence { digit | '_' } starting with c0.
   419  // If base <= 10, digits accepts any decimal digit but records
   420  // the index (relative to the literal start) of a digit >= base
   421  // in *invalid, if *invalid < 0.
   422  // digits returns the first rune that is not part of the sequence
   423  // anymore, and a bitset describing whether the sequence contained
   424  // digits (bit 0 is set), or separators '_' (bit 1 is set).
   425  func (s *scanner) digits(c0 rune, base int, invalid *int) (c rune, digsep int) {
   426  	c = c0
   427  	if base <= 10 {
   428  		max := rune('0' + base)
   429  		for isDecimal(c) || c == '_' {
   430  			ds := 1
   431  			if c == '_' {
   432  				ds = 2
   433  			} else if c >= max && *invalid < 0 {
   434  				*invalid = int(s.col0 - s.col) // record invalid rune index
   435  			}
   436  			digsep |= ds
   437  			c = s.getr()
   438  		}
   439  	} else {
   440  		for isHex(c) || c == '_' {
   441  			ds := 1
   442  			if c == '_' {
   443  				ds = 2
   444  			}
   445  			digsep |= ds
   446  			c = s.getr()
   447  		}
   448  	}
   449  	return
   450  }
   451  
// number scans a numeric literal. c is the literal's first character:
// a decimal digit, or '.' for a literal that starts with the radix
// point. On return, s.tok is _Literal, s.lit holds the literal text,
// s.kind is IntLit, FloatLit, or ImagLit, and semicolon insertion is
// enabled.
//
// Malformed literals are reported via errorf/errorAtf, which set
// s.bad. All checks after the first possible one are guarded by
// !s.bad, so only the first problem found in a literal is reported.
func (s *scanner) number(c rune) {
	s.startLit()
	s.bad = false

	base := 10        // number base
	prefix := rune(0) // one of 0 (decimal), '0' (0-octal), 'x', 'o', or 'b'
	digsep := 0       // bit 0: digit present, bit 1: '_' present
	invalid := -1     // index of invalid digit in literal, or < 0

	// integer part
	var ds int
	if c != '.' {
		s.kind = IntLit
		if c == '0' {
			// determine the base from the character after the leading 0
			c = s.getr()
			switch lower(c) {
			case 'x':
				c = s.getr()
				base, prefix = 16, 'x'
			case 'o':
				c = s.getr()
				base, prefix = 8, 'o'
			case 'b':
				c = s.getr()
				base, prefix = 2, 'b'
			default:
				base, prefix = 8, '0'
				digsep = 1 // leading 0
			}
		}
		c, ds = s.digits(c, base, &invalid)
		digsep |= ds
	}

	// fractional part
	if c == '.' {
		s.kind = FloatLit
		if prefix == 'o' || prefix == 'b' {
			s.errorf("invalid radix point in %s", litname(prefix))
		}
		c, ds = s.digits(s.getr(), base, &invalid)
		digsep |= ds
	}

	// the mantissa (integer and fractional part) must contain a digit
	if digsep&1 == 0 && !s.bad {
		s.errorf("%s has no digits", litname(prefix))
	}

	// exponent
	if e := lower(c); e == 'e' || e == 'p' {
		if !s.bad {
			switch {
			case e == 'e' && prefix != 0 && prefix != '0':
				s.errorf("%q exponent requires decimal mantissa", c)
			case e == 'p' && prefix != 'x':
				s.errorf("%q exponent requires hexadecimal mantissa", c)
			}
		}
		c = s.getr()
		s.kind = FloatLit
		if c == '+' || c == '-' {
			c = s.getr()
		}
		// Exponent digits are decimal; with base 10 digits never
		// records an invalid digit, so no index pointer is needed.
		c, ds = s.digits(c, 10, nil)
		digsep |= ds
		if ds&1 == 0 && !s.bad {
			s.errorf("exponent has no digits")
		}
	} else if prefix == 'x' && s.kind == FloatLit && !s.bad {
		s.errorf("hexadecimal mantissa requires a 'p' exponent")
	}

	// suffix 'i'
	if c == 'i' {
		s.kind = ImagLit
		c = s.getr()
	}
	s.ungetr() // c is not part of the literal

	s.nlsemi = true
	s.lit = string(s.stopLit())
	s.tok = _Literal

	// Invalid digits matter only for integer literals; the error is
	// reported at the position of the offending digit.
	if s.kind == IntLit && invalid >= 0 && !s.bad {
		s.errorAtf(invalid, "invalid digit %q in %s", s.lit[invalid], litname(prefix))
	}

	// Check separator placement only if a '_' was seen at all.
	if digsep&2 != 0 && !s.bad {
		if i := invalidSep(s.lit); i >= 0 {
			s.errorAtf(i, "'_' must separate successive digits")
		}
	}
}
   545  
   546  func litname(prefix rune) string {
   547  	switch prefix {
   548  	case 'x':
   549  		return "hexadecimal literal"
   550  	case 'o', '0':
   551  		return "octal literal"
   552  	case 'b':
   553  		return "binary literal"
   554  	}
   555  	return "decimal literal"
   556  }
   557  
   558  // invalidSep returns the index of the first invalid separator in x, or -1.
   559  func invalidSep(x string) int {
   560  	x1 := ' ' // prefix char, we only care if it's 'x'
   561  	d := '.'  // digit, one of '_', '0' (a digit), or '.' (anything else)
   562  	i := 0
   563  
   564  	// a prefix counts as a digit
   565  	if len(x) >= 2 && x[0] == '0' {
   566  		x1 = lower(rune(x[1]))
   567  		if x1 == 'x' || x1 == 'o' || x1 == 'b' {
   568  			d = '0'
   569  			i = 2
   570  		}
   571  	}
   572  
   573  	// mantissa and exponent
   574  	for ; i < len(x); i++ {
   575  		p := d // previous digit
   576  		d = rune(x[i])
   577  		switch {
   578  		case d == '_':
   579  			if p != '0' {
   580  				return i
   581  			}
   582  		case isDecimal(d) || x1 == 'x' && isHex(d):
   583  			d = '0'
   584  		default:
   585  			if p == '_' {
   586  				return i - 1
   587  			}
   588  			d = '.'
   589  		}
   590  	}
   591  	if d == '_' {
   592  		return len(x) - 1
   593  	}
   594  
   595  	return -1
   596  }
   597  
   598  func (s *scanner) rune() {
   599  	s.startLit()
   600  	s.bad = false
   601  
   602  	n := 0
   603  	for ; ; n++ {
   604  		r := s.getr()
   605  		if r == '\'' {
   606  			break
   607  		}
   608  		if r == '\\' {
   609  			s.escape('\'')
   610  			continue
   611  		}
   612  		if r == '\n' {
   613  			s.ungetr() // assume newline is not part of literal
   614  			if !s.bad {
   615  				s.errorf("newline in character literal")
   616  			}
   617  			break
   618  		}
   619  		if r < 0 {
   620  			if !s.bad {
   621  				s.errorAtf(0, "invalid character literal (missing closing ')")
   622  			}
   623  			break
   624  		}
   625  	}
   626  
   627  	if !s.bad {
   628  		if n == 0 {
   629  			s.errorf("empty character literal or unescaped ' in character literal")
   630  		} else if n != 1 {
   631  			s.errorAtf(0, "invalid character literal (more than one character)")
   632  		}
   633  	}
   634  
   635  	s.nlsemi = true
   636  	s.lit = string(s.stopLit())
   637  	s.kind = RuneLit
   638  	s.tok = _Literal
   639  }
   640  
   641  func (s *scanner) stdString() {
   642  	s.startLit()
   643  	s.bad = false
   644  
   645  	for {
   646  		r := s.getr()
   647  		if r == '"' {
   648  			break
   649  		}
   650  		if r == '\\' {
   651  			s.escape('"')
   652  			continue
   653  		}
   654  		if r == '\n' {
   655  			s.ungetr() // assume newline is not part of literal
   656  			s.errorf("newline in string")
   657  			break
   658  		}
   659  		if r < 0 {
   660  			s.errorAtf(0, "string not terminated")
   661  			break
   662  		}
   663  	}
   664  
   665  	s.nlsemi = true
   666  	s.lit = string(s.stopLit())
   667  	s.kind = StringLit
   668  	s.tok = _Literal
   669  }
   670  
   671  func (s *scanner) rawString() {
   672  	s.startLit()
   673  	s.bad = false
   674  
   675  	for {
   676  		r := s.getr()
   677  		if r == '`' {
   678  			break
   679  		}
   680  		if r < 0 {
   681  			s.errorAtf(0, "string not terminated")
   682  			break
   683  		}
   684  	}
   685  	// We leave CRs in the string since they are part of the
   686  	// literal (even though they are not part of the literal
   687  	// value).
   688  
   689  	s.nlsemi = true
   690  	s.lit = string(s.stopLit())
   691  	s.kind = StringLit
   692  	s.tok = _Literal
   693  }
   694  
   695  func (s *scanner) comment(text string) {
   696  	s.errh(s.line, s.col, text)
   697  }
   698  
   699  func (s *scanner) skipLine(r rune) {
   700  	for r >= 0 {
   701  		if r == '\n' {
   702  			s.ungetr() // don't consume '\n' - needed for nlsemi logic
   703  			break
   704  		}
   705  		r = s.getr()
   706  	}
   707  }
   708  
   709  func (s *scanner) lineComment() {
   710  	r := s.getr()
   711  
   712  	if s.mode&comments != 0 {
   713  		s.startLit()
   714  		s.skipLine(r)
   715  		s.comment("//" + string(s.stopLit()))
   716  		return
   717  	}
   718  
   719  	// directives must start at the beginning of the line (s.col == colbase)
   720  	if s.mode&directives == 0 || s.col != colbase || (r != 'g' && r != 'l') {
   721  		s.skipLine(r)
   722  		return
   723  	}
   724  
   725  	// recognize go: or line directives
   726  	prefix := "go:"
   727  	if r == 'l' {
   728  		prefix = "line "
   729  	}
   730  	for _, m := range prefix {
   731  		if r != m {
   732  			s.skipLine(r)
   733  			return
   734  		}
   735  		r = s.getr()
   736  	}
   737  
   738  	// directive text
   739  	s.startLit()
   740  	s.skipLine(r)
   741  	s.comment("//" + prefix + string(s.stopLit()))
   742  }
   743  
   744  func (s *scanner) skipComment(r rune) bool {
   745  	for r >= 0 {
   746  		for r == '*' {
   747  			r = s.getr()
   748  			if r == '/' {
   749  				return true
   750  			}
   751  		}
   752  		r = s.getr()
   753  	}
   754  	s.errorAtf(0, "comment not terminated")
   755  	return false
   756  }
   757  
   758  func (s *scanner) fullComment() {
   759  	r := s.getr()
   760  
   761  	if s.mode&comments != 0 {
   762  		s.startLit()
   763  		if s.skipComment(r) {
   764  			s.comment("/*" + string(s.stopLit()))
   765  		} else {
   766  			s.killLit() // not a complete comment - ignore
   767  		}
   768  		return
   769  	}
   770  
   771  	if s.mode&directives == 0 || r != 'l' {
   772  		s.skipComment(r)
   773  		return
   774  	}
   775  
   776  	// recognize line directive
   777  	const prefix = "line "
   778  	for _, m := range prefix {
   779  		if r != m {
   780  			s.skipComment(r)
   781  			return
   782  		}
   783  		r = s.getr()
   784  	}
   785  
   786  	// directive text
   787  	s.startLit()
   788  	if s.skipComment(r) {
   789  		s.comment("/*" + prefix + string(s.stopLit()))
   790  	} else {
   791  		s.killLit() // not a complete comment - ignore
   792  	}
   793  }
   794  
   795  func (s *scanner) escape(quote rune) {
   796  	var n int
   797  	var base, max uint32
   798  
   799  	c := s.getr()
   800  	switch c {
   801  	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
   802  		return
   803  	case '0', '1', '2', '3', '4', '5', '6', '7':
   804  		n, base, max = 3, 8, 255
   805  	case 'x':
   806  		c = s.getr()
   807  		n, base, max = 2, 16, 255
   808  	case 'u':
   809  		c = s.getr()
   810  		n, base, max = 4, 16, unicode.MaxRune
   811  	case 'U':
   812  		c = s.getr()
   813  		n, base, max = 8, 16, unicode.MaxRune
   814  	default:
   815  		if c < 0 {
   816  			return // complain in caller about EOF
   817  		}
   818  		s.errorf("unknown escape sequence")
   819  		return
   820  	}
   821  
   822  	var x uint32
   823  	for i := n; i > 0; i-- {
   824  		d := base
   825  		switch {
   826  		case isDecimal(c):
   827  			d = uint32(c) - '0'
   828  		case 'a' <= lower(c) && lower(c) <= 'f':
   829  			d = uint32(lower(c)) - ('a' - 10)
   830  		}
   831  		if d >= base {
   832  			if c < 0 {
   833  				return // complain in caller about EOF
   834  			}
   835  			kind := "hex"
   836  			if base == 8 {
   837  				kind = "octal"
   838  			}
   839  			s.errorf("non-%s character in escape sequence: %c", kind, c)
   840  			s.ungetr()
   841  			return
   842  		}
   843  		// d < base
   844  		x = x*base + d
   845  		c = s.getr()
   846  	}
   847  	s.ungetr()
   848  
   849  	if x > max && base == 8 {
   850  		s.errorf("octal escape value > 255: %d", x)
   851  		return
   852  	}
   853  
   854  	if x > max || 0xD800 <= x && x < 0xE000 /* surrogate range */ {
   855  		s.errorf("escape sequence is invalid Unicode code point %#U", x)
   856  	}
   857  }