github.com/Khushbukela/protoreflect@v1.0.1/desc/protoparse/lexer.go

github.com/Khushbukela/protoreflect@v1.0.1/desc/protoparse/lexer.go (about)

     1  package protoparse
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"errors"
     7  	"fmt"
     8  	"io"
     9  	"math"
    10  	"strconv"
    11  	"strings"
    12  	"unicode/utf8"
    13  
    14  	"github.com/jhump/protoreflect/desc/protoparse/ast"
    15  )
    16  
// runeReader reads runes from a buffered reader and layers two features on
// top of it: a stack of pushed-back ("unread") runes and an optional mark
// that records the runes consumed since the mark was started.
type runeReader struct {
	rr     *bufio.Reader // underlying source of runes
	marked []rune        // runes consumed since startMark; nil until a mark is first started
	unread []rune        // stack of pushed-back runes; the last element is returned next
	err    error         // first error from rr; once set, every readRune call returns it
}
    23  
    24  func (rr *runeReader) readRune() (r rune, size int, err error) {
    25  	if rr.err != nil {
    26  		return 0, 0, rr.err
    27  	}
    28  	if len(rr.unread) > 0 {
    29  		r := rr.unread[len(rr.unread)-1]
    30  		rr.unread = rr.unread[:len(rr.unread)-1]
    31  		if rr.marked != nil {
    32  			rr.marked = append(rr.marked, r)
    33  		}
    34  		return r, utf8.RuneLen(r), nil
    35  	}
    36  	r, sz, err := rr.rr.ReadRune()
    37  	if err != nil {
    38  		rr.err = err
    39  	} else if rr.marked != nil {
    40  		rr.marked = append(rr.marked, r)
    41  	}
    42  	return r, sz, err
    43  }
    44  
    45  func (rr *runeReader) unreadRune(r rune) {
    46  	if rr.marked != nil {
    47  		if rr.marked[len(rr.marked)-1] != r {
    48  			panic("unread rune is not the same as last marked rune!")
    49  		}
    50  		rr.marked = rr.marked[:len(rr.marked)-1]
    51  	}
    52  	rr.unread = append(rr.unread, r)
    53  }
    54  
    55  func (rr *runeReader) startMark(initial rune) {
    56  	rr.marked = []rune{initial}
    57  }
    58  
    59  func (rr *runeReader) endMark() string {
    60  	m := string(rr.marked)
    61  	rr.marked = rr.marked[:0]
    62  	return m
    63  }
    64  
// protoLex holds the lexer state for one source file. It exposes Lex and
// Error methods (the shape a goyacc-generated parser expects of its lexer —
// TODO confirm against the parser that consumes it).
type protoLex struct {
	filename string        // file name copied into every reported source position
	input    *runeReader   // rune source with push-back and text marking
	errs     *errorHandler // error sink; Lex returns 0 once errs.err is non-nil
	res      *ast.FileNode // not referenced in this file; presumably the parse result — verify against the parser

	// Current position. lineNo and colNo are zero-based (cur adds one when
	// reporting them). offset is a byte offset; NOTE(review): it is advanced
	// only by Lex's main loop, not by the read*/skip* helpers.
	lineNo int
	colNo  int
	offset int

	prevSym ast.TerminalNode // most recent terminal produced (see setPrev); nil before the first
	eof     ast.TerminalNode // rune node created by Lex when end of input is reached

	// Position snapshot taken just before the rune that starts the current
	// token or comment is consumed; posRange uses it as the start position.
	prevLineNo int
	prevColNo  int
	prevOffset int
	comments   []ast.Comment // comments collected since the last token; become its leading comments
	ws         []rune        // whitespace collected since the last token or comment
}
    84  
// utf8Bom is the three-byte UTF-8 encoding of the byte order mark (U+FEFF).
// newLexer skips it when it appears at the very start of the input.
var utf8Bom = []byte{0xEF, 0xBB, 0xBF}
    86  
    87  func newLexer(in io.Reader, filename string, errs *errorHandler) *protoLex {
    88  	br := bufio.NewReader(in)
    89  
    90  	// if file has UTF8 byte order marker preface, consume it
    91  	marker, err := br.Peek(3)
    92  	if err == nil && bytes.Equal(marker, utf8Bom) {
    93  		_, _ = br.Discard(3)
    94  	}
    95  
    96  	return &protoLex{
    97  		input:    &runeReader{rr: br},
    98  		filename: filename,
    99  		errs:     errs,
   100  	}
   101  }
   102  
// keywords maps each reserved word of the proto language to the token type
// that Lex returns for it. Identifiers that are not in this table are
// returned by Lex as _NAME.
var keywords = map[string]int{
	"syntax":     _SYNTAX,
	"import":     _IMPORT,
	"weak":       _WEAK,
	"public":     _PUBLIC,
	"package":    _PACKAGE,
	"option":     _OPTION,
	"true":       _TRUE,
	"false":      _FALSE,
	"inf":        _INF,
	"nan":        _NAN,
	"repeated":   _REPEATED,
	"optional":   _OPTIONAL,
	"required":   _REQUIRED,
	"double":     _DOUBLE,
	"float":      _FLOAT,
	"int32":      _INT32,
	"int64":      _INT64,
	"uint32":     _UINT32,
	"uint64":     _UINT64,
	"sint32":     _SINT32,
	"sint64":     _SINT64,
	"fixed32":    _FIXED32,
	"fixed64":    _FIXED64,
	"sfixed32":   _SFIXED32,
	"sfixed64":   _SFIXED64,
	"bool":       _BOOL,
	"string":     _STRING,
	"bytes":      _BYTES,
	"group":      _GROUP,
	"oneof":      _ONEOF,
	"map":        _MAP,
	"extensions": _EXTENSIONS,
	"to":         _TO,
	"max":        _MAX,
	"reserved":   _RESERVED,
	"enum":       _ENUM,
	"message":    _MESSAGE,
	"extend":     _EXTEND,
	"service":    _SERVICE,
	"rpc":        _RPC,
	"stream":     _STREAM,
	"returns":    _RETURNS,
}
   147  
   148  func (l *protoLex) cur() SourcePos {
   149  	return SourcePos{
   150  		Filename: l.filename,
   151  		Offset:   l.offset,
   152  		Line:     l.lineNo + 1,
   153  		Col:      l.colNo + 1,
   154  	}
   155  }
   156  
   157  func (l *protoLex) adjustPos(consumedChars ...rune) {
   158  	for _, c := range consumedChars {
   159  		switch c {
   160  		case '\n':
   161  			// new line, back to first column
   162  			l.colNo = 0
   163  			l.lineNo++
   164  		case '\r':
   165  			// no adjustment
   166  		case '\t':
   167  			// advance to next tab stop
   168  			mod := l.colNo % 8
   169  			l.colNo += 8 - mod
   170  		default:
   171  			l.colNo++
   172  		}
   173  	}
   174  }
   175  
   176  func (l *protoLex) prev() *SourcePos {
   177  	if l.prevSym == nil {
   178  		return &SourcePos{
   179  			Filename: l.filename,
   180  			Offset:   0,
   181  			Line:     1,
   182  			Col:      1,
   183  		}
   184  	}
   185  	return l.prevSym.Start()
   186  }
   187  
// Lex scans the next token from the input, stores its AST node in the
// matching field of lval, and returns the token type: a keyword constant from
// the keywords table, _NAME, _INT_LIT, _FLOAT_LIT, _STRING_LIT, or the rune
// itself for punctuation. It returns _ERROR when the input is malformed or
// cannot be read (the error is stored in lval.err and passed to l.errs), and
// 0 at end of input or when l.errs already holds an error.
//
// Whitespace and comments encountered before the token are accumulated and
// attached to the token that follows them.
func (l *protoLex) Lex(lval *protoSymType) int {
	if l.errs.err != nil {
		// if error reporter already returned non-nil error,
		// we can skip the rest of the input
		return 0
	}

	l.prevLineNo = l.lineNo
	l.prevColNo = l.colNo
	l.prevOffset = l.offset
	l.comments = nil
	l.ws = nil
	l.input.endMark() // reset, just in case

	for {
		c, n, err := l.input.readRune()
		if err == io.EOF {
			// we're not actually returning a rune, but this will associate
			// accumulated comments as a trailing comment on last symbol
			// (if appropriate)
			l.setRune(lval, 0)
			l.eof = lval.b
			return 0
		} else if err != nil {
			// we don't call setError because we don't want it wrapped
			// with a source position because it's I/O, not syntax
			lval.err = err
			_ = l.errs.handleError(err)
			return _ERROR
		}

		// snapshot the position before this rune; it becomes the start of
		// the token or comment if this rune begins one
		l.prevLineNo = l.lineNo
		l.prevColNo = l.colNo
		l.prevOffset = l.offset

		l.offset += n
		l.adjustPos(c)
		if strings.ContainsRune("\n\r\t\f\v ", c) {
			l.ws = append(l.ws, c)
			continue
		}

		// from here on, every consumed rune is recorded as the raw text of
		// the token or comment
		l.input.startMark(c)
		if c == '.' {
			// decimal literals could start with a dot
			cn, _, err := l.input.readRune()
			if err != nil {
				l.setRune(lval, c)
				return int(c)
			}
			if cn >= '0' && cn <= '9' {
				l.adjustPos(cn)
				token := l.readNumber(c, cn)
				f, err := parseFloat(token)
				if err != nil {
					l.setError(lval, numError(err, "float", token))
					return _ERROR
				}
				l.setFloat(lval, f)
				return _FLOAT_LIT
			}
			l.input.unreadRune(cn)
			l.setRune(lval, c)
			return int(c)
		}

		if c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') {
			// identifier
			token := []rune{c}
			token = l.readIdentifier(token)
			str := string(token)
			if t, ok := keywords[str]; ok {
				l.setIdent(lval, str)
				return t
			}
			l.setIdent(lval, str)
			return _NAME
		}

		if c >= '0' && c <= '9' {
			// integer or float literal
			token := l.readNumber(c)
			if strings.HasPrefix(token, "0x") || strings.HasPrefix(token, "0X") {
				// hexadecimal
				ui, err := strconv.ParseUint(token[2:], 16, 64)
				if err != nil {
					l.setError(lval, numError(err, "hexadecimal integer", token[2:]))
					return _ERROR
				}
				l.setInt(lval, ui)
				return _INT_LIT
			}
			if strings.Contains(token, ".") || strings.Contains(token, "e") || strings.Contains(token, "E") {
				// floating point!
				f, err := parseFloat(token)
				if err != nil {
					l.setError(lval, numError(err, "float", token))
					return _ERROR
				}
				l.setFloat(lval, f)
				return _FLOAT_LIT
			}
			// integer! (decimal or octal)
			base := 10
			if token[0] == '0' {
				base = 8
			}
			ui, err := strconv.ParseUint(token, base, 64)
			if err != nil {
				kind := "integer"
				if base == 8 {
					kind = "octal integer"
				}
				if numErr, ok := err.(*strconv.NumError); ok && numErr.Err == strconv.ErrRange {
					// if it's too big to be an int, parse it as a float
					var f float64
					kind = "float"
					f, err = parseFloat(token)
					if err == nil {
						l.setFloat(lval, f)
						return _FLOAT_LIT
					}
				}
				l.setError(lval, numError(err, kind, token))
				return _ERROR
			}
			l.setInt(lval, ui)
			return _INT_LIT
		}

		if c == '\'' || c == '"' {
			// string literal
			str, err := l.readStringLiteral(c)
			if err != nil {
				l.setError(lval, err)
				return _ERROR
			}
			l.setString(lval, str)
			return _STRING_LIT
		}

		if c == '/' {
			// comment
			cn, _, err := l.input.readRune()
			if err != nil {
				l.setRune(lval, '/')
				return int(c)
			}
			if cn == '/' {
				l.adjustPos(cn)
				hitNewline, hasErr := l.skipToEndOfLineComment(lval)
				if hasErr {
					return _ERROR
				}
				comment := l.newComment()
				comment.PosRange.End.Col++
				if hitNewline {
					// we don't do this inside of skipToEndOfLineComment
					// because we want to know the length of previous
					// line for calculation above
					l.adjustPos('\n')
				}
				l.comments = append(l.comments, comment)
				continue
			}
			if cn == '*' {
				l.adjustPos(cn)
				ok, hasErr := l.skipToEndOfBlockComment(lval)
				if hasErr {
					return _ERROR
				}
				if !ok {
					l.setError(lval, errors.New("block comment never terminates, unexpected EOF"))
					return _ERROR
				}
				l.comments = append(l.comments, l.newComment())
				continue
			}
			// not a comment: put the rune back and fall through to the
			// punctuation checks below
			l.input.unreadRune(cn)
		}

		if c < 32 || c == 127 {
			l.setError(lval, errors.New("invalid control character"))
			return _ERROR
		}
		if !strings.ContainsRune(";,.:=-+(){}[]<>", c) {
			l.setError(lval, errors.New("invalid character"))
			return _ERROR
		}
		l.setRune(lval, c)
		return int(c)
	}
}
   381  
   382  func parseFloat(token string) (float64, error) {
   383  	// strconv.ParseFloat allows _ to separate digits, but protobuf does not
   384  	if strings.ContainsRune(token, '_') {
   385  		return 0, &strconv.NumError{
   386  			Func: "parseFloat",
   387  			Num:  token,
   388  			Err:  strconv.ErrSyntax,
   389  		}
   390  	}
   391  	f, err := strconv.ParseFloat(token, 64)
   392  	if err == nil {
   393  		return f, nil
   394  	}
   395  	if numErr, ok := err.(*strconv.NumError); ok && numErr.Err == strconv.ErrRange && math.IsInf(f, 1) {
   396  		// protoc doesn't complain about float overflow and instead just uses "infinity"
   397  		// so we mirror that behavior by just returning infinity and ignoring the error
   398  		return f, nil
   399  	}
   400  	return f, err
   401  }
   402  
   403  func (l *protoLex) posRange() ast.PosRange {
   404  	return ast.PosRange{
   405  		Start: SourcePos{
   406  			Filename: l.filename,
   407  			Offset:   l.prevOffset,
   408  			Line:     l.prevLineNo + 1,
   409  			Col:      l.prevColNo + 1,
   410  		},
   411  		End: l.cur(),
   412  	}
   413  }
   414  
   415  func (l *protoLex) newComment() ast.Comment {
   416  	ws := string(l.ws)
   417  	l.ws = l.ws[:0]
   418  	return ast.Comment{
   419  		PosRange:          l.posRange(),
   420  		LeadingWhitespace: ws,
   421  		Text:              l.input.endMark(),
   422  	}
   423  }
   424  
   425  func (l *protoLex) newTokenInfo() ast.TokenInfo {
   426  	ws := string(l.ws)
   427  	l.ws = nil
   428  	return ast.TokenInfo{
   429  		PosRange:          l.posRange(),
   430  		LeadingComments:   l.comments,
   431  		LeadingWhitespace: ws,
   432  		RawText:           l.input.endMark(),
   433  	}
   434  }
   435  
// setPrev records n as the most recently produced terminal. Before doing so
// it may move some of n's leading comments onto the previous terminal as
// trailing comments: a comment that starts on the line where the previous
// terminal ends, or a group of comments that starts on the following line and
// is detached from n.
//
// isDot reports whether n is the '.' rune; unlike other punctuation, a period
// is allowed to keep its leading comments.
func (l *protoLex) setPrev(n ast.TerminalNode, isDot bool) {
	nStart := n.Start().Line
	if _, ok := n.(*ast.RuneNode); ok {
		// This is really gross, but there are many cases where we don't want
		// to attribute comments to punctuation (like commas, equals, semicolons)
		// and would instead prefer to attribute comments to a more meaningful
		// element in the AST.
		//
		// So if it's a simple node OTHER THAN PERIOD (since that is not just
		// punctuation but typically part of a qualified identifier), don't
		// attribute comments to it. We do that with this TOTAL HACK: adjusting
		// the start line makes leading comments appear detached so logic below
		// will naturally associate trailing comment to previous symbol
		if !isDot {
			nStart += 2
		}
	}
	if l.prevSym != nil && len(n.LeadingComments()) > 0 && l.prevSym.End().Line < nStart {
		// we may need to re-attribute the first comment to
		// instead be previous node's trailing comment
		prevEnd := l.prevSym.End().Line
		comments := n.LeadingComments()
		c := comments[0]
		commentStart := c.Start.Line
		if commentStart == prevEnd {
			// comment is on same line as previous symbol
			n.PopLeadingComment()
			l.prevSym.PushTrailingComment(c)
		} else if commentStart == prevEnd+1 {
			// comment is right after previous symbol; see if it is detached
			// and if so re-attribute
			singleLineStyle := strings.HasPrefix(c.Text, "//")
			line := c.End.Line
			// index of the first comment that starts a second group, or -1
			// if all comments form a single group
			groupEnd := -1
			for i := 1; i < len(comments); i++ {
				c := comments[i]
				newGroup := false
				if !singleLineStyle || c.Start.Line > line+1 {
					// we've found a gap between comments, which means the
					// previous comments were detached
					newGroup = true
				} else {
					line = c.End.Line
					singleLineStyle = strings.HasPrefix(comments[i].Text, "//")
					if !singleLineStyle {
						// we've found a switch from // comments to /*
						// consider that a new group which means the
						// previous comments were detached
						newGroup = true
					}
				}
				if newGroup {
					groupEnd = i
					break
				}
			}

			if groupEnd == -1 {
				// just one group of comments; we'll mark it as a trailing
				// comment if it immediately follows previous symbol and is
				// detached from current symbol
				c1 := comments[0]
				c2 := comments[len(comments)-1]
				if c1.Start.Line <= prevEnd+1 && c2.End.Line < nStart-1 {
					groupEnd = len(comments)
				}
			}

			// move the first group over; a no-op when groupEnd is still -1
			for i := 0; i < groupEnd; i++ {
				l.prevSym.PushTrailingComment(n.PopLeadingComment())
			}
		}
	}

	l.prevSym = n
}
   512  
   513  func (l *protoLex) setString(lval *protoSymType, val string) {
   514  	lval.s = ast.NewStringLiteralNode(val, l.newTokenInfo())
   515  	l.setPrev(lval.s, false)
   516  }
   517  
   518  func (l *protoLex) setIdent(lval *protoSymType, val string) {
   519  	lval.id = ast.NewIdentNode(val, l.newTokenInfo())
   520  	l.setPrev(lval.id, false)
   521  }
   522  
   523  func (l *protoLex) setInt(lval *protoSymType, val uint64) {
   524  	lval.i = ast.NewUintLiteralNode(val, l.newTokenInfo())
   525  	l.setPrev(lval.i, false)
   526  }
   527  
   528  func (l *protoLex) setFloat(lval *protoSymType, val float64) {
   529  	lval.f = ast.NewFloatLiteralNode(val, l.newTokenInfo())
   530  	l.setPrev(lval.f, false)
   531  }
   532  
   533  func (l *protoLex) setRune(lval *protoSymType, val rune) {
   534  	lval.b = ast.NewRuneNode(val, l.newTokenInfo())
   535  	l.setPrev(lval.b, val == '.')
   536  }
   537  
   538  func (l *protoLex) setError(lval *protoSymType, err error) {
   539  	lval.err = l.addSourceError(err)
   540  }
   541  
   542  func (l *protoLex) readNumber(sofar ...rune) string {
   543  	token := sofar
   544  	allowExpSign := false
   545  	for {
   546  		c, _, err := l.input.readRune()
   547  		if err != nil {
   548  			break
   549  		}
   550  		if (c == '-' || c == '+') && !allowExpSign {
   551  			l.input.unreadRune(c)
   552  			break
   553  		}
   554  		allowExpSign = false
   555  		if c != '.' && c != '_' && (c < '0' || c > '9') &&
   556  			(c < 'a' || c > 'z') && (c < 'A' || c > 'Z') &&
   557  			c != '-' && c != '+' {
   558  			// no more chars in the number token
   559  			l.input.unreadRune(c)
   560  			break
   561  		}
   562  		if c == 'e' || c == 'E' {
   563  			// scientific notation char can be followed by
   564  			// an exponent sign
   565  			allowExpSign = true
   566  		}
   567  		l.adjustPos(c)
   568  		token = append(token, c)
   569  	}
   570  	return string(token)
   571  }
   572  
   573  func numError(err error, kind, s string) error {
   574  	ne, ok := err.(*strconv.NumError)
   575  	if !ok {
   576  		return err
   577  	}
   578  	if ne.Err == strconv.ErrRange {
   579  		return fmt.Errorf("value out of range for %s: %s", kind, s)
   580  	}
   581  	// syntax error
   582  	return fmt.Errorf("invalid syntax in %s value: %s", kind, s)
   583  }
   584  
   585  func (l *protoLex) readIdentifier(sofar []rune) []rune {
   586  	token := sofar
   587  	for {
   588  		c, _, err := l.input.readRune()
   589  		if err != nil {
   590  			break
   591  		}
   592  		if c != '_' && (c < 'a' || c > 'z') && (c < 'A' || c > 'Z') && (c < '0' || c > '9') {
   593  			l.input.unreadRune(c)
   594  			break
   595  		}
   596  		l.adjustPos(c)
   597  		token = append(token, c)
   598  	}
   599  	return token
   600  }
   601  
   602  func (l *protoLex) readStringLiteral(quote rune) (string, error) {
   603  	var buf bytes.Buffer
   604  	for {
   605  		c, _, err := l.input.readRune()
   606  		if err != nil {
   607  			if err == io.EOF {
   608  				err = io.ErrUnexpectedEOF
   609  			}
   610  			return "", err
   611  		}
   612  		if c == '\n' {
   613  			return "", errors.New("encountered end-of-line before end of string literal")
   614  		}
   615  		l.adjustPos(c)
   616  		if c == quote {
   617  			break
   618  		}
   619  		if c == 0 {
   620  			return "", errors.New("null character ('\\0') not allowed in string literal")
   621  		}
   622  		if c == '\\' {
   623  			// escape sequence
   624  			c, _, err = l.input.readRune()
   625  			if err != nil {
   626  				return "", err
   627  			}
   628  			l.adjustPos(c)
   629  			if c == 'x' || c == 'X' {
   630  				// hex escape
   631  				c, _, err := l.input.readRune()
   632  				if err != nil {
   633  					return "", err
   634  				}
   635  				l.adjustPos(c)
   636  				c2, _, err := l.input.readRune()
   637  				if err != nil {
   638  					return "", err
   639  				}
   640  				var hex string
   641  				if (c2 < '0' || c2 > '9') && (c2 < 'a' || c2 > 'f') && (c2 < 'A' || c2 > 'F') {
   642  					l.input.unreadRune(c2)
   643  					hex = string(c)
   644  				} else {
   645  					l.adjustPos(c2)
   646  					hex = string([]rune{c, c2})
   647  				}
   648  				i, err := strconv.ParseInt(hex, 16, 32)
   649  				if err != nil {
   650  					return "", fmt.Errorf("invalid hex escape: \\x%q", hex)
   651  				}
   652  				buf.WriteByte(byte(i))
   653  
   654  			} else if c >= '0' && c <= '7' {
   655  				// octal escape
   656  				c2, _, err := l.input.readRune()
   657  				if err != nil {
   658  					return "", err
   659  				}
   660  				var octal string
   661  				if c2 < '0' || c2 > '7' {
   662  					l.input.unreadRune(c2)
   663  					octal = string(c)
   664  				} else {
   665  					l.adjustPos(c2)
   666  					c3, _, err := l.input.readRune()
   667  					if err != nil {
   668  						return "", err
   669  					}
   670  					if c3 < '0' || c3 > '7' {
   671  						l.input.unreadRune(c3)
   672  						octal = string([]rune{c, c2})
   673  					} else {
   674  						l.adjustPos(c3)
   675  						octal = string([]rune{c, c2, c3})
   676  					}
   677  				}
   678  				i, err := strconv.ParseInt(octal, 8, 32)
   679  				if err != nil {
   680  					return "", fmt.Errorf("invalid octal escape: \\%q", octal)
   681  				}
   682  				if i > 0xff {
   683  					return "", fmt.Errorf("octal escape is out range, must be between 0 and 377: \\%q", octal)
   684  				}
   685  				buf.WriteByte(byte(i))
   686  
   687  			} else if c == 'u' {
   688  				// short unicode escape
   689  				u := make([]rune, 4)
   690  				for i := range u {
   691  					c, _, err := l.input.readRune()
   692  					if err != nil {
   693  						return "", err
   694  					}
   695  					l.adjustPos(c)
   696  					u[i] = c
   697  				}
   698  				i, err := strconv.ParseInt(string(u), 16, 32)
   699  				if err != nil {
   700  					return "", fmt.Errorf("invalid unicode escape: \\u%q", string(u))
   701  				}
   702  				buf.WriteRune(rune(i))
   703  
   704  			} else if c == 'U' {
   705  				// long unicode escape
   706  				u := make([]rune, 8)
   707  				for i := range u {
   708  					c, _, err := l.input.readRune()
   709  					if err != nil {
   710  						return "", err
   711  					}
   712  					l.adjustPos(c)
   713  					u[i] = c
   714  				}
   715  				i, err := strconv.ParseInt(string(u), 16, 32)
   716  				if err != nil {
   717  					return "", fmt.Errorf("invalid unicode escape: \\U%q", string(u))
   718  				}
   719  				if i > 0x10ffff || i < 0 {
   720  					return "", fmt.Errorf("unicode escape is out of range, must be between 0 and 0x10ffff: \\U%q", string(u))
   721  				}
   722  				buf.WriteRune(rune(i))
   723  
   724  			} else if c == 'a' {
   725  				buf.WriteByte('\a')
   726  			} else if c == 'b' {
   727  				buf.WriteByte('\b')
   728  			} else if c == 'f' {
   729  				buf.WriteByte('\f')
   730  			} else if c == 'n' {
   731  				buf.WriteByte('\n')
   732  			} else if c == 'r' {
   733  				buf.WriteByte('\r')
   734  			} else if c == 't' {
   735  				buf.WriteByte('\t')
   736  			} else if c == 'v' {
   737  				buf.WriteByte('\v')
   738  			} else if c == '\\' {
   739  				buf.WriteByte('\\')
   740  			} else if c == '\'' {
   741  				buf.WriteByte('\'')
   742  			} else if c == '"' {
   743  				buf.WriteByte('"')
   744  			} else if c == '?' {
   745  				buf.WriteByte('?')
   746  			} else {
   747  				return "", fmt.Errorf("invalid escape sequence: %q", "\\"+string(c))
   748  			}
   749  		} else {
   750  			buf.WriteRune(c)
   751  		}
   752  	}
   753  	return buf.String(), nil
   754  }
   755  
   756  func (l *protoLex) skipToEndOfLineComment(lval *protoSymType) (ok, hasErr bool) {
   757  	for {
   758  		c, _, err := l.input.readRune()
   759  		if err != nil {
   760  			return false, false
   761  		}
   762  		switch c {
   763  		case '\n':
   764  			return true, false
   765  		case 0:
   766  			l.setError(lval, errors.New("invalid control character"))
   767  			return false, true
   768  		}
   769  		l.adjustPos(c)
   770  	}
   771  }
   772  
   773  func (l *protoLex) skipToEndOfBlockComment(lval *protoSymType) (ok, hasErr bool) {
   774  	for {
   775  		c, _, err := l.input.readRune()
   776  		if err != nil {
   777  			return false, false
   778  		}
   779  		if c == 0 {
   780  			l.setError(lval, errors.New("invalid control character"))
   781  			return false, true
   782  		}
   783  		l.adjustPos(c)
   784  		if c == '*' {
   785  			c, _, err := l.input.readRune()
   786  			if err != nil {
   787  				return false, false
   788  			}
   789  			if c == '/' {
   790  				l.adjustPos(c)
   791  				return true, false
   792  			}
   793  			l.input.unreadRune(c)
   794  		}
   795  	}
   796  }
   797  
   798  func (l *protoLex) addSourceError(err error) ErrorWithPos {
   799  	ewp, ok := err.(ErrorWithPos)
   800  	if !ok {
   801  		ewp = ErrorWithSourcePos{Pos: l.prev(), Underlying: err}
   802  	}
   803  	_ = l.errs.handleError(ewp)
   804  	return ewp
   805  }
   806  
   807  func (l *protoLex) Error(s string) {
   808  	_ = l.addSourceError(errors.New(s))
   809  }