github.com/bakjos/protoreflect@v1.9.2/desc/protoparse/lexer.go (about)

     1  package protoparse
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"errors"
     7  	"fmt"
     8  	"io"
     9  	"strconv"
    10  	"strings"
    11  	"unicode/utf8"
    12  
    13  	"github.com/bakjos/protoreflect/desc/protoparse/ast"
    14  )
    15  
    16  type runeReader struct {
    17  	rr     *bufio.Reader
    18  	marked []rune
    19  	unread []rune
    20  	err    error
    21  }
    22  
    23  func (rr *runeReader) readRune() (r rune, size int, err error) {
    24  	if rr.err != nil {
    25  		return 0, 0, rr.err
    26  	}
    27  	if len(rr.unread) > 0 {
    28  		r := rr.unread[len(rr.unread)-1]
    29  		rr.unread = rr.unread[:len(rr.unread)-1]
    30  		if rr.marked != nil {
    31  			rr.marked = append(rr.marked, r)
    32  		}
    33  		return r, utf8.RuneLen(r), nil
    34  	}
    35  	r, sz, err := rr.rr.ReadRune()
    36  	if err != nil {
    37  		rr.err = err
    38  	} else if rr.marked != nil {
    39  		rr.marked = append(rr.marked, r)
    40  	}
    41  	return r, sz, err
    42  }
    43  
    44  func (rr *runeReader) unreadRune(r rune) {
    45  	if rr.marked != nil {
    46  		if rr.marked[len(rr.marked)-1] != r {
    47  			panic("unread rune is not the same as last marked rune!")
    48  		}
    49  		rr.marked = rr.marked[:len(rr.marked)-1]
    50  	}
    51  	rr.unread = append(rr.unread, r)
    52  }
    53  
    54  func (rr *runeReader) startMark(initial rune) {
    55  	rr.marked = []rune{initial}
    56  }
    57  
    58  func (rr *runeReader) endMark() string {
    59  	m := string(rr.marked)
    60  	rr.marked = rr.marked[:0]
    61  	return m
    62  }
    63  
    64  func lexError(l protoLexer, pos *SourcePos, err string) {
    65  	pl := l.(*protoLex)
    66  	_ = pl.errs.handleErrorWithPos(pos, err)
    67  }
    68  
// protoLex is the lexer state: the input being scanned, the current and
// token-start positions, and the comments and whitespace gathered ahead of
// the next token.
type protoLex struct {
	// filename is copied into every SourcePos produced; input supplies the
	// runes; errs receives reported errors. res is not assigned anywhere in
	// this file -- presumably the parser stores its result there (confirm).
	filename string
	input    *runeReader
	errs     *errorHandler
	res      *ast.FileNode

	// Current position. lineNo and colNo are zero-based; cur() adds one to
	// each when building a SourcePos.
	lineNo int
	colNo  int
	offset int

	// prevSym is the most recent token node passed to setPrev; eof is the
	// node created when end of input is reached.
	prevSym ast.TerminalNode
	eof     ast.TerminalNode

	// prevLineNo, prevColNo and prevOffset record the position just before
	// the rune that starts the current token or comment (see posRange).
	// comments and ws accumulate the comments and whitespace seen since the
	// previous token.
	prevLineNo int
	prevColNo  int
	prevOffset int
	comments   []ast.Comment
	ws         []rune
}
    88  
    89  var utf8Bom = []byte{0xEF, 0xBB, 0xBF}
    90  
    91  func newLexer(in io.Reader, filename string, errs *errorHandler) *protoLex {
    92  	br := bufio.NewReader(in)
    93  
    94  	// if file has UTF8 byte order marker preface, consume it
    95  	marker, err := br.Peek(3)
    96  	if err == nil && bytes.Equal(marker, utf8Bom) {
    97  		_, _ = br.Discard(3)
    98  	}
    99  
   100  	return &protoLex{
   101  		input:    &runeReader{rr: br},
   102  		filename: filename,
   103  		errs:     errs,
   104  	}
   105  }
   106  
// keywords maps each reserved word to its token type. Lex looks up every
// identifier it scans here: a hit returns the mapped token type, a miss
// returns _NAME. Lookup is by exact string, so it is case-sensitive.
var keywords = map[string]int{
	"syntax":     _SYNTAX,
	"import":     _IMPORT,
	"weak":       _WEAK,
	"public":     _PUBLIC,
	"package":    _PACKAGE,
	"option":     _OPTION,
	"true":       _TRUE,
	"false":      _FALSE,
	"inf":        _INF,
	"nan":        _NAN,
	"repeated":   _REPEATED,
	"optional":   _OPTIONAL,
	"required":   _REQUIRED,
	"double":     _DOUBLE,
	"float":      _FLOAT,
	"int32":      _INT32,
	"int64":      _INT64,
	"uint32":     _UINT32,
	"uint64":     _UINT64,
	"sint32":     _SINT32,
	"sint64":     _SINT64,
	"fixed32":    _FIXED32,
	"fixed64":    _FIXED64,
	"sfixed32":   _SFIXED32,
	"sfixed64":   _SFIXED64,
	"bool":       _BOOL,
	"string":     _STRING,
	"bytes":      _BYTES,
	"group":      _GROUP,
	"oneof":      _ONEOF,
	"map":        _MAP,
	"extensions": _EXTENSIONS,
	"to":         _TO,
	"max":        _MAX,
	"reserved":   _RESERVED,
	"enum":       _ENUM,
	"message":    _MESSAGE,
	"extend":     _EXTEND,
	"service":    _SERVICE,
	"rpc":        _RPC,
	"stream":     _STREAM,
	"returns":    _RETURNS,
}
   151  
   152  func (l *protoLex) cur() SourcePos {
   153  	return SourcePos{
   154  		Filename: l.filename,
   155  		Offset:   l.offset,
   156  		Line:     l.lineNo + 1,
   157  		Col:      l.colNo + 1,
   158  	}
   159  }
   160  
   161  func (l *protoLex) adjustPos(consumedChars ...rune) {
   162  	for _, c := range consumedChars {
   163  		switch c {
   164  		case '\n':
   165  			// new line, back to first column
   166  			l.colNo = 0
   167  			l.lineNo++
   168  		case '\r':
   169  			// no adjustment
   170  		case '\t':
   171  			// advance to next tab stop
   172  			mod := l.colNo % 8
   173  			l.colNo += 8 - mod
   174  		default:
   175  			l.colNo++
   176  		}
   177  	}
   178  }
   179  
   180  func (l *protoLex) prev() *SourcePos {
   181  	if l.prevSym == nil {
   182  		return &SourcePos{
   183  			Filename: l.filename,
   184  			Offset:   0,
   185  			Line:     1,
   186  			Col:      1,
   187  		}
   188  	}
   189  	return l.prevSym.Start()
   190  }
   191  
// Lex scans the next token from the input, stores its AST node in the
// matching field of lval, and returns the token's type.
//
// The return value is one of:
//   - 0 when the input is exhausted, or immediately when the error handler
//     already holds an error (the rest of the input is then skipped);
//   - _ERROR when an I/O or lexical error occurs, with lval.err set;
//   - a token type from the keywords table, or _NAME, for identifiers;
//   - _INT_LIT, _FLOAT_LIT or _STRING_LIT for literals;
//   - the rune itself, converted to int, for any other character.
//
// Whitespace and comments are not returned as tokens. They are accumulated
// in l.ws and l.comments and attached to the next token (or to the EOF node)
// when that token's node is created.
func (l *protoLex) Lex(lval *protoSymType) int {
	if l.errs.err != nil {
		// if error reporter already returned non-nil error,
		// we can skip the rest of the input
		return 0
	}

	l.prevLineNo = l.lineNo
	l.prevColNo = l.colNo
	l.prevOffset = l.offset
	l.comments = nil
	l.ws = nil
	l.input.endMark() // reset, just in case

	for {
		c, n, err := l.input.readRune()
		if err == io.EOF {
			// we're not actually returning a rune, but this will associate
			// accumulated comments as a trailing comment on last symbol
			// (if appropriate)
			l.setRune(lval, 0)
			l.eof = lval.b
			return 0
		} else if err != nil {
			// we don't call setError because we don't want it wrapped
			// with a source position because it's I/O, not syntax
			lval.err = err
			_ = l.errs.handleError(err)
			return _ERROR
		}

		// remember where this token (or comment) starts; posRange uses it
		l.prevLineNo = l.lineNo
		l.prevColNo = l.colNo
		l.prevOffset = l.offset

		// NOTE(review): within this file this is the only statement that
		// advances l.offset. Runes consumed by the read*/skip* helpers move
		// the line and column (via adjustPos) but not the offset -- confirm
		// whether Offset is meant to be an exact byte offset.
		l.offset += n
		l.adjustPos(c)
		if strings.ContainsRune("\n\r\t ", c) {
			l.ws = append(l.ws, c)
			continue
		}

		// start recording the raw text of the token or comment
		l.input.startMark(c)
		if c == '.' {
			// decimal literals could start with a dot
			cn, _, err := l.input.readRune()
			if err != nil {
				l.setRune(lval, c)
				return int(c)
			}
			if cn >= '0' && cn <= '9' {
				l.adjustPos(cn)
				token := []rune{c, cn}
				// the dot is already consumed, so no further dot is allowed
				token = l.readNumber(token, false, true)
				f, err := strconv.ParseFloat(string(token), 64)
				if err != nil {
					l.setError(lval, err)
					return _ERROR
				}
				l.setFloat(lval, f)
				return _FLOAT_LIT
			}
			l.input.unreadRune(cn)
			l.setRune(lval, c)
			return int(c)
		}

		if c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') {
			// identifier
			token := []rune{c}
			token = l.readIdentifier(token)
			str := string(token)
			// keywords also get an identifier node; only the returned
			// token type differs
			if t, ok := keywords[str]; ok {
				l.setIdent(lval, str)
				return t
			}
			l.setIdent(lval, str)
			return _NAME
		}

		if c >= '0' && c <= '9' {
			// integer or float literal
			if c == '0' {
				cn, _, err := l.input.readRune()
				if err != nil {
					l.setInt(lval, 0)
					return _INT_LIT
				}
				if cn == 'x' || cn == 'X' {
					cnn, _, err := l.input.readRune()
					if err != nil {
						l.input.unreadRune(cn)
						l.setInt(lval, 0)
						return _INT_LIT
					}
					if (cnn >= '0' && cnn <= '9') || (cnn >= 'a' && cnn <= 'f') || (cnn >= 'A' && cnn <= 'F') {
						// hexadecimal!
						l.adjustPos(cn, cnn)
						token := []rune{cnn}
						token = l.readHexNumber(token)
						ui, err := strconv.ParseUint(string(token), 16, 64)
						if err != nil {
							l.setError(lval, err)
							return _ERROR
						}
						l.setInt(lval, ui)
						return _INT_LIT
					}
					// "0x" not followed by a hex digit: the token is just
					// "0"; push back in reverse order of reading
					l.input.unreadRune(cnn)
					l.input.unreadRune(cn)
					l.setInt(lval, 0)
					return _INT_LIT
				} else {
					l.input.unreadRune(cn)
				}
			}
			token := []rune{c}
			token = l.readNumber(token, true, true)
			numstr := string(token)
			if strings.Contains(numstr, ".") || strings.Contains(numstr, "e") || strings.Contains(numstr, "E") {
				// floating point!
				f, err := strconv.ParseFloat(numstr, 64)
				if err != nil {
					l.setError(lval, err)
					return _ERROR
				}
				l.setFloat(lval, f)
				return _FLOAT_LIT
			}
			// integer! (decimal or octal)
			// base 0 lets strconv pick the base from the literal's prefix
			ui, err := strconv.ParseUint(numstr, 0, 64)
			if err != nil {
				if numErr, ok := err.(*strconv.NumError); ok && numErr.Err == strconv.ErrRange {
					// if it's too big to be an int, parse it as a float
					var f float64
					f, err = strconv.ParseFloat(numstr, 64)
					if err == nil {
						l.setFloat(lval, f)
						return _FLOAT_LIT
					}
				}
				l.setError(lval, err)
				return _ERROR
			}
			l.setInt(lval, ui)
			return _INT_LIT
		}

		if c == '\'' || c == '"' {
			// string literal
			str, err := l.readStringLiteral(c)
			if err != nil {
				l.setError(lval, err)
				return _ERROR
			}
			l.setString(lval, str)
			return _STRING_LIT
		}

		if c == '/' {
			// comment
			cn, _, err := l.input.readRune()
			if err != nil {
				l.setRune(lval, '/')
				return int(c)
			}
			if cn == '/' {
				l.adjustPos(cn)
				hitNewline := l.skipToEndOfLineComment()
				comment := l.newComment()
				comment.PosRange.End.Col++
				if hitNewline {
					// we don't do this inside of skipToEndOfLineComment
					// because we want to know the length of previous
					// line for calculation above
					l.adjustPos('\n')
				}
				l.comments = append(l.comments, comment)
				continue
			}
			if cn == '*' {
				l.adjustPos(cn)
				if ok := l.skipToEndOfBlockComment(); !ok {
					l.setError(lval, errors.New("block comment never terminates, unexpected EOF"))
					return _ERROR
				} else {
					l.comments = append(l.comments, l.newComment())
				}
				continue
			}
			// a lone slash: fall through and return it as a rune token
			l.input.unreadRune(cn)
		}

		l.setRune(lval, c)
		return int(c)
	}
}
   389  
   390  func (l *protoLex) posRange() ast.PosRange {
   391  	return ast.PosRange{
   392  		Start: SourcePos{
   393  			Filename: l.filename,
   394  			Offset:   l.prevOffset,
   395  			Line:     l.prevLineNo + 1,
   396  			Col:      l.prevColNo + 1,
   397  		},
   398  		End: l.cur(),
   399  	}
   400  }
   401  
   402  func (l *protoLex) newComment() ast.Comment {
   403  	ws := string(l.ws)
   404  	l.ws = l.ws[:0]
   405  	return ast.Comment{
   406  		PosRange:          l.posRange(),
   407  		LeadingWhitespace: ws,
   408  		Text:              l.input.endMark(),
   409  	}
   410  }
   411  
   412  func (l *protoLex) newTokenInfo() ast.TokenInfo {
   413  	ws := string(l.ws)
   414  	l.ws = nil
   415  	return ast.TokenInfo{
   416  		PosRange:          l.posRange(),
   417  		LeadingComments:   l.comments,
   418  		LeadingWhitespace: ws,
   419  		RawText:           l.input.endMark(),
   420  	}
   421  }
   422  
// setPrev records n as the most recent token and, before doing so, decides
// whether some of n's leading comments really belong to the previous token
// as trailing comments, moving them over if so.
//
// isDot reports whether n is the '.' rune; it only matters when n is a
// *ast.RuneNode. Comments are re-attributed in these cases:
//   - the first comment starts on the line where the previous token ends:
//     that one comment is moved;
//   - the first comment starts on the line right after the previous token:
//     the first group of comments is moved if it is detached from n, i.e.
//     followed by a gap or a change of comment style, or ending before the
//     line preceding n.
func (l *protoLex) setPrev(n ast.TerminalNode, isDot bool) {
	nStart := n.Start().Line
	if _, ok := n.(*ast.RuneNode); ok {
		// This is really gross, but there are many cases where we don't want
		// to attribute comments to punctuation (like commas, equals, semicolons)
		// and would instead prefer to attribute comments to a more meaningful
		// element in the AST.
		//
		// So if it's a simple node OTHER THAN PERIOD (since that is not just
		// punctuation but typically part of a qualified identifier), don't
		// attribute comments to it. We do that with this TOTAL HACK: adjusting
		// the start line makes leading comments appear detached so logic below
		// will naturally associate trailing comments with the previous symbol
		if !isDot {
			nStart += 2
		}
	}
	if l.prevSym != nil && len(n.LeadingComments()) > 0 && l.prevSym.End().Line < nStart {
		// we may need to re-attribute the first comment to
		// instead be previous node's trailing comment
		prevEnd := l.prevSym.End().Line
		comments := n.LeadingComments()
		c := comments[0]
		commentStart := c.Start.Line
		if commentStart == prevEnd {
			// comment is on same line as previous symbol
			n.PopLeadingComment()
			l.prevSym.PushTrailingComment(c)
		} else if commentStart == prevEnd+1 {
			// comment is right after previous symbol; see if it is detached
			// and if so re-attribute
			singleLineStyle := strings.HasPrefix(c.Text, "//")
			line := c.End.Line
			// groupEnd is the index of the first comment that is NOT part of
			// the first group, or -1 if no group boundary was found
			groupEnd := -1
			for i := 1; i < len(comments); i++ {
				c := comments[i]
				newGroup := false
				if !singleLineStyle || c.Start.Line > line+1 {
					// we've found a gap between comments, which means the
					// previous comments were detached
					newGroup = true
				} else {
					line = c.End.Line
					singleLineStyle = strings.HasPrefix(comments[i].Text, "//")
					if !singleLineStyle {
						// we've found a switch from // comments to /*
						// consider that a new group which means the
						// previous comments were detached
						newGroup = true
					}
				}
				if newGroup {
					groupEnd = i
					break
				}
			}

			if groupEnd == -1 {
				// just one group of comments; we'll mark it as a trailing
				// comment if it immediately follows previous symbol and is
				// detached from current symbol
				c1 := comments[0]
				c2 := comments[len(comments)-1]
				if c1.Start.Line <= prevEnd+1 && c2.End.Line < nStart-1 {
					groupEnd = len(comments)
				}
			}

			// move the first groupEnd comments over; if groupEnd is still -1
			// the loop body never runs and nothing is moved
			for i := 0; i < groupEnd; i++ {
				l.prevSym.PushTrailingComment(n.PopLeadingComment())
			}
		}
	}

	l.prevSym = n
}
   499  
   500  func (l *protoLex) setString(lval *protoSymType, val string) {
   501  	lval.s = ast.NewStringLiteralNode(val, l.newTokenInfo())
   502  	l.setPrev(lval.s, false)
   503  }
   504  
   505  func (l *protoLex) setIdent(lval *protoSymType, val string) {
   506  	lval.id = ast.NewIdentNode(val, l.newTokenInfo())
   507  	l.setPrev(lval.id, false)
   508  }
   509  
   510  func (l *protoLex) setInt(lval *protoSymType, val uint64) {
   511  	lval.i = ast.NewUintLiteralNode(val, l.newTokenInfo())
   512  	l.setPrev(lval.i, false)
   513  }
   514  
   515  func (l *protoLex) setFloat(lval *protoSymType, val float64) {
   516  	lval.f = ast.NewFloatLiteralNode(val, l.newTokenInfo())
   517  	l.setPrev(lval.f, false)
   518  }
   519  
   520  func (l *protoLex) setRune(lval *protoSymType, val rune) {
   521  	lval.b = ast.NewRuneNode(val, l.newTokenInfo())
   522  	l.setPrev(lval.b, val == '.')
   523  }
   524  
   525  func (l *protoLex) setError(lval *protoSymType, err error) {
   526  	lval.err = l.addSourceError(err)
   527  }
   528  
// readNumber consumes the rest of a decimal numeric literal and returns
// sofar extended with every rune it accepted. The first rune that cannot be
// part of the number is pushed back. The position is advanced (adjustPos)
// only for accepted runes.
//
// allowDot says whether a '.' may still be accepted; it is cleared once one
// has been. allowExp says the same for an exponent marker ('e' or 'E'). An
// exponent is accepted only when followed by a digit, or by a sign and then
// a digit; otherwise everything from the marker on is pushed back and the
// number ends before it.
//
// NOTE(review): consuming an exponent does not clear allowDot, so an input
// such as "1e5.5" is returned as one token; the caller's strconv.ParseFloat
// then rejects it. Confirm that this is intended.
func (l *protoLex) readNumber(sofar []rune, allowDot bool, allowExp bool) []rune {
	token := sofar
	for {
		c, _, err := l.input.readRune()
		if err != nil {
			break
		}
		if c == '.' {
			if !allowDot {
				l.input.unreadRune(c)
				break
			}
			allowDot = false
		} else if c == 'e' || c == 'E' {
			if !allowExp {
				l.input.unreadRune(c)
				break
			}
			allowExp = false
			cn, _, err := l.input.readRune()
			if err != nil {
				l.input.unreadRune(c)
				break
			}
			if cn == '-' || cn == '+' {
				cnn, _, err := l.input.readRune()
				if err != nil {
					// push back in reverse order of reading
					l.input.unreadRune(cn)
					l.input.unreadRune(c)
					break
				}
				if cnn < '0' || cnn > '9' {
					l.input.unreadRune(cnn)
					l.input.unreadRune(cn)
					l.input.unreadRune(c)
					break
				}
				// accept the exponent marker, then shift so that c is the
				// sign and cn the first digit for the shared code below
				l.adjustPos(c)
				token = append(token, c)
				c, cn = cn, cnn
			} else if cn < '0' || cn > '9' {
				l.input.unreadRune(cn)
				l.input.unreadRune(c)
				break
			}
			// accept c (the marker, or the sign) and leave the first
			// exponent digit in c for the append at the end of the loop
			l.adjustPos(c)
			token = append(token, c)
			c = cn
		} else if c < '0' || c > '9' {
			l.input.unreadRune(c)
			break
		}
		l.adjustPos(c)
		token = append(token, c)
	}
	return token
}
   586  
   587  func (l *protoLex) readHexNumber(sofar []rune) []rune {
   588  	token := sofar
   589  	for {
   590  		c, _, err := l.input.readRune()
   591  		if err != nil {
   592  			break
   593  		}
   594  		if (c < 'a' || c > 'f') && (c < 'A' || c > 'F') && (c < '0' || c > '9') {
   595  			l.input.unreadRune(c)
   596  			break
   597  		}
   598  		l.adjustPos(c)
   599  		token = append(token, c)
   600  	}
   601  	return token
   602  }
   603  
   604  func (l *protoLex) readIdentifier(sofar []rune) []rune {
   605  	token := sofar
   606  	for {
   607  		c, _, err := l.input.readRune()
   608  		if err != nil {
   609  			break
   610  		}
   611  		if c != '_' && (c < 'a' || c > 'z') && (c < 'A' || c > 'Z') && (c < '0' || c > '9') {
   612  			l.input.unreadRune(c)
   613  			break
   614  		}
   615  		l.adjustPos(c)
   616  		token = append(token, c)
   617  	}
   618  	return token
   619  }
   620  
   621  func (l *protoLex) readStringLiteral(quote rune) (string, error) {
   622  	var buf bytes.Buffer
   623  	for {
   624  		c, _, err := l.input.readRune()
   625  		if err != nil {
   626  			if err == io.EOF {
   627  				err = io.ErrUnexpectedEOF
   628  			}
   629  			return "", err
   630  		}
   631  		if c == '\n' {
   632  			return "", errors.New("encountered end-of-line before end of string literal")
   633  		}
   634  		l.adjustPos(c)
   635  		if c == quote {
   636  			break
   637  		}
   638  		if c == 0 {
   639  			return "", errors.New("null character ('\\0') not allowed in string literal")
   640  		}
   641  		if c == '\\' {
   642  			// escape sequence
   643  			c, _, err = l.input.readRune()
   644  			if err != nil {
   645  				return "", err
   646  			}
   647  			l.adjustPos(c)
   648  			if c == 'x' || c == 'X' {
   649  				// hex escape
   650  				c, _, err := l.input.readRune()
   651  				if err != nil {
   652  					return "", err
   653  				}
   654  				l.adjustPos(c)
   655  				c2, _, err := l.input.readRune()
   656  				if err != nil {
   657  					return "", err
   658  				}
   659  				var hex string
   660  				if (c2 < '0' || c2 > '9') && (c2 < 'a' || c2 > 'f') && (c2 < 'A' || c2 > 'F') {
   661  					l.input.unreadRune(c2)
   662  					hex = string(c)
   663  				} else {
   664  					l.adjustPos(c2)
   665  					hex = string([]rune{c, c2})
   666  				}
   667  				i, err := strconv.ParseInt(hex, 16, 32)
   668  				if err != nil {
   669  					return "", fmt.Errorf("invalid hex escape: \\x%q", hex)
   670  				}
   671  				buf.WriteByte(byte(i))
   672  
   673  			} else if c >= '0' && c <= '7' {
   674  				// octal escape
   675  				c2, _, err := l.input.readRune()
   676  				if err != nil {
   677  					return "", err
   678  				}
   679  				var octal string
   680  				if c2 < '0' || c2 > '7' {
   681  					l.input.unreadRune(c2)
   682  					octal = string(c)
   683  				} else {
   684  					l.adjustPos(c2)
   685  					c3, _, err := l.input.readRune()
   686  					if err != nil {
   687  						return "", err
   688  					}
   689  					if c3 < '0' || c3 > '7' {
   690  						l.input.unreadRune(c3)
   691  						octal = string([]rune{c, c2})
   692  					} else {
   693  						l.adjustPos(c3)
   694  						octal = string([]rune{c, c2, c3})
   695  					}
   696  				}
   697  				i, err := strconv.ParseInt(octal, 8, 32)
   698  				if err != nil {
   699  					return "", fmt.Errorf("invalid octal escape: \\%q", octal)
   700  				}
   701  				if i > 0xff {
   702  					return "", fmt.Errorf("octal escape is out range, must be between 0 and 377: \\%q", octal)
   703  				}
   704  				buf.WriteByte(byte(i))
   705  
   706  			} else if c == 'u' {
   707  				// short unicode escape
   708  				u := make([]rune, 4)
   709  				for i := range u {
   710  					c, _, err := l.input.readRune()
   711  					if err != nil {
   712  						return "", err
   713  					}
   714  					l.adjustPos(c)
   715  					u[i] = c
   716  				}
   717  				i, err := strconv.ParseInt(string(u), 16, 32)
   718  				if err != nil {
   719  					return "", fmt.Errorf("invalid unicode escape: \\u%q", string(u))
   720  				}
   721  				buf.WriteRune(rune(i))
   722  
   723  			} else if c == 'U' {
   724  				// long unicode escape
   725  				u := make([]rune, 8)
   726  				for i := range u {
   727  					c, _, err := l.input.readRune()
   728  					if err != nil {
   729  						return "", err
   730  					}
   731  					l.adjustPos(c)
   732  					u[i] = c
   733  				}
   734  				i, err := strconv.ParseInt(string(u), 16, 32)
   735  				if err != nil {
   736  					return "", fmt.Errorf("invalid unicode escape: \\U%q", string(u))
   737  				}
   738  				if i > 0x10ffff || i < 0 {
   739  					return "", fmt.Errorf("unicode escape is out of range, must be between 0 and 0x10ffff: \\U%q", string(u))
   740  				}
   741  				buf.WriteRune(rune(i))
   742  
   743  			} else if c == 'a' {
   744  				buf.WriteByte('\a')
   745  			} else if c == 'b' {
   746  				buf.WriteByte('\b')
   747  			} else if c == 'f' {
   748  				buf.WriteByte('\f')
   749  			} else if c == 'n' {
   750  				buf.WriteByte('\n')
   751  			} else if c == 'r' {
   752  				buf.WriteByte('\r')
   753  			} else if c == 't' {
   754  				buf.WriteByte('\t')
   755  			} else if c == 'v' {
   756  				buf.WriteByte('\v')
   757  			} else if c == '\\' {
   758  				buf.WriteByte('\\')
   759  			} else if c == '\'' {
   760  				buf.WriteByte('\'')
   761  			} else if c == '"' {
   762  				buf.WriteByte('"')
   763  			} else if c == '?' {
   764  				buf.WriteByte('?')
   765  			} else {
   766  				return "", fmt.Errorf("invalid escape sequence: %q", "\\"+string(c))
   767  			}
   768  		} else {
   769  			buf.WriteRune(c)
   770  		}
   771  	}
   772  	return buf.String(), nil
   773  }
   774  
   775  func (l *protoLex) skipToEndOfLineComment() bool {
   776  	for {
   777  		c, _, err := l.input.readRune()
   778  		if err != nil {
   779  			return false
   780  		}
   781  		if c == '\n' {
   782  			return true
   783  		}
   784  		l.adjustPos(c)
   785  	}
   786  }
   787  
   788  func (l *protoLex) skipToEndOfBlockComment() bool {
   789  	for {
   790  		c, _, err := l.input.readRune()
   791  		if err != nil {
   792  			return false
   793  		}
   794  		l.adjustPos(c)
   795  		if c == '*' {
   796  			c, _, err := l.input.readRune()
   797  			if err != nil {
   798  				return false
   799  			}
   800  			if c == '/' {
   801  				l.adjustPos(c)
   802  				return true
   803  			}
   804  			l.input.unreadRune(c)
   805  		}
   806  	}
   807  }
   808  
   809  func (l *protoLex) addSourceError(err error) ErrorWithPos {
   810  	ewp, ok := err.(ErrorWithPos)
   811  	if !ok {
   812  		ewp = ErrorWithSourcePos{Pos: l.prev(), Underlying: err}
   813  	}
   814  	_ = l.errs.handleError(ewp)
   815  	return ewp
   816  }
   817  
   818  func (l *protoLex) Error(s string) {
   819  	_ = l.addSourceError(errors.New(s))
   820  }