github.com/syumai/protoreflect@v1.7.1-0.20200810020253-2ac7e3b3a321/desc/protoparse/parser.go

github.com/syumai/protoreflect@v1.7.1-0.20200810020253-2ac7e3b3a321/desc/protoparse/parser.go (about)

     1  package protoparse
     2  
     3  import (
     4  	"bytes"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"io/ioutil"
     9  	"math"
    10  	"os"
    11  	"path/filepath"
    12  	"sort"
    13  	"strings"
    14  
    15  	"github.com/golang/protobuf/proto"
    16  	dpb "github.com/golang/protobuf/protoc-gen-go/descriptor"
    17  
    18  	"github.com/syumai/protoreflect/desc"
    19  	"github.com/syumai/protoreflect/desc/internal"
    20  )
    21  
    22  //go:generate goyacc -o proto.y.go -p proto proto.y
    23  
    24  func init() {
    25  	protoErrorVerbose = true
    26  
    27  	// fix up the generated "token name" array so that error messages are nicer
    28  	setTokenName(_STRING_LIT, "string literal")
    29  	setTokenName(_INT_LIT, "int literal")
    30  	setTokenName(_FLOAT_LIT, "float literal")
    31  	setTokenName(_NAME, "identifier")
    32  	setTokenName(_ERROR, "error")
    33  	// for keywords, just show the keyword itself wrapped in quotes
    34  	for str, i := range keywords {
    35  		setTokenName(i, fmt.Sprintf(`"%s"`, str))
    36  	}
    37  }
    38  
    39  func setTokenName(token int, text string) {
    40  	// NB: this is based on logic in generated parse code that translates the
    41  	// int returned from the lexer into an internal token number.
    42  	var intern int
    43  	if token < len(protoTok1) {
    44  		intern = protoTok1[token]
    45  	} else {
    46  		if token >= protoPrivate {
    47  			if token < protoPrivate+len(protoTok2) {
    48  				intern = protoTok2[token-protoPrivate]
    49  			}
    50  		}
    51  		if intern == 0 {
    52  			for i := 0; i+1 < len(protoTok3); i += 2 {
    53  				if protoTok3[i] == token {
    54  					intern = protoTok3[i+1]
    55  					break
    56  				}
    57  			}
    58  		}
    59  	}
    60  
    61  	if intern >= 1 && intern-1 < len(protoToknames) {
    62  		protoToknames[intern-1] = text
    63  		return
    64  	}
    65  
    66  	panic(fmt.Sprintf("Unknown token value: %d", token))
    67  }
    68  
    69  // FileAccessor is an abstraction for opening proto source files. It takes the
    70  // name of the file to open and returns either the input reader or an error.
    71  type FileAccessor func(filename string) (io.ReadCloser, error)
    72  
    73  // FileContentsFromMap returns a FileAccessor that uses the given map of file
    74  // contents. This allows proto source files to be constructed in memory and
    75  // easily supplied to a parser. The map keys are the paths to the proto source
    76  // files, and the values are the actual proto source contents.
    77  func FileContentsFromMap(files map[string]string) FileAccessor {
    78  	return func(filename string) (io.ReadCloser, error) {
    79  		contents, ok := files[filename]
    80  		if !ok {
    81  			return nil, os.ErrNotExist
    82  		}
    83  		return ioutil.NopCloser(strings.NewReader(contents)), nil
    84  	}
    85  }
    86  
// Parser parses proto source into descriptors.
type Parser struct {
	// The paths used to search for dependencies that are referenced in import
	// statements in proto source files. If no import paths are provided then
	// "." (current directory) is assumed to be the only import path.
	//
	// This setting is only used during ParseFiles operations. Since calls to
	// ParseFilesButDoNotLink do not link, there is no need to load and parse
	// dependencies.
	ImportPaths []string

	// If true, the supplied file names/paths need not necessarily match how the
	// files are referenced in import statements. The parser will attempt to
	// match import statements to supplied paths, "guessing" the import paths
	// for the files. Note that this inference is not perfect and link errors
	// could result. It works best when all proto files are organized such that
	// a single import path can be inferred (e.g. all files under a single tree
	// with import statements all being relative to the root of this tree).
	InferImportPaths bool

	// LookupImport is a function that accepts a filename and
	// returns a file descriptor, which will be consulted when resolving imports.
	// This allows a compiled Go proto in another Go module to be referenced
	// in the proto(s) being parsed.
	//
	// In the event of a filename collision, Accessor is consulted first,
	// then LookupImport is consulted, and finally the well-known protos
	// are used.
	//
	// For example, in order to automatically look up compiled Go protos that
	// have been imported and be able to use them as imports, set this to
	// desc.LoadFileDescriptor.
	LookupImport func(string) (*desc.FileDescriptor, error)

	// LookupImportProto has the same functionality as LookupImport, however it returns
	// a FileDescriptorProto instead of a FileDescriptor.
	//
	// It is an error to set both LookupImport and LookupImportProto.
	LookupImportProto func(string) (*dpb.FileDescriptorProto, error)

	// Used to create a reader for a given filename, when loading proto source
	// file contents. If unset, os.Open is used. If ImportPaths is also empty
	// then relative paths will be relative to the process's current working
	// directory.
	Accessor FileAccessor

	// If true, the resulting file descriptors will retain source code info,
	// that maps elements to their location in the source files as well as
	// includes comments found during parsing (and attributed to elements of
	// the source file).
	IncludeSourceCodeInfo bool

	// If true, the results from ParseFilesButDoNotLink will be passed through
	// some additional validations. But only constraints that do not require
	// linking can be checked. These include proto2 vs. proto3 language features,
	// looking for incorrect usage of reserved names or tags, and ensuring that
	// fields have unique tags and that enum values have unique numbers (unless
	// the enum allows aliases).
	ValidateUnlinkedFiles bool

	// If true, the results from ParseFilesButDoNotLink will have options
	// interpreted. Any uninterpretable options (including any custom options or
	// options that refer to message and enum types, which can only be
	// interpreted after linking) will be left in uninterpreted_options. Also,
	// the "default" pseudo-option for fields can only be interpreted for scalar
	// fields, excluding enums. (Interpreting default values for enum fields
	// requires resolving enum names, which requires linking.)
	InterpretOptionsInUnlinkedFiles bool

	// A custom reporter of syntax and link errors. If not specified, the
	// default reporter just returns the reported error, which causes parsing
	// to abort after encountering a single error.
	//
	// The reporter is not invoked for system or I/O errors, only for syntax and
	// link errors.
	ErrorReporter ErrorReporter

	// A custom reporter of warnings. If not specified, warning messages are ignored.
	WarningReporter WarningReporter
}
   167  
   168  // ParseFiles parses the named files into descriptors. The returned slice has
   169  // the same number of entries as the give filenames, in the same order. So the
   170  // first returned descriptor corresponds to the first given name, and so on.
   171  //
   172  // All dependencies for all specified files (including transitive dependencies)
   173  // must be accessible via the parser's Accessor or a link error will occur. The
   174  // exception to this rule is that files can import standard Google-provided
   175  // files -- e.g. google/protobuf/*.proto -- without needing to supply sources
   176  // for these files. Like protoc, this parser has a built-in version of these
   177  // files it can use if they aren't explicitly supplied.
   178  //
   179  // If the Parser has no ErrorReporter set and a syntax or link error occurs,
   180  // parsing will abort with the first such error encountered. If there is an
   181  // ErrorReporter configured and it returns non-nil, parsing will abort with the
   182  // error it returns. If syntax or link errors are encountered but the configured
   183  // ErrorReporter always returns nil, the parse fails with ErrInvalidSource.
   184  func (p Parser) ParseFiles(filenames ...string) ([]*desc.FileDescriptor, error) {
   185  	accessor := p.Accessor
   186  	if accessor == nil {
   187  		accessor = func(name string) (io.ReadCloser, error) {
   188  			return os.Open(name)
   189  		}
   190  	}
   191  	paths := p.ImportPaths
   192  	if len(paths) > 0 {
   193  		acc := accessor
   194  		accessor = func(name string) (io.ReadCloser, error) {
   195  			var ret error
   196  			for _, path := range paths {
   197  				f, err := acc(filepath.Join(path, name))
   198  				if err != nil {
   199  					if ret == nil {
   200  						ret = err
   201  					}
   202  					continue
   203  				}
   204  				return f, nil
   205  			}
   206  			return nil, ret
   207  		}
   208  	}
   209  	lookupImport, err := p.getLookupImport()
   210  	if err != nil {
   211  		return nil, err
   212  	}
   213  
   214  	protos := map[string]*parseResult{}
   215  	results := &parseResults{resultsByFilename: protos}
   216  	errs := newErrorHandler(p.ErrorReporter, p.WarningReporter)
   217  	parseProtoFiles(accessor, filenames, errs, true, true, results, lookupImport)
   218  	if err := errs.getError(); err != nil {
   219  		return nil, err
   220  	}
   221  	if p.InferImportPaths {
   222  		// TODO: if this re-writes one of the names in filenames, lookups below will break
   223  		protos = fixupFilenames(protos)
   224  	}
   225  	linkedProtos, err := newLinker(results, errs).linkFiles()
   226  	if err != nil {
   227  		return nil, err
   228  	}
   229  	if p.IncludeSourceCodeInfo {
   230  		for name, fd := range linkedProtos {
   231  			pr := protos[name]
   232  			fd.AsFileDescriptorProto().SourceCodeInfo = pr.generateSourceCodeInfo()
   233  			internal.RecomputeSourceInfo(fd)
   234  		}
   235  	}
   236  	fds := make([]*desc.FileDescriptor, len(filenames))
   237  	for i, name := range filenames {
   238  		fd := linkedProtos[name]
   239  		fds[i] = fd
   240  	}
   241  	return fds, nil
   242  }
   243  
   244  // ParseFilesButDoNotLink parses the named files into descriptor protos. The
   245  // results are just protos, not fully-linked descriptors. It is possible that
   246  // descriptors are invalid and still be returned in parsed form without error
   247  // due to the fact that the linking step is skipped (and thus many validation
   248  // steps omitted).
   249  //
   250  // There are a few side effects to not linking the descriptors:
   251  //   1. No options will be interpreted. Options can refer to extensions or have
   252  //      message and enum types. Without linking, these extension and type
   253  //      references are not resolved, so the options may not be interpretable.
   254  //      So all options will appear in UninterpretedOption fields of the various
   255  //      descriptor options messages.
   256  //   2. Type references will not be resolved. This means that the actual type
   257  //      names in the descriptors may be unqualified and even relative to the
   258  //      scope in which the type reference appears. This goes for fields that
   259  //      have message and enum types. It also applies to methods and their
   260  //      references to request and response message types.
   261  //   3. Enum fields are not known. Until a field's type reference is resolved
   262  //      (during linking), it is not known whether the type refers to a message
   263  //      or an enum. So all fields with such type references have their Type set
   264  //      to TYPE_MESSAGE.
   265  //
   266  // This method will still validate the syntax of parsed files. If the parser's
   267  // ValidateUnlinkedFiles field is true, additional checks, beyond syntax will
   268  // also be performed.
   269  //
   270  // If the Parser has no ErrorReporter set and a syntax or link error occurs,
   271  // parsing will abort with the first such error encountered. If there is an
   272  // ErrorReporter configured and it returns non-nil, parsing will abort with the
   273  // error it returns. If syntax or link errors are encountered but the configured
   274  // ErrorReporter always returns nil, the parse fails with ErrInvalidSource.
   275  func (p Parser) ParseFilesButDoNotLink(filenames ...string) ([]*dpb.FileDescriptorProto, error) {
   276  	accessor := p.Accessor
   277  	if accessor == nil {
   278  		accessor = func(name string) (io.ReadCloser, error) {
   279  			return os.Open(name)
   280  		}
   281  	}
   282  	lookupImport, err := p.getLookupImport()
   283  	if err != nil {
   284  		return nil, err
   285  	}
   286  
   287  	protos := map[string]*parseResult{}
   288  	errs := newErrorHandler(p.ErrorReporter, p.WarningReporter)
   289  	parseProtoFiles(accessor, filenames, errs, false, p.ValidateUnlinkedFiles, &parseResults{resultsByFilename: protos}, lookupImport)
   290  	if err := errs.getError(); err != nil {
   291  		return nil, err
   292  	}
   293  	if p.InferImportPaths {
   294  		// TODO: if this re-writes one of the names in filenames, lookups below will break
   295  		protos = fixupFilenames(protos)
   296  	}
   297  	fds := make([]*dpb.FileDescriptorProto, len(filenames))
   298  	for i, name := range filenames {
   299  		pr := protos[name]
   300  		fd := pr.fd
   301  		if p.InterpretOptionsInUnlinkedFiles {
   302  			// parsing options will be best effort
   303  			pr.lenient = true
   304  			// we don't want the real error reporter see any errors
   305  			pr.errs.errReporter = func(err ErrorWithPos) error {
   306  				return err
   307  			}
   308  			_ = interpretFileOptions(pr, poorFileDescriptorish{FileDescriptorProto: fd})
   309  		}
   310  		if p.IncludeSourceCodeInfo {
   311  			fd.SourceCodeInfo = pr.generateSourceCodeInfo()
   312  		}
   313  		fds[i] = fd
   314  	}
   315  	return fds, nil
   316  }
   317  
   318  func (p Parser) getLookupImport() (func(string) (*dpb.FileDescriptorProto, error), error) {
   319  	if p.LookupImport != nil && p.LookupImportProto != nil {
   320  		return nil, ErrLookupImportAndProtoSet
   321  	}
   322  	if p.LookupImportProto != nil {
   323  		return p.LookupImportProto, nil
   324  	}
   325  	if p.LookupImport != nil {
   326  		return func(path string) (*dpb.FileDescriptorProto, error) {
   327  			value, err := p.LookupImport(path)
   328  			if value != nil {
   329  				return value.AsFileDescriptorProto(), err
   330  			}
   331  			return nil, err
   332  		}, nil
   333  	}
   334  	return nil, nil
   335  }
   336  
// fixupFilenames returns a new map in which the given parse results are keyed
// by revised file names, chosen so that they match the paths used in the
// files' import statements. When a result is given a revised name, the Name of
// its file descriptor is updated in place to match. The supplied map itself is
// not modified, and the returned map holds the same *parseResult values.
// Results for which no better name can be inferred keep their original key.
//
// NOTE(review): the loops below range over maps, whose iteration order is
// unspecified in Go, and a candidate file is consumed by the first import that
// claims it. When several imports compete for the same candidates, the outcome
// may therefore differ between runs -- confirm whether this matters to callers.
func fixupFilenames(protos map[string]*parseResult) map[string]*parseResult {
	// In the event that the given filenames (keys in the supplied map) do not
	// match the actual paths used in 'import' statements in the files, we try
	// to revise names in the protos so that they will match and be linkable.
	revisedProtos := map[string]*parseResult{}

	// set of path prefixes that were stripped from file names to make them
	// match an import statement
	protoPaths := map[string]struct{}{}
	// TODO: this is O(n^2) but could likely be O(n) with a clever data structure (prefix tree that is indexed backwards?)
	// maps each import path to the set of file names that end with that path
	importCandidates := map[string]map[string]struct{}{}
	// set of file names that have not yet been assigned a revised name
	candidatesAvailable := map[string]struct{}{}
	for name := range protos {
		candidatesAvailable[name] = struct{}{}
		for _, f := range protos {
			for _, imp := range f.fd.Dependency {
				// plain suffix match: the file name need only end with the
				// text of the import path
				if strings.HasSuffix(name, imp) {
					candidates := importCandidates[imp]
					if candidates == nil {
						candidates = map[string]struct{}{}
						importCandidates[imp] = candidates
					}
					candidates[name] = struct{}{}
				}
			}
		}
	}
	for imp, candidates := range importCandidates {
		// if we found multiple possible candidates, use the one that is an exact match
		// if it exists, and otherwise, guess that it's the shortest path (fewest elements)
		var best string
		for c := range candidates {
			if _, ok := candidatesAvailable[c]; !ok {
				// already used this candidate and re-written its filename accordingly
				continue
			}
			if c == imp {
				// exact match!
				best = c
				break
			}
			if best == "" {
				best = c
			} else {
				// HACK: we can't actually tell which files is supposed to match
				// this import, so arbitrarily pick the "shorter" one (fewest
				// path elements) or, on a tie, the lexically earlier one
				minLen := strings.Count(best, string(filepath.Separator))
				cLen := strings.Count(c, string(filepath.Separator))
				if cLen < minLen || (cLen == minLen && c < best) {
					best = c
				}
			}
		}
		if best != "" {
			// whatever precedes the import path in the chosen file name is
			// remembered as an inferred proto path
			prefix := best[:len(best)-len(imp)]
			if len(prefix) > 0 {
				protoPaths[prefix] = struct{}{}
			}
			f := protos[best]
			f.fd.Name = proto.String(imp)
			revisedProtos[imp] = f
			delete(candidatesAvailable, best)
		}
	}

	if len(candidatesAvailable) == 0 {
		return revisedProtos
	}

	if len(protoPaths) == 0 {
		// no proto paths were inferred, so the remaining files keep their names
		for c := range candidatesAvailable {
			revisedProtos[c] = protos[c]
		}
		return revisedProtos
	}

	// Any remaining candidates are entry-points (not imported by others), so
	// the best bet to "fixing" their file name is to see if they're in one of
	// the proto paths we found, and if so strip that prefix.
	protoPathStrs := make([]string, len(protoPaths))
	i := 0
	for p := range protoPaths {
		protoPathStrs[i] = p
		i++
	}
	sort.Strings(protoPathStrs)
	// we look at paths in reverse order, so we'll use a longer proto path if
	// there is more than one match
	for c := range candidatesAvailable {
		var imp string
		for i := len(protoPathStrs) - 1; i >= 0; i-- {
			p := protoPathStrs[i]
			if strings.HasPrefix(c, p) {
				imp = c[len(p):]
				break
			}
		}
		if imp != "" {
			f := protos[c]
			f.fd.Name = proto.String(imp)
			revisedProtos[imp] = f
		} else {
			revisedProtos[c] = protos[c]
		}
	}

	return revisedProtos
}
   444  
   445  func parseProtoFiles(acc FileAccessor, filenames []string, errs *errorHandler, recursive, validate bool, parsed *parseResults, lookupImport func(string) (*dpb.FileDescriptorProto, error)) {
   446  	for _, name := range filenames {
   447  		parseProtoFile(acc, name, nil, errs, recursive, validate, parsed, lookupImport)
   448  		if errs.err != nil {
   449  			return
   450  		}
   451  	}
   452  }
   453  
   454  func parseProtoFile(acc FileAccessor, filename string, importLoc *SourcePos, errs *errorHandler, recursive, validate bool, parsed *parseResults, lookupImport func(string) (*dpb.FileDescriptorProto, error)) {
   455  	if parsed.has(filename) {
   456  		return
   457  	}
   458  	if lookupImport == nil {
   459  		lookupImport = func(string) (*dpb.FileDescriptorProto, error) {
   460  			return nil, errors.New("no import lookup function")
   461  		}
   462  	}
   463  	in, err := acc(filename)
   464  	var result *parseResult
   465  	if err == nil {
   466  		// try to parse the bytes accessed
   467  		func() {
   468  			defer func() {
   469  				// if we've already parsed contents, an error
   470  				// closing need not fail this operation
   471  				_ = in.Close()
   472  			}()
   473  			result = parseProto(filename, in, errs, validate)
   474  		}()
   475  	} else if d, lookupErr := lookupImport(filename); lookupErr == nil {
   476  		// This is a user-provided descriptor, which is acting similarly to a
   477  		// well-known import.
   478  		result = &parseResult{fd: proto.Clone(d).(*dpb.FileDescriptorProto)}
   479  	} else if d, ok := standardImports[filename]; ok {
   480  		// it's a well-known import
   481  		// (we clone it to make sure we're not sharing state with other
   482  		//  parsers, which could result in unsafe races if multiple
   483  		//  parsers are trying to access it concurrently)
   484  		result = &parseResult{fd: proto.Clone(d).(*dpb.FileDescriptorProto)}
   485  	} else {
   486  		if !strings.Contains(err.Error(), filename) {
   487  			// an error message that doesn't indicate the file is awful!
   488  			// this cannot be %w as this is not compatible with go <= 1.13
   489  			err = errorWithFilename{
   490  				underlying: err,
   491  				filename:   filename,
   492  			}
   493  		}
   494  		// The top-level loop in parseProtoFiles calls this with nil for the top-level files
   495  		// importLoc is only for imports, otherwise we do not want to return a ErrorWithSourcePos
   496  		// ErrorWithSourcePos should always have a non-nil SourcePos
   497  		if importLoc != nil {
   498  			// associate the error with the import line
   499  			err = ErrorWithSourcePos{
   500  				Pos:        importLoc,
   501  				Underlying: err,
   502  			}
   503  		}
   504  		_ = errs.handleError(err)
   505  		return
   506  	}
   507  
   508  	parsed.add(filename, result)
   509  
   510  	if errs.err != nil {
   511  		return // abort
   512  	}
   513  
   514  	if recursive {
   515  		fd := result.fd
   516  		decl := result.getFileNode(fd)
   517  		fnode, ok := decl.(*fileNode)
   518  		if !ok {
   519  			// no AST for this file? use imports in descriptor
   520  			for _, dep := range fd.Dependency {
   521  				parseProtoFile(acc, dep, decl.start(), errs, true, validate, parsed, lookupImport)
   522  				if errs.getError() != nil {
   523  					return // abort
   524  				}
   525  			}
   526  			return
   527  		}
   528  		// we have an AST; use it so we can report import location in errors
   529  		for _, dep := range fnode.imports {
   530  			parseProtoFile(acc, dep.name.val, dep.name.start(), errs, true, validate, parsed, lookupImport)
   531  			if errs.getError() != nil {
   532  				return // abort
   533  			}
   534  		}
   535  	}
   536  }
   537  
   538  type parseResults struct {
   539  	resultsByFilename map[string]*parseResult
   540  	filenames         []string
   541  }
   542  
   543  func (r *parseResults) has(filename string) bool {
   544  	_, ok := r.resultsByFilename[filename]
   545  	return ok
   546  }
   547  
   548  func (r *parseResults) add(filename string, result *parseResult) {
   549  	r.resultsByFilename[filename] = result
   550  	r.filenames = append(r.filenames, filename)
   551  }
   552  
// parseResult holds the outcome of processing a single file: the file
// descriptor proto plus the bookkeeping that ties descriptor elements back to
// nodes of the AST. Results created from an existing descriptor rather than
// from source (see parseProtoFile) only have fd set; their nodes map is nil.
type parseResult struct {
	// handles any errors encountered during parsing, construction of file descriptor,
	// or validation
	errs *errorHandler

	// the parsed file descriptor
	fd *dpb.FileDescriptorProto

	// if set to true, enables lenient interpretation of options, where
	// unrecognized options will be left uninterpreted instead of resulting in a
	// link error
	lenient bool

	// a map of elements in the descriptor to nodes in the AST
	// (for extracting position information when validating the descriptor)
	nodes map[proto.Message]node

	// a map of uninterpreted option AST nodes to their relative path
	// in the resulting options message
	interpretedOptions map[*optionNode][]int32
}
   574  
   575  func (r *parseResult) getFileNode(f *dpb.FileDescriptorProto) fileDecl {
   576  	if r.nodes == nil {
   577  		return noSourceNode{pos: unknownPos(f.GetName())}
   578  	}
   579  	return r.nodes[f].(fileDecl)
   580  }
   581  
   582  func (r *parseResult) getOptionNode(o *dpb.UninterpretedOption) optionDecl {
   583  	if r.nodes == nil {
   584  		return noSourceNode{pos: unknownPos(r.fd.GetName())}
   585  	}
   586  	return r.nodes[o].(optionDecl)
   587  }
   588  
   589  func (r *parseResult) getOptionNamePartNode(o *dpb.UninterpretedOption_NamePart) node {
   590  	if r.nodes == nil {
   591  		return noSourceNode{pos: unknownPos(r.fd.GetName())}
   592  	}
   593  	return r.nodes[o]
   594  }
   595  
   596  func (r *parseResult) getFieldNode(f *dpb.FieldDescriptorProto) fieldDecl {
   597  	if r.nodes == nil {
   598  		return noSourceNode{pos: unknownPos(r.fd.GetName())}
   599  	}
   600  	return r.nodes[f].(fieldDecl)
   601  }
   602  
   603  func (r *parseResult) getExtensionRangeNode(e *dpb.DescriptorProto_ExtensionRange) rangeDecl {
   604  	if r.nodes == nil {
   605  		return noSourceNode{pos: unknownPos(r.fd.GetName())}
   606  	}
   607  	return r.nodes[e].(rangeDecl)
   608  }
   609  
   610  func (r *parseResult) getMessageReservedRangeNode(rr *dpb.DescriptorProto_ReservedRange) rangeDecl {
   611  	if r.nodes == nil {
   612  		return noSourceNode{pos: unknownPos(r.fd.GetName())}
   613  	}
   614  	return r.nodes[rr].(rangeDecl)
   615  }
   616  
   617  func (r *parseResult) getEnumNode(e *dpb.EnumDescriptorProto) node {
   618  	if r.nodes == nil {
   619  		return noSourceNode{pos: unknownPos(r.fd.GetName())}
   620  	}
   621  	return r.nodes[e]
   622  }
   623  
   624  func (r *parseResult) getEnumValueNode(e *dpb.EnumValueDescriptorProto) enumValueDecl {
   625  	if r.nodes == nil {
   626  		return noSourceNode{pos: unknownPos(r.fd.GetName())}
   627  	}
   628  	return r.nodes[e].(enumValueDecl)
   629  }
   630  
   631  func (r *parseResult) getEnumReservedRangeNode(rr *dpb.EnumDescriptorProto_EnumReservedRange) rangeDecl {
   632  	if r.nodes == nil {
   633  		return noSourceNode{pos: unknownPos(r.fd.GetName())}
   634  	}
   635  	return r.nodes[rr].(rangeDecl)
   636  }
   637  
   638  func (r *parseResult) getMethodNode(m *dpb.MethodDescriptorProto) methodDecl {
   639  	if r.nodes == nil {
   640  		return noSourceNode{pos: unknownPos(r.fd.GetName())}
   641  	}
   642  	return r.nodes[m].(methodDecl)
   643  }
   644  
// The put*Node methods below record which AST node a descriptor element was
// built from, by storing the node in r.nodes keyed by the element. They
// require r.nodes to be non-nil.

// putFileNode records n as the AST node for file descriptor f.
func (r *parseResult) putFileNode(f *dpb.FileDescriptorProto, n *fileNode) {
	r.nodes[f] = n
}

// putOptionNode records n as the AST node for uninterpreted option o.
func (r *parseResult) putOptionNode(o *dpb.UninterpretedOption, n *optionNode) {
	r.nodes[o] = n
}

// putOptionNamePartNode records n as the AST node for option name part o.
func (r *parseResult) putOptionNamePartNode(o *dpb.UninterpretedOption_NamePart, n *optionNamePartNode) {
	r.nodes[o] = n
}

// putMessageNode records n as the AST node for message m.
func (r *parseResult) putMessageNode(m *dpb.DescriptorProto, n msgDecl) {
	r.nodes[m] = n
}

// putFieldNode records n as the AST node for field f.
func (r *parseResult) putFieldNode(f *dpb.FieldDescriptorProto, n fieldDecl) {
	r.nodes[f] = n
}

// putOneOfNode records n as the AST node for oneof o.
func (r *parseResult) putOneOfNode(o *dpb.OneofDescriptorProto, n *oneOfNode) {
	r.nodes[o] = n
}

// putExtensionRangeNode records n as the AST node for extension range e.
func (r *parseResult) putExtensionRangeNode(e *dpb.DescriptorProto_ExtensionRange, n *rangeNode) {
	r.nodes[e] = n
}

// putMessageReservedRangeNode records n as the AST node for message reserved
// range rr.
func (r *parseResult) putMessageReservedRangeNode(rr *dpb.DescriptorProto_ReservedRange, n *rangeNode) {
	r.nodes[rr] = n
}

// putEnumNode records n as the AST node for enum e.
func (r *parseResult) putEnumNode(e *dpb.EnumDescriptorProto, n *enumNode) {
	r.nodes[e] = n
}

// putEnumValueNode records n as the AST node for enum value e.
func (r *parseResult) putEnumValueNode(e *dpb.EnumValueDescriptorProto, n *enumValueNode) {
	r.nodes[e] = n
}

// putEnumReservedRangeNode records n as the AST node for enum reserved range
// rr.
func (r *parseResult) putEnumReservedRangeNode(rr *dpb.EnumDescriptorProto_EnumReservedRange, n *rangeNode) {
	r.nodes[rr] = n
}

// putServiceNode records n as the AST node for service s.
func (r *parseResult) putServiceNode(s *dpb.ServiceDescriptorProto, n *serviceNode) {
	r.nodes[s] = n
}

// putMethodNode records n as the AST node for method m.
func (r *parseResult) putMethodNode(m *dpb.MethodDescriptorProto, n *methodNode) {
	r.nodes[m] = n
}
   696  
   697  func parseProto(filename string, r io.Reader, errs *errorHandler, validate bool) *parseResult {
   698  	beforeErrs := errs.errsReported
   699  	lx := newLexer(r, filename, errs)
   700  	protoParse(lx)
   701  
   702  	res := createParseResult(filename, lx.res, errs)
   703  	if validate && errs.err == nil {
   704  		validateBasic(res, errs.errsReported > beforeErrs)
   705  	}
   706  
   707  	return res
   708  }
   709  
   710  func createParseResult(filename string, file *fileNode, errs *errorHandler) *parseResult {
   711  	res := &parseResult{
   712  		errs:               errs,
   713  		nodes:              map[proto.Message]node{},
   714  		interpretedOptions: map[*optionNode][]int32{},
   715  	}
   716  	if file == nil {
   717  		// nil AST means there was an error that prevented any parsing
   718  		// or the file was empty; synthesize empty non-nil AST
   719  		file = &fileNode{}
   720  	}
   721  	if file.first == nil {
   722  		n := noSourceNode{pos: unknownPos(filename)}
   723  		file.setRange(&n, &n)
   724  	}
   725  	res.createFileDescriptor(filename, file)
   726  	return res
   727  }
   728  
   729  func toNameParts(ident *compoundIdentNode) []*optionNamePartNode {
   730  	parts := strings.Split(ident.val, ".")
   731  	ret := make([]*optionNamePartNode, len(parts))
   732  	offset := 0
   733  	for i, p := range parts {
   734  		ret[i] = &optionNamePartNode{text: ident, offset: offset, length: len(p)}
   735  		ret[i].setRange(ident, ident)
   736  		offset += len(p) + 1
   737  	}
   738  	return ret
   739  }
   740  
   741  func checkTag(pos *SourcePos, v uint64, maxTag int32) error {
   742  	if v < 1 {
   743  		return errorWithPos(pos, "tag number %d must be greater than zero", v)
   744  	} else if v > uint64(maxTag) {
   745  		return errorWithPos(pos, "tag number %d is higher than max allowed tag number (%d)", v, maxTag)
   746  	} else if v >= internal.SpecialReservedStart && v <= internal.SpecialReservedEnd {
   747  		return errorWithPos(pos, "tag number %d is in disallowed reserved range %d-%d", v, internal.SpecialReservedStart, internal.SpecialReservedEnd)
   748  	}
   749  	return nil
   750  }
   751  
   752  func checkExtensionTagsInFile(fd *desc.FileDescriptor, res *parseResult) error {
   753  	for _, fld := range fd.GetExtensions() {
   754  		if err := checkExtensionTag(fld, res); err != nil {
   755  			return err
   756  		}
   757  	}
   758  	for _, md := range fd.GetMessageTypes() {
   759  		if err := checkExtensionTagsInMessage(md, res); err != nil {
   760  			return err
   761  		}
   762  	}
   763  	return nil
   764  }
   765  
   766  func checkExtensionTagsInMessage(md *desc.MessageDescriptor, res *parseResult) error {
   767  	for _, fld := range md.GetNestedExtensions() {
   768  		if err := checkExtensionTag(fld, res); err != nil {
   769  			return err
   770  		}
   771  	}
   772  	for _, nmd := range md.GetNestedMessageTypes() {
   773  		if err := checkExtensionTagsInMessage(nmd, res); err != nil {
   774  			return err
   775  		}
   776  	}
   777  	return nil
   778  }
   779  
   780  func checkExtensionTag(fld *desc.FieldDescriptor, res *parseResult) error {
   781  	// NB: This is kind of gross that we don't enforce this in validateBasic(). But it would
   782  	// require doing some minimal linking there (to identify the extendee and locate its
   783  	// descriptor). To keep the code simpler, we just wait until things are fully linked.
   784  
   785  	// In validateBasic() we just made sure these were within bounds for any message. But
   786  	// now that things are linked, we can check if the extendee is messageset wire format
   787  	// and, if not, enforce tighter limit.
   788  	if !fld.GetOwner().GetMessageOptions().GetMessageSetWireFormat() && fld.GetNumber() > internal.MaxNormalTag {
   789  		pos := res.nodes[fld.AsFieldDescriptorProto()].(fieldDecl).fieldTag().start()
   790  		return errorWithPos(pos, "tag number %d is higher than max allowed tag number (%d)", fld.GetNumber(), internal.MaxNormalTag)
   791  	}
   792  	return nil
   793  }
   794  
   795  func aggToString(agg []*aggregateEntryNode, buf *bytes.Buffer) {
   796  	buf.WriteString("{")
   797  	for _, a := range agg {
   798  		buf.WriteString(" ")
   799  		buf.WriteString(a.name.value())
   800  		if v, ok := a.val.(*aggregateLiteralNode); ok {
   801  			aggToString(v.elements, buf)
   802  		} else {
   803  			buf.WriteString(": ")
   804  			elementToString(a.val.value(), buf)
   805  		}
   806  	}
   807  	buf.WriteString(" }")
   808  }
   809  
   810  func elementToString(v interface{}, buf *bytes.Buffer) {
   811  	switch v := v.(type) {
   812  	case bool, int64, uint64, identifier:
   813  		_, _ = fmt.Fprintf(buf, "%v", v)
   814  	case float64:
   815  		if math.IsInf(v, 1) {
   816  			buf.WriteString(": inf")
   817  		} else if math.IsInf(v, -1) {
   818  			buf.WriteString(": -inf")
   819  		} else if math.IsNaN(v) {
   820  			buf.WriteString(": nan")
   821  		} else {
   822  			_, _ = fmt.Fprintf(buf, ": %v", v)
   823  		}
   824  	case string:
   825  		buf.WriteRune('"')
   826  		writeEscapedBytes(buf, []byte(v))
   827  		buf.WriteRune('"')
   828  	case []valueNode:
   829  		buf.WriteString(": [")
   830  		first := true
   831  		for _, e := range v {
   832  			if first {
   833  				first = false
   834  			} else {
   835  				buf.WriteString(", ")
   836  			}
   837  			elementToString(e.value(), buf)
   838  		}
   839  		buf.WriteString("]")
   840  	case []*aggregateEntryNode:
   841  		aggToString(v, buf)
   842  	}
   843  }
   844  
   845  func writeEscapedBytes(buf *bytes.Buffer, b []byte) {
   846  	for _, c := range b {
   847  		switch c {
   848  		case '\n':
   849  			buf.WriteString("\\n")
   850  		case '\r':
   851  			buf.WriteString("\\r")
   852  		case '\t':
   853  			buf.WriteString("\\t")
   854  		case '"':
   855  			buf.WriteString("\\\"")
   856  		case '\'':
   857  			buf.WriteString("\\'")
   858  		case '\\':
   859  			buf.WriteString("\\\\")
   860  		default:
   861  			if c >= 0x20 && c <= 0x7f && c != '"' && c != '\\' {
   862  				// simple printable characters
   863  				buf.WriteByte(c)
   864  			} else {
   865  				// use octal escape for all other values
   866  				buf.WriteRune('\\')
   867  				buf.WriteByte('0' + ((c >> 6) & 0x7))
   868  				buf.WriteByte('0' + ((c >> 3) & 0x7))
   869  				buf.WriteByte('0' + (c & 0x7))
   870  			}
   871  		}
   872  	}
   873  }