github.com/bakjos/protoreflect@v1.9.2/desc/protoparse/parser.go

     1  package protoparse
     2  
     3  import (
     4  	"bytes"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"io/ioutil"
     9  	"math"
    10  	"os"
    11  	"path/filepath"
    12  	"sort"
    13  	"strings"
    14  
    15  	"github.com/golang/protobuf/proto"
    16  	dpb "github.com/golang/protobuf/protoc-gen-go/descriptor"
    17  
    18  	"github.com/bakjos/protoreflect/desc"
    19  	"github.com/bakjos/protoreflect/desc/internal"
    20  	"github.com/bakjos/protoreflect/desc/protoparse/ast"
    21  )
    22  
    23  //go:generate goyacc -o proto.y.go -p proto proto.y
    24  
    25  func init() {
    26  	protoErrorVerbose = true
    27  
    28  	// fix up the generated "token name" array so that error messages are nicer
    29  	setTokenName(_STRING_LIT, "string literal")
    30  	setTokenName(_INT_LIT, "int literal")
    31  	setTokenName(_FLOAT_LIT, "float literal")
    32  	setTokenName(_NAME, "identifier")
    33  	setTokenName(_ERROR, "error")
    34  	// for keywords, just show the keyword itself wrapped in quotes
    35  	for str, i := range keywords {
    36  		setTokenName(i, fmt.Sprintf(`"%s"`, str))
    37  	}
    38  }
    39  
    40  func setTokenName(token int, text string) {
    41  	// NB: this is based on logic in generated parse code that translates the
    42  	// int returned from the lexer into an internal token number.
    43  	var intern int
    44  	if token < len(protoTok1) {
    45  		intern = protoTok1[token]
    46  	} else {
    47  		if token >= protoPrivate {
    48  			if token < protoPrivate+len(protoTok2) {
    49  				intern = protoTok2[token-protoPrivate]
    50  			}
    51  		}
    52  		if intern == 0 {
    53  			for i := 0; i+1 < len(protoTok3); i += 2 {
    54  				if protoTok3[i] == token {
    55  					intern = protoTok3[i+1]
    56  					break
    57  				}
    58  			}
    59  		}
    60  	}
    61  
    62  	if intern >= 1 && intern-1 < len(protoToknames) {
    63  		protoToknames[intern-1] = text
    64  		return
    65  	}
    66  
    67  	panic(fmt.Sprintf("Unknown token value: %d", token))
    68  }
    69  
    70  // FileAccessor is an abstraction for opening proto source files. It takes the
    71  // name of the file to open and returns either the input reader or an error.
    72  type FileAccessor func(filename string) (io.ReadCloser, error)
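
// rootedAccessor is an illustrative sketch, not part of the original API: a
// FileAccessor that resolves every requested filename against a fixed root
// directory before opening it with os.Open. The function name and the
// root-directory convention are assumptions made for this example.
func rootedAccessor(root string) FileAccessor {
	return func(filename string) (io.ReadCloser, error) {
		// resolve the requested name relative to the configured root; any
		// failure (e.g. os.ErrNotExist) is returned to the parser as-is
		return os.Open(filepath.Join(root, filename))
	}
}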
    73  
    74  // FileContentsFromMap returns a FileAccessor that uses the given map of file
    75  // contents. This allows proto source files to be constructed in memory and
    76  // easily supplied to a parser. The map keys are the paths to the proto source
    77  // files, and the values are the actual proto source contents.
    78  func FileContentsFromMap(files map[string]string) FileAccessor {
    79  	return func(filename string) (io.ReadCloser, error) {
    80  		contents, ok := files[filename]
    81  		if !ok {
    82  			return nil, os.ErrNotExist
    83  		}
    84  		return ioutil.NopCloser(strings.NewReader(contents)), nil
    85  	}
    86  }
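
// exampleParseFromMap is an illustrative sketch, not part of the original
// file: it feeds a Parser entirely from in-memory sources via
// FileContentsFromMap. The file name and proto contents are hypothetical.
func exampleParseFromMap() (*desc.FileDescriptor, error) {
	files := map[string]string{
		"greet.proto": `syntax = "proto3";
package greet;
message Hello { string name = 1; }`,
	}
	p := Parser{Accessor: FileContentsFromMap(files)}
	fds, err := p.ParseFiles("greet.proto")
	if err != nil {
		return nil, err
	}
	// one descriptor per requested filename, in the same order
	return fds[0], nil
}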
    87  
    88  // Parser parses proto source into descriptors.
    89  type Parser struct {
    90  	// The paths used to search for dependencies that are referenced in import
    91  	// statements in proto source files. If no import paths are provided then
    92  	// "." (current directory) is assumed to be the only import path.
    93  	//
    94  	// This setting is only used during ParseFiles operations. Since calls to
    95  	// ParseFilesButDoNotLink do not link, there is no need to load and parse
    96  	// dependencies.
    97  	ImportPaths []string
    98  
    99  	// If true, the supplied file names/paths need not necessarily match how the
   100  	// files are referenced in import statements. The parser will attempt to
   101  	// match import statements to supplied paths, "guessing" the import paths
   102  	// for the files. Note that this inference is not perfect and link errors
   103  	// could result. It works best when all proto files are organized such that
   104  	// a single import path can be inferred (e.g. all files under a single tree
   105  	// with import statements all being relative to the root of this tree).
   106  	InferImportPaths bool
   107  
   108  	// LookupImport is a function that accepts a filename and
   109  	// returns a file descriptor, which will be consulted when resolving imports.
   110  	// This allows a compiled Go proto in another Go module to be referenced
   111  	// in the proto(s) being parsed.
   112  	//
   113  	// In the event of a filename collision, Accessor is consulted first,
   114  	// then LookupImport is consulted, and finally the well-known protos
   115  	// are used.
   116  	//
   117  	// For example, in order to automatically look up compiled Go protos that
   118  	// have been imported and be able to use them as imports, set this to
   119  	// desc.LoadFileDescriptor.
   120  	LookupImport func(string) (*desc.FileDescriptor, error)
   121  
   122  	// LookupImportProto has the same functionality as LookupImport, however it returns
   123  	// a FileDescriptorProto instead of a FileDescriptor.
   124  	//
   125  	// It is an error to set both LookupImport and LookupImportProto.
   126  	LookupImportProto func(string) (*dpb.FileDescriptorProto, error)
   127  
   128  	// Used to create a reader for a given filename, when loading proto source
   129  	// file contents. If unset, os.Open is used. If ImportPaths is also empty
    130  	// then relative paths will be relative to the process's current working
   131  	// directory.
   132  	Accessor FileAccessor
   133  
    134  	// If true, the resulting file descriptors will retain source code info,
    135  	// which maps elements to their location in the source files and includes
    136  	// comments found during parsing (attributed to elements of the source
    137  	// file).
   138  	IncludeSourceCodeInfo bool
   139  
   140  	// If true, the results from ParseFilesButDoNotLink will be passed through
   141  	// some additional validations. But only constraints that do not require
   142  	// linking can be checked. These include proto2 vs. proto3 language features,
   143  	// looking for incorrect usage of reserved names or tags, and ensuring that
   144  	// fields have unique tags and that enum values have unique numbers (unless
   145  	// the enum allows aliases).
   146  	ValidateUnlinkedFiles bool
   147  
   148  	// If true, the results from ParseFilesButDoNotLink will have options
   149  	// interpreted. Any uninterpretable options (including any custom options or
   150  	// options that refer to message and enum types, which can only be
   151  	// interpreted after linking) will be left in uninterpreted_options. Also,
   152  	// the "default" pseudo-option for fields can only be interpreted for scalar
   153  	// fields, excluding enums. (Interpreting default values for enum fields
   154  	// requires resolving enum names, which requires linking.)
   155  	InterpretOptionsInUnlinkedFiles bool
   156  
   157  	// A custom reporter of syntax and link errors. If not specified, the
   158  	// default reporter just returns the reported error, which causes parsing
   159  	// to abort after encountering a single error.
   160  	//
   161  	// The reporter is not invoked for system or I/O errors, only for syntax and
   162  	// link errors.
   163  	ErrorReporter ErrorReporter
   164  
   165  	// A custom reporter of warnings. If not specified, warning messages are ignored.
   166  	WarningReporter WarningReporter
   167  }
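
// exampleCollectAllErrors is an illustrative sketch, not part of the original
// file, showing one way to configure the fields above. The import path, the
// use of desc.LoadFileDescriptor for LookupImport, and the error-collecting
// reporter are assumptions made for this example.
func exampleCollectAllErrors(filenames ...string) ([]*desc.FileDescriptor, []ErrorWithPos, error) {
	var reported []ErrorWithPos
	p := Parser{
		// search this directory when resolving import statements
		ImportPaths: []string{"protos"},
		// resolve imports against compiled protos registered in this binary
		LookupImport: desc.LoadFileDescriptor,
		// retain locations and comments on the resulting descriptors
		IncludeSourceCodeInfo: true,
		// collect every syntax/link error instead of aborting on the first;
		// if any are reported, ParseFiles fails with ErrInvalidSource
		ErrorReporter: func(err ErrorWithPos) error {
			reported = append(reported, err)
			return nil
		},
	}
	fds, err := p.ParseFiles(filenames...)
	return fds, reported, err
}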
   168  
   169  // ParseFiles parses the named files into descriptors. The returned slice has
    170  // the same number of entries as the given filenames, in the same order. So the
   171  // first returned descriptor corresponds to the first given name, and so on.
   172  //
   173  // All dependencies for all specified files (including transitive dependencies)
   174  // must be accessible via the parser's Accessor or a link error will occur. The
   175  // exception to this rule is that files can import standard Google-provided
   176  // files -- e.g. google/protobuf/*.proto -- without needing to supply sources
   177  // for these files. Like protoc, this parser has a built-in version of these
   178  // files it can use if they aren't explicitly supplied.
   179  //
   180  // If the Parser has no ErrorReporter set and a syntax or link error occurs,
   181  // parsing will abort with the first such error encountered. If there is an
   182  // ErrorReporter configured and it returns non-nil, parsing will abort with the
   183  // error it returns. If syntax or link errors are encountered but the configured
   184  // ErrorReporter always returns nil, the parse fails with ErrInvalidSource.
   185  func (p Parser) ParseFiles(filenames ...string) ([]*desc.FileDescriptor, error) {
   186  	accessor := p.Accessor
   187  	if accessor == nil {
   188  		accessor = func(name string) (io.ReadCloser, error) {
   189  			return os.Open(name)
   190  		}
   191  	}
   192  	paths := p.ImportPaths
   193  	if len(paths) > 0 {
   194  		acc := accessor
   195  		accessor = func(name string) (io.ReadCloser, error) {
   196  			var ret error
   197  			for _, path := range paths {
   198  				f, err := acc(filepath.Join(path, name))
   199  				if err != nil {
   200  					if ret == nil {
   201  						ret = err
   202  					}
   203  					continue
   204  				}
   205  				return f, nil
   206  			}
   207  			return nil, ret
   208  		}
   209  	}
   210  	lookupImport, err := p.getLookupImport()
   211  	if err != nil {
   212  		return nil, err
   213  	}
   214  
   215  	protos := map[string]*parseResult{}
   216  	results := &parseResults{
   217  		resultsByFilename:      protos,
   218  		recursive:              true,
   219  		validate:               true,
   220  		createDescriptorProtos: true,
   221  	}
   222  	errs := newErrorHandler(p.ErrorReporter, p.WarningReporter)
   223  	parseProtoFiles(accessor, filenames, errs, results, lookupImport)
   224  	if err := errs.getError(); err != nil {
   225  		return nil, err
   226  	}
   227  	if p.InferImportPaths {
   228  		// TODO: if this re-writes one of the names in filenames, lookups below will break
   229  		protos = fixupFilenames(protos)
   230  	}
   231  	l := newLinker(results, errs)
   232  	linkedProtos, err := l.linkFiles()
   233  	if err != nil {
   234  		return nil, err
   235  	}
   236  	// Now we're done linking, so we can check to see if any imports were unused
   237  	for _, file := range filenames {
   238  		l.checkForUnusedImports(file)
   239  	}
   240  	if p.IncludeSourceCodeInfo {
   241  		for name, fd := range linkedProtos {
   242  			pr := protos[name]
   243  			fd.AsFileDescriptorProto().SourceCodeInfo = pr.generateSourceCodeInfo()
   244  			internal.RecomputeSourceInfo(fd)
   245  		}
   246  	}
   247  	fds := make([]*desc.FileDescriptor, len(filenames))
   248  	for i, name := range filenames {
   249  		fd := linkedProtos[name]
   250  		fds[i] = fd
   251  	}
   252  	return fds, nil
   253  }
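
// exampleParseWithStandardImport is an illustrative sketch, not part of the
// original file, exercising the built-in standard imports described above:
// the source imports google/protobuf/empty.proto without that file being
// supplied to the Accessor. The file name and message are hypothetical.
func exampleParseWithStandardImport() ([]*desc.FileDescriptor, error) {
	src := map[string]string{
		"svc.proto": `syntax = "proto3";
import "google/protobuf/empty.proto";
message Ping { google.protobuf.Empty payload = 1; }`,
	}
	p := Parser{Accessor: FileContentsFromMap(src)}
	// the returned slice has one entry per requested file, in the same order
	return p.ParseFiles("svc.proto")
}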
   254  
   255  // ParseFilesButDoNotLink parses the named files into descriptor protos. The
    256  // results are just protos, not fully-linked descriptors. It is possible for
    257  // invalid descriptors to be returned in parsed form without error, because
    258  // the linking step is skipped (and thus many validation steps are
    259  // omitted).
   260  //
   261  // There are a few side effects to not linking the descriptors:
   262  //   1. No options will be interpreted. Options can refer to extensions or have
   263  //      message and enum types. Without linking, these extension and type
   264  //      references are not resolved, so the options may not be interpretable.
   265  //      So all options will appear in UninterpretedOption fields of the various
   266  //      descriptor options messages.
   267  //   2. Type references will not be resolved. This means that the actual type
   268  //      names in the descriptors may be unqualified and even relative to the
   269  //      scope in which the type reference appears. This goes for fields that
   270  //      have message and enum types. It also applies to methods and their
   271  //      references to request and response message types.
    272  //   3. The kinds of type references are not known. For non-scalar fields,
    273  //      until the type name is resolved (during linking), it is not known
    274  //      whether the type refers to a message or an enum. So all such fields
    275  //      will not have their Type set, only the TypeName.
   276  //
   277  // This method will still validate the syntax of parsed files. If the parser's
    278  // ValidateUnlinkedFiles field is true, additional checks beyond syntax will
   279  // also be performed.
   280  //
   281  // If the Parser has no ErrorReporter set and a syntax error occurs, parsing
   282  // will abort with the first such error encountered. If there is an
   283  // ErrorReporter configured and it returns non-nil, parsing will abort with the
   284  // error it returns. If syntax errors are encountered but the configured
   285  // ErrorReporter always returns nil, the parse fails with ErrInvalidSource.
   286  func (p Parser) ParseFilesButDoNotLink(filenames ...string) ([]*dpb.FileDescriptorProto, error) {
   287  	accessor := p.Accessor
   288  	if accessor == nil {
   289  		accessor = func(name string) (io.ReadCloser, error) {
   290  			return os.Open(name)
   291  		}
   292  	}
   293  	lookupImport, err := p.getLookupImport()
   294  	if err != nil {
   295  		return nil, err
   296  	}
   297  
   298  	protos := map[string]*parseResult{}
   299  	errs := newErrorHandler(p.ErrorReporter, p.WarningReporter)
   300  	results := &parseResults{
   301  		resultsByFilename:      protos,
   302  		validate:               p.ValidateUnlinkedFiles,
   303  		createDescriptorProtos: true,
   304  	}
   305  	parseProtoFiles(accessor, filenames, errs, results, lookupImport)
   306  	if err := errs.getError(); err != nil {
   307  		return nil, err
   308  	}
   309  	if p.InferImportPaths {
   310  		// TODO: if this re-writes one of the names in filenames, lookups below will break
   311  		protos = fixupFilenames(protos)
   312  	}
   313  	fds := make([]*dpb.FileDescriptorProto, len(filenames))
   314  	for i, name := range filenames {
   315  		pr := protos[name]
   316  		fd := pr.fd
   317  		if p.InterpretOptionsInUnlinkedFiles {
   318  			// parsing options will be best effort
   319  			pr.lenient = true
    320  			// we don't want the real error reporter to see any errors
   321  			pr.errs.errReporter = func(err ErrorWithPos) error {
   322  				return err
   323  			}
   324  			_ = interpretFileOptions(nil, pr, poorFileDescriptorish{FileDescriptorProto: fd})
   325  		}
   326  		if p.IncludeSourceCodeInfo {
   327  			fd.SourceCodeInfo = pr.generateSourceCodeInfo()
   328  		}
   329  		fds[i] = fd
   330  	}
   331  	return fds, nil
   332  }
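
// exampleParseUnlinked is an illustrative sketch, not part of the original
// file: it parses without linking and inspects the raw descriptor protos. Per
// the notes above, message/enum fields may have no Type and an unresolved
// (possibly relative) TypeName. The file name is hypothetical.
func exampleParseUnlinked(p Parser) error {
	fdProtos, err := p.ParseFilesButDoNotLink("greet.proto")
	if err != nil {
		return err
	}
	for _, fdp := range fdProtos {
		for _, msg := range fdp.GetMessageType() {
			for _, fld := range msg.GetField() {
				fmt.Printf("%s.%s: type=%v typeName=%q\n",
					msg.GetName(), fld.GetName(), fld.GetType(), fld.GetTypeName())
			}
		}
	}
	return nil
}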
   333  
   334  // ParseToAST parses the named files into ASTs, or Abstract Syntax Trees. This
   335  // is for consumers of proto files that don't care about compiling the files to
   336  // descriptors, but care deeply about a non-lossy structured representation of
   337  // the source (since descriptors are lossy). This includes formatting tools and
   338  // possibly linters, too.
   339  //
   340  // If the requested filenames include standard imports (such as
   341  // "google/protobuf/empty.proto") and no source is provided, the corresponding
   342  // AST in the returned slice will be nil. These standard imports are only
   343  // available for use as descriptors; no source is available unless it is
   344  // provided by the configured Accessor.
   345  //
   346  // If the Parser has no ErrorReporter set and a syntax error occurs, parsing
   347  // will abort with the first such error encountered. If there is an
   348  // ErrorReporter configured and it returns non-nil, parsing will abort with the
   349  // error it returns. If syntax errors are encountered but the configured
   350  // ErrorReporter always returns nil, the parse fails with ErrInvalidSource.
   351  func (p Parser) ParseToAST(filenames ...string) ([]*ast.FileNode, error) {
   352  	accessor := p.Accessor
   353  	if accessor == nil {
   354  		accessor = func(name string) (io.ReadCloser, error) {
   355  			return os.Open(name)
   356  		}
   357  	}
   358  	lookupImport, err := p.getLookupImport()
   359  	if err != nil {
   360  		return nil, err
   361  	}
   362  
   363  	protos := map[string]*parseResult{}
   364  	errs := newErrorHandler(p.ErrorReporter, p.WarningReporter)
   365  	parseProtoFiles(accessor, filenames, errs, &parseResults{resultsByFilename: protos}, lookupImport)
   366  	if err := errs.getError(); err != nil {
   367  		return nil, err
   368  	}
   369  	ret := make([]*ast.FileNode, 0, len(filenames))
   370  	for _, name := range filenames {
   371  		ret = append(ret, protos[name].root)
   372  	}
   373  	return ret, nil
   374  }
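
// exampleListImports is an illustrative sketch, not part of the original
// file: it walks the top-level declarations of each returned AST and prints
// the declared imports. The file name is hypothetical.
func exampleListImports(p Parser) error {
	roots, err := p.ParseToAST("greet.proto")
	if err != nil {
		return err
	}
	for _, root := range roots {
		if root == nil {
			// a standard import with no supplied source yields a nil AST
			continue
		}
		for _, decl := range root.Decls {
			if imp, ok := decl.(*ast.ImportNode); ok {
				fmt.Println("imports", imp.Name.AsString())
			}
		}
	}
	return nil
}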
   375  
   376  func (p Parser) getLookupImport() (func(string) (*dpb.FileDescriptorProto, error), error) {
   377  	if p.LookupImport != nil && p.LookupImportProto != nil {
   378  		return nil, ErrLookupImportAndProtoSet
   379  	}
   380  	if p.LookupImportProto != nil {
   381  		return p.LookupImportProto, nil
   382  	}
   383  	if p.LookupImport != nil {
   384  		return func(path string) (*dpb.FileDescriptorProto, error) {
   385  			value, err := p.LookupImport(path)
   386  			if value != nil {
   387  				return value.AsFileDescriptorProto(), err
   388  			}
   389  			return nil, err
   390  		}, nil
   391  	}
   392  	return nil, nil
   393  }
   394  
   395  func fixupFilenames(protos map[string]*parseResult) map[string]*parseResult {
   396  	// In the event that the given filenames (keys in the supplied map) do not
   397  	// match the actual paths used in 'import' statements in the files, we try
   398  	// to revise names in the protos so that they will match and be linkable.
   399  	revisedProtos := map[string]*parseResult{}
   400  
   401  	protoPaths := map[string]struct{}{}
   402  	// TODO: this is O(n^2) but could likely be O(n) with a clever data structure (prefix tree that is indexed backwards?)
   403  	importCandidates := map[string]map[string]struct{}{}
   404  	candidatesAvailable := map[string]struct{}{}
   405  	for name := range protos {
   406  		candidatesAvailable[name] = struct{}{}
   407  		for _, f := range protos {
   408  			for _, imp := range f.fd.Dependency {
   409  				if strings.HasSuffix(name, imp) {
   410  					candidates := importCandidates[imp]
   411  					if candidates == nil {
   412  						candidates = map[string]struct{}{}
   413  						importCandidates[imp] = candidates
   414  					}
   415  					candidates[name] = struct{}{}
   416  				}
   417  			}
   418  		}
   419  	}
   420  	for imp, candidates := range importCandidates {
   421  		// if we found multiple possible candidates, use the one that is an exact match
   422  		// if it exists, and otherwise, guess that it's the shortest path (fewest elements)
   423  		var best string
   424  		for c := range candidates {
   425  			if _, ok := candidatesAvailable[c]; !ok {
    426  				// we already used this candidate and re-wrote its filename accordingly
   427  				continue
   428  			}
   429  			if c == imp {
   430  				// exact match!
   431  				best = c
   432  				break
   433  			}
   434  			if best == "" {
   435  				best = c
   436  			} else {
    437  				// HACK: we can't actually tell which file is supposed to match
   438  				// this import, so arbitrarily pick the "shorter" one (fewest
   439  				// path elements) or, on a tie, the lexically earlier one
   440  				minLen := strings.Count(best, string(filepath.Separator))
   441  				cLen := strings.Count(c, string(filepath.Separator))
   442  				if cLen < minLen || (cLen == minLen && c < best) {
   443  					best = c
   444  				}
   445  			}
   446  		}
   447  		if best != "" {
   448  			prefix := best[:len(best)-len(imp)]
   449  			if len(prefix) > 0 {
   450  				protoPaths[prefix] = struct{}{}
   451  			}
   452  			f := protos[best]
   453  			f.fd.Name = proto.String(imp)
   454  			revisedProtos[imp] = f
   455  			delete(candidatesAvailable, best)
   456  		}
   457  	}
   458  
   459  	if len(candidatesAvailable) == 0 {
   460  		return revisedProtos
   461  	}
   462  
   463  	if len(protoPaths) == 0 {
   464  		for c := range candidatesAvailable {
   465  			revisedProtos[c] = protos[c]
   466  		}
   467  		return revisedProtos
   468  	}
   469  
   470  	// Any remaining candidates are entry-points (not imported by others), so
    471  	// the best bet for "fixing" their file name is to see if they're in one of
   472  	// the proto paths we found, and if so strip that prefix.
   473  	protoPathStrs := make([]string, len(protoPaths))
   474  	i := 0
   475  	for p := range protoPaths {
   476  		protoPathStrs[i] = p
   477  		i++
   478  	}
   479  	sort.Strings(protoPathStrs)
   480  	// we look at paths in reverse order, so we'll use a longer proto path if
   481  	// there is more than one match
   482  	for c := range candidatesAvailable {
   483  		var imp string
   484  		for i := len(protoPathStrs) - 1; i >= 0; i-- {
   485  			p := protoPathStrs[i]
   486  			if strings.HasPrefix(c, p) {
   487  				imp = c[len(p):]
   488  				break
   489  			}
   490  		}
   491  		if imp != "" {
   492  			f := protos[c]
   493  			f.fd.Name = proto.String(imp)
   494  			revisedProtos[imp] = f
   495  		} else {
   496  			revisedProtos[c] = protos[c]
   497  		}
   498  	}
   499  
   500  	return revisedProtos
   501  }
   502  
   503  func parseProtoFiles(acc FileAccessor, filenames []string, errs *errorHandler, parsed *parseResults, lookupImport func(string) (*dpb.FileDescriptorProto, error)) {
   504  	for _, name := range filenames {
   505  		parseProtoFile(acc, name, nil, errs, parsed, lookupImport)
   506  		if errs.err != nil {
   507  			return
   508  		}
   509  	}
   510  }
   511  
   512  func parseProtoFile(acc FileAccessor, filename string, importLoc *SourcePos, errs *errorHandler, results *parseResults, lookupImport func(string) (*dpb.FileDescriptorProto, error)) {
   513  	if results.has(filename) {
   514  		return
   515  	}
   516  	if lookupImport == nil {
   517  		lookupImport = func(string) (*dpb.FileDescriptorProto, error) {
   518  			return nil, errors.New("no import lookup function")
   519  		}
   520  	}
   521  	in, err := acc(filename)
   522  	var result *parseResult
   523  	if err == nil {
   524  		// try to parse the bytes accessed
   525  		func() {
   526  			defer func() {
   527  				// if we've already parsed contents, an error
    528  				// while closing need not fail this operation
   529  				_ = in.Close()
   530  			}()
   531  			result = parseProto(filename, in, errs, results.validate, results.createDescriptorProtos)
   532  		}()
   533  	} else if d, lookupErr := lookupImport(filename); lookupErr == nil {
   534  		// This is a user-provided descriptor, which is acting similarly to a
   535  		// well-known import.
   536  		result = &parseResult{fd: proto.Clone(d).(*dpb.FileDescriptorProto)}
   537  	} else if d, ok := standardImports[filename]; ok {
   538  		// it's a well-known import
   539  		// (we clone it to make sure we're not sharing state with other
   540  		//  parsers, which could result in unsafe races if multiple
   541  		//  parsers are trying to access it concurrently)
   542  		result = &parseResult{fd: proto.Clone(d).(*dpb.FileDescriptorProto)}
   543  	} else {
   544  		if !strings.Contains(err.Error(), filename) {
    545  			// an error message that doesn't mention the filename is unhelpful!
    546  			// (this cannot use %w since %w is not supported before go 1.13)
   547  			err = errorWithFilename{
   548  				underlying: err,
   549  				filename:   filename,
   550  			}
   551  		}
   552  		// The top-level loop in parseProtoFiles calls this with nil for the top-level files
    553  		// importLoc is only set for imports; otherwise we do not want to return an ErrorWithSourcePos
   554  		// ErrorWithSourcePos should always have a non-nil SourcePos
   555  		if importLoc != nil {
   556  			// associate the error with the import line
   557  			err = ErrorWithSourcePos{
   558  				Pos:        importLoc,
   559  				Underlying: err,
   560  			}
   561  		}
   562  		_ = errs.handleError(err)
   563  		return
   564  	}
   565  
   566  	results.add(filename, result)
   567  
   568  	if errs.err != nil {
   569  		return // abort
   570  	}
   571  
   572  	if results.recursive {
   573  		fd := result.fd
   574  		decl := result.getFileNode(fd)
   575  		fnode, ok := decl.(*ast.FileNode)
   576  		if !ok {
   577  			// no AST for this file? use imports in descriptor
   578  			for _, dep := range fd.Dependency {
   579  				parseProtoFile(acc, dep, decl.Start(), errs, results, lookupImport)
   580  				if errs.getError() != nil {
   581  					return // abort
   582  				}
   583  			}
   584  			return
   585  		}
   586  		// we have an AST; use it so we can report import location in errors
   587  		for _, decl := range fnode.Decls {
   588  			if dep, ok := decl.(*ast.ImportNode); ok {
   589  				parseProtoFile(acc, dep.Name.AsString(), dep.Name.Start(), errs, results, lookupImport)
   590  				if errs.getError() != nil {
   591  					return // abort
   592  				}
   593  			}
   594  		}
   595  	}
   596  }
   597  
   598  type parseResults struct {
   599  	resultsByFilename map[string]*parseResult
   600  	filenames         []string
   601  
   602  	recursive, validate, createDescriptorProtos bool
   603  }
   604  
   605  func (r *parseResults) has(filename string) bool {
   606  	_, ok := r.resultsByFilename[filename]
   607  	return ok
   608  }
   609  
   610  func (r *parseResults) add(filename string, result *parseResult) {
   611  	r.resultsByFilename[filename] = result
   612  	r.filenames = append(r.filenames, filename)
   613  }
   614  
   615  type parseResult struct {
   616  	// handles any errors encountered during parsing, construction of file descriptor,
   617  	// or validation
   618  	errs *errorHandler
   619  
   620  	// the root of the AST
   621  	root *ast.FileNode
   622  	// the parsed file descriptor
   623  	fd *dpb.FileDescriptorProto
   624  
   625  	// if set to true, enables lenient interpretation of options, where
   626  	// unrecognized options will be left uninterpreted instead of resulting in a
   627  	// link error
   628  	lenient bool
   629  
   630  	// a map of elements in the descriptor to nodes in the AST
   631  	// (for extracting position information when validating the descriptor)
   632  	nodes map[proto.Message]ast.Node
   633  
   634  	// a map of uninterpreted option AST nodes to their relative path
   635  	// in the resulting options message
   636  	interpretedOptions map[*ast.OptionNode][]int32
   637  }
   638  
   639  func (r *parseResult) getFileNode(f *dpb.FileDescriptorProto) ast.FileDeclNode {
   640  	if r.nodes == nil {
   641  		return ast.NewNoSourceNode(f.GetName())
   642  	}
   643  	return r.nodes[f].(ast.FileDeclNode)
   644  }
   645  
   646  func (r *parseResult) getOptionNode(o *dpb.UninterpretedOption) ast.OptionDeclNode {
   647  	if r.nodes == nil {
   648  		return ast.NewNoSourceNode(r.fd.GetName())
   649  	}
   650  	return r.nodes[o].(ast.OptionDeclNode)
   651  }
   652  
   653  func (r *parseResult) getOptionNamePartNode(o *dpb.UninterpretedOption_NamePart) ast.Node {
   654  	if r.nodes == nil {
   655  		return ast.NewNoSourceNode(r.fd.GetName())
   656  	}
   657  	return r.nodes[o]
   658  }
   659  
   660  func (r *parseResult) getFieldNode(f *dpb.FieldDescriptorProto) ast.FieldDeclNode {
   661  	if r.nodes == nil {
   662  		return ast.NewNoSourceNode(r.fd.GetName())
   663  	}
   664  	return r.nodes[f].(ast.FieldDeclNode)
   665  }
   666  
   667  func (r *parseResult) getExtensionRangeNode(e *dpb.DescriptorProto_ExtensionRange) ast.RangeDeclNode {
   668  	if r.nodes == nil {
   669  		return ast.NewNoSourceNode(r.fd.GetName())
   670  	}
   671  	return r.nodes[e].(ast.RangeDeclNode)
   672  }
   673  
   674  func (r *parseResult) getMessageReservedRangeNode(rr *dpb.DescriptorProto_ReservedRange) ast.RangeDeclNode {
   675  	if r.nodes == nil {
   676  		return ast.NewNoSourceNode(r.fd.GetName())
   677  	}
   678  	return r.nodes[rr].(ast.RangeDeclNode)
   679  }
   680  
   681  func (r *parseResult) getEnumNode(e *dpb.EnumDescriptorProto) ast.Node {
   682  	if r.nodes == nil {
   683  		return ast.NewNoSourceNode(r.fd.GetName())
   684  	}
   685  	return r.nodes[e]
   686  }
   687  
   688  func (r *parseResult) getEnumValueNode(e *dpb.EnumValueDescriptorProto) ast.EnumValueDeclNode {
   689  	if r.nodes == nil {
   690  		return ast.NewNoSourceNode(r.fd.GetName())
   691  	}
   692  	return r.nodes[e].(ast.EnumValueDeclNode)
   693  }
   694  
   695  func (r *parseResult) getEnumReservedRangeNode(rr *dpb.EnumDescriptorProto_EnumReservedRange) ast.RangeDeclNode {
   696  	if r.nodes == nil {
   697  		return ast.NewNoSourceNode(r.fd.GetName())
   698  	}
   699  	return r.nodes[rr].(ast.RangeDeclNode)
   700  }
   701  
   702  func (r *parseResult) getMethodNode(m *dpb.MethodDescriptorProto) ast.RPCDeclNode {
   703  	if r.nodes == nil {
   704  		return ast.NewNoSourceNode(r.fd.GetName())
   705  	}
   706  	return r.nodes[m].(ast.RPCDeclNode)
   707  }
   708  
   709  func (r *parseResult) putFileNode(f *dpb.FileDescriptorProto, n *ast.FileNode) {
   710  	r.nodes[f] = n
   711  }
   712  
   713  func (r *parseResult) putOptionNode(o *dpb.UninterpretedOption, n *ast.OptionNode) {
   714  	r.nodes[o] = n
   715  }
   716  
   717  func (r *parseResult) putOptionNamePartNode(o *dpb.UninterpretedOption_NamePart, n *ast.FieldReferenceNode) {
   718  	r.nodes[o] = n
   719  }
   720  
   721  func (r *parseResult) putMessageNode(m *dpb.DescriptorProto, n ast.MessageDeclNode) {
   722  	r.nodes[m] = n
   723  }
   724  
   725  func (r *parseResult) putFieldNode(f *dpb.FieldDescriptorProto, n ast.FieldDeclNode) {
   726  	r.nodes[f] = n
   727  }
   728  
   729  func (r *parseResult) putOneOfNode(o *dpb.OneofDescriptorProto, n *ast.OneOfNode) {
   730  	r.nodes[o] = n
   731  }
   732  
   733  func (r *parseResult) putExtensionRangeNode(e *dpb.DescriptorProto_ExtensionRange, n *ast.RangeNode) {
   734  	r.nodes[e] = n
   735  }
   736  
   737  func (r *parseResult) putMessageReservedRangeNode(rr *dpb.DescriptorProto_ReservedRange, n *ast.RangeNode) {
   738  	r.nodes[rr] = n
   739  }
   740  
   741  func (r *parseResult) putEnumNode(e *dpb.EnumDescriptorProto, n *ast.EnumNode) {
   742  	r.nodes[e] = n
   743  }
   744  
   745  func (r *parseResult) putEnumValueNode(e *dpb.EnumValueDescriptorProto, n *ast.EnumValueNode) {
   746  	r.nodes[e] = n
   747  }
   748  
   749  func (r *parseResult) putEnumReservedRangeNode(rr *dpb.EnumDescriptorProto_EnumReservedRange, n *ast.RangeNode) {
   750  	r.nodes[rr] = n
   751  }
   752  
   753  func (r *parseResult) putServiceNode(s *dpb.ServiceDescriptorProto, n *ast.ServiceNode) {
   754  	r.nodes[s] = n
   755  }
   756  
   757  func (r *parseResult) putMethodNode(m *dpb.MethodDescriptorProto, n *ast.RPCNode) {
   758  	r.nodes[m] = n
   759  }
   760  
   761  func parseProto(filename string, r io.Reader, errs *errorHandler, validate, createProtos bool) *parseResult {
   762  	beforeErrs := errs.errsReported
   763  	lx := newLexer(r, filename, errs)
   764  	protoParse(lx)
   765  	if lx.res == nil || len(lx.res.Children()) == 0 {
   766  		// nil AST means there was an error that prevented any parsing
   767  		// or the file was empty; synthesize empty non-nil AST
   768  		lx.res = ast.NewEmptyFileNode(filename)
   769  	}
   770  	if lx.eof != nil {
   771  		lx.res.FinalComments = lx.eof.LeadingComments()
   772  		lx.res.FinalWhitespace = lx.eof.LeadingWhitespace()
   773  	}
   774  	res := createParseResult(filename, lx.res, errs, createProtos)
   775  	if validate && errs.err == nil {
   776  		validateBasic(res, errs.errsReported > beforeErrs)
   777  	}
   778  
   779  	return res
   780  }
   781  
   782  func createParseResult(filename string, file *ast.FileNode, errs *errorHandler, createProtos bool) *parseResult {
   783  	res := &parseResult{
   784  		errs:               errs,
   785  		root:               file,
   786  		nodes:              map[proto.Message]ast.Node{},
   787  		interpretedOptions: map[*ast.OptionNode][]int32{},
   788  	}
   789  	if createProtos {
   790  		res.createFileDescriptor(filename, file)
   791  	}
   792  	return res
   793  }
   794  
   795  func checkTag(pos *SourcePos, v uint64, maxTag int32) error {
   796  	if v < 1 {
   797  		return errorWithPos(pos, "tag number %d must be greater than zero", v)
   798  	} else if v > uint64(maxTag) {
   799  		return errorWithPos(pos, "tag number %d is higher than max allowed tag number (%d)", v, maxTag)
   800  	} else if v >= internal.SpecialReservedStart && v <= internal.SpecialReservedEnd {
   801  		return errorWithPos(pos, "tag number %d is in disallowed reserved range %d-%d", v, internal.SpecialReservedStart, internal.SpecialReservedEnd)
   802  	}
   803  	return nil
   804  }
   805  
   806  func checkExtensionsInFile(fd *desc.FileDescriptor, res *parseResult) error {
   807  	for _, fld := range fd.GetExtensions() {
   808  		if err := checkExtension(fld, res); err != nil {
   809  			return err
   810  		}
   811  	}
   812  	for _, md := range fd.GetMessageTypes() {
   813  		if err := checkExtensionsInMessage(md, res); err != nil {
   814  			return err
   815  		}
   816  	}
   817  	return nil
   818  }
   819  
   820  func checkExtensionsInMessage(md *desc.MessageDescriptor, res *parseResult) error {
   821  	for _, fld := range md.GetNestedExtensions() {
   822  		if err := checkExtension(fld, res); err != nil {
   823  			return err
   824  		}
   825  	}
   826  	for _, nmd := range md.GetNestedMessageTypes() {
   827  		if err := checkExtensionsInMessage(nmd, res); err != nil {
   828  			return err
   829  		}
   830  	}
   831  	return nil
   832  }
   833  
   834  func checkExtension(fld *desc.FieldDescriptor, res *parseResult) error {
   835  	// NB: It's a little gross that we don't enforce these in validateBasic().
    836  	// But it requires some minimal linking to resolve the extendee, so we can
   837  	// interrogate its descriptor.
   838  	if fld.GetOwner().GetMessageOptions().GetMessageSetWireFormat() {
   839  		// Message set wire format requires that all extensions be messages
   840  		// themselves (no scalar extensions)
   841  		if fld.GetType() != dpb.FieldDescriptorProto_TYPE_MESSAGE {
   842  			pos := res.getFieldNode(fld.AsFieldDescriptorProto()).FieldType().Start()
   843  			return errorWithPos(pos, "messages with message-set wire format cannot contain scalar extensions, only messages")
   844  		}
   845  	} else {
   846  		// In validateBasic() we just made sure these were within bounds for any message. But
    847  		// now that things are linked, we can check if the extendee is message-set wire format
    848  		// and, if not, enforce the tighter limit.
   849  		if fld.GetNumber() > internal.MaxNormalTag {
   850  			pos := res.getFieldNode(fld.AsFieldDescriptorProto()).FieldTag().Start()
   851  			return errorWithPos(pos, "tag number %d is higher than max allowed tag number (%d)", fld.GetNumber(), internal.MaxNormalTag)
   852  		}
   853  	}
   854  
   855  	return nil
   856  }
   857  
   858  func aggToString(agg []*ast.MessageFieldNode, buf *bytes.Buffer) {
   859  	buf.WriteString("{")
   860  	for _, a := range agg {
   861  		buf.WriteString(" ")
   862  		buf.WriteString(a.Name.Value())
   863  		if v, ok := a.Val.(*ast.MessageLiteralNode); ok {
   864  			aggToString(v.Elements, buf)
   865  		} else {
   866  			buf.WriteString(": ")
   867  			elementToString(a.Val.Value(), buf)
   868  		}
   869  	}
   870  	buf.WriteString(" }")
   871  }
   872  
   873  func elementToString(v interface{}, buf *bytes.Buffer) {
   874  	switch v := v.(type) {
   875  	case bool, int64, uint64, ast.Identifier:
   876  		_, _ = fmt.Fprintf(buf, "%v", v)
   877  	case float64:
   878  		if math.IsInf(v, 1) {
   879  			buf.WriteString(": inf")
   880  		} else if math.IsInf(v, -1) {
   881  			buf.WriteString(": -inf")
   882  		} else if math.IsNaN(v) {
   883  			buf.WriteString(": nan")
   884  		} else {
   885  			_, _ = fmt.Fprintf(buf, ": %v", v)
   886  		}
   887  	case string:
   888  		buf.WriteRune('"')
   889  		writeEscapedBytes(buf, []byte(v))
   890  		buf.WriteRune('"')
   891  	case []ast.ValueNode:
   892  		buf.WriteString(": [")
   893  		first := true
   894  		for _, e := range v {
   895  			if first {
   896  				first = false
   897  			} else {
   898  				buf.WriteString(", ")
   899  			}
   900  			elementToString(e.Value(), buf)
   901  		}
   902  		buf.WriteString("]")
   903  	case []*ast.MessageFieldNode:
   904  		aggToString(v, buf)
   905  	}
   906  }
   907  
   908  func writeEscapedBytes(buf *bytes.Buffer, b []byte) {
   909  	for _, c := range b {
   910  		switch c {
   911  		case '\n':
   912  			buf.WriteString("\\n")
   913  		case '\r':
   914  			buf.WriteString("\\r")
   915  		case '\t':
   916  			buf.WriteString("\\t")
   917  		case '"':
   918  			buf.WriteString("\\\"")
   919  		case '\'':
   920  			buf.WriteString("\\'")
   921  		case '\\':
   922  			buf.WriteString("\\\\")
   923  		default:
   924  			if c >= 0x20 && c <= 0x7f && c != '"' && c != '\\' {
   925  				// simple printable characters
   926  				buf.WriteByte(c)
   927  			} else {
   928  				// use octal escape for all other values
   929  				buf.WriteRune('\\')
   930  				buf.WriteByte('0' + ((c >> 6) & 0x7))
   931  				buf.WriteByte('0' + ((c >> 3) & 0x7))
   932  				buf.WriteByte('0' + (c & 0x7))
   933  			}
   934  		}
   935  	}
   936  }