github.com/syumai/protoreflect@v1.7.1-0.20200810020253-2ac7e3b3a321/desc/protoparse/parser.go (about) 1 package protoparse 2 3 import ( 4 "bytes" 5 "errors" 6 "fmt" 7 "io" 8 "io/ioutil" 9 "math" 10 "os" 11 "path/filepath" 12 "sort" 13 "strings" 14 15 "github.com/golang/protobuf/proto" 16 dpb "github.com/golang/protobuf/protoc-gen-go/descriptor" 17 18 "github.com/syumai/protoreflect/desc" 19 "github.com/syumai/protoreflect/desc/internal" 20 ) 21 22 //go:generate goyacc -o proto.y.go -p proto proto.y 23 24 func init() { 25 protoErrorVerbose = true 26 27 // fix up the generated "token name" array so that error messages are nicer 28 setTokenName(_STRING_LIT, "string literal") 29 setTokenName(_INT_LIT, "int literal") 30 setTokenName(_FLOAT_LIT, "float literal") 31 setTokenName(_NAME, "identifier") 32 setTokenName(_ERROR, "error") 33 // for keywords, just show the keyword itself wrapped in quotes 34 for str, i := range keywords { 35 setTokenName(i, fmt.Sprintf(`"%s"`, str)) 36 } 37 } 38 39 func setTokenName(token int, text string) { 40 // NB: this is based on logic in generated parse code that translates the 41 // int returned from the lexer into an internal token number. 42 var intern int 43 if token < len(protoTok1) { 44 intern = protoTok1[token] 45 } else { 46 if token >= protoPrivate { 47 if token < protoPrivate+len(protoTok2) { 48 intern = protoTok2[token-protoPrivate] 49 } 50 } 51 if intern == 0 { 52 for i := 0; i+1 < len(protoTok3); i += 2 { 53 if protoTok3[i] == token { 54 intern = protoTok3[i+1] 55 break 56 } 57 } 58 } 59 } 60 61 if intern >= 1 && intern-1 < len(protoToknames) { 62 protoToknames[intern-1] = text 63 return 64 } 65 66 panic(fmt.Sprintf("Unknown token value: %d", token)) 67 } 68 69 // FileAccessor is an abstraction for opening proto source files. It takes the 70 // name of the file to open and returns either the input reader or an error. 71 type FileAccessor func(filename string) (io.ReadCloser, error) 72 73 // FileContentsFromMap returns a FileAccessor that uses the given map of file 74 // contents. This allows proto source files to be constructed in memory and 75 // easily supplied to a parser. The map keys are the paths to the proto source 76 // files, and the values are the actual proto source contents. 77 func FileContentsFromMap(files map[string]string) FileAccessor { 78 return func(filename string) (io.ReadCloser, error) { 79 contents, ok := files[filename] 80 if !ok { 81 return nil, os.ErrNotExist 82 } 83 return ioutil.NopCloser(strings.NewReader(contents)), nil 84 } 85 } 86 87 // Parser parses proto source into descriptors. 88 type Parser struct { 89 // The paths used to search for dependencies that are referenced in import 90 // statements in proto source files. If no import paths are provided then 91 // "." (current directory) is assumed to be the only import path. 92 // 93 // This setting is only used during ParseFiles operations. Since calls to 94 // ParseFilesButDoNotLink do not link, there is no need to load and parse 95 // dependencies. 96 ImportPaths []string 97 98 // If true, the supplied file names/paths need not necessarily match how the 99 // files are referenced in import statements. The parser will attempt to 100 // match import statements to supplied paths, "guessing" the import paths 101 // for the files. Note that this inference is not perfect and link errors 102 // could result. It works best when all proto files are organized such that 103 // a single import path can be inferred (e.g. all files under a single tree 104 // with import statements all being relative to the root of this tree). 105 InferImportPaths bool 106 107 // LookupImport is a function that accepts a filename and 108 // returns a file descriptor, which will be consulted when resolving imports. 109 // This allows a compiled Go proto in another Go module to be referenced 110 // in the proto(s) being parsed. 111 // 112 // In the event of a filename collision, Accessor is consulted first, 113 // then LookupImport is consulted, and finally the well-known protos 114 // are used. 115 // 116 // For example, in order to automatically look up compiled Go protos that 117 // have been imported and be able to use them as imports, set this to 118 // desc.LoadFileDescriptor. 119 LookupImport func(string) (*desc.FileDescriptor, error) 120 121 // LookupImportProto has the same functionality as LookupImport, however it returns 122 // a FileDescriptorProto instead of a FileDescriptor. 123 // 124 // It is an error to set both LookupImport and LookupImportProto. 125 LookupImportProto func(string) (*dpb.FileDescriptorProto, error) 126 127 // Used to create a reader for a given filename, when loading proto source 128 // file contents. If unset, os.Open is used. If ImportPaths is also empty 129 // then relative paths are will be relative to the process's current working 130 // directory. 131 Accessor FileAccessor 132 133 // If true, the resulting file descriptors will retain source code info, 134 // that maps elements to their location in the source files as well as 135 // includes comments found during parsing (and attributed to elements of 136 // the source file). 137 IncludeSourceCodeInfo bool 138 139 // If true, the results from ParseFilesButDoNotLink will be passed through 140 // some additional validations. But only constraints that do not require 141 // linking can be checked. These include proto2 vs. proto3 language features, 142 // looking for incorrect usage of reserved names or tags, and ensuring that 143 // fields have unique tags and that enum values have unique numbers (unless 144 // the enum allows aliases). 145 ValidateUnlinkedFiles bool 146 147 // If true, the results from ParseFilesButDoNotLink will have options 148 // interpreted. Any uninterpretable options (including any custom options or 149 // options that refer to message and enum types, which can only be 150 // interpreted after linking) will be left in uninterpreted_options. Also, 151 // the "default" pseudo-option for fields can only be interpreted for scalar 152 // fields, excluding enums. (Interpreting default values for enum fields 153 // requires resolving enum names, which requires linking.) 154 InterpretOptionsInUnlinkedFiles bool 155 156 // A custom reporter of syntax and link errors. If not specified, the 157 // default reporter just returns the reported error, which causes parsing 158 // to abort after encountering a single error. 159 // 160 // The reporter is not invoked for system or I/O errors, only for syntax and 161 // link errors. 162 ErrorReporter ErrorReporter 163 164 // A custom reporter of warnings. If not specified, warning messages are ignored. 165 WarningReporter WarningReporter 166 } 167 168 // ParseFiles parses the named files into descriptors. The returned slice has 169 // the same number of entries as the give filenames, in the same order. So the 170 // first returned descriptor corresponds to the first given name, and so on. 171 // 172 // All dependencies for all specified files (including transitive dependencies) 173 // must be accessible via the parser's Accessor or a link error will occur. The 174 // exception to this rule is that files can import standard Google-provided 175 // files -- e.g. google/protobuf/*.proto -- without needing to supply sources 176 // for these files. Like protoc, this parser has a built-in version of these 177 // files it can use if they aren't explicitly supplied. 178 // 179 // If the Parser has no ErrorReporter set and a syntax or link error occurs, 180 // parsing will abort with the first such error encountered. If there is an 181 // ErrorReporter configured and it returns non-nil, parsing will abort with the 182 // error it returns. If syntax or link errors are encountered but the configured 183 // ErrorReporter always returns nil, the parse fails with ErrInvalidSource. 184 func (p Parser) ParseFiles(filenames ...string) ([]*desc.FileDescriptor, error) { 185 accessor := p.Accessor 186 if accessor == nil { 187 accessor = func(name string) (io.ReadCloser, error) { 188 return os.Open(name) 189 } 190 } 191 paths := p.ImportPaths 192 if len(paths) > 0 { 193 acc := accessor 194 accessor = func(name string) (io.ReadCloser, error) { 195 var ret error 196 for _, path := range paths { 197 f, err := acc(filepath.Join(path, name)) 198 if err != nil { 199 if ret == nil { 200 ret = err 201 } 202 continue 203 } 204 return f, nil 205 } 206 return nil, ret 207 } 208 } 209 lookupImport, err := p.getLookupImport() 210 if err != nil { 211 return nil, err 212 } 213 214 protos := map[string]*parseResult{} 215 results := &parseResults{resultsByFilename: protos} 216 errs := newErrorHandler(p.ErrorReporter, p.WarningReporter) 217 parseProtoFiles(accessor, filenames, errs, true, true, results, lookupImport) 218 if err := errs.getError(); err != nil { 219 return nil, err 220 } 221 if p.InferImportPaths { 222 // TODO: if this re-writes one of the names in filenames, lookups below will break 223 protos = fixupFilenames(protos) 224 } 225 linkedProtos, err := newLinker(results, errs).linkFiles() 226 if err != nil { 227 return nil, err 228 } 229 if p.IncludeSourceCodeInfo { 230 for name, fd := range linkedProtos { 231 pr := protos[name] 232 fd.AsFileDescriptorProto().SourceCodeInfo = pr.generateSourceCodeInfo() 233 internal.RecomputeSourceInfo(fd) 234 } 235 } 236 fds := make([]*desc.FileDescriptor, len(filenames)) 237 for i, name := range filenames { 238 fd := linkedProtos[name] 239 fds[i] = fd 240 } 241 return fds, nil 242 } 243 244 // ParseFilesButDoNotLink parses the named files into descriptor protos. The 245 // results are just protos, not fully-linked descriptors. It is possible that 246 // descriptors are invalid and still be returned in parsed form without error 247 // due to the fact that the linking step is skipped (and thus many validation 248 // steps omitted). 249 // 250 // There are a few side effects to not linking the descriptors: 251 // 1. No options will be interpreted. Options can refer to extensions or have 252 // message and enum types. Without linking, these extension and type 253 // references are not resolved, so the options may not be interpretable. 254 // So all options will appear in UninterpretedOption fields of the various 255 // descriptor options messages. 256 // 2. Type references will not be resolved. This means that the actual type 257 // names in the descriptors may be unqualified and even relative to the 258 // scope in which the type reference appears. This goes for fields that 259 // have message and enum types. It also applies to methods and their 260 // references to request and response message types. 261 // 3. Enum fields are not known. Until a field's type reference is resolved 262 // (during linking), it is not known whether the type refers to a message 263 // or an enum. So all fields with such type references have their Type set 264 // to TYPE_MESSAGE. 265 // 266 // This method will still validate the syntax of parsed files. If the parser's 267 // ValidateUnlinkedFiles field is true, additional checks, beyond syntax will 268 // also be performed. 269 // 270 // If the Parser has no ErrorReporter set and a syntax or link error occurs, 271 // parsing will abort with the first such error encountered. If there is an 272 // ErrorReporter configured and it returns non-nil, parsing will abort with the 273 // error it returns. If syntax or link errors are encountered but the configured 274 // ErrorReporter always returns nil, the parse fails with ErrInvalidSource. 275 func (p Parser) ParseFilesButDoNotLink(filenames ...string) ([]*dpb.FileDescriptorProto, error) { 276 accessor := p.Accessor 277 if accessor == nil { 278 accessor = func(name string) (io.ReadCloser, error) { 279 return os.Open(name) 280 } 281 } 282 lookupImport, err := p.getLookupImport() 283 if err != nil { 284 return nil, err 285 } 286 287 protos := map[string]*parseResult{} 288 errs := newErrorHandler(p.ErrorReporter, p.WarningReporter) 289 parseProtoFiles(accessor, filenames, errs, false, p.ValidateUnlinkedFiles, &parseResults{resultsByFilename: protos}, lookupImport) 290 if err := errs.getError(); err != nil { 291 return nil, err 292 } 293 if p.InferImportPaths { 294 // TODO: if this re-writes one of the names in filenames, lookups below will break 295 protos = fixupFilenames(protos) 296 } 297 fds := make([]*dpb.FileDescriptorProto, len(filenames)) 298 for i, name := range filenames { 299 pr := protos[name] 300 fd := pr.fd 301 if p.InterpretOptionsInUnlinkedFiles { 302 // parsing options will be best effort 303 pr.lenient = true 304 // we don't want the real error reporter see any errors 305 pr.errs.errReporter = func(err ErrorWithPos) error { 306 return err 307 } 308 _ = interpretFileOptions(pr, poorFileDescriptorish{FileDescriptorProto: fd}) 309 } 310 if p.IncludeSourceCodeInfo { 311 fd.SourceCodeInfo = pr.generateSourceCodeInfo() 312 } 313 fds[i] = fd 314 } 315 return fds, nil 316 } 317 318 func (p Parser) getLookupImport() (func(string) (*dpb.FileDescriptorProto, error), error) { 319 if p.LookupImport != nil && p.LookupImportProto != nil { 320 return nil, ErrLookupImportAndProtoSet 321 } 322 if p.LookupImportProto != nil { 323 return p.LookupImportProto, nil 324 } 325 if p.LookupImport != nil { 326 return func(path string) (*dpb.FileDescriptorProto, error) { 327 value, err := p.LookupImport(path) 328 if value != nil { 329 return value.AsFileDescriptorProto(), err 330 } 331 return nil, err 332 }, nil 333 } 334 return nil, nil 335 } 336 337 func fixupFilenames(protos map[string]*parseResult) map[string]*parseResult { 338 // In the event that the given filenames (keys in the supplied map) do not 339 // match the actual paths used in 'import' statements in the files, we try 340 // to revise names in the protos so that they will match and be linkable. 341 revisedProtos := map[string]*parseResult{} 342 343 protoPaths := map[string]struct{}{} 344 // TODO: this is O(n^2) but could likely be O(n) with a clever data structure (prefix tree that is indexed backwards?) 345 importCandidates := map[string]map[string]struct{}{} 346 candidatesAvailable := map[string]struct{}{} 347 for name := range protos { 348 candidatesAvailable[name] = struct{}{} 349 for _, f := range protos { 350 for _, imp := range f.fd.Dependency { 351 if strings.HasSuffix(name, imp) { 352 candidates := importCandidates[imp] 353 if candidates == nil { 354 candidates = map[string]struct{}{} 355 importCandidates[imp] = candidates 356 } 357 candidates[name] = struct{}{} 358 } 359 } 360 } 361 } 362 for imp, candidates := range importCandidates { 363 // if we found multiple possible candidates, use the one that is an exact match 364 // if it exists, and otherwise, guess that it's the shortest path (fewest elements) 365 var best string 366 for c := range candidates { 367 if _, ok := candidatesAvailable[c]; !ok { 368 // already used this candidate and re-written its filename accordingly 369 continue 370 } 371 if c == imp { 372 // exact match! 373 best = c 374 break 375 } 376 if best == "" { 377 best = c 378 } else { 379 // HACK: we can't actually tell which files is supposed to match 380 // this import, so arbitrarily pick the "shorter" one (fewest 381 // path elements) or, on a tie, the lexically earlier one 382 minLen := strings.Count(best, string(filepath.Separator)) 383 cLen := strings.Count(c, string(filepath.Separator)) 384 if cLen < minLen || (cLen == minLen && c < best) { 385 best = c 386 } 387 } 388 } 389 if best != "" { 390 prefix := best[:len(best)-len(imp)] 391 if len(prefix) > 0 { 392 protoPaths[prefix] = struct{}{} 393 } 394 f := protos[best] 395 f.fd.Name = proto.String(imp) 396 revisedProtos[imp] = f 397 delete(candidatesAvailable, best) 398 } 399 } 400 401 if len(candidatesAvailable) == 0 { 402 return revisedProtos 403 } 404 405 if len(protoPaths) == 0 { 406 for c := range candidatesAvailable { 407 revisedProtos[c] = protos[c] 408 } 409 return revisedProtos 410 } 411 412 // Any remaining candidates are entry-points (not imported by others), so 413 // the best bet to "fixing" their file name is to see if they're in one of 414 // the proto paths we found, and if so strip that prefix. 415 protoPathStrs := make([]string, len(protoPaths)) 416 i := 0 417 for p := range protoPaths { 418 protoPathStrs[i] = p 419 i++ 420 } 421 sort.Strings(protoPathStrs) 422 // we look at paths in reverse order, so we'll use a longer proto path if 423 // there is more than one match 424 for c := range candidatesAvailable { 425 var imp string 426 for i := len(protoPathStrs) - 1; i >= 0; i-- { 427 p := protoPathStrs[i] 428 if strings.HasPrefix(c, p) { 429 imp = c[len(p):] 430 break 431 } 432 } 433 if imp != "" { 434 f := protos[c] 435 f.fd.Name = proto.String(imp) 436 revisedProtos[imp] = f 437 } else { 438 revisedProtos[c] = protos[c] 439 } 440 } 441 442 return revisedProtos 443 } 444 445 func parseProtoFiles(acc FileAccessor, filenames []string, errs *errorHandler, recursive, validate bool, parsed *parseResults, lookupImport func(string) (*dpb.FileDescriptorProto, error)) { 446 for _, name := range filenames { 447 parseProtoFile(acc, name, nil, errs, recursive, validate, parsed, lookupImport) 448 if errs.err != nil { 449 return 450 } 451 } 452 } 453 454 func parseProtoFile(acc FileAccessor, filename string, importLoc *SourcePos, errs *errorHandler, recursive, validate bool, parsed *parseResults, lookupImport func(string) (*dpb.FileDescriptorProto, error)) { 455 if parsed.has(filename) { 456 return 457 } 458 if lookupImport == nil { 459 lookupImport = func(string) (*dpb.FileDescriptorProto, error) { 460 return nil, errors.New("no import lookup function") 461 } 462 } 463 in, err := acc(filename) 464 var result *parseResult 465 if err == nil { 466 // try to parse the bytes accessed 467 func() { 468 defer func() { 469 // if we've already parsed contents, an error 470 // closing need not fail this operation 471 _ = in.Close() 472 }() 473 result = parseProto(filename, in, errs, validate) 474 }() 475 } else if d, lookupErr := lookupImport(filename); lookupErr == nil { 476 // This is a user-provided descriptor, which is acting similarly to a 477 // well-known import. 478 result = &parseResult{fd: proto.Clone(d).(*dpb.FileDescriptorProto)} 479 } else if d, ok := standardImports[filename]; ok { 480 // it's a well-known import 481 // (we clone it to make sure we're not sharing state with other 482 // parsers, which could result in unsafe races if multiple 483 // parsers are trying to access it concurrently) 484 result = &parseResult{fd: proto.Clone(d).(*dpb.FileDescriptorProto)} 485 } else { 486 if !strings.Contains(err.Error(), filename) { 487 // an error message that doesn't indicate the file is awful! 488 // this cannot be %w as this is not compatible with go <= 1.13 489 err = errorWithFilename{ 490 underlying: err, 491 filename: filename, 492 } 493 } 494 // The top-level loop in parseProtoFiles calls this with nil for the top-level files 495 // importLoc is only for imports, otherwise we do not want to return a ErrorWithSourcePos 496 // ErrorWithSourcePos should always have a non-nil SourcePos 497 if importLoc != nil { 498 // associate the error with the import line 499 err = ErrorWithSourcePos{ 500 Pos: importLoc, 501 Underlying: err, 502 } 503 } 504 _ = errs.handleError(err) 505 return 506 } 507 508 parsed.add(filename, result) 509 510 if errs.err != nil { 511 return // abort 512 } 513 514 if recursive { 515 fd := result.fd 516 decl := result.getFileNode(fd) 517 fnode, ok := decl.(*fileNode) 518 if !ok { 519 // no AST for this file? use imports in descriptor 520 for _, dep := range fd.Dependency { 521 parseProtoFile(acc, dep, decl.start(), errs, true, validate, parsed, lookupImport) 522 if errs.getError() != nil { 523 return // abort 524 } 525 } 526 return 527 } 528 // we have an AST; use it so we can report import location in errors 529 for _, dep := range fnode.imports { 530 parseProtoFile(acc, dep.name.val, dep.name.start(), errs, true, validate, parsed, lookupImport) 531 if errs.getError() != nil { 532 return // abort 533 } 534 } 535 } 536 } 537 538 type parseResults struct { 539 resultsByFilename map[string]*parseResult 540 filenames []string 541 } 542 543 func (r *parseResults) has(filename string) bool { 544 _, ok := r.resultsByFilename[filename] 545 return ok 546 } 547 548 func (r *parseResults) add(filename string, result *parseResult) { 549 r.resultsByFilename[filename] = result 550 r.filenames = append(r.filenames, filename) 551 } 552 553 type parseResult struct { 554 // handles any errors encountered during parsing, construction of file descriptor, 555 // or validation 556 errs *errorHandler 557 558 // the parsed file descriptor 559 fd *dpb.FileDescriptorProto 560 561 // if set to true, enables lenient interpretation of options, where 562 // unrecognized options will be left uninterpreted instead of resulting in a 563 // link error 564 lenient bool 565 566 // a map of elements in the descriptor to nodes in the AST 567 // (for extracting position information when validating the descriptor) 568 nodes map[proto.Message]node 569 570 // a map of uninterpreted option AST nodes to their relative path 571 // in the resulting options message 572 interpretedOptions map[*optionNode][]int32 573 } 574 575 func (r *parseResult) getFileNode(f *dpb.FileDescriptorProto) fileDecl { 576 if r.nodes == nil { 577 return noSourceNode{pos: unknownPos(f.GetName())} 578 } 579 return r.nodes[f].(fileDecl) 580 } 581 582 func (r *parseResult) getOptionNode(o *dpb.UninterpretedOption) optionDecl { 583 if r.nodes == nil { 584 return noSourceNode{pos: unknownPos(r.fd.GetName())} 585 } 586 return r.nodes[o].(optionDecl) 587 } 588 589 func (r *parseResult) getOptionNamePartNode(o *dpb.UninterpretedOption_NamePart) node { 590 if r.nodes == nil { 591 return noSourceNode{pos: unknownPos(r.fd.GetName())} 592 } 593 return r.nodes[o] 594 } 595 596 func (r *parseResult) getFieldNode(f *dpb.FieldDescriptorProto) fieldDecl { 597 if r.nodes == nil { 598 return noSourceNode{pos: unknownPos(r.fd.GetName())} 599 } 600 return r.nodes[f].(fieldDecl) 601 } 602 603 func (r *parseResult) getExtensionRangeNode(e *dpb.DescriptorProto_ExtensionRange) rangeDecl { 604 if r.nodes == nil { 605 return noSourceNode{pos: unknownPos(r.fd.GetName())} 606 } 607 return r.nodes[e].(rangeDecl) 608 } 609 610 func (r *parseResult) getMessageReservedRangeNode(rr *dpb.DescriptorProto_ReservedRange) rangeDecl { 611 if r.nodes == nil { 612 return noSourceNode{pos: unknownPos(r.fd.GetName())} 613 } 614 return r.nodes[rr].(rangeDecl) 615 } 616 617 func (r *parseResult) getEnumNode(e *dpb.EnumDescriptorProto) node { 618 if r.nodes == nil { 619 return noSourceNode{pos: unknownPos(r.fd.GetName())} 620 } 621 return r.nodes[e] 622 } 623 624 func (r *parseResult) getEnumValueNode(e *dpb.EnumValueDescriptorProto) enumValueDecl { 625 if r.nodes == nil { 626 return noSourceNode{pos: unknownPos(r.fd.GetName())} 627 } 628 return r.nodes[e].(enumValueDecl) 629 } 630 631 func (r *parseResult) getEnumReservedRangeNode(rr *dpb.EnumDescriptorProto_EnumReservedRange) rangeDecl { 632 if r.nodes == nil { 633 return noSourceNode{pos: unknownPos(r.fd.GetName())} 634 } 635 return r.nodes[rr].(rangeDecl) 636 } 637 638 func (r *parseResult) getMethodNode(m *dpb.MethodDescriptorProto) methodDecl { 639 if r.nodes == nil { 640 return noSourceNode{pos: unknownPos(r.fd.GetName())} 641 } 642 return r.nodes[m].(methodDecl) 643 } 644 645 func (r *parseResult) putFileNode(f *dpb.FileDescriptorProto, n *fileNode) { 646 r.nodes[f] = n 647 } 648 649 func (r *parseResult) putOptionNode(o *dpb.UninterpretedOption, n *optionNode) { 650 r.nodes[o] = n 651 } 652 653 func (r *parseResult) putOptionNamePartNode(o *dpb.UninterpretedOption_NamePart, n *optionNamePartNode) { 654 r.nodes[o] = n 655 } 656 657 func (r *parseResult) putMessageNode(m *dpb.DescriptorProto, n msgDecl) { 658 r.nodes[m] = n 659 } 660 661 func (r *parseResult) putFieldNode(f *dpb.FieldDescriptorProto, n fieldDecl) { 662 r.nodes[f] = n 663 } 664 665 func (r *parseResult) putOneOfNode(o *dpb.OneofDescriptorProto, n *oneOfNode) { 666 r.nodes[o] = n 667 } 668 669 func (r *parseResult) putExtensionRangeNode(e *dpb.DescriptorProto_ExtensionRange, n *rangeNode) { 670 r.nodes[e] = n 671 } 672 673 func (r *parseResult) putMessageReservedRangeNode(rr *dpb.DescriptorProto_ReservedRange, n *rangeNode) { 674 r.nodes[rr] = n 675 } 676 677 func (r *parseResult) putEnumNode(e *dpb.EnumDescriptorProto, n *enumNode) { 678 r.nodes[e] = n 679 } 680 681 func (r *parseResult) putEnumValueNode(e *dpb.EnumValueDescriptorProto, n *enumValueNode) { 682 r.nodes[e] = n 683 } 684 685 func (r *parseResult) putEnumReservedRangeNode(rr *dpb.EnumDescriptorProto_EnumReservedRange, n *rangeNode) { 686 r.nodes[rr] = n 687 } 688 689 func (r *parseResult) putServiceNode(s *dpb.ServiceDescriptorProto, n *serviceNode) { 690 r.nodes[s] = n 691 } 692 693 func (r *parseResult) putMethodNode(m *dpb.MethodDescriptorProto, n *methodNode) { 694 r.nodes[m] = n 695 } 696 697 func parseProto(filename string, r io.Reader, errs *errorHandler, validate bool) *parseResult { 698 beforeErrs := errs.errsReported 699 lx := newLexer(r, filename, errs) 700 protoParse(lx) 701 702 res := createParseResult(filename, lx.res, errs) 703 if validate && errs.err == nil { 704 validateBasic(res, errs.errsReported > beforeErrs) 705 } 706 707 return res 708 } 709 710 func createParseResult(filename string, file *fileNode, errs *errorHandler) *parseResult { 711 res := &parseResult{ 712 errs: errs, 713 nodes: map[proto.Message]node{}, 714 interpretedOptions: map[*optionNode][]int32{}, 715 } 716 if file == nil { 717 // nil AST means there was an error that prevented any parsing 718 // or the file was empty; synthesize empty non-nil AST 719 file = &fileNode{} 720 } 721 if file.first == nil { 722 n := noSourceNode{pos: unknownPos(filename)} 723 file.setRange(&n, &n) 724 } 725 res.createFileDescriptor(filename, file) 726 return res 727 } 728 729 func toNameParts(ident *compoundIdentNode) []*optionNamePartNode { 730 parts := strings.Split(ident.val, ".") 731 ret := make([]*optionNamePartNode, len(parts)) 732 offset := 0 733 for i, p := range parts { 734 ret[i] = &optionNamePartNode{text: ident, offset: offset, length: len(p)} 735 ret[i].setRange(ident, ident) 736 offset += len(p) + 1 737 } 738 return ret 739 } 740 741 func checkTag(pos *SourcePos, v uint64, maxTag int32) error { 742 if v < 1 { 743 return errorWithPos(pos, "tag number %d must be greater than zero", v) 744 } else if v > uint64(maxTag) { 745 return errorWithPos(pos, "tag number %d is higher than max allowed tag number (%d)", v, maxTag) 746 } else if v >= internal.SpecialReservedStart && v <= internal.SpecialReservedEnd { 747 return errorWithPos(pos, "tag number %d is in disallowed reserved range %d-%d", v, internal.SpecialReservedStart, internal.SpecialReservedEnd) 748 } 749 return nil 750 } 751 752 func checkExtensionTagsInFile(fd *desc.FileDescriptor, res *parseResult) error { 753 for _, fld := range fd.GetExtensions() { 754 if err := checkExtensionTag(fld, res); err != nil { 755 return err 756 } 757 } 758 for _, md := range fd.GetMessageTypes() { 759 if err := checkExtensionTagsInMessage(md, res); err != nil { 760 return err 761 } 762 } 763 return nil 764 } 765 766 func checkExtensionTagsInMessage(md *desc.MessageDescriptor, res *parseResult) error { 767 for _, fld := range md.GetNestedExtensions() { 768 if err := checkExtensionTag(fld, res); err != nil { 769 return err 770 } 771 } 772 for _, nmd := range md.GetNestedMessageTypes() { 773 if err := checkExtensionTagsInMessage(nmd, res); err != nil { 774 return err 775 } 776 } 777 return nil 778 } 779 780 func checkExtensionTag(fld *desc.FieldDescriptor, res *parseResult) error { 781 // NB: This is kind of gross that we don't enforce this in validateBasic(). But it would 782 // require doing some minimal linking there (to identify the extendee and locate its 783 // descriptor). To keep the code simpler, we just wait until things are fully linked. 784 785 // In validateBasic() we just made sure these were within bounds for any message. But 786 // now that things are linked, we can check if the extendee is messageset wire format 787 // and, if not, enforce tighter limit. 788 if !fld.GetOwner().GetMessageOptions().GetMessageSetWireFormat() && fld.GetNumber() > internal.MaxNormalTag { 789 pos := res.nodes[fld.AsFieldDescriptorProto()].(fieldDecl).fieldTag().start() 790 return errorWithPos(pos, "tag number %d is higher than max allowed tag number (%d)", fld.GetNumber(), internal.MaxNormalTag) 791 } 792 return nil 793 } 794 795 func aggToString(agg []*aggregateEntryNode, buf *bytes.Buffer) { 796 buf.WriteString("{") 797 for _, a := range agg { 798 buf.WriteString(" ") 799 buf.WriteString(a.name.value()) 800 if v, ok := a.val.(*aggregateLiteralNode); ok { 801 aggToString(v.elements, buf) 802 } else { 803 buf.WriteString(": ") 804 elementToString(a.val.value(), buf) 805 } 806 } 807 buf.WriteString(" }") 808 } 809 810 func elementToString(v interface{}, buf *bytes.Buffer) { 811 switch v := v.(type) { 812 case bool, int64, uint64, identifier: 813 _, _ = fmt.Fprintf(buf, "%v", v) 814 case float64: 815 if math.IsInf(v, 1) { 816 buf.WriteString(": inf") 817 } else if math.IsInf(v, -1) { 818 buf.WriteString(": -inf") 819 } else if math.IsNaN(v) { 820 buf.WriteString(": nan") 821 } else { 822 _, _ = fmt.Fprintf(buf, ": %v", v) 823 } 824 case string: 825 buf.WriteRune('"') 826 writeEscapedBytes(buf, []byte(v)) 827 buf.WriteRune('"') 828 case []valueNode: 829 buf.WriteString(": [") 830 first := true 831 for _, e := range v { 832 if first { 833 first = false 834 } else { 835 buf.WriteString(", ") 836 } 837 elementToString(e.value(), buf) 838 } 839 buf.WriteString("]") 840 case []*aggregateEntryNode: 841 aggToString(v, buf) 842 } 843 } 844 845 func writeEscapedBytes(buf *bytes.Buffer, b []byte) { 846 for _, c := range b { 847 switch c { 848 case '\n': 849 buf.WriteString("\\n") 850 case '\r': 851 buf.WriteString("\\r") 852 case '\t': 853 buf.WriteString("\\t") 854 case '"': 855 buf.WriteString("\\\"") 856 case '\'': 857 buf.WriteString("\\'") 858 case '\\': 859 buf.WriteString("\\\\") 860 default: 861 if c >= 0x20 && c <= 0x7f && c != '"' && c != '\\' { 862 // simple printable characters 863 buf.WriteByte(c) 864 } else { 865 // use octal escape for all other values 866 buf.WriteRune('\\') 867 buf.WriteByte('0' + ((c >> 6) & 0x7)) 868 buf.WriteByte('0' + ((c >> 3) & 0x7)) 869 buf.WriteByte('0' + (c & 0x7)) 870 } 871 } 872 } 873 }