github.com/jhump/protoreflect@v1.16.0/dynamic/text.go (about) 1 package dynamic 2 3 // Marshalling and unmarshalling of dynamic messages to/from proto's standard text format 4 5 import ( 6 "bytes" 7 "fmt" 8 "io" 9 "math" 10 "reflect" 11 "sort" 12 "strconv" 13 "strings" 14 "text/scanner" 15 "unicode" 16 17 "github.com/golang/protobuf/proto" 18 "google.golang.org/protobuf/types/descriptorpb" 19 20 "github.com/jhump/protoreflect/codec" 21 "github.com/jhump/protoreflect/desc" 22 ) 23 24 // MarshalText serializes this message to bytes in the standard text format, 25 // returning an error if the operation fails. The resulting bytes will be a 26 // valid UTF8 string. 27 // 28 // This method uses a compact form: no newlines, and spaces between field 29 // identifiers and values are elided. 30 func (m *Message) MarshalText() ([]byte, error) { 31 var b indentBuffer 32 b.indentCount = -1 // no indentation 33 if err := m.marshalText(&b); err != nil { 34 return nil, err 35 } 36 return b.Bytes(), nil 37 } 38 39 // MarshalTextIndent serializes this message to bytes in the standard text 40 // format, returning an error if the operation fails. The resulting bytes will 41 // be a valid UTF8 string. 42 // 43 // This method uses a "pretty-printed" form, with each field on its own line and 44 // spaces between field identifiers and values. 45 func (m *Message) MarshalTextIndent() ([]byte, error) { 46 var b indentBuffer 47 b.indent = " " // TODO: option for indent? 48 if err := m.marshalText(&b); err != nil { 49 return nil, err 50 } 51 return b.Bytes(), nil 52 } 53 54 func (m *Message) marshalText(b *indentBuffer) error { 55 // TODO: option for emitting extended Any format? 56 first := true 57 // first the known fields 58 for _, tag := range m.knownFieldTags() { 59 itag := int32(tag) 60 v := m.values[itag] 61 fd := m.FindFieldDescriptor(itag) 62 if fd.IsMap() { 63 md := fd.GetMessageType() 64 kfd := md.FindFieldByNumber(1) 65 vfd := md.FindFieldByNumber(2) 66 mp := v.(map[interface{}]interface{}) 67 keys := make([]interface{}, 0, len(mp)) 68 for k := range mp { 69 keys = append(keys, k) 70 } 71 sort.Sort(sortable(keys)) 72 for _, mk := range keys { 73 mv := mp[mk] 74 err := b.maybeNext(&first) 75 if err != nil { 76 return err 77 } 78 err = marshalKnownFieldMapEntryText(b, fd, kfd, mk, vfd, mv) 79 if err != nil { 80 return err 81 } 82 } 83 } else if fd.IsRepeated() { 84 sl := v.([]interface{}) 85 for _, slv := range sl { 86 err := b.maybeNext(&first) 87 if err != nil { 88 return err 89 } 90 err = marshalKnownFieldText(b, fd, slv) 91 if err != nil { 92 return err 93 } 94 } 95 } else { 96 err := b.maybeNext(&first) 97 if err != nil { 98 return err 99 } 100 err = marshalKnownFieldText(b, fd, v) 101 if err != nil { 102 return err 103 } 104 } 105 } 106 // then the unknown fields 107 for _, tag := range m.unknownFieldTags() { 108 itag := int32(tag) 109 ufs := m.unknownFields[itag] 110 for _, uf := range ufs { 111 err := b.maybeNext(&first) 112 if err != nil { 113 return err 114 } 115 _, err = fmt.Fprintf(b, "%d", tag) 116 if err != nil { 117 return err 118 } 119 if uf.Encoding == proto.WireStartGroup { 120 err = b.WriteByte('{') 121 if err != nil { 122 return err 123 } 124 err = b.start() 125 if err != nil { 126 return err 127 } 128 in := codec.NewBuffer(uf.Contents) 129 err = marshalUnknownGroupText(b, in, true) 130 if err != nil { 131 return err 132 } 133 err = b.end() 134 if err != nil { 135 return err 136 } 137 err = b.WriteByte('}') 138 if err != nil { 139 return err 140 } 141 } else { 142 err = b.sep() 143 if err != nil { 144 return err 145 } 146 if uf.Encoding == proto.WireBytes { 147 err = writeString(b, string(uf.Contents)) 148 if err != nil { 149 return err 150 } 151 } else { 152 _, err = b.WriteString(strconv.FormatUint(uf.Value, 10)) 153 if err != nil { 154 return err 155 } 156 } 157 } 158 } 159 } 160 return nil 161 } 162 163 func marshalKnownFieldMapEntryText(b *indentBuffer, fd *desc.FieldDescriptor, kfd *desc.FieldDescriptor, mk interface{}, vfd *desc.FieldDescriptor, mv interface{}) error { 164 var name string 165 if fd.IsExtension() { 166 name = fmt.Sprintf("[%s]", fd.GetFullyQualifiedName()) 167 } else { 168 name = fd.GetName() 169 } 170 _, err := b.WriteString(name) 171 if err != nil { 172 return err 173 } 174 err = b.sep() 175 if err != nil { 176 return err 177 } 178 179 err = b.WriteByte('<') 180 if err != nil { 181 return err 182 } 183 err = b.start() 184 if err != nil { 185 return err 186 } 187 188 err = marshalKnownFieldText(b, kfd, mk) 189 if err != nil { 190 return err 191 } 192 err = b.next() 193 if err != nil { 194 return err 195 } 196 if !isNil(mv) { 197 err = marshalKnownFieldText(b, vfd, mv) 198 if err != nil { 199 return err 200 } 201 } 202 203 err = b.end() 204 if err != nil { 205 return err 206 } 207 return b.WriteByte('>') 208 } 209 210 func marshalKnownFieldText(b *indentBuffer, fd *desc.FieldDescriptor, v interface{}) error { 211 group := fd.GetType() == descriptorpb.FieldDescriptorProto_TYPE_GROUP 212 if group { 213 var name string 214 if fd.IsExtension() { 215 name = fmt.Sprintf("[%s]", fd.GetMessageType().GetFullyQualifiedName()) 216 } else { 217 name = fd.GetMessageType().GetName() 218 } 219 _, err := b.WriteString(name) 220 if err != nil { 221 return err 222 } 223 } else { 224 var name string 225 if fd.IsExtension() { 226 name = fmt.Sprintf("[%s]", fd.GetFullyQualifiedName()) 227 } else { 228 name = fd.GetName() 229 } 230 _, err := b.WriteString(name) 231 if err != nil { 232 return err 233 } 234 err = b.sep() 235 if err != nil { 236 return err 237 } 238 } 239 rv := reflect.ValueOf(v) 240 switch rv.Kind() { 241 case reflect.Int32, reflect.Int64: 242 ed := fd.GetEnumType() 243 if ed != nil { 244 n := int32(rv.Int()) 245 vd := ed.FindValueByNumber(n) 246 if vd == nil { 247 _, err := b.WriteString(strconv.FormatInt(rv.Int(), 10)) 248 return err 249 } else { 250 _, err := b.WriteString(vd.GetName()) 251 return err 252 } 253 } else { 254 _, err := b.WriteString(strconv.FormatInt(rv.Int(), 10)) 255 return err 256 } 257 case reflect.Uint32, reflect.Uint64: 258 _, err := b.WriteString(strconv.FormatUint(rv.Uint(), 10)) 259 return err 260 case reflect.Float32, reflect.Float64: 261 f := rv.Float() 262 var str string 263 if math.IsNaN(f) { 264 str = "nan" 265 } else if math.IsInf(f, 1) { 266 str = "inf" 267 } else if math.IsInf(f, -1) { 268 str = "-inf" 269 } else { 270 var bits int 271 if rv.Kind() == reflect.Float32 { 272 bits = 32 273 } else { 274 bits = 64 275 } 276 str = strconv.FormatFloat(rv.Float(), 'g', -1, bits) 277 } 278 _, err := b.WriteString(str) 279 return err 280 case reflect.Bool: 281 _, err := b.WriteString(strconv.FormatBool(rv.Bool())) 282 return err 283 case reflect.Slice: 284 return writeString(b, string(rv.Bytes())) 285 case reflect.String: 286 return writeString(b, rv.String()) 287 default: 288 var err error 289 if group { 290 err = b.WriteByte('{') 291 } else { 292 err = b.WriteByte('<') 293 } 294 if err != nil { 295 return err 296 } 297 err = b.start() 298 if err != nil { 299 return err 300 } 301 // must be a message 302 if dm, ok := v.(*Message); ok { 303 err = dm.marshalText(b) 304 if err != nil { 305 return err 306 } 307 } else { 308 err = proto.CompactText(b, v.(proto.Message)) 309 if err != nil { 310 return err 311 } 312 } 313 err = b.end() 314 if err != nil { 315 return err 316 } 317 if group { 318 return b.WriteByte('}') 319 } else { 320 return b.WriteByte('>') 321 } 322 } 323 } 324 325 // writeString writes a string in the protocol buffer text format. 326 // It is similar to strconv.Quote except we don't use Go escape sequences, 327 // we treat the string as a byte sequence, and we use octal escapes. 328 // These differences are to maintain interoperability with the other 329 // languages' implementations of the text format. 330 func writeString(b *indentBuffer, s string) error { 331 // use WriteByte here to get any needed indent 332 if err := b.WriteByte('"'); err != nil { 333 return err 334 } 335 // Loop over the bytes, not the runes. 336 for i := 0; i < len(s); i++ { 337 var err error 338 // Divergence from C++: we don't escape apostrophes. 339 // There's no need to escape them, and the C++ parser 340 // copes with a naked apostrophe. 341 switch c := s[i]; c { 342 case '\n': 343 _, err = b.WriteString("\\n") 344 case '\r': 345 _, err = b.WriteString("\\r") 346 case '\t': 347 _, err = b.WriteString("\\t") 348 case '"': 349 _, err = b.WriteString("\\\"") 350 case '\\': 351 _, err = b.WriteString("\\\\") 352 default: 353 if c >= 0x20 && c < 0x7f { 354 err = b.WriteByte(c) 355 } else { 356 _, err = fmt.Fprintf(b, "\\%03o", c) 357 } 358 } 359 if err != nil { 360 return err 361 } 362 } 363 return b.WriteByte('"') 364 } 365 366 func marshalUnknownGroupText(b *indentBuffer, in *codec.Buffer, topLevel bool) error { 367 first := true 368 for { 369 if in.EOF() { 370 if topLevel { 371 return nil 372 } 373 // this is a nested message: we are expecting an end-group tag, not EOF! 374 return io.ErrUnexpectedEOF 375 } 376 tag, wireType, err := in.DecodeTagAndWireType() 377 if err != nil { 378 return err 379 } 380 if wireType == proto.WireEndGroup { 381 return nil 382 } 383 err = b.maybeNext(&first) 384 if err != nil { 385 return err 386 } 387 _, err = fmt.Fprintf(b, "%d", tag) 388 if err != nil { 389 return err 390 } 391 if wireType == proto.WireStartGroup { 392 err = b.WriteByte('{') 393 if err != nil { 394 return err 395 } 396 err = b.start() 397 if err != nil { 398 return err 399 } 400 err = marshalUnknownGroupText(b, in, false) 401 if err != nil { 402 return err 403 } 404 err = b.end() 405 if err != nil { 406 return err 407 } 408 err = b.WriteByte('}') 409 if err != nil { 410 return err 411 } 412 continue 413 } else { 414 err = b.sep() 415 if err != nil { 416 return err 417 } 418 if wireType == proto.WireBytes { 419 contents, err := in.DecodeRawBytes(false) 420 if err != nil { 421 return err 422 } 423 err = writeString(b, string(contents)) 424 if err != nil { 425 return err 426 } 427 } else { 428 var v uint64 429 switch wireType { 430 case proto.WireVarint: 431 v, err = in.DecodeVarint() 432 case proto.WireFixed32: 433 v, err = in.DecodeFixed32() 434 case proto.WireFixed64: 435 v, err = in.DecodeFixed64() 436 default: 437 return proto.ErrInternalBadWireType 438 } 439 if err != nil { 440 return err 441 } 442 _, err = b.WriteString(strconv.FormatUint(v, 10)) 443 if err != nil { 444 return err 445 } 446 } 447 } 448 } 449 } 450 451 // UnmarshalText de-serializes the message that is present, in text format, in 452 // the given bytes into this message. It first resets the current message. It 453 // returns an error if the given bytes do not contain a valid encoding of this 454 // message type in the standard text format 455 func (m *Message) UnmarshalText(text []byte) error { 456 m.Reset() 457 if err := m.UnmarshalMergeText(text); err != nil { 458 return err 459 } 460 return m.Validate() 461 } 462 463 // UnmarshalMergeText de-serializes the message that is present, in text format, 464 // in the given bytes into this message. Unlike UnmarshalText, it does not first 465 // reset the message, instead merging the data in the given bytes into the 466 // existing data in this message. 467 func (m *Message) UnmarshalMergeText(text []byte) error { 468 return m.unmarshalText(newReader(text), tokenEOF) 469 } 470 471 func (m *Message) unmarshalText(tr *txtReader, end tokenType) error { 472 for { 473 tok := tr.next() 474 if tok.tokTyp == end { 475 return nil 476 } 477 if tok.tokTyp == tokenEOF { 478 return io.ErrUnexpectedEOF 479 } 480 var fd *desc.FieldDescriptor 481 var extendedAnyType *desc.MessageDescriptor 482 if tok.tokTyp == tokenInt { 483 // tag number (indicates unknown field) 484 tag, err := strconv.ParseInt(tok.val.(string), 10, 32) 485 if err != nil { 486 return err 487 } 488 itag := int32(tag) 489 fd = m.FindFieldDescriptor(itag) 490 if fd == nil { 491 // can't parse the value w/out field descriptor, so skip it 492 tok = tr.next() 493 if tok.tokTyp == tokenEOF { 494 return io.ErrUnexpectedEOF 495 } else if tok.tokTyp == tokenOpenBrace { 496 if err := skipMessageText(tr, true); err != nil { 497 return err 498 } 499 } else if tok.tokTyp == tokenColon { 500 if err := skipFieldValueText(tr); err != nil { 501 return err 502 } 503 } else { 504 return textError(tok, "Expecting a colon ':' or brace '{'; instead got %q", tok.txt) 505 } 506 tok = tr.peek() 507 if tok.tokTyp.IsSep() { 508 tr.next() // consume separator 509 } 510 continue 511 } 512 } else { 513 fieldName, err := unmarshalFieldNameText(tr, tok) 514 if err != nil { 515 return err 516 } 517 fd = m.FindFieldDescriptorByName(fieldName) 518 if fd == nil { 519 // See if it's a group name 520 for _, field := range m.md.GetFields() { 521 if field.GetType() == descriptorpb.FieldDescriptorProto_TYPE_GROUP && field.GetMessageType().GetName() == fieldName { 522 fd = field 523 break 524 } 525 } 526 if fd == nil { 527 // maybe this is an extended Any 528 if m.md.GetFullyQualifiedName() == "google.protobuf.Any" && fieldName[0] == '[' && strings.Contains(fieldName, "/") { 529 // strip surrounding "[" and "]" and extract type name from URL 530 typeUrl := fieldName[1 : len(fieldName)-1] 531 mname := typeUrl 532 if slash := strings.LastIndex(mname, "/"); slash >= 0 { 533 mname = mname[slash+1:] 534 } 535 // TODO: add a way to weave an AnyResolver to this point 536 extendedAnyType = findMessageDescriptor(mname, m.md.GetFile()) 537 if extendedAnyType == nil { 538 return textError(tok, "could not parse Any with unknown type URL %q", fieldName) 539 } 540 // field 1 is "type_url" 541 typeUrlField := m.md.FindFieldByNumber(1) 542 if err := m.TrySetField(typeUrlField, typeUrl); err != nil { 543 return err 544 } 545 } else { 546 // TODO: add a flag to just ignore unrecognized field names 547 return textError(tok, "%q is not a recognized field name of %q", fieldName, m.md.GetFullyQualifiedName()) 548 } 549 } 550 } 551 } 552 tok = tr.next() 553 if tok.tokTyp == tokenEOF { 554 return io.ErrUnexpectedEOF 555 } 556 if extendedAnyType != nil { 557 // consume optional colon; make sure this is a "start message" token 558 if tok.tokTyp == tokenColon { 559 tok = tr.next() 560 if tok.tokTyp == tokenEOF { 561 return io.ErrUnexpectedEOF 562 } 563 } 564 if tok.tokTyp.EndToken() == tokenError { 565 return textError(tok, "Expecting a '<' or '{'; instead got %q", tok.txt) 566 } 567 568 // TODO: use mf.NewMessage and, if not a dynamic message, use proto.UnmarshalText to unmarshal it 569 g := m.mf.NewDynamicMessage(extendedAnyType) 570 if err := g.unmarshalText(tr, tok.tokTyp.EndToken()); err != nil { 571 return err 572 } 573 // now we marshal the message to bytes and store in the Any 574 b, err := g.Marshal() 575 if err != nil { 576 return err 577 } 578 // field 2 is "value" 579 anyValueField := m.md.FindFieldByNumber(2) 580 if err := m.TrySetField(anyValueField, b); err != nil { 581 return err 582 } 583 584 } else if (fd.GetType() == descriptorpb.FieldDescriptorProto_TYPE_GROUP || 585 fd.GetType() == descriptorpb.FieldDescriptorProto_TYPE_MESSAGE) && 586 tok.tokTyp.EndToken() != tokenError { 587 588 // TODO: use mf.NewMessage and, if not a dynamic message, use proto.UnmarshalText to unmarshal it 589 g := m.mf.NewDynamicMessage(fd.GetMessageType()) 590 if err := g.unmarshalText(tr, tok.tokTyp.EndToken()); err != nil { 591 return err 592 } 593 if fd.IsRepeated() { 594 if err := m.TryAddRepeatedField(fd, g); err != nil { 595 return err 596 } 597 } else { 598 if err := m.TrySetField(fd, g); err != nil { 599 return err 600 } 601 } 602 } else { 603 if tok.tokTyp != tokenColon { 604 return textError(tok, "Expecting a colon ':'; instead got %q", tok.txt) 605 } 606 if err := m.unmarshalFieldValueText(fd, tr); err != nil { 607 return err 608 } 609 } 610 tok = tr.peek() 611 if tok.tokTyp.IsSep() { 612 tr.next() // consume separator 613 } 614 } 615 } 616 func findMessageDescriptor(name string, fd *desc.FileDescriptor) *desc.MessageDescriptor { 617 md := findMessageInTransitiveDeps(name, fd, map[*desc.FileDescriptor]struct{}{}) 618 if md == nil { 619 // couldn't find it; see if we have this message linked in 620 md, _ = desc.LoadMessageDescriptor(name) 621 } 622 return md 623 } 624 625 func findMessageInTransitiveDeps(name string, fd *desc.FileDescriptor, seen map[*desc.FileDescriptor]struct{}) *desc.MessageDescriptor { 626 if _, ok := seen[fd]; ok { 627 // already checked this file 628 return nil 629 } 630 seen[fd] = struct{}{} 631 md := fd.FindMessage(name) 632 if md != nil { 633 return md 634 } 635 // not in this file so recursively search its deps 636 for _, dep := range fd.GetDependencies() { 637 md = findMessageInTransitiveDeps(name, dep, seen) 638 if md != nil { 639 return md 640 } 641 } 642 // couldn't find it 643 return nil 644 } 645 646 func textError(tok *token, format string, args ...interface{}) error { 647 var msg string 648 if tok.tokTyp == tokenError { 649 msg = tok.val.(error).Error() 650 } else { 651 msg = fmt.Sprintf(format, args...) 652 } 653 return fmt.Errorf("line %d, col %d: %s", tok.pos.Line, tok.pos.Column, msg) 654 } 655 656 type setFunction func(*Message, *desc.FieldDescriptor, interface{}) error 657 658 func (m *Message) unmarshalFieldValueText(fd *desc.FieldDescriptor, tr *txtReader) error { 659 var set setFunction 660 if fd.IsRepeated() { 661 set = (*Message).addRepeatedField 662 } else { 663 set = mergeField 664 } 665 tok := tr.peek() 666 if tok.tokTyp == tokenOpenBracket { 667 tr.next() // consume tok 668 for { 669 if err := m.unmarshalFieldElementText(fd, tr, set); err != nil { 670 return err 671 } 672 tok = tr.peek() 673 if tok.tokTyp == tokenCloseBracket { 674 tr.next() // consume tok 675 return nil 676 } else if tok.tokTyp.IsSep() { 677 tr.next() // consume separator 678 } 679 } 680 } 681 return m.unmarshalFieldElementText(fd, tr, set) 682 } 683 684 func (m *Message) unmarshalFieldElementText(fd *desc.FieldDescriptor, tr *txtReader, set setFunction) error { 685 tok := tr.next() 686 if tok.tokTyp == tokenEOF { 687 return io.ErrUnexpectedEOF 688 } 689 690 var expected string 691 switch fd.GetType() { 692 case descriptorpb.FieldDescriptorProto_TYPE_BOOL: 693 if tok.tokTyp == tokenIdent { 694 if tok.val.(string) == "true" { 695 return set(m, fd, true) 696 } else if tok.val.(string) == "false" { 697 return set(m, fd, false) 698 } 699 } 700 expected = "boolean value" 701 case descriptorpb.FieldDescriptorProto_TYPE_BYTES: 702 if tok.tokTyp == tokenString { 703 return set(m, fd, []byte(tok.val.(string))) 704 } 705 expected = "bytes string value" 706 case descriptorpb.FieldDescriptorProto_TYPE_STRING: 707 if tok.tokTyp == tokenString { 708 return set(m, fd, tok.val) 709 } 710 expected = "string value" 711 case descriptorpb.FieldDescriptorProto_TYPE_FLOAT: 712 switch tok.tokTyp { 713 case tokenFloat: 714 return set(m, fd, float32(tok.val.(float64))) 715 case tokenInt: 716 if f, err := strconv.ParseFloat(tok.val.(string), 32); err != nil { 717 return err 718 } else { 719 return set(m, fd, float32(f)) 720 } 721 case tokenIdent: 722 ident := strings.ToLower(tok.val.(string)) 723 if ident == "inf" { 724 return set(m, fd, float32(math.Inf(1))) 725 } else if ident == "nan" { 726 return set(m, fd, float32(math.NaN())) 727 } 728 case tokenMinus: 729 peeked := tr.peek() 730 if peeked.tokTyp == tokenIdent { 731 ident := strings.ToLower(peeked.val.(string)) 732 if ident == "inf" { 733 tr.next() // consume peeked token 734 return set(m, fd, float32(math.Inf(-1))) 735 } 736 } 737 } 738 expected = "float value" 739 case descriptorpb.FieldDescriptorProto_TYPE_DOUBLE: 740 switch tok.tokTyp { 741 case tokenFloat: 742 return set(m, fd, tok.val) 743 case tokenInt: 744 if f, err := strconv.ParseFloat(tok.val.(string), 64); err != nil { 745 return err 746 } else { 747 return set(m, fd, f) 748 } 749 case tokenIdent: 750 ident := strings.ToLower(tok.val.(string)) 751 if ident == "inf" { 752 return set(m, fd, math.Inf(1)) 753 } else if ident == "nan" { 754 return set(m, fd, math.NaN()) 755 } 756 case tokenMinus: 757 peeked := tr.peek() 758 if peeked.tokTyp == tokenIdent { 759 ident := strings.ToLower(peeked.val.(string)) 760 if ident == "inf" { 761 tr.next() // consume peeked token 762 return set(m, fd, math.Inf(-1)) 763 } 764 } 765 } 766 expected = "float value" 767 case descriptorpb.FieldDescriptorProto_TYPE_INT32, 768 descriptorpb.FieldDescriptorProto_TYPE_SINT32, 769 descriptorpb.FieldDescriptorProto_TYPE_SFIXED32: 770 if tok.tokTyp == tokenInt { 771 if i, err := strconv.ParseInt(tok.val.(string), 10, 32); err != nil { 772 return err 773 } else { 774 return set(m, fd, int32(i)) 775 } 776 } 777 expected = "int value" 778 case descriptorpb.FieldDescriptorProto_TYPE_INT64, 779 descriptorpb.FieldDescriptorProto_TYPE_SINT64, 780 descriptorpb.FieldDescriptorProto_TYPE_SFIXED64: 781 if tok.tokTyp == tokenInt { 782 if i, err := strconv.ParseInt(tok.val.(string), 10, 64); err != nil { 783 return err 784 } else { 785 return set(m, fd, i) 786 } 787 } 788 expected = "int value" 789 case descriptorpb.FieldDescriptorProto_TYPE_UINT32, 790 descriptorpb.FieldDescriptorProto_TYPE_FIXED32: 791 if tok.tokTyp == tokenInt { 792 if i, err := strconv.ParseUint(tok.val.(string), 10, 32); err != nil { 793 return err 794 } else { 795 return set(m, fd, uint32(i)) 796 } 797 } 798 expected = "unsigned int value" 799 case descriptorpb.FieldDescriptorProto_TYPE_UINT64, 800 descriptorpb.FieldDescriptorProto_TYPE_FIXED64: 801 if tok.tokTyp == tokenInt { 802 if i, err := strconv.ParseUint(tok.val.(string), 10, 64); err != nil { 803 return err 804 } else { 805 return set(m, fd, i) 806 } 807 } 808 expected = "unsigned int value" 809 case descriptorpb.FieldDescriptorProto_TYPE_ENUM: 810 if tok.tokTyp == tokenIdent { 811 // TODO: add a flag to just ignore unrecognized enum value names? 812 vd := fd.GetEnumType().FindValueByName(tok.val.(string)) 813 if vd != nil { 814 return set(m, fd, vd.GetNumber()) 815 } 816 } else if tok.tokTyp == tokenInt { 817 if i, err := strconv.ParseInt(tok.val.(string), 10, 32); err != nil { 818 return err 819 } else { 820 return set(m, fd, int32(i)) 821 } 822 } 823 expected = fmt.Sprintf("enum %s value", fd.GetEnumType().GetFullyQualifiedName()) 824 case descriptorpb.FieldDescriptorProto_TYPE_MESSAGE, 825 descriptorpb.FieldDescriptorProto_TYPE_GROUP: 826 827 endTok := tok.tokTyp.EndToken() 828 if endTok != tokenError { 829 dm := m.mf.NewDynamicMessage(fd.GetMessageType()) 830 if err := dm.unmarshalText(tr, endTok); err != nil { 831 return err 832 } 833 // TODO: ideally we would use mf.NewMessage and, if not a dynamic message, use 834 // proto package to unmarshal it. But the text parser isn't particularly amenable 835 // to that, so we instead convert a dynamic message to a generated one if the 836 // known-type registry knows about the generated type... 837 var ktr *KnownTypeRegistry 838 if m.mf != nil { 839 ktr = m.mf.ktr 840 } 841 pm := ktr.CreateIfKnown(fd.GetMessageType().GetFullyQualifiedName()) 842 if pm != nil { 843 if err := dm.ConvertTo(pm); err != nil { 844 return set(m, fd, pm) 845 } 846 } 847 return set(m, fd, dm) 848 } 849 expected = fmt.Sprintf("message %s value", fd.GetMessageType().GetFullyQualifiedName()) 850 default: 851 return fmt.Errorf("field %q of message %q has unrecognized type: %v", fd.GetFullyQualifiedName(), m.md.GetFullyQualifiedName(), fd.GetType()) 852 } 853 854 // if we get here, token was wrong type; create error message 855 var article string 856 if strings.Contains("aieou", expected[0:1]) { 857 article = "an" 858 } else { 859 article = "a" 860 } 861 return textError(tok, "Expecting %s %s; got %q", article, expected, tok.txt) 862 } 863 864 func unmarshalFieldNameText(tr *txtReader, tok *token) (string, error) { 865 if tok.tokTyp == tokenOpenBracket || tok.tokTyp == tokenOpenParen { 866 // extension name 867 var closeType tokenType 868 var closeChar string 869 if tok.tokTyp == tokenOpenBracket { 870 closeType = tokenCloseBracket 871 closeChar = "close bracket ']'" 872 } else { 873 closeType = tokenCloseParen 874 closeChar = "close paren ')'" 875 } 876 // must be followed by an identifier 877 idents := make([]string, 0, 1) 878 for { 879 tok = tr.next() 880 if tok.tokTyp == tokenEOF { 881 return "", io.ErrUnexpectedEOF 882 } else if tok.tokTyp != tokenIdent { 883 return "", textError(tok, "Expecting an identifier; instead got %q", tok.txt) 884 } 885 idents = append(idents, tok.val.(string)) 886 // and then close bracket/paren, or "/" to keep adding URL elements to name 887 tok = tr.next() 888 if tok.tokTyp == tokenEOF { 889 return "", io.ErrUnexpectedEOF 890 } else if tok.tokTyp == closeType { 891 break 892 } else if tok.tokTyp != tokenSlash { 893 return "", textError(tok, "Expecting a %s; instead got %q", closeChar, tok.txt) 894 } 895 } 896 return "[" + strings.Join(idents, "/") + "]", nil 897 } else if tok.tokTyp == tokenIdent { 898 // normal field name 899 return tok.val.(string), nil 900 } else { 901 return "", textError(tok, "Expecting an identifier or tag number; instead got %q", tok.txt) 902 } 903 } 904 905 func skipFieldNameText(tr *txtReader) error { 906 tok := tr.next() 907 if tok.tokTyp == tokenEOF { 908 return io.ErrUnexpectedEOF 909 } else if tok.tokTyp == tokenInt || tok.tokTyp == tokenIdent { 910 return nil 911 } else { 912 _, err := unmarshalFieldNameText(tr, tok) 913 return err 914 } 915 } 916 917 func skipFieldValueText(tr *txtReader) error { 918 tok := tr.peek() 919 if tok.tokTyp == tokenOpenBracket { 920 tr.next() // consume tok 921 for { 922 if err := skipFieldElementText(tr); err != nil { 923 return err 924 } 925 tok = tr.peek() 926 if tok.tokTyp == tokenCloseBracket { 927 tr.next() // consume tok 928 return nil 929 } else if tok.tokTyp.IsSep() { 930 tr.next() // consume separator 931 } 932 933 } 934 } 935 return skipFieldElementText(tr) 936 } 937 938 func skipFieldElementText(tr *txtReader) error { 939 tok := tr.next() 940 switch tok.tokTyp { 941 case tokenEOF: 942 return io.ErrUnexpectedEOF 943 case tokenInt, tokenFloat, tokenString, tokenIdent: 944 return nil 945 case tokenOpenAngle: 946 return skipMessageText(tr, false) 947 default: 948 return textError(tok, "Expecting an angle bracket '<' or a value; instead got %q", tok.txt) 949 } 950 } 951 952 func skipMessageText(tr *txtReader, isGroup bool) error { 953 for { 954 tok := tr.peek() 955 if tok.tokTyp == tokenEOF { 956 return io.ErrUnexpectedEOF 957 } else if isGroup && tok.tokTyp == tokenCloseBrace { 958 return nil 959 } else if !isGroup && tok.tokTyp == tokenCloseAngle { 960 return nil 961 } 962 963 // field name or tag 964 if err := skipFieldNameText(tr); err != nil { 965 return err 966 } 967 968 // field value 969 tok = tr.next() 970 if tok.tokTyp == tokenEOF { 971 return io.ErrUnexpectedEOF 972 } else if tok.tokTyp == tokenOpenBrace { 973 if err := skipMessageText(tr, true); err != nil { 974 return err 975 } 976 } else if tok.tokTyp == tokenColon { 977 if err := skipFieldValueText(tr); err != nil { 978 return err 979 } 980 } else { 981 return textError(tok, "Expecting a colon ':' or brace '{'; instead got %q", tok.txt) 982 } 983 984 tok = tr.peek() 985 if tok.tokTyp.IsSep() { 986 tr.next() // consume separator 987 } 988 } 989 } 990 991 type tokenType int 992 993 const ( 994 tokenError tokenType = iota 995 tokenEOF 996 tokenIdent 997 tokenString 998 tokenInt 999 tokenFloat 1000 tokenColon 1001 tokenComma 1002 tokenSemiColon 1003 tokenOpenBrace 1004 tokenCloseBrace 1005 tokenOpenBracket 1006 tokenCloseBracket 1007 tokenOpenAngle 1008 tokenCloseAngle 1009 tokenOpenParen 1010 tokenCloseParen 1011 tokenSlash 1012 tokenMinus 1013 ) 1014 1015 func (t tokenType) IsSep() bool { 1016 return t == tokenComma || t == tokenSemiColon 1017 } 1018 1019 func (t tokenType) EndToken() tokenType { 1020 switch t { 1021 case tokenOpenAngle: 1022 return tokenCloseAngle 1023 case tokenOpenBrace: 1024 return tokenCloseBrace 1025 default: 1026 return tokenError 1027 } 1028 } 1029 1030 type token struct { 1031 tokTyp tokenType 1032 val interface{} 1033 txt string 1034 pos scanner.Position 1035 } 1036 1037 type txtReader struct { 1038 scanner scanner.Scanner 1039 peeked token 1040 havePeeked bool 1041 } 1042 1043 func newReader(text []byte) *txtReader { 1044 sc := scanner.Scanner{} 1045 sc.Init(bytes.NewReader(text)) 1046 sc.Mode = scanner.ScanIdents | scanner.ScanInts | scanner.ScanFloats | scanner.ScanChars | 1047 scanner.ScanStrings | scanner.ScanComments | scanner.SkipComments 1048 // identifiers are same restrictions as Go identifiers, except we also allow dots since 1049 // we accept fully-qualified names 1050 sc.IsIdentRune = func(ch rune, i int) bool { 1051 return ch == '_' || unicode.IsLetter(ch) || 1052 (i > 0 && unicode.IsDigit(ch)) || 1053 (i > 0 && ch == '.') 1054 } 1055 // ignore errors; we handle them if/when we see malformed tokens 1056 sc.Error = func(s *scanner.Scanner, msg string) {} 1057 return &txtReader{scanner: sc} 1058 } 1059 1060 func (p *txtReader) peek() *token { 1061 if p.havePeeked { 1062 return &p.peeked 1063 } 1064 t := p.scanner.Scan() 1065 if t == scanner.EOF { 1066 p.peeked.tokTyp = tokenEOF 1067 p.peeked.val = nil 1068 p.peeked.txt = "" 1069 p.peeked.pos = p.scanner.Position 1070 } else if err := p.processToken(t, p.scanner.TokenText(), p.scanner.Position); err != nil { 1071 p.peeked.tokTyp = tokenError 1072 p.peeked.val = err 1073 } 1074 p.havePeeked = true 1075 return &p.peeked 1076 } 1077 1078 func (p *txtReader) processToken(t rune, text string, pos scanner.Position) error { 1079 p.peeked.pos = pos 1080 p.peeked.txt = text 1081 switch t { 1082 case scanner.Ident: 1083 p.peeked.tokTyp = tokenIdent 1084 p.peeked.val = text 1085 case scanner.Int: 1086 p.peeked.tokTyp = tokenInt 1087 p.peeked.val = text // can't parse the number because we don't know if it's signed or unsigned 1088 case scanner.Float: 1089 p.peeked.tokTyp = tokenFloat 1090 var err error 1091 if p.peeked.val, err = strconv.ParseFloat(text, 64); err != nil { 1092 return err 1093 } 1094 case scanner.Char, scanner.String: 1095 p.peeked.tokTyp = tokenString 1096 var err error 1097 if p.peeked.val, err = strconv.Unquote(text); err != nil { 1098 return err 1099 } 1100 case '-': // unary minus, for negative ints and floats 1101 ch := p.scanner.Peek() 1102 if ch < '0' || ch > '9' { 1103 p.peeked.tokTyp = tokenMinus 1104 p.peeked.val = '-' 1105 } else { 1106 t := p.scanner.Scan() 1107 if t == scanner.EOF { 1108 return io.ErrUnexpectedEOF 1109 } else if t == scanner.Float { 1110 p.peeked.tokTyp = tokenFloat 1111 text += p.scanner.TokenText() 1112 p.peeked.txt = text 1113 var err error 1114 if p.peeked.val, err = strconv.ParseFloat(text, 64); err != nil { 1115 p.peeked.pos = p.scanner.Position 1116 return err 1117 } 1118 } else if t == scanner.Int { 1119 p.peeked.tokTyp = tokenInt 1120 text += p.scanner.TokenText() 1121 p.peeked.txt = text 1122 p.peeked.val = text // can't parse the number because we don't know if it's signed or unsigned 1123 } else { 1124 p.peeked.pos = p.scanner.Position 1125 return fmt.Errorf("expecting an int or float but got %q", p.scanner.TokenText()) 1126 } 1127 } 1128 case ':': 1129 p.peeked.tokTyp = tokenColon 1130 p.peeked.val = ':' 1131 case ',': 1132 p.peeked.tokTyp = tokenComma 1133 p.peeked.val = ',' 1134 case ';': 1135 p.peeked.tokTyp = tokenSemiColon 1136 p.peeked.val = ';' 1137 case '{': 1138 p.peeked.tokTyp = tokenOpenBrace 1139 p.peeked.val = '{' 1140 case '}': 1141 p.peeked.tokTyp = tokenCloseBrace 1142 p.peeked.val = '}' 1143 case '<': 1144 p.peeked.tokTyp = tokenOpenAngle 1145 p.peeked.val = '<' 1146 case '>': 1147 p.peeked.tokTyp = tokenCloseAngle 1148 p.peeked.val = '>' 1149 case '[': 1150 p.peeked.tokTyp = tokenOpenBracket 1151 p.peeked.val = '[' 1152 case ']': 1153 p.peeked.tokTyp = tokenCloseBracket 1154 p.peeked.val = ']' 1155 case '(': 1156 p.peeked.tokTyp = tokenOpenParen 1157 p.peeked.val = '(' 1158 case ')': 1159 p.peeked.tokTyp = tokenCloseParen 1160 p.peeked.val = ')' 1161 case '/': 1162 // only allowed to separate URL components in expanded Any format 1163 p.peeked.tokTyp = tokenSlash 1164 p.peeked.val = '/' 1165 default: 1166 return fmt.Errorf("invalid character: %c", t) 1167 } 1168 return nil 1169 } 1170 1171 func (p *txtReader) next() *token { 1172 t := p.peek() 1173 if t.tokTyp != tokenEOF && t.tokTyp != tokenError { 1174 p.havePeeked = false 1175 } 1176 return t 1177 }