github.com/Big-big-orange/protoreflect@v0.0.0-20240408141420-285cedfdf6a4/dynamic/text.go (about)

     1  package dynamic
     2  
     3  // Marshalling and unmarshalling of dynamic messages to/from proto's standard text format
     4  
     5  import (
     6  	"bytes"
     7  	"fmt"
     8  	"io"
     9  	"math"
    10  	"reflect"
    11  	"sort"
    12  	"strconv"
    13  	"strings"
    14  	"text/scanner"
    15  	"unicode"
    16  
    17  	"github.com/golang/protobuf/proto"
    18  	"google.golang.org/protobuf/types/descriptorpb"
    19  
    20  	"github.com/Big-big-orange/protoreflect/codec"
    21  	"github.com/Big-big-orange/protoreflect/desc"
    22  )
    23  
    24  // MarshalText serializes this message to bytes in the standard text format,
    25  // returning an error if the operation fails. The resulting bytes will be a
    26  // valid UTF8 string.
    27  //
    28  // This method uses a compact form: no newlines, and spaces between field
    29  // identifiers and values are elided.
    30  func (m *Message) MarshalText() ([]byte, error) {
    31  	var b indentBuffer
    32  	b.indentCount = -1 // no indentation
    33  	if err := m.marshalText(&b); err != nil {
    34  		return nil, err
    35  	}
    36  	return b.Bytes(), nil
    37  }
    38  
    39  // MarshalTextIndent serializes this message to bytes in the standard text
    40  // format, returning an error if the operation fails. The resulting bytes will
    41  // be a valid UTF8 string.
    42  //
    43  // This method uses a "pretty-printed" form, with each field on its own line and
    44  // spaces between field identifiers and values.
    45  func (m *Message) MarshalTextIndent() ([]byte, error) {
    46  	var b indentBuffer
    47  	b.indent = "  " // TODO: option for indent?
    48  	if err := m.marshalText(&b); err != nil {
    49  		return nil, err
    50  	}
    51  	return b.Bytes(), nil
    52  }
    53  
    54  func (m *Message) marshalText(b *indentBuffer) error {
    55  	// TODO: option for emitting extended Any format?
    56  	first := true
    57  	// first the known fields
    58  	for _, tag := range m.knownFieldTags() {
    59  		itag := int32(tag)
    60  		v := m.values[itag]
    61  		fd := m.FindFieldDescriptor(itag)
    62  		if fd.IsMap() {
    63  			md := fd.GetMessageType()
    64  			kfd := md.FindFieldByNumber(1)
    65  			vfd := md.FindFieldByNumber(2)
    66  			mp := v.(map[interface{}]interface{})
    67  			keys := make([]interface{}, 0, len(mp))
    68  			for k := range mp {
    69  				keys = append(keys, k)
    70  			}
    71  			sort.Sort(sortable(keys))
    72  			for _, mk := range keys {
    73  				mv := mp[mk]
    74  				err := b.maybeNext(&first)
    75  				if err != nil {
    76  					return err
    77  				}
    78  				err = marshalKnownFieldMapEntryText(b, fd, kfd, mk, vfd, mv)
    79  				if err != nil {
    80  					return err
    81  				}
    82  			}
    83  		} else if fd.IsRepeated() {
    84  			sl := v.([]interface{})
    85  			for _, slv := range sl {
    86  				err := b.maybeNext(&first)
    87  				if err != nil {
    88  					return err
    89  				}
    90  				err = marshalKnownFieldText(b, fd, slv)
    91  				if err != nil {
    92  					return err
    93  				}
    94  			}
    95  		} else {
    96  			err := b.maybeNext(&first)
    97  			if err != nil {
    98  				return err
    99  			}
   100  			err = marshalKnownFieldText(b, fd, v)
   101  			if err != nil {
   102  				return err
   103  			}
   104  		}
   105  	}
   106  	// then the unknown fields
   107  	for _, tag := range m.unknownFieldTags() {
   108  		itag := int32(tag)
   109  		ufs := m.unknownFields[itag]
   110  		for _, uf := range ufs {
   111  			err := b.maybeNext(&first)
   112  			if err != nil {
   113  				return err
   114  			}
   115  			_, err = fmt.Fprintf(b, "%d", tag)
   116  			if err != nil {
   117  				return err
   118  			}
   119  			if uf.Encoding == proto.WireStartGroup {
   120  				err = b.WriteByte('{')
   121  				if err != nil {
   122  					return err
   123  				}
   124  				err = b.start()
   125  				if err != nil {
   126  					return err
   127  				}
   128  				in := codec.NewBuffer(uf.Contents)
   129  				err = marshalUnknownGroupText(b, in, true)
   130  				if err != nil {
   131  					return err
   132  				}
   133  				err = b.end()
   134  				if err != nil {
   135  					return err
   136  				}
   137  				err = b.WriteByte('}')
   138  				if err != nil {
   139  					return err
   140  				}
   141  			} else {
   142  				err = b.sep()
   143  				if err != nil {
   144  					return err
   145  				}
   146  				if uf.Encoding == proto.WireBytes {
   147  					err = writeString(b, string(uf.Contents))
   148  					if err != nil {
   149  						return err
   150  					}
   151  				} else {
   152  					_, err = b.WriteString(strconv.FormatUint(uf.Value, 10))
   153  					if err != nil {
   154  						return err
   155  					}
   156  				}
   157  			}
   158  		}
   159  	}
   160  	return nil
   161  }
   162  
   163  func marshalKnownFieldMapEntryText(b *indentBuffer, fd *desc.FieldDescriptor, kfd *desc.FieldDescriptor, mk interface{}, vfd *desc.FieldDescriptor, mv interface{}) error {
   164  	var name string
   165  	if fd.IsExtension() {
   166  		name = fmt.Sprintf("[%s]", fd.GetFullyQualifiedName())
   167  	} else {
   168  		name = fd.GetName()
   169  	}
   170  	_, err := b.WriteString(name)
   171  	if err != nil {
   172  		return err
   173  	}
   174  	err = b.sep()
   175  	if err != nil {
   176  		return err
   177  	}
   178  
   179  	err = b.WriteByte('<')
   180  	if err != nil {
   181  		return err
   182  	}
   183  	err = b.start()
   184  	if err != nil {
   185  		return err
   186  	}
   187  
   188  	err = marshalKnownFieldText(b, kfd, mk)
   189  	if err != nil {
   190  		return err
   191  	}
   192  	err = b.next()
   193  	if err != nil {
   194  		return err
   195  	}
   196  	if !isNil(mv) {
   197  		err = marshalKnownFieldText(b, vfd, mv)
   198  		if err != nil {
   199  			return err
   200  		}
   201  	}
   202  
   203  	err = b.end()
   204  	if err != nil {
   205  		return err
   206  	}
   207  	return b.WriteByte('>')
   208  }
   209  
   210  func marshalKnownFieldText(b *indentBuffer, fd *desc.FieldDescriptor, v interface{}) error {
   211  	group := fd.GetType() == descriptorpb.FieldDescriptorProto_TYPE_GROUP
   212  	if group {
   213  		var name string
   214  		if fd.IsExtension() {
   215  			name = fmt.Sprintf("[%s]", fd.GetMessageType().GetFullyQualifiedName())
   216  		} else {
   217  			name = fd.GetMessageType().GetName()
   218  		}
   219  		_, err := b.WriteString(name)
   220  		if err != nil {
   221  			return err
   222  		}
   223  	} else {
   224  		var name string
   225  		if fd.IsExtension() {
   226  			name = fmt.Sprintf("[%s]", fd.GetFullyQualifiedName())
   227  		} else {
   228  			name = fd.GetName()
   229  		}
   230  		_, err := b.WriteString(name)
   231  		if err != nil {
   232  			return err
   233  		}
   234  		err = b.sep()
   235  		if err != nil {
   236  			return err
   237  		}
   238  	}
   239  	rv := reflect.ValueOf(v)
   240  	switch rv.Kind() {
   241  	case reflect.Int32, reflect.Int64:
   242  		ed := fd.GetEnumType()
   243  		if ed != nil {
   244  			n := int32(rv.Int())
   245  			vd := ed.FindValueByNumber(n)
   246  			if vd == nil {
   247  				_, err := b.WriteString(strconv.FormatInt(rv.Int(), 10))
   248  				return err
   249  			} else {
   250  				_, err := b.WriteString(vd.GetName())
   251  				return err
   252  			}
   253  		} else {
   254  			_, err := b.WriteString(strconv.FormatInt(rv.Int(), 10))
   255  			return err
   256  		}
   257  	case reflect.Uint32, reflect.Uint64:
   258  		_, err := b.WriteString(strconv.FormatUint(rv.Uint(), 10))
   259  		return err
   260  	case reflect.Float32, reflect.Float64:
   261  		f := rv.Float()
   262  		var str string
   263  		if math.IsNaN(f) {
   264  			str = "nan"
   265  		} else if math.IsInf(f, 1) {
   266  			str = "inf"
   267  		} else if math.IsInf(f, -1) {
   268  			str = "-inf"
   269  		} else {
   270  			var bits int
   271  			if rv.Kind() == reflect.Float32 {
   272  				bits = 32
   273  			} else {
   274  				bits = 64
   275  			}
   276  			str = strconv.FormatFloat(rv.Float(), 'g', -1, bits)
   277  		}
   278  		_, err := b.WriteString(str)
   279  		return err
   280  	case reflect.Bool:
   281  		_, err := b.WriteString(strconv.FormatBool(rv.Bool()))
   282  		return err
   283  	case reflect.Slice:
   284  		return writeString(b, string(rv.Bytes()))
   285  	case reflect.String:
   286  		return writeString(b, rv.String())
   287  	default:
   288  		var err error
   289  		if group {
   290  			err = b.WriteByte('{')
   291  		} else {
   292  			err = b.WriteByte('<')
   293  		}
   294  		if err != nil {
   295  			return err
   296  		}
   297  		err = b.start()
   298  		if err != nil {
   299  			return err
   300  		}
   301  		// must be a message
   302  		if dm, ok := v.(*Message); ok {
   303  			err = dm.marshalText(b)
   304  			if err != nil {
   305  				return err
   306  			}
   307  		} else {
   308  			err = proto.CompactText(b, v.(proto.Message))
   309  			if err != nil {
   310  				return err
   311  			}
   312  		}
   313  		err = b.end()
   314  		if err != nil {
   315  			return err
   316  		}
   317  		if group {
   318  			return b.WriteByte('}')
   319  		} else {
   320  			return b.WriteByte('>')
   321  		}
   322  	}
   323  }
   324  
   325  // writeString writes a string in the protocol buffer text format.
   326  // It is similar to strconv.Quote except we don't use Go escape sequences,
   327  // we treat the string as a byte sequence, and we use octal escapes.
   328  // These differences are to maintain interoperability with the other
   329  // languages' implementations of the text format.
   330  func writeString(b *indentBuffer, s string) error {
   331  	// use WriteByte here to get any needed indent
   332  	if err := b.WriteByte('"'); err != nil {
   333  		return err
   334  	}
   335  	// Loop over the bytes, not the runes.
   336  	for i := 0; i < len(s); i++ {
   337  		var err error
   338  		// Divergence from C++: we don't escape apostrophes.
   339  		// There's no need to escape them, and the C++ parser
   340  		// copes with a naked apostrophe.
   341  		switch c := s[i]; c {
   342  		case '\n':
   343  			_, err = b.WriteString("\\n")
   344  		case '\r':
   345  			_, err = b.WriteString("\\r")
   346  		case '\t':
   347  			_, err = b.WriteString("\\t")
   348  		case '"':
   349  			_, err = b.WriteString("\\\"")
   350  		case '\\':
   351  			_, err = b.WriteString("\\\\")
   352  		default:
   353  			if c >= 0x20 && c < 0x7f {
   354  				err = b.WriteByte(c)
   355  			} else {
   356  				_, err = fmt.Fprintf(b, "\\%03o", c)
   357  			}
   358  		}
   359  		if err != nil {
   360  			return err
   361  		}
   362  	}
   363  	return b.WriteByte('"')
   364  }
   365  
   366  func marshalUnknownGroupText(b *indentBuffer, in *codec.Buffer, topLevel bool) error {
   367  	first := true
   368  	for {
   369  		if in.EOF() {
   370  			if topLevel {
   371  				return nil
   372  			}
   373  			// this is a nested message: we are expecting an end-group tag, not EOF!
   374  			return io.ErrUnexpectedEOF
   375  		}
   376  		tag, wireType, err := in.DecodeTagAndWireType()
   377  		if err != nil {
   378  			return err
   379  		}
   380  		if wireType == proto.WireEndGroup {
   381  			return nil
   382  		}
   383  		err = b.maybeNext(&first)
   384  		if err != nil {
   385  			return err
   386  		}
   387  		_, err = fmt.Fprintf(b, "%d", tag)
   388  		if err != nil {
   389  			return err
   390  		}
   391  		if wireType == proto.WireStartGroup {
   392  			err = b.WriteByte('{')
   393  			if err != nil {
   394  				return err
   395  			}
   396  			err = b.start()
   397  			if err != nil {
   398  				return err
   399  			}
   400  			err = marshalUnknownGroupText(b, in, false)
   401  			if err != nil {
   402  				return err
   403  			}
   404  			err = b.end()
   405  			if err != nil {
   406  				return err
   407  			}
   408  			err = b.WriteByte('}')
   409  			if err != nil {
   410  				return err
   411  			}
   412  			continue
   413  		} else {
   414  			err = b.sep()
   415  			if err != nil {
   416  				return err
   417  			}
   418  			if wireType == proto.WireBytes {
   419  				contents, err := in.DecodeRawBytes(false)
   420  				if err != nil {
   421  					return err
   422  				}
   423  				err = writeString(b, string(contents))
   424  				if err != nil {
   425  					return err
   426  				}
   427  			} else {
   428  				var v uint64
   429  				switch wireType {
   430  				case proto.WireVarint:
   431  					v, err = in.DecodeVarint()
   432  				case proto.WireFixed32:
   433  					v, err = in.DecodeFixed32()
   434  				case proto.WireFixed64:
   435  					v, err = in.DecodeFixed64()
   436  				default:
   437  					return proto.ErrInternalBadWireType
   438  				}
   439  				if err != nil {
   440  					return err
   441  				}
   442  				_, err = b.WriteString(strconv.FormatUint(v, 10))
   443  				if err != nil {
   444  					return err
   445  				}
   446  			}
   447  		}
   448  	}
   449  }
   450  
   451  // UnmarshalText de-serializes the message that is present, in text format, in
   452  // the given bytes into this message. It first resets the current message. It
   453  // returns an error if the given bytes do not contain a valid encoding of this
   454  // message type in the standard text format
   455  func (m *Message) UnmarshalText(text []byte) error {
   456  	m.Reset()
   457  	if err := m.UnmarshalMergeText(text); err != nil {
   458  		return err
   459  	}
   460  	return m.Validate()
   461  }
   462  
   463  // UnmarshalMergeText de-serializes the message that is present, in text format,
   464  // in the given bytes into this message. Unlike UnmarshalText, it does not first
   465  // reset the message, instead merging the data in the given bytes into the
   466  // existing data in this message.
   467  func (m *Message) UnmarshalMergeText(text []byte) error {
   468  	return m.unmarshalText(newReader(text), tokenEOF)
   469  }
   470  
   471  func (m *Message) unmarshalText(tr *txtReader, end tokenType) error {
   472  	for {
   473  		tok := tr.next()
   474  		if tok.tokTyp == end {
   475  			return nil
   476  		}
   477  		if tok.tokTyp == tokenEOF {
   478  			return io.ErrUnexpectedEOF
   479  		}
   480  		var fd *desc.FieldDescriptor
   481  		var extendedAnyType *desc.MessageDescriptor
   482  		if tok.tokTyp == tokenInt {
   483  			// tag number (indicates unknown field)
   484  			tag, err := strconv.ParseInt(tok.val.(string), 10, 32)
   485  			if err != nil {
   486  				return err
   487  			}
   488  			itag := int32(tag)
   489  			fd = m.FindFieldDescriptor(itag)
   490  			if fd == nil {
   491  				// can't parse the value w/out field descriptor, so skip it
   492  				tok = tr.next()
   493  				if tok.tokTyp == tokenEOF {
   494  					return io.ErrUnexpectedEOF
   495  				} else if tok.tokTyp == tokenOpenBrace {
   496  					if err := skipMessageText(tr, true); err != nil {
   497  						return err
   498  					}
   499  				} else if tok.tokTyp == tokenColon {
   500  					if err := skipFieldValueText(tr); err != nil {
   501  						return err
   502  					}
   503  				} else {
   504  					return textError(tok, "Expecting a colon ':' or brace '{'; instead got %q", tok.txt)
   505  				}
   506  				tok = tr.peek()
   507  				if tok.tokTyp.IsSep() {
   508  					tr.next() // consume separator
   509  				}
   510  				continue
   511  			}
   512  		} else {
   513  			fieldName, err := unmarshalFieldNameText(tr, tok)
   514  			if err != nil {
   515  				return err
   516  			}
   517  			fd = m.FindFieldDescriptorByName(fieldName)
   518  			if fd == nil {
   519  				// See if it's a group name
   520  				for _, field := range m.md.GetFields() {
   521  					if field.GetType() == descriptorpb.FieldDescriptorProto_TYPE_GROUP && field.GetMessageType().GetName() == fieldName {
   522  						fd = field
   523  						break
   524  					}
   525  				}
   526  				if fd == nil {
   527  					// maybe this is an extended Any
   528  					if m.md.GetFullyQualifiedName() == "google.protobuf.Any" && fieldName[0] == '[' && strings.Contains(fieldName, "/") {
   529  						// strip surrounding "[" and "]" and extract type name from URL
   530  						typeUrl := fieldName[1 : len(fieldName)-1]
   531  						mname := typeUrl
   532  						if slash := strings.LastIndex(mname, "/"); slash >= 0 {
   533  							mname = mname[slash+1:]
   534  						}
   535  						// TODO: add a way to weave an AnyResolver to this point
   536  						extendedAnyType = findMessageDescriptor(mname, m.md.GetFile())
   537  						if extendedAnyType == nil {
   538  							return textError(tok, "could not parse Any with unknown type URL %q", fieldName)
   539  						}
   540  						// field 1 is "type_url"
   541  						typeUrlField := m.md.FindFieldByNumber(1)
   542  						if err := m.TrySetField(typeUrlField, typeUrl); err != nil {
   543  							return err
   544  						}
   545  					} else {
   546  						// TODO: add a flag to just ignore unrecognized field names
   547  						return textError(tok, "%q is not a recognized field name of %q", fieldName, m.md.GetFullyQualifiedName())
   548  					}
   549  				}
   550  			}
   551  		}
   552  		tok = tr.next()
   553  		if tok.tokTyp == tokenEOF {
   554  			return io.ErrUnexpectedEOF
   555  		}
   556  		if extendedAnyType != nil {
   557  			// consume optional colon; make sure this is a "start message" token
   558  			if tok.tokTyp == tokenColon {
   559  				tok = tr.next()
   560  				if tok.tokTyp == tokenEOF {
   561  					return io.ErrUnexpectedEOF
   562  				}
   563  			}
   564  			if tok.tokTyp.EndToken() == tokenError {
   565  				return textError(tok, "Expecting a '<' or '{'; instead got %q", tok.txt)
   566  			}
   567  
   568  			// TODO: use mf.NewMessage and, if not a dynamic message, use proto.UnmarshalText to unmarshal it
   569  			g := m.mf.NewDynamicMessage(extendedAnyType)
   570  			if err := g.unmarshalText(tr, tok.tokTyp.EndToken()); err != nil {
   571  				return err
   572  			}
   573  			// now we marshal the message to bytes and store in the Any
   574  			b, err := g.Marshal()
   575  			if err != nil {
   576  				return err
   577  			}
   578  			// field 2 is "value"
   579  			anyValueField := m.md.FindFieldByNumber(2)
   580  			if err := m.TrySetField(anyValueField, b); err != nil {
   581  				return err
   582  			}
   583  
   584  		} else if (fd.GetType() == descriptorpb.FieldDescriptorProto_TYPE_GROUP ||
   585  			fd.GetType() == descriptorpb.FieldDescriptorProto_TYPE_MESSAGE) &&
   586  			tok.tokTyp.EndToken() != tokenError {
   587  
   588  			// TODO: use mf.NewMessage and, if not a dynamic message, use proto.UnmarshalText to unmarshal it
   589  			g := m.mf.NewDynamicMessage(fd.GetMessageType())
   590  			if err := g.unmarshalText(tr, tok.tokTyp.EndToken()); err != nil {
   591  				return err
   592  			}
   593  			if fd.IsRepeated() {
   594  				if err := m.TryAddRepeatedField(fd, g); err != nil {
   595  					return err
   596  				}
   597  			} else {
   598  				if err := m.TrySetField(fd, g); err != nil {
   599  					return err
   600  				}
   601  			}
   602  		} else {
   603  			if tok.tokTyp != tokenColon {
   604  				return textError(tok, "Expecting a colon ':'; instead got %q", tok.txt)
   605  			}
   606  			if err := m.unmarshalFieldValueText(fd, tr); err != nil {
   607  				return err
   608  			}
   609  		}
   610  		tok = tr.peek()
   611  		if tok.tokTyp.IsSep() {
   612  			tr.next() // consume separator
   613  		}
   614  	}
   615  }
   616  func findMessageDescriptor(name string, fd *desc.FileDescriptor) *desc.MessageDescriptor {
   617  	md := findMessageInTransitiveDeps(name, fd, map[*desc.FileDescriptor]struct{}{})
   618  	if md == nil {
   619  		// couldn't find it; see if we have this message linked in
   620  		md, _ = desc.LoadMessageDescriptor(name)
   621  	}
   622  	return md
   623  }
   624  
   625  func findMessageInTransitiveDeps(name string, fd *desc.FileDescriptor, seen map[*desc.FileDescriptor]struct{}) *desc.MessageDescriptor {
   626  	if _, ok := seen[fd]; ok {
   627  		// already checked this file
   628  		return nil
   629  	}
   630  	seen[fd] = struct{}{}
   631  	md := fd.FindMessage(name)
   632  	if md != nil {
   633  		return md
   634  	}
   635  	// not in this file so recursively search its deps
   636  	for _, dep := range fd.GetDependencies() {
   637  		md = findMessageInTransitiveDeps(name, dep, seen)
   638  		if md != nil {
   639  			return md
   640  		}
   641  	}
   642  	// couldn't find it
   643  	return nil
   644  }
   645  
   646  func textError(tok *token, format string, args ...interface{}) error {
   647  	var msg string
   648  	if tok.tokTyp == tokenError {
   649  		msg = tok.val.(error).Error()
   650  	} else {
   651  		msg = fmt.Sprintf(format, args...)
   652  	}
   653  	return fmt.Errorf("line %d, col %d: %s", tok.pos.Line, tok.pos.Column, msg)
   654  }
   655  
   656  type setFunction func(*Message, *desc.FieldDescriptor, interface{}) error
   657  
   658  func (m *Message) unmarshalFieldValueText(fd *desc.FieldDescriptor, tr *txtReader) error {
   659  	var set setFunction
   660  	if fd.IsRepeated() {
   661  		set = (*Message).addRepeatedField
   662  	} else {
   663  		set = mergeField
   664  	}
   665  	tok := tr.peek()
   666  	if tok.tokTyp == tokenOpenBracket {
   667  		tr.next() // consume tok
   668  		for {
   669  			if err := m.unmarshalFieldElementText(fd, tr, set); err != nil {
   670  				return err
   671  			}
   672  			tok = tr.peek()
   673  			if tok.tokTyp == tokenCloseBracket {
   674  				tr.next() // consume tok
   675  				return nil
   676  			} else if tok.tokTyp.IsSep() {
   677  				tr.next() // consume separator
   678  			}
   679  		}
   680  	}
   681  	return m.unmarshalFieldElementText(fd, tr, set)
   682  }
   683  
   684  func (m *Message) unmarshalFieldElementText(fd *desc.FieldDescriptor, tr *txtReader, set setFunction) error {
   685  	tok := tr.next()
   686  	if tok.tokTyp == tokenEOF {
   687  		return io.ErrUnexpectedEOF
   688  	}
   689  
   690  	var expected string
   691  	switch fd.GetType() {
   692  	case descriptorpb.FieldDescriptorProto_TYPE_BOOL:
   693  		if tok.tokTyp == tokenIdent {
   694  			if tok.val.(string) == "true" {
   695  				return set(m, fd, true)
   696  			} else if tok.val.(string) == "false" {
   697  				return set(m, fd, false)
   698  			}
   699  		}
   700  		expected = "boolean value"
   701  	case descriptorpb.FieldDescriptorProto_TYPE_BYTES:
   702  		if tok.tokTyp == tokenString {
   703  			return set(m, fd, []byte(tok.val.(string)))
   704  		}
   705  		expected = "bytes string value"
   706  	case descriptorpb.FieldDescriptorProto_TYPE_STRING:
   707  		if tok.tokTyp == tokenString {
   708  			return set(m, fd, tok.val)
   709  		}
   710  		expected = "string value"
   711  	case descriptorpb.FieldDescriptorProto_TYPE_FLOAT:
   712  		switch tok.tokTyp {
   713  		case tokenFloat:
   714  			return set(m, fd, float32(tok.val.(float64)))
   715  		case tokenInt:
   716  			if f, err := strconv.ParseFloat(tok.val.(string), 32); err != nil {
   717  				return err
   718  			} else {
   719  				return set(m, fd, float32(f))
   720  			}
   721  		case tokenIdent:
   722  			ident := strings.ToLower(tok.val.(string))
   723  			if ident == "inf" {
   724  				return set(m, fd, float32(math.Inf(1)))
   725  			} else if ident == "nan" {
   726  				return set(m, fd, float32(math.NaN()))
   727  			}
   728  		case tokenMinus:
   729  			peeked := tr.peek()
   730  			if peeked.tokTyp == tokenIdent {
   731  				ident := strings.ToLower(peeked.val.(string))
   732  				if ident == "inf" {
   733  					tr.next() // consume peeked token
   734  					return set(m, fd, float32(math.Inf(-1)))
   735  				}
   736  			}
   737  		}
   738  		expected = "float value"
   739  	case descriptorpb.FieldDescriptorProto_TYPE_DOUBLE:
   740  		switch tok.tokTyp {
   741  		case tokenFloat:
   742  			return set(m, fd, tok.val)
   743  		case tokenInt:
   744  			if f, err := strconv.ParseFloat(tok.val.(string), 64); err != nil {
   745  				return err
   746  			} else {
   747  				return set(m, fd, f)
   748  			}
   749  		case tokenIdent:
   750  			ident := strings.ToLower(tok.val.(string))
   751  			if ident == "inf" {
   752  				return set(m, fd, math.Inf(1))
   753  			} else if ident == "nan" {
   754  				return set(m, fd, math.NaN())
   755  			}
   756  		case tokenMinus:
   757  			peeked := tr.peek()
   758  			if peeked.tokTyp == tokenIdent {
   759  				ident := strings.ToLower(peeked.val.(string))
   760  				if ident == "inf" {
   761  					tr.next() // consume peeked token
   762  					return set(m, fd, math.Inf(-1))
   763  				}
   764  			}
   765  		}
   766  		expected = "float value"
   767  	case descriptorpb.FieldDescriptorProto_TYPE_INT32,
   768  		descriptorpb.FieldDescriptorProto_TYPE_SINT32,
   769  		descriptorpb.FieldDescriptorProto_TYPE_SFIXED32:
   770  		if tok.tokTyp == tokenInt {
   771  			if i, err := strconv.ParseInt(tok.val.(string), 10, 32); err != nil {
   772  				return err
   773  			} else {
   774  				return set(m, fd, int32(i))
   775  			}
   776  		}
   777  		expected = "int value"
   778  	case descriptorpb.FieldDescriptorProto_TYPE_INT64,
   779  		descriptorpb.FieldDescriptorProto_TYPE_SINT64,
   780  		descriptorpb.FieldDescriptorProto_TYPE_SFIXED64:
   781  		if tok.tokTyp == tokenInt {
   782  			if i, err := strconv.ParseInt(tok.val.(string), 10, 64); err != nil {
   783  				return err
   784  			} else {
   785  				return set(m, fd, i)
   786  			}
   787  		}
   788  		expected = "int value"
   789  	case descriptorpb.FieldDescriptorProto_TYPE_UINT32,
   790  		descriptorpb.FieldDescriptorProto_TYPE_FIXED32:
   791  		if tok.tokTyp == tokenInt {
   792  			if i, err := strconv.ParseUint(tok.val.(string), 10, 32); err != nil {
   793  				return err
   794  			} else {
   795  				return set(m, fd, uint32(i))
   796  			}
   797  		}
   798  		expected = "unsigned int value"
   799  	case descriptorpb.FieldDescriptorProto_TYPE_UINT64,
   800  		descriptorpb.FieldDescriptorProto_TYPE_FIXED64:
   801  		if tok.tokTyp == tokenInt {
   802  			if i, err := strconv.ParseUint(tok.val.(string), 10, 64); err != nil {
   803  				return err
   804  			} else {
   805  				return set(m, fd, i)
   806  			}
   807  		}
   808  		expected = "unsigned int value"
   809  	case descriptorpb.FieldDescriptorProto_TYPE_ENUM:
   810  		if tok.tokTyp == tokenIdent {
   811  			// TODO: add a flag to just ignore unrecognized enum value names?
   812  			vd := fd.GetEnumType().FindValueByName(tok.val.(string))
   813  			if vd != nil {
   814  				return set(m, fd, vd.GetNumber())
   815  			}
   816  		} else if tok.tokTyp == tokenInt {
   817  			if i, err := strconv.ParseInt(tok.val.(string), 10, 32); err != nil {
   818  				return err
   819  			} else {
   820  				return set(m, fd, int32(i))
   821  			}
   822  		}
   823  		expected = fmt.Sprintf("enum %s value", fd.GetEnumType().GetFullyQualifiedName())
   824  	case descriptorpb.FieldDescriptorProto_TYPE_MESSAGE,
   825  		descriptorpb.FieldDescriptorProto_TYPE_GROUP:
   826  
   827  		endTok := tok.tokTyp.EndToken()
   828  		if endTok != tokenError {
   829  			dm := m.mf.NewDynamicMessage(fd.GetMessageType())
   830  			if err := dm.unmarshalText(tr, endTok); err != nil {
   831  				return err
   832  			}
   833  			// TODO: ideally we would use mf.NewMessage and, if not a dynamic message, use
   834  			// proto package to unmarshal it. But the text parser isn't particularly amenable
   835  			// to that, so we instead convert a dynamic message to a generated one if the
   836  			// known-type registry knows about the generated type...
   837  			var ktr *KnownTypeRegistry
   838  			if m.mf != nil {
   839  				ktr = m.mf.ktr
   840  			}
   841  			pm := ktr.CreateIfKnown(fd.GetMessageType().GetFullyQualifiedName())
   842  			if pm != nil {
   843  				if err := dm.ConvertTo(pm); err != nil {
   844  					return set(m, fd, pm)
   845  				}
   846  			}
   847  			return set(m, fd, dm)
   848  		}
   849  		expected = fmt.Sprintf("message %s value", fd.GetMessageType().GetFullyQualifiedName())
   850  	default:
   851  		return fmt.Errorf("field %q of message %q has unrecognized type: %v", fd.GetFullyQualifiedName(), m.md.GetFullyQualifiedName(), fd.GetType())
   852  	}
   853  
   854  	// if we get here, token was wrong type; create error message
   855  	var article string
   856  	if strings.Contains("aieou", expected[0:1]) {
   857  		article = "an"
   858  	} else {
   859  		article = "a"
   860  	}
   861  	return textError(tok, "Expecting %s %s; got %q", article, expected, tok.txt)
   862  }
   863  
   864  func unmarshalFieldNameText(tr *txtReader, tok *token) (string, error) {
   865  	if tok.tokTyp == tokenOpenBracket || tok.tokTyp == tokenOpenParen {
   866  		// extension name
   867  		var closeType tokenType
   868  		var closeChar string
   869  		if tok.tokTyp == tokenOpenBracket {
   870  			closeType = tokenCloseBracket
   871  			closeChar = "close bracket ']'"
   872  		} else {
   873  			closeType = tokenCloseParen
   874  			closeChar = "close paren ')'"
   875  		}
   876  		// must be followed by an identifier
   877  		idents := make([]string, 0, 1)
   878  		for {
   879  			tok = tr.next()
   880  			if tok.tokTyp == tokenEOF {
   881  				return "", io.ErrUnexpectedEOF
   882  			} else if tok.tokTyp != tokenIdent {
   883  				return "", textError(tok, "Expecting an identifier; instead got %q", tok.txt)
   884  			}
   885  			idents = append(idents, tok.val.(string))
   886  			// and then close bracket/paren, or "/" to keep adding URL elements to name
   887  			tok = tr.next()
   888  			if tok.tokTyp == tokenEOF {
   889  				return "", io.ErrUnexpectedEOF
   890  			} else if tok.tokTyp == closeType {
   891  				break
   892  			} else if tok.tokTyp != tokenSlash {
   893  				return "", textError(tok, "Expecting a %s; instead got %q", closeChar, tok.txt)
   894  			}
   895  		}
   896  		return "[" + strings.Join(idents, "/") + "]", nil
   897  	} else if tok.tokTyp == tokenIdent {
   898  		// normal field name
   899  		return tok.val.(string), nil
   900  	} else {
   901  		return "", textError(tok, "Expecting an identifier or tag number; instead got %q", tok.txt)
   902  	}
   903  }
   904  
   905  func skipFieldNameText(tr *txtReader) error {
   906  	tok := tr.next()
   907  	if tok.tokTyp == tokenEOF {
   908  		return io.ErrUnexpectedEOF
   909  	} else if tok.tokTyp == tokenInt || tok.tokTyp == tokenIdent {
   910  		return nil
   911  	} else {
   912  		_, err := unmarshalFieldNameText(tr, tok)
   913  		return err
   914  	}
   915  }
   916  
   917  func skipFieldValueText(tr *txtReader) error {
   918  	tok := tr.peek()
   919  	if tok.tokTyp == tokenOpenBracket {
   920  		tr.next() // consume tok
   921  		for {
   922  			if err := skipFieldElementText(tr); err != nil {
   923  				return err
   924  			}
   925  			tok = tr.peek()
   926  			if tok.tokTyp == tokenCloseBracket {
   927  				tr.next() // consume tok
   928  				return nil
   929  			} else if tok.tokTyp.IsSep() {
   930  				tr.next() // consume separator
   931  			}
   932  
   933  		}
   934  	}
   935  	return skipFieldElementText(tr)
   936  }
   937  
   938  func skipFieldElementText(tr *txtReader) error {
   939  	tok := tr.next()
   940  	switch tok.tokTyp {
   941  	case tokenEOF:
   942  		return io.ErrUnexpectedEOF
   943  	case tokenInt, tokenFloat, tokenString, tokenIdent:
   944  		return nil
   945  	case tokenOpenAngle:
   946  		return skipMessageText(tr, false)
   947  	default:
   948  		return textError(tok, "Expecting an angle bracket '<' or a value; instead got %q", tok.txt)
   949  	}
   950  }
   951  
   952  func skipMessageText(tr *txtReader, isGroup bool) error {
   953  	for {
   954  		tok := tr.peek()
   955  		if tok.tokTyp == tokenEOF {
   956  			return io.ErrUnexpectedEOF
   957  		} else if isGroup && tok.tokTyp == tokenCloseBrace {
   958  			return nil
   959  		} else if !isGroup && tok.tokTyp == tokenCloseAngle {
   960  			return nil
   961  		}
   962  
   963  		// field name or tag
   964  		if err := skipFieldNameText(tr); err != nil {
   965  			return err
   966  		}
   967  
   968  		// field value
   969  		tok = tr.next()
   970  		if tok.tokTyp == tokenEOF {
   971  			return io.ErrUnexpectedEOF
   972  		} else if tok.tokTyp == tokenOpenBrace {
   973  			if err := skipMessageText(tr, true); err != nil {
   974  				return err
   975  			}
   976  		} else if tok.tokTyp == tokenColon {
   977  			if err := skipFieldValueText(tr); err != nil {
   978  				return err
   979  			}
   980  		} else {
   981  			return textError(tok, "Expecting a colon ':' or brace '{'; instead got %q", tok.txt)
   982  		}
   983  
   984  		tok = tr.peek()
   985  		if tok.tokTyp.IsSep() {
   986  			tr.next() // consume separator
   987  		}
   988  	}
   989  }
   990  
// tokenType identifies the lexical category of a token produced by txtReader.
type tokenType int

const (
	// tokenError marks a token that could not be processed; the reader stores
	// the error in the token's val.
	tokenError tokenType = iota
	// tokenEOF marks the end of the input.
	tokenEOF
	// tokenIdent is an identifier; dots are allowed after the first character
	// so that fully-qualified names scan as one token.
	tokenIdent
	// tokenString is a quoted string or character literal.
	tokenString
	// tokenInt is an integer literal, optionally preceded by '-'.
	tokenInt
	// tokenFloat is a floating point literal, optionally preceded by '-'.
	tokenFloat
	// Punctuation tokens: ':', ',' and ';'.
	tokenColon
	tokenComma
	tokenSemiColon
	// Paired delimiters: '{' '}', '[' ']', '<' '>', '(' ')'.
	tokenOpenBrace
	tokenCloseBrace
	tokenOpenBracket
	tokenCloseBracket
	tokenOpenAngle
	tokenCloseAngle
	tokenOpenParen
	tokenCloseParen
	// tokenSlash is '/', which separates URL components in the expanded Any
	// format.
	tokenSlash
	// tokenMinus is a '-' that is not immediately followed by a digit.
	tokenMinus
)
  1014  
  1015  func (t tokenType) IsSep() bool {
  1016  	return t == tokenComma || t == tokenSemiColon
  1017  }
  1018  
  1019  func (t tokenType) EndToken() tokenType {
  1020  	switch t {
  1021  	case tokenOpenAngle:
  1022  		return tokenCloseAngle
  1023  	case tokenOpenBrace:
  1024  		return tokenCloseBrace
  1025  	default:
  1026  		return tokenError
  1027  	}
  1028  }
  1029  
// token is a single lexical element read from text-format input.
//
// tokTyp is the token's category. val is the interpreted value, whose dynamic
// type depends on tokTyp: a string for identifiers, for integers (kept as
// unparsed text because signedness is not yet known) and for unquoted string
// literals; a float64 for floats; a rune for punctuation; an error for
// tokenError; and nil for tokenEOF. txt is the token text as it appeared in
// the input, and pos is its position as reported by the scanner.
type token struct {
	tokTyp tokenType
	val    interface{}
	txt    string
	pos    scanner.Position
}
  1036  
// txtReader is a tokenizer for the text format with one token of lookahead.
//
// scanner is the underlying text/scanner tokenizer. peeked holds the most
// recently scanned token, and havePeeked reports whether peeked has been
// scanned but not yet consumed by next. EOF and error tokens are never
// consumed, so once reached they are returned by every later peek and next.
type txtReader struct {
	scanner    scanner.Scanner
	peeked     token
	havePeeked bool
}
  1042  
  1043  func newReader(text []byte) *txtReader {
  1044  	sc := scanner.Scanner{}
  1045  	sc.Init(bytes.NewReader(text))
  1046  	sc.Mode = scanner.ScanIdents | scanner.ScanInts | scanner.ScanFloats | scanner.ScanChars |
  1047  		scanner.ScanStrings | scanner.ScanComments | scanner.SkipComments
  1048  	// identifiers are same restrictions as Go identifiers, except we also allow dots since
  1049  	// we accept fully-qualified names
  1050  	sc.IsIdentRune = func(ch rune, i int) bool {
  1051  		return ch == '_' || unicode.IsLetter(ch) ||
  1052  			(i > 0 && unicode.IsDigit(ch)) ||
  1053  			(i > 0 && ch == '.')
  1054  	}
  1055  	// ignore errors; we handle them if/when we see malformed tokens
  1056  	sc.Error = func(s *scanner.Scanner, msg string) {}
  1057  	return &txtReader{scanner: sc}
  1058  }
  1059  
  1060  func (p *txtReader) peek() *token {
  1061  	if p.havePeeked {
  1062  		return &p.peeked
  1063  	}
  1064  	t := p.scanner.Scan()
  1065  	if t == scanner.EOF {
  1066  		p.peeked.tokTyp = tokenEOF
  1067  		p.peeked.val = nil
  1068  		p.peeked.txt = ""
  1069  		p.peeked.pos = p.scanner.Position
  1070  	} else if err := p.processToken(t, p.scanner.TokenText(), p.scanner.Position); err != nil {
  1071  		p.peeked.tokTyp = tokenError
  1072  		p.peeked.val = err
  1073  	}
  1074  	p.havePeeked = true
  1075  	return &p.peeked
  1076  }
  1077  
  1078  func (p *txtReader) processToken(t rune, text string, pos scanner.Position) error {
  1079  	p.peeked.pos = pos
  1080  	p.peeked.txt = text
  1081  	switch t {
  1082  	case scanner.Ident:
  1083  		p.peeked.tokTyp = tokenIdent
  1084  		p.peeked.val = text
  1085  	case scanner.Int:
  1086  		p.peeked.tokTyp = tokenInt
  1087  		p.peeked.val = text // can't parse the number because we don't know if it's signed or unsigned
  1088  	case scanner.Float:
  1089  		p.peeked.tokTyp = tokenFloat
  1090  		var err error
  1091  		if p.peeked.val, err = strconv.ParseFloat(text, 64); err != nil {
  1092  			return err
  1093  		}
  1094  	case scanner.Char, scanner.String:
  1095  		p.peeked.tokTyp = tokenString
  1096  		var err error
  1097  		if p.peeked.val, err = strconv.Unquote(text); err != nil {
  1098  			return err
  1099  		}
  1100  	case '-': // unary minus, for negative ints and floats
  1101  		ch := p.scanner.Peek()
  1102  		if ch < '0' || ch > '9' {
  1103  			p.peeked.tokTyp = tokenMinus
  1104  			p.peeked.val = '-'
  1105  		} else {
  1106  			t := p.scanner.Scan()
  1107  			if t == scanner.EOF {
  1108  				return io.ErrUnexpectedEOF
  1109  			} else if t == scanner.Float {
  1110  				p.peeked.tokTyp = tokenFloat
  1111  				text += p.scanner.TokenText()
  1112  				p.peeked.txt = text
  1113  				var err error
  1114  				if p.peeked.val, err = strconv.ParseFloat(text, 64); err != nil {
  1115  					p.peeked.pos = p.scanner.Position
  1116  					return err
  1117  				}
  1118  			} else if t == scanner.Int {
  1119  				p.peeked.tokTyp = tokenInt
  1120  				text += p.scanner.TokenText()
  1121  				p.peeked.txt = text
  1122  				p.peeked.val = text // can't parse the number because we don't know if it's signed or unsigned
  1123  			} else {
  1124  				p.peeked.pos = p.scanner.Position
  1125  				return fmt.Errorf("expecting an int or float but got %q", p.scanner.TokenText())
  1126  			}
  1127  		}
  1128  	case ':':
  1129  		p.peeked.tokTyp = tokenColon
  1130  		p.peeked.val = ':'
  1131  	case ',':
  1132  		p.peeked.tokTyp = tokenComma
  1133  		p.peeked.val = ','
  1134  	case ';':
  1135  		p.peeked.tokTyp = tokenSemiColon
  1136  		p.peeked.val = ';'
  1137  	case '{':
  1138  		p.peeked.tokTyp = tokenOpenBrace
  1139  		p.peeked.val = '{'
  1140  	case '}':
  1141  		p.peeked.tokTyp = tokenCloseBrace
  1142  		p.peeked.val = '}'
  1143  	case '<':
  1144  		p.peeked.tokTyp = tokenOpenAngle
  1145  		p.peeked.val = '<'
  1146  	case '>':
  1147  		p.peeked.tokTyp = tokenCloseAngle
  1148  		p.peeked.val = '>'
  1149  	case '[':
  1150  		p.peeked.tokTyp = tokenOpenBracket
  1151  		p.peeked.val = '['
  1152  	case ']':
  1153  		p.peeked.tokTyp = tokenCloseBracket
  1154  		p.peeked.val = ']'
  1155  	case '(':
  1156  		p.peeked.tokTyp = tokenOpenParen
  1157  		p.peeked.val = '('
  1158  	case ')':
  1159  		p.peeked.tokTyp = tokenCloseParen
  1160  		p.peeked.val = ')'
  1161  	case '/':
  1162  		// only allowed to separate URL components in expanded Any format
  1163  		p.peeked.tokTyp = tokenSlash
  1164  		p.peeked.val = '/'
  1165  	default:
  1166  		return fmt.Errorf("invalid character: %c", t)
  1167  	}
  1168  	return nil
  1169  }
  1170  
  1171  func (p *txtReader) next() *token {
  1172  	t := p.peek()
  1173  	if t.tokTyp != tokenEOF && t.tokTyp != tokenError {
  1174  		p.havePeeked = false
  1175  	}
  1176  	return t
  1177  }