github.com/matm/etcd@v0.3.1-0.20140328024009-5b4a473f1453/third_party/code.google.com/p/goprotobuf/proto/text_parser.go (about)

     1  // Go support for Protocol Buffers - Google's data interchange format
     2  //
     3  // Copyright 2010 The Go Authors.  All rights reserved.
     4  // http://code.google.com/p/goprotobuf/
     5  //
     6  // Redistribution and use in source and binary forms, with or without
     7  // modification, are permitted provided that the following conditions are
     8  // met:
     9  //
    10  //     * Redistributions of source code must retain the above copyright
    11  // notice, this list of conditions and the following disclaimer.
    12  //     * Redistributions in binary form must reproduce the above
    13  // copyright notice, this list of conditions and the following disclaimer
    14  // in the documentation and/or other materials provided with the
    15  // distribution.
    16  //     * Neither the name of Google Inc. nor the names of its
    17  // contributors may be used to endorse or promote products derived from
    18  // this software without specific prior written permission.
    19  //
    20  // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    21  // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    22  // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    23  // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
    24  // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    25  // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    26  // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    27  // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    28  // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    29  // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    30  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    31  
    32  package proto
    33  
    34  // Functions for parsing the Text protocol buffer format.
    35  // TODO: message sets.
    36  
    37  import (
    38  	"errors"
    39  	"fmt"
    40  	"reflect"
    41  	"strconv"
    42  	"strings"
    43  	"unicode/utf8"
    44  )
    45  
    46  type ParseError struct {
    47  	Message string
    48  	Line    int // 1-based line number
    49  	Offset  int // 0-based byte offset from start of input
    50  }
    51  
    52  func (p *ParseError) Error() string {
    53  	if p.Line == 1 {
    54  		// show offset only for first line
    55  		return fmt.Sprintf("line 1.%d: %v", p.Offset, p.Message)
    56  	}
    57  	return fmt.Sprintf("line %d: %v", p.Line, p.Message)
    58  }
    59  
    60  type token struct {
    61  	value    string
    62  	err      *ParseError
    63  	line     int    // line number
    64  	offset   int    // byte number from start of input, not start of line
    65  	unquoted string // the unquoted version of value, if it was a quoted string
    66  }
    67  
    68  func (t *token) String() string {
    69  	if t.err == nil {
    70  		return fmt.Sprintf("%q (line=%d, offset=%d)", t.value, t.line, t.offset)
    71  	}
    72  	return fmt.Sprintf("parse error: %v", t.err)
    73  }
    74  
    75  type textParser struct {
    76  	s            string // remaining input
    77  	done         bool   // whether the parsing is finished (success or error)
    78  	backed       bool   // whether back() was called
    79  	offset, line int
    80  	cur          token
    81  }
    82  
    83  func newTextParser(s string) *textParser {
    84  	p := new(textParser)
    85  	p.s = s
    86  	p.line = 1
    87  	p.cur.line = 1
    88  	return p
    89  }
    90  
    91  func (p *textParser) errorf(format string, a ...interface{}) *ParseError {
    92  	pe := &ParseError{fmt.Sprintf(format, a...), p.cur.line, p.cur.offset}
    93  	p.cur.err = pe
    94  	p.done = true
    95  	return pe
    96  }
    97  
    98  // Numbers and identifiers are matched by [-+._A-Za-z0-9]
    99  func isIdentOrNumberChar(c byte) bool {
   100  	switch {
   101  	case 'A' <= c && c <= 'Z', 'a' <= c && c <= 'z':
   102  		return true
   103  	case '0' <= c && c <= '9':
   104  		return true
   105  	}
   106  	switch c {
   107  	case '-', '+', '.', '_':
   108  		return true
   109  	}
   110  	return false
   111  }
   112  
   113  func isWhitespace(c byte) bool {
   114  	switch c {
   115  	case ' ', '\t', '\n', '\r':
   116  		return true
   117  	}
   118  	return false
   119  }
   120  
   121  func (p *textParser) skipWhitespace() {
   122  	i := 0
   123  	for i < len(p.s) && (isWhitespace(p.s[i]) || p.s[i] == '#') {
   124  		if p.s[i] == '#' {
   125  			// comment; skip to end of line or input
   126  			for i < len(p.s) && p.s[i] != '\n' {
   127  				i++
   128  			}
   129  			if i == len(p.s) {
   130  				break
   131  			}
   132  		}
   133  		if p.s[i] == '\n' {
   134  			p.line++
   135  		}
   136  		i++
   137  	}
   138  	p.offset += i
   139  	p.s = p.s[i:len(p.s)]
   140  	if len(p.s) == 0 {
   141  		p.done = true
   142  	}
   143  }
   144  
   145  func (p *textParser) advance() {
   146  	// Skip whitespace
   147  	p.skipWhitespace()
   148  	if p.done {
   149  		return
   150  	}
   151  
   152  	// Start of non-whitespace
   153  	p.cur.err = nil
   154  	p.cur.offset, p.cur.line = p.offset, p.line
   155  	p.cur.unquoted = ""
   156  	switch p.s[0] {
   157  	case '<', '>', '{', '}', ':', '[', ']', ';', ',':
   158  		// Single symbol
   159  		p.cur.value, p.s = p.s[0:1], p.s[1:len(p.s)]
   160  	case '"', '\'':
   161  		// Quoted string
   162  		i := 1
   163  		for i < len(p.s) && p.s[i] != p.s[0] && p.s[i] != '\n' {
   164  			if p.s[i] == '\\' && i+1 < len(p.s) {
   165  				// skip escaped char
   166  				i++
   167  			}
   168  			i++
   169  		}
   170  		if i >= len(p.s) || p.s[i] != p.s[0] {
   171  			p.errorf("unmatched quote")
   172  			return
   173  		}
   174  		unq, err := unquoteC(p.s[1:i], rune(p.s[0]))
   175  		if err != nil {
   176  			p.errorf("invalid quoted string %v", p.s[0:i+1])
   177  			return
   178  		}
   179  		p.cur.value, p.s = p.s[0:i+1], p.s[i+1:len(p.s)]
   180  		p.cur.unquoted = unq
   181  	default:
   182  		i := 0
   183  		for i < len(p.s) && isIdentOrNumberChar(p.s[i]) {
   184  			i++
   185  		}
   186  		if i == 0 {
   187  			p.errorf("unexpected byte %#x", p.s[0])
   188  			return
   189  		}
   190  		p.cur.value, p.s = p.s[0:i], p.s[i:len(p.s)]
   191  	}
   192  	p.offset += len(p.cur.value)
   193  }
   194  
   195  var (
   196  	errBadUTF8 = errors.New("proto: bad UTF-8")
   197  	errBadHex  = errors.New("proto: bad hexadecimal")
   198  )
   199  
   200  func unquoteC(s string, quote rune) (string, error) {
   201  	// This is based on C++'s tokenizer.cc.
   202  	// Despite its name, this is *not* parsing C syntax.
   203  	// For instance, "\0" is an invalid quoted string.
   204  
   205  	// Avoid allocation in trivial cases.
   206  	simple := true
   207  	for _, r := range s {
   208  		if r == '\\' || r == quote {
   209  			simple = false
   210  			break
   211  		}
   212  	}
   213  	if simple {
   214  		return s, nil
   215  	}
   216  
   217  	buf := make([]byte, 0, 3*len(s)/2)
   218  	for len(s) > 0 {
   219  		r, n := utf8.DecodeRuneInString(s)
   220  		if r == utf8.RuneError && n == 1 {
   221  			return "", errBadUTF8
   222  		}
   223  		s = s[n:]
   224  		if r != '\\' {
   225  			if r < utf8.RuneSelf {
   226  				buf = append(buf, byte(r))
   227  			} else {
   228  				buf = append(buf, string(r)...)
   229  			}
   230  			continue
   231  		}
   232  
   233  		ch, tail, err := unescape(s)
   234  		if err != nil {
   235  			return "", err
   236  		}
   237  		buf = append(buf, ch...)
   238  		s = tail
   239  	}
   240  	return string(buf), nil
   241  }
   242  
   243  func unescape(s string) (ch string, tail string, err error) {
   244  	r, n := utf8.DecodeRuneInString(s)
   245  	if r == utf8.RuneError && n == 1 {
   246  		return "", "", errBadUTF8
   247  	}
   248  	s = s[n:]
   249  	switch r {
   250  	case 'a':
   251  		return "\a", s, nil
   252  	case 'b':
   253  		return "\b", s, nil
   254  	case 'f':
   255  		return "\f", s, nil
   256  	case 'n':
   257  		return "\n", s, nil
   258  	case 'r':
   259  		return "\r", s, nil
   260  	case 't':
   261  		return "\t", s, nil
   262  	case 'v':
   263  		return "\v", s, nil
   264  	case '?':
   265  		return "?", s, nil // trigraph workaround
   266  	case '\'', '"', '\\':
   267  		return string(r), s, nil
   268  	case '0', '1', '2', '3', '4', '5', '6', '7', 'x', 'X':
   269  		if len(s) < 2 {
   270  			return "", "", fmt.Errorf(`\%c requires 2 following digits`, r)
   271  		}
   272  		base := 8
   273  		ss := s[:2]
   274  		s = s[2:]
   275  		if r == 'x' || r == 'X' {
   276  			base = 16
   277  		} else {
   278  			ss = string(r) + ss
   279  		}
   280  		i, err := strconv.ParseUint(ss, base, 8)
   281  		if err != nil {
   282  			return "", "", err
   283  		}
   284  		return string([]byte{byte(i)}), s, nil
   285  	case 'u', 'U':
   286  		n := 4
   287  		if r == 'U' {
   288  			n = 8
   289  		}
   290  		if len(s) < n {
   291  			return "", "", fmt.Errorf(`\%c requires %d digits`, r, n)
   292  		}
   293  
   294  		bs := make([]byte, n/2)
   295  		for i := 0; i < n; i += 2 {
   296  			a, ok1 := unhex(s[i])
   297  			b, ok2 := unhex(s[i+1])
   298  			if !ok1 || !ok2 {
   299  				return "", "", errBadHex
   300  			}
   301  			bs[i/2] = a<<4 | b
   302  		}
   303  		s = s[n:]
   304  		return string(bs), s, nil
   305  	}
   306  	return "", "", fmt.Errorf(`unknown escape \%c`, r)
   307  }
   308  
   309  // Adapted from src/pkg/strconv/quote.go.
   310  func unhex(b byte) (v byte, ok bool) {
   311  	switch {
   312  	case '0' <= b && b <= '9':
   313  		return b - '0', true
   314  	case 'a' <= b && b <= 'f':
   315  		return b - 'a' + 10, true
   316  	case 'A' <= b && b <= 'F':
   317  		return b - 'A' + 10, true
   318  	}
   319  	return 0, false
   320  }
   321  
   322  // Back off the parser by one token. Can only be done between calls to next().
   323  // It makes the next advance() a no-op.
   324  func (p *textParser) back() { p.backed = true }
   325  
   326  // Advances the parser and returns the new current token.
   327  func (p *textParser) next() *token {
   328  	if p.backed || p.done {
   329  		p.backed = false
   330  		return &p.cur
   331  	}
   332  	p.advance()
   333  	if p.done {
   334  		p.cur.value = ""
   335  	} else if len(p.cur.value) > 0 && p.cur.value[0] == '"' {
   336  		// Look for multiple quoted strings separated by whitespace,
   337  		// and concatenate them.
   338  		cat := p.cur
   339  		for {
   340  			p.skipWhitespace()
   341  			if p.done || p.s[0] != '"' {
   342  				break
   343  			}
   344  			p.advance()
   345  			if p.cur.err != nil {
   346  				return &p.cur
   347  			}
   348  			cat.value += " " + p.cur.value
   349  			cat.unquoted += p.cur.unquoted
   350  		}
   351  		p.done = false // parser may have seen EOF, but we want to return cat
   352  		p.cur = cat
   353  	}
   354  	return &p.cur
   355  }
   356  
   357  // Return an error indicating which required field was not set.
   358  func (p *textParser) missingRequiredFieldError(sv reflect.Value) *ParseError {
   359  	st := sv.Type()
   360  	sprops := GetProperties(st)
   361  	for i := 0; i < st.NumField(); i++ {
   362  		if !isNil(sv.Field(i)) {
   363  			continue
   364  		}
   365  
   366  		props := sprops.Prop[i]
   367  		if props.Required {
   368  			return p.errorf("message %v missing required field %q", st, props.OrigName)
   369  		}
   370  	}
   371  	return p.errorf("message %v missing required field", st) // should not happen
   372  }
   373  
   374  // Returns the index in the struct for the named field, as well as the parsed tag properties.
   375  func structFieldByName(st reflect.Type, name string) (int, *Properties, bool) {
   376  	sprops := GetProperties(st)
   377  	i, ok := sprops.decoderOrigNames[name]
   378  	if ok {
   379  		return i, sprops.Prop[i], true
   380  	}
   381  	return -1, nil, false
   382  }
   383  
   384  // Consume a ':' from the input stream (if the next token is a colon),
   385  // returning an error if a colon is needed but not present.
   386  func (p *textParser) checkForColon(props *Properties, typ reflect.Type) *ParseError {
   387  	tok := p.next()
   388  	if tok.err != nil {
   389  		return tok.err
   390  	}
   391  	if tok.value != ":" {
   392  		// Colon is optional when the field is a group or message.
   393  		needColon := true
   394  		switch props.Wire {
   395  		case "group":
   396  			needColon = false
   397  		case "bytes":
   398  			// A "bytes" field is either a message, a string, or a repeated field;
   399  			// those three become *T, *string and []T respectively, so we can check for
   400  			// this field being a pointer to a non-string.
   401  			if typ.Kind() == reflect.Ptr {
   402  				// *T or *string
   403  				if typ.Elem().Kind() == reflect.String {
   404  					break
   405  				}
   406  			} else if typ.Kind() == reflect.Slice {
   407  				// []T or []*T
   408  				if typ.Elem().Kind() != reflect.Ptr {
   409  					break
   410  				}
   411  			}
   412  			needColon = false
   413  		}
   414  		if needColon {
   415  			return p.errorf("expected ':', found %q", tok.value)
   416  		}
   417  		p.back()
   418  	}
   419  	return nil
   420  }
   421  
// readStruct parses a sequence of fields from the input into the struct
// value sv, stopping when it reads a token whose value equals terminator.
// The top-level call passes "" as terminator, which is the value next()
// reports at end of input.
//
// Each field is either "name: value" for a regular field or
// "[extension.name]: value" for a registered extension. It returns the
// first parse error encountered, an error if a required field was left
// unset, or nil on success.
func (p *textParser) readStruct(sv reflect.Value, terminator string) *ParseError {
	st := sv.Type()
	// Number of required fields still to be seen; decremented as they are parsed.
	reqCount := GetProperties(st).reqCount
	// A struct is a sequence of "name: value", terminated by one of
	// '>' or '}', or the end of the input.  A name may also be
	// "[extension]".
	for {
		tok := p.next()
		if tok.err != nil {
			return tok.err
		}
		if tok.value == terminator {
			break
		}
		if tok.value == "[" {
			// Looks like an extension.
			//
			// TODO: Check whether we need to handle
			// namespace rooted names (e.g. ".something.Foo").
			tok = p.next()
			if tok.err != nil {
				return tok.err
			}
			var desc *ExtensionDesc
			// This could be faster, but it's functional.
			// TODO: Do something smarter than a linear scan.
			for _, d := range RegisteredExtensions(reflect.New(st).Interface().(Message)) {
				if d.Name == tok.value {
					desc = d
					break
				}
			}
			if desc == nil {
				return p.errorf("unrecognized extension %q", tok.value)
			}
			// Check the extension terminator.
			tok = p.next()
			if tok.err != nil {
				return tok.err
			}
			if tok.value != "]" {
				return p.errorf("unrecognized extension terminator %q", tok.value)
			}

			// Derive the field properties from the extension's struct tag.
			props := &Properties{}
			props.Parse(desc.Tag)

			typ := reflect.TypeOf(desc.ExtensionType)
			if err := p.checkForColon(props, typ); err != nil {
				return err
			}

			rep := desc.repeated()

			// Read the extension structure, and set it in
			// the value we're constructing.
			var ext reflect.Value
			if !rep {
				ext = reflect.New(typ).Elem()
			} else {
				// For a repeated extension, parse a single element.
				ext = reflect.New(typ.Elem()).Elem()
			}
			if err := p.readAny(ext, props); err != nil {
				return err
			}
			ep := sv.Addr().Interface().(extendableProto)
			if !rep {
				// NOTE(review): the result of SetExtension is not checked here.
				SetExtension(ep, desc, ext.Interface())
			} else {
				// Append the new element to the values already stored, if any.
				old, err := GetExtension(ep, desc)
				var sl reflect.Value
				if err == nil {
					sl = reflect.ValueOf(old) // existing slice
				} else {
					sl = reflect.MakeSlice(typ, 0, 1)
				}
				sl = reflect.Append(sl, ext)
				SetExtension(ep, desc, sl.Interface())
			}
		} else {
			// This is a normal, non-extension field.
			fi, props, ok := structFieldByName(st, tok.value)
			if !ok {
				return p.errorf("unknown field name %q in %v", tok.value, st)
			}

			dst := sv.Field(fi)
			isDstNil := isNil(dst)

			// Check that it's not already set if it's not a repeated field.
			if !props.Repeated && !isDstNil {
				return p.errorf("non-repeated field %q was repeated", tok.value)
			}

			if err := p.checkForColon(props, st.Field(fi).Type); err != nil {
				return err
			}

			// Parse into the field.
			if err := p.readAny(dst, props); err != nil {
				return err
			}

			if props.Required {
				reqCount--
			}
		}

		// For backward compatibility, permit a semicolon or comma after a field.
		tok = p.next()
		if tok.err != nil {
			return tok.err
		}
		if tok.value != ";" && tok.value != "," {
			// Not a separator: leave the token for the next iteration.
			p.back()
		}
	}

	if reqCount > 0 {
		return p.missingRequiredFieldError(sv)
	}
	return nil
}
   545  
// readAny parses the next value from the input and stores it in v,
// dispatching on v's kind. props supplies the field properties; it is used
// here to resolve enum names for int32 fields.
//
// Slices (other than []byte) are grown by one element and pointers are
// allocated, after which the token is pushed back and readAny recurses on
// the new element. Structs are delegated to readStruct. It returns a
// ParseError if the input ends or the token is not valid for v's type.
func (p *textParser) readAny(v reflect.Value, props *Properties) *ParseError {
	tok := p.next()
	if tok.err != nil {
		return tok.err
	}
	if tok.value == "" {
		return p.errorf("unexpected EOF")
	}

	// Cases that do not return fall through to the generic error at the end.
	switch fv := v; fv.Kind() {
	case reflect.Slice:
		at := v.Type()
		if at.Elem().Kind() == reflect.Uint8 {
			// Special case for []byte
			if tok.value[0] != '"' && tok.value[0] != '\'' {
				// Deliberately written out here, as the error after
				// this switch statement would write "invalid []byte: ...",
				// which is not as user-friendly.
				return p.errorf("invalid string: %v", tok.value)
			}
			bytes := []byte(tok.unquoted)
			fv.Set(reflect.ValueOf(bytes))
			return nil
		}
		// Repeated field. May already exist.
		flen := fv.Len()
		if flen == fv.Cap() {
			// Out of capacity: copy into a larger slice.
			nav := reflect.MakeSlice(at, flen, 2*flen+1)
			reflect.Copy(nav, fv)
			fv.Set(nav)
		}
		fv.SetLen(flen + 1)

		// Read one.
		p.back()
		return p.readAny(fv.Index(flen), props)
	case reflect.Bool:
		// Either "true", "false", 1 or 0.
		switch tok.value {
		case "true", "1":
			fv.SetBool(true)
			return nil
		case "false", "0":
			fv.SetBool(false)
			return nil
		}
	case reflect.Float32, reflect.Float64:
		v := tok.value
		// Ignore 'f' for compatibility with output generated by C++, but don't
		// remove 'f' when the value is "-inf" or "inf".
		if strings.HasSuffix(v, "f") && tok.value != "-inf" && tok.value != "inf" {
			v = v[:len(v)-1]
		}
		if f, err := strconv.ParseFloat(v, fv.Type().Bits()); err == nil {
			fv.SetFloat(f)
			return nil
		}
	case reflect.Int32:
		if x, err := strconv.ParseInt(tok.value, 0, 32); err == nil {
			fv.SetInt(x)
			return nil
		}
		// Not a number: try to interpret the token as an enum value name.
		if len(props.Enum) == 0 {
			break
		}
		m, ok := enumValueMaps[props.Enum]
		if !ok {
			break
		}
		x, ok := m[tok.value]
		if !ok {
			break
		}
		fv.SetInt(int64(x))
		return nil
	case reflect.Int64:
		if x, err := strconv.ParseInt(tok.value, 0, 64); err == nil {
			fv.SetInt(x)
			return nil
		}
	case reflect.Ptr:
		// A basic field (indirected through pointer), or a repeated message/group
		p.back()
		fv.Set(reflect.New(fv.Type().Elem()))
		return p.readAny(fv.Elem(), props)
	case reflect.String:
		// Only quoted tokens are accepted as string values.
		if tok.value[0] == '"' || tok.value[0] == '\'' {
			fv.SetString(tok.unquoted)
			return nil
		}
	case reflect.Struct:
		// The closing delimiter must match the opening one.
		var terminator string
		switch tok.value {
		case "{":
			terminator = "}"
		case "<":
			terminator = ">"
		default:
			return p.errorf("expected '{' or '<', found %q", tok.value)
		}
		return p.readStruct(fv, terminator)
	case reflect.Uint32:
		if x, err := strconv.ParseUint(tok.value, 0, 32); err == nil {
			fv.SetUint(uint64(x))
			return nil
		}
	case reflect.Uint64:
		if x, err := strconv.ParseUint(tok.value, 0, 64); err == nil {
			fv.SetUint(x)
			return nil
		}
	}
	return p.errorf("invalid %v: %v", v.Type(), tok.value)
}
   660  
   661  // UnmarshalText reads a protocol buffer in Text format. UnmarshalText resets pb
   662  // before starting to unmarshal, so any existing data in pb is always removed.
   663  func UnmarshalText(s string, pb Message) error {
   664  	pb.Reset()
   665  	v := reflect.ValueOf(pb)
   666  	if pe := newTextParser(s).readStruct(v.Elem(), ""); pe != nil {
   667  		return pe
   668  	}
   669  	return nil
   670  }