github.com/matm/etcd@v0.3.1-0.20140328024009-5b4a473f1453/third_party/code.google.com/p/gogoprotobuf/proto/text_parser.go (about) 1 // Extensions for Protocol Buffers to create more go like structures. 2 // 3 // Copyright (c) 2013, Vastech SA (PTY) LTD. All rights reserved. 4 // http://code.google.com/p/gogoprotobuf/gogoproto 5 // 6 // Go support for Protocol Buffers - Google's data interchange format 7 // 8 // Copyright 2010 The Go Authors. All rights reserved. 9 // http://code.google.com/p/goprotobuf/ 10 // 11 // Redistribution and use in source and binary forms, with or without 12 // modification, are permitted provided that the following conditions are 13 // met: 14 // 15 // * Redistributions of source code must retain the above copyright 16 // notice, this list of conditions and the following disclaimer. 17 // * Redistributions in binary form must reproduce the above 18 // copyright notice, this list of conditions and the following disclaimer 19 // in the documentation and/or other materials provided with the 20 // distribution. 21 // * Neither the name of Google Inc. nor the names of its 22 // contributors may be used to endorse or promote products derived from 23 // this software without specific prior written permission. 24 // 25 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 28 // A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 29 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 31 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 37 package proto 38 39 // Functions for parsing the Text protocol buffer format. 40 // TODO: message sets. 41 42 import ( 43 "errors" 44 "fmt" 45 "reflect" 46 "strconv" 47 "strings" 48 "unicode/utf8" 49 ) 50 51 type ParseError struct { 52 Message string 53 Line int // 1-based line number 54 Offset int // 0-based byte offset from start of input 55 } 56 57 func (p *ParseError) Error() string { 58 if p.Line == 1 { 59 // show offset only for first line 60 return fmt.Sprintf("line 1.%d: %v", p.Offset, p.Message) 61 } 62 return fmt.Sprintf("line %d: %v", p.Line, p.Message) 63 } 64 65 type token struct { 66 value string 67 err *ParseError 68 line int // line number 69 offset int // byte number from start of input, not start of line 70 unquoted string // the unquoted version of value, if it was a quoted string 71 } 72 73 func (t *token) String() string { 74 if t.err == nil { 75 return fmt.Sprintf("%q (line=%d, offset=%d)", t.value, t.line, t.offset) 76 } 77 return fmt.Sprintf("parse error: %v", t.err) 78 } 79 80 type textParser struct { 81 s string // remaining input 82 done bool // whether the parsing is finished (success or error) 83 backed bool // whether back() was called 84 offset, line int 85 cur token 86 } 87 88 func newTextParser(s string) *textParser { 89 p := new(textParser) 90 p.s = s 91 p.line = 1 92 p.cur.line = 1 93 return p 94 } 95 96 func (p *textParser) errorf(format string, a ...interface{}) *ParseError { 
97 pe := &ParseError{fmt.Sprintf(format, a...), p.cur.line, p.cur.offset} 98 p.cur.err = pe 99 p.done = true 100 return pe 101 } 102 103 // Numbers and identifiers are matched by [-+._A-Za-z0-9] 104 func isIdentOrNumberChar(c byte) bool { 105 switch { 106 case 'A' <= c && c <= 'Z', 'a' <= c && c <= 'z': 107 return true 108 case '0' <= c && c <= '9': 109 return true 110 } 111 switch c { 112 case '-', '+', '.', '_': 113 return true 114 } 115 return false 116 } 117 118 func isWhitespace(c byte) bool { 119 switch c { 120 case ' ', '\t', '\n', '\r': 121 return true 122 } 123 return false 124 } 125 126 func (p *textParser) skipWhitespace() { 127 i := 0 128 for i < len(p.s) && (isWhitespace(p.s[i]) || p.s[i] == '#') { 129 if p.s[i] == '#' { 130 // comment; skip to end of line or input 131 for i < len(p.s) && p.s[i] != '\n' { 132 i++ 133 } 134 if i == len(p.s) { 135 break 136 } 137 } 138 if p.s[i] == '\n' { 139 p.line++ 140 } 141 i++ 142 } 143 p.offset += i 144 p.s = p.s[i:len(p.s)] 145 if len(p.s) == 0 { 146 p.done = true 147 } 148 } 149 150 func (p *textParser) advance() { 151 // Skip whitespace 152 p.skipWhitespace() 153 if p.done { 154 return 155 } 156 157 // Start of non-whitespace 158 p.cur.err = nil 159 p.cur.offset, p.cur.line = p.offset, p.line 160 p.cur.unquoted = "" 161 switch p.s[0] { 162 case '<', '>', '{', '}', ':', '[', ']', ';', ',': 163 // Single symbol 164 p.cur.value, p.s = p.s[0:1], p.s[1:len(p.s)] 165 case '"', '\'': 166 // Quoted string 167 i := 1 168 for i < len(p.s) && p.s[i] != p.s[0] && p.s[i] != '\n' { 169 if p.s[i] == '\\' && i+1 < len(p.s) { 170 // skip escaped char 171 i++ 172 } 173 i++ 174 } 175 if i >= len(p.s) || p.s[i] != p.s[0] { 176 p.errorf("unmatched quote") 177 return 178 } 179 unq, err := unquoteC(p.s[1:i], rune(p.s[0])) 180 if err != nil { 181 p.errorf("invalid quoted string %v", p.s[0:i+1]) 182 return 183 } 184 p.cur.value, p.s = p.s[0:i+1], p.s[i+1:len(p.s)] 185 p.cur.unquoted = unq 186 default: 187 i := 0 188 for i < len(p.s) && 
isIdentOrNumberChar(p.s[i]) { 189 i++ 190 } 191 if i == 0 { 192 p.errorf("unexpected byte %#x", p.s[0]) 193 return 194 } 195 p.cur.value, p.s = p.s[0:i], p.s[i:len(p.s)] 196 } 197 p.offset += len(p.cur.value) 198 } 199 200 var ( 201 errBadUTF8 = errors.New("proto: bad UTF-8") 202 errBadHex = errors.New("proto: bad hexadecimal") 203 ) 204 205 func unquoteC(s string, quote rune) (string, error) { 206 // This is based on C++'s tokenizer.cc. 207 // Despite its name, this is *not* parsing C syntax. 208 // For instance, "\0" is an invalid quoted string. 209 210 // Avoid allocation in trivial cases. 211 simple := true 212 for _, r := range s { 213 if r == '\\' || r == quote { 214 simple = false 215 break 216 } 217 } 218 if simple { 219 return s, nil 220 } 221 222 buf := make([]byte, 0, 3*len(s)/2) 223 for len(s) > 0 { 224 r, n := utf8.DecodeRuneInString(s) 225 if r == utf8.RuneError && n == 1 { 226 return "", errBadUTF8 227 } 228 s = s[n:] 229 if r != '\\' { 230 if r < utf8.RuneSelf { 231 buf = append(buf, byte(r)) 232 } else { 233 buf = append(buf, string(r)...) 234 } 235 continue 236 } 237 238 ch, tail, err := unescape(s) 239 if err != nil { 240 return "", err 241 } 242 buf = append(buf, ch...) 
243 s = tail 244 } 245 return string(buf), nil 246 } 247 248 func unescape(s string) (ch string, tail string, err error) { 249 r, n := utf8.DecodeRuneInString(s) 250 if r == utf8.RuneError && n == 1 { 251 return "", "", errBadUTF8 252 } 253 s = s[n:] 254 switch r { 255 case 'a': 256 return "\a", s, nil 257 case 'b': 258 return "\b", s, nil 259 case 'f': 260 return "\f", s, nil 261 case 'n': 262 return "\n", s, nil 263 case 'r': 264 return "\r", s, nil 265 case 't': 266 return "\t", s, nil 267 case 'v': 268 return "\v", s, nil 269 case '?': 270 return "?", s, nil // trigraph workaround 271 case '\'', '"', '\\': 272 return string(r), s, nil 273 case '0', '1', '2', '3', '4', '5', '6', '7', 'x', 'X': 274 if len(s) < 2 { 275 return "", "", fmt.Errorf(`\%c requires 2 following digits`, r) 276 } 277 base := 8 278 ss := s[:2] 279 s = s[2:] 280 if r == 'x' || r == 'X' { 281 base = 16 282 } else { 283 ss = string(r) + ss 284 } 285 i, err := strconv.ParseUint(ss, base, 8) 286 if err != nil { 287 return "", "", err 288 } 289 return string([]byte{byte(i)}), s, nil 290 case 'u', 'U': 291 n := 4 292 if r == 'U' { 293 n = 8 294 } 295 if len(s) < n { 296 return "", "", fmt.Errorf(`\%c requires %d digits`, r, n) 297 } 298 299 bs := make([]byte, n/2) 300 for i := 0; i < n; i += 2 { 301 a, ok1 := unhex(s[i]) 302 b, ok2 := unhex(s[i+1]) 303 if !ok1 || !ok2 { 304 return "", "", errBadHex 305 } 306 bs[i/2] = a<<4 | b 307 } 308 s = s[n:] 309 return string(bs), s, nil 310 } 311 return "", "", fmt.Errorf(`unknown escape \%c`, r) 312 } 313 314 // Adapted from src/pkg/strconv/quote.go. 315 func unhex(b byte) (v byte, ok bool) { 316 switch { 317 case '0' <= b && b <= '9': 318 return b - '0', true 319 case 'a' <= b && b <= 'f': 320 return b - 'a' + 10, true 321 case 'A' <= b && b <= 'F': 322 return b - 'A' + 10, true 323 } 324 return 0, false 325 } 326 327 // Back off the parser by one token. Can only be done between calls to next(). 328 // It makes the next advance() a no-op. 
329 func (p *textParser) back() { p.backed = true } 330 331 // Advances the parser and returns the new current token. 332 func (p *textParser) next() *token { 333 if p.backed || p.done { 334 p.backed = false 335 return &p.cur 336 } 337 p.advance() 338 if p.done { 339 p.cur.value = "" 340 } else if len(p.cur.value) > 0 && p.cur.value[0] == '"' { 341 // Look for multiple quoted strings separated by whitespace, 342 // and concatenate them. 343 cat := p.cur 344 for { 345 p.skipWhitespace() 346 if p.done || p.s[0] != '"' { 347 break 348 } 349 p.advance() 350 if p.cur.err != nil { 351 return &p.cur 352 } 353 cat.value += " " + p.cur.value 354 cat.unquoted += p.cur.unquoted 355 } 356 p.done = false // parser may have seen EOF, but we want to return cat 357 p.cur = cat 358 } 359 return &p.cur 360 } 361 362 // Return an error indicating which required field was not set. 363 func (p *textParser) missingRequiredFieldError(sv reflect.Value) *ParseError { 364 st := sv.Type() 365 sprops := GetProperties(st) 366 for i := 0; i < st.NumField(); i++ { 367 if !isNil(sv.Field(i)) { 368 continue 369 } 370 371 props := sprops.Prop[i] 372 if props.Required { 373 return p.errorf("message %v missing required field %q", st, props.OrigName) 374 } 375 } 376 return p.errorf("message %v missing required field", st) // should not happen 377 } 378 379 // Returns the index in the struct for the named field, as well as the parsed tag properties. 380 func structFieldByName(st reflect.Type, name string) (int, *Properties, bool) { 381 sprops := GetProperties(st) 382 i, ok := sprops.decoderOrigNames[name] 383 if ok { 384 return i, sprops.Prop[i], true 385 } 386 return -1, nil, false 387 } 388 389 // Consume a ':' from the input stream (if the next token is a colon), 390 // returning an error if a colon is needed but not present. 
func (p *textParser) checkForColon(props *Properties, typ reflect.Type) *ParseError {
	tok := p.next()
	if tok.err != nil {
		return tok.err
	}
	if tok.value != ":" {
		// Colon is optional when the field is a group or message.
		needColon := true
		switch props.Wire {
		case "group":
			needColon = false
		case "bytes":
			// A "bytes" field is either a message, a string, or a repeated field;
			// those three become *T, *string and []T respectively, so we can check for
			// this field being a pointer to a non-string.
			if typ.Kind() == reflect.Ptr {
				// *T or *string
				if typ.Elem().Kind() == reflect.String {
					// *string: leave needColon set.
					break
				}
			} else if typ.Kind() == reflect.Slice {
				// []T or []*T
				if typ.Elem().Kind() != reflect.Ptr {
					// Slice of non-pointers: leave needColon set.
					break
				}
			}
			needColon = false
		}
		if needColon {
			return p.errorf("expected ':', found %q", tok.value)
		}
		// The token was not a colon and none was required, so push it
		// back for the caller to read as the start of the value.
		p.back()
	}
	return nil
}

// readStruct parses a sequence of fields into the struct value sv, stopping
// when a token equal to terminator is read. It returns the first parse error
// encountered, or a missing-required-field error if fewer required fields
// were parsed than the struct's properties call for.
func (p *textParser) readStruct(sv reflect.Value, terminator string) *ParseError {
	st := sv.Type()
	// Number of required fields still to be seen; decremented per parsed
	// required field below.
	reqCount := GetProperties(st).reqCount
	// A struct is a sequence of "name: value", terminated by one of
	// '>' or '}', or the end of the input. A name may also be
	// "[extension]".
	for {
		tok := p.next()
		if tok.err != nil {
			return tok.err
		}
		if tok.value == terminator {
			break
		}
		if tok.value == "[" {
			// Looks like an extension.
			//
			// TODO: Check whether we need to handle
			// namespace rooted names (e.g. ".something.Foo").
			tok = p.next()
			if tok.err != nil {
				return tok.err
			}
			var desc *ExtensionDesc
			// This could be faster, but it's functional.
			// TODO: Do something smarter than a linear scan.
			for _, d := range RegisteredExtensions(reflect.New(st).Interface().(Message)) {
				if d.Name == tok.value {
					desc = d
					break
				}
			}
			if desc == nil {
				return p.errorf("unrecognized extension %q", tok.value)
			}
			// Check the extension terminator.
			tok = p.next()
			if tok.err != nil {
				return tok.err
			}
			if tok.value != "]" {
				return p.errorf("unrecognized extension terminator %q", tok.value)
			}

			// Build field properties from the extension's tag so the
			// value can be parsed like an ordinary field.
			props := &Properties{}
			props.Parse(desc.Tag)

			typ := reflect.TypeOf(desc.ExtensionType)
			if err := p.checkForColon(props, typ); err != nil {
				return err
			}

			rep := desc.repeated()

			// Read the extension structure, and set it in
			// the value we're constructing.
			var ext reflect.Value
			if !rep {
				ext = reflect.New(typ).Elem()
			} else {
				// For a repeated extension, parse a single element of
				// the extension type's element type.
				ext = reflect.New(typ.Elem()).Elem()
			}
			if err := p.readAny(ext, props); err != nil {
				return err
			}
			ep := sv.Addr().Interface().(extendableProto)
			// NOTE(review): any value returned by SetExtension is discarded
			// in both branches below — confirm whether it reports errors.
			if !rep {
				SetExtension(ep, desc, ext.Interface())
			} else {
				old, err := GetExtension(ep, desc)
				var sl reflect.Value
				if err == nil {
					sl = reflect.ValueOf(old) // existing slice
				} else {
					sl = reflect.MakeSlice(typ, 0, 1)
				}
				sl = reflect.Append(sl, ext)
				SetExtension(ep, desc, sl.Interface())
			}
		} else {
			// This is a normal, non-extension field.
			fi, props, ok := structFieldByName(st, tok.value)
			if !ok {
				return p.errorf("unknown field name %q in %v", tok.value, st)
			}

			dst := sv.Field(fi)
			isDstNil := isNil(dst)

			// Check that it's not already set if it's not a repeated field.
			// Only pointer-kind fields are checked here.
			if !props.Repeated && !isDstNil && dst.Kind() == reflect.Ptr {
				return p.errorf("non-repeated field %q was repeated", tok.value)
			}

			if err := p.checkForColon(props, st.Field(fi).Type); err != nil {
				return err
			}

			// Parse into the field.
			if err := p.readAny(dst, props); err != nil {
				return err
			}

			if props.Required {
				reqCount--
			}
		}

		// For backward compatibility, permit a semicolon or comma after a field.
		tok = p.next()
		if tok.err != nil {
			return tok.err
		}
		if tok.value != ";" && tok.value != "," {
			p.back()
		}
	}

	if reqCount > 0 {
		return p.missingRequiredFieldError(sv)
	}
	return nil
}

// readAny parses a single value from the token stream into v, dispatching on
// props (for custom types) and on v's reflect kind. Slice, pointer and custom
// repeated values are handled by preparing the destination, pushing the token
// back and recursing on the element. Any token that does not fit the
// destination falls through to the generic "invalid <type>" error at the end.
func (p *textParser) readAny(v reflect.Value, props *Properties) *ParseError {
	tok := p.next()
	if tok.err != nil {
		return tok.err
	}
	if tok.value == "" {
		return p.errorf("unexpected EOF")
	}
	if len(props.CustomType) > 0 {
		if props.Repeated {
			t := reflect.TypeOf(v.Interface())
			if t.Kind() == reflect.Slice {
				// tc is the type *Marshaler, so tc.Elem() is Marshaler itself.
				// presumably Marshaler is an interface type (Implements requires one) — confirm
				tc := reflect.TypeOf(new(Marshaler))
				ok := t.Elem().Implements(tc.Elem())
				if ok {
					// Grow the slice by one element, reallocating when full.
					fv := v
					flen := fv.Len()
					if flen == fv.Cap() {
						nav := reflect.MakeSlice(v.Type(), flen, 2*flen+1)
						reflect.Copy(nav, fv)
						fv.Set(nav)
					}
					fv.SetLen(flen + 1)

					// Read one.
					p.back()
					return p.readAny(fv.Index(flen), props)
				}
			}
		}
		// A single custom-typed value: unmarshal it from the token's
		// unquoted bytes.
		if reflect.TypeOf(v.Interface()).Kind() == reflect.Ptr {
			custom := reflect.New(props.ctype.Elem()).Interface().(Unmarshaler)
			err := custom.Unmarshal([]byte(tok.unquoted))
			if err != nil {
				return p.errorf("%v %v: %v", err, v.Type(), tok.value)
			}
			v.Set(reflect.ValueOf(custom))
		} else {
			custom := reflect.New(reflect.TypeOf(v.Interface())).Interface().(Unmarshaler)
			err := custom.Unmarshal([]byte(tok.unquoted))
			if err != nil {
				return p.errorf("%v %v: %v", err, v.Type(), tok.value)
			}
			// v holds the value itself rather than a pointer, so store
			// what custom points to.
			v.Set(reflect.Indirect(reflect.ValueOf(custom)))
		}
		return nil
	}
	switch fv := v; fv.Kind() {
	case reflect.Slice:
		at := v.Type()
		if at.Elem().Kind() == reflect.Uint8 {
			// Special case for []byte
			if tok.value[0] != '"' && tok.value[0] != '\'' {
				// Deliberately written out here, as the error after
				// this switch statement would write "invalid []byte: ...",
				// which is not as user-friendly.
				return p.errorf("invalid string: %v", tok.value)
			}
			bytes := []byte(tok.unquoted)
			fv.Set(reflect.ValueOf(bytes))
			return nil
		}
		// Repeated field. May already exist.
		// Grow by one element, reallocating when the slice is full.
		flen := fv.Len()
		if flen == fv.Cap() {
			nav := reflect.MakeSlice(at, flen, 2*flen+1)
			reflect.Copy(nav, fv)
			fv.Set(nav)
		}
		fv.SetLen(flen + 1)

		// Read one.
		p.back()
		return p.readAny(fv.Index(flen), props)
	case reflect.Bool:
		// Either "true", "false", 1 or 0.
		switch tok.value {
		case "true", "1":
			fv.SetBool(true)
			return nil
		case "false", "0":
			fv.SetBool(false)
			return nil
		}
	case reflect.Float32, reflect.Float64:
		v := tok.value
		// Ignore 'f' for compatibility with output generated by C++, but don't
		// remove 'f' when the value is "-inf" or "inf".
		if strings.HasSuffix(v, "f") && tok.value != "-inf" && tok.value != "inf" {
			v = v[:len(v)-1]
		}
		if f, err := strconv.ParseFloat(v, fv.Type().Bits()); err == nil {
			fv.SetFloat(f)
			return nil
		}
	case reflect.Int32:
		if x, err := strconv.ParseInt(tok.value, 0, 32); err == nil {
			fv.SetInt(x)
			return nil
		}
		// Not a number: if the field is an enum, look the token up by
		// name in the enum's registered value map.
		if len(props.Enum) == 0 {
			break
		}
		m, ok := enumValueMaps[props.Enum]
		if !ok {
			break
		}
		x, ok := m[tok.value]
		if !ok {
			break
		}
		fv.SetInt(int64(x))
		return nil
	case reflect.Int64:
		if x, err := strconv.ParseInt(tok.value, 0, 64); err == nil {
			fv.SetInt(x)
			return nil
		}
	case reflect.Ptr:
		// A basic field (indirected through pointer), or a repeated message/group
		// Allocate the pointee, push the token back, and parse into it.
		p.back()
		fv.Set(reflect.New(fv.Type().Elem()))
		return p.readAny(fv.Elem(), props)
	case reflect.String:
		// Only quoted tokens are accepted for string fields.
		if tok.value[0] == '"' || tok.value[0] == '\'' {
			fv.SetString(tok.unquoted)
			return nil
		}
	case reflect.Struct:
		// A nested message: the opening bracket determines the terminator.
		var terminator string
		switch tok.value {
		case "{":
			terminator = "}"
		case "<":
			terminator = ">"
		default:
			return p.errorf("expected '{' or '<', found %q", tok.value)
		}
		return p.readStruct(fv, terminator)
	case reflect.Uint32:
		if x, err := strconv.ParseUint(tok.value, 0, 32); err == nil {
			fv.SetUint(uint64(x))
			return nil
		}
	case reflect.Uint64:
		if x, err := strconv.ParseUint(tok.value, 0, 64); err == nil {
			fv.SetUint(x)
			return nil
		}
	}
	return p.errorf("invalid %v: %v", v.Type(), tok.value)
}

// UnmarshalText reads a protocol buffer in Text format. UnmarshalText resets pb
// before starting to unmarshal, so any existing data in pb is always removed.
func UnmarshalText(s string, pb Message) error {
	pb.Reset()
	v := reflect.ValueOf(pb)
	// Return the *ParseError only when it is non-nil, so that success yields
	// a nil error interface rather than one wrapping a nil pointer.
	if pe := newTextParser(s).readStruct(v.Elem(), ""); pe != nil {
		return pe
	}
	return nil
}