// Source: github.com/schwarzm/garden-linux@v0.0.0-20150507151835-33bca2147c47/Godeps/_workspace/src/code.google.com/p/gogoprotobuf/proto/text_parser.go
//
// Extensions for Protocol Buffers to create more go like structures.
//
// Copyright (c) 2013, Vastech SA (PTY) LTD. All rights reserved.
// http://code.google.com/p/gogoprotobuf/gogoproto
//
// Go support for Protocol Buffers - Google's data interchange format
//
// Copyright 2010 The Go Authors.  All rights reserved.
// http://code.google.com/p/goprotobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT 29 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 31 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 37 package proto 38 39 // Functions for parsing the Text protocol buffer format. 40 // TODO: message sets. 41 42 import ( 43 "errors" 44 "fmt" 45 "reflect" 46 "strconv" 47 "strings" 48 "unicode/utf8" 49 ) 50 51 // textUnmarshaler is implemented by Messages that can unmarshal themsleves. 52 // It is identical to encoding.TextUnmarshaler, introduced in go 1.2, 53 // which will eventually replace it. 54 type textUnmarshaler interface { 55 UnmarshalText(text []byte) error 56 } 57 58 type ParseError struct { 59 Message string 60 Line int // 1-based line number 61 Offset int // 0-based byte offset from start of input 62 } 63 64 func (p *ParseError) Error() string { 65 if p.Line == 1 { 66 // show offset only for first line 67 return fmt.Sprintf("line 1.%d: %v", p.Offset, p.Message) 68 } 69 return fmt.Sprintf("line %d: %v", p.Line, p.Message) 70 } 71 72 type token struct { 73 value string 74 err *ParseError 75 line int // line number 76 offset int // byte number from start of input, not start of line 77 unquoted string // the unquoted version of value, if it was a quoted string 78 } 79 80 func (t *token) String() string { 81 if t.err == nil { 82 return fmt.Sprintf("%q (line=%d, offset=%d)", t.value, t.line, t.offset) 83 } 84 return fmt.Sprintf("parse error: %v", t.err) 85 } 86 87 type textParser struct { 88 s string // remaining input 89 done bool // whether the parsing is finished (success or error) 90 backed bool // 
whether back() was called 91 offset, line int 92 cur token 93 } 94 95 func newTextParser(s string) *textParser { 96 p := new(textParser) 97 p.s = s 98 p.line = 1 99 p.cur.line = 1 100 return p 101 } 102 103 func (p *textParser) errorf(format string, a ...interface{}) *ParseError { 104 pe := &ParseError{fmt.Sprintf(format, a...), p.cur.line, p.cur.offset} 105 p.cur.err = pe 106 p.done = true 107 return pe 108 } 109 110 // Numbers and identifiers are matched by [-+._A-Za-z0-9] 111 func isIdentOrNumberChar(c byte) bool { 112 switch { 113 case 'A' <= c && c <= 'Z', 'a' <= c && c <= 'z': 114 return true 115 case '0' <= c && c <= '9': 116 return true 117 } 118 switch c { 119 case '-', '+', '.', '_': 120 return true 121 } 122 return false 123 } 124 125 func isWhitespace(c byte) bool { 126 switch c { 127 case ' ', '\t', '\n', '\r': 128 return true 129 } 130 return false 131 } 132 133 func (p *textParser) skipWhitespace() { 134 i := 0 135 for i < len(p.s) && (isWhitespace(p.s[i]) || p.s[i] == '#') { 136 if p.s[i] == '#' { 137 // comment; skip to end of line or input 138 for i < len(p.s) && p.s[i] != '\n' { 139 i++ 140 } 141 if i == len(p.s) { 142 break 143 } 144 } 145 if p.s[i] == '\n' { 146 p.line++ 147 } 148 i++ 149 } 150 p.offset += i 151 p.s = p.s[i:len(p.s)] 152 if len(p.s) == 0 { 153 p.done = true 154 } 155 } 156 157 func (p *textParser) advance() { 158 // Skip whitespace 159 p.skipWhitespace() 160 if p.done { 161 return 162 } 163 164 // Start of non-whitespace 165 p.cur.err = nil 166 p.cur.offset, p.cur.line = p.offset, p.line 167 p.cur.unquoted = "" 168 switch p.s[0] { 169 case '<', '>', '{', '}', ':', '[', ']', ';', ',': 170 // Single symbol 171 p.cur.value, p.s = p.s[0:1], p.s[1:len(p.s)] 172 case '"', '\'': 173 // Quoted string 174 i := 1 175 for i < len(p.s) && p.s[i] != p.s[0] && p.s[i] != '\n' { 176 if p.s[i] == '\\' && i+1 < len(p.s) { 177 // skip escaped char 178 i++ 179 } 180 i++ 181 } 182 if i >= len(p.s) || p.s[i] != p.s[0] { 183 p.errorf("unmatched 
quote") 184 return 185 } 186 unq, err := unquoteC(p.s[1:i], rune(p.s[0])) 187 if err != nil { 188 p.errorf("invalid quoted string %v", p.s[0:i+1]) 189 return 190 } 191 p.cur.value, p.s = p.s[0:i+1], p.s[i+1:len(p.s)] 192 p.cur.unquoted = unq 193 default: 194 i := 0 195 for i < len(p.s) && isIdentOrNumberChar(p.s[i]) { 196 i++ 197 } 198 if i == 0 { 199 p.errorf("unexpected byte %#x", p.s[0]) 200 return 201 } 202 p.cur.value, p.s = p.s[0:i], p.s[i:len(p.s)] 203 } 204 p.offset += len(p.cur.value) 205 } 206 207 var ( 208 errBadUTF8 = errors.New("proto: bad UTF-8") 209 errBadHex = errors.New("proto: bad hexadecimal") 210 ) 211 212 func unquoteC(s string, quote rune) (string, error) { 213 // This is based on C++'s tokenizer.cc. 214 // Despite its name, this is *not* parsing C syntax. 215 // For instance, "\0" is an invalid quoted string. 216 217 // Avoid allocation in trivial cases. 218 simple := true 219 for _, r := range s { 220 if r == '\\' || r == quote { 221 simple = false 222 break 223 } 224 } 225 if simple { 226 return s, nil 227 } 228 229 buf := make([]byte, 0, 3*len(s)/2) 230 for len(s) > 0 { 231 r, n := utf8.DecodeRuneInString(s) 232 if r == utf8.RuneError && n == 1 { 233 return "", errBadUTF8 234 } 235 s = s[n:] 236 if r != '\\' { 237 if r < utf8.RuneSelf { 238 buf = append(buf, byte(r)) 239 } else { 240 buf = append(buf, string(r)...) 241 } 242 continue 243 } 244 245 ch, tail, err := unescape(s) 246 if err != nil { 247 return "", err 248 } 249 buf = append(buf, ch...) 
250 s = tail 251 } 252 return string(buf), nil 253 } 254 255 func unescape(s string) (ch string, tail string, err error) { 256 r, n := utf8.DecodeRuneInString(s) 257 if r == utf8.RuneError && n == 1 { 258 return "", "", errBadUTF8 259 } 260 s = s[n:] 261 switch r { 262 case 'a': 263 return "\a", s, nil 264 case 'b': 265 return "\b", s, nil 266 case 'f': 267 return "\f", s, nil 268 case 'n': 269 return "\n", s, nil 270 case 'r': 271 return "\r", s, nil 272 case 't': 273 return "\t", s, nil 274 case 'v': 275 return "\v", s, nil 276 case '?': 277 return "?", s, nil // trigraph workaround 278 case '\'', '"', '\\': 279 return string(r), s, nil 280 case '0', '1', '2', '3', '4', '5', '6', '7', 'x', 'X': 281 if len(s) < 2 { 282 return "", "", fmt.Errorf(`\%c requires 2 following digits`, r) 283 } 284 base := 8 285 ss := s[:2] 286 s = s[2:] 287 if r == 'x' || r == 'X' { 288 base = 16 289 } else { 290 ss = string(r) + ss 291 } 292 i, err := strconv.ParseUint(ss, base, 8) 293 if err != nil { 294 return "", "", err 295 } 296 return string([]byte{byte(i)}), s, nil 297 case 'u', 'U': 298 n := 4 299 if r == 'U' { 300 n = 8 301 } 302 if len(s) < n { 303 return "", "", fmt.Errorf(`\%c requires %d digits`, r, n) 304 } 305 306 bs := make([]byte, n/2) 307 for i := 0; i < n; i += 2 { 308 a, ok1 := unhex(s[i]) 309 b, ok2 := unhex(s[i+1]) 310 if !ok1 || !ok2 { 311 return "", "", errBadHex 312 } 313 bs[i/2] = a<<4 | b 314 } 315 s = s[n:] 316 return string(bs), s, nil 317 } 318 return "", "", fmt.Errorf(`unknown escape \%c`, r) 319 } 320 321 // Adapted from src/pkg/strconv/quote.go. 322 func unhex(b byte) (v byte, ok bool) { 323 switch { 324 case '0' <= b && b <= '9': 325 return b - '0', true 326 case 'a' <= b && b <= 'f': 327 return b - 'a' + 10, true 328 case 'A' <= b && b <= 'F': 329 return b - 'A' + 10, true 330 } 331 return 0, false 332 } 333 334 // Back off the parser by one token. Can only be done between calls to next(). 335 // It makes the next advance() a no-op. 
336 func (p *textParser) back() { p.backed = true } 337 338 // Advances the parser and returns the new current token. 339 func (p *textParser) next() *token { 340 if p.backed || p.done { 341 p.backed = false 342 return &p.cur 343 } 344 p.advance() 345 if p.done { 346 p.cur.value = "" 347 } else if len(p.cur.value) > 0 && p.cur.value[0] == '"' { 348 // Look for multiple quoted strings separated by whitespace, 349 // and concatenate them. 350 cat := p.cur 351 for { 352 p.skipWhitespace() 353 if p.done || p.s[0] != '"' { 354 break 355 } 356 p.advance() 357 if p.cur.err != nil { 358 return &p.cur 359 } 360 cat.value += " " + p.cur.value 361 cat.unquoted += p.cur.unquoted 362 } 363 p.done = false // parser may have seen EOF, but we want to return cat 364 p.cur = cat 365 } 366 return &p.cur 367 } 368 369 // Return an error indicating which required field was not set. 370 func (p *textParser) missingRequiredFieldError(sv reflect.Value) *ParseError { 371 st := sv.Type() 372 sprops := GetProperties(st) 373 for i := 0; i < st.NumField(); i++ { 374 if !isNil(sv.Field(i)) { 375 continue 376 } 377 378 props := sprops.Prop[i] 379 if props.Required { 380 return p.errorf("message %v missing required field %q", st, props.OrigName) 381 } 382 } 383 return p.errorf("message %v missing required field", st) // should not happen 384 } 385 386 // Returns the index in the struct for the named field, as well as the parsed tag properties. 387 func structFieldByName(st reflect.Type, name string) (int, *Properties, bool) { 388 sprops := GetProperties(st) 389 i, ok := sprops.decoderOrigNames[name] 390 if ok { 391 return i, sprops.Prop[i], true 392 } 393 return -1, nil, false 394 } 395 396 // Consume a ':' from the input stream (if the next token is a colon), 397 // returning an error if a colon is needed but not present. 
398 func (p *textParser) checkForColon(props *Properties, typ reflect.Type) *ParseError { 399 tok := p.next() 400 if tok.err != nil { 401 return tok.err 402 } 403 if tok.value != ":" { 404 // Colon is optional when the field is a group or message. 405 needColon := true 406 switch props.Wire { 407 case "group": 408 needColon = false 409 case "bytes": 410 // A "bytes" field is either a message, a string, or a repeated field; 411 // those three become *T, *string and []T respectively, so we can check for 412 // this field being a pointer to a non-string. 413 if typ.Kind() == reflect.Ptr { 414 // *T or *string 415 if typ.Elem().Kind() == reflect.String { 416 break 417 } 418 } else if typ.Kind() == reflect.Slice { 419 // []T or []*T 420 if typ.Elem().Kind() != reflect.Ptr { 421 break 422 } 423 } 424 needColon = false 425 } 426 if needColon { 427 return p.errorf("expected ':', found %q", tok.value) 428 } 429 p.back() 430 } 431 return nil 432 } 433 434 func (p *textParser) readStruct(sv reflect.Value, terminator string) *ParseError { 435 st := sv.Type() 436 reqCount := GetProperties(st).reqCount 437 // A struct is a sequence of "name: value", terminated by one of 438 // '>' or '}', or the end of the input. A name may also be 439 // "[extension]". 440 for { 441 tok := p.next() 442 if tok.err != nil { 443 return tok.err 444 } 445 if tok.value == terminator { 446 break 447 } 448 if tok.value == "[" { 449 // Looks like an extension. 450 // 451 // TODO: Check whether we need to handle 452 // namespace rooted names (e.g. ".something.Foo"). 453 tok = p.next() 454 if tok.err != nil { 455 return tok.err 456 } 457 var desc *ExtensionDesc 458 // This could be faster, but it's functional. 459 // TODO: Do something smarter than a linear scan. 
460 for _, d := range RegisteredExtensions(reflect.New(st).Interface().(Message)) { 461 if d.Name == tok.value { 462 desc = d 463 break 464 } 465 } 466 if desc == nil { 467 return p.errorf("unrecognized extension %q", tok.value) 468 } 469 // Check the extension terminator. 470 tok = p.next() 471 if tok.err != nil { 472 return tok.err 473 } 474 if tok.value != "]" { 475 return p.errorf("unrecognized extension terminator %q", tok.value) 476 } 477 478 props := &Properties{} 479 props.Parse(desc.Tag) 480 481 typ := reflect.TypeOf(desc.ExtensionType) 482 if err := p.checkForColon(props, typ); err != nil { 483 return err 484 } 485 486 rep := desc.repeated() 487 488 // Read the extension structure, and set it in 489 // the value we're constructing. 490 var ext reflect.Value 491 if !rep { 492 ext = reflect.New(typ).Elem() 493 } else { 494 ext = reflect.New(typ.Elem()).Elem() 495 } 496 if err := p.readAny(ext, props); err != nil { 497 return err 498 } 499 ep := sv.Addr().Interface().(extendableProto) 500 if !rep { 501 SetExtension(ep, desc, ext.Interface()) 502 } else { 503 old, err := GetExtension(ep, desc) 504 var sl reflect.Value 505 if err == nil { 506 sl = reflect.ValueOf(old) // existing slice 507 } else { 508 sl = reflect.MakeSlice(typ, 0, 1) 509 } 510 sl = reflect.Append(sl, ext) 511 SetExtension(ep, desc, sl.Interface()) 512 } 513 } else { 514 // This is a normal, non-extension field. 515 fi, props, ok := structFieldByName(st, tok.value) 516 if !ok { 517 return p.errorf("unknown field name %q in %v", tok.value, st) 518 } 519 520 dst := sv.Field(fi) 521 isDstNil := isNil(dst) 522 523 // Check that it's not already set if it's not a repeated field. 524 if !props.Repeated && !isDstNil && dst.Kind() == reflect.Ptr { 525 return p.errorf("non-repeated field %q was repeated", tok.value) 526 } 527 528 if err := p.checkForColon(props, st.Field(fi).Type); err != nil { 529 return err 530 } 531 532 // Parse into the field. 
533 if err := p.readAny(dst, props); err != nil { 534 return err 535 } 536 537 if props.Required { 538 reqCount-- 539 } 540 } 541 542 // For backward compatibility, permit a semicolon or comma after a field. 543 tok = p.next() 544 if tok.err != nil { 545 return tok.err 546 } 547 if tok.value != ";" && tok.value != "," { 548 p.back() 549 } 550 } 551 552 if reqCount > 0 { 553 return p.missingRequiredFieldError(sv) 554 } 555 return nil 556 } 557 558 func (p *textParser) readAny(v reflect.Value, props *Properties) *ParseError { 559 tok := p.next() 560 if tok.err != nil { 561 return tok.err 562 } 563 if tok.value == "" { 564 return p.errorf("unexpected EOF") 565 } 566 if len(props.CustomType) > 0 { 567 if props.Repeated { 568 t := reflect.TypeOf(v.Interface()) 569 if t.Kind() == reflect.Slice { 570 tc := reflect.TypeOf(new(Marshaler)) 571 ok := t.Elem().Implements(tc.Elem()) 572 if ok { 573 fv := v 574 flen := fv.Len() 575 if flen == fv.Cap() { 576 nav := reflect.MakeSlice(v.Type(), flen, 2*flen+1) 577 reflect.Copy(nav, fv) 578 fv.Set(nav) 579 } 580 fv.SetLen(flen + 1) 581 582 // Read one. 
583 p.back() 584 return p.readAny(fv.Index(flen), props) 585 } 586 } 587 } 588 if reflect.TypeOf(v.Interface()).Kind() == reflect.Ptr { 589 custom := reflect.New(props.ctype.Elem()).Interface().(Unmarshaler) 590 err := custom.Unmarshal([]byte(tok.unquoted)) 591 if err != nil { 592 return p.errorf("%v %v: %v", err, v.Type(), tok.value) 593 } 594 v.Set(reflect.ValueOf(custom)) 595 } else { 596 custom := reflect.New(reflect.TypeOf(v.Interface())).Interface().(Unmarshaler) 597 err := custom.Unmarshal([]byte(tok.unquoted)) 598 if err != nil { 599 return p.errorf("%v %v: %v", err, v.Type(), tok.value) 600 } 601 v.Set(reflect.Indirect(reflect.ValueOf(custom))) 602 } 603 return nil 604 } 605 switch fv := v; fv.Kind() { 606 case reflect.Slice: 607 at := v.Type() 608 if at.Elem().Kind() == reflect.Uint8 { 609 // Special case for []byte 610 if tok.value[0] != '"' && tok.value[0] != '\'' { 611 // Deliberately written out here, as the error after 612 // this switch statement would write "invalid []byte: ...", 613 // which is not as user-friendly. 614 return p.errorf("invalid string: %v", tok.value) 615 } 616 bytes := []byte(tok.unquoted) 617 fv.Set(reflect.ValueOf(bytes)) 618 return nil 619 } 620 // Repeated field. May already exist. 621 flen := fv.Len() 622 if flen == fv.Cap() { 623 nav := reflect.MakeSlice(at, flen, 2*flen+1) 624 reflect.Copy(nav, fv) 625 fv.Set(nav) 626 } 627 fv.SetLen(flen + 1) 628 629 // Read one. 630 p.back() 631 return p.readAny(fv.Index(flen), props) 632 case reflect.Bool: 633 // Either "true", "false", 1 or 0. 634 switch tok.value { 635 case "true", "1": 636 fv.SetBool(true) 637 return nil 638 case "false", "0": 639 fv.SetBool(false) 640 return nil 641 } 642 case reflect.Float32, reflect.Float64: 643 v := tok.value 644 // Ignore 'f' for compatibility with output generated by C++, but don't 645 // remove 'f' when the value is "-inf" or "inf". 
646 if strings.HasSuffix(v, "f") && tok.value != "-inf" && tok.value != "inf" { 647 v = v[:len(v)-1] 648 } 649 if f, err := strconv.ParseFloat(v, fv.Type().Bits()); err == nil { 650 fv.SetFloat(f) 651 return nil 652 } 653 case reflect.Int32: 654 if x, err := strconv.ParseInt(tok.value, 0, 32); err == nil { 655 fv.SetInt(x) 656 return nil 657 } 658 659 if len(props.Enum) == 0 { 660 break 661 } 662 m, ok := enumValueMaps[props.Enum] 663 if !ok { 664 break 665 } 666 x, ok := m[tok.value] 667 if !ok { 668 break 669 } 670 fv.SetInt(int64(x)) 671 return nil 672 case reflect.Int64: 673 if x, err := strconv.ParseInt(tok.value, 0, 64); err == nil { 674 fv.SetInt(x) 675 return nil 676 } 677 678 case reflect.Ptr: 679 // A basic field (indirected through pointer), or a repeated message/group 680 p.back() 681 fv.Set(reflect.New(fv.Type().Elem())) 682 return p.readAny(fv.Elem(), props) 683 case reflect.String: 684 if tok.value[0] == '"' || tok.value[0] == '\'' { 685 fv.SetString(tok.unquoted) 686 return nil 687 } 688 case reflect.Struct: 689 var terminator string 690 switch tok.value { 691 case "{": 692 terminator = "}" 693 case "<": 694 terminator = ">" 695 default: 696 return p.errorf("expected '{' or '<', found %q", tok.value) 697 } 698 // TODO: Handle nested messages which implement textUnmarshaler. 699 return p.readStruct(fv, terminator) 700 case reflect.Uint32: 701 if x, err := strconv.ParseUint(tok.value, 0, 32); err == nil { 702 fv.SetUint(uint64(x)) 703 return nil 704 } 705 case reflect.Uint64: 706 if x, err := strconv.ParseUint(tok.value, 0, 64); err == nil { 707 fv.SetUint(x) 708 return nil 709 } 710 } 711 return p.errorf("invalid %v: %v", v.Type(), tok.value) 712 } 713 714 // UnmarshalText reads a protocol buffer in Text format. UnmarshalText resets pb 715 // before starting to unmarshal, so any existing data in pb is always removed. 
716 func UnmarshalText(s string, pb Message) error { 717 if um, ok := pb.(textUnmarshaler); ok { 718 err := um.UnmarshalText([]byte(s)) 719 return err 720 } 721 pb.Reset() 722 v := reflect.ValueOf(pb) 723 if pe := newTextParser(s).readStruct(v.Elem(), ""); pe != nil { 724 return pe 725 } 726 return nil 727 }