github.com/matm/etcd@v0.3.1-0.20140328024009-5b4a473f1453/third_party/code.google.com/p/goprotobuf/proto/text_parser.go (about) 1 // Go support for Protocol Buffers - Google's data interchange format 2 // 3 // Copyright 2010 The Go Authors. All rights reserved. 4 // http://code.google.com/p/goprotobuf/ 5 // 6 // Redistribution and use in source and binary forms, with or without 7 // modification, are permitted provided that the following conditions are 8 // met: 9 // 10 // * Redistributions of source code must retain the above copyright 11 // notice, this list of conditions and the following disclaimer. 12 // * Redistributions in binary form must reproduce the above 13 // copyright notice, this list of conditions and the following disclaimer 14 // in the documentation and/or other materials provided with the 15 // distribution. 16 // * Neither the name of Google Inc. nor the names of its 17 // contributors may be used to endorse or promote products derived from 18 // this software without specific prior written permission. 19 // 20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 32 package proto 33 34 // Functions for parsing the Text protocol buffer format. 35 // TODO: message sets. 36 37 import ( 38 "errors" 39 "fmt" 40 "reflect" 41 "strconv" 42 "strings" 43 "unicode/utf8" 44 ) 45 46 type ParseError struct { 47 Message string 48 Line int // 1-based line number 49 Offset int // 0-based byte offset from start of input 50 } 51 52 func (p *ParseError) Error() string { 53 if p.Line == 1 { 54 // show offset only for first line 55 return fmt.Sprintf("line 1.%d: %v", p.Offset, p.Message) 56 } 57 return fmt.Sprintf("line %d: %v", p.Line, p.Message) 58 } 59 60 type token struct { 61 value string 62 err *ParseError 63 line int // line number 64 offset int // byte number from start of input, not start of line 65 unquoted string // the unquoted version of value, if it was a quoted string 66 } 67 68 func (t *token) String() string { 69 if t.err == nil { 70 return fmt.Sprintf("%q (line=%d, offset=%d)", t.value, t.line, t.offset) 71 } 72 return fmt.Sprintf("parse error: %v", t.err) 73 } 74 75 type textParser struct { 76 s string // remaining input 77 done bool // whether the parsing is finished (success or error) 78 backed bool // whether back() was called 79 offset, line int 80 cur token 81 } 82 83 func newTextParser(s string) *textParser { 84 p := new(textParser) 85 p.s = s 86 p.line = 1 87 p.cur.line = 1 88 return p 89 } 90 91 func (p *textParser) errorf(format string, a ...interface{}) *ParseError { 92 pe := &ParseError{fmt.Sprintf(format, a...), p.cur.line, p.cur.offset} 93 p.cur.err = pe 94 p.done = true 95 return pe 96 } 97 98 // Numbers and identifiers are matched by [-+._A-Za-z0-9] 99 func isIdentOrNumberChar(c byte) bool { 100 switch { 101 case 'A' <= c && c <= 'Z', 'a' <= c && c <= 'z': 102 return true 103 case '0' <= c && c <= '9': 104 return true 105 } 106 switch c { 107 case '-', '+', '.', '_': 108 return true 109 } 110 return false 111 } 112 113 func isWhitespace(c byte) bool { 114 switch c { 115 case ' ', '\t', '\n', '\r': 116 return true 117 } 118 return false 119 } 120 121 func (p *textParser) skipWhitespace() { 122 i := 0 123 for i < len(p.s) && (isWhitespace(p.s[i]) || p.s[i] == '#') { 124 if p.s[i] == '#' { 125 // comment; skip to end of line or input 126 for i < len(p.s) && p.s[i] != '\n' { 127 i++ 128 } 129 if i == len(p.s) { 130 break 131 } 132 } 133 if p.s[i] == '\n' { 134 p.line++ 135 } 136 i++ 137 } 138 p.offset += i 139 p.s = p.s[i:len(p.s)] 140 if len(p.s) == 0 { 141 p.done = true 142 } 143 } 144 145 func (p *textParser) advance() { 146 // Skip whitespace 147 p.skipWhitespace() 148 if p.done { 149 return 150 } 151 152 // Start of non-whitespace 153 p.cur.err = nil 154 p.cur.offset, p.cur.line = p.offset, p.line 155 p.cur.unquoted = "" 156 switch p.s[0] { 157 case '<', '>', '{', '}', ':', '[', ']', ';', ',': 158 // Single symbol 159 p.cur.value, p.s = p.s[0:1], p.s[1:len(p.s)] 160 case '"', '\'': 161 // Quoted string 162 i := 1 163 for i < len(p.s) && p.s[i] != p.s[0] && p.s[i] != '\n' { 164 if p.s[i] == '\\' && i+1 < len(p.s) { 165 // skip escaped char 166 i++ 167 } 168 i++ 169 } 170 if i >= len(p.s) || p.s[i] != p.s[0] { 171 p.errorf("unmatched quote") 172 return 173 } 174 unq, err := unquoteC(p.s[1:i], rune(p.s[0])) 175 if err != nil { 176 p.errorf("invalid quoted string %v", p.s[0:i+1]) 177 return 178 } 179 p.cur.value, p.s = p.s[0:i+1], p.s[i+1:len(p.s)] 180 p.cur.unquoted = unq 181 default: 182 i := 0 183 for i < len(p.s) && isIdentOrNumberChar(p.s[i]) { 184 i++ 185 } 186 if i == 0 { 187 p.errorf("unexpected byte %#x", p.s[0]) 188 return 189 } 190 p.cur.value, p.s = p.s[0:i], p.s[i:len(p.s)] 191 } 192 p.offset += len(p.cur.value) 193 } 194 195 var ( 196 errBadUTF8 = errors.New("proto: bad UTF-8") 197 errBadHex = errors.New("proto: bad hexadecimal") 198 ) 199 200 func unquoteC(s string, quote rune) (string, error) { 201 // This is based on C++'s tokenizer.cc. 202 // Despite its name, this is *not* parsing C syntax. 203 // For instance, "\0" is an invalid quoted string. 204 205 // Avoid allocation in trivial cases. 206 simple := true 207 for _, r := range s { 208 if r == '\\' || r == quote { 209 simple = false 210 break 211 } 212 } 213 if simple { 214 return s, nil 215 } 216 217 buf := make([]byte, 0, 3*len(s)/2) 218 for len(s) > 0 { 219 r, n := utf8.DecodeRuneInString(s) 220 if r == utf8.RuneError && n == 1 { 221 return "", errBadUTF8 222 } 223 s = s[n:] 224 if r != '\\' { 225 if r < utf8.RuneSelf { 226 buf = append(buf, byte(r)) 227 } else { 228 buf = append(buf, string(r)...) 229 } 230 continue 231 } 232 233 ch, tail, err := unescape(s) 234 if err != nil { 235 return "", err 236 } 237 buf = append(buf, ch...) 238 s = tail 239 } 240 return string(buf), nil 241 } 242 243 func unescape(s string) (ch string, tail string, err error) { 244 r, n := utf8.DecodeRuneInString(s) 245 if r == utf8.RuneError && n == 1 { 246 return "", "", errBadUTF8 247 } 248 s = s[n:] 249 switch r { 250 case 'a': 251 return "\a", s, nil 252 case 'b': 253 return "\b", s, nil 254 case 'f': 255 return "\f", s, nil 256 case 'n': 257 return "\n", s, nil 258 case 'r': 259 return "\r", s, nil 260 case 't': 261 return "\t", s, nil 262 case 'v': 263 return "\v", s, nil 264 case '?': 265 return "?", s, nil // trigraph workaround 266 case '\'', '"', '\\': 267 return string(r), s, nil 268 case '0', '1', '2', '3', '4', '5', '6', '7', 'x', 'X': 269 if len(s) < 2 { 270 return "", "", fmt.Errorf(`\%c requires 2 following digits`, r) 271 } 272 base := 8 273 ss := s[:2] 274 s = s[2:] 275 if r == 'x' || r == 'X' { 276 base = 16 277 } else { 278 ss = string(r) + ss 279 } 280 i, err := strconv.ParseUint(ss, base, 8) 281 if err != nil { 282 return "", "", err 283 } 284 return string([]byte{byte(i)}), s, nil 285 case 'u', 'U': 286 n := 4 287 if r == 'U' { 288 n = 8 289 } 290 if len(s) < n { 291 return "", "", fmt.Errorf(`\%c requires %d digits`, r, n) 292 } 293 294 bs := make([]byte, n/2) 295 for i := 0; i < n; i += 2 { 296 a, ok1 := unhex(s[i]) 297 b, ok2 := unhex(s[i+1]) 298 if !ok1 || !ok2 { 299 return "", "", errBadHex 300 } 301 bs[i/2] = a<<4 | b 302 } 303 s = s[n:] 304 return string(bs), s, nil 305 } 306 return "", "", fmt.Errorf(`unknown escape \%c`, r) 307 } 308 309 // Adapted from src/pkg/strconv/quote.go. 310 func unhex(b byte) (v byte, ok bool) { 311 switch { 312 case '0' <= b && b <= '9': 313 return b - '0', true 314 case 'a' <= b && b <= 'f': 315 return b - 'a' + 10, true 316 case 'A' <= b && b <= 'F': 317 return b - 'A' + 10, true 318 } 319 return 0, false 320 } 321 322 // Back off the parser by one token. Can only be done between calls to next(). 323 // It makes the next advance() a no-op. 324 func (p *textParser) back() { p.backed = true } 325 326 // Advances the parser and returns the new current token. 327 func (p *textParser) next() *token { 328 if p.backed || p.done { 329 p.backed = false 330 return &p.cur 331 } 332 p.advance() 333 if p.done { 334 p.cur.value = "" 335 } else if len(p.cur.value) > 0 && p.cur.value[0] == '"' { 336 // Look for multiple quoted strings separated by whitespace, 337 // and concatenate them. 338 cat := p.cur 339 for { 340 p.skipWhitespace() 341 if p.done || p.s[0] != '"' { 342 break 343 } 344 p.advance() 345 if p.cur.err != nil { 346 return &p.cur 347 } 348 cat.value += " " + p.cur.value 349 cat.unquoted += p.cur.unquoted 350 } 351 p.done = false // parser may have seen EOF, but we want to return cat 352 p.cur = cat 353 } 354 return &p.cur 355 } 356 357 // Return an error indicating which required field was not set. 358 func (p *textParser) missingRequiredFieldError(sv reflect.Value) *ParseError { 359 st := sv.Type() 360 sprops := GetProperties(st) 361 for i := 0; i < st.NumField(); i++ { 362 if !isNil(sv.Field(i)) { 363 continue 364 } 365 366 props := sprops.Prop[i] 367 if props.Required { 368 return p.errorf("message %v missing required field %q", st, props.OrigName) 369 } 370 } 371 return p.errorf("message %v missing required field", st) // should not happen 372 } 373 374 // Returns the index in the struct for the named field, as well as the parsed tag properties. 375 func structFieldByName(st reflect.Type, name string) (int, *Properties, bool) { 376 sprops := GetProperties(st) 377 i, ok := sprops.decoderOrigNames[name] 378 if ok { 379 return i, sprops.Prop[i], true 380 } 381 return -1, nil, false 382 } 383 384 // Consume a ':' from the input stream (if the next token is a colon), 385 // returning an error if a colon is needed but not present. 386 func (p *textParser) checkForColon(props *Properties, typ reflect.Type) *ParseError { 387 tok := p.next() 388 if tok.err != nil { 389 return tok.err 390 } 391 if tok.value != ":" { 392 // Colon is optional when the field is a group or message. 393 needColon := true 394 switch props.Wire { 395 case "group": 396 needColon = false 397 case "bytes": 398 // A "bytes" field is either a message, a string, or a repeated field; 399 // those three become *T, *string and []T respectively, so we can check for 400 // this field being a pointer to a non-string. 401 if typ.Kind() == reflect.Ptr { 402 // *T or *string 403 if typ.Elem().Kind() == reflect.String { 404 break 405 } 406 } else if typ.Kind() == reflect.Slice { 407 // []T or []*T 408 if typ.Elem().Kind() != reflect.Ptr { 409 break 410 } 411 } 412 needColon = false 413 } 414 if needColon { 415 return p.errorf("expected ':', found %q", tok.value) 416 } 417 p.back() 418 } 419 return nil 420 } 421 422 func (p *textParser) readStruct(sv reflect.Value, terminator string) *ParseError { 423 st := sv.Type() 424 reqCount := GetProperties(st).reqCount 425 // A struct is a sequence of "name: value", terminated by one of 426 // '>' or '}', or the end of the input. A name may also be 427 // "[extension]". 428 for { 429 tok := p.next() 430 if tok.err != nil { 431 return tok.err 432 } 433 if tok.value == terminator { 434 break 435 } 436 if tok.value == "[" { 437 // Looks like an extension. 438 // 439 // TODO: Check whether we need to handle 440 // namespace rooted names (e.g. ".something.Foo"). 441 tok = p.next() 442 if tok.err != nil { 443 return tok.err 444 } 445 var desc *ExtensionDesc 446 // This could be faster, but it's functional. 447 // TODO: Do something smarter than a linear scan. 448 for _, d := range RegisteredExtensions(reflect.New(st).Interface().(Message)) { 449 if d.Name == tok.value { 450 desc = d 451 break 452 } 453 } 454 if desc == nil { 455 return p.errorf("unrecognized extension %q", tok.value) 456 } 457 // Check the extension terminator. 458 tok = p.next() 459 if tok.err != nil { 460 return tok.err 461 } 462 if tok.value != "]" { 463 return p.errorf("unrecognized extension terminator %q", tok.value) 464 } 465 466 props := &Properties{} 467 props.Parse(desc.Tag) 468 469 typ := reflect.TypeOf(desc.ExtensionType) 470 if err := p.checkForColon(props, typ); err != nil { 471 return err 472 } 473 474 rep := desc.repeated() 475 476 // Read the extension structure, and set it in 477 // the value we're constructing. 478 var ext reflect.Value 479 if !rep { 480 ext = reflect.New(typ).Elem() 481 } else { 482 ext = reflect.New(typ.Elem()).Elem() 483 } 484 if err := p.readAny(ext, props); err != nil { 485 return err 486 } 487 ep := sv.Addr().Interface().(extendableProto) 488 if !rep { 489 SetExtension(ep, desc, ext.Interface()) 490 } else { 491 old, err := GetExtension(ep, desc) 492 var sl reflect.Value 493 if err == nil { 494 sl = reflect.ValueOf(old) // existing slice 495 } else { 496 sl = reflect.MakeSlice(typ, 0, 1) 497 } 498 sl = reflect.Append(sl, ext) 499 SetExtension(ep, desc, sl.Interface()) 500 } 501 } else { 502 // This is a normal, non-extension field. 503 fi, props, ok := structFieldByName(st, tok.value) 504 if !ok { 505 return p.errorf("unknown field name %q in %v", tok.value, st) 506 } 507 508 dst := sv.Field(fi) 509 isDstNil := isNil(dst) 510 511 // Check that it's not already set if it's not a repeated field. 512 if !props.Repeated && !isDstNil { 513 return p.errorf("non-repeated field %q was repeated", tok.value) 514 } 515 516 if err := p.checkForColon(props, st.Field(fi).Type); err != nil { 517 return err 518 } 519 520 // Parse into the field. 521 if err := p.readAny(dst, props); err != nil { 522 return err 523 } 524 525 if props.Required { 526 reqCount-- 527 } 528 } 529 530 // For backward compatibility, permit a semicolon or comma after a field. 531 tok = p.next() 532 if tok.err != nil { 533 return tok.err 534 } 535 if tok.value != ";" && tok.value != "," { 536 p.back() 537 } 538 } 539 540 if reqCount > 0 { 541 return p.missingRequiredFieldError(sv) 542 } 543 return nil 544 } 545 546 func (p *textParser) readAny(v reflect.Value, props *Properties) *ParseError { 547 tok := p.next() 548 if tok.err != nil { 549 return tok.err 550 } 551 if tok.value == "" { 552 return p.errorf("unexpected EOF") 553 } 554 555 switch fv := v; fv.Kind() { 556 case reflect.Slice: 557 at := v.Type() 558 if at.Elem().Kind() == reflect.Uint8 { 559 // Special case for []byte 560 if tok.value[0] != '"' && tok.value[0] != '\'' { 561 // Deliberately written out here, as the error after 562 // this switch statement would write "invalid []byte: ...", 563 // which is not as user-friendly. 564 return p.errorf("invalid string: %v", tok.value) 565 } 566 bytes := []byte(tok.unquoted) 567 fv.Set(reflect.ValueOf(bytes)) 568 return nil 569 } 570 // Repeated field. May already exist. 571 flen := fv.Len() 572 if flen == fv.Cap() { 573 nav := reflect.MakeSlice(at, flen, 2*flen+1) 574 reflect.Copy(nav, fv) 575 fv.Set(nav) 576 } 577 fv.SetLen(flen + 1) 578 579 // Read one. 580 p.back() 581 return p.readAny(fv.Index(flen), props) 582 case reflect.Bool: 583 // Either "true", "false", 1 or 0. 584 switch tok.value { 585 case "true", "1": 586 fv.SetBool(true) 587 return nil 588 case "false", "0": 589 fv.SetBool(false) 590 return nil 591 } 592 case reflect.Float32, reflect.Float64: 593 v := tok.value 594 // Ignore 'f' for compatibility with output generated by C++, but don't 595 // remove 'f' when the value is "-inf" or "inf". 596 if strings.HasSuffix(v, "f") && tok.value != "-inf" && tok.value != "inf" { 597 v = v[:len(v)-1] 598 } 599 if f, err := strconv.ParseFloat(v, fv.Type().Bits()); err == nil { 600 fv.SetFloat(f) 601 return nil 602 } 603 case reflect.Int32: 604 if x, err := strconv.ParseInt(tok.value, 0, 32); err == nil { 605 fv.SetInt(x) 606 return nil 607 } 608 if len(props.Enum) == 0 { 609 break 610 } 611 m, ok := enumValueMaps[props.Enum] 612 if !ok { 613 break 614 } 615 x, ok := m[tok.value] 616 if !ok { 617 break 618 } 619 fv.SetInt(int64(x)) 620 return nil 621 case reflect.Int64: 622 if x, err := strconv.ParseInt(tok.value, 0, 64); err == nil { 623 fv.SetInt(x) 624 return nil 625 } 626 case reflect.Ptr: 627 // A basic field (indirected through pointer), or a repeated message/group 628 p.back() 629 fv.Set(reflect.New(fv.Type().Elem())) 630 return p.readAny(fv.Elem(), props) 631 case reflect.String: 632 if tok.value[0] == '"' || tok.value[0] == '\'' { 633 fv.SetString(tok.unquoted) 634 return nil 635 } 636 case reflect.Struct: 637 var terminator string 638 switch tok.value { 639 case "{": 640 terminator = "}" 641 case "<": 642 terminator = ">" 643 default: 644 return p.errorf("expected '{' or '<', found %q", tok.value) 645 } 646 return p.readStruct(fv, terminator) 647 case reflect.Uint32: 648 if x, err := strconv.ParseUint(tok.value, 0, 32); err == nil { 649 fv.SetUint(uint64(x)) 650 return nil 651 } 652 case reflect.Uint64: 653 if x, err := strconv.ParseUint(tok.value, 0, 64); err == nil { 654 fv.SetUint(x) 655 return nil 656 } 657 } 658 return p.errorf("invalid %v: %v", v.Type(), tok.value) 659 } 660 661 // UnmarshalText reads a protocol buffer in Text format. UnmarshalText resets pb 662 // before starting to unmarshal, so any existing data in pb is always removed. 663 func UnmarshalText(s string, pb Message) error { 664 pb.Reset() 665 v := reflect.ValueOf(pb) 666 if pe := newTextParser(s).readStruct(v.Elem(), ""); pe != nil { 667 return pe 668 } 669 return nil 670 }