github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/cmd/fix/testdata/reflect.read.go.in (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package xml 6 7 import ( 8 "bytes" 9 "fmt" 10 "io" 11 "os" 12 "reflect" 13 "strconv" 14 "strings" 15 "unicode" 16 "utf8" 17 ) 18 19 // BUG(rsc): Mapping between XML elements and data structures is inherently flawed: 20 // an XML element is an order-dependent collection of anonymous 21 // values, while a data structure is an order-independent collection 22 // of named values. 23 // See package json for a textual representation more suitable 24 // to data structures. 25 26 // Unmarshal parses an XML element from r and uses the 27 // reflect library to fill in an arbitrary struct, slice, or string 28 // pointed at by val. Well-formed data that does not fit 29 // into val is discarded. 30 // 31 // For example, given these definitions: 32 // 33 // type Email struct { 34 // Where string "attr" 35 // Addr string 36 // } 37 // 38 // type Result struct { 39 // XMLName xml.Name "result" 40 // Name string 41 // Phone string 42 // Email []Email 43 // Groups []string "group>value" 44 // } 45 // 46 // result := Result{Name: "name", Phone: "phone", Email: nil} 47 // 48 // unmarshalling the XML input 49 // 50 // <result> 51 // <email where="home"> 52 // <addr>gre@example.com</addr> 53 // </email> 54 // <email where='work'> 55 // <addr>gre@work.com</addr> 56 // </email> 57 // <name>Grace R. Emlin</name> 58 // <group> 59 // <value>Friends</value> 60 // <value>Squash</value> 61 // </group> 62 // <address>123 Main Street</address> 63 // </result> 64 // 65 // via Unmarshal(r, &result) is equivalent to assigning 66 // 67 // r = Result{xml.Name{"", "result"}, 68 // "Grace R. 
Emlin", // name 69 // "phone", // no phone given 70 // []Email{ 71 // Email{"home", "gre@example.com"}, 72 // Email{"work", "gre@work.com"}, 73 // }, 74 // []string{"Friends", "Squash"}, 75 // } 76 // 77 // Note that the field r.Phone has not been modified and 78 // that the XML <address> element was discarded. Also, the field 79 // Groups was assigned considering the element path provided in the 80 // field tag. 81 // 82 // Because Unmarshal uses the reflect package, it can only 83 // assign to upper case fields. Unmarshal uses a case-insensitive 84 // comparison to match XML element names to struct field names. 85 // 86 // Unmarshal maps an XML element to a struct using the following rules: 87 // 88 // * If the struct has a field of type []byte or string with tag "innerxml", 89 // Unmarshal accumulates the raw XML nested inside the element 90 // in that field. The rest of the rules still apply. 91 // 92 // * If the struct has a field named XMLName of type xml.Name, 93 // Unmarshal records the element name in that field. 94 // 95 // * If the XMLName field has an associated tag string of the form 96 // "tag" or "namespace-URL tag", the XML element must have 97 // the given tag (and, optionally, name space) or else Unmarshal 98 // returns an error. 99 // 100 // * If the XML element has an attribute whose name matches a 101 // struct field of type string with tag "attr", Unmarshal records 102 // the attribute value in that field. 103 // 104 // * If the XML element contains character data, that data is 105 // accumulated in the first struct field that has tag "chardata". 106 // The struct field may have type []byte or string. 107 // If there is no such field, the character data is discarded. 
108 // 109 // * If the XML element contains a sub-element whose name matches 110 // the prefix of a struct field tag formatted as "a>b>c", unmarshal 111 // will descend into the XML structure looking for elements with the 112 // given names, and will map the innermost elements to that struct field. 113 // A struct field tag starting with ">" is equivalent to one starting 114 // with the field name followed by ">". 115 // 116 // * If the XML element contains a sub-element whose name 117 // matches a struct field whose tag is neither "attr" nor "chardata", 118 // Unmarshal maps the sub-element to that struct field. 119 // Otherwise, if the struct has a field named Any, unmarshal 120 // maps the sub-element to that struct field. 121 // 122 // Unmarshal maps an XML element to a string or []byte by saving the 123 // concatenation of that element's character data in the string or []byte. 124 // 125 // Unmarshal maps an XML element to a slice by extending the length 126 // of the slice and mapping the element to the newly created value. 127 // 128 // Unmarshal maps an XML element to a bool by setting it to the boolean 129 // value represented by the string. 130 // 131 // Unmarshal maps an XML element to an integer or floating-point 132 // field by setting the field to the result of interpreting the string 133 // value in decimal. There is no check for overflow. 134 // 135 // Unmarshal maps an XML element to an xml.Name by recording the 136 // element name. 137 // 138 // Unmarshal maps an XML element to a pointer by setting the pointer 139 // to a freshly allocated value and then mapping the element to that value. 
140 // 141 func Unmarshal(r io.Reader, val interface{}) os.Error { 142 v, ok := reflect.NewValue(val).(*reflect.PtrValue) 143 if !ok { 144 return os.NewError("non-pointer passed to Unmarshal") 145 } 146 p := NewParser(r) 147 elem := v.Elem() 148 err := p.unmarshal(elem, nil) 149 if err != nil { 150 return err 151 } 152 return nil 153 } 154 155 // An UnmarshalError represents an error in the unmarshalling process. 156 type UnmarshalError string 157 158 func (e UnmarshalError) String() string { return string(e) } 159 160 // A TagPathError represents an error in the unmarshalling process 161 // caused by the use of field tags with conflicting paths. 162 type TagPathError struct { 163 Struct reflect.Type 164 Field1, Tag1 string 165 Field2, Tag2 string 166 } 167 168 func (e *TagPathError) String() string { 169 return fmt.Sprintf("%s field %q with tag %q conflicts with field %q with tag %q", e.Struct, e.Field1, e.Tag1, e.Field2, e.Tag2) 170 } 171 172 // The Parser's Unmarshal method is like xml.Unmarshal 173 // except that it can be passed a pointer to the initial start element, 174 // useful when a client reads some raw XML tokens itself 175 // but also defers to Unmarshal for some elements. 176 // Passing a nil start element indicates that Unmarshal should 177 // read the token stream to find the start element. 178 func (p *Parser) Unmarshal(val interface{}, start *StartElement) os.Error { 179 v, ok := reflect.NewValue(val).(*reflect.PtrValue) 180 if !ok { 181 return os.NewError("non-pointer passed to Unmarshal") 182 } 183 return p.unmarshal(v.Elem(), start) 184 } 185 186 // fieldName strips invalid characters from an XML name 187 // to create a valid Go struct name. It also converts the 188 // name to lower case letters. 
//
// Leading underscores are dropped first. Of the remaining characters,
// underscores, digits and letters are kept (passed through unicode.ToLower)
// and every other character is removed.
func fieldName(original string) string {

	var i int
	//remove leading underscores
	for i = 0; i < len(original) && original[i] == '_'; i++ {
	}

	return strings.Map(
		func(x int) int {
			if x == '_' || unicode.IsDigit(x) || unicode.IsLetter(x) {
				return unicode.ToLower(x)
			}
			// A negative result makes strings.Map drop the character.
			return -1
		},
		original[i:])
}

// Unmarshal a single XML element into val.
//
// If start is nil, tokens are read and discarded until a StartElement is
// found; otherwise *start is used as the element's start tag and reading
// continues from the token after it. The function then consumes tokens up
// to the matching EndElement, storing attributes, character data, comments,
// raw inner XML and sub-elements into val according to val's dynamic type
// and, for structs, the field tags.
//
// It returns the first error produced by p.Token, p.Skip, addFieldPath, a
// nested unmarshal/unmarshalPaths call or a strconv conversion, an
// UnmarshalError when a struct's XMLName or attr field does not fit, or nil
// on success.
func (p *Parser) unmarshal(val reflect.Value, start *StartElement) os.Error {
	// Find start element if we need it.
	if start == nil {
		for {
			tok, err := p.Token()
			if err != nil {
				return err
			}
			if t, ok := tok.(StartElement); ok {
				start = &t
				break
			}
		}
	}

	// Pointer destination: when pv.Get() is 0 (the pointer holds no
	// address), allocate a zero value of the pointed-to type and make the
	// pointer refer to it; otherwise decode into the existing pointee.
	if pv, ok := val.(*reflect.PtrValue); ok {
		if pv.Get() == 0 {
			zv := reflect.MakeZero(pv.Type().(*reflect.PtrType).Elem())
			pv.PointTo(zv)
			val = zv
		} else {
			val = pv.Elem()
		}
	}

	var (
		data         []byte                 // character data accumulated while saveData != nil
		saveData     reflect.Value          // destination for character data, nil if none
		comment      []byte                 // comment text accumulated while saveComment != nil
		saveComment  reflect.Value          // destination for comments, nil if none
		saveXML      reflect.Value          // destination for raw inner XML, nil if none
		saveXMLIndex int                    // offset in p.saved where this element's inner XML starts
		saveXMLData  []byte                 // slice of p.saved captured when the end element is seen
		sv           *reflect.StructValue   // val as a struct, nil when val is not a (non-Name) struct
		styp         *reflect.StructType    // type of sv
		fieldPaths   map[string]pathInfo    // lower-cased "a>b>c" tag paths and their prefixes
	)

	// Decide, from val's dynamic type, where the element's content goes.
	switch v := val.(type) {
	default:
		return os.NewError("unknown type " + v.Type().String())

	case *reflect.SliceValue:
		typ := v.Type().(*reflect.SliceType)
		if typ.Elem().Kind() == reflect.Uint8 {
			// []byte
			saveData = v
			break
		}

		// Slice of element values.
		// Grow slice.
		n := v.Len()
		if n >= v.Cap() {
			// Double the capacity, with a minimum of 4.
			ncap := 2 * n
			if ncap < 4 {
				ncap = 4
			}
			// Note: this local shadows the builtin new within this case.
			new := reflect.MakeSlice(typ, n, ncap)
			reflect.Copy(new, v)
			v.Set(new)
		}
		v.SetLen(n + 1)

		// Recur to read element into slice.
		if err := p.unmarshal(v.Elem(n), start); err != nil {
			// Undo the length extension so the failed element is not kept.
			v.SetLen(n)
			return err
		}
		return nil

	case *reflect.BoolValue, *reflect.FloatValue, *reflect.IntValue, *reflect.UintValue, *reflect.StringValue:
		// Scalars and strings are filled from the element's character data.
		saveData = v

	case *reflect.StructValue:
		// A destination of type Name just records the element name.
		if _, ok := v.Interface().(Name); ok {
			v.Set(reflect.NewValue(start.Name).(*reflect.StructValue))
			break
		}

		sv = v
		typ := sv.Type().(*reflect.StructType)
		styp = typ
		// Assign name.
		if f, ok := typ.FieldByName("XMLName"); ok {
			// Validate element name.
			if f.Tag != "" {
				// The tag is either "tag" or "namespace tag"; the text
				// after the last space is the local name.
				tag := f.Tag
				ns := ""
				i := strings.LastIndex(tag, " ")
				if i >= 0 {
					ns, tag = tag[0:i], tag[i+1:]
				}
				if tag != start.Name.Local {
					return UnmarshalError("expected element type <" + tag + "> but have <" + start.Name.Local + ">")
				}
				// The name space is only checked when the tag gives one.
				if ns != "" && ns != start.Name.Space {
					e := "expected element <" + tag + "> in name space " + ns + " but have "
					if start.Name.Space == "" {
						e += "no name space"
					} else {
						e += start.Name.Space
					}
					return UnmarshalError(e)
				}
			}

			// Save
			v := sv.FieldByIndex(f.Index)
			if _, ok := v.Interface().(Name); !ok {
				return UnmarshalError(sv.Type().String() + " field XMLName does not have type xml.Name")
			}
			v.(*reflect.StructValue).Set(reflect.NewValue(start.Name).(*reflect.StructValue))
		}

		// Assign attributes.
		// Also, determine whether we need to save character data or comments.
		for i, n := 0, typ.NumField(); i < n; i++ {
			f := typ.Field(i)
			switch f.Tag {
			case "attr":
				strv, ok := sv.FieldByIndex(f.Index).(*reflect.StringValue)
				if !ok {
					return UnmarshalError(sv.Type().String() + " field " + f.Name + " has attr tag but is not type string")
				}
				// Look for attribute.
				// The first attribute whose normalized name equals the
				// lower-cased field name wins.
				val := ""
				k := strings.ToLower(f.Name)
				for _, a := range start.Attr {
					if fieldName(a.Name.Local) == k {
						val = a.Value
						break
					}
				}
				// The field is always assigned: it becomes "" when no
				// attribute matched.
				strv.Set(val)

			case "comment":
				// Only the first field tagged "comment" receives comments.
				if saveComment == nil {
					saveComment = sv.FieldByIndex(f.Index)
				}

			case "chardata":
				// Only the first field tagged "chardata" receives character data.
				if saveData == nil {
					saveData = sv.FieldByIndex(f.Index)
				}

			case "innerxml":
				// Only the first field tagged "innerxml" receives the raw XML.
				if saveXML == nil {
					saveXML = sv.FieldByIndex(f.Index)
					// Start a new save buffer if none is active; otherwise
					// remember the current offset into the existing one.
					// NOTE(review): p.saved and p.savedOffset are defined
					// elsewhere; presumably the parser appends the raw input
					// it consumes to p.saved while it is non-nil - confirm.
					if p.saved == nil {
						saveXMLIndex = 0
						p.saved = new(bytes.Buffer)
					} else {
						saveXMLIndex = p.savedOffset()
					}
				}

			default:
				// Tags containing ">" describe an element path ("a>b>c").
				if strings.Contains(f.Tag, ">") {
					if fieldPaths == nil {
						fieldPaths = make(map[string]pathInfo)
					}
					path := strings.ToLower(f.Tag)
					// A leading ">" means the path starts with the field name.
					if strings.HasPrefix(f.Tag, ">") {
						path = strings.ToLower(f.Name) + path
					}
					// A single trailing ">" is dropped.
					if strings.HasSuffix(f.Tag, ">") {
						path = path[:len(path)-1]
					}
					err := addFieldPath(sv, fieldPaths, path, f.Index)
					if err != nil {
						return err
					}
				}
			}
		}
	}

	// Find end element.
	// Process sub-elements along the way.
Loop:
	for {
		// Capture the save-buffer offset before reading the next token; if
		// that token is this element's EndElement, the offset marks where
		// the inner XML stops.
		var savedOffset int
		if saveXML != nil {
			savedOffset = p.savedOffset()
		}
		tok, err := p.Token()
		if err != nil {
			return err
		}
		switch t := tok.(type) {
		case StartElement:
			// Sub-element.
			// Look up by tag name.
			if sv != nil {
				k := fieldName(t.Name.Local)

				// Tag paths take precedence over plain field-name matches.
				if fieldPaths != nil {
					if _, found := fieldPaths[k]; found {
						if err := p.unmarshalPaths(sv, fieldPaths, k, &t); err != nil {
							return err
						}
						continue Loop
					}
				}

				match := func(s string) bool {
					// check if the name matches ignoring case
					if strings.ToLower(s) != k {
						return false
					}
					// now check that it's public
					c, _ := utf8.DecodeRuneInString(s)
					return unicode.IsUpper(c)
				}

				f, found := styp.FieldByNameFunc(match)
				if !found { // fall back to mop-up field named "Any"
					f, found = styp.FieldByName("Any")
				}
				if found {
					if err := p.unmarshal(sv.FieldByIndex(f.Index), &t); err != nil {
						return err
					}
					continue Loop
				}
			}
			// Not saving sub-element but still have to skip over it.
			if err := p.Skip(); err != nil {
				return err
			}

		case EndElement:
			if saveXML != nil {
				saveXMLData = p.saved.Bytes()[saveXMLIndex:savedOffset]
				// Index 0 is what was recorded when this call allocated
				// p.saved above; release the buffer in that case.
				// NOTE(review): assumes an enclosing innerxml capture never
				// records offset 0 - confirm against savedOffset.
				if saveXMLIndex == 0 {
					p.saved = nil
				}
			}
			break Loop

		case CharData:
			if saveData != nil {
				data = append(data, t...)
			}

		case Comment:
			if saveComment != nil {
				comment = append(comment, t...)
			}
		}
	}

	// err is shared with the conversion helpers below: each stores its
	// strconv error here and reports success as a bool.
	var err os.Error
	// Helper functions for integer and unsigned integer conversions
	var itmp int64
	getInt64 := func() bool {
		itmp, err = strconv.Atoi64(string(data))
		// TODO: should check sizes
		return err == nil
	}
	var utmp uint64
	getUint64 := func() bool {
		utmp, err = strconv.Atoui64(string(data))
		// TODO: check for overflow?
		return err == nil
	}
	var ftmp float64
	getFloat64 := func() bool {
		ftmp, err = strconv.Atof64(string(data))
		// TODO: check for overflow?
		return err == nil
	}

	// Save accumulated data and comments
	switch t := saveData.(type) {
	case nil:
		// Probably a comment, handled below
	default:
		return os.NewError("cannot happen: unknown type " + t.Type().String())
	case *reflect.IntValue:
		if !getInt64() {
			return err
		}
		t.Set(itmp)
	case *reflect.UintValue:
		if !getUint64() {
			return err
		}
		t.Set(utmp)
	case *reflect.FloatValue:
		if !getFloat64() {
			return err
		}
		t.Set(ftmp)
	case *reflect.BoolValue:
		// Unlike the numeric cases, the text is trimmed before parsing.
		// This err is a new variable local to the case; it shadows the
		// outer err.
		value, err := strconv.Atob(strings.TrimSpace(string(data)))
		if err != nil {
			return err
		}
		t.Set(value)
	case *reflect.StringValue:
		t.Set(string(data))
	case *reflect.SliceValue:
		t.Set(reflect.NewValue(data).(*reflect.SliceValue))
	}

	// Comment and inner-XML destinations of any other type are silently
	// left untouched: these switches have no default case.
	switch t := saveComment.(type) {
	case *reflect.StringValue:
		t.Set(string(comment))
	case *reflect.SliceValue:
		t.Set(reflect.NewValue(comment).(*reflect.SliceValue))
	}

	switch t := saveXML.(type) {
	case *reflect.StringValue:
		t.Set(string(saveXMLData))
	case *reflect.SliceValue:
		t.Set(reflect.NewValue(saveXMLData).(*reflect.SliceValue))
	}

	return nil
}

// pathInfo is the value stored for each key of a tag-path map.
type pathInfo struct {
	fieldIdx []int // index of the struct field that contributed this path
	complete bool  // true for a full tag path, false for a proper prefix of one
}

// addFieldPath takes an element path such as "a>b>c" and fills the
// paths map with all paths leading to it ("a", "a>b", and "a>b>c").
// It is okay for paths to share a common, shorter prefix but not ok
// for one path to itself be a prefix of another.
540 func addFieldPath(sv *reflect.StructValue, paths map[string]pathInfo, path string, fieldIdx []int) os.Error { 541 if info, found := paths[path]; found { 542 return tagError(sv, info.fieldIdx, fieldIdx) 543 } 544 paths[path] = pathInfo{fieldIdx, true} 545 for { 546 i := strings.LastIndex(path, ">") 547 if i < 0 { 548 break 549 } 550 path = path[:i] 551 if info, found := paths[path]; found { 552 if info.complete { 553 return tagError(sv, info.fieldIdx, fieldIdx) 554 } 555 } else { 556 paths[path] = pathInfo{fieldIdx, false} 557 } 558 } 559 return nil 560 561 } 562 563 func tagError(sv *reflect.StructValue, idx1 []int, idx2 []int) os.Error { 564 t := sv.Type().(*reflect.StructType) 565 f1 := t.FieldByIndex(idx1) 566 f2 := t.FieldByIndex(idx2) 567 return &TagPathError{t, f1.Name, f1.Tag, f2.Name, f2.Tag} 568 } 569 570 // unmarshalPaths walks down an XML structure looking for 571 // wanted paths, and calls unmarshal on them. 572 func (p *Parser) unmarshalPaths(sv *reflect.StructValue, paths map[string]pathInfo, path string, start *StartElement) os.Error { 573 if info, _ := paths[path]; info.complete { 574 return p.unmarshal(sv.FieldByIndex(info.fieldIdx), start) 575 } 576 for { 577 tok, err := p.Token() 578 if err != nil { 579 return err 580 } 581 switch t := tok.(type) { 582 case StartElement: 583 k := path + ">" + fieldName(t.Name.Local) 584 if _, found := paths[k]; found { 585 if err := p.unmarshalPaths(sv, paths, k, &t); err != nil { 586 return err 587 } 588 continue 589 } 590 if err := p.Skip(); err != nil { 591 return err 592 } 593 case EndElement: 594 return nil 595 } 596 } 597 panic("unreachable") 598 } 599 600 // Have already read a start element. 601 // Read tokens until we find the end element. 602 // Token is taking care of making sure the 603 // end element matches the start element we saw. 
604 func (p *Parser) Skip() os.Error { 605 for { 606 tok, err := p.Token() 607 if err != nil { 608 return err 609 } 610 switch t := tok.(type) { 611 case StartElement: 612 if err := p.Skip(); err != nil { 613 return err 614 } 615 case EndElement: 616 return nil 617 } 618 } 619 panic("unreachable") 620 }