github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/pkg/encoding/xml/read.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package xml 6 7 import ( 8 "bytes" 9 "errors" 10 "reflect" 11 "strconv" 12 "strings" 13 "time" 14 ) 15 16 // BUG(rsc): Mapping between XML elements and data structures is inherently flawed: 17 // an XML element is an order-dependent collection of anonymous 18 // values, while a data structure is an order-independent collection 19 // of named values. 20 // See package json for a textual representation more suitable 21 // to data structures. 22 23 // Unmarshal parses the XML-encoded data and stores the result in 24 // the value pointed to by v, which must be an arbitrary struct, 25 // slice, or string. Well-formed data that does not fit into v is 26 // discarded. 27 // 28 // Because Unmarshal uses the reflect package, it can only assign 29 // to exported (upper case) fields. Unmarshal uses a case-sensitive 30 // comparison to match XML element names to tag values and struct 31 // field names. 32 // 33 // Unmarshal maps an XML element to a struct using the following rules. 34 // In the rules, the tag of a field refers to the value associated with the 35 // key 'xml' in the struct field's tag (see the example above). 36 // 37 // * If the struct has a field of type []byte or string with tag 38 // ",innerxml", Unmarshal accumulates the raw XML nested inside the 39 // element in that field. The rest of the rules still apply. 40 // 41 // * If the struct has a field named XMLName of type xml.Name, 42 // Unmarshal records the element name in that field. 43 // 44 // * If the XMLName field has an associated tag of the form 45 // "name" or "namespace-URL name", the XML element must have 46 // the given name (and, optionally, name space) or else Unmarshal 47 // returns an error. 48 // 49 // * If the XML element has an attribute whose name matches a 50 // struct field name with an associated tag containing ",attr" or 51 // the explicit name in a struct field tag of the form "name,attr", 52 // Unmarshal records the attribute value in that field. 53 // 54 // * If the XML element contains character data, that data is 55 // accumulated in the first struct field that has tag "chardata". 56 // The struct field may have type []byte or string. 57 // If there is no such field, the character data is discarded. 58 // 59 // * If the XML element contains comments, they are accumulated in 60 // the first struct field that has tag ",comments". The struct 61 // field may have type []byte or string. If there is no such 62 // field, the comments are discarded. 63 // 64 // * If the XML element contains a sub-element whose name matches 65 // the prefix of a tag formatted as "a" or "a>b>c", unmarshal 66 // will descend into the XML structure looking for elements with the 67 // given names, and will map the innermost elements to that struct 68 // field. A tag starting with ">" is equivalent to one starting 69 // with the field name followed by ">". 70 // 71 // * If the XML element contains a sub-element whose name matches 72 // a struct field's XMLName tag and the struct field has no 73 // explicit name tag as per the previous rule, unmarshal maps 74 // the sub-element to that struct field. 75 // 76 // * If the XML element contains a sub-element whose name matches a 77 // field without any mode flags (",attr", ",chardata", etc), Unmarshal 78 // maps the sub-element to that struct field. 79 // 80 // * If the XML element contains a sub-element that hasn't matched any 81 // of the above rules and the struct has a field with tag ",any", 82 // unmarshal maps the sub-element to that struct field. 83 // 84 // * An anonymous struct field is handled as if the fields of its 85 // value were part of the outer struct. 86 // 87 // * A struct field with tag "-" is never unmarshalled into. 88 // 89 // Unmarshal maps an XML element to a string or []byte by saving the 90 // concatenation of that element's character data in the string or 91 // []byte. The saved []byte is never nil. 92 // 93 // Unmarshal maps an attribute value to a string or []byte by saving 94 // the value in the string or slice. 95 // 96 // Unmarshal maps an XML element to a slice by extending the length of 97 // the slice and mapping the element to the newly created value. 98 // 99 // Unmarshal maps an XML element or attribute value to a bool by 100 // setting it to the boolean value represented by the string. 101 // 102 // Unmarshal maps an XML element or attribute value to an integer or 103 // floating-point field by setting the field to the result of 104 // interpreting the string value in decimal. There is no check for 105 // overflow. 106 // 107 // Unmarshal maps an XML element to an xml.Name by recording the 108 // element name. 109 // 110 // Unmarshal maps an XML element to a pointer by setting the pointer 111 // to a freshly allocated value and then mapping the element to that value. 112 // 113 func Unmarshal(data []byte, v interface{}) error { 114 return NewDecoder(bytes.NewBuffer(data)).Decode(v) 115 } 116 117 // Decode works like xml.Unmarshal, except it reads the decoder 118 // stream to find the start element. 119 func (d *Decoder) Decode(v interface{}) error { 120 return d.DecodeElement(v, nil) 121 } 122 123 // DecodeElement works like xml.Unmarshal except that it takes 124 // a pointer to the start XML element to decode into v. 125 // It is useful when a client reads some raw XML tokens itself 126 // but also wants to defer to Unmarshal for some elements. 127 func (d *Decoder) DecodeElement(v interface{}, start *StartElement) error { 128 val := reflect.ValueOf(v) 129 if val.Kind() != reflect.Ptr { 130 return errors.New("non-pointer passed to Unmarshal") 131 } 132 return d.unmarshal(val.Elem(), start) 133 } 134 135 // An UnmarshalError represents an error in the unmarshalling process. 136 type UnmarshalError string 137 138 func (e UnmarshalError) Error() string { return string(e) } 139 140 // Unmarshal a single XML element into val. 141 func (p *Decoder) unmarshal(val reflect.Value, start *StartElement) error { 142 // Find start element if we need it. 143 if start == nil { 144 for { 145 tok, err := p.Token() 146 if err != nil { 147 return err 148 } 149 if t, ok := tok.(StartElement); ok { 150 start = &t 151 break 152 } 153 } 154 } 155 156 if pv := val; pv.Kind() == reflect.Ptr { 157 if pv.IsNil() { 158 pv.Set(reflect.New(pv.Type().Elem())) 159 } 160 val = pv.Elem() 161 } 162 163 var ( 164 data []byte 165 saveData reflect.Value 166 comment []byte 167 saveComment reflect.Value 168 saveXML reflect.Value 169 saveXMLIndex int 170 saveXMLData []byte 171 saveAny reflect.Value 172 sv reflect.Value 173 tinfo *typeInfo 174 err error 175 ) 176 177 switch v := val; v.Kind() { 178 default: 179 return errors.New("unknown type " + v.Type().String()) 180 181 case reflect.Interface: 182 // TODO: For now, simply ignore the field. In the near 183 // future we may choose to unmarshal the start 184 // element on it, if not nil. 185 return p.Skip() 186 187 case reflect.Slice: 188 typ := v.Type() 189 if typ.Elem().Kind() == reflect.Uint8 { 190 // []byte 191 saveData = v 192 break 193 } 194 195 // Slice of element values. 196 // Grow slice. 197 n := v.Len() 198 if n >= v.Cap() { 199 ncap := 2 * n 200 if ncap < 4 { 201 ncap = 4 202 } 203 new := reflect.MakeSlice(typ, n, ncap) 204 reflect.Copy(new, v) 205 v.Set(new) 206 } 207 v.SetLen(n + 1) 208 209 // Recur to read element into slice. 210 if err := p.unmarshal(v.Index(n), start); err != nil { 211 v.SetLen(n) 212 return err 213 } 214 return nil 215 216 case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, reflect.String: 217 saveData = v 218 219 case reflect.Struct: 220 typ := v.Type() 221 if typ == nameType { 222 v.Set(reflect.ValueOf(start.Name)) 223 break 224 } 225 if typ == timeType { 226 saveData = v 227 break 228 } 229 230 sv = v 231 tinfo, err = getTypeInfo(typ) 232 if err != nil { 233 return err 234 } 235 236 // Validate and assign element name. 237 if tinfo.xmlname != nil { 238 finfo := tinfo.xmlname 239 if finfo.name != "" && finfo.name != start.Name.Local { 240 return UnmarshalError("expected element type <" + finfo.name + "> but have <" + start.Name.Local + ">") 241 } 242 if finfo.xmlns != "" && finfo.xmlns != start.Name.Space { 243 e := "expected element <" + finfo.name + "> in name space " + finfo.xmlns + " but have " 244 if start.Name.Space == "" { 245 e += "no name space" 246 } else { 247 e += start.Name.Space 248 } 249 return UnmarshalError(e) 250 } 251 fv := finfo.value(sv) 252 if _, ok := fv.Interface().(Name); ok { 253 fv.Set(reflect.ValueOf(start.Name)) 254 } 255 } 256 257 // Assign attributes. 258 // Also, determine whether we need to save character data or comments. 259 for i := range tinfo.fields { 260 finfo := &tinfo.fields[i] 261 switch finfo.flags & fMode { 262 case fAttr: 263 strv := finfo.value(sv) 264 // Look for attribute. 265 for _, a := range start.Attr { 266 if a.Name.Local == finfo.name && (finfo.xmlns == "" || finfo.xmlns == a.Name.Space) { 267 copyValue(strv, []byte(a.Value)) 268 break 269 } 270 } 271 272 case fCharData: 273 if !saveData.IsValid() { 274 saveData = finfo.value(sv) 275 } 276 277 case fComment: 278 if !saveComment.IsValid() { 279 saveComment = finfo.value(sv) 280 } 281 282 case fAny, fAny | fElement: 283 if !saveAny.IsValid() { 284 saveAny = finfo.value(sv) 285 } 286 287 case fInnerXml: 288 if !saveXML.IsValid() { 289 saveXML = finfo.value(sv) 290 if p.saved == nil { 291 saveXMLIndex = 0 292 p.saved = new(bytes.Buffer) 293 } else { 294 saveXMLIndex = p.savedOffset() 295 } 296 } 297 } 298 } 299 } 300 301 // Find end element. 302 // Process sub-elements along the way. 303 Loop: 304 for { 305 var savedOffset int 306 if saveXML.IsValid() { 307 savedOffset = p.savedOffset() 308 } 309 tok, err := p.Token() 310 if err != nil { 311 return err 312 } 313 switch t := tok.(type) { 314 case StartElement: 315 consumed := false 316 if sv.IsValid() { 317 consumed, err = p.unmarshalPath(tinfo, sv, nil, &t) 318 if err != nil { 319 return err 320 } 321 if !consumed && saveAny.IsValid() { 322 consumed = true 323 if err := p.unmarshal(saveAny, &t); err != nil { 324 return err 325 } 326 } 327 } 328 if !consumed { 329 if err := p.Skip(); err != nil { 330 return err 331 } 332 } 333 334 case EndElement: 335 if saveXML.IsValid() { 336 saveXMLData = p.saved.Bytes()[saveXMLIndex:savedOffset] 337 if saveXMLIndex == 0 { 338 p.saved = nil 339 } 340 } 341 break Loop 342 343 case CharData: 344 if saveData.IsValid() { 345 data = append(data, t...) 346 } 347 348 case Comment: 349 if saveComment.IsValid() { 350 comment = append(comment, t...) 351 } 352 } 353 } 354 355 if err := copyValue(saveData, data); err != nil { 356 return err 357 } 358 359 switch t := saveComment; t.Kind() { 360 case reflect.String: 361 t.SetString(string(comment)) 362 case reflect.Slice: 363 t.Set(reflect.ValueOf(comment)) 364 } 365 366 switch t := saveXML; t.Kind() { 367 case reflect.String: 368 t.SetString(string(saveXMLData)) 369 case reflect.Slice: 370 t.Set(reflect.ValueOf(saveXMLData)) 371 } 372 373 return nil 374 } 375 376 func copyValue(dst reflect.Value, src []byte) (err error) { 377 if dst.Kind() == reflect.Ptr { 378 if dst.IsNil() { 379 dst.Set(reflect.New(dst.Type().Elem())) 380 } 381 dst = dst.Elem() 382 } 383 384 // Save accumulated data. 385 switch dst.Kind() { 386 case reflect.Invalid: 387 // Probably a commendst. 388 default: 389 return errors.New("cannot happen: unknown type " + dst.Type().String()) 390 case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: 391 itmp, err := strconv.ParseInt(string(src), 10, dst.Type().Bits()) 392 if err != nil { 393 return err 394 } 395 dst.SetInt(itmp) 396 case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: 397 utmp, err := strconv.ParseUint(string(src), 10, dst.Type().Bits()) 398 if err != nil { 399 return err 400 } 401 dst.SetUint(utmp) 402 case reflect.Float32, reflect.Float64: 403 ftmp, err := strconv.ParseFloat(string(src), dst.Type().Bits()) 404 if err != nil { 405 return err 406 } 407 dst.SetFloat(ftmp) 408 case reflect.Bool: 409 value, err := strconv.ParseBool(strings.TrimSpace(string(src))) 410 if err != nil { 411 return err 412 } 413 dst.SetBool(value) 414 case reflect.String: 415 dst.SetString(string(src)) 416 case reflect.Slice: 417 if len(src) == 0 { 418 // non-nil to flag presence 419 src = []byte{} 420 } 421 dst.SetBytes(src) 422 case reflect.Struct: 423 if dst.Type() == timeType { 424 tv, err := time.Parse(time.RFC3339, string(src)) 425 if err != nil { 426 return err 427 } 428 dst.Set(reflect.ValueOf(tv)) 429 } 430 } 431 return nil 432 } 433 434 // unmarshalPath walks down an XML structure looking for wanted 435 // paths, and calls unmarshal on them. 436 // The consumed result tells whether XML elements have been consumed 437 // from the Decoder until start's matching end element, or if it's 438 // still untouched because start is uninteresting for sv's fields. 439 func (p *Decoder) unmarshalPath(tinfo *typeInfo, sv reflect.Value, parents []string, start *StartElement) (consumed bool, err error) { 440 recurse := false 441 Loop: 442 for i := range tinfo.fields { 443 finfo := &tinfo.fields[i] 444 if finfo.flags&fElement == 0 || len(finfo.parents) < len(parents) || finfo.xmlns != "" && finfo.xmlns != start.Name.Space { 445 continue 446 } 447 for j := range parents { 448 if parents[j] != finfo.parents[j] { 449 continue Loop 450 } 451 } 452 if len(finfo.parents) == len(parents) && finfo.name == start.Name.Local { 453 // It's a perfect match, unmarshal the field. 454 return true, p.unmarshal(finfo.value(sv), start) 455 } 456 if len(finfo.parents) > len(parents) && finfo.parents[len(parents)] == start.Name.Local { 457 // It's a prefix for the field. Break and recurse 458 // since it's not ok for one field path to be itself 459 // the prefix for another field path. 460 recurse = true 461 462 // We can reuse the same slice as long as we 463 // don't try to append to it. 464 parents = finfo.parents[:len(parents)+1] 465 break 466 } 467 } 468 if !recurse { 469 // We have no business with this element. 470 return false, nil 471 } 472 // The element is not a perfect match for any field, but one 473 // or more fields have the path to this element as a parent 474 // prefix. Recurse and attempt to match these. 475 for { 476 var tok Token 477 tok, err = p.Token() 478 if err != nil { 479 return true, err 480 } 481 switch t := tok.(type) { 482 case StartElement: 483 consumed2, err := p.unmarshalPath(tinfo, sv, parents, &t) 484 if err != nil { 485 return true, err 486 } 487 if !consumed2 { 488 if err := p.Skip(); err != nil { 489 return true, err 490 } 491 } 492 case EndElement: 493 return true, nil 494 } 495 } 496 } 497 498 // Skip reads tokens until it has consumed the end element 499 // matching the most recent start element already consumed. 500 // It recurs if it encounters a start element, so it can be used to 501 // skip nested structures. 502 // It returns nil if it finds an end element matching the start 503 // element; otherwise it returns an error describing the problem. 504 func (d *Decoder) Skip() error { 505 for { 506 tok, err := d.Token() 507 if err != nil { 508 return err 509 } 510 switch tok.(type) { 511 case StartElement: 512 if err := d.Skip(); err != nil { 513 return err 514 } 515 case EndElement: 516 return nil 517 } 518 } 519 }