golang.org/x/net@v0.25.1-0.20240516223405-c87a5b62e243/webdav/internal/xml/read.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package xml 6 7 import ( 8 "bytes" 9 "encoding" 10 "errors" 11 "fmt" 12 "reflect" 13 "strconv" 14 "strings" 15 ) 16 17 // BUG(rsc): Mapping between XML elements and data structures is inherently flawed: 18 // an XML element is an order-dependent collection of anonymous 19 // values, while a data structure is an order-independent collection 20 // of named values. 21 // See package json for a textual representation more suitable 22 // to data structures. 23 24 // Unmarshal parses the XML-encoded data and stores the result in 25 // the value pointed to by v, which must be an arbitrary struct, 26 // slice, or string. Well-formed data that does not fit into v is 27 // discarded. 28 // 29 // Because Unmarshal uses the reflect package, it can only assign 30 // to exported (upper case) fields. Unmarshal uses a case-sensitive 31 // comparison to match XML element names to tag values and struct 32 // field names. 33 // 34 // Unmarshal maps an XML element to a struct using the following rules. 35 // In the rules, the tag of a field refers to the value associated with the 36 // key 'xml' in the struct field's tag (see the example above). 37 // 38 // - If the struct has a field of type []byte or string with tag 39 // ",innerxml", Unmarshal accumulates the raw XML nested inside the 40 // element in that field. The rest of the rules still apply. 41 // 42 // - If the struct has a field named XMLName of type xml.Name, 43 // Unmarshal records the element name in that field. 44 // 45 // - If the XMLName field has an associated tag of the form 46 // "name" or "namespace-URL name", the XML element must have 47 // the given name (and, optionally, name space) or else Unmarshal 48 // returns an error. 49 // 50 // - If the XML element has an attribute whose name matches a 51 // struct field name with an associated tag containing ",attr" or 52 // the explicit name in a struct field tag of the form "name,attr", 53 // Unmarshal records the attribute value in that field. 54 // 55 // - If the XML element contains character data, that data is 56 // accumulated in the first struct field that has tag ",chardata". 57 // The struct field may have type []byte or string. 58 // If there is no such field, the character data is discarded. 59 // 60 // - If the XML element contains comments, they are accumulated in 61 // the first struct field that has tag ",comment". The struct 62 // field may have type []byte or string. If there is no such 63 // field, the comments are discarded. 64 // 65 // - If the XML element contains a sub-element whose name matches 66 // the prefix of a tag formatted as "a" or "a>b>c", unmarshal 67 // will descend into the XML structure looking for elements with the 68 // given names, and will map the innermost elements to that struct 69 // field. A tag starting with ">" is equivalent to one starting 70 // with the field name followed by ">". 71 // 72 // - If the XML element contains a sub-element whose name matches 73 // a struct field's XMLName tag and the struct field has no 74 // explicit name tag as per the previous rule, unmarshal maps 75 // the sub-element to that struct field. 76 // 77 // - If the XML element contains a sub-element whose name matches a 78 // field without any mode flags (",attr", ",chardata", etc), Unmarshal 79 // maps the sub-element to that struct field. 80 // 81 // - If the XML element contains a sub-element that hasn't matched any 82 // of the above rules and the struct has a field with tag ",any", 83 // unmarshal maps the sub-element to that struct field. 84 // 85 // - An anonymous struct field is handled as if the fields of its 86 // value were part of the outer struct. 87 // 88 // - A struct field with tag "-" is never unmarshalled into. 89 // 90 // Unmarshal maps an XML element to a string or []byte by saving the 91 // concatenation of that element's character data in the string or 92 // []byte. The saved []byte is never nil. 93 // 94 // Unmarshal maps an attribute value to a string or []byte by saving 95 // the value in the string or slice. 96 // 97 // Unmarshal maps an XML element to a slice by extending the length of 98 // the slice and mapping the element to the newly created value. 99 // 100 // Unmarshal maps an XML element or attribute value to a bool by 101 // setting it to the boolean value represented by the string. 102 // 103 // Unmarshal maps an XML element or attribute value to an integer or 104 // floating-point field by setting the field to the result of 105 // interpreting the string value in decimal. There is no check for 106 // overflow. 107 // 108 // Unmarshal maps an XML element to an xml.Name by recording the 109 // element name. 110 // 111 // Unmarshal maps an XML element to a pointer by setting the pointer 112 // to a freshly allocated value and then mapping the element to that value. 113 func Unmarshal(data []byte, v interface{}) error { 114 return NewDecoder(bytes.NewReader(data)).Decode(v) 115 } 116 117 // Decode works like xml.Unmarshal, except it reads the decoder 118 // stream to find the start element. 119 func (d *Decoder) Decode(v interface{}) error { 120 return d.DecodeElement(v, nil) 121 } 122 123 // DecodeElement works like xml.Unmarshal except that it takes 124 // a pointer to the start XML element to decode into v. 125 // It is useful when a client reads some raw XML tokens itself 126 // but also wants to defer to Unmarshal for some elements. 127 func (d *Decoder) DecodeElement(v interface{}, start *StartElement) error { 128 val := reflect.ValueOf(v) 129 if val.Kind() != reflect.Ptr { 130 return errors.New("non-pointer passed to Unmarshal") 131 } 132 return d.unmarshal(val.Elem(), start) 133 } 134 135 // An UnmarshalError represents an error in the unmarshalling process. 136 type UnmarshalError string 137 138 func (e UnmarshalError) Error() string { return string(e) } 139 140 // Unmarshaler is the interface implemented by objects that can unmarshal 141 // an XML element description of themselves. 142 // 143 // UnmarshalXML decodes a single XML element 144 // beginning with the given start element. 145 // If it returns an error, the outer call to Unmarshal stops and 146 // returns that error. 147 // UnmarshalXML must consume exactly one XML element. 148 // One common implementation strategy is to unmarshal into 149 // a separate value with a layout matching the expected XML 150 // using d.DecodeElement, and then to copy the data from 151 // that value into the receiver. 152 // Another common strategy is to use d.Token to process the 153 // XML object one token at a time. 154 // UnmarshalXML may not use d.RawToken. 155 type Unmarshaler interface { 156 UnmarshalXML(d *Decoder, start StartElement) error 157 } 158 159 // UnmarshalerAttr is the interface implemented by objects that can unmarshal 160 // an XML attribute description of themselves. 161 // 162 // UnmarshalXMLAttr decodes a single XML attribute. 163 // If it returns an error, the outer call to Unmarshal stops and 164 // returns that error. 165 // UnmarshalXMLAttr is used only for struct fields with the 166 // "attr" option in the field tag. 167 type UnmarshalerAttr interface { 168 UnmarshalXMLAttr(attr Attr) error 169 } 170 171 // receiverType returns the receiver type to use in an expression like "%s.MethodName". 172 func receiverType(val interface{}) string { 173 t := reflect.TypeOf(val) 174 if t.Name() != "" { 175 return t.String() 176 } 177 return "(" + t.String() + ")" 178 } 179 180 // unmarshalInterface unmarshals a single XML element into val. 181 // start is the opening tag of the element. 182 func (p *Decoder) unmarshalInterface(val Unmarshaler, start *StartElement) error { 183 // Record that decoder must stop at end tag corresponding to start. 184 p.pushEOF() 185 186 p.unmarshalDepth++ 187 err := val.UnmarshalXML(p, *start) 188 p.unmarshalDepth-- 189 if err != nil { 190 p.popEOF() 191 return err 192 } 193 194 if !p.popEOF() { 195 return fmt.Errorf("xml: %s.UnmarshalXML did not consume entire <%s> element", receiverType(val), start.Name.Local) 196 } 197 198 return nil 199 } 200 201 // unmarshalTextInterface unmarshals a single XML element into val. 202 // The chardata contained in the element (but not its children) 203 // is passed to the text unmarshaler. 204 func (p *Decoder) unmarshalTextInterface(val encoding.TextUnmarshaler, start *StartElement) error { 205 var buf []byte 206 depth := 1 207 for depth > 0 { 208 t, err := p.Token() 209 if err != nil { 210 return err 211 } 212 switch t := t.(type) { 213 case CharData: 214 if depth == 1 { 215 buf = append(buf, t...) 216 } 217 case StartElement: 218 depth++ 219 case EndElement: 220 depth-- 221 } 222 } 223 return val.UnmarshalText(buf) 224 } 225 226 // unmarshalAttr unmarshals a single XML attribute into val. 227 func (p *Decoder) unmarshalAttr(val reflect.Value, attr Attr) error { 228 if val.Kind() == reflect.Ptr { 229 if val.IsNil() { 230 val.Set(reflect.New(val.Type().Elem())) 231 } 232 val = val.Elem() 233 } 234 235 if val.CanInterface() && val.Type().Implements(unmarshalerAttrType) { 236 // This is an unmarshaler with a non-pointer receiver, 237 // so it's likely to be incorrect, but we do what we're told. 238 return val.Interface().(UnmarshalerAttr).UnmarshalXMLAttr(attr) 239 } 240 if val.CanAddr() { 241 pv := val.Addr() 242 if pv.CanInterface() && pv.Type().Implements(unmarshalerAttrType) { 243 return pv.Interface().(UnmarshalerAttr).UnmarshalXMLAttr(attr) 244 } 245 } 246 247 // Not an UnmarshalerAttr; try encoding.TextUnmarshaler. 248 if val.CanInterface() && val.Type().Implements(textUnmarshalerType) { 249 // This is an unmarshaler with a non-pointer receiver, 250 // so it's likely to be incorrect, but we do what we're told. 251 return val.Interface().(encoding.TextUnmarshaler).UnmarshalText([]byte(attr.Value)) 252 } 253 if val.CanAddr() { 254 pv := val.Addr() 255 if pv.CanInterface() && pv.Type().Implements(textUnmarshalerType) { 256 return pv.Interface().(encoding.TextUnmarshaler).UnmarshalText([]byte(attr.Value)) 257 } 258 } 259 260 copyValue(val, []byte(attr.Value)) 261 return nil 262 } 263 264 var ( 265 unmarshalerType = reflect.TypeOf((*Unmarshaler)(nil)).Elem() 266 unmarshalerAttrType = reflect.TypeOf((*UnmarshalerAttr)(nil)).Elem() 267 textUnmarshalerType = reflect.TypeOf((*encoding.TextUnmarshaler)(nil)).Elem() 268 ) 269 270 // Unmarshal a single XML element into val. 271 func (p *Decoder) unmarshal(val reflect.Value, start *StartElement) error { 272 // Find start element if we need it. 273 if start == nil { 274 for { 275 tok, err := p.Token() 276 if err != nil { 277 return err 278 } 279 if t, ok := tok.(StartElement); ok { 280 start = &t 281 break 282 } 283 } 284 } 285 286 // Load value from interface, but only if the result will be 287 // usefully addressable. 288 if val.Kind() == reflect.Interface && !val.IsNil() { 289 e := val.Elem() 290 if e.Kind() == reflect.Ptr && !e.IsNil() { 291 val = e 292 } 293 } 294 295 if val.Kind() == reflect.Ptr { 296 if val.IsNil() { 297 val.Set(reflect.New(val.Type().Elem())) 298 } 299 val = val.Elem() 300 } 301 302 if val.CanInterface() && val.Type().Implements(unmarshalerType) { 303 // This is an unmarshaler with a non-pointer receiver, 304 // so it's likely to be incorrect, but we do what we're told. 305 return p.unmarshalInterface(val.Interface().(Unmarshaler), start) 306 } 307 308 if val.CanAddr() { 309 pv := val.Addr() 310 if pv.CanInterface() && pv.Type().Implements(unmarshalerType) { 311 return p.unmarshalInterface(pv.Interface().(Unmarshaler), start) 312 } 313 } 314 315 if val.CanInterface() && val.Type().Implements(textUnmarshalerType) { 316 return p.unmarshalTextInterface(val.Interface().(encoding.TextUnmarshaler), start) 317 } 318 319 if val.CanAddr() { 320 pv := val.Addr() 321 if pv.CanInterface() && pv.Type().Implements(textUnmarshalerType) { 322 return p.unmarshalTextInterface(pv.Interface().(encoding.TextUnmarshaler), start) 323 } 324 } 325 326 var ( 327 data []byte 328 saveData reflect.Value 329 comment []byte 330 saveComment reflect.Value 331 saveXML reflect.Value 332 saveXMLIndex int 333 saveXMLData []byte 334 saveAny reflect.Value 335 sv reflect.Value 336 tinfo *typeInfo 337 err error 338 ) 339 340 switch v := val; v.Kind() { 341 default: 342 return errors.New("unknown type " + v.Type().String()) 343 344 case reflect.Interface: 345 // TODO: For now, simply ignore the field. In the near 346 // future we may choose to unmarshal the start 347 // element on it, if not nil. 348 return p.Skip() 349 350 case reflect.Slice: 351 typ := v.Type() 352 if typ.Elem().Kind() == reflect.Uint8 { 353 // []byte 354 saveData = v 355 break 356 } 357 358 // Slice of element values. 359 // Grow slice. 360 n := v.Len() 361 if n >= v.Cap() { 362 ncap := 2 * n 363 if ncap < 4 { 364 ncap = 4 365 } 366 new := reflect.MakeSlice(typ, n, ncap) 367 reflect.Copy(new, v) 368 v.Set(new) 369 } 370 v.SetLen(n + 1) 371 372 // Recur to read element into slice. 373 if err := p.unmarshal(v.Index(n), start); err != nil { 374 v.SetLen(n) 375 return err 376 } 377 return nil 378 379 case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, reflect.String: 380 saveData = v 381 382 case reflect.Struct: 383 typ := v.Type() 384 if typ == nameType { 385 v.Set(reflect.ValueOf(start.Name)) 386 break 387 } 388 389 sv = v 390 tinfo, err = getTypeInfo(typ) 391 if err != nil { 392 return err 393 } 394 395 // Validate and assign element name. 396 if tinfo.xmlname != nil { 397 finfo := tinfo.xmlname 398 if finfo.name != "" && finfo.name != start.Name.Local { 399 return UnmarshalError("expected element type <" + finfo.name + "> but have <" + start.Name.Local + ">") 400 } 401 if finfo.xmlns != "" && finfo.xmlns != start.Name.Space { 402 e := "expected element <" + finfo.name + "> in name space " + finfo.xmlns + " but have " 403 if start.Name.Space == "" { 404 e += "no name space" 405 } else { 406 e += start.Name.Space 407 } 408 return UnmarshalError(e) 409 } 410 fv := finfo.value(sv) 411 if _, ok := fv.Interface().(Name); ok { 412 fv.Set(reflect.ValueOf(start.Name)) 413 } 414 } 415 416 // Assign attributes. 417 // Also, determine whether we need to save character data or comments. 418 for i := range tinfo.fields { 419 finfo := &tinfo.fields[i] 420 switch finfo.flags & fMode { 421 case fAttr: 422 strv := finfo.value(sv) 423 // Look for attribute. 424 for _, a := range start.Attr { 425 if a.Name.Local == finfo.name && (finfo.xmlns == "" || finfo.xmlns == a.Name.Space) { 426 if err := p.unmarshalAttr(strv, a); err != nil { 427 return err 428 } 429 break 430 } 431 } 432 433 case fCharData: 434 if !saveData.IsValid() { 435 saveData = finfo.value(sv) 436 } 437 438 case fComment: 439 if !saveComment.IsValid() { 440 saveComment = finfo.value(sv) 441 } 442 443 case fAny, fAny | fElement: 444 if !saveAny.IsValid() { 445 saveAny = finfo.value(sv) 446 } 447 448 case fInnerXml: 449 if !saveXML.IsValid() { 450 saveXML = finfo.value(sv) 451 if p.saved == nil { 452 saveXMLIndex = 0 453 p.saved = new(bytes.Buffer) 454 } else { 455 saveXMLIndex = p.savedOffset() 456 } 457 } 458 } 459 } 460 } 461 462 // Find end element. 463 // Process sub-elements along the way. 464 Loop: 465 for { 466 var savedOffset int 467 if saveXML.IsValid() { 468 savedOffset = p.savedOffset() 469 } 470 tok, err := p.Token() 471 if err != nil { 472 return err 473 } 474 switch t := tok.(type) { 475 case StartElement: 476 consumed := false 477 if sv.IsValid() { 478 consumed, err = p.unmarshalPath(tinfo, sv, nil, &t) 479 if err != nil { 480 return err 481 } 482 if !consumed && saveAny.IsValid() { 483 consumed = true 484 if err := p.unmarshal(saveAny, &t); err != nil { 485 return err 486 } 487 } 488 } 489 if !consumed { 490 if err := p.Skip(); err != nil { 491 return err 492 } 493 } 494 495 case EndElement: 496 if saveXML.IsValid() { 497 saveXMLData = p.saved.Bytes()[saveXMLIndex:savedOffset] 498 if saveXMLIndex == 0 { 499 p.saved = nil 500 } 501 } 502 break Loop 503 504 case CharData: 505 if saveData.IsValid() { 506 data = append(data, t...) 507 } 508 509 case Comment: 510 if saveComment.IsValid() { 511 comment = append(comment, t...) 512 } 513 } 514 } 515 516 if saveData.IsValid() && saveData.CanInterface() && saveData.Type().Implements(textUnmarshalerType) { 517 if err := saveData.Interface().(encoding.TextUnmarshaler).UnmarshalText(data); err != nil { 518 return err 519 } 520 saveData = reflect.Value{} 521 } 522 523 if saveData.IsValid() && saveData.CanAddr() { 524 pv := saveData.Addr() 525 if pv.CanInterface() && pv.Type().Implements(textUnmarshalerType) { 526 if err := pv.Interface().(encoding.TextUnmarshaler).UnmarshalText(data); err != nil { 527 return err 528 } 529 saveData = reflect.Value{} 530 } 531 } 532 533 if err := copyValue(saveData, data); err != nil { 534 return err 535 } 536 537 switch t := saveComment; t.Kind() { 538 case reflect.String: 539 t.SetString(string(comment)) 540 case reflect.Slice: 541 t.Set(reflect.ValueOf(comment)) 542 } 543 544 switch t := saveXML; t.Kind() { 545 case reflect.String: 546 t.SetString(string(saveXMLData)) 547 case reflect.Slice: 548 t.Set(reflect.ValueOf(saveXMLData)) 549 } 550 551 return nil 552 } 553 554 func copyValue(dst reflect.Value, src []byte) (err error) { 555 dst0 := dst 556 557 if dst.Kind() == reflect.Ptr { 558 if dst.IsNil() { 559 dst.Set(reflect.New(dst.Type().Elem())) 560 } 561 dst = dst.Elem() 562 } 563 564 // Save accumulated data. 565 switch dst.Kind() { 566 case reflect.Invalid: 567 // Probably a comment. 568 default: 569 return errors.New("cannot unmarshal into " + dst0.Type().String()) 570 case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: 571 itmp, err := strconv.ParseInt(string(src), 10, dst.Type().Bits()) 572 if err != nil { 573 return err 574 } 575 dst.SetInt(itmp) 576 case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: 577 utmp, err := strconv.ParseUint(string(src), 10, dst.Type().Bits()) 578 if err != nil { 579 return err 580 } 581 dst.SetUint(utmp) 582 case reflect.Float32, reflect.Float64: 583 ftmp, err := strconv.ParseFloat(string(src), dst.Type().Bits()) 584 if err != nil { 585 return err 586 } 587 dst.SetFloat(ftmp) 588 case reflect.Bool: 589 value, err := strconv.ParseBool(strings.TrimSpace(string(src))) 590 if err != nil { 591 return err 592 } 593 dst.SetBool(value) 594 case reflect.String: 595 dst.SetString(string(src)) 596 case reflect.Slice: 597 if len(src) == 0 { 598 // non-nil to flag presence 599 src = []byte{} 600 } 601 dst.SetBytes(src) 602 } 603 return nil 604 } 605 606 // unmarshalPath walks down an XML structure looking for wanted 607 // paths, and calls unmarshal on them. 608 // The consumed result tells whether XML elements have been consumed 609 // from the Decoder until start's matching end element, or if it's 610 // still untouched because start is uninteresting for sv's fields. 611 func (p *Decoder) unmarshalPath(tinfo *typeInfo, sv reflect.Value, parents []string, start *StartElement) (consumed bool, err error) { 612 recurse := false 613 Loop: 614 for i := range tinfo.fields { 615 finfo := &tinfo.fields[i] 616 if finfo.flags&fElement == 0 || len(finfo.parents) < len(parents) || finfo.xmlns != "" && finfo.xmlns != start.Name.Space { 617 continue 618 } 619 for j := range parents { 620 if parents[j] != finfo.parents[j] { 621 continue Loop 622 } 623 } 624 if len(finfo.parents) == len(parents) && finfo.name == start.Name.Local { 625 // It's a perfect match, unmarshal the field. 626 return true, p.unmarshal(finfo.value(sv), start) 627 } 628 if len(finfo.parents) > len(parents) && finfo.parents[len(parents)] == start.Name.Local { 629 // It's a prefix for the field. Break and recurse 630 // since it's not ok for one field path to be itself 631 // the prefix for another field path. 632 recurse = true 633 634 // We can reuse the same slice as long as we 635 // don't try to append to it. 636 parents = finfo.parents[:len(parents)+1] 637 break 638 } 639 } 640 if !recurse { 641 // We have no business with this element. 642 return false, nil 643 } 644 // The element is not a perfect match for any field, but one 645 // or more fields have the path to this element as a parent 646 // prefix. Recurse and attempt to match these. 647 for { 648 var tok Token 649 tok, err = p.Token() 650 if err != nil { 651 return true, err 652 } 653 switch t := tok.(type) { 654 case StartElement: 655 consumed2, err := p.unmarshalPath(tinfo, sv, parents, &t) 656 if err != nil { 657 return true, err 658 } 659 if !consumed2 { 660 if err := p.Skip(); err != nil { 661 return true, err 662 } 663 } 664 case EndElement: 665 return true, nil 666 } 667 } 668 } 669 670 // Skip reads tokens until it has consumed the end element 671 // matching the most recent start element already consumed. 672 // It recurs if it encounters a start element, so it can be used to 673 // skip nested structures. 674 // It returns nil if it finds an end element matching the start 675 // element; otherwise it returns an error describing the problem. 676 func (d *Decoder) Skip() error { 677 for { 678 tok, err := d.Token() 679 if err != nil { 680 return err 681 } 682 switch tok.(type) { 683 case StartElement: 684 if err := d.Skip(); err != nil { 685 return err 686 } 687 case EndElement: 688 return nil 689 } 690 } 691 }