github.com/boki/go-xmp@v1.0.1/xmp/node.go (about) 1 // Copyright (c) 2017-2018 Alexander Eichhorn 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"): you may 4 // not use this file except in compliance with the License. You may obtain 5 // a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 12 // License for the specific language governing permissions and limitations 13 // under the License. 14 15 package xmp 16 17 import ( 18 "encoding/xml" 19 "errors" 20 "fmt" 21 "sort" 22 "strings" 23 ) 24 25 var ( 26 nodePool = make(chan *Node, 5000) 27 npAllocs, npFrees, npHits, npReturns int64 28 errNotFound = errors.New("not found") 29 ) 30 31 type Node struct { 32 XMLName xml.Name // node name and namespace 33 Attr AttrList // captures all unbound attributes and XMP qualifiers 34 Model Model // XmpCore, DublinCore, etc 35 Value string 36 Nodes NodeList // child nodes 37 } 38 39 type Attr struct { 40 Name xml.Name 41 Value string 42 } 43 44 type AttrList []Attr 45 46 func (x AttrList) IsZero() bool { 47 if len(x) == 0 { 48 return true 49 } 50 for _, v := range x { 51 if v.Value != "" { 52 return false 53 } 54 } 55 return true 56 } 57 58 func (x AttrList) XML() []xml.Attr { 59 l := make([]xml.Attr, len(x)) 60 for i, v := range x { 61 l[i] = xml.Attr(v) 62 } 63 return l 64 } 65 66 func (x *AttrList) From(l []xml.Attr) { 67 *x = make(AttrList, len(l)) 68 for i, v := range l { 69 (*x)[i] = Attr(v) 70 } 71 } 72 73 var EmptyName = xml.Name{} 74 75 func NewName(local string) xml.Name { 76 return xml.Name{Local: local} 77 } 78 79 func NewNode(name xml.Name) *Node { 80 var n *Node 81 select { 82 case n = <-nodePool: // Try to get one from the nodePool 83 npHits++ 84 n.XMLName = name 85 n.Attr = nil 86 n.Nodes = nil 87 n.Model = nil 88 n.Value = "" 89 default: // All in use, create a new, temporary: 90 npAllocs++ 91 n = &Node{ 92 XMLName: name, 93 Attr: nil, 94 Nodes: nil, 95 Model: nil, 96 Value: "", 97 } 98 } 99 100 return n 101 } 102 103 func (n *Node) Close() { 104 for _, v := range n.Nodes { 105 v.Close() 106 } 107 n.XMLName = xml.Name{} 108 n.Attr = nil 109 n.Nodes = nil 110 n.Model = nil 111 n.Value = "" 112 select { 113 case nodePool <- n: // try to put back into the nodePool 114 npReturns++ 115 default: // pool is full, will be garbage collected 116 npFrees++ 117 } 118 } 119 120 func copyNode(x *Node) *Node { 121 n := NewNode(x.XMLName) 122 n.Value = x.Value 123 n.Model = x.Model 124 n.Attr = make([]Attr, len(x.Attr)) 125 copy(n.Attr, x.Attr) 126 n.Nodes = copyNodes(x.Nodes) 127 return n 128 } 129 130 func copyNodes(x NodeList) NodeList { 131 l := make(NodeList, 0, len(x)) 132 for _, v := range x { 133 l = append(l, copyNode(v)) 134 } 135 return l 136 } 137 138 type NodeList []*Node 139 140 func (x *NodeList) AddNode(n *Node) *Node { 141 for i, l := 0, len(*x); i < l; i++ { 142 if (*x)[i].XMLName.Local == n.XMLName.Local { 143 (*x)[i] = n 144 return n 145 } 146 } 147 return x.AppendNode(n) 148 } 149 150 func (x *NodeList) AppendNode(n *Node) *Node { 151 *x = append(*x, n) 152 return n 153 } 154 155 func (n *NodeList) FindNode(ns *Namespace) *Node { 156 return n.FindNodeByName(ns.GetName()) 157 } 158 159 func (n *NodeList) FindNodeByName(prefix string) *Node { 160 for _, v := range *n { 161 if v.Name() == prefix { 162 return v 163 } 164 if v.Model != nil && v.Model.Can(prefix) { 165 return v 166 } 167 } 168 return nil 169 } 170 171 func (x *NodeList) Index(n *Node) int { 172 for i, v := range *x { 173 if v == n { 174 return i 175 } 176 } 177 return -1 178 } 179 180 func (x *NodeList) RemoveNode(n *Node) *Node { 181 if idx := x.Index(n); idx > -1 { 182 *x = append((*x)[:idx], (*x)[idx+1:]...) 183 } 184 return n 185 } 186 187 func (n *Node) IsZero() bool { 188 empty := n.Model == nil && n.Value == "" && (len(n.Attr) == 0 || n.Attr.IsZero()) 189 if !empty { 190 return false 191 } 192 for _, v := range n.Nodes { 193 empty = empty && v.IsZero() 194 } 195 return empty 196 } 197 198 func (n *Node) Name() string { 199 return stripPrefix(n.XMLName.Local) 200 } 201 202 func (n *Node) FullName() string { 203 if n.XMLName.Space != "" { 204 return NsRegistry.Short(n.XMLName.Space, n.XMLName.Local) 205 } 206 return n.XMLName.Local 207 } 208 209 func (n *Node) Namespace() string { 210 ns := n.XMLName.Space 211 if ns == "" { 212 ns = getPrefix(n.XMLName.Local) 213 } 214 return ns 215 } 216 217 func (n *Node) Namespaces(d *Document) NamespaceList { 218 m := make(map[string]bool) 219 220 // keep node namespace 221 if name := n.Namespace(); name != "" { 222 m[name] = true 223 } 224 225 // add model namespaces 226 if n.Model != nil { 227 for _, v := range n.Model.Namespaces() { 228 m[v.GetName()] = true 229 } 230 } 231 232 // walk attributes 233 for _, v := range n.Attr { 234 m[getPrefix(v.Name.Local)] = true 235 } 236 237 // walk subnodes and capture used namespaces 238 var l NamespaceList 239 for _, v := range n.Nodes { 240 l = append(l, v.Namespaces(d)...) 241 } 242 243 for name, _ := range m { 244 ns := d.findNsByPrefix(name) 245 if ns != nil && ns != nsRDF && ns != nsXML { 246 l = append(l, ns) 247 } 248 } 249 250 // keep unique namespaces only 251 return l.RemoveDups() 252 } 253 254 // keep list of nodes unique, overwrite contents when names equal 255 func (n *Node) AddNode(x *Node) *Node { 256 if x == n { 257 panic(fmt.Errorf("xmp: node loop detected")) 258 } 259 return n.Nodes.AddNode(x) 260 } 261 262 // append in any case 263 func (n *Node) AppendNode(x *Node) *Node { 264 if x == n { 265 panic(fmt.Errorf("xmp: node loop detected")) 266 } 267 return n.Nodes.AppendNode(x) 268 } 269 270 func (n *Node) Clear() { 271 for _, v := range n.Nodes { 272 v.Close() 273 } 274 n.Nodes = nil 275 } 276 277 func (n *Node) RemoveNode(x *Node) *Node { 278 if x == n { 279 panic(fmt.Errorf("xmp: node loop detected")) 280 } 281 return n.Nodes.RemoveNode(x) 282 } 283 284 func (n Node) IsArray() bool { 285 if len(n.Nodes) != 1 { 286 return false 287 } 288 switch n.Nodes[0].FullName() { 289 case "rdf:Seq", "rdf:Bag", "rdf:Alt": 290 return true 291 default: 292 return false 293 } 294 } 295 296 func (n Node) ArrayType() ArrayType { 297 if len(n.Nodes) == 1 { 298 switch n.Nodes[0].FullName() { 299 case "rdf:Seq": 300 return ArrayTypeOrdered 301 case "rdf:Bag": 302 return ArrayTypeUnordered 303 case "rdf:Alt": 304 return ArrayTypeAlternative 305 } 306 } 307 return ArrayType("") 308 } 309 310 func (n *Node) AddAttr(attr Attr) { 311 for i, l := 0, len(n.Attr); i < l; i++ { 312 if n.Attr[i].Name.Local == attr.Name.Local { 313 n.Attr[i].Value = attr.Value 314 return 315 } 316 } 317 n.Attr = append(n.Attr, attr) 318 } 319 320 // keep list of attributes unique, overwrite value when names equal 321 func (n *Node) AddStringAttr(name, value string) { 322 n.AddAttr(Attr{Name: xml.Name{Local: name}, Value: value}) 323 } 324 325 func (n *Node) GetAttr(ns, name string) []Attr { 326 l := make([]Attr, 0) 327 for _, v := range n.Attr { 328 if ns != "" && v.Name.Space != ns { 329 continue 330 } 331 if name != "" && stripPrefix(v.Name.Local) != name { 332 continue 333 } 334 l = append(l, v) 335 } 336 return l 337 } 338 339 func (n *Node) MarshalXML(e *xml.Encoder, start xml.StartElement) error { 340 if n.XMLName.Local == "" { 341 return nil 342 } 343 344 start.Name = n.XMLName 345 start.Attr = n.Attr.XML() 346 if n.Model != nil { 347 return e.EncodeElement(struct { 348 Data Model 349 Nodes []*Node 350 }{ 351 Data: n.Model, 352 Nodes: n.Nodes, 353 }, start) 354 355 } else { 356 return e.EncodeElement(struct { 357 Data string `xml:",chardata"` 358 Nodes []*Node 359 }{ 360 Data: n.Value, 361 Nodes: n.Nodes, 362 }, start) 363 364 } 365 } 366 367 func (n *Node) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { 368 var nodes []*Node 369 var done bool 370 for !done { 371 t, err := d.Token() 372 if err != nil { 373 return err 374 } 375 switch t := t.(type) { 376 case xml.CharData: 377 n.Value = strings.TrimSpace(string(t)) 378 case xml.StartElement: 379 x := NewNode(emptyName) 380 x.UnmarshalXML(d, t) 381 nodes = append(nodes, x) 382 case xml.EndElement: 383 done = true 384 } 385 } 386 n.XMLName = start.Name 387 n.Attr.From(start.Attr) 388 n.Nodes = nodes 389 return nil 390 } 391 392 func (n *Node) GetPath(path Path) (string, error) { 393 name, path := path.PopFront() 394 name, idx, lang := parsePathSegment(name) 395 if idx < -1 { 396 return "", fmt.Errorf("path field %s: invalid index", name) 397 } 398 // fmt.Printf("Get Node path ns=%s name=%s len=%d rest=%v idx=%d lang=%s\n", path.NamespacePrefix(), name, path.Len(), path, idx, lang) 399 if name == "" && idx == -1 && lang == "" { 400 if path.Len() == 0 { 401 return n.Value, nil 402 } 403 // ignore empty path segments and recurse 404 return n.GetPath(path) 405 } 406 407 // lookup name in node list or attributes 408 node := n.Nodes.FindNodeByName(stripPrefix(name)) 409 if node != nil { 410 switch { 411 case idx > -1: 412 // drill two levels deep into array nodes bag/seq+li 413 if len(node.Nodes) == 0 || len(node.Nodes[0].Nodes) <= idx { 414 return "", nil 415 } 416 return node.Nodes[0].Nodes[idx].GetPath(path) 417 case lang != "": 418 // drill two levels deep into alt-array nodes alt+li 419 if len(node.Nodes) == 0 || len(node.Nodes[0].Nodes) == 0 { 420 return "", nil 421 } 422 for _, v := range node.Nodes[0].Nodes { 423 attr := v.GetAttr("", "lang") 424 if len(attr) == 0 { 425 continue 426 } 427 if attr[0].Value == string(lang) { 428 return v.Value, nil 429 } 430 } 431 return "", nil 432 default: 433 return node.GetPath(path) 434 } 435 } 436 437 if attr := n.GetAttr("", stripPrefix(name)); len(attr) > 0 { 438 return attr[0].Value, nil 439 } 440 return "", nil 441 } 442 443 func (n *Node) SetPath(path Path, value string, flags SyncFlags) error { 444 name, path := path.PopFront() 445 name, idx, lang := parsePathSegment(name) 446 if idx < -1 { 447 return fmt.Errorf("path field %s: invalid index", name) 448 } 449 450 // fmt.Printf("Set Node path ns=%s len=%d, path=%s, name=%s rest=%v idx=%d lang=%s\n", path.NamespacePrefix(), path.Len(), path.String(), name, path, idx, lang) 451 if name == "" && idx == -1 && lang == "" { 452 if path.Len() == 0 { 453 n.Value = value 454 } 455 return nil 456 } 457 458 // handle attribute 459 if attr := n.GetAttr("", stripPrefix(name)); len(attr) > 0 { 460 switch { 461 case flags&REPLACE > 0 && value != "": 462 attr[0].Value = value 463 return nil 464 case flags&DELETE > 0 && value == "": 465 attr[0].Value = value 466 // will be ignored on next marshal 467 return nil 468 } 469 } 470 471 // handle nodes 472 node := n.Nodes.FindNodeByName(stripPrefix(name)) 473 if node == nil { 474 if flags&CREATE > 0 && value != "" { 475 if name != "" && !hasPrefix(name) { 476 name = path.NamespacePrefix() + ":" + name 477 } 478 node = n.AddNode(NewNode(NewName(name))) 479 } else { 480 return fmt.Errorf("CREATE flag required to make node '%s'", name) 481 } 482 } 483 switch { 484 case idx > -1 || (node.IsArray() && lang == ""): 485 arr := node.Nodes.FindNodeByName("Seq") 486 if arr == nil { 487 arr = node.Nodes.FindNodeByName("Bag") 488 } 489 if arr == nil && flags&CREATE == 0 { 490 return fmt.Errorf("CREATE flag required to make array '%s'", name) 491 } 492 if arr == nil { 493 arr = node.AddNode(NewNode(NewName("rdf:Seq"))) 494 } 495 496 // recurse when we're not at the end of the path 497 if path.Len() > 0 { 498 if idx < 0 { 499 idx = 0 500 } 501 if l := len(arr.Nodes); l <= idx { 502 if flags&(CREATE|APPEND) == 0 && value != "" { 503 return fmt.Errorf("CREATE flag required to extend array '%s'", name) 504 } 505 for ; l <= idx; l++ { 506 arr.AppendNode(NewNode(NewName("rdf:li"))) 507 } 508 } 509 return arr.Nodes[idx].SetPath(path, value, flags) 510 } 511 512 // when at end of path flags tell what to do 513 switch { 514 case flags&UNIQUE > 0 && value != "" && idx == -1: 515 // append if not exists 516 for _, v := range arr.Nodes { 517 if v.Value == value { 518 return nil 519 } 520 } 521 li := arr.AppendNode(NewNode(NewName("rdf:li"))) 522 li.Value = value 523 524 case flags&APPEND > 0 && value != "" && idx == -1: 525 // always append 526 li := arr.AppendNode(NewNode(NewName("rdf:li"))) 527 li.Value = value 528 529 case flags&(REPLACE|CREATE) > 0 && value != "" && idx == -1: 530 // replace the entire xmp array 531 arr.Clear() 532 li := arr.AppendNode(NewNode(NewName("rdf:li"))) 533 li.Value = value 534 535 case flags&(REPLACE|CREATE) > 0 && value != "" && idx > -1: 536 // replace a single item, add intermediate index positions 537 if l := len(arr.Nodes); l <= idx { 538 for ; l <= idx; l++ { 539 arr.AppendNode(NewNode(NewName("rdf:li"))) 540 } 541 } 542 arr.Nodes[idx].Value = value 543 544 case flags&DELETE > 0 && value == "" && idx == -1: 545 // delete the entire array 546 node.RemoveNode(arr).Close() 547 548 case flags&DELETE > 0 && value == "" && idx > -1: 549 // delete a single item 550 if idx < len(arr.Nodes) { 551 arr.Nodes = append(arr.Nodes[:idx], arr.Nodes[idx+1:]...) 552 } 553 default: 554 return fmt.Errorf("unsupported flag combination %v for %s", flags, name) 555 } 556 557 // AltString array 558 case lang != "": 559 arr := node.Nodes.FindNodeByName("Alt") 560 if arr == nil && flags&(CREATE|APPEND) == 0 { 561 return fmt.Errorf("CREATE flag required to extend array '%s'", name) 562 } 563 if arr == nil { 564 arr = node.AddNode(NewNode(NewName("rdf:Alt"))) 565 } 566 switch { 567 case flags&UNIQUE > 0 && value != "": 568 // append source when not exist 569 for _, v := range arr.Nodes { 570 if attr := v.GetAttr("", "lang"); len(attr) > 0 { 571 if attr[0].Value == lang && v.Value == value { 572 return nil 573 } 574 } 575 } 576 li := arr.AppendNode(NewNode(NewName("rdf:li"))) 577 li.AddStringAttr("xml:lang", lang) 578 li.Value = value 579 580 case flags&APPEND > 0 && value != "": 581 // append source value 582 li := arr.AppendNode(NewNode(NewName("rdf:li"))) 583 li.AddStringAttr("xml:lang", lang) 584 li.Value = value 585 586 case flags&(REPLACE|CREATE) > 0 && value != "" && lang != "": 587 // replace single entry 588 for _, v := range arr.Nodes { 589 if attr := v.GetAttr("", "lang"); len(attr) > 0 { 590 if attr[0].Value == lang { 591 v.Value = value 592 return nil 593 } 594 } 595 } 596 597 case flags&(REPLACE|CREATE) > 0 && value != "" && lang == "": 598 // replace entire AltString with a new version 599 arr.Clear() 600 li := NewNode(NewName("rdf:li")) 601 li.AddStringAttr("xml:lang", lang) 602 li.Value = value 603 arr.Nodes = NodeList{li} 604 605 case flags&DELETE > 0 && value == "" && lang != "": 606 // delete a specific language 607 for _, v := range arr.Nodes { 608 if attr := v.GetAttr("", "lang"); len(attr) > 0 { 609 if attr[0].Value == lang { 610 arr.RemoveNode(v).Close() 611 return nil 612 } 613 } 614 } 615 616 case flags&DELETE > 0 && value == "" && lang == "": 617 // remove and close the entire array 618 node.RemoveNode(arr).Close() 619 default: 620 return fmt.Errorf("unsupported flag combination %v", flags) 621 } 622 623 default: 624 if path.Len() > 0 { 625 return node.SetPath(path, value, flags) 626 } 627 // fmt.Printf("Set Node path ns=%s len=%d, path=%s, name=%s\n", path.NamespacePrefix(), path.Len(), path.String(), name) 628 switch { 629 case flags&(REPLACE|CREATE) > 0 && value != "": 630 node.Value = value 631 return nil 632 case flags&DELETE > 0 && value == "": 633 node.Value = value 634 node.Clear() 635 // will be ignored on next marshal 636 return nil 637 default: 638 return fmt.Errorf("unsupported flag combination %v", flags) 639 } 640 } 641 642 return nil 643 } 644 645 func (n *Node) ListPaths(path Path) (PathValueList, error) { 646 l := make(PathValueList, 0) 647 switch n.FullName() { 648 case "rdf:Seq", "rdf:Bag": 649 for i, li := range n.Nodes { 650 _, walker := path.Pop() 651 walker = walker.AppendIndex(i) 652 for _, v := range li.Nodes { 653 name := v.Name() 654 if v.Namespace() != path.NamespacePrefix() { 655 name = v.FullName() 656 } 657 r, err := v.ListPaths(walker.Push(name)) 658 if err != nil { 659 return nil, err 660 } 661 l = append(l, r...) 662 } 663 } 664 case "rdf:Alt": 665 for _, li := range n.Nodes { 666 lang := "x-default" 667 if attr := li.GetAttr("", "lang"); len(attr) > 0 { 668 lang = attr[0].Value 669 } 670 _, walker := path.Pop() 671 walker = walker.AppendIndexString(lang) 672 l = append(l, PathValue{ 673 Path: walker, 674 Value: li.Value, 675 }) 676 } 677 default: 678 for _, a := range n.Attr { 679 if skipField(a.Name) { 680 continue 681 } 682 name := a.Name.Local 683 if hasPrefix(name) && getPrefix(name) == path.NamespacePrefix() { 684 name = stripPrefix(name) 685 } 686 l = append(l, PathValue{ 687 Path: path.Push(name), 688 Value: a.Value, 689 }) 690 } 691 for _, v := range n.Nodes { 692 if skipField(v.XMLName) { 693 continue 694 } 695 name := v.Name() 696 if v.Namespace() != path.NamespacePrefix() { 697 name = v.FullName() 698 } 699 r, err := v.ListPaths(path.Push(name)) 700 if err != nil { 701 return nil, err 702 } 703 l = append(l, r...) 704 } 705 if n.Value != "" { 706 l = append(l, PathValue{ 707 Path: path, 708 Value: n.Value, 709 }) 710 } 711 } 712 sort.Sort(byPath(l)) 713 return l, nil 714 } 715 716 func (n *Node) translate(d *Decoder) { 717 d.translate(&n.XMLName) 718 // Note: don't use `for .. range` here because it copies 719 // structures, but we intend to alter the node tree 720 for i, l := 0, len(n.Attr); i < l; i++ { 721 d.translate(&n.Attr[i].Name) 722 } 723 for i, l := 0, len(n.Nodes); i < l; i++ { 724 n.Nodes[i].translate(d) 725 } 726 }