github.com/matislovas/ratago@v0.0.0-20240408115641-cc0857415a7a/xslt/stylesheet.go (about) 1 package xslt 2 3 import ( 4 "container/list" 5 "fmt" 6 "log" 7 "path" 8 "strconv" 9 "strings" 10 11 "github.com/matislovas/gokogiri/xml" 12 "github.com/matislovas/gokogiri/xpath" 13 ) 14 15 const XSLT_NAMESPACE = "http://www.w3.org/1999/XSL/Transform" 16 const XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace" 17 18 // Stylesheet is an XSLT 1.0 processor. 19 type Stylesheet struct { 20 Doc *xml.XmlDocument 21 Parent *Stylesheet //xsl:import 22 NamedTemplates map[string]*Template 23 NamespaceMapping map[string]string 24 NamespaceAlias map[string]string 25 ElementMatches map[string]*list.List //matches on element name 26 AttrMatches map[string]*list.List //matches on attr name 27 NodeMatches *list.List //matches on node() 28 TextMatches *list.List //matches on text() 29 PIMatches *list.List //matches on processing-instruction() 30 CommentMatches *list.List //matches on comment() 31 IdKeyMatches *list.List //matches on id() or key() 32 Imports *list.List 33 Variables map[string]*Variable 34 Functions map[string]xpath.XPathFunction 35 AttributeSets map[string]CompiledStep 36 ExcludePrefixes []string 37 ExtensionPrefixes []string 38 StripSpace []string 39 PreserveSpace []string 40 CDataElements []string 41 GlobalParameters []string 42 includes map[string]bool 43 Keys map[string]*Key 44 OutputMethod string //html, xml, text 45 DesiredEncoding string //encoding specified by xsl:output 46 OmitXmlDeclaration bool //defaults to false 47 IndentOutput bool //defaults to false 48 Standalone bool //defaults to false 49 doctypeSystem string 50 doctypePublic string 51 } 52 53 // StylesheetOptions to control processing. Parameters values are passed into 54 // the stylesheet via this structure. 55 type StylesheetOptions struct { 56 IndentOutput bool //force the output to be indented 57 Parameters map[string]interface{} //supply values for stylesheet parameters 58 } 59 60 // Returns true if the node is in the XSLT namespace 61 func IsXsltName(xmlnode xml.Node, name string) bool { 62 if xmlnode.Name() == name && xmlnode.Namespace() == XSLT_NAMESPACE { 63 return true 64 } 65 return false 66 } 67 68 // Returns true if the node is a whitespace-only text node 69 func IsBlank(xmlnode xml.Node) bool { 70 if xmlnode.NodeType() == xml.XML_TEXT_NODE || xmlnode.NodeType() == xml.XML_CDATA_SECTION_NODE { 71 content := xmlnode.Content() 72 if content == "" || strings.TrimSpace(content) == "" { 73 return true 74 } 75 } 76 return false 77 } 78 79 // ParseStylesheet compiles the stylesheet's XML representation 80 // and returns a Stylesheet instance. 81 // 82 // The fileuri argument is used to resolve relative paths for xsl:import and xsl:include 83 // instructions and should generally be the filename of the stylesheet. If you pass 84 // an empty string, the working directory will be used for path resolution. 85 func ParseStylesheet(doc *xml.XmlDocument, fileuri string) (style *Stylesheet, err error) { 86 style = &Stylesheet{Doc: doc, 87 NamespaceMapping: make(map[string]string), 88 NamespaceAlias: make(map[string]string), 89 ElementMatches: make(map[string]*list.List), 90 AttrMatches: make(map[string]*list.List), 91 PIMatches: list.New(), 92 CommentMatches: list.New(), 93 IdKeyMatches: list.New(), 94 NodeMatches: list.New(), 95 TextMatches: list.New(), 96 Imports: list.New(), 97 NamedTemplates: make(map[string]*Template), 98 AttributeSets: make(map[string]CompiledStep), 99 includes: make(map[string]bool), 100 Keys: make(map[string]*Key), 101 Functions: make(map[string]xpath.XPathFunction), 102 Variables: make(map[string]*Variable)} 103 104 // register the built-in XSLT functions 105 style.RegisterXsltFunctions() 106 107 //XsltParseStylesheetProcess 108 cur := xml.Node(doc.Root()) 109 110 // get all the namespace mappings 111 for _, ns := range cur.DeclaredNamespaces() { 112 style.NamespaceMapping[ns.Uri] = ns.Prefix 113 } 114 115 //get xsl:version, should be 1.0 or 2.0 116 version := cur.Attr("version") 117 if version != "1.0" { 118 log.Println("VERSION 1.0 expected") 119 } 120 121 //record excluded prefixes 122 excl := cur.Attr("exclude-result-prefixes") 123 if excl != "" { 124 style.ExcludePrefixes = strings.Fields(excl) 125 } 126 //record extension prefixes 127 ext := cur.Attr("extension-element-prefixes") 128 if ext != "" { 129 style.ExtensionPrefixes = strings.Fields(ext) 130 } 131 132 //if the root is an LRE, this is an simplified stylesheet 133 if !IsXsltName(cur, "stylesheet") && !IsXsltName(cur, "transform") { 134 template := &Template{Match: "/", Priority: 0} 135 template.CompileContent(doc) 136 style.compilePattern(template, "") 137 return 138 } 139 140 //optionally optimize by removing blank nodes, combining adjacent text nodes, etc 141 err = style.parseChildren(cur, fileuri) 142 143 //xsl:import (must be first) 144 //flag non-empty text nodes, non XSL-namespaced nodes 145 // actually registered extension namspaces are good! 146 //warn unknown XSLT element (forwards-compatible mode) 147 148 return 149 } 150 151 // Here we iterate through the children; this has been moved to its own function 152 // to facilitate the implementation of xsl:include (where we want the children to 153 // be treated as if they were part of the calling stylesheet) 154 func (style *Stylesheet) parseChildren(root xml.Node, fileuri string) (err error) { 155 //iterate through children 156 for cur := root.FirstChild(); cur != nil; cur = cur.NextSibling() { 157 //skip blank nodes 158 if IsBlank(cur) { 159 continue 160 } 161 162 //skip comment nodes 163 if cur.NodeType() == xml.XML_COMMENT_NODE { 164 continue 165 } 166 167 //handle templates 168 if IsXsltName(cur, "template") { 169 style.ParseTemplate(cur) 170 continue 171 } 172 173 if IsXsltName(cur, "variable") { 174 style.RegisterGlobalVariable(cur) 175 continue 176 } 177 178 if IsXsltName(cur, "key") { 179 name := cur.Attr("name") 180 use := cur.Attr("use") 181 match := cur.Attr("match") 182 k := &Key{make(map[string]xml.Nodeset), use, match} 183 style.Keys[name] = k 184 continue 185 } 186 187 if IsXsltName(cur, "param") { 188 name := cur.Attr("name") 189 // record that it's a global parameter - we'll check supplied options against this list 190 style.GlobalParameters = append(style.GlobalParameters, name) 191 style.RegisterGlobalVariable(cur) 192 continue 193 } 194 195 if IsXsltName(cur, "attribute-set") { 196 style.RegisterAttributeSet(cur) 197 continue 198 } 199 200 if IsXsltName(cur, "include") { 201 //check for recursion, multiple includes 202 loc := cur.Attr("href") 203 base := path.Dir(fileuri) 204 loc = path.Join(base, loc) 205 _, already := style.includes[loc] 206 if already { 207 panic("Multiple include detected of " + loc) 208 } 209 style.includes[loc] = true 210 211 //load the stylesheet 212 doc, e := xml.ReadFile(loc, xml.StrictParseOption) 213 if e != nil { 214 fmt.Println(e) 215 err = e 216 return 217 } 218 //_, _ = ParseStylesheet(doc, loc) 219 //update the including stylesheet 220 e = style.parseChildren(doc.Root(), loc) 221 if e != nil { 222 fmt.Println(e) 223 err = e 224 return 225 } 226 continue 227 } 228 229 if IsXsltName(cur, "import") { 230 //check for recursion, multiple includes 231 loc := cur.Attr("href") 232 base := path.Dir(fileuri) 233 loc = path.Join(base, loc) 234 _, already := style.includes[loc] 235 if already { 236 panic("Multiple include detected of " + loc) 237 } 238 style.includes[loc] = true 239 //increment import; new style context 240 doc, _ := xmlReadFile(loc) 241 _import, _ := ParseStylesheet(doc, loc) 242 style.Imports.PushFront(_import) 243 continue 244 } 245 246 if IsXsltName(cur, "output") { 247 cdata := cur.Attr("cdata-section-elements") 248 if cdata != "" { 249 style.CDataElements = strings.Fields(cdata) 250 } 251 style.OutputMethod = cur.Attr("method") 252 omit := cur.Attr("omit-xml-declaration") 253 if omit == "yes" { 254 style.OmitXmlDeclaration = true 255 } 256 indent := cur.Attr("indent") 257 if indent == "yes" { 258 style.IndentOutput = true 259 } 260 standalone := cur.Attr("standalone") 261 if standalone == "yes" { 262 style.Standalone = true 263 } 264 encoding := cur.Attr("encoding") 265 if encoding != "" && encoding != "utf-8" { 266 //TODO: emit a warning if we do not support the encoding 267 // if unsupported, leave blank to output default UTF-8 268 style.DesiredEncoding = encoding 269 } 270 style.doctypeSystem = cur.Attr("doctype-system") 271 style.doctypePublic = cur.Attr("doctype-public") 272 continue 273 } 274 275 if IsXsltName(cur, "strip-space") { 276 el := cur.Attr("elements") 277 if el != "" { 278 style.StripSpace = strings.Fields(el) 279 } 280 continue 281 } 282 283 if IsXsltName(cur, "preserve-space") { 284 el := cur.Attr("elements") 285 if el != "" { 286 style.PreserveSpace = strings.Fields(el) 287 } 288 continue 289 } 290 291 if IsXsltName(cur, "namespace-alias") { 292 stylens := cur.Attr("stylesheet-prefix") 293 resns := cur.Attr("result-prefix") 294 style.NamespaceAlias[stylens] = resns 295 continue 296 } 297 298 if IsXsltName(cur, "decimal-format") { 299 fmt.Println("GLOBAL TODO ", cur.Name()) 300 continue 301 } 302 } 303 return 304 } 305 306 func (style *Stylesheet) IsExcluded(prefix string) bool { 307 for _, p := range style.ExcludePrefixes { 308 if p == prefix { 309 return true 310 } 311 } 312 for _, p := range style.ExtensionPrefixes { 313 if p == prefix { 314 return true 315 } 316 } 317 return false 318 } 319 320 // Process takes an input document and returns the output produced 321 // by executing the stylesheet. 322 323 // The output is not guaranteed to be well-formed XML, so the 324 // serialized string is returned. Consideration is being given 325 // to returning a slice of bytes and encoding information. 326 func (style *Stylesheet) Process(doc *xml.XmlDocument, options StylesheetOptions) (out string, err error) { 327 // lookup output method, doctypes, encoding 328 // create output document with appropriate values 329 output := xml.CreateEmptyDocument(doc.InputEncoding(), doc.OutputEncoding()) 330 // init context node/document 331 context := &ExecutionContext{Output: output.Me, OutputNode: output, Style: style, Source: doc} 332 context.Current = doc 333 context.XPathContext = doc.DocXPathCtx() 334 // when evaluating keys/global vars position is always 1 335 context.XPathContext.SetContextPosition(1, 1) 336 start := doc 337 style.populateKeys(start, context) 338 // eval global params 339 // eval global variables 340 for _, val := range style.Variables { 341 val.Apply(doc, context) 342 } 343 344 // for each global parameter 345 for _, param := range style.GlobalParameters { 346 // was a parameter passed with this name? 347 gp_value, gp_ok := options.Parameters[param] 348 if gp_ok { 349 gp_var := style.Variables[param] 350 // replace value of style.Variables[key] 351 gp_var.Value = gp_value 352 // fmt.Println("Existing", param, "set to", gp_value) 353 } 354 } 355 356 // process nodes 357 style.processNode(start, context, nil) 358 359 out, err = style.constructOutput(output, options) 360 // reset anything required for re-use 361 return 362 } 363 364 func (style *Stylesheet) constructXmlDeclaration() (out string) { 365 out = "<?xml version=\"1.0\"" 366 if style.DesiredEncoding != "" { 367 out = out + fmt.Sprintf(" encoding=\"%s\"", style.DesiredEncoding) 368 } 369 if style.Standalone { 370 out = out + " standalone=\"yes\"" 371 } 372 out = out + "?>\n" 373 return 374 } 375 376 // actually produce (and possibly write) the final output 377 func (style *Stylesheet) constructOutput(output *xml.XmlDocument, options StylesheetOptions) (out string, err error) { 378 //if not explicitly set, spec requires us to check for html 379 outputType := style.OutputMethod 380 if outputType == "" { 381 outputType = "xml" 382 root := output.Root() 383 if root != nil && root.Name() == "html" && root.Namespace() == "" { 384 outputType = "html" 385 } 386 } 387 388 // construct DTD declaration depending on xsl:output settings 389 docType := "" 390 if style.doctypeSystem != "" { 391 docType = "<!DOCTYPE " 392 docType = docType + output.Root().Name() 393 if style.doctypePublic != "" { 394 docType = docType + fmt.Sprintf(" PUBLIC \"%s\"", style.doctypePublic) 395 } else { 396 docType = docType + " SYSTEM" 397 } 398 docType = docType + fmt.Sprintf(" \"%s\"", style.doctypeSystem) 399 docType = docType + ">\n" 400 } 401 402 // create the XML declaration depending on xsl:output settings 403 decl := "" 404 if outputType == "xml" { 405 if !style.OmitXmlDeclaration { 406 decl = style.constructXmlDeclaration() 407 } 408 format := xml.XML_SAVE_NO_DECL | xml.XML_SAVE_AS_XML 409 if options.IndentOutput || style.IndentOutput { 410 format = format | xml.XML_SAVE_FORMAT 411 } 412 // we get slightly incorrect output if we call out.SerializeWithFormat directly 413 // this seems to be a libxml bug; we work around it the same way libxslt does 414 415 //TODO: honor desired encoding 416 // this involves decisions about supported encodings, strings vs byte slices 417 // we can sidestep a little if we enable option to write directly to file 418 for cur := output.FirstChild(); cur != nil; cur = cur.NextSibling() { 419 b, size := cur.SerializeWithFormat(format, nil, nil) 420 if b != nil { 421 out = out + string(b[:size]) 422 } 423 } 424 if out != "" { 425 out = decl + docType + out + "\n" 426 } 427 } 428 if outputType == "html" { 429 out = docType 430 b, size := output.ToHtml(nil, nil) 431 out = out + string(b[:size]) 432 } 433 if outputType == "text" { 434 format := xml.XML_SAVE_NO_DECL 435 for cur := output.FirstChild(); cur != nil; cur = cur.NextSibling() { 436 b, size := cur.SerializeWithFormat(format, nil, nil) 437 if b != nil { 438 out = out + string(b[:size]) 439 } 440 } 441 } 442 return 443 } 444 445 // Determine which template, if any, matches the current node. 446 447 // If there is no matching template, nil is returned. 448 func (style *Stylesheet) LookupTemplate(node xml.Node, mode string, context *ExecutionContext) (template *Template) { 449 name := node.Name() 450 if node.NodeType() == xml.XML_DOCUMENT_NODE { 451 name = "/" 452 } 453 found := new(list.List) 454 l := style.ElementMatches[name] 455 if l != nil { 456 for i := l.Front(); i != nil; i = i.Next() { 457 c := i.Value.(*CompiledMatch) 458 if c.EvalMatch(node, mode, context) { 459 insertByPriority(found, c) 460 break 461 } 462 } 463 } 464 l = style.ElementMatches["*"] 465 if l != nil { 466 for i := l.Front(); i != nil; i = i.Next() { 467 c := i.Value.(*CompiledMatch) 468 if c.EvalMatch(node, mode, context) { 469 insertByPriority(found, c) 470 break 471 } 472 } 473 } 474 l = style.AttrMatches[name] 475 if l != nil { 476 for i := l.Front(); i != nil; i = i.Next() { 477 c := i.Value.(*CompiledMatch) 478 if c.EvalMatch(node, mode, context) { 479 insertByPriority(found, c) 480 break 481 } 482 } 483 } 484 l = style.AttrMatches["*"] 485 if l != nil { 486 for i := l.Front(); i != nil; i = i.Next() { 487 c := i.Value.(*CompiledMatch) 488 if c.EvalMatch(node, mode, context) { 489 insertByPriority(found, c) 490 break 491 } 492 } 493 } 494 //TODO: review order in which we consult generic matches 495 for i := style.IdKeyMatches.Front(); i != nil; i = i.Next() { 496 c := i.Value.(*CompiledMatch) 497 if c.EvalMatch(node, mode, context) { 498 insertByPriority(found, c) 499 break 500 } 501 } 502 for i := style.NodeMatches.Front(); i != nil; i = i.Next() { 503 c := i.Value.(*CompiledMatch) 504 if c.EvalMatch(node, mode, context) { 505 insertByPriority(found, c) 506 break 507 } 508 } 509 for i := style.TextMatches.Front(); i != nil; i = i.Next() { 510 c := i.Value.(*CompiledMatch) 511 if c.EvalMatch(node, mode, context) { 512 insertByPriority(found, c) 513 break 514 } 515 } 516 for i := style.PIMatches.Front(); i != nil; i = i.Next() { 517 c := i.Value.(*CompiledMatch) 518 if c.EvalMatch(node, mode, context) { 519 insertByPriority(found, c) 520 break 521 } 522 } 523 for i := style.CommentMatches.Front(); i != nil; i = i.Next() { 524 c := i.Value.(*CompiledMatch) 525 if c.EvalMatch(node, mode, context) { 526 insertByPriority(found, c) 527 break 528 } 529 } 530 531 // if there's a match at this import precedence, return 532 // the one with the highest priority 533 f := found.Front() 534 if f != nil { 535 template = f.Value.(*CompiledMatch).Template 536 return 537 } 538 539 // no match at this import precedence, 540 //consult the imported stylesheets 541 for i := style.Imports.Front(); i != nil; i = i.Next() { 542 s := i.Value.(*Stylesheet) 543 t := s.LookupTemplate(node, mode, context) 544 if t != nil { 545 return t 546 } 547 } 548 return 549 } 550 551 func (style *Stylesheet) RegisterAttributeSet(node xml.Node) { 552 name := node.Attr("name") 553 res := CompileSingleNode(node) 554 res.Compile(node) 555 style.AttributeSets[name] = res 556 } 557 558 func (style *Stylesheet) RegisterGlobalVariable(node xml.Node) { 559 name := node.Attr("name") 560 _var := CompileSingleNode(node).(*Variable) 561 _var.Compile(node) 562 style.Variables[name] = _var 563 } 564 565 func (style *Stylesheet) processDefaultRule(node xml.Node, context *ExecutionContext) { 566 //default for DOCUMENT, ELEMENT 567 children := context.ChildrenOf(node) 568 total := len(children) 569 for i, cur := range children { 570 context.XPathContext.SetContextPosition(i+1, total) 571 style.processNode(cur, context, nil) 572 } 573 //default for CDATA, TEXT, ATTR is copy as text 574 if node.NodeType() == xml.XML_TEXT_NODE { 575 if context.ShouldStrip(node) { 576 return 577 } 578 if context.UseCDataSection(context.OutputNode) { 579 r := context.Output.CreateCDataNode(node.Content()) 580 context.OutputNode.AddChild(r) 581 } else { 582 r := context.Output.CreateTextNode(node.Content()) 583 context.OutputNode.AddChild(r) 584 } 585 } 586 //default for namespace declaration is copy to output document 587 } 588 589 func (style *Stylesheet) processNode(node xml.Node, context *ExecutionContext, params []*Variable) { 590 //get template 591 template := style.LookupTemplate(node, context.Mode, context) 592 // for each import scope 593 // get the list of applicable templates for this mode 594 // (assume compilation ordered appropriately) 595 // eval each one until we get a match 596 // eval generic templates that might apply until we get a match 597 //apply default rule if null template 598 if template == nil { 599 style.processDefaultRule(node, context) 600 return 601 } 602 //apply template to current node 603 template.Apply(node, context, params) 604 } 605 606 func (style *Stylesheet) populateKeys(node xml.Node, context *ExecutionContext) { 607 for _, key := range style.Keys { 608 //see if the current node matches 609 matches := CompileMatch(key.match, nil) 610 hasMatch := false 611 for _, m := range matches { 612 if m.EvalMatch(node, "", context) { 613 hasMatch = true 614 break 615 } 616 } 617 if !hasMatch { 618 continue 619 } 620 lookupkey, _ := node.EvalXPath(key.use, context) 621 lookup := "" 622 switch lk := lookupkey.(type) { 623 case []xml.Node: 624 if len(lk) == 0 { 625 continue 626 } 627 lookup = lk[0].String() 628 case string: 629 lookup = lk 630 default: 631 lookup = fmt.Sprintf("%v", lk) 632 } 633 key.nodes[lookup] = append(key.nodes[lookup], node) 634 } 635 children := context.ChildrenOf(node) 636 for _, cur := range children { 637 style.populateKeys(cur, context) 638 } 639 } 640 641 // ParseTemplate parses and compiles the xsl:template elements. 642 func (style *Stylesheet) ParseTemplate(node xml.Node) { 643 //add to template list of stylesheet 644 //parse mode, match, name, priority 645 mode := node.Attr("mode") 646 name := node.Attr("name") 647 match := node.Attr("match") 648 priority := node.Attr("priority") 649 p := 0.0 650 if priority != "" { 651 p, _ = strconv.ParseFloat(priority, 64) 652 } 653 654 // TODO: validate the name (duplicate should raise error) 655 template := &Template{Match: match, Mode: mode, Name: name, Priority: p, Node: node} 656 657 template.CompileContent(node) 658 659 // compile pattern 660 style.compilePattern(template, priority) 661 } 662 663 func (style *Stylesheet) compilePattern(template *Template, priority string) { 664 if template.Name != "" { 665 style.NamedTemplates[template.Name] = template 666 } 667 668 if template.Match == "" { 669 return 670 } 671 672 matches := CompileMatch(template.Match, template) 673 for _, c := range matches { 674 // calculate priority if not explicitly set 675 if priority == "" { 676 template.Priority = c.DefaultPriority() 677 //fmt.Println("COMPILED", template.Match, c.Steps[0].Value, c.Steps[0].Op, template.Priority) 678 } 679 // insert into 'best' collection 680 if c.IsElement() { 681 hash := c.Hash() 682 l := style.ElementMatches[hash] 683 if l == nil { 684 l = list.New() 685 style.ElementMatches[hash] = l 686 } 687 insertByPriority(l, c) 688 } 689 if c.IsAttr() { 690 hash := c.Hash() 691 l := style.AttrMatches[hash] 692 if l == nil { 693 l = list.New() 694 style.AttrMatches[hash] = l 695 } 696 insertByPriority(l, c) 697 } 698 if c.IsIdKey() { 699 insertByPriority(style.IdKeyMatches, c) 700 } 701 if c.IsText() { 702 insertByPriority(style.TextMatches, c) 703 } 704 if c.IsComment() { 705 insertByPriority(style.CommentMatches, c) 706 } 707 if c.IsPI() { 708 insertByPriority(style.PIMatches, c) 709 } 710 if c.IsNode() { 711 insertByPriority(style.NodeMatches, c) 712 } 713 } 714 } 715 716 func insertByPriority(l *list.List, match *CompiledMatch) { 717 for i := l.Front(); i != nil; i = i.Next() { 718 cur := i.Value.(*CompiledMatch) 719 if cur.Template.Priority <= match.Template.Priority { 720 l.InsertBefore(match, i) 721 return 722 } 723 } 724 //either list is empty, or we're lowest priority template 725 l.PushBack(match) 726 } 727 728 // Locate an attribute set by name 729 func (style *Stylesheet) LookupAttributeSet(name string) CompiledStep { 730 attset, ok := style.AttributeSets[name] 731 if ok { 732 return attset 733 } 734 for i := style.Imports.Front(); i != nil; i = i.Next() { 735 s := i.Value.(*Stylesheet) 736 t := s.LookupAttributeSet(name) 737 if t != nil { 738 return t 739 } 740 } 741 return nil 742 }