github.com/avicd/go-utilx@v0.1.0/xmlx/xml.go (about)

     1  package xmlx
     2  
     3  import (
     4  	"bufio"
     5  	"encoding/xml"
     6  	"github.com/avicd/go-utilx/logx"
     7  	"golang.org/x/net/html/charset"
     8  	"io"
     9  	"strings"
    10  	"unicode"
    11  )
    12  
    13  const cdataOpen = "<![CDATA["
    14  const xmlnsPrefix = "xmlns"
    15  const xpathAttr = "__Attr__"
    16  
    17  type xmlStack struct {
    18  	*Node
    19  	prefix map[string]string
    20  	ns     map[string]string
    21  	next   *xmlStack
    22  	prev   *xmlStack
    23  }
    24  
    25  func (stack *xmlStack) pushNext(node *Node, attrs []xml.Attr) *xmlStack {
    26  	next := &xmlStack{
    27  		Node:   node,
    28  		prefix: map[string]string{},
    29  		ns:     map[string]string{},
    30  	}
    31  	node.Attrs = []*Node{}
    32  	for i, attr := range attrs {
    33  		attrNs := attr.Name.Space
    34  		// namespace with prefix
    35  		if attr.Name.Space == xmlnsPrefix ||
    36  			// default namespace
    37  			attr.Name.Space == "" && attr.Name.Local == xmlnsPrefix {
    38  			attrNs = ""
    39  			if attr.Name.Space != "" {
    40  				next.prefix[attr.Value] = attr.Name.Local
    41  			}
    42  			next.ns[attr.Name.Local] = attr.Value
    43  		}
    44  		var attrPrefix string
    45  		if stack != nil {
    46  			attrPrefix = stack.getPrefix(attrNs)
    47  		}
    48  		newAttr := &Node{
    49  			Type:         AttributeNode,
    50  			ParentNode:   node,
    51  			Name:         attr.Name.Local,
    52  			Value:        attr.Value,
    53  			NamespaceURI: attrNs,
    54  			Prefix:       attrPrefix,
    55  		}
    56  		if i > 0 {
    57  			newAttr.PrevSibling = node.Attrs[i-1]
    58  			newAttr.PrevSibling.NextSibling = newAttr
    59  		}
    60  		node.Attrs = append(node.Attrs, newAttr)
    61  	}
    62  	next.prev = stack
    63  	if stack != nil {
    64  		stack.next = next
    65  	}
    66  	return next
    67  }
    68  
    69  func (stack *xmlStack) popLast() *xmlStack {
    70  	if stack != nil {
    71  		prev := stack.prev
    72  		prev.next = nil
    73  		return prev
    74  	}
    75  	return nil
    76  }
    77  
    78  func (stack *xmlStack) getPrefix(ns string) string {
    79  	for p := stack; p != nil; p = p.prev {
    80  		if prefix, ok := p.prefix[ns]; ok {
    81  			return prefix
    82  		}
    83  	}
    84  	return ""
    85  }
    86  
    87  func (stack *xmlStack) getNs(prefix string) string {
    88  	for p := stack; p != nil; p = p.prev {
    89  		if ns, ok := p.ns[prefix]; ok {
    90  			return ns
    91  		}
    92  	}
    93  	return ""
    94  }
    95  
    96  func (stack *xmlStack) parseStrAttr(text string) []*Node {
    97  	buf := strings.TrimSpace(text)
    98  	var attrs []*Node
    99  	for len(buf) > 0 {
   100  		cut := strings.IndexFunc(buf, func(r rune) bool {
   101  			return !unicode.IsSpace(r)
   102  		})
   103  		if cut > -1 {
   104  			buf = buf[cut:]
   105  		}
   106  		var key string
   107  		var value string
   108  		cut = strings.Index(buf, "=")
   109  		if cut > -1 {
   110  			key = buf[0:cut]
   111  			buf = buf[cut+1:]
   112  			ptoken := buf[0:1]
   113  			buf = buf[1:]
   114  			cut = strings.Index(buf, ptoken)
   115  			value = buf[0:cut]
   116  			buf = buf[cut+1:]
   117  		} else {
   118  			cut = strings.IndexFunc(buf, unicode.IsSpace)
   119  			if cut > -1 {
   120  				key = buf[0:cut]
   121  				buf = buf[cut+1:]
   122  			} else {
   123  				key = buf
   124  				buf = ""
   125  			}
   126  		}
   127  		if key != "" {
   128  			attr := &Node{Type: AttributeNode, Value: value}
   129  			if cut = strings.Index(key, ":"); cut > -1 {
   130  				attr.Name = key[cut+1 : 0]
   131  				attr.Prefix = key[:cut]
   132  				attr.NamespaceURI = stack.getNs(attr.Prefix)
   133  			} else {
   134  				attr.Name = key
   135  			}
   136  			attrs = append(attrs, attr)
   137  		}
   138  	}
   139  	return attrs
   140  }
   141  
   142  type xmlParser struct {
   143  	*bufio.Reader
   144  	ci      int
   145  	isCData bool
   146  }
   147  
   148  func newXmlParser(reader io.Reader) *xmlParser {
   149  	return &xmlParser{Reader: bufio.NewReader(reader)}
   150  }
   151  
   152  func Parse(reader io.Reader) (*Node, error) {
   153  	return newXmlParser(reader).parse()
   154  }
   155  
   156  func (parser *xmlParser) ReadByte() (byte, error) {
   157  	bt, err := parser.Reader.ReadByte()
   158  	if err == nil {
   159  		if !parser.isCData && bt == cdataOpen[parser.ci] {
   160  			parser.ci++
   161  			if parser.ci == len(cdataOpen) {
   162  				parser.isCData = true
   163  			}
   164  		} else {
   165  			parser.ci = 0
   166  		}
   167  	}
   168  	return bt, err
   169  }
   170  
   171  func (parser *xmlParser) decoder() *xml.Decoder {
   172  	decoder := xml.NewDecoder(parser)
   173  	decoder.CharsetReader = charset.NewReaderLabel
   174  	return decoder
   175  }
   176  
   177  func (parser *xmlParser) parse() (*Node, error) {
   178  	root := &Node{
   179  		Type: DocumentNode,
   180  		Name: "document",
   181  	}
   182  	var current *xmlStack
   183  	current = current.pushNext(root, nil)
   184  	decoder := parser.decoder()
   185  	for {
   186  		parser.isCData = false
   187  		xtk, err := decoder.Token()
   188  		if err == io.EOF {
   189  			break
   190  		} else if err != nil {
   191  			logx.Error(err.Error())
   192  			return nil, err
   193  		}
   194  		var node *Node
   195  		parent := current
   196  		switch el := xtk.(type) {
   197  		case xml.StartElement:
   198  			node = &Node{
   199  				Type:         ElementNode,
   200  				Name:         el.Name.Local,
   201  				NamespaceURI: el.Name.Space,
   202  				Prefix:       parent.getPrefix(el.Name.Space),
   203  			}
   204  			current = current.pushNext(node, el.Attr)
   205  		case xml.EndElement:
   206  			current = current.popLast()
   207  		case xml.CharData:
   208  			text := string(el)
   209  			node = &Node{
   210  				Type:  TextNode,
   211  				Name:  "text",
   212  				Value: text,
   213  			}
   214  			if parser.isCData {
   215  				node.Type = CDataSectionNode
   216  				node.Name = ""
   217  			}
   218  		case xml.Comment:
   219  			text := string(el)
   220  			node = &Node{Type: CommentNode, Name: "comment", Value: text}
   221  		case xml.ProcInst:
   222  			node = &Node{Type: ProcessingInstructionNode, Name: el.Target, Value: string(el.Inst)}
   223  			node.Attrs = parent.parseStrAttr(string(el.Inst))
   224  		case xml.Directive:
   225  			text := string(el)
   226  			node = &Node{Type: DirectiveNode}
   227  			cut := strings.IndexFunc(text, unicode.IsSpace)
   228  			if cut > -1 {
   229  				node.Name = text[0:cut]
   230  				node.Value = strings.TrimSpace(text[cut:])
   231  			} else {
   232  				node.Name = text
   233  				node.Value = ""
   234  			}
   235  			if strings.ToUpper(node.Name) == "DOCTYPE" {
   236  				node.Type = DocumentTypeNode
   237  				cut = strings.IndexFunc(node.Value, unicode.IsSpace)
   238  				if cut > -1 {
   239  					node.Name = node.Value[0:cut]
   240  					node.Value = node.Value[cut:]
   241  				} else {
   242  					node.Name = node.Value
   243  					node.Value = ""
   244  				}
   245  			}
   246  		}
   247  		parent.Node.AppendChild(node)
   248  	}
   249  	return root, nil
   250  }