github.com/sohaha/zlsgo@v1.7.13-0.20240501141223-10dd1a906f76/zhttp/query.go (about)

     1  package zhttp
     2  
     3  import (
     4  	"bytes"
     5  
     6  	"golang.org/x/net/html"
     7  )
     8  
     9  type (
    10  	QueryHTML struct {
    11  		node   *html.Node
    12  		filter []*html.Node
    13  	}
    14  	Els []QueryHTML
    15  )
    16  
    17  func HTMLParse(HTML []byte) (doc QueryHTML, err error) {
    18  	var n *html.Node
    19  	n, err = html.Parse(bytes.NewReader(HTML))
    20  	if err != nil {
    21  		return
    22  	}
    23  	for n.Type != html.ElementNode {
    24  		switch n.Type {
    25  		case html.DocumentNode:
    26  			n = n.FirstChild
    27  		case html.DoctypeNode:
    28  			n = n.NextSibling
    29  		case html.CommentNode:
    30  			n = n.NextSibling
    31  		}
    32  	}
    33  	doc = QueryHTML{node: n}
    34  	return
    35  }
    36  
    37  func (r *QueryHTML) getNode() *html.Node {
    38  	if r.node == nil {
    39  		r.node = &html.Node{}
    40  	}
    41  	return r.node
    42  }
    43  
    44  func (r QueryHTML) SelectChild(el string, args ...map[string]string) QueryHTML {
    45  	var (
    46  		node  *html.Node
    47  		exist bool
    48  	)
    49  	forChild(r.getNode(), func(n *html.Node) bool {
    50  		elArr := matchEl(n, el, arr2Attr(args))
    51  		exist = elArr != nil
    52  		if exist {
    53  			node = elArr
    54  			return false
    55  		}
    56  		return true
    57  	})
    58  	if !exist {
    59  		return QueryHTML{node: &html.Node{}}
    60  	}
    61  	return QueryHTML{node: node}
    62  }
    63  
    64  func (r QueryHTML) SelectAllChild(el string, args ...map[string]string) (arr Els) {
    65  	forChild(r.getNode(), func(n *html.Node) bool {
    66  		elArr := matchEl(n, el, arr2Attr(args))
    67  		exist := elArr != nil
    68  		if exist {
    69  			arr = append(arr, QueryHTML{node: elArr})
    70  		}
    71  		return true
    72  	})
    73  	return
    74  }
    75  
    76  // Deprecated: please use SelectAllChild("")
    77  // Child All child elements
    78  func (r QueryHTML) Child() (childs []QueryHTML) {
    79  	r.ForEachChild(func(index int, child QueryHTML) bool {
    80  		childs = append(childs, child)
    81  		return true
    82  	})
    83  	return
    84  }
    85  
    86  func (r QueryHTML) ForEachChild(f func(index int, child QueryHTML) bool) {
    87  	i := -1
    88  	forChild(r.getNode(), func(n *html.Node) bool {
    89  		i++
    90  		return f(i, QueryHTML{node: n})
    91  	})
    92  }
    93  
    94  func (r QueryHTML) NthChild(index int) QueryHTML {
    95  	i := 0
    96  	doc := QueryHTML{}
    97  	forChild(r.getNode(), func(n *html.Node) bool {
    98  		i++
    99  		if i == index {
   100  			doc.node = n
   101  			return false
   102  		}
   103  		return true
   104  	})
   105  	return doc
   106  }
   107  
   108  func (r QueryHTML) Select(el string, args ...map[string]string) QueryHTML {
   109  	n := findChild(r.getNode(), el, args, false)
   110  	if len(n) == 0 {
   111  		return QueryHTML{node: &html.Node{}}
   112  	}
   113  	return QueryHTML{node: n[0]}
   114  }
   115  
   116  func (r QueryHTML) SelectAll(el string, args ...map[string]string) (arr Els) {
   117  	n := findChild(r.getNode(), el, args, true)
   118  	l := len(n)
   119  	if l == 0 {
   120  		return
   121  	}
   122  	arr = make([]QueryHTML, l)
   123  	for i := range n {
   124  		arr[i] = QueryHTML{node: n[i]}
   125  	}
   126  	return arr
   127  }
   128  
   129  func (r QueryHTML) SelectBrother(el string, args ...map[string]string) QueryHTML {
   130  	parent := r.SelectParent("")
   131  	child := parent.SelectAllChild(el, args...)
   132  	index := 0
   133  	brother := QueryHTML{}
   134  	for i := range child {
   135  		q := child[i]
   136  		if q.node == r.node {
   137  			index = i + 1
   138  			if len(child) > index {
   139  				brother = child[index]
   140  			}
   141  			break
   142  		}
   143  	}
   144  	return brother
   145  }
   146  
   147  func (r QueryHTML) SelectParent(el string, args ...map[string]string) QueryHTML {
   148  	n := r.getNode()
   149  	attr := arr2Attr(args)
   150  	for {
   151  		n = n.Parent
   152  		if n == nil {
   153  			break
   154  		}
   155  		p := matchEl(n, el, attr)
   156  		if p != nil {
   157  			return QueryHTML{node: p}
   158  		}
   159  	}
   160  
   161  	return QueryHTML{node: &html.Node{}}
   162  }
   163  
   164  func (r QueryHTML) Find(text string) QueryHTML {
   165  	level := parseSelector(text)
   166  	if len(level) == 0 {
   167  		return QueryHTML{node: &html.Node{}}
   168  	}
   169  	n := r
   170  	for i := range level {
   171  		l := level[i]
   172  		if l.Child {
   173  			n = n.SelectChild(l.Name, l.Attr)
   174  		} else if l.Brother {
   175  			n = n.SelectBrother(l.Name, l.Attr)
   176  		} else {
   177  			n = n.Select(l.Name, l.Attr)
   178  		}
   179  		if !n.Exist() {
   180  			return QueryHTML{node: &html.Node{}}
   181  		}
   182  	}
   183  	return n
   184  }
   185  
   186  func (r QueryHTML) Filter(el ...QueryHTML) QueryHTML {
   187  	for i := range el {
   188  		r.filter = append(r.filter, el[i].node)
   189  	}
   190  	return r
   191  }
   192  
   193  func parseSelector(text string) []*selector {
   194  	var (
   195  		ss []*selector
   196  		s  *selector
   197  	)
   198  	key, l := "", len(text)
   199  	if l > 0 {
   200  		s = &selector{i: 0, Attr: make(map[string]string)}
   201  		for i := 0; i < l; {
   202  			v := text[i]
   203  			add := 0
   204  			switch v {
   205  			case '#':
   206  				s.appendAttr(key, text, i)
   207  				key = "id"
   208  			case '.':
   209  				s.appendAttr(key, text, i)
   210  				key = "class"
   211  			case ' ', '>', '~':
   212  				s.appendAttr(key, text, i)
   213  				if s.Name != "" || len(s.Attr) != 0 {
   214  					ss = append(ss, s)
   215  					s = &selector{i: i + 1, Attr: make(map[string]string)}
   216  					key = ""
   217  				}
   218  				if v == '>' {
   219  					s.Child = true
   220  				} else if v == '~' {
   221  					s.Brother = true
   222  				}
   223  			}
   224  			i = i + 1 + add
   225  		}
   226  	}
   227  
   228  	if s != nil {
   229  		s.appendAttr(key, text, l)
   230  		ss = append(ss, s)
   231  	}
   232  	return ss
   233  }