github.com/sohaha/zlsgo@v1.7.13-0.20240501141223-10dd1a906f76/zhttp/query.go (about) 1 package zhttp 2 3 import ( 4 "bytes" 5 6 "golang.org/x/net/html" 7 ) 8 9 type ( 10 QueryHTML struct { 11 node *html.Node 12 filter []*html.Node 13 } 14 Els []QueryHTML 15 ) 16 17 func HTMLParse(HTML []byte) (doc QueryHTML, err error) { 18 var n *html.Node 19 n, err = html.Parse(bytes.NewReader(HTML)) 20 if err != nil { 21 return 22 } 23 for n.Type != html.ElementNode { 24 switch n.Type { 25 case html.DocumentNode: 26 n = n.FirstChild 27 case html.DoctypeNode: 28 n = n.NextSibling 29 case html.CommentNode: 30 n = n.NextSibling 31 } 32 } 33 doc = QueryHTML{node: n} 34 return 35 } 36 37 func (r *QueryHTML) getNode() *html.Node { 38 if r.node == nil { 39 r.node = &html.Node{} 40 } 41 return r.node 42 } 43 44 func (r QueryHTML) SelectChild(el string, args ...map[string]string) QueryHTML { 45 var ( 46 node *html.Node 47 exist bool 48 ) 49 forChild(r.getNode(), func(n *html.Node) bool { 50 elArr := matchEl(n, el, arr2Attr(args)) 51 exist = elArr != nil 52 if exist { 53 node = elArr 54 return false 55 } 56 return true 57 }) 58 if !exist { 59 return QueryHTML{node: &html.Node{}} 60 } 61 return QueryHTML{node: node} 62 } 63 64 func (r QueryHTML) SelectAllChild(el string, args ...map[string]string) (arr Els) { 65 forChild(r.getNode(), func(n *html.Node) bool { 66 elArr := matchEl(n, el, arr2Attr(args)) 67 exist := elArr != nil 68 if exist { 69 arr = append(arr, QueryHTML{node: elArr}) 70 } 71 return true 72 }) 73 return 74 } 75 76 // Deprecated: please use SelectAllChild("") 77 // Child All child elements 78 func (r QueryHTML) Child() (childs []QueryHTML) { 79 r.ForEachChild(func(index int, child QueryHTML) bool { 80 childs = append(childs, child) 81 return true 82 }) 83 return 84 } 85 86 func (r QueryHTML) ForEachChild(f func(index int, child QueryHTML) bool) { 87 i := -1 88 forChild(r.getNode(), func(n *html.Node) bool { 89 i++ 90 return f(i, QueryHTML{node: n}) 91 }) 92 } 93 94 func (r QueryHTML) NthChild(index int) QueryHTML { 95 i := 0 96 doc := QueryHTML{} 97 forChild(r.getNode(), func(n *html.Node) bool { 98 i++ 99 if i == index { 100 doc.node = n 101 return false 102 } 103 return true 104 }) 105 return doc 106 } 107 108 func (r QueryHTML) Select(el string, args ...map[string]string) QueryHTML { 109 n := findChild(r.getNode(), el, args, false) 110 if len(n) == 0 { 111 return QueryHTML{node: &html.Node{}} 112 } 113 return QueryHTML{node: n[0]} 114 } 115 116 func (r QueryHTML) SelectAll(el string, args ...map[string]string) (arr Els) { 117 n := findChild(r.getNode(), el, args, true) 118 l := len(n) 119 if l == 0 { 120 return 121 } 122 arr = make([]QueryHTML, l) 123 for i := range n { 124 arr[i] = QueryHTML{node: n[i]} 125 } 126 return arr 127 } 128 129 func (r QueryHTML) SelectBrother(el string, args ...map[string]string) QueryHTML { 130 parent := r.SelectParent("") 131 child := parent.SelectAllChild(el, args...) 132 index := 0 133 brother := QueryHTML{} 134 for i := range child { 135 q := child[i] 136 if q.node == r.node { 137 index = i + 1 138 if len(child) > index { 139 brother = child[index] 140 } 141 break 142 } 143 } 144 return brother 145 } 146 147 func (r QueryHTML) SelectParent(el string, args ...map[string]string) QueryHTML { 148 n := r.getNode() 149 attr := arr2Attr(args) 150 for { 151 n = n.Parent 152 if n == nil { 153 break 154 } 155 p := matchEl(n, el, attr) 156 if p != nil { 157 return QueryHTML{node: p} 158 } 159 } 160 161 return QueryHTML{node: &html.Node{}} 162 } 163 164 func (r QueryHTML) Find(text string) QueryHTML { 165 level := parseSelector(text) 166 if len(level) == 0 { 167 return QueryHTML{node: &html.Node{}} 168 } 169 n := r 170 for i := range level { 171 l := level[i] 172 if l.Child { 173 n = n.SelectChild(l.Name, l.Attr) 174 } else if l.Brother { 175 n = n.SelectBrother(l.Name, l.Attr) 176 } else { 177 n = n.Select(l.Name, l.Attr) 178 } 179 if !n.Exist() { 180 return QueryHTML{node: &html.Node{}} 181 } 182 } 183 return n 184 } 185 186 func (r QueryHTML) Filter(el ...QueryHTML) QueryHTML { 187 for i := range el { 188 r.filter = append(r.filter, el[i].node) 189 } 190 return r 191 } 192 193 func parseSelector(text string) []*selector { 194 var ( 195 ss []*selector 196 s *selector 197 ) 198 key, l := "", len(text) 199 if l > 0 { 200 s = &selector{i: 0, Attr: make(map[string]string)} 201 for i := 0; i < l; { 202 v := text[i] 203 add := 0 204 switch v { 205 case '#': 206 s.appendAttr(key, text, i) 207 key = "id" 208 case '.': 209 s.appendAttr(key, text, i) 210 key = "class" 211 case ' ', '>', '~': 212 s.appendAttr(key, text, i) 213 if s.Name != "" || len(s.Attr) != 0 { 214 ss = append(ss, s) 215 s = &selector{i: i + 1, Attr: make(map[string]string)} 216 key = "" 217 } 218 if v == '>' { 219 s.Child = true 220 } else if v == '~' { 221 s.Brother = true 222 } 223 } 224 i = i + 1 + add 225 } 226 } 227 228 if s != nil { 229 s.appendAttr(key, text, l) 230 ss = append(ss, s) 231 } 232 return ss 233 }