github.com/MontFerret/ferret@v0.18.0/pkg/drivers/http/xpath.go (about) 1 package http 2 3 import ( 4 "github.com/PuerkitoBio/goquery" 5 "github.com/antchfx/htmlquery" 6 "github.com/antchfx/xpath" 7 "golang.org/x/net/html" 8 9 "github.com/MontFerret/ferret/pkg/drivers" 10 "github.com/MontFerret/ferret/pkg/runtime/core" 11 "github.com/MontFerret/ferret/pkg/runtime/values" 12 ) 13 14 func EvalXPathToNode(selection *goquery.Selection, expression string) (drivers.HTMLNode, error) { 15 node := htmlquery.FindOne(fromSelectionToNode(selection), expression) 16 17 if node == nil { 18 return nil, nil 19 } 20 21 return parseXPathNode(node) 22 } 23 24 func EvalXPathToElement(selection *goquery.Selection, expression string) (drivers.HTMLElement, error) { 25 node, err := EvalXPathToNode(selection, expression) 26 27 if err != nil { 28 return nil, err 29 } 30 31 if node == nil { 32 return nil, nil 33 } 34 35 return drivers.ToElement(node) 36 } 37 38 func EvalXPathToNodes(selection *goquery.Selection, expression string) (*values.Array, error) { 39 return EvalXPathToNodesWith(selection, expression, func(node *html.Node) (core.Value, error) { 40 return parseXPathNode(node) 41 }) 42 } 43 44 func EvalXPathToNodesWith(selection *goquery.Selection, expression string, mapper func(node *html.Node) (core.Value, error)) (*values.Array, error) { 45 out, err := evalXPathToInternal(selection, expression) 46 47 if err != nil { 48 return nil, err 49 } 50 51 switch res := out.(type) { 52 case *xpath.NodeIterator: 53 items := values.NewArray(10) 54 55 for res.MoveNext() { 56 item, err := mapper(res.Current().(*htmlquery.NodeNavigator).Current()) 57 58 if err != nil { 59 return nil, err 60 } 61 62 if item != nil { 63 items.Push(item) 64 } 65 } 66 67 return items, nil 68 default: 69 return values.EmptyArray(), nil 70 } 71 } 72 73 func EvalXPathTo(selection *goquery.Selection, expression string) (core.Value, error) { 74 out, err := evalXPathToInternal(selection, expression) 75 76 if err != nil { 77 return nil, err 78 } 79 80 switch res := out.(type) { 81 case *xpath.NodeIterator: 82 items := values.NewArray(10) 83 84 for res.MoveNext() { 85 var item core.Value 86 87 node := res.Current() 88 89 switch node.NodeType() { 90 case xpath.TextNode: 91 item = values.NewString(node.Value()) 92 case xpath.AttributeNode: 93 item = values.NewString(node.Value()) 94 default: 95 i, err := parseXPathNode(node.(*htmlquery.NodeNavigator).Current()) 96 97 if err != nil { 98 return nil, err 99 } 100 101 item = i 102 } 103 104 if item != nil { 105 items.Push(item) 106 } 107 } 108 109 return items, nil 110 default: 111 return values.Parse(res), nil 112 } 113 } 114 115 func evalXPathToInternal(selection *goquery.Selection, expression string) (interface{}, error) { 116 exp, err := xpath.Compile(expression) 117 118 if err != nil { 119 return nil, err 120 } 121 122 return exp.Evaluate(htmlquery.CreateXPathNavigator(fromSelectionToNode(selection))), nil 123 } 124 125 func parseXPathNode(node *html.Node) (drivers.HTMLNode, error) { 126 if node == nil { 127 return nil, nil 128 } 129 130 switch node.Type { 131 case html.DocumentNode: 132 url := htmlquery.SelectAttr(node, "url") 133 return NewHTMLDocument(goquery.NewDocumentFromNode(node), url, nil) 134 case html.ElementNode: 135 return NewHTMLElement(&goquery.Selection{Nodes: []*html.Node{node}}) 136 default: 137 return nil, nil 138 } 139 }