github.com/MontFerret/ferret@v0.18.0/pkg/drivers/http/xpath.go (about)

     1  package http
     2  
     3  import (
     4  	"github.com/PuerkitoBio/goquery"
     5  	"github.com/antchfx/htmlquery"
     6  	"github.com/antchfx/xpath"
     7  	"golang.org/x/net/html"
     8  
     9  	"github.com/MontFerret/ferret/pkg/drivers"
    10  	"github.com/MontFerret/ferret/pkg/runtime/core"
    11  	"github.com/MontFerret/ferret/pkg/runtime/values"
    12  )
    13  
    14  func EvalXPathToNode(selection *goquery.Selection, expression string) (drivers.HTMLNode, error) {
    15  	node := htmlquery.FindOne(fromSelectionToNode(selection), expression)
    16  
    17  	if node == nil {
    18  		return nil, nil
    19  	}
    20  
    21  	return parseXPathNode(node)
    22  }
    23  
    24  func EvalXPathToElement(selection *goquery.Selection, expression string) (drivers.HTMLElement, error) {
    25  	node, err := EvalXPathToNode(selection, expression)
    26  
    27  	if err != nil {
    28  		return nil, err
    29  	}
    30  
    31  	if node == nil {
    32  		return nil, nil
    33  	}
    34  
    35  	return drivers.ToElement(node)
    36  }
    37  
    38  func EvalXPathToNodes(selection *goquery.Selection, expression string) (*values.Array, error) {
    39  	return EvalXPathToNodesWith(selection, expression, func(node *html.Node) (core.Value, error) {
    40  		return parseXPathNode(node)
    41  	})
    42  }
    43  
    44  func EvalXPathToNodesWith(selection *goquery.Selection, expression string, mapper func(node *html.Node) (core.Value, error)) (*values.Array, error) {
    45  	out, err := evalXPathToInternal(selection, expression)
    46  
    47  	if err != nil {
    48  		return nil, err
    49  	}
    50  
    51  	switch res := out.(type) {
    52  	case *xpath.NodeIterator:
    53  		items := values.NewArray(10)
    54  
    55  		for res.MoveNext() {
    56  			item, err := mapper(res.Current().(*htmlquery.NodeNavigator).Current())
    57  
    58  			if err != nil {
    59  				return nil, err
    60  			}
    61  
    62  			if item != nil {
    63  				items.Push(item)
    64  			}
    65  		}
    66  
    67  		return items, nil
    68  	default:
    69  		return values.EmptyArray(), nil
    70  	}
    71  }
    72  
    73  func EvalXPathTo(selection *goquery.Selection, expression string) (core.Value, error) {
    74  	out, err := evalXPathToInternal(selection, expression)
    75  
    76  	if err != nil {
    77  		return nil, err
    78  	}
    79  
    80  	switch res := out.(type) {
    81  	case *xpath.NodeIterator:
    82  		items := values.NewArray(10)
    83  
    84  		for res.MoveNext() {
    85  			var item core.Value
    86  
    87  			node := res.Current()
    88  
    89  			switch node.NodeType() {
    90  			case xpath.TextNode:
    91  				item = values.NewString(node.Value())
    92  			case xpath.AttributeNode:
    93  				item = values.NewString(node.Value())
    94  			default:
    95  				i, err := parseXPathNode(node.(*htmlquery.NodeNavigator).Current())
    96  
    97  				if err != nil {
    98  					return nil, err
    99  				}
   100  
   101  				item = i
   102  			}
   103  
   104  			if item != nil {
   105  				items.Push(item)
   106  			}
   107  		}
   108  
   109  		return items, nil
   110  	default:
   111  		return values.Parse(res), nil
   112  	}
   113  }
   114  
   115  func evalXPathToInternal(selection *goquery.Selection, expression string) (interface{}, error) {
   116  	exp, err := xpath.Compile(expression)
   117  
   118  	if err != nil {
   119  		return nil, err
   120  	}
   121  
   122  	return exp.Evaluate(htmlquery.CreateXPathNavigator(fromSelectionToNode(selection))), nil
   123  }
   124  
   125  func parseXPathNode(node *html.Node) (drivers.HTMLNode, error) {
   126  	if node == nil {
   127  		return nil, nil
   128  	}
   129  
   130  	switch node.Type {
   131  	case html.DocumentNode:
   132  		url := htmlquery.SelectAttr(node, "url")
   133  		return NewHTMLDocument(goquery.NewDocumentFromNode(node), url, nil)
   134  	case html.ElementNode:
   135  		return NewHTMLElement(&goquery.Selection{Nodes: []*html.Node{node}})
   136  	default:
   137  		return nil, nil
   138  	}
   139  }