github.com/godaddy-x/freego@v1.0.156/goquery/type.go (about) 1 package goquery 2 3 import ( 4 "errors" 5 "io" 6 "net/http" 7 "net/url" 8 9 "github.com/andybalholm/cascadia" 10 11 "golang.org/x/net/html" 12 ) 13 14 // Document represents an HTML document to be manipulated. Unlike jQuery, which 15 // is loaded as part of a DOM document, and thus acts upon its containing 16 // document, GoQuery doesn't know which HTML document to act upon. So it needs 17 // to be told, and that's what the Document class is for. It holds the root 18 // document node to manipulate, and can make selections on this document. 19 type Document struct { 20 *Selection 21 Url *url.URL 22 rootNode *html.Node 23 } 24 25 // NewDocumentFromNode is a Document constructor that takes a root html Node 26 // as argument. 27 func NewDocumentFromNode(root *html.Node) *Document { 28 return newDocument(root, nil) 29 } 30 31 // NewDocument is a Document constructor that takes a string URL as argument. 32 // It loads the specified document, parses it, and stores the root Document 33 // node, ready to be manipulated. 34 // 35 // Deprecated: Use the net/http standard library package to make the request 36 // and validate the response before calling goquery.NewDocumentFromReader 37 // with the response's body. 38 func NewDocument(url string) (*Document, error) { 39 // Load the URL 40 res, e := http.Get(url) 41 if e != nil { 42 return nil, e 43 } 44 return NewDocumentFromResponse(res) 45 } 46 47 // NewDocumentFromReader returns a Document from an io.Reader. 48 // It returns an error as second value if the reader's data cannot be parsed 49 // as html. It does not check if the reader is also an io.Closer, the 50 // provided reader is never closed by this call. It is the responsibility 51 // of the caller to close it if required. 52 func NewDocumentFromReader(r io.Reader) (*Document, error) { 53 root, e := html.Parse(r) 54 if e != nil { 55 return nil, e 56 } 57 return newDocument(root, nil), nil 58 } 59 60 // NewDocumentFromResponse is another Document constructor that takes an http response as argument. 61 // It loads the specified response's document, parses it, and stores the root Document 62 // node, ready to be manipulated. The response's body is closed on return. 63 // 64 // Deprecated: Use goquery.NewDocumentFromReader with the response's body. 65 func NewDocumentFromResponse(res *http.Response) (*Document, error) { 66 if res == nil { 67 return nil, errors.New("Response is nil") 68 } 69 defer res.Body.Close() 70 if res.Request == nil { 71 return nil, errors.New("Response.Request is nil") 72 } 73 74 // Parse the HTML into nodes 75 root, e := html.Parse(res.Body) 76 if e != nil { 77 return nil, e 78 } 79 80 // Create and fill the document 81 return newDocument(root, res.Request.URL), nil 82 } 83 84 // CloneDocument creates a deep-clone of a document. 85 func CloneDocument(doc *Document) *Document { 86 return newDocument(cloneNode(doc.rootNode), doc.Url) 87 } 88 89 // Private constructor, make sure all fields are correctly filled. 90 func newDocument(root *html.Node, url *url.URL) *Document { 91 // Create and fill the document 92 d := &Document{nil, url, root} 93 d.Selection = newSingleSelection(root, d) 94 return d 95 } 96 97 // Selection represents a collection of nodes matching some criteria. The 98 // initial Selection can be created by using Document.Find, and then 99 // manipulated using the jQuery-like chainable syntax and methods. 100 type Selection struct { 101 Nodes []*html.Node 102 document *Document 103 prevSel *Selection 104 } 105 106 // Helper constructor to create an empty selection 107 func newEmptySelection(doc *Document) *Selection { 108 return &Selection{nil, doc, nil} 109 } 110 111 // Helper constructor to create a selection of only one node 112 func newSingleSelection(node *html.Node, doc *Document) *Selection { 113 return &Selection{[]*html.Node{node}, doc, nil} 114 } 115 116 // Matcher is an interface that defines the methods to match 117 // HTML nodes against a compiled selector string. Cascadia's 118 // Selector implements this interface. 119 type Matcher interface { 120 Match(*html.Node) bool 121 MatchAll(*html.Node) []*html.Node 122 Filter([]*html.Node) []*html.Node 123 } 124 125 // compileMatcher compiles the selector string s and returns 126 // the corresponding Matcher. If s is an invalid selector string, 127 // it returns a Matcher that fails all matches. 128 func compileMatcher(s string) Matcher { 129 cs, err := cascadia.Compile(s) 130 if err != nil { 131 return invalidMatcher{} 132 } 133 return cs 134 } 135 136 // invalidMatcher is a Matcher that always fails to match. 137 type invalidMatcher struct{} 138 139 func (invalidMatcher) Match(n *html.Node) bool { return false } 140 func (invalidMatcher) MatchAll(n *html.Node) []*html.Node { return nil } 141 func (invalidMatcher) Filter(ns []*html.Node) []*html.Node { return nil }