github.com/insionng/yougam@v0.0.0-20170714101924-2bc18d833463/libraries/goquery/type.go (about) 1 package goquery 2 3 import ( 4 "code.google.com/p/go.net/html" 5 "io" 6 "net/http" 7 "net/url" 8 ) 9 10 // Document represents an HTML document to be manipulated. Unlike jQuery, which 11 // is loaded as part of a DOM document, and thus acts upon its containing 12 // document, GoQuery doesn't know which HTML document to act upon. So it needs 13 // to be told, and that's what the Document class is for. It holds the root 14 // document node to manipulate, and can make selections on this document. 15 type Document struct { 16 *Selection 17 Url *url.URL 18 rootNode *html.Node 19 } 20 21 // NewDocumentFromNode() is a Document constructor that takes a root html Node 22 // as argument. 23 func NewDocumentFromNode(root *html.Node) (d *Document) { 24 return newDocument(root, nil) 25 } 26 27 // NewDocument() is a Document constructor that takes a string URL as argument. 28 // It loads the specified document, parses it, and stores the root Document 29 // node, ready to be manipulated. 30 func NewDocument(url string) (d *Document, e error) { 31 // Load the URL 32 33 ua := "Mozilla/5.0 (Windows; U; Windows NT 8.8; en-US) AppleWebKit/883.13 (KHTML, like Gecko) Chrome/88.3.13.87 Safari/883.13" 34 client := &http.Client{} 35 36 req, err := http.NewRequest("GET", url, nil) 37 if err != nil { 38 return 39 } 40 41 req.Header.Set("User-Agent", ua) 42 43 res, e := client.Do(req) 44 if e != nil { 45 return 46 } 47 return NewDocumentFromResponse(res) 48 } 49 50 // NewDocumentFromReader() returns a Document from a generic reader. 51 // It returns an error as second value if the reader's data cannot be parsed 52 // as html. It does *not* check if the reader is also an io.Closer, so the 53 // provided reader is never closed by this call, it is the responsibility 54 // of the caller to close it if required. 55 func NewDocumentFromReader(r io.Reader) (d *Document, e error) { 56 root, e := html.Parse(r) 57 if e != nil { 58 return nil, e 59 } 60 return newDocument(root, nil), nil 61 } 62 63 // NewDocumentFromResponse() is another Document constructor that takes an http resonse as argument. 64 // It loads the specified response's document, parses it, and stores the root Document 65 // node, ready to be manipulated. 66 func NewDocumentFromResponse(res *http.Response) (d *Document, e error) { 67 defer res.Body.Close() 68 69 // Parse the HTML into nodes 70 root, e := html.Parse(res.Body) 71 if e != nil { 72 return 73 } 74 75 // Create and fill the document 76 return newDocument(root, res.Request.URL), nil 77 } 78 79 // Private constructor, make sure all fields are correctly filled. 80 func newDocument(root *html.Node, url *url.URL) *Document { 81 // Create and fill the document 82 d := &Document{nil, url, root} 83 d.Selection = newSingleSelection(root, d) 84 return d 85 } 86 87 // Selection represents a collection of nodes matching some criteria. The 88 // initial Selection can be created by using Document.Find(), and then 89 // manipulated using the jQuery-like chainable syntax and methods. 90 type Selection struct { 91 Nodes []*html.Node 92 document *Document 93 prevSel *Selection 94 } 95 96 // Helper constructor to create an empty selection 97 func newEmptySelection(doc *Document) *Selection { 98 return &Selection{nil, doc, nil} 99 } 100 101 // Helper constructor to create a selection of only one node 102 func newSingleSelection(node *html.Node, doc *Document) *Selection { 103 return &Selection{[]*html.Node{node}, doc, nil} 104 }