github.com/MontFerret/ferret@v0.18.0/pkg/drivers/http/document.go (about) 1 package http 2 3 import ( 4 "context" 5 "hash/fnv" 6 7 "github.com/PuerkitoBio/goquery" 8 9 "github.com/MontFerret/ferret/pkg/drivers" 10 "github.com/MontFerret/ferret/pkg/drivers/common" 11 "github.com/MontFerret/ferret/pkg/runtime/core" 12 "github.com/MontFerret/ferret/pkg/runtime/values" 13 ) 14 15 type HTMLDocument struct { 16 doc *goquery.Document 17 element drivers.HTMLElement 18 url values.String 19 parent drivers.HTMLDocument 20 children *values.Array 21 } 22 23 func NewRootHTMLDocument( 24 node *goquery.Document, 25 url string, 26 ) (*HTMLDocument, error) { 27 return NewHTMLDocument(node, url, nil) 28 } 29 30 func NewHTMLDocument( 31 node *goquery.Document, 32 url string, 33 parent drivers.HTMLDocument, 34 ) (*HTMLDocument, error) { 35 if url == "" { 36 return nil, core.Error(core.ErrMissedArgument, "document url") 37 } 38 39 if node == nil { 40 return nil, core.Error(core.ErrMissedArgument, "document root selection") 41 } 42 43 el, err := NewHTMLElement(node.Selection) 44 45 if err != nil { 46 return nil, err 47 } 48 49 doc := new(HTMLDocument) 50 doc.doc = node 51 doc.element = el 52 doc.parent = parent 53 doc.url = values.NewString(url) 54 doc.children = values.NewArray(10) 55 56 frames := node.Find("iframe") 57 frames.Each(func(i int, selection *goquery.Selection) { 58 child, _ := NewHTMLDocument(goquery.NewDocumentFromNode(selection.Nodes[0]), selection.AttrOr("src", url), doc) 59 60 doc.children.Push(child) 61 }) 62 63 return doc, nil 64 } 65 66 func (doc *HTMLDocument) MarshalJSON() ([]byte, error) { 67 return doc.element.MarshalJSON() 68 } 69 70 func (doc *HTMLDocument) Type() core.Type { 71 return drivers.HTMLDocumentType 72 } 73 74 func (doc *HTMLDocument) String() string { 75 str, err := doc.doc.Html() 76 77 if err != nil { 78 return "" 79 } 80 81 return str 82 } 83 84 func (doc *HTMLDocument) Compare(other core.Value) int64 { 85 switch other.Type() { 86 case drivers.HTMLElementType: 87 otherDoc := other.(drivers.HTMLDocument) 88 89 return doc.url.Compare(otherDoc.GetURL()) 90 default: 91 return drivers.Compare(doc.Type(), other.Type()) 92 } 93 } 94 95 func (doc *HTMLDocument) Unwrap() interface{} { 96 return doc.doc 97 } 98 99 func (doc *HTMLDocument) Hash() uint64 { 100 h := fnv.New64a() 101 102 h.Write([]byte(doc.Type().String())) 103 h.Write([]byte(":")) 104 h.Write([]byte(doc.url)) 105 106 return h.Sum64() 107 } 108 109 func (doc *HTMLDocument) Copy() core.Value { 110 cp, err := NewHTMLDocument(doc.doc, string(doc.url), doc.parent) 111 112 if err != nil { 113 return values.None 114 } 115 116 return cp 117 } 118 119 func (doc *HTMLDocument) Clone() core.Cloneable { 120 cloned, err := NewHTMLDocument(doc.doc, doc.url.String(), doc.parent) 121 122 if err != nil { 123 return values.None 124 } 125 126 return cloned 127 } 128 129 func (doc *HTMLDocument) Length() values.Int { 130 return values.NewInt(doc.doc.Length()) 131 } 132 133 func (doc *HTMLDocument) Iterate(_ context.Context) (core.Iterator, error) { 134 return common.NewIterator(doc.element) 135 } 136 137 func (doc *HTMLDocument) GetIn(ctx context.Context, path []core.Value) (core.Value, core.PathError) { 138 return common.GetInDocument(ctx, path, doc) 139 } 140 141 func (doc *HTMLDocument) SetIn(ctx context.Context, path []core.Value, value core.Value) core.PathError { 142 return common.SetInDocument(ctx, path, doc, value) 143 } 144 145 func (doc *HTMLDocument) GetNodeType(_ context.Context) (values.Int, error) { 146 return 9, nil 147 } 148 149 func (doc *HTMLDocument) GetNodeName(_ context.Context) (values.String, error) { 150 return "#document", nil 151 } 152 153 func (doc *HTMLDocument) GetChildNodes(ctx context.Context) (*values.Array, error) { 154 return doc.element.GetChildNodes(ctx) 155 } 156 157 func (doc *HTMLDocument) GetChildNode(ctx context.Context, idx values.Int) (core.Value, error) { 158 return doc.element.GetChildNode(ctx, idx) 159 } 160 161 func (doc *HTMLDocument) QuerySelector(ctx context.Context, selector drivers.QuerySelector) (core.Value, error) { 162 return doc.element.QuerySelector(ctx, selector) 163 } 164 165 func (doc *HTMLDocument) QuerySelectorAll(ctx context.Context, selector drivers.QuerySelector) (*values.Array, error) { 166 return doc.element.QuerySelectorAll(ctx, selector) 167 } 168 169 func (doc *HTMLDocument) CountBySelector(ctx context.Context, selector drivers.QuerySelector) (values.Int, error) { 170 return doc.element.CountBySelector(ctx, selector) 171 } 172 173 func (doc *HTMLDocument) ExistsBySelector(ctx context.Context, selector drivers.QuerySelector) (values.Boolean, error) { 174 return doc.element.ExistsBySelector(ctx, selector) 175 } 176 177 func (doc *HTMLDocument) XPath(ctx context.Context, expression values.String) (core.Value, error) { 178 return doc.element.XPath(ctx, expression) 179 } 180 181 func (doc *HTMLDocument) GetTitle() values.String { 182 title := doc.doc.Find("head > title") 183 184 return values.NewString(title.Text()) 185 } 186 187 func (doc *HTMLDocument) GetChildDocuments(_ context.Context) (*values.Array, error) { 188 return doc.children.Clone().(*values.Array), nil 189 } 190 191 func (doc *HTMLDocument) GetURL() values.String { 192 return doc.url 193 } 194 195 func (doc *HTMLDocument) GetElement() drivers.HTMLElement { 196 return doc.element 197 } 198 199 func (doc *HTMLDocument) GetName() values.String { 200 return "" 201 } 202 203 func (doc *HTMLDocument) GetParentDocument(_ context.Context) (drivers.HTMLDocument, error) { 204 return doc.parent, nil 205 } 206 207 func (doc *HTMLDocument) ScrollTop(_ context.Context, _ drivers.ScrollOptions) error { 208 return core.ErrNotSupported 209 } 210 211 func (doc *HTMLDocument) ScrollBottom(_ context.Context, _ drivers.ScrollOptions) error { 212 return core.ErrNotSupported 213 } 214 215 func (doc *HTMLDocument) ScrollBySelector(_ context.Context, _ drivers.QuerySelector, _ drivers.ScrollOptions) error { 216 return core.ErrNotSupported 217 } 218 219 func (doc *HTMLDocument) Scroll(_ context.Context, _ drivers.ScrollOptions) error { 220 return core.ErrNotSupported 221 } 222 223 func (doc *HTMLDocument) MoveMouseByXY(_ context.Context, _, _ values.Float) error { 224 return core.ErrNotSupported 225 } 226 227 func (doc *HTMLDocument) Close() error { 228 return nil 229 }