github.com/MontFerret/ferret@v0.18.0/pkg/drivers/http/document.go (about)

     1  package http
     2  
     3  import (
     4  	"context"
     5  	"hash/fnv"
     6  
     7  	"github.com/PuerkitoBio/goquery"
     8  
     9  	"github.com/MontFerret/ferret/pkg/drivers"
    10  	"github.com/MontFerret/ferret/pkg/drivers/common"
    11  	"github.com/MontFerret/ferret/pkg/runtime/core"
    12  	"github.com/MontFerret/ferret/pkg/runtime/values"
    13  )
    14  
// HTMLDocument is the HTTP driver's document value, backed by a parsed
// goquery document.
type HTMLDocument struct {
	// doc is the goquery document this value wraps; Unwrap returns it.
	doc      *goquery.Document
	// element is the root element built from doc's selection; most query
	// methods delegate to it.
	element  drivers.HTMLElement
	// url is the address the document was created with; it is never empty
	// for a document built by NewHTMLDocument.
	url      values.String
	// parent is the enclosing document, or nil for a root document.
	parent   drivers.HTMLDocument
	// children holds the documents created for the iframe elements found
	// in doc.
	children *values.Array
}
    22  
    23  func NewRootHTMLDocument(
    24  	node *goquery.Document,
    25  	url string,
    26  ) (*HTMLDocument, error) {
    27  	return NewHTMLDocument(node, url, nil)
    28  }
    29  
    30  func NewHTMLDocument(
    31  	node *goquery.Document,
    32  	url string,
    33  	parent drivers.HTMLDocument,
    34  ) (*HTMLDocument, error) {
    35  	if url == "" {
    36  		return nil, core.Error(core.ErrMissedArgument, "document url")
    37  	}
    38  
    39  	if node == nil {
    40  		return nil, core.Error(core.ErrMissedArgument, "document root selection")
    41  	}
    42  
    43  	el, err := NewHTMLElement(node.Selection)
    44  
    45  	if err != nil {
    46  		return nil, err
    47  	}
    48  
    49  	doc := new(HTMLDocument)
    50  	doc.doc = node
    51  	doc.element = el
    52  	doc.parent = parent
    53  	doc.url = values.NewString(url)
    54  	doc.children = values.NewArray(10)
    55  
    56  	frames := node.Find("iframe")
    57  	frames.Each(func(i int, selection *goquery.Selection) {
    58  		child, _ := NewHTMLDocument(goquery.NewDocumentFromNode(selection.Nodes[0]), selection.AttrOr("src", url), doc)
    59  
    60  		doc.children.Push(child)
    61  	})
    62  
    63  	return doc, nil
    64  }
    65  
    66  func (doc *HTMLDocument) MarshalJSON() ([]byte, error) {
    67  	return doc.element.MarshalJSON()
    68  }
    69  
    70  func (doc *HTMLDocument) Type() core.Type {
    71  	return drivers.HTMLDocumentType
    72  }
    73  
    74  func (doc *HTMLDocument) String() string {
    75  	str, err := doc.doc.Html()
    76  
    77  	if err != nil {
    78  		return ""
    79  	}
    80  
    81  	return str
    82  }
    83  
    84  func (doc *HTMLDocument) Compare(other core.Value) int64 {
    85  	switch other.Type() {
    86  	case drivers.HTMLElementType:
    87  		otherDoc := other.(drivers.HTMLDocument)
    88  
    89  		return doc.url.Compare(otherDoc.GetURL())
    90  	default:
    91  		return drivers.Compare(doc.Type(), other.Type())
    92  	}
    93  }
    94  
    95  func (doc *HTMLDocument) Unwrap() interface{} {
    96  	return doc.doc
    97  }
    98  
    99  func (doc *HTMLDocument) Hash() uint64 {
   100  	h := fnv.New64a()
   101  
   102  	h.Write([]byte(doc.Type().String()))
   103  	h.Write([]byte(":"))
   104  	h.Write([]byte(doc.url))
   105  
   106  	return h.Sum64()
   107  }
   108  
   109  func (doc *HTMLDocument) Copy() core.Value {
   110  	cp, err := NewHTMLDocument(doc.doc, string(doc.url), doc.parent)
   111  
   112  	if err != nil {
   113  		return values.None
   114  	}
   115  
   116  	return cp
   117  }
   118  
   119  func (doc *HTMLDocument) Clone() core.Cloneable {
   120  	cloned, err := NewHTMLDocument(doc.doc, doc.url.String(), doc.parent)
   121  
   122  	if err != nil {
   123  		return values.None
   124  	}
   125  
   126  	return cloned
   127  }
   128  
   129  func (doc *HTMLDocument) Length() values.Int {
   130  	return values.NewInt(doc.doc.Length())
   131  }
   132  
   133  func (doc *HTMLDocument) Iterate(_ context.Context) (core.Iterator, error) {
   134  	return common.NewIterator(doc.element)
   135  }
   136  
   137  func (doc *HTMLDocument) GetIn(ctx context.Context, path []core.Value) (core.Value, core.PathError) {
   138  	return common.GetInDocument(ctx, path, doc)
   139  }
   140  
   141  func (doc *HTMLDocument) SetIn(ctx context.Context, path []core.Value, value core.Value) core.PathError {
   142  	return common.SetInDocument(ctx, path, doc, value)
   143  }
   144  
   145  func (doc *HTMLDocument) GetNodeType(_ context.Context) (values.Int, error) {
   146  	return 9, nil
   147  }
   148  
   149  func (doc *HTMLDocument) GetNodeName(_ context.Context) (values.String, error) {
   150  	return "#document", nil
   151  }
   152  
   153  func (doc *HTMLDocument) GetChildNodes(ctx context.Context) (*values.Array, error) {
   154  	return doc.element.GetChildNodes(ctx)
   155  }
   156  
   157  func (doc *HTMLDocument) GetChildNode(ctx context.Context, idx values.Int) (core.Value, error) {
   158  	return doc.element.GetChildNode(ctx, idx)
   159  }
   160  
   161  func (doc *HTMLDocument) QuerySelector(ctx context.Context, selector drivers.QuerySelector) (core.Value, error) {
   162  	return doc.element.QuerySelector(ctx, selector)
   163  }
   164  
   165  func (doc *HTMLDocument) QuerySelectorAll(ctx context.Context, selector drivers.QuerySelector) (*values.Array, error) {
   166  	return doc.element.QuerySelectorAll(ctx, selector)
   167  }
   168  
   169  func (doc *HTMLDocument) CountBySelector(ctx context.Context, selector drivers.QuerySelector) (values.Int, error) {
   170  	return doc.element.CountBySelector(ctx, selector)
   171  }
   172  
   173  func (doc *HTMLDocument) ExistsBySelector(ctx context.Context, selector drivers.QuerySelector) (values.Boolean, error) {
   174  	return doc.element.ExistsBySelector(ctx, selector)
   175  }
   176  
   177  func (doc *HTMLDocument) XPath(ctx context.Context, expression values.String) (core.Value, error) {
   178  	return doc.element.XPath(ctx, expression)
   179  }
   180  
   181  func (doc *HTMLDocument) GetTitle() values.String {
   182  	title := doc.doc.Find("head > title")
   183  
   184  	return values.NewString(title.Text())
   185  }
   186  
   187  func (doc *HTMLDocument) GetChildDocuments(_ context.Context) (*values.Array, error) {
   188  	return doc.children.Clone().(*values.Array), nil
   189  }
   190  
   191  func (doc *HTMLDocument) GetURL() values.String {
   192  	return doc.url
   193  }
   194  
   195  func (doc *HTMLDocument) GetElement() drivers.HTMLElement {
   196  	return doc.element
   197  }
   198  
   199  func (doc *HTMLDocument) GetName() values.String {
   200  	return ""
   201  }
   202  
   203  func (doc *HTMLDocument) GetParentDocument(_ context.Context) (drivers.HTMLDocument, error) {
   204  	return doc.parent, nil
   205  }
   206  
   207  func (doc *HTMLDocument) ScrollTop(_ context.Context, _ drivers.ScrollOptions) error {
   208  	return core.ErrNotSupported
   209  }
   210  
   211  func (doc *HTMLDocument) ScrollBottom(_ context.Context, _ drivers.ScrollOptions) error {
   212  	return core.ErrNotSupported
   213  }
   214  
   215  func (doc *HTMLDocument) ScrollBySelector(_ context.Context, _ drivers.QuerySelector, _ drivers.ScrollOptions) error {
   216  	return core.ErrNotSupported
   217  }
   218  
   219  func (doc *HTMLDocument) Scroll(_ context.Context, _ drivers.ScrollOptions) error {
   220  	return core.ErrNotSupported
   221  }
   222  
   223  func (doc *HTMLDocument) MoveMouseByXY(_ context.Context, _, _ values.Float) error {
   224  	return core.ErrNotSupported
   225  }
   226  
   227  func (doc *HTMLDocument) Close() error {
   228  	return nil
   229  }