github.com/MontFerret/ferret@v0.18.0/pkg/drivers/cdp/page.go (about)

     1  package cdp
     2  
     3  import (
     4  	"context"
     5  	"hash/fnv"
     6  	"io"
     7  	"regexp"
     8  	"sync"
     9  
    10  	"github.com/mafredri/cdp"
    11  	"github.com/mafredri/cdp/protocol/page"
    12  	"github.com/mafredri/cdp/rpcc"
    13  	"github.com/pkg/errors"
    14  	"github.com/rs/zerolog"
    15  
    16  	"github.com/MontFerret/ferret/pkg/drivers"
    17  	"github.com/MontFerret/ferret/pkg/drivers/cdp/dom"
    18  	"github.com/MontFerret/ferret/pkg/drivers/cdp/input"
    19  	net "github.com/MontFerret/ferret/pkg/drivers/cdp/network"
    20  	"github.com/MontFerret/ferret/pkg/drivers/cdp/templates"
    21  	"github.com/MontFerret/ferret/pkg/drivers/cdp/utils"
    22  	"github.com/MontFerret/ferret/pkg/drivers/common"
    23  	"github.com/MontFerret/ferret/pkg/runtime/core"
    24  	"github.com/MontFerret/ferret/pkg/runtime/events"
    25  	"github.com/MontFerret/ferret/pkg/runtime/logging"
    26  	"github.com/MontFerret/ferret/pkg/runtime/values"
    27  )
    28  
    29  type (
    30  	HTMLPageEvent string
    31  
    32  	HTMLPage struct {
    33  		mu      sync.Mutex
    34  		closed  values.Boolean
    35  		logger  zerolog.Logger
    36  		conn    *rpcc.Conn
    37  		client  *cdp.Client
    38  		network *net.Manager
    39  		dom     *dom.Manager
    40  	}
    41  )
    42  
    43  func LoadHTMLPage(
    44  	ctx context.Context,
    45  	conn *rpcc.Conn,
    46  	params drivers.Params,
    47  ) (p *HTMLPage, err error) {
    48  	logger := logging.FromContext(ctx)
    49  
    50  	if conn == nil {
    51  		return nil, core.Error(core.ErrMissedArgument, "connection")
    52  	}
    53  
    54  	client := cdp.NewClient(conn)
    55  	if err := enableFeatures(ctx, client, params); err != nil {
    56  		return nil, err
    57  	}
    58  
    59  	closers := make([]io.Closer, 0, 4)
    60  
    61  	defer func() {
    62  		if err != nil {
    63  			if err := client.Page.Close(context.Background()); err != nil {
    64  				logger.Error().Err(err)
    65  			}
    66  
    67  			if err := conn.Close(); err != nil {
    68  				logger.Error().Err(err)
    69  			}
    70  
    71  			common.CloseAll(logger, closers, "failed to close a Page resource")
    72  		}
    73  	}()
    74  
    75  	netOpts := net.Options{
    76  		Headers: params.Headers,
    77  	}
    78  
    79  	if params.Cookies != nil && params.Cookies.Length() > 0 {
    80  		netOpts.Cookies = make(map[string]*drivers.HTTPCookies)
    81  		netOpts.Cookies[params.URL] = params.Cookies
    82  	}
    83  
    84  	if params.Ignore != nil && len(params.Ignore.Resources) > 0 {
    85  		netOpts.Filter = &net.Filter{
    86  			Patterns: params.Ignore.Resources,
    87  		}
    88  	}
    89  
    90  	netManager, err := net.New(
    91  		logger,
    92  		client,
    93  		netOpts,
    94  	)
    95  
    96  	if err != nil {
    97  		return nil, err
    98  	}
    99  
   100  	mouse := input.NewMouse(client)
   101  	keyboard := input.NewKeyboard(client)
   102  
   103  	domManager, err := dom.New(
   104  		logger,
   105  		client,
   106  		mouse,
   107  		keyboard,
   108  	)
   109  
   110  	if err != nil {
   111  		return nil, err
   112  	}
   113  
   114  	p = NewHTMLPage(
   115  		logger,
   116  		conn,
   117  		client,
   118  		netManager,
   119  		domManager,
   120  	)
   121  
   122  	if params.URL != BlankPageURL && params.URL != "" {
   123  		err = p.Navigate(ctx, values.NewString(params.URL))
   124  	} else {
   125  		err = p.loadMainFrame(ctx)
   126  	}
   127  
   128  	if err != nil {
   129  		return p, err
   130  	}
   131  
   132  	return p, nil
   133  }
   134  
   135  func LoadHTMLPageWithContent(
   136  	ctx context.Context,
   137  	conn *rpcc.Conn,
   138  	params drivers.Params,
   139  	content []byte,
   140  ) (p *HTMLPage, err error) {
   141  	logger := logging.FromContext(ctx)
   142  	p, err = LoadHTMLPage(ctx, conn, params)
   143  
   144  	if err != nil {
   145  		return nil, err
   146  	}
   147  
   148  	defer func() {
   149  		if err != nil {
   150  			if e := p.Close(); e != nil {
   151  				logger.Error().Err(e).Msg("failed to close page")
   152  			}
   153  		}
   154  	}()
   155  
   156  	frameID := p.getCurrentDocument().Frame().Frame.ID
   157  	err = p.client.Page.SetDocumentContent(ctx, page.NewSetDocumentContentArgs(frameID, string(content)))
   158  
   159  	if err != nil {
   160  		return nil, errors.Wrap(err, "set document content")
   161  	}
   162  
   163  	// Remove prev frames (from a blank page)
   164  	prev := p.dom.GetMainFrame()
   165  	err = p.dom.RemoveFrameRecursively(prev.Frame().Frame.ID)
   166  
   167  	if err != nil {
   168  		return nil, err
   169  	}
   170  
   171  	err = p.loadMainFrame(ctx)
   172  
   173  	if err != nil {
   174  		return nil, err
   175  	}
   176  
   177  	return p, nil
   178  }
   179  
   180  func NewHTMLPage(
   181  	logger zerolog.Logger,
   182  	conn *rpcc.Conn,
   183  	client *cdp.Client,
   184  	netManager *net.Manager,
   185  	domManager *dom.Manager,
   186  ) *HTMLPage {
   187  	p := new(HTMLPage)
   188  	p.closed = values.False
   189  	p.logger = logging.WithName(logger.With(), "cdp_page").Logger()
   190  	p.conn = conn
   191  	p.client = client
   192  	p.network = netManager
   193  	p.dom = domManager
   194  
   195  	return p
   196  }
   197  
   198  func (p *HTMLPage) MarshalJSON() ([]byte, error) {
   199  	return p.getCurrentDocument().MarshalJSON()
   200  }
   201  
   202  func (p *HTMLPage) Type() core.Type {
   203  	return drivers.HTMLPageType
   204  }
   205  
   206  func (p *HTMLPage) String() string {
   207  	return p.getCurrentDocument().GetURL().String()
   208  }
   209  
   210  func (p *HTMLPage) Compare(other core.Value) int64 {
   211  	tc := drivers.Compare(p.Type(), other.Type())
   212  
   213  	if tc != 0 {
   214  		return tc
   215  	}
   216  
   217  	cdpPage, ok := other.(*HTMLPage)
   218  
   219  	if !ok {
   220  		return 1
   221  	}
   222  
   223  	return p.getCurrentDocument().GetURL().Compare(cdpPage.GetURL())
   224  }
   225  
   226  func (p *HTMLPage) Unwrap() interface{} {
   227  	p.mu.Lock()
   228  	defer p.mu.Unlock()
   229  
   230  	return p
   231  }
   232  
   233  func (p *HTMLPage) Hash() uint64 {
   234  	h := fnv.New64a()
   235  
   236  	h.Write([]byte("CDP"))
   237  	h.Write([]byte(p.Type().String()))
   238  	h.Write([]byte(":"))
   239  	h.Write([]byte(p.getCurrentDocument().GetURL()))
   240  
   241  	return h.Sum64()
   242  }
   243  
   244  func (p *HTMLPage) Copy() core.Value {
   245  	return values.None
   246  }
   247  
   248  func (p *HTMLPage) GetIn(ctx context.Context, path []core.Value) (core.Value, core.PathError) {
   249  	return common.GetInPage(ctx, path, p)
   250  }
   251  
   252  func (p *HTMLPage) SetIn(ctx context.Context, path []core.Value, value core.Value) core.PathError {
   253  	return common.SetInPage(ctx, path, p, value)
   254  }
   255  
   256  func (p *HTMLPage) Iterate(ctx context.Context) (core.Iterator, error) {
   257  	return p.getCurrentDocument().Iterate(ctx)
   258  }
   259  
   260  func (p *HTMLPage) Length() values.Int {
   261  	return p.getCurrentDocument().Length()
   262  }
   263  
   264  func (p *HTMLPage) Close() error {
   265  	p.mu.Lock()
   266  	defer p.mu.Unlock()
   267  
   268  	var url string
   269  	frame := p.dom.GetMainFrame()
   270  
   271  	if frame != nil {
   272  		url = frame.GetURL().String()
   273  	}
   274  
   275  	p.closed = values.True
   276  
   277  	err := p.dom.Close()
   278  
   279  	if err != nil {
   280  		p.logger.Warn().
   281  			Str("url", url).
   282  			Err(err).
   283  			Msg("failed to close dom manager")
   284  	}
   285  
   286  	err = p.network.Close()
   287  
   288  	if err != nil {
   289  		p.logger.Warn().
   290  			Str("url", url).
   291  			Err(err).
   292  			Msg("failed to close network manager")
   293  	}
   294  
   295  	err = p.client.Page.Close(context.Background())
   296  
   297  	if err != nil {
   298  		p.logger.Warn().
   299  			Str("url", url).
   300  			Err(err).
   301  			Msg("failed to close browser page")
   302  	}
   303  
   304  	// Ignore errors from the connection object
   305  	p.conn.Close()
   306  
   307  	return nil
   308  }
   309  
   310  func (p *HTMLPage) IsClosed() values.Boolean {
   311  	p.mu.Lock()
   312  	defer p.mu.Unlock()
   313  
   314  	return p.closed
   315  }
   316  
   317  func (p *HTMLPage) GetURL() values.String {
   318  	res, err := p.getCurrentDocument().Eval().EvalValue(context.Background(), templates.GetURL())
   319  
   320  	if err == nil {
   321  		return values.ToString(res)
   322  	}
   323  
   324  	p.logger.Warn().
   325  		Err(err).
   326  		Msg("failed to retrieve URL")
   327  
   328  	return p.getCurrentDocument().GetURL()
   329  }
   330  
   331  func (p *HTMLPage) GetMainFrame() drivers.HTMLDocument {
   332  	return p.getCurrentDocument()
   333  }
   334  
   335  func (p *HTMLPage) GetFrames(ctx context.Context) (*values.Array, error) {
   336  	p.mu.Lock()
   337  	defer p.mu.Unlock()
   338  
   339  	return p.dom.GetFrameNodes(ctx)
   340  }
   341  
   342  func (p *HTMLPage) GetFrame(ctx context.Context, idx values.Int) (core.Value, error) {
   343  	p.mu.Lock()
   344  	defer p.mu.Unlock()
   345  
   346  	frames, err := p.dom.GetFrameNodes(ctx)
   347  
   348  	if err != nil {
   349  		return values.None, err
   350  	}
   351  
   352  	return frames.Get(idx), nil
   353  }
   354  
   355  func (p *HTMLPage) GetCookies(ctx context.Context) (*drivers.HTTPCookies, error) {
   356  	p.mu.Lock()
   357  	defer p.mu.Unlock()
   358  
   359  	return p.network.GetCookies(ctx)
   360  }
   361  
   362  func (p *HTMLPage) SetCookies(ctx context.Context, cookies *drivers.HTTPCookies) error {
   363  	p.mu.Lock()
   364  	defer p.mu.Unlock()
   365  
   366  	return p.network.SetCookies(ctx, p.getCurrentDocument().GetURL().String(), cookies)
   367  }
   368  
   369  func (p *HTMLPage) DeleteCookies(ctx context.Context, cookies *drivers.HTTPCookies) error {
   370  	p.mu.Lock()
   371  	defer p.mu.Unlock()
   372  
   373  	return p.network.DeleteCookies(ctx, p.getCurrentDocument().GetURL().String(), cookies)
   374  }
   375  
   376  func (p *HTMLPage) GetResponse(ctx context.Context) (drivers.HTTPResponse, error) {
   377  	doc := p.getCurrentDocument()
   378  
   379  	if doc == nil {
   380  		return drivers.HTTPResponse{}, nil
   381  	}
   382  
   383  	return p.network.GetResponse(ctx, doc.Frame().Frame.ID)
   384  }
   385  
   386  func (p *HTMLPage) PrintToPDF(ctx context.Context, params drivers.PDFParams) (values.Binary, error) {
   387  	p.mu.Lock()
   388  	defer p.mu.Unlock()
   389  
   390  	args := page.NewPrintToPDFArgs()
   391  	args.
   392  		SetLandscape(bool(params.Landscape)).
   393  		SetDisplayHeaderFooter(bool(params.DisplayHeaderFooter)).
   394  		SetPrintBackground(bool(params.PrintBackground)).
   395  		SetPreferCSSPageSize(bool(params.PreferCSSPageSize))
   396  
   397  	if params.Scale > 0 {
   398  		args.SetScale(float64(params.Scale))
   399  	}
   400  
   401  	if params.PaperWidth > 0 {
   402  		args.SetPaperWidth(float64(params.PaperWidth))
   403  	}
   404  
   405  	if params.PaperHeight > 0 {
   406  		args.SetPaperHeight(float64(params.PaperHeight))
   407  	}
   408  
   409  	if params.MarginTop > 0 {
   410  		args.SetMarginTop(float64(params.MarginTop))
   411  	}
   412  
   413  	if params.MarginBottom > 0 {
   414  		args.SetMarginBottom(float64(params.MarginBottom))
   415  	}
   416  
   417  	if params.MarginRight > 0 {
   418  		args.SetMarginRight(float64(params.MarginRight))
   419  	}
   420  
   421  	if params.MarginLeft > 0 {
   422  		args.SetMarginLeft(float64(params.MarginLeft))
   423  	}
   424  
   425  	if params.PageRanges != values.EmptyString {
   426  		args.SetPageRanges(string(params.PageRanges))
   427  	}
   428  
   429  	if params.HeaderTemplate != values.EmptyString {
   430  		args.SetHeaderTemplate(string(params.HeaderTemplate))
   431  	}
   432  
   433  	if params.FooterTemplate != values.EmptyString {
   434  		args.SetFooterTemplate(string(params.FooterTemplate))
   435  	}
   436  
   437  	reply, err := p.client.Page.PrintToPDF(ctx, args)
   438  
   439  	if err != nil {
   440  		return values.NewBinary([]byte{}), err
   441  	}
   442  
   443  	return values.NewBinary(reply.Data), nil
   444  }
   445  
   446  func (p *HTMLPage) CaptureScreenshot(ctx context.Context, params drivers.ScreenshotParams) (values.Binary, error) {
   447  	p.mu.Lock()
   448  	defer p.mu.Unlock()
   449  
   450  	metrics, err := p.client.Page.GetLayoutMetrics(ctx)
   451  
   452  	if err != nil {
   453  		return values.NewBinary(nil), err
   454  	}
   455  
   456  	if params.Format == drivers.ScreenshotFormatJPEG && params.Quality < 0 && params.Quality > 100 {
   457  		params.Quality = 100
   458  	}
   459  
   460  	if params.X < 0 {
   461  		params.X = 0
   462  	}
   463  
   464  	if params.Y < 0 {
   465  		params.Y = 0
   466  	}
   467  
   468  	clientWidth, clientHeight := utils.GetLayoutViewportWH(metrics)
   469  
   470  	if params.Width <= 0 {
   471  		params.Width = values.Float(clientWidth) - params.X
   472  	}
   473  
   474  	if params.Height <= 0 {
   475  		params.Height = values.Float(clientHeight) - params.Y
   476  	}
   477  
   478  	clip := page.Viewport{
   479  		X:      float64(params.X),
   480  		Y:      float64(params.Y),
   481  		Width:  float64(params.Width),
   482  		Height: float64(params.Height),
   483  		Scale:  1.0,
   484  	}
   485  
   486  	format := string(params.Format)
   487  	quality := int(params.Quality)
   488  	args := page.CaptureScreenshotArgs{
   489  		Format:  &format,
   490  		Quality: &quality,
   491  		Clip:    &clip,
   492  	}
   493  
   494  	reply, err := p.client.Page.CaptureScreenshot(ctx, &args)
   495  
   496  	if err != nil {
   497  		return values.NewBinary([]byte{}), err
   498  	}
   499  
   500  	return values.NewBinary(reply.Data), nil
   501  }
   502  
   503  func (p *HTMLPage) Navigate(ctx context.Context, url values.String) error {
   504  	p.mu.Lock()
   505  	defer p.mu.Unlock()
   506  
   507  	if err := p.network.Navigate(ctx, url); err != nil {
   508  		return err
   509  	}
   510  
   511  	return p.reloadMainFrame(ctx)
   512  }
   513  
   514  func (p *HTMLPage) NavigateBack(ctx context.Context, skip values.Int) (values.Boolean, error) {
   515  	p.mu.Lock()
   516  	defer p.mu.Unlock()
   517  
   518  	ret, err := p.network.NavigateBack(ctx, skip)
   519  
   520  	if err != nil {
   521  		return values.False, err
   522  	}
   523  
   524  	return ret, p.reloadMainFrame(ctx)
   525  }
   526  
   527  func (p *HTMLPage) NavigateForward(ctx context.Context, skip values.Int) (values.Boolean, error) {
   528  	p.mu.Lock()
   529  	defer p.mu.Unlock()
   530  
   531  	ret, err := p.network.NavigateForward(ctx, skip)
   532  
   533  	if err != nil {
   534  		return values.False, err
   535  	}
   536  
   537  	return ret, p.reloadMainFrame(ctx)
   538  }
   539  
   540  func (p *HTMLPage) WaitForNavigation(ctx context.Context, targetURL values.String) error {
   541  	p.mu.Lock()
   542  	defer p.mu.Unlock()
   543  
   544  	pattern, err := p.urlToRegexp(targetURL)
   545  
   546  	if err != nil {
   547  		return err
   548  	}
   549  
   550  	if err := p.network.WaitForNavigation(ctx, net.WaitEventOptions{URL: pattern}); err != nil {
   551  		return err
   552  	}
   553  
   554  	return p.reloadMainFrame(ctx)
   555  }
   556  
   557  func (p *HTMLPage) WaitForFrameNavigation(ctx context.Context, frame drivers.HTMLDocument, targetURL values.String) error {
   558  	p.mu.Lock()
   559  	defer p.mu.Unlock()
   560  
   561  	current := p.dom.GetMainFrame()
   562  	doc, ok := frame.(*dom.HTMLDocument)
   563  
   564  	if !ok {
   565  		return errors.New("invalid frame type")
   566  	}
   567  
   568  	pattern, err := p.urlToRegexp(targetURL)
   569  
   570  	if err != nil {
   571  		return err
   572  	}
   573  
   574  	frameID := doc.Frame().Frame.ID
   575  	isMain := current.Frame().Frame.ID == frameID
   576  
   577  	opts := net.WaitEventOptions{
   578  		URL: pattern,
   579  	}
   580  
   581  	// if it's the current document
   582  	if !isMain {
   583  		opts.FrameID = frameID
   584  	}
   585  
   586  	if err = p.network.WaitForNavigation(ctx, opts); err != nil {
   587  		return err
   588  	}
   589  
   590  	return p.reloadMainFrame(ctx)
   591  }
   592  
   593  func (p *HTMLPage) Subscribe(ctx context.Context, subscription events.Subscription) (events.Stream, error) {
   594  	switch subscription.EventName {
   595  	case drivers.NavigationEvent:
   596  		p.mu.Lock()
   597  		defer p.mu.Unlock()
   598  
   599  		stream, err := p.network.OnNavigation(ctx)
   600  
   601  		if err != nil {
   602  			return nil, err
   603  		}
   604  
   605  		return newPageNavigationEventStream(stream, func(ctx context.Context) error {
   606  			return p.reloadMainFrame(ctx)
   607  		}), nil
   608  	case drivers.RequestEvent:
   609  		return p.network.OnRequest(ctx)
   610  	case drivers.ResponseEvent:
   611  		return p.network.OnResponse(ctx)
   612  	default:
   613  		return nil, core.Errorf(core.ErrInvalidOperation, "unknown event name: %s", subscription.EventName)
   614  	}
   615  }
   616  
   617  func (p *HTMLPage) urlToRegexp(targetURL values.String) (*regexp.Regexp, error) {
   618  	if targetURL == "" {
   619  		return nil, nil
   620  	}
   621  
   622  	r, err := regexp.Compile(targetURL.String())
   623  
   624  	if err != nil {
   625  		return nil, errors.Wrap(err, "invalid URL pattern")
   626  	}
   627  
   628  	return r, nil
   629  }
   630  
   631  func (p *HTMLPage) reloadMainFrame(ctx context.Context) error {
   632  	prev := p.dom.GetMainFrame()
   633  
   634  	if prev != nil {
   635  		if err := p.dom.RemoveFrameRecursively(prev.Frame().Frame.ID); err != nil {
   636  			p.logger.Error().Err(err).Msg("failed to remove main frame")
   637  		}
   638  	}
   639  
   640  	next, err := p.dom.LoadRootDocument(ctx)
   641  
   642  	if err != nil {
   643  		p.logger.Error().Err(err).Msg("failed to load a new root document")
   644  
   645  		return err
   646  	}
   647  
   648  	p.dom.SetMainFrame(next)
   649  
   650  	return nil
   651  }
   652  
   653  func (p *HTMLPage) loadMainFrame(ctx context.Context) error {
   654  	next, err := p.dom.LoadRootDocument(ctx)
   655  
   656  	if err != nil {
   657  		return err
   658  	}
   659  
   660  	p.dom.SetMainFrame(next)
   661  
   662  	return nil
   663  }
   664  
   665  func (p *HTMLPage) getCurrentDocument() *dom.HTMLDocument {
   666  	return p.dom.GetMainFrame()
   667  }