github.com/MontFerret/ferret@v0.18.0/pkg/stdlib/html/document.go (about)

     1  package html
     2  
     3  import (
     4  	"context"
     5  	"strings"
     6  	"time"
     7  
     8  	"github.com/pkg/errors"
     9  
    10  	"github.com/MontFerret/ferret/pkg/drivers"
    11  	"github.com/MontFerret/ferret/pkg/drivers/cdp"
    12  	"github.com/MontFerret/ferret/pkg/runtime/core"
    13  	"github.com/MontFerret/ferret/pkg/runtime/values"
    14  	"github.com/MontFerret/ferret/pkg/runtime/values/types"
    15  )
    16  
// PageLoadParams groups everything Open needs to load a page: the
// driver-level parameters, the driver to use and the load timeout.
type PageLoadParams struct {
	// Params are the driver-level parameters handed to the driver's Open call.
	drivers.Params
	// Driver is the driver name passed to drivers.FromContext.
	Driver  string
	// Timeout bounds the page load; Open applies it with context.WithTimeout.
	Timeout time.Duration
}
    22  
    23  // DOCUMENT opens an HTML page by a given url.
    24  // By default, loads a page by http call - resulted page does not support any interactions.
    25  // @param {Object} [params] - An object containing the following properties :
    26  // @param {String} [params.driver] - Driver name to use.
    27  // @param {Int} [params.timeout=60000] - Page load timeout.
    28  // @param {String} [params.userAgent] - Custom user agent.
    29  // @param {Boolean} [params.keepCookies=False] - Boolean value indicating whether to use cookies from previous sessions i.e. not to open a page in the Incognito mode.
    30  // @param {Object[] | Object} [params.cookies] - Set of HTTP cookies to use during page loading.
    31  // @param {String} params.cookies.*.name - Cookie name.
    32  // @param {String} params.cookies.*.value - Cookie value.
    33  // @param {String} params.cookies.*.path - Cookie path.
    34  // @param {String} params.cookies.*.domain - Cookie domain.
    35  // @param {Int} [params.cookies.*.maxAge] - Cookie max age.
    36  // @param {String|DateTime} [params.cookies.*.expires] - Cookie expiration date time.
    37  // @param {String} [params.cookies.*.sameSite] - Cookie cross-origin policy.
    38  // @param {Boolean} [params.cookies.*.httpOnly=false] - Cookie cannot be accessed through client side script.
    39  // @param {Boolean} [params.cookies.*.secure=false] - Cookie sent to the server only with an encrypted request over the HTTPS protocol.
    40  // @param {Object} [params.headers] - Set of HTTP headers to use during page loading.
    41  // @param {Object} [params.ignore] - Set of parameters to ignore some page functionality or behavior.
    42  // @param {Object[]} [params.ignore.resources] - Collection of rules to ignore resources during page load and navigation.
    43  // @param {String} [params.ignore.resources.*.url] - Resource url pattern. If set, requests for matching urls will be blocked. Wildcards ('*' -> zero or more, '?' -> exactly one) are allowed. Escape character is backslash. Omitting is equivalent to "*".
    44  // @param {String} [params.ignore.resources.*.type] - Resource type. If set, requests for matching resource types will be blocked.
    45  // @param {Object[]} [params.ignore.statusCodes] - Collection of rules to ignore certain HTTP codes that can cause failures.
    46  // @param {String} [params.ignore.statusCodes.*.url] - Url pattern. If set, codes for matching urls will be ignored. Wildcards ('*' -> zero or more, '?' -> exactly one) are allowed. Escape character is backslash. Omitting is equivalent to "*".
    47  // @param {Int} [params.ignore.statusCodes.*.code] - HTTP code to ignore.
    48  // @param {Object} [params.viewport] - Viewport params.
    49  // @param {Int} [params.viewport.height] - Viewport height.
    50  // @param {Int} [params.viewport.width] - Viewport width.
    51  // @param {Float} [params.viewport.scaleFactor] - Viewport scale factor.
    52  // @param {Boolean} [params.viewport.mobile] - Value that indicates whether to emulate mobile device.
    53  // @param {Boolean} [params.viewport.landscape] - Value that indicates whether to render a page in landscape position.
    54  // @param {String} [params.charset] - (only HTTPDriver) Source charset content to convert UTF-8.
    55  // @return {HTMLPage} - Loaded HTML page.
    56  func Open(ctx context.Context, args ...core.Value) (core.Value, error) {
    57  	err := core.ValidateArgs(args, 1, 2)
    58  
    59  	if err != nil {
    60  		return values.None, err
    61  	}
    62  
    63  	err = core.ValidateType(args[0], types.String)
    64  
    65  	if err != nil {
    66  		return values.None, err
    67  	}
    68  
    69  	url := args[0].(values.String)
    70  
    71  	var params PageLoadParams
    72  
    73  	if len(args) == 1 {
    74  		params = newDefaultDocLoadParams(url)
    75  	} else {
    76  		p, err := newPageLoadParams(url, args[1])
    77  
    78  		if err != nil {
    79  			return values.None, err
    80  		}
    81  
    82  		params = p
    83  	}
    84  
    85  	ctx, cancel := context.WithTimeout(ctx, params.Timeout)
    86  	defer cancel()
    87  
    88  	drv, err := drivers.FromContext(ctx, params.Driver)
    89  
    90  	if err != nil {
    91  		return values.None, err
    92  	}
    93  
    94  	return drv.Open(ctx, params.Params)
    95  }
    96  
    97  func newDefaultDocLoadParams(url values.String) PageLoadParams {
    98  	return PageLoadParams{
    99  		Params: drivers.Params{
   100  			URL: url.String(),
   101  		},
   102  		Timeout: drivers.DefaultPageLoadTimeout * time.Millisecond,
   103  	}
   104  }
   105  
   106  func newPageLoadParams(url values.String, arg core.Value) (PageLoadParams, error) {
   107  	res := newDefaultDocLoadParams(url)
   108  
   109  	if err := core.ValidateType(arg, types.Boolean, types.String, types.Object); err != nil {
   110  		return res, err
   111  	}
   112  
   113  	switch arg.Type() {
   114  	case types.Object:
   115  		obj := arg.(*values.Object)
   116  
   117  		driver, exists := obj.Get(values.NewString("driver"))
   118  
   119  		if exists {
   120  			if err := core.ValidateType(driver, types.String); err != nil {
   121  				return res, err
   122  			}
   123  
   124  			res.Driver = driver.(values.String).String()
   125  		}
   126  
   127  		timeout, exists := obj.Get(values.NewString("timeout"))
   128  
   129  		if exists {
   130  			if err := core.ValidateType(timeout, types.Int); err != nil {
   131  				return res, err
   132  			}
   133  
   134  			res.Timeout = time.Duration(timeout.(values.Int)) * time.Millisecond
   135  		}
   136  
   137  		userAgent, exists := obj.Get(values.NewString("userAgent"))
   138  
   139  		if exists {
   140  			if err := core.ValidateType(userAgent, types.String); err != nil {
   141  				return res, err
   142  			}
   143  
   144  			res.UserAgent = userAgent.String()
   145  		}
   146  
   147  		keepCookies, exists := obj.Get(values.NewString("keepCookies"))
   148  
   149  		if exists {
   150  			if err := core.ValidateType(keepCookies, types.Boolean); err != nil {
   151  				return res, err
   152  			}
   153  
   154  			res.KeepCookies = bool(keepCookies.(values.Boolean))
   155  		}
   156  
   157  		cookies, exists := obj.Get(values.NewString("cookies"))
   158  
   159  		if exists {
   160  			if err := core.ValidateType(cookies, types.Array, types.Object); err != nil {
   161  				return res, err
   162  			}
   163  
   164  			switch c := cookies.(type) {
   165  			case *values.Array:
   166  				cookies, err := parseCookieArray(c)
   167  
   168  				if err != nil {
   169  					return res, err
   170  				}
   171  
   172  				res.Cookies = cookies
   173  			case *values.Object:
   174  				cookies, err := parseCookieObject(c)
   175  
   176  				if err != nil {
   177  					return res, err
   178  				}
   179  
   180  				res.Cookies = cookies
   181  			default:
   182  				res.Cookies = drivers.NewHTTPCookies()
   183  			}
   184  		}
   185  
   186  		headers, exists := obj.Get(values.NewString("headers"))
   187  
   188  		if exists {
   189  			if err := core.ValidateType(headers, types.Object); err != nil {
   190  				return res, err
   191  			}
   192  
   193  			header := parseHeader(headers.(*values.Object))
   194  			res.Headers = header
   195  		}
   196  
   197  		viewport, exists := obj.Get(values.NewString("viewport"))
   198  
   199  		if exists {
   200  			viewport, err := parseViewport(viewport)
   201  
   202  			if err != nil {
   203  				return res, err
   204  			}
   205  
   206  			res.Viewport = viewport
   207  		}
   208  
   209  		ignore, exists := obj.Get(values.NewString("ignore"))
   210  
   211  		if exists {
   212  			ignore, err := parseIgnore(ignore)
   213  
   214  			if err != nil {
   215  				return res, err
   216  			}
   217  
   218  			res.Ignore = ignore
   219  		}
   220  
   221  		charset, exists := obj.Get(values.NewString("charset"))
   222  
   223  		if exists {
   224  			if err := core.ValidateType(charset, types.String); err != nil {
   225  				return res, err
   226  			}
   227  
   228  			res.Charset = charset.String()
   229  		}
   230  	case types.String:
   231  		res.Driver = arg.(values.String).String()
   232  	case types.Boolean:
   233  		b := arg.(values.Boolean)
   234  
   235  		// fallback
   236  		if b {
   237  			res.Driver = cdp.DriverName
   238  		}
   239  	}
   240  
   241  	return res, nil
   242  }
   243  
   244  func parseCookieObject(obj *values.Object) (*drivers.HTTPCookies, error) {
   245  	if obj == nil {
   246  		return nil, errors.Wrap(core.ErrMissedArgument, "cookies")
   247  	}
   248  
   249  	var err error
   250  	res := drivers.NewHTTPCookies()
   251  
   252  	obj.ForEach(func(value core.Value, _ string) bool {
   253  		cookie, e := parseCookie(value)
   254  
   255  		if e != nil {
   256  			err = e
   257  
   258  			return false
   259  		}
   260  
   261  		res.Set(cookie)
   262  
   263  		return true
   264  	})
   265  
   266  	return res, err
   267  }
   268  
   269  func parseCookieArray(arr *values.Array) (*drivers.HTTPCookies, error) {
   270  	if arr == nil {
   271  		return nil, errors.Wrap(core.ErrMissedArgument, "cookies")
   272  	}
   273  
   274  	var err error
   275  	res := drivers.NewHTTPCookies()
   276  
   277  	arr.ForEach(func(value core.Value, _ int) bool {
   278  		cookie, e := parseCookie(value)
   279  
   280  		if e != nil {
   281  			err = e
   282  
   283  			return false
   284  		}
   285  
   286  		res.Set(cookie)
   287  
   288  		return true
   289  	})
   290  
   291  	return res, err
   292  }
   293  
   294  func parseCookie(value core.Value) (drivers.HTTPCookie, error) {
   295  	err := core.ValidateType(value, types.Object, drivers.HTTPCookieType)
   296  
   297  	if err != nil {
   298  		return drivers.HTTPCookie{}, err
   299  	}
   300  
   301  	if value.Type() == drivers.HTTPCookieType {
   302  		return value.(drivers.HTTPCookie), nil
   303  	}
   304  
   305  	co := value.(*values.Object)
   306  
   307  	cookie := drivers.HTTPCookie{
   308  		Name:   co.MustGet("name").String(),
   309  		Value:  co.MustGet("value").String(),
   310  		Path:   co.MustGet("path").String(),
   311  		Domain: co.MustGet("domain").String(),
   312  	}
   313  
   314  	maxAge, exists := co.Get("maxAge")
   315  
   316  	if exists {
   317  		if err = core.ValidateType(maxAge, types.Int); err != nil {
   318  			return drivers.HTTPCookie{}, err
   319  		}
   320  
   321  		cookie.MaxAge = int(maxAge.(values.Int))
   322  	}
   323  
   324  	expires, exists := co.Get("expires")
   325  
   326  	if exists {
   327  		if err = core.ValidateType(expires, types.DateTime, types.String); err != nil {
   328  			return drivers.HTTPCookie{}, err
   329  		}
   330  
   331  		if expires.Type() == types.DateTime {
   332  			cookie.Expires = expires.(values.DateTime).Unwrap().(time.Time)
   333  		} else {
   334  			t, err := time.Parse(values.DefaultTimeLayout, expires.String())
   335  
   336  			if err != nil {
   337  				return drivers.HTTPCookie{}, err
   338  			}
   339  
   340  			cookie.Expires = t
   341  		}
   342  	}
   343  
   344  	sameSite, exists := co.Get("sameSite")
   345  
   346  	if exists {
   347  		sameSite := strings.ToLower(sameSite.String())
   348  
   349  		switch sameSite {
   350  		case "lax":
   351  			cookie.SameSite = drivers.SameSiteLaxMode
   352  		case "strict":
   353  			cookie.SameSite = drivers.SameSiteStrictMode
   354  		default:
   355  			cookie.SameSite = drivers.SameSiteDefaultMode
   356  		}
   357  	}
   358  
   359  	httpOnly, exists := co.Get("httpOnly")
   360  
   361  	if exists {
   362  		if err = core.ValidateType(httpOnly, types.Boolean); err != nil {
   363  			return drivers.HTTPCookie{}, err
   364  		}
   365  
   366  		cookie.HTTPOnly = bool(httpOnly.(values.Boolean))
   367  	}
   368  
   369  	secure, exists := co.Get("secure")
   370  
   371  	if exists {
   372  		if err = core.ValidateType(secure, types.Boolean); err != nil {
   373  			return drivers.HTTPCookie{}, err
   374  		}
   375  
   376  		cookie.Secure = bool(secure.(values.Boolean))
   377  	}
   378  
   379  	return cookie, err
   380  }
   381  
   382  func parseHeader(headers *values.Object) *drivers.HTTPHeaders {
   383  	res := drivers.NewHTTPHeaders()
   384  
   385  	headers.ForEach(func(value core.Value, key string) bool {
   386  		if value.Type() == types.Array {
   387  			value := value.(*values.Array)
   388  
   389  			keyValues := make([]string, 0, value.Length())
   390  
   391  			value.ForEach(func(v core.Value, idx int) bool {
   392  				keyValues = append(keyValues, v.String())
   393  
   394  				return true
   395  			})
   396  
   397  			res.SetArr(key, keyValues)
   398  		} else {
   399  			res.Set(key, value.String())
   400  		}
   401  
   402  		return true
   403  	})
   404  
   405  	return res
   406  }
   407  
   408  func parseViewport(value core.Value) (*drivers.Viewport, error) {
   409  	if err := core.ValidateType(value, types.Object); err != nil {
   410  		return nil, err
   411  	}
   412  
   413  	res := &drivers.Viewport{}
   414  
   415  	viewport := value.(*values.Object)
   416  
   417  	width, exists := viewport.Get(values.NewString("width"))
   418  
   419  	if exists {
   420  		if err := core.ValidateType(width, types.Int); err != nil {
   421  			return nil, err
   422  		}
   423  
   424  		res.Width = int(values.ToInt(width))
   425  	}
   426  
   427  	height, exists := viewport.Get(values.NewString("height"))
   428  
   429  	if exists {
   430  		if err := core.ValidateType(height, types.Int); err != nil {
   431  			return nil, err
   432  		}
   433  
   434  		res.Height = int(values.ToInt(height))
   435  	}
   436  
   437  	mobile, exists := viewport.Get(values.NewString("mobile"))
   438  
   439  	if exists {
   440  		res.Mobile = bool(values.ToBoolean(mobile))
   441  	}
   442  
   443  	landscape, exists := viewport.Get(values.NewString("landscape"))
   444  
   445  	if exists {
   446  		res.Landscape = bool(values.ToBoolean(landscape))
   447  	}
   448  
   449  	scaleFactor, exists := viewport.Get(values.NewString("scaleFactor"))
   450  
   451  	if exists {
   452  		res.ScaleFactor = float64(values.ToFloat(scaleFactor))
   453  	}
   454  
   455  	return res, nil
   456  }
   457  
   458  func parseIgnore(value core.Value) (*drivers.Ignore, error) {
   459  	if err := core.ValidateType(value, types.Object); err != nil {
   460  		return nil, err
   461  	}
   462  
   463  	res := &drivers.Ignore{}
   464  
   465  	ignore := value.(*values.Object)
   466  
   467  	resources, exists := ignore.Get("resources")
   468  
   469  	if exists {
   470  		if err := core.ValidateType(resources, types.Array); err != nil {
   471  			return nil, err
   472  		}
   473  
   474  		resources := resources.(*values.Array)
   475  
   476  		res.Resources = make([]drivers.ResourceFilter, 0, resources.Length())
   477  
   478  		var e error
   479  
   480  		resources.ForEach(func(el core.Value, idx int) bool {
   481  			if e = core.ValidateType(el, types.Object); e != nil {
   482  				return false
   483  			}
   484  
   485  			pattern := el.(*values.Object)
   486  
   487  			url, urlExists := pattern.Get("url")
   488  			resType, resTypeExists := pattern.Get("type")
   489  
   490  			// ignore element
   491  			if !urlExists && !resTypeExists {
   492  				return true
   493  			}
   494  
   495  			res.Resources = append(res.Resources, drivers.ResourceFilter{
   496  				URL:  url.String(),
   497  				Type: resType.String(),
   498  			})
   499  
   500  			return true
   501  		})
   502  
   503  		if e != nil {
   504  			return nil, e
   505  		}
   506  	}
   507  
   508  	statusCodes, exists := ignore.Get("statusCodes")
   509  
   510  	if exists {
   511  		if err := core.ValidateType(statusCodes, types.Array); err != nil {
   512  			return nil, err
   513  		}
   514  
   515  		statusCodes := statusCodes.(*values.Array)
   516  
   517  		res.StatusCodes = make([]drivers.StatusCodeFilter, 0, statusCodes.Length())
   518  
   519  		var e error
   520  
   521  		statusCodes.ForEach(func(el core.Value, idx int) bool {
   522  			if e = core.ValidateType(el, types.Object); e != nil {
   523  				return false
   524  			}
   525  
   526  			pattern := el.(*values.Object)
   527  
   528  			url := pattern.MustGetOr("url", values.NewString(""))
   529  			code, codeExists := pattern.Get("code")
   530  
   531  			// ignore element
   532  			if !codeExists {
   533  				e = errors.New("http code is required")
   534  				return false
   535  			}
   536  
   537  			res.StatusCodes = append(res.StatusCodes, drivers.StatusCodeFilter{
   538  				URL:  url.String(),
   539  				Code: int(values.ToInt(code)),
   540  			})
   541  
   542  			return true
   543  		})
   544  	}
   545  
   546  	return res, nil
   547  }