// Source: github.com/MontFerret/ferret@v0.18.0/pkg/stdlib/html/document.go

package html

import (
	"context"
	"strings"
	"time"

	"github.com/pkg/errors"

	"github.com/MontFerret/ferret/pkg/drivers"
	"github.com/MontFerret/ferret/pkg/drivers/cdp"
	"github.com/MontFerret/ferret/pkg/runtime/core"
	"github.com/MontFerret/ferret/pkg/runtime/values"
	"github.com/MontFerret/ferret/pkg/runtime/values/types"
)

// PageLoadParams bundles the driver-level page parameters with the options
// that only Open itself consumes: which driver to use and how long to wait.
type PageLoadParams struct {
	// Params is forwarded as-is to the driver's Open call.
	drivers.Params
	// Driver is the driver name passed to drivers.FromContext.
	// It stays empty when the caller did not specify one.
	Driver string
	// Timeout is the deadline applied to the context used for loading the page.
	Timeout time.Duration
}

// DOCUMENT opens an HTML page by a given url.
// By default, loads a page by http call - the resulting page does not support any interactions.
// Instead of an object, the second argument may be a driver name (String)
// or a Boolean, where true selects the CDP driver.
// @param {Object} [params] - An object containing the following properties :
// @param {String} [params.driver] - Driver name to use.
// @param {Int} [params.timeout=60000] - Page load timeout in milliseconds.
// @param {String} [params.userAgent] - Custom user agent.
// @param {Boolean} [params.keepCookies=False] - Boolean value indicating whether to use cookies from previous sessions i.e. not to open a page in the Incognito mode.
// @param {Object[] | Object} [params.cookies] - Set of HTTP cookies to use during page loading.
// @param {String} params.cookies.*.name - Cookie name.
// @param {String} params.cookies.*.value - Cookie value.
// @param {String} params.cookies.*.path - Cookie path.
// @param {String} params.cookies.*.domain - Cookie domain.
// @param {Int} [params.cookies.*.maxAge] - Cookie max age.
// @param {String|DateTime} [params.cookies.*.expires] - Cookie expiration date time.
// @param {String} [params.cookies.*.sameSite] - Cookie cross-origin policy.
// @param {Boolean} [params.cookies.*.httpOnly=false] - Cookie cannot be accessed through client side script.
// @param {Boolean} [params.cookies.*.secure=false] - Cookie sent to the server only with an encrypted request over the HTTPS protocol.
// @param {Object} [params.headers] - Set of HTTP headers to use during page loading.
// @param {Object} [params.ignore] - Set of parameters to ignore some page functionality or behavior.
// @param {Object[]} [params.ignore.resources] - Collection of rules to ignore resources during page load and navigation.
// @param {String} [params.ignore.resources.*.url] - Resource url pattern. If set, requests for matching urls will be blocked. Wildcards ('*' -> zero or more, '?' -> exactly one) are allowed. Escape character is backslash. Omitting is equivalent to "*".
// @param {String} [params.ignore.resources.*.type] - Resource type. If set, requests for matching resource types will be blocked.
// @param {Object[]} [params.ignore.statusCodes] - Collection of rules to ignore certain HTTP codes that can cause failures.
// @param {String} [params.ignore.statusCodes.*.url] - Url pattern. If set, codes for matching urls will be ignored. Wildcards ('*' -> zero or more, '?' -> exactly one) are allowed. Escape character is backslash. Omitting is equivalent to "*".
// @param {Int} [params.ignore.statusCodes.*.code] - HTTP code to ignore.
// @param {Object} [params.viewport] - Viewport params.
// @param {Int} [params.viewport.height] - Viewport height.
// @param {Int} [params.viewport.width] - Viewport width.
// @param {Float} [params.viewport.scaleFactor] - Viewport scale factor.
// @param {Boolean} [params.viewport.mobile] - Value that indicates whether to emulate mobile device.
// @param {Boolean} [params.viewport.landscape] - Value that indicates whether to render a page in landscape position.
// @param {String} [params.charset] - (only HTTPDriver) Charset of the source content, used to convert it to UTF-8.
// @return {HTMLPage} - Loaded HTML page.
56 func Open(ctx context.Context, args ...core.Value) (core.Value, error) { 57 err := core.ValidateArgs(args, 1, 2) 58 59 if err != nil { 60 return values.None, err 61 } 62 63 err = core.ValidateType(args[0], types.String) 64 65 if err != nil { 66 return values.None, err 67 } 68 69 url := args[0].(values.String) 70 71 var params PageLoadParams 72 73 if len(args) == 1 { 74 params = newDefaultDocLoadParams(url) 75 } else { 76 p, err := newPageLoadParams(url, args[1]) 77 78 if err != nil { 79 return values.None, err 80 } 81 82 params = p 83 } 84 85 ctx, cancel := context.WithTimeout(ctx, params.Timeout) 86 defer cancel() 87 88 drv, err := drivers.FromContext(ctx, params.Driver) 89 90 if err != nil { 91 return values.None, err 92 } 93 94 return drv.Open(ctx, params.Params) 95 } 96 97 func newDefaultDocLoadParams(url values.String) PageLoadParams { 98 return PageLoadParams{ 99 Params: drivers.Params{ 100 URL: url.String(), 101 }, 102 Timeout: drivers.DefaultPageLoadTimeout * time.Millisecond, 103 } 104 } 105 106 func newPageLoadParams(url values.String, arg core.Value) (PageLoadParams, error) { 107 res := newDefaultDocLoadParams(url) 108 109 if err := core.ValidateType(arg, types.Boolean, types.String, types.Object); err != nil { 110 return res, err 111 } 112 113 switch arg.Type() { 114 case types.Object: 115 obj := arg.(*values.Object) 116 117 driver, exists := obj.Get(values.NewString("driver")) 118 119 if exists { 120 if err := core.ValidateType(driver, types.String); err != nil { 121 return res, err 122 } 123 124 res.Driver = driver.(values.String).String() 125 } 126 127 timeout, exists := obj.Get(values.NewString("timeout")) 128 129 if exists { 130 if err := core.ValidateType(timeout, types.Int); err != nil { 131 return res, err 132 } 133 134 res.Timeout = time.Duration(timeout.(values.Int)) * time.Millisecond 135 } 136 137 userAgent, exists := obj.Get(values.NewString("userAgent")) 138 139 if exists { 140 if err := core.ValidateType(userAgent, types.String); err 
!= nil { 141 return res, err 142 } 143 144 res.UserAgent = userAgent.String() 145 } 146 147 keepCookies, exists := obj.Get(values.NewString("keepCookies")) 148 149 if exists { 150 if err := core.ValidateType(keepCookies, types.Boolean); err != nil { 151 return res, err 152 } 153 154 res.KeepCookies = bool(keepCookies.(values.Boolean)) 155 } 156 157 cookies, exists := obj.Get(values.NewString("cookies")) 158 159 if exists { 160 if err := core.ValidateType(cookies, types.Array, types.Object); err != nil { 161 return res, err 162 } 163 164 switch c := cookies.(type) { 165 case *values.Array: 166 cookies, err := parseCookieArray(c) 167 168 if err != nil { 169 return res, err 170 } 171 172 res.Cookies = cookies 173 case *values.Object: 174 cookies, err := parseCookieObject(c) 175 176 if err != nil { 177 return res, err 178 } 179 180 res.Cookies = cookies 181 default: 182 res.Cookies = drivers.NewHTTPCookies() 183 } 184 } 185 186 headers, exists := obj.Get(values.NewString("headers")) 187 188 if exists { 189 if err := core.ValidateType(headers, types.Object); err != nil { 190 return res, err 191 } 192 193 header := parseHeader(headers.(*values.Object)) 194 res.Headers = header 195 } 196 197 viewport, exists := obj.Get(values.NewString("viewport")) 198 199 if exists { 200 viewport, err := parseViewport(viewport) 201 202 if err != nil { 203 return res, err 204 } 205 206 res.Viewport = viewport 207 } 208 209 ignore, exists := obj.Get(values.NewString("ignore")) 210 211 if exists { 212 ignore, err := parseIgnore(ignore) 213 214 if err != nil { 215 return res, err 216 } 217 218 res.Ignore = ignore 219 } 220 221 charset, exists := obj.Get(values.NewString("charset")) 222 223 if exists { 224 if err := core.ValidateType(charset, types.String); err != nil { 225 return res, err 226 } 227 228 res.Charset = charset.String() 229 } 230 case types.String: 231 res.Driver = arg.(values.String).String() 232 case types.Boolean: 233 b := arg.(values.Boolean) 234 235 // fallback 236 if b { 
237 res.Driver = cdp.DriverName 238 } 239 } 240 241 return res, nil 242 } 243 244 func parseCookieObject(obj *values.Object) (*drivers.HTTPCookies, error) { 245 if obj == nil { 246 return nil, errors.Wrap(core.ErrMissedArgument, "cookies") 247 } 248 249 var err error 250 res := drivers.NewHTTPCookies() 251 252 obj.ForEach(func(value core.Value, _ string) bool { 253 cookie, e := parseCookie(value) 254 255 if e != nil { 256 err = e 257 258 return false 259 } 260 261 res.Set(cookie) 262 263 return true 264 }) 265 266 return res, err 267 } 268 269 func parseCookieArray(arr *values.Array) (*drivers.HTTPCookies, error) { 270 if arr == nil { 271 return nil, errors.Wrap(core.ErrMissedArgument, "cookies") 272 } 273 274 var err error 275 res := drivers.NewHTTPCookies() 276 277 arr.ForEach(func(value core.Value, _ int) bool { 278 cookie, e := parseCookie(value) 279 280 if e != nil { 281 err = e 282 283 return false 284 } 285 286 res.Set(cookie) 287 288 return true 289 }) 290 291 return res, err 292 } 293 294 func parseCookie(value core.Value) (drivers.HTTPCookie, error) { 295 err := core.ValidateType(value, types.Object, drivers.HTTPCookieType) 296 297 if err != nil { 298 return drivers.HTTPCookie{}, err 299 } 300 301 if value.Type() == drivers.HTTPCookieType { 302 return value.(drivers.HTTPCookie), nil 303 } 304 305 co := value.(*values.Object) 306 307 cookie := drivers.HTTPCookie{ 308 Name: co.MustGet("name").String(), 309 Value: co.MustGet("value").String(), 310 Path: co.MustGet("path").String(), 311 Domain: co.MustGet("domain").String(), 312 } 313 314 maxAge, exists := co.Get("maxAge") 315 316 if exists { 317 if err = core.ValidateType(maxAge, types.Int); err != nil { 318 return drivers.HTTPCookie{}, err 319 } 320 321 cookie.MaxAge = int(maxAge.(values.Int)) 322 } 323 324 expires, exists := co.Get("expires") 325 326 if exists { 327 if err = core.ValidateType(expires, types.DateTime, types.String); err != nil { 328 return drivers.HTTPCookie{}, err 329 } 330 331 if 
expires.Type() == types.DateTime { 332 cookie.Expires = expires.(values.DateTime).Unwrap().(time.Time) 333 } else { 334 t, err := time.Parse(values.DefaultTimeLayout, expires.String()) 335 336 if err != nil { 337 return drivers.HTTPCookie{}, err 338 } 339 340 cookie.Expires = t 341 } 342 } 343 344 sameSite, exists := co.Get("sameSite") 345 346 if exists { 347 sameSite := strings.ToLower(sameSite.String()) 348 349 switch sameSite { 350 case "lax": 351 cookie.SameSite = drivers.SameSiteLaxMode 352 case "strict": 353 cookie.SameSite = drivers.SameSiteStrictMode 354 default: 355 cookie.SameSite = drivers.SameSiteDefaultMode 356 } 357 } 358 359 httpOnly, exists := co.Get("httpOnly") 360 361 if exists { 362 if err = core.ValidateType(httpOnly, types.Boolean); err != nil { 363 return drivers.HTTPCookie{}, err 364 } 365 366 cookie.HTTPOnly = bool(httpOnly.(values.Boolean)) 367 } 368 369 secure, exists := co.Get("secure") 370 371 if exists { 372 if err = core.ValidateType(secure, types.Boolean); err != nil { 373 return drivers.HTTPCookie{}, err 374 } 375 376 cookie.Secure = bool(secure.(values.Boolean)) 377 } 378 379 return cookie, err 380 } 381 382 func parseHeader(headers *values.Object) *drivers.HTTPHeaders { 383 res := drivers.NewHTTPHeaders() 384 385 headers.ForEach(func(value core.Value, key string) bool { 386 if value.Type() == types.Array { 387 value := value.(*values.Array) 388 389 keyValues := make([]string, 0, value.Length()) 390 391 value.ForEach(func(v core.Value, idx int) bool { 392 keyValues = append(keyValues, v.String()) 393 394 return true 395 }) 396 397 res.SetArr(key, keyValues) 398 } else { 399 res.Set(key, value.String()) 400 } 401 402 return true 403 }) 404 405 return res 406 } 407 408 func parseViewport(value core.Value) (*drivers.Viewport, error) { 409 if err := core.ValidateType(value, types.Object); err != nil { 410 return nil, err 411 } 412 413 res := &drivers.Viewport{} 414 415 viewport := value.(*values.Object) 416 417 width, exists := 
viewport.Get(values.NewString("width")) 418 419 if exists { 420 if err := core.ValidateType(width, types.Int); err != nil { 421 return nil, err 422 } 423 424 res.Width = int(values.ToInt(width)) 425 } 426 427 height, exists := viewport.Get(values.NewString("height")) 428 429 if exists { 430 if err := core.ValidateType(height, types.Int); err != nil { 431 return nil, err 432 } 433 434 res.Height = int(values.ToInt(height)) 435 } 436 437 mobile, exists := viewport.Get(values.NewString("mobile")) 438 439 if exists { 440 res.Mobile = bool(values.ToBoolean(mobile)) 441 } 442 443 landscape, exists := viewport.Get(values.NewString("landscape")) 444 445 if exists { 446 res.Landscape = bool(values.ToBoolean(landscape)) 447 } 448 449 scaleFactor, exists := viewport.Get(values.NewString("scaleFactor")) 450 451 if exists { 452 res.ScaleFactor = float64(values.ToFloat(scaleFactor)) 453 } 454 455 return res, nil 456 } 457 458 func parseIgnore(value core.Value) (*drivers.Ignore, error) { 459 if err := core.ValidateType(value, types.Object); err != nil { 460 return nil, err 461 } 462 463 res := &drivers.Ignore{} 464 465 ignore := value.(*values.Object) 466 467 resources, exists := ignore.Get("resources") 468 469 if exists { 470 if err := core.ValidateType(resources, types.Array); err != nil { 471 return nil, err 472 } 473 474 resources := resources.(*values.Array) 475 476 res.Resources = make([]drivers.ResourceFilter, 0, resources.Length()) 477 478 var e error 479 480 resources.ForEach(func(el core.Value, idx int) bool { 481 if e = core.ValidateType(el, types.Object); e != nil { 482 return false 483 } 484 485 pattern := el.(*values.Object) 486 487 url, urlExists := pattern.Get("url") 488 resType, resTypeExists := pattern.Get("type") 489 490 // ignore element 491 if !urlExists && !resTypeExists { 492 return true 493 } 494 495 res.Resources = append(res.Resources, drivers.ResourceFilter{ 496 URL: url.String(), 497 Type: resType.String(), 498 }) 499 500 return true 501 }) 502 503 if 
e != nil { 504 return nil, e 505 } 506 } 507 508 statusCodes, exists := ignore.Get("statusCodes") 509 510 if exists { 511 if err := core.ValidateType(statusCodes, types.Array); err != nil { 512 return nil, err 513 } 514 515 statusCodes := statusCodes.(*values.Array) 516 517 res.StatusCodes = make([]drivers.StatusCodeFilter, 0, statusCodes.Length()) 518 519 var e error 520 521 statusCodes.ForEach(func(el core.Value, idx int) bool { 522 if e = core.ValidateType(el, types.Object); e != nil { 523 return false 524 } 525 526 pattern := el.(*values.Object) 527 528 url := pattern.MustGetOr("url", values.NewString("")) 529 code, codeExists := pattern.Get("code") 530 531 // ignore element 532 if !codeExists { 533 e = errors.New("http code is required") 534 return false 535 } 536 537 res.StatusCodes = append(res.StatusCodes, drivers.StatusCodeFilter{ 538 URL: url.String(), 539 Code: int(values.ToInt(code)), 540 }) 541 542 return true 543 }) 544 } 545 546 return res, nil 547 }