github.com/MontFerret/ferret@v0.18.0/pkg/drivers/cdp/page.go (about) 1 package cdp 2 3 import ( 4 "context" 5 "hash/fnv" 6 "io" 7 "regexp" 8 "sync" 9 10 "github.com/mafredri/cdp" 11 "github.com/mafredri/cdp/protocol/page" 12 "github.com/mafredri/cdp/rpcc" 13 "github.com/pkg/errors" 14 "github.com/rs/zerolog" 15 16 "github.com/MontFerret/ferret/pkg/drivers" 17 "github.com/MontFerret/ferret/pkg/drivers/cdp/dom" 18 "github.com/MontFerret/ferret/pkg/drivers/cdp/input" 19 net "github.com/MontFerret/ferret/pkg/drivers/cdp/network" 20 "github.com/MontFerret/ferret/pkg/drivers/cdp/templates" 21 "github.com/MontFerret/ferret/pkg/drivers/cdp/utils" 22 "github.com/MontFerret/ferret/pkg/drivers/common" 23 "github.com/MontFerret/ferret/pkg/runtime/core" 24 "github.com/MontFerret/ferret/pkg/runtime/events" 25 "github.com/MontFerret/ferret/pkg/runtime/logging" 26 "github.com/MontFerret/ferret/pkg/runtime/values" 27 ) 28 29 type ( 30 HTMLPageEvent string 31 32 HTMLPage struct { 33 mu sync.Mutex 34 closed values.Boolean 35 logger zerolog.Logger 36 conn *rpcc.Conn 37 client *cdp.Client 38 network *net.Manager 39 dom *dom.Manager 40 } 41 ) 42 43 func LoadHTMLPage( 44 ctx context.Context, 45 conn *rpcc.Conn, 46 params drivers.Params, 47 ) (p *HTMLPage, err error) { 48 logger := logging.FromContext(ctx) 49 50 if conn == nil { 51 return nil, core.Error(core.ErrMissedArgument, "connection") 52 } 53 54 client := cdp.NewClient(conn) 55 if err := enableFeatures(ctx, client, params); err != nil { 56 return nil, err 57 } 58 59 closers := make([]io.Closer, 0, 4) 60 61 defer func() { 62 if err != nil { 63 if err := client.Page.Close(context.Background()); err != nil { 64 logger.Error().Err(err) 65 } 66 67 if err := conn.Close(); err != nil { 68 logger.Error().Err(err) 69 } 70 71 common.CloseAll(logger, closers, "failed to close a Page resource") 72 } 73 }() 74 75 netOpts := net.Options{ 76 Headers: params.Headers, 77 } 78 79 if params.Cookies != nil && params.Cookies.Length() > 0 { 80 netOpts.Cookies = make(map[string]*drivers.HTTPCookies) 81 netOpts.Cookies[params.URL] = params.Cookies 82 } 83 84 if params.Ignore != nil && len(params.Ignore.Resources) > 0 { 85 netOpts.Filter = &net.Filter{ 86 Patterns: params.Ignore.Resources, 87 } 88 } 89 90 netManager, err := net.New( 91 logger, 92 client, 93 netOpts, 94 ) 95 96 if err != nil { 97 return nil, err 98 } 99 100 mouse := input.NewMouse(client) 101 keyboard := input.NewKeyboard(client) 102 103 domManager, err := dom.New( 104 logger, 105 client, 106 mouse, 107 keyboard, 108 ) 109 110 if err != nil { 111 return nil, err 112 } 113 114 p = NewHTMLPage( 115 logger, 116 conn, 117 client, 118 netManager, 119 domManager, 120 ) 121 122 if params.URL != BlankPageURL && params.URL != "" { 123 err = p.Navigate(ctx, values.NewString(params.URL)) 124 } else { 125 err = p.loadMainFrame(ctx) 126 } 127 128 if err != nil { 129 return p, err 130 } 131 132 return p, nil 133 } 134 135 func LoadHTMLPageWithContent( 136 ctx context.Context, 137 conn *rpcc.Conn, 138 params drivers.Params, 139 content []byte, 140 ) (p *HTMLPage, err error) { 141 logger := logging.FromContext(ctx) 142 p, err = LoadHTMLPage(ctx, conn, params) 143 144 if err != nil { 145 return nil, err 146 } 147 148 defer func() { 149 if err != nil { 150 if e := p.Close(); e != nil { 151 logger.Error().Err(e).Msg("failed to close page") 152 } 153 } 154 }() 155 156 frameID := p.getCurrentDocument().Frame().Frame.ID 157 err = p.client.Page.SetDocumentContent(ctx, page.NewSetDocumentContentArgs(frameID, string(content))) 158 159 if err != nil { 160 return nil, errors.Wrap(err, "set document content") 161 } 162 163 // Remove prev frames (from a blank page) 164 prev := p.dom.GetMainFrame() 165 err = p.dom.RemoveFrameRecursively(prev.Frame().Frame.ID) 166 167 if err != nil { 168 return nil, err 169 } 170 171 err = p.loadMainFrame(ctx) 172 173 if err != nil { 174 return nil, err 175 } 176 177 return p, nil 178 } 179 180 func NewHTMLPage( 181 logger zerolog.Logger, 182 conn *rpcc.Conn, 183 client *cdp.Client, 184 netManager *net.Manager, 185 domManager *dom.Manager, 186 ) *HTMLPage { 187 p := new(HTMLPage) 188 p.closed = values.False 189 p.logger = logging.WithName(logger.With(), "cdp_page").Logger() 190 p.conn = conn 191 p.client = client 192 p.network = netManager 193 p.dom = domManager 194 195 return p 196 } 197 198 func (p *HTMLPage) MarshalJSON() ([]byte, error) { 199 return p.getCurrentDocument().MarshalJSON() 200 } 201 202 func (p *HTMLPage) Type() core.Type { 203 return drivers.HTMLPageType 204 } 205 206 func (p *HTMLPage) String() string { 207 return p.getCurrentDocument().GetURL().String() 208 } 209 210 func (p *HTMLPage) Compare(other core.Value) int64 { 211 tc := drivers.Compare(p.Type(), other.Type()) 212 213 if tc != 0 { 214 return tc 215 } 216 217 cdpPage, ok := other.(*HTMLPage) 218 219 if !ok { 220 return 1 221 } 222 223 return p.getCurrentDocument().GetURL().Compare(cdpPage.GetURL()) 224 } 225 226 func (p *HTMLPage) Unwrap() interface{} { 227 p.mu.Lock() 228 defer p.mu.Unlock() 229 230 return p 231 } 232 233 func (p *HTMLPage) Hash() uint64 { 234 h := fnv.New64a() 235 236 h.Write([]byte("CDP")) 237 h.Write([]byte(p.Type().String())) 238 h.Write([]byte(":")) 239 h.Write([]byte(p.getCurrentDocument().GetURL())) 240 241 return h.Sum64() 242 } 243 244 func (p *HTMLPage) Copy() core.Value { 245 return values.None 246 } 247 248 func (p *HTMLPage) GetIn(ctx context.Context, path []core.Value) (core.Value, core.PathError) { 249 return common.GetInPage(ctx, path, p) 250 } 251 252 func (p *HTMLPage) SetIn(ctx context.Context, path []core.Value, value core.Value) core.PathError { 253 return common.SetInPage(ctx, path, p, value) 254 } 255 256 func (p *HTMLPage) Iterate(ctx context.Context) (core.Iterator, error) { 257 return p.getCurrentDocument().Iterate(ctx) 258 } 259 260 func (p *HTMLPage) Length() values.Int { 261 return p.getCurrentDocument().Length() 262 } 263 264 func (p *HTMLPage) Close() error { 265 p.mu.Lock() 266 defer p.mu.Unlock() 267 268 var url string 269 frame := p.dom.GetMainFrame() 270 271 if frame != nil { 272 url = frame.GetURL().String() 273 } 274 275 p.closed = values.True 276 277 err := p.dom.Close() 278 279 if err != nil { 280 p.logger.Warn(). 281 Str("url", url). 282 Err(err). 283 Msg("failed to close dom manager") 284 } 285 286 err = p.network.Close() 287 288 if err != nil { 289 p.logger.Warn(). 290 Str("url", url). 291 Err(err). 292 Msg("failed to close network manager") 293 } 294 295 err = p.client.Page.Close(context.Background()) 296 297 if err != nil { 298 p.logger.Warn(). 299 Str("url", url). 300 Err(err). 301 Msg("failed to close browser page") 302 } 303 304 // Ignore errors from the connection object 305 p.conn.Close() 306 307 return nil 308 } 309 310 func (p *HTMLPage) IsClosed() values.Boolean { 311 p.mu.Lock() 312 defer p.mu.Unlock() 313 314 return p.closed 315 } 316 317 func (p *HTMLPage) GetURL() values.String { 318 res, err := p.getCurrentDocument().Eval().EvalValue(context.Background(), templates.GetURL()) 319 320 if err == nil { 321 return values.ToString(res) 322 } 323 324 p.logger.Warn(). 325 Err(err). 326 Msg("failed to retrieve URL") 327 328 return p.getCurrentDocument().GetURL() 329 } 330 331 func (p *HTMLPage) GetMainFrame() drivers.HTMLDocument { 332 return p.getCurrentDocument() 333 } 334 335 func (p *HTMLPage) GetFrames(ctx context.Context) (*values.Array, error) { 336 p.mu.Lock() 337 defer p.mu.Unlock() 338 339 return p.dom.GetFrameNodes(ctx) 340 } 341 342 func (p *HTMLPage) GetFrame(ctx context.Context, idx values.Int) (core.Value, error) { 343 p.mu.Lock() 344 defer p.mu.Unlock() 345 346 frames, err := p.dom.GetFrameNodes(ctx) 347 348 if err != nil { 349 return values.None, err 350 } 351 352 return frames.Get(idx), nil 353 } 354 355 func (p *HTMLPage) GetCookies(ctx context.Context) (*drivers.HTTPCookies, error) { 356 p.mu.Lock() 357 defer p.mu.Unlock() 358 359 return p.network.GetCookies(ctx) 360 } 361 362 func (p *HTMLPage) SetCookies(ctx context.Context, cookies *drivers.HTTPCookies) error { 363 p.mu.Lock() 364 defer p.mu.Unlock() 365 366 return p.network.SetCookies(ctx, p.getCurrentDocument().GetURL().String(), cookies) 367 } 368 369 func (p *HTMLPage) DeleteCookies(ctx context.Context, cookies *drivers.HTTPCookies) error { 370 p.mu.Lock() 371 defer p.mu.Unlock() 372 373 return p.network.DeleteCookies(ctx, p.getCurrentDocument().GetURL().String(), cookies) 374 } 375 376 func (p *HTMLPage) GetResponse(ctx context.Context) (drivers.HTTPResponse, error) { 377 doc := p.getCurrentDocument() 378 379 if doc == nil { 380 return drivers.HTTPResponse{}, nil 381 } 382 383 return p.network.GetResponse(ctx, doc.Frame().Frame.ID) 384 } 385 386 func (p *HTMLPage) PrintToPDF(ctx context.Context, params drivers.PDFParams) (values.Binary, error) { 387 p.mu.Lock() 388 defer p.mu.Unlock() 389 390 args := page.NewPrintToPDFArgs() 391 args. 392 SetLandscape(bool(params.Landscape)). 393 SetDisplayHeaderFooter(bool(params.DisplayHeaderFooter)). 394 SetPrintBackground(bool(params.PrintBackground)). 395 SetPreferCSSPageSize(bool(params.PreferCSSPageSize)) 396 397 if params.Scale > 0 { 398 args.SetScale(float64(params.Scale)) 399 } 400 401 if params.PaperWidth > 0 { 402 args.SetPaperWidth(float64(params.PaperWidth)) 403 } 404 405 if params.PaperHeight > 0 { 406 args.SetPaperHeight(float64(params.PaperHeight)) 407 } 408 409 if params.MarginTop > 0 { 410 args.SetMarginTop(float64(params.MarginTop)) 411 } 412 413 if params.MarginBottom > 0 { 414 args.SetMarginBottom(float64(params.MarginBottom)) 415 } 416 417 if params.MarginRight > 0 { 418 args.SetMarginRight(float64(params.MarginRight)) 419 } 420 421 if params.MarginLeft > 0 { 422 args.SetMarginLeft(float64(params.MarginLeft)) 423 } 424 425 if params.PageRanges != values.EmptyString { 426 args.SetPageRanges(string(params.PageRanges)) 427 } 428 429 if params.HeaderTemplate != values.EmptyString { 430 args.SetHeaderTemplate(string(params.HeaderTemplate)) 431 } 432 433 if params.FooterTemplate != values.EmptyString { 434 args.SetFooterTemplate(string(params.FooterTemplate)) 435 } 436 437 reply, err := p.client.Page.PrintToPDF(ctx, args) 438 439 if err != nil { 440 return values.NewBinary([]byte{}), err 441 } 442 443 return values.NewBinary(reply.Data), nil 444 } 445 446 func (p *HTMLPage) CaptureScreenshot(ctx context.Context, params drivers.ScreenshotParams) (values.Binary, error) { 447 p.mu.Lock() 448 defer p.mu.Unlock() 449 450 metrics, err := p.client.Page.GetLayoutMetrics(ctx) 451 452 if err != nil { 453 return values.NewBinary(nil), err 454 } 455 456 if params.Format == drivers.ScreenshotFormatJPEG && params.Quality < 0 && params.Quality > 100 { 457 params.Quality = 100 458 } 459 460 if params.X < 0 { 461 params.X = 0 462 } 463 464 if params.Y < 0 { 465 params.Y = 0 466 } 467 468 clientWidth, clientHeight := utils.GetLayoutViewportWH(metrics) 469 470 if params.Width <= 0 { 471 params.Width = values.Float(clientWidth) - params.X 472 } 473 474 if params.Height <= 0 { 475 params.Height = values.Float(clientHeight) - params.Y 476 } 477 478 clip := page.Viewport{ 479 X: float64(params.X), 480 Y: float64(params.Y), 481 Width: float64(params.Width), 482 Height: float64(params.Height), 483 Scale: 1.0, 484 } 485 486 format := string(params.Format) 487 quality := int(params.Quality) 488 args := page.CaptureScreenshotArgs{ 489 Format: &format, 490 Quality: &quality, 491 Clip: &clip, 492 } 493 494 reply, err := p.client.Page.CaptureScreenshot(ctx, &args) 495 496 if err != nil { 497 return values.NewBinary([]byte{}), err 498 } 499 500 return values.NewBinary(reply.Data), nil 501 } 502 503 func (p *HTMLPage) Navigate(ctx context.Context, url values.String) error { 504 p.mu.Lock() 505 defer p.mu.Unlock() 506 507 if err := p.network.Navigate(ctx, url); err != nil { 508 return err 509 } 510 511 return p.reloadMainFrame(ctx) 512 } 513 514 func (p *HTMLPage) NavigateBack(ctx context.Context, skip values.Int) (values.Boolean, error) { 515 p.mu.Lock() 516 defer p.mu.Unlock() 517 518 ret, err := p.network.NavigateBack(ctx, skip) 519 520 if err != nil { 521 return values.False, err 522 } 523 524 return ret, p.reloadMainFrame(ctx) 525 } 526 527 func (p *HTMLPage) NavigateForward(ctx context.Context, skip values.Int) (values.Boolean, error) { 528 p.mu.Lock() 529 defer p.mu.Unlock() 530 531 ret, err := p.network.NavigateForward(ctx, skip) 532 533 if err != nil { 534 return values.False, err 535 } 536 537 return ret, p.reloadMainFrame(ctx) 538 } 539 540 func (p *HTMLPage) WaitForNavigation(ctx context.Context, targetURL values.String) error { 541 p.mu.Lock() 542 defer p.mu.Unlock() 543 544 pattern, err := p.urlToRegexp(targetURL) 545 546 if err != nil { 547 return err 548 } 549 550 if err := p.network.WaitForNavigation(ctx, net.WaitEventOptions{URL: pattern}); err != nil { 551 return err 552 } 553 554 return p.reloadMainFrame(ctx) 555 } 556 557 func (p *HTMLPage) WaitForFrameNavigation(ctx context.Context, frame drivers.HTMLDocument, targetURL values.String) error { 558 p.mu.Lock() 559 defer p.mu.Unlock() 560 561 current := p.dom.GetMainFrame() 562 doc, ok := frame.(*dom.HTMLDocument) 563 564 if !ok { 565 return errors.New("invalid frame type") 566 } 567 568 pattern, err := p.urlToRegexp(targetURL) 569 570 if err != nil { 571 return err 572 } 573 574 frameID := doc.Frame().Frame.ID 575 isMain := current.Frame().Frame.ID == frameID 576 577 opts := net.WaitEventOptions{ 578 URL: pattern, 579 } 580 581 // if it's the current document 582 if !isMain { 583 opts.FrameID = frameID 584 } 585 586 if err = p.network.WaitForNavigation(ctx, opts); err != nil { 587 return err 588 } 589 590 return p.reloadMainFrame(ctx) 591 } 592 593 func (p *HTMLPage) Subscribe(ctx context.Context, subscription events.Subscription) (events.Stream, error) { 594 switch subscription.EventName { 595 case drivers.NavigationEvent: 596 p.mu.Lock() 597 defer p.mu.Unlock() 598 599 stream, err := p.network.OnNavigation(ctx) 600 601 if err != nil { 602 return nil, err 603 } 604 605 return newPageNavigationEventStream(stream, func(ctx context.Context) error { 606 return p.reloadMainFrame(ctx) 607 }), nil 608 case drivers.RequestEvent: 609 return p.network.OnRequest(ctx) 610 case drivers.ResponseEvent: 611 return p.network.OnResponse(ctx) 612 default: 613 return nil, core.Errorf(core.ErrInvalidOperation, "unknown event name: %s", subscription.EventName) 614 } 615 } 616 617 func (p *HTMLPage) urlToRegexp(targetURL values.String) (*regexp.Regexp, error) { 618 if targetURL == "" { 619 return nil, nil 620 } 621 622 r, err := regexp.Compile(targetURL.String()) 623 624 if err != nil { 625 return nil, errors.Wrap(err, "invalid URL pattern") 626 } 627 628 return r, nil 629 } 630 631 func (p *HTMLPage) reloadMainFrame(ctx context.Context) error { 632 prev := p.dom.GetMainFrame() 633 634 if prev != nil { 635 if err := p.dom.RemoveFrameRecursively(prev.Frame().Frame.ID); err != nil { 636 p.logger.Error().Err(err).Msg("failed to remove main frame") 637 } 638 } 639 640 next, err := p.dom.LoadRootDocument(ctx) 641 642 if err != nil { 643 p.logger.Error().Err(err).Msg("failed to load a new root document") 644 645 return err 646 } 647 648 p.dom.SetMainFrame(next) 649 650 return nil 651 } 652 653 func (p *HTMLPage) loadMainFrame(ctx context.Context) error { 654 next, err := p.dom.LoadRootDocument(ctx) 655 656 if err != nil { 657 return err 658 } 659 660 p.dom.SetMainFrame(next) 661 662 return nil 663 } 664 665 func (p *HTMLPage) getCurrentDocument() *dom.HTMLDocument { 666 return p.dom.GetMainFrame() 667 }