github.com/MontFerret/ferret@v0.18.0/pkg/stdlib/html/pagination.go (about) 1 package html 2 3 import ( 4 "context" 5 6 "github.com/rs/zerolog" 7 8 "github.com/MontFerret/ferret/pkg/drivers" 9 "github.com/MontFerret/ferret/pkg/runtime/core" 10 "github.com/MontFerret/ferret/pkg/runtime/logging" 11 "github.com/MontFerret/ferret/pkg/runtime/values" 12 ) 13 14 // PAGINATION creates an iterator that goes through pages using CSS selector. 15 // The iterator starts from the current page i.e. it does not change the page on 1st iteration. 16 // That allows you to keep scraping logic inside FOR loop. 17 // @param {HTMLPage | HTMLDocument | HTMLElement} node - Target html node. 18 // @param {String} selector - CSS selector for a pagination on the page. 19 func Pagination(ctx context.Context, args ...core.Value) (core.Value, error) { 20 err := core.ValidateArgs(args, 2, 2) 21 22 if err != nil { 23 return values.None, err 24 } 25 26 page, err := drivers.ToPage(args[0]) 27 28 if err != nil { 29 return values.None, err 30 } 31 32 selector, err := drivers.ToQuerySelector(args[1]) 33 34 if err != nil { 35 return values.None, err 36 } 37 38 logger := logging. 39 WithName(logging.FromContext(ctx).With(), "stdlib_html_pagination"). 40 Str("selector", selector.String()). 41 Logger() 42 43 return &Paging{logger, page, selector}, nil 44 } 45 46 var PagingType = core.NewType("paging") 47 48 type ( 49 Paging struct { 50 logger zerolog.Logger 51 page drivers.HTMLPage 52 selector drivers.QuerySelector 53 } 54 55 PagingIterator struct { 56 logger zerolog.Logger 57 page drivers.HTMLPage 58 selector drivers.QuerySelector 59 pos values.Int 60 } 61 ) 62 63 func (p *Paging) MarshalJSON() ([]byte, error) { 64 return nil, core.ErrInvalidOperation 65 } 66 67 func (p *Paging) Type() core.Type { 68 return PagingType 69 } 70 71 func (p *Paging) String() string { 72 return PagingType.String() 73 } 74 75 func (p *Paging) Compare(_ core.Value) int64 { 76 return 1 77 } 78 79 func (p *Paging) Unwrap() interface{} { 80 return nil 81 } 82 83 func (p *Paging) Hash() uint64 { 84 return 0 85 } 86 87 func (p *Paging) Copy() core.Value { 88 return values.None 89 } 90 91 func (p *Paging) Iterate(_ context.Context) (core.Iterator, error) { 92 return &PagingIterator{p.logger, p.page, p.selector, -1}, nil 93 } 94 95 func (i *PagingIterator) Next(ctx context.Context) (core.Value, core.Value, error) { 96 i.pos++ 97 98 i.logger.Trace().Int("position", int(i.pos)).Msg("starting to advance iteration") 99 100 if i.pos == 0 { 101 i.logger.Trace().Msg("starting point of pagination. nothing to do. exit") 102 return values.ZeroInt, values.ZeroInt, nil 103 } 104 105 i.logger.Trace().Msg("checking if an element exists...") 106 exists, err := i.page.GetMainFrame().ExistsBySelector(ctx, i.selector) 107 108 if err != nil { 109 i.logger.Trace().Err(err).Msg("failed to check") 110 111 return values.None, values.None, err 112 } 113 114 if !exists { 115 i.logger.Trace().Bool("exists", bool(exists)).Msg("element does not exist. exit") 116 117 return values.None, values.None, core.ErrNoMoreData 118 } 119 120 i.logger.Trace().Bool("exists", bool(exists)).Msg("element exists. clicking...") 121 122 err = i.page.GetMainFrame().GetElement().ClickBySelector(ctx, i.selector, 1) 123 124 if err != nil { 125 i.logger.Trace().Err(err).Msg("failed to click. exit") 126 127 return values.None, values.None, err 128 } 129 130 i.logger.Trace().Msg("successfully clicked on element. iteration has succeeded") 131 132 // terminate 133 return i.pos, i.pos, nil 134 }