github.com/MontFerret/ferret@v0.18.0/pkg/stdlib/html/pagination.go (about)

     1  package html
     2  
     3  import (
     4  	"context"
     5  
     6  	"github.com/rs/zerolog"
     7  
     8  	"github.com/MontFerret/ferret/pkg/drivers"
     9  	"github.com/MontFerret/ferret/pkg/runtime/core"
    10  	"github.com/MontFerret/ferret/pkg/runtime/logging"
    11  	"github.com/MontFerret/ferret/pkg/runtime/values"
    12  )
    13  
    14  // PAGINATION creates an iterator that goes through pages using CSS selector.
    15  // The iterator starts from the current page i.e. it does not change the page on 1st iteration.
    16  // That allows you to keep scraping logic inside FOR loop.
    17  // @param {HTMLPage | HTMLDocument | HTMLElement} node - Target html node.
    18  // @param {String} selector - CSS selector for a pagination on the page.
    19  func Pagination(ctx context.Context, args ...core.Value) (core.Value, error) {
    20  	err := core.ValidateArgs(args, 2, 2)
    21  
    22  	if err != nil {
    23  		return values.None, err
    24  	}
    25  
    26  	page, err := drivers.ToPage(args[0])
    27  
    28  	if err != nil {
    29  		return values.None, err
    30  	}
    31  
    32  	selector, err := drivers.ToQuerySelector(args[1])
    33  
    34  	if err != nil {
    35  		return values.None, err
    36  	}
    37  
    38  	logger := logging.
    39  		WithName(logging.FromContext(ctx).With(), "stdlib_html_pagination").
    40  		Str("selector", selector.String()).
    41  		Logger()
    42  
    43  	return &Paging{logger, page, selector}, nil
    44  }
    45  
    46  var PagingType = core.NewType("paging")
    47  
    48  type (
    49  	Paging struct {
    50  		logger   zerolog.Logger
    51  		page     drivers.HTMLPage
    52  		selector drivers.QuerySelector
    53  	}
    54  
    55  	PagingIterator struct {
    56  		logger   zerolog.Logger
    57  		page     drivers.HTMLPage
    58  		selector drivers.QuerySelector
    59  		pos      values.Int
    60  	}
    61  )
    62  
    63  func (p *Paging) MarshalJSON() ([]byte, error) {
    64  	return nil, core.ErrInvalidOperation
    65  }
    66  
    67  func (p *Paging) Type() core.Type {
    68  	return PagingType
    69  }
    70  
    71  func (p *Paging) String() string {
    72  	return PagingType.String()
    73  }
    74  
    75  func (p *Paging) Compare(_ core.Value) int64 {
    76  	return 1
    77  }
    78  
    79  func (p *Paging) Unwrap() interface{} {
    80  	return nil
    81  }
    82  
    83  func (p *Paging) Hash() uint64 {
    84  	return 0
    85  }
    86  
    87  func (p *Paging) Copy() core.Value {
    88  	return values.None
    89  }
    90  
    91  func (p *Paging) Iterate(_ context.Context) (core.Iterator, error) {
    92  	return &PagingIterator{p.logger, p.page, p.selector, -1}, nil
    93  }
    94  
    95  func (i *PagingIterator) Next(ctx context.Context) (core.Value, core.Value, error) {
    96  	i.pos++
    97  
    98  	i.logger.Trace().Int("position", int(i.pos)).Msg("starting to advance iteration")
    99  
   100  	if i.pos == 0 {
   101  		i.logger.Trace().Msg("starting point of pagination. nothing to do. exit")
   102  		return values.ZeroInt, values.ZeroInt, nil
   103  	}
   104  
   105  	i.logger.Trace().Msg("checking if an element exists...")
   106  	exists, err := i.page.GetMainFrame().ExistsBySelector(ctx, i.selector)
   107  
   108  	if err != nil {
   109  		i.logger.Trace().Err(err).Msg("failed to check")
   110  
   111  		return values.None, values.None, err
   112  	}
   113  
   114  	if !exists {
   115  		i.logger.Trace().Bool("exists", bool(exists)).Msg("element does not exist. exit")
   116  
   117  		return values.None, values.None, core.ErrNoMoreData
   118  	}
   119  
   120  	i.logger.Trace().Bool("exists", bool(exists)).Msg("element exists. clicking...")
   121  
   122  	err = i.page.GetMainFrame().GetElement().ClickBySelector(ctx, i.selector, 1)
   123  
   124  	if err != nil {
   125  		i.logger.Trace().Err(err).Msg("failed to click. exit")
   126  
   127  		return values.None, values.None, err
   128  	}
   129  
   130  	i.logger.Trace().Msg("successfully clicked on element. iteration has succeeded")
   131  
   132  	// terminate
   133  	return i.pos, i.pos, nil
   134  }