// Source: github.com/MontFerret/ferret@v0.18.0/examples/pagination.fql
//
// Searches amazon.com for @criteria and scrapes the result items from
// successive result pages, advancing with the pagination "next" button.
//
// Params:
//   @criteria - text typed into the search box.
//   @pages    - upper bound on the number of result pages visited (LIMIT).
//
// Returns a flat array of { page, url, title, price } objects.

LET baseURL = 'https://www.amazon.com/'
LET amazon = DOCUMENT(baseURL, { driver: "cdp" })

// Submit the search through the page's own search form.
WAIT_ELEMENT(amazon, '#nav-search-submit-button')
INPUT(amazon, '#twotabsearchtextbox', @criteria)
CLICK(amazon, '#nav-search-submit-button')

// Block until a navigation event whose URL matches the search-results pattern.
WAITFOR EVENT "navigation" IN amazon
    FILTER current.url =~ "www\.amazon\.com\/s\?k="
    TIMEOUT 50000

WAIT_ELEMENT(amazon, '[class*="template=PAGINATION"]')

// Derive the CSS class prefix (the text before the first "-") from the first
// class name of the paginator element that matches "*-pagination*".
LET paginator = ELEMENT(amazon, '[class*="-pagination"]')
LET foundPrefixes = (
    FOR cn IN SPLIT(paginator.attributes.class, " ")
        FILTER cn LIKE "*-pagination*"
        LIMIT 1
        RETURN FIRST(SPLIT(cn, "-"))
)

LET prefix = FIRST(foundPrefixes)
T::NOT::EMPTY(prefix, "CSS prefix should not be empty")
PRINT("CSS Prefix is:", prefix)

// Selector sets keyed by the detected prefix.
// NOTE(review): only nextBtnSelector is read below; the per-variant
// pagersSelector entries are never used (the page count comes from the
// prefix-formatted pagersSelector further down) - confirm which was intended.
LET variants = {
    "s": {
        nextBtnSelector: ".s-pagination-next",
        pagersSelector: ".s-pagination-item:not(.s-pagination-next, .s-pagination-previous):last-of-type"
    },
    "a": {
        nextBtnSelector: ".a-pagination .a-last",
        pagersSelector: FMT("ul.a-pagination li:nth-of-type({})", paginator.length - 1)
    }
}

LET selectors = variants[prefix]

T::NOT::NONE(selectors, "Supported CSS selectors not found")

LET spinner = FMT('[data-component-type="{0}-search-results"] .{0}-result-list-placeholder', prefix)
LET resultItemSelector = FMT('[data-component-type="{}-search-result"]', prefix)

LET pagersSelector = FMT('.{0}-pagination :not(.{0}-last)', prefix)
LET priceWholeSelector = '.a-price-whole'
LET priceFracSelector = '.a-price-fraction'

// The text of the last matched pager element is taken as the page count;
// zero pages when nothing matches.
LET pagers = ELEMENTS(amazon, pagersSelector)
LET pages = LENGTH(pagers) > 0 ? TO_INT(INNER_TEXT(LAST(pagers))) : 0

PRINT("Found pages:", pages)

LET result = (
    FOR pageNum IN 1..pages
        LIMIT @pages

        // The first page is already loaded; every later page is reached by
        // clicking the "next" button.
        LET clicked = pageNum == 1 ? false : CLICK(amazon, selectors.nextBtnSelector)
        // Evaluated for its waiting side effect only; the value is not read.
        LET waitSelector = clicked ? WAIT_NO_CLASS(amazon, spinner, 'aok-hidden') && WAIT_ELEMENT(amazon, resultItemSelector) : false

        PRINT("page:", pageNum, "clicked", clicked)

        LET found = ELEMENTS(amazon, resultItemSelector)

        LET items = (
            FOR el IN found
                // Items without a whole-price element are reported with price 0.00.
                LET hasPrice = ELEMENT_EXISTS(el, priceWholeSelector)
                // NOTE(review): "[0-9]+" keeps only the first run of digits, so
                // a whole part containing a grouping separator (e.g. "1,299")
                // would be truncated - confirm against real page content.
                LET priceWholeTxt = hasPrice ? FIRST(REGEX_MATCH(INNER_TEXT(el, priceWholeSelector), "[0-9]+")) : "0"
                LET priceFracTxt = hasPrice ? FIRST(REGEX_MATCH(INNER_TEXT(el, priceFracSelector), "[0-9]+")) : "00"
                LET price = TO_FLOAT(priceWholeTxt + "." + priceFracTxt)
                LET anchor = ELEMENT(el, "a")

                RETURN {
                    page: pageNum,
                    // baseURL already ends with "/"; drop leading slashes from
                    // the href so the joined URL has no "//" after the host.
                    url: baseURL + LTRIM(anchor.attributes.href, "/"),
                    title: INNER_TEXT(el, 'h2'),
                    price
                }
        )

        RETURN items
)

// result is one array per visited page; merge into a single list.
RETURN FLATTEN(result)