# cfg/paywalled.cfg (github.com/bcampbell/scrapeomat)
#
# Scraper definitions for paywalled news sites, one [scraper "<name>"]
# section per site.
#
# NOTE(review): key meanings below are inferred from the values, not from the
# loader - confirm against the code that reads this file.
#   url                - presumably the page the crawl starts from
#   artpat             - regular expression that article URLs must match
#   navsel             - presumably a CSS selector for navigation links
#   baseerrorthreshold - numeric error threshold (semantics not shown here)
#   cookies            - bare key with no value; git-config-style parsers
#                        read this as boolean true
#
# Values are double-quoted, so a backslash is written "\\" (assumes the parser
# unescapes it to a single "\" before the regex is compiled). A literal dot is
# written "[.]" to avoid needing an escape.

[scraper "thesun.co.uk"]
# (paywalled)
url="http://www.thesun.co.uk/"
# eg http://www.thesun.co.uk/sol/homepage/sport/football/5584296/Carlo-Ancelotti-emerges-as-a-rival-to-Louis-van-Gaal-in-the-race-to-succeed-David-Moyes.html
artpat=".*/\\d{4,}/[^/]+[.]html$"
navsel="#mainNav a"
cookies

[scraper "thescottishsun.co.uk"]
# (paywalled)
url="http://www.thescottishsun.co.uk/"
# eg http://www.thescottishsun.co.uk/scotsol/homepage/news/scottishpolitics/referendum2014/5935744/Better-Together-leader-claims-Eck-has-lost-the-plot.html
artpat=".*/\\d{4,}/[^/]+[.]html$"
navsel="#mainNav a"
cookies

[scraper "thetimes.co.uk"]
# (paywalled)
url="http://www.thetimes.co.uk/"
# "[.]ece" matches a literal dot only, consistent with "[.]html" above.
artpat=".*/article\\d{4,}[.]ece$"
navsel="#p-nav a"
baseerrorthreshold=5
cookies

[scraper "thesundaytimes.co.uk"]
# (paywalled)
url="http://www.thesundaytimes.co.uk/"
# "[.]ece" matches a literal dot only, consistent with "[.]html" above.
artpat=".*/article\\d{4,}[.]ece$"
navsel="#topnav a"
baseerrorthreshold=5
cookies