github.com/bcampbell/scrapeomat@v0.0.0-20220820232205-23e64141c89e/cfg/paywalled.cfg (about)

     1  [scraper "thesun.co.uk"]
     2  # (paywalled) artpat below: a path segment of 4+ digits, then a single ".html" filename
     3  url="http://www.thesun.co.uk/"
     4  # e.g. http://www.thesun.co.uk/sol/homepage/sport/football/5584296/Carlo-Ancelotti-emerges-as-a-rival-to-Louis-van-Gaal-in-the-race-to-succeed-David-Moyes.html
     5  artpat=".*/\\d{4,}/[^/]+[.]html$"
     6  navsel="#mainNav a"
     7  cookies
     8  
     9  [scraper "thescottishsun.co.uk"]
    10  # (paywalled) artpat below: a path segment of 4+ digits, then a single ".html" filename
    11  url="http://www.thescottishsun.co.uk/"
    12  # e.g. http://www.thescottishsun.co.uk/scotsol/homepage/news/scottishpolitics/referendum2014/5935744/Better-Together-leader-claims-Eck-has-lost-the-plot.html
    13  artpat=".*/\\d{4,}/[^/]+[.]html$"
    14  navsel="#mainNav a"
    15  cookies
    16  
    17  [scraper "thetimes.co.uk"]
    18  # (paywalled) article URLs end in /article<4+ digits>.ece; [.] matches the dot literally
    19  url="http://www.thetimes.co.uk/"
    20  artpat=".*/article\\d{4,}[.]ece$"
    21  navsel="#p-nav a"
    22  baseerrorthreshold=5
    23  cookies
    24  
    25  [scraper "thesundaytimes.co.uk"]
    26  # (paywalled) article URLs end in /article<4+ digits>.ece; [.] matches the dot literally
    27  url="http://www.thesundaytimes.co.uk/"
    28  artpat=".*/article\\d{4,}[.]ece$"
    29  navsel="#topnav a"
    30  baseerrorthreshold=5
    31  cookies
    32