# Source: github.com/bcampbell/scrapeomat@v0.0.0-20220820232205-23e64141c89e/cfg/ft.cfg
#
# Scraper definition for ft.com.
[scraper "ft.com"]
# (paywalled)
# NOTE(review): the bare `cookies` flag presumably turns on cookie handling
# because the site is paywalled - confirm against the scraper's config struct.
cookies

# Page the scraper starts from.
url="http://www.ft.com/"

# Selector for the site navigation links.
# NOTE(review): presumably these links are followed to discover article
# pages - confirm.
navsel="nav.nav-ftcom a"

# NOTE(review): presumably the number of errors tolerated before a run is
# abandoned - confirm the exact semantics.
baseerrorthreshold=5

# Only URLs whose host is www.ft.com or blogs.ft.com are considered.
hostpat="^(www|blogs)[.]ft[.]com$"

# Article URL patterns.

# Main-site articles, identified by a hex/dash id followed by ".html", e.g.
# http://www.ft.com/cms/s/0/d4dd7dcc-ca67-11e3-bb92-00144feabdc0.html
# The dot before "html" is written as [.] so it matches a literal dot only
# (a bare "." would match any character), consistent with hostpat above.
artpat=".*/[-0-9a-f]{8,}[.]html$"

# Blog posts, identified by a /yyyy/mm/ (optionally /yyyy/mm/dd/) path
# followed by a slug of at least four characters and a trailing slash, e.g.
# http://blogs.ft.com/tech-blog/2014/04/lytro-refocuses-on-illum/
# http://blogs.ft.com/the-a-list/2014/08/28/russias-invasion-of-ukraine-demands-a-decisive-response/
# TODO: liveblog URLs are not matched yet, e.g.
# http://blogs.ft.com/the-world/liveblogs/2014-09-17-2/
artpat=".*/\\d{4}/\\d{2}.*/[^/]{4,}/$"