github.com/bcampbell/scrapeomat@v0.0.0-20220820232205-23e64141c89e/cfg/ft.cfg (about)

     1  [scraper "ft.com"]
     2  # (paywalled) NOTE(review): cookies are presumably enabled below because of the paywall - confirm
     3  cookies
     4  url="http://www.ft.com/"
     5  navsel="nav.nav-ftcom a"
     6  baseerrorthreshold=5
     7  hostpat="^(www|blogs)[.]ft[.]com$"
     8  # Main-site articles (literal ".html" suffix), e.g. http://www.ft.com/cms/s/0/d4dd7dcc-ca67-11e3-bb92-00144feabdc0.html
     9  artpat=".*/[-0-9a-f]{8,}[.]html$"
    10  # Blog posts under /YYYY/MM/ (optionally /DD/), e.g. http://blogs.ft.com/tech-blog/2014/04/lytro-refocuses-on-illum/
    11  #   and http://blogs.ft.com/the-a-list/2014/08/28/russias-invasion-of-ukraine-demands-a-decisive-response/
    12  # TODO: liveblog URLs match neither artpat yet, e.g. http://blogs.ft.com/the-world/liveblogs/2014-09-17-2/
    13  artpat=".*/\\d{4}/\\d{2}.*/[^/]{4,}/$"