github.com/bcampbell/scrapeomat@v0.0.0-20220820232205-23e64141c89e/cmd/backfill/telegraph.go (about) 1 package main 2 3 import ( 4 "fmt" 5 "github.com/andybalholm/cascadia" 6 "github.com/bcampbell/arts/util" 7 "net/http" 8 ) 9 10 // archive pages, form: 11 // http://www.telegraph.co.uk/archive/2009-2-15.html 12 13 func DoTelegraph(opts *Options) error { 14 15 linkSel := cascadia.MustCompile(`.summary h3 a`) 16 17 days, err := opts.DayRange() 18 if err != nil { 19 return err 20 } 21 22 client := &http.Client{Transport: util.NewPoliteTripper()} 23 24 for _, day := range days { 25 u := fmt.Sprintf("http://www.telegraph.co.uk/archive/%d-%d-%d.html", day.Year(), day.Month(), day.Day()) 26 27 doc, err := fetchAndParse(client, u) 28 if err != nil { 29 return err 30 } 31 32 links, err := grabLinks(doc, linkSel, u) 33 if err != nil { 34 return err 35 } 36 37 for _, l := range links { 38 fmt.Println(l) 39 } 40 } 41 return nil 42 }