github.com/bcampbell/scrapeomat@v0.0.0-20220820232205-23e64141c89e/cmd/backfill/eluniversal.go (about) 1 package main 2 3 import ( 4 "fmt" 5 "github.com/andybalholm/cascadia" 6 "github.com/bcampbell/arts/util" 7 "net/http" 8 ) 9 10 // use search page 11 // http://activo.eluniversal.com.mx/historico/search/index.php?q=una&start=0 12 // returns 20 articles per page 13 // 'start' param is article number (0-based) 14 15 func DoElUniversal(opts *Options) error { 16 17 linkSel := cascadia.MustCompile(`.moduloNoticia .HeadNota a`) 18 19 client := &http.Client{Transport: util.NewPoliteTripper()} 20 21 for n := opts.nStart; n < (opts.nStart + (opts.nPages * 20)); n += 20 { 22 u := fmt.Sprintf("http://activo.eluniversal.com.mx/historico/search/index.php?q=una&start=%d", n) 23 24 doc, err := fetchAndParse(client, u) 25 if err != nil { 26 return err 27 } 28 29 links, err := grabLinks(doc, linkSel, u) 30 if err != nil { 31 return err 32 } 33 34 for _, l := range links { 35 fmt.Println(l) 36 } 37 } 38 return nil 39 }