github.com/bcampbell/scrapeomat@v0.0.0-20220820232205-23e64141c89e/cmd/backfill/eluniversal.go (about)

     1  package main
     2  
     3  import (
     4  	"fmt"
     5  	"github.com/andybalholm/cascadia"
     6  	"github.com/bcampbell/arts/util"
     7  	"net/http"
     8  )
     9  
    10  // use search page
    11  // http://activo.eluniversal.com.mx/historico/search/index.php?q=una&start=0
    12  // returns 20 articles per page
    13  // 'start' param is article number (0-based)
    14  
    15  func DoElUniversal(opts *Options) error {
    16  
    17  	linkSel := cascadia.MustCompile(`.moduloNoticia .HeadNota a`)
    18  
    19  	client := &http.Client{Transport: util.NewPoliteTripper()}
    20  
    21  	for n := opts.nStart; n < (opts.nStart + (opts.nPages * 20)); n += 20 {
    22  		u := fmt.Sprintf("http://activo.eluniversal.com.mx/historico/search/index.php?q=una&start=%d", n)
    23  
    24  		doc, err := fetchAndParse(client, u)
    25  		if err != nil {
    26  			return err
    27  		}
    28  
    29  		links, err := grabLinks(doc, linkSel, u)
    30  		if err != nil {
    31  			return err
    32  		}
    33  
    34  		for _, l := range links {
    35  			fmt.Println(l)
    36  		}
    37  	}
    38  	return nil
    39  }