github.com/bcampbell/scrapeomat@v0.0.0-20220820232205-23e64141c89e/cmd/backfill/telegraph.go (about)

     1  package main
     2  
     3  import (
     4  	"fmt"
     5  	"github.com/andybalholm/cascadia"
     6  	"github.com/bcampbell/arts/util"
     7  	"net/http"
     8  )
     9  
    10  // archive pages, form:
    11  // http://www.telegraph.co.uk/archive/2009-2-15.html
    12  
    13  func DoTelegraph(opts *Options) error {
    14  
    15  	linkSel := cascadia.MustCompile(`.summary h3 a`)
    16  
    17  	days, err := opts.DayRange()
    18  	if err != nil {
    19  		return err
    20  	}
    21  
    22  	client := &http.Client{Transport: util.NewPoliteTripper()}
    23  
    24  	for _, day := range days {
    25  		u := fmt.Sprintf("http://www.telegraph.co.uk/archive/%d-%d-%d.html", day.Year(), day.Month(), day.Day())
    26  
    27  		doc, err := fetchAndParse(client, u)
    28  		if err != nil {
    29  			return err
    30  		}
    31  
    32  		links, err := grabLinks(doc, linkSel, u)
    33  		if err != nil {
    34  			return err
    35  		}
    36  
    37  		for _, l := range links {
    38  			fmt.Println(l)
    39  		}
    40  	}
    41  	return nil
    42  }