github.com/bcampbell/scrapeomat@v0.0.0-20220820232205-23e64141c89e/slurp/article.go

github.com/bcampbell/scrapeomat@v0.0.0-20220820232205-23e64141c89e/slurp/article.go (about)

     1  package slurp
     2  
     3  import (
     4  //	"fmt"
     5  )
     6  
        // Publication holds the identifying details of a publication.
        //
        // When encoded as JSON, "code" is always written, while "name" and
        // "domain" are left out if they are empty strings.
        //
        // NOTE(review): Domain is undocumented here; presumably it is the
        // publication's web domain - confirm against callers.
     7  type Publication struct {
     8  	// Code is a short, unique name (eg "mirror")
     9  	Code string `json:"code"`
    10  	// Name is the 'pretty' name (eg "The Daily Mirror")
    11  	Name   string `json:"name,omitempty"`
    12  	Domain string `json:"domain,omitempty"`
    13  }
    14  
        // Author holds the details recorded for one author of an article.
        //
        // When encoded as JSON, "name" is always written; "rel_link", "email"
        // and "twitter" are left out if they are empty strings.
        //
        // NOTE(review): RelLink presumably holds the URL of a rel="author"
        // style link, and Twitter a twitter handle - confirm the exact formats
        // against the code that fills them in.
    15  type Author struct {
    16  	Name    string `json:"name"`
    17  	RelLink string `json:"rel_link,omitempty"`
    18  	Email   string `json:"email,omitempty"`
    19  	Twitter string `json:"twitter,omitempty"`
    20  }
    21  
        // Keyword is a single named keyword attached to an article, with an
        // optional URL. When encoded as JSON, "url" is left out if it is an
        // empty string.
    22  type Keyword struct {
    23  	Name string `json:"name"`
    24  	URL  string `json:"url,omitempty"`
    25  }
    26  
    27  // Article is the wire format for article data.
        //
        // NOTE(review): the Publication and Extra fields are struct values tagged
        // with omitempty. encoding/json never treats a struct value as empty, so
        // the "publication" and "extra" keys look like they are always emitted.
        // Confirm whether that is intended.
    28  type Article struct {
        	// ID is left out of the JSON when it is zero.
    29  	ID           int    `json:"id,omitempty"`
    30  	CanonicalURL string `json:"canonical_url"`
    31  
    32  	// URLs holds all known URLs for the article (including canonical).
    33  	// TODO: first url should be considered "preferred" if no canonical?
    34  	URLs []string `json:"urls"`
    35  
    36  	Headline string   `json:"headline"`
    37  	Authors  []Author `json:"authors,omitempty"`
    38  
    39  	// Content contains HTML, sanitised using a subset of tags
    40  	Content string `json:"content"`
    41  
    42  	// Published contains date of publication.
    43  	// An ISO8601 string is used instead of time.Time, so that
    44  	// less-precise representations can be held (eg YYYY-MM)
        	// NOTE(review): Updated presumably uses the same string format - confirm.
    45  	Published   string      `json:"published,omitempty"`
    46  	Updated     string      `json:"updated,omitempty"`
    47  	Publication Publication `json:"publication,omitempty"`
    48  	// Keywords contains data from rel-tags, meta keywords etc...
    49  	Keywords []Keyword `json:"keywords,omitempty"`
    50  	Section  string    `json:"section,omitempty"`
    51  	Tags     []string  `json:"tags,omitempty"`
    52  
    53  	// Extra holds extra fields from twitcooker.
    54  	Extra struct {
    55  		RetweetCount  int `json:"retweet_count,omitempty"`
    56  		FavoriteCount int `json:"favorite_count,omitempty"`
    57  		// Links holds resolved links.
    58  		Links []string `json:"links,omitempty"`
    59  	} `json:"extra,omitempty"`
    60  }