github.com/bcampbell/scrapeomat@v0.0.0-20220820232205-23e64141c89e/slurp/article.go (about) 1 package slurp 2 3 import ( 4 // "fmt" 5 ) 6 7 type Publication struct { 8 // Code is a short, unique name (eg "mirror") 9 Code string `json:"code"` 10 // Name is the 'pretty' name (eg "The Daily Mirror") 11 Name string `json:"name,omitempty"` 12 Domain string `json:"domain,omitempty"` 13 } 14 15 type Author struct { 16 Name string `json:"name"` 17 RelLink string `json:"rel_link,omitempty"` 18 Email string `json:"email,omitempty"` 19 Twitter string `json:"twitter,omitempty"` 20 } 21 22 type Keyword struct { 23 Name string `json:"name"` 24 URL string `json:"url,omitempty"` 25 } 26 27 // wire format for article data 28 type Article struct { 29 ID int `json:"id,omitempty"` 30 CanonicalURL string `json:"canonical_url"` 31 32 // all known URLs for article (including canonical) 33 // TODO: first url should be considered "preferred" if no canonical? 34 URLs []string `json:"urls"` 35 36 Headline string `json:"headline"` 37 Authors []Author `json:"authors,omitempty"` 38 39 // Content contains HTML, sanitised using a subset of tags 40 Content string `json:"content"` 41 42 // Published contains date of publication. 43 // An ISO8601 string is used instead of time.Time, so that 44 // less-precise representations can be held (eg YYYY-MM) 45 Published string `json:"published,omitempty"` 46 Updated string `json:"updated,omitempty"` 47 Publication Publication `json:"publication,omitempty"` 48 // Keywords contains data from rel-tags, meta keywords etc... 49 Keywords []Keyword `json:"keywords,omitempty"` 50 Section string `json:"section,omitempty"` 51 Tags []string `json:"tags,omitempty"` 52 53 // extra fields from twitcooker 54 Extra struct { 55 RetweetCount int `json:"retweet_count,omitempty"` 56 FavoriteCount int `json:"favorite_count,omitempty"` 57 // resolved links 58 Links []string `json:"links,omitempty"` 59 } `json:"extra,omitempty"` 60 }