github.com/bcampbell/scrapeomat@v0.0.0-20220820232205-23e64141c89e/store/article.go (about) 1 package store 2 3 import ( 4 // TODO: KILLKILLKILL 5 "github.com/bcampbell/arts/arts" 6 "strings" 7 ) 8 9 type Author struct { 10 Name string `json:"name"` 11 RelLink string `json:"rel_link,omitempty"` 12 Email string `json:"email,omitempty"` 13 Twitter string `json:"twitter,omitempty"` 14 } 15 16 type Keyword struct { 17 Name string `json:"name"` 18 URL string `json:"url,omitempty"` 19 } 20 21 type Publication struct { 22 Code string `json:"code"` // short unique code for publication 23 Name string `json:"name,omitempty"` 24 Domain string `json:"domain,omitempty"` 25 26 // TODO: add publication versions of rel-author 27 // eg "article:publisher", rel-publisher 28 } 29 30 type TweetExtra struct { 31 RetweetCount int `json:"retweet_count,omitempty"` 32 FavoriteCount int `json:"favorite_count,omitempty"` 33 // resolved links 34 Links []string `json:"links,omitempty"` 35 } 36 37 type Article struct { 38 ID int `json:"id,omitempty"` 39 CanonicalURL string `json:"canonical_url,omitempty"` 40 // all known URLs for article (including canonical) 41 // TODO: first url should be considered "preferred" if no canonical? 42 URLs []string `json:"urls,omitempty"` 43 Headline string `json:"headline,omitempty"` 44 Authors []Author `json:"authors,omitempty"` 45 Content string `json:"content,omitempty"` 46 // Published contains date of publication. 47 // An ISO8601 string is used instead of time.Time, so that 48 // less-precise representations can be held (eg YYYY-MM) 49 // If no timezone is given, assume UTC. 50 Published string `json:"published,omitempty"` 51 Updated string `json:"updated,omitempty"` 52 Publication Publication `json:"publication,omitempty"` 53 Keywords []Keyword `json:"keywords,omitempty"` 54 Section string `json:"section,omitempty"` 55 // space for extra, free-form data 56 // Extra interface{} `json:"extra,omitempty"` 57 // Ha! not free-form any more! (bugfix for annoying int/float json issue) 58 Extra *TweetExtra `json:"extra,omitempty"` 59 } 60 61 // copy an arts.Article into our struct 62 func ConvertArticle(src *arts.Article) *Article { 63 art := &Article{ 64 CanonicalURL: src.CanonicalURL, 65 URLs: make([]string, len(src.URLs)), 66 Headline: src.Headline, 67 Authors: make([]Author, len(src.Authors)), 68 Content: src.Content, 69 Published: src.Published, 70 Updated: src.Updated, 71 Publication: Publication{ 72 Name: src.Publication.Name, 73 Domain: src.Publication.Domain, 74 }, 75 Keywords: make([]Keyword, len(src.Keywords)), 76 Section: src.Section, 77 } 78 79 for i, u := range src.URLs { 80 art.URLs[i] = u 81 } 82 for i, a := range src.Authors { 83 art.Authors[i] = Author{Name: a.Name, RelLink: a.RelLink, Email: a.Email, Twitter: a.Twitter} 84 } 85 for i, kw := range src.Keywords { 86 art.Keywords[i] = Keyword{Name: kw.Name, URL: kw.URL} 87 } 88 89 // sort out a decent pubcode 90 if art.Publication.Code == "" { 91 code := strings.ToLower(strings.Join(strings.Fields(art.Publication.Name), "")) 92 if code != "" { 93 art.Publication.Code = code 94 } else { 95 art.Publication.Code = art.Publication.Domain 96 } 97 } 98 99 return art 100 }