github.com/bcampbell/scrapeomat@v0.0.0-20220820232205-23e64141c89e/store/sqlstore/stash.go (about) 1 package sqlstore 2 3 import ( 4 "database/sql" 5 "encoding/json" 6 "errors" 7 "fmt" 8 9 "github.com/bcampbell/scrapeomat/store" 10 ) 11 12 // Stash adds or updates articles in the database. 13 // If the article has an ID, it's assumed to be an update. 14 // If it doesn't, then it's an add. 15 // 16 // Returns a list of article IDs, one per input article. 17 func (ss *SQLStore) Stash(arts ...*store.Article) ([]int, error) { 18 var err error 19 var tx *sql.Tx 20 tx, err = ss.db.Begin() 21 if err != nil { 22 return nil, err 23 } 24 25 defer func() { 26 if err == nil { 27 tx.Commit() 28 } else { 29 tx.Rollback() 30 } 31 }() 32 33 ids := make([]int, 0, len(arts)) 34 for _, art := range arts { 35 var artID int 36 artID, err := ss.stashArticle(tx, art) 37 if err != nil { 38 return nil, err 39 } 40 ids = append(ids, artID) 41 } 42 return ids, nil 43 } 44 45 func (ss *SQLStore) stashArticle(tx *sql.Tx, art *store.Article) (int, error) { 46 pubID, err := ss.findOrCreatePublication(tx, &art.Publication) 47 if err != nil { 48 return 0, err 49 } 50 51 artID := art.ID 52 53 extra := []byte{} 54 if art.Extra != nil { 55 extra, err = json.Marshal(art.Extra) 56 if err != nil { 57 return 0, err 58 } 59 } 60 61 if artID == 0 { 62 // it's a new article 63 artID, err = ss.insertArticle(tx, art, pubID, extra) 64 if err != nil { 65 return 0, err 66 } 67 } else { 68 // updating an existing article 69 70 q := `UPDATE article SET (canonical_url, headline, content, published, updated, publication_id, section,extra,added) = (?,?,?,?,?,?,?,?,` + ss.nowSQL() + `) WHERE id=?` 71 _, err = tx.Exec(ss.rebind(q), 72 art.CanonicalURL, 73 art.Headline, 74 art.Content, 75 ss.cvtTime(art.Published), 76 ss.cvtTime(art.Updated), 77 pubID, 78 art.Section, 79 extra, 80 artID) 81 if err != nil { 82 return 0, err 83 } 84 85 // delete old urls 86 _, err = tx.Exec(ss.rebind(`DELETE FROM article_url WHERE article_id=?`), artID) 87 if err != nil { 88 return 0, err 89 } 90 91 // delete old keywords 92 _, err = tx.Exec(ss.rebind(`DELETE FROM article_keyword WHERE article_id=?`), artID) 93 if err != nil { 94 return 0, err 95 } 96 97 // delete old authors 98 _, err = tx.Exec(ss.rebind(`DELETE FROM author WHERE id IN (SELECT author_id FROM author_attr WHERE article_id=?)`), artID) 99 if err != nil { 100 return 0, err 101 } 102 _, err = tx.Exec(ss.rebind(`DELETE FROM author_attr WHERE article_id=?`), artID) 103 if err != nil { 104 return 0, err 105 } 106 } 107 108 for _, u := range art.URLs { 109 _, err = tx.Exec(ss.rebind(`INSERT INTO article_url(article_id,url) VALUES(?,?)`), artID, u) 110 if err != nil { 111 return 0, fmt.Errorf("failed adding url %s: %s", u, err) 112 } 113 } 114 115 for _, k := range art.Keywords { 116 _, err = tx.Exec(ss.rebind(`INSERT INTO article_keyword(article_id,name,url) VALUES(?,?,?)`), 117 artID, 118 k.Name, 119 k.URL) 120 if err != nil { 121 return 0, fmt.Errorf("failed adding keyword %s (%s): %s", k.Name, k.URL, err) 122 } 123 } 124 125 for _, author := range art.Authors { 126 err := ss.addAuthorToArticle(tx, artID, &author) 127 if err != nil { 128 return 0, err 129 } 130 } 131 132 // all good. 133 return artID, nil 134 } 135 136 // addAuthorToArticle adds new rows to `author` and `author_attr`. 137 func (ss *SQLStore) addAuthorToArticle(tx *sql.Tx, artID int, author *store.Author) error { 138 var authorID int 139 switch ss.insertIDType() { 140 case RESULT: 141 // sqlite3 142 result, err := tx.Exec(ss.rebind(`INSERT INTO author(name,rel_link,email,twitter) VALUES (?,?,?,?)`), 143 author.Name, 144 author.RelLink, 145 author.Email, 146 author.Twitter) 147 if err != nil { 148 return err 149 } 150 tmpID, err := result.LastInsertId() 151 if err != nil { 152 return err 153 } 154 authorID = int(tmpID) 155 case RETURNING: 156 // postgres 157 err := tx.QueryRow(ss.rebind(`INSERT INTO author(name,rel_link,email,twitter) VALUES (?,?,?,?) RETURNING id`), 158 author.Name, 159 author.RelLink, 160 author.Email, 161 author.Twitter).Scan(&authorID) 162 if err != nil { 163 return err 164 } 165 } 166 167 // link author to the article 168 _, err := tx.Exec(ss.rebind(`INSERT INTO author_attr(author_id,article_id) VALUES (?,?)`), 169 authorID, 170 artID) 171 if err != nil { 172 return err 173 } 174 return nil 175 } 176 177 func (ss *SQLStore) insertArticle(tx *sql.Tx, art *store.Article, pubID int, extra []byte) (int, error) { 178 switch ss.insertIDType() { 179 case RESULT: 180 { 181 q := `INSERT INTO article(canonical_url, headline, content, published, updated, publication_id, section,extra) VALUES(?,?,?,?,?,?,?,?)` 182 result, err := tx.Exec(ss.rebind(q), 183 art.CanonicalURL, 184 art.Headline, 185 art.Content, 186 ss.cvtTime(art.Published), 187 ss.cvtTime(art.Updated), 188 pubID, 189 art.Section, 190 extra) 191 if err != nil { 192 return 0, err 193 } 194 tmpID, err := result.LastInsertId() 195 if err != nil { 196 return 0, err 197 } 198 return int(tmpID), nil 199 } 200 case RETURNING: 201 { 202 var lastID int 203 q := `INSERT INTO article(canonical_url, headline, content, published, updated, publication_id, section,extra) VALUES(?,?,?,?,?,?,?,?) RETURNING id` 204 err := tx.QueryRow(ss.rebind(q), 205 art.CanonicalURL, 206 art.Headline, 207 art.Content, 208 ss.cvtTime(art.Published), 209 ss.cvtTime(art.Updated), 210 pubID, 211 art.Section, 212 extra).Scan(&lastID) 213 if err != nil { 214 return 0, err 215 } 216 return lastID, nil 217 } 218 default: 219 return 0, errors.New("unsupported db driver") 220 } 221 } 222 223 func (ss *SQLStore) findOrCreatePublication(tx *sql.Tx, pub *store.Publication) (int, error) { 224 pubID, err := ss.findPublication(tx, pub) 225 if err != nil { 226 return 0, err 227 } 228 if pubID != 0 { 229 return pubID, nil 230 } 231 return ss.insertPublication(tx, pub) 232 } 233 234 // returns 0 if no match 235 func (ss *SQLStore) findPublication(tx *sql.Tx, pub *store.Publication) (int, error) { 236 var pubID int 237 var err error 238 239 if pub.Code != "" { 240 241 err = tx.QueryRow(ss.rebind(`SELECT id FROM publication WHERE code=?`), pub.Code).Scan(&pubID) 242 if err == nil { 243 return pubID, nil // return existing publication 244 } 245 if err != sql.ErrNoRows { 246 return 0, err 247 } 248 } 249 250 if pub.Name != "" { 251 252 err = tx.QueryRow(ss.rebind(`SELECT id FROM publication WHERE name=?`), pub.Name).Scan(&pubID) 253 if err == nil { 254 return pubID, nil // return existing publication 255 } 256 if err != sql.ErrNoRows { 257 return 0, err 258 } 259 } 260 261 // TODO: publications can have multiple domains... 262 if pub.Domain != "" { 263 err = tx.QueryRow(ss.rebind(`SELECT id FROM publication WHERE domain=?`), pub.Domain).Scan(&pubID) 264 if err == nil { 265 return pubID, nil // return existing publication 266 } 267 if err != sql.ErrNoRows { 268 return 0, err 269 } 270 } 271 272 return 0, nil // no match 273 } 274 275 func (ss *SQLStore) insertPublication(tx *sql.Tx, pub *store.Publication) (int, error) { 276 switch ss.insertIDType() { 277 case RESULT: // sqlite, mysql... 278 { 279 result, err := tx.Exec(ss.rebind(`INSERT INTO publication(code,name,domain) VALUES(?,?,?)`), 280 pub.Code, 281 pub.Name, 282 pub.Domain) 283 if err != nil { 284 return 0, err 285 } 286 pubID, err := result.LastInsertId() 287 if err != nil { 288 return 0, err 289 } 290 return int(pubID), nil 291 } 292 case RETURNING: // postgresql 293 { 294 var lastID int 295 err := tx.QueryRow(ss.rebind(`INSERT INTO publication(code,name,domain) VALUES(?,?,?) RETURNING id`), 296 pub.Code, 297 pub.Name, 298 pub.Domain).Scan(&lastID) 299 if err != nil { 300 return 0, err 301 } 302 return lastID, nil 303 } 304 default: 305 return 0, errors.New("unsupported db driver") 306 } 307 308 }