github.com/bcampbell/scrapeomat@v0.0.0-20220820232205-23e64141c89e/cmd/wpjsontool/wp.go (about) 1 package main 2 3 // in theory this could be broken out into a standalone 4 // wp-json client, but really should just replace it with an existing one. 5 6 import ( 7 "encoding/json" 8 "fmt" 9 "io/ioutil" 10 "net/http" 11 "net/url" 12 "os" 13 "strconv" 14 ) 15 16 // Client holds everything we need to perform WP api queries. 17 type Client struct { 18 HTTPClient *http.Client 19 BaseURL string // eg "https://example.com/wp-json" 20 Verbose bool 21 CacheDir string 22 } 23 24 func (wp *Client) Get(u string) (*http.Response, error) { 25 return HTTPGetWithCache(wp.HTTPClient, u, wp.CacheDir) 26 } 27 28 // 29 type Tag struct { 30 ID int `json:"id"` 31 Count int `json:"count"` 32 Description string `json:"description"` 33 Link string `json:"link"` 34 Name string `json:"name"` 35 Slug string `json:"slug"` 36 Taxonomy string `json:"taxonomy"` 37 } 38 39 type Category struct { 40 ID int `json:"id"` 41 Count int `json:"count"` 42 Description string `json:"description"` 43 Link string `json:"link"` 44 Name string `json:"name"` 45 Slug string `json:"slug"` 46 Taxonomy string `json:"taxonomy"` 47 Parent int `json:"parent"` 48 } 49 50 // Post data returned from wp/posts endpoint 51 type Post struct { 52 Link string `json:"link"` 53 Title struct { 54 Rendered string `json:"rendered"` 55 } `json:"title"` 56 Date string `json:"date"` 57 Modified string `json:"modified"` 58 Content struct { 59 Rendered string `json:"rendered"` 60 } `json:"content"` 61 Tags []int `json:"tags"` 62 Categories []int `json:"categories"` 63 } 64 65 // fetch a list of posts ("/wp/v2/posts) 66 // returns expected number of posts, posts, error 67 func (wp *Client) ListPosts(params url.Values) ([]*Post, int, error) { 68 u := wp.BaseURL + "/wp/v2/posts?" + params.Encode() 69 70 if wp.Verbose { 71 fmt.Fprintf(os.Stderr, "fetch %s\n", u) 72 } 73 74 resp, err := wp.Get(u) 75 if err != nil { 76 return nil, 0, err 77 } 78 79 // totalpages is returned as a header 80 expectedTotal, err := strconv.Atoi(resp.Header.Get("X-WP-Total")) 81 if err != nil { 82 return nil, 0, err 83 } 84 raw, err := ioutil.ReadAll(resp.Body) 85 resp.Body.Close() 86 if err != nil { 87 return nil, 0, err 88 } 89 if resp.StatusCode != 200 { 90 return nil, 0, fmt.Errorf("%s: %d\n", u, resp.StatusCode) 91 } 92 93 posts := []*Post{} 94 95 err = json.Unmarshal(raw, &posts) 96 if err != nil { 97 return nil, 0, err 98 } 99 100 return posts, expectedTotal, nil 101 } 102 103 // ListPostsAll repeatedly calls ListPosts until the whole set has been 104 // retrieved. The pagination-related params will overriden, but all others 105 // will be passed on verbatim with each request. 106 // The postSink callback will be invoked as each batch of posts is reeceived. 107 func (wp *Client) ListPostsAll(params url.Values, postSink func([]*Post, int) error) error { 108 perPage := 100 109 numReceived := 0 110 111 for { 112 // We override pagination-related params 113 params.Set("per_page", strconv.Itoa(perPage)) 114 params.Set("offset", strconv.Itoa(numReceived)) 115 params.Del("page") 116 117 batch, expectedTotal, err := wp.ListPosts(params) 118 if err != nil { 119 return err 120 } 121 122 err = postSink(batch, expectedTotal) 123 if err != nil { 124 return err 125 } 126 numReceived += len(batch) 127 128 fmt.Fprintf(os.Stderr, "received %d/%d\n", numReceived, expectedTotal) 129 if len(batch) == 0 || numReceived >= expectedTotal { 130 break 131 } 132 } 133 return nil 134 } 135 136 // GET /wp/v2/tags 137 // params we're interested in: 138 func (wp *Client) ListTags(params url.Values) ([]*Tag, int, error) { 139 u := wp.BaseURL + "/wp/v2/tags?" + params.Encode() 140 141 if wp.Verbose { 142 fmt.Fprintf(os.Stderr, "fetch %s\n", u) 143 } 144 145 resp, err := wp.Get(u) 146 if err != nil { 147 return nil, 0, err 148 } 149 150 // totalpages is returned as a header 151 expectedTotal, err := strconv.Atoi(resp.Header.Get("X-WP-Total")) 152 if err != nil { 153 return nil, 0, err 154 } 155 156 raw, err := ioutil.ReadAll(resp.Body) 157 resp.Body.Close() 158 if err != nil { 159 return nil, 0, err 160 } 161 if resp.StatusCode != 200 { 162 return nil, 0, fmt.Errorf("%s: %d\n", u, resp.StatusCode) 163 } 164 165 tags := []*Tag{} 166 167 err = json.Unmarshal(raw, &tags) 168 if err != nil { 169 return nil, 0, err 170 } 171 172 return tags, expectedTotal, nil 173 } 174 175 // ListTagsAll repeatedly calls ListTags until the whole set has been 176 // retrieved. The pagination-related params will overriden, but all others 177 // will be passed on verbatim with each request. 178 func (wp *Client) ListTagsAll(params url.Values) ([]*Tag, error) { 179 perPage := 100 180 181 tags := []*Tag{} 182 183 for { 184 // We override pagination-related params 185 params.Set("per_page", strconv.Itoa(perPage)) 186 params.Set("offset", strconv.Itoa(len(tags))) 187 params.Del("page") 188 189 batch, expectedTotal, err := wp.ListTags(params) 190 191 if err != nil { 192 return nil, err 193 } 194 195 tags = append(tags, batch...) 196 if wp.Verbose { 197 fmt.Fprintf(os.Stderr, " %d/%d\n", len(tags), expectedTotal) 198 } 199 if len(batch) == 0 || len(tags) >= expectedTotal { 200 break 201 } 202 } 203 return tags, nil 204 } 205 206 // GET /wp/v2/categories 207 // params we're interested in: 208 func (wp *Client) ListCategories(params url.Values) ([]*Category, int, error) { 209 u := wp.BaseURL + "/wp/v2/categories?" + params.Encode() 210 211 if wp.Verbose { 212 fmt.Fprintf(os.Stderr, "fetch %s\n", u) 213 } 214 215 resp, err := wp.Get(u) 216 if err != nil { 217 return nil, 0, err 218 } 219 220 // totalpages is returned as a header 221 expectedTotal, err := strconv.Atoi(resp.Header.Get("X-WP-Total")) 222 if err != nil { 223 return nil, 0, err 224 } 225 226 raw, err := ioutil.ReadAll(resp.Body) 227 resp.Body.Close() 228 if err != nil { 229 return nil, 0, err 230 } 231 if resp.StatusCode != 200 { 232 return nil, 0, fmt.Errorf("%s: %d\n", u, resp.StatusCode) 233 } 234 235 categories := []*Category{} 236 237 err = json.Unmarshal(raw, &categories) 238 if err != nil { 239 return nil, 0, err 240 } 241 242 return categories, expectedTotal, nil 243 } 244 245 // ListCategoriesAll repeatedly calls ListCategories until the whole set has been 246 // retrieved. The pagination-related params will overriden, but all others 247 // will be passed on verbatim with each request. 248 func (wp *Client) ListCategoriesAll(params url.Values) ([]*Category, error) { 249 perPage := 100 250 251 categories := []*Category{} 252 253 for { 254 // We override pagination-related params 255 params.Set("per_page", strconv.Itoa(perPage)) 256 params.Set("offset", strconv.Itoa(len(categories))) 257 params.Del("page") 258 259 batch, expectedTotal, err := wp.ListCategories(params) 260 261 if err != nil { 262 return nil, err 263 } 264 265 categories = append(categories, batch...) 266 if wp.Verbose { 267 fmt.Fprintf(os.Stderr, " %d/%d\n", len(categories), expectedTotal) 268 } 269 if len(batch) == 0 || len(categories) >= expectedTotal { 270 break 271 } 272 } 273 return categories, nil 274 }