github.com/bcampbell/scrapeomat@v0.0.0-20220820232205-23e64141c89e/cmd/wpjsontool/wp.go (about)

     1  package main
     2  
     3  // in theory this could be broken out into a standalone
     4  // wp-json client, but really should just replace it with an existing one.
     5  
     6  import (
     7  	"encoding/json"
     8  	"fmt"
     9  	"io/ioutil"
    10  	"net/http"
    11  	"net/url"
    12  	"os"
    13  	"strconv"
    14  )
    15  
// Client holds everything we need to perform WP api queries.
//
// HTTPClient and CacheDir are handed to HTTPGetWithCache by Get. BaseURL is
// the prefix that endpoint paths such as "/wp/v2/posts" are appended to.
// When Verbose is set, ListPosts, ListTags and ListCategories print each URL
// they fetch to stderr.
type Client struct {
	HTTPClient *http.Client
	BaseURL    string // eg "https://example.com/wp-json"
	Verbose    bool
	CacheDir   string
}
    23  
    24  func (wp *Client) Get(u string) (*http.Response, error) {
    25  	return HTTPGetWithCache(wp.HTTPClient, u, wp.CacheDir)
    26  }
    27  
// Tag is one element of the JSON array that ListTags decodes from the
// "/wp/v2/tags" endpoint. Each field is filled from the JSON key named in its
// struct tag.
// NOTE(review): Count is presumably the number of posts carrying the tag -
// confirm against the WP REST API docs.
type Tag struct {
	ID          int    `json:"id"`
	Count       int    `json:"count"`
	Description string `json:"description"`
	Link        string `json:"link"`
	Name        string `json:"name"`
	Slug        string `json:"slug"`
	Taxonomy    string `json:"taxonomy"`
}
    38  
// Category is one element of the JSON array that ListCategories decodes from
// the "/wp/v2/categories" endpoint. Each field is filled from the JSON key
// named in its struct tag.
// NOTE(review): Parent is presumably the ID of the parent category - confirm
// against the WP REST API docs.
type Category struct {
	ID          int    `json:"id"`
	Count       int    `json:"count"`
	Description string `json:"description"`
	Link        string `json:"link"`
	Name        string `json:"name"`
	Slug        string `json:"slug"`
	Taxonomy    string `json:"taxonomy"`
	Parent      int    `json:"parent"`
}
    49  
// Post is one element of the JSON array that ListPosts decodes from the
// "/wp/v2/posts" endpoint. Only the keys named in the struct tags are kept.
//
// Title and Content keep just the "rendered" member of their JSON objects.
// Date and Modified are stored as the raw strings from the response; no time
// parsing is done here.
// NOTE(review): Tags and Categories are presumably the IDs of the Tag and
// Category records attached to the post - confirm against the WP REST API docs.
type Post struct {
	Link  string `json:"link"`
	Title struct {
		Rendered string `json:"rendered"`
	} `json:"title"`
	Date     string `json:"date"`
	Modified string `json:"modified"`
	Content  struct {
		Rendered string `json:"rendered"`
	} `json:"content"`
	Tags       []int `json:"tags"`
	Categories []int `json:"categories"`
}
    64  
    65  // fetch a list of posts ("/wp/v2/posts)
    66  // returns expected number of posts, posts, error
    67  func (wp *Client) ListPosts(params url.Values) ([]*Post, int, error) {
    68  	u := wp.BaseURL + "/wp/v2/posts?" + params.Encode()
    69  
    70  	if wp.Verbose {
    71  		fmt.Fprintf(os.Stderr, "fetch %s\n", u)
    72  	}
    73  
    74  	resp, err := wp.Get(u)
    75  	if err != nil {
    76  		return nil, 0, err
    77  	}
    78  
    79  	// totalpages is returned as a header
    80  	expectedTotal, err := strconv.Atoi(resp.Header.Get("X-WP-Total"))
    81  	if err != nil {
    82  		return nil, 0, err
    83  	}
    84  	raw, err := ioutil.ReadAll(resp.Body)
    85  	resp.Body.Close()
    86  	if err != nil {
    87  		return nil, 0, err
    88  	}
    89  	if resp.StatusCode != 200 {
    90  		return nil, 0, fmt.Errorf("%s: %d\n", u, resp.StatusCode)
    91  	}
    92  
    93  	posts := []*Post{}
    94  
    95  	err = json.Unmarshal(raw, &posts)
    96  	if err != nil {
    97  		return nil, 0, err
    98  	}
    99  
   100  	return posts, expectedTotal, nil
   101  }
   102  
   103  // ListPostsAll repeatedly calls ListPosts until the whole set has been
   104  // retrieved. The pagination-related params will overriden, but all others
   105  // will be passed on verbatim with each request.
   106  // The postSink callback will be invoked as each batch of posts is reeceived.
   107  func (wp *Client) ListPostsAll(params url.Values, postSink func([]*Post, int) error) error {
   108  	perPage := 100
   109  	numReceived := 0
   110  
   111  	for {
   112  		// We override pagination-related params
   113  		params.Set("per_page", strconv.Itoa(perPage))
   114  		params.Set("offset", strconv.Itoa(numReceived))
   115  		params.Del("page")
   116  
   117  		batch, expectedTotal, err := wp.ListPosts(params)
   118  		if err != nil {
   119  			return err
   120  		}
   121  
   122  		err = postSink(batch, expectedTotal)
   123  		if err != nil {
   124  			return err
   125  		}
   126  		numReceived += len(batch)
   127  
   128  		fmt.Fprintf(os.Stderr, "received %d/%d\n", numReceived, expectedTotal)
   129  		if len(batch) == 0 || numReceived >= expectedTotal {
   130  			break
   131  		}
   132  	}
   133  	return nil
   134  }
   135  
   136  // GET /wp/v2/tags
   137  // params we're interested in:
   138  func (wp *Client) ListTags(params url.Values) ([]*Tag, int, error) {
   139  	u := wp.BaseURL + "/wp/v2/tags?" + params.Encode()
   140  
   141  	if wp.Verbose {
   142  		fmt.Fprintf(os.Stderr, "fetch %s\n", u)
   143  	}
   144  
   145  	resp, err := wp.Get(u)
   146  	if err != nil {
   147  		return nil, 0, err
   148  	}
   149  
   150  	// totalpages is returned as a header
   151  	expectedTotal, err := strconv.Atoi(resp.Header.Get("X-WP-Total"))
   152  	if err != nil {
   153  		return nil, 0, err
   154  	}
   155  
   156  	raw, err := ioutil.ReadAll(resp.Body)
   157  	resp.Body.Close()
   158  	if err != nil {
   159  		return nil, 0, err
   160  	}
   161  	if resp.StatusCode != 200 {
   162  		return nil, 0, fmt.Errorf("%s: %d\n", u, resp.StatusCode)
   163  	}
   164  
   165  	tags := []*Tag{}
   166  
   167  	err = json.Unmarshal(raw, &tags)
   168  	if err != nil {
   169  		return nil, 0, err
   170  	}
   171  
   172  	return tags, expectedTotal, nil
   173  }
   174  
   175  // ListTagsAll repeatedly calls ListTags until the whole set has been
   176  // retrieved. The pagination-related params will overriden, but all others
   177  // will be passed on verbatim with each request.
   178  func (wp *Client) ListTagsAll(params url.Values) ([]*Tag, error) {
   179  	perPage := 100
   180  
   181  	tags := []*Tag{}
   182  
   183  	for {
   184  		// We override pagination-related params
   185  		params.Set("per_page", strconv.Itoa(perPage))
   186  		params.Set("offset", strconv.Itoa(len(tags)))
   187  		params.Del("page")
   188  
   189  		batch, expectedTotal, err := wp.ListTags(params)
   190  
   191  		if err != nil {
   192  			return nil, err
   193  		}
   194  
   195  		tags = append(tags, batch...)
   196  		if wp.Verbose {
   197  			fmt.Fprintf(os.Stderr, " %d/%d\n", len(tags), expectedTotal)
   198  		}
   199  		if len(batch) == 0 || len(tags) >= expectedTotal {
   200  			break
   201  		}
   202  	}
   203  	return tags, nil
   204  }
   205  
   206  // GET /wp/v2/categories
   207  // params we're interested in:
   208  func (wp *Client) ListCategories(params url.Values) ([]*Category, int, error) {
   209  	u := wp.BaseURL + "/wp/v2/categories?" + params.Encode()
   210  
   211  	if wp.Verbose {
   212  		fmt.Fprintf(os.Stderr, "fetch %s\n", u)
   213  	}
   214  
   215  	resp, err := wp.Get(u)
   216  	if err != nil {
   217  		return nil, 0, err
   218  	}
   219  
   220  	// totalpages is returned as a header
   221  	expectedTotal, err := strconv.Atoi(resp.Header.Get("X-WP-Total"))
   222  	if err != nil {
   223  		return nil, 0, err
   224  	}
   225  
   226  	raw, err := ioutil.ReadAll(resp.Body)
   227  	resp.Body.Close()
   228  	if err != nil {
   229  		return nil, 0, err
   230  	}
   231  	if resp.StatusCode != 200 {
   232  		return nil, 0, fmt.Errorf("%s: %d\n", u, resp.StatusCode)
   233  	}
   234  
   235  	categories := []*Category{}
   236  
   237  	err = json.Unmarshal(raw, &categories)
   238  	if err != nil {
   239  		return nil, 0, err
   240  	}
   241  
   242  	return categories, expectedTotal, nil
   243  }
   244  
   245  // ListCategoriesAll repeatedly calls ListCategories until the whole set has been
   246  // retrieved. The pagination-related params will overriden, but all others
   247  // will be passed on verbatim with each request.
   248  func (wp *Client) ListCategoriesAll(params url.Values) ([]*Category, error) {
   249  	perPage := 100
   250  
   251  	categories := []*Category{}
   252  
   253  	for {
   254  		// We override pagination-related params
   255  		params.Set("per_page", strconv.Itoa(perPage))
   256  		params.Set("offset", strconv.Itoa(len(categories)))
   257  		params.Del("page")
   258  
   259  		batch, expectedTotal, err := wp.ListCategories(params)
   260  
   261  		if err != nil {
   262  			return nil, err
   263  		}
   264  
   265  		categories = append(categories, batch...)
   266  		if wp.Verbose {
   267  			fmt.Fprintf(os.Stderr, " %d/%d\n", len(categories), expectedTotal)
   268  		}
   269  		if len(batch) == 0 || len(categories) >= expectedTotal {
   270  			break
   271  		}
   272  	}
   273  	return categories, nil
   274  }