github.com/bcampbell/scrapeomat@v0.0.0-20220820232205-23e64141c89e/slurp/summary.go (about)

     1  package slurp
     2  
     3  import (
     4  	"encoding/json"
     5  	"fmt"
     6  	"net/http"
     7  	//	"net/url"
     8  	"sort"
     9  	"time"
    10  )
    11  
// RawSummary holds article counts as a map of maps:
// pubcodes -> days -> counts.
//
// The day keys are parsed elsewhere in this file with the "2006-01-02"
// (YYYY-MM-DD) layout; dayExtents skips keys that are empty or do not
// parse with that layout.
type RawSummary map[string]map[string]int
    14  
    15  // returns a map of maps pubcodes -> days -> counts
    16  func (s *Slurper) Summary(filt *Filter) (RawSummary, error) {
    17  
    18  	if filt.Count != 0 {
    19  		return nil, fmt.Errorf("Count must be zero")
    20  	}
    21  
    22  	params := filt.params()
    23  
    24  	client := s.Client
    25  	if client == nil {
    26  		client = &http.Client{}
    27  	}
    28  
    29  	u := s.Location + "/api/summary?" + params.Encode()
    30  
    31  	// fmt.Printf("request: %s\n", u)
    32  	resp, err := client.Get(u)
    33  	if err != nil {
    34  		return nil, err
    35  	}
    36  	defer resp.Body.Close()
    37  
    38  	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
    39  		err = fmt.Errorf("HTTP error: %s", resp.Status)
    40  		return nil, err
    41  	}
    42  
    43  	dec := json.NewDecoder(resp.Body)
    44  
    45  	var raw struct {
    46  		Counts RawSummary `json:"counts"`
    47  	}
    48  
    49  	err = dec.Decode(&raw)
    50  	if err != nil {
    51  		return nil, err
    52  	}
    53  
    54  	return raw.Counts, nil
    55  }
    56  
// CookedSummary is a table-shaped view of article counts, as produced by
// CookSummary: one row per publication code and one column per day.
type CookedSummary struct {
	PubCodes []string // row labels; CookSummary fills this with the sorted publication codes
	Days     []string // column labels; CookSummary fills this with consecutive "2006-01-02"-formatted days
	// A slice of slices of counts, one inner slice per publication code
	// and one entry per day.
	// Access as: Data[pubcodeindex][dayindex]
	Data [][]int
	Max  int // largest single count stored in Data (0 when there are no counts), as computed by CookSummary
}
    65  
    66  // return a range of days (sorted, inclusive)
    67  func dayRange(dayFrom string, dayTo string) []string {
    68  	days := []string{}
    69  
    70  	tFrom, err := time.Parse("2006-01-02", dayFrom)
    71  	if err != nil {
    72  		return days
    73  	}
    74  
    75  	tTo, err := time.Parse("2006-01-02", dayTo)
    76  	if err != nil {
    77  		return days
    78  	}
    79  
    80  	for day := tFrom; !day.After(tTo); day = day.AddDate(0, 0, 1) {
    81  		days = append(days, day.Format("2006-01-02"))
    82  	}
    83  	return days
    84  }
    85  
    86  func dayExtents(raw RawSummary) (time.Time, time.Time) {
    87  	maxDay := time.Time{}
    88  	minDay := time.Date(999999, 0, 0, 0, 0, 0, 0, time.UTC)
    89  	for _, days := range raw {
    90  		for day, _ := range days {
    91  			if day == "" {
    92  				continue
    93  			}
    94  			t, err := time.Parse("2006-01-02", day)
    95  			if err != nil {
    96  				continue
    97  			}
    98  
    99  			if t.Before(minDay) {
   100  				minDay = t
   101  			}
   102  			if t.After(maxDay) {
   103  				maxDay = t
   104  			}
   105  		}
   106  	}
   107  	return minDay, maxDay
   108  }
   109  
   110  // cooks raw article counts, filling in missing days
   111  func CookSummary(raw RawSummary) *CookedSummary {
   112  
   113  	// get date extents
   114  	minDay, maxDay := dayExtents(raw)
   115  
   116  	// create continuous day range, no gaps
   117  	days := []string{} //dayRange(dayFrom, dayTo)
   118  	for day := minDay; !day.After(maxDay); day = day.AddDate(0, 0, 1) {
   119  		days = append(days, day.Format("2006-01-02"))
   120  	}
   121  
   122  	//
   123  	pubCodes := make([]string, 0, len(raw))
   124  	for pubCode, _ := range raw {
   125  		pubCodes = append(pubCodes, pubCode)
   126  	}
   127  	sort.Strings(pubCodes)
   128  
   129  	cooked := CookedSummary{
   130  		Days:     days,
   131  		PubCodes: pubCodes,
   132  		Data:     make([][]int, len(pubCodes)),
   133  		Max:      0,
   134  	}
   135  
   136  	maxCnt := 0
   137  	for i, pubCode := range pubCodes {
   138  		counts := make([]int, len(days))
   139  		for j, day := range days {
   140  			cnt := raw[pubCode][day]
   141  			if cnt > maxCnt {
   142  				maxCnt = cnt
   143  			}
   144  			counts[j] = cnt
   145  		}
   146  		cooked.Data[i] = counts
   147  	}
   148  	cooked.Max = maxCnt
   149  
   150  	return &cooked
   151  }