github.com/bosssauce/ponzu@v0.11.1-0.20200102001432-9bc41b703131/system/api/analytics/init.go (about)

     1  // Package analytics provides the methods to run an analytics reporting system
     2  // for API requests which may be useful to users for measuring access and
     3  // possibly identifying bad actors abusing requests.
     4  package analytics
     5  
     6  import (
     7  	"encoding/json"
     8  	"log"
     9  	"net/http"
    10  	"path/filepath"
    11  	"runtime"
    12  	"strings"
    13  	"time"
    14  
    15  	"github.com/boltdb/bolt"
    16  	"github.com/ponzu-cms/ponzu/system/cfg"
    17  )
    18  
    19  type apiRequest struct {
    20  	URL        string `json:"url"`
    21  	Method     string `json:"http_method"`
    22  	Origin     string `json:"origin"`
    23  	Proto      string `json:"http_protocol"`
    24  	RemoteAddr string `json:"ip_address"`
    25  	Timestamp  int64  `json:"timestamp"`
    26  	External   bool   `json:"external_content"`
    27  }
    28  
    29  type apiMetric struct {
    30  	Date   string `json:"date"`
    31  	Total  int    `json:"total"`
    32  	Unique int    `json:"unique"`
    33  }
    34  
    35  var (
    36  	store       *bolt.DB
    37  	requestChan chan apiRequest
    38  )
    39  
    40  // RANGE determines the number of days ponzu request analytics and metrics are
    41  // stored and displayed within the system
    42  const RANGE = 14
    43  
    44  // Record queues an apiRequest for metrics
    45  func Record(req *http.Request) {
    46  	external := strings.Contains(req.URL.Path, "/external/")
    47  
    48  	ts := int64(time.Nanosecond) * time.Now().UnixNano() / int64(time.Millisecond)
    49  
    50  	r := apiRequest{
    51  		URL:        req.URL.String(),
    52  		Method:     req.Method,
    53  		Origin:     req.Header.Get("Origin"),
    54  		Proto:      req.Proto,
    55  		RemoteAddr: req.RemoteAddr,
    56  		Timestamp:  ts,
    57  		External:   external,
    58  	}
    59  
    60  	// put r on buffered requestChan to take advantage of batch insertion in DB
    61  	requestChan <- r
    62  }
    63  
    64  // Close exports the abillity to close our db file. Should be called with defer
    65  // after call to Init() from the same place.
    66  func Close() {
    67  	err := store.Close()
    68  	if err != nil {
    69  		log.Println(err)
    70  	}
    71  }
    72  
    73  // Init creates a db connection, initializes the db with schema and data and
    74  // sets up the queue/batching channel
    75  func Init() {
    76  	var err error
    77  	analyticsDb := filepath.Join(cfg.DataDir(),"analytics.db")
    78  	store, err = bolt.Open(analyticsDb, 0666, nil)
    79  	if err != nil {
    80  		log.Fatalln(err)
    81  	}
    82  
    83  	err = store.Update(func(tx *bolt.Tx) error {
    84  		_, err := tx.CreateBucketIfNotExists([]byte("__requests"))
    85  		if err != nil {
    86  			return err
    87  		}
    88  
    89  		_, err = tx.CreateBucketIfNotExists([]byte("__metrics"))
    90  		if err != nil {
    91  			return err
    92  		}
    93  
    94  		return nil
    95  	})
    96  	if err != nil {
    97  		log.Fatalln("Error idempotently creating requests bucket in analytics.db:", err)
    98  	}
    99  
   100  	requestChan = make(chan apiRequest, 1024*64*runtime.NumCPU())
   101  
   102  	go serve()
   103  
   104  	if err != nil {
   105  		log.Fatalln(err)
   106  	}
   107  }
   108  
   109  func serve() {
   110  	// make timer to notify select to batch request insert from requestChan
   111  	// interval: 30 seconds
   112  	apiRequestTimer := time.NewTicker(time.Second * 30)
   113  
   114  	// make timer to notify select to remove analytics older than RANGE days
   115  	// interval: RANGE/2 days
   116  	// TODO: enable analytics backup service to cloud
   117  	pruneThreshold := time.Hour * 24 * RANGE
   118  	pruneDBTimer := time.NewTicker(pruneThreshold / 2)
   119  
   120  	for {
   121  		select {
   122  		case <-apiRequestTimer.C:
   123  			err := batchInsert(requestChan)
   124  			if err != nil {
   125  				log.Println(err)
   126  			}
   127  
   128  		case <-pruneDBTimer.C:
   129  			err := batchPrune(pruneThreshold)
   130  			if err != nil {
   131  				log.Println(err)
   132  			}
   133  
   134  		case <-time.After(time.Second * 30):
   135  			continue
   136  		}
   137  	}
   138  }
   139  
   140  // ChartData returns the map containing decoded javascript needed to chart RANGE
   141  // days of data by day
   142  func ChartData() (map[string]interface{}, error) {
   143  	// set thresholds for today and the RANGE-1 days preceding
   144  	times := [RANGE]time.Time{}
   145  	dates := [RANGE]string{}
   146  	now := time.Now()
   147  	today := time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, time.UTC)
   148  
   149  	ips := [RANGE]map[string]struct{}{}
   150  	for i := range ips {
   151  		ips[i] = make(map[string]struct{})
   152  	}
   153  
   154  	total := [RANGE]int{}
   155  	unique := [RANGE]int{}
   156  
   157  	for i := range times {
   158  		// subtract 24 * i hours to make days prior
   159  		dur := time.Duration(24 * i * -1)
   160  		day := today.Add(time.Hour * dur)
   161  
   162  		// day threshold is [...n-1-i, n-1, n]
   163  		times[len(times)-1-i] = day
   164  		dates[len(times)-1-i] = day.Format("01/02")
   165  	}
   166  
   167  	// get api request analytics and metrics from db
   168  	var requests = []apiRequest{}
   169  	currentMetrics := make(map[string]apiMetric)
   170  
   171  	err := store.Update(func(tx *bolt.Tx) error {
   172  		m := tx.Bucket([]byte("__metrics"))
   173  		b := tx.Bucket([]byte("__requests"))
   174  
   175  		err := m.ForEach(func(k, v []byte) error {
   176  			var metric apiMetric
   177  			err := json.Unmarshal(v, &metric)
   178  			if err != nil {
   179  				log.Println("Error decoding api metric json from analytics db:", err)
   180  				return nil
   181  			}
   182  
   183  			// add metric to currentMetrics map
   184  			currentMetrics[metric.Date] = metric
   185  
   186  			return nil
   187  		})
   188  		if err != nil {
   189  			return err
   190  		}
   191  
   192  		err = b.ForEach(func(k, v []byte) error {
   193  			var r apiRequest
   194  			err := json.Unmarshal(v, &r)
   195  			if err != nil {
   196  				log.Println("Error decoding api request json from analytics db:", err)
   197  				return nil
   198  			}
   199  
   200  			// append request to requests for analysis if its timestamp is today
   201  			// or if its day is not already in cache, otherwise delete it
   202  			d := time.Unix(r.Timestamp/1000, 0)
   203  			_, inCache := currentMetrics[d.Format("01/02")]
   204  			if !d.Before(today) || !inCache {
   205  				requests = append(requests, r)
   206  			} else {
   207  				err := b.Delete(k)
   208  				if err != nil {
   209  					return err
   210  				}
   211  			}
   212  
   213  			return nil
   214  		})
   215  		if err != nil {
   216  			return err
   217  		}
   218  
   219  		return nil
   220  	})
   221  	if err != nil {
   222  		return nil, err
   223  	}
   224  
   225  CHECK_REQUEST:
   226  	for i := range requests {
   227  		ts := time.Unix(requests[i].Timestamp/1000, 0)
   228  
   229  		for j := range times {
   230  			// if on today, there will be no next iteration to set values for
   231  			// day prior so all valid requests belong to today
   232  			if j == len(times)-1 {
   233  				if ts.After(times[j]) || ts.Equal(times[j]) {
   234  					// do all record keeping
   235  					total[j]++
   236  
   237  					if _, ok := ips[j][requests[i].RemoteAddr]; !ok {
   238  						unique[j]++
   239  						ips[j][requests[i].RemoteAddr] = struct{}{}
   240  					}
   241  
   242  					continue CHECK_REQUEST
   243  				}
   244  			}
   245  
   246  			if ts.Equal(times[j]) {
   247  				// increment total count for current time threshold (day)
   248  				total[j]++
   249  
   250  				// if no IP found for current threshold, increment unique and record IP
   251  				if _, ok := ips[j][requests[i].RemoteAddr]; !ok {
   252  					unique[j]++
   253  					ips[j][requests[i].RemoteAddr] = struct{}{}
   254  				}
   255  
   256  				continue CHECK_REQUEST
   257  			}
   258  
   259  			if ts.Before(times[j]) {
   260  				// check if older than earliest threshold
   261  				if j == 0 {
   262  					continue CHECK_REQUEST
   263  				}
   264  
   265  				// increment total count for previous time threshold (day)
   266  				total[j-1]++
   267  
   268  				// if no IP found for day prior, increment unique and record IP
   269  				if _, ok := ips[j-1][requests[i].RemoteAddr]; !ok {
   270  					unique[j-1]++
   271  					ips[j-1][requests[i].RemoteAddr] = struct{}{}
   272  				}
   273  			}
   274  		}
   275  	}
   276  
   277  	// add data to currentMetrics from total and unique
   278  	for i := range dates {
   279  		_, ok := currentMetrics[dates[i]]
   280  		if !ok {
   281  			m := apiMetric{
   282  				Date:   dates[i],
   283  				Total:  total[i],
   284  				Unique: unique[i],
   285  			}
   286  
   287  			currentMetrics[dates[i]] = m
   288  		}
   289  	}
   290  
   291  	// loop through total and unique to see which dates are accounted for and
   292  	// insert data from metrics array where dates are not
   293  	err = store.Update(func(tx *bolt.Tx) error {
   294  		b := tx.Bucket([]byte("__metrics"))
   295  
   296  		for i := range dates {
   297  			// populate total and unique with cached data if needed
   298  			if total[i] == 0 {
   299  				total[i] = currentMetrics[dates[i]].Total
   300  			}
   301  
   302  			if unique[i] == 0 {
   303  				unique[i] = currentMetrics[dates[i]].Unique
   304  			}
   305  
   306  			// check if we need to insert old data into cache - as long as it
   307  			// is not today's data
   308  			if dates[i] != today.Format("01/02") {
   309  				k := []byte(dates[i])
   310  				if b.Get(k) == nil {
   311  					// keep zero counts out of cache in case data is added from
   312  					// other sources
   313  					if currentMetrics[dates[i]].Total != 0 {
   314  						v, err := json.Marshal(currentMetrics[dates[i]])
   315  						if err != nil {
   316  							return err
   317  						}
   318  
   319  						err = b.Put(k, v)
   320  						if err != nil {
   321  							return err
   322  						}
   323  					}
   324  				}
   325  			}
   326  		}
   327  
   328  		return nil
   329  	})
   330  	if err != nil {
   331  		return nil, err
   332  	}
   333  
   334  	// marshal array counts to js arrays for output to chart
   335  	jsUnique, err := json.Marshal(unique)
   336  	if err != nil {
   337  		return nil, err
   338  	}
   339  
   340  	jsTotal, err := json.Marshal(total)
   341  	if err != nil {
   342  		return nil, err
   343  	}
   344  
   345  	return map[string]interface{}{
   346  		"dates":  dates,
   347  		"unique": string(jsUnique),
   348  		"total":  string(jsTotal),
   349  		"from":   dates[0],
   350  		"to":     dates[len(dates)-1],
   351  	}, nil
   352  }