github.com/ncruces/go-sqlite3@v0.15.1-0.20240520133447-53eef1510ff0/ext/stats/welford.go

github.com/ncruces/go-sqlite3@v0.15.1-0.20240520133447-53eef1510ff0/ext/stats/welford.go (about)

     1  package stats
     2  
     3  import (
     4  	"math"
     5  	"strconv"
     6  	"strings"
     7  )
     8  
     9  // Welford's algorithm with Kahan summation:
    10  // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
    11  // https://en.wikipedia.org/wiki/Kahan_summation_algorithm
    12  
    13  // See also:
    14  // https://duckdb.org/docs/sql/aggregates.html#statistical-aggregates
    15  
    16  type welford struct {
    17  	m1, m2 kahan
    18  	n      int64
    19  }
    20  
    21  func (w welford) average() float64 {
    22  	return w.m1.hi
    23  }
    24  
    25  func (w welford) var_pop() float64 {
    26  	return w.m2.hi / float64(w.n)
    27  }
    28  
    29  func (w welford) var_samp() float64 {
    30  	return w.m2.hi / float64(w.n-1) // Bessel's correction
    31  }
    32  
    33  func (w welford) stddev_pop() float64 {
    34  	return math.Sqrt(w.var_pop())
    35  }
    36  
    37  func (w welford) stddev_samp() float64 {
    38  	return math.Sqrt(w.var_samp())
    39  }
    40  
    41  func (w *welford) enqueue(x float64) {
    42  	w.n++
    43  	d1 := x - w.m1.hi - w.m1.lo
    44  	w.m1.add(d1 / float64(w.n))
    45  	d2 := x - w.m1.hi - w.m1.lo
    46  	w.m2.add(d1 * d2)
    47  }
    48  
    49  func (w *welford) dequeue(x float64) {
    50  	w.n--
    51  	d1 := x - w.m1.hi - w.m1.lo
    52  	w.m1.sub(d1 / float64(w.n))
    53  	d2 := x - w.m1.hi - w.m1.lo
    54  	w.m2.sub(d1 * d2)
    55  }
    56  
    57  type welford2 struct {
    58  	m1y, m2y kahan
    59  	m1x, m2x kahan
    60  	cov      kahan
    61  	n        int64
    62  }
    63  
    64  func (w welford2) covar_pop() float64 {
    65  	return w.cov.hi / float64(w.n)
    66  }
    67  
    68  func (w welford2) covar_samp() float64 {
    69  	return w.cov.hi / float64(w.n-1) // Bessel's correction
    70  }
    71  
    72  func (w welford2) correlation() float64 {
    73  	return w.cov.hi / math.Sqrt(w.m2y.hi*w.m2x.hi)
    74  }
    75  
    76  func (w welford2) regr_avgy() float64 {
    77  	return w.m1y.hi
    78  }
    79  
    80  func (w welford2) regr_avgx() float64 {
    81  	return w.m1x.hi
    82  }
    83  
    84  func (w welford2) regr_syy() float64 {
    85  	return w.m2y.hi
    86  }
    87  
    88  func (w welford2) regr_sxx() float64 {
    89  	return w.m2x.hi
    90  }
    91  
    92  func (w welford2) regr_sxy() float64 {
    93  	return w.cov.hi
    94  }
    95  
    96  func (w welford2) regr_count() int64 {
    97  	return w.n
    98  }
    99  
   100  func (w welford2) regr_slope() float64 {
   101  	return w.cov.hi / w.m2x.hi
   102  }
   103  
   104  func (w welford2) regr_intercept() float64 {
   105  	slope := -w.regr_slope()
   106  	hi := math.FMA(slope, w.m1x.hi, w.m1y.hi)
   107  	lo := math.FMA(slope, w.m1x.lo, w.m1y.lo)
   108  	return hi + lo
   109  }
   110  
   111  func (w welford2) regr_r2() float64 {
   112  	return w.cov.hi * w.cov.hi / (w.m2y.hi * w.m2x.hi)
   113  }
   114  
   115  func (w welford2) regr_json() string {
   116  	var json strings.Builder
   117  	var num [32]byte
   118  	json.Grow(128)
   119  	json.WriteString(`{"count":`)
   120  	json.Write(strconv.AppendInt(num[:0], w.regr_count(), 10))
   121  	json.WriteString(`,"avgy":`)
   122  	json.Write(strconv.AppendFloat(num[:0], w.regr_avgy(), 'g', -1, 64))
   123  	json.WriteString(`,"avgx":`)
   124  	json.Write(strconv.AppendFloat(num[:0], w.regr_avgx(), 'g', -1, 64))
   125  	json.WriteString(`,"syy":`)
   126  	json.Write(strconv.AppendFloat(num[:0], w.regr_syy(), 'g', -1, 64))
   127  	json.WriteString(`,"sxx":`)
   128  	json.Write(strconv.AppendFloat(num[:0], w.regr_sxx(), 'g', -1, 64))
   129  	json.WriteString(`,"sxy":`)
   130  	json.Write(strconv.AppendFloat(num[:0], w.regr_sxy(), 'g', -1, 64))
   131  	json.WriteString(`,"slope":`)
   132  	json.Write(strconv.AppendFloat(num[:0], w.regr_slope(), 'g', -1, 64))
   133  	json.WriteString(`,"intercept":`)
   134  	json.Write(strconv.AppendFloat(num[:0], w.regr_intercept(), 'g', -1, 64))
   135  	json.WriteString(`,"r2":`)
   136  	json.Write(strconv.AppendFloat(num[:0], w.regr_r2(), 'g', -1, 64))
   137  	json.WriteByte('}')
   138  	return json.String()
   139  }
   140  
   141  func (w *welford2) enqueue(y, x float64) {
   142  	w.n++
   143  	d1y := y - w.m1y.hi - w.m1y.lo
   144  	d1x := x - w.m1x.hi - w.m1x.lo
   145  	w.m1y.add(d1y / float64(w.n))
   146  	w.m1x.add(d1x / float64(w.n))
   147  	d2y := y - w.m1y.hi - w.m1y.lo
   148  	d2x := x - w.m1x.hi - w.m1x.lo
   149  	w.m2y.add(d1y * d2y)
   150  	w.m2x.add(d1x * d2x)
   151  	w.cov.add(d1y * d2x)
   152  }
   153  
   154  func (w *welford2) dequeue(y, x float64) {
   155  	w.n--
   156  	d1y := y - w.m1y.hi - w.m1y.lo
   157  	d1x := x - w.m1x.hi - w.m1x.lo
   158  	w.m1y.sub(d1y / float64(w.n))
   159  	w.m1x.sub(d1x / float64(w.n))
   160  	d2y := y - w.m1y.hi - w.m1y.lo
   161  	d2x := x - w.m1x.hi - w.m1x.lo
   162  	w.m2y.sub(d1y * d2y)
   163  	w.m2x.sub(d1x * d2x)
   164  	w.cov.sub(d1y * d2x)
   165  }
   166  
   167  type kahan struct{ hi, lo float64 }
   168  
   169  func (k *kahan) add(x float64) {
   170  	y := k.lo + x
   171  	t := k.hi + y
   172  	k.lo = y - (t - k.hi)
   173  	k.hi = t
   174  }
   175  
   176  func (k *kahan) sub(x float64) {
   177  	y := k.lo - x
   178  	t := k.hi + y
   179  	k.lo = y - (t - k.hi)
   180  	k.hi = t
   181  }