github.com/ncruces/go-sqlite3@v0.15.1-0.20240520133447-53eef1510ff0/ext/stats/welford.go (about) 1 package stats 2 3 import ( 4 "math" 5 "strconv" 6 "strings" 7 ) 8 9 // Welford's algorithm with Kahan summation: 10 // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm 11 // https://en.wikipedia.org/wiki/Kahan_summation_algorithm 12 13 // See also: 14 // https://duckdb.org/docs/sql/aggregates.html#statistical-aggregates 15 16 type welford struct { 17 m1, m2 kahan 18 n int64 19 } 20 21 func (w welford) average() float64 { 22 return w.m1.hi 23 } 24 25 func (w welford) var_pop() float64 { 26 return w.m2.hi / float64(w.n) 27 } 28 29 func (w welford) var_samp() float64 { 30 return w.m2.hi / float64(w.n-1) // Bessel's correction 31 } 32 33 func (w welford) stddev_pop() float64 { 34 return math.Sqrt(w.var_pop()) 35 } 36 37 func (w welford) stddev_samp() float64 { 38 return math.Sqrt(w.var_samp()) 39 } 40 41 func (w *welford) enqueue(x float64) { 42 w.n++ 43 d1 := x - w.m1.hi - w.m1.lo 44 w.m1.add(d1 / float64(w.n)) 45 d2 := x - w.m1.hi - w.m1.lo 46 w.m2.add(d1 * d2) 47 } 48 49 func (w *welford) dequeue(x float64) { 50 w.n-- 51 d1 := x - w.m1.hi - w.m1.lo 52 w.m1.sub(d1 / float64(w.n)) 53 d2 := x - w.m1.hi - w.m1.lo 54 w.m2.sub(d1 * d2) 55 } 56 57 type welford2 struct { 58 m1y, m2y kahan 59 m1x, m2x kahan 60 cov kahan 61 n int64 62 } 63 64 func (w welford2) covar_pop() float64 { 65 return w.cov.hi / float64(w.n) 66 } 67 68 func (w welford2) covar_samp() float64 { 69 return w.cov.hi / float64(w.n-1) // Bessel's correction 70 } 71 72 func (w welford2) correlation() float64 { 73 return w.cov.hi / math.Sqrt(w.m2y.hi*w.m2x.hi) 74 } 75 76 func (w welford2) regr_avgy() float64 { 77 return w.m1y.hi 78 } 79 80 func (w welford2) regr_avgx() float64 { 81 return w.m1x.hi 82 } 83 84 func (w welford2) regr_syy() float64 { 85 return w.m2y.hi 86 } 87 88 func (w welford2) regr_sxx() float64 { 89 return w.m2x.hi 90 } 91 92 func (w welford2) regr_sxy() float64 { 93 return w.cov.hi 94 } 95 96 func (w welford2) regr_count() int64 { 97 return w.n 98 } 99 100 func (w welford2) regr_slope() float64 { 101 return w.cov.hi / w.m2x.hi 102 } 103 104 func (w welford2) regr_intercept() float64 { 105 slope := -w.regr_slope() 106 hi := math.FMA(slope, w.m1x.hi, w.m1y.hi) 107 lo := math.FMA(slope, w.m1x.lo, w.m1y.lo) 108 return hi + lo 109 } 110 111 func (w welford2) regr_r2() float64 { 112 return w.cov.hi * w.cov.hi / (w.m2y.hi * w.m2x.hi) 113 } 114 115 func (w welford2) regr_json() string { 116 var json strings.Builder 117 var num [32]byte 118 json.Grow(128) 119 json.WriteString(`{"count":`) 120 json.Write(strconv.AppendInt(num[:0], w.regr_count(), 10)) 121 json.WriteString(`,"avgy":`) 122 json.Write(strconv.AppendFloat(num[:0], w.regr_avgy(), 'g', -1, 64)) 123 json.WriteString(`,"avgx":`) 124 json.Write(strconv.AppendFloat(num[:0], w.regr_avgx(), 'g', -1, 64)) 125 json.WriteString(`,"syy":`) 126 json.Write(strconv.AppendFloat(num[:0], w.regr_syy(), 'g', -1, 64)) 127 json.WriteString(`,"sxx":`) 128 json.Write(strconv.AppendFloat(num[:0], w.regr_sxx(), 'g', -1, 64)) 129 json.WriteString(`,"sxy":`) 130 json.Write(strconv.AppendFloat(num[:0], w.regr_sxy(), 'g', -1, 64)) 131 json.WriteString(`,"slope":`) 132 json.Write(strconv.AppendFloat(num[:0], w.regr_slope(), 'g', -1, 64)) 133 json.WriteString(`,"intercept":`) 134 json.Write(strconv.AppendFloat(num[:0], w.regr_intercept(), 'g', -1, 64)) 135 json.WriteString(`,"r2":`) 136 json.Write(strconv.AppendFloat(num[:0], w.regr_r2(), 'g', -1, 64)) 137 json.WriteByte('}') 138 return json.String() 139 } 140 141 func (w *welford2) enqueue(y, x float64) { 142 w.n++ 143 d1y := y - w.m1y.hi - w.m1y.lo 144 d1x := x - w.m1x.hi - w.m1x.lo 145 w.m1y.add(d1y / float64(w.n)) 146 w.m1x.add(d1x / float64(w.n)) 147 d2y := y - w.m1y.hi - w.m1y.lo 148 d2x := x - w.m1x.hi - w.m1x.lo 149 w.m2y.add(d1y * d2y) 150 w.m2x.add(d1x * d2x) 151 w.cov.add(d1y * d2x) 152 } 153 154 func (w *welford2) dequeue(y, x float64) { 155 w.n-- 156 d1y := y - w.m1y.hi - w.m1y.lo 157 d1x := x - w.m1x.hi - w.m1x.lo 158 w.m1y.sub(d1y / float64(w.n)) 159 w.m1x.sub(d1x / float64(w.n)) 160 d2y := y - w.m1y.hi - w.m1y.lo 161 d2x := x - w.m1x.hi - w.m1x.lo 162 w.m2y.sub(d1y * d2y) 163 w.m2x.sub(d1x * d2x) 164 w.cov.sub(d1y * d2x) 165 } 166 167 type kahan struct{ hi, lo float64 } 168 169 func (k *kahan) add(x float64) { 170 y := k.lo + x 171 t := k.hi + y 172 k.lo = y - (t - k.hi) 173 k.hi = t 174 } 175 176 func (k *kahan) sub(x float64) { 177 y := k.lo - x 178 t := k.hi + y 179 k.lo = y - (t - k.hi) 180 k.hi = t 181 }