github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/compressionhelpers/tile_encoder.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package compressionhelpers 13 14 import ( 15 "encoding/binary" 16 "math" 17 "sync/atomic" 18 19 "gonum.org/v1/gonum/stat/distuv" 20 ) 21 22 type distribution interface { 23 Transform(x float64) float64 24 CDF(x float64) float64 25 Quantile(x float64) float64 26 } 27 28 type logNormalDistribution struct { 29 dist *distuv.LogNormal 30 } 31 32 func newLogNormalDistribution(mean float64, std float64) distribution { 33 return &logNormalDistribution{ 34 dist: &distuv.LogNormal{ 35 Mu: mean, 36 Sigma: std, 37 }, 38 } 39 } 40 41 func (d *logNormalDistribution) Transform(x float64) float64 { 42 if x > 0 { 43 return math.Log(x) 44 } 45 return 0 46 } 47 48 func (d *logNormalDistribution) CDF(x float64) float64 { 49 return d.dist.CDF(x) 50 } 51 52 func (d *logNormalDistribution) Quantile(x float64) float64 { 53 return d.dist.Quantile(x) 54 } 55 56 type normalDistribution struct { 57 dist *distuv.Normal 58 } 59 60 func newNormalDistribution(mean float64, std float64) distribution { 61 return &normalDistribution{ 62 dist: &distuv.Normal{ 63 Mu: mean, 64 Sigma: std, 65 }, 66 } 67 } 68 69 func (d *normalDistribution) Transform(x float64) float64 { 70 return x 71 } 72 73 func (d *normalDistribution) CDF(x float64) float64 { 74 return d.dist.CDF(x) 75 } 76 77 func (d *normalDistribution) Quantile(x float64) float64 { 78 return d.dist.Quantile(x) 79 } 80 81 type Centroid struct { 82 Center []float32 83 Calculated atomic.Bool 84 } 85 86 type EncoderDistribution byte 87 88 const ( 89 NormalEncoderDistribution EncoderDistribution = 0 90 LogNormalEncoderDistribution EncoderDistribution = 1 91 ) 92 93 type TileEncoder struct { 94 bins float64 95 mean float64 96 stdDev float64 97 size float64 98 s1 float64 99 s2 float64 100 segment int 101 centroids []Centroid 102 encoderDistribution EncoderDistribution 103 distribution distribution 104 } 105 106 func NewTileEncoder(bits int, segment int, encoderDistribution EncoderDistribution) *TileEncoder { 107 centroids := math.Pow(2, float64(bits)) 108 te := &TileEncoder{ 109 bins: centroids, 110 mean: 0, 111 stdDev: 0, 112 size: 0, 113 s1: 0, 114 s2: 0, 115 segment: segment, 116 centroids: make([]Centroid, int(centroids)), 117 encoderDistribution: encoderDistribution, 118 } 119 te.setEncoderDistribution() 120 return te 121 } 122 123 func RestoreTileEncoder(bins float64, mean float64, stdDev float64, size float64, s1 float64, s2 float64, segment uint16, encoderDistribution byte) *TileEncoder { 124 te := &TileEncoder{ 125 bins: bins, 126 mean: mean, 127 stdDev: stdDev, 128 size: size, 129 s1: s1, 130 s2: s2, 131 segment: int(segment), 132 encoderDistribution: EncoderDistribution(encoderDistribution), 133 } 134 te.setEncoderDistribution() 135 return te 136 } 137 138 func (te *TileEncoder) ExposeDataForRestore() []byte { 139 buffer := make([]byte, 51) 140 binary.LittleEndian.PutUint64(buffer[0:8], math.Float64bits(te.bins)) 141 binary.LittleEndian.PutUint64(buffer[8:16], math.Float64bits(te.mean)) 142 binary.LittleEndian.PutUint64(buffer[16:24], math.Float64bits(te.stdDev)) 143 binary.LittleEndian.PutUint64(buffer[24:32], math.Float64bits(te.size)) 144 binary.LittleEndian.PutUint64(buffer[32:40], math.Float64bits(te.s1)) 145 binary.LittleEndian.PutUint64(buffer[40:48], math.Float64bits(te.s2)) 146 binary.LittleEndian.PutUint16(buffer[48:50], uint16(te.segment)) 147 buffer[50] = byte(te.encoderDistribution) 148 return buffer 149 } 150 151 func (te *TileEncoder) Fit(data [][]float32) error { 152 te.setEncoderDistribution() 153 return nil 154 } 155 156 func (te *TileEncoder) setEncoderDistribution() { 157 switch te.encoderDistribution { 158 case LogNormalEncoderDistribution: 159 te.distribution = newLogNormalDistribution(te.mean, te.stdDev) 160 case NormalEncoderDistribution: 161 te.distribution = newNormalDistribution(te.mean, te.stdDev) 162 } 163 } 164 165 func (te *TileEncoder) Add(x []float32) { 166 // calculate mean and stddev iteratively 167 x64 := te.distribution.Transform(float64(x[te.segment])) 168 te.s1 += x64 169 te.s2 += x64 * x64 170 te.size++ 171 te.mean = te.s1 / te.size 172 sum := te.s2 + te.size*te.mean*te.mean 173 prod := 2 * te.mean * te.s1 174 te.stdDev = math.Sqrt((sum - prod) / te.size) 175 } 176 177 func (te *TileEncoder) Encode(x []float32) byte { 178 cdf := te.distribution.CDF(float64(x[te.segment])) 179 intPart, _ := math.Modf(cdf * float64(te.bins)) 180 return byte(intPart) 181 } 182 183 func (te *TileEncoder) centroid(b byte) []float32 { 184 res := make([]float32, 0, 1) 185 if b == 0 { 186 res = append(res, float32(te.distribution.Quantile(1/te.bins))) 187 } else if b == byte(te.bins) { 188 res = append(res, float32(te.distribution.Quantile((te.bins-1)/te.bins))) 189 } else { 190 b64 := float64(b) 191 mean := (b64/te.bins + (b64+1)/te.bins) / 2 192 res = append(res, float32(te.distribution.Quantile(mean))) 193 } 194 return res 195 } 196 197 func (te *TileEncoder) Centroid(b byte) []float32 { 198 if te.centroids[b].Calculated.Load() { 199 return te.centroids[b].Center 200 } 201 te.centroids[b].Center = te.centroid(b) 202 te.centroids[b].Calculated.Store(true) 203 return te.centroids[b].Center 204 }