github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/shard_dimension_tracking.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package db 13 14 import ( 15 "encoding/binary" 16 "strings" 17 "time" 18 19 enterrors "github.com/weaviate/weaviate/entities/errors" 20 21 "github.com/weaviate/weaviate/adapters/repos/db/helpers" 22 "github.com/weaviate/weaviate/adapters/repos/db/lsmkv" 23 "github.com/weaviate/weaviate/entities/schema" 24 hnswent "github.com/weaviate/weaviate/entities/vectorindex/hnsw" 25 "github.com/weaviate/weaviate/usecases/monitoring" 26 ) 27 28 type DimensionCategory int 29 30 const ( 31 DimensionCategoryStandard DimensionCategory = iota 32 DimensionCategoryPQ 33 DimensionCategoryBQ 34 ) 35 36 func (s *Shard) Dimensions() int { 37 keyLen := 4 38 return s.calcDimensions(func(k []byte, v []lsmkv.MapPair) int { 39 // consider only keys of len 4, skipping ones prefixed with vector name 40 if len(k) == keyLen { 41 dimLength := binary.LittleEndian.Uint32(k) 42 return int(dimLength) * len(v) 43 } 44 return 0 45 }) 46 } 47 48 func (s *Shard) DimensionsForVec(vecName string) int { 49 nameLen := len(vecName) 50 keyLen := nameLen + 4 51 return s.calcDimensions(func(k []byte, v []lsmkv.MapPair) int { 52 // consider only keys of len vecName + 4, prefixed with vecName 53 if len(k) == keyLen && strings.HasPrefix(string(k), vecName) { 54 dimLength := binary.LittleEndian.Uint32(k[nameLen:]) 55 return int(dimLength) * len(v) 56 } 57 return 0 58 }) 59 } 60 61 func (s *Shard) QuantizedDimensions(segments int) int { 62 // Exit early if segments is 0 (unset), in this case PQ will use the same number of dimensions 63 // as the segment size 64 if segments <= 0 { 65 return s.Dimensions() 66 } 67 68 keyLen := 4 69 return s.calcDimensions(func(k []byte, v []lsmkv.MapPair) int { 70 // consider only keys of len 4, skipping ones prefixed with vector name 71 if len(k) == keyLen { 72 if dimLength := binary.LittleEndian.Uint32(k); dimLength > 0 { 73 return len(v) 74 } 75 } 76 return 0 77 }) * segments 78 } 79 80 func (s *Shard) QuantizedDimensionsForVec(segments int, vecName string) int { 81 // Exit early if segments is 0 (unset), in this case PQ will use the same number of dimensions 82 // as the segment size 83 if segments <= 0 { 84 return s.DimensionsForVec(vecName) 85 } 86 87 nameLen := len(vecName) 88 keyLen := nameLen + 4 89 return s.calcDimensions(func(k []byte, v []lsmkv.MapPair) int { 90 // consider only keys of len vecName + 4, prefixed with vecName 91 if len(k) == keyLen && strings.HasPrefix(string(k), vecName) { 92 if dimLength := binary.LittleEndian.Uint32(k[nameLen:]); dimLength > 0 { 93 return len(v) 94 } 95 } 96 return 0 97 }) * segments 98 } 99 100 func (s *Shard) calcDimensions(calcEntry func(k []byte, v []lsmkv.MapPair) int) int { 101 b := s.store.Bucket(helpers.DimensionsBucketLSM) 102 if b == nil { 103 return 0 104 } 105 106 c := b.MapCursor() 107 defer c.Close() 108 109 sum := 0 110 for k, v := c.First(); k != nil; k, v = c.Next() { 111 sum += calcEntry(k, v) 112 } 113 114 return sum 115 } 116 117 func (s *Shard) initDimensionTracking() { 118 if s.index.Config.TrackVectorDimensions { 119 // always send vector dimensions at startup if tracking is enabled 120 s.publishDimensionMetrics() 121 // start tracking vector dimensions goroutine only when tracking is enabled 122 f := func() { 123 t := time.NewTicker(5 * time.Minute) 124 defer t.Stop() 125 for { 126 select { 127 case <-s.stopMetrics: 128 return 129 case <-t.C: 130 s.publishDimensionMetrics() 131 } 132 } 133 } 134 enterrors.GoWrapper(f, s.index.logger) 135 } 136 } 137 138 func (s *Shard) publishDimensionMetrics() { 139 if s.promMetrics != nil { 140 className := s.index.Config.ClassName.String() 141 142 if !s.hasTargetVectors() { 143 // send stats for legacy vector only 144 switch category, segments := getDimensionCategory(s.index.vectorIndexUserConfig); category { 145 case DimensionCategoryPQ: 146 count := s.QuantizedDimensions(segments) 147 sendVectorSegmentsMetric(s.promMetrics, className, s.name, count) 148 sendVectorDimensionsMetric(s.promMetrics, className, s.name, 0) 149 case DimensionCategoryBQ: 150 count := s.Dimensions() / 8 // BQ has a flat 8x reduction in the dimensions metric 151 sendVectorSegmentsMetric(s.promMetrics, className, s.name, count) 152 sendVectorDimensionsMetric(s.promMetrics, className, s.name, 0) 153 default: 154 count := s.Dimensions() 155 sendVectorDimensionsMetric(s.promMetrics, className, s.name, count) 156 } 157 return 158 } 159 160 sumSegments := 0 161 sumDimensions := 0 162 163 // send stats for each target vector 164 for vecName, vecCfg := range s.index.vectorIndexUserConfigs { 165 switch category, segments := getDimensionCategory(vecCfg); category { 166 case DimensionCategoryPQ: 167 count := s.QuantizedDimensionsForVec(segments, vecName) 168 sumSegments += count 169 sendVectorSegmentsForVecMetric(s.promMetrics, className, s.name, count, vecName) 170 sendVectorDimensionsForVecMetric(s.promMetrics, className, s.name, 0, vecName) 171 case DimensionCategoryBQ: 172 count := s.DimensionsForVec(vecName) / 8 // BQ has a flat 8x reduction in the dimensions metric 173 sumSegments += count 174 sendVectorSegmentsForVecMetric(s.promMetrics, className, s.name, count, vecName) 175 sendVectorDimensionsForVecMetric(s.promMetrics, className, s.name, 0, vecName) 176 default: 177 count := s.DimensionsForVec(vecName) 178 sumDimensions += count 179 sendVectorDimensionsForVecMetric(s.promMetrics, className, s.name, count, vecName) 180 } 181 } 182 183 // send sum stats for all target vectors 184 sendVectorSegmentsMetric(s.promMetrics, className, s.name, sumSegments) 185 sendVectorDimensionsMetric(s.promMetrics, className, s.name, sumDimensions) 186 } 187 } 188 189 func (s *Shard) clearDimensionMetrics() { 190 clearDimensionMetrics(s.promMetrics, s.index.Config.ClassName.String(), 191 s.name, s.index.vectorIndexUserConfig, s.index.vectorIndexUserConfigs) 192 } 193 194 func clearDimensionMetrics(promMetrics *monitoring.PrometheusMetrics, 195 className, shardName string, 196 cfg schema.VectorIndexConfig, targetCfgs map[string]schema.VectorIndexConfig, 197 ) { 198 if promMetrics != nil { 199 if !hasTargetVectors(cfg, targetCfgs) { 200 // send stats for legacy vector only 201 switch category, _ := getDimensionCategory(cfg); category { 202 case DimensionCategoryPQ, DimensionCategoryBQ: 203 sendVectorDimensionsMetric(promMetrics, className, shardName, 0) 204 sendVectorSegmentsMetric(promMetrics, className, shardName, 0) 205 default: 206 sendVectorDimensionsMetric(promMetrics, className, shardName, 0) 207 } 208 return 209 } 210 211 // send stats for each target vector 212 for vecName, vecCfg := range targetCfgs { 213 switch category, _ := getDimensionCategory(vecCfg); category { 214 case DimensionCategoryPQ, DimensionCategoryBQ: 215 sendVectorDimensionsForVecMetric(promMetrics, className, shardName, 0, vecName) 216 sendVectorSegmentsForVecMetric(promMetrics, className, shardName, 0, vecName) 217 default: 218 sendVectorDimensionsForVecMetric(promMetrics, className, shardName, 0, vecName) 219 } 220 } 221 222 // send sum stats for all target vectors 223 sendVectorDimensionsMetric(promMetrics, className, shardName, 0) 224 sendVectorSegmentsMetric(promMetrics, className, shardName, 0) 225 } 226 } 227 228 func sendVectorSegmentsMetric(promMetrics *monitoring.PrometheusMetrics, 229 className, shardName string, count int, 230 ) { 231 metric, err := promMetrics.VectorSegmentsSum. 232 GetMetricWithLabelValues(className, shardName) 233 if err == nil { 234 metric.Set(float64(count)) 235 } 236 } 237 238 func sendVectorSegmentsForVecMetric(promMetrics *monitoring.PrometheusMetrics, 239 className, shardName string, count int, vecName string, 240 ) { 241 metric, err := promMetrics.VectorSegmentsSumByVector. 242 GetMetricWithLabelValues(className, shardName, vecName) 243 if err == nil { 244 metric.Set(float64(count)) 245 } 246 } 247 248 func sendVectorDimensionsMetric(promMetrics *monitoring.PrometheusMetrics, 249 className, shardName string, count int, 250 ) { 251 // Important: Never group classes/shards for this metric. We need the 252 // granularity here as this tracks an absolute value per shard that changes 253 // independently over time. 254 // 255 // If we need to reduce metrics further, an alternative could be to not 256 // make dimension tracking shard-centric, but rather make it node-centric. 257 // Then have a single metric that aggregates all dimensions first, then 258 // observes only the sum 259 metric, err := promMetrics.VectorDimensionsSum. 260 GetMetricWithLabelValues(className, shardName) 261 if err == nil { 262 metric.Set(float64(count)) 263 } 264 } 265 266 func sendVectorDimensionsForVecMetric(promMetrics *monitoring.PrometheusMetrics, 267 className, shardName string, count int, vecName string, 268 ) { 269 // Important: Never group classes/shards for this metric. We need the 270 // granularity here as this tracks an absolute value per shard that changes 271 // independently over time. 272 // 273 // If we need to reduce metrics further, an alternative could be to not 274 // make dimension tracking shard-centric, but rather make it node-centric. 275 // Then have a single metric that aggregates all dimensions first, then 276 // observes only the sum 277 metric, err := promMetrics.VectorDimensionsSumByVector. 278 GetMetricWithLabelValues(className, shardName, vecName) 279 if err == nil { 280 metric.Set(float64(count)) 281 } 282 } 283 284 func getDimensionCategory(cfg schema.VectorIndexConfig) (DimensionCategory, int) { 285 // We have special dimension tracking for BQ and PQ to represent reduced costs 286 // these are published under the separate vector_segments_dimensions metric 287 if hnswUserConfig, ok := cfg.(hnswent.UserConfig); ok { 288 if hnswUserConfig.PQ.Enabled { 289 return DimensionCategoryPQ, hnswUserConfig.PQ.Segments 290 } 291 if hnswUserConfig.BQ.Enabled { 292 return DimensionCategoryBQ, 0 293 } 294 } 295 return DimensionCategoryStandard, 0 296 }