github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/shard_dimension_tracking.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package db
    13  
    14  import (
    15  	"encoding/binary"
    16  	"strings"
    17  	"time"
    18  
    19  	enterrors "github.com/weaviate/weaviate/entities/errors"
    20  
    21  	"github.com/weaviate/weaviate/adapters/repos/db/helpers"
    22  	"github.com/weaviate/weaviate/adapters/repos/db/lsmkv"
    23  	"github.com/weaviate/weaviate/entities/schema"
    24  	hnswent "github.com/weaviate/weaviate/entities/vectorindex/hnsw"
    25  	"github.com/weaviate/weaviate/usecases/monitoring"
    26  )
    27  
// DimensionCategory classifies a vector index configuration for the purpose
// of dimension metrics. getDimensionCategory derives it from the index
// config, and the publish/clear metric functions switch on it to decide
// which metrics to send.
type DimensionCategory int

const (
	// DimensionCategoryStandard is the default category, returned when
	// neither PQ nor BQ is enabled or the config is not an HNSW user config.
	DimensionCategoryStandard DimensionCategory = iota
	// DimensionCategoryPQ is returned for HNSW configs with PQ enabled.
	DimensionCategoryPQ
	// DimensionCategoryBQ is returned for HNSW configs with BQ enabled
	// (PQ takes precedence if both are enabled).
	DimensionCategoryBQ
)
    35  
    36  func (s *Shard) Dimensions() int {
    37  	keyLen := 4
    38  	return s.calcDimensions(func(k []byte, v []lsmkv.MapPair) int {
    39  		// consider only keys of len 4, skipping ones prefixed with vector name
    40  		if len(k) == keyLen {
    41  			dimLength := binary.LittleEndian.Uint32(k)
    42  			return int(dimLength) * len(v)
    43  		}
    44  		return 0
    45  	})
    46  }
    47  
    48  func (s *Shard) DimensionsForVec(vecName string) int {
    49  	nameLen := len(vecName)
    50  	keyLen := nameLen + 4
    51  	return s.calcDimensions(func(k []byte, v []lsmkv.MapPair) int {
    52  		// consider only keys of len vecName + 4, prefixed with vecName
    53  		if len(k) == keyLen && strings.HasPrefix(string(k), vecName) {
    54  			dimLength := binary.LittleEndian.Uint32(k[nameLen:])
    55  			return int(dimLength) * len(v)
    56  		}
    57  		return 0
    58  	})
    59  }
    60  
    61  func (s *Shard) QuantizedDimensions(segments int) int {
    62  	// Exit early if segments is 0 (unset), in this case PQ will use the same number of dimensions
    63  	// as the segment size
    64  	if segments <= 0 {
    65  		return s.Dimensions()
    66  	}
    67  
    68  	keyLen := 4
    69  	return s.calcDimensions(func(k []byte, v []lsmkv.MapPair) int {
    70  		// consider only keys of len 4, skipping ones prefixed with vector name
    71  		if len(k) == keyLen {
    72  			if dimLength := binary.LittleEndian.Uint32(k); dimLength > 0 {
    73  				return len(v)
    74  			}
    75  		}
    76  		return 0
    77  	}) * segments
    78  }
    79  
    80  func (s *Shard) QuantizedDimensionsForVec(segments int, vecName string) int {
    81  	// Exit early if segments is 0 (unset), in this case PQ will use the same number of dimensions
    82  	// as the segment size
    83  	if segments <= 0 {
    84  		return s.DimensionsForVec(vecName)
    85  	}
    86  
    87  	nameLen := len(vecName)
    88  	keyLen := nameLen + 4
    89  	return s.calcDimensions(func(k []byte, v []lsmkv.MapPair) int {
    90  		// consider only keys of len vecName + 4, prefixed with vecName
    91  		if len(k) == keyLen && strings.HasPrefix(string(k), vecName) {
    92  			if dimLength := binary.LittleEndian.Uint32(k[nameLen:]); dimLength > 0 {
    93  				return len(v)
    94  			}
    95  		}
    96  		return 0
    97  	}) * segments
    98  }
    99  
   100  func (s *Shard) calcDimensions(calcEntry func(k []byte, v []lsmkv.MapPair) int) int {
   101  	b := s.store.Bucket(helpers.DimensionsBucketLSM)
   102  	if b == nil {
   103  		return 0
   104  	}
   105  
   106  	c := b.MapCursor()
   107  	defer c.Close()
   108  
   109  	sum := 0
   110  	for k, v := c.First(); k != nil; k, v = c.Next() {
   111  		sum += calcEntry(k, v)
   112  	}
   113  
   114  	return sum
   115  }
   116  
   117  func (s *Shard) initDimensionTracking() {
   118  	if s.index.Config.TrackVectorDimensions {
   119  		// always send vector dimensions at startup if tracking is enabled
   120  		s.publishDimensionMetrics()
   121  		// start tracking vector dimensions goroutine only when tracking is enabled
   122  		f := func() {
   123  			t := time.NewTicker(5 * time.Minute)
   124  			defer t.Stop()
   125  			for {
   126  				select {
   127  				case <-s.stopMetrics:
   128  					return
   129  				case <-t.C:
   130  					s.publishDimensionMetrics()
   131  				}
   132  			}
   133  		}
   134  		enterrors.GoWrapper(f, s.index.logger)
   135  	}
   136  }
   137  
   138  func (s *Shard) publishDimensionMetrics() {
   139  	if s.promMetrics != nil {
   140  		className := s.index.Config.ClassName.String()
   141  
   142  		if !s.hasTargetVectors() {
   143  			// send stats for legacy vector only
   144  			switch category, segments := getDimensionCategory(s.index.vectorIndexUserConfig); category {
   145  			case DimensionCategoryPQ:
   146  				count := s.QuantizedDimensions(segments)
   147  				sendVectorSegmentsMetric(s.promMetrics, className, s.name, count)
   148  				sendVectorDimensionsMetric(s.promMetrics, className, s.name, 0)
   149  			case DimensionCategoryBQ:
   150  				count := s.Dimensions() / 8 // BQ has a flat 8x reduction in the dimensions metric
   151  				sendVectorSegmentsMetric(s.promMetrics, className, s.name, count)
   152  				sendVectorDimensionsMetric(s.promMetrics, className, s.name, 0)
   153  			default:
   154  				count := s.Dimensions()
   155  				sendVectorDimensionsMetric(s.promMetrics, className, s.name, count)
   156  			}
   157  			return
   158  		}
   159  
   160  		sumSegments := 0
   161  		sumDimensions := 0
   162  
   163  		// send stats for each target vector
   164  		for vecName, vecCfg := range s.index.vectorIndexUserConfigs {
   165  			switch category, segments := getDimensionCategory(vecCfg); category {
   166  			case DimensionCategoryPQ:
   167  				count := s.QuantizedDimensionsForVec(segments, vecName)
   168  				sumSegments += count
   169  				sendVectorSegmentsForVecMetric(s.promMetrics, className, s.name, count, vecName)
   170  				sendVectorDimensionsForVecMetric(s.promMetrics, className, s.name, 0, vecName)
   171  			case DimensionCategoryBQ:
   172  				count := s.DimensionsForVec(vecName) / 8 // BQ has a flat 8x reduction in the dimensions metric
   173  				sumSegments += count
   174  				sendVectorSegmentsForVecMetric(s.promMetrics, className, s.name, count, vecName)
   175  				sendVectorDimensionsForVecMetric(s.promMetrics, className, s.name, 0, vecName)
   176  			default:
   177  				count := s.DimensionsForVec(vecName)
   178  				sumDimensions += count
   179  				sendVectorDimensionsForVecMetric(s.promMetrics, className, s.name, count, vecName)
   180  			}
   181  		}
   182  
   183  		// send sum stats for all target vectors
   184  		sendVectorSegmentsMetric(s.promMetrics, className, s.name, sumSegments)
   185  		sendVectorDimensionsMetric(s.promMetrics, className, s.name, sumDimensions)
   186  	}
   187  }
   188  
   189  func (s *Shard) clearDimensionMetrics() {
   190  	clearDimensionMetrics(s.promMetrics, s.index.Config.ClassName.String(),
   191  		s.name, s.index.vectorIndexUserConfig, s.index.vectorIndexUserConfigs)
   192  }
   193  
   194  func clearDimensionMetrics(promMetrics *monitoring.PrometheusMetrics,
   195  	className, shardName string,
   196  	cfg schema.VectorIndexConfig, targetCfgs map[string]schema.VectorIndexConfig,
   197  ) {
   198  	if promMetrics != nil {
   199  		if !hasTargetVectors(cfg, targetCfgs) {
   200  			// send stats for legacy vector only
   201  			switch category, _ := getDimensionCategory(cfg); category {
   202  			case DimensionCategoryPQ, DimensionCategoryBQ:
   203  				sendVectorDimensionsMetric(promMetrics, className, shardName, 0)
   204  				sendVectorSegmentsMetric(promMetrics, className, shardName, 0)
   205  			default:
   206  				sendVectorDimensionsMetric(promMetrics, className, shardName, 0)
   207  			}
   208  			return
   209  		}
   210  
   211  		// send stats for each target vector
   212  		for vecName, vecCfg := range targetCfgs {
   213  			switch category, _ := getDimensionCategory(vecCfg); category {
   214  			case DimensionCategoryPQ, DimensionCategoryBQ:
   215  				sendVectorDimensionsForVecMetric(promMetrics, className, shardName, 0, vecName)
   216  				sendVectorSegmentsForVecMetric(promMetrics, className, shardName, 0, vecName)
   217  			default:
   218  				sendVectorDimensionsForVecMetric(promMetrics, className, shardName, 0, vecName)
   219  			}
   220  		}
   221  
   222  		// send sum stats for all target vectors
   223  		sendVectorDimensionsMetric(promMetrics, className, shardName, 0)
   224  		sendVectorSegmentsMetric(promMetrics, className, shardName, 0)
   225  	}
   226  }
   227  
   228  func sendVectorSegmentsMetric(promMetrics *monitoring.PrometheusMetrics,
   229  	className, shardName string, count int,
   230  ) {
   231  	metric, err := promMetrics.VectorSegmentsSum.
   232  		GetMetricWithLabelValues(className, shardName)
   233  	if err == nil {
   234  		metric.Set(float64(count))
   235  	}
   236  }
   237  
   238  func sendVectorSegmentsForVecMetric(promMetrics *monitoring.PrometheusMetrics,
   239  	className, shardName string, count int, vecName string,
   240  ) {
   241  	metric, err := promMetrics.VectorSegmentsSumByVector.
   242  		GetMetricWithLabelValues(className, shardName, vecName)
   243  	if err == nil {
   244  		metric.Set(float64(count))
   245  	}
   246  }
   247  
   248  func sendVectorDimensionsMetric(promMetrics *monitoring.PrometheusMetrics,
   249  	className, shardName string, count int,
   250  ) {
   251  	// Important: Never group classes/shards for this metric. We need the
   252  	// granularity here as this tracks an absolute value per shard that changes
   253  	// independently over time.
   254  	//
   255  	// If we need to reduce metrics further, an alternative could be to not
   256  	// make dimension tracking shard-centric, but rather make it node-centric.
   257  	// Then have a single metric that aggregates all dimensions first, then
   258  	// observes only the sum
   259  	metric, err := promMetrics.VectorDimensionsSum.
   260  		GetMetricWithLabelValues(className, shardName)
   261  	if err == nil {
   262  		metric.Set(float64(count))
   263  	}
   264  }
   265  
   266  func sendVectorDimensionsForVecMetric(promMetrics *monitoring.PrometheusMetrics,
   267  	className, shardName string, count int, vecName string,
   268  ) {
   269  	// Important: Never group classes/shards for this metric. We need the
   270  	// granularity here as this tracks an absolute value per shard that changes
   271  	// independently over time.
   272  	//
   273  	// If we need to reduce metrics further, an alternative could be to not
   274  	// make dimension tracking shard-centric, but rather make it node-centric.
   275  	// Then have a single metric that aggregates all dimensions first, then
   276  	// observes only the sum
   277  	metric, err := promMetrics.VectorDimensionsSumByVector.
   278  		GetMetricWithLabelValues(className, shardName, vecName)
   279  	if err == nil {
   280  		metric.Set(float64(count))
   281  	}
   282  }
   283  
   284  func getDimensionCategory(cfg schema.VectorIndexConfig) (DimensionCategory, int) {
   285  	// We have special dimension tracking for BQ and PQ to represent reduced costs
   286  	// these are published under the separate vector_segments_dimensions metric
   287  	if hnswUserConfig, ok := cfg.(hnswent.UserConfig); ok {
   288  		if hnswUserConfig.PQ.Enabled {
   289  			return DimensionCategoryPQ, hnswUserConfig.PQ.Segments
   290  		}
   291  		if hnswUserConfig.BQ.Enabled {
   292  			return DimensionCategoryBQ, 0
   293  		}
   294  	}
   295  	return DimensionCategoryStandard, 0
   296  }