github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/compress.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package hnsw
    13  
    14  import (
    15  	"context"
    16  	"errors"
    17  	"fmt"
    18  
    19  	"github.com/weaviate/weaviate/adapters/repos/db/vector/compressionhelpers"
    20  
    21  	"github.com/weaviate/weaviate/entities/storobj"
    22  	ent "github.com/weaviate/weaviate/entities/vectorindex/hnsw"
    23  )
    24  
    25  func (h *hnsw) calculateOptimalSegments(dims int) int {
    26  	if dims >= 2048 && dims%8 == 0 {
    27  		return dims / 8
    28  	} else if dims >= 768 && dims%6 == 0 {
    29  		return dims / 6
    30  	} else if dims >= 256 && dims%4 == 0 {
    31  		return dims / 4
    32  	} else if dims%2 == 0 {
    33  		return dims / 2
    34  	}
    35  	return dims
    36  }
    37  
    38  func (h *hnsw) compress(cfg ent.UserConfig) error {
    39  	if !cfg.PQ.Enabled && !cfg.BQ.Enabled {
    40  		return nil
    41  	}
    42  
    43  	h.compressActionLock.Lock()
    44  	defer h.compressActionLock.Unlock()
    45  	data := h.cache.All()
    46  	if cfg.PQ.Enabled {
    47  		if h.isEmpty() {
    48  			return errors.New("Compress command cannot be executed before inserting some data. Please, insert your data first.")
    49  		}
    50  		dims := int(h.dims)
    51  
    52  		if cfg.PQ.Segments <= 0 {
    53  			cfg.PQ.Segments = h.calculateOptimalSegments(dims)
    54  			h.pqConfig.Segments = cfg.PQ.Segments
    55  		}
    56  
    57  		cleanData := make([][]float32, 0, len(data))
    58  		for i := range data {
    59  			// Rather than just taking the cache dump at face value, let's explicitly
    60  			// request the vectors. Otherwise we would miss any vector that's currently
    61  			// not in the cache, for example because the cache is not hot yet after a
    62  			// restart.
    63  			p, err := h.cache.Get(context.Background(), uint64(i))
    64  			if err != nil {
    65  				var e storobj.ErrNotFound
    66  				if errors.As(err, &e) {
    67  					// already deleted, ignore
    68  					continue
    69  				} else {
    70  					return fmt.Errorf("unexpected error obtaining vectors for fitting: %w", err)
    71  				}
    72  			}
    73  
    74  			if p == nil {
    75  				// already deleted, ignore
    76  				continue
    77  			}
    78  
    79  			cleanData = append(cleanData, p)
    80  		}
    81  
    82  		var err error
    83  		h.compressor, err = compressionhelpers.NewHNSWPQCompressor(cfg.PQ, h.distancerProvider, dims, 1e12, h.logger, cleanData, h.store)
    84  		if err != nil {
    85  			return fmt.Errorf("Compressing vectors: %w", err)
    86  		}
    87  		h.commitLog.AddPQ(h.compressor.ExposeFields())
    88  	} else {
    89  		var err error
    90  		h.compressor, err = compressionhelpers.NewBQCompressor(h.distancerProvider, 1e12, h.logger, h.store)
    91  		if err != nil {
    92  			return err
    93  		}
    94  	}
    95  	compressionhelpers.Concurrently(uint64(len(data)),
    96  		func(index uint64) {
    97  			if data[index] == nil {
    98  				return
    99  			}
   100  			h.compressor.Preload(index, data[index])
   101  		})
   102  
   103  	h.compressed.Store(true)
   104  	h.cache.Drop()
   105  	return nil
   106  }