github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/compress.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package hnsw 13 14 import ( 15 "context" 16 "errors" 17 "fmt" 18 19 "github.com/weaviate/weaviate/adapters/repos/db/vector/compressionhelpers" 20 21 "github.com/weaviate/weaviate/entities/storobj" 22 ent "github.com/weaviate/weaviate/entities/vectorindex/hnsw" 23 ) 24 25 func (h *hnsw) calculateOptimalSegments(dims int) int { 26 if dims >= 2048 && dims%8 == 0 { 27 return dims / 8 28 } else if dims >= 768 && dims%6 == 0 { 29 return dims / 6 30 } else if dims >= 256 && dims%4 == 0 { 31 return dims / 4 32 } else if dims%2 == 0 { 33 return dims / 2 34 } 35 return dims 36 } 37 38 func (h *hnsw) compress(cfg ent.UserConfig) error { 39 if !cfg.PQ.Enabled && !cfg.BQ.Enabled { 40 return nil 41 } 42 43 h.compressActionLock.Lock() 44 defer h.compressActionLock.Unlock() 45 data := h.cache.All() 46 if cfg.PQ.Enabled { 47 if h.isEmpty() { 48 return errors.New("Compress command cannot be executed before inserting some data. Please, insert your data first.") 49 } 50 dims := int(h.dims) 51 52 if cfg.PQ.Segments <= 0 { 53 cfg.PQ.Segments = h.calculateOptimalSegments(dims) 54 h.pqConfig.Segments = cfg.PQ.Segments 55 } 56 57 cleanData := make([][]float32, 0, len(data)) 58 for i := range data { 59 // Rather than just taking the cache dump at face value, let's explicitly 60 // request the vectors. Otherwise we would miss any vector that's currently 61 // not in the cache, for example because the cache is not hot yet after a 62 // restart. 63 p, err := h.cache.Get(context.Background(), uint64(i)) 64 if err != nil { 65 var e storobj.ErrNotFound 66 if errors.As(err, &e) { 67 // already deleted, ignore 68 continue 69 } else { 70 return fmt.Errorf("unexpected error obtaining vectors for fitting: %w", err) 71 } 72 } 73 74 if p == nil { 75 // already deleted, ignore 76 continue 77 } 78 79 cleanData = append(cleanData, p) 80 } 81 82 var err error 83 h.compressor, err = compressionhelpers.NewHNSWPQCompressor(cfg.PQ, h.distancerProvider, dims, 1e12, h.logger, cleanData, h.store) 84 if err != nil { 85 return fmt.Errorf("Compressing vectors: %w", err) 86 } 87 h.commitLog.AddPQ(h.compressor.ExposeFields()) 88 } else { 89 var err error 90 h.compressor, err = compressionhelpers.NewBQCompressor(h.distancerProvider, 1e12, h.logger, h.store) 91 if err != nil { 92 return err 93 } 94 } 95 compressionhelpers.Concurrently(uint64(len(data)), 96 func(index uint64) { 97 if data[index] == nil { 98 return 99 } 100 h.compressor.Preload(index, data[index]) 101 }) 102 103 h.compressed.Store(true) 104 h.cache.Drop() 105 return nil 106 }