github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/index.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package hnsw 13 14 import ( 15 "context" 16 "fmt" 17 "io" 18 "math" 19 "math/rand" 20 "strings" 21 "sync" 22 "sync/atomic" 23 24 "github.com/pkg/errors" 25 "github.com/sirupsen/logrus" 26 "github.com/weaviate/weaviate/adapters/repos/db/lsmkv" 27 "github.com/weaviate/weaviate/adapters/repos/db/priorityqueue" 28 "github.com/weaviate/weaviate/adapters/repos/db/vector/cache" 29 "github.com/weaviate/weaviate/adapters/repos/db/vector/common" 30 "github.com/weaviate/weaviate/adapters/repos/db/vector/compressionhelpers" 31 "github.com/weaviate/weaviate/adapters/repos/db/vector/hnsw/distancer" 32 "github.com/weaviate/weaviate/entities/cyclemanager" 33 "github.com/weaviate/weaviate/entities/schema" 34 "github.com/weaviate/weaviate/entities/storobj" 35 ent "github.com/weaviate/weaviate/entities/vectorindex/hnsw" 36 ) 37 38 type hnsw struct { 39 // global lock to prevent concurrent map read/write, etc. 40 sync.RWMutex 41 42 // certain operations related to deleting, such as finding a new entrypoint 43 // can only run sequentially, this separate lock helps assuring this without 44 // blocking the general usage of the hnsw index 45 deleteLock *sync.Mutex 46 47 tombstoneLock *sync.RWMutex 48 49 // prevents tombstones cleanup to be performed in parallel with index reset operation 50 resetLock *sync.Mutex 51 // indicates whether reset operation occurred or not - if so tombstones cleanup method 52 // is aborted as it makes no sense anymore 53 resetCtx context.Context 54 resetCtxCancel context.CancelFunc 55 56 // indicates the index is shutting down 57 shutdownCtx context.Context 58 shutdownCtxCancel context.CancelFunc 59 60 // make sure the very first insert happens just once, otherwise we 61 // accidentally overwrite previous entrypoints on parallel imports on an 62 // empty graph 63 initialInsertOnce *sync.Once 64 65 // Each node should not have more edges than this number 66 maximumConnections int 67 68 // Nodes in the lowest level have a separate (usually higher) max connection 69 // limit 70 maximumConnectionsLayerZero int 71 72 // the current maximum can be smaller than the configured maximum because of 73 // the exponentially decaying layer function. The initial entry is started at 74 // layer 0, but this has the chance to grow with every subsequent entry 75 currentMaximumLayer int 76 77 // this is a point on the highest level, if we insert a new point with a 78 // higher level it will become the new entry point. Note tat the level of 79 // this point is always currentMaximumLayer 80 entryPointID uint64 81 82 // ef parameter used in construction phases, should be higher than ef during querying 83 efConstruction int 84 85 // ef at search time 86 ef int64 87 88 // only used if ef=-1 89 efMin int64 90 efMax int64 91 efFactor int64 92 93 // on filtered searches with less than n elements, perform flat search 94 flatSearchCutoff int64 95 96 levelNormalizer float64 97 98 nodes []*vertex 99 100 vectorForID common.VectorForID[float32] 101 TempVectorForIDThunk common.TempVectorForID 102 multiVectorForID common.MultiVectorForID 103 trackDimensionsOnce sync.Once 104 dims int32 105 106 cache cache.Cache[float32] 107 108 commitLog CommitLogger 109 110 // a lookup of current tombstones (i.e. nodes that have received a tombstone, 111 // but have not been cleaned up yet) Cleanup is the process of removal of all 112 // outgoing edges to the tombstone as well as deleting the tombstone itself. 113 // This process should happen periodically. 114 tombstones map[uint64]struct{} 115 116 tombstoneCleanupCallbackCtrl cyclemanager.CycleCallbackCtrl 117 shardCompactionCallbacks cyclemanager.CycleCallbackGroup 118 shardFlushCallbacks cyclemanager.CycleCallbackGroup 119 120 // // for distributed spike, can be used to call a insertExternal on a different graph 121 // insertHook func(node, targetLevel int, neighborsAtLevel map[int][]uint32) 122 123 id string 124 rootPath string 125 126 logger logrus.FieldLogger 127 distancerProvider distancer.Provider 128 129 pools *pools 130 131 forbidFlat bool // mostly used in testing scenarios where we want to use the index even in scenarios where we typically wouldn't 132 133 metrics *Metrics 134 insertMetrics *insertMetrics 135 136 randFunc func() float64 // added to temporarily get rid on flakiness in tombstones related tests. to be removed after fixing WEAVIATE-179 137 138 // The deleteVsInsertLock makes sure that there are no concurrent delete and 139 // insert operations happening. It uses an RW-Mutex with: 140 // 141 // RLock -> Insert operations, this means any number of import operations can 142 // happen concurrently. 143 // 144 // Lock -> Delete operation. This means only a single delete operation can 145 // occur at a time, no insert operation can occur simultaneously with a 146 // delete. Since the delete is cheap (just marking the node as deleted), the 147 // single-threadedness of deletes is not a big problem. 148 // 149 // This lock was introduced as part of 150 // https://github.com/weaviate/weaviate/issues/2194 151 // 152 // See 153 // https://github.com/weaviate/weaviate/pull/2191#issuecomment-1242726787 154 // where we ran performance tests to make sure introducing this lock has no 155 // negative impact on performance. 156 deleteVsInsertLock sync.RWMutex 157 158 compressed atomic.Bool 159 doNotRescore bool 160 161 compressor compressionhelpers.VectorCompressor 162 pqConfig ent.PQConfig 163 164 compressActionLock *sync.RWMutex 165 className string 166 shardName string 167 VectorForIDThunk common.VectorForID[float32] 168 shardedNodeLocks *common.ShardedRWLocks 169 store *lsmkv.Store 170 } 171 172 type CommitLogger interface { 173 ID() string 174 AddNode(node *vertex) error 175 SetEntryPointWithMaxLayer(id uint64, level int) error 176 AddLinkAtLevel(nodeid uint64, level int, target uint64) error 177 ReplaceLinksAtLevel(nodeid uint64, level int, targets []uint64) error 178 AddTombstone(nodeid uint64) error 179 RemoveTombstone(nodeid uint64) error 180 DeleteNode(nodeid uint64) error 181 ClearLinks(nodeid uint64) error 182 ClearLinksAtLevel(nodeid uint64, level uint16) error 183 Reset() error 184 Drop(ctx context.Context) error 185 Flush() error 186 Shutdown(ctx context.Context) error 187 RootPath() string 188 SwitchCommitLogs(bool) error 189 AddPQ(compressionhelpers.PQData) error 190 } 191 192 type BufferedLinksLogger interface { 193 AddLinkAtLevel(nodeid uint64, level int, target uint64) error 194 ReplaceLinksAtLevel(nodeid uint64, level int, targets []uint64) error 195 Close() error // Close should Flush and Close 196 } 197 198 type MakeCommitLogger func() (CommitLogger, error) 199 200 // New creates a new HNSW index, the commit logger is provided through a thunk 201 // (a function which can be deferred). This is because creating a commit logger 202 // opens files for writing. However, checking whether a file is present, is a 203 // criterium for the index to see if it has to recover from disk or if its a 204 // truly new index. So instead the index is initialized, with un-biased disk 205 // checks first and only then is the commit logger created 206 func New(cfg Config, uc ent.UserConfig, tombstoneCallbacks, shardCompactionCallbacks, 207 shardFlushCallbacks cyclemanager.CycleCallbackGroup, store *lsmkv.Store, 208 ) (*hnsw, error) { 209 if err := cfg.Validate(); err != nil { 210 return nil, errors.Wrap(err, "invalid config") 211 } 212 213 if cfg.Logger == nil { 214 logger := logrus.New() 215 logger.Out = io.Discard 216 cfg.Logger = logger 217 } 218 219 normalizeOnRead := false 220 if cfg.DistanceProvider.Type() == "cosine-dot" { 221 normalizeOnRead = true 222 } 223 224 vectorCache := cache.NewShardedFloat32LockCache(cfg.VectorForIDThunk, uc.VectorCacheMaxObjects, 225 cfg.Logger, normalizeOnRead, cache.DefaultDeletionInterval) 226 227 resetCtx, resetCtxCancel := context.WithCancel(context.Background()) 228 shutdownCtx, shutdownCtxCancel := context.WithCancel(context.Background()) 229 index := &hnsw{ 230 maximumConnections: uc.MaxConnections, 231 232 // inspired by original paper and other implementations 233 maximumConnectionsLayerZero: 2 * uc.MaxConnections, 234 235 // inspired by c++ implementation 236 levelNormalizer: 1 / math.Log(float64(uc.MaxConnections)), 237 efConstruction: uc.EFConstruction, 238 flatSearchCutoff: int64(uc.FlatSearchCutoff), 239 nodes: make([]*vertex, cache.InitialSize), 240 cache: vectorCache, 241 vectorForID: vectorCache.Get, 242 multiVectorForID: vectorCache.MultiGet, 243 id: cfg.ID, 244 rootPath: cfg.RootPath, 245 tombstones: map[uint64]struct{}{}, 246 logger: cfg.Logger, 247 distancerProvider: cfg.DistanceProvider, 248 deleteLock: &sync.Mutex{}, 249 tombstoneLock: &sync.RWMutex{}, 250 resetLock: &sync.Mutex{}, 251 resetCtx: resetCtx, 252 resetCtxCancel: resetCtxCancel, 253 shutdownCtx: shutdownCtx, 254 shutdownCtxCancel: shutdownCtxCancel, 255 initialInsertOnce: &sync.Once{}, 256 257 ef: int64(uc.EF), 258 efMin: int64(uc.DynamicEFMin), 259 efMax: int64(uc.DynamicEFMax), 260 efFactor: int64(uc.DynamicEFFactor), 261 262 metrics: NewMetrics(cfg.PrometheusMetrics, cfg.ClassName, cfg.ShardName), 263 shardName: cfg.ShardName, 264 265 randFunc: rand.Float64, 266 compressActionLock: &sync.RWMutex{}, 267 className: cfg.ClassName, 268 VectorForIDThunk: cfg.VectorForIDThunk, 269 TempVectorForIDThunk: cfg.TempVectorForIDThunk, 270 pqConfig: uc.PQ, 271 shardedNodeLocks: common.NewDefaultShardedRWLocks(), 272 273 shardCompactionCallbacks: shardCompactionCallbacks, 274 shardFlushCallbacks: shardFlushCallbacks, 275 store: store, 276 } 277 278 if uc.BQ.Enabled { 279 var err error 280 index.compressor, err = compressionhelpers.NewBQCompressor(index.distancerProvider, uc.VectorCacheMaxObjects, cfg.Logger, store) 281 if err != nil { 282 return nil, err 283 } 284 index.compressed.Store(true) 285 index.cache.Drop() 286 index.cache = nil 287 } 288 289 if err := index.init(cfg); err != nil { 290 return nil, errors.Wrapf(err, "init index %q", index.id) 291 } 292 293 // TODO common_cycle_manager move to poststartup? 294 id := strings.Join([]string{ 295 "hnsw", "tombstone_cleanup", 296 index.className, index.shardName, index.id, 297 }, "/") 298 index.tombstoneCleanupCallbackCtrl = tombstoneCallbacks.Register(id, index.tombstoneCleanup) 299 index.insertMetrics = newInsertMetrics(index.metrics) 300 301 return index, nil 302 } 303 304 // TODO: use this for incoming replication 305 // func (h *hnsw) insertFromExternal(nodeId, targetLevel int, neighborsAtLevel map[int][]uint32) { 306 // defer m.addBuildingReplication(time.Now()) 307 308 // // randomly introduce up to 50ms delay to account for network slowness 309 // time.Sleep(time.Duration(rand.Intn(500)) * time.Millisecond) 310 311 // var node *hnswVertex 312 // h.RLock() 313 // total := len(h.nodes) 314 // if total > nodeId { 315 // node = h.nodes[nodeId] // it could be that we implicitly added this node already because it was referenced 316 // } 317 // h.RUnlock() 318 319 // if node == nil { 320 // node = &hnswVertex{ 321 // id: nodeId, 322 // connections: make(map[int][]uint32), 323 // level: targetLevel, 324 // } 325 // } else { 326 // node.level = targetLevel 327 // } 328 329 // if total == 0 { 330 // h.Lock() 331 // h.commitLog.SetEntryPointWithMaxLayer(node.id, 0) 332 // h.entryPointID = node.id 333 // h.currentMaximumLayer = 0 334 // node.connections = map[int][]uint32{} 335 // node.level = 0 336 // // h.nodes = make([]*hnswVertex, 100000) 337 // h.commitLog.AddNode(node) 338 // h.nodes[node.id] = node 339 // h.Unlock() 340 // return 341 // } 342 343 // currentMaximumLayer := h.currentMaximumLayer 344 // h.Lock() 345 // h.nodes[nodeId] = node 346 // h.commitLog.AddNode(node) 347 // h.Unlock() 348 349 // for level := min(targetLevel, currentMaximumLayer); level >= 0; level-- { 350 // neighbors := neighborsAtLevel[level] 351 352 // for _, neighborID := range neighbors { 353 // h.RLock() 354 // neighbor := h.nodes[neighborID] 355 // if neighbor == nil { 356 // // due to everything being parallel it could be that the linked neighbor 357 // // doesn't exist yet 358 // h.nodes[neighborID] = &hnswVertex{ 359 // id: int(neighborID), 360 // connections: make(map[int][]uint32), 361 // } 362 // neighbor = h.nodes[neighborID] 363 // } 364 // h.RUnlock() 365 366 // neighbor.linkAtLevel(level, uint32(nodeId), h.commitLog) 367 // node.linkAtLevel(level, uint32(neighbor.id), h.commitLog) 368 369 // neighbor.RLock() 370 // currentConnections := neighbor.connections[level] 371 // neighbor.RUnlock() 372 373 // maximumConnections := h.maximumConnections 374 // if level == 0 { 375 // maximumConnections = h.maximumConnectionsLayerZero 376 // } 377 378 // if len(currentConnections) <= maximumConnections { 379 // // nothing to do, skip 380 // continue 381 // } 382 383 // // TODO: support both neighbor selection algos 384 // updatedConnections := h.selectNeighborsSimpleFromId(nodeId, currentConnections, maximumConnections) 385 386 // neighbor.Lock() 387 // h.commitLog.ReplaceLinksAtLevel(neighbor.id, level, updatedConnections) 388 // neighbor.connections[level] = updatedConnections 389 // neighbor.Unlock() 390 // } 391 // } 392 393 // if targetLevel > h.currentMaximumLayer { 394 // h.Lock() 395 // h.commitLog.SetEntryPointWithMaxLayer(nodeId, targetLevel) 396 // h.entryPointID = nodeId 397 // h.currentMaximumLayer = targetLevel 398 // h.Unlock() 399 // } 400 401 // } 402 403 func (h *hnsw) findBestEntrypointForNode(currentMaxLevel, targetLevel int, 404 entryPointID uint64, nodeVec []float32, distancer compressionhelpers.CompressorDistancer, 405 ) (uint64, error) { 406 // in case the new target is lower than the current max, we need to search 407 // each layer for a better candidate and update the candidate 408 for level := currentMaxLevel; level > targetLevel; level-- { 409 eps := priorityqueue.NewMin[any](1) 410 var dist float32 411 var ok bool 412 var err error 413 if h.compressed.Load() { 414 dist, ok, err = distancer.DistanceToNode(entryPointID) 415 var e storobj.ErrNotFound 416 if errors.As(err, &e) { 417 h.handleDeletedNode(e.DocID) 418 } 419 } else { 420 dist, ok, err = h.distBetweenNodeAndVec(entryPointID, nodeVec) 421 } 422 if err != nil { 423 return 0, errors.Wrapf(err, 424 "calculate distance between insert node and entry point at level %d", level) 425 } 426 if !ok { 427 continue 428 } 429 430 eps.Insert(entryPointID, dist) 431 res, err := h.searchLayerByVectorWithDistancer(nodeVec, eps, 1, level, nil, distancer) 432 if err != nil { 433 return 0, 434 errors.Wrapf(err, "update candidate: search layer at level %d", level) 435 } 436 if res.Len() > 0 { 437 // if we could find a new entrypoint, use it 438 // in case everything was tombstoned, stick with the existing one 439 elem := res.Pop() 440 n := h.nodeByID(elem.ID) 441 if n != nil && !n.isUnderMaintenance() { 442 // but not if the entrypoint is under maintenance 443 entryPointID = elem.ID 444 } 445 } 446 447 h.pools.pqResults.Put(res) 448 } 449 450 return entryPointID, nil 451 } 452 453 func min(a, b int) int { 454 if a < b { 455 return a 456 } 457 return b 458 } 459 460 func (h *hnsw) distBetweenNodes(a, b uint64) (float32, bool, error) { 461 if h.compressed.Load() { 462 dist, err := h.compressor.DistanceBetweenCompressedVectorsFromIDs(context.Background(), a, b) 463 if err != nil { 464 var e storobj.ErrNotFound 465 if errors.As(err, &e) { 466 h.handleDeletedNode(e.DocID) 467 return 0, false, nil 468 } else { 469 return 0, false, err 470 } 471 } 472 473 return dist, true, nil 474 } 475 476 // TODO: introduce single search/transaction context instead of spawning new 477 // ones 478 vecA, err := h.vectorForID(context.Background(), a) 479 if err != nil { 480 var e storobj.ErrNotFound 481 if errors.As(err, &e) { 482 h.handleDeletedNode(e.DocID) 483 return 0, false, nil 484 } else { 485 // not a typed error, we can recover from, return with err 486 return 0, false, errors.Wrapf(err, 487 "could not get vector of object at docID %d", a) 488 } 489 } 490 491 if len(vecA) == 0 { 492 return 0, false, fmt.Errorf("got a nil or zero-length vector at docID %d", a) 493 } 494 495 vecB, err := h.vectorForID(context.Background(), b) 496 if err != nil { 497 var e storobj.ErrNotFound 498 if errors.As(err, &e) { 499 h.handleDeletedNode(e.DocID) 500 return 0, false, nil 501 } else { 502 // not a typed error, we can recover from, return with err 503 return 0, false, errors.Wrapf(err, 504 "could not get vector of object at docID %d", b) 505 } 506 } 507 508 if len(vecB) == 0 { 509 return 0, false, fmt.Errorf("got a nil or zero-length vector at docID %d", b) 510 } 511 512 return h.distancerProvider.SingleDist(vecA, vecB) 513 } 514 515 func (h *hnsw) distBetweenNodeAndVec(node uint64, vecB []float32) (float32, bool, error) { 516 if h.compressed.Load() { 517 dist, err := h.compressor.DistanceBetweenCompressedAndUncompressedVectorsFromID(context.Background(), node, vecB) 518 if err != nil { 519 var e storobj.ErrNotFound 520 if errors.As(err, &e) { 521 h.handleDeletedNode(e.DocID) 522 return 0, false, nil 523 } else { 524 return 0, false, err 525 } 526 } 527 528 return dist, true, nil 529 } 530 531 // TODO: introduce single search/transaction context instead of spawning new 532 // ones 533 vecA, err := h.vectorForID(context.Background(), node) 534 if err != nil { 535 var e storobj.ErrNotFound 536 if errors.As(err, &e) { 537 h.handleDeletedNode(e.DocID) 538 return 0, false, nil 539 } else { 540 // not a typed error, we can recover from, return with err 541 return 0, false, errors.Wrapf(err, 542 "could not get vector of object at docID %d", node) 543 } 544 } 545 546 if len(vecA) == 0 { 547 return 0, false, fmt.Errorf( 548 "got a nil or zero-length vector at docID %d", node) 549 } 550 551 if len(vecB) == 0 { 552 return 0, false, fmt.Errorf( 553 "got a nil or zero-length vector as search vector") 554 } 555 556 return h.distancerProvider.SingleDist(vecA, vecB) 557 } 558 559 func (h *hnsw) Stats() { 560 fmt.Printf("levels: %d\n", h.currentMaximumLayer) 561 562 perLevelCount := map[int]uint{} 563 564 for _, node := range h.nodes { 565 if node == nil { 566 continue 567 } 568 l := node.level 569 if l == 0 && len(node.connections) == 0 { 570 // filter out allocated space without nodes 571 continue 572 } 573 c, ok := perLevelCount[l] 574 if !ok { 575 perLevelCount[l] = 0 576 } 577 578 perLevelCount[l] = c + 1 579 } 580 581 for level, count := range perLevelCount { 582 fmt.Printf("unique count on level %d: %d\n", level, count) 583 } 584 } 585 586 func (h *hnsw) isEmpty() bool { 587 h.RLock() 588 defer h.RUnlock() 589 h.shardedNodeLocks.RLock(h.entryPointID) 590 defer h.shardedNodeLocks.RUnlock(h.entryPointID) 591 592 return h.isEmptyUnlocked() 593 } 594 595 func (h *hnsw) isEmptyUnlocked() bool { 596 return h.nodes[h.entryPointID] == nil 597 } 598 599 func (h *hnsw) nodeByID(id uint64) *vertex { 600 h.RLock() 601 defer h.RUnlock() 602 603 if id >= uint64(len(h.nodes)) { 604 // See https://github.com/weaviate/weaviate/issues/1838 for details. 605 // This could be after a crash recovery when the object store is "further 606 // ahead" than the hnsw index and we receive a delete request 607 return nil 608 } 609 610 h.shardedNodeLocks.RLock(id) 611 defer h.shardedNodeLocks.RUnlock(id) 612 613 return h.nodes[id] 614 } 615 616 func (h *hnsw) Drop(ctx context.Context) error { 617 // cancel tombstone cleanup goroutine 618 if err := h.tombstoneCleanupCallbackCtrl.Unregister(ctx); err != nil { 619 return errors.Wrap(err, "hnsw drop") 620 } 621 622 if h.compressed.Load() { 623 err := h.compressor.Drop() 624 if err != nil { 625 return fmt.Errorf("failed to shutdown compressed store") 626 } 627 } else { 628 // cancel vector cache goroutine 629 h.cache.Drop() 630 } 631 632 // cancel commit logger last, as the tombstone cleanup cycle might still 633 // write while it's still running 634 err := h.commitLog.Drop(ctx) 635 if err != nil { 636 return errors.Wrap(err, "commit log drop") 637 } 638 639 return nil 640 } 641 642 func (h *hnsw) Shutdown(ctx context.Context) error { 643 h.shutdownCtxCancel() 644 645 if err := h.commitLog.Shutdown(ctx); err != nil { 646 return errors.Wrap(err, "hnsw shutdown") 647 } 648 649 if err := h.tombstoneCleanupCallbackCtrl.Unregister(ctx); err != nil { 650 return errors.Wrap(err, "hnsw shutdown") 651 } 652 653 if h.compressed.Load() { 654 err := h.compressor.Drop() 655 if err != nil { 656 return errors.Wrap(err, "hnsw shutdown") 657 } 658 } else { 659 h.cache.Drop() 660 } 661 662 return nil 663 } 664 665 func (h *hnsw) Flush() error { 666 return h.commitLog.Flush() 667 } 668 669 func (h *hnsw) Entrypoint() uint64 { 670 h.RLock() 671 defer h.RUnlock() 672 673 return h.entryPointID 674 } 675 676 func (h *hnsw) DistanceBetweenVectors(x, y []float32) (float32, bool, error) { 677 return h.distancerProvider.SingleDist(x, y) 678 } 679 680 func (h *hnsw) ContainsNode(id uint64) bool { 681 h.RLock() 682 defer h.RUnlock() 683 h.shardedNodeLocks.RLock(id) 684 defer h.shardedNodeLocks.RUnlock(id) 685 686 return len(h.nodes) > int(id) && h.nodes[id] != nil 687 } 688 689 func (h *hnsw) DistancerProvider() distancer.Provider { 690 return h.distancerProvider 691 } 692 693 func (h *hnsw) ShouldCompress() (bool, int) { 694 return h.pqConfig.Enabled, h.pqConfig.TrainingLimit 695 } 696 697 func (h *hnsw) ShouldCompressFromConfig(config schema.VectorIndexConfig) (bool, int) { 698 hnswConfig := config.(ent.UserConfig) 699 return hnswConfig.PQ.Enabled, hnswConfig.PQ.TrainingLimit 700 } 701 702 func (h *hnsw) Compressed() bool { 703 return h.compressed.Load() 704 } 705 706 func (h *hnsw) AlreadyIndexed() uint64 { 707 return uint64(h.cache.CountVectors()) 708 } 709 710 func (h *hnsw) normalizeVec(vec []float32) []float32 { 711 if h.distancerProvider.Type() == "cosine-dot" { 712 // cosine-dot requires normalized vectors, as the dot product and cosine 713 // similarity are only identical if the vector is normalized 714 return distancer.Normalize(vec) 715 } 716 return vec 717 }