github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/search_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package hnsw
    13  
    14  import (
    15  	"context"
    16  	"testing"
    17  
    18  	"github.com/stretchr/testify/assert"
    19  	"github.com/stretchr/testify/require"
    20  	"github.com/weaviate/weaviate/adapters/repos/db/vector/hnsw/distancer"
    21  	"github.com/weaviate/weaviate/adapters/repos/db/vector/testinghelpers"
    22  	"github.com/weaviate/weaviate/entities/cyclemanager"
    23  	ent "github.com/weaviate/weaviate/entities/vectorindex/hnsw"
    24  )
    25  
    26  // prevents a regression of
    27  // https://github.com/weaviate/weaviate/issues/2155
    28  func TestNilCheckOnPartiallyCleanedNode(t *testing.T) {
    29  	vectors := [][]float32{
    30  		{100, 100}, // first to import makes this the EP, it is far from any query which means it will be replaced.
    31  		{2, 2},     // a good potential entrypoint, but we will corrupt it later on
    32  		{1, 1},     // the perfect search result
    33  	}
    34  
    35  	var vectorIndex *hnsw
    36  
    37  	t.Run("import", func(*testing.T) {
    38  		index, err := New(Config{
    39  			RootPath:              "doesnt-matter-as-committlogger-is-mocked-out",
    40  			ID:                    "bug-2155",
    41  			MakeCommitLoggerThunk: MakeNoopCommitLogger,
    42  			DistanceProvider:      distancer.NewL2SquaredProvider(),
    43  			VectorForIDThunk: func(ctx context.Context, id uint64) ([]float32, error) {
    44  				return vectors[int(id)], nil
    45  			},
    46  		}, ent.UserConfig{
    47  			MaxConnections: 30,
    48  			EFConstruction: 128,
    49  
    50  			// The actual size does not matter for this test, but if it defaults to
    51  			// zero it will constantly think it's full and needs to be deleted - even
    52  			// after just being deleted, so make sure to use a positive number here.
    53  			VectorCacheMaxObjects: 100000,
    54  		}, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(),
    55  			cyclemanager.NewCallbackGroupNoop(), testinghelpers.NewDummyStore(t))
    56  		require.Nil(t, err)
    57  		vectorIndex = index
    58  	})
    59  
    60  	t.Run("manually add the nodes", func(t *testing.T) {
    61  		vectorIndex.entryPointID = 0
    62  		vectorIndex.currentMaximumLayer = 1
    63  		vectorIndex.nodes = []*vertex{
    64  			{
    65  				// must be on a non-zero layer for this bug to occur
    66  				level: 1,
    67  				connections: [][]uint64{
    68  					{1, 2},
    69  					{1},
    70  				},
    71  			},
    72  			nil, // corrupt node
    73  			{
    74  				level: 0,
    75  				connections: [][]uint64{
    76  					{0, 1, 2},
    77  				},
    78  			},
    79  		}
    80  	})
    81  
    82  	t.Run("run a search that would typically find the new ep", func(t *testing.T) {
    83  		res, _, err := vectorIndex.SearchByVector([]float32{1.7, 1.7}, 20, nil)
    84  		require.Nil(t, err)
    85  		assert.Equal(t, []uint64{2, 0}, res, "right results are found")
    86  	})
    87  
    88  	t.Run("the corrupt node is now marked deleted", func(t *testing.T) {
    89  		_, ok := vectorIndex.tombstones[1]
    90  		assert.True(t, ok)
    91  	})
    92  }