github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/helper_for_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  //go:build integrationTest
    13  // +build integrationTest
    14  
    15  package db
    16  
    17  import (
    18  	"context"
    19  	"math/rand"
    20  	"testing"
    21  	"time"
    22  
    23  	"github.com/go-openapi/strfmt"
    24  	"github.com/google/uuid"
    25  	"github.com/sirupsen/logrus/hooks/test"
    26  	"github.com/stretchr/testify/require"
    27  	"github.com/weaviate/weaviate/adapters/repos/db/indexcheckpoint"
    28  	"github.com/weaviate/weaviate/adapters/repos/db/inverted"
    29  	"github.com/weaviate/weaviate/adapters/repos/db/inverted/stopwords"
    30  	"github.com/weaviate/weaviate/entities/models"
    31  	"github.com/weaviate/weaviate/entities/schema"
    32  	"github.com/weaviate/weaviate/entities/storobj"
    33  	enthnsw "github.com/weaviate/weaviate/entities/vectorindex/hnsw"
    34  )
    35  
    36  func parkingGaragesSchema() schema.Schema {
    37  	return schema.Schema{
    38  		Objects: &models.Schema{
    39  			Classes: []*models.Class{
    40  				{
    41  					Class:               "MultiRefParkingGarage",
    42  					VectorIndexConfig:   enthnsw.NewDefaultUserConfig(),
    43  					InvertedIndexConfig: invertedConfig(),
    44  					Properties: []*models.Property{
    45  						{
    46  							Name:         "name",
    47  							DataType:     schema.DataTypeText.PropString(),
    48  							Tokenization: models.PropertyTokenizationWhitespace,
    49  						},
    50  						{
    51  							Name:     "location",
    52  							DataType: []string{string(schema.DataTypeGeoCoordinates)},
    53  						},
    54  					},
    55  				},
    56  				{
    57  					Class:               "MultiRefParkingLot",
    58  					VectorIndexConfig:   enthnsw.NewDefaultUserConfig(),
    59  					InvertedIndexConfig: invertedConfig(),
    60  					Properties: []*models.Property{
    61  						{
    62  							Name:         "name",
    63  							DataType:     schema.DataTypeText.PropString(),
    64  							Tokenization: models.PropertyTokenizationWhitespace,
    65  						},
    66  					},
    67  				},
    68  				{
    69  					Class:               "MultiRefCar",
    70  					VectorIndexConfig:   enthnsw.NewDefaultUserConfig(),
    71  					InvertedIndexConfig: invertedConfig(),
    72  					Properties: []*models.Property{
    73  						{
    74  							Name:         "name",
    75  							DataType:     schema.DataTypeText.PropString(),
    76  							Tokenization: models.PropertyTokenizationWhitespace,
    77  						},
    78  						{
    79  							Name:     "parkedAt",
    80  							DataType: []string{"MultiRefParkingGarage", "MultiRefParkingLot"},
    81  						},
    82  					},
    83  				},
    84  				{
    85  					Class:               "MultiRefDriver",
    86  					VectorIndexConfig:   enthnsw.NewDefaultUserConfig(),
    87  					InvertedIndexConfig: invertedConfig(),
    88  					Properties: []*models.Property{
    89  						{
    90  							Name:         "name",
    91  							DataType:     schema.DataTypeText.PropString(),
    92  							Tokenization: models.PropertyTokenizationWhitespace,
    93  						},
    94  						{
    95  							Name:     "drives",
    96  							DataType: []string{"MultiRefCar"},
    97  						},
    98  					},
    99  				},
   100  				{
   101  					Class:               "MultiRefPerson",
   102  					VectorIndexConfig:   enthnsw.NewDefaultUserConfig(),
   103  					InvertedIndexConfig: invertedConfig(),
   104  					Properties: []*models.Property{
   105  						{
   106  							Name:         "name",
   107  							DataType:     schema.DataTypeText.PropString(),
   108  							Tokenization: models.PropertyTokenizationWhitespace,
   109  						},
   110  						{
   111  							Name:     "friendsWith",
   112  							DataType: []string{"MultiRefDriver"},
   113  						},
   114  					},
   115  				},
   116  				{
   117  					Class:               "MultiRefSociety",
   118  					VectorIndexConfig:   enthnsw.NewDefaultUserConfig(),
   119  					InvertedIndexConfig: invertedConfig(),
   120  					Properties: []*models.Property{
   121  						{
   122  							Name:         "name",
   123  							DataType:     schema.DataTypeText.PropString(),
   124  							Tokenization: models.PropertyTokenizationWhitespace,
   125  						},
   126  						{
   127  							Name:     "hasMembers",
   128  							DataType: []string{"MultiRefPerson"},
   129  						},
   130  					},
   131  				},
   132  
   133  				// for classifications test
   134  				{
   135  					Class:               "ExactCategory",
   136  					VectorIndexConfig:   enthnsw.NewDefaultUserConfig(),
   137  					InvertedIndexConfig: invertedConfig(),
   138  					Properties: []*models.Property{
   139  						{
   140  							Name:         "name",
   141  							DataType:     schema.DataTypeText.PropString(),
   142  							Tokenization: models.PropertyTokenizationWhitespace,
   143  						},
   144  					},
   145  				},
   146  				{
   147  					Class:               "MainCategory",
   148  					VectorIndexConfig:   enthnsw.NewDefaultUserConfig(),
   149  					InvertedIndexConfig: invertedConfig(),
   150  					Properties: []*models.Property{
   151  						{
   152  							Name:         "name",
   153  							DataType:     schema.DataTypeText.PropString(),
   154  							Tokenization: models.PropertyTokenizationWhitespace,
   155  						},
   156  					},
   157  				},
   158  			},
   159  		},
   160  	}
   161  }
   162  
   163  func cityCountryAirportSchema() schema.Schema {
   164  	return schema.Schema{
   165  		Objects: &models.Schema{
   166  			Classes: []*models.Class{
   167  				{
   168  					Class:               "Country",
   169  					VectorIndexConfig:   enthnsw.NewDefaultUserConfig(),
   170  					InvertedIndexConfig: invertedConfig(),
   171  					Properties: []*models.Property{
   172  						{Name: "name", DataType: schema.DataTypeText.PropString(), Tokenization: models.PropertyTokenizationWhitespace},
   173  					},
   174  				},
   175  				{
   176  					Class:               "City",
   177  					VectorIndexConfig:   enthnsw.NewDefaultUserConfig(),
   178  					InvertedIndexConfig: invertedConfig(),
   179  					Properties: []*models.Property{
   180  						{Name: "name", DataType: schema.DataTypeText.PropString(), Tokenization: models.PropertyTokenizationWhitespace},
   181  						{Name: "inCountry", DataType: []string{"Country"}},
   182  						{Name: "population", DataType: []string{"int"}},
   183  						{Name: "location", DataType: []string{"geoCoordinates"}},
   184  					},
   185  				},
   186  				{
   187  					Class:               "Airport",
   188  					VectorIndexConfig:   enthnsw.NewDefaultUserConfig(),
   189  					InvertedIndexConfig: invertedConfig(),
   190  					Properties: []*models.Property{
   191  						{Name: "code", DataType: schema.DataTypeText.PropString(), Tokenization: models.PropertyTokenizationWhitespace},
   192  						{Name: "phone", DataType: []string{"phoneNumber"}},
   193  						{Name: "inCity", DataType: []string{"City"}},
   194  					},
   195  				},
   196  			},
   197  		},
   198  	}
   199  }
   200  
   201  func testCtx() context.Context {
   202  	//nolint:govet
   203  	ctx, _ := context.WithTimeout(context.Background(), 30*time.Second)
   204  	return ctx
   205  }
   206  
   207  func getRandomSeed() *rand.Rand {
   208  	return rand.New(rand.NewSource(time.Now().UnixNano()))
   209  }
   210  
   211  func testShard(t *testing.T, ctx context.Context, className string, indexOpts ...func(*Index)) (ShardLike, *Index) {
   212  	return testShardWithSettings(t, ctx, &models.Class{Class: className}, enthnsw.UserConfig{Skip: true},
   213  		false, false, indexOpts...)
   214  }
   215  
   216  func testShardWithSettings(t *testing.T, ctx context.Context, class *models.Class,
   217  	vic schema.VectorIndexConfig, withStopwords, withCheckpoints bool, indexOpts ...func(*Index),
   218  ) (ShardLike, *Index) {
   219  	tmpDir := t.TempDir()
   220  	logger, _ := test.NewNullLogger()
   221  	maxResults := int64(10_000)
   222  
   223  	repo, err := New(logger, Config{
   224  		MemtablesFlushDirtyAfter:  60,
   225  		RootPath:                  tmpDir,
   226  		QueryMaximumResults:       maxResults,
   227  		MaxImportGoroutinesFactor: 1,
   228  	}, &fakeRemoteClient{}, &fakeNodeResolver{}, &fakeRemoteNodeClient{}, &fakeReplicationClient{}, nil)
   229  	require.Nil(t, err)
   230  
   231  	shardState := singleShardState()
   232  	sch := schema.Schema{
   233  		Objects: &models.Schema{
   234  			Classes: []*models.Class{class},
   235  		},
   236  	}
   237  	schemaGetter := &fakeSchemaGetter{shardState: shardState, schema: sch}
   238  
   239  	iic := schema.InvertedIndexConfig{}
   240  	if class.InvertedIndexConfig != nil {
   241  		iic = inverted.ConfigFromModel(class.InvertedIndexConfig)
   242  	}
   243  	var sd *stopwords.Detector
   244  	if withStopwords {
   245  		sd, err = stopwords.NewDetectorFromConfig(iic.Stopwords)
   246  		require.NoError(t, err)
   247  	}
   248  	var checkpts *indexcheckpoint.Checkpoints
   249  	if withCheckpoints {
   250  		checkpts, err = indexcheckpoint.New(tmpDir, logger)
   251  		require.NoError(t, err)
   252  	}
   253  
   254  	idx := &Index{
   255  		Config: IndexConfig{
   256  			RootPath:            tmpDir,
   257  			ClassName:           schema.ClassName(class.Class),
   258  			QueryMaximumResults: maxResults,
   259  		},
   260  		invertedIndexConfig:   iic,
   261  		vectorIndexUserConfig: vic,
   262  		logger:                logger,
   263  		getSchema:             schemaGetter,
   264  		centralJobQueue:       repo.jobQueueCh,
   265  		stopwords:             sd,
   266  		indexCheckpoints:      checkpts,
   267  	}
   268  	idx.closingCtx, idx.closingCancel = context.WithCancel(context.Background())
   269  	idx.initCycleCallbacksNoop()
   270  	for _, opt := range indexOpts {
   271  		opt(idx)
   272  	}
   273  
   274  	shardName := shardState.AllPhysicalShards()[0]
   275  	shard, err := idx.initShard(ctx, shardName, class, nil)
   276  	require.NoError(t, err)
   277  
   278  	idx.shards.Store(shardName, shard)
   279  	return shard, idx
   280  }
   281  
   282  func testObject(className string) *storobj.Object {
   283  	return &storobj.Object{
   284  		MarshallerVersion: 1,
   285  		Object: models.Object{
   286  			ID:    strfmt.UUID(uuid.NewString()),
   287  			Class: className,
   288  		},
   289  		Vector: []float32{1, 2, 3},
   290  	}
   291  }
   292  
   293  func createRandomObjects(r *rand.Rand, className string, numObj int) []*storobj.Object {
   294  	obj := make([]*storobj.Object, numObj)
   295  
   296  	for i := 0; i < numObj; i++ {
   297  		obj[i] = &storobj.Object{
   298  			MarshallerVersion: 1,
   299  			Object: models.Object{
   300  				ID:    strfmt.UUID(uuid.NewString()),
   301  				Class: className,
   302  			},
   303  			Vector: []float32{r.Float32(), r.Float32(), r.Float32(), r.Float32()},
   304  		}
   305  	}
   306  	return obj
   307  }