github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/helper_for_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 //go:build integrationTest 13 // +build integrationTest 14 15 package db 16 17 import ( 18 "context" 19 "math/rand" 20 "testing" 21 "time" 22 23 "github.com/go-openapi/strfmt" 24 "github.com/google/uuid" 25 "github.com/sirupsen/logrus/hooks/test" 26 "github.com/stretchr/testify/require" 27 "github.com/weaviate/weaviate/adapters/repos/db/indexcheckpoint" 28 "github.com/weaviate/weaviate/adapters/repos/db/inverted" 29 "github.com/weaviate/weaviate/adapters/repos/db/inverted/stopwords" 30 "github.com/weaviate/weaviate/entities/models" 31 "github.com/weaviate/weaviate/entities/schema" 32 "github.com/weaviate/weaviate/entities/storobj" 33 enthnsw "github.com/weaviate/weaviate/entities/vectorindex/hnsw" 34 ) 35 36 func parkingGaragesSchema() schema.Schema { 37 return schema.Schema{ 38 Objects: &models.Schema{ 39 Classes: []*models.Class{ 40 { 41 Class: "MultiRefParkingGarage", 42 VectorIndexConfig: enthnsw.NewDefaultUserConfig(), 43 InvertedIndexConfig: invertedConfig(), 44 Properties: []*models.Property{ 45 { 46 Name: "name", 47 DataType: schema.DataTypeText.PropString(), 48 Tokenization: models.PropertyTokenizationWhitespace, 49 }, 50 { 51 Name: "location", 52 DataType: []string{string(schema.DataTypeGeoCoordinates)}, 53 }, 54 }, 55 }, 56 { 57 Class: "MultiRefParkingLot", 58 VectorIndexConfig: enthnsw.NewDefaultUserConfig(), 59 InvertedIndexConfig: invertedConfig(), 60 Properties: []*models.Property{ 61 { 62 Name: "name", 63 DataType: schema.DataTypeText.PropString(), 64 Tokenization: models.PropertyTokenizationWhitespace, 65 }, 66 }, 67 }, 68 { 69 Class: "MultiRefCar", 70 VectorIndexConfig: enthnsw.NewDefaultUserConfig(), 71 InvertedIndexConfig: invertedConfig(), 72 Properties: []*models.Property{ 73 { 74 Name: "name", 75 DataType: schema.DataTypeText.PropString(), 76 Tokenization: models.PropertyTokenizationWhitespace, 77 }, 78 { 79 Name: "parkedAt", 80 DataType: []string{"MultiRefParkingGarage", "MultiRefParkingLot"}, 81 }, 82 }, 83 }, 84 { 85 Class: "MultiRefDriver", 86 VectorIndexConfig: enthnsw.NewDefaultUserConfig(), 87 InvertedIndexConfig: invertedConfig(), 88 Properties: []*models.Property{ 89 { 90 Name: "name", 91 DataType: schema.DataTypeText.PropString(), 92 Tokenization: models.PropertyTokenizationWhitespace, 93 }, 94 { 95 Name: "drives", 96 DataType: []string{"MultiRefCar"}, 97 }, 98 }, 99 }, 100 { 101 Class: "MultiRefPerson", 102 VectorIndexConfig: enthnsw.NewDefaultUserConfig(), 103 InvertedIndexConfig: invertedConfig(), 104 Properties: []*models.Property{ 105 { 106 Name: "name", 107 DataType: schema.DataTypeText.PropString(), 108 Tokenization: models.PropertyTokenizationWhitespace, 109 }, 110 { 111 Name: "friendsWith", 112 DataType: []string{"MultiRefDriver"}, 113 }, 114 }, 115 }, 116 { 117 Class: "MultiRefSociety", 118 VectorIndexConfig: enthnsw.NewDefaultUserConfig(), 119 InvertedIndexConfig: invertedConfig(), 120 Properties: []*models.Property{ 121 { 122 Name: "name", 123 DataType: schema.DataTypeText.PropString(), 124 Tokenization: models.PropertyTokenizationWhitespace, 125 }, 126 { 127 Name: "hasMembers", 128 DataType: []string{"MultiRefPerson"}, 129 }, 130 }, 131 }, 132 133 // for classifications test 134 { 135 Class: "ExactCategory", 136 VectorIndexConfig: enthnsw.NewDefaultUserConfig(), 137 InvertedIndexConfig: invertedConfig(), 138 Properties: []*models.Property{ 139 { 140 Name: "name", 141 DataType: schema.DataTypeText.PropString(), 142 Tokenization: models.PropertyTokenizationWhitespace, 143 }, 144 }, 145 }, 146 { 147 Class: "MainCategory", 148 VectorIndexConfig: enthnsw.NewDefaultUserConfig(), 149 InvertedIndexConfig: invertedConfig(), 150 Properties: []*models.Property{ 151 { 152 Name: "name", 153 DataType: schema.DataTypeText.PropString(), 154 Tokenization: models.PropertyTokenizationWhitespace, 155 }, 156 }, 157 }, 158 }, 159 }, 160 } 161 } 162 163 func cityCountryAirportSchema() schema.Schema { 164 return schema.Schema{ 165 Objects: &models.Schema{ 166 Classes: []*models.Class{ 167 { 168 Class: "Country", 169 VectorIndexConfig: enthnsw.NewDefaultUserConfig(), 170 InvertedIndexConfig: invertedConfig(), 171 Properties: []*models.Property{ 172 {Name: "name", DataType: schema.DataTypeText.PropString(), Tokenization: models.PropertyTokenizationWhitespace}, 173 }, 174 }, 175 { 176 Class: "City", 177 VectorIndexConfig: enthnsw.NewDefaultUserConfig(), 178 InvertedIndexConfig: invertedConfig(), 179 Properties: []*models.Property{ 180 {Name: "name", DataType: schema.DataTypeText.PropString(), Tokenization: models.PropertyTokenizationWhitespace}, 181 {Name: "inCountry", DataType: []string{"Country"}}, 182 {Name: "population", DataType: []string{"int"}}, 183 {Name: "location", DataType: []string{"geoCoordinates"}}, 184 }, 185 }, 186 { 187 Class: "Airport", 188 VectorIndexConfig: enthnsw.NewDefaultUserConfig(), 189 InvertedIndexConfig: invertedConfig(), 190 Properties: []*models.Property{ 191 {Name: "code", DataType: schema.DataTypeText.PropString(), Tokenization: models.PropertyTokenizationWhitespace}, 192 {Name: "phone", DataType: []string{"phoneNumber"}}, 193 {Name: "inCity", DataType: []string{"City"}}, 194 }, 195 }, 196 }, 197 }, 198 } 199 } 200 201 func testCtx() context.Context { 202 //nolint:govet 203 ctx, _ := context.WithTimeout(context.Background(), 30*time.Second) 204 return ctx 205 } 206 207 func getRandomSeed() *rand.Rand { 208 return rand.New(rand.NewSource(time.Now().UnixNano())) 209 } 210 211 func testShard(t *testing.T, ctx context.Context, className string, indexOpts ...func(*Index)) (ShardLike, *Index) { 212 return testShardWithSettings(t, ctx, &models.Class{Class: className}, enthnsw.UserConfig{Skip: true}, 213 false, false, indexOpts...) 214 } 215 216 func testShardWithSettings(t *testing.T, ctx context.Context, class *models.Class, 217 vic schema.VectorIndexConfig, withStopwords, withCheckpoints bool, indexOpts ...func(*Index), 218 ) (ShardLike, *Index) { 219 tmpDir := t.TempDir() 220 logger, _ := test.NewNullLogger() 221 maxResults := int64(10_000) 222 223 repo, err := New(logger, Config{ 224 MemtablesFlushDirtyAfter: 60, 225 RootPath: tmpDir, 226 QueryMaximumResults: maxResults, 227 MaxImportGoroutinesFactor: 1, 228 }, &fakeRemoteClient{}, &fakeNodeResolver{}, &fakeRemoteNodeClient{}, &fakeReplicationClient{}, nil) 229 require.Nil(t, err) 230 231 shardState := singleShardState() 232 sch := schema.Schema{ 233 Objects: &models.Schema{ 234 Classes: []*models.Class{class}, 235 }, 236 } 237 schemaGetter := &fakeSchemaGetter{shardState: shardState, schema: sch} 238 239 iic := schema.InvertedIndexConfig{} 240 if class.InvertedIndexConfig != nil { 241 iic = inverted.ConfigFromModel(class.InvertedIndexConfig) 242 } 243 var sd *stopwords.Detector 244 if withStopwords { 245 sd, err = stopwords.NewDetectorFromConfig(iic.Stopwords) 246 require.NoError(t, err) 247 } 248 var checkpts *indexcheckpoint.Checkpoints 249 if withCheckpoints { 250 checkpts, err = indexcheckpoint.New(tmpDir, logger) 251 require.NoError(t, err) 252 } 253 254 idx := &Index{ 255 Config: IndexConfig{ 256 RootPath: tmpDir, 257 ClassName: schema.ClassName(class.Class), 258 QueryMaximumResults: maxResults, 259 }, 260 invertedIndexConfig: iic, 261 vectorIndexUserConfig: vic, 262 logger: logger, 263 getSchema: schemaGetter, 264 centralJobQueue: repo.jobQueueCh, 265 stopwords: sd, 266 indexCheckpoints: checkpts, 267 } 268 idx.closingCtx, idx.closingCancel = context.WithCancel(context.Background()) 269 idx.initCycleCallbacksNoop() 270 for _, opt := range indexOpts { 271 opt(idx) 272 } 273 274 shardName := shardState.AllPhysicalShards()[0] 275 shard, err := idx.initShard(ctx, shardName, class, nil) 276 require.NoError(t, err) 277 278 idx.shards.Store(shardName, shard) 279 return shard, idx 280 } 281 282 func testObject(className string) *storobj.Object { 283 return &storobj.Object{ 284 MarshallerVersion: 1, 285 Object: models.Object{ 286 ID: strfmt.UUID(uuid.NewString()), 287 Class: className, 288 }, 289 Vector: []float32{1, 2, 3}, 290 } 291 } 292 293 func createRandomObjects(r *rand.Rand, className string, numObj int) []*storobj.Object { 294 obj := make([]*storobj.Object, numObj) 295 296 for i := 0; i < numObj; i++ { 297 obj[i] = &storobj.Object{ 298 MarshallerVersion: 1, 299 Object: models.Object{ 300 ID: strfmt.UUID(uuid.NewString()), 301 Class: className, 302 }, 303 Vector: []float32{r.Float32(), r.Float32(), r.Float32(), r.Float32()}, 304 } 305 } 306 return obj 307 }