github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/clusterintegrationtest/helpers_for_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 //go:build integrationTest 13 // +build integrationTest 14 15 package clusterintegrationtest 16 17 import ( 18 "context" 19 "encoding/json" 20 "fmt" 21 "math" 22 "math/rand" 23 "sort" 24 "testing" 25 "time" 26 27 "github.com/go-openapi/strfmt" 28 "github.com/google/uuid" 29 "github.com/stretchr/testify/require" 30 "github.com/weaviate/weaviate/adapters/repos/db" 31 "github.com/weaviate/weaviate/adapters/repos/db/vector/hnsw/distancer" 32 "github.com/weaviate/weaviate/entities/additional" 33 "github.com/weaviate/weaviate/entities/models" 34 "github.com/weaviate/weaviate/entities/schema" 35 "github.com/weaviate/weaviate/entities/schema/crossref" 36 enthnsw "github.com/weaviate/weaviate/entities/vectorindex/hnsw" 37 "github.com/weaviate/weaviate/usecases/objects" 38 "github.com/weaviate/weaviate/usecases/sharding" 39 ) 40 41 func getRandomSeed() *rand.Rand { 42 return rand.New(rand.NewSource(time.Now().UnixNano())) 43 } 44 45 func setupDirectory(t *testing.T) string { 46 dirName := t.TempDir() 47 return dirName 48 } 49 50 func dataAsBatch(data []*models.Object) objects.BatchObjects { 51 batchObjs := make(objects.BatchObjects, len(data)) 52 for i := range data { 53 batchObjs[i] = objects.BatchObject{ 54 OriginalIndex: i, 55 Err: nil, 56 Object: data[i], 57 UUID: data[i].ID, 58 } 59 } 60 61 return batchObjs 62 } 63 64 func dataAsBatchWithProps(data []*models.Object, props []string) objects.BatchObjects { 65 batchObjs := make(objects.BatchObjects, len(data)) 66 for i := range data { 67 batchObjs[i] = objects.BatchObject{ 68 OriginalIndex: i, 69 Err: nil, 70 Object: copyObjectWithProp(data[i], props), 71 UUID: data[i].ID, 72 } 73 } 74 75 return batchObjs 76 } 77 78 // copyObjectWithProp is not a 100% copy. It may still contain the same 79 // pointers in some properties, it does however guarantee that it does not 80 // alter the existing input - this guarantee is lost, if you modify the output 81 func copyObjectWithProp(in *models.Object, propsToCopy []string) *models.Object { 82 out := &models.Object{} 83 84 out.Additional = in.Additional 85 out.Class = in.Class 86 out.Vector = in.Vector 87 out.CreationTimeUnix = in.CreationTimeUnix 88 out.LastUpdateTimeUnix = in.LastUpdateTimeUnix 89 out.ID = in.ID 90 props := map[string]interface{}{} 91 92 for _, propName := range propsToCopy { 93 props[propName] = in.Properties.(map[string]interface{})[propName] 94 } 95 96 out.Properties = props 97 return out 98 } 99 100 func multiShardState(nodeCount int) *sharding.State { 101 config, err := sharding.ParseConfig(map[string]interface{}{ 102 "desiredCount": json.Number(fmt.Sprintf("%d", nodeCount)), 103 }, 1) 104 if err != nil { 105 panic(err) 106 } 107 108 nodeList := make([]string, nodeCount) 109 for i := range nodeList { 110 nodeList[i] = fmt.Sprintf("node-%d", i) 111 } 112 113 s, err := sharding.InitState("multi-shard-test-index", config, 114 fakeNodes{nodeList}, 1, false) 115 if err != nil { 116 panic(err) 117 } 118 119 return s 120 } 121 122 func class() *models.Class { 123 cfg := enthnsw.NewDefaultUserConfig() 124 cfg.EF = 500 125 return &models.Class{ 126 Class: distributedClass, 127 VectorIndexConfig: cfg, 128 InvertedIndexConfig: invertedConfig(), 129 Properties: []*models.Property{ 130 { 131 Name: "description", 132 DataType: schema.DataTypeText.PropString(), 133 Tokenization: models.PropertyTokenizationWord, 134 }, 135 { 136 Name: "other_property", 137 DataType: schema.DataTypeText.PropString(), 138 Tokenization: models.PropertyTokenizationWord, 139 }, 140 { 141 Name: "date_property", 142 DataType: schema.DataTypeDate.PropString(), 143 }, 144 { 145 Name: "date_array_property", 146 DataType: schema.DataTypeDateArray.PropString(), 147 }, 148 { 149 Name: "int_property", 150 DataType: schema.DataTypeInt.PropString(), 151 }, 152 { 153 Name: "phone_property", 154 DataType: schema.DataTypePhoneNumber.PropString(), 155 }, 156 }, 157 } 158 } 159 160 func secondClassWithRef() *models.Class { 161 cfg := enthnsw.NewDefaultUserConfig() 162 cfg.EF = 500 163 return &models.Class{ 164 Class: "SecondDistributed", 165 VectorIndexConfig: cfg, 166 InvertedIndexConfig: invertedConfig(), 167 Properties: []*models.Property{ 168 { 169 Name: "description", 170 DataType: []string{string(schema.DataTypeText)}, 171 }, 172 { 173 Name: "toFirst", 174 DataType: []string{distributedClass}, 175 }, 176 }, 177 } 178 } 179 180 func invertedConfig() *models.InvertedIndexConfig { 181 return &models.InvertedIndexConfig{ 182 CleanupIntervalSeconds: 60, 183 } 184 } 185 186 func exampleData(size int) []*models.Object { 187 out := make([]*models.Object, size) 188 189 for i := range out { 190 vec := make([]float32, vectorDims) 191 for i := range vec { 192 vec[i] = rand.Float32() 193 } 194 195 timestamp := time.Unix(0, 0).Add(time.Duration(i) * time.Hour) 196 phoneNumber := uint64(1000000 + rand.Intn(10000)) 197 198 out[i] = &models.Object{ 199 Class: distributedClass, 200 ID: strfmt.UUID(uuid.New().String()), 201 Properties: map[string]interface{}{ 202 "description": fmt.Sprintf("object-%d", i), 203 "date_property": timestamp, 204 "date_array_property": []interface{}{timestamp}, 205 "int_property": rand.Intn(1000), 206 "phone_property": &models.PhoneNumber{ 207 CountryCode: 49, 208 DefaultCountry: "DE", 209 Input: fmt.Sprintf("0171 %d", phoneNumber), 210 Valid: true, 211 InternationalFormatted: fmt.Sprintf("+49 171 %d", phoneNumber), 212 National: phoneNumber, 213 NationalFormatted: fmt.Sprintf("0171 %d", phoneNumber), 214 }, 215 }, 216 Vector: vec, 217 } 218 } 219 220 return out 221 } 222 223 func exampleDataWithRefs(size int, refCount int, targetObjs []*models.Object) []*models.Object { 224 out := make([]*models.Object, size) 225 226 for i := range out { 227 vec := make([]float32, vectorDims) 228 for i := range vec { 229 vec[i] = rand.Float32() 230 } 231 232 refs := make(models.MultipleRef, refCount) 233 for i := range refs { 234 randomTarget := targetObjs[rand.Intn(len(targetObjs))] 235 refs[i] = crossref.New("localhost", distributedClass, randomTarget.ID).SingleRef() 236 } 237 238 out[i] = &models.Object{ 239 Class: "SecondDistributed", 240 ID: strfmt.UUID(uuid.New().String()), 241 Properties: map[string]interface{}{ 242 "description": fmt.Sprintf("second-object-%d", i), 243 "toFirst": refs, 244 }, 245 Vector: vec, 246 } 247 } 248 249 return out 250 } 251 252 func bruteForceObjectsByQuery(objs []*models.Object, 253 query []float32, 254 ) []*models.Object { 255 type distanceAndObj struct { 256 distance float32 257 obj *models.Object 258 } 259 260 distProv := distancer.NewCosineDistanceProvider() 261 distances := make([]distanceAndObj, len(objs)) 262 263 for i := range objs { 264 dist, _, _ := distProv.SingleDist(normalize(query), normalize(objs[i].Vector)) 265 distances[i] = distanceAndObj{ 266 distance: dist, 267 obj: objs[i], 268 } 269 } 270 271 sort.Slice(distances, func(a, b int) bool { 272 return distances[a].distance < distances[b].distance 273 }) 274 275 out := make([]*models.Object, len(objs)) 276 for i := range out { 277 out[i] = distances[i].obj 278 } 279 280 return out 281 } 282 283 func normalize(v []float32) []float32 { 284 var norm float32 285 for i := range v { 286 norm += v[i] * v[i] 287 } 288 289 norm = float32(math.Sqrt(float64(norm))) 290 for i := range v { 291 v[i] = v[i] / norm 292 } 293 294 return v 295 } 296 297 func manuallyResolveRef(t *testing.T, obj *models.Object, 298 possibleTargets []*models.Object, localPropName, 299 referencedPropName string, 300 repo *db.DB, 301 ) []map[string]interface{} { 302 beacons := obj.Properties.(map[string]interface{})[localPropName].(models.MultipleRef) 303 out := make([]map[string]interface{}, len(beacons)) 304 305 for i, ref := range beacons { 306 parsed, err := crossref.Parse(ref.Beacon.String()) 307 require.Nil(t, err) 308 target := findId(possibleTargets, parsed.TargetID) 309 require.NotNil(t, target, "target not found") 310 if referencedPropName == "vector" { 311 // find referenced object to get his actual vector from DB 312 require.NotNil(t, repo) 313 res, err := repo.Object(context.Background(), parsed.Class, parsed.TargetID, 314 nil, additional.Properties{Vector: true}, nil, "") 315 require.Nil(t, err) 316 require.NotNil(t, res) 317 out[i] = map[string]interface{}{ 318 referencedPropName: res.Vector, 319 } 320 } else { 321 out[i] = map[string]interface{}{ 322 referencedPropName: target.Properties.(map[string]interface{})[referencedPropName], 323 } 324 } 325 } 326 327 return out 328 } 329 330 func findId(list []*models.Object, id strfmt.UUID) *models.Object { 331 for _, obj := range list { 332 if obj.ID == id { 333 return obj 334 } 335 } 336 337 return nil 338 } 339 340 func refsAsBatch(in []*models.Object, propName string) objects.BatchReferences { 341 out := objects.BatchReferences{} 342 343 originalIndex := 0 344 for _, obj := range in { 345 beacons := obj.Properties.(map[string]interface{})[propName].(models.MultipleRef) 346 current := make(objects.BatchReferences, len(beacons)) 347 for i, beacon := range beacons { 348 to, err := crossref.Parse(beacon.Beacon.String()) 349 if err != nil { 350 panic(err) 351 } 352 current[i] = objects.BatchReference{ 353 OriginalIndex: originalIndex, 354 To: to, 355 From: crossref.NewSource(schema.ClassName(obj.Class), 356 schema.PropertyName(propName), obj.ID), 357 } 358 originalIndex++ 359 } 360 out = append(out, current...) 361 } 362 363 return out 364 }