github.com/weaviate/weaviate@v1.24.6/entities/vectorindex/hnsw/config_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package hnsw 13 14 import ( 15 "encoding/json" 16 "math" 17 "testing" 18 19 "github.com/stretchr/testify/assert" 20 "github.com/stretchr/testify/require" 21 "github.com/weaviate/weaviate/entities/vectorindex/common" 22 ) 23 24 func Test_UserConfig(t *testing.T) { 25 type test struct { 26 name string 27 input interface{} 28 expected UserConfig 29 expectErr bool 30 expectErrMsg string 31 } 32 33 tests := []test{ 34 { 35 name: "nothing specified, all defaults", 36 input: nil, 37 expected: UserConfig{ 38 CleanupIntervalSeconds: DefaultCleanupIntervalSeconds, 39 MaxConnections: DefaultMaxConnections, 40 EFConstruction: DefaultEFConstruction, 41 VectorCacheMaxObjects: common.DefaultVectorCacheMaxObjects, 42 EF: DefaultEF, 43 Skip: DefaultSkip, 44 FlatSearchCutoff: DefaultFlatSearchCutoff, 45 DynamicEFMin: DefaultDynamicEFMin, 46 DynamicEFMax: DefaultDynamicEFMax, 47 DynamicEFFactor: DefaultDynamicEFFactor, 48 Distance: common.DefaultDistanceMetric, 49 PQ: PQConfig{ 50 Enabled: DefaultPQEnabled, 51 BitCompression: DefaultPQBitCompression, 52 Segments: DefaultPQSegments, 53 Centroids: DefaultPQCentroids, 54 TrainingLimit: DefaultPQTrainingLimit, 55 Encoder: PQEncoder{ 56 Type: DefaultPQEncoderType, 57 Distribution: DefaultPQEncoderDistribution, 58 }, 59 }, 60 }, 61 }, 62 63 { 64 name: "with maximum connections", 65 input: map[string]interface{}{ 66 "maxConnections": json.Number("100"), 67 }, 68 expected: UserConfig{ 69 CleanupIntervalSeconds: DefaultCleanupIntervalSeconds, 70 MaxConnections: 100, 71 EFConstruction: DefaultEFConstruction, 72 VectorCacheMaxObjects: common.DefaultVectorCacheMaxObjects, 73 EF: DefaultEF, 74 FlatSearchCutoff: DefaultFlatSearchCutoff, 75 DynamicEFMin: DefaultDynamicEFMin, 76 DynamicEFMax: DefaultDynamicEFMax, 77 DynamicEFFactor: DefaultDynamicEFFactor, 78 Distance: common.DefaultDistanceMetric, 79 PQ: PQConfig{ 80 Enabled: DefaultPQEnabled, 81 BitCompression: DefaultPQBitCompression, 82 Segments: DefaultPQSegments, 83 Centroids: DefaultPQCentroids, 84 TrainingLimit: DefaultPQTrainingLimit, 85 Encoder: PQEncoder{ 86 Type: DefaultPQEncoderType, 87 Distribution: DefaultPQEncoderDistribution, 88 }, 89 }, 90 }, 91 }, 92 93 { 94 name: "with all optional fields", 95 input: map[string]interface{}{ 96 "cleanupIntervalSeconds": json.Number("11"), 97 "maxConnections": json.Number("12"), 98 "efConstruction": json.Number("13"), 99 "vectorCacheMaxObjects": json.Number("14"), 100 "ef": json.Number("15"), 101 "flatSearchCutoff": json.Number("16"), 102 "dynamicEfMin": json.Number("17"), 103 "dynamicEfMax": json.Number("18"), 104 "dynamicEfFactor": json.Number("19"), 105 "skip": true, 106 "distance": "l2-squared", 107 }, 108 expected: UserConfig{ 109 CleanupIntervalSeconds: 11, 110 MaxConnections: 12, 111 EFConstruction: 13, 112 VectorCacheMaxObjects: 14, 113 EF: 15, 114 FlatSearchCutoff: 16, 115 DynamicEFMin: 17, 116 DynamicEFMax: 18, 117 DynamicEFFactor: 19, 118 Skip: true, 119 Distance: "l2-squared", 120 PQ: PQConfig{ 121 Enabled: DefaultPQEnabled, 122 BitCompression: DefaultPQBitCompression, 123 Segments: DefaultPQSegments, 124 Centroids: DefaultPQCentroids, 125 TrainingLimit: DefaultPQTrainingLimit, 126 Encoder: PQEncoder{ 127 Type: DefaultPQEncoderType, 128 Distribution: DefaultPQEncoderDistribution, 129 }, 130 }, 131 }, 132 }, 133 134 { 135 name: "with all optional fields", 136 input: map[string]interface{}{ 137 "cleanupIntervalSeconds": json.Number("11"), 138 "maxConnections": json.Number("12"), 139 "efConstruction": json.Number("13"), 140 "vectorCacheMaxObjects": json.Number("14"), 141 "ef": json.Number("15"), 142 "flatSearchCutoff": json.Number("16"), 143 "dynamicEfMin": json.Number("17"), 144 "dynamicEfMax": json.Number("18"), 145 "dynamicEfFactor": json.Number("19"), 146 "skip": true, 147 "distance": "manhattan", 148 }, 149 expected: UserConfig{ 150 CleanupIntervalSeconds: 11, 151 MaxConnections: 12, 152 EFConstruction: 13, 153 VectorCacheMaxObjects: 14, 154 EF: 15, 155 FlatSearchCutoff: 16, 156 DynamicEFMin: 17, 157 DynamicEFMax: 18, 158 DynamicEFFactor: 19, 159 Skip: true, 160 Distance: "manhattan", 161 PQ: PQConfig{ 162 Enabled: DefaultPQEnabled, 163 BitCompression: DefaultPQBitCompression, 164 Segments: DefaultPQSegments, 165 Centroids: DefaultPQCentroids, 166 TrainingLimit: DefaultPQTrainingLimit, 167 Encoder: PQEncoder{ 168 Type: DefaultPQEncoderType, 169 Distribution: DefaultPQEncoderDistribution, 170 }, 171 }, 172 }, 173 }, 174 175 { 176 name: "with all optional fields", 177 input: map[string]interface{}{ 178 "cleanupIntervalSeconds": json.Number("11"), 179 "maxConnections": json.Number("12"), 180 "efConstruction": json.Number("13"), 181 "vectorCacheMaxObjects": json.Number("14"), 182 "ef": json.Number("15"), 183 "flatSearchCutoff": json.Number("16"), 184 "dynamicEfMin": json.Number("17"), 185 "dynamicEfMax": json.Number("18"), 186 "dynamicEfFactor": json.Number("19"), 187 "skip": true, 188 "distance": "hamming", 189 }, 190 expected: UserConfig{ 191 CleanupIntervalSeconds: 11, 192 MaxConnections: 12, 193 EFConstruction: 13, 194 VectorCacheMaxObjects: 14, 195 EF: 15, 196 FlatSearchCutoff: 16, 197 DynamicEFMin: 17, 198 DynamicEFMax: 18, 199 DynamicEFFactor: 19, 200 Skip: true, 201 Distance: "hamming", 202 PQ: PQConfig{ 203 Enabled: DefaultPQEnabled, 204 BitCompression: DefaultPQBitCompression, 205 Segments: DefaultPQSegments, 206 Centroids: DefaultPQCentroids, 207 TrainingLimit: DefaultPQTrainingLimit, 208 Encoder: PQEncoder{ 209 Type: DefaultPQEncoderType, 210 Distribution: DefaultPQEncoderDistribution, 211 }, 212 }, 213 }, 214 }, 215 216 { 217 // opposed to from the API 218 name: "with raw data as floats", 219 input: map[string]interface{}{ 220 "cleanupIntervalSeconds": float64(11), 221 "maxConnections": float64(12), 222 "efConstruction": float64(13), 223 "vectorCacheMaxObjects": float64(14), 224 "ef": float64(15), 225 "flatSearchCutoff": float64(16), 226 "dynamicEfMin": float64(17), 227 "dynamicEfMax": float64(18), 228 "dynamicEfFactor": float64(19), 229 }, 230 expected: UserConfig{ 231 CleanupIntervalSeconds: 11, 232 MaxConnections: 12, 233 EFConstruction: 13, 234 VectorCacheMaxObjects: 14, 235 EF: 15, 236 FlatSearchCutoff: 16, 237 DynamicEFMin: 17, 238 DynamicEFMax: 18, 239 DynamicEFFactor: 19, 240 Distance: common.DefaultDistanceMetric, 241 PQ: PQConfig{ 242 Enabled: DefaultPQEnabled, 243 BitCompression: DefaultPQBitCompression, 244 Segments: DefaultPQSegments, 245 Centroids: DefaultPQCentroids, 246 TrainingLimit: DefaultPQTrainingLimit, 247 Encoder: PQEncoder{ 248 Type: DefaultPQEncoderType, 249 Distribution: DefaultPQEncoderDistribution, 250 }, 251 }, 252 }, 253 }, 254 255 { 256 name: "with pq tile normal encoder", 257 input: map[string]interface{}{ 258 "cleanupIntervalSeconds": float64(11), 259 "maxConnections": float64(12), 260 "efConstruction": float64(13), 261 "vectorCacheMaxObjects": float64(14), 262 "ef": float64(15), 263 "flatSearchCutoff": float64(16), 264 "dynamicEfMin": float64(17), 265 "dynamicEfMax": float64(18), 266 "dynamicEfFactor": float64(19), 267 "pq": map[string]interface{}{ 268 "enabled": true, 269 "bitCompression": false, 270 "segments": float64(64), 271 "centroids": float64(DefaultPQCentroids), 272 "trainingLimit": float64(DefaultPQTrainingLimit), 273 "encoder": map[string]interface{}{ 274 "type": "tile", 275 "distribution": "normal", 276 }, 277 }, 278 }, 279 expected: UserConfig{ 280 CleanupIntervalSeconds: 11, 281 MaxConnections: 12, 282 EFConstruction: 13, 283 VectorCacheMaxObjects: 14, 284 EF: 15, 285 FlatSearchCutoff: 16, 286 DynamicEFMin: 17, 287 DynamicEFMax: 18, 288 DynamicEFFactor: 19, 289 Distance: common.DefaultDistanceMetric, 290 PQ: PQConfig{ 291 Enabled: true, 292 Segments: 64, 293 Centroids: DefaultPQCentroids, 294 TrainingLimit: DefaultPQTrainingLimit, 295 Encoder: PQEncoder{ 296 Type: "tile", 297 Distribution: "normal", 298 }, 299 }, 300 }, 301 }, 302 303 { 304 name: "with pq kmeans normal encoder", 305 input: map[string]interface{}{ 306 "cleanupIntervalSeconds": float64(11), 307 "maxConnections": float64(12), 308 "efConstruction": float64(13), 309 "vectorCacheMaxObjects": float64(14), 310 "ef": float64(15), 311 "flatSearchCutoff": float64(16), 312 "dynamicEfMin": float64(17), 313 "dynamicEfMax": float64(18), 314 "dynamicEfFactor": float64(19), 315 "pq": map[string]interface{}{ 316 "enabled": true, 317 "bitCompression": false, 318 "segments": float64(64), 319 "centroids": float64(DefaultPQCentroids), 320 "trainingLimit": float64(DefaultPQTrainingLimit), 321 "encoder": map[string]interface{}{ 322 "type": PQEncoderTypeKMeans, 323 }, 324 }, 325 }, 326 expected: UserConfig{ 327 CleanupIntervalSeconds: 11, 328 MaxConnections: 12, 329 EFConstruction: 13, 330 VectorCacheMaxObjects: 14, 331 EF: 15, 332 FlatSearchCutoff: 16, 333 DynamicEFMin: 17, 334 DynamicEFMax: 18, 335 DynamicEFFactor: 19, 336 Distance: common.DefaultDistanceMetric, 337 PQ: PQConfig{ 338 Enabled: true, 339 Segments: 64, 340 Centroids: DefaultPQCentroids, 341 TrainingLimit: DefaultPQTrainingLimit, 342 Encoder: PQEncoder{ 343 Type: DefaultPQEncoderType, 344 Distribution: DefaultPQEncoderDistribution, 345 }, 346 }, 347 }, 348 }, 349 350 { 351 name: "with invalid encoder", 352 input: map[string]interface{}{ 353 "pq": map[string]interface{}{ 354 "enabled": true, 355 "encoder": map[string]interface{}{ 356 "type": "bernoulli", 357 }, 358 }, 359 }, 360 expectErr: true, 361 expectErrMsg: "invalid encoder type bernoulli", 362 }, 363 364 { 365 name: "with invalid distribution", 366 input: map[string]interface{}{ 367 "pq": map[string]interface{}{ 368 "enabled": true, 369 "encoder": map[string]interface{}{ 370 "distribution": "lognormal", 371 }, 372 }, 373 }, 374 expectErr: true, 375 expectErrMsg: "invalid encoder distribution lognormal", 376 }, 377 378 { 379 // opposed to from the API 380 name: "with rounded vectorCacheMaxObjects that would otherwise overflow", 381 input: map[string]interface{}{ 382 "cleanupIntervalSeconds": json.Number("11"), 383 "maxConnections": json.Number("12"), 384 "efConstruction": json.Number("13"), 385 "vectorCacheMaxObjects": json.Number("9223372036854776000"), 386 "ef": json.Number("15"), 387 "flatSearchCutoff": json.Number("16"), 388 "dynamicEfMin": json.Number("17"), 389 "dynamicEfMax": json.Number("18"), 390 "dynamicEfFactor": json.Number("19"), 391 }, 392 expected: UserConfig{ 393 CleanupIntervalSeconds: 11, 394 MaxConnections: 12, 395 EFConstruction: 13, 396 VectorCacheMaxObjects: math.MaxInt64, 397 EF: 15, 398 FlatSearchCutoff: 16, 399 DynamicEFMin: 17, 400 DynamicEFMax: 18, 401 DynamicEFFactor: 19, 402 Distance: common.DefaultDistanceMetric, 403 PQ: PQConfig{ 404 Enabled: DefaultPQEnabled, 405 BitCompression: DefaultPQBitCompression, 406 Segments: DefaultPQSegments, 407 Centroids: DefaultPQCentroids, 408 TrainingLimit: DefaultPQTrainingLimit, 409 Encoder: PQEncoder{ 410 Type: DefaultPQEncoderType, 411 Distribution: DefaultPQEncoderDistribution, 412 }, 413 }, 414 }, 415 }, 416 { 417 name: "invalid max connections (json)", 418 input: map[string]interface{}{ 419 "maxConnections": json.Number("0"), 420 }, 421 expectErr: true, 422 expectErrMsg: "maxConnections must be a positive integer " + 423 "with a minimum of 4", 424 }, 425 { 426 name: "invalid max connections (float)", 427 input: map[string]interface{}{ 428 "maxConnections": float64(3), 429 }, 430 expectErr: true, 431 expectErrMsg: "maxConnections must be a positive integer " + 432 "with a minimum of 4", 433 }, 434 { 435 name: "invalid efConstruction (json)", 436 input: map[string]interface{}{ 437 "efConstruction": json.Number("0"), 438 }, 439 expectErr: true, 440 expectErrMsg: "efConstruction must be a positive integer " + 441 "with a minimum of 4", 442 }, 443 { 444 name: "invalid efConstruction (float)", 445 input: map[string]interface{}{ 446 "efConstruction": float64(3), 447 }, 448 expectErr: true, 449 expectErrMsg: "efConstruction must be a positive integer " + 450 "with a minimum of 4", 451 }, 452 { 453 name: "with bq", 454 input: map[string]interface{}{ 455 "cleanupIntervalSeconds": float64(11), 456 "maxConnections": float64(12), 457 "efConstruction": float64(13), 458 "vectorCacheMaxObjects": float64(14), 459 "ef": float64(15), 460 "flatSearchCutoff": float64(16), 461 "dynamicEfMin": float64(17), 462 "dynamicEfMax": float64(18), 463 "dynamicEfFactor": float64(19), 464 "bq": map[string]interface{}{ 465 "enabled": true, 466 }, 467 }, 468 expected: UserConfig{ 469 CleanupIntervalSeconds: 11, 470 MaxConnections: 12, 471 EFConstruction: 13, 472 VectorCacheMaxObjects: 14, 473 EF: 15, 474 FlatSearchCutoff: 16, 475 DynamicEFMin: 17, 476 DynamicEFMax: 18, 477 DynamicEFFactor: 19, 478 Distance: common.DefaultDistanceMetric, 479 PQ: PQConfig{ 480 Enabled: false, 481 Segments: 0, 482 Centroids: DefaultPQCentroids, 483 TrainingLimit: DefaultPQTrainingLimit, 484 Encoder: PQEncoder{ 485 Type: DefaultPQEncoderType, 486 Distribution: DefaultPQEncoderDistribution, 487 }, 488 }, 489 BQ: BQConfig{ 490 Enabled: true, 491 }, 492 }, 493 }, 494 { 495 name: "with invalid compression", 496 input: map[string]interface{}{ 497 "pq": map[string]interface{}{ 498 "enabled": true, 499 "encoder": map[string]interface{}{ 500 "type": "kmeans", 501 }, 502 }, 503 "bq": map[string]interface{}{ 504 "enabled": true, 505 }, 506 }, 507 expectErr: true, 508 expectErrMsg: "invalid hnsw config: two compression methods enabled: PQ and BQ", 509 }, 510 } 511 512 for _, test := range tests { 513 t.Run(test.name, func(t *testing.T) { 514 cfg, err := ParseAndValidateConfig(test.input) 515 if test.expectErr { 516 require.NotNil(t, err) 517 assert.Contains(t, err.Error(), test.expectErrMsg) 518 return 519 } else { 520 assert.Nil(t, err) 521 assert.Equal(t, test.expected, cfg) 522 } 523 }) 524 } 525 }