github.com/weaviate/weaviate@v1.24.6/entities/vectorindex/hnsw/config_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package hnsw
    13  
    14  import (
    15  	"encoding/json"
    16  	"math"
    17  	"testing"
    18  
    19  	"github.com/stretchr/testify/assert"
    20  	"github.com/stretchr/testify/require"
    21  	"github.com/weaviate/weaviate/entities/vectorindex/common"
    22  )
    23  
    24  func Test_UserConfig(t *testing.T) {
    25  	type test struct {
    26  		name         string
    27  		input        interface{}
    28  		expected     UserConfig
    29  		expectErr    bool
    30  		expectErrMsg string
    31  	}
    32  
    33  	tests := []test{
    34  		{
    35  			name:  "nothing specified, all defaults",
    36  			input: nil,
    37  			expected: UserConfig{
    38  				CleanupIntervalSeconds: DefaultCleanupIntervalSeconds,
    39  				MaxConnections:         DefaultMaxConnections,
    40  				EFConstruction:         DefaultEFConstruction,
    41  				VectorCacheMaxObjects:  common.DefaultVectorCacheMaxObjects,
    42  				EF:                     DefaultEF,
    43  				Skip:                   DefaultSkip,
    44  				FlatSearchCutoff:       DefaultFlatSearchCutoff,
    45  				DynamicEFMin:           DefaultDynamicEFMin,
    46  				DynamicEFMax:           DefaultDynamicEFMax,
    47  				DynamicEFFactor:        DefaultDynamicEFFactor,
    48  				Distance:               common.DefaultDistanceMetric,
    49  				PQ: PQConfig{
    50  					Enabled:        DefaultPQEnabled,
    51  					BitCompression: DefaultPQBitCompression,
    52  					Segments:       DefaultPQSegments,
    53  					Centroids:      DefaultPQCentroids,
    54  					TrainingLimit:  DefaultPQTrainingLimit,
    55  					Encoder: PQEncoder{
    56  						Type:         DefaultPQEncoderType,
    57  						Distribution: DefaultPQEncoderDistribution,
    58  					},
    59  				},
    60  			},
    61  		},
    62  
    63  		{
    64  			name: "with maximum connections",
    65  			input: map[string]interface{}{
    66  				"maxConnections": json.Number("100"),
    67  			},
    68  			expected: UserConfig{
    69  				CleanupIntervalSeconds: DefaultCleanupIntervalSeconds,
    70  				MaxConnections:         100,
    71  				EFConstruction:         DefaultEFConstruction,
    72  				VectorCacheMaxObjects:  common.DefaultVectorCacheMaxObjects,
    73  				EF:                     DefaultEF,
    74  				FlatSearchCutoff:       DefaultFlatSearchCutoff,
    75  				DynamicEFMin:           DefaultDynamicEFMin,
    76  				DynamicEFMax:           DefaultDynamicEFMax,
    77  				DynamicEFFactor:        DefaultDynamicEFFactor,
    78  				Distance:               common.DefaultDistanceMetric,
    79  				PQ: PQConfig{
    80  					Enabled:        DefaultPQEnabled,
    81  					BitCompression: DefaultPQBitCompression,
    82  					Segments:       DefaultPQSegments,
    83  					Centroids:      DefaultPQCentroids,
    84  					TrainingLimit:  DefaultPQTrainingLimit,
    85  					Encoder: PQEncoder{
    86  						Type:         DefaultPQEncoderType,
    87  						Distribution: DefaultPQEncoderDistribution,
    88  					},
    89  				},
    90  			},
    91  		},
    92  
    93  		{
    94  			name: "with all optional fields",
    95  			input: map[string]interface{}{
    96  				"cleanupIntervalSeconds": json.Number("11"),
    97  				"maxConnections":         json.Number("12"),
    98  				"efConstruction":         json.Number("13"),
    99  				"vectorCacheMaxObjects":  json.Number("14"),
   100  				"ef":                     json.Number("15"),
   101  				"flatSearchCutoff":       json.Number("16"),
   102  				"dynamicEfMin":           json.Number("17"),
   103  				"dynamicEfMax":           json.Number("18"),
   104  				"dynamicEfFactor":        json.Number("19"),
   105  				"skip":                   true,
   106  				"distance":               "l2-squared",
   107  			},
   108  			expected: UserConfig{
   109  				CleanupIntervalSeconds: 11,
   110  				MaxConnections:         12,
   111  				EFConstruction:         13,
   112  				VectorCacheMaxObjects:  14,
   113  				EF:                     15,
   114  				FlatSearchCutoff:       16,
   115  				DynamicEFMin:           17,
   116  				DynamicEFMax:           18,
   117  				DynamicEFFactor:        19,
   118  				Skip:                   true,
   119  				Distance:               "l2-squared",
   120  				PQ: PQConfig{
   121  					Enabled:        DefaultPQEnabled,
   122  					BitCompression: DefaultPQBitCompression,
   123  					Segments:       DefaultPQSegments,
   124  					Centroids:      DefaultPQCentroids,
   125  					TrainingLimit:  DefaultPQTrainingLimit,
   126  					Encoder: PQEncoder{
   127  						Type:         DefaultPQEncoderType,
   128  						Distribution: DefaultPQEncoderDistribution,
   129  					},
   130  				},
   131  			},
   132  		},
   133  
   134  		{
   135  			name: "with all optional fields",
   136  			input: map[string]interface{}{
   137  				"cleanupIntervalSeconds": json.Number("11"),
   138  				"maxConnections":         json.Number("12"),
   139  				"efConstruction":         json.Number("13"),
   140  				"vectorCacheMaxObjects":  json.Number("14"),
   141  				"ef":                     json.Number("15"),
   142  				"flatSearchCutoff":       json.Number("16"),
   143  				"dynamicEfMin":           json.Number("17"),
   144  				"dynamicEfMax":           json.Number("18"),
   145  				"dynamicEfFactor":        json.Number("19"),
   146  				"skip":                   true,
   147  				"distance":               "manhattan",
   148  			},
   149  			expected: UserConfig{
   150  				CleanupIntervalSeconds: 11,
   151  				MaxConnections:         12,
   152  				EFConstruction:         13,
   153  				VectorCacheMaxObjects:  14,
   154  				EF:                     15,
   155  				FlatSearchCutoff:       16,
   156  				DynamicEFMin:           17,
   157  				DynamicEFMax:           18,
   158  				DynamicEFFactor:        19,
   159  				Skip:                   true,
   160  				Distance:               "manhattan",
   161  				PQ: PQConfig{
   162  					Enabled:        DefaultPQEnabled,
   163  					BitCompression: DefaultPQBitCompression,
   164  					Segments:       DefaultPQSegments,
   165  					Centroids:      DefaultPQCentroids,
   166  					TrainingLimit:  DefaultPQTrainingLimit,
   167  					Encoder: PQEncoder{
   168  						Type:         DefaultPQEncoderType,
   169  						Distribution: DefaultPQEncoderDistribution,
   170  					},
   171  				},
   172  			},
   173  		},
   174  
   175  		{
   176  			name: "with all optional fields",
   177  			input: map[string]interface{}{
   178  				"cleanupIntervalSeconds": json.Number("11"),
   179  				"maxConnections":         json.Number("12"),
   180  				"efConstruction":         json.Number("13"),
   181  				"vectorCacheMaxObjects":  json.Number("14"),
   182  				"ef":                     json.Number("15"),
   183  				"flatSearchCutoff":       json.Number("16"),
   184  				"dynamicEfMin":           json.Number("17"),
   185  				"dynamicEfMax":           json.Number("18"),
   186  				"dynamicEfFactor":        json.Number("19"),
   187  				"skip":                   true,
   188  				"distance":               "hamming",
   189  			},
   190  			expected: UserConfig{
   191  				CleanupIntervalSeconds: 11,
   192  				MaxConnections:         12,
   193  				EFConstruction:         13,
   194  				VectorCacheMaxObjects:  14,
   195  				EF:                     15,
   196  				FlatSearchCutoff:       16,
   197  				DynamicEFMin:           17,
   198  				DynamicEFMax:           18,
   199  				DynamicEFFactor:        19,
   200  				Skip:                   true,
   201  				Distance:               "hamming",
   202  				PQ: PQConfig{
   203  					Enabled:        DefaultPQEnabled,
   204  					BitCompression: DefaultPQBitCompression,
   205  					Segments:       DefaultPQSegments,
   206  					Centroids:      DefaultPQCentroids,
   207  					TrainingLimit:  DefaultPQTrainingLimit,
   208  					Encoder: PQEncoder{
   209  						Type:         DefaultPQEncoderType,
   210  						Distribution: DefaultPQEncoderDistribution,
   211  					},
   212  				},
   213  			},
   214  		},
   215  
   216  		{
   217  			// opposed to from the API
   218  			name: "with raw data as floats",
   219  			input: map[string]interface{}{
   220  				"cleanupIntervalSeconds": float64(11),
   221  				"maxConnections":         float64(12),
   222  				"efConstruction":         float64(13),
   223  				"vectorCacheMaxObjects":  float64(14),
   224  				"ef":                     float64(15),
   225  				"flatSearchCutoff":       float64(16),
   226  				"dynamicEfMin":           float64(17),
   227  				"dynamicEfMax":           float64(18),
   228  				"dynamicEfFactor":        float64(19),
   229  			},
   230  			expected: UserConfig{
   231  				CleanupIntervalSeconds: 11,
   232  				MaxConnections:         12,
   233  				EFConstruction:         13,
   234  				VectorCacheMaxObjects:  14,
   235  				EF:                     15,
   236  				FlatSearchCutoff:       16,
   237  				DynamicEFMin:           17,
   238  				DynamicEFMax:           18,
   239  				DynamicEFFactor:        19,
   240  				Distance:               common.DefaultDistanceMetric,
   241  				PQ: PQConfig{
   242  					Enabled:        DefaultPQEnabled,
   243  					BitCompression: DefaultPQBitCompression,
   244  					Segments:       DefaultPQSegments,
   245  					Centroids:      DefaultPQCentroids,
   246  					TrainingLimit:  DefaultPQTrainingLimit,
   247  					Encoder: PQEncoder{
   248  						Type:         DefaultPQEncoderType,
   249  						Distribution: DefaultPQEncoderDistribution,
   250  					},
   251  				},
   252  			},
   253  		},
   254  
   255  		{
   256  			name: "with pq tile normal encoder",
   257  			input: map[string]interface{}{
   258  				"cleanupIntervalSeconds": float64(11),
   259  				"maxConnections":         float64(12),
   260  				"efConstruction":         float64(13),
   261  				"vectorCacheMaxObjects":  float64(14),
   262  				"ef":                     float64(15),
   263  				"flatSearchCutoff":       float64(16),
   264  				"dynamicEfMin":           float64(17),
   265  				"dynamicEfMax":           float64(18),
   266  				"dynamicEfFactor":        float64(19),
   267  				"pq": map[string]interface{}{
   268  					"enabled":        true,
   269  					"bitCompression": false,
   270  					"segments":       float64(64),
   271  					"centroids":      float64(DefaultPQCentroids),
   272  					"trainingLimit":  float64(DefaultPQTrainingLimit),
   273  					"encoder": map[string]interface{}{
   274  						"type":         "tile",
   275  						"distribution": "normal",
   276  					},
   277  				},
   278  			},
   279  			expected: UserConfig{
   280  				CleanupIntervalSeconds: 11,
   281  				MaxConnections:         12,
   282  				EFConstruction:         13,
   283  				VectorCacheMaxObjects:  14,
   284  				EF:                     15,
   285  				FlatSearchCutoff:       16,
   286  				DynamicEFMin:           17,
   287  				DynamicEFMax:           18,
   288  				DynamicEFFactor:        19,
   289  				Distance:               common.DefaultDistanceMetric,
   290  				PQ: PQConfig{
   291  					Enabled:       true,
   292  					Segments:      64,
   293  					Centroids:     DefaultPQCentroids,
   294  					TrainingLimit: DefaultPQTrainingLimit,
   295  					Encoder: PQEncoder{
   296  						Type:         "tile",
   297  						Distribution: "normal",
   298  					},
   299  				},
   300  			},
   301  		},
   302  
   303  		{
   304  			name: "with pq kmeans normal encoder",
   305  			input: map[string]interface{}{
   306  				"cleanupIntervalSeconds": float64(11),
   307  				"maxConnections":         float64(12),
   308  				"efConstruction":         float64(13),
   309  				"vectorCacheMaxObjects":  float64(14),
   310  				"ef":                     float64(15),
   311  				"flatSearchCutoff":       float64(16),
   312  				"dynamicEfMin":           float64(17),
   313  				"dynamicEfMax":           float64(18),
   314  				"dynamicEfFactor":        float64(19),
   315  				"pq": map[string]interface{}{
   316  					"enabled":        true,
   317  					"bitCompression": false,
   318  					"segments":       float64(64),
   319  					"centroids":      float64(DefaultPQCentroids),
   320  					"trainingLimit":  float64(DefaultPQTrainingLimit),
   321  					"encoder": map[string]interface{}{
   322  						"type": PQEncoderTypeKMeans,
   323  					},
   324  				},
   325  			},
   326  			expected: UserConfig{
   327  				CleanupIntervalSeconds: 11,
   328  				MaxConnections:         12,
   329  				EFConstruction:         13,
   330  				VectorCacheMaxObjects:  14,
   331  				EF:                     15,
   332  				FlatSearchCutoff:       16,
   333  				DynamicEFMin:           17,
   334  				DynamicEFMax:           18,
   335  				DynamicEFFactor:        19,
   336  				Distance:               common.DefaultDistanceMetric,
   337  				PQ: PQConfig{
   338  					Enabled:       true,
   339  					Segments:      64,
   340  					Centroids:     DefaultPQCentroids,
   341  					TrainingLimit: DefaultPQTrainingLimit,
   342  					Encoder: PQEncoder{
   343  						Type:         DefaultPQEncoderType,
   344  						Distribution: DefaultPQEncoderDistribution,
   345  					},
   346  				},
   347  			},
   348  		},
   349  
   350  		{
   351  			name: "with invalid encoder",
   352  			input: map[string]interface{}{
   353  				"pq": map[string]interface{}{
   354  					"enabled": true,
   355  					"encoder": map[string]interface{}{
   356  						"type": "bernoulli",
   357  					},
   358  				},
   359  			},
   360  			expectErr:    true,
   361  			expectErrMsg: "invalid encoder type bernoulli",
   362  		},
   363  
   364  		{
   365  			name: "with invalid distribution",
   366  			input: map[string]interface{}{
   367  				"pq": map[string]interface{}{
   368  					"enabled": true,
   369  					"encoder": map[string]interface{}{
   370  						"distribution": "lognormal",
   371  					},
   372  				},
   373  			},
   374  			expectErr:    true,
   375  			expectErrMsg: "invalid encoder distribution lognormal",
   376  		},
   377  
   378  		{
   379  			// opposed to from the API
   380  			name: "with rounded vectorCacheMaxObjects that would otherwise overflow",
   381  			input: map[string]interface{}{
   382  				"cleanupIntervalSeconds": json.Number("11"),
   383  				"maxConnections":         json.Number("12"),
   384  				"efConstruction":         json.Number("13"),
   385  				"vectorCacheMaxObjects":  json.Number("9223372036854776000"),
   386  				"ef":                     json.Number("15"),
   387  				"flatSearchCutoff":       json.Number("16"),
   388  				"dynamicEfMin":           json.Number("17"),
   389  				"dynamicEfMax":           json.Number("18"),
   390  				"dynamicEfFactor":        json.Number("19"),
   391  			},
   392  			expected: UserConfig{
   393  				CleanupIntervalSeconds: 11,
   394  				MaxConnections:         12,
   395  				EFConstruction:         13,
   396  				VectorCacheMaxObjects:  math.MaxInt64,
   397  				EF:                     15,
   398  				FlatSearchCutoff:       16,
   399  				DynamicEFMin:           17,
   400  				DynamicEFMax:           18,
   401  				DynamicEFFactor:        19,
   402  				Distance:               common.DefaultDistanceMetric,
   403  				PQ: PQConfig{
   404  					Enabled:        DefaultPQEnabled,
   405  					BitCompression: DefaultPQBitCompression,
   406  					Segments:       DefaultPQSegments,
   407  					Centroids:      DefaultPQCentroids,
   408  					TrainingLimit:  DefaultPQTrainingLimit,
   409  					Encoder: PQEncoder{
   410  						Type:         DefaultPQEncoderType,
   411  						Distribution: DefaultPQEncoderDistribution,
   412  					},
   413  				},
   414  			},
   415  		},
   416  		{
   417  			name: "invalid max connections (json)",
   418  			input: map[string]interface{}{
   419  				"maxConnections": json.Number("0"),
   420  			},
   421  			expectErr: true,
   422  			expectErrMsg: "maxConnections must be a positive integer " +
   423  				"with a minimum of 4",
   424  		},
   425  		{
   426  			name: "invalid max connections (float)",
   427  			input: map[string]interface{}{
   428  				"maxConnections": float64(3),
   429  			},
   430  			expectErr: true,
   431  			expectErrMsg: "maxConnections must be a positive integer " +
   432  				"with a minimum of 4",
   433  		},
   434  		{
   435  			name: "invalid efConstruction (json)",
   436  			input: map[string]interface{}{
   437  				"efConstruction": json.Number("0"),
   438  			},
   439  			expectErr: true,
   440  			expectErrMsg: "efConstruction must be a positive integer " +
   441  				"with a minimum of 4",
   442  		},
   443  		{
   444  			name: "invalid efConstruction (float)",
   445  			input: map[string]interface{}{
   446  				"efConstruction": float64(3),
   447  			},
   448  			expectErr: true,
   449  			expectErrMsg: "efConstruction must be a positive integer " +
   450  				"with a minimum of 4",
   451  		},
   452  		{
   453  			name: "with bq",
   454  			input: map[string]interface{}{
   455  				"cleanupIntervalSeconds": float64(11),
   456  				"maxConnections":         float64(12),
   457  				"efConstruction":         float64(13),
   458  				"vectorCacheMaxObjects":  float64(14),
   459  				"ef":                     float64(15),
   460  				"flatSearchCutoff":       float64(16),
   461  				"dynamicEfMin":           float64(17),
   462  				"dynamicEfMax":           float64(18),
   463  				"dynamicEfFactor":        float64(19),
   464  				"bq": map[string]interface{}{
   465  					"enabled": true,
   466  				},
   467  			},
   468  			expected: UserConfig{
   469  				CleanupIntervalSeconds: 11,
   470  				MaxConnections:         12,
   471  				EFConstruction:         13,
   472  				VectorCacheMaxObjects:  14,
   473  				EF:                     15,
   474  				FlatSearchCutoff:       16,
   475  				DynamicEFMin:           17,
   476  				DynamicEFMax:           18,
   477  				DynamicEFFactor:        19,
   478  				Distance:               common.DefaultDistanceMetric,
   479  				PQ: PQConfig{
   480  					Enabled:       false,
   481  					Segments:      0,
   482  					Centroids:     DefaultPQCentroids,
   483  					TrainingLimit: DefaultPQTrainingLimit,
   484  					Encoder: PQEncoder{
   485  						Type:         DefaultPQEncoderType,
   486  						Distribution: DefaultPQEncoderDistribution,
   487  					},
   488  				},
   489  				BQ: BQConfig{
   490  					Enabled: true,
   491  				},
   492  			},
   493  		},
   494  		{
   495  			name: "with invalid compression",
   496  			input: map[string]interface{}{
   497  				"pq": map[string]interface{}{
   498  					"enabled": true,
   499  					"encoder": map[string]interface{}{
   500  						"type": "kmeans",
   501  					},
   502  				},
   503  				"bq": map[string]interface{}{
   504  					"enabled": true,
   505  				},
   506  			},
   507  			expectErr:    true,
   508  			expectErrMsg: "invalid hnsw config: two compression methods enabled: PQ and BQ",
   509  		},
   510  	}
   511  
   512  	for _, test := range tests {
   513  		t.Run(test.name, func(t *testing.T) {
   514  			cfg, err := ParseAndValidateConfig(test.input)
   515  			if test.expectErr {
   516  				require.NotNil(t, err)
   517  				assert.Contains(t, err.Error(), test.expectErrMsg)
   518  				return
   519  			} else {
   520  				assert.Nil(t, err)
   521  				assert.Equal(t, test.expected, cfg)
   522  			}
   523  		})
   524  	}
   525  }