github.com/weaviate/weaviate@v1.24.6/entities/vectorindex/hnsw/config.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package hnsw
    13  
    14  import (
    15  	"fmt"
    16  	"strings"
    17  
    18  	vectorIndexCommon "github.com/weaviate/weaviate/entities/vectorindex/common"
    19  
    20  	"github.com/weaviate/weaviate/entities/schema"
    21  )
    22  
    23  const (
    24  	// Set these defaults if the user leaves them blank
    25  	DefaultCleanupIntervalSeconds = 5 * 60
    26  	DefaultMaxConnections         = 64
    27  	DefaultEFConstruction         = 128
    28  	DefaultEF                     = -1 // indicates "let Weaviate pick"
    29  	DefaultDynamicEFMin           = 100
    30  	DefaultDynamicEFMax           = 500
    31  	DefaultDynamicEFFactor        = 8
    32  	DefaultSkip                   = false
    33  	DefaultFlatSearchCutoff       = 40000
    34  
    35  	// Fail validation if those criteria are not met
    36  	MinmumMaxConnections = 4
    37  	MinmumEFConstruction = 4
    38  )
    39  
    40  // UserConfig bundles all values settable by a user in the per-class settings
    41  type UserConfig struct {
    42  	Skip                   bool     `json:"skip"`
    43  	CleanupIntervalSeconds int      `json:"cleanupIntervalSeconds"`
    44  	MaxConnections         int      `json:"maxConnections"`
    45  	EFConstruction         int      `json:"efConstruction"`
    46  	EF                     int      `json:"ef"`
    47  	DynamicEFMin           int      `json:"dynamicEfMin"`
    48  	DynamicEFMax           int      `json:"dynamicEfMax"`
    49  	DynamicEFFactor        int      `json:"dynamicEfFactor"`
    50  	VectorCacheMaxObjects  int      `json:"vectorCacheMaxObjects"`
    51  	FlatSearchCutoff       int      `json:"flatSearchCutoff"`
    52  	Distance               string   `json:"distance"`
    53  	PQ                     PQConfig `json:"pq"`
    54  	BQ                     BQConfig `json:"bq"`
    55  }
    56  
    57  // IndexType returns the type of the underlying vector index, thus making sure
    58  // the schema.VectorIndexConfig interface is implemented
    59  func (u UserConfig) IndexType() string {
    60  	return "hnsw"
    61  }
    62  
    63  func (u UserConfig) DistanceName() string {
    64  	return u.Distance
    65  }
    66  
    67  // SetDefaults in the user-specifyable part of the config
    68  func (u *UserConfig) SetDefaults() {
    69  	u.MaxConnections = DefaultMaxConnections
    70  	u.EFConstruction = DefaultEFConstruction
    71  	u.CleanupIntervalSeconds = DefaultCleanupIntervalSeconds
    72  	u.VectorCacheMaxObjects = vectorIndexCommon.DefaultVectorCacheMaxObjects
    73  	u.EF = DefaultEF
    74  	u.DynamicEFFactor = DefaultDynamicEFFactor
    75  	u.DynamicEFMax = DefaultDynamicEFMax
    76  	u.DynamicEFMin = DefaultDynamicEFMin
    77  	u.Skip = DefaultSkip
    78  	u.FlatSearchCutoff = DefaultFlatSearchCutoff
    79  	u.Distance = vectorIndexCommon.DefaultDistanceMetric
    80  	u.PQ = PQConfig{
    81  		Enabled:        DefaultPQEnabled,
    82  		BitCompression: DefaultPQBitCompression,
    83  		Segments:       DefaultPQSegments,
    84  		Centroids:      DefaultPQCentroids,
    85  		TrainingLimit:  DefaultPQTrainingLimit,
    86  		Encoder: PQEncoder{
    87  			Type:         DefaultPQEncoderType,
    88  			Distribution: DefaultPQEncoderDistribution,
    89  		},
    90  	}
    91  	u.BQ = BQConfig{
    92  		Enabled: DefaultBQEnabled,
    93  	}
    94  }
    95  
    96  // ParseAndValidateConfig from an unknown input value, as this is not further
    97  // specified in the API to allow of exchanging the index type
    98  func ParseAndValidateConfig(input interface{}) (schema.VectorIndexConfig, error) {
    99  	uc := UserConfig{}
   100  	uc.SetDefaults()
   101  
   102  	if input == nil {
   103  		return uc, nil
   104  	}
   105  
   106  	asMap, ok := input.(map[string]interface{})
   107  	if !ok || asMap == nil {
   108  		return uc, fmt.Errorf("input must be a non-nil map")
   109  	}
   110  
   111  	if err := vectorIndexCommon.OptionalIntFromMap(asMap, "maxConnections", func(v int) {
   112  		uc.MaxConnections = v
   113  	}); err != nil {
   114  		return uc, err
   115  	}
   116  
   117  	if err := vectorIndexCommon.OptionalIntFromMap(asMap, "cleanupIntervalSeconds", func(v int) {
   118  		uc.CleanupIntervalSeconds = v
   119  	}); err != nil {
   120  		return uc, err
   121  	}
   122  
   123  	if err := vectorIndexCommon.OptionalIntFromMap(asMap, "efConstruction", func(v int) {
   124  		uc.EFConstruction = v
   125  	}); err != nil {
   126  		return uc, err
   127  	}
   128  
   129  	if err := vectorIndexCommon.OptionalIntFromMap(asMap, "ef", func(v int) {
   130  		uc.EF = v
   131  	}); err != nil {
   132  		return uc, err
   133  	}
   134  
   135  	if err := vectorIndexCommon.OptionalIntFromMap(asMap, "dynamicEfFactor", func(v int) {
   136  		uc.DynamicEFFactor = v
   137  	}); err != nil {
   138  		return uc, err
   139  	}
   140  
   141  	if err := vectorIndexCommon.OptionalIntFromMap(asMap, "dynamicEfMax", func(v int) {
   142  		uc.DynamicEFMax = v
   143  	}); err != nil {
   144  		return uc, err
   145  	}
   146  
   147  	if err := vectorIndexCommon.OptionalIntFromMap(asMap, "dynamicEfMin", func(v int) {
   148  		uc.DynamicEFMin = v
   149  	}); err != nil {
   150  		return uc, err
   151  	}
   152  
   153  	if err := vectorIndexCommon.OptionalIntFromMap(asMap, "vectorCacheMaxObjects", func(v int) {
   154  		uc.VectorCacheMaxObjects = v
   155  	}); err != nil {
   156  		return uc, err
   157  	}
   158  
   159  	if err := vectorIndexCommon.OptionalIntFromMap(asMap, "flatSearchCutoff", func(v int) {
   160  		uc.FlatSearchCutoff = v
   161  	}); err != nil {
   162  		return uc, err
   163  	}
   164  
   165  	if err := vectorIndexCommon.OptionalBoolFromMap(asMap, "skip", func(v bool) {
   166  		uc.Skip = v
   167  	}); err != nil {
   168  		return uc, err
   169  	}
   170  
   171  	if err := vectorIndexCommon.OptionalStringFromMap(asMap, "distance", func(v string) {
   172  		uc.Distance = v
   173  	}); err != nil {
   174  		return uc, err
   175  	}
   176  
   177  	if err := parsePQMap(asMap, &uc.PQ); err != nil {
   178  		return uc, err
   179  	}
   180  
   181  	if err := parseBQMap(asMap, &uc.BQ); err != nil {
   182  		return uc, err
   183  	}
   184  
   185  	return uc, uc.validate()
   186  }
   187  
   188  func (u *UserConfig) validate() error {
   189  	var errMsgs []string
   190  	if u.MaxConnections < MinmumMaxConnections {
   191  		errMsgs = append(errMsgs, fmt.Sprintf(
   192  			"maxConnections must be a positive integer with a minimum of %d",
   193  			MinmumMaxConnections,
   194  		))
   195  	}
   196  
   197  	if u.EFConstruction < MinmumEFConstruction {
   198  		errMsgs = append(errMsgs, fmt.Sprintf(
   199  			"efConstruction must be a positive integer with a minimum of %d",
   200  			MinmumMaxConnections,
   201  		))
   202  	}
   203  
   204  	if len(errMsgs) > 0 {
   205  		return fmt.Errorf("invalid hnsw config: %s",
   206  			strings.Join(errMsgs, ", "))
   207  	}
   208  
   209  	if u.PQ.Enabled && u.BQ.Enabled {
   210  		return fmt.Errorf("invalid hnsw config: two compression methods enabled: PQ and BQ")
   211  	}
   212  
   213  	return nil
   214  }
   215  
   216  func NewDefaultUserConfig() UserConfig {
   217  	uc := UserConfig{}
   218  	uc.SetDefaults()
   219  	return uc
   220  }