github.com/weaviate/weaviate@v1.24.6/modules/text2vec-voyageai/vectorizer/class_settings.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package vectorizer
    13  
    14  import (
    15  	"fmt"
    16  
    17  	"github.com/pkg/errors"
    18  
    19  	"github.com/weaviate/weaviate/entities/models"
    20  	"github.com/weaviate/weaviate/entities/moduletools"
    21  	"github.com/weaviate/weaviate/entities/schema"
    22  	basesettings "github.com/weaviate/weaviate/usecases/modulecomponents/settings"
    23  )
    24  
    25  const (
    26  	DefaultBaseURL               = "https://api.voyageai.com/v1"
    27  	DefaultVoyageAIModel         = "voyage-large-2"
    28  	DefaultTruncate              = true
    29  	DefaultVectorizeClassName    = true
    30  	DefaultPropertyIndexed       = true
    31  	DefaultVectorizePropertyName = false
    32  )
    33  
    34  var (
    35  	availableVoyageAIModels = []string{
    36  		"voyage-large-2", "voyage-code-2", "voyage-2",
    37  	}
    38  	experimetnalVoyageAIModels = []string{}
    39  )
    40  
// classSettings gives access to the VoyageAI-specific settings of a class. It
// embeds basesettings.BaseClassSettings, which the helpers in this file use
// for the actual property lookups and for base validation.
type classSettings struct {
	basesettings.BaseClassSettings
	// cfg is the class config handed to NewClassSettings; Validate rejects a
	// nil cfg with an "empty config" error.
	cfg moduletools.ClassConfig
}
    45  
    46  func NewClassSettings(cfg moduletools.ClassConfig) *classSettings {
    47  	return &classSettings{cfg: cfg, BaseClassSettings: *basesettings.NewBaseClassSettings(cfg)}
    48  }
    49  
    50  func (cs *classSettings) Model() string {
    51  	return cs.getProperty("model", DefaultVoyageAIModel)
    52  }
    53  
    54  func (cs *classSettings) Truncate() bool {
    55  	return cs.getBoolProperty("truncate", DefaultTruncate)
    56  }
    57  
    58  func (cs *classSettings) BaseURL() string {
    59  	return cs.getProperty("baseURL", DefaultBaseURL)
    60  }
    61  
    62  func (cs *classSettings) Validate(class *models.Class) error {
    63  	if cs.cfg == nil {
    64  		// we would receive a nil-config on cross-class requests, such as Explore{}
    65  		return errors.New("empty config")
    66  	}
    67  
    68  	if err := cs.BaseClassSettings.Validate(); err != nil {
    69  		return err
    70  	}
    71  
    72  	model := cs.Model()
    73  	if !cs.validateVoyageAISetting(model, append(availableVoyageAIModels, experimetnalVoyageAIModels...)) {
    74  		return errors.Errorf("wrong VoyageAI model name, available model names are: %v", availableVoyageAIModels)
    75  	}
    76  
    77  	err := cs.validateIndexState(class, cs)
    78  	if err != nil {
    79  		return err
    80  	}
    81  
    82  	return nil
    83  }
    84  
    85  func (cs *classSettings) validateVoyageAISetting(value string, availableValues []string) bool {
    86  	for i := range availableValues {
    87  		if value == availableValues[i] {
    88  			return true
    89  		}
    90  	}
    91  	return false
    92  }
    93  
    94  func (cs *classSettings) getProperty(name, defaultValue string) string {
    95  	return cs.BaseClassSettings.GetPropertyAsString(name, defaultValue)
    96  }
    97  
    98  func (cs *classSettings) getBoolProperty(name string, defaultValue bool) bool {
    99  	return cs.BaseClassSettings.GetPropertyAsBool(name, defaultValue)
   100  }
   101  
   102  func (cs *classSettings) validateIndexState(class *models.Class, settings ClassSettings) error {
   103  	if settings.VectorizeClassName() {
   104  		// if the user chooses to vectorize the classname, vector-building will
   105  		// always be possible, no need to investigate further
   106  
   107  		return nil
   108  	}
   109  
   110  	// search if there is at least one indexed, string/text prop. If found pass
   111  	// validation
   112  	for _, prop := range class.Properties {
   113  		if len(prop.DataType) < 1 {
   114  			return errors.Errorf("property %s must have at least one datatype: "+
   115  				"got %v", prop.Name, prop.DataType)
   116  		}
   117  
   118  		if prop.DataType[0] != string(schema.DataTypeText) {
   119  			// we can only vectorize text-like props
   120  			continue
   121  		}
   122  
   123  		if settings.PropertyIndexed(prop.Name) {
   124  			// found at least one, this is a valid schema
   125  			return nil
   126  		}
   127  	}
   128  
   129  	return fmt.Errorf("invalid properties: didn't find a single property which is " +
   130  		"of type string or text and is not excluded from indexing. In addition the " +
   131  		"class name is excluded from vectorization as well, meaning that it cannot be " +
   132  		"used to determine the vector position. To fix this, set 'vectorizeClassName' " +
   133  		"to true if the class name is contextionary-valid. Alternatively add at least " +
   134  		"contextionary-valid text/string property which is not excluded from " +
   135  		"indexing")
   136  }