github.com/weaviate/weaviate@v1.24.6/modules/text2vec-cohere/vectorizer/class_settings.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package vectorizer
    13  
    14  import (
    15  	"fmt"
    16  
    17  	"github.com/pkg/errors"
    18  
    19  	"github.com/weaviate/weaviate/entities/models"
    20  	"github.com/weaviate/weaviate/entities/moduletools"
    21  	"github.com/weaviate/weaviate/entities/schema"
    22  	basesettings "github.com/weaviate/weaviate/usecases/modulecomponents/settings"
    23  )
    24  
    25  const (
    26  	DefaultBaseURL               = "https://api.cohere.ai"
    27  	DefaultCohereModel           = "embed-multilingual-v3.0"
    28  	DefaultTruncate              = "END"
    29  	DefaultVectorizeClassName    = true
    30  	DefaultPropertyIndexed       = true
    31  	DefaultVectorizePropertyName = false
    32  )
    33  
    34  var (
    35  	availableCohereModels = []string{
    36  		"medium",
    37  		"large", "small", "multilingual-22-12",
    38  		"embed-english-v2.0", "embed-english-light-v2.0", "embed-multilingual-v2.0",
    39  		"embed-english-v3.0", "embed-english-light-v3.0", "embed-multilingual-v3.0", "embed-multilingual-light-v3.0",
    40  	}
    41  	experimetnalCohereModels = []string{"multilingual-2210-alpha"}
    42  	availableTruncates       = []string{"NONE", "START", "END", "LEFT", "RIGHT"}
    43  )
    44  
// classSettings exposes the Cohere-specific vectorizer settings of a class
// (model, truncate, baseURL) on top of the shared base class settings.
type classSettings struct {
	// Embedded shared settings; used here for property lookup
	// (GetPropertyAsString) and for the base part of validation.
	basesettings.BaseClassSettings
	// cfg is the module config handed to NewClassSettings. Validate rejects
	// a nil cfg with an "empty config" error.
	cfg moduletools.ClassConfig
}
    49  
    50  func NewClassSettings(cfg moduletools.ClassConfig) *classSettings {
    51  	return &classSettings{cfg: cfg, BaseClassSettings: *basesettings.NewBaseClassSettings(cfg)}
    52  }
    53  
    54  func (cs *classSettings) Model() string {
    55  	return cs.getProperty("model", DefaultCohereModel)
    56  }
    57  
    58  func (cs *classSettings) Truncate() string {
    59  	return cs.getProperty("truncate", DefaultTruncate)
    60  }
    61  
    62  func (cs *classSettings) BaseURL() string {
    63  	return cs.getProperty("baseURL", DefaultBaseURL)
    64  }
    65  
    66  func (cs *classSettings) Validate(class *models.Class) error {
    67  	if cs.cfg == nil {
    68  		// we would receive a nil-config on cross-class requests, such as Explore{}
    69  		return errors.New("empty config")
    70  	}
    71  
    72  	if err := cs.BaseClassSettings.Validate(); err != nil {
    73  		return err
    74  	}
    75  
    76  	model := cs.Model()
    77  	if !cs.validateCohereSetting(model, append(availableCohereModels, experimetnalCohereModels...)) {
    78  		return errors.Errorf("wrong Cohere model name, available model names are: %v", availableCohereModels)
    79  	}
    80  	truncate := cs.Truncate()
    81  	if !cs.validateCohereSetting(truncate, availableTruncates) {
    82  		return errors.Errorf("wrong truncate type, available types are: %v", availableTruncates)
    83  	}
    84  
    85  	err := cs.validateIndexState(class, cs)
    86  	if err != nil {
    87  		return err
    88  	}
    89  
    90  	return nil
    91  }
    92  
    93  func (cs *classSettings) validateCohereSetting(value string, availableValues []string) bool {
    94  	for i := range availableValues {
    95  		if value == availableValues[i] {
    96  			return true
    97  		}
    98  	}
    99  	return false
   100  }
   101  
   102  func (cs *classSettings) getProperty(name, defaultValue string) string {
   103  	return cs.BaseClassSettings.GetPropertyAsString(name, defaultValue)
   104  }
   105  
   106  func (cs *classSettings) validateIndexState(class *models.Class, settings ClassSettings) error {
   107  	if settings.VectorizeClassName() {
   108  		// if the user chooses to vectorize the classname, vector-building will
   109  		// always be possible, no need to investigate further
   110  
   111  		return nil
   112  	}
   113  
   114  	// search if there is at least one indexed, string/text prop. If found pass
   115  	// validation
   116  	for _, prop := range class.Properties {
   117  		if len(prop.DataType) < 1 {
   118  			return errors.Errorf("property %s must have at least one datatype: "+
   119  				"got %v", prop.Name, prop.DataType)
   120  		}
   121  
   122  		if prop.DataType[0] != string(schema.DataTypeText) {
   123  			// we can only vectorize text-like props
   124  			continue
   125  		}
   126  
   127  		if settings.PropertyIndexed(prop.Name) {
   128  			// found at least one, this is a valid schema
   129  			return nil
   130  		}
   131  	}
   132  
   133  	return fmt.Errorf("invalid properties: didn't find a single property which is " +
   134  		"of type string or text and is not excluded from indexing. In addition the " +
   135  		"class name is excluded from vectorization as well, meaning that it cannot be " +
   136  		"used to determine the vector position. To fix this, set 'vectorizeClassName' " +
   137  		"to true if the class name is contextionary-valid. Alternatively add at least " +
   138  		"contextionary-valid text/string property which is not excluded from " +
   139  		"indexing")
   140  }