github.com/weaviate/weaviate@v1.24.6/modules/text2vec-aws/vectorizer/class_settings.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package vectorizer
    13  
    14  import (
    15  	"fmt"
    16  	"strings"
    17  
    18  	"github.com/pkg/errors"
    19  
    20  	"github.com/weaviate/weaviate/entities/models"
    21  	"github.com/weaviate/weaviate/entities/moduletools"
    22  	"github.com/weaviate/weaviate/entities/schema"
    23  	basesettings "github.com/weaviate/weaviate/usecases/modulecomponents/settings"
    24  )
    25  
    26  const (
    27  	serviceProperty       = "service"
    28  	regionProperty        = "region"
    29  	modelProperty         = "model"
    30  	endpointProperty      = "endpoint"
    31  	targetModelProperty   = "targetModel"
    32  	targetVariantProperty = "targetVariant"
    33  )
    34  
    35  const (
    36  	DefaultVectorizeClassName    = false
    37  	DefaultPropertyIndexed       = true
    38  	DefaultVectorizePropertyName = false
    39  	DefaultService               = "bedrock"
    40  )
    41  
    42  var availableAWSServices = []string{
    43  	"bedrock",
    44  	"sagemaker",
    45  }
    46  
    47  var availableAWSBedrockModels = []string{
    48  	"amazon.titan-embed-text-v1",
    49  	"cohere.embed-english-v3",
    50  	"cohere.embed-multilingual-v3",
    51  }
    52  
// classSettings exposes the text2vec-aws specific settings of a class. It
// embeds basesettings.BaseClassSettings and additionally keeps the
// moduletools.ClassConfig it was created from.
type classSettings struct {
	basesettings.BaseClassSettings
	// cfg is the config handed to NewClassSettings; Validate rejects a nil cfg.
	cfg moduletools.ClassConfig
}
    57  
    58  func NewClassSettings(cfg moduletools.ClassConfig) *classSettings {
    59  	return &classSettings{cfg: cfg, BaseClassSettings: *basesettings.NewBaseClassSettings(cfg)}
    60  }
    61  
    62  func (ic *classSettings) Validate(class *models.Class) error {
    63  	if ic.cfg == nil {
    64  		// we would receive a nil-config on cross-class requests, such as Explore{}
    65  		return errors.New("empty config")
    66  	}
    67  
    68  	var errorMessages []string
    69  
    70  	if err := ic.BaseClassSettings.Validate(); err != nil {
    71  		errorMessages = append(errorMessages, err.Error())
    72  	}
    73  
    74  	service := ic.Service()
    75  	if service == "" || !ic.validatAvailableAWSSetting(service, availableAWSServices) {
    76  		errorMessages = append(errorMessages, fmt.Sprintf("wrong %s, available services are: %v", serviceProperty, availableAWSServices))
    77  	}
    78  	region := ic.Region()
    79  	if region == "" {
    80  		errorMessages = append(errorMessages, fmt.Sprintf("%s cannot be empty", regionProperty))
    81  	}
    82  
    83  	if isBedrock(service) {
    84  		model := ic.Model()
    85  		if model == "" || !ic.validatAvailableAWSSetting(model, availableAWSBedrockModels) {
    86  			errorMessages = append(errorMessages, fmt.Sprintf("wrong %s, available models are: %v", modelProperty, availableAWSBedrockModels))
    87  		}
    88  		endpoint := ic.Endpoint()
    89  		if endpoint != "" {
    90  			errorMessages = append(errorMessages, fmt.Sprintf("wrong configuration: %s, not applicable to %s", endpoint, service))
    91  		}
    92  	}
    93  
    94  	if isSagemaker(service) {
    95  		endpoint := ic.Endpoint()
    96  		if endpoint == "" {
    97  			errorMessages = append(errorMessages, fmt.Sprintf("%s cannot be empty", endpointProperty))
    98  		}
    99  		model := ic.Model()
   100  		if model != "" {
   101  			errorMessages = append(errorMessages, fmt.Sprintf("wrong configuration: %s, not applicable to %s. did you mean %s", modelProperty, service, targetModelProperty))
   102  		}
   103  	}
   104  
   105  	if len(errorMessages) > 0 {
   106  		return fmt.Errorf("%s", strings.Join(errorMessages, ", "))
   107  	}
   108  
   109  	err := ic.validateIndexState(class, ic)
   110  	if err != nil {
   111  		return err
   112  	}
   113  
   114  	return nil
   115  }
   116  
   117  func (ic *classSettings) validatAvailableAWSSetting(value string, availableValues []string) bool {
   118  	for i := range availableValues {
   119  		if value == availableValues[i] {
   120  			return true
   121  		}
   122  	}
   123  	return false
   124  }
   125  
   126  func (ic *classSettings) getStringProperty(name, defaultValue string) string {
   127  	return ic.BaseClassSettings.GetPropertyAsString(name, defaultValue)
   128  }
   129  
   130  func (cv *classSettings) validateIndexState(class *models.Class, settings ClassSettings) error {
   131  	if settings.VectorizeClassName() {
   132  		// if the user chooses to vectorize the classname, vector-building will
   133  		// always be possible, no need to investigate further
   134  
   135  		return nil
   136  	}
   137  
   138  	// search if there is at least one indexed, string/text prop. If found pass
   139  	// validation
   140  	for _, prop := range class.Properties {
   141  		if len(prop.DataType) < 1 {
   142  			return errors.Errorf("property %s must have at least one datatype: "+
   143  				"got %v", prop.Name, prop.DataType)
   144  		}
   145  
   146  		if prop.DataType[0] != string(schema.DataTypeString) &&
   147  			prop.DataType[0] != string(schema.DataTypeText) {
   148  			// we can only vectorize text-like props
   149  			continue
   150  		}
   151  
   152  		if settings.PropertyIndexed(prop.Name) {
   153  			// found at least one, this is a valid schema
   154  			return nil
   155  		}
   156  	}
   157  
   158  	return fmt.Errorf("invalid properties: didn't find a single property which is " +
   159  		"of type string or text and is not excluded from indexing. In addition the " +
   160  		"class name is excluded from vectorization as well, meaning that it cannot be " +
   161  		"used to determine the vector position. To fix this, set 'vectorizeClassName' " +
   162  		"to true if the class name is contextionary-valid. Alternatively add at least " +
   163  		"contextionary-valid text/string property which is not excluded from " +
   164  		"indexing")
   165  }
   166  
   167  // Aws params
   168  func (ic *classSettings) Service() string {
   169  	return ic.getStringProperty(serviceProperty, DefaultService)
   170  }
   171  
   172  func (ic *classSettings) Region() string {
   173  	return ic.getStringProperty(regionProperty, "")
   174  }
   175  
   176  func (ic *classSettings) Model() string {
   177  	return ic.getStringProperty(modelProperty, "")
   178  }
   179  
   180  func (ic *classSettings) Endpoint() string {
   181  	return ic.getStringProperty(endpointProperty, "")
   182  }
   183  
   184  func (ic *classSettings) TargetModel() string {
   185  	return ic.getStringProperty(targetModelProperty, "")
   186  }
   187  
   188  func (ic *classSettings) TargetVariant() string {
   189  	return ic.getStringProperty(targetVariantProperty, "")
   190  }
   191  
   192  func isSagemaker(service string) bool {
   193  	return service == "sagemaker"
   194  }
   195  
   196  func isBedrock(service string) bool {
   197  	return service == "bedrock"
   198  }