github.com/weaviate/weaviate@v1.24.6/modules/text2vec-palm/vectorizer/class_settings.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package vectorizer
    13  
    14  import (
    15  	"fmt"
    16  	"strings"
    17  
    18  	"github.com/pkg/errors"
    19  
    20  	"github.com/weaviate/weaviate/entities/models"
    21  	"github.com/weaviate/weaviate/entities/moduletools"
    22  	"github.com/weaviate/weaviate/entities/schema"
    23  	basesettings "github.com/weaviate/weaviate/usecases/modulecomponents/settings"
    24  )
    25  
    26  const (
    27  	apiEndpointProperty = "apiEndpoint"
    28  	projectIDProperty   = "projectId"
    29  	modelIDProperty     = "modelId"
    30  	titleProperty       = "titleProperty"
    31  )
    32  
    33  const (
    34  	DefaultVectorizeClassName     = false
    35  	DefaultPropertyIndexed        = true
    36  	DefaultVectorizePropertyName  = false
    37  	DefaultApiEndpoint            = "us-central1-aiplatform.googleapis.com"
    38  	DefaultModelID                = "textembedding-gecko@001"
    39  	DefaulGenerativeAIApiEndpoint = "generativelanguage.googleapis.com"
    40  	DefaulGenerativeAIModelID     = "embedding-gecko-001"
    41  )
    42  
    43  var availablePalmModels = []string{
    44  	DefaultModelID,
    45  	"textembedding-gecko@latest",
    46  	"textembedding-gecko-multilingual@latest",
    47  	"textembedding-gecko@003",
    48  	"textembedding-gecko@002",
    49  	"textembedding-gecko-multilingual@001",
    50  	"textembedding-gecko@001",
    51  }
    52  
    53  var availableGenerativeAIModels = []string{
    54  	DefaulGenerativeAIModelID,
    55  }
    56  
    57  type classSettings struct {
    58  	basesettings.BaseClassSettings
    59  	cfg moduletools.ClassConfig
    60  }
    61  
    62  func NewClassSettings(cfg moduletools.ClassConfig) *classSettings {
    63  	return &classSettings{cfg: cfg, BaseClassSettings: *basesettings.NewBaseClassSettings(cfg)}
    64  }
    65  
    66  func (ic *classSettings) Validate(class *models.Class) error {
    67  	if ic.cfg == nil {
    68  		// we would receive a nil-config on cross-class requests, such as Explore{}
    69  		return errors.New("empty config")
    70  	}
    71  
    72  	var errorMessages []string
    73  
    74  	if err := ic.BaseClassSettings.Validate(); err != nil {
    75  		errorMessages = append(errorMessages, err.Error())
    76  	}
    77  
    78  	apiEndpoint := ic.ApiEndpoint()
    79  	model := ic.ModelID()
    80  	if apiEndpoint == DefaulGenerativeAIApiEndpoint {
    81  		if model != "" && !ic.validatePalmSetting(model, availableGenerativeAIModels) {
    82  			errorMessages = append(errorMessages, fmt.Sprintf("wrong %s available Generative AI model names are: %v", modelIDProperty, availableGenerativeAIModels))
    83  		}
    84  	} else {
    85  		projectID := ic.ProjectID()
    86  		if projectID == "" {
    87  			errorMessages = append(errorMessages, fmt.Sprintf("%s cannot be empty", projectIDProperty))
    88  		}
    89  		if model != "" && !ic.validatePalmSetting(model, availablePalmModels) {
    90  			errorMessages = append(errorMessages, fmt.Sprintf("wrong %s available model names are: %v", modelIDProperty, availablePalmModels))
    91  		}
    92  	}
    93  
    94  	if len(errorMessages) > 0 {
    95  		return fmt.Errorf("%s", strings.Join(errorMessages, ", "))
    96  	}
    97  
    98  	err := ic.validateIndexState(class, ic)
    99  	if err != nil {
   100  		return err
   101  	}
   102  
   103  	return nil
   104  }
   105  
   106  func (ic *classSettings) validatePalmSetting(value string, availableValues []string) bool {
   107  	for i := range availableValues {
   108  		if value == availableValues[i] {
   109  			return true
   110  		}
   111  	}
   112  	return false
   113  }
   114  
   115  func (ic *classSettings) getStringProperty(name, defaultValue string) string {
   116  	return ic.BaseClassSettings.GetPropertyAsString(name, defaultValue)
   117  }
   118  
   119  func (cv *classSettings) validateIndexState(class *models.Class, settings ClassSettings) error {
   120  	if settings.VectorizeClassName() {
   121  		// if the user chooses to vectorize the classname, vector-building will
   122  		// always be possible, no need to investigate further
   123  
   124  		return nil
   125  	}
   126  
   127  	// search if there is at least one indexed, string/text prop. If found pass
   128  	// validation
   129  	for _, prop := range class.Properties {
   130  		if len(prop.DataType) < 1 {
   131  			return errors.Errorf("property %s must have at least one datatype: "+
   132  				"got %v", prop.Name, prop.DataType)
   133  		}
   134  
   135  		if prop.DataType[0] != string(schema.DataTypeString) &&
   136  			prop.DataType[0] != string(schema.DataTypeText) {
   137  			// we can only vectorize text-like props
   138  			continue
   139  		}
   140  
   141  		if settings.PropertyIndexed(prop.Name) {
   142  			// found at least one, this is a valid schema
   143  			return nil
   144  		}
   145  	}
   146  
   147  	return fmt.Errorf("invalid properties: didn't find a single property which is " +
   148  		"of type string or text and is not excluded from indexing. In addition the " +
   149  		"class name is excluded from vectorization as well, meaning that it cannot be " +
   150  		"used to determine the vector position. To fix this, set 'vectorizeClassName' " +
   151  		"to true if the class name is contextionary-valid. Alternatively add at least " +
   152  		"contextionary-valid text/string property which is not excluded from " +
   153  		"indexing")
   154  }
   155  
   156  func (ic *classSettings) getDefaultModel(apiEndpoint string) string {
   157  	if apiEndpoint == DefaulGenerativeAIApiEndpoint {
   158  		return DefaulGenerativeAIModelID
   159  	}
   160  	return DefaultModelID
   161  }
   162  
   163  // PaLM params
   164  func (ic *classSettings) ApiEndpoint() string {
   165  	return ic.getStringProperty(apiEndpointProperty, DefaultApiEndpoint)
   166  }
   167  
   168  func (ic *classSettings) ProjectID() string {
   169  	return ic.getStringProperty(projectIDProperty, "")
   170  }
   171  
   172  func (ic *classSettings) ModelID() string {
   173  	return ic.getStringProperty(modelIDProperty, ic.getDefaultModel(ic.ApiEndpoint()))
   174  }
   175  
   176  func (ic *classSettings) TitleProperty() string {
   177  	return ic.getStringProperty(titleProperty, "")
   178  }