github.com/weaviate/weaviate@v1.24.6/usecases/schema/validation.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package schema
    13  
    14  import (
    15  	"context"
    16  	"fmt"
    17  	"strings"
    18  
    19  	"github.com/pkg/errors"
    20  	"github.com/weaviate/weaviate/entities/models"
    21  	"github.com/weaviate/weaviate/entities/schema"
    22  	"github.com/weaviate/weaviate/usecases/config"
    23  )
    24  
    25  func (m *Manager) validateClassNameUniqueness(name string) error {
    26  	pred := func(c *models.Class) bool {
    27  		return strings.EqualFold(name, c.Class)
    28  	}
    29  	existingName := ""
    30  	m.schemaCache.RLockGuard(func() error {
    31  		if cls := m.schemaCache.unsafeFindClassIf(pred); cls != nil {
    32  			existingName = cls.Class
    33  		}
    34  		return nil
    35  	})
    36  
    37  	if existingName == "" {
    38  		return nil
    39  	}
    40  	if name != existingName {
    41  		// It's a permutation
    42  		return fmt.Errorf(
    43  			"class name %q already exists as a permutation of: %q. class names must be unique when lowercased",
    44  			name, existingName)
    45  	}
    46  	return fmt.Errorf("class name %q already exists", name)
    47  }
    48  
    49  // Check that the format of the name is correct
    50  func (m *Manager) validateClassName(ctx context.Context, className string) error {
    51  	_, err := schema.ValidateClassName(className)
    52  	return err
    53  }
    54  
    55  func (m *Manager) validatePropertyTokenization(tokenization string, propertyDataType schema.PropertyDataType) error {
    56  	if propertyDataType.IsPrimitive() {
    57  		primitiveDataType := propertyDataType.AsPrimitive()
    58  
    59  		switch primitiveDataType {
    60  		case schema.DataTypeString, schema.DataTypeStringArray:
    61  			// deprecated as of v1.19, will be migrated to DataTypeText/DataTypeTextArray
    62  			switch tokenization {
    63  			case models.PropertyTokenizationField, models.PropertyTokenizationWord:
    64  				return nil
    65  			}
    66  		case schema.DataTypeText, schema.DataTypeTextArray:
    67  			switch tokenization {
    68  			case models.PropertyTokenizationField, models.PropertyTokenizationWord,
    69  				models.PropertyTokenizationWhitespace, models.PropertyTokenizationLowercase, models.PropertyTokenizationTrigram, models.PropertyTokenizationGse:
    70  				return nil
    71  			}
    72  		default:
    73  			if tokenization == "" {
    74  				return nil
    75  			}
    76  			return fmt.Errorf("Tokenization is not allowed for data type '%s'", primitiveDataType)
    77  		}
    78  		return fmt.Errorf("Tokenization '%s' is not allowed for data type '%s'", tokenization, primitiveDataType)
    79  	}
    80  
    81  	if tokenization == "" {
    82  		return nil
    83  	}
    84  
    85  	if propertyDataType.IsNested() {
    86  		return fmt.Errorf("Tokenization is not allowed for object/object[] data types")
    87  	}
    88  	return fmt.Errorf("Tokenization is not allowed for reference data type")
    89  }
    90  
    91  func (m *Manager) validatePropertyIndexing(prop *models.Property) error {
    92  	if prop.IndexInverted != nil {
    93  		if prop.IndexFilterable != nil || prop.IndexSearchable != nil {
    94  			return fmt.Errorf("`indexInverted` is deprecated and can not be set together with `indexFilterable` or `indexSearchable`")
    95  		}
    96  	}
    97  
    98  	primitiveDataType, isPrimitive := schema.AsPrimitive(prop.DataType)
    99  
   100  	// TODO nested - should not be allowed for blobs (verify backward compat)
   101  	// if prop.IndexFilterable != nil {
   102  	// 	if isPrimitive && primitiveDataType == schema.DataTypeBlob {
   103  	// 		return fmt.Errorf("`indexFilterable` is not allowed for blob data type")
   104  	// 	}
   105  	// }
   106  
   107  	if prop.IndexSearchable != nil {
   108  		validateSet := true
   109  		if isPrimitive {
   110  			switch primitiveDataType {
   111  			case schema.DataTypeString, schema.DataTypeStringArray:
   112  				// string/string[] are migrated to text/text[] later,
   113  				// at this point they are still valid data types, therefore should be handled here
   114  				// true or false allowed
   115  				validateSet = false
   116  			case schema.DataTypeText, schema.DataTypeTextArray:
   117  				// true or false allowed
   118  				validateSet = false
   119  			default:
   120  				// do nothing
   121  			}
   122  		}
   123  
   124  		if validateSet && *prop.IndexSearchable {
   125  			return fmt.Errorf("`indexSearchable` is not allowed for other than text/text[] data types")
   126  		}
   127  	}
   128  
   129  	return nil
   130  }
   131  
   132  type validatorNestedProperty func(property *models.NestedProperty,
   133  	primitiveDataType, nestedDataType schema.DataType,
   134  	isPrimitive, isNested bool, propNamePrefix string) error
   135  
// validatorsNestedProperty lists the checks that validateNestedProperties runs
// against every nested property. The checks are applied in slice order and the
// first error returned stops validation.
var validatorsNestedProperty = []validatorNestedProperty{
	validateNestedPropertyName,
	validateNestedPropertyDataType,
	validateNestedPropertyTokenization,
	validateNestedPropertyIndexFilterable,
	validateNestedPropertyIndexSearchable,
}
   143  
   144  func validateNestedProperties(properties []*models.NestedProperty, propNamePrefix string) error {
   145  	if len(properties) == 0 {
   146  		return fmt.Errorf("Property '%s': At least one nested property is required for data type object/object[]",
   147  			propNamePrefix)
   148  	}
   149  
   150  	for _, property := range properties {
   151  		primitiveDataType, isPrimitive := schema.AsPrimitive(property.DataType)
   152  		nestedDataType, isNested := schema.AsNested(property.DataType)
   153  
   154  		for _, validator := range validatorsNestedProperty {
   155  			if err := validator(property, primitiveDataType, nestedDataType, isPrimitive, isNested, propNamePrefix); err != nil {
   156  				return err
   157  			}
   158  		}
   159  		if isNested {
   160  			if err := validateNestedProperties(property.NestedProperties, propNamePrefix+"."+property.Name); err != nil {
   161  				return err
   162  			}
   163  		}
   164  	}
   165  	return nil
   166  }
   167  
   168  func validateNestedPropertyName(property *models.NestedProperty,
   169  	_, _ schema.DataType,
   170  	_, _ bool, propNamePrefix string,
   171  ) error {
   172  	return schema.ValidateNestedPropertyName(property.Name, propNamePrefix)
   173  }
   174  
   175  func validateNestedPropertyDataType(property *models.NestedProperty,
   176  	primitiveDataType, _ schema.DataType,
   177  	isPrimitive, isNested bool, propNamePrefix string,
   178  ) error {
   179  	propName := propNamePrefix + "." + property.Name
   180  
   181  	if isPrimitive {
   182  		// DataTypeString and DataTypeStringArray as deprecated since 1.19 are not allowed
   183  		switch primitiveDataType {
   184  		case schema.DataTypeString, schema.DataTypeStringArray:
   185  			return fmt.Errorf("Property '%s': data type '%s' is deprecated and not allowed as nested property", propName, primitiveDataType)
   186  		case schema.DataTypeGeoCoordinates, schema.DataTypePhoneNumber:
   187  			return fmt.Errorf("Property '%s': data type '%s' not allowed as nested property", propName, primitiveDataType)
   188  		default:
   189  			// do nothing
   190  		}
   191  		return nil
   192  	}
   193  	if isNested {
   194  		return nil
   195  	}
   196  	return fmt.Errorf("Property '%s': reference data type not allowed", propName)
   197  }
   198  
   199  // Tokenization allowed only for text/text[] data types
   200  func validateNestedPropertyTokenization(property *models.NestedProperty,
   201  	primitiveDataType, _ schema.DataType,
   202  	isPrimitive, isNested bool, propNamePrefix string,
   203  ) error {
   204  	propName := propNamePrefix + "." + property.Name
   205  
   206  	if isPrimitive {
   207  		switch primitiveDataType {
   208  		case schema.DataTypeText, schema.DataTypeTextArray:
   209  			switch property.Tokenization {
   210  			case models.PropertyTokenizationField, models.PropertyTokenizationWord,
   211  				models.PropertyTokenizationWhitespace, models.PropertyTokenizationLowercase:
   212  				return nil
   213  			}
   214  			return fmt.Errorf("Property '%s': Tokenization '%s' is not allowed for data type '%s'",
   215  				propName, property.Tokenization, primitiveDataType)
   216  		default:
   217  			if property.Tokenization == "" {
   218  				return nil
   219  			}
   220  			return fmt.Errorf("Property '%s': Tokenization is not allowed for data type '%s'",
   221  				propName, primitiveDataType)
   222  		}
   223  	}
   224  	if property.Tokenization == "" {
   225  		return nil
   226  	}
   227  	if isNested {
   228  		return fmt.Errorf("Property '%s': Tokenization is not allowed for object/object[] data types", propName)
   229  	}
   230  	return fmt.Errorf("Property '%s': Tokenization is not allowed for reference data type", propName)
   231  }
   232  
   233  // indexFilterable allowed for primitive & ref data types
   234  func validateNestedPropertyIndexFilterable(property *models.NestedProperty,
   235  	primitiveDataType, _ schema.DataType,
   236  	isPrimitive, _ bool, propNamePrefix string,
   237  ) error {
   238  	propName := propNamePrefix + "." + property.Name
   239  
   240  	// at this point indexSearchable should be set (either by user or by defaults)
   241  	if property.IndexFilterable == nil {
   242  		return fmt.Errorf("Property '%s': `indexFilterable` not set", propName)
   243  	}
   244  
   245  	if isPrimitive && primitiveDataType == schema.DataTypeBlob {
   246  		if *property.IndexFilterable {
   247  			return fmt.Errorf("Property: '%s': indexFilterable is not allowed for blob data type",
   248  				propName)
   249  		}
   250  	}
   251  
   252  	return nil
   253  }
   254  
   255  // indexSearchable allowed for text/text[] data types
   256  func validateNestedPropertyIndexSearchable(property *models.NestedProperty,
   257  	primitiveDataType, _ schema.DataType,
   258  	isPrimitive, _ bool, propNamePrefix string,
   259  ) error {
   260  	propName := propNamePrefix + "." + property.Name
   261  
   262  	// at this point indexSearchable should be set (either by user or by defaults)
   263  	if property.IndexSearchable == nil {
   264  		return fmt.Errorf("Property '%s': `indexSearchable` not set", propName)
   265  	}
   266  
   267  	if isPrimitive {
   268  		switch primitiveDataType {
   269  		case schema.DataTypeText, schema.DataTypeTextArray:
   270  			return nil
   271  		default:
   272  			// do nothing
   273  		}
   274  	}
   275  	if *property.IndexSearchable {
   276  		return fmt.Errorf("Property '%s': `indexSearchable` is not allowed for other than text/text[] data types",
   277  			propName)
   278  	}
   279  
   280  	return nil
   281  }
   282  
   283  func (m *Manager) validateVectorSettings(class *models.Class) error {
   284  	if !hasTargetVectors(class) {
   285  		if err := m.validateVectorizer(class.Vectorizer); err != nil {
   286  			return err
   287  		}
   288  		if err := m.validateVectorIndexType(class.VectorIndexType); err != nil {
   289  			return err
   290  		}
   291  		return nil
   292  	}
   293  
   294  	if class.Vectorizer != "" {
   295  		return fmt.Errorf("class.vectorizer %q can not be set if class.vectorConfig is configured", class.Vectorizer)
   296  	}
   297  	if class.VectorIndexType != "" {
   298  		return fmt.Errorf("class.vectorIndexType %q can not be set if class.vectorConfig is configured", class.VectorIndexType)
   299  	}
   300  
   301  	for name, cfg := range class.VectorConfig {
   302  		// check only if vectorizer correctly configured (map with single key being vectorizer name)
   303  		// other cases are handled in module config validation
   304  		if vm, ok := cfg.Vectorizer.(map[string]interface{}); ok && len(vm) == 1 {
   305  			for vectorizer := range vm {
   306  				if err := m.validateVectorizer(vectorizer); err != nil {
   307  					return fmt.Errorf("target vector %q: %w", name, err)
   308  				}
   309  			}
   310  		}
   311  		if err := m.validateVectorIndexType(cfg.VectorIndexType); err != nil {
   312  			return fmt.Errorf("target vector %q: %w", name, err)
   313  		}
   314  	}
   315  	return nil
   316  }
   317  
   318  func (m *Manager) validateVectorizer(vectorizer string) error {
   319  	if vectorizer == config.VectorizerModuleNone {
   320  		return nil
   321  	}
   322  
   323  	if err := m.vectorizerValidator.ValidateVectorizer(vectorizer); err != nil {
   324  		return errors.Wrap(err, "vectorizer")
   325  	}
   326  
   327  	return nil
   328  }
   329  
   330  func (m *Manager) validateVectorIndexType(vectorIndexType string) error {
   331  	switch vectorIndexType {
   332  	case "hnsw", "flat":
   333  		return nil
   334  	default:
   335  		return errors.Errorf("unrecognized or unsupported vectorIndexType %q",
   336  			vectorIndexType)
   337  	}
   338  }