github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/inverted/objects.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package inverted
    13  
    14  import (
    15  	"encoding/json"
    16  	"fmt"
    17  	"time"
    18  	"unicode/utf8"
    19  
    20  	"github.com/go-openapi/strfmt"
    21  	"github.com/google/uuid"
    22  	"github.com/pkg/errors"
    23  	"github.com/weaviate/weaviate/adapters/repos/db/helpers"
    24  	"github.com/weaviate/weaviate/entities/filters"
    25  	"github.com/weaviate/weaviate/entities/models"
    26  	"github.com/weaviate/weaviate/entities/schema"
    27  	"github.com/weaviate/weaviate/usecases/objects/validation"
    28  )
    29  
    30  func (a *Analyzer) Object(input map[string]any, props []*models.Property,
    31  	uuid strfmt.UUID,
    32  ) ([]Property, error) {
    33  	propsMap := map[string]*models.Property{}
    34  	for _, prop := range props {
    35  		propsMap[prop.Name] = prop
    36  	}
    37  
    38  	properties, err := a.analyzeProps(propsMap, input)
    39  	if err != nil {
    40  		return nil, fmt.Errorf("analyze props: %w", err)
    41  	}
    42  
    43  	idProp, err := a.analyzeIDProp(uuid)
    44  	if err != nil {
    45  		return nil, fmt.Errorf("analyze uuid prop: %w", err)
    46  	}
    47  	properties = append(properties, *idProp)
    48  
    49  	tsProps, err := a.analyzeTimestampProps(input)
    50  	if err != nil {
    51  		return nil, fmt.Errorf("analyze timestamp props: %w", err)
    52  	}
    53  	// tsProps will be nil here if weaviate is
    54  	// not setup to index by timestamps
    55  	if tsProps != nil {
    56  		properties = append(properties, tsProps...)
    57  	}
    58  
    59  	return properties, nil
    60  }
    61  
    62  func (a *Analyzer) analyzeProps(propsMap map[string]*models.Property,
    63  	input map[string]any,
    64  ) ([]Property, error) {
    65  	var out []Property
    66  	for key, prop := range propsMap {
    67  		if len(prop.DataType) < 1 {
    68  			return nil, fmt.Errorf("prop %q has no datatype", prop.Name)
    69  		}
    70  
    71  		if !HasInvertedIndex(prop) {
    72  			continue
    73  		}
    74  
    75  		if schema.IsBlobDataType(prop.DataType) {
    76  			continue
    77  		}
    78  
    79  		if schema.IsRefDataType(prop.DataType) {
    80  			if err := a.extendPropertiesWithReference(&out, prop, input, key); err != nil {
    81  				return nil, err
    82  			}
    83  		} else if schema.IsArrayDataType(prop.DataType) {
    84  			if err := a.extendPropertiesWithArrayType(&out, prop, input, key); err != nil {
    85  				return nil, err
    86  			}
    87  		} else {
    88  			if err := a.extendPropertiesWithPrimitive(&out, prop, input, key); err != nil {
    89  				return nil, err
    90  			}
    91  		}
    92  
    93  	}
    94  	return out, nil
    95  }
    96  
    97  func (a *Analyzer) analyzeIDProp(id strfmt.UUID) (*Property, error) {
    98  	value, err := id.MarshalText()
    99  	if err != nil {
   100  		return nil, fmt.Errorf("marshal id prop: %w", err)
   101  	}
   102  	return &Property{
   103  		Name: filters.InternalPropID,
   104  		Items: []Countable{
   105  			{
   106  				Data: value,
   107  			},
   108  		},
   109  		HasFilterableIndex: HasFilterableIndexIdProp,
   110  		HasSearchableIndex: HasSearchableIndexIdProp,
   111  	}, nil
   112  }
   113  
   114  func (a *Analyzer) analyzeTimestampProps(input map[string]any) ([]Property, error) {
   115  	createTime, createTimeOK := input[filters.InternalPropCreationTimeUnix]
   116  	updateTime, updateTimeOK := input[filters.InternalPropLastUpdateTimeUnix]
   117  
   118  	var props []Property
   119  	if createTimeOK {
   120  		b, err := json.Marshal(createTime)
   121  		if err != nil {
   122  			return nil, fmt.Errorf("analyze create timestamp prop: %w", err)
   123  		}
   124  		props = append(props, Property{
   125  			Name:               filters.InternalPropCreationTimeUnix,
   126  			Items:              []Countable{{Data: b}},
   127  			HasFilterableIndex: HasFilterableIndexTimestampProp,
   128  			HasSearchableIndex: HasSearchableIndexTimestampProp,
   129  		})
   130  	}
   131  
   132  	if updateTimeOK {
   133  		b, err := json.Marshal(updateTime)
   134  		if err != nil {
   135  			return nil, fmt.Errorf("analyze update timestamp prop: %w", err)
   136  		}
   137  		props = append(props, Property{
   138  			Name:               filters.InternalPropLastUpdateTimeUnix,
   139  			Items:              []Countable{{Data: b}},
   140  			HasFilterableIndex: HasFilterableIndexTimestampProp,
   141  			HasSearchableIndex: HasSearchableIndexTimestampProp,
   142  		})
   143  	}
   144  
   145  	return props, nil
   146  }
   147  
   148  func (a *Analyzer) extendPropertiesWithArrayType(properties *[]Property,
   149  	prop *models.Property, input map[string]any, propName string,
   150  ) error {
   151  	value, ok := input[propName]
   152  	if !ok {
   153  		// skip any primitive prop that's not set
   154  		return nil
   155  	}
   156  
   157  	var err error
   158  	value, err = typedSliceToUntyped(value)
   159  	if err != nil {
   160  		return fmt.Errorf("extend properties with array type: %w", err)
   161  	}
   162  
   163  	values, ok := value.([]any)
   164  	if !ok {
   165  		// skip any primitive prop that's not set
   166  		return errors.New("analyze array prop: expected array prop")
   167  	}
   168  
   169  	property, err := a.analyzeArrayProp(prop, values)
   170  	if err != nil {
   171  		return fmt.Errorf("analyze array prop: %w", err)
   172  	}
   173  	if property == nil {
   174  		return nil
   175  	}
   176  
   177  	*properties = append(*properties, *property)
   178  	return nil
   179  }
   180  
   181  // extendPropertiesWithPrimitive mutates the passed in properties, by extending
   182  // it with an additional property - if applicable
   183  func (a *Analyzer) extendPropertiesWithPrimitive(properties *[]Property,
   184  	prop *models.Property, input map[string]any, propName string,
   185  ) error {
   186  	var property *Property
   187  	var err error
   188  
   189  	value, ok := input[propName]
   190  	if !ok {
   191  		// skip any primitive prop that's not set
   192  		return nil
   193  	}
   194  	property, err = a.analyzePrimitiveProp(prop, value)
   195  	if err != nil {
   196  		return fmt.Errorf("analyze primitive prop: %w", err)
   197  	}
   198  	if property == nil {
   199  		return nil
   200  	}
   201  
   202  	*properties = append(*properties, *property)
   203  	return nil
   204  }
   205  
   206  func (a *Analyzer) analyzeArrayProp(prop *models.Property, values []any) (*Property, error) {
   207  	var items []Countable
   208  	hasFilterableIndex := HasFilterableIndex(prop)
   209  	hasSearchableIndex := HasSearchableIndex(prop)
   210  
   211  	switch dt := schema.DataType(prop.DataType[0]); dt {
   212  	case schema.DataTypeTextArray:
   213  		hasFilterableIndex = hasFilterableIndex && !a.isFallbackToSearchable()
   214  		in, err := stringsFromValues(prop, values)
   215  		if err != nil {
   216  			return nil, err
   217  		}
   218  		items = a.TextArray(prop.Tokenization, in)
   219  	case schema.DataTypeIntArray:
   220  		in := make([]int64, len(values))
   221  		for i, value := range values {
   222  			if asJsonNumber, ok := value.(json.Number); ok {
   223  				var err error
   224  				value, err = asJsonNumber.Float64()
   225  				if err != nil {
   226  					return nil, err
   227  				}
   228  			}
   229  
   230  			if asFloat, ok := value.(float64); ok {
   231  				// unmarshaling from json into a dynamic schema will assume every number
   232  				// is a float64
   233  				value = int64(asFloat)
   234  			}
   235  
   236  			asInt, ok := value.(int64)
   237  			if !ok {
   238  				return nil, fmt.Errorf("expected property %s to be of type int64, but got %T", prop.Name, value)
   239  			}
   240  			in[i] = asInt
   241  		}
   242  
   243  		var err error
   244  		items, err = a.IntArray(in)
   245  		if err != nil {
   246  			return nil, fmt.Errorf("analyze property %s: %w", prop.Name, err)
   247  		}
   248  	case schema.DataTypeNumberArray:
   249  		in := make([]float64, len(values))
   250  		for i, value := range values {
   251  			if asJsonNumber, ok := value.(json.Number); ok {
   252  				var err error
   253  				value, err = asJsonNumber.Float64()
   254  				if err != nil {
   255  					return nil, err
   256  				}
   257  			}
   258  
   259  			asFloat, ok := value.(float64)
   260  			if !ok {
   261  				return nil, fmt.Errorf("expected property %s to be of type float64, but got %T", prop.Name, value)
   262  			}
   263  			in[i] = asFloat
   264  		}
   265  
   266  		var err error
   267  		items, err = a.FloatArray(in) // convert to int before analyzing
   268  		if err != nil {
   269  			return nil, fmt.Errorf("analyze property %s: %w", prop.Name, err)
   270  		}
   271  	case schema.DataTypeBooleanArray:
   272  		in := make([]bool, len(values))
   273  		for i, value := range values {
   274  			asBool, ok := value.(bool)
   275  			if !ok {
   276  				return nil, fmt.Errorf("expected property %s to be of type bool, but got %T", prop.Name, value)
   277  			}
   278  			in[i] = asBool
   279  		}
   280  
   281  		var err error
   282  		items, err = a.BoolArray(in) // convert to int before analyzing
   283  		if err != nil {
   284  			return nil, fmt.Errorf("analyze property %s: %w", prop.Name, err)
   285  		}
   286  	case schema.DataTypeDateArray:
   287  		in := make([]int64, len(values))
   288  		for i, value := range values {
   289  			// dates can be either a date-string or directly a time object. Try to parse both
   290  			if asTime, okTime := value.(time.Time); okTime {
   291  				in[i] = asTime.UnixNano()
   292  			} else if asString, okString := value.(string); okString {
   293  				parsedTime, err := time.Parse(time.RFC3339Nano, asString)
   294  				if err != nil {
   295  					return nil, fmt.Errorf("parse time: %w", err)
   296  				}
   297  				in[i] = parsedTime.UnixNano()
   298  			} else {
   299  				return nil, fmt.Errorf("expected property %s to be a time-string or time object, but got %T", prop.Name, value)
   300  			}
   301  		}
   302  
   303  		var err error
   304  		items, err = a.IntArray(in)
   305  		if err != nil {
   306  			return nil, fmt.Errorf("analyze property %s: %w", prop.Name, err)
   307  		}
   308  	case schema.DataTypeUUIDArray:
   309  		parsed, err := validation.ParseUUIDArray(values)
   310  		if err != nil {
   311  			return nil, fmt.Errorf("parse uuid array: %w", err)
   312  		}
   313  
   314  		items, err = a.UUIDArray(parsed)
   315  		if err != nil {
   316  			return nil, fmt.Errorf("analyze property %s: %w", prop.Name, err)
   317  		}
   318  
   319  	default:
   320  		// ignore unsupported prop type
   321  		return nil, nil
   322  	}
   323  
   324  	return &Property{
   325  		Name:               prop.Name,
   326  		Items:              items,
   327  		Length:             len(values),
   328  		HasFilterableIndex: hasFilterableIndex,
   329  		HasSearchableIndex: hasSearchableIndex,
   330  	}, nil
   331  }
   332  
   333  func stringsFromValues(prop *models.Property, values []any) ([]string, error) {
   334  	in := make([]string, len(values))
   335  	for i, value := range values {
   336  		asString, ok := value.(string)
   337  		if !ok {
   338  			return nil, fmt.Errorf("expected property %s to be of type string, but got %T", prop.Name, value)
   339  		}
   340  		in[i] = asString
   341  	}
   342  	return in, nil
   343  }
   344  
   345  func (a *Analyzer) analyzePrimitiveProp(prop *models.Property, value any) (*Property, error) {
   346  	var items []Countable
   347  	propertyLength := -1 // will be overwritten for string/text, signals not to add the other types.
   348  	hasFilterableIndex := HasFilterableIndex(prop)
   349  	hasSearchableIndex := HasSearchableIndex(prop)
   350  
   351  	switch dt := schema.DataType(prop.DataType[0]); dt {
   352  	case schema.DataTypeText:
   353  		hasFilterableIndex = hasFilterableIndex && !a.isFallbackToSearchable()
   354  		asString, ok := value.(string)
   355  		if !ok {
   356  			return nil, fmt.Errorf("expected property %s to be of type string, but got %T", prop.Name, value)
   357  		}
   358  		items = a.Text(prop.Tokenization, asString)
   359  		propertyLength = utf8.RuneCountInString(asString)
   360  	case schema.DataTypeInt:
   361  		if asFloat, ok := value.(float64); ok {
   362  			// unmarshaling from json into a dynamic schema will assume every number
   363  			// is a float64
   364  			value = int64(asFloat)
   365  		}
   366  
   367  		if asInt, ok := value.(int); ok {
   368  			// when merging an existing object we may retrieve an untyped int
   369  			value = int64(asInt)
   370  		}
   371  
   372  		asInt, ok := value.(int64)
   373  		if !ok {
   374  			return nil, fmt.Errorf("expected property %s to be of type int64, but got %T", prop.Name, value)
   375  		}
   376  
   377  		var err error
   378  		items, err = a.Int(asInt)
   379  		if err != nil {
   380  			return nil, fmt.Errorf("analyze property %s: %w", prop.Name, err)
   381  		}
   382  	case schema.DataTypeNumber:
   383  		asFloat, ok := value.(float64)
   384  		if !ok {
   385  			return nil, fmt.Errorf("expected property %s to be of type float64, but got %T", prop.Name, value)
   386  		}
   387  
   388  		var err error
   389  		items, err = a.Float(asFloat) // convert to int before analyzing
   390  		if err != nil {
   391  			return nil, fmt.Errorf("analyze property %s: %w", prop.Name, err)
   392  		}
   393  	case schema.DataTypeBoolean:
   394  		asBool, ok := value.(bool)
   395  		if !ok {
   396  			return nil, fmt.Errorf("expected property %s to be of type bool, but got %T", prop.Name, value)
   397  		}
   398  
   399  		var err error
   400  		items, err = a.Bool(asBool) // convert to int before analyzing
   401  		if err != nil {
   402  			return nil, fmt.Errorf("analyze property %s: %w", prop.Name, err)
   403  		}
   404  	case schema.DataTypeDate:
   405  		var err error
   406  		if asString, ok := value.(string); ok {
   407  			// for example when patching the date may have been loaded as a string
   408  			value, err = time.Parse(time.RFC3339Nano, asString)
   409  			if err != nil {
   410  				return nil, fmt.Errorf("parse stringified timestamp: %w", err)
   411  			}
   412  		}
   413  		asTime, ok := value.(time.Time)
   414  		if !ok {
   415  			return nil, fmt.Errorf("expected property %s to be time.Time, but got %T", prop.Name, value)
   416  		}
   417  
   418  		items, err = a.Int(asTime.UnixNano())
   419  		if err != nil {
   420  			return nil, fmt.Errorf("analyze property %s: %w", prop.Name, err)
   421  		}
   422  	case schema.DataTypeUUID:
   423  		var err error
   424  
   425  		if asString, ok := value.(string); ok {
   426  			// for example when patching the uuid may have been loaded as a string
   427  			value, err = uuid.Parse(asString)
   428  			if err != nil {
   429  				return nil, fmt.Errorf("parse stringified uuid: %w", err)
   430  			}
   431  		}
   432  
   433  		asUUID, ok := value.(uuid.UUID)
   434  		if !ok {
   435  			return nil, fmt.Errorf("expected property %s to be uuid.UUID, but got %T", prop.Name, value)
   436  		}
   437  
   438  		items, err = a.UUID(asUUID)
   439  		if err != nil {
   440  			return nil, fmt.Errorf("analyze property %s: %w", prop.Name, err)
   441  		}
   442  	default:
   443  		// ignore unsupported prop type
   444  		return nil, nil
   445  	}
   446  
   447  	return &Property{
   448  		Name:               prop.Name,
   449  		Items:              items,
   450  		Length:             propertyLength,
   451  		HasFilterableIndex: hasFilterableIndex,
   452  		HasSearchableIndex: hasSearchableIndex,
   453  	}, nil
   454  }
   455  
   456  // extendPropertiesWithReference extends the specified properties arrays with
   457  // either 1 or 2 entries: If the ref is not set, only the ref-count property
   458  // will be added. If the ref is set the ref-prop itself will also be added and
   459  // contain all references as values
   460  func (a *Analyzer) extendPropertiesWithReference(properties *[]Property,
   461  	prop *models.Property, input map[string]any, propName string,
   462  ) error {
   463  	value, ok := input[propName]
   464  	if !ok {
   465  		// explicitly set zero-value, so we can index for "ref not set"
   466  		value = make(models.MultipleRef, 0)
   467  	}
   468  
   469  	var asRefs models.MultipleRef
   470  	asRefs, ok = value.(models.MultipleRef)
   471  	if !ok {
   472  		// due to the fix introduced in https://github.com/weaviate/weaviate/pull/2320,
   473  		// MultipleRef's can appear as empty []any when no actual refs are provided for
   474  		// an object's reference property.
   475  		//
   476  		// if we encounter []any, assume it indicates an empty ref prop, and skip it.
   477  		_, ok := value.([]any)
   478  		if !ok {
   479  			return fmt.Errorf("expected property %q to be of type models.MutlipleRef,"+
   480  				" but got %T", prop.Name, value)
   481  		}
   482  		return nil
   483  	}
   484  
   485  	property, err := a.analyzeRefPropCount(prop, asRefs)
   486  	if err != nil {
   487  		return fmt.Errorf("ref count: %w", err)
   488  	}
   489  
   490  	*properties = append(*properties, *property)
   491  
   492  	if len(asRefs) == 0 {
   493  		return nil
   494  	}
   495  
   496  	property, err = a.analyzeRefProp(prop, asRefs)
   497  	if err != nil {
   498  		return fmt.Errorf("refs: %w", err)
   499  	}
   500  
   501  	*properties = append(*properties, *property)
   502  	return nil
   503  }
   504  
   505  func (a *Analyzer) analyzeRefPropCount(prop *models.Property,
   506  	value models.MultipleRef,
   507  ) (*Property, error) {
   508  	items, err := a.RefCount(value)
   509  	if err != nil {
   510  		return nil, fmt.Errorf("analyze ref-property %q: %w", prop.Name, err)
   511  	}
   512  
   513  	return &Property{
   514  		Name:               helpers.MetaCountProp(prop.Name),
   515  		Items:              items,
   516  		Length:             len(value),
   517  		HasFilterableIndex: HasFilterableIndex(prop),
   518  		HasSearchableIndex: HasSearchableIndex(prop),
   519  	}, nil
   520  }
   521  
   522  func (a *Analyzer) analyzeRefProp(prop *models.Property,
   523  	value models.MultipleRef,
   524  ) (*Property, error) {
   525  	items, err := a.Ref(value)
   526  	if err != nil {
   527  		return nil, fmt.Errorf("analyze ref-property %q: %w", prop.Name, err)
   528  	}
   529  
   530  	return &Property{
   531  		Name:               prop.Name,
   532  		Items:              items,
   533  		HasFilterableIndex: HasFilterableIndex(prop),
   534  		HasSearchableIndex: HasSearchableIndex(prop),
   535  	}, nil
   536  }
   537  
   538  func typedSliceToUntyped(in any) ([]any, error) {
   539  	switch typed := in.(type) {
   540  	case []any:
   541  		// nothing to do
   542  		return typed, nil
   543  	case []string:
   544  		return convertToUntyped[string](typed), nil
   545  	case []int:
   546  		return convertToUntyped[int](typed), nil
   547  	case []time.Time:
   548  		return convertToUntyped[time.Time](typed), nil
   549  	case []bool:
   550  		return convertToUntyped[bool](typed), nil
   551  	case []float64:
   552  		return convertToUntyped[float64](typed), nil
   553  	case []uuid.UUID:
   554  		return convertToUntyped[uuid.UUID](typed), nil
   555  	default:
   556  		return nil, errors.Errorf("unsupported type %T", in)
   557  	}
   558  }
   559  
   560  func convertToUntyped[T comparable](in []T) []any {
   561  	out := make([]any, len(in))
   562  	for i := range out {
   563  		out[i] = in[i]
   564  	}
   565  	return out
   566  }
   567  
   568  // Indicates whether property should be indexed
   569  // Index holds document ids with property of/containing particular value
   570  // and number of its occurrences in that property
   571  // (index created using bucket of StrategyMapCollection)
   572  func HasSearchableIndex(prop *models.Property) bool {
   573  	switch dt, _ := schema.AsPrimitive(prop.DataType); dt {
   574  	case schema.DataTypeText, schema.DataTypeTextArray:
   575  		// by default property has searchable index only for text/text[] props
   576  		if prop.IndexSearchable == nil {
   577  			return true
   578  		}
   579  		return *prop.IndexSearchable
   580  	default:
   581  		return false
   582  	}
   583  }
   584  
   585  // Indicates whether property should be indexed
   586  // Index holds document ids with property of/containing particular value
   587  // (index created using bucket of StrategyRoaringSet)
   588  func HasFilterableIndex(prop *models.Property) bool {
   589  	// by default property has filterable index
   590  	if prop.IndexFilterable == nil {
   591  		return true
   592  	}
   593  	return *prop.IndexFilterable
   594  }
   595  
   596  func HasInvertedIndex(prop *models.Property) bool {
   597  	return HasFilterableIndex(prop) || HasSearchableIndex(prop)
   598  }
   599  
// Index flags for the internal and derived properties. Each pair states
// whether such a property gets a filterable and/or a searchable index; the
// comment above a pair names the condition under which it applies.
const (
	// id prop: always
	HasFilterableIndexIdProp = true
	HasSearchableIndexIdProp = false

	// timestamp props: only if index.invertedIndexConfig.IndexTimestamps set
	HasFilterableIndexTimestampProp = true
	HasSearchableIndexTimestampProp = false

	// meta count: only if property.indexFilterable or property.indexSearchable set
	HasFilterableIndexMetaCount = true
	HasSearchableIndexMetaCount = false

	// prop null state: only if index.invertedIndexConfig.IndexNullState set
	// and either property.indexFilterable or property.indexSearchable set
	HasFilterableIndexPropNull = true
	HasSearchableIndexPropNull = false

	// prop length: only if index.invertedIndexConfig.IndexPropertyLength set
	// and either property.indexFilterable or property.indexSearchable set
	HasFilterableIndexPropLength = true
	HasSearchableIndexPropLength = false
)