github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/merge_integration_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  //go:build integrationTest
    13  // +build integrationTest
    14  
    15  package db
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"testing"
    21  	"time"
    22  
    23  	"github.com/go-openapi/strfmt"
    24  	"github.com/google/uuid"
    25  	"github.com/sirupsen/logrus"
    26  	"github.com/stretchr/testify/assert"
    27  	"github.com/stretchr/testify/require"
    28  	"github.com/weaviate/weaviate/entities/additional"
    29  	"github.com/weaviate/weaviate/entities/dto"
    30  	"github.com/weaviate/weaviate/entities/filters"
    31  	"github.com/weaviate/weaviate/entities/models"
    32  	"github.com/weaviate/weaviate/entities/schema"
    33  	"github.com/weaviate/weaviate/entities/schema/crossref"
    34  	enthnsw "github.com/weaviate/weaviate/entities/vectorindex/hnsw"
    35  	"github.com/weaviate/weaviate/usecases/objects"
    36  )
    37  
    38  func Test_MergingObjects(t *testing.T) {
        	// Integration test for repo.Merge. It creates three classes in a fresh
        	// repo rooted in a temp dir, inserts objects and then runs a sequence of
        	// merges (primitive props, references, an object stored without a
        	// vector), reading the object back after each step. The subtests share
        	// state (repo, schemaGetter, lastUpdateTimeUnix) and rely on running in
        	// the order they are written.
    39  	dirName := t.TempDir()
    40  
    41  	logger := logrus.New()
        	// The schema getter starts out with no classes; the full schema is
        	// assigned further down, once the classes have been added.
    42  	schemaGetter := &fakeSchemaGetter{
    43  		schema:     schema.Schema{Objects: &models.Schema{Classes: nil}},
    44  		shardState: singleShardState(),
    45  	}
    46  	repo, err := New(logger, Config{
    47  		MemtablesFlushDirtyAfter:  60,
    48  		RootPath:                  dirName,
    49  		MaxImportGoroutinesFactor: 1,
    50  		TrackVectorDimensions:     true,
    51  	}, &fakeRemoteClient{}, &fakeNodeResolver{}, &fakeRemoteNodeClient{}, &fakeReplicationClient{}, nil)
    52  	require.Nil(t, err)
    53  	repo.SetSchemaGetter(schemaGetter)
    54  	require.Nil(t, repo.WaitForStartup(testCtx()))
    55  	defer repo.Shutdown(context.Background())
    56  	migrator := NewMigrator(repo, logger)
    57  
        	// Three classes are used:
        	//   - MergeTestTarget: the class that references point to
        	//   - MergeTestSource: the class that is merged into; it has several
        	//     primitive props and the "toTarget" reference prop
        	//   - MergeTestNoVector: used for the object that is stored and merged
        	//     without a vector
    58  	sch := schema.Schema{
    59  		Objects: &models.Schema{
    60  			Classes: []*models.Class{
    61  				{
    62  					Class:               "MergeTestTarget",
    63  					VectorIndexConfig:   enthnsw.NewDefaultUserConfig(),
    64  					InvertedIndexConfig: invertedConfig(),
    65  					Properties: []*models.Property{
    66  						{
    67  							Name:         "name",
    68  							DataType:     schema.DataTypeText.PropString(),
    69  							Tokenization: models.PropertyTokenizationWhitespace,
    70  						},
    71  					},
    72  				},
    73  				{
    74  					Class:               "MergeTestSource",
    75  					VectorIndexConfig:   enthnsw.NewDefaultUserConfig(),
    76  					InvertedIndexConfig: invertedConfig(),
    77  					Properties: []*models.Property{ // tries to have "one of each property type"
    78  						{
    79  							Name:         "string",
    80  							DataType:     schema.DataTypeText.PropString(),
    81  							Tokenization: models.PropertyTokenizationWhitespace,
    82  						},
    83  						{
    84  							Name:     "text",
    85  							DataType: []string{"text"},
    86  						},
    87  						{
    88  							Name:     "number",
    89  							DataType: []string{"number"},
    90  						},
    91  						{
    92  							Name:     "int",
    93  							DataType: []string{"int"},
    94  						},
    95  						{
    96  							Name:     "date",
    97  							DataType: []string{"date"},
    98  						},
    99  						{
   100  							Name:     "geo",
   101  							DataType: []string{"geoCoordinates"},
   102  						},
   103  						{
   104  							Name:     "toTarget",
   105  							DataType: []string{"MergeTestTarget"},
   106  						},
   107  					},
   108  				},
   109  				{
   110  					Class:               "MergeTestNoVector",
   111  					VectorIndexConfig:   enthnsw.NewDefaultUserConfig(),
   112  					InvertedIndexConfig: invertedConfig(),
   113  					Properties: []*models.Property{
   114  						{
   115  							Name:         "foo",
   116  							DataType:     schema.DataTypeText.PropString(),
   117  							Tokenization: models.PropertyTokenizationWhitespace,
   118  						},
   119  					},
   120  				},
   121  			},
   122  		},
   123  	}
   124  
   125  	t.Run("add required classes", func(t *testing.T) {
   126  		for _, class := range sch.Objects.Classes {
   127  			t.Run(fmt.Sprintf("add %s", class.Class), func(t *testing.T) {
   128  				err := migrator.AddClass(context.Background(), class, schemaGetter.shardState)
   129  				require.Nil(t, err)
   130  			})
   131  		}
   132  	})
   133  
        	// From here on the schema getter serves the classes that were just added.
   134  	schemaGetter.schema = sch
   135  
        	// Fixed IDs so that the later subtests can refer to the same objects.
   136  	target1 := strfmt.UUID("897be7cc-1ae1-4b40-89d9-d3ea98037638")
   137  	target2 := strfmt.UUID("5cc94aba-93e4-408a-ab19-3d803216a04e")
   138  	target3 := strfmt.UUID("81982705-8b1e-4228-b84c-911818d7ee85")
   139  	target4 := strfmt.UUID("7f69c263-17f4-4529-a54d-891a7c008ca4")
   140  	sourceID := strfmt.UUID("8738ddd5-a0ed-408d-a5d6-6f818fd56be6")
   141  	noVecID := strfmt.UUID("b4933761-88b2-4666-856d-298eb1ad0a59")
   142  
   143  	t.Run("add objects", func(t *testing.T) {
        		// current time in milliseconds
   144  		now := time.Now().UnixNano() / int64(time.Millisecond)
        		// The source object starts with only its "string" prop set; all
        		// other props are added through merges later on.
   145  		err := repo.PutObject(context.Background(), &models.Object{
   146  			ID:    sourceID,
   147  			Class: "MergeTestSource",
   148  			Properties: map[string]interface{}{
   149  				"string": "only the string prop set",
   150  			},
   151  			CreationTimeUnix:   now,
   152  			LastUpdateTimeUnix: now,
   153  		}, []float32{0.5}, nil, nil)
   154  		require.Nil(t, err)
   155  
   156  		targetDimensionsBefore := GetDimensionsFromRepo(repo, "MergeTestTarget")
   157  
   158  		targets := []strfmt.UUID{target1, target2, target3, target4}
   159  
   160  		for i, target := range targets {
   161  			err = repo.PutObject(context.Background(), &models.Object{
   162  				ID:    target,
   163  				Class: "MergeTestTarget",
   164  				Properties: map[string]interface{}{
   165  					"name": fmt.Sprintf("target item %d", i),
   166  				},
   167  			}, []float32{0.5}, nil, nil)
   168  			require.Nil(t, err)
   169  		}
   170  
        		// Four targets were stored, each with a one-element vector, so the
        		// value reported for the class is expected to have grown by 4.
   171  		targetDimensionsAfter := GetDimensionsFromRepo(repo, "MergeTestTarget")
   172  		require.Equal(t, targetDimensionsBefore+4, targetDimensionsAfter)
   173  
        		// This object is stored with a nil vector.
   174  		err = repo.PutObject(context.Background(), &models.Object{
   175  			ID:    noVecID,
   176  			Class: "MergeTestNoVector",
   177  			Properties: map[string]interface{}{
   178  				"foo": "bar",
   179  			},
   180  			CreationTimeUnix:   now,
   181  			LastUpdateTimeUnix: now,
   182  		}, nil, nil, nil)
   183  		require.Nil(t, err)
   184  
        		// Storing the vector-less object must leave the value reported for
        		// MergeTestTarget unchanged.
   185  		targetDimensionsAfterNoVec := GetDimensionsFromRepo(repo, "MergeTestTarget")
   186  		require.Equal(t, targetDimensionsAfter, targetDimensionsAfterNoVec)
   187  	})
   188  
        	// Update timestamp of the source object before the first merge; it is
        	// set by the next subtest and compared against after the merge.
   189  	var lastUpdateTimeUnix int64
   190  
   191  	t.Run("fetch original object's update timestamp", func(t *testing.T) {
   192  		source, err := repo.ObjectByID(context.Background(), sourceID, nil, additional.Properties{
   193  			LastUpdateTimeUnix: true,
   194  		}, "")
   195  		require.Nil(t, err)
   196  
   197  		lastUpdateTimeUnix = source.Object().LastUpdateTimeUnix
   198  		require.NotEmpty(t, lastUpdateTimeUnix)
   199  	})
   200  
   201  	t.Run("merge other previously unset properties into it", func(t *testing.T) {
   202  		// give the lastUpdateTimeUnix time to be different.
   203  		// on some machines this may not be needed, but for
   204  		// faster processors, the difference is undetectable
   205  		time.Sleep(time.Millisecond)
   206  
        		// Only props that are not yet set on the object are part of this
        		// merge; "string" is left out and must survive unchanged.
   207  		md := objects.MergeDocument{
   208  			Class: "MergeTestSource",
   209  			ID:    sourceID,
   210  			PrimitiveSchema: map[string]interface{}{
   211  				"number": 7.0,
   212  				"int":    int64(9),
   213  				"geo": &models.GeoCoordinates{
   214  					Latitude:  ptFloat32(30.2),
   215  					Longitude: ptFloat32(60.2),
   216  				},
   217  				"text": "some text",
   218  			},
        			// millisecond timestamp taken after the sleep above
   219  			UpdateTime: time.Now().UnixNano() / int64(time.Millisecond),
   220  		}
   221  
   222  		err := repo.Merge(context.Background(), md, nil, "")
   223  		assert.Nil(t, err)
   224  	})
   225  
   226  	t.Run("compare merge object's update time with original", func(t *testing.T) {
   227  		source, err := repo.ObjectByID(context.Background(), sourceID, nil, additional.Properties{
   228  			LastUpdateTimeUnix: true,
   229  		}, "")
   230  		require.Nil(t, err)
   231  
        		// The merge must have moved the update timestamp forward.
   232  		assert.Greater(t, source.Object().LastUpdateTimeUnix, lastUpdateTimeUnix)
   233  	})
   234  
   235  	t.Run("check that the object was successfully merged", func(t *testing.T) {
   236  		source, err := repo.ObjectByID(context.Background(), sourceID, nil, additional.Properties{}, "")
   237  		require.Nil(t, err)
   238  
        		// Note: "int" was merged as int64(9) but is expected to be read
        		// back as float64(9).
   239  		sch := source.Object().Properties.(map[string]interface{})
   240  		expectedSchema := map[string]interface{}{
   241  			// from original
   242  			"string": "only the string prop set",
   243  
   244  			// from merge
   245  			"number": 7.0,
   246  			"int":    float64(9),
   247  			"geo": &models.GeoCoordinates{
   248  				Latitude:  ptFloat32(30.2),
   249  				Longitude: ptFloat32(60.2),
   250  			},
   251  			"text": "some text",
   252  		}
   253  
   254  		assert.Equal(t, expectedSchema, sch)
   255  	})
   256  
   257  	t.Run("trying to merge from non-existing index", func(t *testing.T) {
        		// No class named "WrongClass" was added above; Merge is expected to
        		// return exactly the error asserted below.
   258  		md := objects.MergeDocument{
   259  			Class: "WrongClass",
   260  			ID:    sourceID,
   261  			PrimitiveSchema: map[string]interface{}{
   262  				"number": 7.0,
   263  			},
   264  		}
   265  
   266  		err := repo.Merge(context.Background(), md, nil, "")
   267  		assert.Equal(t, fmt.Errorf(
   268  			"merge from non-existing index for WrongClass"), err)
   269  	})
   270  	t.Run("add a reference and replace one prop", func(t *testing.T) {
        		// The reference source is the "toTarget" prop of the source object.
   271  		source, err := crossref.ParseSource(fmt.Sprintf(
   272  			"weaviate://localhost/MergeTestSource/%s/toTarget", sourceID))
   273  		require.Nil(t, err)
        		// Only target1 is referenced in this step.
   274  		targets := []strfmt.UUID{target1}
   275  		refs := make(objects.BatchReferences, len(targets))
   276  		for i, target := range targets {
   277  			to, err := crossref.Parse(fmt.Sprintf("weaviate://localhost/%s", target))
   278  			require.Nil(t, err)
   279  			refs[i] = objects.BatchReference{
   280  				Err:  nil,
   281  				From: source,
   282  				To:   to,
   283  			}
   284  		}
        		// A single merge both overwrites the existing "string" prop and adds
        		// the reference.
   285  		md := objects.MergeDocument{
   286  			Class: "MergeTestSource",
   287  			ID:    sourceID,
   288  			PrimitiveSchema: map[string]interface{}{
   289  				"string": "let's update the string prop",
   290  			},
   291  			References: refs,
   292  		}
   293  		err = repo.Merge(context.Background(), md, nil, "")
   294  		assert.Nil(t, err)
   295  	})
   296  
   297  	t.Run("check that the object was successfully merged", func(t *testing.T) {
   298  		source, err := repo.ObjectByID(context.Background(), sourceID, nil, additional.Properties{}, "")
   299  		require.Nil(t, err)
   300  
   301  		ref, err := crossref.Parse(fmt.Sprintf("weaviate://localhost/%s", target1))
   302  		require.Nil(t, err)
   303  
        		// Expected state: updated "string", the props from the first merge
        		// unchanged, plus the single reference to target1.
   304  		sch := source.Object().Properties.(map[string]interface{})
   305  		expectedSchema := map[string]interface{}{
   306  			"string": "let's update the string prop",
   307  			"number": 7.0,
   308  			"int":    float64(9),
   309  			"geo": &models.GeoCoordinates{
   310  				Latitude:  ptFloat32(30.2),
   311  				Longitude: ptFloat32(60.2),
   312  			},
   313  			"text": "some text",
   314  			"toTarget": models.MultipleRef{
   315  				ref.SingleRef(),
   316  			},
   317  		}
   318  
   319  		assert.Equal(t, expectedSchema, sch)
   320  	})
   321  
   322  	t.Run("add more references in rapid succession", func(t *testing.T) {
   323  		// this test case prevents a regression on gh-1016
   324  		source, err := crossref.ParseSource(fmt.Sprintf(
   325  			"weaviate://localhost/MergeTestSource/%s/toTarget", sourceID))
   326  		require.Nil(t, err)
        		// The remaining three targets are added in one merge document.
   327  		targets := []strfmt.UUID{target2, target3, target4}
   328  		refs := make(objects.BatchReferences, len(targets))
   329  		for i, target := range targets {
   330  			to, err := crossref.Parse(fmt.Sprintf("weaviate://localhost/%s", target))
   331  			require.Nil(t, err)
   332  			refs[i] = objects.BatchReference{
   333  				Err:  nil,
   334  				From: source,
   335  				To:   to,
   336  			}
   337  		}
        		// This merge carries references only, no primitive props.
   338  		md := objects.MergeDocument{
   339  			Class:      "MergeTestSource",
   340  			ID:         sourceID,
   341  			References: refs,
   342  		}
   343  		err = repo.Merge(context.Background(), md, nil, "")
   344  		assert.Nil(t, err)
   345  	})
   346  
   347  	t.Run("check all references are now present", func(t *testing.T) {
   348  		source, err := repo.ObjectByID(context.Background(), sourceID, nil, additional.Properties{}, "")
   349  		require.Nil(t, err)
   350  
   351  		refs := source.Object().Properties.(map[string]interface{})["toTarget"]
   352  		refsSlice, ok := refs.(models.MultipleRef)
   353  		require.True(t, ok, fmt.Sprintf("toTarget must be models.MultipleRef, but got %#v", refs))
   354  
   355  		foundBeacons := []string{}
   356  		for _, ref := range refsSlice {
   357  			foundBeacons = append(foundBeacons, ref.Beacon.String())
   358  		}
        		// target1 from the earlier merge plus the three added just now
   359  		expectedBeacons := []string{
   360  			fmt.Sprintf("weaviate://localhost/%s", target1),
   361  			fmt.Sprintf("weaviate://localhost/%s", target2),
   362  			fmt.Sprintf("weaviate://localhost/%s", target3),
   363  			fmt.Sprintf("weaviate://localhost/%s", target4),
   364  		}
   365  
        		// ElementsMatch compares the two lists without regard to order.
   366  		assert.ElementsMatch(t, foundBeacons, expectedBeacons)
   367  	})
   368  
   369  	t.Run("merge object with no vector", func(t *testing.T) {
        		// This assignment writes to the err declared at the top of the test
        		// function; the := further down declares a new err that is local to
        		// this subtest.
   370  		err = repo.Merge(context.Background(), objects.MergeDocument{
   371  			Class:           "MergeTestNoVector",
   372  			ID:              noVecID,
   373  			PrimitiveSchema: map[string]interface{}{"foo": "baz"},
   374  		}, nil, "")
   375  		require.Nil(t, err)
   376  
   377  		orig, err := repo.ObjectByID(context.Background(), noVecID, nil, additional.Properties{}, "")
   378  		require.Nil(t, err)
   379  
        		// The Schema field of the result is expected to hold the object's id
        		// next to the merged prop.
   380  		expectedSchema := map[string]interface{}{
   381  			"foo": "baz",
   382  			"id":  noVecID,
   383  		}
   384  
   385  		assert.Equal(t, expectedSchema, orig.Schema)
   386  	})
   387  }
   388  
   389  // This prevents a regression on
   390  // https://github.com/weaviate/weaviate/issues/2193
   391  //
   392  // Prior to the fix it was possible that a prop that was not touched during the
   393  // merge (and therefore only loaded from disk) failed during the
   394  // inverted-indexing for the new doc id. This was then hidden by the fact that
   395  // error handling was broken inside the inverted.Analyzer. This test tries to
   396  // make sure that every possible property type stays intact if untouched
   397  // during a Merge operation
   398  //
   399  // To achieve this, every prop in this class exists twice, once with the prefix
   400  // 'touched_' and once with 'untouched_'. In the initial insert both properties
   401  // contain the same value, but then during the patch merge, the 'touched_'
   402  // properties are updated to a different value while the 'untouched_'
   403  // properties are left untouched. Then we try to retrieve the object through a
   404  // filter matching each property. The 'untouched_' properties are matched with
   405  // the original value, the 'touched_' props are matched with the updated ones
   406  func Test_Merge_UntouchedPropsCorrectlyIndexed(t *testing.T) {
        	// Flow: create a repo in a temp dir, add a single class, store one
        	// object, merge new values into its 'touched_' props and finally run one
        	// filtered search per prop. The subtests depend on each other and on
        	// running in the order they are written.
   407  	dirName := t.TempDir()
   408  
   409  	logger := logrus.New()
        	// The schema getter starts out with no classes; the full schema is
        	// assigned further down, once the class has been added.
   410  	schemaGetter := &fakeSchemaGetter{
   411  		schema:     schema.Schema{Objects: &models.Schema{Classes: nil}},
   412  		shardState: singleShardState(),
   413  	}
   414  	repo, err := New(logger, Config{
   415  		MemtablesFlushDirtyAfter:  60,
   416  		RootPath:                  dirName,
   417  		MaxImportGoroutinesFactor: 1,
   418  		QueryMaximumResults:       10000,
   419  		TrackVectorDimensions:     true,
   420  	}, &fakeRemoteClient{}, &fakeNodeResolver{}, &fakeRemoteNodeClient{}, &fakeReplicationClient{}, nil)
   421  	require.Nil(t, err)
   422  	repo.SetSchemaGetter(schemaGetter)
   423  	require.Nil(t, repo.WaitForStartup(testCtx()))
   424  	defer repo.Shutdown(context.Background())
   425  	migrator := NewMigrator(repo, logger)
        	// NOTE(review): Skip presumably turns off vector indexing for the class,
        	// as this test only uses filters - confirm against the hnsw UserConfig.
   426  	hnswConfig := enthnsw.NewDefaultUserConfig()
   427  	hnswConfig.Skip = true
        	// Every prop type is declared twice: once with the 'untouched_' prefix
        	// and once with the 'touched_' prefix.
   428  	sch := schema.Schema{
   429  		Objects: &models.Schema{
   430  			Classes: []*models.Class{
   431  				{
   432  					Class:               "TestClass",
   433  					VectorIndexConfig:   hnswConfig,
   434  					InvertedIndexConfig: invertedConfig(),
   435  					Properties: []*models.Property{ // tries to have "one of each property type"
   436  						{
   437  							Name:         "untouched_string",
   438  							DataType:     schema.DataTypeText.PropString(),
   439  							Tokenization: models.PropertyTokenizationWhitespace,
   440  						},
   441  						{
   442  							Name:         "touched_string",
   443  							DataType:     schema.DataTypeText.PropString(),
   444  							Tokenization: models.PropertyTokenizationWhitespace,
   445  						},
   446  						{
   447  							Name:         "untouched_string_array",
   448  							DataType:     schema.DataTypeTextArray.PropString(),
   449  							Tokenization: models.PropertyTokenizationWhitespace,
   450  						},
   451  						{
   452  							Name:         "touched_string_array",
   453  							DataType:     schema.DataTypeTextArray.PropString(),
   454  							Tokenization: models.PropertyTokenizationWhitespace,
   455  						},
   456  						{
   457  							Name: "untouched_text", Tokenization: "word",
   458  							DataType: []string{"text"},
   459  						},
   460  						{
   461  							Name: "touched_text", Tokenization: "word",
   462  							DataType: []string{"text"},
   463  						},
   464  						{
   465  							Name: "untouched_text_array", Tokenization: "word",
   466  							DataType: []string{"text[]"},
   467  						},
   468  						{
   469  							Name: "touched_text_array", Tokenization: "word",
   470  							DataType: []string{"text[]"},
   471  						},
   472  						{Name: "untouched_number", DataType: []string{"number"}},
   473  						{Name: "touched_number", DataType: []string{"number"}},
   474  						{Name: "untouched_number_array", DataType: []string{"number[]"}},
   475  						{Name: "touched_number_array", DataType: []string{"number[]"}},
   476  						{Name: "untouched_int", DataType: []string{"int"}},
   477  						{Name: "touched_int", DataType: []string{"int"}},
   478  						{Name: "untouched_int_array", DataType: []string{"int[]"}},
   479  						{Name: "touched_int_array", DataType: []string{"int[]"}},
   480  						{Name: "untouched_date", DataType: []string{"date"}},
   481  						{Name: "touched_date", DataType: []string{"date"}},
   482  						{Name: "untouched_date_array", DataType: []string{"date[]"}},
   483  						{Name: "touched_date_array", DataType: []string{"date[]"}},
   484  						{Name: "untouched_geo", DataType: []string{"geoCoordinates"}},
   485  						{Name: "touched_geo", DataType: []string{"geoCoordinates"}},
   486  					},
   487  				},
   488  			},
   489  		},
   490  	}
   491  
   492  	t.Run("add required classes", func(t *testing.T) {
   493  		for _, class := range sch.Objects.Classes {
   494  			t.Run(fmt.Sprintf("add %s", class.Class), func(t *testing.T) {
   495  				err := migrator.AddClass(context.Background(), class, schemaGetter.shardState)
   496  				require.Nil(t, err)
   497  			})
   498  		}
   499  	})
   500  
        	// From here on the schema getter serves the class that was just added.
   501  	schemaGetter.schema = sch
   502  
   503  	t.Run("add initial object", func(t *testing.T) {
        		// All prop values are derived from id (0), so every 'touched_' /
        		// 'untouched_' pair starts out with the same value.
   504  		id := 0
   505  		err := repo.PutObject(context.Background(), &models.Object{
   506  			ID:    uuidFromInt(id),
   507  			Class: "TestClass",
   508  			Properties: map[string]interface{}{
   509  				"untouched_number":       float64(id),
   510  				"untouched_number_array": []interface{}{float64(id)},
   511  				"untouched_int":          id,
   512  				"untouched_int_array":    []interface{}{int64(id)},
   513  				"untouched_string":       fmt.Sprintf("%d", id),
   514  				"untouched_string_array": []string{fmt.Sprintf("%d", id)},
   515  				"untouched_text":         fmt.Sprintf("%d", id),
   516  				"untouched_text_array":   []string{fmt.Sprintf("%d", id)},
   517  				"untouched_date":         time.Unix(0, 0).Add(time.Duration(id) * time.Hour),
   518  				"untouched_date_array":   []time.Time{time.Unix(0, 0).Add(time.Duration(id) * time.Hour)},
   519  				"untouched_geo": &models.GeoCoordinates{
   520  					ptFloat32(float32(id)), ptFloat32(float32(id)),
   521  				},
   522  
   523  				"touched_number":       float64(id),
   524  				"touched_number_array": []interface{}{float64(id)},
   525  				"touched_int":          id,
   526  				"touched_int_array":    []interface{}{int64(id)},
   527  				"touched_string":       fmt.Sprintf("%d", id),
   528  				"touched_string_array": []string{fmt.Sprintf("%d", id)},
   529  				"touched_text":         fmt.Sprintf("%d", id),
   530  				"touched_text_array":   []string{fmt.Sprintf("%d", id)},
   531  				"touched_date":         time.Unix(0, 0).Add(time.Duration(id) * time.Hour),
   532  				"touched_date_array":   []time.Time{time.Unix(0, 0).Add(time.Duration(id) * time.Hour)},
   533  				"touched_geo": &models.GeoCoordinates{
   534  					ptFloat32(float32(id)), ptFloat32(float32(id)),
   535  				},
   536  			},
   537  			CreationTimeUnix:   int64(id),
   538  			LastUpdateTimeUnix: int64(id),
   539  		}, []float32{0.5}, nil, nil)
   540  		require.Nil(t, err)
   541  	})
   542  
   543  	t.Run("patch half the props (all that contain 'touched')", func(t *testing.T) {
        		// Only 'touched_' props are part of the merge document; their new
        		// values are derived from updateID (28) in the same way the initial
        		// values were derived from 0.
   544  		updateID := 28
   545  		md := objects.MergeDocument{
   546  			Class: "TestClass",
   547  			ID:    uuidFromInt(0),
   548  			PrimitiveSchema: map[string]interface{}{
   549  				"touched_number":       float64(updateID),
   550  				"touched_number_array": []interface{}{float64(updateID)},
   551  				"touched_int":          updateID,
   552  				"touched_int_array":    []interface{}{int64(updateID)},
   553  				"touched_string":       fmt.Sprintf("%d", updateID),
   554  				"touched_string_array": []string{fmt.Sprintf("%d", updateID)},
   555  				"touched_text":         fmt.Sprintf("%d", updateID),
   556  				"touched_text_array":   []string{fmt.Sprintf("%d", updateID)},
   557  				"touched_date":         time.Unix(0, 0).Add(time.Duration(updateID) * time.Hour),
   558  				"touched_date_array":   []time.Time{time.Unix(0, 0).Add(time.Duration(updateID) * time.Hour)},
   559  				"touched_geo": &models.GeoCoordinates{
   560  					ptFloat32(float32(updateID)), ptFloat32(float32(updateID)),
   561  				},
   562  			},
   563  			References: nil,
   564  		}
        		// assigns to the err declared at the top of the test function
   565  		err = repo.Merge(context.Background(), md, nil, "")
   566  		assert.Nil(t, err)
   567  	})
   568  
   569  	t.Run("retrieve by each individual prop", func(t *testing.T) {
        		// retrieve returns a subtest body that runs one filtered search per
        		// prop type. prefix selects the prop family ("touched" or
        		// "untouched"), id is the number the expected prop values were
        		// derived from.
   570  		retrieve := func(prefix string, id int) func(t *testing.T) {
   571  			return func(t *testing.T) {
   572  				type test struct {
   573  					name   string
   574  					filter *filters.LocalFilter
   575  				}
   576  
        				// The array props are filtered with the same data type
        				// argument as their scalar counterparts.
   577  				tests := []test{
   578  					{
   579  						name: "string filter",
   580  						filter: buildFilter(
   581  							fmt.Sprintf("%s_string", prefix),
   582  							fmt.Sprintf("%d", id),
   583  							eq,
   584  							schema.DataTypeText),
   585  					},
   586  					{
   587  						name: "string array filter",
   588  						filter: buildFilter(
   589  							fmt.Sprintf("%s_string_array", prefix),
   590  							fmt.Sprintf("%d", id),
   591  							eq,
   592  							schema.DataTypeText),
   593  					},
   594  					{
   595  						name: "text filter",
   596  						filter: buildFilter(
   597  							fmt.Sprintf("%s_text", prefix),
   598  							fmt.Sprintf("%d", id),
   599  							eq,
   600  							dtText),
   601  					},
   602  					{
   603  						name: "text array filter",
   604  						filter: buildFilter(
   605  							fmt.Sprintf("%s_text_array", prefix),
   606  							fmt.Sprintf("%d", id),
   607  							eq,
   608  							dtText),
   609  					},
   610  					{
   611  						name: "int filter",
   612  						filter: buildFilter(
   613  							fmt.Sprintf("%s_int", prefix), id, eq, dtInt),
   614  					},
   615  					{
   616  						name: "int array filter",
   617  						filter: buildFilter(
   618  							fmt.Sprintf("%s_int_array", prefix), id, eq, dtInt),
   619  					},
   620  					{
   621  						name: "number filter",
   622  						filter: buildFilter(
   623  							fmt.Sprintf("%s_number", prefix), float64(id), eq, dtNumber),
   624  					},
   625  					{
   626  						name: "number array filter",
   627  						filter: buildFilter(
   628  							fmt.Sprintf("%s_number_array", prefix), float64(id), eq, dtNumber),
   629  					},
   630  					{
   631  						name: "date filter",
   632  						filter: buildFilter(
   633  							fmt.Sprintf("%s_date", prefix),
   634  							time.Unix(0, 0).Add(time.Duration(id)*time.Hour),
   635  							eq, dtDate),
   636  					},
   637  					{
   638  						name: "date array filter",
   639  						filter: buildFilter(
   640  							fmt.Sprintf("%s_date_array", prefix),
   641  							time.Unix(0, 0).Add(time.Duration(id)*time.Hour),
   642  							eq, dtDate),
   643  					},
        					// The geo prop is the only one not matched with eq: it
        					// uses wgr with a range of Distance 2 around the
        					// coordinates built from id.
   644  					{
   645  						name: "geoFilter filter",
   646  						filter: buildFilter(
   647  							fmt.Sprintf("%s_geo", prefix),
   648  							filters.GeoRange{
   649  								GeoCoordinates: &models.GeoCoordinates{
   650  									ptFloat32(float32(id)), ptFloat32(float32(id)),
   651  								},
   652  								Distance: 2,
   653  							},
   654  							wgr, dtGeoCoordinates),
   655  					},
   656  				}
   657  
   658  				for _, tc := range tests {
   659  					t.Run(tc.name, func(t *testing.T) {
        						// No search vector is set in the params, only the
        						// filter.
   660  						params := dto.GetParams{
   661  							ClassName:  "TestClass",
   662  							Pagination: &filters.Pagination{Limit: 5},
   663  							Filters:    tc.filter,
   664  						}
   665  						res, err := repo.VectorSearch(context.Background(), params)
   666  						require.Nil(t, err)
        						// The limit is 5, but exactly one match is required.
   667  						require.Len(t, res, 1)
   668  
   669  						// hard-code the only uuid
   670  						assert.Equal(t, uuidFromInt(0), res[0].ID)
   671  					})
   672  				}
   673  			}
   674  		}
        		// 'untouched_' props must still match the initial value (0),
        		// 'touched_' props must match the merged value (28).
   675  		t.Run("using untouched", retrieve("untouched", 0))
   676  		t.Run("using touched", retrieve("touched", 28))
   677  	})
   678  }
   679  
   680  func Test_MergeDocIdPreserved_PropsCorrectlyIndexed(t *testing.T) {
   681  	dirName := t.TempDir()
   682  
   683  	logger := logrus.New()
   684  	schemaGetter := &fakeSchemaGetter{
   685  		schema:     schema.Schema{Objects: &models.Schema{Classes: nil}},
   686  		shardState: singleShardState(),
   687  	}
   688  	repo, err := New(logger, Config{
   689  		MemtablesFlushDirtyAfter:  60,
   690  		RootPath:                  dirName,
   691  		MaxImportGoroutinesFactor: 1,
   692  		QueryMaximumResults:       10000,
   693  		TrackVectorDimensions:     true,
   694  	}, &fakeRemoteClient{}, &fakeNodeResolver{}, &fakeRemoteNodeClient{}, &fakeReplicationClient{}, nil)
   695  	require.Nil(t, err)
   696  	repo.SetSchemaGetter(schemaGetter)
   697  	require.Nil(t, repo.WaitForStartup(testCtx()))
   698  	defer repo.Shutdown(context.Background())
   699  	migrator := NewMigrator(repo, logger)
   700  	hnswConfig := enthnsw.NewDefaultUserConfig()
   701  	hnswConfig.Skip = true
   702  	sch := schema.Schema{
   703  		Objects: &models.Schema{
   704  			Classes: []*models.Class{
   705  				{
   706  					Class:               "TestClass",
   707  					VectorIndexConfig:   hnswConfig,
   708  					InvertedIndexConfig: invertedConfig(),
   709  					Properties: []*models.Property{ // tries to have "one of each property type"
   710  						{
   711  							Name:         "untouched_string",
   712  							DataType:     schema.DataTypeText.PropString(),
   713  							Tokenization: models.PropertyTokenizationWhitespace,
   714  						},
   715  						{
   716  							Name:         "touched_string",
   717  							DataType:     schema.DataTypeText.PropString(),
   718  							Tokenization: models.PropertyTokenizationWhitespace,
   719  						},
   720  						{
   721  							Name:         "untouched_string_array",
   722  							DataType:     schema.DataTypeTextArray.PropString(),
   723  							Tokenization: models.PropertyTokenizationWhitespace,
   724  						},
   725  						{
   726  							Name:         "touched_string_array",
   727  							DataType:     schema.DataTypeTextArray.PropString(),
   728  							Tokenization: models.PropertyTokenizationWhitespace,
   729  						},
   730  						{
   731  							Name: "untouched_text", Tokenization: "word",
   732  							DataType: []string{"text"},
   733  						},
   734  						{
   735  							Name: "touched_text", Tokenization: "word",
   736  							DataType: []string{"text"},
   737  						},
   738  						{
   739  							Name: "untouched_text_array", Tokenization: "word",
   740  							DataType: []string{"text[]"},
   741  						},
   742  						{
   743  							Name: "touched_text_array", Tokenization: "word",
   744  							DataType: []string{"text[]"},
   745  						},
   746  						{Name: "untouched_number", DataType: []string{"number"}},
   747  						{Name: "touched_number", DataType: []string{"number"}},
   748  						{Name: "untouched_number_array", DataType: []string{"number[]"}},
   749  						{Name: "touched_number_array", DataType: []string{"number[]"}},
   750  						{Name: "untouched_int", DataType: []string{"int"}},
   751  						{Name: "touched_int", DataType: []string{"int"}},
   752  						{Name: "untouched_int_array", DataType: []string{"int[]"}},
   753  						{Name: "touched_int_array", DataType: []string{"int[]"}},
   754  						{Name: "untouched_date", DataType: []string{"date"}},
   755  						{Name: "touched_date", DataType: []string{"date"}},
   756  						{Name: "untouched_date_array", DataType: []string{"date[]"}},
   757  						{Name: "touched_date_array", DataType: []string{"date[]"}},
   758  					},
   759  				},
   760  			},
   761  		},
   762  	}
   763  
   764  	t.Run("add required classes", func(t *testing.T) {
   765  		for _, class := range sch.Objects.Classes {
   766  			t.Run(fmt.Sprintf("add %s", class.Class), func(t *testing.T) {
   767  				err := migrator.AddClass(context.Background(), class, schemaGetter.shardState)
   768  				require.Nil(t, err)
   769  			})
   770  		}
   771  	})
   772  
   773  	schemaGetter.schema = sch
   774  
   775  	t.Run("add initial object", func(t *testing.T) {
   776  		id := 0
   777  		err := repo.PutObject(context.Background(), &models.Object{
   778  			ID:    uuidFromInt(id),
   779  			Class: "TestClass",
   780  			Properties: map[string]interface{}{
   781  				"untouched_number":       float64(id),
   782  				"untouched_number_array": []interface{}{float64(id)},
   783  				"untouched_int":          id,
   784  				"untouched_int_array":    []interface{}{int64(id)},
   785  				"untouched_string":       fmt.Sprintf("%d", id),
   786  				"untouched_string_array": []string{fmt.Sprintf("%d", id)},
   787  				"untouched_text":         fmt.Sprintf("%d", id),
   788  				"untouched_text_array":   []string{fmt.Sprintf("%d", id)},
   789  				"untouched_date":         time.Unix(0, 0).Add(time.Duration(id) * time.Hour),
   790  				"untouched_date_array":   []time.Time{time.Unix(0, 0).Add(time.Duration(id) * time.Hour)},
   791  
   792  				"touched_number":       float64(id),
   793  				"touched_number_array": []interface{}{float64(id)},
   794  				"touched_int":          id,
   795  				"touched_int_array":    []interface{}{int64(id)},
   796  				"touched_string":       fmt.Sprintf("%d", id),
   797  				"touched_string_array": []string{fmt.Sprintf("%d", id)},
   798  				"touched_text":         fmt.Sprintf("%d", id),
   799  				"touched_text_array":   []string{fmt.Sprintf("%d", id)},
   800  				"touched_date":         time.Unix(0, 0).Add(time.Duration(id) * time.Hour),
   801  				"touched_date_array":   []time.Time{time.Unix(0, 0).Add(time.Duration(id) * time.Hour)},
   802  			},
   803  			CreationTimeUnix:   int64(id),
   804  			LastUpdateTimeUnix: int64(id),
   805  		}, []float32{0.5}, nil, nil)
   806  		require.Nil(t, err)
   807  	})
   808  
   809  	t.Run("patch half the props (all that contain 'touched')", func(t *testing.T) {
   810  		updateID := 28
   811  		md := objects.MergeDocument{
   812  			Class: "TestClass",
   813  			ID:    uuidFromInt(0),
   814  			PrimitiveSchema: map[string]interface{}{
   815  				"touched_number":       float64(updateID),
   816  				"touched_number_array": []interface{}{float64(updateID)},
   817  				"touched_int":          updateID,
   818  				"touched_int_array":    []interface{}{int64(updateID)},
   819  				"touched_string":       fmt.Sprintf("%d", updateID),
   820  				"touched_string_array": []string{fmt.Sprintf("%d", updateID)},
   821  				"touched_text":         fmt.Sprintf("%d", updateID),
   822  				"touched_text_array":   []string{fmt.Sprintf("%d", updateID)},
   823  				"touched_date":         time.Unix(0, 0).Add(time.Duration(updateID) * time.Hour),
   824  				"touched_date_array":   []time.Time{time.Unix(0, 0).Add(time.Duration(updateID) * time.Hour)},
   825  			},
   826  			References: nil,
   827  		}
   828  		err = repo.Merge(context.Background(), md, nil, "")
   829  		assert.Nil(t, err)
   830  	})
   831  
   832  	t.Run("retrieve by each individual prop", func(t *testing.T) {
   833  		retrieve := func(prefix string, id int) func(t *testing.T) {
   834  			return func(t *testing.T) {
   835  				type test struct {
   836  					name   string
   837  					filter *filters.LocalFilter
   838  				}
   839  
   840  				tests := []test{
   841  					{
   842  						name: "string filter",
   843  						filter: buildFilter(
   844  							fmt.Sprintf("%s_string", prefix),
   845  							fmt.Sprintf("%d", id),
   846  							eq,
   847  							schema.DataTypeText),
   848  					},
   849  					{
   850  						name: "string array filter",
   851  						filter: buildFilter(
   852  							fmt.Sprintf("%s_string_array", prefix),
   853  							fmt.Sprintf("%d", id),
   854  							eq,
   855  							schema.DataTypeText),
   856  					},
   857  					{
   858  						name: "text filter",
   859  						filter: buildFilter(
   860  							fmt.Sprintf("%s_text", prefix),
   861  							fmt.Sprintf("%d", id),
   862  							eq,
   863  							dtText),
   864  					},
   865  					{
   866  						name: "text array filter",
   867  						filter: buildFilter(
   868  							fmt.Sprintf("%s_text_array", prefix),
   869  							fmt.Sprintf("%d", id),
   870  							eq,
   871  							dtText),
   872  					},
   873  					{
   874  						name: "int filter",
   875  						filter: buildFilter(
   876  							fmt.Sprintf("%s_int", prefix), id, eq, dtInt),
   877  					},
   878  					{
   879  						name: "int array filter",
   880  						filter: buildFilter(
   881  							fmt.Sprintf("%s_int_array", prefix), id, eq, dtInt),
   882  					},
   883  					{
   884  						name: "number filter",
   885  						filter: buildFilter(
   886  							fmt.Sprintf("%s_number", prefix), float64(id), eq, dtNumber),
   887  					},
   888  					{
   889  						name: "number array filter",
   890  						filter: buildFilter(
   891  							fmt.Sprintf("%s_number_array", prefix), float64(id), eq, dtNumber),
   892  					},
   893  					{
   894  						name: "date filter",
   895  						filter: buildFilter(
   896  							fmt.Sprintf("%s_date", prefix),
   897  							time.Unix(0, 0).Add(time.Duration(id)*time.Hour),
   898  							eq, dtDate),
   899  					},
   900  					{
   901  						name: "date array filter",
   902  						filter: buildFilter(
   903  							fmt.Sprintf("%s_date_array", prefix),
   904  							time.Unix(0, 0).Add(time.Duration(id)*time.Hour),
   905  							eq, dtDate),
   906  					},
   907  				}
   908  
   909  				for _, tc := range tests {
   910  					t.Run(tc.name, func(t *testing.T) {
   911  						params := dto.GetParams{
   912  							ClassName:  "TestClass",
   913  							Pagination: &filters.Pagination{Limit: 5},
   914  							Filters:    tc.filter,
   915  						}
   916  						res, err := repo.VectorSearch(context.Background(), params)
   917  						require.Nil(t, err)
   918  						require.Len(t, res, 1)
   919  
   920  						// hard-code the only uuid
   921  						assert.Equal(t, uuidFromInt(0), res[0].ID)
   922  					})
   923  				}
   924  			}
   925  		}
   926  		t.Run("using untouched", retrieve("untouched", 0))
   927  		t.Run("using touched", retrieve("touched", 28))
   928  	})
   929  }
   930  
   931  func uuidFromInt(in int) strfmt.UUID {
   932  	return strfmt.UUID(uuid.MustParse(fmt.Sprintf("%032d", in)).String())
   933  }