github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/inverted/searcher_integration_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  //go:build integrationTest
    13  
    14  package inverted
    15  
    16  import (
    17  	"bytes"
    18  	"context"
    19  	"encoding/binary"
    20  	"math"
    21  	"strings"
    22  	"testing"
    23  
    24  	"github.com/go-openapi/strfmt"
    25  	"github.com/google/uuid"
    26  	"github.com/sirupsen/logrus/hooks/test"
    27  	"github.com/stretchr/testify/assert"
    28  	"github.com/stretchr/testify/require"
    29  	"github.com/weaviate/weaviate/adapters/repos/db/helpers"
    30  	"github.com/weaviate/weaviate/adapters/repos/db/lsmkv"
    31  	"github.com/weaviate/weaviate/adapters/repos/db/roaringset"
    32  	"github.com/weaviate/weaviate/entities/additional"
    33  	"github.com/weaviate/weaviate/entities/cyclemanager"
    34  	"github.com/weaviate/weaviate/entities/filters"
    35  	"github.com/weaviate/weaviate/entities/models"
    36  	"github.com/weaviate/weaviate/entities/schema"
    37  	"github.com/weaviate/weaviate/entities/storobj"
    38  	"github.com/weaviate/weaviate/usecases/config"
    39  )
    40  
    41  func TestObjects(t *testing.T) {
    42  	var (
    43  		dirName      = t.TempDir()
    44  		logger, _    = test.NewNullLogger()
    45  		propName     = "inverted-with-frequency"
    46  		charSet      = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    47  		charRepeat   = 50
    48  		multiplier   = 10
    49  		numObjects   = len(charSet) * multiplier
    50  		docIDCounter = uint64(0)
    51  	)
    52  
    53  	store, err := lsmkv.New(dirName, dirName, logger, nil,
    54  		cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop())
    55  	require.Nil(t, err)
    56  	defer func() { assert.Nil(t, err) }()
    57  
    58  	t.Run("create buckets", func(t *testing.T) {
    59  		require.Nil(t, store.CreateOrLoadBucket(context.Background(), helpers.ObjectsBucketLSM,
    60  			lsmkv.WithStrategy(lsmkv.StrategyReplace), lsmkv.WithSecondaryIndices(1)))
    61  		require.NotNil(t, store.Bucket(helpers.ObjectsBucketLSM))
    62  
    63  		require.Nil(t, store.CreateOrLoadBucket(context.Background(),
    64  			helpers.BucketSearchableFromPropNameLSM(propName),
    65  			lsmkv.WithStrategy(lsmkv.StrategyMapCollection)))
    66  		require.NotNil(t, store.Bucket(helpers.BucketSearchableFromPropNameLSM(propName)))
    67  	})
    68  
    69  	type testCase struct {
    70  		targetChar uint8
    71  		object     *storobj.Object
    72  	}
    73  	tests := make([]testCase, numObjects)
    74  
    75  	t.Run("put objects and build test cases", func(t *testing.T) {
    76  		for i := 0; i < numObjects; i++ {
    77  			targetChar := charSet[i%len(charSet)]
    78  			prop := repeatString(string(targetChar), charRepeat)
    79  			obj := storobj.Object{
    80  				MarshallerVersion: 1,
    81  				Object: models.Object{
    82  					ID:    strfmt.UUID(uuid.NewString()),
    83  					Class: className,
    84  					Properties: map[string]interface{}{
    85  						propName: prop,
    86  					},
    87  				},
    88  				DocID: docIDCounter,
    89  			}
    90  			docIDCounter++
    91  			putObject(t, store, &obj, propName, []byte(prop))
    92  			tests[i] = testCase{
    93  				targetChar: targetChar,
    94  				object:     &obj,
    95  			}
    96  		}
    97  	})
    98  
    99  	bitmapFactory := roaringset.NewBitmapFactory(newFakeMaxIDGetter(docIDCounter), logger)
   100  
   101  	searcher := NewSearcher(logger, store, createSchema(), nil, nil,
   102  		fakeStopwordDetector{}, 2, func() bool { return false }, "",
   103  		config.DefaultQueryNestedCrossReferenceLimit, bitmapFactory)
   104  
   105  	t.Run("run tests", func(t *testing.T) {
   106  		t.Run("NotEqual", func(t *testing.T) {
   107  			t.Parallel()
   108  			for _, test := range tests {
   109  				filter := &filters.LocalFilter{Root: &filters.Clause{
   110  					Operator: filters.OperatorNotEqual,
   111  					On: &filters.Path{
   112  						Class:    className,
   113  						Property: schema.PropertyName(propName),
   114  					},
   115  					Value: &filters.Value{
   116  						Value: repeatString(string(test.targetChar), charRepeat),
   117  						Type:  schema.DataTypeText,
   118  					},
   119  				}}
   120  				objs, err := searcher.Objects(context.Background(), numObjects,
   121  					filter, nil, additional.Properties{}, className)
   122  				assert.Nil(t, err)
   123  				assert.Len(t, objs, numObjects-multiplier)
   124  			}
   125  		})
   126  		t.Run("Equal", func(t *testing.T) {
   127  			t.Parallel()
   128  			for _, test := range tests {
   129  				filter := &filters.LocalFilter{Root: &filters.Clause{
   130  					Operator: filters.OperatorEqual,
   131  					On: &filters.Path{
   132  						Class:    className,
   133  						Property: schema.PropertyName(propName),
   134  					},
   135  					Value: &filters.Value{
   136  						Value: repeatString(string(test.targetChar), charRepeat),
   137  						Type:  schema.DataTypeText,
   138  					},
   139  				}}
   140  				objs, err := searcher.Objects(context.Background(), numObjects,
   141  					filter, nil, additional.Properties{}, className)
   142  				assert.Nil(t, err)
   143  				assert.Len(t, objs, multiplier)
   144  			}
   145  		})
   146  	})
   147  }
   148  
   149  func TestDocIDs(t *testing.T) {
   150  	var (
   151  		dirName      = t.TempDir()
   152  		logger, _    = test.NewNullLogger()
   153  		propName     = "inverted-with-frequency"
   154  		charSet      = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
   155  		charRepeat   = 3
   156  		multiplier   = 100
   157  		numObjects   = len(charSet) * multiplier
   158  		docIDCounter = uint64(0)
   159  	)
   160  	store, err := lsmkv.New(dirName, dirName, logger, nil,
   161  		cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop())
   162  	require.Nil(t, err)
   163  	defer func() { assert.Nil(t, err) }()
   164  
   165  	t.Run("create buckets", func(t *testing.T) {
   166  		require.Nil(t, store.CreateOrLoadBucket(context.Background(), helpers.ObjectsBucketLSM,
   167  			lsmkv.WithStrategy(lsmkv.StrategyReplace), lsmkv.WithSecondaryIndices(1)))
   168  		require.NotNil(t, store.Bucket(helpers.ObjectsBucketLSM))
   169  
   170  		require.Nil(t, store.CreateOrLoadBucket(context.Background(),
   171  			helpers.BucketSearchableFromPropNameLSM(propName),
   172  			lsmkv.WithStrategy(lsmkv.StrategyMapCollection)))
   173  		require.NotNil(t, store.Bucket(helpers.BucketSearchableFromPropNameLSM(propName)))
   174  	})
   175  
   176  	t.Run("put objects", func(t *testing.T) {
   177  		for i := 0; i < numObjects; i++ {
   178  			targetChar := charSet[i%len(charSet)]
   179  			prop := repeatString(string(targetChar), charRepeat)
   180  			obj := storobj.Object{
   181  				MarshallerVersion: 1,
   182  				Object: models.Object{
   183  					ID:    strfmt.UUID(uuid.NewString()),
   184  					Class: className,
   185  					Properties: map[string]interface{}{
   186  						propName: prop,
   187  					},
   188  				},
   189  				DocID: docIDCounter,
   190  			}
   191  			docIDCounter++
   192  			putObject(t, store, &obj, propName, []byte(prop))
   193  		}
   194  	})
   195  
   196  	bitmapFactory := roaringset.NewBitmapFactory(newFakeMaxIDGetter(docIDCounter), logger)
   197  
   198  	searcher := NewSearcher(logger, store, createSchema(), nil, nil,
   199  		fakeStopwordDetector{}, 2, func() bool { return false }, "",
   200  		config.DefaultQueryNestedCrossReferenceLimit, bitmapFactory)
   201  
   202  	type testCase struct {
   203  		expectedMatches int
   204  		filter          filters.LocalFilter
   205  	}
   206  	tests := []testCase{
   207  		{
   208  			filter: filters.LocalFilter{
   209  				Root: &filters.Clause{
   210  					Operator: filters.OperatorNotEqual,
   211  					On: &filters.Path{
   212  						Class:    className,
   213  						Property: schema.PropertyName(propName),
   214  					},
   215  					Value: &filters.Value{
   216  						Value: "[[[",
   217  						Type:  schema.DataTypeText,
   218  					},
   219  				},
   220  			},
   221  			expectedMatches: numObjects,
   222  		},
   223  		{
   224  			filter: filters.LocalFilter{
   225  				Root: &filters.Clause{
   226  					Operator: filters.OperatorNotEqual,
   227  					On: &filters.Path{
   228  						Class:    className,
   229  						Property: schema.PropertyName(propName),
   230  					},
   231  					Value: &filters.Value{
   232  						Value: "AAA",
   233  						Type:  schema.DataTypeText,
   234  					},
   235  				},
   236  			},
   237  			expectedMatches: len(charSet)*multiplier - 1,
   238  		},
   239  	}
   240  
   241  	for _, tc := range tests {
   242  		allow, err := searcher.DocIDs(context.Background(), &tc.filter, additional.Properties{}, className)
   243  		require.Nil(t, err)
   244  		assert.Equal(t, tc.expectedMatches, allow.Len())
   245  	}
   246  }
   247  
   248  // lifted from Shard::pairPropertyWithFrequency to emulate Bucket::MapSet functionality
   249  func pairPropWithFreq(docID uint64, freq, propLen float32) lsmkv.MapPair {
   250  	buf := make([]byte, 16)
   251  
   252  	binary.BigEndian.PutUint64(buf[0:8], docID)
   253  	binary.LittleEndian.PutUint32(buf[8:12], math.Float32bits(freq))
   254  	binary.LittleEndian.PutUint32(buf[12:16], math.Float32bits(propLen))
   255  
   256  	return lsmkv.MapPair{
   257  		Key:   buf[:8],
   258  		Value: buf[8:],
   259  	}
   260  }
   261  
   262  func putObject(t *testing.T, store *lsmkv.Store, obj *storobj.Object, propName string, data []byte) {
   263  	b, err := obj.MarshalBinary()
   264  	require.Nil(t, err)
   265  
   266  	keyBuf := bytes.NewBuffer(nil)
   267  	binary.Write(keyBuf, binary.LittleEndian, &obj.DocID)
   268  	docIDBytes := keyBuf.Bytes()
   269  
   270  	bucket := store.Bucket(helpers.ObjectsBucketLSM)
   271  	err = bucket.Put([]byte(obj.ID()), b, lsmkv.WithSecondaryKey(0, docIDBytes))
   272  	require.Nil(t, err)
   273  
   274  	propBucketName := helpers.BucketSearchableFromPropNameLSM(propName)
   275  	propBucket := store.Bucket(propBucketName)
   276  	err = propBucket.MapSet(data, pairPropWithFreq(obj.DocID, 1, float32(len(data))))
   277  	require.Nil(t, err)
   278  }
   279  
   280  func repeatString(s string, n int) string {
   281  	sb := strings.Builder{}
   282  	for i := 0; i < n; i++ {
   283  		sb.WriteString(s)
   284  	}
   285  	return sb.String()
   286  }