github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/inverted/searcher_integration_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 //go:build integrationTest 13 14 package inverted 15 16 import ( 17 "bytes" 18 "context" 19 "encoding/binary" 20 "math" 21 "strings" 22 "testing" 23 24 "github.com/go-openapi/strfmt" 25 "github.com/google/uuid" 26 "github.com/sirupsen/logrus/hooks/test" 27 "github.com/stretchr/testify/assert" 28 "github.com/stretchr/testify/require" 29 "github.com/weaviate/weaviate/adapters/repos/db/helpers" 30 "github.com/weaviate/weaviate/adapters/repos/db/lsmkv" 31 "github.com/weaviate/weaviate/adapters/repos/db/roaringset" 32 "github.com/weaviate/weaviate/entities/additional" 33 "github.com/weaviate/weaviate/entities/cyclemanager" 34 "github.com/weaviate/weaviate/entities/filters" 35 "github.com/weaviate/weaviate/entities/models" 36 "github.com/weaviate/weaviate/entities/schema" 37 "github.com/weaviate/weaviate/entities/storobj" 38 "github.com/weaviate/weaviate/usecases/config" 39 ) 40 41 func TestObjects(t *testing.T) { 42 var ( 43 dirName = t.TempDir() 44 logger, _ = test.NewNullLogger() 45 propName = "inverted-with-frequency" 46 charSet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 47 charRepeat = 50 48 multiplier = 10 49 numObjects = len(charSet) * multiplier 50 docIDCounter = uint64(0) 51 ) 52 53 store, err := lsmkv.New(dirName, dirName, logger, nil, 54 cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop()) 55 require.Nil(t, err) 56 defer func() { assert.Nil(t, err) }() 57 58 t.Run("create buckets", func(t *testing.T) { 59 require.Nil(t, store.CreateOrLoadBucket(context.Background(), helpers.ObjectsBucketLSM, 60 lsmkv.WithStrategy(lsmkv.StrategyReplace), lsmkv.WithSecondaryIndices(1))) 61 require.NotNil(t, store.Bucket(helpers.ObjectsBucketLSM)) 62 63 require.Nil(t, store.CreateOrLoadBucket(context.Background(), 64 helpers.BucketSearchableFromPropNameLSM(propName), 65 lsmkv.WithStrategy(lsmkv.StrategyMapCollection))) 66 require.NotNil(t, store.Bucket(helpers.BucketSearchableFromPropNameLSM(propName))) 67 }) 68 69 type testCase struct { 70 targetChar uint8 71 object *storobj.Object 72 } 73 tests := make([]testCase, numObjects) 74 75 t.Run("put objects and build test cases", func(t *testing.T) { 76 for i := 0; i < numObjects; i++ { 77 targetChar := charSet[i%len(charSet)] 78 prop := repeatString(string(targetChar), charRepeat) 79 obj := storobj.Object{ 80 MarshallerVersion: 1, 81 Object: models.Object{ 82 ID: strfmt.UUID(uuid.NewString()), 83 Class: className, 84 Properties: map[string]interface{}{ 85 propName: prop, 86 }, 87 }, 88 DocID: docIDCounter, 89 } 90 docIDCounter++ 91 putObject(t, store, &obj, propName, []byte(prop)) 92 tests[i] = testCase{ 93 targetChar: targetChar, 94 object: &obj, 95 } 96 } 97 }) 98 99 bitmapFactory := roaringset.NewBitmapFactory(newFakeMaxIDGetter(docIDCounter), logger) 100 101 searcher := NewSearcher(logger, store, createSchema(), nil, nil, 102 fakeStopwordDetector{}, 2, func() bool { return false }, "", 103 config.DefaultQueryNestedCrossReferenceLimit, bitmapFactory) 104 105 t.Run("run tests", func(t *testing.T) { 106 t.Run("NotEqual", func(t *testing.T) { 107 t.Parallel() 108 for _, test := range tests { 109 filter := &filters.LocalFilter{Root: &filters.Clause{ 110 Operator: filters.OperatorNotEqual, 111 On: &filters.Path{ 112 Class: className, 113 Property: schema.PropertyName(propName), 114 }, 115 Value: &filters.Value{ 116 Value: repeatString(string(test.targetChar), charRepeat), 117 Type: schema.DataTypeText, 118 }, 119 }} 120 objs, err := searcher.Objects(context.Background(), numObjects, 121 filter, nil, additional.Properties{}, className) 122 assert.Nil(t, err) 123 assert.Len(t, objs, numObjects-multiplier) 124 } 125 }) 126 t.Run("Equal", func(t *testing.T) { 127 t.Parallel() 128 for _, test := range tests { 129 filter := &filters.LocalFilter{Root: &filters.Clause{ 130 Operator: filters.OperatorEqual, 131 On: &filters.Path{ 132 Class: className, 133 Property: schema.PropertyName(propName), 134 }, 135 Value: &filters.Value{ 136 Value: repeatString(string(test.targetChar), charRepeat), 137 Type: schema.DataTypeText, 138 }, 139 }} 140 objs, err := searcher.Objects(context.Background(), numObjects, 141 filter, nil, additional.Properties{}, className) 142 assert.Nil(t, err) 143 assert.Len(t, objs, multiplier) 144 } 145 }) 146 }) 147 } 148 149 func TestDocIDs(t *testing.T) { 150 var ( 151 dirName = t.TempDir() 152 logger, _ = test.NewNullLogger() 153 propName = "inverted-with-frequency" 154 charSet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 155 charRepeat = 3 156 multiplier = 100 157 numObjects = len(charSet) * multiplier 158 docIDCounter = uint64(0) 159 ) 160 store, err := lsmkv.New(dirName, dirName, logger, nil, 161 cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop()) 162 require.Nil(t, err) 163 defer func() { assert.Nil(t, err) }() 164 165 t.Run("create buckets", func(t *testing.T) { 166 require.Nil(t, store.CreateOrLoadBucket(context.Background(), helpers.ObjectsBucketLSM, 167 lsmkv.WithStrategy(lsmkv.StrategyReplace), lsmkv.WithSecondaryIndices(1))) 168 require.NotNil(t, store.Bucket(helpers.ObjectsBucketLSM)) 169 170 require.Nil(t, store.CreateOrLoadBucket(context.Background(), 171 helpers.BucketSearchableFromPropNameLSM(propName), 172 lsmkv.WithStrategy(lsmkv.StrategyMapCollection))) 173 require.NotNil(t, store.Bucket(helpers.BucketSearchableFromPropNameLSM(propName))) 174 }) 175 176 t.Run("put objects", func(t *testing.T) { 177 for i := 0; i < numObjects; i++ { 178 targetChar := charSet[i%len(charSet)] 179 prop := repeatString(string(targetChar), charRepeat) 180 obj := storobj.Object{ 181 MarshallerVersion: 1, 182 Object: models.Object{ 183 ID: strfmt.UUID(uuid.NewString()), 184 Class: className, 185 Properties: map[string]interface{}{ 186 propName: prop, 187 }, 188 }, 189 DocID: docIDCounter, 190 } 191 docIDCounter++ 192 putObject(t, store, &obj, propName, []byte(prop)) 193 } 194 }) 195 196 bitmapFactory := roaringset.NewBitmapFactory(newFakeMaxIDGetter(docIDCounter), logger) 197 198 searcher := NewSearcher(logger, store, createSchema(), nil, nil, 199 fakeStopwordDetector{}, 2, func() bool { return false }, "", 200 config.DefaultQueryNestedCrossReferenceLimit, bitmapFactory) 201 202 type testCase struct { 203 expectedMatches int 204 filter filters.LocalFilter 205 } 206 tests := []testCase{ 207 { 208 filter: filters.LocalFilter{ 209 Root: &filters.Clause{ 210 Operator: filters.OperatorNotEqual, 211 On: &filters.Path{ 212 Class: className, 213 Property: schema.PropertyName(propName), 214 }, 215 Value: &filters.Value{ 216 Value: "[[[", 217 Type: schema.DataTypeText, 218 }, 219 }, 220 }, 221 expectedMatches: numObjects, 222 }, 223 { 224 filter: filters.LocalFilter{ 225 Root: &filters.Clause{ 226 Operator: filters.OperatorNotEqual, 227 On: &filters.Path{ 228 Class: className, 229 Property: schema.PropertyName(propName), 230 }, 231 Value: &filters.Value{ 232 Value: "AAA", 233 Type: schema.DataTypeText, 234 }, 235 }, 236 }, 237 expectedMatches: len(charSet)*multiplier - 1, 238 }, 239 } 240 241 for _, tc := range tests { 242 allow, err := searcher.DocIDs(context.Background(), &tc.filter, additional.Properties{}, className) 243 require.Nil(t, err) 244 assert.Equal(t, tc.expectedMatches, allow.Len()) 245 } 246 } 247 248 // lifted from Shard::pairPropertyWithFrequency to emulate Bucket::MapSet functionality 249 func pairPropWithFreq(docID uint64, freq, propLen float32) lsmkv.MapPair { 250 buf := make([]byte, 16) 251 252 binary.BigEndian.PutUint64(buf[0:8], docID) 253 binary.LittleEndian.PutUint32(buf[8:12], math.Float32bits(freq)) 254 binary.LittleEndian.PutUint32(buf[12:16], math.Float32bits(propLen)) 255 256 return lsmkv.MapPair{ 257 Key: buf[:8], 258 Value: buf[8:], 259 } 260 } 261 262 func putObject(t *testing.T, store *lsmkv.Store, obj *storobj.Object, propName string, data []byte) { 263 b, err := obj.MarshalBinary() 264 require.Nil(t, err) 265 266 keyBuf := bytes.NewBuffer(nil) 267 binary.Write(keyBuf, binary.LittleEndian, &obj.DocID) 268 docIDBytes := keyBuf.Bytes() 269 270 bucket := store.Bucket(helpers.ObjectsBucketLSM) 271 err = bucket.Put([]byte(obj.ID()), b, lsmkv.WithSecondaryKey(0, docIDBytes)) 272 require.Nil(t, err) 273 274 propBucketName := helpers.BucketSearchableFromPropNameLSM(propName) 275 propBucket := store.Bucket(propBucketName) 276 err = propBucket.MapSet(data, pairPropWithFreq(obj.DocID, 1, float32(len(data)))) 277 require.Nil(t, err) 278 } 279 280 func repeatString(s string, n int) string { 281 sb := strings.Builder{} 282 for i := 0; i < n; i++ { 283 sb.WriteString(s) 284 } 285 return sb.String() 286 }