github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/posting/index_test.go (about) 1 /* 2 * Copyright 2016-2018 Dgraph Labs, Inc. and Contributors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package posting 18 19 import ( 20 "bytes" 21 "context" 22 "math" 23 "testing" 24 "time" 25 26 "github.com/dgraph-io/badger" 27 "github.com/stretchr/testify/require" 28 29 "github.com/dgraph-io/dgraph/protos/pb" 30 "github.com/dgraph-io/dgraph/schema" 31 "github.com/dgraph-io/dgraph/types" 32 "github.com/dgraph-io/dgraph/x" 33 ) 34 35 func uids(l *List, readTs uint64) []uint64 { 36 r, err := l.Uids(ListOptions{ReadTs: readTs}) 37 x.Check(err) 38 return r.Uids 39 } 40 41 // indexTokensForTest is just a wrapper around indexTokens used for convenience. 42 func indexTokensForTest(attr, lang string, val types.Val) ([]string, error) { 43 return indexTokens(&indexMutationInfo{ 44 tokenizers: schema.State().Tokenizer(attr), 45 edge: &pb.DirectedEdge{ 46 Attr: attr, 47 Lang: lang, 48 }, 49 val: val, 50 }) 51 } 52 53 func TestIndexingInt(t *testing.T) { 54 require.NoError(t, schema.ParseBytes([]byte("age:int @index(int) ."), 1)) 55 a, err := indexTokensForTest("age", "", types.Val{Tid: types.StringID, Value: []byte("10")}) 56 require.NoError(t, err) 57 require.EqualValues(t, []byte{0x6, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xa}, []byte(a[0])) 58 } 59 60 func TestIndexingIntNegative(t *testing.T) { 61 require.NoError(t, schema.ParseBytes([]byte("age:int @index(int) ."), 1)) 62 a, err := indexTokensForTest("age", "", types.Val{Tid: types.StringID, Value: []byte("-10")}) 63 require.NoError(t, err) 64 require.EqualValues(t, []byte{0x6, 0x0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf6}, 65 []byte(a[0])) 66 } 67 68 func TestIndexingFloat(t *testing.T) { 69 require.NoError(t, schema.ParseBytes([]byte("age:float @index(float) ."), 1)) 70 a, err := indexTokensForTest("age", "", types.Val{Tid: types.StringID, Value: []byte("10.43")}) 71 require.NoError(t, err) 72 require.EqualValues(t, []byte{0x7, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xa}, []byte(a[0])) 73 } 74 75 func TestIndexingTime(t *testing.T) { 76 require.NoError(t, schema.ParseBytes([]byte("age:dateTime @index(year) ."), 1)) 77 a, err := indexTokensForTest("age", "", types.Val{Tid: types.StringID, 78 Value: []byte("0010-01-01T01:01:01.000000001")}) 79 require.NoError(t, err) 80 require.EqualValues(t, []byte{0x4, 0x0, 0xa}, []byte(a[0])) 81 } 82 83 func TestIndexing(t *testing.T) { 84 require.NoError(t, schema.ParseBytes([]byte("name:string @index(term) ."), 1)) 85 a, err := indexTokensForTest("name", "", types.Val{Tid: types.StringID, Value: []byte("abc")}) 86 require.NoError(t, err) 87 require.EqualValues(t, "\x01abc", string(a[0])) 88 } 89 90 func TestIndexingMultiLang(t *testing.T) { 91 require.NoError(t, schema.ParseBytes([]byte("name:string @index(fulltext) ."), 1)) 92 93 // ensure that default tokenizer is suitable for English 94 a, err := indexTokensForTest("name", "", types.Val{Tid: types.StringID, 95 Value: []byte("stemming")}) 96 require.NoError(t, err) 97 require.EqualValues(t, "\x08stem", string(a[0])) 98 99 // ensure that Finnish tokenizer is used 100 a, err = indexTokensForTest("name", "fi", types.Val{Tid: types.StringID, 101 Value: []byte("edeltäneessä")}) 102 require.NoError(t, err) 103 require.EqualValues(t, "\x08edeltän", string(a[0])) 104 105 // ensure that German tokenizer is used 106 a, err = indexTokensForTest("name", "de", types.Val{Tid: types.StringID, 107 Value: []byte("Auffassungsvermögen")}) 108 require.NoError(t, err) 109 require.EqualValues(t, "\x08auffassungsvermog", string(a[0])) 110 111 // ensure that default tokenizer works differently than German 112 a, err = indexTokensForTest("name", "", types.Val{Tid: types.StringID, 113 Value: []byte("Auffassungsvermögen")}) 114 require.NoError(t, err) 115 require.EqualValues(t, "\x08auffassungsvermögen", string(a[0])) 116 } 117 118 func TestIndexingInvalidLang(t *testing.T) { 119 require.NoError(t, schema.ParseBytes([]byte("name:string @index(fulltext) ."), 1)) 120 121 // tokenizer for "xx" language won't return an error. 122 _, err := indexTokensForTest("name", "xx", types.Val{Tid: types.StringID, 123 Value: []byte("error")}) 124 require.NoError(t, err) 125 } 126 127 func TestIndexingAliasedLang(t *testing.T) { 128 require.NoError(t, schema.ParseBytes([]byte("name:string @index(fulltext) @lang ."), 1)) 129 _, err := indexTokensForTest("name", "es", types.Val{Tid: types.StringID, 130 Value: []byte("base")}) 131 require.NoError(t, err) 132 // es-es and es-419 are aliased to es 133 _, err = indexTokensForTest("name", "es-es", types.Val{Tid: types.StringID, 134 Value: []byte("alias")}) 135 require.NoError(t, err) 136 _, err = indexTokensForTest("name", "es-419", types.Val{Tid: types.StringID, 137 Value: []byte("alias")}) 138 require.NoError(t, err) 139 } 140 141 func addMutation(t *testing.T, l *List, edge *pb.DirectedEdge, op uint32, 142 startTs uint64, commitTs uint64, index bool) { 143 if op == Del { 144 edge.Op = pb.DirectedEdge_DEL 145 } else if op == Set { 146 edge.Op = pb.DirectedEdge_SET 147 } else { 148 x.Fatalf("Unhandled op: %v", op) 149 } 150 txn := Oracle().RegisterStartTs(startTs) 151 txn.cache.SetIfAbsent(string(l.key), l) 152 if index { 153 require.NoError(t, l.AddMutationWithIndex(context.Background(), edge, txn)) 154 } else { 155 err := l.addMutation(context.Background(), txn, edge) 156 require.NoError(t, err) 157 } 158 159 txn.Update() 160 writer := NewTxnWriter(pstore) 161 require.NoError(t, txn.CommitToDisk(writer, commitTs)) 162 require.NoError(t, writer.Flush()) 163 } 164 165 const schemaVal = ` 166 name: string @index(term) . 167 name2: string @index(term) . 168 dob: dateTime @index(year) . 169 friend: [uid] @reverse . 170 ` 171 172 const mutatedSchemaVal = ` 173 name:string @index(term) . 174 name2:string . 175 dob:dateTime @index(year) . 176 friend:[uid] @reverse . 177 ` 178 179 // TODO(Txn): We can't read index key on disk if it was written in same txn. 180 func TestTokensTable(t *testing.T) { 181 require.NoError(t, schema.ParseBytes([]byte(schemaVal), 1)) 182 183 key := x.DataKey("name", 1) 184 l, err := getNew(key, ps) 185 require.NoError(t, err) 186 187 edge := &pb.DirectedEdge{ 188 Value: []byte("david"), 189 Label: "testing", 190 Attr: "name", 191 Entity: 157, 192 } 193 addMutation(t, l, edge, Set, 1, 2, true) 194 195 key = x.IndexKey("name", "\x01david") 196 time.Sleep(10 * time.Millisecond) 197 198 txn := ps.NewTransactionAt(3, false) 199 _, err = txn.Get(key) 200 require.NoError(t, err) 201 202 require.EqualValues(t, []string{"\x01david"}, tokensForTest("name")) 203 } 204 205 // tokensForTest returns keys for a table. This is just for testing / debugging. 206 func tokensForTest(attr string) []string { 207 pk := x.ParsedKey{Attr: attr} 208 prefix := pk.IndexPrefix() 209 txn := pstore.NewTransactionAt(math.MaxUint64, false) 210 defer txn.Discard() 211 it := txn.NewIterator(badger.DefaultIteratorOptions) 212 defer it.Close() 213 214 var out []string 215 for it.Seek(prefix); it.Valid(); it.Next() { 216 key := it.Item().Key() 217 if !bytes.HasPrefix(key, prefix) { 218 break 219 } 220 k, err := x.Parse(key) 221 x.Check(err) 222 x.AssertTrue(k.IsIndex()) 223 out = append(out, k.Term) 224 } 225 return out 226 } 227 228 // addEdgeToValue adds edge without indexing. 229 func addEdgeToValue(t *testing.T, attr string, src uint64, 230 value string, startTs, commitTs uint64) { 231 edge := &pb.DirectedEdge{ 232 Value: []byte(value), 233 Label: "testing", 234 Attr: attr, 235 Entity: src, 236 Op: pb.DirectedEdge_SET, 237 } 238 l, err := GetNoStore(x.DataKey(attr, src)) 239 require.NoError(t, err) 240 // No index entries added here as we do not call AddMutationWithIndex. 241 addMutation(t, l, edge, Set, startTs, commitTs, false) 242 } 243 244 // addEdgeToUID adds uid edge with reverse edge 245 func addEdgeToUID(t *testing.T, attr string, src uint64, 246 dst uint64, startTs, commitTs uint64) { 247 edge := &pb.DirectedEdge{ 248 ValueId: dst, 249 Label: "testing", 250 Attr: attr, 251 Entity: src, 252 Op: pb.DirectedEdge_SET, 253 } 254 l, err := GetNoStore(x.DataKey(attr, src)) 255 require.NoError(t, err) 256 // No index entries added here as we do not call AddMutationWithIndex. 257 addMutation(t, l, edge, Set, startTs, commitTs, false) 258 } 259 260 func TestRebuildIndex(t *testing.T) { 261 addEdgeToValue(t, "name2", 91, "Michonne", uint64(1), uint64(2)) 262 addEdgeToValue(t, "name2", 92, "David", uint64(3), uint64(4)) 263 264 require.NoError(t, schema.ParseBytes([]byte(schemaVal), 1)) 265 currentSchema, _ := schema.State().Get("name2") 266 rb := IndexRebuild{ 267 Attr: "name2", 268 StartTs: 5, 269 OldSchema: nil, 270 CurrentSchema: ¤tSchema, 271 } 272 require.NoError(t, rebuildIndex(context.Background(), &rb)) 273 274 // Check index entries in data store. 275 txn := ps.NewTransactionAt(6, false) 276 defer txn.Discard() 277 it := txn.NewIterator(badger.DefaultIteratorOptions) 278 defer it.Close() 279 pk := x.ParsedKey{Attr: "name2"} 280 prefix := pk.IndexPrefix() 281 var idxKeys []string 282 var idxVals []*List 283 for it.Seek(prefix); it.Valid(); it.Next() { 284 item := it.Item() 285 key := item.Key() 286 if !bytes.HasPrefix(key, prefix) { 287 break 288 } 289 if item.UserMeta()&BitEmptyPosting == BitEmptyPosting { 290 continue 291 } 292 idxKeys = append(idxKeys, string(key)) 293 l, err := GetNoStore(key) 294 require.NoError(t, err) 295 idxVals = append(idxVals, l) 296 } 297 require.Len(t, idxKeys, 2) 298 require.Len(t, idxVals, 2) 299 require.EqualValues(t, idxKeys[0], x.IndexKey("name2", "\x01david")) 300 require.EqualValues(t, idxKeys[1], x.IndexKey("name2", "\x01michonne")) 301 302 uids1 := uids(idxVals[0], 6) 303 uids2 := uids(idxVals[1], 6) 304 require.Len(t, uids1, 1) 305 require.Len(t, uids2, 1) 306 require.EqualValues(t, 92, uids1[0]) 307 require.EqualValues(t, 91, uids2[0]) 308 } 309 310 func TestRebuildIndexWithDeletion(t *testing.T) { 311 addEdgeToValue(t, "name2", 91, "Michonne", uint64(1), uint64(2)) 312 addEdgeToValue(t, "name2", 92, "David", uint64(3), uint64(4)) 313 314 require.NoError(t, schema.ParseBytes([]byte(schemaVal), 1)) 315 currentSchema, _ := schema.State().Get("name2") 316 rb := IndexRebuild{ 317 Attr: "name2", 318 StartTs: 5, 319 OldSchema: nil, 320 CurrentSchema: ¤tSchema, 321 } 322 require.NoError(t, rebuildIndex(context.Background(), &rb)) 323 324 // Mutate the schema (the index in name2 is deleted) and rebuild the index. 325 require.NoError(t, schema.ParseBytes([]byte(mutatedSchemaVal), 1)) 326 newSchema, _ := schema.State().Get("name2") 327 rb = IndexRebuild{ 328 Attr: "name2", 329 StartTs: 6, 330 OldSchema: ¤tSchema, 331 CurrentSchema: &newSchema, 332 } 333 require.NoError(t, rebuildIndex(context.Background(), &rb)) 334 335 // Check index entries in data store. 336 txn := ps.NewTransactionAt(7, false) 337 defer txn.Discard() 338 it := txn.NewIterator(badger.DefaultIteratorOptions) 339 defer it.Close() 340 pk := x.ParsedKey{Attr: "name2"} 341 prefix := pk.IndexPrefix() 342 var idxKeys []string 343 var idxVals []*List 344 for it.Seek(prefix); it.Valid(); it.Next() { 345 item := it.Item() 346 key := item.Key() 347 if !bytes.HasPrefix(key, prefix) { 348 break 349 } 350 if item.UserMeta()&BitEmptyPosting == BitEmptyPosting { 351 continue 352 } 353 idxKeys = append(idxKeys, string(key)) 354 l, err := GetNoStore(key) 355 require.NoError(t, err) 356 idxVals = append(idxVals, l) 357 } 358 359 // The index keys should not be available anymore. 360 require.Len(t, idxKeys, 0) 361 require.Len(t, idxVals, 0) 362 } 363 364 func TestRebuildReverseEdges(t *testing.T) { 365 addEdgeToUID(t, "friend", 1, 23, uint64(10), uint64(11)) 366 addEdgeToUID(t, "friend", 1, 24, uint64(12), uint64(13)) 367 addEdgeToUID(t, "friend", 2, 23, uint64(14), uint64(15)) 368 369 require.NoError(t, schema.ParseBytes([]byte(schemaVal), 1)) 370 currentSchema, _ := schema.State().Get("friend") 371 rb := IndexRebuild{ 372 Attr: "friend", 373 StartTs: 16, 374 OldSchema: nil, 375 CurrentSchema: ¤tSchema, 376 } 377 // TODO: Remove after fixing sync marks. 378 require.NoError(t, rebuildReverseEdges(context.Background(), &rb)) 379 380 // Check index entries in data store. 381 txn := ps.NewTransactionAt(17, false) 382 defer txn.Discard() 383 iterOpts := badger.DefaultIteratorOptions 384 iterOpts.AllVersions = true 385 it := txn.NewIterator(iterOpts) 386 defer it.Close() 387 pk := x.ParsedKey{Attr: "friend"} 388 prefix := pk.ReversePrefix() 389 var revKeys []string 390 var revVals []*List 391 var prevKey []byte 392 it.Seek(prefix) 393 for it.ValidForPrefix(prefix) { 394 item := it.Item() 395 key := item.Key() 396 if bytes.Equal(key, prevKey) { 397 it.Next() 398 continue 399 } 400 prevKey = append(prevKey[:0], key...) 401 revKeys = append(revKeys, string(key)) 402 l, err := ReadPostingList(key, it) 403 require.NoError(t, err) 404 revVals = append(revVals, l) 405 } 406 require.Len(t, revKeys, 2) 407 require.Len(t, revVals, 2) 408 409 uids0 := uids(revVals[0], 17) 410 uids1 := uids(revVals[1], 17) 411 require.Len(t, uids0, 2) 412 require.Len(t, uids1, 1) 413 require.EqualValues(t, 1, uids0[0]) 414 require.EqualValues(t, 2, uids0[1]) 415 require.EqualValues(t, 1, uids1[0]) 416 } 417 418 func TestNeedsIndexRebuild(t *testing.T) { 419 rb := IndexRebuild{} 420 rb.OldSchema = &pb.SchemaUpdate{ValueType: pb.Posting_UID} 421 rb.CurrentSchema = &pb.SchemaUpdate{ValueType: pb.Posting_UID} 422 rebuildInfo := rb.needsIndexRebuild() 423 require.Equal(t, indexOp(indexNoop), rebuildInfo.op) 424 require.Equal(t, []string(nil), rebuildInfo.tokenizersToDelete) 425 require.Equal(t, []string(nil), rebuildInfo.tokenizersToRebuild) 426 427 rb.OldSchema = nil 428 rebuildInfo = rb.needsIndexRebuild() 429 require.Equal(t, indexOp(indexNoop), rebuildInfo.op) 430 require.Equal(t, []string(nil), rebuildInfo.tokenizersToDelete) 431 require.Equal(t, []string(nil), rebuildInfo.tokenizersToRebuild) 432 433 rb.OldSchema = &pb.SchemaUpdate{ValueType: pb.Posting_STRING, Directive: pb.SchemaUpdate_INDEX, 434 Tokenizer: []string{"exact"}} 435 rb.CurrentSchema = &pb.SchemaUpdate{ValueType: pb.Posting_STRING, 436 Directive: pb.SchemaUpdate_INDEX, 437 Tokenizer: []string{"exact"}} 438 rebuildInfo = rb.needsIndexRebuild() 439 require.Equal(t, indexOp(indexNoop), rebuildInfo.op) 440 require.Equal(t, []string(nil), rebuildInfo.tokenizersToDelete) 441 require.Equal(t, []string(nil), rebuildInfo.tokenizersToRebuild) 442 443 rb.OldSchema = &pb.SchemaUpdate{ValueType: pb.Posting_STRING, Directive: pb.SchemaUpdate_INDEX, 444 Tokenizer: []string{"term"}} 445 rb.CurrentSchema = &pb.SchemaUpdate{ValueType: pb.Posting_STRING, 446 Directive: pb.SchemaUpdate_INDEX} 447 rebuildInfo = rb.needsIndexRebuild() 448 require.Equal(t, indexOp(indexRebuild), rebuildInfo.op) 449 require.Equal(t, []string{"term"}, rebuildInfo.tokenizersToDelete) 450 require.Equal(t, []string(nil), rebuildInfo.tokenizersToRebuild) 451 452 rb.OldSchema = &pb.SchemaUpdate{ValueType: pb.Posting_STRING, Directive: pb.SchemaUpdate_INDEX, 453 Tokenizer: []string{"exact"}} 454 rb.CurrentSchema = &pb.SchemaUpdate{ValueType: pb.Posting_FLOAT, 455 Directive: pb.SchemaUpdate_INDEX, 456 Tokenizer: []string{"exact"}} 457 rebuildInfo = rb.needsIndexRebuild() 458 require.Equal(t, indexOp(indexRebuild), rebuildInfo.op) 459 require.Equal(t, []string{"exact"}, rebuildInfo.tokenizersToDelete) 460 require.Equal(t, []string{"exact"}, rebuildInfo.tokenizersToRebuild) 461 462 rb.OldSchema = &pb.SchemaUpdate{ValueType: pb.Posting_STRING, Directive: pb.SchemaUpdate_INDEX, 463 Tokenizer: []string{"exact"}} 464 rb.CurrentSchema = &pb.SchemaUpdate{ValueType: pb.Posting_FLOAT, 465 Directive: pb.SchemaUpdate_NONE} 466 rebuildInfo = rb.needsIndexRebuild() 467 require.Equal(t, indexOp(indexDelete), rebuildInfo.op) 468 require.Equal(t, []string{"exact"}, rebuildInfo.tokenizersToDelete) 469 require.Equal(t, []string(nil), rebuildInfo.tokenizersToRebuild) 470 } 471 472 func TestNeedsCountIndexRebuild(t *testing.T) { 473 rb := IndexRebuild{} 474 rb.OldSchema = &pb.SchemaUpdate{ValueType: pb.Posting_UID} 475 rb.CurrentSchema = &pb.SchemaUpdate{ValueType: pb.Posting_UID, Count: true} 476 require.Equal(t, indexOp(indexRebuild), rb.needsCountIndexRebuild()) 477 478 rb.OldSchema = nil 479 require.Equal(t, indexOp(indexRebuild), rb.needsCountIndexRebuild()) 480 481 rb.OldSchema = &pb.SchemaUpdate{ValueType: pb.Posting_UID, Count: false} 482 rb.CurrentSchema = &pb.SchemaUpdate{ValueType: pb.Posting_UID, Count: false} 483 require.Equal(t, indexOp(indexNoop), rb.needsCountIndexRebuild()) 484 485 rb.OldSchema = &pb.SchemaUpdate{ValueType: pb.Posting_UID, Count: true} 486 rb.CurrentSchema = &pb.SchemaUpdate{ValueType: pb.Posting_UID, Count: false} 487 require.Equal(t, indexOp(indexDelete), rb.needsCountIndexRebuild()) 488 } 489 490 func TestNeedsReverseEdgesRebuild(t *testing.T) { 491 rb := IndexRebuild{} 492 rb.OldSchema = &pb.SchemaUpdate{ValueType: pb.Posting_UID, Directive: pb.SchemaUpdate_INDEX} 493 rb.CurrentSchema = &pb.SchemaUpdate{ValueType: pb.Posting_UID, 494 Directive: pb.SchemaUpdate_REVERSE} 495 require.Equal(t, indexOp(indexRebuild), rb.needsReverseEdgesRebuild()) 496 497 rb.OldSchema = nil 498 require.Equal(t, indexOp(indexRebuild), rb.needsReverseEdgesRebuild()) 499 500 rb.OldSchema = &pb.SchemaUpdate{ValueType: pb.Posting_UID, Directive: pb.SchemaUpdate_REVERSE} 501 rb.CurrentSchema = &pb.SchemaUpdate{ValueType: pb.Posting_UID, 502 Directive: pb.SchemaUpdate_REVERSE} 503 require.Equal(t, indexOp(indexNoop), rb.needsReverseEdgesRebuild()) 504 505 rb.CurrentSchema = &pb.SchemaUpdate{ValueType: pb.Posting_UID, 506 Directive: pb.SchemaUpdate_REVERSE} 507 rb.CurrentSchema = &pb.SchemaUpdate{ValueType: pb.Posting_UID, 508 Directive: pb.SchemaUpdate_INDEX} 509 require.Equal(t, indexOp(indexDelete), rb.needsReverseEdgesRebuild()) 510 } 511 512 func TestNeedsListTypeRebuild(t *testing.T) { 513 rb := IndexRebuild{} 514 rb.OldSchema = &pb.SchemaUpdate{ValueType: pb.Posting_UID, List: false} 515 rb.CurrentSchema = &pb.SchemaUpdate{ValueType: pb.Posting_UID, List: true} 516 rebuild, err := rb.needsListTypeRebuild() 517 require.True(t, rebuild) 518 require.NoError(t, err) 519 520 rb.OldSchema = nil 521 rebuild, err = rb.needsListTypeRebuild() 522 require.False(t, rebuild) 523 require.NoError(t, err) 524 525 rb.OldSchema = &pb.SchemaUpdate{ValueType: pb.Posting_UID, List: true} 526 rb.CurrentSchema = &pb.SchemaUpdate{ValueType: pb.Posting_UID, List: false} 527 rebuild, err = rb.needsListTypeRebuild() 528 require.False(t, rebuild) 529 require.Error(t, err) 530 }