/*
 * Copyright 2016-2018 Dgraph Labs, Inc. and Contributors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package posting

import (
	"bytes"
	"context"
	"encoding/hex"
	"fmt"
	"math"
	"time"

	"github.com/golang/glog"
	ostats "go.opencensus.io/stats"
	otrace "go.opencensus.io/trace"

	"github.com/dgraph-io/badger"
	bpb "github.com/dgraph-io/badger/pb"
	"github.com/dgraph-io/dgraph/protos/pb"
	"github.com/dgraph-io/dgraph/schema"
	"github.com/dgraph-io/dgraph/tok"
	"github.com/dgraph-io/dgraph/types"
	"github.com/dgraph-io/dgraph/x"
	"github.com/pkg/errors"
)

// emptyCountParams is the zero-value countParams. The mutation helpers in this
// file return it alongside an error, or when no count index is maintained.
var emptyCountParams countParams

// indexMutationInfo bundles everything needed to add or remove the index
// entries that correspond to a single value edge.
type indexMutationInfo struct {
	// tokenizers are used to turn val into index tokens. When nil,
	// addIndexMutations fills it in from the schema for edge.Attr.
	tokenizers []tok.Tokenizer
	edge       *pb.DirectedEdge // Represents the original uid -> value edge.
	// val is the value to tokenize; it is converted to the schema type of
	// edge.Attr before tokens are built.
	val types.Val
	// op is copied into the Op field of every generated index edge.
	op pb.DirectedEdge_Op
}

// indexTokens returns the tokens, without the predicate prefix and
// index rune, for specific tokenizers.
52 func indexTokens(info *indexMutationInfo) ([]string, error) { 53 attr := info.edge.Attr 54 lang := info.edge.GetLang() 55 56 schemaType, err := schema.State().TypeOf(attr) 57 if err != nil || !schemaType.IsScalar() { 58 return nil, errors.Errorf("Cannot index attribute %s of type object.", attr) 59 } 60 61 if !schema.State().IsIndexed(attr) { 62 return nil, errors.Errorf("Attribute %s is not indexed.", attr) 63 } 64 sv, err := types.Convert(info.val, schemaType) 65 if err != nil { 66 return nil, err 67 } 68 69 var tokens []string 70 for _, it := range info.tokenizers { 71 toks, err := tok.BuildTokens(sv.Value, tok.GetLangTokenizer(it, lang)) 72 if err != nil { 73 return tokens, err 74 } 75 tokens = append(tokens, toks...) 76 } 77 return tokens, nil 78 } 79 80 // addIndexMutations adds mutation(s) for a single term, to maintain the index, 81 // but only for the given tokenizers. 82 // TODO - See if we need to pass op as argument as t should already have Op. 83 func (txn *Txn) addIndexMutations(ctx context.Context, info *indexMutationInfo) error { 84 if info.tokenizers == nil { 85 info.tokenizers = schema.State().Tokenizer(info.edge.Attr) 86 } 87 88 attr := info.edge.Attr 89 uid := info.edge.Entity 90 x.AssertTrue(uid != 0) 91 tokens, err := indexTokens(info) 92 93 if err != nil { 94 // This data is not indexable 95 return err 96 } 97 98 // Create a value token -> uid edge. 
99 edge := &pb.DirectedEdge{ 100 ValueId: uid, 101 Attr: attr, 102 Op: info.op, 103 } 104 105 for _, token := range tokens { 106 if err := txn.addIndexMutation(ctx, edge, token); err != nil { 107 return err 108 } 109 } 110 return nil 111 } 112 113 func (txn *Txn) addIndexMutation(ctx context.Context, edge *pb.DirectedEdge, 114 token string) error { 115 key := x.IndexKey(edge.Attr, token) 116 117 plist, err := txn.cache.GetFromDelta(key) 118 if err != nil { 119 return err 120 } 121 122 x.AssertTrue(plist != nil) 123 if err = plist.addMutation(ctx, txn, edge); err != nil { 124 return err 125 } 126 ostats.Record(ctx, x.NumEdges.M(1)) 127 return nil 128 } 129 130 // countParams is sent to updateCount function. It is used to update the count index. 131 // It deletes the uid from the key corresponding to <attr, countBefore> and adds it 132 // to <attr, countAfter>. 133 type countParams struct { 134 attr string 135 countBefore int 136 countAfter int 137 entity uint64 138 reverse bool 139 } 140 141 func (txn *Txn) addReverseMutationHelper(ctx context.Context, plist *List, 142 hasCountIndex bool, edge *pb.DirectedEdge) (countParams, error) { 143 countBefore, countAfter := 0, 0 144 145 if hasCountIndex { 146 countBefore = plist.Length(txn.StartTs, 0) 147 if countBefore == -1 { 148 return emptyCountParams, ErrTsTooOld 149 } 150 } 151 if err := plist.addMutation(ctx, txn, edge); err != nil { 152 return emptyCountParams, err 153 } 154 if hasCountIndex { 155 countAfter = plist.Length(txn.StartTs, 0) 156 if countAfter == -1 { 157 return emptyCountParams, ErrTsTooOld 158 } 159 return countParams{ 160 attr: edge.Attr, 161 countBefore: countBefore, 162 countAfter: countAfter, 163 entity: edge.Entity, 164 reverse: true, 165 }, nil 166 } 167 return emptyCountParams, nil 168 } 169 170 func (txn *Txn) addReverseMutation(ctx context.Context, t *pb.DirectedEdge) error { 171 key := x.ReverseKey(t.Attr, t.ValueId) 172 hasCountIndex := schema.State().HasCount(t.Attr) 173 174 var getFn 
func(key []byte) (*List, error) 175 if hasCountIndex { 176 // We need to retrieve the full posting list from disk, to allow us to get the length of the 177 // posting list for the counts. 178 getFn = txn.Get 179 } else { 180 // We are just adding a reverse edge. No need to read the list from disk. 181 getFn = txn.GetFromDelta 182 } 183 plist, err := getFn(key) 184 if err != nil { 185 return err 186 } 187 188 x.AssertTrue(plist != nil) 189 // We must create a copy here. 190 edge := &pb.DirectedEdge{ 191 Entity: t.ValueId, 192 ValueId: t.Entity, 193 Attr: t.Attr, 194 Op: t.Op, 195 Facets: t.Facets, 196 } 197 198 cp, err := txn.addReverseMutationHelper(ctx, plist, hasCountIndex, edge) 199 if err != nil { 200 return err 201 } 202 ostats.Record(ctx, x.NumEdges.M(1)) 203 204 if hasCountIndex && cp.countAfter != cp.countBefore { 205 if err := txn.updateCount(ctx, cp); err != nil { 206 return err 207 } 208 } 209 return nil 210 } 211 212 func (l *List) handleDeleteAll(ctx context.Context, edge *pb.DirectedEdge, 213 txn *Txn) error { 214 isReversed := schema.State().IsReversed(edge.Attr) 215 isIndexed := schema.State().IsIndexed(edge.Attr) 216 hasCount := schema.State().HasCount(edge.Attr) 217 delEdge := &pb.DirectedEdge{ 218 Attr: edge.Attr, 219 Op: edge.Op, 220 Entity: edge.Entity, 221 } 222 // To calculate length of posting list. Used for deletion of count index. 223 var plen int 224 err := l.Iterate(txn.StartTs, 0, func(p *pb.Posting) error { 225 plen++ 226 switch { 227 case isReversed: 228 // Delete reverse edge for each posting. 229 delEdge.ValueId = p.Uid 230 return txn.addReverseMutation(ctx, delEdge) 231 case isIndexed: 232 // Delete index edge of each posting. 
233 val := types.Val{ 234 Tid: types.TypeID(p.ValType), 235 Value: p.Value, 236 } 237 return txn.addIndexMutations(ctx, &indexMutationInfo{ 238 tokenizers: schema.State().Tokenizer(edge.Attr), 239 edge: edge, 240 val: val, 241 op: pb.DirectedEdge_DEL, 242 }) 243 default: 244 return nil 245 } 246 }) 247 if err != nil { 248 return err 249 } 250 if hasCount { 251 // Delete uid from count index. Deletion of reverses is taken care by addReverseMutation 252 // above. 253 if err := txn.updateCount(ctx, countParams{ 254 attr: edge.Attr, 255 countBefore: plen, 256 countAfter: 0, 257 entity: edge.Entity, 258 }); err != nil { 259 return err 260 } 261 } 262 263 return l.addMutation(ctx, txn, edge) 264 } 265 266 func (txn *Txn) addCountMutation(ctx context.Context, t *pb.DirectedEdge, count uint32, 267 reverse bool) error { 268 key := x.CountKey(t.Attr, count, reverse) 269 plist, err := txn.cache.GetFromDelta(key) 270 if err != nil { 271 return err 272 } 273 274 x.AssertTruef(plist != nil, "plist is nil [%s] %d", 275 t.Attr, t.ValueId) 276 if err = plist.addMutation(ctx, txn, t); err != nil { 277 return err 278 } 279 ostats.Record(ctx, x.NumEdges.M(1)) 280 return nil 281 282 } 283 284 func (txn *Txn) updateCount(ctx context.Context, params countParams) error { 285 edge := pb.DirectedEdge{ 286 ValueId: params.entity, 287 Attr: params.attr, 288 Op: pb.DirectedEdge_DEL, 289 } 290 if err := txn.addCountMutation(ctx, &edge, uint32(params.countBefore), 291 params.reverse); err != nil { 292 return err 293 } 294 295 if params.countAfter > 0 { 296 edge.Op = pb.DirectedEdge_SET 297 if err := txn.addCountMutation(ctx, &edge, uint32(params.countAfter), 298 params.reverse); err != nil { 299 return err 300 } 301 } 302 return nil 303 } 304 305 func (txn *Txn) addMutationHelper(ctx context.Context, l *List, doUpdateIndex bool, 306 hasCountIndex bool, t *pb.DirectedEdge) (types.Val, bool, countParams, error) { 307 var val types.Val 308 var found bool 309 var err error 310 311 t1 := time.Now() 
312 l.Lock() 313 defer l.Unlock() 314 315 if dur := time.Since(t1); dur > time.Millisecond { 316 span := otrace.FromContext(ctx) 317 span.Annotatef([]otrace.Attribute{otrace.BoolAttribute("slow-lock", true)}, 318 "Acquired lock %v %v %v", dur, t.Attr, t.Entity) 319 } 320 321 if err := l.canMutateUid(txn, t); err != nil { 322 return val, found, emptyCountParams, err 323 } 324 325 if doUpdateIndex { 326 // Check original value BEFORE any mutation actually happens. 327 val, found, err = l.findValue(txn.StartTs, fingerprintEdge(t)) 328 if err != nil { 329 return val, found, emptyCountParams, err 330 } 331 } 332 333 // If the predicate schema is not a list, ignore delete triples whose object is not a star or 334 // a value that does not match the existing value. 335 if !schema.State().IsList(t.Attr) && t.Op == pb.DirectedEdge_DEL && string(t.Value) != x.Star { 336 newPost := NewPosting(t) 337 pFound, currPost, err := l.findPosting(txn.StartTs, fingerprintEdge(t)) 338 if err != nil { 339 return val, found, emptyCountParams, err 340 } 341 342 // This is a scalar value of non-list type and a delete edge mutation, so if the value 343 // given by the user doesn't match the value we have, we return found to be false, to avoid 344 // deleting the uid from index posting list. 345 // This second check is required because we fingerprint the scalar values as math.MaxUint64, 346 // so even though they might be different the check in the doUpdateIndex block above would 347 // return found to be true. 
348 if pFound && !(bytes.Equal(currPost.Value, newPost.Value) && 349 types.TypeID(currPost.ValType) == types.TypeID(newPost.ValType)) { 350 return val, false, emptyCountParams, nil 351 } 352 } 353 354 countBefore, countAfter := 0, 0 355 if hasCountIndex { 356 countBefore = l.length(txn.StartTs, 0) 357 if countBefore == -1 { 358 return val, found, emptyCountParams, ErrTsTooOld 359 } 360 } 361 if err = l.addMutationInternal(ctx, txn, t); err != nil { 362 return val, found, emptyCountParams, err 363 } 364 if hasCountIndex { 365 countAfter = l.length(txn.StartTs, 0) 366 if countAfter == -1 { 367 return val, found, emptyCountParams, ErrTsTooOld 368 } 369 return val, found, countParams{ 370 attr: t.Attr, 371 countBefore: countBefore, 372 countAfter: countAfter, 373 entity: t.Entity, 374 }, nil 375 } 376 return val, found, emptyCountParams, nil 377 } 378 379 // AddMutationWithIndex is addMutation with support for indexing. It also 380 // supports reverse edges. 381 func (l *List) AddMutationWithIndex(ctx context.Context, edge *pb.DirectedEdge, 382 txn *Txn) error { 383 if len(edge.Attr) == 0 { 384 return errors.Errorf("Predicate cannot be empty for edge with subject: [%v], object: [%v]"+ 385 " and value: [%v]", edge.Entity, edge.ValueId, edge.Value) 386 } 387 388 if edge.Op == pb.DirectedEdge_DEL && string(edge.Value) == x.Star { 389 return l.handleDeleteAll(ctx, edge, txn) 390 } 391 392 doUpdateIndex := pstore != nil && schema.State().IsIndexed(edge.Attr) 393 hasCountIndex := schema.State().HasCount(edge.Attr) 394 val, found, cp, err := txn.addMutationHelper(ctx, l, doUpdateIndex, hasCountIndex, edge) 395 if err != nil { 396 return err 397 } 398 ostats.Record(ctx, x.NumEdges.M(1)) 399 if hasCountIndex && cp.countAfter != cp.countBefore { 400 if err := txn.updateCount(ctx, cp); err != nil { 401 return err 402 } 403 } 404 if doUpdateIndex { 405 // Exact matches. 
406 if found && val.Value != nil { 407 if err := txn.addIndexMutations(ctx, &indexMutationInfo{ 408 tokenizers: schema.State().Tokenizer(edge.Attr), 409 edge: edge, 410 val: val, 411 op: pb.DirectedEdge_DEL, 412 }); err != nil { 413 return err 414 } 415 } 416 if edge.Op == pb.DirectedEdge_SET { 417 val = types.Val{ 418 Tid: types.TypeID(edge.ValueType), 419 Value: edge.Value, 420 } 421 if err := txn.addIndexMutations(ctx, &indexMutationInfo{ 422 tokenizers: schema.State().Tokenizer(edge.Attr), 423 edge: edge, 424 val: val, 425 op: pb.DirectedEdge_SET, 426 }); err != nil { 427 return err 428 } 429 } 430 } 431 // Add reverse mutation irrespective of hasMutated, server crash can happen after 432 // mutation is synced and before reverse edge is synced 433 if (pstore != nil) && (edge.ValueId != 0) && schema.State().IsReversed(edge.Attr) { 434 if err := txn.addReverseMutation(ctx, edge); err != nil { 435 return err 436 } 437 } 438 return nil 439 } 440 441 // deleteTokensFor deletes the index for the given attribute and token. 442 func deleteTokensFor(attr, tokenizerName string) error { 443 pk := x.ParsedKey{Attr: attr} 444 prefix := pk.IndexPrefix() 445 tokenizer, ok := tok.GetTokenizer(tokenizerName) 446 if !ok { 447 return errors.Errorf("Could not find valid tokenizer for %s", tokenizerName) 448 } 449 prefix = append(prefix, tokenizer.Identifier()) 450 if err := pstore.DropPrefix(prefix); err != nil { 451 return err 452 } 453 454 // Also delete all the parts of any list that has been split into multiple parts. 455 // Such keys have a different prefix (the last byte is set to 1). 
456 prefix = pk.IndexPrefix() 457 prefix[len(prefix)-1] = x.ByteSplit 458 prefix = append(prefix, tokenizer.Identifier()) 459 return pstore.DropPrefix(prefix) 460 } 461 462 func deleteReverseEdges(attr string) error { 463 pk := x.ParsedKey{Attr: attr} 464 prefix := pk.ReversePrefix() 465 if err := pstore.DropPrefix(prefix); err != nil { 466 return err 467 } 468 469 // Also delete all the parts of any list that has been split into multiple parts. 470 // Such keys have a different prefix (the last byte is set to 1). 471 prefix = pk.ReversePrefix() 472 prefix[len(prefix)-1] = x.ByteSplit 473 474 return pstore.DropPrefix(prefix) 475 } 476 477 func deleteCountIndex(attr string) error { 478 pk := x.ParsedKey{Attr: attr} 479 if err := pstore.DropPrefix(pk.CountPrefix(false)); err != nil { 480 return err 481 } 482 if err := pstore.DropPrefix(pk.CountPrefix(true)); err != nil { 483 return err 484 } 485 486 // Also delete all the parts of any list that has been split into multiple parts. 487 // Such keys have a different prefix (the last byte is set to 1). 488 prefix := pk.CountPrefix(false) 489 prefix[len(prefix)-1] = x.ByteSplit 490 if err := pstore.DropPrefix(prefix); err != nil { 491 return err 492 } 493 494 prefix = pk.CountPrefix(true) 495 prefix[len(prefix)-1] = x.ByteSplit 496 return pstore.DropPrefix(prefix) 497 } 498 499 // rebuilder handles the process of rebuilding an index. 500 type rebuilder struct { 501 attr string 502 prefix []byte 503 startTs uint64 504 505 // The posting list passed here is the on disk version. It is not coming 506 // from the LRU cache. 507 fn func(uid uint64, pl *List, txn *Txn) error 508 } 509 510 func (r *rebuilder) Run(ctx context.Context) error { 511 glog.V(1).Infof( 512 "Rebuilding index for predicate %s: Starting process. StartTs=%d. Prefix=\n%s\n", 513 r.attr, r.startTs, hex.Dump(r.prefix)) 514 515 // We create one txn for all the mutations to be housed in. 
We also create a 516 // localized posting list cache, to avoid stressing or mixing up with the 517 // global lcache (the LRU cache). 518 txn := NewTxn(r.startTs) 519 520 stream := pstore.NewStreamAt(r.startTs) 521 stream.LogPrefix = fmt.Sprintf("Rebuilding index for predicate %s:", r.attr) 522 stream.Prefix = r.prefix 523 stream.KeyToList = func(key []byte, itr *badger.Iterator) (*bpb.KVList, error) { 524 // We should return quickly if the context is no longer valid. 525 select { 526 case <-ctx.Done(): 527 return nil, ctx.Err() 528 default: 529 } 530 531 pk, err := x.Parse(key) 532 if err != nil { 533 return nil, errors.Wrapf(err, "could not parse key %s", hex.Dump(key)) 534 } 535 536 item := itr.Item() 537 keyCopy := item.KeyCopy(nil) 538 l, err := ReadPostingList(keyCopy, itr) 539 if err != nil { 540 return nil, err 541 } 542 if err := r.fn(pk.Uid, l, txn); err != nil { 543 return nil, err 544 } 545 546 return nil, nil 547 } 548 stream.Send = func(*bpb.KVList) error { 549 // The work of adding the index edges to the transaction is done by r.fn 550 // so this function doesn't have any work to do. 551 return nil 552 } 553 554 if err := stream.Orchestrate(ctx); err != nil { 555 return err 556 } 557 glog.V(1).Infof("Rebuilding index for predicate %s: Iteration done. Now committing at ts=%d\n", 558 r.attr, r.startTs) 559 560 // Convert data into deltas. 561 txn.Update() 562 563 // Now we write all the created posting lists to disk. 564 writer := NewTxnWriter(pstore) 565 counter := 0 566 numDeltas := len(txn.cache.deltas) 567 for key, delta := range txn.cache.deltas { 568 if len(delta) == 0 { 569 continue 570 } 571 // We choose to write the PL at r.startTs, so it won't be read by txns, 572 // which occurred before this schema mutation. Typically, we use 573 // kv.Version as the timestamp. 
574 if err := writer.SetAt([]byte(key), delta, BitDeltaPosting, r.startTs); err != nil { 575 return err 576 } 577 578 counter++ 579 if counter%1e5 == 0 { 580 glog.V(1).Infof("Rebuilding index for predicate %s: wrote %d of %d deltas to disk.\n", 581 r.attr, counter, numDeltas) 582 } 583 } 584 glog.V(1).Infoln("Rebuild: Flushing all writes.") 585 return writer.Flush() 586 } 587 588 // IndexRebuild holds the info needed to initiate a rebuilt of the indices. 589 type IndexRebuild struct { 590 Attr string 591 StartTs uint64 592 OldSchema *pb.SchemaUpdate 593 CurrentSchema *pb.SchemaUpdate 594 } 595 596 type indexOp int 597 598 const ( 599 indexNoop indexOp = iota // Index should be left alone. 600 indexDelete = iota // Index should be deleted. 601 indexRebuild = iota // Index should be deleted and rebuilt. 602 ) 603 604 // Run rebuilds all indices that need it. 605 func (rb *IndexRebuild) Run(ctx context.Context) error { 606 if err := rebuildListType(ctx, rb); err != nil { 607 return err 608 } 609 if err := rebuildIndex(ctx, rb); err != nil { 610 return err 611 } 612 if err := rebuildReverseEdges(ctx, rb); err != nil { 613 return err 614 } 615 return rebuildCountIndex(ctx, rb) 616 } 617 618 type indexRebuildInfo struct { 619 op indexOp 620 tokenizersToDelete []string 621 tokenizersToRebuild []string 622 } 623 624 func (rb *IndexRebuild) needsIndexRebuild() indexRebuildInfo { 625 x.AssertTruef(rb.CurrentSchema != nil, "Current schema cannot be nil.") 626 627 // If the old schema is nil, we can treat it as an empty schema. Copy it 628 // first to avoid overwriting it in rb. 629 old := rb.OldSchema 630 if old == nil { 631 old = &pb.SchemaUpdate{} 632 } 633 634 currIndex := rb.CurrentSchema.Directive == pb.SchemaUpdate_INDEX 635 prevIndex := old.Directive == pb.SchemaUpdate_INDEX 636 637 // Index does not need to be rebuilt or deleted if the scheme directive 638 // did not require an index before and now. 
639 if !currIndex && !prevIndex { 640 return indexRebuildInfo{ 641 op: indexNoop, 642 } 643 } 644 645 // Index only needs to be deleted if the schema directive changed and the 646 // new directive does not require an index. Predicate is not checking 647 // prevIndex since the previous if statement guarantees both values are 648 // different. 649 if !currIndex { 650 return indexRebuildInfo{ 651 op: indexDelete, 652 tokenizersToDelete: old.Tokenizer, 653 } 654 } 655 656 // All tokenizers in the index need to be deleted and rebuilt if the value 657 // types have changed. 658 if currIndex && rb.CurrentSchema.ValueType != old.ValueType { 659 return indexRebuildInfo{ 660 op: indexRebuild, 661 tokenizersToDelete: old.Tokenizer, 662 tokenizersToRebuild: rb.CurrentSchema.Tokenizer, 663 } 664 } 665 666 // Index needs to be rebuilt if the tokenizers have changed 667 prevTokens := make(map[string]struct{}) 668 for _, t := range old.Tokenizer { 669 prevTokens[t] = struct{}{} 670 } 671 currTokens := make(map[string]struct{}) 672 for _, t := range rb.CurrentSchema.Tokenizer { 673 currTokens[t] = struct{}{} 674 } 675 676 newTokenizers, deletedTokenizers := x.Diff(currTokens, prevTokens) 677 678 // If the tokenizers are the same, nothing needs to be done. 679 if len(newTokenizers) == 0 && len(deletedTokenizers) == 0 { 680 return indexRebuildInfo{ 681 op: indexNoop, 682 } 683 } 684 685 return indexRebuildInfo{ 686 op: indexRebuild, 687 tokenizersToDelete: deletedTokenizers, 688 tokenizersToRebuild: newTokenizers, 689 } 690 } 691 692 // rebuildIndex rebuilds index for a given attribute. 693 // We commit mutations with startTs and ignore the errors. 694 func rebuildIndex(ctx context.Context, rb *IndexRebuild) error { 695 // Exit early if indices do not need to be rebuilt. 
696 rebuildInfo := rb.needsIndexRebuild() 697 698 if rebuildInfo.op == indexNoop { 699 return nil 700 } 701 702 glog.Infof("Deleting index for attr %s and tokenizers %s", rb.Attr, 703 rebuildInfo.tokenizersToDelete) 704 for _, tokenizer := range rebuildInfo.tokenizersToDelete { 705 if err := deleteTokensFor(rb.Attr, tokenizer); err != nil { 706 return err 707 } 708 } 709 710 // Exit early if the index only need to be deleted and not rebuilt. 711 if rebuildInfo.op == indexDelete { 712 return nil 713 } 714 715 // Exit early if there are no tokenizers to rebuild. 716 if len(rebuildInfo.tokenizersToRebuild) == 0 { 717 return nil 718 } 719 720 glog.Infof("Rebuilding index for attr %s and tokenizers %s", rb.Attr, 721 rebuildInfo.tokenizersToRebuild) 722 // Before rebuilding, the existing index needs to be deleted. 723 for _, tokenizer := range rebuildInfo.tokenizersToRebuild { 724 if err := deleteTokensFor(rb.Attr, tokenizer); err != nil { 725 return err 726 } 727 } 728 729 tokenizers, err := tok.GetTokenizers(rebuildInfo.tokenizersToRebuild) 730 if err != nil { 731 return err 732 } 733 734 pk := x.ParsedKey{Attr: rb.Attr} 735 builder := rebuilder{attr: rb.Attr, prefix: pk.DataPrefix(), startTs: rb.StartTs} 736 builder.fn = func(uid uint64, pl *List, txn *Txn) error { 737 edge := pb.DirectedEdge{Attr: rb.Attr, Entity: uid} 738 return pl.Iterate(txn.StartTs, 0, func(p *pb.Posting) error { 739 // Add index entries based on p. 
740 val := types.Val{ 741 Value: p.Value, 742 Tid: types.TypeID(p.ValType), 743 } 744 745 for { 746 err := txn.addIndexMutations(ctx, &indexMutationInfo{ 747 tokenizers: tokenizers, 748 edge: &edge, 749 val: val, 750 op: pb.DirectedEdge_SET, 751 }) 752 switch err { 753 case ErrRetry: 754 time.Sleep(10 * time.Millisecond) 755 default: 756 return err 757 } 758 } 759 }) 760 } 761 return builder.Run(ctx) 762 } 763 764 func (rb *IndexRebuild) needsCountIndexRebuild() indexOp { 765 x.AssertTruef(rb.CurrentSchema != nil, "Current schema cannot be nil.") 766 767 // If the old schema is nil, treat it as an empty schema. Copy it to avoid 768 // overwriting it in rb. 769 old := rb.OldSchema 770 if old == nil { 771 old = &pb.SchemaUpdate{} 772 } 773 774 // Do nothing if the schema directive did not change. 775 if rb.CurrentSchema.Count == old.Count { 776 return indexNoop 777 778 } 779 780 // If the new schema does not require an index, delete the current index. 781 if !rb.CurrentSchema.Count { 782 return indexDelete 783 } 784 785 // Otherwise, the index needs to be rebuilt. 786 return indexRebuild 787 } 788 789 // rebuildCountIndex rebuilds the count index for a given attribute. 790 func rebuildCountIndex(ctx context.Context, rb *IndexRebuild) error { 791 op := rb.needsCountIndexRebuild() 792 if op == indexNoop { 793 return nil 794 } 795 796 glog.Infof("Deleting count index for %s", rb.Attr) 797 if err := deleteCountIndex(rb.Attr); err != nil { 798 return err 799 } 800 801 // Exit early if attribute is index only needed to be deleted. 
802 if op == indexDelete { 803 return nil 804 } 805 806 glog.Infof("Rebuilding count index for %s", rb.Attr) 807 var reverse bool 808 fn := func(uid uint64, pl *List, txn *Txn) error { 809 t := &pb.DirectedEdge{ 810 ValueId: uid, 811 Attr: rb.Attr, 812 Op: pb.DirectedEdge_SET, 813 } 814 sz := pl.Length(rb.StartTs, 0) 815 if sz == -1 { 816 return nil 817 } 818 for { 819 err := txn.addCountMutation(ctx, t, uint32(sz), reverse) 820 switch err { 821 case ErrRetry: 822 time.Sleep(10 * time.Millisecond) 823 default: 824 return err 825 } 826 } 827 } 828 829 // Create the forward index. 830 pk := x.ParsedKey{Attr: rb.Attr} 831 builder := rebuilder{attr: rb.Attr, prefix: pk.DataPrefix(), startTs: rb.StartTs} 832 builder.fn = fn 833 if err := builder.Run(ctx); err != nil { 834 return err 835 } 836 837 // Create the reverse index. The count reverse index is created if this 838 // predicate has both a count and reverse directive in the schema. It's safe 839 // to call builder.Run even if that's not the case as the reverse prefix 840 // will be empty. 841 reverse = true 842 builder = rebuilder{attr: rb.Attr, prefix: pk.ReversePrefix(), startTs: rb.StartTs} 843 builder.fn = fn 844 return builder.Run(ctx) 845 } 846 847 func (rb *IndexRebuild) needsReverseEdgesRebuild() indexOp { 848 x.AssertTruef(rb.CurrentSchema != nil, "Current schema cannot be nil.") 849 850 // If old schema is nil, treat it as an empty schema. Copy it to avoid 851 // overwriting it in rb. 852 old := rb.OldSchema 853 if old == nil { 854 old = &pb.SchemaUpdate{} 855 } 856 857 currIndex := rb.CurrentSchema.Directive == pb.SchemaUpdate_REVERSE 858 prevIndex := old.Directive == pb.SchemaUpdate_REVERSE 859 860 // If the schema directive did not change, return indexNoop. 861 if currIndex == prevIndex { 862 return indexNoop 863 } 864 865 // If the current schema requires an index, index should be rebuilt. 866 if currIndex { 867 return indexRebuild 868 } 869 // Otherwise, index should only be deleted. 
870 return indexDelete 871 } 872 873 // rebuildReverseEdges rebuilds the reverse edges for a given attribute. 874 func rebuildReverseEdges(ctx context.Context, rb *IndexRebuild) error { 875 op := rb.needsReverseEdgesRebuild() 876 if op == indexNoop { 877 return nil 878 } 879 880 glog.Infof("Deleting reverse index for %s", rb.Attr) 881 if err := deleteReverseEdges(rb.Attr); err != nil { 882 return err 883 } 884 885 // Exit early if index only needed to be deleted. 886 if op == indexDelete { 887 return nil 888 } 889 890 glog.Infof("Rebuilding reverse index for %s", rb.Attr) 891 pk := x.ParsedKey{Attr: rb.Attr} 892 builder := rebuilder{attr: rb.Attr, prefix: pk.DataPrefix(), startTs: rb.StartTs} 893 builder.fn = func(uid uint64, pl *List, txn *Txn) error { 894 edge := pb.DirectedEdge{Attr: rb.Attr, Entity: uid} 895 return pl.Iterate(txn.StartTs, 0, func(pp *pb.Posting) error { 896 puid := pp.Uid 897 // Add reverse entries based on p. 898 edge.ValueId = puid 899 edge.Op = pb.DirectedEdge_SET 900 edge.Facets = pp.Facets 901 edge.Label = pp.Label 902 903 for { 904 err := txn.addReverseMutation(ctx, &edge) 905 switch err { 906 case ErrRetry: 907 time.Sleep(10 * time.Millisecond) 908 default: 909 return err 910 } 911 } 912 }) 913 } 914 return builder.Run(ctx) 915 } 916 917 // needsListTypeRebuild returns true if the schema changed from a scalar to a 918 // list. It returns true if the index can be left as is. 
919 func (rb *IndexRebuild) needsListTypeRebuild() (bool, error) { 920 x.AssertTruef(rb.CurrentSchema != nil, "Current schema cannot be nil.") 921 922 if rb.OldSchema == nil { 923 return false, nil 924 } 925 if rb.CurrentSchema.List && !rb.OldSchema.List { 926 return true, nil 927 } 928 if rb.OldSchema.List && !rb.CurrentSchema.List { 929 return false, errors.Errorf("Type can't be changed from list to scalar for attr: [%s]"+ 930 " without dropping it first.", rb.CurrentSchema.Predicate) 931 } 932 933 return false, nil 934 } 935 936 // rebuildListType rebuilds the index when the schema is changed from scalar to list type. 937 // We need to fingerprint the values to get the new ValueId. 938 func rebuildListType(ctx context.Context, rb *IndexRebuild) error { 939 if needsRebuild, err := rb.needsListTypeRebuild(); !needsRebuild || err != nil { 940 return err 941 } 942 943 pk := x.ParsedKey{Attr: rb.Attr} 944 builder := rebuilder{attr: rb.Attr, prefix: pk.DataPrefix(), startTs: rb.StartTs} 945 builder.fn = func(uid uint64, pl *List, txn *Txn) error { 946 var mpost *pb.Posting 947 err := pl.Iterate(txn.StartTs, 0, func(p *pb.Posting) error { 948 // We only want to modify the untagged value. There could be other values with a 949 // lang tag. 950 if p.Uid == math.MaxUint64 { 951 mpost = p 952 } 953 return nil 954 }) 955 if err != nil { 956 return err 957 } 958 if mpost == nil { 959 return nil 960 } 961 // Delete the old edge corresponding to ValueId math.MaxUint64 962 t := &pb.DirectedEdge{ 963 ValueId: mpost.Uid, 964 Attr: rb.Attr, 965 Op: pb.DirectedEdge_DEL, 966 } 967 968 // Ensure that list is in the cache run by txn. Otherwise, nothing would 969 // get updated. 970 pl = txn.cache.SetIfAbsent(string(pl.key), pl) 971 if err := pl.addMutation(ctx, txn, t); err != nil { 972 return err 973 } 974 // Add the new edge with the fingerprinted value id. 
975 newEdge := &pb.DirectedEdge{ 976 Attr: rb.Attr, 977 Value: mpost.Value, 978 ValueType: mpost.ValType, 979 Op: pb.DirectedEdge_SET, 980 Label: mpost.Label, 981 Facets: mpost.Facets, 982 } 983 return pl.addMutation(ctx, txn, newEdge) 984 } 985 return builder.Run(ctx) 986 } 987 988 // DeleteAll deletes all entries in the posting list. 989 func DeleteAll() error { 990 return pstore.DropAll() 991 } 992 993 // DeleteData deletes all data but leaves types and schema intact. 994 func DeleteData() error { 995 return pstore.DropPrefix([]byte{x.DefaultPrefix}) 996 } 997 998 // DeletePredicate deletes all entries and indices for a given predicate. 999 func DeletePredicate(ctx context.Context, attr string) error { 1000 glog.Infof("Dropping predicate: [%s]", attr) 1001 prefix := x.PredicatePrefix(attr) 1002 if err := pstore.DropPrefix(prefix); err != nil { 1003 return err 1004 } 1005 1006 return schema.State().Delete(attr) 1007 }