github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/posting/index.go (about)

     1  /*
     2   * Copyright 2016-2018 Dgraph Labs, Inc. and Contributors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package posting
    18  
    19  import (
    20  	"bytes"
    21  	"context"
    22  	"encoding/hex"
    23  	"fmt"
    24  	"math"
    25  	"time"
    26  
    27  	"github.com/golang/glog"
    28  	ostats "go.opencensus.io/stats"
    29  	otrace "go.opencensus.io/trace"
    30  
    31  	"github.com/dgraph-io/badger"
    32  	bpb "github.com/dgraph-io/badger/pb"
    33  	"github.com/dgraph-io/dgraph/protos/pb"
    34  	"github.com/dgraph-io/dgraph/schema"
    35  	"github.com/dgraph-io/dgraph/tok"
    36  	"github.com/dgraph-io/dgraph/types"
    37  	"github.com/dgraph-io/dgraph/x"
    38  	"github.com/pkg/errors"
    39  )
    40  
// emptyCountParams is the zero-value countParams. Helpers in this file return
// it when they fail or when there is no count index update to report.
var emptyCountParams countParams
    42  
// indexMutationInfo bundles the inputs that indexTokens and addIndexMutations
// need to generate the index mutations for a single value of an edge.
type indexMutationInfo struct {
	tokenizers []tok.Tokenizer    // Tokenizers to use; addIndexMutations loads them from the schema when nil.
	edge       *pb.DirectedEdge   // Represents the original uid -> value edge.
	val        types.Val          // Value that is converted to the schema type and tokenized.
	op         pb.DirectedEdge_Op // Op that is set on the generated index edges.
}
    49  
    50  // indexTokensforTokenizers return tokens, without the predicate prefix and
    51  // index rune, for specific tokenizers.
    52  func indexTokens(info *indexMutationInfo) ([]string, error) {
    53  	attr := info.edge.Attr
    54  	lang := info.edge.GetLang()
    55  
    56  	schemaType, err := schema.State().TypeOf(attr)
    57  	if err != nil || !schemaType.IsScalar() {
    58  		return nil, errors.Errorf("Cannot index attribute %s of type object.", attr)
    59  	}
    60  
    61  	if !schema.State().IsIndexed(attr) {
    62  		return nil, errors.Errorf("Attribute %s is not indexed.", attr)
    63  	}
    64  	sv, err := types.Convert(info.val, schemaType)
    65  	if err != nil {
    66  		return nil, err
    67  	}
    68  
    69  	var tokens []string
    70  	for _, it := range info.tokenizers {
    71  		toks, err := tok.BuildTokens(sv.Value, tok.GetLangTokenizer(it, lang))
    72  		if err != nil {
    73  			return tokens, err
    74  		}
    75  		tokens = append(tokens, toks...)
    76  	}
    77  	return tokens, nil
    78  }
    79  
    80  // addIndexMutations adds mutation(s) for a single term, to maintain the index,
    81  // but only for the given tokenizers.
    82  // TODO - See if we need to pass op as argument as t should already have Op.
    83  func (txn *Txn) addIndexMutations(ctx context.Context, info *indexMutationInfo) error {
    84  	if info.tokenizers == nil {
    85  		info.tokenizers = schema.State().Tokenizer(info.edge.Attr)
    86  	}
    87  
    88  	attr := info.edge.Attr
    89  	uid := info.edge.Entity
    90  	x.AssertTrue(uid != 0)
    91  	tokens, err := indexTokens(info)
    92  
    93  	if err != nil {
    94  		// This data is not indexable
    95  		return err
    96  	}
    97  
    98  	// Create a value token -> uid edge.
    99  	edge := &pb.DirectedEdge{
   100  		ValueId: uid,
   101  		Attr:    attr,
   102  		Op:      info.op,
   103  	}
   104  
   105  	for _, token := range tokens {
   106  		if err := txn.addIndexMutation(ctx, edge, token); err != nil {
   107  			return err
   108  		}
   109  	}
   110  	return nil
   111  }
   112  
   113  func (txn *Txn) addIndexMutation(ctx context.Context, edge *pb.DirectedEdge,
   114  	token string) error {
   115  	key := x.IndexKey(edge.Attr, token)
   116  
   117  	plist, err := txn.cache.GetFromDelta(key)
   118  	if err != nil {
   119  		return err
   120  	}
   121  
   122  	x.AssertTrue(plist != nil)
   123  	if err = plist.addMutation(ctx, txn, edge); err != nil {
   124  		return err
   125  	}
   126  	ostats.Record(ctx, x.NumEdges.M(1))
   127  	return nil
   128  }
   129  
// countParams is sent to updateCount function. It is used to update the count index.
// updateCount deletes the uid from the key corresponding to <attr, countBefore> and,
// when countAfter is greater than zero, adds it to <attr, countAfter>.
type countParams struct {
	attr        string // Predicate whose count index is updated.
	countBefore int    // Count whose posting list the entity is deleted from.
	countAfter  int    // Count whose posting list the entity is added to, if greater than zero.
	entity      uint64 // Uid written as the ValueId of the count index edges.
	reverse     bool   // Forwarded to x.CountKey when building the count index key.
}
   140  
   141  func (txn *Txn) addReverseMutationHelper(ctx context.Context, plist *List,
   142  	hasCountIndex bool, edge *pb.DirectedEdge) (countParams, error) {
   143  	countBefore, countAfter := 0, 0
   144  
   145  	if hasCountIndex {
   146  		countBefore = plist.Length(txn.StartTs, 0)
   147  		if countBefore == -1 {
   148  			return emptyCountParams, ErrTsTooOld
   149  		}
   150  	}
   151  	if err := plist.addMutation(ctx, txn, edge); err != nil {
   152  		return emptyCountParams, err
   153  	}
   154  	if hasCountIndex {
   155  		countAfter = plist.Length(txn.StartTs, 0)
   156  		if countAfter == -1 {
   157  			return emptyCountParams, ErrTsTooOld
   158  		}
   159  		return countParams{
   160  			attr:        edge.Attr,
   161  			countBefore: countBefore,
   162  			countAfter:  countAfter,
   163  			entity:      edge.Entity,
   164  			reverse:     true,
   165  		}, nil
   166  	}
   167  	return emptyCountParams, nil
   168  }
   169  
   170  func (txn *Txn) addReverseMutation(ctx context.Context, t *pb.DirectedEdge) error {
   171  	key := x.ReverseKey(t.Attr, t.ValueId)
   172  	hasCountIndex := schema.State().HasCount(t.Attr)
   173  
   174  	var getFn func(key []byte) (*List, error)
   175  	if hasCountIndex {
   176  		// We need to retrieve the full posting list from disk, to allow us to get the length of the
   177  		// posting list for the counts.
   178  		getFn = txn.Get
   179  	} else {
   180  		// We are just adding a reverse edge. No need to read the list from disk.
   181  		getFn = txn.GetFromDelta
   182  	}
   183  	plist, err := getFn(key)
   184  	if err != nil {
   185  		return err
   186  	}
   187  
   188  	x.AssertTrue(plist != nil)
   189  	// We must create a copy here.
   190  	edge := &pb.DirectedEdge{
   191  		Entity:  t.ValueId,
   192  		ValueId: t.Entity,
   193  		Attr:    t.Attr,
   194  		Op:      t.Op,
   195  		Facets:  t.Facets,
   196  	}
   197  
   198  	cp, err := txn.addReverseMutationHelper(ctx, plist, hasCountIndex, edge)
   199  	if err != nil {
   200  		return err
   201  	}
   202  	ostats.Record(ctx, x.NumEdges.M(1))
   203  
   204  	if hasCountIndex && cp.countAfter != cp.countBefore {
   205  		if err := txn.updateCount(ctx, cp); err != nil {
   206  			return err
   207  		}
   208  	}
   209  	return nil
   210  }
   211  
   212  func (l *List) handleDeleteAll(ctx context.Context, edge *pb.DirectedEdge,
   213  	txn *Txn) error {
   214  	isReversed := schema.State().IsReversed(edge.Attr)
   215  	isIndexed := schema.State().IsIndexed(edge.Attr)
   216  	hasCount := schema.State().HasCount(edge.Attr)
   217  	delEdge := &pb.DirectedEdge{
   218  		Attr:   edge.Attr,
   219  		Op:     edge.Op,
   220  		Entity: edge.Entity,
   221  	}
   222  	// To calculate length of posting list. Used for deletion of count index.
   223  	var plen int
   224  	err := l.Iterate(txn.StartTs, 0, func(p *pb.Posting) error {
   225  		plen++
   226  		switch {
   227  		case isReversed:
   228  			// Delete reverse edge for each posting.
   229  			delEdge.ValueId = p.Uid
   230  			return txn.addReverseMutation(ctx, delEdge)
   231  		case isIndexed:
   232  			// Delete index edge of each posting.
   233  			val := types.Val{
   234  				Tid:   types.TypeID(p.ValType),
   235  				Value: p.Value,
   236  			}
   237  			return txn.addIndexMutations(ctx, &indexMutationInfo{
   238  				tokenizers: schema.State().Tokenizer(edge.Attr),
   239  				edge:       edge,
   240  				val:        val,
   241  				op:         pb.DirectedEdge_DEL,
   242  			})
   243  		default:
   244  			return nil
   245  		}
   246  	})
   247  	if err != nil {
   248  		return err
   249  	}
   250  	if hasCount {
   251  		// Delete uid from count index. Deletion of reverses is taken care by addReverseMutation
   252  		// above.
   253  		if err := txn.updateCount(ctx, countParams{
   254  			attr:        edge.Attr,
   255  			countBefore: plen,
   256  			countAfter:  0,
   257  			entity:      edge.Entity,
   258  		}); err != nil {
   259  			return err
   260  		}
   261  	}
   262  
   263  	return l.addMutation(ctx, txn, edge)
   264  }
   265  
   266  func (txn *Txn) addCountMutation(ctx context.Context, t *pb.DirectedEdge, count uint32,
   267  	reverse bool) error {
   268  	key := x.CountKey(t.Attr, count, reverse)
   269  	plist, err := txn.cache.GetFromDelta(key)
   270  	if err != nil {
   271  		return err
   272  	}
   273  
   274  	x.AssertTruef(plist != nil, "plist is nil [%s] %d",
   275  		t.Attr, t.ValueId)
   276  	if err = plist.addMutation(ctx, txn, t); err != nil {
   277  		return err
   278  	}
   279  	ostats.Record(ctx, x.NumEdges.M(1))
   280  	return nil
   281  
   282  }
   283  
   284  func (txn *Txn) updateCount(ctx context.Context, params countParams) error {
   285  	edge := pb.DirectedEdge{
   286  		ValueId: params.entity,
   287  		Attr:    params.attr,
   288  		Op:      pb.DirectedEdge_DEL,
   289  	}
   290  	if err := txn.addCountMutation(ctx, &edge, uint32(params.countBefore),
   291  		params.reverse); err != nil {
   292  		return err
   293  	}
   294  
   295  	if params.countAfter > 0 {
   296  		edge.Op = pb.DirectedEdge_SET
   297  		if err := txn.addCountMutation(ctx, &edge, uint32(params.countAfter),
   298  			params.reverse); err != nil {
   299  			return err
   300  		}
   301  	}
   302  	return nil
   303  }
   304  
   305  func (txn *Txn) addMutationHelper(ctx context.Context, l *List, doUpdateIndex bool,
   306  	hasCountIndex bool, t *pb.DirectedEdge) (types.Val, bool, countParams, error) {
   307  	var val types.Val
   308  	var found bool
   309  	var err error
   310  
   311  	t1 := time.Now()
   312  	l.Lock()
   313  	defer l.Unlock()
   314  
   315  	if dur := time.Since(t1); dur > time.Millisecond {
   316  		span := otrace.FromContext(ctx)
   317  		span.Annotatef([]otrace.Attribute{otrace.BoolAttribute("slow-lock", true)},
   318  			"Acquired lock %v %v %v", dur, t.Attr, t.Entity)
   319  	}
   320  
   321  	if err := l.canMutateUid(txn, t); err != nil {
   322  		return val, found, emptyCountParams, err
   323  	}
   324  
   325  	if doUpdateIndex {
   326  		// Check original value BEFORE any mutation actually happens.
   327  		val, found, err = l.findValue(txn.StartTs, fingerprintEdge(t))
   328  		if err != nil {
   329  			return val, found, emptyCountParams, err
   330  		}
   331  	}
   332  
   333  	// If the predicate schema is not a list, ignore delete triples whose object is not a star or
   334  	// a value that does not match the existing value.
   335  	if !schema.State().IsList(t.Attr) && t.Op == pb.DirectedEdge_DEL && string(t.Value) != x.Star {
   336  		newPost := NewPosting(t)
   337  		pFound, currPost, err := l.findPosting(txn.StartTs, fingerprintEdge(t))
   338  		if err != nil {
   339  			return val, found, emptyCountParams, err
   340  		}
   341  
   342  		// This is a scalar value of non-list type and a delete edge mutation, so if the value
   343  		// given by the user doesn't match the value we have, we return found to be false, to avoid
   344  		// deleting the uid from index posting list.
   345  		// This second check is required because we fingerprint the scalar values as math.MaxUint64,
   346  		// so even though they might be different the check in the doUpdateIndex block above would
   347  		// return found to be true.
   348  		if pFound && !(bytes.Equal(currPost.Value, newPost.Value) &&
   349  			types.TypeID(currPost.ValType) == types.TypeID(newPost.ValType)) {
   350  			return val, false, emptyCountParams, nil
   351  		}
   352  	}
   353  
   354  	countBefore, countAfter := 0, 0
   355  	if hasCountIndex {
   356  		countBefore = l.length(txn.StartTs, 0)
   357  		if countBefore == -1 {
   358  			return val, found, emptyCountParams, ErrTsTooOld
   359  		}
   360  	}
   361  	if err = l.addMutationInternal(ctx, txn, t); err != nil {
   362  		return val, found, emptyCountParams, err
   363  	}
   364  	if hasCountIndex {
   365  		countAfter = l.length(txn.StartTs, 0)
   366  		if countAfter == -1 {
   367  			return val, found, emptyCountParams, ErrTsTooOld
   368  		}
   369  		return val, found, countParams{
   370  			attr:        t.Attr,
   371  			countBefore: countBefore,
   372  			countAfter:  countAfter,
   373  			entity:      t.Entity,
   374  		}, nil
   375  	}
   376  	return val, found, emptyCountParams, nil
   377  }
   378  
   379  // AddMutationWithIndex is addMutation with support for indexing. It also
   380  // supports reverse edges.
   381  func (l *List) AddMutationWithIndex(ctx context.Context, edge *pb.DirectedEdge,
   382  	txn *Txn) error {
   383  	if len(edge.Attr) == 0 {
   384  		return errors.Errorf("Predicate cannot be empty for edge with subject: [%v], object: [%v]"+
   385  			" and value: [%v]", edge.Entity, edge.ValueId, edge.Value)
   386  	}
   387  
   388  	if edge.Op == pb.DirectedEdge_DEL && string(edge.Value) == x.Star {
   389  		return l.handleDeleteAll(ctx, edge, txn)
   390  	}
   391  
   392  	doUpdateIndex := pstore != nil && schema.State().IsIndexed(edge.Attr)
   393  	hasCountIndex := schema.State().HasCount(edge.Attr)
   394  	val, found, cp, err := txn.addMutationHelper(ctx, l, doUpdateIndex, hasCountIndex, edge)
   395  	if err != nil {
   396  		return err
   397  	}
   398  	ostats.Record(ctx, x.NumEdges.M(1))
   399  	if hasCountIndex && cp.countAfter != cp.countBefore {
   400  		if err := txn.updateCount(ctx, cp); err != nil {
   401  			return err
   402  		}
   403  	}
   404  	if doUpdateIndex {
   405  		// Exact matches.
   406  		if found && val.Value != nil {
   407  			if err := txn.addIndexMutations(ctx, &indexMutationInfo{
   408  				tokenizers: schema.State().Tokenizer(edge.Attr),
   409  				edge:       edge,
   410  				val:        val,
   411  				op:         pb.DirectedEdge_DEL,
   412  			}); err != nil {
   413  				return err
   414  			}
   415  		}
   416  		if edge.Op == pb.DirectedEdge_SET {
   417  			val = types.Val{
   418  				Tid:   types.TypeID(edge.ValueType),
   419  				Value: edge.Value,
   420  			}
   421  			if err := txn.addIndexMutations(ctx, &indexMutationInfo{
   422  				tokenizers: schema.State().Tokenizer(edge.Attr),
   423  				edge:       edge,
   424  				val:        val,
   425  				op:         pb.DirectedEdge_SET,
   426  			}); err != nil {
   427  				return err
   428  			}
   429  		}
   430  	}
   431  	// Add reverse mutation irrespective of hasMutated, server crash can happen after
   432  	// mutation is synced and before reverse edge is synced
   433  	if (pstore != nil) && (edge.ValueId != 0) && schema.State().IsReversed(edge.Attr) {
   434  		if err := txn.addReverseMutation(ctx, edge); err != nil {
   435  			return err
   436  		}
   437  	}
   438  	return nil
   439  }
   440  
   441  // deleteTokensFor deletes the index for the given attribute and token.
   442  func deleteTokensFor(attr, tokenizerName string) error {
   443  	pk := x.ParsedKey{Attr: attr}
   444  	prefix := pk.IndexPrefix()
   445  	tokenizer, ok := tok.GetTokenizer(tokenizerName)
   446  	if !ok {
   447  		return errors.Errorf("Could not find valid tokenizer for %s", tokenizerName)
   448  	}
   449  	prefix = append(prefix, tokenizer.Identifier())
   450  	if err := pstore.DropPrefix(prefix); err != nil {
   451  		return err
   452  	}
   453  
   454  	// Also delete all the parts of any list that has been split into multiple parts.
   455  	// Such keys have a different prefix (the last byte is set to 1).
   456  	prefix = pk.IndexPrefix()
   457  	prefix[len(prefix)-1] = x.ByteSplit
   458  	prefix = append(prefix, tokenizer.Identifier())
   459  	return pstore.DropPrefix(prefix)
   460  }
   461  
   462  func deleteReverseEdges(attr string) error {
   463  	pk := x.ParsedKey{Attr: attr}
   464  	prefix := pk.ReversePrefix()
   465  	if err := pstore.DropPrefix(prefix); err != nil {
   466  		return err
   467  	}
   468  
   469  	// Also delete all the parts of any list that has been split into multiple parts.
   470  	// Such keys have a different prefix (the last byte is set to 1).
   471  	prefix = pk.ReversePrefix()
   472  	prefix[len(prefix)-1] = x.ByteSplit
   473  
   474  	return pstore.DropPrefix(prefix)
   475  }
   476  
   477  func deleteCountIndex(attr string) error {
   478  	pk := x.ParsedKey{Attr: attr}
   479  	if err := pstore.DropPrefix(pk.CountPrefix(false)); err != nil {
   480  		return err
   481  	}
   482  	if err := pstore.DropPrefix(pk.CountPrefix(true)); err != nil {
   483  		return err
   484  	}
   485  
   486  	// Also delete all the parts of any list that has been split into multiple parts.
   487  	// Such keys have a different prefix (the last byte is set to 1).
   488  	prefix := pk.CountPrefix(false)
   489  	prefix[len(prefix)-1] = x.ByteSplit
   490  	if err := pstore.DropPrefix(prefix); err != nil {
   491  		return err
   492  	}
   493  
   494  	prefix = pk.CountPrefix(true)
   495  	prefix[len(prefix)-1] = x.ByteSplit
   496  	return pstore.DropPrefix(prefix)
   497  }
   498  
// rebuilder handles the process of rebuilding an index. Run streams every key
// under prefix as of startTs, calls fn for each of them and finally writes the
// deltas accumulated in the transaction to disk at startTs.
type rebuilder struct {
	attr    string // Predicate being rebuilt; only used in log messages by Run.
	prefix  []byte // Key prefix of the posting lists that are streamed.
	startTs uint64 // Timestamp used for the transaction, the stream and the writes.

	// The posting list passed here is the on disk version. It is not coming
	// from the LRU cache.
	fn func(uid uint64, pl *List, txn *Txn) error
}
   509  
   510  func (r *rebuilder) Run(ctx context.Context) error {
   511  	glog.V(1).Infof(
   512  		"Rebuilding index for predicate %s: Starting process. StartTs=%d. Prefix=\n%s\n",
   513  		r.attr, r.startTs, hex.Dump(r.prefix))
   514  
   515  	// We create one txn for all the mutations to be housed in. We also create a
   516  	// localized posting list cache, to avoid stressing or mixing up with the
   517  	// global lcache (the LRU cache).
   518  	txn := NewTxn(r.startTs)
   519  
   520  	stream := pstore.NewStreamAt(r.startTs)
   521  	stream.LogPrefix = fmt.Sprintf("Rebuilding index for predicate %s:", r.attr)
   522  	stream.Prefix = r.prefix
   523  	stream.KeyToList = func(key []byte, itr *badger.Iterator) (*bpb.KVList, error) {
   524  		// We should return quickly if the context is no longer valid.
   525  		select {
   526  		case <-ctx.Done():
   527  			return nil, ctx.Err()
   528  		default:
   529  		}
   530  
   531  		pk, err := x.Parse(key)
   532  		if err != nil {
   533  			return nil, errors.Wrapf(err, "could not parse key %s", hex.Dump(key))
   534  		}
   535  
   536  		item := itr.Item()
   537  		keyCopy := item.KeyCopy(nil)
   538  		l, err := ReadPostingList(keyCopy, itr)
   539  		if err != nil {
   540  			return nil, err
   541  		}
   542  		if err := r.fn(pk.Uid, l, txn); err != nil {
   543  			return nil, err
   544  		}
   545  
   546  		return nil, nil
   547  	}
   548  	stream.Send = func(*bpb.KVList) error {
   549  		// The work of adding the index edges to the transaction is done by r.fn
   550  		// so this function doesn't have any work to do.
   551  		return nil
   552  	}
   553  
   554  	if err := stream.Orchestrate(ctx); err != nil {
   555  		return err
   556  	}
   557  	glog.V(1).Infof("Rebuilding index for predicate %s: Iteration done. Now committing at ts=%d\n",
   558  		r.attr, r.startTs)
   559  
   560  	// Convert data into deltas.
   561  	txn.Update()
   562  
   563  	// Now we write all the created posting lists to disk.
   564  	writer := NewTxnWriter(pstore)
   565  	counter := 0
   566  	numDeltas := len(txn.cache.deltas)
   567  	for key, delta := range txn.cache.deltas {
   568  		if len(delta) == 0 {
   569  			continue
   570  		}
   571  		// We choose to write the PL at r.startTs, so it won't be read by txns,
   572  		// which occurred before this schema mutation. Typically, we use
   573  		// kv.Version as the timestamp.
   574  		if err := writer.SetAt([]byte(key), delta, BitDeltaPosting, r.startTs); err != nil {
   575  			return err
   576  		}
   577  
   578  		counter++
   579  		if counter%1e5 == 0 {
   580  			glog.V(1).Infof("Rebuilding index for predicate %s: wrote %d of %d deltas to disk.\n",
   581  				r.attr, counter, numDeltas)
   582  		}
   583  	}
   584  	glog.V(1).Infoln("Rebuild: Flushing all writes.")
   585  	return writer.Flush()
   586  }
   587  
// IndexRebuild holds the info needed to initiate a rebuild of the indices.
type IndexRebuild struct {
	Attr          string           // Predicate whose indices are rebuilt.
	StartTs       uint64           // Timestamp the rebuilders read and write at.
	OldSchema     *pb.SchemaUpdate // Previous schema; nil is treated as an empty schema.
	CurrentSchema *pb.SchemaUpdate // New schema; the needs*Rebuild methods assert it is not nil.
}
   595  
   596  type indexOp int
   597  
   598  const (
   599  	indexNoop    indexOp = iota // Index should be left alone.
   600  	indexDelete          = iota // Index should be deleted.
   601  	indexRebuild         = iota // Index should be deleted and rebuilt.
   602  )
   603  
   604  // Run rebuilds all indices that need it.
   605  func (rb *IndexRebuild) Run(ctx context.Context) error {
   606  	if err := rebuildListType(ctx, rb); err != nil {
   607  		return err
   608  	}
   609  	if err := rebuildIndex(ctx, rb); err != nil {
   610  		return err
   611  	}
   612  	if err := rebuildReverseEdges(ctx, rb); err != nil {
   613  		return err
   614  	}
   615  	return rebuildCountIndex(ctx, rb)
   616  }
   617  
// indexRebuildInfo is the result of needsIndexRebuild: the operation to perform
// on the token index and the tokenizers it applies to.
type indexRebuildInfo struct {
	op                  indexOp  // What to do with the index.
	tokenizersToDelete  []string // Names of tokenizers whose index entries are dropped.
	tokenizersToRebuild []string // Names of tokenizers whose index entries are regenerated.
}
   623  
   624  func (rb *IndexRebuild) needsIndexRebuild() indexRebuildInfo {
   625  	x.AssertTruef(rb.CurrentSchema != nil, "Current schema cannot be nil.")
   626  
   627  	// If the old schema is nil, we can treat it as an empty schema. Copy it
   628  	// first to avoid overwriting it in rb.
   629  	old := rb.OldSchema
   630  	if old == nil {
   631  		old = &pb.SchemaUpdate{}
   632  	}
   633  
   634  	currIndex := rb.CurrentSchema.Directive == pb.SchemaUpdate_INDEX
   635  	prevIndex := old.Directive == pb.SchemaUpdate_INDEX
   636  
   637  	// Index does not need to be rebuilt or deleted if the scheme directive
   638  	// did not require an index before and now.
   639  	if !currIndex && !prevIndex {
   640  		return indexRebuildInfo{
   641  			op: indexNoop,
   642  		}
   643  	}
   644  
   645  	// Index only needs to be deleted if the schema directive changed and the
   646  	// new directive does not require an index. Predicate is not checking
   647  	// prevIndex since the previous if statement guarantees both values are
   648  	// different.
   649  	if !currIndex {
   650  		return indexRebuildInfo{
   651  			op:                 indexDelete,
   652  			tokenizersToDelete: old.Tokenizer,
   653  		}
   654  	}
   655  
   656  	// All tokenizers in the index need to be deleted and rebuilt if the value
   657  	// types have changed.
   658  	if currIndex && rb.CurrentSchema.ValueType != old.ValueType {
   659  		return indexRebuildInfo{
   660  			op:                  indexRebuild,
   661  			tokenizersToDelete:  old.Tokenizer,
   662  			tokenizersToRebuild: rb.CurrentSchema.Tokenizer,
   663  		}
   664  	}
   665  
   666  	// Index needs to be rebuilt if the tokenizers have changed
   667  	prevTokens := make(map[string]struct{})
   668  	for _, t := range old.Tokenizer {
   669  		prevTokens[t] = struct{}{}
   670  	}
   671  	currTokens := make(map[string]struct{})
   672  	for _, t := range rb.CurrentSchema.Tokenizer {
   673  		currTokens[t] = struct{}{}
   674  	}
   675  
   676  	newTokenizers, deletedTokenizers := x.Diff(currTokens, prevTokens)
   677  
   678  	// If the tokenizers are the same, nothing needs to be done.
   679  	if len(newTokenizers) == 0 && len(deletedTokenizers) == 0 {
   680  		return indexRebuildInfo{
   681  			op: indexNoop,
   682  		}
   683  	}
   684  
   685  	return indexRebuildInfo{
   686  		op:                  indexRebuild,
   687  		tokenizersToDelete:  deletedTokenizers,
   688  		tokenizersToRebuild: newTokenizers,
   689  	}
   690  }
   691  
   692  // rebuildIndex rebuilds index for a given attribute.
   693  // We commit mutations with startTs and ignore the errors.
   694  func rebuildIndex(ctx context.Context, rb *IndexRebuild) error {
   695  	// Exit early if indices do not need to be rebuilt.
   696  	rebuildInfo := rb.needsIndexRebuild()
   697  
   698  	if rebuildInfo.op == indexNoop {
   699  		return nil
   700  	}
   701  
   702  	glog.Infof("Deleting index for attr %s and tokenizers %s", rb.Attr,
   703  		rebuildInfo.tokenizersToDelete)
   704  	for _, tokenizer := range rebuildInfo.tokenizersToDelete {
   705  		if err := deleteTokensFor(rb.Attr, tokenizer); err != nil {
   706  			return err
   707  		}
   708  	}
   709  
   710  	// Exit early if the index only need to be deleted and not rebuilt.
   711  	if rebuildInfo.op == indexDelete {
   712  		return nil
   713  	}
   714  
   715  	// Exit early if there are no tokenizers to rebuild.
   716  	if len(rebuildInfo.tokenizersToRebuild) == 0 {
   717  		return nil
   718  	}
   719  
   720  	glog.Infof("Rebuilding index for attr %s and tokenizers %s", rb.Attr,
   721  		rebuildInfo.tokenizersToRebuild)
   722  	// Before rebuilding, the existing index needs to be deleted.
   723  	for _, tokenizer := range rebuildInfo.tokenizersToRebuild {
   724  		if err := deleteTokensFor(rb.Attr, tokenizer); err != nil {
   725  			return err
   726  		}
   727  	}
   728  
   729  	tokenizers, err := tok.GetTokenizers(rebuildInfo.tokenizersToRebuild)
   730  	if err != nil {
   731  		return err
   732  	}
   733  
   734  	pk := x.ParsedKey{Attr: rb.Attr}
   735  	builder := rebuilder{attr: rb.Attr, prefix: pk.DataPrefix(), startTs: rb.StartTs}
   736  	builder.fn = func(uid uint64, pl *List, txn *Txn) error {
   737  		edge := pb.DirectedEdge{Attr: rb.Attr, Entity: uid}
   738  		return pl.Iterate(txn.StartTs, 0, func(p *pb.Posting) error {
   739  			// Add index entries based on p.
   740  			val := types.Val{
   741  				Value: p.Value,
   742  				Tid:   types.TypeID(p.ValType),
   743  			}
   744  
   745  			for {
   746  				err := txn.addIndexMutations(ctx, &indexMutationInfo{
   747  					tokenizers: tokenizers,
   748  					edge:       &edge,
   749  					val:        val,
   750  					op:         pb.DirectedEdge_SET,
   751  				})
   752  				switch err {
   753  				case ErrRetry:
   754  					time.Sleep(10 * time.Millisecond)
   755  				default:
   756  					return err
   757  				}
   758  			}
   759  		})
   760  	}
   761  	return builder.Run(ctx)
   762  }
   763  
   764  func (rb *IndexRebuild) needsCountIndexRebuild() indexOp {
   765  	x.AssertTruef(rb.CurrentSchema != nil, "Current schema cannot be nil.")
   766  
   767  	// If the old schema is nil, treat it as an empty schema. Copy it to avoid
   768  	// overwriting it in rb.
   769  	old := rb.OldSchema
   770  	if old == nil {
   771  		old = &pb.SchemaUpdate{}
   772  	}
   773  
   774  	// Do nothing if the schema directive did not change.
   775  	if rb.CurrentSchema.Count == old.Count {
   776  		return indexNoop
   777  
   778  	}
   779  
   780  	// If the new schema does not require an index, delete the current index.
   781  	if !rb.CurrentSchema.Count {
   782  		return indexDelete
   783  	}
   784  
   785  	// Otherwise, the index needs to be rebuilt.
   786  	return indexRebuild
   787  }
   788  
   789  // rebuildCountIndex rebuilds the count index for a given attribute.
   790  func rebuildCountIndex(ctx context.Context, rb *IndexRebuild) error {
   791  	op := rb.needsCountIndexRebuild()
   792  	if op == indexNoop {
   793  		return nil
   794  	}
   795  
   796  	glog.Infof("Deleting count index for %s", rb.Attr)
   797  	if err := deleteCountIndex(rb.Attr); err != nil {
   798  		return err
   799  	}
   800  
   801  	// Exit early if attribute is index only needed to be deleted.
   802  	if op == indexDelete {
   803  		return nil
   804  	}
   805  
   806  	glog.Infof("Rebuilding count index for %s", rb.Attr)
   807  	var reverse bool
   808  	fn := func(uid uint64, pl *List, txn *Txn) error {
   809  		t := &pb.DirectedEdge{
   810  			ValueId: uid,
   811  			Attr:    rb.Attr,
   812  			Op:      pb.DirectedEdge_SET,
   813  		}
   814  		sz := pl.Length(rb.StartTs, 0)
   815  		if sz == -1 {
   816  			return nil
   817  		}
   818  		for {
   819  			err := txn.addCountMutation(ctx, t, uint32(sz), reverse)
   820  			switch err {
   821  			case ErrRetry:
   822  				time.Sleep(10 * time.Millisecond)
   823  			default:
   824  				return err
   825  			}
   826  		}
   827  	}
   828  
   829  	// Create the forward index.
   830  	pk := x.ParsedKey{Attr: rb.Attr}
   831  	builder := rebuilder{attr: rb.Attr, prefix: pk.DataPrefix(), startTs: rb.StartTs}
   832  	builder.fn = fn
   833  	if err := builder.Run(ctx); err != nil {
   834  		return err
   835  	}
   836  
   837  	// Create the reverse index. The count reverse index is created if this
   838  	// predicate has both a count and reverse directive in the schema. It's safe
   839  	// to call builder.Run even if that's not the case as the reverse prefix
   840  	// will be empty.
   841  	reverse = true
   842  	builder = rebuilder{attr: rb.Attr, prefix: pk.ReversePrefix(), startTs: rb.StartTs}
   843  	builder.fn = fn
   844  	return builder.Run(ctx)
   845  }
   846  
   847  func (rb *IndexRebuild) needsReverseEdgesRebuild() indexOp {
   848  	x.AssertTruef(rb.CurrentSchema != nil, "Current schema cannot be nil.")
   849  
   850  	// If old schema is nil, treat it as an empty schema. Copy it to avoid
   851  	// overwriting it in rb.
   852  	old := rb.OldSchema
   853  	if old == nil {
   854  		old = &pb.SchemaUpdate{}
   855  	}
   856  
   857  	currIndex := rb.CurrentSchema.Directive == pb.SchemaUpdate_REVERSE
   858  	prevIndex := old.Directive == pb.SchemaUpdate_REVERSE
   859  
   860  	// If the schema directive did not change, return indexNoop.
   861  	if currIndex == prevIndex {
   862  		return indexNoop
   863  	}
   864  
   865  	// If the current schema requires an index, index should be rebuilt.
   866  	if currIndex {
   867  		return indexRebuild
   868  	}
   869  	// Otherwise, index should only be deleted.
   870  	return indexDelete
   871  }
   872  
   873  // rebuildReverseEdges rebuilds the reverse edges for a given attribute.
   874  func rebuildReverseEdges(ctx context.Context, rb *IndexRebuild) error {
   875  	op := rb.needsReverseEdgesRebuild()
   876  	if op == indexNoop {
   877  		return nil
   878  	}
   879  
   880  	glog.Infof("Deleting reverse index for %s", rb.Attr)
   881  	if err := deleteReverseEdges(rb.Attr); err != nil {
   882  		return err
   883  	}
   884  
   885  	// Exit early if index only needed to be deleted.
   886  	if op == indexDelete {
   887  		return nil
   888  	}
   889  
   890  	glog.Infof("Rebuilding reverse index for %s", rb.Attr)
   891  	pk := x.ParsedKey{Attr: rb.Attr}
   892  	builder := rebuilder{attr: rb.Attr, prefix: pk.DataPrefix(), startTs: rb.StartTs}
   893  	builder.fn = func(uid uint64, pl *List, txn *Txn) error {
   894  		edge := pb.DirectedEdge{Attr: rb.Attr, Entity: uid}
   895  		return pl.Iterate(txn.StartTs, 0, func(pp *pb.Posting) error {
   896  			puid := pp.Uid
   897  			// Add reverse entries based on p.
   898  			edge.ValueId = puid
   899  			edge.Op = pb.DirectedEdge_SET
   900  			edge.Facets = pp.Facets
   901  			edge.Label = pp.Label
   902  
   903  			for {
   904  				err := txn.addReverseMutation(ctx, &edge)
   905  				switch err {
   906  				case ErrRetry:
   907  					time.Sleep(10 * time.Millisecond)
   908  				default:
   909  					return err
   910  				}
   911  			}
   912  		})
   913  	}
   914  	return builder.Run(ctx)
   915  }
   916  
   917  // needsListTypeRebuild returns true if the schema changed from a scalar to a
   918  // list. It returns true if the index can be left as is.
   919  func (rb *IndexRebuild) needsListTypeRebuild() (bool, error) {
   920  	x.AssertTruef(rb.CurrentSchema != nil, "Current schema cannot be nil.")
   921  
   922  	if rb.OldSchema == nil {
   923  		return false, nil
   924  	}
   925  	if rb.CurrentSchema.List && !rb.OldSchema.List {
   926  		return true, nil
   927  	}
   928  	if rb.OldSchema.List && !rb.CurrentSchema.List {
   929  		return false, errors.Errorf("Type can't be changed from list to scalar for attr: [%s]"+
   930  			" without dropping it first.", rb.CurrentSchema.Predicate)
   931  	}
   932  
   933  	return false, nil
   934  }
   935  
   936  // rebuildListType rebuilds the index when the schema is changed from scalar to list type.
   937  // We need to fingerprint the values to get the new ValueId.
   938  func rebuildListType(ctx context.Context, rb *IndexRebuild) error {
   939  	if needsRebuild, err := rb.needsListTypeRebuild(); !needsRebuild || err != nil {
   940  		return err
   941  	}
   942  
   943  	pk := x.ParsedKey{Attr: rb.Attr}
   944  	builder := rebuilder{attr: rb.Attr, prefix: pk.DataPrefix(), startTs: rb.StartTs}
   945  	builder.fn = func(uid uint64, pl *List, txn *Txn) error {
   946  		var mpost *pb.Posting
   947  		err := pl.Iterate(txn.StartTs, 0, func(p *pb.Posting) error {
   948  			// We only want to modify the untagged value. There could be other values with a
   949  			// lang tag.
   950  			if p.Uid == math.MaxUint64 {
   951  				mpost = p
   952  			}
   953  			return nil
   954  		})
   955  		if err != nil {
   956  			return err
   957  		}
   958  		if mpost == nil {
   959  			return nil
   960  		}
   961  		// Delete the old edge corresponding to ValueId math.MaxUint64
   962  		t := &pb.DirectedEdge{
   963  			ValueId: mpost.Uid,
   964  			Attr:    rb.Attr,
   965  			Op:      pb.DirectedEdge_DEL,
   966  		}
   967  
   968  		// Ensure that list is in the cache run by txn. Otherwise, nothing would
   969  		// get updated.
   970  		pl = txn.cache.SetIfAbsent(string(pl.key), pl)
   971  		if err := pl.addMutation(ctx, txn, t); err != nil {
   972  			return err
   973  		}
   974  		// Add the new edge with the fingerprinted value id.
   975  		newEdge := &pb.DirectedEdge{
   976  			Attr:      rb.Attr,
   977  			Value:     mpost.Value,
   978  			ValueType: mpost.ValType,
   979  			Op:        pb.DirectedEdge_SET,
   980  			Label:     mpost.Label,
   981  			Facets:    mpost.Facets,
   982  		}
   983  		return pl.addMutation(ctx, txn, newEdge)
   984  	}
   985  	return builder.Run(ctx)
   986  }
   987  
   988  // DeleteAll deletes all entries in the posting list.
   989  func DeleteAll() error {
   990  	return pstore.DropAll()
   991  }
   992  
   993  // DeleteData deletes all data but leaves types and schema intact.
   994  func DeleteData() error {
   995  	return pstore.DropPrefix([]byte{x.DefaultPrefix})
   996  }
   997  
   998  // DeletePredicate deletes all entries and indices for a given predicate.
   999  func DeletePredicate(ctx context.Context, attr string) error {
  1000  	glog.Infof("Dropping predicate: [%s]", attr)
  1001  	prefix := x.PredicatePrefix(attr)
  1002  	if err := pstore.DropPrefix(prefix); err != nil {
  1003  		return err
  1004  	}
  1005  
  1006  	return schema.State().Delete(attr)
  1007  }