github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/posting/mvcc.go

/*
 * Copyright 2017-2018 Dgraph Labs, Inc. and Contributors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package posting

import (
	"bytes"
	"encoding/hex"
	"math"
	"strconv"
	"sync/atomic"

	"github.com/dgraph-io/badger"
	"github.com/dgraph-io/dgo/protos/api"
	"github.com/dgraph-io/dgraph/protos/pb"
	"github.com/dgraph-io/dgraph/x"
	"github.com/pkg/errors"
)

var (
	// ErrTsTooOld is returned when a transaction is too old to be applied.
	ErrTsTooOld = errors.Errorf("Transaction is too old")
)

// ShouldAbort returns whether the transaction should be aborted.
func (txn *Txn) ShouldAbort() bool {
	if txn == nil {
		return false
	}
	return atomic.LoadUint32(&txn.shouldAbort) > 0
}

// addConflictKey records the given conflict fingerprint in the transaction's conflict set.
func (txn *Txn) addConflictKey(conflictKey uint64) {
	txn.Lock()
	defer txn.Unlock()
	if txn.conflicts == nil {
		txn.conflicts = make(map[uint64]struct{})
	}
	if conflictKey > 0 {
		txn.conflicts[conflictKey] = struct{}{}
	}
}

// FillContext updates the given transaction context with data from this transaction.
func (txn *Txn) FillContext(ctx *api.TxnContext, gid uint32) {
	txn.Lock()
	ctx.StartTs = txn.StartTs
	for key := range txn.conflicts {
		// We don't need to send the whole conflict key to Zero. Solving #2338
		// should be done by sending a list of mutating predicates to Zero,
		// along with the keys to be used for conflict detection.
		fps := strconv.FormatUint(key, 36)
		if !x.HasString(ctx.Keys, fps) {
			ctx.Keys = append(ctx.Keys, fps)
		}
	}
	txn.Unlock()

	txn.Update()
	txn.cache.fillPreds(ctx, gid)
}
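
// parseConflictKey is an illustrative, hypothetical helper: FillContext above
// encodes each conflict fingerprint with strconv.FormatUint(key, 36) before
// handing it to Zero, and this sketch simply shows the matching decode.
// Nothing in this package calls it.
func parseConflictKey(fps string) (uint64, error) {
	// Base-36 is the same radix FillContext uses for encoding.
	return strconv.ParseUint(fps, 36, 64)
}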

// CommitToDisk commits a transaction to disk.
// This function only stores deltas to the commit timestamps. It does not try to generate a state.
// State generation is done via rollups, which happen when a snapshot is created.
// Don't call this for schema mutations. Directly commit them.
func (txn *Txn) CommitToDisk(writer *TxnWriter, commitTs uint64) error {
	if commitTs == 0 {
		return nil
	}

	cache := txn.cache
	cache.Lock()
	defer cache.Unlock()

	var keys []string
	for key := range cache.deltas {
		keys = append(keys, key)
	}

	var idx int
	for idx < len(keys) {
		// writer.update can return early from the loop in case we encounter badger.ErrTxnTooBig. On
		// that error, writer.update would still commit the transaction and return any error. If
		// nil, we continue to process the remaining keys.
		err := writer.update(commitTs, func(btxn *badger.Txn) error {
			for ; idx < len(keys); idx++ {
				key := keys[idx]
				data := cache.deltas[key]
				if len(data) == 0 {
					continue
				}
				if ts := cache.maxVersions[key]; ts >= commitTs {
					// Skip write because we already have a write at a higher ts.
					// Logging here can cause a lot of output when doing Raft log replay. So, let's
					// not output anything here.
					continue
				}
				err := btxn.SetEntry(&badger.Entry{
					Key:      []byte(key),
					Value:    data,
					UserMeta: BitDeltaPosting,
				})
				if err != nil {
					return err
				}
			}
			return nil
		})
		if err != nil {
			return err
		}
	}
	return nil
}
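
// writeDeltasAt is an illustrative, hypothetical sketch of the same
// "resume after badger.ErrTxnTooBig" pattern that CommitToDisk relies on via
// writer.update, shown directly against badger's managed-mode API. The name
// and signature are assumptions; nothing in this package calls it.
func writeDeltasAt(db *badger.DB, commitTs uint64, deltas map[string][]byte) error {
	keys := make([]string, 0, len(deltas))
	for key := range deltas {
		keys = append(keys, key)
	}

	var idx int
	for idx < len(keys) {
		// Each pass writes as many entries as fit into one transaction.
		// The read timestamp is irrelevant for a write-only transaction.
		btxn := db.NewTransactionAt(math.MaxUint64, true)
		for ; idx < len(keys); idx++ {
			err := btxn.SetEntry(&badger.Entry{
				Key:      []byte(keys[idx]),
				Value:    deltas[keys[idx]],
				UserMeta: BitDeltaPosting,
			})
			if err == badger.ErrTxnTooBig {
				// Commit what we have; the failing key is retried in the next pass.
				break
			} else if err != nil {
				btxn.Discard()
				return err
			}
		}
		if err := btxn.CommitAt(commitTs, nil); err != nil {
			return err
		}
	}
	return nil
}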

// unmarshalOrCopy unmarshals the item's value into plist. An empty value is
// treated as an empty posting list and leaves plist untouched.
func unmarshalOrCopy(plist *pb.PostingList, item *badger.Item) error {
	return item.Value(func(val []byte) error {
		if len(val) == 0 {
			// Empty posting list.
			return nil
		}
		return plist.Unmarshal(val)
	})
}

// ReadPostingList constructs the posting list from disk using the passed iterator.
// Use a forward iterator with AllVersions enabled in the iterator options.
// key is now owned by the posting list, so ensure it isn't reused elsewhere.
func ReadPostingList(key []byte, it *badger.Iterator) (*List, error) {
	l := new(List)
	l.key = key
	l.mutationMap = make(map[uint64]*pb.PostingList)
	l.plist = new(pb.PostingList)

	// Iterates from the highest Ts to the lowest Ts.
	for it.Valid() {
		item := it.Item()
		if !bytes.Equal(item.Key(), l.key) {
			break
		}
		l.maxTs = x.Max(l.maxTs, item.Version())
		if item.IsDeletedOrExpired() {
			// Don't consider any more versions.
			break
		}

		switch item.UserMeta() {
		case BitEmptyPosting:
			l.minTs = item.Version()
			return l, nil
		case BitCompletePosting:
			if err := unmarshalOrCopy(l.plist, item); err != nil {
				return nil, err
			}
			l.minTs = item.Version()
			// No need to do Next here. The outer loop can take care of skipping
			// more versions of the same key.
			return l, nil
		case BitDeltaPosting:
			err := item.Value(func(val []byte) error {
				pl := &pb.PostingList{}
				x.Check(pl.Unmarshal(val))
				pl.CommitTs = item.Version()
				for _, mpost := range pl.Postings {
					// commitTs and startTs are meant to be only in memory, not
					// stored on disk.
					mpost.CommitTs = item.Version()
				}
				l.mutationMap[pl.CommitTs] = pl
				return nil
			})
			if err != nil {
				return nil, err
			}
		case BitSchemaPosting:
			return nil, errors.Errorf(
				"Trying to read schema in ReadPostingList for key: %s", hex.Dump(key))
		default:
			return nil, errors.Errorf(
				"Unexpected meta: %d for key: %s", item.UserMeta(), hex.Dump(key))
		}
		if item.DiscardEarlierVersions() {
			break
		}
		it.Next()
	}
	return l, nil
}

// getNew reads the posting list for key from pstore, reading at the maximum
// possible timestamp so that all committed versions are visible.
// TODO: We should only create a posting list with a specific readTs.
func getNew(key []byte, pstore *badger.DB) (*List, error) {
	txn := pstore.NewTransactionAt(math.MaxUint64, false)
	defer txn.Discard()

	// When we do rollups, an older version would go to the top of the LSM tree, which can cause
	// issues during txn.Get. Therefore, always iterate.
	iterOpts := badger.DefaultIteratorOptions
	iterOpts.AllVersions = true
	iterOpts.PrefetchValues = false
	itr := txn.NewKeyIterator(key, iterOpts)
	defer itr.Close()
	itr.Seek(key)
	return ReadPostingList(key, itr)
}
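
// getNewAt is an illustrative, hypothetical sketch of what the TODO on getNew
// asks for: constructing a posting list at a specific readTs instead of
// math.MaxUint64, keeping the same iterator setup. Nothing calls it.
func getNewAt(key []byte, pstore *badger.DB, readTs uint64) (*List, error) {
	txn := pstore.NewTransactionAt(readTs, false)
	defer txn.Discard()

	// As in getNew, iterate instead of relying on txn.Get, since rollups can
	// place an older version at the top of the LSM tree.
	iterOpts := badger.DefaultIteratorOptions
	iterOpts.AllVersions = true
	iterOpts.PrefetchValues = false
	itr := txn.NewKeyIterator(key, iterOpts)
	defer itr.Close()
	itr.Seek(key)
	return ReadPostingList(key, itr)
}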