github.com/KinWaiYuen/client-go/v2@v2.5.4/internal/mockstore/mocktikv/mvcc_leveldb.go (about)

     1  // Copyright 2021 TiKV Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // NOTE: The code in this file is based on code from the
    16  // TiDB project, licensed under the Apache License v 2.0
    17  //
    18  // https://github.com/pingcap/tidb/tree/cc5e161ac06827589c4966674597c137cc9e809c/store/tikv/mockstore/mocktikv/mvcc_leveldb.go
    19  //
    20  
    21  // Copyright 2017 PingCAP, Inc.
    22  //
    23  // Licensed under the Apache License, Version 2.0 (the "License");
    24  // you may not use this file except in compliance with the License.
    25  // You may obtain a copy of the License at
    26  //
    27  //     http://www.apache.org/licenses/LICENSE-2.0
    28  //
    29  // Unless required by applicable law or agreed to in writing, software
    30  // distributed under the License is distributed on an "AS IS" BASIS,
    31  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    32  // See the License for the specific language governing permissions and
    33  // limitations under the License.
    34  
    35  package mocktikv
    36  
    37  import (
    38  	"bytes"
    39  	"math"
    40  	"sync"
    41  
    42  	"github.com/KinWaiYuen/client-go/v2/internal/logutil"
    43  	"github.com/KinWaiYuen/client-go/v2/internal/mockstore/deadlock"
    44  	"github.com/KinWaiYuen/client-go/v2/oracle"
    45  	"github.com/KinWaiYuen/client-go/v2/util/codec"
    46  	"github.com/dgryski/go-farm"
    47  	"github.com/pingcap/errors"
    48  	"github.com/pingcap/goleveldb/leveldb"
    49  	"github.com/pingcap/goleveldb/leveldb/iterator"
    50  	"github.com/pingcap/goleveldb/leveldb/opt"
    51  	"github.com/pingcap/goleveldb/leveldb/storage"
    52  	"github.com/pingcap/goleveldb/leveldb/util"
    53  	"github.com/pingcap/kvproto/pkg/kvrpcpb"
    54  	"github.com/pingcap/parser/terror"
    55  	"go.uber.org/zap"
    56  )
    57  
// Lock wait timeouts used by the pessimistic lock protocol with TiKV.
// Two values are special: 0 means always wait and -1 means do not wait.
// Any other value is the lock wait time in milliseconds.
var (
	LockAlwaysWait = int64(0)
	LockNoWait     = int64(-1)
)
    65  
// MVCCLevelDB implements the MVCCStore interface.
type MVCCLevelDB struct {
	// Key layout:
	// ...
	// Key_lock        -- (0)
	// Key_verMax      -- (1)
	// ...
	// Key_ver+1       -- (2)
	// Key_ver         -- (3)
	// Key_ver-1       -- (4)
	// ...
	// Key_0           -- (5)
	// NextKey_lock    -- (6)
	// NextKey_verMax  -- (7)
	// ...
	// NextKey_ver+1   -- (8)
	// NextKey_ver     -- (9)
	// NextKey_ver-1   -- (10)
	// ...
	// NextKey_0       -- (11)
	// ...
	// EOF

	// db is the underlying leveldb instance.
	db *leveldb.DB
	// mu guards db. leveldb cannot guarantee that multiple operations are
	// atomic: for example, during a read-then-write another write may happen
	// in between, so this lock is necessary.
	mu sync.RWMutex
	// deadlockDetector is consulted when a pessimistic lock request meets a
	// lock held by another transaction, and is cleaned up on Commit/Rollback.
	deadlockDetector *deadlock.Detector
}
    97  
// lockVer is the version under which a key's lock record is encoded
// (the Key_lock entries in the key layout documented on MVCCLevelDB).
const lockVer uint64 = math.MaxUint64

// ErrInvalidEncodedKey describes parsing an invalid format of EncodedKey.
var ErrInvalidEncodedKey = errors.New("invalid encoded key")
   102  
   103  // mvccEncode returns the encoded key.
   104  func mvccEncode(key []byte, ver uint64) []byte {
   105  	b := codec.EncodeBytes(nil, key)
   106  	ret := codec.EncodeUintDesc(b, ver)
   107  	return ret
   108  }
   109  
   110  // mvccDecode parses the origin key and version of an encoded key, if the encoded key is a meta key,
   111  // just returns the origin key.
   112  func mvccDecode(encodedKey []byte) ([]byte, uint64, error) {
   113  	// Skip DataPrefix
   114  	remainBytes, key, err := codec.DecodeBytes(encodedKey, nil)
   115  	if err != nil {
   116  		// should never happen
   117  		return nil, 0, errors.Trace(err)
   118  	}
   119  	// if it's meta key
   120  	if len(remainBytes) == 0 {
   121  		return key, 0, nil
   122  	}
   123  	var ver uint64
   124  	remainBytes, ver, err = codec.DecodeUintDesc(remainBytes)
   125  	if err != nil {
   126  		// should never happen
   127  		return nil, 0, errors.Trace(err)
   128  	}
   129  	if len(remainBytes) != 0 {
   130  		return nil, 0, ErrInvalidEncodedKey
   131  	}
   132  	return key, ver, nil
   133  }
   134  
   135  // MustNewMVCCStore is used for testing, use NewMVCCLevelDB instead.
   136  func MustNewMVCCStore() MVCCStore {
   137  	mvccStore, err := NewMVCCLevelDB("")
   138  	if err != nil {
   139  		panic(err)
   140  	}
   141  	return mvccStore
   142  }
   143  
   144  // NewMVCCLevelDB returns a new MVCCLevelDB object.
   145  func NewMVCCLevelDB(path string) (*MVCCLevelDB, error) {
   146  	var (
   147  		d   *leveldb.DB
   148  		err error
   149  	)
   150  	if path == "" {
   151  		d, err = leveldb.Open(storage.NewMemStorage(), nil)
   152  	} else {
   153  		d, err = leveldb.OpenFile(path, &opt.Options{BlockCacheCapacity: 600 * 1024 * 1024})
   154  	}
   155  
   156  	return &MVCCLevelDB{db: d, deadlockDetector: deadlock.NewDetector()}, errors.Trace(err)
   157  }
   158  
// Iterator wraps iterator.Iterator to provide Valid() method.
type Iterator struct {
	iterator.Iterator
	// valid caches the result of the most recent call to the wrapped Next.
	valid bool
}
   164  
// Next moves the iterator to the next key/value pair and records the result
// of the wrapped iterator's Next so that Valid can report it.
func (iter *Iterator) Next() {
	iter.valid = iter.Iterator.Next()
}
   169  
// Valid returns the result of the most recent Next call, i.e. true while the
// iterator is positioned on an entry and false once it is exhausted.
func (iter *Iterator) Valid() bool {
	return iter.valid
}
   174  
   175  func newIterator(db *leveldb.DB, slice *util.Range) *Iterator {
   176  	iter := &Iterator{db.NewIterator(slice, nil), true}
   177  	iter.Next()
   178  	return iter
   179  }
   180  
   181  func newScanIterator(db *leveldb.DB, startKey, endKey []byte) (*Iterator, []byte, error) {
   182  	var start, end []byte
   183  	if len(startKey) > 0 {
   184  		start = mvccEncode(startKey, lockVer)
   185  	}
   186  	if len(endKey) > 0 {
   187  		end = mvccEncode(endKey, lockVer)
   188  	}
   189  	iter := newIterator(db, &util.Range{
   190  		Start: start,
   191  		Limit: end,
   192  	})
   193  	// newScanIterator must handle startKey is nil, in this case, the real startKey
   194  	// should be change the frist key of the store.
   195  	if len(startKey) == 0 && iter.Valid() {
   196  		key, _, err := mvccDecode(iter.Key())
   197  		if err != nil {
   198  			return nil, nil, errors.Trace(err)
   199  		}
   200  		startKey = key
   201  	}
   202  	return iter, startKey, nil
   203  }
   204  
// lockDecoder decodes the lock record of expectKey from an iterator.
type lockDecoder struct {
	// lock holds the lock decoded by the last successful Decode.
	lock mvccLock
	// expectKey is the origin key whose lock record is wanted.
	expectKey []byte
}
   209  
   210  // Decode decodes the lock value if current iterator is at expectKey::lock.
   211  func (dec *lockDecoder) Decode(iter *Iterator) (bool, error) {
   212  	if iter.Error() != nil || !iter.Valid() {
   213  		return false, iter.Error()
   214  	}
   215  
   216  	iterKey := iter.Key()
   217  	key, ver, err := mvccDecode(iterKey)
   218  	if err != nil {
   219  		return false, errors.Trace(err)
   220  	}
   221  	if !bytes.Equal(key, dec.expectKey) {
   222  		return false, nil
   223  	}
   224  	if ver != lockVer {
   225  		return false, nil
   226  	}
   227  
   228  	var lock mvccLock
   229  	err = lock.UnmarshalBinary(iter.Value())
   230  	if err != nil {
   231  		return false, errors.Trace(err)
   232  	}
   233  	dec.lock = lock
   234  	iter.Next()
   235  	return true, nil
   236  }
   237  
// valueDecoder decodes versioned (non-lock) records of expectKey from an iterator.
type valueDecoder struct {
	// value holds the record decoded by the last successful Decode.
	value mvccValue
	// expectKey is the origin key whose records are wanted.
	expectKey []byte
}
   242  
   243  // Decode decodes a mvcc value if iter key is expectKey.
   244  func (dec *valueDecoder) Decode(iter *Iterator) (bool, error) {
   245  	if iter.Error() != nil || !iter.Valid() {
   246  		return false, iter.Error()
   247  	}
   248  
   249  	key, ver, err := mvccDecode(iter.Key())
   250  	if err != nil {
   251  		return false, errors.Trace(err)
   252  	}
   253  	if !bytes.Equal(key, dec.expectKey) {
   254  		return false, nil
   255  	}
   256  	if ver == lockVer {
   257  		return false, nil
   258  	}
   259  
   260  	var value mvccValue
   261  	err = value.UnmarshalBinary(iter.Value())
   262  	if err != nil {
   263  		return false, errors.Trace(err)
   264  	}
   265  	dec.value = value
   266  	iter.Next()
   267  	return true, nil
   268  }
   269  
// skipDecoder moves an iterator past all entries of one origin key.
type skipDecoder struct {
	// currKey is the key to skip; after a successful Decode it holds the
	// next distinct key the iterator stopped at.
	currKey []byte
}
   273  
   274  // Decode skips the iterator as long as its key is currKey, the new key would be stored.
   275  func (dec *skipDecoder) Decode(iter *Iterator) (bool, error) {
   276  	if iter.Error() != nil {
   277  		return false, iter.Error()
   278  	}
   279  	for iter.Valid() {
   280  		key, _, err := mvccDecode(iter.Key())
   281  		if err != nil {
   282  			return false, errors.Trace(err)
   283  		}
   284  		if !bytes.Equal(key, dec.currKey) {
   285  			dec.currKey = key
   286  			return true, nil
   287  		}
   288  		iter.Next()
   289  	}
   290  	return false, nil
   291  }
   292  
   293  // Get implements the MVCCStore interface.
   294  // key cannot be nil or []byte{}
   295  func (mvcc *MVCCLevelDB) Get(key []byte, startTS uint64, isoLevel kvrpcpb.IsolationLevel, resolvedLocks []uint64) ([]byte, error) {
   296  	mvcc.mu.RLock()
   297  	defer mvcc.mu.RUnlock()
   298  
   299  	return mvcc.getValue(key, startTS, isoLevel, resolvedLocks)
   300  }
   301  
   302  func (mvcc *MVCCLevelDB) getValue(key []byte, startTS uint64, isoLevel kvrpcpb.IsolationLevel, resolvedLocks []uint64) ([]byte, error) {
   303  	startKey := mvccEncode(key, lockVer)
   304  	iter := newIterator(mvcc.db, &util.Range{
   305  		Start: startKey,
   306  	})
   307  	defer iter.Release()
   308  
   309  	return getValue(iter, key, startTS, isoLevel, resolvedLocks)
   310  }
   311  
   312  func getValue(iter *Iterator, key []byte, startTS uint64, isoLevel kvrpcpb.IsolationLevel, resolvedLocks []uint64) ([]byte, error) {
   313  	dec1 := lockDecoder{expectKey: key}
   314  	ok, err := dec1.Decode(iter)
   315  	if ok && isoLevel == kvrpcpb.IsolationLevel_SI {
   316  		startTS, err = dec1.lock.check(startTS, key, resolvedLocks)
   317  	}
   318  	if err != nil {
   319  		return nil, errors.Trace(err)
   320  	}
   321  	dec2 := valueDecoder{expectKey: key}
   322  	for iter.Valid() {
   323  		ok, err := dec2.Decode(iter)
   324  		if err != nil {
   325  			return nil, errors.Trace(err)
   326  		}
   327  		if !ok {
   328  			break
   329  		}
   330  
   331  		value := &dec2.value
   332  		if value.valueType == typeRollback || value.valueType == typeLock {
   333  			continue
   334  		}
   335  		// Read the first committed value that can be seen at startTS.
   336  		if value.commitTS <= startTS {
   337  			if value.valueType == typeDelete {
   338  				return nil, nil
   339  			}
   340  			return value.value, nil
   341  		}
   342  	}
   343  	return nil, nil
   344  }
   345  
   346  // BatchGet implements the MVCCStore interface.
   347  func (mvcc *MVCCLevelDB) BatchGet(ks [][]byte, startTS uint64, isoLevel kvrpcpb.IsolationLevel, resolvedLocks []uint64) []Pair {
   348  	mvcc.mu.RLock()
   349  	defer mvcc.mu.RUnlock()
   350  
   351  	pairs := make([]Pair, 0, len(ks))
   352  	for _, k := range ks {
   353  		v, err := mvcc.getValue(k, startTS, isoLevel, resolvedLocks)
   354  		if v == nil && err == nil {
   355  			continue
   356  		}
   357  		pairs = append(pairs, Pair{
   358  			Key:   k,
   359  			Value: v,
   360  			Err:   errors.Trace(err),
   361  		})
   362  	}
   363  	return pairs
   364  }
   365  
   366  // Scan implements the MVCCStore interface.
   367  func (mvcc *MVCCLevelDB) Scan(startKey, endKey []byte, limit int, startTS uint64, isoLevel kvrpcpb.IsolationLevel, resolvedLock []uint64) []Pair {
   368  	mvcc.mu.RLock()
   369  	defer mvcc.mu.RUnlock()
   370  
   371  	iter, currKey, err := newScanIterator(mvcc.db, startKey, endKey)
   372  	defer iter.Release()
   373  	if err != nil {
   374  		logutil.BgLogger().Error("scan new iterator fail", zap.Error(err))
   375  		return nil
   376  	}
   377  
   378  	ok := true
   379  	var pairs []Pair
   380  	for len(pairs) < limit && ok {
   381  		value, err := getValue(iter, currKey, startTS, isoLevel, resolvedLock)
   382  		if err != nil {
   383  			pairs = append(pairs, Pair{
   384  				Key: currKey,
   385  				Err: errors.Trace(err),
   386  			})
   387  		}
   388  		if value != nil {
   389  			pairs = append(pairs, Pair{
   390  				Key:   currKey,
   391  				Value: value,
   392  			})
   393  		}
   394  
   395  		skip := skipDecoder{currKey}
   396  		ok, err = skip.Decode(iter)
   397  		if err != nil {
   398  			logutil.BgLogger().Error("seek to next key error", zap.Error(err))
   399  			break
   400  		}
   401  		currKey = skip.currKey
   402  	}
   403  	return pairs
   404  }
   405  
   406  // ReverseScan implements the MVCCStore interface. The search range is [startKey, endKey).
   407  func (mvcc *MVCCLevelDB) ReverseScan(startKey, endKey []byte, limit int, startTS uint64, isoLevel kvrpcpb.IsolationLevel, resolvedLocks []uint64) []Pair {
   408  	mvcc.mu.RLock()
   409  	defer mvcc.mu.RUnlock()
   410  
   411  	var mvccEnd []byte
   412  	if len(endKey) != 0 {
   413  		mvccEnd = mvccEncode(endKey, lockVer)
   414  	}
   415  	iter := mvcc.db.NewIterator(&util.Range{
   416  		Limit: mvccEnd,
   417  	}, nil)
   418  	defer iter.Release()
   419  
   420  	succ := iter.Last()
   421  	currKey, _, err := mvccDecode(iter.Key())
   422  	// TODO: return error.
   423  	terror.Log(errors.Trace(err))
   424  	helper := reverseScanHelper{
   425  		startTS:       startTS,
   426  		isoLevel:      isoLevel,
   427  		currKey:       currKey,
   428  		resolvedLocks: resolvedLocks,
   429  	}
   430  
   431  	for succ && len(helper.pairs) < limit {
   432  		key, ver, err := mvccDecode(iter.Key())
   433  		if err != nil {
   434  			break
   435  		}
   436  		if bytes.Compare(key, startKey) < 0 {
   437  			break
   438  		}
   439  
   440  		if !bytes.Equal(key, helper.currKey) {
   441  			helper.finishEntry()
   442  			helper.currKey = key
   443  		}
   444  		if ver == lockVer {
   445  			var lock mvccLock
   446  			err = lock.UnmarshalBinary(iter.Value())
   447  			helper.entry.lock = &lock
   448  		} else {
   449  			var value mvccValue
   450  			err = value.UnmarshalBinary(iter.Value())
   451  			helper.entry.values = append(helper.entry.values, value)
   452  		}
   453  		if err != nil {
   454  			logutil.BgLogger().Error("unmarshal fail", zap.Error(err))
   455  			break
   456  		}
   457  		succ = iter.Prev()
   458  	}
   459  	if len(helper.pairs) < limit {
   460  		helper.finishEntry()
   461  	}
   462  	return helper.pairs
   463  }
   464  
// reverseScanHelper carries the state of a ReverseScan: the read parameters,
// the key currently being visited with the records gathered for it, and the
// pairs produced so far.
type reverseScanHelper struct {
	startTS       uint64
	isoLevel      kvrpcpb.IsolationLevel
	resolvedLocks []uint64
	// currKey is the origin key whose records are being collected into entry.
	currKey []byte
	// entry accumulates the lock and values of currKey in iteration order.
	entry mvccEntry
	// pairs is the result list built by finishEntry.
	pairs []Pair
}
   473  
   474  func (helper *reverseScanHelper) finishEntry() {
   475  	reverse(helper.entry.values)
   476  	helper.entry.key = NewMvccKey(helper.currKey)
   477  	val, err := helper.entry.Get(helper.startTS, helper.isoLevel, helper.resolvedLocks)
   478  	if len(val) != 0 || err != nil {
   479  		helper.pairs = append(helper.pairs, Pair{
   480  			Key:   helper.currKey,
   481  			Value: val,
   482  			Err:   err,
   483  		})
   484  	}
   485  	helper.entry = mvccEntry{}
   486  }
   487  
   488  func reverse(values []mvccValue) {
   489  	i, j := 0, len(values)-1
   490  	for i < j {
   491  		values[i], values[j] = values[j], values[i]
   492  		i++
   493  		j--
   494  	}
   495  }
   496  
// lockCtx carries the per-request parameters of a pessimistic lock request
// to each mutation, plus the values read back when the request asks for them.
type lockCtx struct {
	startTS     uint64
	forUpdateTS uint64
	primary     []byte
	ttl         uint64
	minCommitTs uint64

	// returnValues requests that the value read for each newly locked key
	// be appended to values.
	returnValues bool
	values       [][]byte
}
   507  
   508  // PessimisticLock writes the pessimistic lock.
   509  func (mvcc *MVCCLevelDB) PessimisticLock(req *kvrpcpb.PessimisticLockRequest) *kvrpcpb.PessimisticLockResponse {
   510  	resp := &kvrpcpb.PessimisticLockResponse{}
   511  	mvcc.mu.Lock()
   512  	defer mvcc.mu.Unlock()
   513  	mutations := req.Mutations
   514  	lCtx := &lockCtx{
   515  		startTS:      req.StartVersion,
   516  		forUpdateTS:  req.ForUpdateTs,
   517  		primary:      req.PrimaryLock,
   518  		ttl:          req.LockTtl,
   519  		minCommitTs:  req.MinCommitTs,
   520  		returnValues: req.ReturnValues,
   521  	}
   522  	lockWaitTime := req.WaitTimeout
   523  
   524  	anyError := false
   525  	batch := &leveldb.Batch{}
   526  	errs := make([]error, 0, len(mutations))
   527  	for _, m := range mutations {
   528  		err := mvcc.pessimisticLockMutation(batch, m, lCtx)
   529  		errs = append(errs, err)
   530  		if err != nil {
   531  			anyError = true
   532  		}
   533  		if lockWaitTime == LockNoWait {
   534  			if _, ok := err.(*ErrLocked); ok {
   535  				break
   536  			}
   537  		}
   538  	}
   539  	if anyError {
   540  		if lockWaitTime != LockNoWait {
   541  			// TODO: remove this when implement sever side wait.
   542  			simulateServerSideWaitLock(errs)
   543  		}
   544  		resp.Errors = convertToKeyErrors(errs)
   545  		return resp
   546  	}
   547  	if err := mvcc.db.Write(batch, nil); err != nil {
   548  		resp.Errors = convertToKeyErrors([]error{err})
   549  		return resp
   550  	}
   551  	if req.ReturnValues {
   552  		resp.Values = lCtx.values
   553  	}
   554  	return resp
   555  }
   556  
   557  func (mvcc *MVCCLevelDB) pessimisticLockMutation(batch *leveldb.Batch, mutation *kvrpcpb.Mutation, lctx *lockCtx) error {
   558  	startTS := lctx.startTS
   559  	forUpdateTS := lctx.forUpdateTS
   560  	startKey := mvccEncode(mutation.Key, lockVer)
   561  	iter := newIterator(mvcc.db, &util.Range{
   562  		Start: startKey,
   563  	})
   564  	defer iter.Release()
   565  
   566  	dec := lockDecoder{
   567  		expectKey: mutation.Key,
   568  	}
   569  	ok, err := dec.Decode(iter)
   570  	if err != nil {
   571  		return errors.Trace(err)
   572  	}
   573  	if ok {
   574  		if dec.lock.startTS != startTS {
   575  			errDeadlock := mvcc.deadlockDetector.Detect(startTS, dec.lock.startTS, farm.Fingerprint64(mutation.Key))
   576  			if errDeadlock != nil {
   577  				return &ErrDeadlock{
   578  					LockKey:        mutation.Key,
   579  					LockTS:         dec.lock.startTS,
   580  					DealockKeyHash: errDeadlock.KeyHash,
   581  				}
   582  			}
   583  			return dec.lock.lockErr(mutation.Key)
   584  		}
   585  		return nil
   586  	}
   587  
   588  	// For pessimisticLockMutation, check the correspond rollback record, there may be rollbackLock
   589  	// operation between startTS and forUpdateTS
   590  	val, err := checkConflictValue(iter, mutation, forUpdateTS, startTS, true)
   591  	if err != nil {
   592  		return err
   593  	}
   594  	if lctx.returnValues {
   595  		lctx.values = append(lctx.values, val)
   596  	}
   597  
   598  	lock := mvccLock{
   599  		startTS:     startTS,
   600  		primary:     lctx.primary,
   601  		op:          kvrpcpb.Op_PessimisticLock,
   602  		ttl:         lctx.ttl,
   603  		forUpdateTS: forUpdateTS,
   604  		minCommitTS: lctx.minCommitTs,
   605  	}
   606  	writeKey := mvccEncode(mutation.Key, lockVer)
   607  	writeValue, err := lock.MarshalBinary()
   608  	if err != nil {
   609  		return errors.Trace(err)
   610  	}
   611  
   612  	batch.Put(writeKey, writeValue)
   613  	return nil
   614  }
   615  
   616  // PessimisticRollback implements the MVCCStore interface.
   617  func (mvcc *MVCCLevelDB) PessimisticRollback(keys [][]byte, startTS, forUpdateTS uint64) []error {
   618  	mvcc.mu.Lock()
   619  	defer mvcc.mu.Unlock()
   620  
   621  	anyError := false
   622  	batch := &leveldb.Batch{}
   623  	errs := make([]error, 0, len(keys))
   624  	for _, key := range keys {
   625  		err := pessimisticRollbackKey(mvcc.db, batch, key, startTS, forUpdateTS)
   626  		errs = append(errs, err)
   627  		if err != nil {
   628  			anyError = true
   629  		}
   630  	}
   631  	if anyError {
   632  		return errs
   633  	}
   634  	if err := mvcc.db.Write(batch, nil); err != nil {
   635  		return []error{err}
   636  	}
   637  	return errs
   638  }
   639  
   640  func pessimisticRollbackKey(db *leveldb.DB, batch *leveldb.Batch, key []byte, startTS, forUpdateTS uint64) error {
   641  	startKey := mvccEncode(key, lockVer)
   642  	iter := newIterator(db, &util.Range{
   643  		Start: startKey,
   644  	})
   645  	defer iter.Release()
   646  
   647  	dec := lockDecoder{
   648  		expectKey: key,
   649  	}
   650  	ok, err := dec.Decode(iter)
   651  	if err != nil {
   652  		return errors.Trace(err)
   653  	}
   654  	if ok {
   655  		lock := dec.lock
   656  		if lock.op == kvrpcpb.Op_PessimisticLock && lock.startTS == startTS && lock.forUpdateTS <= forUpdateTS {
   657  			batch.Delete(startKey)
   658  		}
   659  	}
   660  	return nil
   661  }
   662  
   663  // Prewrite implements the MVCCStore interface.
   664  func (mvcc *MVCCLevelDB) Prewrite(req *kvrpcpb.PrewriteRequest) []error {
   665  	mutations := req.Mutations
   666  	primary := req.PrimaryLock
   667  	startTS := req.StartVersion
   668  	forUpdateTS := req.GetForUpdateTs()
   669  	ttl := req.LockTtl
   670  	minCommitTS := req.MinCommitTs
   671  	mvcc.mu.Lock()
   672  	defer mvcc.mu.Unlock()
   673  
   674  	anyError := false
   675  	batch := &leveldb.Batch{}
   676  	errs := make([]error, 0, len(mutations))
   677  	txnSize := req.TxnSize
   678  	for i, m := range mutations {
   679  		// If the operation is Insert, check if key is exists at first.
   680  		var err error
   681  		// no need to check insert values for pessimistic transaction.
   682  		op := m.GetOp()
   683  		if (op == kvrpcpb.Op_Insert || op == kvrpcpb.Op_CheckNotExists) && forUpdateTS == 0 {
   684  			v, err := mvcc.getValue(m.Key, startTS, kvrpcpb.IsolationLevel_SI, req.Context.ResolvedLocks)
   685  			if err != nil {
   686  				errs = append(errs, err)
   687  				anyError = true
   688  				continue
   689  			}
   690  			if v != nil {
   691  				err = &ErrKeyAlreadyExist{
   692  					Key: m.Key,
   693  				}
   694  				errs = append(errs, err)
   695  				anyError = true
   696  				continue
   697  			}
   698  		}
   699  		if op == kvrpcpb.Op_CheckNotExists {
   700  			continue
   701  		}
   702  		isPessimisticLock := len(req.IsPessimisticLock) > 0 && req.IsPessimisticLock[i]
   703  		err = prewriteMutation(mvcc.db, batch, m, startTS, primary, ttl, txnSize, isPessimisticLock, minCommitTS)
   704  		errs = append(errs, err)
   705  		if err != nil {
   706  			anyError = true
   707  		}
   708  	}
   709  	if anyError {
   710  		return errs
   711  	}
   712  	if err := mvcc.db.Write(batch, nil); err != nil {
   713  		return []error{err}
   714  	}
   715  
   716  	return errs
   717  }
   718  
   719  func checkConflictValue(iter *Iterator, m *kvrpcpb.Mutation, forUpdateTS uint64, startTS uint64, getVal bool) ([]byte, error) {
   720  	dec := &valueDecoder{
   721  		expectKey: m.Key,
   722  	}
   723  	ok, err := dec.Decode(iter)
   724  	if err != nil {
   725  		return nil, errors.Trace(err)
   726  	}
   727  	if !ok {
   728  		return nil, nil
   729  	}
   730  
   731  	// Note that it's a write conflict here, even if the value is a rollback one, or a op_lock record
   732  	if dec.value.commitTS > forUpdateTS {
   733  		return nil, &ErrConflict{
   734  			StartTS:          forUpdateTS,
   735  			ConflictTS:       dec.value.startTS,
   736  			ConflictCommitTS: dec.value.commitTS,
   737  			Key:              m.Key,
   738  		}
   739  	}
   740  
   741  	needGetVal := getVal
   742  	needCheckAssertion := m.Assertion == kvrpcpb.Assertion_NotExist
   743  	needCheckRollback := true
   744  	var retVal []byte
   745  	// do the check or get operations within one iteration to make CI faster
   746  	for ok {
   747  		if needCheckRollback {
   748  			if dec.value.valueType == typeRollback {
   749  				if dec.value.commitTS == startTS {
   750  					logutil.BgLogger().Warn("rollback value found",
   751  						zap.Uint64("txnID", startTS),
   752  						zap.Int32("rollbacked.valueType", int32(dec.value.valueType)),
   753  						zap.Uint64("rollbacked.startTS", dec.value.startTS),
   754  						zap.Uint64("rollbacked.commitTS", dec.value.commitTS))
   755  					return nil, &ErrAlreadyRollbacked{
   756  						startTS: startTS,
   757  						key:     m.Key,
   758  					}
   759  				}
   760  			}
   761  			if dec.value.commitTS < startTS {
   762  				needCheckRollback = false
   763  			}
   764  		}
   765  		if needCheckAssertion {
   766  			if dec.value.valueType == typePut || dec.value.valueType == typeLock {
   767  				if m.Op == kvrpcpb.Op_PessimisticLock {
   768  					return nil, &ErrKeyAlreadyExist{
   769  						Key: m.Key,
   770  					}
   771  				}
   772  			} else if dec.value.valueType == typeDelete {
   773  				needCheckAssertion = false
   774  			}
   775  		}
   776  		if needGetVal {
   777  			if dec.value.valueType == typeDelete || dec.value.valueType == typePut {
   778  				retVal = dec.value.value
   779  				needGetVal = false
   780  			}
   781  		}
   782  		if !needCheckAssertion && !needGetVal && !needCheckRollback {
   783  			break
   784  		}
   785  		ok, err = dec.Decode(iter)
   786  		if err != nil {
   787  			return nil, errors.Trace(err)
   788  		}
   789  	}
   790  	if getVal {
   791  		return retVal, nil
   792  	}
   793  	return nil, nil
   794  }
   795  
   796  func prewriteMutation(db *leveldb.DB, batch *leveldb.Batch,
   797  	mutation *kvrpcpb.Mutation, startTS uint64,
   798  	primary []byte, ttl uint64, txnSize uint64,
   799  	isPessimisticLock bool, minCommitTS uint64) error {
   800  	startKey := mvccEncode(mutation.Key, lockVer)
   801  	iter := newIterator(db, &util.Range{
   802  		Start: startKey,
   803  	})
   804  	defer iter.Release()
   805  
   806  	dec := lockDecoder{
   807  		expectKey: mutation.Key,
   808  	}
   809  	ok, err := dec.Decode(iter)
   810  	if err != nil {
   811  		return errors.Trace(err)
   812  	}
   813  	if ok {
   814  		if dec.lock.startTS != startTS {
   815  			if isPessimisticLock {
   816  				// NOTE: A special handling.
   817  				// When pessimistic txn prewrite meets lock, set the TTL = 0 means
   818  				// telling TiDB to rollback the transaction **unconditionly**.
   819  				dec.lock.ttl = 0
   820  			}
   821  			return dec.lock.lockErr(mutation.Key)
   822  		}
   823  		if dec.lock.op != kvrpcpb.Op_PessimisticLock {
   824  			return nil
   825  		}
   826  		// Overwrite the pessimistic lock.
   827  		if ttl < dec.lock.ttl {
   828  			// Maybe ttlManager has already set the lock TTL, don't decrease it.
   829  			ttl = dec.lock.ttl
   830  		}
   831  		if minCommitTS < dec.lock.minCommitTS {
   832  			// The minCommitTS has been pushed forward.
   833  			minCommitTS = dec.lock.minCommitTS
   834  		}
   835  	} else {
   836  		if isPessimisticLock {
   837  			return ErrAbort("pessimistic lock not found")
   838  		}
   839  		_, err = checkConflictValue(iter, mutation, startTS, startTS, false)
   840  		if err != nil {
   841  			return err
   842  		}
   843  	}
   844  
   845  	op := mutation.GetOp()
   846  	if op == kvrpcpb.Op_Insert {
   847  		op = kvrpcpb.Op_Put
   848  	}
   849  	lock := mvccLock{
   850  		startTS: startTS,
   851  		primary: primary,
   852  		value:   mutation.Value,
   853  		op:      op,
   854  		ttl:     ttl,
   855  		txnSize: txnSize,
   856  	}
   857  	// Write minCommitTS on the primary lock.
   858  	if bytes.Equal(primary, mutation.GetKey()) {
   859  		lock.minCommitTS = minCommitTS
   860  	}
   861  
   862  	writeKey := mvccEncode(mutation.Key, lockVer)
   863  	writeValue, err := lock.MarshalBinary()
   864  	if err != nil {
   865  		return errors.Trace(err)
   866  	}
   867  
   868  	batch.Put(writeKey, writeValue)
   869  	return nil
   870  }
   871  
   872  // Commit implements the MVCCStore interface.
   873  func (mvcc *MVCCLevelDB) Commit(keys [][]byte, startTS, commitTS uint64) error {
   874  	mvcc.mu.Lock()
   875  	defer func() {
   876  		mvcc.mu.Unlock()
   877  		mvcc.deadlockDetector.CleanUp(startTS)
   878  	}()
   879  
   880  	batch := &leveldb.Batch{}
   881  	for _, k := range keys {
   882  		err := commitKey(mvcc.db, batch, k, startTS, commitTS)
   883  		if err != nil {
   884  			return errors.Trace(err)
   885  		}
   886  	}
   887  	return mvcc.db.Write(batch, nil)
   888  }
   889  
   890  func commitKey(db *leveldb.DB, batch *leveldb.Batch, key []byte, startTS, commitTS uint64) error {
   891  	startKey := mvccEncode(key, lockVer)
   892  	iter := newIterator(db, &util.Range{
   893  		Start: startKey,
   894  	})
   895  	defer iter.Release()
   896  
   897  	dec := lockDecoder{
   898  		expectKey: key,
   899  	}
   900  	ok, err := dec.Decode(iter)
   901  	if err != nil {
   902  		return errors.Trace(err)
   903  	}
   904  	if !ok || dec.lock.startTS != startTS {
   905  		// If the lock of this transaction is not found, or the lock is replaced by
   906  		// another transaction, check commit information of this transaction.
   907  		c, ok, err1 := getTxnCommitInfo(iter, key, startTS)
   908  		if err1 != nil {
   909  			return errors.Trace(err1)
   910  		}
   911  		if ok && c.valueType != typeRollback {
   912  			// c.valueType != typeRollback means the transaction is already committed, do nothing.
   913  			return nil
   914  		}
   915  		return ErrRetryable("txn not found")
   916  	}
   917  	// Reject the commit request whose commitTS is less than minCommiTS.
   918  	if dec.lock.minCommitTS > commitTS {
   919  		return &ErrCommitTSExpired{
   920  			kvrpcpb.CommitTsExpired{
   921  				StartTs:           startTS,
   922  				AttemptedCommitTs: commitTS,
   923  				Key:               key,
   924  				MinCommitTs:       dec.lock.minCommitTS,
   925  			}}
   926  	}
   927  
   928  	if err = commitLock(batch, dec.lock, key, startTS, commitTS); err != nil {
   929  		return errors.Trace(err)
   930  	}
   931  	return nil
   932  }
   933  
   934  func commitLock(batch *leveldb.Batch, lock mvccLock, key []byte, startTS, commitTS uint64) error {
   935  	var valueType mvccValueType
   936  	if lock.op == kvrpcpb.Op_Put {
   937  		valueType = typePut
   938  	} else if lock.op == kvrpcpb.Op_Lock {
   939  		valueType = typeLock
   940  	} else {
   941  		valueType = typeDelete
   942  	}
   943  	value := mvccValue{
   944  		valueType: valueType,
   945  		startTS:   startTS,
   946  		commitTS:  commitTS,
   947  		value:     lock.value,
   948  	}
   949  	writeKey := mvccEncode(key, commitTS)
   950  	writeValue, err := value.MarshalBinary()
   951  	if err != nil {
   952  		return errors.Trace(err)
   953  	}
   954  	batch.Put(writeKey, writeValue)
   955  	batch.Delete(mvccEncode(key, lockVer))
   956  	return nil
   957  }
   958  
   959  // Rollback implements the MVCCStore interface.
   960  func (mvcc *MVCCLevelDB) Rollback(keys [][]byte, startTS uint64) error {
   961  	mvcc.mu.Lock()
   962  	defer func() {
   963  		mvcc.mu.Unlock()
   964  		mvcc.deadlockDetector.CleanUp(startTS)
   965  	}()
   966  
   967  	batch := &leveldb.Batch{}
   968  	for _, k := range keys {
   969  		err := rollbackKey(mvcc.db, batch, k, startTS)
   970  		if err != nil {
   971  			return errors.Trace(err)
   972  		}
   973  	}
   974  	return mvcc.db.Write(batch, nil)
   975  }
   976  
   977  func rollbackKey(db *leveldb.DB, batch *leveldb.Batch, key []byte, startTS uint64) error {
   978  	startKey := mvccEncode(key, lockVer)
   979  	iter := newIterator(db, &util.Range{
   980  		Start: startKey,
   981  	})
   982  	defer iter.Release()
   983  
   984  	if iter.Valid() {
   985  		dec := lockDecoder{
   986  			expectKey: key,
   987  		}
   988  		ok, err := dec.Decode(iter)
   989  		if err != nil {
   990  			return errors.Trace(err)
   991  		}
   992  		// If current transaction's lock exist.
   993  		if ok && dec.lock.startTS == startTS {
   994  			if err = rollbackLock(batch, key, startTS); err != nil {
   995  				return errors.Trace(err)
   996  			}
   997  			return nil
   998  		}
   999  
  1000  		// If current transaction's lock not exist.
  1001  		// If commit info of current transaction exist.
  1002  		c, ok, err := getTxnCommitInfo(iter, key, startTS)
  1003  		if err != nil {
  1004  			return errors.Trace(err)
  1005  		}
  1006  		if ok {
  1007  			// If current transaction is already committed.
  1008  			if c.valueType != typeRollback {
  1009  				return ErrAlreadyCommitted(c.commitTS)
  1010  			}
  1011  			// If current transaction is already rollback.
  1012  			return nil
  1013  		}
  1014  	}
  1015  
  1016  	// If current transaction is not prewritted before.
  1017  	value := mvccValue{
  1018  		valueType: typeRollback,
  1019  		startTS:   startTS,
  1020  		commitTS:  startTS,
  1021  	}
  1022  	writeKey := mvccEncode(key, startTS)
  1023  	writeValue, err := value.MarshalBinary()
  1024  	if err != nil {
  1025  		return errors.Trace(err)
  1026  	}
  1027  	batch.Put(writeKey, writeValue)
  1028  	return nil
  1029  }
  1030  
  1031  func writeRollback(batch *leveldb.Batch, key []byte, startTS uint64) error {
  1032  	tomb := mvccValue{
  1033  		valueType: typeRollback,
  1034  		startTS:   startTS,
  1035  		commitTS:  startTS,
  1036  	}
  1037  	writeKey := mvccEncode(key, startTS)
  1038  	writeValue, err := tomb.MarshalBinary()
  1039  	if err != nil {
  1040  		return errors.Trace(err)
  1041  	}
  1042  	batch.Put(writeKey, writeValue)
  1043  	return nil
  1044  }
  1045  
  1046  func rollbackLock(batch *leveldb.Batch, key []byte, startTS uint64) error {
  1047  	err := writeRollback(batch, key, startTS)
  1048  	if err != nil {
  1049  		return err
  1050  	}
  1051  	batch.Delete(mvccEncode(key, lockVer))
  1052  	return nil
  1053  }
  1054  
  1055  func getTxnCommitInfo(iter *Iterator, expectKey []byte, startTS uint64) (mvccValue, bool, error) {
  1056  	for iter.Valid() {
  1057  		dec := valueDecoder{
  1058  			expectKey: expectKey,
  1059  		}
  1060  		ok, err := dec.Decode(iter)
  1061  		if err != nil || !ok {
  1062  			return mvccValue{}, ok, errors.Trace(err)
  1063  		}
  1064  
  1065  		if dec.value.startTS == startTS {
  1066  			return dec.value, true, nil
  1067  		}
  1068  	}
  1069  	return mvccValue{}, false, nil
  1070  }
  1071  
  1072  // Cleanup implements the MVCCStore interface.
  1073  // Cleanup API is deprecated, use CheckTxnStatus instead.
  1074  func (mvcc *MVCCLevelDB) Cleanup(key []byte, startTS, currentTS uint64) error {
  1075  	mvcc.mu.Lock()
  1076  	defer func() {
  1077  		mvcc.mu.Unlock()
  1078  		mvcc.deadlockDetector.CleanUp(startTS)
  1079  	}()
  1080  
  1081  	batch := &leveldb.Batch{}
  1082  	startKey := mvccEncode(key, lockVer)
  1083  	iter := newIterator(mvcc.db, &util.Range{
  1084  		Start: startKey,
  1085  	})
  1086  	defer iter.Release()
  1087  
  1088  	if iter.Valid() {
  1089  		dec := lockDecoder{
  1090  			expectKey: key,
  1091  		}
  1092  		ok, err := dec.Decode(iter)
  1093  		if err != nil {
  1094  			return err
  1095  		}
  1096  		// If current transaction's lock exists.
  1097  		if ok && dec.lock.startTS == startTS {
  1098  			// If the lock has already outdated, clean up it.
  1099  			if currentTS == 0 || uint64(oracle.ExtractPhysical(dec.lock.startTS))+dec.lock.ttl < uint64(oracle.ExtractPhysical(currentTS)) {
  1100  				if err = rollbackLock(batch, key, startTS); err != nil {
  1101  					return err
  1102  				}
  1103  				return mvcc.db.Write(batch, nil)
  1104  			}
  1105  
  1106  			// Otherwise, return a locked error with the TTL information.
  1107  			return dec.lock.lockErr(key)
  1108  		}
  1109  
  1110  		// If current transaction's lock does not exist.
  1111  		// If the commit information of the current transaction exist.
  1112  		c, ok, err := getTxnCommitInfo(iter, key, startTS)
  1113  		if err != nil {
  1114  			return errors.Trace(err)
  1115  		}
  1116  		if ok {
  1117  			// If the current transaction has already committed.
  1118  			if c.valueType != typeRollback {
  1119  				return ErrAlreadyCommitted(c.commitTS)
  1120  			}
  1121  			// If the current transaction has already rollbacked.
  1122  			return nil
  1123  		}
  1124  	}
  1125  
  1126  	// If current transaction is not prewritted before.
  1127  	value := mvccValue{
  1128  		valueType: typeRollback,
  1129  		startTS:   startTS,
  1130  		commitTS:  startTS,
  1131  	}
  1132  	writeKey := mvccEncode(key, startTS)
  1133  	writeValue, err := value.MarshalBinary()
  1134  	if err != nil {
  1135  		return errors.Trace(err)
  1136  	}
  1137  	batch.Put(writeKey, writeValue)
  1138  	return nil
  1139  }
  1140  
  1141  // CheckTxnStatus checks the primary lock of a transaction to decide its status.
  1142  // The return values are (ttl, commitTS, err):
  1143  // If the transaction is active, this function returns the ttl of the lock;
  1144  // If the transaction is committed, this function returns the commitTS;
  1145  // If the transaction is rollbacked, this function returns (0, 0, nil)
  1146  // Note that CheckTxnStatus may also push forward the `minCommitTS` of the
  1147  // transaction, so it's not simply a read-only operation.
  1148  //
  1149  // primaryKey + lockTS together could locate the primary lock.
  1150  // callerStartTS is the start ts of reader transaction.
  1151  // currentTS is the current ts, but it may be inaccurate. Just use it to check TTL.
  1152  func (mvcc *MVCCLevelDB) CheckTxnStatus(primaryKey []byte, lockTS, callerStartTS, currentTS uint64,
  1153  	rollbackIfNotExist bool, resolvingPessimisticLock bool) (ttl uint64, commitTS uint64, action kvrpcpb.Action, err error) {
  1154  	mvcc.mu.Lock()
  1155  	defer mvcc.mu.Unlock()
  1156  
  1157  	action = kvrpcpb.Action_NoAction
  1158  
  1159  	startKey := mvccEncode(primaryKey, lockVer)
  1160  	iter := newIterator(mvcc.db, &util.Range{
  1161  		Start: startKey,
  1162  	})
  1163  	defer iter.Release()
  1164  
  1165  	if iter.Valid() {
  1166  		dec := lockDecoder{
  1167  			expectKey: primaryKey,
  1168  		}
  1169  		var ok bool
  1170  		ok, err = dec.Decode(iter)
  1171  		if err != nil {
  1172  			err = errors.Trace(err)
  1173  			return
  1174  		}
  1175  		// If current transaction's lock exists.
  1176  		if ok && dec.lock.startTS == lockTS {
  1177  			lock := dec.lock
  1178  			batch := &leveldb.Batch{}
  1179  
  1180  			// If the lock has already outdated, clean up it.
  1181  			if uint64(oracle.ExtractPhysical(lock.startTS))+lock.ttl < uint64(oracle.ExtractPhysical(currentTS)) {
  1182  				if resolvingPessimisticLock && lock.op == kvrpcpb.Op_PessimisticLock {
  1183  					action = kvrpcpb.Action_TTLExpirePessimisticRollback
  1184  					if err = pessimisticRollbackKey(mvcc.db, batch, primaryKey, lock.startTS, lock.forUpdateTS); err != nil {
  1185  						err = errors.Trace(err)
  1186  						return
  1187  					}
  1188  				} else {
  1189  					action = kvrpcpb.Action_TTLExpireRollback
  1190  					if err = rollbackLock(batch, primaryKey, lockTS); err != nil {
  1191  						err = errors.Trace(err)
  1192  						return
  1193  					}
  1194  				}
  1195  				if err = mvcc.db.Write(batch, nil); err != nil {
  1196  					err = errors.Trace(err)
  1197  					return
  1198  				}
  1199  				return 0, 0, action, nil
  1200  			}
  1201  
  1202  			// If the caller_start_ts is MaxUint64, it's a point get in the autocommit transaction.
  1203  			// Even though the MinCommitTs is not pushed, the point get can ingore the lock
  1204  			// next time because it's not committed. So we pretend it has been pushed.
  1205  			if callerStartTS == math.MaxUint64 {
  1206  				action = kvrpcpb.Action_MinCommitTSPushed
  1207  
  1208  				// If this is a large transaction and the lock is active, push forward the minCommitTS.
  1209  				// lock.minCommitTS == 0 may be a secondary lock, or not a large transaction (old version TiDB).
  1210  			} else if lock.minCommitTS > 0 {
  1211  				action = kvrpcpb.Action_MinCommitTSPushed
  1212  				// We *must* guarantee the invariance lock.minCommitTS >= callerStartTS + 1
  1213  				if lock.minCommitTS < callerStartTS+1 {
  1214  					lock.minCommitTS = callerStartTS + 1
  1215  
  1216  					// Remove this condition should not affect correctness.
  1217  					// We do it because pushing forward minCommitTS as far as possible could avoid
  1218  					// the lock been pushed again several times, and thus reduce write operations.
  1219  					if lock.minCommitTS < currentTS {
  1220  						lock.minCommitTS = currentTS
  1221  					}
  1222  
  1223  					writeKey := mvccEncode(primaryKey, lockVer)
  1224  					writeValue, err1 := lock.MarshalBinary()
  1225  					if err1 != nil {
  1226  						err = errors.Trace(err1)
  1227  						return
  1228  					}
  1229  					batch.Put(writeKey, writeValue)
  1230  					if err1 = mvcc.db.Write(batch, nil); err1 != nil {
  1231  						err = errors.Trace(err1)
  1232  						return
  1233  					}
  1234  				}
  1235  			}
  1236  
  1237  			return lock.ttl, 0, action, nil
  1238  		}
  1239  
  1240  		// If current transaction's lock does not exist.
  1241  		// If the commit info of the current transaction exists.
  1242  		c, ok, err1 := getTxnCommitInfo(iter, primaryKey, lockTS)
  1243  		if err1 != nil {
  1244  			err = errors.Trace(err1)
  1245  			return
  1246  		}
  1247  		if ok {
  1248  			// If current transaction is already committed.
  1249  			if c.valueType != typeRollback {
  1250  				return 0, c.commitTS, action, nil
  1251  			}
  1252  			// If current transaction is already rollback.
  1253  			return 0, 0, kvrpcpb.Action_NoAction, nil
  1254  		}
  1255  	}
  1256  
  1257  	// If current transaction is not prewritted before, it may be pessimistic lock.
  1258  	// When pessimistic txn rollback statement, it may not leave a 'rollbacked' tombstone.
  1259  
  1260  	// Or maybe caused by concurrent prewrite operation.
  1261  	// Especially in the non-block reading case, the secondary lock is likely to be
  1262  	// written before the primary lock.
  1263  
  1264  	if rollbackIfNotExist {
  1265  		if resolvingPessimisticLock {
  1266  			return 0, 0, kvrpcpb.Action_LockNotExistDoNothing, nil
  1267  		}
  1268  		// Write rollback record, but not delete the lock on the primary key. There may exist lock which has
  1269  		// different lock.startTS with input lockTS, for example the primary key could be already
  1270  		// locked by the caller transaction, deleting this key will mistakenly delete the lock on
  1271  		// primary key, see case TestSingleStatementRollback in session_test suite for example
  1272  		batch := &leveldb.Batch{}
  1273  		if err1 := writeRollback(batch, primaryKey, lockTS); err1 != nil {
  1274  			err = errors.Trace(err1)
  1275  			return
  1276  		}
  1277  		if err1 := mvcc.db.Write(batch, nil); err1 != nil {
  1278  			err = errors.Trace(err1)
  1279  			return
  1280  		}
  1281  		return 0, 0, kvrpcpb.Action_LockNotExistRollback, nil
  1282  	}
  1283  
  1284  	return 0, 0, action, &ErrTxnNotFound{kvrpcpb.TxnNotFound{
  1285  		StartTs:    lockTS,
  1286  		PrimaryKey: primaryKey,
  1287  	}}
  1288  }
  1289  
  1290  // TxnHeartBeat implements the MVCCStore interface.
  1291  func (mvcc *MVCCLevelDB) TxnHeartBeat(key []byte, startTS uint64, adviseTTL uint64) (uint64, error) {
  1292  	mvcc.mu.Lock()
  1293  	defer mvcc.mu.Unlock()
  1294  
  1295  	startKey := mvccEncode(key, lockVer)
  1296  	iter := newIterator(mvcc.db, &util.Range{
  1297  		Start: startKey,
  1298  	})
  1299  	defer iter.Release()
  1300  
  1301  	if iter.Valid() {
  1302  		dec := lockDecoder{
  1303  			expectKey: key,
  1304  		}
  1305  		ok, err := dec.Decode(iter)
  1306  		if err != nil {
  1307  			return 0, errors.Trace(err)
  1308  		}
  1309  		if ok && dec.lock.startTS == startTS {
  1310  			if !bytes.Equal(dec.lock.primary, key) {
  1311  				return 0, errors.New("txnHeartBeat on non-primary key, the code should not run here")
  1312  			}
  1313  
  1314  			lock := dec.lock
  1315  			batch := &leveldb.Batch{}
  1316  			// Increase the ttl of this transaction.
  1317  			if adviseTTL > lock.ttl {
  1318  				lock.ttl = adviseTTL
  1319  				writeKey := mvccEncode(key, lockVer)
  1320  				writeValue, err := lock.MarshalBinary()
  1321  				if err != nil {
  1322  					return 0, errors.Trace(err)
  1323  				}
  1324  				batch.Put(writeKey, writeValue)
  1325  				if err = mvcc.db.Write(batch, nil); err != nil {
  1326  					return 0, errors.Trace(err)
  1327  				}
  1328  			}
  1329  			return lock.ttl, nil
  1330  		}
  1331  	}
  1332  	return 0, errors.New("lock doesn't exist")
  1333  }
  1334  
  1335  // ScanLock implements the MVCCStore interface.
  1336  func (mvcc *MVCCLevelDB) ScanLock(startKey, endKey []byte, maxTS uint64) ([]*kvrpcpb.LockInfo, error) {
  1337  	mvcc.mu.RLock()
  1338  	defer mvcc.mu.RUnlock()
  1339  
  1340  	iter, currKey, err := newScanIterator(mvcc.db, startKey, endKey)
  1341  	defer iter.Release()
  1342  	if err != nil {
  1343  		return nil, errors.Trace(err)
  1344  	}
  1345  
  1346  	var locks []*kvrpcpb.LockInfo
  1347  	for iter.Valid() {
  1348  		dec := lockDecoder{expectKey: currKey}
  1349  		ok, err := dec.Decode(iter)
  1350  		if err != nil {
  1351  			return nil, errors.Trace(err)
  1352  		}
  1353  		if ok && dec.lock.startTS <= maxTS {
  1354  			locks = append(locks, &kvrpcpb.LockInfo{
  1355  				PrimaryLock: dec.lock.primary,
  1356  				LockVersion: dec.lock.startTS,
  1357  				Key:         currKey,
  1358  			})
  1359  		}
  1360  
  1361  		skip := skipDecoder{currKey: currKey}
  1362  		_, err = skip.Decode(iter)
  1363  		if err != nil {
  1364  			return nil, errors.Trace(err)
  1365  		}
  1366  		currKey = skip.currKey
  1367  	}
  1368  	return locks, nil
  1369  }
  1370  
  1371  // ResolveLock implements the MVCCStore interface.
  1372  func (mvcc *MVCCLevelDB) ResolveLock(startKey, endKey []byte, startTS, commitTS uint64) error {
  1373  	mvcc.mu.Lock()
  1374  	defer mvcc.mu.Unlock()
  1375  
  1376  	iter, currKey, err := newScanIterator(mvcc.db, startKey, endKey)
  1377  	defer iter.Release()
  1378  	if err != nil {
  1379  		return errors.Trace(err)
  1380  	}
  1381  
  1382  	batch := &leveldb.Batch{}
  1383  	for iter.Valid() {
  1384  		dec := lockDecoder{expectKey: currKey}
  1385  		ok, err := dec.Decode(iter)
  1386  		if err != nil {
  1387  			return errors.Trace(err)
  1388  		}
  1389  		if ok && dec.lock.startTS == startTS {
  1390  			if commitTS > 0 {
  1391  				err = commitLock(batch, dec.lock, currKey, startTS, commitTS)
  1392  			} else {
  1393  				err = rollbackLock(batch, currKey, startTS)
  1394  			}
  1395  			if err != nil {
  1396  				return errors.Trace(err)
  1397  			}
  1398  		}
  1399  
  1400  		skip := skipDecoder{currKey: currKey}
  1401  		_, err = skip.Decode(iter)
  1402  		if err != nil {
  1403  			return errors.Trace(err)
  1404  		}
  1405  		currKey = skip.currKey
  1406  	}
  1407  	return mvcc.db.Write(batch, nil)
  1408  }
  1409  
  1410  // BatchResolveLock implements the MVCCStore interface.
  1411  func (mvcc *MVCCLevelDB) BatchResolveLock(startKey, endKey []byte, txnInfos map[uint64]uint64) error {
  1412  	mvcc.mu.Lock()
  1413  	defer mvcc.mu.Unlock()
  1414  
  1415  	iter, currKey, err := newScanIterator(mvcc.db, startKey, endKey)
  1416  	defer iter.Release()
  1417  	if err != nil {
  1418  		return errors.Trace(err)
  1419  	}
  1420  
  1421  	batch := &leveldb.Batch{}
  1422  	for iter.Valid() {
  1423  		dec := lockDecoder{expectKey: currKey}
  1424  		ok, err := dec.Decode(iter)
  1425  		if err != nil {
  1426  			return errors.Trace(err)
  1427  		}
  1428  		if ok {
  1429  			if commitTS, ok := txnInfos[dec.lock.startTS]; ok {
  1430  				if commitTS > 0 {
  1431  					err = commitLock(batch, dec.lock, currKey, dec.lock.startTS, commitTS)
  1432  				} else {
  1433  					err = rollbackLock(batch, currKey, dec.lock.startTS)
  1434  				}
  1435  				if err != nil {
  1436  					return errors.Trace(err)
  1437  				}
  1438  			}
  1439  		}
  1440  
  1441  		skip := skipDecoder{currKey: currKey}
  1442  		_, err = skip.Decode(iter)
  1443  		if err != nil {
  1444  			return errors.Trace(err)
  1445  		}
  1446  		currKey = skip.currKey
  1447  	}
  1448  	return mvcc.db.Write(batch, nil)
  1449  }
  1450  
  1451  // GC implements the MVCCStore interface
  1452  func (mvcc *MVCCLevelDB) GC(startKey, endKey []byte, safePoint uint64) error {
  1453  	mvcc.mu.Lock()
  1454  	defer mvcc.mu.Unlock()
  1455  
  1456  	iter, currKey, err := newScanIterator(mvcc.db, startKey, endKey)
  1457  	defer iter.Release()
  1458  	if err != nil {
  1459  		return errors.Trace(err)
  1460  	}
  1461  
  1462  	// Mock TiKV usually doesn't need to process large amount of data. So write it in a single batch.
  1463  	batch := &leveldb.Batch{}
  1464  
  1465  	for iter.Valid() {
  1466  		lockDec := lockDecoder{expectKey: currKey}
  1467  		ok, err := lockDec.Decode(iter)
  1468  		if err != nil {
  1469  			return errors.Trace(err)
  1470  		}
  1471  		if ok && lockDec.lock.startTS <= safePoint {
  1472  			return errors.Errorf(
  1473  				"key %+q has lock with startTs %v which is under safePoint %v",
  1474  				currKey,
  1475  				lockDec.lock.startTS,
  1476  				safePoint)
  1477  		}
  1478  
  1479  		keepNext := true
  1480  		dec := valueDecoder{expectKey: currKey}
  1481  
  1482  		for iter.Valid() {
  1483  			ok, err := dec.Decode(iter)
  1484  			if err != nil {
  1485  				return errors.Trace(err)
  1486  			}
  1487  
  1488  			if !ok {
  1489  				// Go to the next key
  1490  				currKey, _, err = mvccDecode(iter.Key())
  1491  				if err != nil {
  1492  					return errors.Trace(err)
  1493  				}
  1494  				break
  1495  			}
  1496  
  1497  			if dec.value.commitTS > safePoint {
  1498  				continue
  1499  			}
  1500  
  1501  			if dec.value.valueType == typePut || dec.value.valueType == typeDelete {
  1502  				// Keep the latest version if it's `typePut`
  1503  				if !keepNext || dec.value.valueType == typeDelete {
  1504  					batch.Delete(mvccEncode(currKey, dec.value.commitTS))
  1505  				}
  1506  				keepNext = false
  1507  			} else {
  1508  				// Delete all other types
  1509  				batch.Delete(mvccEncode(currKey, dec.value.commitTS))
  1510  			}
  1511  		}
  1512  	}
  1513  
  1514  	return mvcc.db.Write(batch, nil)
  1515  }
  1516  
  1517  // DeleteRange implements the MVCCStore interface.
  1518  func (mvcc *MVCCLevelDB) DeleteRange(startKey, endKey []byte) error {
  1519  	return mvcc.doRawDeleteRange(codec.EncodeBytes(nil, startKey), codec.EncodeBytes(nil, endKey))
  1520  }
  1521  
  1522  // Close calls leveldb's Close to free resources.
  1523  func (mvcc *MVCCLevelDB) Close() error {
  1524  	return mvcc.db.Close()
  1525  }
  1526  
  1527  // RawPut implements the RawKV interface.
  1528  func (mvcc *MVCCLevelDB) RawPut(key, value []byte) {
  1529  	mvcc.mu.Lock()
  1530  	defer mvcc.mu.Unlock()
  1531  
  1532  	if value == nil {
  1533  		value = []byte{}
  1534  	}
  1535  	terror.Log(mvcc.db.Put(key, value, nil))
  1536  }
  1537  
  1538  // RawBatchPut implements the RawKV interface
  1539  func (mvcc *MVCCLevelDB) RawBatchPut(keys, values [][]byte) {
  1540  	mvcc.mu.Lock()
  1541  	defer mvcc.mu.Unlock()
  1542  
  1543  	batch := &leveldb.Batch{}
  1544  	for i, key := range keys {
  1545  		value := values[i]
  1546  		if value == nil {
  1547  			value = []byte{}
  1548  		}
  1549  		batch.Put(key, value)
  1550  	}
  1551  	terror.Log(mvcc.db.Write(batch, nil))
  1552  }
  1553  
  1554  // RawGet implements the RawKV interface.
  1555  func (mvcc *MVCCLevelDB) RawGet(key []byte) []byte {
  1556  	mvcc.mu.Lock()
  1557  	defer mvcc.mu.Unlock()
  1558  
  1559  	ret, err := mvcc.db.Get(key, nil)
  1560  	terror.Log(err)
  1561  	return ret
  1562  }
  1563  
  1564  // RawBatchGet implements the RawKV interface.
  1565  func (mvcc *MVCCLevelDB) RawBatchGet(keys [][]byte) [][]byte {
  1566  	mvcc.mu.Lock()
  1567  	defer mvcc.mu.Unlock()
  1568  
  1569  	values := make([][]byte, 0, len(keys))
  1570  	for _, key := range keys {
  1571  		value, err := mvcc.db.Get(key, nil)
  1572  		terror.Log(err)
  1573  		values = append(values, value)
  1574  	}
  1575  	return values
  1576  }
  1577  
  1578  // RawDelete implements the RawKV interface.
  1579  func (mvcc *MVCCLevelDB) RawDelete(key []byte) {
  1580  	mvcc.mu.Lock()
  1581  	defer mvcc.mu.Unlock()
  1582  
  1583  	terror.Log(mvcc.db.Delete(key, nil))
  1584  }
  1585  
  1586  // RawBatchDelete implements the RawKV interface.
  1587  func (mvcc *MVCCLevelDB) RawBatchDelete(keys [][]byte) {
  1588  	mvcc.mu.Lock()
  1589  	defer mvcc.mu.Unlock()
  1590  
  1591  	batch := &leveldb.Batch{}
  1592  	for _, key := range keys {
  1593  		batch.Delete(key)
  1594  	}
  1595  	terror.Log(mvcc.db.Write(batch, nil))
  1596  }
  1597  
  1598  // RawScan implements the RawKV interface.
  1599  func (mvcc *MVCCLevelDB) RawScan(startKey, endKey []byte, limit int) []Pair {
  1600  	mvcc.mu.Lock()
  1601  	defer mvcc.mu.Unlock()
  1602  
  1603  	iter := mvcc.db.NewIterator(&util.Range{
  1604  		Start: startKey,
  1605  	}, nil)
  1606  
  1607  	var pairs []Pair
  1608  	for iter.Next() && len(pairs) < limit {
  1609  		key := iter.Key()
  1610  		value := iter.Value()
  1611  		err := iter.Error()
  1612  		if len(endKey) > 0 && bytes.Compare(key, endKey) >= 0 {
  1613  			break
  1614  		}
  1615  		pairs = append(pairs, Pair{
  1616  			Key:   append([]byte{}, key...),
  1617  			Value: append([]byte{}, value...),
  1618  			Err:   err,
  1619  		})
  1620  	}
  1621  	return pairs
  1622  }
  1623  
  1624  // RawReverseScan implements the RawKV interface.
  1625  // Scan the range of [endKey, startKey)
  1626  // It doesn't support Scanning from "", because locating the last Region is not yet implemented.
  1627  func (mvcc *MVCCLevelDB) RawReverseScan(startKey, endKey []byte, limit int) []Pair {
  1628  	mvcc.mu.Lock()
  1629  	defer mvcc.mu.Unlock()
  1630  
  1631  	iter := mvcc.db.NewIterator(&util.Range{
  1632  		Limit: startKey,
  1633  	}, nil)
  1634  
  1635  	success := iter.Last()
  1636  
  1637  	var pairs []Pair
  1638  	for success && len(pairs) < limit {
  1639  		key := iter.Key()
  1640  		value := iter.Value()
  1641  		err := iter.Error()
  1642  		if bytes.Compare(key, endKey) < 0 {
  1643  			break
  1644  		}
  1645  		pairs = append(pairs, Pair{
  1646  			Key:   append([]byte{}, key...),
  1647  			Value: append([]byte{}, value...),
  1648  			Err:   err,
  1649  		})
  1650  		success = iter.Prev()
  1651  	}
  1652  	return pairs
  1653  }
  1654  
  1655  // RawDeleteRange implements the RawKV interface.
  1656  func (mvcc *MVCCLevelDB) RawDeleteRange(startKey, endKey []byte) {
  1657  	terror.Log(mvcc.doRawDeleteRange(startKey, endKey))
  1658  }
  1659  
  1660  // doRawDeleteRange deletes all keys in a range and return the error if any.
  1661  func (mvcc *MVCCLevelDB) doRawDeleteRange(startKey, endKey []byte) error {
  1662  	mvcc.mu.Lock()
  1663  	defer mvcc.mu.Unlock()
  1664  
  1665  	batch := &leveldb.Batch{}
  1666  
  1667  	iter := mvcc.db.NewIterator(&util.Range{
  1668  		Start: startKey,
  1669  		Limit: endKey,
  1670  	}, nil)
  1671  	for iter.Next() {
  1672  		batch.Delete(iter.Key())
  1673  	}
  1674  
  1675  	return mvcc.db.Write(batch, nil)
  1676  }
  1677  
  1678  // MvccGetByStartTS implements the MVCCDebugger interface.
  1679  func (mvcc *MVCCLevelDB) MvccGetByStartTS(starTS uint64) (*kvrpcpb.MvccInfo, []byte) {
  1680  	mvcc.mu.RLock()
  1681  	defer mvcc.mu.RUnlock()
  1682  
  1683  	var key []byte
  1684  	iter := newIterator(mvcc.db, nil)
  1685  	defer iter.Release()
  1686  
  1687  	// find the first committed key for which `start_ts` equals to `ts`
  1688  	for iter.Valid() {
  1689  		var value mvccValue
  1690  		err := value.UnmarshalBinary(iter.Value())
  1691  		if err == nil && value.startTS == starTS {
  1692  			if _, key, err = codec.DecodeBytes(iter.Key(), nil); err != nil {
  1693  				return nil, nil
  1694  			}
  1695  			break
  1696  		}
  1697  		iter.Next()
  1698  	}
  1699  
  1700  	return mvcc.mvccGetByKeyNoLock(key), key
  1701  }
  1702  
// valueTypeOpMap translates an mvccValueType, used as the array index, into
// the kvrpcpb.Op reported for it in MVCC debug information.
var valueTypeOpMap = [...]kvrpcpb.Op{
	typePut:      kvrpcpb.Op_Put,
	typeDelete:   kvrpcpb.Op_Del,
	typeRollback: kvrpcpb.Op_Rollback,
	typeLock:     kvrpcpb.Op_Lock,
}
  1709  
  1710  // MvccGetByKey implements the MVCCDebugger interface.
  1711  func (mvcc *MVCCLevelDB) MvccGetByKey(key []byte) *kvrpcpb.MvccInfo {
  1712  	mvcc.mu.RLock()
  1713  	defer mvcc.mu.RUnlock()
  1714  
  1715  	return mvcc.mvccGetByKeyNoLock(key)
  1716  }
  1717  
  1718  // mvcc.mu.RLock must be held before calling mvccGetByKeyNoLock.
  1719  func (mvcc *MVCCLevelDB) mvccGetByKeyNoLock(key []byte) *kvrpcpb.MvccInfo {
  1720  	info := &kvrpcpb.MvccInfo{}
  1721  
  1722  	startKey := mvccEncode(key, lockVer)
  1723  	iter := newIterator(mvcc.db, &util.Range{
  1724  		Start: startKey,
  1725  	})
  1726  	defer iter.Release()
  1727  
  1728  	dec1 := lockDecoder{expectKey: key}
  1729  	ok, err := dec1.Decode(iter)
  1730  	if err != nil {
  1731  		return nil
  1732  	}
  1733  	if ok {
  1734  		var shortValue []byte
  1735  		if isShortValue(dec1.lock.value) {
  1736  			shortValue = dec1.lock.value
  1737  		}
  1738  		info.Lock = &kvrpcpb.MvccLock{
  1739  			Type:       dec1.lock.op,
  1740  			StartTs:    dec1.lock.startTS,
  1741  			Primary:    dec1.lock.primary,
  1742  			ShortValue: shortValue,
  1743  		}
  1744  	}
  1745  
  1746  	dec2 := valueDecoder{expectKey: key}
  1747  	var writes []*kvrpcpb.MvccWrite
  1748  	var values []*kvrpcpb.MvccValue
  1749  	for iter.Valid() {
  1750  		ok, err := dec2.Decode(iter)
  1751  		if err != nil {
  1752  			return nil
  1753  		}
  1754  		if !ok {
  1755  			iter.Next()
  1756  			break
  1757  		}
  1758  		var shortValue []byte
  1759  		if isShortValue(dec2.value.value) {
  1760  			shortValue = dec2.value.value
  1761  		}
  1762  		write := &kvrpcpb.MvccWrite{
  1763  			Type:       valueTypeOpMap[dec2.value.valueType],
  1764  			StartTs:    dec2.value.startTS,
  1765  			CommitTs:   dec2.value.commitTS,
  1766  			ShortValue: shortValue,
  1767  		}
  1768  		writes = append(writes, write)
  1769  		value := &kvrpcpb.MvccValue{
  1770  			StartTs: dec2.value.startTS,
  1771  			Value:   dec2.value.value,
  1772  		}
  1773  		values = append(values, value)
  1774  	}
  1775  	info.Writes = writes
  1776  	info.Values = values
  1777  
  1778  	return info
  1779  }
  1780  
  1781  const shortValueMaxLen = 64
  1782  
  1783  func isShortValue(value []byte) bool {
  1784  	return len(value) <= shortValueMaxLen
  1785  }