github.com/KinWaiYuen/client-go/v2@v2.5.4/internal/mockstore/mocktikv/mvcc_leveldb.go (about) 1 // Copyright 2021 TiKV Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // NOTE: The code in this file is based on code from the 16 // TiDB project, licensed under the Apache License v 2.0 17 // 18 // https://github.com/pingcap/tidb/tree/cc5e161ac06827589c4966674597c137cc9e809c/store/tikv/mockstore/mocktikv/mvcc_leveldb.go 19 // 20 21 // Copyright 2017 PingCAP, Inc. 22 // 23 // Licensed under the Apache License, Version 2.0 (the "License"); 24 // you may not use this file except in compliance with the License. 25 // You may obtain a copy of the License at 26 // 27 // http://www.apache.org/licenses/LICENSE-2.0 28 // 29 // Unless required by applicable law or agreed to in writing, software 30 // distributed under the License is distributed on an "AS IS" BASIS, 31 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 32 // See the License for the specific language governing permissions and 33 // limitations under the License. 
34 35 package mocktikv 36 37 import ( 38 "bytes" 39 "math" 40 "sync" 41 42 "github.com/KinWaiYuen/client-go/v2/internal/logutil" 43 "github.com/KinWaiYuen/client-go/v2/internal/mockstore/deadlock" 44 "github.com/KinWaiYuen/client-go/v2/oracle" 45 "github.com/KinWaiYuen/client-go/v2/util/codec" 46 "github.com/dgryski/go-farm" 47 "github.com/pingcap/errors" 48 "github.com/pingcap/goleveldb/leveldb" 49 "github.com/pingcap/goleveldb/leveldb/iterator" 50 "github.com/pingcap/goleveldb/leveldb/opt" 51 "github.com/pingcap/goleveldb/leveldb/storage" 52 "github.com/pingcap/goleveldb/leveldb/util" 53 "github.com/pingcap/kvproto/pkg/kvrpcpb" 54 "github.com/pingcap/parser/terror" 55 "go.uber.org/zap" 56 ) 57 58 // Used for pessimistic lock wait time 59 // these two constants are special for lock protocol with tikv 60 // 0 means always wait, -1 means nowait, others meaning lock wait in milliseconds 61 var ( 62 LockAlwaysWait = int64(0) 63 LockNoWait = int64(-1) 64 ) 65 66 // MVCCLevelDB implements the MVCCStore interface. 67 type MVCCLevelDB struct { 68 // Key layout: 69 // ... 70 // Key_lock -- (0) 71 // Key_verMax -- (1) 72 // ... 73 // Key_ver+1 -- (2) 74 // Key_ver -- (3) 75 // Key_ver-1 -- (4) 76 // ... 77 // Key_0 -- (5) 78 // NextKey_lock -- (6) 79 // NextKey_verMax -- (7) 80 // ... 81 // NextKey_ver+1 -- (8) 82 // NextKey_ver -- (9) 83 // NextKey_ver-1 -- (10) 84 // ... 85 // NextKey_0 -- (11) 86 // ... 87 // EOF 88 89 // db represents leveldb 90 db *leveldb.DB 91 // mu used for lock 92 // leveldb can not guarantee multiple operations to be atomic, for example, read 93 // then write, another write may happen during it, so this lock is necessary. 94 mu sync.RWMutex 95 deadlockDetector *deadlock.Detector 96 } 97 98 const lockVer uint64 = math.MaxUint64 99 100 // ErrInvalidEncodedKey describes parsing an invalid format of EncodedKey. 101 var ErrInvalidEncodedKey = errors.New("invalid encoded key") 102 103 // mvccEncode returns the encoded key. 
104 func mvccEncode(key []byte, ver uint64) []byte { 105 b := codec.EncodeBytes(nil, key) 106 ret := codec.EncodeUintDesc(b, ver) 107 return ret 108 } 109 110 // mvccDecode parses the origin key and version of an encoded key, if the encoded key is a meta key, 111 // just returns the origin key. 112 func mvccDecode(encodedKey []byte) ([]byte, uint64, error) { 113 // Skip DataPrefix 114 remainBytes, key, err := codec.DecodeBytes(encodedKey, nil) 115 if err != nil { 116 // should never happen 117 return nil, 0, errors.Trace(err) 118 } 119 // if it's meta key 120 if len(remainBytes) == 0 { 121 return key, 0, nil 122 } 123 var ver uint64 124 remainBytes, ver, err = codec.DecodeUintDesc(remainBytes) 125 if err != nil { 126 // should never happen 127 return nil, 0, errors.Trace(err) 128 } 129 if len(remainBytes) != 0 { 130 return nil, 0, ErrInvalidEncodedKey 131 } 132 return key, ver, nil 133 } 134 135 // MustNewMVCCStore is used for testing, use NewMVCCLevelDB instead. 136 func MustNewMVCCStore() MVCCStore { 137 mvccStore, err := NewMVCCLevelDB("") 138 if err != nil { 139 panic(err) 140 } 141 return mvccStore 142 } 143 144 // NewMVCCLevelDB returns a new MVCCLevelDB object. 145 func NewMVCCLevelDB(path string) (*MVCCLevelDB, error) { 146 var ( 147 d *leveldb.DB 148 err error 149 ) 150 if path == "" { 151 d, err = leveldb.Open(storage.NewMemStorage(), nil) 152 } else { 153 d, err = leveldb.OpenFile(path, &opt.Options{BlockCacheCapacity: 600 * 1024 * 1024}) 154 } 155 156 return &MVCCLevelDB{db: d, deadlockDetector: deadlock.NewDetector()}, errors.Trace(err) 157 } 158 159 // Iterator wraps iterator.Iterator to provide Valid() method. 160 type Iterator struct { 161 iterator.Iterator 162 valid bool 163 } 164 165 // Next moves the iterator to the next key/value pair. 166 func (iter *Iterator) Next() { 167 iter.valid = iter.Iterator.Next() 168 } 169 170 // Valid returns whether the iterator is exhausted. 
171 func (iter *Iterator) Valid() bool { 172 return iter.valid 173 } 174 175 func newIterator(db *leveldb.DB, slice *util.Range) *Iterator { 176 iter := &Iterator{db.NewIterator(slice, nil), true} 177 iter.Next() 178 return iter 179 } 180 181 func newScanIterator(db *leveldb.DB, startKey, endKey []byte) (*Iterator, []byte, error) { 182 var start, end []byte 183 if len(startKey) > 0 { 184 start = mvccEncode(startKey, lockVer) 185 } 186 if len(endKey) > 0 { 187 end = mvccEncode(endKey, lockVer) 188 } 189 iter := newIterator(db, &util.Range{ 190 Start: start, 191 Limit: end, 192 }) 193 // newScanIterator must handle startKey is nil, in this case, the real startKey 194 // should be change the frist key of the store. 195 if len(startKey) == 0 && iter.Valid() { 196 key, _, err := mvccDecode(iter.Key()) 197 if err != nil { 198 return nil, nil, errors.Trace(err) 199 } 200 startKey = key 201 } 202 return iter, startKey, nil 203 } 204 205 type lockDecoder struct { 206 lock mvccLock 207 expectKey []byte 208 } 209 210 // Decode decodes the lock value if current iterator is at expectKey::lock. 211 func (dec *lockDecoder) Decode(iter *Iterator) (bool, error) { 212 if iter.Error() != nil || !iter.Valid() { 213 return false, iter.Error() 214 } 215 216 iterKey := iter.Key() 217 key, ver, err := mvccDecode(iterKey) 218 if err != nil { 219 return false, errors.Trace(err) 220 } 221 if !bytes.Equal(key, dec.expectKey) { 222 return false, nil 223 } 224 if ver != lockVer { 225 return false, nil 226 } 227 228 var lock mvccLock 229 err = lock.UnmarshalBinary(iter.Value()) 230 if err != nil { 231 return false, errors.Trace(err) 232 } 233 dec.lock = lock 234 iter.Next() 235 return true, nil 236 } 237 238 type valueDecoder struct { 239 value mvccValue 240 expectKey []byte 241 } 242 243 // Decode decodes a mvcc value if iter key is expectKey. 
244 func (dec *valueDecoder) Decode(iter *Iterator) (bool, error) { 245 if iter.Error() != nil || !iter.Valid() { 246 return false, iter.Error() 247 } 248 249 key, ver, err := mvccDecode(iter.Key()) 250 if err != nil { 251 return false, errors.Trace(err) 252 } 253 if !bytes.Equal(key, dec.expectKey) { 254 return false, nil 255 } 256 if ver == lockVer { 257 return false, nil 258 } 259 260 var value mvccValue 261 err = value.UnmarshalBinary(iter.Value()) 262 if err != nil { 263 return false, errors.Trace(err) 264 } 265 dec.value = value 266 iter.Next() 267 return true, nil 268 } 269 270 type skipDecoder struct { 271 currKey []byte 272 } 273 274 // Decode skips the iterator as long as its key is currKey, the new key would be stored. 275 func (dec *skipDecoder) Decode(iter *Iterator) (bool, error) { 276 if iter.Error() != nil { 277 return false, iter.Error() 278 } 279 for iter.Valid() { 280 key, _, err := mvccDecode(iter.Key()) 281 if err != nil { 282 return false, errors.Trace(err) 283 } 284 if !bytes.Equal(key, dec.currKey) { 285 dec.currKey = key 286 return true, nil 287 } 288 iter.Next() 289 } 290 return false, nil 291 } 292 293 // Get implements the MVCCStore interface. 
294 // key cannot be nil or []byte{} 295 func (mvcc *MVCCLevelDB) Get(key []byte, startTS uint64, isoLevel kvrpcpb.IsolationLevel, resolvedLocks []uint64) ([]byte, error) { 296 mvcc.mu.RLock() 297 defer mvcc.mu.RUnlock() 298 299 return mvcc.getValue(key, startTS, isoLevel, resolvedLocks) 300 } 301 302 func (mvcc *MVCCLevelDB) getValue(key []byte, startTS uint64, isoLevel kvrpcpb.IsolationLevel, resolvedLocks []uint64) ([]byte, error) { 303 startKey := mvccEncode(key, lockVer) 304 iter := newIterator(mvcc.db, &util.Range{ 305 Start: startKey, 306 }) 307 defer iter.Release() 308 309 return getValue(iter, key, startTS, isoLevel, resolvedLocks) 310 } 311 312 func getValue(iter *Iterator, key []byte, startTS uint64, isoLevel kvrpcpb.IsolationLevel, resolvedLocks []uint64) ([]byte, error) { 313 dec1 := lockDecoder{expectKey: key} 314 ok, err := dec1.Decode(iter) 315 if ok && isoLevel == kvrpcpb.IsolationLevel_SI { 316 startTS, err = dec1.lock.check(startTS, key, resolvedLocks) 317 } 318 if err != nil { 319 return nil, errors.Trace(err) 320 } 321 dec2 := valueDecoder{expectKey: key} 322 for iter.Valid() { 323 ok, err := dec2.Decode(iter) 324 if err != nil { 325 return nil, errors.Trace(err) 326 } 327 if !ok { 328 break 329 } 330 331 value := &dec2.value 332 if value.valueType == typeRollback || value.valueType == typeLock { 333 continue 334 } 335 // Read the first committed value that can be seen at startTS. 336 if value.commitTS <= startTS { 337 if value.valueType == typeDelete { 338 return nil, nil 339 } 340 return value.value, nil 341 } 342 } 343 return nil, nil 344 } 345 346 // BatchGet implements the MVCCStore interface. 
347 func (mvcc *MVCCLevelDB) BatchGet(ks [][]byte, startTS uint64, isoLevel kvrpcpb.IsolationLevel, resolvedLocks []uint64) []Pair { 348 mvcc.mu.RLock() 349 defer mvcc.mu.RUnlock() 350 351 pairs := make([]Pair, 0, len(ks)) 352 for _, k := range ks { 353 v, err := mvcc.getValue(k, startTS, isoLevel, resolvedLocks) 354 if v == nil && err == nil { 355 continue 356 } 357 pairs = append(pairs, Pair{ 358 Key: k, 359 Value: v, 360 Err: errors.Trace(err), 361 }) 362 } 363 return pairs 364 } 365 366 // Scan implements the MVCCStore interface. 367 func (mvcc *MVCCLevelDB) Scan(startKey, endKey []byte, limit int, startTS uint64, isoLevel kvrpcpb.IsolationLevel, resolvedLock []uint64) []Pair { 368 mvcc.mu.RLock() 369 defer mvcc.mu.RUnlock() 370 371 iter, currKey, err := newScanIterator(mvcc.db, startKey, endKey) 372 defer iter.Release() 373 if err != nil { 374 logutil.BgLogger().Error("scan new iterator fail", zap.Error(err)) 375 return nil 376 } 377 378 ok := true 379 var pairs []Pair 380 for len(pairs) < limit && ok { 381 value, err := getValue(iter, currKey, startTS, isoLevel, resolvedLock) 382 if err != nil { 383 pairs = append(pairs, Pair{ 384 Key: currKey, 385 Err: errors.Trace(err), 386 }) 387 } 388 if value != nil { 389 pairs = append(pairs, Pair{ 390 Key: currKey, 391 Value: value, 392 }) 393 } 394 395 skip := skipDecoder{currKey} 396 ok, err = skip.Decode(iter) 397 if err != nil { 398 logutil.BgLogger().Error("seek to next key error", zap.Error(err)) 399 break 400 } 401 currKey = skip.currKey 402 } 403 return pairs 404 } 405 406 // ReverseScan implements the MVCCStore interface. The search range is [startKey, endKey). 
407 func (mvcc *MVCCLevelDB) ReverseScan(startKey, endKey []byte, limit int, startTS uint64, isoLevel kvrpcpb.IsolationLevel, resolvedLocks []uint64) []Pair { 408 mvcc.mu.RLock() 409 defer mvcc.mu.RUnlock() 410 411 var mvccEnd []byte 412 if len(endKey) != 0 { 413 mvccEnd = mvccEncode(endKey, lockVer) 414 } 415 iter := mvcc.db.NewIterator(&util.Range{ 416 Limit: mvccEnd, 417 }, nil) 418 defer iter.Release() 419 420 succ := iter.Last() 421 currKey, _, err := mvccDecode(iter.Key()) 422 // TODO: return error. 423 terror.Log(errors.Trace(err)) 424 helper := reverseScanHelper{ 425 startTS: startTS, 426 isoLevel: isoLevel, 427 currKey: currKey, 428 resolvedLocks: resolvedLocks, 429 } 430 431 for succ && len(helper.pairs) < limit { 432 key, ver, err := mvccDecode(iter.Key()) 433 if err != nil { 434 break 435 } 436 if bytes.Compare(key, startKey) < 0 { 437 break 438 } 439 440 if !bytes.Equal(key, helper.currKey) { 441 helper.finishEntry() 442 helper.currKey = key 443 } 444 if ver == lockVer { 445 var lock mvccLock 446 err = lock.UnmarshalBinary(iter.Value()) 447 helper.entry.lock = &lock 448 } else { 449 var value mvccValue 450 err = value.UnmarshalBinary(iter.Value()) 451 helper.entry.values = append(helper.entry.values, value) 452 } 453 if err != nil { 454 logutil.BgLogger().Error("unmarshal fail", zap.Error(err)) 455 break 456 } 457 succ = iter.Prev() 458 } 459 if len(helper.pairs) < limit { 460 helper.finishEntry() 461 } 462 return helper.pairs 463 } 464 465 type reverseScanHelper struct { 466 startTS uint64 467 isoLevel kvrpcpb.IsolationLevel 468 resolvedLocks []uint64 469 currKey []byte 470 entry mvccEntry 471 pairs []Pair 472 } 473 474 func (helper *reverseScanHelper) finishEntry() { 475 reverse(helper.entry.values) 476 helper.entry.key = NewMvccKey(helper.currKey) 477 val, err := helper.entry.Get(helper.startTS, helper.isoLevel, helper.resolvedLocks) 478 if len(val) != 0 || err != nil { 479 helper.pairs = append(helper.pairs, Pair{ 480 Key: helper.currKey, 481 
Value: val, 482 Err: err, 483 }) 484 } 485 helper.entry = mvccEntry{} 486 } 487 488 func reverse(values []mvccValue) { 489 i, j := 0, len(values)-1 490 for i < j { 491 values[i], values[j] = values[j], values[i] 492 i++ 493 j-- 494 } 495 } 496 497 type lockCtx struct { 498 startTS uint64 499 forUpdateTS uint64 500 primary []byte 501 ttl uint64 502 minCommitTs uint64 503 504 returnValues bool 505 values [][]byte 506 } 507 508 // PessimisticLock writes the pessimistic lock. 509 func (mvcc *MVCCLevelDB) PessimisticLock(req *kvrpcpb.PessimisticLockRequest) *kvrpcpb.PessimisticLockResponse { 510 resp := &kvrpcpb.PessimisticLockResponse{} 511 mvcc.mu.Lock() 512 defer mvcc.mu.Unlock() 513 mutations := req.Mutations 514 lCtx := &lockCtx{ 515 startTS: req.StartVersion, 516 forUpdateTS: req.ForUpdateTs, 517 primary: req.PrimaryLock, 518 ttl: req.LockTtl, 519 minCommitTs: req.MinCommitTs, 520 returnValues: req.ReturnValues, 521 } 522 lockWaitTime := req.WaitTimeout 523 524 anyError := false 525 batch := &leveldb.Batch{} 526 errs := make([]error, 0, len(mutations)) 527 for _, m := range mutations { 528 err := mvcc.pessimisticLockMutation(batch, m, lCtx) 529 errs = append(errs, err) 530 if err != nil { 531 anyError = true 532 } 533 if lockWaitTime == LockNoWait { 534 if _, ok := err.(*ErrLocked); ok { 535 break 536 } 537 } 538 } 539 if anyError { 540 if lockWaitTime != LockNoWait { 541 // TODO: remove this when implement sever side wait. 
542 simulateServerSideWaitLock(errs) 543 } 544 resp.Errors = convertToKeyErrors(errs) 545 return resp 546 } 547 if err := mvcc.db.Write(batch, nil); err != nil { 548 resp.Errors = convertToKeyErrors([]error{err}) 549 return resp 550 } 551 if req.ReturnValues { 552 resp.Values = lCtx.values 553 } 554 return resp 555 } 556 557 func (mvcc *MVCCLevelDB) pessimisticLockMutation(batch *leveldb.Batch, mutation *kvrpcpb.Mutation, lctx *lockCtx) error { 558 startTS := lctx.startTS 559 forUpdateTS := lctx.forUpdateTS 560 startKey := mvccEncode(mutation.Key, lockVer) 561 iter := newIterator(mvcc.db, &util.Range{ 562 Start: startKey, 563 }) 564 defer iter.Release() 565 566 dec := lockDecoder{ 567 expectKey: mutation.Key, 568 } 569 ok, err := dec.Decode(iter) 570 if err != nil { 571 return errors.Trace(err) 572 } 573 if ok { 574 if dec.lock.startTS != startTS { 575 errDeadlock := mvcc.deadlockDetector.Detect(startTS, dec.lock.startTS, farm.Fingerprint64(mutation.Key)) 576 if errDeadlock != nil { 577 return &ErrDeadlock{ 578 LockKey: mutation.Key, 579 LockTS: dec.lock.startTS, 580 DealockKeyHash: errDeadlock.KeyHash, 581 } 582 } 583 return dec.lock.lockErr(mutation.Key) 584 } 585 return nil 586 } 587 588 // For pessimisticLockMutation, check the correspond rollback record, there may be rollbackLock 589 // operation between startTS and forUpdateTS 590 val, err := checkConflictValue(iter, mutation, forUpdateTS, startTS, true) 591 if err != nil { 592 return err 593 } 594 if lctx.returnValues { 595 lctx.values = append(lctx.values, val) 596 } 597 598 lock := mvccLock{ 599 startTS: startTS, 600 primary: lctx.primary, 601 op: kvrpcpb.Op_PessimisticLock, 602 ttl: lctx.ttl, 603 forUpdateTS: forUpdateTS, 604 minCommitTS: lctx.minCommitTs, 605 } 606 writeKey := mvccEncode(mutation.Key, lockVer) 607 writeValue, err := lock.MarshalBinary() 608 if err != nil { 609 return errors.Trace(err) 610 } 611 612 batch.Put(writeKey, writeValue) 613 return nil 614 } 615 616 // PessimisticRollback 
implements the MVCCStore interface. 617 func (mvcc *MVCCLevelDB) PessimisticRollback(keys [][]byte, startTS, forUpdateTS uint64) []error { 618 mvcc.mu.Lock() 619 defer mvcc.mu.Unlock() 620 621 anyError := false 622 batch := &leveldb.Batch{} 623 errs := make([]error, 0, len(keys)) 624 for _, key := range keys { 625 err := pessimisticRollbackKey(mvcc.db, batch, key, startTS, forUpdateTS) 626 errs = append(errs, err) 627 if err != nil { 628 anyError = true 629 } 630 } 631 if anyError { 632 return errs 633 } 634 if err := mvcc.db.Write(batch, nil); err != nil { 635 return []error{err} 636 } 637 return errs 638 } 639 640 func pessimisticRollbackKey(db *leveldb.DB, batch *leveldb.Batch, key []byte, startTS, forUpdateTS uint64) error { 641 startKey := mvccEncode(key, lockVer) 642 iter := newIterator(db, &util.Range{ 643 Start: startKey, 644 }) 645 defer iter.Release() 646 647 dec := lockDecoder{ 648 expectKey: key, 649 } 650 ok, err := dec.Decode(iter) 651 if err != nil { 652 return errors.Trace(err) 653 } 654 if ok { 655 lock := dec.lock 656 if lock.op == kvrpcpb.Op_PessimisticLock && lock.startTS == startTS && lock.forUpdateTS <= forUpdateTS { 657 batch.Delete(startKey) 658 } 659 } 660 return nil 661 } 662 663 // Prewrite implements the MVCCStore interface. 664 func (mvcc *MVCCLevelDB) Prewrite(req *kvrpcpb.PrewriteRequest) []error { 665 mutations := req.Mutations 666 primary := req.PrimaryLock 667 startTS := req.StartVersion 668 forUpdateTS := req.GetForUpdateTs() 669 ttl := req.LockTtl 670 minCommitTS := req.MinCommitTs 671 mvcc.mu.Lock() 672 defer mvcc.mu.Unlock() 673 674 anyError := false 675 batch := &leveldb.Batch{} 676 errs := make([]error, 0, len(mutations)) 677 txnSize := req.TxnSize 678 for i, m := range mutations { 679 // If the operation is Insert, check if key is exists at first. 680 var err error 681 // no need to check insert values for pessimistic transaction. 
682 op := m.GetOp() 683 if (op == kvrpcpb.Op_Insert || op == kvrpcpb.Op_CheckNotExists) && forUpdateTS == 0 { 684 v, err := mvcc.getValue(m.Key, startTS, kvrpcpb.IsolationLevel_SI, req.Context.ResolvedLocks) 685 if err != nil { 686 errs = append(errs, err) 687 anyError = true 688 continue 689 } 690 if v != nil { 691 err = &ErrKeyAlreadyExist{ 692 Key: m.Key, 693 } 694 errs = append(errs, err) 695 anyError = true 696 continue 697 } 698 } 699 if op == kvrpcpb.Op_CheckNotExists { 700 continue 701 } 702 isPessimisticLock := len(req.IsPessimisticLock) > 0 && req.IsPessimisticLock[i] 703 err = prewriteMutation(mvcc.db, batch, m, startTS, primary, ttl, txnSize, isPessimisticLock, minCommitTS) 704 errs = append(errs, err) 705 if err != nil { 706 anyError = true 707 } 708 } 709 if anyError { 710 return errs 711 } 712 if err := mvcc.db.Write(batch, nil); err != nil { 713 return []error{err} 714 } 715 716 return errs 717 } 718 719 func checkConflictValue(iter *Iterator, m *kvrpcpb.Mutation, forUpdateTS uint64, startTS uint64, getVal bool) ([]byte, error) { 720 dec := &valueDecoder{ 721 expectKey: m.Key, 722 } 723 ok, err := dec.Decode(iter) 724 if err != nil { 725 return nil, errors.Trace(err) 726 } 727 if !ok { 728 return nil, nil 729 } 730 731 // Note that it's a write conflict here, even if the value is a rollback one, or a op_lock record 732 if dec.value.commitTS > forUpdateTS { 733 return nil, &ErrConflict{ 734 StartTS: forUpdateTS, 735 ConflictTS: dec.value.startTS, 736 ConflictCommitTS: dec.value.commitTS, 737 Key: m.Key, 738 } 739 } 740 741 needGetVal := getVal 742 needCheckAssertion := m.Assertion == kvrpcpb.Assertion_NotExist 743 needCheckRollback := true 744 var retVal []byte 745 // do the check or get operations within one iteration to make CI faster 746 for ok { 747 if needCheckRollback { 748 if dec.value.valueType == typeRollback { 749 if dec.value.commitTS == startTS { 750 logutil.BgLogger().Warn("rollback value found", 751 zap.Uint64("txnID", startTS), 752 
zap.Int32("rollbacked.valueType", int32(dec.value.valueType)), 753 zap.Uint64("rollbacked.startTS", dec.value.startTS), 754 zap.Uint64("rollbacked.commitTS", dec.value.commitTS)) 755 return nil, &ErrAlreadyRollbacked{ 756 startTS: startTS, 757 key: m.Key, 758 } 759 } 760 } 761 if dec.value.commitTS < startTS { 762 needCheckRollback = false 763 } 764 } 765 if needCheckAssertion { 766 if dec.value.valueType == typePut || dec.value.valueType == typeLock { 767 if m.Op == kvrpcpb.Op_PessimisticLock { 768 return nil, &ErrKeyAlreadyExist{ 769 Key: m.Key, 770 } 771 } 772 } else if dec.value.valueType == typeDelete { 773 needCheckAssertion = false 774 } 775 } 776 if needGetVal { 777 if dec.value.valueType == typeDelete || dec.value.valueType == typePut { 778 retVal = dec.value.value 779 needGetVal = false 780 } 781 } 782 if !needCheckAssertion && !needGetVal && !needCheckRollback { 783 break 784 } 785 ok, err = dec.Decode(iter) 786 if err != nil { 787 return nil, errors.Trace(err) 788 } 789 } 790 if getVal { 791 return retVal, nil 792 } 793 return nil, nil 794 } 795 796 func prewriteMutation(db *leveldb.DB, batch *leveldb.Batch, 797 mutation *kvrpcpb.Mutation, startTS uint64, 798 primary []byte, ttl uint64, txnSize uint64, 799 isPessimisticLock bool, minCommitTS uint64) error { 800 startKey := mvccEncode(mutation.Key, lockVer) 801 iter := newIterator(db, &util.Range{ 802 Start: startKey, 803 }) 804 defer iter.Release() 805 806 dec := lockDecoder{ 807 expectKey: mutation.Key, 808 } 809 ok, err := dec.Decode(iter) 810 if err != nil { 811 return errors.Trace(err) 812 } 813 if ok { 814 if dec.lock.startTS != startTS { 815 if isPessimisticLock { 816 // NOTE: A special handling. 817 // When pessimistic txn prewrite meets lock, set the TTL = 0 means 818 // telling TiDB to rollback the transaction **unconditionly**. 
819 dec.lock.ttl = 0 820 } 821 return dec.lock.lockErr(mutation.Key) 822 } 823 if dec.lock.op != kvrpcpb.Op_PessimisticLock { 824 return nil 825 } 826 // Overwrite the pessimistic lock. 827 if ttl < dec.lock.ttl { 828 // Maybe ttlManager has already set the lock TTL, don't decrease it. 829 ttl = dec.lock.ttl 830 } 831 if minCommitTS < dec.lock.minCommitTS { 832 // The minCommitTS has been pushed forward. 833 minCommitTS = dec.lock.minCommitTS 834 } 835 } else { 836 if isPessimisticLock { 837 return ErrAbort("pessimistic lock not found") 838 } 839 _, err = checkConflictValue(iter, mutation, startTS, startTS, false) 840 if err != nil { 841 return err 842 } 843 } 844 845 op := mutation.GetOp() 846 if op == kvrpcpb.Op_Insert { 847 op = kvrpcpb.Op_Put 848 } 849 lock := mvccLock{ 850 startTS: startTS, 851 primary: primary, 852 value: mutation.Value, 853 op: op, 854 ttl: ttl, 855 txnSize: txnSize, 856 } 857 // Write minCommitTS on the primary lock. 858 if bytes.Equal(primary, mutation.GetKey()) { 859 lock.minCommitTS = minCommitTS 860 } 861 862 writeKey := mvccEncode(mutation.Key, lockVer) 863 writeValue, err := lock.MarshalBinary() 864 if err != nil { 865 return errors.Trace(err) 866 } 867 868 batch.Put(writeKey, writeValue) 869 return nil 870 } 871 872 // Commit implements the MVCCStore interface. 
873 func (mvcc *MVCCLevelDB) Commit(keys [][]byte, startTS, commitTS uint64) error { 874 mvcc.mu.Lock() 875 defer func() { 876 mvcc.mu.Unlock() 877 mvcc.deadlockDetector.CleanUp(startTS) 878 }() 879 880 batch := &leveldb.Batch{} 881 for _, k := range keys { 882 err := commitKey(mvcc.db, batch, k, startTS, commitTS) 883 if err != nil { 884 return errors.Trace(err) 885 } 886 } 887 return mvcc.db.Write(batch, nil) 888 } 889 890 func commitKey(db *leveldb.DB, batch *leveldb.Batch, key []byte, startTS, commitTS uint64) error { 891 startKey := mvccEncode(key, lockVer) 892 iter := newIterator(db, &util.Range{ 893 Start: startKey, 894 }) 895 defer iter.Release() 896 897 dec := lockDecoder{ 898 expectKey: key, 899 } 900 ok, err := dec.Decode(iter) 901 if err != nil { 902 return errors.Trace(err) 903 } 904 if !ok || dec.lock.startTS != startTS { 905 // If the lock of this transaction is not found, or the lock is replaced by 906 // another transaction, check commit information of this transaction. 907 c, ok, err1 := getTxnCommitInfo(iter, key, startTS) 908 if err1 != nil { 909 return errors.Trace(err1) 910 } 911 if ok && c.valueType != typeRollback { 912 // c.valueType != typeRollback means the transaction is already committed, do nothing. 913 return nil 914 } 915 return ErrRetryable("txn not found") 916 } 917 // Reject the commit request whose commitTS is less than minCommiTS. 
918 if dec.lock.minCommitTS > commitTS { 919 return &ErrCommitTSExpired{ 920 kvrpcpb.CommitTsExpired{ 921 StartTs: startTS, 922 AttemptedCommitTs: commitTS, 923 Key: key, 924 MinCommitTs: dec.lock.minCommitTS, 925 }} 926 } 927 928 if err = commitLock(batch, dec.lock, key, startTS, commitTS); err != nil { 929 return errors.Trace(err) 930 } 931 return nil 932 } 933 934 func commitLock(batch *leveldb.Batch, lock mvccLock, key []byte, startTS, commitTS uint64) error { 935 var valueType mvccValueType 936 if lock.op == kvrpcpb.Op_Put { 937 valueType = typePut 938 } else if lock.op == kvrpcpb.Op_Lock { 939 valueType = typeLock 940 } else { 941 valueType = typeDelete 942 } 943 value := mvccValue{ 944 valueType: valueType, 945 startTS: startTS, 946 commitTS: commitTS, 947 value: lock.value, 948 } 949 writeKey := mvccEncode(key, commitTS) 950 writeValue, err := value.MarshalBinary() 951 if err != nil { 952 return errors.Trace(err) 953 } 954 batch.Put(writeKey, writeValue) 955 batch.Delete(mvccEncode(key, lockVer)) 956 return nil 957 } 958 959 // Rollback implements the MVCCStore interface. 960 func (mvcc *MVCCLevelDB) Rollback(keys [][]byte, startTS uint64) error { 961 mvcc.mu.Lock() 962 defer func() { 963 mvcc.mu.Unlock() 964 mvcc.deadlockDetector.CleanUp(startTS) 965 }() 966 967 batch := &leveldb.Batch{} 968 for _, k := range keys { 969 err := rollbackKey(mvcc.db, batch, k, startTS) 970 if err != nil { 971 return errors.Trace(err) 972 } 973 } 974 return mvcc.db.Write(batch, nil) 975 } 976 977 func rollbackKey(db *leveldb.DB, batch *leveldb.Batch, key []byte, startTS uint64) error { 978 startKey := mvccEncode(key, lockVer) 979 iter := newIterator(db, &util.Range{ 980 Start: startKey, 981 }) 982 defer iter.Release() 983 984 if iter.Valid() { 985 dec := lockDecoder{ 986 expectKey: key, 987 } 988 ok, err := dec.Decode(iter) 989 if err != nil { 990 return errors.Trace(err) 991 } 992 // If current transaction's lock exist. 
993 if ok && dec.lock.startTS == startTS { 994 if err = rollbackLock(batch, key, startTS); err != nil { 995 return errors.Trace(err) 996 } 997 return nil 998 } 999 1000 // If current transaction's lock not exist. 1001 // If commit info of current transaction exist. 1002 c, ok, err := getTxnCommitInfo(iter, key, startTS) 1003 if err != nil { 1004 return errors.Trace(err) 1005 } 1006 if ok { 1007 // If current transaction is already committed. 1008 if c.valueType != typeRollback { 1009 return ErrAlreadyCommitted(c.commitTS) 1010 } 1011 // If current transaction is already rollback. 1012 return nil 1013 } 1014 } 1015 1016 // If current transaction is not prewritted before. 1017 value := mvccValue{ 1018 valueType: typeRollback, 1019 startTS: startTS, 1020 commitTS: startTS, 1021 } 1022 writeKey := mvccEncode(key, startTS) 1023 writeValue, err := value.MarshalBinary() 1024 if err != nil { 1025 return errors.Trace(err) 1026 } 1027 batch.Put(writeKey, writeValue) 1028 return nil 1029 } 1030 1031 func writeRollback(batch *leveldb.Batch, key []byte, startTS uint64) error { 1032 tomb := mvccValue{ 1033 valueType: typeRollback, 1034 startTS: startTS, 1035 commitTS: startTS, 1036 } 1037 writeKey := mvccEncode(key, startTS) 1038 writeValue, err := tomb.MarshalBinary() 1039 if err != nil { 1040 return errors.Trace(err) 1041 } 1042 batch.Put(writeKey, writeValue) 1043 return nil 1044 } 1045 1046 func rollbackLock(batch *leveldb.Batch, key []byte, startTS uint64) error { 1047 err := writeRollback(batch, key, startTS) 1048 if err != nil { 1049 return err 1050 } 1051 batch.Delete(mvccEncode(key, lockVer)) 1052 return nil 1053 } 1054 1055 func getTxnCommitInfo(iter *Iterator, expectKey []byte, startTS uint64) (mvccValue, bool, error) { 1056 for iter.Valid() { 1057 dec := valueDecoder{ 1058 expectKey: expectKey, 1059 } 1060 ok, err := dec.Decode(iter) 1061 if err != nil || !ok { 1062 return mvccValue{}, ok, errors.Trace(err) 1063 } 1064 1065 if dec.value.startTS == startTS { 1066 
return dec.value, true, nil 1067 } 1068 } 1069 return mvccValue{}, false, nil 1070 } 1071 1072 // Cleanup implements the MVCCStore interface. 1073 // Cleanup API is deprecated, use CheckTxnStatus instead. 1074 func (mvcc *MVCCLevelDB) Cleanup(key []byte, startTS, currentTS uint64) error { 1075 mvcc.mu.Lock() 1076 defer func() { 1077 mvcc.mu.Unlock() 1078 mvcc.deadlockDetector.CleanUp(startTS) 1079 }() 1080 1081 batch := &leveldb.Batch{} 1082 startKey := mvccEncode(key, lockVer) 1083 iter := newIterator(mvcc.db, &util.Range{ 1084 Start: startKey, 1085 }) 1086 defer iter.Release() 1087 1088 if iter.Valid() { 1089 dec := lockDecoder{ 1090 expectKey: key, 1091 } 1092 ok, err := dec.Decode(iter) 1093 if err != nil { 1094 return err 1095 } 1096 // If current transaction's lock exists. 1097 if ok && dec.lock.startTS == startTS { 1098 // If the lock has already outdated, clean up it. 1099 if currentTS == 0 || uint64(oracle.ExtractPhysical(dec.lock.startTS))+dec.lock.ttl < uint64(oracle.ExtractPhysical(currentTS)) { 1100 if err = rollbackLock(batch, key, startTS); err != nil { 1101 return err 1102 } 1103 return mvcc.db.Write(batch, nil) 1104 } 1105 1106 // Otherwise, return a locked error with the TTL information. 1107 return dec.lock.lockErr(key) 1108 } 1109 1110 // If current transaction's lock does not exist. 1111 // If the commit information of the current transaction exist. 1112 c, ok, err := getTxnCommitInfo(iter, key, startTS) 1113 if err != nil { 1114 return errors.Trace(err) 1115 } 1116 if ok { 1117 // If the current transaction has already committed. 1118 if c.valueType != typeRollback { 1119 return ErrAlreadyCommitted(c.commitTS) 1120 } 1121 // If the current transaction has already rollbacked. 1122 return nil 1123 } 1124 } 1125 1126 // If current transaction is not prewritted before. 
1127 value := mvccValue{ 1128 valueType: typeRollback, 1129 startTS: startTS, 1130 commitTS: startTS, 1131 } 1132 writeKey := mvccEncode(key, startTS) 1133 writeValue, err := value.MarshalBinary() 1134 if err != nil { 1135 return errors.Trace(err) 1136 } 1137 batch.Put(writeKey, writeValue) 1138 return nil 1139 } 1140 1141 // CheckTxnStatus checks the primary lock of a transaction to decide its status. 1142 // The return values are (ttl, commitTS, err): 1143 // If the transaction is active, this function returns the ttl of the lock; 1144 // If the transaction is committed, this function returns the commitTS; 1145 // If the transaction is rollbacked, this function returns (0, 0, nil) 1146 // Note that CheckTxnStatus may also push forward the `minCommitTS` of the 1147 // transaction, so it's not simply a read-only operation. 1148 // 1149 // primaryKey + lockTS together could locate the primary lock. 1150 // callerStartTS is the start ts of reader transaction. 1151 // currentTS is the current ts, but it may be inaccurate. Just use it to check TTL. 1152 func (mvcc *MVCCLevelDB) CheckTxnStatus(primaryKey []byte, lockTS, callerStartTS, currentTS uint64, 1153 rollbackIfNotExist bool, resolvingPessimisticLock bool) (ttl uint64, commitTS uint64, action kvrpcpb.Action, err error) { 1154 mvcc.mu.Lock() 1155 defer mvcc.mu.Unlock() 1156 1157 action = kvrpcpb.Action_NoAction 1158 1159 startKey := mvccEncode(primaryKey, lockVer) 1160 iter := newIterator(mvcc.db, &util.Range{ 1161 Start: startKey, 1162 }) 1163 defer iter.Release() 1164 1165 if iter.Valid() { 1166 dec := lockDecoder{ 1167 expectKey: primaryKey, 1168 } 1169 var ok bool 1170 ok, err = dec.Decode(iter) 1171 if err != nil { 1172 err = errors.Trace(err) 1173 return 1174 } 1175 // If current transaction's lock exists. 1176 if ok && dec.lock.startTS == lockTS { 1177 lock := dec.lock 1178 batch := &leveldb.Batch{} 1179 1180 // If the lock has already outdated, clean up it. 
1181 if uint64(oracle.ExtractPhysical(lock.startTS))+lock.ttl < uint64(oracle.ExtractPhysical(currentTS)) { 1182 if resolvingPessimisticLock && lock.op == kvrpcpb.Op_PessimisticLock { 1183 action = kvrpcpb.Action_TTLExpirePessimisticRollback 1184 if err = pessimisticRollbackKey(mvcc.db, batch, primaryKey, lock.startTS, lock.forUpdateTS); err != nil { 1185 err = errors.Trace(err) 1186 return 1187 } 1188 } else { 1189 action = kvrpcpb.Action_TTLExpireRollback 1190 if err = rollbackLock(batch, primaryKey, lockTS); err != nil { 1191 err = errors.Trace(err) 1192 return 1193 } 1194 } 1195 if err = mvcc.db.Write(batch, nil); err != nil { 1196 err = errors.Trace(err) 1197 return 1198 } 1199 return 0, 0, action, nil 1200 } 1201 1202 // If the caller_start_ts is MaxUint64, it's a point get in the autocommit transaction. 1203 // Even though the MinCommitTs is not pushed, the point get can ingore the lock 1204 // next time because it's not committed. So we pretend it has been pushed. 1205 if callerStartTS == math.MaxUint64 { 1206 action = kvrpcpb.Action_MinCommitTSPushed 1207 1208 // If this is a large transaction and the lock is active, push forward the minCommitTS. 1209 // lock.minCommitTS == 0 may be a secondary lock, or not a large transaction (old version TiDB). 1210 } else if lock.minCommitTS > 0 { 1211 action = kvrpcpb.Action_MinCommitTSPushed 1212 // We *must* guarantee the invariance lock.minCommitTS >= callerStartTS + 1 1213 if lock.minCommitTS < callerStartTS+1 { 1214 lock.minCommitTS = callerStartTS + 1 1215 1216 // Remove this condition should not affect correctness. 1217 // We do it because pushing forward minCommitTS as far as possible could avoid 1218 // the lock been pushed again several times, and thus reduce write operations. 
1219 if lock.minCommitTS < currentTS { 1220 lock.minCommitTS = currentTS 1221 } 1222 1223 writeKey := mvccEncode(primaryKey, lockVer) 1224 writeValue, err1 := lock.MarshalBinary() 1225 if err1 != nil { 1226 err = errors.Trace(err1) 1227 return 1228 } 1229 batch.Put(writeKey, writeValue) 1230 if err1 = mvcc.db.Write(batch, nil); err1 != nil { 1231 err = errors.Trace(err1) 1232 return 1233 } 1234 } 1235 } 1236 1237 return lock.ttl, 0, action, nil 1238 } 1239 1240 // If current transaction's lock does not exist. 1241 // If the commit info of the current transaction exists. 1242 c, ok, err1 := getTxnCommitInfo(iter, primaryKey, lockTS) 1243 if err1 != nil { 1244 err = errors.Trace(err1) 1245 return 1246 } 1247 if ok { 1248 // If current transaction is already committed. 1249 if c.valueType != typeRollback { 1250 return 0, c.commitTS, action, nil 1251 } 1252 // If current transaction is already rollback. 1253 return 0, 0, kvrpcpb.Action_NoAction, nil 1254 } 1255 } 1256 1257 // If current transaction is not prewritted before, it may be pessimistic lock. 1258 // When pessimistic txn rollback statement, it may not leave a 'rollbacked' tombstone. 1259 1260 // Or maybe caused by concurrent prewrite operation. 1261 // Especially in the non-block reading case, the secondary lock is likely to be 1262 // written before the primary lock. 1263 1264 if rollbackIfNotExist { 1265 if resolvingPessimisticLock { 1266 return 0, 0, kvrpcpb.Action_LockNotExistDoNothing, nil 1267 } 1268 // Write rollback record, but not delete the lock on the primary key. 
There may exist lock which has 1269 // different lock.startTS with input lockTS, for example the primary key could be already 1270 // locked by the caller transaction, deleting this key will mistakenly delete the lock on 1271 // primary key, see case TestSingleStatementRollback in session_test suite for example 1272 batch := &leveldb.Batch{} 1273 if err1 := writeRollback(batch, primaryKey, lockTS); err1 != nil { 1274 err = errors.Trace(err1) 1275 return 1276 } 1277 if err1 := mvcc.db.Write(batch, nil); err1 != nil { 1278 err = errors.Trace(err1) 1279 return 1280 } 1281 return 0, 0, kvrpcpb.Action_LockNotExistRollback, nil 1282 } 1283 1284 return 0, 0, action, &ErrTxnNotFound{kvrpcpb.TxnNotFound{ 1285 StartTs: lockTS, 1286 PrimaryKey: primaryKey, 1287 }} 1288 } 1289 1290 // TxnHeartBeat implements the MVCCStore interface. 1291 func (mvcc *MVCCLevelDB) TxnHeartBeat(key []byte, startTS uint64, adviseTTL uint64) (uint64, error) { 1292 mvcc.mu.Lock() 1293 defer mvcc.mu.Unlock() 1294 1295 startKey := mvccEncode(key, lockVer) 1296 iter := newIterator(mvcc.db, &util.Range{ 1297 Start: startKey, 1298 }) 1299 defer iter.Release() 1300 1301 if iter.Valid() { 1302 dec := lockDecoder{ 1303 expectKey: key, 1304 } 1305 ok, err := dec.Decode(iter) 1306 if err != nil { 1307 return 0, errors.Trace(err) 1308 } 1309 if ok && dec.lock.startTS == startTS { 1310 if !bytes.Equal(dec.lock.primary, key) { 1311 return 0, errors.New("txnHeartBeat on non-primary key, the code should not run here") 1312 } 1313 1314 lock := dec.lock 1315 batch := &leveldb.Batch{} 1316 // Increase the ttl of this transaction. 
1317 if adviseTTL > lock.ttl { 1318 lock.ttl = adviseTTL 1319 writeKey := mvccEncode(key, lockVer) 1320 writeValue, err := lock.MarshalBinary() 1321 if err != nil { 1322 return 0, errors.Trace(err) 1323 } 1324 batch.Put(writeKey, writeValue) 1325 if err = mvcc.db.Write(batch, nil); err != nil { 1326 return 0, errors.Trace(err) 1327 } 1328 } 1329 return lock.ttl, nil 1330 } 1331 } 1332 return 0, errors.New("lock doesn't exist") 1333 } 1334 1335 // ScanLock implements the MVCCStore interface. 1336 func (mvcc *MVCCLevelDB) ScanLock(startKey, endKey []byte, maxTS uint64) ([]*kvrpcpb.LockInfo, error) { 1337 mvcc.mu.RLock() 1338 defer mvcc.mu.RUnlock() 1339 1340 iter, currKey, err := newScanIterator(mvcc.db, startKey, endKey) 1341 defer iter.Release() 1342 if err != nil { 1343 return nil, errors.Trace(err) 1344 } 1345 1346 var locks []*kvrpcpb.LockInfo 1347 for iter.Valid() { 1348 dec := lockDecoder{expectKey: currKey} 1349 ok, err := dec.Decode(iter) 1350 if err != nil { 1351 return nil, errors.Trace(err) 1352 } 1353 if ok && dec.lock.startTS <= maxTS { 1354 locks = append(locks, &kvrpcpb.LockInfo{ 1355 PrimaryLock: dec.lock.primary, 1356 LockVersion: dec.lock.startTS, 1357 Key: currKey, 1358 }) 1359 } 1360 1361 skip := skipDecoder{currKey: currKey} 1362 _, err = skip.Decode(iter) 1363 if err != nil { 1364 return nil, errors.Trace(err) 1365 } 1366 currKey = skip.currKey 1367 } 1368 return locks, nil 1369 } 1370 1371 // ResolveLock implements the MVCCStore interface. 
1372 func (mvcc *MVCCLevelDB) ResolveLock(startKey, endKey []byte, startTS, commitTS uint64) error { 1373 mvcc.mu.Lock() 1374 defer mvcc.mu.Unlock() 1375 1376 iter, currKey, err := newScanIterator(mvcc.db, startKey, endKey) 1377 defer iter.Release() 1378 if err != nil { 1379 return errors.Trace(err) 1380 } 1381 1382 batch := &leveldb.Batch{} 1383 for iter.Valid() { 1384 dec := lockDecoder{expectKey: currKey} 1385 ok, err := dec.Decode(iter) 1386 if err != nil { 1387 return errors.Trace(err) 1388 } 1389 if ok && dec.lock.startTS == startTS { 1390 if commitTS > 0 { 1391 err = commitLock(batch, dec.lock, currKey, startTS, commitTS) 1392 } else { 1393 err = rollbackLock(batch, currKey, startTS) 1394 } 1395 if err != nil { 1396 return errors.Trace(err) 1397 } 1398 } 1399 1400 skip := skipDecoder{currKey: currKey} 1401 _, err = skip.Decode(iter) 1402 if err != nil { 1403 return errors.Trace(err) 1404 } 1405 currKey = skip.currKey 1406 } 1407 return mvcc.db.Write(batch, nil) 1408 } 1409 1410 // BatchResolveLock implements the MVCCStore interface. 
1411 func (mvcc *MVCCLevelDB) BatchResolveLock(startKey, endKey []byte, txnInfos map[uint64]uint64) error { 1412 mvcc.mu.Lock() 1413 defer mvcc.mu.Unlock() 1414 1415 iter, currKey, err := newScanIterator(mvcc.db, startKey, endKey) 1416 defer iter.Release() 1417 if err != nil { 1418 return errors.Trace(err) 1419 } 1420 1421 batch := &leveldb.Batch{} 1422 for iter.Valid() { 1423 dec := lockDecoder{expectKey: currKey} 1424 ok, err := dec.Decode(iter) 1425 if err != nil { 1426 return errors.Trace(err) 1427 } 1428 if ok { 1429 if commitTS, ok := txnInfos[dec.lock.startTS]; ok { 1430 if commitTS > 0 { 1431 err = commitLock(batch, dec.lock, currKey, dec.lock.startTS, commitTS) 1432 } else { 1433 err = rollbackLock(batch, currKey, dec.lock.startTS) 1434 } 1435 if err != nil { 1436 return errors.Trace(err) 1437 } 1438 } 1439 } 1440 1441 skip := skipDecoder{currKey: currKey} 1442 _, err = skip.Decode(iter) 1443 if err != nil { 1444 return errors.Trace(err) 1445 } 1446 currKey = skip.currKey 1447 } 1448 return mvcc.db.Write(batch, nil) 1449 } 1450 1451 // GC implements the MVCCStore interface 1452 func (mvcc *MVCCLevelDB) GC(startKey, endKey []byte, safePoint uint64) error { 1453 mvcc.mu.Lock() 1454 defer mvcc.mu.Unlock() 1455 1456 iter, currKey, err := newScanIterator(mvcc.db, startKey, endKey) 1457 defer iter.Release() 1458 if err != nil { 1459 return errors.Trace(err) 1460 } 1461 1462 // Mock TiKV usually doesn't need to process large amount of data. So write it in a single batch. 
1463 batch := &leveldb.Batch{} 1464 1465 for iter.Valid() { 1466 lockDec := lockDecoder{expectKey: currKey} 1467 ok, err := lockDec.Decode(iter) 1468 if err != nil { 1469 return errors.Trace(err) 1470 } 1471 if ok && lockDec.lock.startTS <= safePoint { 1472 return errors.Errorf( 1473 "key %+q has lock with startTs %v which is under safePoint %v", 1474 currKey, 1475 lockDec.lock.startTS, 1476 safePoint) 1477 } 1478 1479 keepNext := true 1480 dec := valueDecoder{expectKey: currKey} 1481 1482 for iter.Valid() { 1483 ok, err := dec.Decode(iter) 1484 if err != nil { 1485 return errors.Trace(err) 1486 } 1487 1488 if !ok { 1489 // Go to the next key 1490 currKey, _, err = mvccDecode(iter.Key()) 1491 if err != nil { 1492 return errors.Trace(err) 1493 } 1494 break 1495 } 1496 1497 if dec.value.commitTS > safePoint { 1498 continue 1499 } 1500 1501 if dec.value.valueType == typePut || dec.value.valueType == typeDelete { 1502 // Keep the latest version if it's `typePut` 1503 if !keepNext || dec.value.valueType == typeDelete { 1504 batch.Delete(mvccEncode(currKey, dec.value.commitTS)) 1505 } 1506 keepNext = false 1507 } else { 1508 // Delete all other types 1509 batch.Delete(mvccEncode(currKey, dec.value.commitTS)) 1510 } 1511 } 1512 } 1513 1514 return mvcc.db.Write(batch, nil) 1515 } 1516 1517 // DeleteRange implements the MVCCStore interface. 1518 func (mvcc *MVCCLevelDB) DeleteRange(startKey, endKey []byte) error { 1519 return mvcc.doRawDeleteRange(codec.EncodeBytes(nil, startKey), codec.EncodeBytes(nil, endKey)) 1520 } 1521 1522 // Close calls leveldb's Close to free resources. 1523 func (mvcc *MVCCLevelDB) Close() error { 1524 return mvcc.db.Close() 1525 } 1526 1527 // RawPut implements the RawKV interface. 
1528 func (mvcc *MVCCLevelDB) RawPut(key, value []byte) { 1529 mvcc.mu.Lock() 1530 defer mvcc.mu.Unlock() 1531 1532 if value == nil { 1533 value = []byte{} 1534 } 1535 terror.Log(mvcc.db.Put(key, value, nil)) 1536 } 1537 1538 // RawBatchPut implements the RawKV interface 1539 func (mvcc *MVCCLevelDB) RawBatchPut(keys, values [][]byte) { 1540 mvcc.mu.Lock() 1541 defer mvcc.mu.Unlock() 1542 1543 batch := &leveldb.Batch{} 1544 for i, key := range keys { 1545 value := values[i] 1546 if value == nil { 1547 value = []byte{} 1548 } 1549 batch.Put(key, value) 1550 } 1551 terror.Log(mvcc.db.Write(batch, nil)) 1552 } 1553 1554 // RawGet implements the RawKV interface. 1555 func (mvcc *MVCCLevelDB) RawGet(key []byte) []byte { 1556 mvcc.mu.Lock() 1557 defer mvcc.mu.Unlock() 1558 1559 ret, err := mvcc.db.Get(key, nil) 1560 terror.Log(err) 1561 return ret 1562 } 1563 1564 // RawBatchGet implements the RawKV interface. 1565 func (mvcc *MVCCLevelDB) RawBatchGet(keys [][]byte) [][]byte { 1566 mvcc.mu.Lock() 1567 defer mvcc.mu.Unlock() 1568 1569 values := make([][]byte, 0, len(keys)) 1570 for _, key := range keys { 1571 value, err := mvcc.db.Get(key, nil) 1572 terror.Log(err) 1573 values = append(values, value) 1574 } 1575 return values 1576 } 1577 1578 // RawDelete implements the RawKV interface. 1579 func (mvcc *MVCCLevelDB) RawDelete(key []byte) { 1580 mvcc.mu.Lock() 1581 defer mvcc.mu.Unlock() 1582 1583 terror.Log(mvcc.db.Delete(key, nil)) 1584 } 1585 1586 // RawBatchDelete implements the RawKV interface. 1587 func (mvcc *MVCCLevelDB) RawBatchDelete(keys [][]byte) { 1588 mvcc.mu.Lock() 1589 defer mvcc.mu.Unlock() 1590 1591 batch := &leveldb.Batch{} 1592 for _, key := range keys { 1593 batch.Delete(key) 1594 } 1595 terror.Log(mvcc.db.Write(batch, nil)) 1596 } 1597 1598 // RawScan implements the RawKV interface. 
1599 func (mvcc *MVCCLevelDB) RawScan(startKey, endKey []byte, limit int) []Pair { 1600 mvcc.mu.Lock() 1601 defer mvcc.mu.Unlock() 1602 1603 iter := mvcc.db.NewIterator(&util.Range{ 1604 Start: startKey, 1605 }, nil) 1606 1607 var pairs []Pair 1608 for iter.Next() && len(pairs) < limit { 1609 key := iter.Key() 1610 value := iter.Value() 1611 err := iter.Error() 1612 if len(endKey) > 0 && bytes.Compare(key, endKey) >= 0 { 1613 break 1614 } 1615 pairs = append(pairs, Pair{ 1616 Key: append([]byte{}, key...), 1617 Value: append([]byte{}, value...), 1618 Err: err, 1619 }) 1620 } 1621 return pairs 1622 } 1623 1624 // RawReverseScan implements the RawKV interface. 1625 // Scan the range of [endKey, startKey) 1626 // It doesn't support Scanning from "", because locating the last Region is not yet implemented. 1627 func (mvcc *MVCCLevelDB) RawReverseScan(startKey, endKey []byte, limit int) []Pair { 1628 mvcc.mu.Lock() 1629 defer mvcc.mu.Unlock() 1630 1631 iter := mvcc.db.NewIterator(&util.Range{ 1632 Limit: startKey, 1633 }, nil) 1634 1635 success := iter.Last() 1636 1637 var pairs []Pair 1638 for success && len(pairs) < limit { 1639 key := iter.Key() 1640 value := iter.Value() 1641 err := iter.Error() 1642 if bytes.Compare(key, endKey) < 0 { 1643 break 1644 } 1645 pairs = append(pairs, Pair{ 1646 Key: append([]byte{}, key...), 1647 Value: append([]byte{}, value...), 1648 Err: err, 1649 }) 1650 success = iter.Prev() 1651 } 1652 return pairs 1653 } 1654 1655 // RawDeleteRange implements the RawKV interface. 1656 func (mvcc *MVCCLevelDB) RawDeleteRange(startKey, endKey []byte) { 1657 terror.Log(mvcc.doRawDeleteRange(startKey, endKey)) 1658 } 1659 1660 // doRawDeleteRange deletes all keys in a range and return the error if any. 
1661 func (mvcc *MVCCLevelDB) doRawDeleteRange(startKey, endKey []byte) error { 1662 mvcc.mu.Lock() 1663 defer mvcc.mu.Unlock() 1664 1665 batch := &leveldb.Batch{} 1666 1667 iter := mvcc.db.NewIterator(&util.Range{ 1668 Start: startKey, 1669 Limit: endKey, 1670 }, nil) 1671 for iter.Next() { 1672 batch.Delete(iter.Key()) 1673 } 1674 1675 return mvcc.db.Write(batch, nil) 1676 } 1677 1678 // MvccGetByStartTS implements the MVCCDebugger interface. 1679 func (mvcc *MVCCLevelDB) MvccGetByStartTS(starTS uint64) (*kvrpcpb.MvccInfo, []byte) { 1680 mvcc.mu.RLock() 1681 defer mvcc.mu.RUnlock() 1682 1683 var key []byte 1684 iter := newIterator(mvcc.db, nil) 1685 defer iter.Release() 1686 1687 // find the first committed key for which `start_ts` equals to `ts` 1688 for iter.Valid() { 1689 var value mvccValue 1690 err := value.UnmarshalBinary(iter.Value()) 1691 if err == nil && value.startTS == starTS { 1692 if _, key, err = codec.DecodeBytes(iter.Key(), nil); err != nil { 1693 return nil, nil 1694 } 1695 break 1696 } 1697 iter.Next() 1698 } 1699 1700 return mvcc.mvccGetByKeyNoLock(key), key 1701 } 1702 1703 var valueTypeOpMap = [...]kvrpcpb.Op{ 1704 typePut: kvrpcpb.Op_Put, 1705 typeDelete: kvrpcpb.Op_Del, 1706 typeRollback: kvrpcpb.Op_Rollback, 1707 typeLock: kvrpcpb.Op_Lock, 1708 } 1709 1710 // MvccGetByKey implements the MVCCDebugger interface. 1711 func (mvcc *MVCCLevelDB) MvccGetByKey(key []byte) *kvrpcpb.MvccInfo { 1712 mvcc.mu.RLock() 1713 defer mvcc.mu.RUnlock() 1714 1715 return mvcc.mvccGetByKeyNoLock(key) 1716 } 1717 1718 // mvcc.mu.RLock must be held before calling mvccGetByKeyNoLock. 
1719 func (mvcc *MVCCLevelDB) mvccGetByKeyNoLock(key []byte) *kvrpcpb.MvccInfo { 1720 info := &kvrpcpb.MvccInfo{} 1721 1722 startKey := mvccEncode(key, lockVer) 1723 iter := newIterator(mvcc.db, &util.Range{ 1724 Start: startKey, 1725 }) 1726 defer iter.Release() 1727 1728 dec1 := lockDecoder{expectKey: key} 1729 ok, err := dec1.Decode(iter) 1730 if err != nil { 1731 return nil 1732 } 1733 if ok { 1734 var shortValue []byte 1735 if isShortValue(dec1.lock.value) { 1736 shortValue = dec1.lock.value 1737 } 1738 info.Lock = &kvrpcpb.MvccLock{ 1739 Type: dec1.lock.op, 1740 StartTs: dec1.lock.startTS, 1741 Primary: dec1.lock.primary, 1742 ShortValue: shortValue, 1743 } 1744 } 1745 1746 dec2 := valueDecoder{expectKey: key} 1747 var writes []*kvrpcpb.MvccWrite 1748 var values []*kvrpcpb.MvccValue 1749 for iter.Valid() { 1750 ok, err := dec2.Decode(iter) 1751 if err != nil { 1752 return nil 1753 } 1754 if !ok { 1755 iter.Next() 1756 break 1757 } 1758 var shortValue []byte 1759 if isShortValue(dec2.value.value) { 1760 shortValue = dec2.value.value 1761 } 1762 write := &kvrpcpb.MvccWrite{ 1763 Type: valueTypeOpMap[dec2.value.valueType], 1764 StartTs: dec2.value.startTS, 1765 CommitTs: dec2.value.commitTS, 1766 ShortValue: shortValue, 1767 } 1768 writes = append(writes, write) 1769 value := &kvrpcpb.MvccValue{ 1770 StartTs: dec2.value.startTS, 1771 Value: dec2.value.value, 1772 } 1773 values = append(values, value) 1774 } 1775 info.Writes = writes 1776 info.Values = values 1777 1778 return info 1779 } 1780 1781 const shortValueMaxLen = 64 1782 1783 func isShortValue(value []byte) bool { 1784 return len(value) <= shortValueMaxLen 1785 }