github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/causetstore/milevadb-server/2pc.go (about)

     1  // Copyright 2020 WHTCORPS INC, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package einsteindb
    15  
    16  import (
    17  	"bytes"
    18  	"context"
    19  	"math"
    20  	"strings"
    21  	"sync"
    22  	"sync/atomic"
    23  	"time"
    24  	"unsafe"
    25  
    26  	"github.com/prometheus/client_golang/prometheus"
    27  	"github.com/whtcorpsinc/BerolinaSQL/terror"
    28  	pb "github.com/whtcorpsinc/ekvproto/pkg/ekvrpcpb"
    29  	"github.com/whtcorpsinc/errors"
    30  	"github.com/whtcorpsinc/failpoint"
    31  	"github.com/whtcorpsinc/fidelpb/go-binlog"
    32  	"github.com/whtcorpsinc/milevadb/blockcodec"
    33  	"github.com/whtcorpsinc/milevadb/causetstore/einsteindb/einsteindbrpc"
    34  	"github.com/whtcorpsinc/milevadb/causetstore/einsteindb/oracle"
    35  	"github.com/whtcorpsinc/milevadb/config"
    36  	"github.com/whtcorpsinc/milevadb/ekv"
    37  	"github.com/whtcorpsinc/milevadb/metrics"
    38  	"github.com/whtcorpsinc/milevadb/soliton/execdetails"
    39  	"github.com/whtcorpsinc/milevadb/soliton/logutil"
    40  	"github.com/whtcorpsinc/milevadb/stochastikctx/binloginfo"
    41  	"go.uber.org/zap"
    42  )
    43  
// twoPhaseCommitCausetAction is the contract implemented by every step the
// twoPhaseCommitter can run over a set of mutations. The concrete actions
// referenced in this file are actionPrewrite, actionCommit, actionCleanup,
// actionPessimisticLock and actionPessimisticRollback.
type twoPhaseCommitCausetAction interface {
	// handleSingleBatch applies the action to one batch of mutations.
	handleSingleBatch(*twoPhaseCommitter, *Backoffer, batchMutations) error
	// EinsteinDBTxnRegionsNumHistogram returns the observer that
	// doCausetActionOnGroupMutations feeds with the number of region groups.
	EinsteinDBTxnRegionsNumHistogram() prometheus.Observer
	// String names the action; it is logged under the "action type" field.
	String() string
}
    49  
// Metric handles with their label value bound once at package initialisation.
//   - einsteindbSecondaryLockCleanupFailureCounterRollback ("rollback") is
//     incremented by cleanup when the background cleanup of mutations fails.
//   - EinsteinDBTxnHeartBeatHistogramOK ("ok") and
//     EinsteinDBTxnHeartBeatHistogramError ("err") are observed by
//     ttlManager.keepAlive with the duration, in seconds, of each
//     sendTxnHeartBeat call, depending on whether it succeeded.
var (
	einsteindbSecondaryLockCleanupFailureCounterRollback = metrics.EinsteinDBSecondaryLockCleanupFailureCounter.WithLabelValues("rollback")
	EinsteinDBTxnHeartBeatHistogramOK                    = metrics.EinsteinDBTxnHeartBeatHistogram.WithLabelValues("ok")
	EinsteinDBTxnHeartBeatHistogramError                 = metrics.EinsteinDBTxnHeartBeatHistogram.WithLabelValues("err")
)
    55  
// Global variable set by config file.
var (
	// ManagedLockTTL is a lock TTL expressed in milliseconds (default 20000).
	// txnLockTTL uses it as the upper bound of the size-based TTL, and
	// ttlManager.keepAlive ticks at half of it and adds it to the transaction
	// uptime to compute the TTL advertised by each heartbeat.
	ManagedLockTTL uint64 = 20000 // 20s
)
    60  
    61  // metricsTag returns detail tag for metrics.
    62  func metricsTag(action string) string {
    63  	return "2pc_" + action
    64  }
    65  
// twoPhaseCommitter executes a two-phase commit protocol.
//
// A committer is created per transaction by newTwoPhaseCommitter, has its
// write set collected by initKeysAndMutations, and is driven by execute.
type twoPhaseCommitter struct {
	// Core transaction state. mutations, lockTTL, priority, txnSize and
	// hasNoNeedCommitKeys are filled in by initKeysAndMutations; detail is
	// only touched through setDetail/getDetail in this file.
	causetstore         *einsteindbStore
	txn                 *einsteindbTxn
	startTS             uint64
	mutations           CommitterMutations
	lockTTL             uint64
	commitTS            uint64
	priority            pb.CommandPri
	connID              uint64 // connID is used for log.
	cleanWg             sync.WaitGroup
	detail              unsafe.Pointer
	txnSize             int
	hasNoNeedCommitKeys bool

	// primaryKey is the first key whose op is not Op_CheckNotExists, chosen by
	// initKeysAndMutations; primary() falls back to mutations.keys[0] while it
	// is empty.
	primaryKey     []byte
	forUFIDelateTS uint64

	// mu guards the commit outcome read by the deferred block of execute to
	// decide whether written keys must be cleaned up.
	mu struct {
		sync.RWMutex
		undeterminedErr error // undeterminedErr saves the rpc error we encounter when commit primary key.
		committed       bool
	}
	// syncLog is set from getTxnSyncLog in initKeysAndMutations.
	syncLog bool
	// For pessimistic transaction
	isPessimistic bool
	isFirstLock   bool
	// regionTxnSize stores the number of keys involved in each region
	regionTxnSize map[uint64]int
	// Used by pessimistic transaction and large transaction.
	ttlManager

	// testingKnobs are test hooks. When both channels are non-nil,
	// doCausetActionOnGroupMutations sends on acAfterCommitPrimary and then
	// waits on bkAfterCommitPrimary right after committing the primary batch.
	testingKnobs struct {
		acAfterCommitPrimary chan struct{}
		bkAfterCommitPrimary chan struct{}
		noFallBack           bool
	}

	// useAsyncCommit is a 0/1 flag accessed atomically through
	// isAsyncCommit/setAsyncCommit. minCommitTS is used by execute as the
	// commit timestamp of an async commit and must be non-zero after prewrite.
	useAsyncCommit uint32
	minCommitTS    uint64
}
   107  
// CommitterMutations contains transaction operations.
//
// It is stored as parallel slices: Push and MergeMutations append to all four
// slices together, so index i of ops, keys, values and isPessimisticLock
// describes the same mutation. subRange tolerates nil ops, values and
// isPessimisticLock, so only keys is assumed to be always populated.
type CommitterMutations struct {
	ops               []pb.Op
	keys              [][]byte
	values            [][]byte
	isPessimisticLock []bool
}
   115  
   116  // NewCommiterMutations creates a CommitterMutations object with sizeHint reserved.
   117  func NewCommiterMutations(sizeHint int) CommitterMutations {
   118  	return CommitterMutations{
   119  		ops:               make([]pb.Op, 0, sizeHint),
   120  		keys:              make([][]byte, 0, sizeHint),
   121  		values:            make([][]byte, 0, sizeHint),
   122  		isPessimisticLock: make([]bool, 0, sizeHint),
   123  	}
   124  }
   125  
   126  func (c *CommitterMutations) subRange(from, to int) CommitterMutations {
   127  	var res CommitterMutations
   128  	res.keys = c.keys[from:to]
   129  	if c.ops != nil {
   130  		res.ops = c.ops[from:to]
   131  	}
   132  	if c.values != nil {
   133  		res.values = c.values[from:to]
   134  	}
   135  	if c.isPessimisticLock != nil {
   136  		res.isPessimisticLock = c.isPessimisticLock[from:to]
   137  	}
   138  	return res
   139  }
   140  
   141  // Push another mutation into mutations.
   142  func (c *CommitterMutations) Push(op pb.Op, key []byte, value []byte, isPessimisticLock bool) {
   143  	c.ops = append(c.ops, op)
   144  	c.keys = append(c.keys, key)
   145  	c.values = append(c.values, value)
   146  	c.isPessimisticLock = append(c.isPessimisticLock, isPessimisticLock)
   147  }
   148  
// len returns the number of mutations, measured as the number of keys.
func (c *CommitterMutations) len() int {
	return len(c.keys)
}
   152  
// GetKeys returns the keys.
// The internal slice is returned directly, not a copy.
func (c *CommitterMutations) GetKeys() [][]byte {
	return c.keys
}
   157  
// GetOps returns the key ops.
// The internal slice is returned directly, not a copy.
func (c *CommitterMutations) GetOps() []pb.Op {
	return c.ops
}
   162  
// GetValues returns the key values.
// The internal slice is returned directly, not a copy.
func (c *CommitterMutations) GetValues() [][]byte {
	return c.values
}
   167  
// GetPessimisticFlags returns the key pessimistic flags.
// The internal slice is returned directly, not a copy.
func (c *CommitterMutations) GetPessimisticFlags() []bool {
	return c.isPessimisticLock
}
   172  
   173  // MergeMutations append input mutations into current mutations.
   174  func (c *CommitterMutations) MergeMutations(mutations CommitterMutations) {
   175  	c.ops = append(c.ops, mutations.ops...)
   176  	c.keys = append(c.keys, mutations.keys...)
   177  	c.values = append(c.values, mutations.values...)
   178  	c.isPessimisticLock = append(c.isPessimisticLock, mutations.isPessimisticLock...)
   179  }
   180  
   181  // newTwoPhaseCommitter creates a twoPhaseCommitter.
   182  func newTwoPhaseCommitter(txn *einsteindbTxn, connID uint64) (*twoPhaseCommitter, error) {
   183  	return &twoPhaseCommitter{
   184  		causetstore:   txn.causetstore,
   185  		txn:           txn,
   186  		startTS:       txn.StartTS(),
   187  		connID:        connID,
   188  		regionTxnSize: map[uint64]int{},
   189  		ttlManager: ttlManager{
   190  			ch: make(chan struct{}),
   191  		},
   192  		isPessimistic: txn.IsPessimistic(),
   193  	}, nil
   194  }
   195  
   196  func (c *twoPhaseCommitter) extractKeyExistsErr(key ekv.Key) error {
   197  	if !c.txn.us.HasPresumeKeyNotExists(key) {
   198  		return errors.Errorf("conn %d, existErr for key:%s should not be nil", c.connID, key)
   199  	}
   200  
   201  	_, handle, err := blockcodec.DecodeRecordKey(key)
   202  	if err == nil {
   203  		if handle.IsInt() {
   204  			return ekv.ErrKeyExists.FastGenByArgs(handle.String(), "PRIMARY")
   205  		}
   206  		trimLen := 0
   207  		for i := 0; i < handle.NumDefCauss(); i++ {
   208  			trimLen += len(handle.EncodedDefCaus(i))
   209  		}
   210  		values, err := blockcodec.DecodeValuesBytesToStrings(handle.Encoded()[:trimLen])
   211  		if err == nil {
   212  			return ekv.ErrKeyExists.FastGenByArgs(strings.Join(values, "-"), "PRIMARY")
   213  		}
   214  	}
   215  
   216  	blockID, indexID, indexValues, err := blockcodec.DecodeIndexKey(key)
   217  	if err == nil {
   218  		return ekv.ErrKeyExists.FastGenByArgs(strings.Join(indexValues, "-"), c.txn.us.GetIndexName(blockID, indexID))
   219  	}
   220  
   221  	return ekv.ErrKeyExists.FastGenByArgs(key.String(), "UNKNOWN")
   222  }
   223  
   224  func (c *twoPhaseCommitter) initKeysAndMutations() error {
   225  	var size, putCnt, delCnt, lockCnt, checkCnt int
   226  
   227  	txn := c.txn
   228  	memBuf := txn.GetMemBuffer()
   229  	sizeHint := txn.us.GetMemBuffer().Len()
   230  	mutations := NewCommiterMutations(sizeHint)
   231  	c.isPessimistic = txn.IsPessimistic()
   232  
   233  	var err error
   234  	for it := memBuf.IterWithFlags(nil, nil); it.Valid(); err = it.Next() {
   235  		_ = err
   236  		key := it.Key()
   237  		flags := it.Flags()
   238  		var value []byte
   239  		var op pb.Op
   240  
   241  		if !it.HasValue() {
   242  			if !flags.HasLocked() {
   243  				continue
   244  			}
   245  			op = pb.Op_Lock
   246  			lockCnt++
   247  		} else {
   248  			value = it.Value()
   249  			if len(value) > 0 {
   250  				if blockcodec.IsUntouchedIndexKValue(key, value) {
   251  					continue
   252  				}
   253  				op = pb.Op_Put
   254  				if flags.HasPresumeKeyNotExists() {
   255  					op = pb.Op_Insert
   256  				}
   257  				putCnt++
   258  			} else {
   259  				if !txn.IsPessimistic() && flags.HasPresumeKeyNotExists() {
   260  					// delete-your-writes keys in optimistic txn need check not exists in prewrite-phase
   261  					// due to `Op_CheckNotExists` doesn't prewrite dagger, so mark those keys should not be used in commit-phase.
   262  					op = pb.Op_CheckNotExists
   263  					checkCnt++
   264  					memBuf.UFIDelateFlags(key, ekv.SetNoNeedCommit)
   265  				} else {
   266  					// normal delete keys in optimistic txn can be delete without not exists checking
   267  					// delete-your-writes keys in pessimistic txn can ensure must be no exists so can directly delete them
   268  					op = pb.Op_Del
   269  					delCnt++
   270  				}
   271  			}
   272  		}
   273  
   274  		var isPessimistic bool
   275  		if flags.HasLocked() {
   276  			isPessimistic = c.isPessimistic
   277  		}
   278  		mutations.Push(op, key, value, isPessimistic)
   279  		size += len(key) + len(value)
   280  
   281  		if len(c.primaryKey) == 0 && op != pb.Op_CheckNotExists {
   282  			c.primaryKey = key
   283  		}
   284  	}
   285  
   286  	if mutations.len() == 0 {
   287  		return nil
   288  	}
   289  	c.txnSize = size
   290  
   291  	if size > int(ekv.TxnTotalSizeLimit) {
   292  		return ekv.ErrTxnTooLarge.GenWithStackByArgs(size)
   293  	}
   294  	const logEntryCount = 10000
   295  	const logSize = 4 * 1024 * 1024 // 4MB
   296  	if mutations.len() > logEntryCount || size > logSize {
   297  		blockID := blockcodec.DecodeTableID(mutations.keys[0])
   298  		logutil.BgLogger().Info("[BIG_TXN]",
   299  			zap.Uint64("con", c.connID),
   300  			zap.Int64("causet ID", blockID),
   301  			zap.Int("size", size),
   302  			zap.Int("keys", mutations.len()),
   303  			zap.Int("puts", putCnt),
   304  			zap.Int("dels", delCnt),
   305  			zap.Int("locks", lockCnt),
   306  			zap.Int("checks", checkCnt),
   307  			zap.Uint64("txnStartTS", txn.startTS))
   308  	}
   309  
   310  	// Sanity check for startTS.
   311  	if txn.StartTS() == math.MaxUint64 {
   312  		err = errors.Errorf("try to commit with invalid txnStartTS: %d", txn.StartTS())
   313  		logutil.BgLogger().Error("commit failed",
   314  			zap.Uint64("conn", c.connID),
   315  			zap.Error(err))
   316  		return errors.Trace(err)
   317  	}
   318  
   319  	commitDetail := &execdetails.CommitDetails{WriteSize: size, WriteKeys: mutations.len()}
   320  	metrics.EinsteinDBTxnWriteKVCountHistogram.Observe(float64(commitDetail.WriteKeys))
   321  	metrics.EinsteinDBTxnWriteSizeHistogram.Observe(float64(commitDetail.WriteSize))
   322  	c.hasNoNeedCommitKeys = checkCnt > 0
   323  	c.mutations = mutations
   324  	c.lockTTL = txnLockTTL(txn.startTime, size)
   325  	c.priority = getTxnPriority(txn)
   326  	c.syncLog = getTxnSyncLog(txn)
   327  	c.setDetail(commitDetail)
   328  	return nil
   329  }
   330  
   331  func (c *twoPhaseCommitter) primary() []byte {
   332  	if len(c.primaryKey) == 0 {
   333  		return c.mutations.keys[0]
   334  	}
   335  	return c.primaryKey
   336  }
   337  
   338  // asyncSecondaries returns all keys that must be checked in the recovery phase of an async commit.
   339  func (c *twoPhaseCommitter) asyncSecondaries() [][]byte {
   340  	secondaries := make([][]byte, 0, len(c.mutations.keys))
   341  	for i, k := range c.mutations.keys {
   342  		if bytes.Equal(k, c.primary()) || c.mutations.ops[i] == pb.Op_CheckNotExists {
   343  			continue
   344  		}
   345  		secondaries = append(secondaries, k)
   346  	}
   347  	return secondaries
   348  }
   349  
// bytesPerMiB is the number of bytes in one mebibyte; txnLockTTL divides the
// transaction size by it to obtain the size in MiB.
const bytesPerMiB = 1024 * 1024
   351  
   352  func txnLockTTL(startTime time.Time, txnSize int) uint64 {
   353  	// Increase lockTTL for large transactions.
   354  	// The formula is `ttl = ttlFactor * sqrt(sizeInMiB)`.
   355  	// When writeSize is less than 256KB, the base ttl is defaultTTL (3s);
   356  	// When writeSize is 1MiB, 4MiB, or 10MiB, ttl is 6s, 12s, 20s correspondingly;
   357  	lockTTL := defaultLockTTL
   358  	if txnSize >= txnCommitBatchSize {
   359  		sizeMiB := float64(txnSize) / bytesPerMiB
   360  		lockTTL = uint64(float64(ttlFactor) * math.Sqrt(sizeMiB))
   361  		if lockTTL < defaultLockTTL {
   362  			lockTTL = defaultLockTTL
   363  		}
   364  		if lockTTL > ManagedLockTTL {
   365  			lockTTL = ManagedLockTTL
   366  		}
   367  	}
   368  
   369  	// Increase lockTTL by the transaction's read time.
   370  	// When resolving a dagger, we compare current ts and startTS+lockTTL to decide whether to clean up. If a txn
   371  	// takes a long time to read, increasing its TTL will help to prevent it from been aborted soon after prewrite.
   372  	elapsed := time.Since(startTime) / time.Millisecond
   373  	return lockTTL + uint64(elapsed)
   374  }
   375  
   376  var preSplitDetectThreshold uint32 = 100000
   377  var preSplitSizeThreshold uint32 = 32 << 20
   378  
   379  // doCausetActionOnMutations groups keys into primary batch and secondary batches, if primary batch exists in the key,
   380  // it does action on primary batch first, then on secondary batches. If action is commit, secondary batches
   381  // is done in background goroutine.
   382  func (c *twoPhaseCommitter) doCausetActionOnMutations(bo *Backoffer, action twoPhaseCommitCausetAction, mutations CommitterMutations) error {
   383  	if mutations.len() == 0 {
   384  		return nil
   385  	}
   386  	groups, err := c.groupMutations(bo, mutations)
   387  	if err != nil {
   388  		return errors.Trace(err)
   389  	}
   390  
   391  	return c.doCausetActionOnGroupMutations(bo, action, groups)
   392  }
   393  
   394  // groupMutations groups mutations by region, then checks for any large groups and in that case pre-splits the region.
   395  func (c *twoPhaseCommitter) groupMutations(bo *Backoffer, mutations CommitterMutations) ([]groupedMutations, error) {
   396  	groups, err := c.causetstore.regionCache.GroupSortedMutationsByRegion(bo, mutations)
   397  	if err != nil {
   398  		return nil, errors.Trace(err)
   399  	}
   400  
   401  	// Pre-split regions to avoid too much write workload into a single region.
   402  	// In the large transaction case, this operation is important to avoid EinsteinDB 'server is busy' error.
   403  	var didPreSplit bool
   404  	preSplitDetectThresholdVal := atomic.LoadUint32(&preSplitDetectThreshold)
   405  	for _, group := range groups {
   406  		if uint32(group.mutations.len()) >= preSplitDetectThresholdVal {
   407  			logutil.BgLogger().Info("2PC detect large amount of mutations on a single region",
   408  				zap.Uint64("region", group.region.GetID()),
   409  				zap.Int("mutations count", group.mutations.len()))
   410  			// Use context.Background, this time should not add up to Backoffer.
   411  			if c.causetstore.preSplitRegion(context.Background(), group) {
   412  				didPreSplit = true
   413  			}
   414  		}
   415  	}
   416  	// Reload region cache again.
   417  	if didPreSplit {
   418  		groups, err = c.causetstore.regionCache.GroupSortedMutationsByRegion(bo, mutations)
   419  		if err != nil {
   420  			return nil, errors.Trace(err)
   421  		}
   422  	}
   423  
   424  	return groups, nil
   425  }
   426  
// doCausetActionOnGroupMutations splits groups into batches (there is one group per region, and potentially many batches per group, but all mutations
// in a batch will belong to the same region) and runs action over them.
//
// Ordering rules implemented below:
//   - When the first batch is the primary batch and the action is a
//     non-async commit, a cleanup or a pessimistic lock, the primary batch is
//     handled first and synchronously; a failure there aborts the whole call.
//   - For a non-retry, non-async commit the remaining batches are committed
//     in a background goroutine with its own Backoffer; failures there are
//     only logged and counted, and this function returns nil.
//   - In every other case the remaining batches are handled synchronously and
//     their error is returned.
func (c *twoPhaseCommitter) doCausetActionOnGroupMutations(bo *Backoffer, action twoPhaseCommitCausetAction, groups []groupedMutations) error {
	action.EinsteinDBTxnRegionsNumHistogram().Observe(float64(len(groups)))

	// Batches are sized by key length only, except for prewrite (below),
	// which also counts the value length.
	var sizeFunc = c.keySize

	switch act := action.(type) {
	case actionPrewrite:
		// Do not uFIDelate regionTxnSize on retries. They are not used when building a PrewriteRequest.
		if len(bo.errors) == 0 {
			for _, group := range groups {
				c.regionTxnSize[group.region.id] = group.mutations.len()
			}
		}
		sizeFunc = c.keyValueSize
		atomic.AddInt32(&c.getDetail().PrewriteRegionNum, int32(len(groups)))
	case actionPessimisticLock:
		if act.LockCtx.Stats != nil {
			act.LockCtx.Stats.RegionNum = int32(len(groups))
		}
	}

	// Cut every region group into batches bounded by txnCommitBatchSize.
	// NOTE(review): setPrimary presumably moves the batch holding the primary
	// key to the front and reports whether one was found — helper not shown here.
	batchBuilder := newBatched(c.primary())
	for _, group := range groups {
		batchBuilder.appendBatchMutationsBySize(group.region, group.mutations, sizeFunc, txnCommitBatchSize)
	}
	firstIsPrimary := batchBuilder.setPrimary()

	actionCommit, actionIsCommit := action.(actionCommit)
	_, actionIsCleanup := action.(actionCleanup)
	_, actionIsPessimiticLock := action.(actionPessimisticLock)

	var err error
	// Failpoint (test-only): for a pessimistic lock with a primary batch and
	// connID > 0, run the action on either all batches or only the primary
	// batch, depending on the injected string, and return immediately.
	failpoint.Inject("skipKeyReturnOK", func(val failpoint.Value) {
		valStr, ok := val.(string)
		if ok && c.connID > 0 {
			if firstIsPrimary && actionIsPessimiticLock {
				logutil.Logger(bo.ctx).Warn("pessimisticLock failpoint", zap.String("valStr", valStr))
				switch valStr {
				case "pessimisticLockSkipPrimary":
					err = c.doCausetActionOnBatches(bo, action, batchBuilder.allBatches())
					failpoint.Return(err)
				case "pessimisticLockSkipSecondary":
					err = c.doCausetActionOnBatches(bo, action, batchBuilder.primaryBatch())
					failpoint.Return(err)
				}
			}
		}
	})
	// Failpoint (test-only): turn a pessimistic rollback into a no-op when
	// connID > 0.
	failpoint.Inject("pessimisticRollbackDoNth", func() {
		_, actionIsPessimisticRollback := action.(actionPessimisticRollback)
		if actionIsPessimisticRollback && c.connID > 0 {
			logutil.Logger(bo.ctx).Warn("pessimisticRollbackDoNth failpoint")
			failpoint.Return(nil)
		}
	})

	if firstIsPrimary &&
		((actionIsCommit && !c.isAsyncCommit()) || actionIsCleanup || actionIsPessimiticLock) {
		// primary should be committed(not async commit)/cleanup/pessimistically locked first
		err = c.doCausetActionOnBatches(bo, action, batchBuilder.primaryBatch())
		if err != nil {
			return errors.Trace(err)
		}
		// Test hook: signal that the primary was committed, then wait for the
		// test to let the commit continue. Only active when both channels are set.
		if actionIsCommit && c.testingKnobs.bkAfterCommitPrimary != nil && c.testingKnobs.acAfterCommitPrimary != nil {
			c.testingKnobs.acAfterCommitPrimary <- struct{}{}
			<-c.testingKnobs.bkAfterCommitPrimary
		}
		// The primary batch is done; drop it so allBatches() below only
		// yields the remaining batches.
		batchBuilder.forgetPrimary()
	}
	// Already spawned a goroutine for async commit transaction.
	if actionIsCommit && !actionCommit.retry && !c.isAsyncCommit() {
		// Secondary keys are committed in the background with a Backoffer that
		// is detached from the caller's context.
		secondaryBo := NewBackofferWithVars(context.Background(), int(atomic.LoadUint64(&CommitMaxBackoff)), c.txn.vars)
		go func() {
			e := c.doCausetActionOnBatches(secondaryBo, action, batchBuilder.allBatches())
			if e != nil {
				logutil.BgLogger().Debug("2PC async doCausetActionOnBatches",
					zap.Uint64("conn", c.connID),
					zap.Stringer("action type", action),
					zap.Error(e))
				einsteindbSecondaryLockCleanupFailureCounterCommit.Inc()
			}
		}()
	} else {
		err = c.doCausetActionOnBatches(bo, action, batchBuilder.allBatches())
	}
	return errors.Trace(err)
}
   516  
   517  // doCausetActionOnBatches does action to batches in parallel.
   518  func (c *twoPhaseCommitter) doCausetActionOnBatches(bo *Backoffer, action twoPhaseCommitCausetAction, batches []batchMutations) error {
   519  	if len(batches) == 0 {
   520  		return nil
   521  	}
   522  
   523  	noNeedFork := len(batches) == 1
   524  	if !noNeedFork {
   525  		if ac, ok := action.(actionCommit); ok && ac.retry {
   526  			noNeedFork = true
   527  		}
   528  	}
   529  	if noNeedFork {
   530  		for _, b := range batches {
   531  			e := action.handleSingleBatch(c, bo, b)
   532  			if e != nil {
   533  				logutil.BgLogger().Debug("2PC doCausetActionOnBatches failed",
   534  					zap.Uint64("conn", c.connID),
   535  					zap.Stringer("action type", action),
   536  					zap.Error(e),
   537  					zap.Uint64("txnStartTS", c.startTS))
   538  				return errors.Trace(e)
   539  			}
   540  		}
   541  		return nil
   542  	}
   543  	rateLim := len(batches)
   544  	// Set rateLim here for the large transaction.
   545  	// If the rate limit is too high, einsteindb will report service is busy.
   546  	// If the rate limit is too low, we can't full utilize the einsteindb's throughput.
   547  	// TODO: Find a self-adaptive way to control the rate limit here.
   548  	if rateLim > config.GetGlobalConfig().Performance.CommitterConcurrency {
   549  		rateLim = config.GetGlobalConfig().Performance.CommitterConcurrency
   550  	}
   551  	batchInterlockingDirectorate := newBatchInterlockingDirectorate(rateLim, c, action, bo)
   552  	err := batchInterlockingDirectorate.process(batches)
   553  	return errors.Trace(err)
   554  }
   555  
// keyValueSize is the batch size function that counts both the key and the
// value length; doCausetActionOnGroupMutations selects it for prewrite.
func (c *twoPhaseCommitter) keyValueSize(key, value []byte) int {
	return len(key) + len(value)
}
   559  
// keySize is the batch size function that counts only the key length; value
// is ignored. It is the default used by doCausetActionOnGroupMutations.
func (c *twoPhaseCommitter) keySize(key, value []byte) int {
	return len(key)
}
   563  
// ttlManagerState is the lifecycle state of a ttlManager. It is stored as a
// uint32 so that run and close can switch it with atomic compare-and-swap.
type ttlManagerState uint32

const (
	// stateUninitialized is the zero value: keepAlive has not been started.
	stateUninitialized ttlManagerState = iota
	// stateRunning is set by run once the keepAlive goroutine is started.
	stateRunning
	// stateClosed is set by close, which also closes the stop channel.
	stateClosed
)
   571  
// ttlManager runs a background goroutine (keepAlive) that periodically sends
// transaction heartbeats to extend the TTL of the primary lock.
//
// state holds the lifecycle state and is changed only through atomic
// compare-and-swap in run and close. ch is closed by close to stop keepAlive.
// lockCtx is recorded by run and may be nil; keepAlive reads its Killed and
// LockExpired fields when they are set.
type ttlManager struct {
	state   ttlManagerState
	ch      chan struct{}
	lockCtx *ekv.LockCtx
}
   577  
   578  func (tm *ttlManager) run(c *twoPhaseCommitter, lockCtx *ekv.LockCtx) {
   579  	// Run only once.
   580  	if !atomic.CompareAndSwapUint32((*uint32)(&tm.state), uint32(stateUninitialized), uint32(stateRunning)) {
   581  		return
   582  	}
   583  	tm.lockCtx = lockCtx
   584  	go tm.keepAlive(c)
   585  }
   586  
   587  func (tm *ttlManager) close() {
   588  	if !atomic.CompareAndSwapUint32((*uint32)(&tm.state), uint32(stateRunning), uint32(stateClosed)) {
   589  		return
   590  	}
   591  	close(tm.ch)
   592  }
   593  
// keepAlive is the heartbeat loop started by run. On every tick it sends a
// TxnHeartBeat for the primary key of c, advertising a TTL of
// (transaction uptime + ManagedLockTTL).
//
// The loop exits when:
//   - tm.ch is closed (see close);
//   - the kill flag of the lock context is set;
//   - fetching a timestamp fails and the backoff budget is exhausted;
//   - the transaction uptime exceeds Performance.MaxTxnTTL, in which case the
//     LockExpired flag of the lock context is set for pessimistic
//     transactions;
//   - sending the heartbeat fails.
func (tm *ttlManager) keepAlive(c *twoPhaseCommitter) {
	// Ticker is set to 1/2 of the ManagedLockTTL.
	ticker := time.NewTicker(time.Duration(atomic.LoadUint64(&ManagedLockTTL)) * time.Millisecond / 2)
	defer ticker.Stop()
	for {
		select {
		case <-tm.ch:
			return
		case <-ticker.C:
			// If kill signal is received, the ttlManager should exit.
			if tm.lockCtx != nil && tm.lockCtx.Killed != nil && atomic.LoadUint32(tm.lockCtx.Killed) != 0 {
				return
			}
			// A fresh Backoffer is created for every tick, so the backoff
			// budget is not shared between ticks.
			bo := NewBackofferWithVars(context.Background(), pessimisticLockMaxBackoff, c.txn.vars)
			now, err := c.causetstore.GetOracle().GetTimestamp(bo.ctx)
			if err != nil {
				err1 := bo.Backoff(BoFIDelRPC, err)
				if err1 != nil {
					logutil.Logger(bo.ctx).Warn("keepAlive get tso fail",
						zap.Error(err))
					return
				}
				// Backoff succeeded: skip this heartbeat and wait for the next tick.
				continue
			}

			// Uptime is the difference between the physical parts of the
			// current timestamp and the start timestamp.
			uptime := uint64(oracle.ExtractPhysical(now) - oracle.ExtractPhysical(c.startTS))
			if uptime > config.GetGlobalConfig().Performance.MaxTxnTTL {
				// Checks maximum lifetime for the ttlManager, so when something goes wrong
				// the key will not be locked forever.
				logutil.Logger(bo.ctx).Info("ttlManager live up to its lifetime",
					zap.Uint64("txnStartTS", c.startTS),
					zap.Uint64("uptime", uptime),
					zap.Uint64("maxTxnTTL", config.GetGlobalConfig().Performance.MaxTxnTTL))
				metrics.EinsteinDBTTLLifeTimeReachCounter.Inc()
				// the pessimistic locks may expire if the ttl manager has timed out, set `LockExpired` flag
				// so that this transaction could only commit or rollback with no more statement executions
				if c.isPessimistic && tm.lockCtx != nil && tm.lockCtx.LockExpired != nil {
					atomic.StoreUint32(tm.lockCtx.LockExpired, 1)
				}
				return
			}

			newTTL := uptime + atomic.LoadUint64(&ManagedLockTTL)
			logutil.Logger(bo.ctx).Info("send TxnHeartBeat",
				zap.Uint64("startTS", c.startTS), zap.Uint64("newTTL", newTTL))
			startTime := time.Now()
			_, err = sendTxnHeartBeat(bo, c.causetstore, c.primary(), c.startTS, newTTL)
			if err != nil {
				// The heartbeat duration is recorded in the error or OK
				// histogram depending on the outcome.
				EinsteinDBTxnHeartBeatHistogramError.Observe(time.Since(startTime).Seconds())
				logutil.Logger(bo.ctx).Warn("send TxnHeartBeat failed",
					zap.Error(err),
					zap.Uint64("txnStartTS", c.startTS))
				return
			}
			EinsteinDBTxnHeartBeatHistogramOK.Observe(time.Since(startTime).Seconds())
		}
	}
}
   652  
   653  func sendTxnHeartBeat(bo *Backoffer, causetstore *einsteindbStore, primary []byte, startTS, ttl uint64) (uint64, error) {
   654  	req := einsteindbrpc.NewRequest(einsteindbrpc.CmdTxnHeartBeat, &pb.TxnHeartBeatRequest{
   655  		PrimaryLock:   primary,
   656  		StartVersion:  startTS,
   657  		AdviseLockTtl: ttl,
   658  	})
   659  	for {
   660  		loc, err := causetstore.GetRegionCache().LocateKey(bo, primary)
   661  		if err != nil {
   662  			return 0, errors.Trace(err)
   663  		}
   664  		resp, err := causetstore.SendReq(bo, req, loc.Region, readTimeoutShort)
   665  		if err != nil {
   666  			return 0, errors.Trace(err)
   667  		}
   668  		regionErr, err := resp.GetRegionError()
   669  		if err != nil {
   670  			return 0, errors.Trace(err)
   671  		}
   672  		if regionErr != nil {
   673  			err = bo.Backoff(BoRegionMiss, errors.New(regionErr.String()))
   674  			if err != nil {
   675  				return 0, errors.Trace(err)
   676  			}
   677  			continue
   678  		}
   679  		if resp.Resp == nil {
   680  			return 0, errors.Trace(ErrBodyMissing)
   681  		}
   682  		cmdResp := resp.Resp.(*pb.TxnHeartBeatResponse)
   683  		if keyErr := cmdResp.GetError(); keyErr != nil {
   684  			return 0, errors.Errorf("txn %d heartbeat fail, primary key = %v, err = %s", startTS, primary, keyErr.Abort)
   685  		}
   686  		return cmdResp.GetLockTtl(), nil
   687  	}
   688  }
   689  
   690  // checkAsyncCommit checks if async commit protocol is available for current transaction commit, true is returned if possible.
   691  func (c *twoPhaseCommitter) checkAsyncCommit() bool {
   692  	// TODO the keys limit need more tests, this value makes the unit test pass by now.
   693  	// Async commit is not compatible with Binlog because of the non unique timestamp issue.
   694  	if c.connID > 0 && config.GetGlobalConfig().EinsteinDBClient.EnableAsyncCommit &&
   695  		uint(len(c.mutations.keys)) <= config.GetGlobalConfig().EinsteinDBClient.AsyncCommitKeysLimit &&
   696  		!c.shouldWriteBinlog() {
   697  		return true
   698  	}
   699  	return false
   700  }
   701  
   702  func (c *twoPhaseCommitter) isAsyncCommit() bool {
   703  	return atomic.LoadUint32(&c.useAsyncCommit) > 0
   704  }
   705  
   706  func (c *twoPhaseCommitter) setAsyncCommit(val bool) {
   707  	if val {
   708  		atomic.StoreUint32(&c.useAsyncCommit, 1)
   709  	} else {
   710  		atomic.StoreUint32(&c.useAsyncCommit, 0)
   711  	}
   712  }
   713  
   714  func (c *twoPhaseCommitter) cleanup(ctx context.Context) {
   715  	c.cleanWg.Add(1)
   716  	go func() {
   717  		cleanupKeysCtx := context.WithValue(context.Background(), txnStartKey, ctx.Value(txnStartKey))
   718  		err := c.cleanupMutations(NewBackofferWithVars(cleanupKeysCtx, cleanupMaxBackoff, c.txn.vars), c.mutations)
   719  		if err != nil {
   720  			einsteindbSecondaryLockCleanupFailureCounterRollback.Inc()
   721  			logutil.Logger(ctx).Info("2PC cleanup failed",
   722  				zap.Error(err),
   723  				zap.Uint64("txnStartTS", c.startTS))
   724  		} else {
   725  			logutil.Logger(ctx).Info("2PC clean up done",
   726  				zap.Uint64("txnStartTS", c.startTS))
   727  		}
   728  		c.cleanWg.Done()
   729  	}()
   730  }
   731  
   732  // execute executes the two-phase commit protocol.
   733  func (c *twoPhaseCommitter) execute(ctx context.Context) (err error) {
   734  	var binlogSkipped bool
   735  	defer func() {
   736  		if !c.isAsyncCommit() {
   737  			// Always clean up all written keys if the txn does not commit.
   738  			c.mu.RLock()
   739  			committed := c.mu.committed
   740  			undetermined := c.mu.undeterminedErr != nil
   741  			c.mu.RUnlock()
   742  			if !committed && !undetermined {
   743  				c.cleanup(ctx)
   744  			}
   745  			c.txn.commitTS = c.commitTS
   746  			if binlogSkipped {
   747  				binloginfo.RemoveOneSkippedCommitter()
   748  			} else {
   749  				if err != nil {
   750  					c.writeFinishBinlog(ctx, binlog.BinlogType_Rollback, 0)
   751  				} else {
   752  					c.writeFinishBinlog(ctx, binlog.BinlogType_Commit, int64(c.commitTS))
   753  				}
   754  			}
   755  		} else {
   756  			// The error means the async commit should not succeed.
   757  			if err != nil {
   758  				c.cleanup(ctx)
   759  			}
   760  		}
   761  	}()
   762  
   763  	// Check async commit is available or not.
   764  	if c.checkAsyncCommit() {
   765  		c.setAsyncCommit(true)
   766  	}
   767  
   768  	binlogChan := c.prewriteBinlog(ctx)
   769  	prewriteBo := NewBackofferWithVars(ctx, PrewriteMaxBackoff, c.txn.vars)
   770  	start := time.Now()
   771  	err = c.prewriteMutations(prewriteBo, c.mutations)
   772  	commitDetail := c.getDetail()
   773  	commitDetail.PrewriteTime = time.Since(start)
   774  	if prewriteBo.totalSleep > 0 {
   775  		atomic.AddInt64(&commitDetail.CommitBackoffTime, int64(prewriteBo.totalSleep)*int64(time.Millisecond))
   776  		commitDetail.Mu.Lock()
   777  		commitDetail.Mu.BackoffTypes = append(commitDetail.Mu.BackoffTypes, prewriteBo.types...)
   778  		commitDetail.Mu.Unlock()
   779  	}
   780  	if binlogChan != nil {
   781  		startWaitBinlog := time.Now()
   782  		binlogWriteResult := <-binlogChan
   783  		commitDetail.WaitPrewriteBinlogTime = time.Since(startWaitBinlog)
   784  		if binlogWriteResult != nil {
   785  			binlogSkipped = binlogWriteResult.Skipped()
   786  			binlogErr := binlogWriteResult.GetError()
   787  			if binlogErr != nil {
   788  				return binlogErr
   789  			}
   790  		}
   791  	}
   792  	if err != nil {
   793  		logutil.Logger(ctx).Debug("2PC failed on prewrite",
   794  			zap.Error(err),
   795  			zap.Uint64("txnStartTS", c.startTS))
   796  		return errors.Trace(err)
   797  	}
   798  
   799  	// strip check_not_exists keys that no need to commit.
   800  	c.stripNoNeedCommitKeys()
   801  
   802  	var commitTS uint64
   803  	if c.isAsyncCommit() {
   804  		if c.minCommitTS == 0 {
   805  			err = errors.Errorf("conn %d invalid minCommitTS for async commit protocol after prewrite, startTS=%v", c.connID, c.startTS)
   806  			return errors.Trace(err)
   807  		}
   808  		commitTS = c.minCommitTS
   809  	} else {
   810  		start = time.Now()
   811  		logutil.Event(ctx, "start get commit ts")
   812  		commitTS, err = c.causetstore.getTimestampWithRetry(NewBackofferWithVars(ctx, tsoMaxBackoff, c.txn.vars))
   813  		if err != nil {
   814  			logutil.Logger(ctx).Warn("2PC get commitTS failed",
   815  				zap.Error(err),
   816  				zap.Uint64("txnStartTS", c.startTS))
   817  			return errors.Trace(err)
   818  		}
   819  		commitDetail.GetCommitTsTime = time.Since(start)
   820  		logutil.Event(ctx, "finish get commit ts")
   821  		logutil.SetTag(ctx, "commitTs", commitTS)
   822  	}
   823  
   824  	tryAmend := c.isPessimistic && c.connID > 0 && !c.isAsyncCommit() && c.txn.schemaAmender != nil
   825  	if !tryAmend {
   826  		_, _, err = c.checkSchemaValid(ctx, commitTS, c.txn.txnSchemaReplicant, false)
   827  		if err != nil {
   828  			return errors.Trace(err)
   829  		}
   830  	} else {
   831  		relatedSchemaChange, memAmended, err := c.checkSchemaValid(ctx, commitTS, c.txn.txnSchemaReplicant, true)
   832  		if err != nil {
   833  			return errors.Trace(err)
   834  		}
   835  		if memAmended {
   836  			// Get new commitTS and check schemaReplicant valid again.
   837  			newCommitTS, err := c.getCommitTS(ctx, commitDetail)
   838  			if err != nil {
   839  				return errors.Trace(err)
   840  			}
   841  			// If schemaReplicant check failed between commitTS and newCommitTs, report schemaReplicant change error.
   842  			_, _, err = c.checkSchemaValid(ctx, newCommitTS, relatedSchemaChange.LatestSchemaReplicant, false)
   843  			if err != nil {
   844  				return errors.Trace(err)
   845  			}
   846  			commitTS = newCommitTS
   847  		}
   848  	}
   849  	c.commitTS = commitTS
   850  
   851  	if c.causetstore.oracle.IsExpired(c.startTS, ekv.MaxTxnTimeUse) {
   852  		err = errors.Errorf("conn %d txn takes too much time, txnStartTS: %d, comm: %d",
   853  			c.connID, c.startTS, c.commitTS)
   854  		return err
   855  	}
   856  
   857  	if c.connID > 0 {
   858  		failpoint.Inject("beforeCommit", func() {})
   859  	}
   860  
   861  	if c.isAsyncCommit() {
   862  		// For async commit protocol, the commit is considered success here.
   863  		c.txn.commitTS = c.commitTS
   864  		logutil.Logger(ctx).Info("2PC will use async commit protocol to commit this txn", zap.Uint64("startTS", c.startTS),
   865  			zap.Uint64("commitTS", c.commitTS))
   866  		go func() {
   867  			failpoint.Inject("asyncCommitDoNothing", func() {
   868  				failpoint.Return()
   869  			})
   870  			defer c.ttlManager.close()
   871  			commitBo := NewBackofferWithVars(ctx, int(atomic.LoadUint64(&CommitMaxBackoff)), c.txn.vars)
   872  			err := c.commitMutations(commitBo, c.mutations)
   873  			if err != nil {
   874  				logutil.Logger(ctx).Warn("2PC async commit failed", zap.Uint64("connID", c.connID),
   875  					zap.Uint64("startTS", c.startTS), zap.Uint64("commitTS", c.commitTS), zap.Error(err))
   876  			}
   877  		}()
   878  		return nil
   879  	}
   880  	return c.commitTxn(ctx, commitDetail)
   881  }
   882  
   883  func (c *twoPhaseCommitter) commitTxn(ctx context.Context, commitDetail *execdetails.CommitDetails) error {
   884  	c.mutations.values = nil
   885  	c.txn.GetMemBuffer().DiscardValues()
   886  	start := time.Now()
   887  
   888  	commitBo := NewBackofferWithVars(ctx, int(atomic.LoadUint64(&CommitMaxBackoff)), c.txn.vars)
   889  	err := c.commitMutations(commitBo, c.mutations)
   890  	commitDetail.CommitTime = time.Since(start)
   891  	if commitBo.totalSleep > 0 {
   892  		atomic.AddInt64(&commitDetail.CommitBackoffTime, int64(commitBo.totalSleep)*int64(time.Millisecond))
   893  		commitDetail.Mu.Lock()
   894  		commitDetail.Mu.BackoffTypes = append(commitDetail.Mu.BackoffTypes, commitBo.types...)
   895  		commitDetail.Mu.Unlock()
   896  	}
   897  	if err != nil {
   898  		if undeterminedErr := c.getUndeterminedErr(); undeterminedErr != nil {
   899  			logutil.Logger(ctx).Error("2PC commit result undetermined",
   900  				zap.Error(err),
   901  				zap.NamedError("rpcErr", undeterminedErr),
   902  				zap.Uint64("txnStartTS", c.startTS))
   903  			err = errors.Trace(terror.ErrResultUndetermined)
   904  		}
   905  		if !c.mu.committed {
   906  			logutil.Logger(ctx).Debug("2PC failed on commit",
   907  				zap.Error(err),
   908  				zap.Uint64("txnStartTS", c.startTS))
   909  			return errors.Trace(err)
   910  		}
   911  		logutil.Logger(ctx).Debug("got some exceptions, but 2PC was still successful",
   912  			zap.Error(err),
   913  			zap.Uint64("txnStartTS", c.startTS))
   914  	}
   915  	return nil
   916  }
   917  
   918  func (c *twoPhaseCommitter) stripNoNeedCommitKeys() {
   919  	if !c.hasNoNeedCommitKeys {
   920  		return
   921  	}
   922  	m := &c.mutations
   923  	var newIdx int
   924  	for oldIdx := range m.keys {
   925  		key := m.keys[oldIdx]
   926  		flags, err := c.txn.GetMemBuffer().GetFlags(key)
   927  		if err == nil && flags.HasNoNeedCommit() {
   928  			continue
   929  		}
   930  		m.keys[newIdx] = key
   931  		if m.ops != nil {
   932  			m.ops[newIdx] = m.ops[oldIdx]
   933  		}
   934  		if m.values != nil {
   935  			m.values[newIdx] = m.values[oldIdx]
   936  		}
   937  		if m.isPessimisticLock != nil {
   938  			m.isPessimisticLock[newIdx] = m.isPessimisticLock[oldIdx]
   939  		}
   940  		newIdx++
   941  	}
   942  	c.mutations = m.subRange(0, newIdx)
   943  }
   944  
// SchemaVer is implemented by a schemaReplicant snapshot that can report its
// own version.
type SchemaVer interface {
	// SchemaMetaVersion returns the spacetime schemaReplicant version.
	SchemaMetaVersion() int64
}
   950  
// schemaLeaseChecker validates that the schemaReplicant a transaction started
// with is still usable at a given timestamp. checkSchemaValid obtains it from
// the transaction's ekv.SchemaChecker option.
type schemaLeaseChecker interface {
	// CheckBySchemaVer checks if the schemaReplicant has changed for the transaction related blocks between the startSchemaVer
	// and the schemaReplicant version at txnTS, all the related schemaReplicant changes will be returned.
	CheckBySchemaVer(txnTS uint64, startSchemaVer SchemaVer) (*RelatedSchemaChange, error)
}
   956  
// RelatedSchemaChange contains information about schemaReplicant diff between two schemaReplicant versions.
type RelatedSchemaChange struct {
	// PhyTblIDS is logged by checkSchemaValid as "causet ids";
	// presumably the physical table IDs touched by the change — confirm with the producer.
	PhyTblIDS []int64
	// CausetActionTypes is logged by checkSchemaValid as "action types".
	CausetActionTypes []uint64
	// LatestSchemaReplicant is used by execute as the starting schema when
	// re-validating after a successful amend.
	LatestSchemaReplicant SchemaVer
	// Amendable must be true for checkSchemaValid to attempt an amend.
	Amendable bool
}
   964  
   965  func (c *twoPhaseCommitter) tryAmendTxn(ctx context.Context, startSchemaReplicant SchemaVer, change *RelatedSchemaChange) (bool, error) {
   966  	addMutations, err := c.txn.schemaAmender.AmendTxn(ctx, startSchemaReplicant, change, c.mutations)
   967  	if err != nil {
   968  		return false, err
   969  	}
   970  	// Prewrite new mutations.
   971  	if addMutations != nil && len(addMutations.keys) > 0 {
   972  		prewriteBo := NewBackofferWithVars(ctx, PrewriteMaxBackoff, c.txn.vars)
   973  		err = c.prewriteMutations(prewriteBo, *addMutations)
   974  		if err != nil {
   975  			logutil.Logger(ctx).Warn("amend prewrite has failed", zap.Error(err), zap.Uint64("txnStartTS", c.startTS))
   976  			return false, err
   977  		}
   978  		logutil.Logger(ctx).Info("amend prewrite finished", zap.Uint64("txnStartTS", c.startTS))
   979  		return true, nil
   980  	}
   981  	return false, nil
   982  }
   983  
   984  func (c *twoPhaseCommitter) getCommitTS(ctx context.Context, commitDetail *execdetails.CommitDetails) (uint64, error) {
   985  	start := time.Now()
   986  	logutil.Event(ctx, "start get commit ts")
   987  	commitTS, err := c.causetstore.getTimestampWithRetry(NewBackofferWithVars(ctx, tsoMaxBackoff, c.txn.vars))
   988  	if err != nil {
   989  		logutil.Logger(ctx).Warn("2PC get commitTS failed",
   990  			zap.Error(err),
   991  			zap.Uint64("txnStartTS", c.startTS))
   992  		return 0, errors.Trace(err)
   993  	}
   994  	commitDetail.GetCommitTsTime = time.Since(start)
   995  	logutil.Event(ctx, "finish get commit ts")
   996  	logutil.SetTag(ctx, "commitTS", commitTS)
   997  
   998  	// Check commitTS.
   999  	if commitTS <= c.startTS {
  1000  		err = errors.Errorf("conn %d invalid transaction tso with txnStartTS=%v while txnCommitTS=%v",
  1001  			c.connID, c.startTS, commitTS)
  1002  		logutil.BgLogger().Error("invalid transaction", zap.Error(err))
  1003  		return 0, errors.Trace(err)
  1004  	}
  1005  	return commitTS, nil
  1006  }
  1007  
  1008  // checkSchemaValid checks if the schemaReplicant has changed, if tryAmend is set to true, committer will try to amend
  1009  // this transaction using the related schemaReplicant changes.
  1010  func (c *twoPhaseCommitter) checkSchemaValid(ctx context.Context, checkTS uint64, startSchemaReplicant SchemaVer,
  1011  	tryAmend bool) (*RelatedSchemaChange, bool, error) {
  1012  	checker, ok := c.txn.us.GetOption(ekv.SchemaChecker).(schemaLeaseChecker)
  1013  	if !ok {
  1014  		if c.connID > 0 {
  1015  			logutil.Logger(ctx).Warn("schemaLeaseChecker is not set for this transaction",
  1016  				zap.Uint64("connID", c.connID),
  1017  				zap.Uint64("startTS", c.startTS),
  1018  				zap.Uint64("commitTS", checkTS))
  1019  		}
  1020  		return nil, false, nil
  1021  	}
  1022  	relatedChanges, err := checker.CheckBySchemaVer(checkTS, startSchemaReplicant)
  1023  	if err != nil {
  1024  		if tryAmend && relatedChanges != nil && relatedChanges.Amendable && c.txn.schemaAmender != nil {
  1025  			memAmended, amendErr := c.tryAmendTxn(ctx, startSchemaReplicant, relatedChanges)
  1026  			if amendErr != nil {
  1027  				logutil.BgLogger().Info("txn amend has failed", zap.Uint64("connID", c.connID),
  1028  					zap.Uint64("startTS", c.startTS), zap.Error(amendErr))
  1029  				return nil, false, err
  1030  			}
  1031  			logutil.Logger(ctx).Info("amend txn successfully for pessimistic commit",
  1032  				zap.Uint64("connID", c.connID), zap.Uint64("txn startTS", c.startTS), zap.Bool("memAmended", memAmended),
  1033  				zap.Uint64("checkTS", checkTS), zap.Int64("startSchemaReplicantVer", startSchemaReplicant.SchemaMetaVersion()),
  1034  				zap.Int64s("causet ids", relatedChanges.PhyTblIDS), zap.Uint64s("action types", relatedChanges.CausetActionTypes))
  1035  			return relatedChanges, memAmended, nil
  1036  		}
  1037  		return nil, false, errors.Trace(err)
  1038  	}
  1039  	return nil, false, nil
  1040  }
  1041  
  1042  func (c *twoPhaseCommitter) prewriteBinlog(ctx context.Context) chan *binloginfo.WriteResult {
  1043  	if !c.shouldWriteBinlog() {
  1044  		return nil
  1045  	}
  1046  	ch := make(chan *binloginfo.WriteResult, 1)
  1047  	go func() {
  1048  		logutil.Eventf(ctx, "start prewrite binlog")
  1049  		binInfo := c.txn.us.GetOption(ekv.BinlogInfo).(*binloginfo.BinlogInfo)
  1050  		bin := binInfo.Data
  1051  		bin.StartTs = int64(c.startTS)
  1052  		if bin.Tp == binlog.BinlogType_Prewrite {
  1053  			bin.PrewriteKey = c.primary()
  1054  		}
  1055  		wr := binInfo.WriteBinlog(c.causetstore.clusterID)
  1056  		if wr.Skipped() {
  1057  			binInfo.Data.PrewriteValue = nil
  1058  			binloginfo.AddOneSkippedCommitter()
  1059  		}
  1060  		logutil.Eventf(ctx, "finish prewrite binlog")
  1061  		ch <- wr
  1062  	}()
  1063  	return ch
  1064  }
  1065  
  1066  func (c *twoPhaseCommitter) writeFinishBinlog(ctx context.Context, tp binlog.BinlogType, commitTS int64) {
  1067  	if !c.shouldWriteBinlog() {
  1068  		return
  1069  	}
  1070  	binInfo := c.txn.us.GetOption(ekv.BinlogInfo).(*binloginfo.BinlogInfo)
  1071  	binInfo.Data.Tp = tp
  1072  	binInfo.Data.CommitTs = commitTS
  1073  	binInfo.Data.PrewriteValue = nil
  1074  
  1075  	wg := sync.WaitGroup{}
  1076  	mock := false
  1077  	failpoint.Inject("mockSyncBinlogCommit", func(val failpoint.Value) {
  1078  		if val.(bool) {
  1079  			wg.Add(1)
  1080  			mock = true
  1081  		}
  1082  	})
  1083  	go func() {
  1084  		logutil.Eventf(ctx, "start write finish binlog")
  1085  		binlogWriteResult := binInfo.WriteBinlog(c.causetstore.clusterID)
  1086  		err := binlogWriteResult.GetError()
  1087  		if err != nil {
  1088  			logutil.BgLogger().Error("failed to write binlog",
  1089  				zap.Error(err))
  1090  		}
  1091  		logutil.Eventf(ctx, "finish write finish binlog")
  1092  		if mock {
  1093  			wg.Done()
  1094  		}
  1095  	}()
  1096  	if mock {
  1097  		wg.Wait()
  1098  	}
  1099  }
  1100  
  1101  func (c *twoPhaseCommitter) shouldWriteBinlog() bool {
  1102  	return c.txn.us.GetOption(ekv.BinlogInfo) != nil
  1103  }
  1104  
// txnCommitBatchSize is the per-batch size limit, in bytes, for commit requests.
// EinsteinDB recommends each RPC packet should be less than ~1MB. We keep each
// packet's Key+Value size below 16KB.
const txnCommitBatchSize = 16 * 1024
  1108  
// batchMutations is a group of mutations destined for a single region.
type batchMutations struct {
	region    RegionVerID        // region the mutations are sent to
	mutations CommitterMutations // the mutations contained in this batch
	isPrimary bool               // set by batched.setPrimary on the batch holding the primary key
}
// batched collects batchMutations and tracks which batch holds the primary key.
type batched struct {
	batches    []batchMutations // batches appended so far
	primaryIdx int              // index into batches of the primary batch; -1 until the primary key is seen
	primaryKey []byte           // the primary key to look for while appending
}
  1119  
  1120  func newBatched(primaryKey []byte) *batched {
  1121  	return &batched{
  1122  		primaryIdx: -1,
  1123  		primaryKey: primaryKey,
  1124  	}
  1125  }
  1126  
  1127  // appendBatchMutationsBySize appends mutations to b. It may split the keys to make
  1128  // sure each batch's size does not exceed the limit.
  1129  func (b *batched) appendBatchMutationsBySize(region RegionVerID, mutations CommitterMutations, sizeFn func(k, v []byte) int, limit int) {
  1130  	var start, end int
  1131  	for start = 0; start < mutations.len(); start = end {
  1132  		var size int
  1133  		for end = start; end < mutations.len() && size < limit; end++ {
  1134  			var k, v []byte
  1135  			k = mutations.keys[end]
  1136  			if end < len(mutations.values) {
  1137  				v = mutations.values[end]
  1138  			}
  1139  			size += sizeFn(k, v)
  1140  			if b.primaryIdx < 0 && bytes.Equal(k, b.primaryKey) {
  1141  				b.primaryIdx = len(b.batches)
  1142  			}
  1143  		}
  1144  		b.batches = append(b.batches, batchMutations{
  1145  			region:    region,
  1146  			mutations: mutations.subRange(start, end),
  1147  		})
  1148  	}
  1149  }
  1150  
  1151  func (b *batched) setPrimary() bool {
  1152  	// If the batches include the primary key, put it to the first
  1153  	if b.primaryIdx >= 0 {
  1154  		if len(b.batches) > 0 {
  1155  			b.batches[b.primaryIdx].isPrimary = true
  1156  			b.batches[0], b.batches[b.primaryIdx] = b.batches[b.primaryIdx], b.batches[0]
  1157  			b.primaryIdx = 0
  1158  		}
  1159  		return true
  1160  	}
  1161  
  1162  	return false
  1163  }
  1164  
  1165  func (b *batched) allBatches() []batchMutations {
  1166  	return b.batches
  1167  }
  1168  
  1169  // primaryBatch returns the batch containing the primary key.
  1170  // Precondition: `b.setPrimary() == true`
  1171  func (b *batched) primaryBatch() []batchMutations {
  1172  	return b.batches[:1]
  1173  }
  1174  
  1175  func (b *batched) forgetPrimary() {
  1176  	if len(b.batches) == 0 {
  1177  		return
  1178  	}
  1179  	b.batches = b.batches[1:]
  1180  }
  1181  
// batchInterlockingDirectorate runs one two-phase-commit action over a set of
// batches concurrently, bounding the concurrency with a token-based rate
// limiter.
type batchInterlockingDirectorate struct {
	rateLim           int                        // concurrent worker numbers
	rateLimiter       *rateLimit                 // rate limiter for concurrency control, maybe more strategies; created by initUtils
	committer         *twoPhaseCommitter         // here maybe more different type committer in the future
	action            twoPhaseCommitCausetAction // the work action type
	backoffer         *Backoffer                 // Backoffer that each worker forks or clones from
	tokenWaitDuration time.Duration              // accumulated time spent waiting for tokens; reported as a metric by process
}
  1191  
  1192  // newBatchInterlockingDirectorate create processor to handle concurrent batch works(prewrite/commit etc)
  1193  func newBatchInterlockingDirectorate(rateLimit int, committer *twoPhaseCommitter,
  1194  	action twoPhaseCommitCausetAction, backoffer *Backoffer) *batchInterlockingDirectorate {
  1195  	return &batchInterlockingDirectorate{rateLimit, nil, committer,
  1196  		action, backoffer, 1 * time.Millisecond}
  1197  }
  1198  
  1199  // initUtils do initialize batchInterlockingDirectorate related policies like rateLimit soliton
  1200  func (batchInterDir *batchInterlockingDirectorate) initUtils() error {
  1201  	// init rateLimiter by injected rate limit number
  1202  	batchInterDir.rateLimiter = newRateLimit(batchInterDir.rateLim)
  1203  	return nil
  1204  }
  1205  
  1206  // startWork concurrently do the work for each batch considering rate limit
  1207  func (batchInterDir *batchInterlockingDirectorate) startWorker(exitCh chan struct{}, ch chan error, batches []batchMutations) {
  1208  	for idx, batch1 := range batches {
  1209  		waitStart := time.Now()
  1210  		if exit := batchInterDir.rateLimiter.getToken(exitCh); !exit {
  1211  			batchInterDir.tokenWaitDuration += time.Since(waitStart)
  1212  			batch := batch1
  1213  			go func() {
  1214  				defer batchInterDir.rateLimiter.putToken()
  1215  				var singleBatchBackoffer *Backoffer
  1216  				if _, ok := batchInterDir.action.(actionCommit); ok {
  1217  					// Because the secondary batches of the commit actions are implemented to be
  1218  					// committed asynchronously in background goroutines, we should not
  1219  					// fork a child context and call cancel() while the foreground goroutine exits.
  1220  					// Otherwise the background goroutines will be canceled execeptionally.
  1221  					// Here we makes a new clone of the original backoffer for this goroutine
  1222  					// exclusively to avoid the data race when using the same backoffer
  1223  					// in concurrent goroutines.
  1224  					singleBatchBackoffer = batchInterDir.backoffer.Clone()
  1225  				} else {
  1226  					var singleBatchCancel context.CancelFunc
  1227  					singleBatchBackoffer, singleBatchCancel = batchInterDir.backoffer.Fork()
  1228  					defer singleBatchCancel()
  1229  				}
  1230  				beforeSleep := singleBatchBackoffer.totalSleep
  1231  				ch <- batchInterDir.action.handleSingleBatch(batchInterDir.committer, singleBatchBackoffer, batch)
  1232  				commitDetail := batchInterDir.committer.getDetail()
  1233  				if commitDetail != nil { // dagger operations of pessimistic-txn will let commitDetail be nil
  1234  					if delta := singleBatchBackoffer.totalSleep - beforeSleep; delta > 0 {
  1235  						atomic.AddInt64(&commitDetail.CommitBackoffTime, int64(singleBatchBackoffer.totalSleep-beforeSleep)*int64(time.Millisecond))
  1236  						commitDetail.Mu.Lock()
  1237  						commitDetail.Mu.BackoffTypes = append(commitDetail.Mu.BackoffTypes, singleBatchBackoffer.types...)
  1238  						commitDetail.Mu.Unlock()
  1239  					}
  1240  				}
  1241  			}()
  1242  		} else {
  1243  			logutil.Logger(batchInterDir.backoffer.ctx).Info("break startWorker",
  1244  				zap.Stringer("action", batchInterDir.action), zap.Int("batch size", len(batches)),
  1245  				zap.Int("index", idx))
  1246  			break
  1247  		}
  1248  	}
  1249  }
  1250  
  1251  // process will start worker routine and collect results
  1252  func (batchInterDir *batchInterlockingDirectorate) process(batches []batchMutations) error {
  1253  	var err error
  1254  	err = batchInterDir.initUtils()
  1255  	if err != nil {
  1256  		logutil.Logger(batchInterDir.backoffer.ctx).Error("batchInterlockingDirectorate initUtils failed", zap.Error(err))
  1257  		return err
  1258  	}
  1259  
  1260  	// For prewrite, stop sending other requests after receiving first error.
  1261  	backoffer := batchInterDir.backoffer
  1262  	var cancel context.CancelFunc
  1263  	if _, ok := batchInterDir.action.(actionPrewrite); ok {
  1264  		backoffer, cancel = batchInterDir.backoffer.Fork()
  1265  		defer cancel()
  1266  	}
  1267  	// concurrently do the work for each batch.
  1268  	ch := make(chan error, len(batches))
  1269  	exitCh := make(chan struct{})
  1270  	go batchInterDir.startWorker(exitCh, ch, batches)
  1271  	// check results
  1272  	for i := 0; i < len(batches); i++ {
  1273  		if e := <-ch; e != nil {
  1274  			logutil.Logger(backoffer.ctx).Debug("2PC doCausetActionOnBatch failed",
  1275  				zap.Uint64("conn", batchInterDir.committer.connID),
  1276  				zap.Stringer("action type", batchInterDir.action),
  1277  				zap.Error(e),
  1278  				zap.Uint64("txnStartTS", batchInterDir.committer.startTS))
  1279  			// Cancel other requests and return the first error.
  1280  			if cancel != nil {
  1281  				logutil.Logger(backoffer.ctx).Debug("2PC doCausetActionOnBatch to cancel other actions",
  1282  					zap.Uint64("conn", batchInterDir.committer.connID),
  1283  					zap.Stringer("action type", batchInterDir.action),
  1284  					zap.Uint64("txnStartTS", batchInterDir.committer.startTS))
  1285  				cancel()
  1286  			}
  1287  			if err == nil {
  1288  				err = e
  1289  			}
  1290  		}
  1291  	}
  1292  	close(exitCh)
  1293  	metrics.EinsteinDBTokenWaitDuration.Observe(batchInterDir.tokenWaitDuration.Seconds())
  1294  	return err
  1295  }
  1296  
  1297  func getTxnPriority(txn *einsteindbTxn) pb.CommandPri {
  1298  	if pri := txn.us.GetOption(ekv.Priority); pri != nil {
  1299  		return ekvPriorityToCommandPri(pri.(int))
  1300  	}
  1301  	return pb.CommandPri_Normal
  1302  }
  1303  
  1304  func getTxnSyncLog(txn *einsteindbTxn) bool {
  1305  	if syncOption := txn.us.GetOption(ekv.SyncLog); syncOption != nil {
  1306  		return syncOption.(bool)
  1307  	}
  1308  	return false
  1309  }
  1310  
  1311  func ekvPriorityToCommandPri(pri int) pb.CommandPri {
  1312  	switch pri {
  1313  	case ekv.PriorityLow:
  1314  		return pb.CommandPri_Low
  1315  	case ekv.PriorityHigh:
  1316  		return pb.CommandPri_High
  1317  	default:
  1318  		return pb.CommandPri_Normal
  1319  	}
  1320  }
  1321  
  1322  func (c *twoPhaseCommitter) setDetail(d *execdetails.CommitDetails) {
  1323  	atomic.StorePointer(&c.detail, unsafe.Pointer(d))
  1324  }
  1325  
  1326  func (c *twoPhaseCommitter) getDetail() *execdetails.CommitDetails {
  1327  	return (*execdetails.CommitDetails)(atomic.LoadPointer(&c.detail))
  1328  }
  1329  
  1330  func (c *twoPhaseCommitter) setUndeterminedErr(err error) {
  1331  	c.mu.Lock()
  1332  	defer c.mu.Unlock()
  1333  	c.mu.undeterminedErr = err
  1334  }
  1335  
  1336  func (c *twoPhaseCommitter) getUndeterminedErr() error {
  1337  	c.mu.RLock()
  1338  	defer c.mu.RUnlock()
  1339  	return c.mu.undeterminedErr
  1340  }