github.com/KinWaiYuen/client-go/v2@v2.5.4/txnkv/transaction/prewrite.go (about)

     1  // Copyright 2021 TiKV Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // NOTE: The code in this file is based on code from the
    16  // TiDB project, licensed under the Apache License v 2.0
    17  //
    18  // https://github.com/pingcap/tidb/tree/cc5e161ac06827589c4966674597c137cc9e809c/store/tikv/prewrite.go
    19  //
    20  
    21  // Copyright 2020 PingCAP, Inc.
    22  //
    23  // Licensed under the Apache License, Version 2.0 (the "License");
    24  // you may not use this file except in compliance with the License.
    25  // You may obtain a copy of the License at
    26  //
    27  //     http://www.apache.org/licenses/LICENSE-2.0
    28  //
    29  // Unless required by applicable law or agreed to in writing, software
    30  // distributed under the License is distributed on an "AS IS" BASIS,
    31  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    32  // See the License for the specific language governing permissions and
    33  // limitations under the License.
    34  
    35  package transaction
    36  
    37  import (
    38  	"encoding/hex"
    39  	"math"
    40  	"strconv"
    41  	"sync/atomic"
    42  	"time"
    43  
    44  	"github.com/KinWaiYuen/client-go/v2/config"
    45  	tikverr "github.com/KinWaiYuen/client-go/v2/error"
    46  	"github.com/KinWaiYuen/client-go/v2/internal/client"
    47  	"github.com/KinWaiYuen/client-go/v2/internal/locate"
    48  	"github.com/KinWaiYuen/client-go/v2/internal/logutil"
    49  	"github.com/KinWaiYuen/client-go/v2/internal/retry"
    50  	"github.com/KinWaiYuen/client-go/v2/metrics"
    51  	"github.com/KinWaiYuen/client-go/v2/tikvrpc"
    52  	"github.com/KinWaiYuen/client-go/v2/txnkv/txnlock"
    53  	"github.com/KinWaiYuen/client-go/v2/util"
    54  	"github.com/opentracing/opentracing-go"
    55  	"github.com/pingcap/errors"
    56  	"github.com/pingcap/kvproto/pkg/kvrpcpb"
    57  	"github.com/prometheus/client_golang/prometheus"
    58  	"go.uber.org/zap"
    59  )
    60  
    61  type actionPrewrite struct{ retry bool }
    62  
    63  var _ twoPhaseCommitAction = actionPrewrite{}
    64  
    65  func (actionPrewrite) String() string {
    66  	return "prewrite"
    67  }
    68  
    69  func (actionPrewrite) tiKVTxnRegionsNumHistogram() prometheus.Observer {
    70  	return metrics.TxnRegionsNumHistogramPrewrite
    71  }
    72  
    73  func (c *twoPhaseCommitter) buildPrewriteRequest(batch batchMutations, txnSize uint64) *tikvrpc.Request {
    74  	m := batch.mutations
    75  	mutations := make([]*kvrpcpb.Mutation, m.Len())
    76  	isPessimisticLock := make([]bool, m.Len())
    77  	for i := 0; i < m.Len(); i++ {
    78  		mutations[i] = &kvrpcpb.Mutation{
    79  			Op:    m.GetOp(i),
    80  			Key:   m.GetKey(i),
    81  			Value: m.GetValue(i),
    82  		}
    83  		isPessimisticLock[i] = m.IsPessimisticLock(i)
    84  	}
    85  	c.mu.Lock()
    86  	minCommitTS := c.minCommitTS
    87  	c.mu.Unlock()
    88  	if c.forUpdateTS > 0 && c.forUpdateTS >= minCommitTS {
    89  		minCommitTS = c.forUpdateTS + 1
    90  	} else if c.startTS >= minCommitTS {
    91  		minCommitTS = c.startTS + 1
    92  	}
    93  
    94  	if val, err := util.EvalFailpoint("mockZeroCommitTS"); err == nil {
    95  		// Should be val.(uint64) but failpoint doesn't support that.
    96  		if tmp, ok := val.(int); ok && uint64(tmp) == c.startTS {
    97  			minCommitTS = 0
    98  		}
    99  	}
   100  
   101  	ttl := c.lockTTL
   102  
   103  	if c.sessionID > 0 {
   104  		if _, err := util.EvalFailpoint("twoPCShortLockTTL"); err == nil {
   105  			ttl = 1
   106  			keys := make([]string, 0, len(mutations))
   107  			for _, m := range mutations {
   108  				keys = append(keys, hex.EncodeToString(m.Key))
   109  			}
   110  			logutil.BgLogger().Info("[failpoint] injected lock ttl = 1 on prewrite",
   111  				zap.Uint64("txnStartTS", c.startTS), zap.Strings("keys", keys))
   112  		}
   113  	}
   114  
   115  	req := &kvrpcpb.PrewriteRequest{
   116  		Mutations:         mutations,
   117  		PrimaryLock:       c.primary(),
   118  		StartVersion:      c.startTS,
   119  		LockTtl:           ttl,
   120  		IsPessimisticLock: isPessimisticLock,
   121  		ForUpdateTs:       c.forUpdateTS,
   122  		TxnSize:           txnSize,
   123  		MinCommitTs:       minCommitTS,
   124  		MaxCommitTs:       c.maxCommitTS,
   125  	}
   126  
   127  	if _, err := util.EvalFailpoint("invalidMaxCommitTS"); err == nil {
   128  		if req.MaxCommitTs > 0 {
   129  			req.MaxCommitTs = minCommitTS - 1
   130  		}
   131  	}
   132  
   133  	if c.isAsyncCommit() {
   134  		if batch.isPrimary {
   135  			req.Secondaries = c.asyncSecondaries()
   136  		}
   137  		req.UseAsyncCommit = true
   138  	}
   139  
   140  	if c.isOnePC() {
   141  		req.TryOnePc = true
   142  	}
   143  
   144  	return tikvrpc.NewRequest(tikvrpc.CmdPrewrite, req,
   145  		kvrpcpb.Context{Priority: c.priority, SyncLog: c.syncLog, ResourceGroupTag: c.resourceGroupTag,
   146  			DiskFullOpt: c.diskFullOpt, MaxExecutionDurationMs: uint64(client.MaxWriteExecutionTime.Milliseconds())})
   147  }
   148  
// handleSingleBatch sends the prewrite request for one batch to the batch's
// region and loops until the request succeeds, fails with an error that is
// returned to the caller, or is re-dispatched through doActionOnMutations.
//
// Per iteration:
//   - A send error or an error decoding the region error is returned as is.
//   - On a region error it backs off (except for a real EpochNotMatch),
//     returns an error if TiKV reports disk full, then relocates the batch.
//     If the batch still maps to the same region the request is retried;
//     otherwise the mutations are re-dispatched with actionPrewrite{true}.
//   - With no key errors the response is applied to the committer (TTL
//     manager start, 1PC result, async-commit minCommitTS) and nil is returned.
//   - With key errors, an AlreadyExist error is returned immediately; any
//     other key error is treated as a lock, the locks are resolved, and the
//     request is retried after an optional backoff.
//
// The named result err is inspected by the deferred function to decide
// whether an undetermined error must be recorded on the committer.
func (action actionPrewrite) handleSingleBatch(c *twoPhaseCommitter, bo *retry.Backoffer, batch batchMutations) (err error) {
	// WARNING: This function only tries to send a single request to a single region, so it doesn't
	// need to unset the `useOnePC` flag when it fails. A special case is that when TiKV returns
	// regionErr, it's uncertain if the request will be split into multiple and sent to multiple
	// regions. It invokes `prewriteMutations` recursively here, and the number of batches will be
	// checked there.

	// Failpoints for tests; only evaluated when the committer has a session ID.
	if c.sessionID > 0 {
		if batch.isPrimary {
			if _, err := util.EvalFailpoint("prewritePrimaryFail"); err == nil {
				// Delay to avoid cancelling other normally ongoing prewrite requests.
				time.Sleep(time.Millisecond * 50)
				logutil.Logger(bo.GetCtx()).Info("[failpoint] injected error on prewriting primary batch",
					zap.Uint64("txnStartTS", c.startTS))
				return errors.New("injected error on prewriting primary batch")
			}
			util.EvalFailpoint("prewritePrimary") // for other failures like sleep or pause
		} else {
			if _, err := util.EvalFailpoint("prewriteSecondaryFail"); err == nil {
				// Delay to avoid cancelling other normally ongoing prewrite requests.
				time.Sleep(time.Millisecond * 50)
				logutil.Logger(bo.GetCtx()).Info("[failpoint] injected error on prewriting secondary batch",
					zap.Uint64("txnStartTS", c.startTS))
				return errors.New("injected error on prewriting secondary batch")
			}
			util.EvalFailpoint("prewriteSecondary") // for other failures like sleep or pause
		}
	}

	txnSize := uint64(c.regionTxnSize[batch.region.GetID()])
	// When we retry because of a region miss, we don't know the transaction size. We set the transaction size here
	// to MaxUint64 to avoid unexpected "resolve lock lite".
	if action.retry {
		txnSize = math.MaxUint64
	}

	// tBegin is reset each time a slow-request warning is logged, so the
	// warning is emitted at most once per slowRequestThreshold.
	tBegin := time.Now()
	attempts := 0

	// The request is built once and reused for every retry in the loop below.
	req := c.buildPrewriteRequest(batch, txnSize)
	sender := locate.NewRegionRequestSender(c.store.GetRegionCache(), c.store.GetTiKVClient())
	defer func() {
		if err != nil {
			// If we fail to receive response for async commit prewrite, it will be undetermined whether this
			// transaction has been successfully committed.
			// If prewrite has been cancelled, all ongoing prewrite RPCs will become errors, we needn't set undetermined
			// errors.
			if (c.isAsyncCommit() || c.isOnePC()) && sender.GetRPCError() != nil && atomic.LoadUint32(&c.prewriteCancelled) == 0 {
				c.setUndeterminedErr(errors.Trace(sender.GetRPCError()))
			}
		}
	}()
	for {
		attempts++
		if time.Since(tBegin) > slowRequestThreshold {
			logutil.BgLogger().Warn("slow prewrite request", zap.Uint64("startTS", c.startTS), zap.Stringer("region", &batch.region), zap.Int("attempts", attempts))
			tBegin = time.Now()
		}

		// NOTE: `err` is shadowed inside the loop; the named result is only
		// assigned by the `return` statements, which is what the deferred
		// function above observes.
		resp, err := sender.SendReq(bo, req, batch.region, client.ReadTimeoutShort)
		// Unexpected error occurs, return it
		if err != nil {
			return errors.Trace(err)
		}

		regionErr, err := resp.GetRegionError()
		if err != nil {
			return errors.Trace(err)
		}
		if regionErr != nil {
			// For other region error and the fake region error, backoff because
			// there's something wrong.
			// For the real EpochNotMatch error, don't backoff.
			if regionErr.GetEpochNotMatch() == nil || locate.IsFakeRegionError(regionErr) {
				err = bo.Backoff(retry.BoRegionMiss, errors.New(regionErr.String()))
				if err != nil {
					return errors.Trace(err)
				}
			}
			// Disk full is not retried here: log the affected stores and
			// return the region error to the caller.
			if regionErr.GetDiskFull() != nil {
				storeIds := regionErr.GetDiskFull().GetStoreId()
				desc := " "
				for _, i := range storeIds {
					desc += strconv.FormatUint(i, 10) + " "
				}

				logutil.Logger(bo.GetCtx()).Error("Request failed cause of TiKV disk full",
					zap.String("store_id", desc),
					zap.String("reason", regionErr.GetDiskFull().GetReason()))

				return errors.Trace(errors.New(regionErr.String()))
			}
			// Re-locate the batch. If it still belongs to the same region,
			// retry the same request; otherwise re-dispatch the mutations
			// with the retry flag set.
			same, err := batch.relocate(bo, c.store.GetRegionCache())
			if err != nil {
				return errors.Trace(err)
			}
			if same {
				continue
			}
			err = c.doActionOnMutations(bo, actionPrewrite{true}, batch.mutations)
			return errors.Trace(err)
		}

		if resp.Resp == nil {
			return errors.Trace(tikverr.ErrBodyMissing)
		}
		prewriteResp := resp.Resp.(*kvrpcpb.PrewriteResponse)
		keyErrs := prewriteResp.GetErrors()
		if len(keyErrs) == 0 {
			// Clear the RPC Error since the request is evaluated successfully.
			sender.SetRPCError(nil)

			if batch.isPrimary {
				// After writing the primary key, if the size of the transaction is larger than 32M,
				// start the ttlManager. The ttlManager will be closed in tikvTxn.Commit().
				// In this case 1PC is not expected to be used, but still check it for safety.
				if int64(c.txnSize) > config.GetGlobalConfig().TiKVClient.TTLRefreshedTxnSize &&
					prewriteResp.OnePcCommitTs == 0 {
					c.run(c, nil)
				}
			}

			if c.isOnePC() {
				if prewriteResp.OnePcCommitTs == 0 {
					// TiKV did not commit with 1PC: fall back to the normal
					// commit procedure and disable async commit as well.
					if prewriteResp.MinCommitTs != 0 {
						return errors.Trace(errors.New("MinCommitTs must be 0 when 1pc falls back to 2pc"))
					}
					logutil.Logger(bo.GetCtx()).Warn("1pc failed and fallbacks to normal commit procedure",
						zap.Uint64("startTS", c.startTS))
					metrics.OnePCTxnCounterFallback.Inc()
					c.setOnePC(false)
					c.setAsyncCommit(false)
				} else {
					// For 1PC, there's no racing to access `onePCCommitTS` so it's safe
					// not to lock the mutex.
					if c.onePCCommitTS != 0 {
						logutil.Logger(bo.GetCtx()).Fatal("one pc happened multiple times",
							zap.Uint64("startTS", c.startTS))
					}
					c.onePCCommitTS = prewriteResp.OnePcCommitTs
				}
				return nil
			} else if prewriteResp.OnePcCommitTs != 0 {
				logutil.Logger(bo.GetCtx()).Fatal("tikv committed a non-1pc transaction with 1pc protocol",
					zap.Uint64("startTS", c.startTS))
			}
			if c.isAsyncCommit() {
				// 0 if the min_commit_ts is not ready or any other reason that async
				// commit cannot proceed. The client can then fallback to normal way to
				// continue committing the transaction if prewrite are all finished.
				if prewriteResp.MinCommitTs == 0 {
					if c.testingKnobs.noFallBack {
						return nil
					}
					logutil.Logger(bo.GetCtx()).Warn("async commit cannot proceed since the returned minCommitTS is zero, "+
						"fallback to normal path", zap.Uint64("startTS", c.startTS))
					c.setAsyncCommit(false)
				} else {
					// Keep the largest minCommitTS returned by any prewrite response.
					c.mu.Lock()
					if prewriteResp.MinCommitTs > c.minCommitTS {
						c.minCommitTS = prewriteResp.MinCommitTs
					}
					c.mu.Unlock()
				}
			}
			return nil
		}
		// The response carries key errors: collect the locks they describe,
		// resolve them, and retry the prewrite.
		var locks []*txnlock.Lock
		for _, keyErr := range keyErrs {
			// Check already exists error
			if alreadyExist := keyErr.GetAlreadyExist(); alreadyExist != nil {
				e := &tikverr.ErrKeyExist{AlreadyExist: alreadyExist}
				return c.extractKeyExistsErr(e)
			}

			// Extract lock from key error
			lock, err1 := txnlock.ExtractLockFromKeyErr(keyErr)
			if err1 != nil {
				return errors.Trace(err1)
			}
			logutil.BgLogger().Info("prewrite encounters lock",
				zap.Uint64("session", c.sessionID),
				zap.Stringer("lock", lock))
			locks = append(locks, lock)
		}
		start := time.Now()
		msBeforeExpired, err := c.store.GetLockResolver().ResolveLocksForWrite(bo, c.startTS, c.forUpdateTS, locks)
		if err != nil {
			return errors.Trace(err)
		}
		// The resolve-lock time is only accumulated when resolving succeeded.
		atomic.AddInt64(&c.getDetail().ResolveLockTime, int64(time.Since(start)))
		// A positive msBeforeExpired is passed to the backoff as its maximum
		// sleep before the next attempt.
		if msBeforeExpired > 0 {
			err = bo.BackoffWithCfgAndMaxSleep(retry.BoTxnLock, int(msBeforeExpired), errors.Errorf("2PC prewrite lockedKeys: %d", len(locks)))
			if err != nil {
				return errors.Trace(err)
			}
		}
	}
}
   348  
   349  func (c *twoPhaseCommitter) prewriteMutations(bo *retry.Backoffer, mutations CommitterMutations) error {
   350  	if span := opentracing.SpanFromContext(bo.GetCtx()); span != nil && span.Tracer() != nil {
   351  		span1 := span.Tracer().StartSpan("twoPhaseCommitter.prewriteMutations", opentracing.ChildOf(span.Context()))
   352  		defer span1.Finish()
   353  		bo.SetCtx(opentracing.ContextWithSpan(bo.GetCtx(), span1))
   354  	}
   355  
   356  	// `doActionOnMutations` will unset `useOnePC` if the mutations is splitted into multiple batches.
   357  	return c.doActionOnMutations(bo, actionPrewrite{}, mutations)
   358  }