// Source: github.com/KinWaiYuen/client-go/v2@v2.5.4/txnkv/transaction/prewrite.go

// Copyright 2021 TiKV Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// NOTE: The code in this file is based on code from the
// TiDB project, licensed under the Apache License v 2.0
//
// https://github.com/pingcap/tidb/tree/cc5e161ac06827589c4966674597c137cc9e809c/store/tikv/prewrite.go
//

// Copyright 2020 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
34 35 package transaction 36 37 import ( 38 "encoding/hex" 39 "math" 40 "strconv" 41 "sync/atomic" 42 "time" 43 44 "github.com/KinWaiYuen/client-go/v2/config" 45 tikverr "github.com/KinWaiYuen/client-go/v2/error" 46 "github.com/KinWaiYuen/client-go/v2/internal/client" 47 "github.com/KinWaiYuen/client-go/v2/internal/locate" 48 "github.com/KinWaiYuen/client-go/v2/internal/logutil" 49 "github.com/KinWaiYuen/client-go/v2/internal/retry" 50 "github.com/KinWaiYuen/client-go/v2/metrics" 51 "github.com/KinWaiYuen/client-go/v2/tikvrpc" 52 "github.com/KinWaiYuen/client-go/v2/txnkv/txnlock" 53 "github.com/KinWaiYuen/client-go/v2/util" 54 "github.com/opentracing/opentracing-go" 55 "github.com/pingcap/errors" 56 "github.com/pingcap/kvproto/pkg/kvrpcpb" 57 "github.com/prometheus/client_golang/prometheus" 58 "go.uber.org/zap" 59 ) 60 61 type actionPrewrite struct{ retry bool } 62 63 var _ twoPhaseCommitAction = actionPrewrite{} 64 65 func (actionPrewrite) String() string { 66 return "prewrite" 67 } 68 69 func (actionPrewrite) tiKVTxnRegionsNumHistogram() prometheus.Observer { 70 return metrics.TxnRegionsNumHistogramPrewrite 71 } 72 73 func (c *twoPhaseCommitter) buildPrewriteRequest(batch batchMutations, txnSize uint64) *tikvrpc.Request { 74 m := batch.mutations 75 mutations := make([]*kvrpcpb.Mutation, m.Len()) 76 isPessimisticLock := make([]bool, m.Len()) 77 for i := 0; i < m.Len(); i++ { 78 mutations[i] = &kvrpcpb.Mutation{ 79 Op: m.GetOp(i), 80 Key: m.GetKey(i), 81 Value: m.GetValue(i), 82 } 83 isPessimisticLock[i] = m.IsPessimisticLock(i) 84 } 85 c.mu.Lock() 86 minCommitTS := c.minCommitTS 87 c.mu.Unlock() 88 if c.forUpdateTS > 0 && c.forUpdateTS >= minCommitTS { 89 minCommitTS = c.forUpdateTS + 1 90 } else if c.startTS >= minCommitTS { 91 minCommitTS = c.startTS + 1 92 } 93 94 if val, err := util.EvalFailpoint("mockZeroCommitTS"); err == nil { 95 // Should be val.(uint64) but failpoint doesn't support that. 
96 if tmp, ok := val.(int); ok && uint64(tmp) == c.startTS { 97 minCommitTS = 0 98 } 99 } 100 101 ttl := c.lockTTL 102 103 if c.sessionID > 0 { 104 if _, err := util.EvalFailpoint("twoPCShortLockTTL"); err == nil { 105 ttl = 1 106 keys := make([]string, 0, len(mutations)) 107 for _, m := range mutations { 108 keys = append(keys, hex.EncodeToString(m.Key)) 109 } 110 logutil.BgLogger().Info("[failpoint] injected lock ttl = 1 on prewrite", 111 zap.Uint64("txnStartTS", c.startTS), zap.Strings("keys", keys)) 112 } 113 } 114 115 req := &kvrpcpb.PrewriteRequest{ 116 Mutations: mutations, 117 PrimaryLock: c.primary(), 118 StartVersion: c.startTS, 119 LockTtl: ttl, 120 IsPessimisticLock: isPessimisticLock, 121 ForUpdateTs: c.forUpdateTS, 122 TxnSize: txnSize, 123 MinCommitTs: minCommitTS, 124 MaxCommitTs: c.maxCommitTS, 125 } 126 127 if _, err := util.EvalFailpoint("invalidMaxCommitTS"); err == nil { 128 if req.MaxCommitTs > 0 { 129 req.MaxCommitTs = minCommitTS - 1 130 } 131 } 132 133 if c.isAsyncCommit() { 134 if batch.isPrimary { 135 req.Secondaries = c.asyncSecondaries() 136 } 137 req.UseAsyncCommit = true 138 } 139 140 if c.isOnePC() { 141 req.TryOnePc = true 142 } 143 144 return tikvrpc.NewRequest(tikvrpc.CmdPrewrite, req, 145 kvrpcpb.Context{Priority: c.priority, SyncLog: c.syncLog, ResourceGroupTag: c.resourceGroupTag, 146 DiskFullOpt: c.diskFullOpt, MaxExecutionDurationMs: uint64(client.MaxWriteExecutionTime.Milliseconds())}) 147 } 148 149 func (action actionPrewrite) handleSingleBatch(c *twoPhaseCommitter, bo *retry.Backoffer, batch batchMutations) (err error) { 150 // WARNING: This function only tries to send a single request to a single region, so it don't 151 // need to unset the `useOnePC` flag when it fails. A special case is that when TiKV returns 152 // regionErr, it's uncertain if the request will be splitted into multiple and sent to multiple 153 // regions. 
It invokes `prewriteMutations` recursively here, and the number of batches will be 154 // checked there. 155 156 if c.sessionID > 0 { 157 if batch.isPrimary { 158 if _, err := util.EvalFailpoint("prewritePrimaryFail"); err == nil { 159 // Delay to avoid cancelling other normally ongoing prewrite requests. 160 time.Sleep(time.Millisecond * 50) 161 logutil.Logger(bo.GetCtx()).Info("[failpoint] injected error on prewriting primary batch", 162 zap.Uint64("txnStartTS", c.startTS)) 163 return errors.New("injected error on prewriting primary batch") 164 } 165 util.EvalFailpoint("prewritePrimary") // for other failures like sleep or pause 166 } else { 167 if _, err := util.EvalFailpoint("prewriteSecondaryFail"); err == nil { 168 // Delay to avoid cancelling other normally ongoing prewrite requests. 169 time.Sleep(time.Millisecond * 50) 170 logutil.Logger(bo.GetCtx()).Info("[failpoint] injected error on prewriting secondary batch", 171 zap.Uint64("txnStartTS", c.startTS)) 172 return errors.New("injected error on prewriting secondary batch") 173 } 174 util.EvalFailpoint("prewriteSecondary") // for other failures like sleep or pause 175 } 176 } 177 178 txnSize := uint64(c.regionTxnSize[batch.region.GetID()]) 179 // When we retry because of a region miss, we don't know the transaction size. We set the transaction size here 180 // to MaxUint64 to avoid unexpected "resolve lock lite". 181 if action.retry { 182 txnSize = math.MaxUint64 183 } 184 185 tBegin := time.Now() 186 attempts := 0 187 188 req := c.buildPrewriteRequest(batch, txnSize) 189 sender := locate.NewRegionRequestSender(c.store.GetRegionCache(), c.store.GetTiKVClient()) 190 defer func() { 191 if err != nil { 192 // If we fail to receive response for async commit prewrite, it will be undetermined whether this 193 // transaction has been successfully committed. 194 // If prewrite has been cancelled, all ongoing prewrite RPCs will become errors, we needn't set undetermined 195 // errors. 
196 if (c.isAsyncCommit() || c.isOnePC()) && sender.GetRPCError() != nil && atomic.LoadUint32(&c.prewriteCancelled) == 0 { 197 c.setUndeterminedErr(errors.Trace(sender.GetRPCError())) 198 } 199 } 200 }() 201 for { 202 attempts++ 203 if time.Since(tBegin) > slowRequestThreshold { 204 logutil.BgLogger().Warn("slow prewrite request", zap.Uint64("startTS", c.startTS), zap.Stringer("region", &batch.region), zap.Int("attempts", attempts)) 205 tBegin = time.Now() 206 } 207 208 resp, err := sender.SendReq(bo, req, batch.region, client.ReadTimeoutShort) 209 // Unexpected error occurs, return it 210 if err != nil { 211 return errors.Trace(err) 212 } 213 214 regionErr, err := resp.GetRegionError() 215 if err != nil { 216 return errors.Trace(err) 217 } 218 if regionErr != nil { 219 // For other region error and the fake region error, backoff because 220 // there's something wrong. 221 // For the real EpochNotMatch error, don't backoff. 222 if regionErr.GetEpochNotMatch() == nil || locate.IsFakeRegionError(regionErr) { 223 err = bo.Backoff(retry.BoRegionMiss, errors.New(regionErr.String())) 224 if err != nil { 225 return errors.Trace(err) 226 } 227 } 228 if regionErr.GetDiskFull() != nil { 229 storeIds := regionErr.GetDiskFull().GetStoreId() 230 desc := " " 231 for _, i := range storeIds { 232 desc += strconv.FormatUint(i, 10) + " " 233 } 234 235 logutil.Logger(bo.GetCtx()).Error("Request failed cause of TiKV disk full", 236 zap.String("store_id", desc), 237 zap.String("reason", regionErr.GetDiskFull().GetReason())) 238 239 return errors.Trace(errors.New(regionErr.String())) 240 } 241 same, err := batch.relocate(bo, c.store.GetRegionCache()) 242 if err != nil { 243 return errors.Trace(err) 244 } 245 if same { 246 continue 247 } 248 err = c.doActionOnMutations(bo, actionPrewrite{true}, batch.mutations) 249 return errors.Trace(err) 250 } 251 252 if resp.Resp == nil { 253 return errors.Trace(tikverr.ErrBodyMissing) 254 } 255 prewriteResp := resp.Resp.(*kvrpcpb.PrewriteResponse) 
256 keyErrs := prewriteResp.GetErrors() 257 if len(keyErrs) == 0 { 258 // Clear the RPC Error since the request is evaluated successfully. 259 sender.SetRPCError(nil) 260 261 if batch.isPrimary { 262 // After writing the primary key, if the size of the transaction is larger than 32M, 263 // start the ttlManager. The ttlManager will be closed in tikvTxn.Commit(). 264 // In this case 1PC is not expected to be used, but still check it for safety. 265 if int64(c.txnSize) > config.GetGlobalConfig().TiKVClient.TTLRefreshedTxnSize && 266 prewriteResp.OnePcCommitTs == 0 { 267 c.run(c, nil) 268 } 269 } 270 271 if c.isOnePC() { 272 if prewriteResp.OnePcCommitTs == 0 { 273 if prewriteResp.MinCommitTs != 0 { 274 return errors.Trace(errors.New("MinCommitTs must be 0 when 1pc falls back to 2pc")) 275 } 276 logutil.Logger(bo.GetCtx()).Warn("1pc failed and fallbacks to normal commit procedure", 277 zap.Uint64("startTS", c.startTS)) 278 metrics.OnePCTxnCounterFallback.Inc() 279 c.setOnePC(false) 280 c.setAsyncCommit(false) 281 } else { 282 // For 1PC, there's no racing to access to access `onePCCommmitTS` so it's safe 283 // not to lock the mutex. 284 if c.onePCCommitTS != 0 { 285 logutil.Logger(bo.GetCtx()).Fatal("one pc happened multiple times", 286 zap.Uint64("startTS", c.startTS)) 287 } 288 c.onePCCommitTS = prewriteResp.OnePcCommitTs 289 } 290 return nil 291 } else if prewriteResp.OnePcCommitTs != 0 { 292 logutil.Logger(bo.GetCtx()).Fatal("tikv committed a non-1pc transaction with 1pc protocol", 293 zap.Uint64("startTS", c.startTS)) 294 } 295 if c.isAsyncCommit() { 296 // 0 if the min_commit_ts is not ready or any other reason that async 297 // commit cannot proceed. The client can then fallback to normal way to 298 // continue committing the transaction if prewrite are all finished. 
299 if prewriteResp.MinCommitTs == 0 { 300 if c.testingKnobs.noFallBack { 301 return nil 302 } 303 logutil.Logger(bo.GetCtx()).Warn("async commit cannot proceed since the returned minCommitTS is zero, "+ 304 "fallback to normal path", zap.Uint64("startTS", c.startTS)) 305 c.setAsyncCommit(false) 306 } else { 307 c.mu.Lock() 308 if prewriteResp.MinCommitTs > c.minCommitTS { 309 c.minCommitTS = prewriteResp.MinCommitTs 310 } 311 c.mu.Unlock() 312 } 313 } 314 return nil 315 } 316 var locks []*txnlock.Lock 317 for _, keyErr := range keyErrs { 318 // Check already exists error 319 if alreadyExist := keyErr.GetAlreadyExist(); alreadyExist != nil { 320 e := &tikverr.ErrKeyExist{AlreadyExist: alreadyExist} 321 return c.extractKeyExistsErr(e) 322 } 323 324 // Extract lock from key error 325 lock, err1 := txnlock.ExtractLockFromKeyErr(keyErr) 326 if err1 != nil { 327 return errors.Trace(err1) 328 } 329 logutil.BgLogger().Info("prewrite encounters lock", 330 zap.Uint64("session", c.sessionID), 331 zap.Stringer("lock", lock)) 332 locks = append(locks, lock) 333 } 334 start := time.Now() 335 msBeforeExpired, err := c.store.GetLockResolver().ResolveLocksForWrite(bo, c.startTS, c.forUpdateTS, locks) 336 if err != nil { 337 return errors.Trace(err) 338 } 339 atomic.AddInt64(&c.getDetail().ResolveLockTime, int64(time.Since(start))) 340 if msBeforeExpired > 0 { 341 err = bo.BackoffWithCfgAndMaxSleep(retry.BoTxnLock, int(msBeforeExpired), errors.Errorf("2PC prewrite lockedKeys: %d", len(locks))) 342 if err != nil { 343 return errors.Trace(err) 344 } 345 } 346 } 347 } 348 349 func (c *twoPhaseCommitter) prewriteMutations(bo *retry.Backoffer, mutations CommitterMutations) error { 350 if span := opentracing.SpanFromContext(bo.GetCtx()); span != nil && span.Tracer() != nil { 351 span1 := span.Tracer().StartSpan("twoPhaseCommitter.prewriteMutations", opentracing.ChildOf(span.Context())) 352 defer span1.Finish() 353 bo.SetCtx(opentracing.ContextWithSpan(bo.GetCtx(), span1)) 354 } 355 356 
// `doActionOnMutations` will unset `useOnePC` if the mutations is splitted into multiple batches. 357 return c.doActionOnMutations(bo, actionPrewrite{}, mutations) 358 }