// Source: github.com/whtcorpsinc/milevadb-prod@v0.0.0-20211104133533-f57f4be3b597/causetstore/milevadb-server/2pc.go

// Copyright 2020 WHTCORPS INC, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package einsteindb

import (
	"bytes"
	"context"
	"math"
	"strings"
	"sync"
	"sync/atomic"
	"time"
	"unsafe"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/whtcorpsinc/BerolinaSQL/terror"
	pb "github.com/whtcorpsinc/ekvproto/pkg/ekvrpcpb"
	"github.com/whtcorpsinc/errors"
	"github.com/whtcorpsinc/failpoint"
	"github.com/whtcorpsinc/fidelpb/go-binlog"
	"github.com/whtcorpsinc/milevadb/blockcodec"
	"github.com/whtcorpsinc/milevadb/causetstore/einsteindb/einsteindbrpc"
	"github.com/whtcorpsinc/milevadb/causetstore/einsteindb/oracle"
	"github.com/whtcorpsinc/milevadb/config"
	"github.com/whtcorpsinc/milevadb/ekv"
	"github.com/whtcorpsinc/milevadb/metrics"
	"github.com/whtcorpsinc/milevadb/soliton/execdetails"
	"github.com/whtcorpsinc/milevadb/soliton/logutil"
	"github.com/whtcorpsinc/milevadb/stochastikctx/binloginfo"
	"go.uber.org/zap"
)

// twoPhaseCommitCausetAction is one kind of operation the committer can apply
// to batches of mutations. In this file the committer type-asserts actions to
// actionPrewrite, actionCommit, actionCleanup, actionPessimisticLock and
// actionPessimisticRollback.
type twoPhaseCommitCausetAction interface {
	// handleSingleBatch applies the action to one batch of mutations.
	handleSingleBatch(*twoPhaseCommitter, *Backoffer, batchMutations) error
	// EinsteinDBTxnRegionsNumHistogram returns the observer that records how
	// many region groups an invocation of the action touched.
	EinsteinDBTxnRegionsNumHistogram() prometheus.Observer
	// String names the action; it is used as a log field.
	String() string
}

// Pre-resolved metric handles, so label lookup is not repeated on every use.
var (
	einsteindbSecondaryLockCleanupFailureCounterRollback = metrics.EinsteinDBSecondaryLockCleanupFailureCounter.WithLabelValues("rollback")
	EinsteinDBTxnHeartBeatHistogramOK                    = metrics.EinsteinDBTxnHeartBeatHistogram.WithLabelValues("ok")
	EinsteinDBTxnHeartBeatHistogramError                 = metrics.EinsteinDBTxnHeartBeatHistogram.WithLabelValues("err")
)

// Global variable set by config file.
// ManagedLockTTL is expressed in milliseconds: the ttlManager heartbeat loop
// multiplies it by time.Millisecond.
var (
	ManagedLockTTL uint64 = 20000 // 20s
)

// metricsTag returns detail tag for metrics.
// The tag is the action name prefixed with "2pc_".
func metricsTag(action string) string {
	return "2pc_" + action
}

// twoPhaseCommitter executes a two-phase commit protocol.
//
// Notes on fields, as used in this file:
//   - mutations, txnSize, lockTTL, priority, syncLog and hasNoNeedCommitKeys
//     are filled in by initKeysAndMutations.
//   - detail is read and written through getDetail/setDetail.
//   - primaryKey is the first buffered key whose op is not Op_CheckNotExists.
//   - mu guards undeterminedErr and committed.
//   - useAsyncCommit is accessed with sync/atomic (isAsyncCommit/setAsyncCommit).
//   - the embedded ttlManager keeps the primary lock alive with heartbeats.
type twoPhaseCommitter struct {
	causetstore         *einsteindbStore
	txn                 *einsteindbTxn
	startTS             uint64
	mutations           CommitterMutations
	lockTTL             uint64
	commitTS            uint64
	priority            pb.CommandPri
	connID              uint64 // connID is used for log.
	cleanWg             sync.WaitGroup
	detail              unsafe.Pointer
	txnSize             int
	hasNoNeedCommitKeys bool

	primaryKey     []byte
	forUFIDelateTS uint64

	mu struct {
		sync.RWMutex
		undeterminedErr error // undeterminedErr saves the rpc error we encounter when commit primary key.
		committed       bool
	}
	syncLog bool
	// For pessimistic transaction
	isPessimistic bool
	isFirstLock   bool
	// regionTxnSize stores the number of keys involved in each region
	regionTxnSize map[uint64]int
	// Used by pessimistic transaction and large transaction.
	ttlManager

	// testingKnobs holds hooks that are only non-zero in tests; the two
	// channels are used to pause right after the primary key is committed.
	testingKnobs struct {
		acAfterCommitPrimary chan struct{}
		bkAfterCommitPrimary chan struct{}
		noFallBack           bool
	}

	useAsyncCommit uint32
	minCommitTS    uint64
}

// CommitterMutations contains transaction operations.
// It is stored as four parallel slices: index i of ops, keys, values and
// isPessimisticLock together describe one mutation. ops, values and
// isPessimisticLock may be nil (subRange and stripNoNeedCommitKeys check for
// that); keys is always used to determine the length.
type CommitterMutations struct {
	ops               []pb.Op
	keys              [][]byte
	values            [][]byte
	isPessimisticLock []bool
}

// NewCommiterMutations creates a CommitterMutations object with sizeHint reserved.
117 func NewCommiterMutations(sizeHint int) CommitterMutations { 118 return CommitterMutations{ 119 ops: make([]pb.Op, 0, sizeHint), 120 keys: make([][]byte, 0, sizeHint), 121 values: make([][]byte, 0, sizeHint), 122 isPessimisticLock: make([]bool, 0, sizeHint), 123 } 124 } 125 126 func (c *CommitterMutations) subRange(from, to int) CommitterMutations { 127 var res CommitterMutations 128 res.keys = c.keys[from:to] 129 if c.ops != nil { 130 res.ops = c.ops[from:to] 131 } 132 if c.values != nil { 133 res.values = c.values[from:to] 134 } 135 if c.isPessimisticLock != nil { 136 res.isPessimisticLock = c.isPessimisticLock[from:to] 137 } 138 return res 139 } 140 141 // Push another mutation into mutations. 142 func (c *CommitterMutations) Push(op pb.Op, key []byte, value []byte, isPessimisticLock bool) { 143 c.ops = append(c.ops, op) 144 c.keys = append(c.keys, key) 145 c.values = append(c.values, value) 146 c.isPessimisticLock = append(c.isPessimisticLock, isPessimisticLock) 147 } 148 149 func (c *CommitterMutations) len() int { 150 return len(c.keys) 151 } 152 153 // GetKeys returns the keys. 154 func (c *CommitterMutations) GetKeys() [][]byte { 155 return c.keys 156 } 157 158 // GetOps returns the key ops. 159 func (c *CommitterMutations) GetOps() []pb.Op { 160 return c.ops 161 } 162 163 // GetValues returns the key values. 164 func (c *CommitterMutations) GetValues() [][]byte { 165 return c.values 166 } 167 168 // GetPessimisticFlags returns the key pessimistic flags. 169 func (c *CommitterMutations) GetPessimisticFlags() []bool { 170 return c.isPessimisticLock 171 } 172 173 // MergeMutations append input mutations into current mutations. 174 func (c *CommitterMutations) MergeMutations(mutations CommitterMutations) { 175 c.ops = append(c.ops, mutations.ops...) 176 c.keys = append(c.keys, mutations.keys...) 177 c.values = append(c.values, mutations.values...) 178 c.isPessimisticLock = append(c.isPessimisticLock, mutations.isPessimisticLock...) 
179 } 180 181 // newTwoPhaseCommitter creates a twoPhaseCommitter. 182 func newTwoPhaseCommitter(txn *einsteindbTxn, connID uint64) (*twoPhaseCommitter, error) { 183 return &twoPhaseCommitter{ 184 causetstore: txn.causetstore, 185 txn: txn, 186 startTS: txn.StartTS(), 187 connID: connID, 188 regionTxnSize: map[uint64]int{}, 189 ttlManager: ttlManager{ 190 ch: make(chan struct{}), 191 }, 192 isPessimistic: txn.IsPessimistic(), 193 }, nil 194 } 195 196 func (c *twoPhaseCommitter) extractKeyExistsErr(key ekv.Key) error { 197 if !c.txn.us.HasPresumeKeyNotExists(key) { 198 return errors.Errorf("conn %d, existErr for key:%s should not be nil", c.connID, key) 199 } 200 201 _, handle, err := blockcodec.DecodeRecordKey(key) 202 if err == nil { 203 if handle.IsInt() { 204 return ekv.ErrKeyExists.FastGenByArgs(handle.String(), "PRIMARY") 205 } 206 trimLen := 0 207 for i := 0; i < handle.NumDefCauss(); i++ { 208 trimLen += len(handle.EncodedDefCaus(i)) 209 } 210 values, err := blockcodec.DecodeValuesBytesToStrings(handle.Encoded()[:trimLen]) 211 if err == nil { 212 return ekv.ErrKeyExists.FastGenByArgs(strings.Join(values, "-"), "PRIMARY") 213 } 214 } 215 216 blockID, indexID, indexValues, err := blockcodec.DecodeIndexKey(key) 217 if err == nil { 218 return ekv.ErrKeyExists.FastGenByArgs(strings.Join(indexValues, "-"), c.txn.us.GetIndexName(blockID, indexID)) 219 } 220 221 return ekv.ErrKeyExists.FastGenByArgs(key.String(), "UNKNOWN") 222 } 223 224 func (c *twoPhaseCommitter) initKeysAndMutations() error { 225 var size, putCnt, delCnt, lockCnt, checkCnt int 226 227 txn := c.txn 228 memBuf := txn.GetMemBuffer() 229 sizeHint := txn.us.GetMemBuffer().Len() 230 mutations := NewCommiterMutations(sizeHint) 231 c.isPessimistic = txn.IsPessimistic() 232 233 var err error 234 for it := memBuf.IterWithFlags(nil, nil); it.Valid(); err = it.Next() { 235 _ = err 236 key := it.Key() 237 flags := it.Flags() 238 var value []byte 239 var op pb.Op 240 241 if !it.HasValue() { 242 if 
!flags.HasLocked() { 243 continue 244 } 245 op = pb.Op_Lock 246 lockCnt++ 247 } else { 248 value = it.Value() 249 if len(value) > 0 { 250 if blockcodec.IsUntouchedIndexKValue(key, value) { 251 continue 252 } 253 op = pb.Op_Put 254 if flags.HasPresumeKeyNotExists() { 255 op = pb.Op_Insert 256 } 257 putCnt++ 258 } else { 259 if !txn.IsPessimistic() && flags.HasPresumeKeyNotExists() { 260 // delete-your-writes keys in optimistic txn need check not exists in prewrite-phase 261 // due to `Op_CheckNotExists` doesn't prewrite dagger, so mark those keys should not be used in commit-phase. 262 op = pb.Op_CheckNotExists 263 checkCnt++ 264 memBuf.UFIDelateFlags(key, ekv.SetNoNeedCommit) 265 } else { 266 // normal delete keys in optimistic txn can be delete without not exists checking 267 // delete-your-writes keys in pessimistic txn can ensure must be no exists so can directly delete them 268 op = pb.Op_Del 269 delCnt++ 270 } 271 } 272 } 273 274 var isPessimistic bool 275 if flags.HasLocked() { 276 isPessimistic = c.isPessimistic 277 } 278 mutations.Push(op, key, value, isPessimistic) 279 size += len(key) + len(value) 280 281 if len(c.primaryKey) == 0 && op != pb.Op_CheckNotExists { 282 c.primaryKey = key 283 } 284 } 285 286 if mutations.len() == 0 { 287 return nil 288 } 289 c.txnSize = size 290 291 if size > int(ekv.TxnTotalSizeLimit) { 292 return ekv.ErrTxnTooLarge.GenWithStackByArgs(size) 293 } 294 const logEntryCount = 10000 295 const logSize = 4 * 1024 * 1024 // 4MB 296 if mutations.len() > logEntryCount || size > logSize { 297 blockID := blockcodec.DecodeTableID(mutations.keys[0]) 298 logutil.BgLogger().Info("[BIG_TXN]", 299 zap.Uint64("con", c.connID), 300 zap.Int64("causet ID", blockID), 301 zap.Int("size", size), 302 zap.Int("keys", mutations.len()), 303 zap.Int("puts", putCnt), 304 zap.Int("dels", delCnt), 305 zap.Int("locks", lockCnt), 306 zap.Int("checks", checkCnt), 307 zap.Uint64("txnStartTS", txn.startTS)) 308 } 309 310 // Sanity check for startTS. 
311 if txn.StartTS() == math.MaxUint64 { 312 err = errors.Errorf("try to commit with invalid txnStartTS: %d", txn.StartTS()) 313 logutil.BgLogger().Error("commit failed", 314 zap.Uint64("conn", c.connID), 315 zap.Error(err)) 316 return errors.Trace(err) 317 } 318 319 commitDetail := &execdetails.CommitDetails{WriteSize: size, WriteKeys: mutations.len()} 320 metrics.EinsteinDBTxnWriteKVCountHistogram.Observe(float64(commitDetail.WriteKeys)) 321 metrics.EinsteinDBTxnWriteSizeHistogram.Observe(float64(commitDetail.WriteSize)) 322 c.hasNoNeedCommitKeys = checkCnt > 0 323 c.mutations = mutations 324 c.lockTTL = txnLockTTL(txn.startTime, size) 325 c.priority = getTxnPriority(txn) 326 c.syncLog = getTxnSyncLog(txn) 327 c.setDetail(commitDetail) 328 return nil 329 } 330 331 func (c *twoPhaseCommitter) primary() []byte { 332 if len(c.primaryKey) == 0 { 333 return c.mutations.keys[0] 334 } 335 return c.primaryKey 336 } 337 338 // asyncSecondaries returns all keys that must be checked in the recovery phase of an async commit. 339 func (c *twoPhaseCommitter) asyncSecondaries() [][]byte { 340 secondaries := make([][]byte, 0, len(c.mutations.keys)) 341 for i, k := range c.mutations.keys { 342 if bytes.Equal(k, c.primary()) || c.mutations.ops[i] == pb.Op_CheckNotExists { 343 continue 344 } 345 secondaries = append(secondaries, k) 346 } 347 return secondaries 348 } 349 350 const bytesPerMiB = 1024 * 1024 351 352 func txnLockTTL(startTime time.Time, txnSize int) uint64 { 353 // Increase lockTTL for large transactions. 354 // The formula is `ttl = ttlFactor * sqrt(sizeInMiB)`. 
355 // When writeSize is less than 256KB, the base ttl is defaultTTL (3s); 356 // When writeSize is 1MiB, 4MiB, or 10MiB, ttl is 6s, 12s, 20s correspondingly; 357 lockTTL := defaultLockTTL 358 if txnSize >= txnCommitBatchSize { 359 sizeMiB := float64(txnSize) / bytesPerMiB 360 lockTTL = uint64(float64(ttlFactor) * math.Sqrt(sizeMiB)) 361 if lockTTL < defaultLockTTL { 362 lockTTL = defaultLockTTL 363 } 364 if lockTTL > ManagedLockTTL { 365 lockTTL = ManagedLockTTL 366 } 367 } 368 369 // Increase lockTTL by the transaction's read time. 370 // When resolving a dagger, we compare current ts and startTS+lockTTL to decide whether to clean up. If a txn 371 // takes a long time to read, increasing its TTL will help to prevent it from been aborted soon after prewrite. 372 elapsed := time.Since(startTime) / time.Millisecond 373 return lockTTL + uint64(elapsed) 374 } 375 376 var preSplitDetectThreshold uint32 = 100000 377 var preSplitSizeThreshold uint32 = 32 << 20 378 379 // doCausetActionOnMutations groups keys into primary batch and secondary batches, if primary batch exists in the key, 380 // it does action on primary batch first, then on secondary batches. If action is commit, secondary batches 381 // is done in background goroutine. 382 func (c *twoPhaseCommitter) doCausetActionOnMutations(bo *Backoffer, action twoPhaseCommitCausetAction, mutations CommitterMutations) error { 383 if mutations.len() == 0 { 384 return nil 385 } 386 groups, err := c.groupMutations(bo, mutations) 387 if err != nil { 388 return errors.Trace(err) 389 } 390 391 return c.doCausetActionOnGroupMutations(bo, action, groups) 392 } 393 394 // groupMutations groups mutations by region, then checks for any large groups and in that case pre-splits the region. 
// When at least one pre-split succeeds, the mutations are grouped a second
// time so the returned groups reflect the new region layout.
func (c *twoPhaseCommitter) groupMutations(bo *Backoffer, mutations CommitterMutations) ([]groupedMutations, error) {
	groups, err := c.causetstore.regionCache.GroupSortedMutationsByRegion(bo, mutations)
	if err != nil {
		return nil, errors.Trace(err)
	}

	// Pre-split regions to avoid too much write workload into a single region.
	// In the large transaction case, this operation is important to avoid EinsteinDB 'server is busy' error.
	var didPreSplit bool
	preSplitDetectThresholdVal := atomic.LoadUint32(&preSplitDetectThreshold)
	for _, group := range groups {
		if uint32(group.mutations.len()) >= preSplitDetectThresholdVal {
			logutil.BgLogger().Info("2PC detect large amount of mutations on a single region",
				zap.Uint64("region", group.region.GetID()),
				zap.Int("mutations count", group.mutations.len()))
			// Use context.Background, this time should not add up to Backoffer.
			if c.causetstore.preSplitRegion(context.Background(), group) {
				didPreSplit = true
			}
		}
	}
	// Reload region cache again.
	if didPreSplit {
		groups, err = c.causetstore.regionCache.GroupSortedMutationsByRegion(bo, mutations)
		if err != nil {
			return nil, errors.Trace(err)
		}
	}

	return groups, nil
}

// doCausetActionOnGroupMutations splits groups into batches (there is one group per region, and potentially many batches per group, but all mutations
// in a batch will belong to the same region).
//
// For commit (when not async commit), cleanup and pessimistic-lock actions,
// the batch holding the primary key is processed first and on its own. For a
// non-retry, non-async commit the remaining batches are then processed in a
// background goroutine whose error is only logged and counted; in every other
// case the remaining batches are processed synchronously and their error is
// returned.
func (c *twoPhaseCommitter) doCausetActionOnGroupMutations(bo *Backoffer, action twoPhaseCommitCausetAction, groups []groupedMutations) error {
	action.EinsteinDBTxnRegionsNumHistogram().Observe(float64(len(groups)))

	// Batches are sized by key length only, except for prewrite (below),
	// which sizes them by key+value length.
	var sizeFunc = c.keySize

	switch act := action.(type) {
	case actionPrewrite:
		// Do not update regionTxnSize on retries. They are not used when building a PrewriteRequest.
		if len(bo.errors) == 0 {
			for _, group := range groups {
				c.regionTxnSize[group.region.id] = group.mutations.len()
			}
		}
		sizeFunc = c.keyValueSize
		atomic.AddInt32(&c.getDetail().PrewriteRegionNum, int32(len(groups)))
	case actionPessimisticLock:
		if act.LockCtx.Stats != nil {
			act.LockCtx.Stats.RegionNum = int32(len(groups))
		}
	}

	batchBuilder := newBatched(c.primary())
	for _, group := range groups {
		batchBuilder.appendBatchMutationsBySize(group.region, group.mutations, sizeFunc, txnCommitBatchSize)
	}
	// NOTE(review): setPrimary is defined elsewhere; going by the variable
	// name, it reports whether the primary key's batch is present — confirm.
	firstIsPrimary := batchBuilder.setPrimary()

	actionCommit, actionIsCommit := action.(actionCommit)
	_, actionIsCleanup := action.(actionCleanup)
	_, actionIsPessimiticLock := action.(actionPessimisticLock)

	var err error
	// Test-only failpoints: they let a test process only part of the batches
	// for a pessimistic lock, or skip a pessimistic rollback entirely.
	failpoint.Inject("skipKeyReturnOK", func(val failpoint.Value) {
		valStr, ok := val.(string)
		if ok && c.connID > 0 {
			if firstIsPrimary && actionIsPessimiticLock {
				logutil.Logger(bo.ctx).Warn("pessimisticLock failpoint", zap.String("valStr", valStr))
				switch valStr {
				case "pessimisticLockSkipPrimary":
					err = c.doCausetActionOnBatches(bo, action, batchBuilder.allBatches())
					failpoint.Return(err)
				case "pessimisticLockSkipSecondary":
					err = c.doCausetActionOnBatches(bo, action, batchBuilder.primaryBatch())
					failpoint.Return(err)
				}
			}
		}
	})
	failpoint.Inject("pessimisticRollbackDoNth", func() {
		_, actionIsPessimisticRollback := action.(actionPessimisticRollback)
		if actionIsPessimisticRollback && c.connID > 0 {
			logutil.Logger(bo.ctx).Warn("pessimisticRollbackDoNth failpoint")
			failpoint.Return(nil)
		}
	})

	if firstIsPrimary &&
		((actionIsCommit && !c.isAsyncCommit()) || actionIsCleanup || actionIsPessimiticLock) {
		// primary should be committed(not async commit)/cleanup/pessimistically locked first
		err = c.doCausetActionOnBatches(bo, action, batchBuilder.primaryBatch())
		if err != nil {
			return errors.Trace(err)
		}
		// Test hook: signal that the primary is committed, then wait until the
		// test allows the commit to continue.
		if actionIsCommit && c.testingKnobs.bkAfterCommitPrimary != nil && c.testingKnobs.acAfterCommitPrimary != nil {
			c.testingKnobs.acAfterCommitPrimary <- struct{}{}
			<-c.testingKnobs.bkAfterCommitPrimary
		}
		batchBuilder.forgetPrimary()
	}
	// Already spawned a goroutine for async commit transaction.
	if actionIsCommit && !actionCommit.retry && !c.isAsyncCommit() {
		// The background work gets its own Backoffer on context.Background,
		// so it is not tied to the caller's context.
		secondaryBo := NewBackofferWithVars(context.Background(), int(atomic.LoadUint64(&CommitMaxBackoff)), c.txn.vars)
		go func() {
			e := c.doCausetActionOnBatches(secondaryBo, action, batchBuilder.allBatches())
			if e != nil {
				logutil.BgLogger().Debug("2PC async doCausetActionOnBatches",
					zap.Uint64("conn", c.connID),
					zap.Stringer("action type", action),
					zap.Error(e))
				einsteindbSecondaryLockCleanupFailureCounterCommit.Inc()
			}
		}()
	} else {
		err = c.doCausetActionOnBatches(bo, action, batchBuilder.allBatches())
	}
	return errors.Trace(err)
}

// doCausetActionOnBatches does action to batches in parallel.
// A single batch, or any number of batches for a commit retry, is handled
// sequentially on the calling goroutine instead, stopping at the first error.
func (c *twoPhaseCommitter) doCausetActionOnBatches(bo *Backoffer, action twoPhaseCommitCausetAction, batches []batchMutations) error {
	if len(batches) == 0 {
		return nil
	}

	noNeedFork := len(batches) == 1
	if !noNeedFork {
		if ac, ok := action.(actionCommit); ok && ac.retry {
			noNeedFork = true
		}
	}
	if noNeedFork {
		for _, b := range batches {
			e := action.handleSingleBatch(c, bo, b)
			if e != nil {
				logutil.BgLogger().Debug("2PC doCausetActionOnBatches failed",
					zap.Uint64("conn", c.connID),
					zap.Stringer("action type", action),
					zap.Error(e),
					zap.Uint64("txnStartTS", c.startTS))
				return errors.Trace(e)
			}
		}
		return nil
	}
	rateLim := len(batches)
	// Set rateLim here for the large transaction.
	// If the rate limit is too high, einsteindb will report service is busy.
	// If the rate limit is too low, we can't full utilize the einsteindb's throughput.
	// TODO: Find a self-adaptive way to control the rate limit here.
	if rateLim > config.GetGlobalConfig().Performance.CommitterConcurrency {
		rateLim = config.GetGlobalConfig().Performance.CommitterConcurrency
	}
	batchInterlockingDirectorate := newBatchInterlockingDirectorate(rateLim, c, action, bo)
	err := batchInterlockingDirectorate.process(batches)
	return errors.Trace(err)
}

// keyValueSize is a batch size function that counts both key and value bytes.
func (c *twoPhaseCommitter) keyValueSize(key, value []byte) int {
	return len(key) + len(value)
}

// keySize is a batch size function that counts key bytes only; value is ignored.
func (c *twoPhaseCommitter) keySize(key, value []byte) int {
	return len(key)
}

// ttlManagerState is the lifecycle state of a ttlManager. It is stored in a
// uint32 so that it can be changed with atomic compare-and-swap.
type ttlManagerState uint32

const (
	stateUninitialized ttlManagerState = iota
	stateRunning
	stateClosed
)

// ttlManager periodically sends heartbeats for a transaction's primary lock
// to extend its TTL.
type ttlManager struct {
	state   ttlManagerState
	ch      chan struct{} // closed by close() to stop keepAlive
	lockCtx *ekv.LockCtx
}

// run starts the keepAlive goroutine for committer c. Only the first call has
// any effect; later calls (including calls after close) return immediately.
func (tm *ttlManager) run(c *twoPhaseCommitter, lockCtx *ekv.LockCtx) {
	// Run only once.
	if !atomic.CompareAndSwapUint32((*uint32)(&tm.state), uint32(stateUninitialized), uint32(stateRunning)) {
		return
	}
	tm.lockCtx = lockCtx
	go tm.keepAlive(c)
}

// close stops the keepAlive goroutine by closing tm.ch. It only acts when the
// manager is in the running state, so it is a no-op if run was never called
// or close was already called.
func (tm *ttlManager) close() {
	if !atomic.CompareAndSwapUint32((*uint32)(&tm.state), uint32(stateRunning), uint32(stateClosed)) {
		return
	}
	close(tm.ch)
}

// keepAlive is the heartbeat loop. On every tick it asks the store to extend
// the primary lock's TTL, and it returns when any of the following happens:
// tm.ch is closed, the kill flag in lockCtx is set, fetching a timestamp keeps
// failing until the backoff gives up, the transaction has been alive longer
// than Performance.MaxTxnTTL, or a heartbeat request fails.
func (tm *ttlManager) keepAlive(c *twoPhaseCommitter) {
	// Ticker is set to 1/2 of the ManagedLockTTL.
	ticker := time.NewTicker(time.Duration(atomic.LoadUint64(&ManagedLockTTL)) * time.Millisecond / 2)
	defer ticker.Stop()
	for {
		select {
		case <-tm.ch:
			return
		case <-ticker.C:
			// If kill signal is received, the ttlManager should exit.
			if tm.lockCtx != nil && tm.lockCtx.Killed != nil && atomic.LoadUint32(tm.lockCtx.Killed) != 0 {
				return
			}
			// A fresh Backoffer is created for every tick.
			bo := NewBackofferWithVars(context.Background(), pessimisticLockMaxBackoff, c.txn.vars)
			now, err := c.causetstore.GetOracle().GetTimestamp(bo.ctx)
			if err != nil {
				err1 := bo.Backoff(BoFIDelRPC, err)
				if err1 != nil {
					logutil.Logger(bo.ctx).Warn("keepAlive get tso fail",
						zap.Error(err))
					return
				}
				continue
			}

			// Physical time elapsed since the transaction started.
			// NOTE(review): presumably milliseconds, since it is added to
			// ManagedLockTTL below — confirm against oracle.ExtractPhysical.
			uptime := uint64(oracle.ExtractPhysical(now) - oracle.ExtractPhysical(c.startTS))
			if uptime > config.GetGlobalConfig().Performance.MaxTxnTTL {
				// Checks maximum lifetime for the ttlManager, so when something goes wrong
				// the key will not be locked forever.
				logutil.Logger(bo.ctx).Info("ttlManager live up to its lifetime",
					zap.Uint64("txnStartTS", c.startTS),
					zap.Uint64("uptime", uptime),
					zap.Uint64("maxTxnTTL", config.GetGlobalConfig().Performance.MaxTxnTTL))
				metrics.EinsteinDBTTLLifeTimeReachCounter.Inc()
				// the pessimistic locks may expire if the ttl manager has timed out, set `LockExpired` flag
				// so that this transaction could only commit or rollback with no more memex executions
				if c.isPessimistic && tm.lockCtx != nil && tm.lockCtx.LockExpired != nil {
					atomic.StoreUint32(tm.lockCtx.LockExpired, 1)
				}
				return
			}

			// Ask for a TTL that reaches ManagedLockTTL past the current uptime.
			newTTL := uptime + atomic.LoadUint64(&ManagedLockTTL)
			logutil.Logger(bo.ctx).Info("send TxnHeartBeat",
				zap.Uint64("startTS", c.startTS), zap.Uint64("newTTL", newTTL))
			startTime := time.Now()
			_, err = sendTxnHeartBeat(bo, c.causetstore, c.primary(), c.startTS, newTTL)
			if err != nil {
				EinsteinDBTxnHeartBeatHistogramError.Observe(time.Since(startTime).Seconds())
				logutil.Logger(bo.ctx).Warn("send TxnHeartBeat failed",
					zap.Error(err),
					zap.Uint64("txnStartTS", c.startTS))
				return
			}
			EinsteinDBTxnHeartBeatHistogramOK.Observe(time.Since(startTime).Seconds())
		}
	}
}

func sendTxnHeartBeat(bo *Backoffer, causetstore *einsteindbStore, primary []byte, startTS, ttl uint64) (uint64, error) { 654 req := einsteindbrpc.NewRequest(einsteindbrpc.CmdTxnHeartBeat, &pb.TxnHeartBeatRequest{ 655 PrimaryLock: primary, 656 StartVersion: startTS, 657 AdviseLockTtl: ttl, 658 }) 659 for { 660 loc, err := causetstore.GetRegionCache().LocateKey(bo, primary) 661 if err != nil { 662 return 0, errors.Trace(err) 663 } 664 resp, err := causetstore.SendReq(bo, req, loc.Region, readTimeoutShort) 665 if err != nil { 666 return 0, errors.Trace(err) 667 } 668 regionErr, err := resp.GetRegionError() 669 if err != nil { 670 return 0, errors.Trace(err) 671 } 672 if regionErr != nil { 673 err = bo.Backoff(BoRegionMiss, errors.New(regionErr.String())) 674 if err != nil { 675 return 0, errors.Trace(err) 676 } 677 continue 678 } 679 if resp.Resp == nil { 680 return 0, errors.Trace(ErrBodyMissing) 681 } 682 cmdResp := resp.Resp.(*pb.TxnHeartBeatResponse) 683 if keyErr := cmdResp.GetError(); keyErr != nil { 684 return 0, errors.Errorf("txn %d heartbeat fail, primary key = %v, err = %s", startTS, primary, keyErr.Abort) 685 } 686 return cmdResp.GetLockTtl(), nil 687 } 688 } 689 690 // checkAsyncCommit checks if async commit protocol is available for current transaction commit, true is returned if possible. 691 func (c *twoPhaseCommitter) checkAsyncCommit() bool { 692 // TODO the keys limit need more tests, this value makes the unit test pass by now. 693 // Async commit is not compatible with Binlog because of the non unique timestamp issue. 
694 if c.connID > 0 && config.GetGlobalConfig().EinsteinDBClient.EnableAsyncCommit && 695 uint(len(c.mutations.keys)) <= config.GetGlobalConfig().EinsteinDBClient.AsyncCommitKeysLimit && 696 !c.shouldWriteBinlog() { 697 return true 698 } 699 return false 700 } 701 702 func (c *twoPhaseCommitter) isAsyncCommit() bool { 703 return atomic.LoadUint32(&c.useAsyncCommit) > 0 704 } 705 706 func (c *twoPhaseCommitter) setAsyncCommit(val bool) { 707 if val { 708 atomic.StoreUint32(&c.useAsyncCommit, 1) 709 } else { 710 atomic.StoreUint32(&c.useAsyncCommit, 0) 711 } 712 } 713 714 func (c *twoPhaseCommitter) cleanup(ctx context.Context) { 715 c.cleanWg.Add(1) 716 go func() { 717 cleanupKeysCtx := context.WithValue(context.Background(), txnStartKey, ctx.Value(txnStartKey)) 718 err := c.cleanupMutations(NewBackofferWithVars(cleanupKeysCtx, cleanupMaxBackoff, c.txn.vars), c.mutations) 719 if err != nil { 720 einsteindbSecondaryLockCleanupFailureCounterRollback.Inc() 721 logutil.Logger(ctx).Info("2PC cleanup failed", 722 zap.Error(err), 723 zap.Uint64("txnStartTS", c.startTS)) 724 } else { 725 logutil.Logger(ctx).Info("2PC clean up done", 726 zap.Uint64("txnStartTS", c.startTS)) 727 } 728 c.cleanWg.Done() 729 }() 730 } 731 732 // execute executes the two-phase commit protocol. 733 func (c *twoPhaseCommitter) execute(ctx context.Context) (err error) { 734 var binlogSkipped bool 735 defer func() { 736 if !c.isAsyncCommit() { 737 // Always clean up all written keys if the txn does not commit. 
738 c.mu.RLock() 739 committed := c.mu.committed 740 undetermined := c.mu.undeterminedErr != nil 741 c.mu.RUnlock() 742 if !committed && !undetermined { 743 c.cleanup(ctx) 744 } 745 c.txn.commitTS = c.commitTS 746 if binlogSkipped { 747 binloginfo.RemoveOneSkippedCommitter() 748 } else { 749 if err != nil { 750 c.writeFinishBinlog(ctx, binlog.BinlogType_Rollback, 0) 751 } else { 752 c.writeFinishBinlog(ctx, binlog.BinlogType_Commit, int64(c.commitTS)) 753 } 754 } 755 } else { 756 // The error means the async commit should not succeed. 757 if err != nil { 758 c.cleanup(ctx) 759 } 760 } 761 }() 762 763 // Check async commit is available or not. 764 if c.checkAsyncCommit() { 765 c.setAsyncCommit(true) 766 } 767 768 binlogChan := c.prewriteBinlog(ctx) 769 prewriteBo := NewBackofferWithVars(ctx, PrewriteMaxBackoff, c.txn.vars) 770 start := time.Now() 771 err = c.prewriteMutations(prewriteBo, c.mutations) 772 commitDetail := c.getDetail() 773 commitDetail.PrewriteTime = time.Since(start) 774 if prewriteBo.totalSleep > 0 { 775 atomic.AddInt64(&commitDetail.CommitBackoffTime, int64(prewriteBo.totalSleep)*int64(time.Millisecond)) 776 commitDetail.Mu.Lock() 777 commitDetail.Mu.BackoffTypes = append(commitDetail.Mu.BackoffTypes, prewriteBo.types...) 778 commitDetail.Mu.Unlock() 779 } 780 if binlogChan != nil { 781 startWaitBinlog := time.Now() 782 binlogWriteResult := <-binlogChan 783 commitDetail.WaitPrewriteBinlogTime = time.Since(startWaitBinlog) 784 if binlogWriteResult != nil { 785 binlogSkipped = binlogWriteResult.Skipped() 786 binlogErr := binlogWriteResult.GetError() 787 if binlogErr != nil { 788 return binlogErr 789 } 790 } 791 } 792 if err != nil { 793 logutil.Logger(ctx).Debug("2PC failed on prewrite", 794 zap.Error(err), 795 zap.Uint64("txnStartTS", c.startTS)) 796 return errors.Trace(err) 797 } 798 799 // strip check_not_exists keys that no need to commit. 
800 c.stripNoNeedCommitKeys() 801 802 var commitTS uint64 803 if c.isAsyncCommit() { 804 if c.minCommitTS == 0 { 805 err = errors.Errorf("conn %d invalid minCommitTS for async commit protocol after prewrite, startTS=%v", c.connID, c.startTS) 806 return errors.Trace(err) 807 } 808 commitTS = c.minCommitTS 809 } else { 810 start = time.Now() 811 logutil.Event(ctx, "start get commit ts") 812 commitTS, err = c.causetstore.getTimestampWithRetry(NewBackofferWithVars(ctx, tsoMaxBackoff, c.txn.vars)) 813 if err != nil { 814 logutil.Logger(ctx).Warn("2PC get commitTS failed", 815 zap.Error(err), 816 zap.Uint64("txnStartTS", c.startTS)) 817 return errors.Trace(err) 818 } 819 commitDetail.GetCommitTsTime = time.Since(start) 820 logutil.Event(ctx, "finish get commit ts") 821 logutil.SetTag(ctx, "commitTs", commitTS) 822 } 823 824 tryAmend := c.isPessimistic && c.connID > 0 && !c.isAsyncCommit() && c.txn.schemaAmender != nil 825 if !tryAmend { 826 _, _, err = c.checkSchemaValid(ctx, commitTS, c.txn.txnSchemaReplicant, false) 827 if err != nil { 828 return errors.Trace(err) 829 } 830 } else { 831 relatedSchemaChange, memAmended, err := c.checkSchemaValid(ctx, commitTS, c.txn.txnSchemaReplicant, true) 832 if err != nil { 833 return errors.Trace(err) 834 } 835 if memAmended { 836 // Get new commitTS and check schemaReplicant valid again. 837 newCommitTS, err := c.getCommitTS(ctx, commitDetail) 838 if err != nil { 839 return errors.Trace(err) 840 } 841 // If schemaReplicant check failed between commitTS and newCommitTs, report schemaReplicant change error. 
842 _, _, err = c.checkSchemaValid(ctx, newCommitTS, relatedSchemaChange.LatestSchemaReplicant, false) 843 if err != nil { 844 return errors.Trace(err) 845 } 846 commitTS = newCommitTS 847 } 848 } 849 c.commitTS = commitTS 850 851 if c.causetstore.oracle.IsExpired(c.startTS, ekv.MaxTxnTimeUse) { 852 err = errors.Errorf("conn %d txn takes too much time, txnStartTS: %d, comm: %d", 853 c.connID, c.startTS, c.commitTS) 854 return err 855 } 856 857 if c.connID > 0 { 858 failpoint.Inject("beforeCommit", func() {}) 859 } 860 861 if c.isAsyncCommit() { 862 // For async commit protocol, the commit is considered success here. 863 c.txn.commitTS = c.commitTS 864 logutil.Logger(ctx).Info("2PC will use async commit protocol to commit this txn", zap.Uint64("startTS", c.startTS), 865 zap.Uint64("commitTS", c.commitTS)) 866 go func() { 867 failpoint.Inject("asyncCommitDoNothing", func() { 868 failpoint.Return() 869 }) 870 defer c.ttlManager.close() 871 commitBo := NewBackofferWithVars(ctx, int(atomic.LoadUint64(&CommitMaxBackoff)), c.txn.vars) 872 err := c.commitMutations(commitBo, c.mutations) 873 if err != nil { 874 logutil.Logger(ctx).Warn("2PC async commit failed", zap.Uint64("connID", c.connID), 875 zap.Uint64("startTS", c.startTS), zap.Uint64("commitTS", c.commitTS), zap.Error(err)) 876 } 877 }() 878 return nil 879 } 880 return c.commitTxn(ctx, commitDetail) 881 } 882 883 func (c *twoPhaseCommitter) commitTxn(ctx context.Context, commitDetail *execdetails.CommitDetails) error { 884 c.mutations.values = nil 885 c.txn.GetMemBuffer().DiscardValues() 886 start := time.Now() 887 888 commitBo := NewBackofferWithVars(ctx, int(atomic.LoadUint64(&CommitMaxBackoff)), c.txn.vars) 889 err := c.commitMutations(commitBo, c.mutations) 890 commitDetail.CommitTime = time.Since(start) 891 if commitBo.totalSleep > 0 { 892 atomic.AddInt64(&commitDetail.CommitBackoffTime, int64(commitBo.totalSleep)*int64(time.Millisecond)) 893 commitDetail.Mu.Lock() 894 commitDetail.Mu.BackoffTypes = 
append(commitDetail.Mu.BackoffTypes, commitBo.types...) 895 commitDetail.Mu.Unlock() 896 } 897 if err != nil { 898 if undeterminedErr := c.getUndeterminedErr(); undeterminedErr != nil { 899 logutil.Logger(ctx).Error("2PC commit result undetermined", 900 zap.Error(err), 901 zap.NamedError("rpcErr", undeterminedErr), 902 zap.Uint64("txnStartTS", c.startTS)) 903 err = errors.Trace(terror.ErrResultUndetermined) 904 } 905 if !c.mu.committed { 906 logutil.Logger(ctx).Debug("2PC failed on commit", 907 zap.Error(err), 908 zap.Uint64("txnStartTS", c.startTS)) 909 return errors.Trace(err) 910 } 911 logutil.Logger(ctx).Debug("got some exceptions, but 2PC was still successful", 912 zap.Error(err), 913 zap.Uint64("txnStartTS", c.startTS)) 914 } 915 return nil 916 } 917 918 func (c *twoPhaseCommitter) stripNoNeedCommitKeys() { 919 if !c.hasNoNeedCommitKeys { 920 return 921 } 922 m := &c.mutations 923 var newIdx int 924 for oldIdx := range m.keys { 925 key := m.keys[oldIdx] 926 flags, err := c.txn.GetMemBuffer().GetFlags(key) 927 if err == nil && flags.HasNoNeedCommit() { 928 continue 929 } 930 m.keys[newIdx] = key 931 if m.ops != nil { 932 m.ops[newIdx] = m.ops[oldIdx] 933 } 934 if m.values != nil { 935 m.values[newIdx] = m.values[oldIdx] 936 } 937 if m.isPessimisticLock != nil { 938 m.isPessimisticLock[newIdx] = m.isPessimisticLock[oldIdx] 939 } 940 newIdx++ 941 } 942 c.mutations = m.subRange(0, newIdx) 943 } 944 945 // SchemaVer is the schemaReplicant which will return the schemaReplicant version. 946 type SchemaVer interface { 947 // SchemaMetaVersion returns the spacetime schemaReplicant version. 948 SchemaMetaVersion() int64 949 } 950 951 type schemaLeaseChecker interface { 952 // CheckBySchemaVer checks if the schemaReplicant has changed for the transaction related blocks between the startSchemaVer 953 // and the schemaReplicant version at txnTS, all the related schemaReplicant changes will be returned. 
954 CheckBySchemaVer(txnTS uint64, startSchemaVer SchemaVer) (*RelatedSchemaChange, error) 955 } 956 957 // RelatedSchemaChange contains information about schemaReplicant diff between two schemaReplicant versions. 958 type RelatedSchemaChange struct { 959 PhyTblIDS []int64 960 CausetActionTypes []uint64 961 LatestSchemaReplicant SchemaVer 962 Amendable bool 963 } 964 965 func (c *twoPhaseCommitter) tryAmendTxn(ctx context.Context, startSchemaReplicant SchemaVer, change *RelatedSchemaChange) (bool, error) { 966 addMutations, err := c.txn.schemaAmender.AmendTxn(ctx, startSchemaReplicant, change, c.mutations) 967 if err != nil { 968 return false, err 969 } 970 // Prewrite new mutations. 971 if addMutations != nil && len(addMutations.keys) > 0 { 972 prewriteBo := NewBackofferWithVars(ctx, PrewriteMaxBackoff, c.txn.vars) 973 err = c.prewriteMutations(prewriteBo, *addMutations) 974 if err != nil { 975 logutil.Logger(ctx).Warn("amend prewrite has failed", zap.Error(err), zap.Uint64("txnStartTS", c.startTS)) 976 return false, err 977 } 978 logutil.Logger(ctx).Info("amend prewrite finished", zap.Uint64("txnStartTS", c.startTS)) 979 return true, nil 980 } 981 return false, nil 982 } 983 984 func (c *twoPhaseCommitter) getCommitTS(ctx context.Context, commitDetail *execdetails.CommitDetails) (uint64, error) { 985 start := time.Now() 986 logutil.Event(ctx, "start get commit ts") 987 commitTS, err := c.causetstore.getTimestampWithRetry(NewBackofferWithVars(ctx, tsoMaxBackoff, c.txn.vars)) 988 if err != nil { 989 logutil.Logger(ctx).Warn("2PC get commitTS failed", 990 zap.Error(err), 991 zap.Uint64("txnStartTS", c.startTS)) 992 return 0, errors.Trace(err) 993 } 994 commitDetail.GetCommitTsTime = time.Since(start) 995 logutil.Event(ctx, "finish get commit ts") 996 logutil.SetTag(ctx, "commitTS", commitTS) 997 998 // Check commitTS. 
999 if commitTS <= c.startTS { 1000 err = errors.Errorf("conn %d invalid transaction tso with txnStartTS=%v while txnCommitTS=%v", 1001 c.connID, c.startTS, commitTS) 1002 logutil.BgLogger().Error("invalid transaction", zap.Error(err)) 1003 return 0, errors.Trace(err) 1004 } 1005 return commitTS, nil 1006 } 1007 1008 // checkSchemaValid checks if the schemaReplicant has changed, if tryAmend is set to true, committer will try to amend 1009 // this transaction using the related schemaReplicant changes. 1010 func (c *twoPhaseCommitter) checkSchemaValid(ctx context.Context, checkTS uint64, startSchemaReplicant SchemaVer, 1011 tryAmend bool) (*RelatedSchemaChange, bool, error) { 1012 checker, ok := c.txn.us.GetOption(ekv.SchemaChecker).(schemaLeaseChecker) 1013 if !ok { 1014 if c.connID > 0 { 1015 logutil.Logger(ctx).Warn("schemaLeaseChecker is not set for this transaction", 1016 zap.Uint64("connID", c.connID), 1017 zap.Uint64("startTS", c.startTS), 1018 zap.Uint64("commitTS", checkTS)) 1019 } 1020 return nil, false, nil 1021 } 1022 relatedChanges, err := checker.CheckBySchemaVer(checkTS, startSchemaReplicant) 1023 if err != nil { 1024 if tryAmend && relatedChanges != nil && relatedChanges.Amendable && c.txn.schemaAmender != nil { 1025 memAmended, amendErr := c.tryAmendTxn(ctx, startSchemaReplicant, relatedChanges) 1026 if amendErr != nil { 1027 logutil.BgLogger().Info("txn amend has failed", zap.Uint64("connID", c.connID), 1028 zap.Uint64("startTS", c.startTS), zap.Error(amendErr)) 1029 return nil, false, err 1030 } 1031 logutil.Logger(ctx).Info("amend txn successfully for pessimistic commit", 1032 zap.Uint64("connID", c.connID), zap.Uint64("txn startTS", c.startTS), zap.Bool("memAmended", memAmended), 1033 zap.Uint64("checkTS", checkTS), zap.Int64("startSchemaReplicantVer", startSchemaReplicant.SchemaMetaVersion()), 1034 zap.Int64s("causet ids", relatedChanges.PhyTblIDS), zap.Uint64s("action types", relatedChanges.CausetActionTypes)) 1035 return relatedChanges, 
memAmended, nil 1036 } 1037 return nil, false, errors.Trace(err) 1038 } 1039 return nil, false, nil 1040 } 1041 1042 func (c *twoPhaseCommitter) prewriteBinlog(ctx context.Context) chan *binloginfo.WriteResult { 1043 if !c.shouldWriteBinlog() { 1044 return nil 1045 } 1046 ch := make(chan *binloginfo.WriteResult, 1) 1047 go func() { 1048 logutil.Eventf(ctx, "start prewrite binlog") 1049 binInfo := c.txn.us.GetOption(ekv.BinlogInfo).(*binloginfo.BinlogInfo) 1050 bin := binInfo.Data 1051 bin.StartTs = int64(c.startTS) 1052 if bin.Tp == binlog.BinlogType_Prewrite { 1053 bin.PrewriteKey = c.primary() 1054 } 1055 wr := binInfo.WriteBinlog(c.causetstore.clusterID) 1056 if wr.Skipped() { 1057 binInfo.Data.PrewriteValue = nil 1058 binloginfo.AddOneSkippedCommitter() 1059 } 1060 logutil.Eventf(ctx, "finish prewrite binlog") 1061 ch <- wr 1062 }() 1063 return ch 1064 } 1065 1066 func (c *twoPhaseCommitter) writeFinishBinlog(ctx context.Context, tp binlog.BinlogType, commitTS int64) { 1067 if !c.shouldWriteBinlog() { 1068 return 1069 } 1070 binInfo := c.txn.us.GetOption(ekv.BinlogInfo).(*binloginfo.BinlogInfo) 1071 binInfo.Data.Tp = tp 1072 binInfo.Data.CommitTs = commitTS 1073 binInfo.Data.PrewriteValue = nil 1074 1075 wg := sync.WaitGroup{} 1076 mock := false 1077 failpoint.Inject("mockSyncBinlogCommit", func(val failpoint.Value) { 1078 if val.(bool) { 1079 wg.Add(1) 1080 mock = true 1081 } 1082 }) 1083 go func() { 1084 logutil.Eventf(ctx, "start write finish binlog") 1085 binlogWriteResult := binInfo.WriteBinlog(c.causetstore.clusterID) 1086 err := binlogWriteResult.GetError() 1087 if err != nil { 1088 logutil.BgLogger().Error("failed to write binlog", 1089 zap.Error(err)) 1090 } 1091 logutil.Eventf(ctx, "finish write finish binlog") 1092 if mock { 1093 wg.Done() 1094 } 1095 }() 1096 if mock { 1097 wg.Wait() 1098 } 1099 } 1100 1101 func (c *twoPhaseCommitter) shouldWriteBinlog() bool { 1102 return c.txn.us.GetOption(ekv.BinlogInfo) != nil 1103 } 1104 1105 // EinsteinDB 
recommends each RPC packet should be less than ~1MB. We keep each packet's 1106 // Key+Value size below 16KB. 1107 const txnCommitBatchSize = 16 * 1024 1108 1109 type batchMutations struct { 1110 region RegionVerID 1111 mutations CommitterMutations 1112 isPrimary bool 1113 } 1114 type batched struct { 1115 batches []batchMutations 1116 primaryIdx int 1117 primaryKey []byte 1118 } 1119 1120 func newBatched(primaryKey []byte) *batched { 1121 return &batched{ 1122 primaryIdx: -1, 1123 primaryKey: primaryKey, 1124 } 1125 } 1126 1127 // appendBatchMutationsBySize appends mutations to b. It may split the keys to make 1128 // sure each batch's size does not exceed the limit. 1129 func (b *batched) appendBatchMutationsBySize(region RegionVerID, mutations CommitterMutations, sizeFn func(k, v []byte) int, limit int) { 1130 var start, end int 1131 for start = 0; start < mutations.len(); start = end { 1132 var size int 1133 for end = start; end < mutations.len() && size < limit; end++ { 1134 var k, v []byte 1135 k = mutations.keys[end] 1136 if end < len(mutations.values) { 1137 v = mutations.values[end] 1138 } 1139 size += sizeFn(k, v) 1140 if b.primaryIdx < 0 && bytes.Equal(k, b.primaryKey) { 1141 b.primaryIdx = len(b.batches) 1142 } 1143 } 1144 b.batches = append(b.batches, batchMutations{ 1145 region: region, 1146 mutations: mutations.subRange(start, end), 1147 }) 1148 } 1149 } 1150 1151 func (b *batched) setPrimary() bool { 1152 // If the batches include the primary key, put it to the first 1153 if b.primaryIdx >= 0 { 1154 if len(b.batches) > 0 { 1155 b.batches[b.primaryIdx].isPrimary = true 1156 b.batches[0], b.batches[b.primaryIdx] = b.batches[b.primaryIdx], b.batches[0] 1157 b.primaryIdx = 0 1158 } 1159 return true 1160 } 1161 1162 return false 1163 } 1164 1165 func (b *batched) allBatches() []batchMutations { 1166 return b.batches 1167 } 1168 1169 // primaryBatch returns the batch containing the primary key. 
1170 // Precondition: `b.setPrimary() == true` 1171 func (b *batched) primaryBatch() []batchMutations { 1172 return b.batches[:1] 1173 } 1174 1175 func (b *batched) forgetPrimary() { 1176 if len(b.batches) == 0 { 1177 return 1178 } 1179 b.batches = b.batches[1:] 1180 } 1181 1182 // batchInterlockingDirectorate is txn controller providing rate control like utils 1183 type batchInterlockingDirectorate struct { 1184 rateLim int // concurrent worker numbers 1185 rateLimiter *rateLimit // rate limiter for concurrency control, maybe more strategies 1186 committer *twoPhaseCommitter // here maybe more different type committer in the future 1187 action twoPhaseCommitCausetAction // the work action type 1188 backoffer *Backoffer // Backoffer 1189 tokenWaitDuration time.Duration // get token wait time 1190 } 1191 1192 // newBatchInterlockingDirectorate create processor to handle concurrent batch works(prewrite/commit etc) 1193 func newBatchInterlockingDirectorate(rateLimit int, committer *twoPhaseCommitter, 1194 action twoPhaseCommitCausetAction, backoffer *Backoffer) *batchInterlockingDirectorate { 1195 return &batchInterlockingDirectorate{rateLimit, nil, committer, 1196 action, backoffer, 1 * time.Millisecond} 1197 } 1198 1199 // initUtils do initialize batchInterlockingDirectorate related policies like rateLimit soliton 1200 func (batchInterDir *batchInterlockingDirectorate) initUtils() error { 1201 // init rateLimiter by injected rate limit number 1202 batchInterDir.rateLimiter = newRateLimit(batchInterDir.rateLim) 1203 return nil 1204 } 1205 1206 // startWork concurrently do the work for each batch considering rate limit 1207 func (batchInterDir *batchInterlockingDirectorate) startWorker(exitCh chan struct{}, ch chan error, batches []batchMutations) { 1208 for idx, batch1 := range batches { 1209 waitStart := time.Now() 1210 if exit := batchInterDir.rateLimiter.getToken(exitCh); !exit { 1211 batchInterDir.tokenWaitDuration += time.Since(waitStart) 1212 batch := batch1 
1213 go func() { 1214 defer batchInterDir.rateLimiter.putToken() 1215 var singleBatchBackoffer *Backoffer 1216 if _, ok := batchInterDir.action.(actionCommit); ok { 1217 // Because the secondary batches of the commit actions are implemented to be 1218 // committed asynchronously in background goroutines, we should not 1219 // fork a child context and call cancel() while the foreground goroutine exits. 1220 // Otherwise the background goroutines will be canceled execeptionally. 1221 // Here we makes a new clone of the original backoffer for this goroutine 1222 // exclusively to avoid the data race when using the same backoffer 1223 // in concurrent goroutines. 1224 singleBatchBackoffer = batchInterDir.backoffer.Clone() 1225 } else { 1226 var singleBatchCancel context.CancelFunc 1227 singleBatchBackoffer, singleBatchCancel = batchInterDir.backoffer.Fork() 1228 defer singleBatchCancel() 1229 } 1230 beforeSleep := singleBatchBackoffer.totalSleep 1231 ch <- batchInterDir.action.handleSingleBatch(batchInterDir.committer, singleBatchBackoffer, batch) 1232 commitDetail := batchInterDir.committer.getDetail() 1233 if commitDetail != nil { // dagger operations of pessimistic-txn will let commitDetail be nil 1234 if delta := singleBatchBackoffer.totalSleep - beforeSleep; delta > 0 { 1235 atomic.AddInt64(&commitDetail.CommitBackoffTime, int64(singleBatchBackoffer.totalSleep-beforeSleep)*int64(time.Millisecond)) 1236 commitDetail.Mu.Lock() 1237 commitDetail.Mu.BackoffTypes = append(commitDetail.Mu.BackoffTypes, singleBatchBackoffer.types...) 
1238 commitDetail.Mu.Unlock() 1239 } 1240 } 1241 }() 1242 } else { 1243 logutil.Logger(batchInterDir.backoffer.ctx).Info("break startWorker", 1244 zap.Stringer("action", batchInterDir.action), zap.Int("batch size", len(batches)), 1245 zap.Int("index", idx)) 1246 break 1247 } 1248 } 1249 } 1250 1251 // process will start worker routine and collect results 1252 func (batchInterDir *batchInterlockingDirectorate) process(batches []batchMutations) error { 1253 var err error 1254 err = batchInterDir.initUtils() 1255 if err != nil { 1256 logutil.Logger(batchInterDir.backoffer.ctx).Error("batchInterlockingDirectorate initUtils failed", zap.Error(err)) 1257 return err 1258 } 1259 1260 // For prewrite, stop sending other requests after receiving first error. 1261 backoffer := batchInterDir.backoffer 1262 var cancel context.CancelFunc 1263 if _, ok := batchInterDir.action.(actionPrewrite); ok { 1264 backoffer, cancel = batchInterDir.backoffer.Fork() 1265 defer cancel() 1266 } 1267 // concurrently do the work for each batch. 1268 ch := make(chan error, len(batches)) 1269 exitCh := make(chan struct{}) 1270 go batchInterDir.startWorker(exitCh, ch, batches) 1271 // check results 1272 for i := 0; i < len(batches); i++ { 1273 if e := <-ch; e != nil { 1274 logutil.Logger(backoffer.ctx).Debug("2PC doCausetActionOnBatch failed", 1275 zap.Uint64("conn", batchInterDir.committer.connID), 1276 zap.Stringer("action type", batchInterDir.action), 1277 zap.Error(e), 1278 zap.Uint64("txnStartTS", batchInterDir.committer.startTS)) 1279 // Cancel other requests and return the first error. 
1280 if cancel != nil { 1281 logutil.Logger(backoffer.ctx).Debug("2PC doCausetActionOnBatch to cancel other actions", 1282 zap.Uint64("conn", batchInterDir.committer.connID), 1283 zap.Stringer("action type", batchInterDir.action), 1284 zap.Uint64("txnStartTS", batchInterDir.committer.startTS)) 1285 cancel() 1286 } 1287 if err == nil { 1288 err = e 1289 } 1290 } 1291 } 1292 close(exitCh) 1293 metrics.EinsteinDBTokenWaitDuration.Observe(batchInterDir.tokenWaitDuration.Seconds()) 1294 return err 1295 } 1296 1297 func getTxnPriority(txn *einsteindbTxn) pb.CommandPri { 1298 if pri := txn.us.GetOption(ekv.Priority); pri != nil { 1299 return ekvPriorityToCommandPri(pri.(int)) 1300 } 1301 return pb.CommandPri_Normal 1302 } 1303 1304 func getTxnSyncLog(txn *einsteindbTxn) bool { 1305 if syncOption := txn.us.GetOption(ekv.SyncLog); syncOption != nil { 1306 return syncOption.(bool) 1307 } 1308 return false 1309 } 1310 1311 func ekvPriorityToCommandPri(pri int) pb.CommandPri { 1312 switch pri { 1313 case ekv.PriorityLow: 1314 return pb.CommandPri_Low 1315 case ekv.PriorityHigh: 1316 return pb.CommandPri_High 1317 default: 1318 return pb.CommandPri_Normal 1319 } 1320 } 1321 1322 func (c *twoPhaseCommitter) setDetail(d *execdetails.CommitDetails) { 1323 atomic.StorePointer(&c.detail, unsafe.Pointer(d)) 1324 } 1325 1326 func (c *twoPhaseCommitter) getDetail() *execdetails.CommitDetails { 1327 return (*execdetails.CommitDetails)(atomic.LoadPointer(&c.detail)) 1328 } 1329 1330 func (c *twoPhaseCommitter) setUndeterminedErr(err error) { 1331 c.mu.Lock() 1332 defer c.mu.Unlock() 1333 c.mu.undeterminedErr = err 1334 } 1335 1336 func (c *twoPhaseCommitter) getUndeterminedErr() error { 1337 c.mu.RLock() 1338 defer c.mu.RUnlock() 1339 return c.mu.undeterminedErr 1340 }